1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
18 #include <sys/types.h>
22 #include <sys/param.h>
25 #include <sys/utsname.h>
28 #include <boost/lexical_cast.hpp>
29 #include <boost/fusion/include/std_pair.hpp>
31 #if defined(__FreeBSD__)
32 #define XATTR_CREATE 0x1
33 #define XATTR_REPLACE 0x2
35 #include <sys/xattr.h>
38 #if defined(__linux__)
39 #include <linux/falloc.h>
42 #include <sys/statvfs.h>
44 #include "common/config.h"
45 #include "common/version.h"
48 #include "messages/MClientSession.h"
49 #include "messages/MClientReconnect.h"
50 #include "messages/MClientRequest.h"
51 #include "messages/MClientRequestForward.h"
52 #include "messages/MClientReply.h"
53 #include "messages/MClientCaps.h"
54 #include "messages/MClientLease.h"
55 #include "messages/MClientSnap.h"
56 #include "messages/MCommandReply.h"
57 #include "messages/MOSDMap.h"
58 #include "messages/MClientQuota.h"
59 #include "messages/MClientCapRelease.h"
60 #include "messages/MMDSMap.h"
61 #include "messages/MFSMap.h"
62 #include "messages/MFSMapUser.h"
64 #include "mon/MonClient.h"
66 #include "mds/flock.h"
67 #include "osd/OSDMap.h"
68 #include "osdc/Filer.h"
70 #include "common/Cond.h"
71 #include "common/Mutex.h"
72 #include "common/perf_counters.h"
73 #include "common/admin_socket.h"
74 #include "common/errno.h"
75 #include "include/str_list.h"
77 #define dout_subsys ceph_subsys_client
79 #include "include/lru.h"
80 #include "include/compat.h"
81 #include "include/stringify.h"
87 #include "ClientSnapRealm.h"
89 #include "MetaSession.h"
90 #include "MetaRequest.h"
91 #include "ObjecterWriteback.h"
92 #include "posix_acl.h"
94 #include "include/assert.h"
95 #include "include/stat.h"
97 #include "include/cephfs/ceph_statx.h"
106 #define dout_prefix *_dout << "client." << whoami << " "
108 #define tout(cct) if (!cct->_conf->client_trace.empty()) traceout
110 // FreeBSD fails to define this
114 // Darwin fails to define this
123 #define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED)
125 void client_flush_set_callback(void *p
, ObjectCacher::ObjectSet
*oset
)
127 Client
*client
= static_cast<Client
*>(p
);
128 client
->flush_set_callback(oset
);
134 Client::CommandHook::CommandHook(Client
*client
) :
139 bool Client::CommandHook::call(std::string command
, cmdmap_t
& cmdmap
,
140 std::string format
, bufferlist
& out
)
142 Formatter
*f
= Formatter::create(format
);
143 f
->open_object_section("result");
144 m_client
->client_lock
.Lock();
145 if (command
== "mds_requests")
146 m_client
->dump_mds_requests(f
);
147 else if (command
== "mds_sessions")
148 m_client
->dump_mds_sessions(f
);
149 else if (command
== "dump_cache")
150 m_client
->dump_cache(f
);
151 else if (command
== "kick_stale_sessions")
152 m_client
->_kick_stale_sessions();
153 else if (command
== "status")
154 m_client
->dump_status(f
);
156 assert(0 == "bad command registered");
157 m_client
->client_lock
.Unlock();
167 dir_result_t::dir_result_t(Inode
*in
, const UserPerm
& perms
)
168 : inode(in
), offset(0), next_offset(2),
169 release_count(0), ordered_count(0), cache_index(0), start_shared_gen(0),
173 void Client::_reset_faked_inos()
176 free_faked_inos
.clear();
177 free_faked_inos
.insert(start
, (uint32_t)-1 - start
+ 1);
178 last_used_faked_ino
= 0;
179 _use_faked_inos
= sizeof(ino_t
) < 8 || cct
->_conf
->client_use_faked_inos
;
182 void Client::_assign_faked_ino(Inode
*in
)
184 interval_set
<ino_t
>::const_iterator it
= free_faked_inos
.lower_bound(last_used_faked_ino
+ 1);
185 if (it
== free_faked_inos
.end() && last_used_faked_ino
> 0) {
186 last_used_faked_ino
= 0;
187 it
= free_faked_inos
.lower_bound(last_used_faked_ino
+ 1);
189 assert(it
!= free_faked_inos
.end());
190 if (last_used_faked_ino
< it
.get_start()) {
191 assert(it
.get_len() > 0);
192 last_used_faked_ino
= it
.get_start();
194 ++last_used_faked_ino
;
195 assert(it
.get_start() + it
.get_len() > last_used_faked_ino
);
197 in
->faked_ino
= last_used_faked_ino
;
198 free_faked_inos
.erase(in
->faked_ino
);
199 faked_ino_map
[in
->faked_ino
] = in
->vino();
202 void Client::_release_faked_ino(Inode
*in
)
204 free_faked_inos
.insert(in
->faked_ino
);
205 faked_ino_map
.erase(in
->faked_ino
);
208 vinodeno_t
Client::_map_faked_ino(ino_t ino
)
213 else if (faked_ino_map
.count(ino
))
214 vino
= faked_ino_map
[ino
];
216 vino
= vinodeno_t(0, CEPH_NOSNAP
);
217 ldout(cct
, 10) << "map_faked_ino " << ino
<< " -> " << vino
<< dendl
;
221 vinodeno_t
Client::map_faked_ino(ino_t ino
)
223 Mutex::Locker
lock(client_lock
);
224 return _map_faked_ino(ino
);
229 Client::Client(Messenger
*m
, MonClient
*mc
, Objecter
*objecter_
)
230 : Dispatcher(m
->cct
),
231 m_command_hook(this),
232 timer(m
->cct
, client_lock
),
233 callback_handle(NULL
),
234 switch_interrupt_cb(NULL
),
236 ino_invalidate_cb(NULL
),
237 dentry_invalidate_cb(NULL
),
240 can_invalidate_dentries(false),
241 require_remount(false),
242 async_ino_invalidator(m
->cct
),
243 async_dentry_invalidator(m
->cct
),
244 interrupt_finisher(m
->cct
),
245 remount_finisher(m
->cct
),
246 objecter_finisher(m
->cct
),
248 messenger(m
), monclient(mc
),
250 whoami(mc
->get_global_id()), cap_epoch_barrier(0),
251 last_tid(0), oldest_tid(0), last_flush_tid(1),
253 mounted(false), unmounting(false), blacklisted(false),
254 local_osd(-1), local_osd_epoch(0),
255 unsafe_sync_write(0),
256 client_lock("Client::client_lock")
262 num_flushing_caps
= 0;
264 _dir_vxattrs_name_size
= _vxattrs_calcu_name_size(_dir_vxattrs
);
265 _file_vxattrs_name_size
= _vxattrs_calcu_name_size(_file_vxattrs
);
267 user_id
= cct
->_conf
->client_mount_uid
;
268 group_id
= cct
->_conf
->client_mount_gid
;
271 if (cct
->_conf
->client_acl_type
== "posix_acl")
272 acl_type
= POSIX_ACL
;
274 lru
.lru_set_max(cct
->_conf
->client_cache_size
);
275 lru
.lru_set_midpoint(cct
->_conf
->client_cache_mid
);
278 free_fd_set
.insert(10, 1<<30);
280 mdsmap
.reset(new MDSMap
);
283 writeback_handler
.reset(new ObjecterWriteback(objecter
, &objecter_finisher
,
285 objectcacher
.reset(new ObjectCacher(cct
, "libcephfs", *writeback_handler
, client_lock
,
286 client_flush_set_callback
, // all commit callback
288 cct
->_conf
->client_oc_size
,
289 cct
->_conf
->client_oc_max_objects
,
290 cct
->_conf
->client_oc_max_dirty
,
291 cct
->_conf
->client_oc_target_dirty
,
292 cct
->_conf
->client_oc_max_dirty_age
,
294 objecter_finisher
.start();
295 filer
.reset(new Filer(objecter
, &objecter_finisher
));
296 objecter
->enable_blacklist_events();
302 assert(!client_lock
.is_locked());
304 // It is necessary to hold client_lock, because any inode destruction
305 // may call into ObjectCacher, which asserts that it's lock (which is
306 // client_lock) is held.
309 client_lock
.Unlock();
312 void Client::tear_down_cache()
315 for (ceph::unordered_map
<int, Fh
*>::iterator it
= fd_map
.begin();
319 ldout(cct
, 1) << "tear_down_cache forcing close of fh " << it
->first
<< " ino " << fh
->inode
->ino
<< dendl
;
324 while (!opened_dirs
.empty()) {
325 dir_result_t
*dirp
= *opened_dirs
.begin();
326 ldout(cct
, 1) << "tear_down_cache forcing close of dir " << dirp
<< " ino " << dirp
->inode
->ino
<< dendl
;
336 assert(lru
.lru_get_size() == 0);
339 assert(inode_map
.size() <= 1 + root_parents
.size());
340 if (root
&& inode_map
.size() == 1 + root_parents
.size()) {
344 while (!root_parents
.empty())
345 root_parents
.erase(root_parents
.begin());
350 assert(inode_map
.empty());
353 inodeno_t
Client::get_root_ino()
355 Mutex::Locker
l(client_lock
);
356 if (use_faked_inos())
357 return root
->faked_ino
;
362 Inode
*Client::get_root()
364 Mutex::Locker
l(client_lock
);
372 void Client::dump_inode(Formatter
*f
, Inode
*in
, set
<Inode
*>& did
, bool disconnected
)
375 in
->make_long_path(path
);
376 ldout(cct
, 1) << "dump_inode: "
377 << (disconnected
? "DISCONNECTED ":"")
378 << "inode " << in
->ino
380 << " ref " << in
->get_num_ref()
384 f
->open_object_section("inode");
385 f
->dump_stream("path") << path
;
387 f
->dump_int("disconnected", 1);
394 ldout(cct
, 1) << " dir " << in
->dir
<< " size " << in
->dir
->dentries
.size() << dendl
;
395 for (ceph::unordered_map
<string
, Dentry
*>::iterator it
= in
->dir
->dentries
.begin();
396 it
!= in
->dir
->dentries
.end();
398 ldout(cct
, 1) << " " << in
->ino
<< " dn " << it
->first
<< " " << it
->second
<< " ref " << it
->second
->ref
<< dendl
;
400 f
->open_object_section("dentry");
404 if (it
->second
->inode
)
405 dump_inode(f
, it
->second
->inode
.get(), did
, false);
410 void Client::dump_cache(Formatter
*f
)
414 ldout(cct
, 1) << "dump_cache" << dendl
;
417 f
->open_array_section("cache");
420 dump_inode(f
, root
, did
, true);
422 // make a second pass to catch anything disconnected
423 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator it
= inode_map
.begin();
424 it
!= inode_map
.end();
426 if (did
.count(it
->second
))
428 dump_inode(f
, it
->second
, did
, true);
435 void Client::dump_status(Formatter
*f
)
437 assert(client_lock
.is_locked_by_me());
439 ldout(cct
, 1) << __func__
<< dendl
;
441 const epoch_t osd_epoch
442 = objecter
->with_osdmap(std::mem_fn(&OSDMap::get_epoch
));
445 f
->open_object_section("metadata");
446 for (const auto& kv
: metadata
)
447 f
->dump_string(kv
.first
.c_str(), kv
.second
);
450 f
->dump_int("dentry_count", lru
.lru_get_size());
451 f
->dump_int("dentry_pinned_count", lru
.lru_get_num_pinned());
452 f
->dump_int("id", get_nodeid().v
);
453 f
->dump_int("inode_count", inode_map
.size());
454 f
->dump_int("mds_epoch", mdsmap
->get_epoch());
455 f
->dump_int("osd_epoch", osd_epoch
);
456 f
->dump_int("osd_epoch_barrier", cap_epoch_barrier
);
463 objectcacher
->start();
466 assert(!initialized
);
468 messenger
->add_dispatcher_tail(this);
469 client_lock
.Unlock();
475 void Client::_finish_init()
479 PerfCountersBuilder
plb(cct
, "client", l_c_first
, l_c_last
);
480 plb
.add_time_avg(l_c_reply
, "reply", "Latency of receiving a reply on metadata request");
481 plb
.add_time_avg(l_c_lat
, "lat", "Latency of processing a metadata request");
482 plb
.add_time_avg(l_c_wrlat
, "wrlat", "Latency of a file data write operation");
483 logger
.reset(plb
.create_perf_counters());
484 cct
->get_perfcounters_collection()->add(logger
.get());
486 client_lock
.Unlock();
488 cct
->_conf
->add_observer(this);
490 AdminSocket
* admin_socket
= cct
->get_admin_socket();
491 int ret
= admin_socket
->register_command("mds_requests",
494 "show in-progress mds requests");
496 lderr(cct
) << "error registering admin socket command: "
497 << cpp_strerror(-ret
) << dendl
;
499 ret
= admin_socket
->register_command("mds_sessions",
502 "show mds session state");
504 lderr(cct
) << "error registering admin socket command: "
505 << cpp_strerror(-ret
) << dendl
;
507 ret
= admin_socket
->register_command("dump_cache",
510 "show in-memory metadata cache contents");
512 lderr(cct
) << "error registering admin socket command: "
513 << cpp_strerror(-ret
) << dendl
;
515 ret
= admin_socket
->register_command("kick_stale_sessions",
516 "kick_stale_sessions",
518 "kick sessions that were remote reset");
520 lderr(cct
) << "error registering admin socket command: "
521 << cpp_strerror(-ret
) << dendl
;
523 ret
= admin_socket
->register_command("status",
526 "show overall client status");
528 lderr(cct
) << "error registering admin socket command: "
529 << cpp_strerror(-ret
) << dendl
;
534 client_lock
.Unlock();
537 void Client::shutdown()
539 ldout(cct
, 1) << "shutdown" << dendl
;
541 // If we were not mounted, but were being used for sending
542 // MDS commands, we may have sessions that need closing.
545 client_lock
.Unlock();
547 cct
->_conf
->remove_observer(this);
549 AdminSocket
* admin_socket
= cct
->get_admin_socket();
550 admin_socket
->unregister_command("mds_requests");
551 admin_socket
->unregister_command("mds_sessions");
552 admin_socket
->unregister_command("dump_cache");
553 admin_socket
->unregister_command("kick_stale_sessions");
554 admin_socket
->unregister_command("status");
556 if (ino_invalidate_cb
) {
557 ldout(cct
, 10) << "shutdown stopping cache invalidator finisher" << dendl
;
558 async_ino_invalidator
.wait_for_empty();
559 async_ino_invalidator
.stop();
562 if (dentry_invalidate_cb
) {
563 ldout(cct
, 10) << "shutdown stopping dentry invalidator finisher" << dendl
;
564 async_dentry_invalidator
.wait_for_empty();
565 async_dentry_invalidator
.stop();
568 if (switch_interrupt_cb
) {
569 ldout(cct
, 10) << "shutdown stopping interrupt finisher" << dendl
;
570 interrupt_finisher
.wait_for_empty();
571 interrupt_finisher
.stop();
575 ldout(cct
, 10) << "shutdown stopping remount finisher" << dendl
;
576 remount_finisher
.wait_for_empty();
577 remount_finisher
.stop();
580 objectcacher
->stop(); // outside of client_lock! this does a join.
586 client_lock
.Unlock();
588 objecter_finisher
.wait_for_empty();
589 objecter_finisher
.stop();
592 cct
->get_perfcounters_collection()->remove(logger
.get());
598 // ===================
599 // metadata cache stuff
601 void Client::trim_cache(bool trim_kernel_dcache
)
603 ldout(cct
, 20) << "trim_cache size " << lru
.lru_get_size() << " max " << lru
.lru_get_max() << dendl
;
605 while (lru
.lru_get_size() != last
) {
606 last
= lru
.lru_get_size();
608 if (lru
.lru_get_size() <= lru
.lru_get_max()) break;
611 Dentry
*dn
= static_cast<Dentry
*>(lru
.lru_get_next_expire());
618 if (trim_kernel_dcache
&& lru
.lru_get_size() > lru
.lru_get_max())
619 _invalidate_kernel_dcache();
622 if (lru
.lru_get_size() == 0 && root
&& root
->get_num_ref() == 0 && inode_map
.size() == 1 + root_parents
.size()) {
623 ldout(cct
, 15) << "trim_cache trimmed root " << root
<< dendl
;
627 while (!root_parents
.empty())
628 root_parents
.erase(root_parents
.begin());
634 void Client::trim_cache_for_reconnect(MetaSession
*s
)
636 mds_rank_t mds
= s
->mds_num
;
637 ldout(cct
, 20) << "trim_cache_for_reconnect mds." << mds
<< dendl
;
640 list
<Dentry
*> skipped
;
641 while (lru
.lru_get_size() > 0) {
642 Dentry
*dn
= static_cast<Dentry
*>(lru
.lru_expire());
646 if ((dn
->inode
&& dn
->inode
->caps
.count(mds
)) ||
647 dn
->dir
->parent_inode
->caps
.count(mds
)) {
651 skipped
.push_back(dn
);
654 for(list
<Dentry
*>::iterator p
= skipped
.begin(); p
!= skipped
.end(); ++p
)
655 lru
.lru_insert_mid(*p
);
657 ldout(cct
, 20) << "trim_cache_for_reconnect mds." << mds
658 << " trimmed " << trimmed
<< " dentries" << dendl
;
660 if (s
->caps
.size() > 0)
661 _invalidate_kernel_dcache();
664 void Client::trim_dentry(Dentry
*dn
)
666 ldout(cct
, 15) << "trim_dentry unlinking dn " << dn
->name
667 << " in dir " << hex
<< dn
->dir
->parent_inode
->ino
670 Inode
*diri
= dn
->dir
->parent_inode
;
671 diri
->dir_release_count
++;
672 clear_dir_complete_and_ordered(diri
, true);
674 unlink(dn
, false, false); // drop dir, drop dentry
678 void Client::update_inode_file_bits(Inode
*in
,
679 uint64_t truncate_seq
, uint64_t truncate_size
,
680 uint64_t size
, uint64_t change_attr
,
681 uint64_t time_warp_seq
, utime_t ctime
,
684 version_t inline_version
,
685 bufferlist
& inline_data
,
689 ldout(cct
, 10) << "update_inode_file_bits " << *in
<< " " << ccap_string(issued
)
690 << " mtime " << mtime
<< dendl
;
691 ldout(cct
, 25) << "truncate_seq: mds " << truncate_seq
<< " local "
692 << in
->truncate_seq
<< " time_warp_seq: mds " << time_warp_seq
693 << " local " << in
->time_warp_seq
<< dendl
;
694 uint64_t prior_size
= in
->size
;
696 if (inline_version
> in
->inline_version
) {
697 in
->inline_data
= inline_data
;
698 in
->inline_version
= inline_version
;
701 /* always take a newer change attr */
702 if (change_attr
> in
->change_attr
)
703 in
->change_attr
= change_attr
;
705 if (truncate_seq
> in
->truncate_seq
||
706 (truncate_seq
== in
->truncate_seq
&& size
> in
->size
)) {
707 ldout(cct
, 10) << "size " << in
->size
<< " -> " << size
<< dendl
;
709 in
->reported_size
= size
;
710 if (truncate_seq
!= in
->truncate_seq
) {
711 ldout(cct
, 10) << "truncate_seq " << in
->truncate_seq
<< " -> "
712 << truncate_seq
<< dendl
;
713 in
->truncate_seq
= truncate_seq
;
714 in
->oset
.truncate_seq
= truncate_seq
;
716 // truncate cached file data
717 if (prior_size
> size
) {
718 _invalidate_inode_cache(in
, truncate_size
, prior_size
- truncate_size
);
722 // truncate inline data
723 if (in
->inline_version
< CEPH_INLINE_NONE
) {
724 uint32_t len
= in
->inline_data
.length();
726 in
->inline_data
.splice(size
, len
- size
);
729 if (truncate_seq
>= in
->truncate_seq
&&
730 in
->truncate_size
!= truncate_size
) {
732 ldout(cct
, 10) << "truncate_size " << in
->truncate_size
<< " -> "
733 << truncate_size
<< dendl
;
734 in
->truncate_size
= truncate_size
;
735 in
->oset
.truncate_size
= truncate_size
;
737 ldout(cct
, 0) << "Hmmm, truncate_seq && truncate_size changed on non-file inode!" << dendl
;
741 // be careful with size, mtime, atime
742 if (issued
& (CEPH_CAP_FILE_EXCL
|
744 CEPH_CAP_FILE_BUFFER
|
746 CEPH_CAP_XATTR_EXCL
)) {
747 ldout(cct
, 30) << "Yay have enough caps to look at our times" << dendl
;
748 if (ctime
> in
->ctime
)
750 if (time_warp_seq
> in
->time_warp_seq
) {
751 ldout(cct
, 10) << "mds time_warp_seq " << time_warp_seq
<< " on inode " << *in
752 << " is higher than local time_warp_seq "
753 << in
->time_warp_seq
<< dendl
;
754 //the mds updated times, so take those!
757 in
->time_warp_seq
= time_warp_seq
;
758 } else if (time_warp_seq
== in
->time_warp_seq
) {
760 if (mtime
> in
->mtime
)
762 if (atime
> in
->atime
)
764 } else if (issued
& CEPH_CAP_FILE_EXCL
) {
765 //ignore mds values as we have a higher seq
768 ldout(cct
, 30) << "Don't have enough caps, just taking mds' time values" << dendl
;
769 if (time_warp_seq
>= in
->time_warp_seq
) {
773 in
->time_warp_seq
= time_warp_seq
;
777 ldout(cct
, 0) << "WARNING: " << *in
<< " mds time_warp_seq "
778 << time_warp_seq
<< " is lower than local time_warp_seq "
784 void Client::_fragmap_remove_non_leaves(Inode
*in
)
786 for (map
<frag_t
,int>::iterator p
= in
->fragmap
.begin(); p
!= in
->fragmap
.end(); )
787 if (!in
->dirfragtree
.is_leaf(p
->first
))
788 in
->fragmap
.erase(p
++);
793 void Client::_fragmap_remove_stopped_mds(Inode
*in
, mds_rank_t mds
)
795 for (auto p
= in
->fragmap
.begin(); p
!= in
->fragmap
.end(); )
796 if (p
->second
== mds
)
797 in
->fragmap
.erase(p
++);
802 Inode
* Client::add_update_inode(InodeStat
*st
, utime_t from
,
803 MetaSession
*session
,
804 const UserPerm
& request_perms
)
807 bool was_new
= false;
808 if (inode_map
.count(st
->vino
)) {
809 in
= inode_map
[st
->vino
];
810 ldout(cct
, 12) << "add_update_inode had " << *in
<< " caps " << ccap_string(st
->cap
.caps
) << dendl
;
812 in
= new Inode(this, st
->vino
, &st
->layout
);
813 inode_map
[st
->vino
] = in
;
815 if (use_faked_inos())
816 _assign_faked_ino(in
);
822 } else if (!mounted
) {
823 root_parents
[root_ancestor
] = in
;
828 in
->ino
= st
->vino
.ino
;
829 in
->snapid
= st
->vino
.snapid
;
830 in
->mode
= st
->mode
& S_IFMT
;
835 if (in
->is_symlink())
836 in
->symlink
= st
->symlink
;
839 ldout(cct
, 12) << "add_update_inode adding " << *in
<< " caps " << ccap_string(st
->cap
.caps
) << dendl
;
842 return in
; // as with readdir returning indoes in different snaprealms (no caps!)
844 // only update inode if mds info is strictly newer, or it is the same and projected (odd).
845 bool updating_inode
= false;
847 if (st
->version
== 0 ||
848 (in
->version
& ~1) < st
->version
) {
849 updating_inode
= true;
852 issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
853 issued
|= implemented
;
855 in
->version
= st
->version
;
857 if ((issued
& CEPH_CAP_AUTH_EXCL
) == 0) {
861 in
->btime
= st
->btime
;
864 if ((issued
& CEPH_CAP_LINK_EXCL
) == 0) {
865 in
->nlink
= st
->nlink
;
868 in
->dirstat
= st
->dirstat
;
869 in
->rstat
= st
->rstat
;
870 in
->quota
= st
->quota
;
871 in
->layout
= st
->layout
;
874 in
->dir_layout
= st
->dir_layout
;
875 ldout(cct
, 20) << " dir hash is " << (int)in
->dir_layout
.dl_dir_hash
<< dendl
;
878 update_inode_file_bits(in
, st
->truncate_seq
, st
->truncate_size
, st
->size
,
879 st
->change_attr
, st
->time_warp_seq
, st
->ctime
,
880 st
->mtime
, st
->atime
, st
->inline_version
,
881 st
->inline_data
, issued
);
882 } else if (st
->inline_version
> in
->inline_version
) {
883 in
->inline_data
= st
->inline_data
;
884 in
->inline_version
= st
->inline_version
;
887 if ((in
->xattr_version
== 0 || !(issued
& CEPH_CAP_XATTR_EXCL
)) &&
888 st
->xattrbl
.length() &&
889 st
->xattr_version
> in
->xattr_version
) {
890 bufferlist::iterator p
= st
->xattrbl
.begin();
891 ::decode(in
->xattrs
, p
);
892 in
->xattr_version
= st
->xattr_version
;
895 // move me if/when version reflects fragtree changes.
896 if (in
->dirfragtree
!= st
->dirfragtree
) {
897 in
->dirfragtree
= st
->dirfragtree
;
898 _fragmap_remove_non_leaves(in
);
901 if (in
->snapid
== CEPH_NOSNAP
) {
902 add_update_cap(in
, session
, st
->cap
.cap_id
, st
->cap
.caps
, st
->cap
.seq
,
903 st
->cap
.mseq
, inodeno_t(st
->cap
.realm
), st
->cap
.flags
,
905 if (in
->auth_cap
&& in
->auth_cap
->session
== session
)
906 in
->max_size
= st
->max_size
;
908 in
->snap_caps
|= st
->cap
.caps
;
910 // setting I_COMPLETE needs to happen after adding the cap
911 if (updating_inode
&&
913 (st
->cap
.caps
& CEPH_CAP_FILE_SHARED
) &&
914 (issued
& CEPH_CAP_FILE_EXCL
) == 0 &&
915 in
->dirstat
.nfiles
== 0 &&
916 in
->dirstat
.nsubdirs
== 0) {
917 ldout(cct
, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on empty dir " << *in
<< dendl
;
918 in
->flags
|= I_COMPLETE
| I_DIR_ORDERED
;
920 ldout(cct
, 10) << " dir is open on empty dir " << in
->ino
<< " with "
921 << in
->dir
->dentries
.size() << " entries, marking all dentries null" << dendl
;
922 in
->dir
->readdir_cache
.clear();
923 for (auto p
= in
->dir
->dentries
.begin();
924 p
!= in
->dir
->dentries
.end();
926 unlink(p
->second
, true, true); // keep dir, keep dentry
928 if (in
->dir
->dentries
.empty())
938 * insert_dentry_inode - insert + link a single dentry + inode into the metadata cache.
940 Dentry
*Client::insert_dentry_inode(Dir
*dir
, const string
& dname
, LeaseStat
*dlease
,
941 Inode
*in
, utime_t from
, MetaSession
*session
,
945 if (dir
->dentries
.count(dname
))
946 dn
= dir
->dentries
[dname
];
948 ldout(cct
, 12) << "insert_dentry_inode '" << dname
<< "' vino " << in
->vino()
949 << " in dir " << dir
->parent_inode
->vino() << " dn " << dn
952 if (dn
&& dn
->inode
) {
953 if (dn
->inode
->vino() == in
->vino()) {
955 ldout(cct
, 12) << " had dentry " << dname
956 << " with correct vino " << dn
->inode
->vino()
959 ldout(cct
, 12) << " had dentry " << dname
960 << " with WRONG vino " << dn
->inode
->vino()
962 unlink(dn
, true, true); // keep dir, keep dentry
966 if (!dn
|| !dn
->inode
) {
967 InodeRef
tmp_ref(in
);
969 if (old_dentry
->dir
!= dir
) {
970 Inode
*old_diri
= old_dentry
->dir
->parent_inode
;
971 old_diri
->dir_ordered_count
++;
972 clear_dir_complete_and_ordered(old_diri
, false);
974 unlink(old_dentry
, dir
== old_dentry
->dir
, false); // drop dentry, keep dir open if its the same dir
976 Inode
*diri
= dir
->parent_inode
;
977 diri
->dir_ordered_count
++;
978 clear_dir_complete_and_ordered(diri
, false);
979 dn
= link(dir
, dname
, in
, dn
);
982 update_dentry_lease(dn
, dlease
, from
, session
);
986 void Client::update_dentry_lease(Dentry
*dn
, LeaseStat
*dlease
, utime_t from
, MetaSession
*session
)
989 dttl
+= (float)dlease
->duration_ms
/ 1000.0;
993 if (dlease
->mask
& CEPH_LOCK_DN
) {
994 if (dttl
> dn
->lease_ttl
) {
995 ldout(cct
, 10) << "got dentry lease on " << dn
->name
996 << " dur " << dlease
->duration_ms
<< "ms ttl " << dttl
<< dendl
;
997 dn
->lease_ttl
= dttl
;
998 dn
->lease_mds
= session
->mds_num
;
999 dn
->lease_seq
= dlease
->seq
;
1000 dn
->lease_gen
= session
->cap_gen
;
1003 dn
->cap_shared_gen
= dn
->dir
->parent_inode
->shared_gen
;
1008 * update MDS location cache for a single inode
1010 void Client::update_dir_dist(Inode
*in
, DirStat
*dst
)
1013 ldout(cct
, 20) << "got dirfrag map for " << in
->ino
<< " frag " << dst
->frag
<< " to mds " << dst
->auth
<< dendl
;
1014 if (dst
->auth
>= 0) {
1015 in
->fragmap
[dst
->frag
] = dst
->auth
;
1017 in
->fragmap
.erase(dst
->frag
);
1019 if (!in
->dirfragtree
.is_leaf(dst
->frag
)) {
1020 in
->dirfragtree
.force_to_leaf(cct
, dst
->frag
);
1021 _fragmap_remove_non_leaves(in
);
1025 in
->dir_replicated
= !dst
->dist
.empty(); // FIXME that's just one frag!
1029 if (!st->dirfrag_dist.empty()) { // FIXME
1030 set<int> dist = st->dirfrag_dist.begin()->second;
1031 if (dist.empty() && !in->dir_contacts.empty())
1032 ldout(cct, 9) << "lost dist spec for " << in->ino
1033 << " " << dist << dendl;
1034 if (!dist.empty() && in->dir_contacts.empty())
1035 ldout(cct, 9) << "got dist spec for " << in->ino
1036 << " " << dist << dendl;
1037 in->dir_contacts = dist;
1042 void Client::clear_dir_complete_and_ordered(Inode
*diri
, bool complete
)
1044 if (diri
->flags
& I_COMPLETE
) {
1046 ldout(cct
, 10) << " clearing (I_COMPLETE|I_DIR_ORDERED) on " << *diri
<< dendl
;
1047 diri
->flags
&= ~(I_COMPLETE
| I_DIR_ORDERED
);
1049 if (diri
->flags
& I_DIR_ORDERED
) {
1050 ldout(cct
, 10) << " clearing I_DIR_ORDERED on " << *diri
<< dendl
;
1051 diri
->flags
&= ~I_DIR_ORDERED
;
1055 diri
->dir
->readdir_cache
.clear();
1060 * insert results from readdir or lssnap into the metadata cache.
1062 void Client::insert_readdir_results(MetaRequest
*request
, MetaSession
*session
, Inode
*diri
) {
1064 MClientReply
*reply
= request
->reply
;
1065 ConnectionRef con
= request
->reply
->get_connection();
1066 uint64_t features
= con
->get_features();
1068 dir_result_t
*dirp
= request
->dirp
;
1071 // the extra buffer list is only set for readdir and lssnap replies
1072 bufferlist::iterator p
= reply
->get_extra_bl().begin();
1075 if (request
->head
.op
== CEPH_MDS_OP_LSSNAP
) {
1077 diri
= open_snapdir(diri
);
1080 // only open dir if we're actually adding stuff to it!
1081 Dir
*dir
= diri
->open_dir();
1091 bool end
= ((unsigned)flags
& CEPH_READDIR_FRAG_END
);
1092 bool hash_order
= ((unsigned)flags
& CEPH_READDIR_HASH_ORDER
);
1094 frag_t fg
= (unsigned)request
->head
.args
.readdir
.frag
;
1095 unsigned readdir_offset
= dirp
->next_offset
;
1096 string readdir_start
= dirp
->last_name
;
1097 assert(!readdir_start
.empty() || readdir_offset
== 2);
1099 unsigned last_hash
= 0;
1101 if (!readdir_start
.empty()) {
1102 last_hash
= ceph_frag_value(diri
->hash_dentry_name(readdir_start
));
1103 } else if (flags
& CEPH_READDIR_OFFSET_HASH
) {
1104 /* mds understands offset_hash */
1105 last_hash
= (unsigned)request
->head
.args
.readdir
.offset_hash
;
1109 if (fg
!= dst
.frag
) {
1110 ldout(cct
, 10) << "insert_trace got new frag " << fg
<< " -> " << dst
.frag
<< dendl
;
1114 readdir_start
.clear();
1115 dirp
->offset
= dir_result_t::make_fpos(fg
, readdir_offset
, false);
1119 ldout(cct
, 10) << __func__
<< " " << numdn
<< " readdir items, end=" << end
1120 << ", hash_order=" << hash_order
1121 << ", readdir_start " << readdir_start
1122 << ", last_hash " << last_hash
1123 << ", next_offset " << readdir_offset
<< dendl
;
1125 if (diri
->snapid
!= CEPH_SNAPDIR
&&
1126 fg
.is_leftmost() && readdir_offset
== 2 &&
1127 !(hash_order
&& last_hash
)) {
1128 dirp
->release_count
= diri
->dir_release_count
;
1129 dirp
->ordered_count
= diri
->dir_ordered_count
;
1130 dirp
->start_shared_gen
= diri
->shared_gen
;
1131 dirp
->cache_index
= 0;
1134 dirp
->buffer_frag
= fg
;
1136 _readdir_drop_dirp_buffer(dirp
);
1137 dirp
->buffer
.reserve(numdn
);
1141 for (unsigned i
=0; i
<numdn
; i
++) {
1143 ::decode(dlease
, p
);
1144 InodeStat
ist(p
, features
);
1146 ldout(cct
, 15) << "" << i
<< ": '" << dname
<< "'" << dendl
;
1148 Inode
*in
= add_update_inode(&ist
, request
->sent_stamp
, session
,
1151 if (diri
->dir
->dentries
.count(dname
)) {
1152 Dentry
*olddn
= diri
->dir
->dentries
[dname
];
1153 if (olddn
->inode
!= in
) {
1154 // replace incorrect dentry
1155 unlink(olddn
, true, true); // keep dir, dentry
1156 dn
= link(dir
, dname
, in
, olddn
);
1157 assert(dn
== olddn
);
1165 dn
= link(dir
, dname
, in
, NULL
);
1168 update_dentry_lease(dn
, &dlease
, request
->sent_stamp
, session
);
1170 unsigned hash
= ceph_frag_value(diri
->hash_dentry_name(dname
));
1171 if (hash
!= last_hash
)
1174 dn
->offset
= dir_result_t::make_fpos(hash
, readdir_offset
++, true);
1176 dn
->offset
= dir_result_t::make_fpos(fg
, readdir_offset
++, false);
1178 // add to readdir cache
1179 if (dirp
->release_count
== diri
->dir_release_count
&&
1180 dirp
->ordered_count
== diri
->dir_ordered_count
&&
1181 dirp
->start_shared_gen
== diri
->shared_gen
) {
1182 if (dirp
->cache_index
== dir
->readdir_cache
.size()) {
1184 assert(!dirp
->inode
->is_complete_and_ordered());
1185 dir
->readdir_cache
.reserve(dirp
->cache_index
+ numdn
);
1187 dir
->readdir_cache
.push_back(dn
);
1188 } else if (dirp
->cache_index
< dir
->readdir_cache
.size()) {
1189 if (dirp
->inode
->is_complete_and_ordered())
1190 assert(dir
->readdir_cache
[dirp
->cache_index
] == dn
);
1192 dir
->readdir_cache
[dirp
->cache_index
] = dn
;
1194 assert(0 == "unexpected readdir buffer idx");
1196 dirp
->cache_index
++;
1198 // add to cached result list
1199 dirp
->buffer
.push_back(dir_result_t::dentry(dn
->offset
, dname
, in
));
1200 ldout(cct
, 15) << __func__
<< " " << hex
<< dn
->offset
<< dec
<< ": '" << dname
<< "' -> " << in
->ino
<< dendl
;
1204 dirp
->last_name
= dname
;
1206 dirp
->next_offset
= 2;
1208 dirp
->next_offset
= readdir_offset
;
1210 if (dir
->is_empty())
1217 * insert a trace from a MDS reply into the cache.
1219 Inode
* Client::insert_trace(MetaRequest
*request
, MetaSession
*session
)
1221 MClientReply
*reply
= request
->reply
;
1222 int op
= request
->get_op();
1224 ldout(cct
, 10) << "insert_trace from " << request
->sent_stamp
<< " mds." << session
->mds_num
1225 << " is_target=" << (int)reply
->head
.is_target
1226 << " is_dentry=" << (int)reply
->head
.is_dentry
1229 bufferlist::iterator p
= reply
->get_trace_bl().begin();
1230 if (request
->got_unsafe
) {
1231 ldout(cct
, 10) << "insert_trace -- already got unsafe; ignoring" << dendl
;
1237 ldout(cct
, 10) << "insert_trace -- no trace" << dendl
;
1239 Dentry
*d
= request
->dentry();
1241 Inode
*diri
= d
->dir
->parent_inode
;
1242 diri
->dir_release_count
++;
1243 clear_dir_complete_and_ordered(diri
, true);
1246 if (d
&& reply
->get_result() == 0) {
1247 if (op
== CEPH_MDS_OP_RENAME
) {
1249 Dentry
*od
= request
->old_dentry();
1250 ldout(cct
, 10) << " unlinking rename src dn " << od
<< " for traceless reply" << dendl
;
1252 unlink(od
, true, true); // keep dir, dentry
1253 } else if (op
== CEPH_MDS_OP_RMDIR
||
1254 op
== CEPH_MDS_OP_UNLINK
) {
1256 ldout(cct
, 10) << " unlinking unlink/rmdir dn " << d
<< " for traceless reply" << dendl
;
1257 unlink(d
, true, true); // keep dir, dentry
1263 ConnectionRef con
= request
->reply
->get_connection();
1264 uint64_t features
= con
->get_features();
1265 ldout(cct
, 10) << " features 0x" << hex
<< features
<< dec
<< dendl
;
1268 SnapRealm
*realm
= NULL
;
1269 if (reply
->snapbl
.length())
1270 update_snap_trace(reply
->snapbl
, &realm
);
1272 ldout(cct
, 10) << " hrm "
1273 << " is_target=" << (int)reply
->head
.is_target
1274 << " is_dentry=" << (int)reply
->head
.is_dentry
1283 if (reply
->head
.is_dentry
) {
1284 dirst
.decode(p
, features
);
1287 ::decode(dlease
, p
);
1291 if (reply
->head
.is_target
) {
1292 ist
.decode(p
, features
);
1293 if (cct
->_conf
->client_debug_getattr_caps
) {
1294 unsigned wanted
= 0;
1295 if (op
== CEPH_MDS_OP_GETATTR
|| op
== CEPH_MDS_OP_LOOKUP
)
1296 wanted
= request
->head
.args
.getattr
.mask
;
1297 else if (op
== CEPH_MDS_OP_OPEN
|| op
== CEPH_MDS_OP_CREATE
)
1298 wanted
= request
->head
.args
.open
.mask
;
1300 if ((wanted
& CEPH_CAP_XATTR_SHARED
) &&
1301 !(ist
.xattr_version
> 0 && ist
.xattrbl
.length() > 0))
1302 assert(0 == "MDS reply does not contain xattrs");
1305 in
= add_update_inode(&ist
, request
->sent_stamp
, session
,
1310 if (reply
->head
.is_dentry
) {
1311 diri
= add_update_inode(&dirst
, request
->sent_stamp
, session
,
1313 update_dir_dist(diri
, &dst
); // dir stat info is attached to ..
1316 Dir
*dir
= diri
->open_dir();
1317 insert_dentry_inode(dir
, dname
, &dlease
, in
, request
->sent_stamp
, session
,
1318 (op
== CEPH_MDS_OP_RENAME
) ? request
->old_dentry() : NULL
);
1321 if (diri
->dir
&& diri
->dir
->dentries
.count(dname
)) {
1322 dn
= diri
->dir
->dentries
[dname
];
1324 diri
->dir_ordered_count
++;
1325 clear_dir_complete_and_ordered(diri
, false);
1326 unlink(dn
, true, true); // keep dir, dentry
1329 if (dlease
.duration_ms
> 0) {
1331 Dir
*dir
= diri
->open_dir();
1332 dn
= link(dir
, dname
, NULL
, NULL
);
1334 update_dentry_lease(dn
, &dlease
, request
->sent_stamp
, session
);
1337 } else if (op
== CEPH_MDS_OP_LOOKUPSNAP
||
1338 op
== CEPH_MDS_OP_MKSNAP
) {
1339 ldout(cct
, 10) << " faking snap lookup weirdness" << dendl
;
1340 // fake it for snap lookup
1341 vinodeno_t vino
= ist
.vino
;
1342 vino
.snapid
= CEPH_SNAPDIR
;
1343 assert(inode_map
.count(vino
));
1344 diri
= inode_map
[vino
];
1346 string dname
= request
->path
.last_dentry();
1349 dlease
.duration_ms
= 0;
1352 Dir
*dir
= diri
->open_dir();
1353 insert_dentry_inode(dir
, dname
, &dlease
, in
, request
->sent_stamp
, session
);
1355 if (diri
->dir
&& diri
->dir
->dentries
.count(dname
)) {
1356 Dentry
*dn
= diri
->dir
->dentries
[dname
];
1358 unlink(dn
, true, true); // keep dir, dentry
1364 if (op
== CEPH_MDS_OP_READDIR
||
1365 op
== CEPH_MDS_OP_LSSNAP
) {
1366 insert_readdir_results(request
, session
, in
);
1367 } else if (op
== CEPH_MDS_OP_LOOKUPNAME
) {
1368 // hack: return parent inode instead
1372 if (request
->dentry() == NULL
&& in
!= request
->inode()) {
1373 // pin the target inode if its parent dentry is not pinned
1374 request
->set_other_inode(in
);
1379 put_snap_realm(realm
);
1381 request
->target
= in
;
1387 mds_rank_t
Client::choose_target_mds(MetaRequest
*req
, Inode
** phash_diri
)
1389 mds_rank_t mds
= MDS_RANK_NONE
;
1391 bool is_hash
= false;
1397 if (req
->resend_mds
>= 0) {
1398 mds
= req
->resend_mds
;
1399 req
->resend_mds
= -1;
1400 ldout(cct
, 10) << "choose_target_mds resend_mds specified as mds." << mds
<< dendl
;
1404 if (cct
->_conf
->client_use_random_mds
)
1410 ldout(cct
, 20) << "choose_target_mds starting with req->inode " << *in
<< dendl
;
1411 if (req
->path
.depth()) {
1412 hash
= in
->hash_dentry_name(req
->path
[0]);
1413 ldout(cct
, 20) << "choose_target_mds inode dir hash is " << (int)in
->dir_layout
.dl_dir_hash
1414 << " on " << req
->path
[0]
1415 << " => " << hash
<< dendl
;
1420 in
= de
->inode
.get();
1421 ldout(cct
, 20) << "choose_target_mds starting with req->dentry inode " << *in
<< dendl
;
1423 in
= de
->dir
->parent_inode
;
1424 hash
= in
->hash_dentry_name(de
->name
);
1425 ldout(cct
, 20) << "choose_target_mds dentry dir hash is " << (int)in
->dir_layout
.dl_dir_hash
1426 << " on " << de
->name
1427 << " => " << hash
<< dendl
;
1432 if (in
->snapid
!= CEPH_NOSNAP
) {
1433 ldout(cct
, 10) << "choose_target_mds " << *in
<< " is snapped, using nonsnap parent" << dendl
;
1434 while (in
->snapid
!= CEPH_NOSNAP
) {
1435 if (in
->snapid
== CEPH_SNAPDIR
)
1436 in
= in
->snapdir_parent
.get();
1437 else if (!in
->dn_set
.empty())
1438 /* In most cases there will only be one dentry, so getting it
1439 * will be the correct action. If there are multiple hard links,
1440 * I think the MDS should be able to redirect as needed*/
1441 in
= in
->get_first_parent()->dir
->parent_inode
;
1443 ldout(cct
, 10) << "got unlinked inode, can't look at parent" << dendl
;
1450 ldout(cct
, 20) << "choose_target_mds " << *in
<< " is_hash=" << is_hash
1451 << " hash=" << hash
<< dendl
;
1453 if (is_hash
&& S_ISDIR(in
->mode
) && !in
->fragmap
.empty()) {
1454 frag_t fg
= in
->dirfragtree
[hash
];
1455 if (in
->fragmap
.count(fg
)) {
1456 mds
= in
->fragmap
[fg
];
1459 ldout(cct
, 10) << "choose_target_mds from dirfragtree hash" << dendl
;
1464 if (req
->auth_is_best())
1466 if (!cap
&& !in
->caps
.empty())
1467 cap
= in
->caps
.begin()->second
;
1470 mds
= cap
->session
->mds_num
;
1471 ldout(cct
, 10) << "choose_target_mds from caps on inode " << *in
<< dendl
;
1478 mds
= _get_random_up_mds();
1479 ldout(cct
, 10) << "did not get mds through better means, so chose random mds " << mds
<< dendl
;
1483 ldout(cct
, 20) << "mds is " << mds
<< dendl
;
1488 void Client::connect_mds_targets(mds_rank_t mds
)
1490 ldout(cct
, 10) << "connect_mds_targets for mds." << mds
<< dendl
;
1491 assert(mds_sessions
.count(mds
));
1492 const MDSMap::mds_info_t
& info
= mdsmap
->get_mds_info(mds
);
1493 for (set
<mds_rank_t
>::const_iterator q
= info
.export_targets
.begin();
1494 q
!= info
.export_targets
.end();
1496 if (mds_sessions
.count(*q
) == 0 &&
1497 mdsmap
->is_clientreplay_or_active_or_stopping(*q
)) {
1498 ldout(cct
, 10) << "check_mds_sessions opening mds." << mds
1499 << " export target mds." << *q
<< dendl
;
1500 _open_mds_session(*q
);
1505 void Client::dump_mds_sessions(Formatter
*f
)
1507 f
->dump_int("id", get_nodeid().v
);
1508 f
->open_array_section("sessions");
1509 for (map
<mds_rank_t
,MetaSession
*>::const_iterator p
= mds_sessions
.begin(); p
!= mds_sessions
.end(); ++p
) {
1510 f
->open_object_section("session");
1515 f
->dump_int("mdsmap_epoch", mdsmap
->get_epoch());
1517 void Client::dump_mds_requests(Formatter
*f
)
1519 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
1520 p
!= mds_requests
.end();
1522 f
->open_object_section("request");
1528 int Client::verify_reply_trace(int r
,
1529 MetaRequest
*request
, MClientReply
*reply
,
1530 InodeRef
*ptarget
, bool *pcreated
,
1531 const UserPerm
& perms
)
1533 // check whether this request actually did the create, and set created flag
1534 bufferlist extra_bl
;
1535 inodeno_t created_ino
;
1536 bool got_created_ino
= false;
1537 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
;
1539 extra_bl
.claim(reply
->get_extra_bl());
1540 if (extra_bl
.length() >= 8) {
1541 // if the extra bufferlist has a buffer, we assume its the created inode
1542 // and that this request to create succeeded in actually creating
1543 // the inode (won the race with other create requests)
1544 ::decode(created_ino
, extra_bl
);
1545 got_created_ino
= true;
1546 ldout(cct
, 10) << "make_request created ino " << created_ino
<< dendl
;
1550 *pcreated
= got_created_ino
;
1552 if (request
->target
) {
1553 *ptarget
= request
->target
;
1554 ldout(cct
, 20) << "make_request target is " << *ptarget
->get() << dendl
;
1556 if (got_created_ino
&& (p
= inode_map
.find(vinodeno_t(created_ino
, CEPH_NOSNAP
))) != inode_map
.end()) {
1557 (*ptarget
) = p
->second
;
1558 ldout(cct
, 20) << "make_request created, target is " << *ptarget
->get() << dendl
;
1560 // we got a traceless reply, and need to look up what we just
1561 // created. for now, do this by name. someday, do this by the
1562 // ino... which we know! FIXME.
1564 Dentry
*d
= request
->dentry();
1567 ldout(cct
, 10) << "make_request got traceless reply, looking up #"
1568 << d
->dir
->parent_inode
->ino
<< "/" << d
->name
1569 << " got_ino " << got_created_ino
1570 << " ino " << created_ino
1572 r
= _do_lookup(d
->dir
->parent_inode
, d
->name
, request
->regetattr_mask
,
1575 // if the dentry is not linked, just do our best. see #5021.
1576 assert(0 == "how did this happen? i want logs!");
1579 Inode
*in
= request
->inode();
1580 ldout(cct
, 10) << "make_request got traceless reply, forcing getattr on #"
1581 << in
->ino
<< dendl
;
1582 r
= _getattr(in
, request
->regetattr_mask
, perms
, true);
1586 // verify ino returned in reply and trace_dist are the same
1587 if (got_created_ino
&&
1588 created_ino
.val
!= target
->ino
.val
) {
1589 ldout(cct
, 5) << "create got ino " << created_ino
<< " but then failed on lookup; EINTR?" << dendl
;
1593 ptarget
->swap(target
);
1605 * Blocking helper to make an MDS request.
1607 * If the ptarget flag is set, behavior changes slightly: the caller
1608 * expects to get a pointer to the inode we are creating or operating
1609 * on. As a result, we will follow up any traceless mutation reply
1610 * with a getattr or lookup to transparently handle a traceless reply
1611 * from the MDS (as when the MDS restarts and the client has to replay
1614 * @param request the MetaRequest to execute
1615 * @param perms The user uid/gid to execute as (eventually, full group lists?)
1616 * @param ptarget [optional] address to store a pointer to the target inode we want to create or operate on
1617 * @param pcreated [optional; required if ptarget] where to store a bool of whether our create atomically created a file
1618 * @param use_mds [optional] prefer a specific mds (-1 for default)
1619 * @param pdirbl [optional; disallowed if ptarget] where to pass extra reply payload to the caller
1621 int Client::make_request(MetaRequest
*request
,
1622 const UserPerm
& perms
,
1623 InodeRef
*ptarget
, bool *pcreated
,
1629 // assign a unique tid
1630 ceph_tid_t tid
= ++last_tid
;
1631 request
->set_tid(tid
);
1634 request
->op_stamp
= ceph_clock_now();
1637 mds_requests
[tid
] = request
->get();
1638 if (oldest_tid
== 0 && request
->get_op() != CEPH_MDS_OP_SETFILELOCK
)
1641 request
->set_caller_perms(perms
);
1643 if (cct
->_conf
->client_inject_fixed_oldest_tid
) {
1644 ldout(cct
, 20) << __func__
<< " injecting fixed oldest_client_tid(1)" << dendl
;
1645 request
->set_oldest_client_tid(1);
1647 request
->set_oldest_client_tid(oldest_tid
);
1652 request
->resend_mds
= use_mds
;
1655 if (request
->aborted())
1659 request
->abort(-EBLACKLISTED
);
1665 request
->caller_cond
= &caller_cond
;
1668 Inode
*hash_diri
= NULL
;
1669 mds_rank_t mds
= choose_target_mds(request
, &hash_diri
);
1670 int mds_state
= (mds
== MDS_RANK_NONE
) ? MDSMap::STATE_NULL
: mdsmap
->get_state(mds
);
1671 if (mds_state
!= MDSMap::STATE_ACTIVE
&& mds_state
!= MDSMap::STATE_STOPPING
) {
1672 if (mds_state
== MDSMap::STATE_NULL
&& mds
>= mdsmap
->get_max_mds()) {
1674 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, remove it from fragmap" << dendl
;
1675 _fragmap_remove_stopped_mds(hash_diri
, mds
);
1677 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, trying a random mds" << dendl
;
1678 request
->resend_mds
= _get_random_up_mds();
1681 ldout(cct
, 10) << " target mds." << mds
<< " not active, waiting for new mdsmap" << dendl
;
1682 wait_on_list(waiting_for_mdsmap
);
1688 MetaSession
*session
= NULL
;
1689 if (!have_open_session(mds
)) {
1690 session
= _get_or_open_mds_session(mds
);
1693 if (session
->state
== MetaSession::STATE_OPENING
) {
1694 ldout(cct
, 10) << "waiting for session to mds." << mds
<< " to open" << dendl
;
1695 wait_on_context_list(session
->waiting_for_open
);
1696 // Abort requests on REJECT from MDS
1697 if (rejected_by_mds
.count(mds
)) {
1698 request
->abort(-EPERM
);
1704 if (!have_open_session(mds
))
1707 session
= mds_sessions
[mds
];
1711 send_request(request
, session
);
1714 ldout(cct
, 20) << "awaiting reply|forward|kick on " << &caller_cond
<< dendl
;
1715 request
->kick
= false;
1716 while (!request
->reply
&& // reply
1717 request
->resend_mds
< 0 && // forward
1719 caller_cond
.Wait(client_lock
);
1720 request
->caller_cond
= NULL
;
1722 // did we get a reply?
1727 if (!request
->reply
) {
1728 assert(request
->aborted());
1729 assert(!request
->got_unsafe
);
1730 r
= request
->get_abort_code();
1731 request
->item
.remove_myself();
1732 unregister_request(request
);
1733 put_request(request
); // ours
1738 MClientReply
*reply
= request
->reply
;
1739 request
->reply
= NULL
;
1740 r
= reply
->get_result();
1742 request
->success
= true;
1744 // kick dispatcher (we've got it!)
1745 assert(request
->dispatch_cond
);
1746 request
->dispatch_cond
->Signal();
1747 ldout(cct
, 20) << "sendrecv kickback on tid " << tid
<< " " << request
->dispatch_cond
<< dendl
;
1748 request
->dispatch_cond
= 0;
1750 if (r
>= 0 && ptarget
)
1751 r
= verify_reply_trace(r
, request
, reply
, ptarget
, pcreated
, perms
);
1754 pdirbl
->claim(reply
->get_extra_bl());
1757 utime_t lat
= ceph_clock_now();
1758 lat
-= request
->sent_stamp
;
1759 ldout(cct
, 20) << "lat " << lat
<< dendl
;
1760 logger
->tinc(l_c_lat
, lat
);
1761 logger
->tinc(l_c_reply
, lat
);
1763 put_request(request
);
1769 void Client::unregister_request(MetaRequest
*req
)
1771 mds_requests
.erase(req
->tid
);
1772 if (req
->tid
== oldest_tid
) {
1773 map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.upper_bound(oldest_tid
);
1775 if (p
== mds_requests
.end()) {
1779 if (p
->second
->get_op() != CEPH_MDS_OP_SETFILELOCK
) {
1780 oldest_tid
= p
->first
;
1789 void Client::put_request(MetaRequest
*request
)
1791 if (request
->_put()) {
1793 if (request
->success
)
1794 op
= request
->get_op();
1796 request
->take_other_inode(&other_in
);
1800 (op
== CEPH_MDS_OP_RMDIR
||
1801 op
== CEPH_MDS_OP_RENAME
||
1802 op
== CEPH_MDS_OP_RMSNAP
)) {
1803 _try_to_trim_inode(other_in
.get(), false);
1808 int Client::encode_inode_release(Inode
*in
, MetaRequest
*req
,
1809 mds_rank_t mds
, int drop
,
1810 int unless
, int force
)
1812 ldout(cct
, 20) << "encode_inode_release enter(in:" << *in
<< ", req:" << req
1813 << " mds:" << mds
<< ", drop:" << drop
<< ", unless:" << unless
1814 << ", have:" << ", force:" << force
<< ")" << dendl
;
1816 if (in
->caps
.count(mds
)) {
1817 Cap
*caps
= in
->caps
[mds
];
1818 drop
&= ~(in
->dirty_caps
| get_caps_used(in
));
1819 if ((drop
& caps
->issued
) &&
1820 !(unless
& caps
->issued
)) {
1821 ldout(cct
, 25) << "Dropping caps. Initial " << ccap_string(caps
->issued
) << dendl
;
1822 caps
->issued
&= ~drop
;
1823 caps
->implemented
&= ~drop
;
1825 ldout(cct
, 25) << "Now have: " << ccap_string(caps
->issued
) << dendl
;
1830 ceph_mds_request_release rel
;
1832 rel
.cap_id
= caps
->cap_id
;
1833 rel
.seq
= caps
->seq
;
1834 rel
.issue_seq
= caps
->issue_seq
;
1835 rel
.mseq
= caps
->mseq
;
1836 rel
.caps
= caps
->implemented
;
1837 rel
.wanted
= caps
->wanted
;
1840 req
->cap_releases
.push_back(MClientRequest::Release(rel
,""));
1843 ldout(cct
, 25) << "encode_inode_release exit(in:" << *in
<< ") released:"
1844 << released
<< dendl
;
1848 void Client::encode_dentry_release(Dentry
*dn
, MetaRequest
*req
,
1849 mds_rank_t mds
, int drop
, int unless
)
1851 ldout(cct
, 20) << "encode_dentry_release enter(dn:"
1852 << dn
<< ")" << dendl
;
1855 released
= encode_inode_release(dn
->dir
->parent_inode
, req
,
1856 mds
, drop
, unless
, 1);
1857 if (released
&& dn
->lease_mds
== mds
) {
1858 ldout(cct
, 25) << "preemptively releasing dn to mds" << dendl
;
1859 MClientRequest::Release
& rel
= req
->cap_releases
.back();
1860 rel
.item
.dname_len
= dn
->name
.length();
1861 rel
.item
.dname_seq
= dn
->lease_seq
;
1862 rel
.dname
= dn
->name
;
1864 ldout(cct
, 25) << "encode_dentry_release exit(dn:"
1865 << dn
<< ")" << dendl
;
1870 * This requires the MClientRequest *request member to be set.
1871 * It will error out horribly without one.
1872 * Additionally, if you set any *drop member, you'd better have
1873 * set the corresponding dentry!
1875 void Client::encode_cap_releases(MetaRequest
*req
, mds_rank_t mds
)
1877 ldout(cct
, 20) << "encode_cap_releases enter (req: "
1878 << req
<< ", mds: " << mds
<< ")" << dendl
;
1879 if (req
->inode_drop
&& req
->inode())
1880 encode_inode_release(req
->inode(), req
,
1881 mds
, req
->inode_drop
,
1884 if (req
->old_inode_drop
&& req
->old_inode())
1885 encode_inode_release(req
->old_inode(), req
,
1886 mds
, req
->old_inode_drop
,
1887 req
->old_inode_unless
);
1888 if (req
->other_inode_drop
&& req
->other_inode())
1889 encode_inode_release(req
->other_inode(), req
,
1890 mds
, req
->other_inode_drop
,
1891 req
->other_inode_unless
);
1893 if (req
->dentry_drop
&& req
->dentry())
1894 encode_dentry_release(req
->dentry(), req
,
1895 mds
, req
->dentry_drop
,
1896 req
->dentry_unless
);
1898 if (req
->old_dentry_drop
&& req
->old_dentry())
1899 encode_dentry_release(req
->old_dentry(), req
,
1900 mds
, req
->old_dentry_drop
,
1901 req
->old_dentry_unless
);
1902 ldout(cct
, 25) << "encode_cap_releases exit (req: "
1903 << req
<< ", mds " << mds
<<dendl
;
1906 bool Client::have_open_session(mds_rank_t mds
)
1909 mds_sessions
.count(mds
) &&
1910 (mds_sessions
[mds
]->state
== MetaSession::STATE_OPEN
||
1911 mds_sessions
[mds
]->state
== MetaSession::STATE_STALE
);
1914 MetaSession
*Client::_get_mds_session(mds_rank_t mds
, Connection
*con
)
1916 if (mds_sessions
.count(mds
) == 0)
1918 MetaSession
*s
= mds_sessions
[mds
];
1924 MetaSession
*Client::_get_or_open_mds_session(mds_rank_t mds
)
1926 if (mds_sessions
.count(mds
))
1927 return mds_sessions
[mds
];
1928 return _open_mds_session(mds
);
1932 * Populate a map of strings with client-identifying metadata,
1933 * such as the hostname. Call this once at initialization.
1935 void Client::populate_metadata(const std::string
&mount_root
)
1941 metadata
["hostname"] = u
.nodename
;
1942 ldout(cct
, 20) << __func__
<< " read hostname '" << u
.nodename
<< "'" << dendl
;
1944 ldout(cct
, 1) << __func__
<< " failed to read hostname (" << cpp_strerror(r
) << ")" << dendl
;
1947 metadata
["pid"] = stringify(getpid());
1949 // Ceph entity id (the '0' in "client.0")
1950 metadata
["entity_id"] = cct
->_conf
->name
.get_id();
1952 // Our mount position
1953 if (!mount_root
.empty()) {
1954 metadata
["root"] = mount_root
;
1958 metadata
["ceph_version"] = pretty_version_to_str();
1959 metadata
["ceph_sha1"] = git_version_to_str();
1961 // Apply any metadata from the user's configured overrides
1962 std::vector
<std::string
> tokens
;
1963 get_str_vec(cct
->_conf
->client_metadata
, ",", tokens
);
1964 for (const auto &i
: tokens
) {
1965 auto eqpos
= i
.find("=");
1966 // Throw out anything that isn't of the form "<str>=<str>"
1967 if (eqpos
== 0 || eqpos
== std::string::npos
|| eqpos
== i
.size()) {
1968 lderr(cct
) << "Invalid metadata keyval pair: '" << i
<< "'" << dendl
;
1971 metadata
[i
.substr(0, eqpos
)] = i
.substr(eqpos
+ 1);
1976 * Optionally add or override client metadata fields.
1978 void Client::update_metadata(std::string
const &k
, std::string
const &v
)
1980 Mutex::Locker
l(client_lock
);
1981 assert(initialized
);
1983 if (metadata
.count(k
)) {
1984 ldout(cct
, 1) << __func__
<< " warning, overriding metadata field '" << k
1985 << "' from '" << metadata
[k
] << "' to '" << v
<< "'" << dendl
;
1991 MetaSession
*Client::_open_mds_session(mds_rank_t mds
)
1993 ldout(cct
, 10) << "_open_mds_session mds." << mds
<< dendl
;
1994 assert(mds_sessions
.count(mds
) == 0);
1995 MetaSession
*session
= new MetaSession
;
1996 session
->mds_num
= mds
;
1998 session
->inst
= mdsmap
->get_inst(mds
);
1999 session
->con
= messenger
->get_connection(session
->inst
);
2000 session
->state
= MetaSession::STATE_OPENING
;
2001 session
->mds_state
= MDSMap::STATE_NULL
;
2002 mds_sessions
[mds
] = session
;
2004 // Maybe skip sending a request to open if this MDS daemon
2005 // has previously sent us a REJECT.
2006 if (rejected_by_mds
.count(mds
)) {
2007 if (rejected_by_mds
[mds
] == session
->inst
) {
2008 ldout(cct
, 4) << "_open_mds_session mds." << mds
<< " skipping "
2009 "because we were rejected" << dendl
;
2012 ldout(cct
, 4) << "_open_mds_session mds." << mds
<< " old inst "
2013 "rejected us, trying with new inst" << dendl
;
2014 rejected_by_mds
.erase(mds
);
2018 MClientSession
*m
= new MClientSession(CEPH_SESSION_REQUEST_OPEN
);
2019 m
->client_meta
= metadata
;
2020 session
->con
->send_message(m
);
2024 void Client::_close_mds_session(MetaSession
*s
)
2026 ldout(cct
, 2) << "_close_mds_session mds." << s
->mds_num
<< " seq " << s
->seq
<< dendl
;
2027 s
->state
= MetaSession::STATE_CLOSING
;
2028 s
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE
, s
->seq
));
2031 void Client::_closed_mds_session(MetaSession
*s
)
2033 s
->state
= MetaSession::STATE_CLOSED
;
2034 s
->con
->mark_down();
2035 signal_context_list(s
->waiting_for_open
);
2036 mount_cond
.Signal();
2037 remove_session_caps(s
);
2038 kick_requests_closed(s
);
2039 mds_sessions
.erase(s
->mds_num
);
2043 void Client::handle_client_session(MClientSession
*m
)
2045 mds_rank_t from
= mds_rank_t(m
->get_source().num());
2046 ldout(cct
, 10) << "handle_client_session " << *m
<< " from mds." << from
<< dendl
;
2048 MetaSession
*session
= _get_mds_session(from
, m
->get_connection().get());
2050 ldout(cct
, 10) << " discarding session message from sessionless mds " << m
->get_source_inst() << dendl
;
2055 switch (m
->get_op()) {
2056 case CEPH_SESSION_OPEN
:
2057 renew_caps(session
);
2058 session
->state
= MetaSession::STATE_OPEN
;
2060 mount_cond
.Signal();
2062 connect_mds_targets(from
);
2063 signal_context_list(session
->waiting_for_open
);
2066 case CEPH_SESSION_CLOSE
:
2067 _closed_mds_session(session
);
2070 case CEPH_SESSION_RENEWCAPS
:
2071 if (session
->cap_renew_seq
== m
->get_seq()) {
2073 session
->last_cap_renew_request
+ mdsmap
->get_session_timeout();
2074 wake_inode_waiters(session
);
2078 case CEPH_SESSION_STALE
:
2079 renew_caps(session
);
2082 case CEPH_SESSION_RECALL_STATE
:
2083 trim_caps(session
, m
->get_max_caps());
2086 case CEPH_SESSION_FLUSHMSG
:
2087 session
->con
->send_message(new MClientSession(CEPH_SESSION_FLUSHMSG_ACK
, m
->get_seq()));
2090 case CEPH_SESSION_FORCE_RO
:
2091 force_session_readonly(session
);
2094 case CEPH_SESSION_REJECT
:
2095 rejected_by_mds
[session
->mds_num
] = session
->inst
;
2096 _closed_mds_session(session
);
2107 bool Client::_any_stale_sessions() const
2109 assert(client_lock
.is_locked_by_me());
2111 for (const auto &i
: mds_sessions
) {
2112 if (i
.second
->state
== MetaSession::STATE_STALE
) {
2120 void Client::_kick_stale_sessions()
2122 ldout(cct
, 1) << "kick_stale_sessions" << dendl
;
2124 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2125 p
!= mds_sessions
.end(); ) {
2126 MetaSession
*s
= p
->second
;
2128 if (s
->state
== MetaSession::STATE_STALE
)
2129 _closed_mds_session(s
);
2133 void Client::send_request(MetaRequest
*request
, MetaSession
*session
,
2134 bool drop_cap_releases
)
2137 mds_rank_t mds
= session
->mds_num
;
2138 ldout(cct
, 10) << "send_request rebuilding request " << request
->get_tid()
2139 << " for mds." << mds
<< dendl
;
2140 MClientRequest
*r
= build_client_request(request
);
2141 if (request
->dentry()) {
2142 r
->set_dentry_wanted();
2144 if (request
->got_unsafe
) {
2145 r
->set_replayed_op();
2146 if (request
->target
)
2147 r
->head
.ino
= request
->target
->ino
;
2149 encode_cap_releases(request
, mds
);
2150 if (drop_cap_releases
) // we haven't send cap reconnect yet, drop cap releases
2151 request
->cap_releases
.clear();
2153 r
->releases
.swap(request
->cap_releases
);
2155 r
->set_mdsmap_epoch(mdsmap
->get_epoch());
2156 if (r
->head
.op
== CEPH_MDS_OP_SETXATTR
) {
2157 objecter
->with_osdmap([r
](const OSDMap
& o
) {
2158 r
->set_osdmap_epoch(o
.get_epoch());
2162 if (request
->mds
== -1) {
2163 request
->sent_stamp
= ceph_clock_now();
2164 ldout(cct
, 20) << "send_request set sent_stamp to " << request
->sent_stamp
<< dendl
;
2168 Inode
*in
= request
->inode();
2169 if (in
&& in
->caps
.count(mds
))
2170 request
->sent_on_mseq
= in
->caps
[mds
]->mseq
;
2172 session
->requests
.push_back(&request
->item
);
2174 ldout(cct
, 10) << "send_request " << *r
<< " to mds." << mds
<< dendl
;
2175 session
->con
->send_message(r
);
2178 MClientRequest
* Client::build_client_request(MetaRequest
*request
)
2180 MClientRequest
*req
= new MClientRequest(request
->get_op());
2181 req
->set_tid(request
->tid
);
2182 req
->set_stamp(request
->op_stamp
);
2183 memcpy(&req
->head
, &request
->head
, sizeof(ceph_mds_request_head
));
2185 // if the filepath's haven't been set, set them!
2186 if (request
->path
.empty()) {
2187 Inode
*in
= request
->inode();
2188 Dentry
*de
= request
->dentry();
2190 in
->make_nosnap_relative_path(request
->path
);
2193 de
->inode
->make_nosnap_relative_path(request
->path
);
2195 de
->dir
->parent_inode
->make_nosnap_relative_path(request
->path
);
2196 request
->path
.push_dentry(de
->name
);
2198 else ldout(cct
, 1) << "Warning -- unable to construct a filepath!"
2199 << " No path, inode, or appropriately-endowed dentry given!"
2201 } else ldout(cct
, 1) << "Warning -- unable to construct a filepath!"
2202 << " No path, inode, or dentry given!"
2205 req
->set_filepath(request
->get_filepath());
2206 req
->set_filepath2(request
->get_filepath2());
2207 req
->set_data(request
->data
);
2208 req
->set_retry_attempt(request
->retry_attempt
++);
2209 req
->head
.num_fwd
= request
->num_fwd
;
2211 int gid_count
= request
->perms
.get_gids(&_gids
);
2212 req
->set_gid_list(gid_count
, _gids
);
2218 void Client::handle_client_request_forward(MClientRequestForward
*fwd
)
2220 mds_rank_t mds
= mds_rank_t(fwd
->get_source().num());
2221 MetaSession
*session
= _get_mds_session(mds
, fwd
->get_connection().get());
2226 ceph_tid_t tid
= fwd
->get_tid();
2228 if (mds_requests
.count(tid
) == 0) {
2229 ldout(cct
, 10) << "handle_client_request_forward no pending request on tid " << tid
<< dendl
;
2234 MetaRequest
*request
= mds_requests
[tid
];
2237 // reset retry counter
2238 request
->retry_attempt
= 0;
2240 // request not forwarded, or dest mds has no session.
2242 ldout(cct
, 10) << "handle_client_request tid " << tid
2243 << " fwd " << fwd
->get_num_fwd()
2244 << " to mds." << fwd
->get_dest_mds()
2245 << ", resending to " << fwd
->get_dest_mds()
2249 request
->item
.remove_myself();
2250 request
->num_fwd
= fwd
->get_num_fwd();
2251 request
->resend_mds
= fwd
->get_dest_mds();
2252 request
->caller_cond
->Signal();
2257 bool Client::is_dir_operation(MetaRequest
*req
)
2259 int op
= req
->get_op();
2260 if (op
== CEPH_MDS_OP_MKNOD
|| op
== CEPH_MDS_OP_LINK
||
2261 op
== CEPH_MDS_OP_UNLINK
|| op
== CEPH_MDS_OP_RENAME
||
2262 op
== CEPH_MDS_OP_MKDIR
|| op
== CEPH_MDS_OP_RMDIR
||
2263 op
== CEPH_MDS_OP_SYMLINK
|| op
== CEPH_MDS_OP_CREATE
)
2268 void Client::handle_client_reply(MClientReply
*reply
)
2270 mds_rank_t mds_num
= mds_rank_t(reply
->get_source().num());
2271 MetaSession
*session
= _get_mds_session(mds_num
, reply
->get_connection().get());
2277 ceph_tid_t tid
= reply
->get_tid();
2278 bool is_safe
= reply
->is_safe();
2280 if (mds_requests
.count(tid
) == 0) {
2281 lderr(cct
) << "handle_client_reply no pending request on tid " << tid
2282 << " safe is:" << is_safe
<< dendl
;
2286 MetaRequest
*request
= mds_requests
.at(tid
);
2288 ldout(cct
, 20) << "handle_client_reply got a reply. Safe:" << is_safe
2289 << " tid " << tid
<< dendl
;
2291 if (request
->got_unsafe
&& !is_safe
) {
2292 //duplicate response
2293 ldout(cct
, 0) << "got a duplicate reply on tid " << tid
<< " from mds "
2294 << mds_num
<< " safe:" << is_safe
<< dendl
;
2299 if (-ESTALE
== reply
->get_result()) { // see if we can get to proper MDS
2300 ldout(cct
, 20) << "got ESTALE on tid " << request
->tid
2301 << " from mds." << request
->mds
<< dendl
;
2302 request
->send_to_auth
= true;
2303 request
->resend_mds
= choose_target_mds(request
);
2304 Inode
*in
= request
->inode();
2305 if (request
->resend_mds
>= 0 &&
2306 request
->resend_mds
== request
->mds
&&
2308 in
->caps
.count(request
->resend_mds
) == 0 ||
2309 request
->sent_on_mseq
== in
->caps
[request
->resend_mds
]->mseq
)) {
2310 // have to return ESTALE
2312 request
->caller_cond
->Signal();
2316 ldout(cct
, 20) << "have to return ESTALE" << dendl
;
2319 assert(request
->reply
== NULL
);
2320 request
->reply
= reply
;
2321 insert_trace(request
, session
);
2323 // Handle unsafe reply
2325 request
->got_unsafe
= true;
2326 session
->unsafe_requests
.push_back(&request
->unsafe_item
);
2327 if (is_dir_operation(request
)) {
2328 Inode
*dir
= request
->inode();
2330 dir
->unsafe_ops
.push_back(&request
->unsafe_dir_item
);
2332 if (request
->target
) {
2333 InodeRef
&in
= request
->target
;
2334 in
->unsafe_ops
.push_back(&request
->unsafe_target_item
);
2338 // Only signal the caller once (on the first reply):
2339 // Either its an unsafe reply, or its a safe reply and no unsafe reply was sent.
2340 if (!is_safe
|| !request
->got_unsafe
) {
2342 request
->dispatch_cond
= &cond
;
2345 ldout(cct
, 20) << "handle_client_reply signalling caller " << (void*)request
->caller_cond
<< dendl
;
2346 request
->caller_cond
->Signal();
2348 // wake for kick back
2349 while (request
->dispatch_cond
) {
2350 ldout(cct
, 20) << "handle_client_reply awaiting kickback on tid " << tid
<< " " << &cond
<< dendl
;
2351 cond
.Wait(client_lock
);
2356 // the filesystem change is committed to disk
2357 // we're done, clean up
2358 if (request
->got_unsafe
) {
2359 request
->unsafe_item
.remove_myself();
2360 request
->unsafe_dir_item
.remove_myself();
2361 request
->unsafe_target_item
.remove_myself();
2362 signal_cond_list(request
->waitfor_safe
);
2364 request
->item
.remove_myself();
2365 unregister_request(request
);
2368 mount_cond
.Signal();
2371 void Client::_handle_full_flag(int64_t pool
)
2373 ldout(cct
, 1) << __func__
<< ": FULL: cancelling outstanding operations "
2374 << "on " << pool
<< dendl
;
2375 // Cancel all outstanding ops in this pool with -ENOSPC: it is necessary
2376 // to do this rather than blocking, because otherwise when we fill up we
2377 // potentially lock caps forever on files with dirty pages, and we need
2378 // to be able to release those caps to the MDS so that it can delete files
2379 // and free up space.
2380 epoch_t cancelled_epoch
= objecter
->op_cancel_writes(-ENOSPC
, pool
);
2382 // For all inodes with layouts in this pool and a pending flush write op
2383 // (i.e. one of the ones we will cancel), we've got to purge_set their data
2384 // from ObjectCacher so that it doesn't re-issue the write in response to
2385 // the ENOSPC error.
2386 // Fortunately since we're cancelling everything in a given pool, we don't
2387 // need to know which ops belong to which ObjectSet, we can just blow all
2388 // the un-flushed cached data away and mark any dirty inodes' async_err
2389 // field with -ENOSPC as long as we're sure all the ops we cancelled were
2390 // affecting this pool, and all the objectsets we're purging were also
2392 for (unordered_map
<vinodeno_t
,Inode
*>::iterator i
= inode_map
.begin();
2393 i
!= inode_map
.end(); ++i
)
2395 Inode
*inode
= i
->second
;
2396 if (inode
->oset
.dirty_or_tx
2397 && (pool
== -1 || inode
->layout
.pool_id
== pool
)) {
2398 ldout(cct
, 4) << __func__
<< ": FULL: inode 0x" << std::hex
<< i
->first
<< std::dec
2399 << " has dirty objects, purging and setting ENOSPC" << dendl
;
2400 objectcacher
->purge_set(&inode
->oset
);
2401 inode
->set_async_err(-ENOSPC
);
2405 if (cancelled_epoch
!= (epoch_t
)-1) {
2406 set_cap_epoch_barrier(cancelled_epoch
);
2410 void Client::handle_osd_map(MOSDMap
*m
)
2412 std::set
<entity_addr_t
> new_blacklists
;
2413 objecter
->consume_blacklist_events(&new_blacklists
);
2415 const auto myaddr
= messenger
->get_myaddr();
2416 if (!blacklisted
&& new_blacklists
.count(myaddr
)) {
2417 auto epoch
= objecter
->with_osdmap([](const OSDMap
&o
){
2418 return o
.get_epoch();
2420 lderr(cct
) << "I was blacklisted at osd epoch " << epoch
<< dendl
;
2422 for (std::map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2423 p
!= mds_requests
.end(); ) {
2424 auto req
= p
->second
;
2426 req
->abort(-EBLACKLISTED
);
2427 if (req
->caller_cond
) {
2429 req
->caller_cond
->Signal();
2433 // Progress aborts on any requests that were on this waitlist. Any
2434 // requests that were on a waiting_for_open session waitlist
2435 // will get kicked during close session below.
2436 signal_cond_list(waiting_for_mdsmap
);
2438 // Force-close all sessions: assume this is not abandoning any state
2439 // on the MDS side because the MDS will have seen the blacklist too.
2440 while(!mds_sessions
.empty()) {
2441 auto i
= mds_sessions
.begin();
2442 auto session
= i
->second
;
2443 _closed_mds_session(session
);
2446 // Since we know all our OSD ops will fail, cancel them all preemtively,
2447 // so that on an unhealthy cluster we can umount promptly even if e.g.
2448 // some PGs were inaccessible.
2449 objecter
->op_cancel_writes(-EBLACKLISTED
);
2451 } else if (blacklisted
) {
2452 // Handle case where we were blacklisted but no longer are
2453 blacklisted
= objecter
->with_osdmap([myaddr
](const OSDMap
&o
){
2454 return o
.is_blacklisted(myaddr
);});
2457 if (objecter
->osdmap_full_flag()) {
2458 _handle_full_flag(-1);
2460 // Accumulate local list of full pools so that I can drop
2461 // the objecter lock before re-entering objecter in
2463 std::vector
<int64_t> full_pools
;
2465 objecter
->with_osdmap([&full_pools
](const OSDMap
&o
) {
2466 for (const auto& kv
: o
.get_pools()) {
2467 if (kv
.second
.has_flag(pg_pool_t::FLAG_FULL
)) {
2468 full_pools
.push_back(kv
.first
);
2473 for (auto p
: full_pools
)
2474 _handle_full_flag(p
);
2476 // Subscribe to subsequent maps to watch for the full flag going
2477 // away. For the global full flag objecter does this for us, but
2478 // it pays no attention to the per-pool full flag so in this branch
2479 // we do it ourselves.
2480 if (!full_pools
.empty()) {
2481 objecter
->maybe_request_map();
2489 // ------------------------
2490 // incoming messages
2493 bool Client::ms_dispatch(Message
*m
)
2495 Mutex::Locker
l(client_lock
);
2497 ldout(cct
, 10) << "inactive, discarding " << *m
<< dendl
;
2502 switch (m
->get_type()) {
2503 // mounting and mds sessions
2504 case CEPH_MSG_MDS_MAP
:
2505 handle_mds_map(static_cast<MMDSMap
*>(m
));
2507 case CEPH_MSG_FS_MAP
:
2508 handle_fs_map(static_cast<MFSMap
*>(m
));
2510 case CEPH_MSG_FS_MAP_USER
:
2511 handle_fs_map_user(static_cast<MFSMapUser
*>(m
));
2513 case CEPH_MSG_CLIENT_SESSION
:
2514 handle_client_session(static_cast<MClientSession
*>(m
));
2517 case CEPH_MSG_OSD_MAP
:
2518 handle_osd_map(static_cast<MOSDMap
*>(m
));
2522 case CEPH_MSG_CLIENT_REQUEST_FORWARD
:
2523 handle_client_request_forward(static_cast<MClientRequestForward
*>(m
));
2525 case CEPH_MSG_CLIENT_REPLY
:
2526 handle_client_reply(static_cast<MClientReply
*>(m
));
2529 case CEPH_MSG_CLIENT_SNAP
:
2530 handle_snap(static_cast<MClientSnap
*>(m
));
2532 case CEPH_MSG_CLIENT_CAPS
:
2533 handle_caps(static_cast<MClientCaps
*>(m
));
2535 case CEPH_MSG_CLIENT_LEASE
:
2536 handle_lease(static_cast<MClientLease
*>(m
));
2538 case MSG_COMMAND_REPLY
:
2539 if (m
->get_source().type() == CEPH_ENTITY_TYPE_MDS
) {
2540 handle_command_reply(static_cast<MCommandReply
*>(m
));
2545 case CEPH_MSG_CLIENT_QUOTA
:
2546 handle_quota(static_cast<MClientQuota
*>(m
));
2555 ldout(cct
, 10) << "unmounting: trim pass, size was " << lru
.lru_get_size()
2556 << "+" << inode_map
.size() << dendl
;
2557 long unsigned size
= lru
.lru_get_size() + inode_map
.size();
2559 if (size
< lru
.lru_get_size() + inode_map
.size()) {
2560 ldout(cct
, 10) << "unmounting: trim pass, cache shrank, poking unmount()" << dendl
;
2561 mount_cond
.Signal();
2563 ldout(cct
, 10) << "unmounting: trim pass, size still " << lru
.lru_get_size()
2564 << "+" << inode_map
.size() << dendl
;
2571 void Client::handle_fs_map(MFSMap
*m
)
2573 fsmap
.reset(new FSMap(m
->get_fsmap()));
2576 signal_cond_list(waiting_for_fsmap
);
2578 monclient
->sub_got("fsmap", fsmap
->get_epoch());
2581 void Client::handle_fs_map_user(MFSMapUser
*m
)
2583 fsmap_user
.reset(new FSMapUser
);
2584 *fsmap_user
= m
->get_fsmap();
2587 monclient
->sub_got("fsmap.user", fsmap_user
->get_epoch());
2588 signal_cond_list(waiting_for_fsmap
);
2591 void Client::handle_mds_map(MMDSMap
* m
)
2593 if (m
->get_epoch() <= mdsmap
->get_epoch()) {
2594 ldout(cct
, 1) << "handle_mds_map epoch " << m
->get_epoch()
2595 << " is identical to or older than our "
2596 << mdsmap
->get_epoch() << dendl
;
2601 ldout(cct
, 1) << "handle_mds_map epoch " << m
->get_epoch() << dendl
;
2603 std::unique_ptr
<MDSMap
> oldmap(new MDSMap
);
2604 oldmap
.swap(mdsmap
);
2606 mdsmap
->decode(m
->get_encoded());
2608 // Cancel any commands for missing or laggy GIDs
2609 std::list
<ceph_tid_t
> cancel_ops
;
2610 auto &commands
= command_table
.get_commands();
2611 for (const auto &i
: commands
) {
2612 auto &op
= i
.second
;
2613 const mds_gid_t op_mds_gid
= op
.mds_gid
;
2614 if (mdsmap
->is_dne_gid(op_mds_gid
) || mdsmap
->is_laggy_gid(op_mds_gid
)) {
2615 ldout(cct
, 1) << __func__
<< ": cancelling command op " << i
.first
<< dendl
;
2616 cancel_ops
.push_back(i
.first
);
2618 std::ostringstream ss
;
2619 ss
<< "MDS " << op_mds_gid
<< " went away";
2620 *(op
.outs
) = ss
.str();
2622 op
.con
->mark_down();
2624 op
.on_finish
->complete(-ETIMEDOUT
);
2629 for (std::list
<ceph_tid_t
>::iterator i
= cancel_ops
.begin();
2630 i
!= cancel_ops
.end(); ++i
) {
2631 command_table
.erase(*i
);
2635 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2636 p
!= mds_sessions
.end(); ) {
2637 mds_rank_t mds
= p
->first
;
2638 MetaSession
*session
= p
->second
;
2641 int oldstate
= oldmap
->get_state(mds
);
2642 int newstate
= mdsmap
->get_state(mds
);
2643 if (!mdsmap
->is_up(mds
)) {
2644 session
->con
->mark_down();
2645 } else if (mdsmap
->get_inst(mds
) != session
->inst
) {
2646 session
->con
->mark_down();
2647 session
->inst
= mdsmap
->get_inst(mds
);
2648 // When new MDS starts to take over, notify kernel to trim unused entries
2649 // in its dcache/icache. Hopefully, the kernel will release some unused
2650 // inodes before the new MDS enters reconnect state.
2651 trim_cache_for_reconnect(session
);
2652 } else if (oldstate
== newstate
)
2653 continue; // no change
2655 session
->mds_state
= newstate
;
2656 if (newstate
== MDSMap::STATE_RECONNECT
) {
2657 session
->con
= messenger
->get_connection(session
->inst
);
2658 send_reconnect(session
);
2659 } else if (newstate
>= MDSMap::STATE_ACTIVE
) {
2660 if (oldstate
< MDSMap::STATE_ACTIVE
) {
2661 // kick new requests
2662 kick_requests(session
);
2663 kick_flushing_caps(session
);
2664 signal_context_list(session
->waiting_for_open
);
2665 kick_maxsize_requests(session
);
2666 wake_inode_waiters(session
);
2668 connect_mds_targets(mds
);
2669 } else if (newstate
== MDSMap::STATE_NULL
&&
2670 mds
>= mdsmap
->get_max_mds()) {
2671 _closed_mds_session(session
);
2675 // kick any waiting threads
2676 signal_cond_list(waiting_for_mdsmap
);
2680 monclient
->sub_got("mdsmap", mdsmap
->get_epoch());
2683 void Client::send_reconnect(MetaSession
*session
)
2685 mds_rank_t mds
= session
->mds_num
;
2686 ldout(cct
, 10) << "send_reconnect to mds." << mds
<< dendl
;
2688 // trim unused caps to reduce MDS's cache rejoin time
2689 trim_cache_for_reconnect(session
);
2691 session
->readonly
= false;
2693 if (session
->release
) {
2694 session
->release
->put();
2695 session
->release
= NULL
;
2698 // reset my cap seq number
2700 //connect to the mds' offload targets
2701 connect_mds_targets(mds
);
2702 //make sure unsafe requests get saved
2703 resend_unsafe_requests(session
);
2705 MClientReconnect
*m
= new MClientReconnect
;
2707 // i have an open session.
2708 ceph::unordered_set
<inodeno_t
> did_snaprealm
;
2709 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
= inode_map
.begin();
2710 p
!= inode_map
.end();
2712 Inode
*in
= p
->second
;
2713 if (in
->caps
.count(mds
)) {
2714 ldout(cct
, 10) << " caps on " << p
->first
2715 << " " << ccap_string(in
->caps
[mds
]->issued
)
2716 << " wants " << ccap_string(in
->caps_wanted())
2719 in
->make_long_path(path
);
2720 ldout(cct
, 10) << " path " << path
<< dendl
;
2723 _encode_filelocks(in
, flockbl
);
2725 Cap
*cap
= in
->caps
[mds
];
2726 cap
->seq
= 0; // reset seq.
2727 cap
->issue_seq
= 0; // reset seq.
2728 cap
->mseq
= 0; // reset seq.
2729 cap
->issued
= cap
->implemented
;
2731 snapid_t snap_follows
= 0;
2732 if (!in
->cap_snaps
.empty())
2733 snap_follows
= in
->cap_snaps
.begin()->first
;
2735 m
->add_cap(p
->first
.ino
,
2737 path
.get_ino(), path
.get_path(), // ino
2738 in
->caps_wanted(), // wanted
2739 cap
->issued
, // issued
2744 if (did_snaprealm
.count(in
->snaprealm
->ino
) == 0) {
2745 ldout(cct
, 10) << " snaprealm " << *in
->snaprealm
<< dendl
;
2746 m
->add_snaprealm(in
->snaprealm
->ino
, in
->snaprealm
->seq
, in
->snaprealm
->parent
);
2747 did_snaprealm
.insert(in
->snaprealm
->ino
);
2752 early_kick_flushing_caps(session
);
2754 session
->con
->send_message(m
);
2756 mount_cond
.Signal();
2760 void Client::kick_requests(MetaSession
*session
)
2762 ldout(cct
, 10) << "kick_requests for mds." << session
->mds_num
<< dendl
;
2763 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2764 p
!= mds_requests
.end();
2766 MetaRequest
*req
= p
->second
;
2767 if (req
->got_unsafe
)
2769 if (req
->aborted()) {
2770 if (req
->caller_cond
) {
2772 req
->caller_cond
->Signal();
2776 if (req
->retry_attempt
> 0)
2777 continue; // new requests only
2778 if (req
->mds
== session
->mds_num
) {
2779 send_request(p
->second
, session
);
2784 void Client::resend_unsafe_requests(MetaSession
*session
)
2786 for (xlist
<MetaRequest
*>::iterator iter
= session
->unsafe_requests
.begin();
2789 send_request(*iter
, session
);
2791 // also re-send old requests when MDS enters reconnect stage. So that MDS can
2792 // process completed requests in clientreplay stage.
2793 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2794 p
!= mds_requests
.end();
2796 MetaRequest
*req
= p
->second
;
2797 if (req
->got_unsafe
)
2801 if (req
->retry_attempt
== 0)
2802 continue; // old requests only
2803 if (req
->mds
== session
->mds_num
)
2804 send_request(req
, session
, true);
2808 void Client::wait_unsafe_requests()
2810 list
<MetaRequest
*> last_unsafe_reqs
;
2811 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2812 p
!= mds_sessions
.end();
2814 MetaSession
*s
= p
->second
;
2815 if (!s
->unsafe_requests
.empty()) {
2816 MetaRequest
*req
= s
->unsafe_requests
.back();
2818 last_unsafe_reqs
.push_back(req
);
2822 for (list
<MetaRequest
*>::iterator p
= last_unsafe_reqs
.begin();
2823 p
!= last_unsafe_reqs
.end();
2825 MetaRequest
*req
= *p
;
2826 if (req
->unsafe_item
.is_on_list())
2827 wait_on_list(req
->waitfor_safe
);
2832 void Client::kick_requests_closed(MetaSession
*session
)
2834 ldout(cct
, 10) << "kick_requests_closed for mds." << session
->mds_num
<< dendl
;
2835 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2836 p
!= mds_requests
.end(); ) {
2837 MetaRequest
*req
= p
->second
;
2839 if (req
->mds
== session
->mds_num
) {
2840 if (req
->caller_cond
) {
2842 req
->caller_cond
->Signal();
2844 req
->item
.remove_myself();
2845 if (req
->got_unsafe
) {
2846 lderr(cct
) << "kick_requests_closed removing unsafe request " << req
->get_tid() << dendl
;
2847 req
->unsafe_item
.remove_myself();
2848 req
->unsafe_dir_item
.remove_myself();
2849 req
->unsafe_target_item
.remove_myself();
2850 signal_cond_list(req
->waitfor_safe
);
2851 unregister_request(req
);
2855 assert(session
->requests
.empty());
2856 assert(session
->unsafe_requests
.empty());
2866 void Client::got_mds_push(MetaSession
*s
)
2869 ldout(cct
, 10) << " mds." << s
->mds_num
<< " seq now " << s
->seq
<< dendl
;
2870 if (s
->state
== MetaSession::STATE_CLOSING
) {
2871 s
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE
, s
->seq
));
2875 void Client::handle_lease(MClientLease
*m
)
2877 ldout(cct
, 10) << "handle_lease " << *m
<< dendl
;
2879 assert(m
->get_action() == CEPH_MDS_LEASE_REVOKE
);
2881 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
2882 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
2888 got_mds_push(session
);
2890 ceph_seq_t seq
= m
->get_seq();
2893 vinodeno_t
vino(m
->get_ino(), CEPH_NOSNAP
);
2894 if (inode_map
.count(vino
) == 0) {
2895 ldout(cct
, 10) << " don't have vino " << vino
<< dendl
;
2898 in
= inode_map
[vino
];
2900 if (m
->get_mask() & CEPH_LOCK_DN
) {
2901 if (!in
->dir
|| in
->dir
->dentries
.count(m
->dname
) == 0) {
2902 ldout(cct
, 10) << " don't have dir|dentry " << m
->get_ino() << "/" << m
->dname
<<dendl
;
2905 Dentry
*dn
= in
->dir
->dentries
[m
->dname
];
2906 ldout(cct
, 10) << " revoked DN lease on " << dn
<< dendl
;
2911 m
->get_connection()->send_message(
2913 CEPH_MDS_LEASE_RELEASE
, seq
,
2914 m
->get_mask(), m
->get_ino(), m
->get_first(), m
->get_last(), m
->dname
));
2918 void Client::put_inode(Inode
*in
, int n
)
2920 ldout(cct
, 10) << "put_inode on " << *in
<< dendl
;
2921 int left
= in
->_put(n
);
2924 remove_all_caps(in
);
2926 ldout(cct
, 10) << "put_inode deleting " << *in
<< dendl
;
2927 bool unclean
= objectcacher
->release_set(&in
->oset
);
2929 inode_map
.erase(in
->vino());
2930 if (use_faked_inos())
2931 _release_faked_ino(in
);
2936 while (!root_parents
.empty())
2937 root_parents
.erase(root_parents
.begin());
2944 void Client::close_dir(Dir
*dir
)
2946 Inode
*in
= dir
->parent_inode
;
2947 ldout(cct
, 15) << "close_dir dir " << dir
<< " on " << in
<< dendl
;
2948 assert(dir
->is_empty());
2949 assert(in
->dir
== dir
);
2950 assert(in
->dn_set
.size() < 2); // dirs can't be hard-linked
2951 if (!in
->dn_set
.empty())
2952 in
->get_first_parent()->put(); // unpin dentry
2956 put_inode(in
); // unpin inode
2960 * Don't call this with in==NULL, use get_or_create for that
2961 * leave dn set to default NULL unless you're trying to add
2962 * a new inode to a pre-created Dentry
2964 Dentry
* Client::link(Dir
*dir
, const string
& name
, Inode
*in
, Dentry
*dn
)
2967 // create a new Dentry
2973 dir
->dentries
[dn
->name
] = dn
;
2974 lru
.lru_insert_mid(dn
); // mid or top?
2976 ldout(cct
, 15) << "link dir " << dir
->parent_inode
<< " '" << name
<< "' to inode " << in
2977 << " dn " << dn
<< " (new dn)" << dendl
;
2979 ldout(cct
, 15) << "link dir " << dir
->parent_inode
<< " '" << name
<< "' to inode " << in
2980 << " dn " << dn
<< " (old dn)" << dendl
;
2983 if (in
) { // link to inode
2987 dn
->get(); // dir -> dn pin
2989 dn
->get(); // ll_ref -> dn pin
2992 assert(in
->dn_set
.count(dn
) == 0);
2994 // only one parent for directories!
2995 if (in
->is_dir() && !in
->dn_set
.empty()) {
2996 Dentry
*olddn
= in
->get_first_parent();
2997 assert(olddn
->dir
!= dir
|| olddn
->name
!= name
);
2998 Inode
*old_diri
= olddn
->dir
->parent_inode
;
2999 old_diri
->dir_release_count
++;
3000 clear_dir_complete_and_ordered(old_diri
, true);
3001 unlink(olddn
, true, true); // keep dir, dentry
3004 in
->dn_set
.insert(dn
);
3006 ldout(cct
, 20) << "link inode " << in
<< " parents now " << in
->dn_set
<< dendl
;
3012 void Client::unlink(Dentry
*dn
, bool keepdir
, bool keepdentry
)
3016 ldout(cct
, 15) << "unlink dir " << dn
->dir
->parent_inode
<< " '" << dn
->name
<< "' dn " << dn
3017 << " inode " << dn
->inode
<< dendl
;
3019 // unlink from inode
3023 dn
->put(); // dir -> dn pin
3025 dn
->put(); // ll_ref -> dn pin
3028 assert(in
->dn_set
.count(dn
));
3029 in
->dn_set
.erase(dn
);
3030 ldout(cct
, 20) << "unlink inode " << in
<< " parents now " << in
->dn_set
<< dendl
;
3036 ldout(cct
, 15) << "unlink removing '" << dn
->name
<< "' dn " << dn
<< dendl
;
3039 dn
->dir
->dentries
.erase(dn
->name
);
3040 if (dn
->dir
->is_empty() && !keepdir
)
3051 * For asynchronous flushes, check for errors from the IO and
3052 * update the inode if necessary
3054 class C_Client_FlushComplete
: public Context
{
3059 C_Client_FlushComplete(Client
*c
, Inode
*in
) : client(c
), inode(in
) { }
3060 void finish(int r
) override
{
3061 assert(client
->client_lock
.is_locked_by_me());
3063 client_t
const whoami
= client
->whoami
; // For the benefit of ldout prefix
3064 ldout(client
->cct
, 1) << "I/O error from flush on inode " << inode
3065 << " 0x" << std::hex
<< inode
->ino
<< std::dec
3066 << ": " << r
<< "(" << cpp_strerror(r
) << ")" << dendl
;
3067 inode
->set_async_err(r
);
3077 void Client::get_cap_ref(Inode
*in
, int cap
)
3079 if ((cap
& CEPH_CAP_FILE_BUFFER
) &&
3080 in
->cap_refs
[CEPH_CAP_FILE_BUFFER
] == 0) {
3081 ldout(cct
, 5) << "get_cap_ref got first FILE_BUFFER ref on " << *in
<< dendl
;
3084 if ((cap
& CEPH_CAP_FILE_CACHE
) &&
3085 in
->cap_refs
[CEPH_CAP_FILE_CACHE
] == 0) {
3086 ldout(cct
, 5) << "get_cap_ref got first FILE_CACHE ref on " << *in
<< dendl
;
3089 in
->get_cap_ref(cap
);
3092 void Client::put_cap_ref(Inode
*in
, int cap
)
3094 int last
= in
->put_cap_ref(cap
);
3097 int drop
= last
& ~in
->caps_issued();
3098 if (in
->snapid
== CEPH_NOSNAP
) {
3099 if ((last
& CEPH_CAP_FILE_WR
) &&
3100 !in
->cap_snaps
.empty() &&
3101 in
->cap_snaps
.rbegin()->second
.writing
) {
3102 ldout(cct
, 10) << "put_cap_ref finishing pending cap_snap on " << *in
<< dendl
;
3103 in
->cap_snaps
.rbegin()->second
.writing
= 0;
3104 finish_cap_snap(in
, in
->cap_snaps
.rbegin()->second
, get_caps_used(in
));
3105 signal_cond_list(in
->waitfor_caps
); // wake up blocked sync writers
3107 if (last
& CEPH_CAP_FILE_BUFFER
) {
3108 for (auto &p
: in
->cap_snaps
)
3109 p
.second
.dirty_data
= 0;
3110 signal_cond_list(in
->waitfor_commit
);
3111 ldout(cct
, 5) << "put_cap_ref dropped last FILE_BUFFER ref on " << *in
<< dendl
;
3115 if (last
& CEPH_CAP_FILE_CACHE
) {
3116 ldout(cct
, 5) << "put_cap_ref dropped last FILE_CACHE ref on " << *in
<< dendl
;
3122 put_inode(in
, put_nref
);
3126 int Client::get_caps(Inode
*in
, int need
, int want
, int *phave
, loff_t endoff
)
3128 int r
= check_pool_perm(in
, need
);
3133 int file_wanted
= in
->caps_file_wanted();
3134 if ((file_wanted
& need
) != need
) {
3135 ldout(cct
, 10) << "get_caps " << *in
<< " need " << ccap_string(need
)
3136 << " file_wanted " << ccap_string(file_wanted
) << ", EBADF "
3142 int have
= in
->caps_issued(&implemented
);
3144 bool waitfor_caps
= false;
3145 bool waitfor_commit
= false;
3147 if (have
& need
& CEPH_CAP_FILE_WR
) {
3149 (endoff
>= (loff_t
)in
->max_size
||
3150 endoff
> (loff_t
)(in
->size
<< 1)) &&
3151 endoff
> (loff_t
)in
->wanted_max_size
) {
3152 ldout(cct
, 10) << "wanted_max_size " << in
->wanted_max_size
<< " -> " << endoff
<< dendl
;
3153 in
->wanted_max_size
= endoff
;
3157 if (endoff
>= 0 && endoff
> (loff_t
)in
->max_size
) {
3158 ldout(cct
, 10) << "waiting on max_size, endoff " << endoff
<< " max_size " << in
->max_size
<< " on " << *in
<< dendl
;
3159 waitfor_caps
= true;
3161 if (!in
->cap_snaps
.empty()) {
3162 if (in
->cap_snaps
.rbegin()->second
.writing
) {
3163 ldout(cct
, 10) << "waiting on cap_snap write to complete" << dendl
;
3164 waitfor_caps
= true;
3166 for (auto &p
: in
->cap_snaps
) {
3167 if (p
.second
.dirty_data
) {
3168 waitfor_commit
= true;
3172 if (waitfor_commit
) {
3173 _flush(in
, new C_Client_FlushComplete(this, in
));
3174 ldout(cct
, 10) << "waiting for WRBUFFER to get dropped" << dendl
;
3179 if (!waitfor_caps
&& !waitfor_commit
) {
3180 if ((have
& need
) == need
) {
3181 int revoking
= implemented
& ~have
;
3182 ldout(cct
, 10) << "get_caps " << *in
<< " have " << ccap_string(have
)
3183 << " need " << ccap_string(need
) << " want " << ccap_string(want
)
3184 << " revoking " << ccap_string(revoking
)
3186 if ((revoking
& want
) == 0) {
3187 *phave
= need
| (have
& want
);
3188 in
->get_cap_ref(need
);
3192 ldout(cct
, 10) << "waiting for caps " << *in
<< " need " << ccap_string(need
) << " want " << ccap_string(want
) << dendl
;
3193 waitfor_caps
= true;
3196 if ((need
& CEPH_CAP_FILE_WR
) && in
->auth_cap
&&
3197 in
->auth_cap
->session
->readonly
)
3200 if (in
->flags
& I_CAP_DROPPED
) {
3201 int mds_wanted
= in
->caps_mds_wanted();
3202 if ((mds_wanted
& need
) != need
) {
3203 int ret
= _renew_caps(in
);
3208 if ((mds_wanted
& file_wanted
) ==
3209 (file_wanted
& (CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_WR
))) {
3210 in
->flags
&= ~I_CAP_DROPPED
;
3215 wait_on_list(in
->waitfor_caps
);
3216 else if (waitfor_commit
)
3217 wait_on_list(in
->waitfor_commit
);
3221 int Client::get_caps_used(Inode
*in
)
3223 unsigned used
= in
->caps_used();
3224 if (!(used
& CEPH_CAP_FILE_CACHE
) &&
3225 !objectcacher
->set_is_empty(&in
->oset
))
3226 used
|= CEPH_CAP_FILE_CACHE
;
3230 void Client::cap_delay_requeue(Inode
*in
)
3232 ldout(cct
, 10) << "cap_delay_requeue on " << *in
<< dendl
;
3233 in
->hold_caps_until
= ceph_clock_now();
3234 in
->hold_caps_until
+= cct
->_conf
->client_caps_release_delay
;
3235 delayed_caps
.push_back(&in
->cap_item
);
3238 void Client::send_cap(Inode
*in
, MetaSession
*session
, Cap
*cap
,
3239 bool sync
, int used
, int want
, int retain
,
3240 int flush
, ceph_tid_t flush_tid
)
3242 int held
= cap
->issued
| cap
->implemented
;
3243 int revoking
= cap
->implemented
& ~cap
->issued
;
3244 retain
&= ~revoking
;
3245 int dropping
= cap
->issued
& ~retain
;
3246 int op
= CEPH_CAP_OP_UPDATE
;
3248 ldout(cct
, 10) << "send_cap " << *in
3249 << " mds." << session
->mds_num
<< " seq " << cap
->seq
3250 << (sync
? " sync " : " async ")
3251 << " used " << ccap_string(used
)
3252 << " want " << ccap_string(want
)
3253 << " flush " << ccap_string(flush
)
3254 << " retain " << ccap_string(retain
)
3255 << " held "<< ccap_string(held
)
3256 << " revoking " << ccap_string(revoking
)
3257 << " dropping " << ccap_string(dropping
)
3260 if (cct
->_conf
->client_inject_release_failure
&& revoking
) {
3261 const int would_have_issued
= cap
->issued
& retain
;
3262 const int would_have_implemented
= cap
->implemented
& (cap
->issued
| used
);
3264 // - tell the server we think issued is whatever they issued plus whatever we implemented
3265 // - leave what we have implemented in place
3266 ldout(cct
, 20) << __func__
<< " injecting failure to release caps" << dendl
;
3267 cap
->issued
= cap
->issued
| cap
->implemented
;
3269 // Make an exception for revoking xattr caps: we are injecting
3270 // failure to release other caps, but allow xattr because client
3271 // will block on xattr ops if it can't release these to MDS (#9800)
3272 const int xattr_mask
= CEPH_CAP_XATTR_SHARED
| CEPH_CAP_XATTR_EXCL
;
3273 cap
->issued
^= xattr_mask
& revoking
;
3274 cap
->implemented
^= xattr_mask
& revoking
;
3276 ldout(cct
, 20) << __func__
<< " issued " << ccap_string(cap
->issued
) << " vs " << ccap_string(would_have_issued
) << dendl
;
3277 ldout(cct
, 20) << __func__
<< " implemented " << ccap_string(cap
->implemented
) << " vs " << ccap_string(would_have_implemented
) << dendl
;
3280 cap
->issued
&= retain
;
3281 cap
->implemented
&= cap
->issued
| used
;
3284 snapid_t follows
= 0;
3287 follows
= in
->snaprealm
->get_snap_context().seq
;
3289 MClientCaps
*m
= new MClientCaps(op
,
3292 cap
->cap_id
, cap
->seq
,
3298 m
->caller_uid
= in
->cap_dirtier_uid
;
3299 m
->caller_gid
= in
->cap_dirtier_gid
;
3301 m
->head
.issue_seq
= cap
->issue_seq
;
3302 m
->set_tid(flush_tid
);
3304 m
->head
.uid
= in
->uid
;
3305 m
->head
.gid
= in
->gid
;
3306 m
->head
.mode
= in
->mode
;
3308 m
->head
.nlink
= in
->nlink
;
3310 if (flush
& CEPH_CAP_XATTR_EXCL
) {
3311 ::encode(in
->xattrs
, m
->xattrbl
);
3312 m
->head
.xattr_version
= in
->xattr_version
;
3316 m
->max_size
= in
->max_size
;
3317 m
->truncate_seq
= in
->truncate_seq
;
3318 m
->truncate_size
= in
->truncate_size
;
3319 m
->mtime
= in
->mtime
;
3320 m
->atime
= in
->atime
;
3321 m
->ctime
= in
->ctime
;
3322 m
->btime
= in
->btime
;
3323 m
->time_warp_seq
= in
->time_warp_seq
;
3324 m
->change_attr
= in
->change_attr
;
3326 m
->flags
|= CLIENT_CAPS_SYNC
;
3328 if (flush
& CEPH_CAP_FILE_WR
) {
3329 m
->inline_version
= in
->inline_version
;
3330 m
->inline_data
= in
->inline_data
;
3333 in
->reported_size
= in
->size
;
3334 m
->set_snap_follows(follows
);
3336 if (cap
== in
->auth_cap
) {
3337 m
->set_max_size(in
->wanted_max_size
);
3338 in
->requested_max_size
= in
->wanted_max_size
;
3339 ldout(cct
, 15) << "auth cap, setting max_size = " << in
->requested_max_size
<< dendl
;
3342 if (!session
->flushing_caps_tids
.empty())
3343 m
->set_oldest_flush_tid(*session
->flushing_caps_tids
.begin());
3345 session
->con
->send_message(m
);
3348 static bool is_max_size_approaching(Inode
*in
)
3350 /* mds will adjust max size according to the reported size */
3351 if (in
->flushing_caps
& CEPH_CAP_FILE_WR
)
3353 if (in
->size
>= in
->max_size
)
3355 /* half of previous max_size increment has been used */
3356 if (in
->max_size
> in
->reported_size
&&
3357 (in
->size
<< 1) >= in
->max_size
+ in
->reported_size
)
3365 * Examine currently used and wanted versus held caps. Release, flush or ack
3366 * revoked caps to the MDS as appropriate.
3368 * @param in the inode to check
3369 * @param flags flags to apply to cap check
3371 void Client::check_caps(Inode
*in
, unsigned flags
)
3373 unsigned wanted
= in
->caps_wanted();
3374 unsigned used
= get_caps_used(in
);
3377 if (in
->is_dir() && (in
->flags
& I_COMPLETE
)) {
3378 // we do this here because we don't want to drop to Fs (and then
3379 // drop the Fs if we do a create!) if that alone makes us send lookups
3380 // to the MDS. Doing it in in->caps_wanted() has knock-on effects elsewhere
3381 wanted
|= CEPH_CAP_FILE_EXCL
;
3385 int issued
= in
->caps_issued(&implemented
);
3386 int revoking
= implemented
& ~issued
;
3388 int retain
= wanted
| used
| CEPH_CAP_PIN
;
3391 retain
|= CEPH_CAP_ANY
;
3393 retain
|= CEPH_CAP_ANY_SHARED
;
3396 ldout(cct
, 10) << "check_caps on " << *in
3397 << " wanted " << ccap_string(wanted
)
3398 << " used " << ccap_string(used
)
3399 << " issued " << ccap_string(issued
)
3400 << " revoking " << ccap_string(revoking
)
3401 << " flags=" << flags
3404 if (in
->snapid
!= CEPH_NOSNAP
)
3405 return; //snap caps last forever, can't write
3407 if (in
->caps
.empty())
3408 return; // guard if at end of func
3410 if ((revoking
& (CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_LAZYIO
)) &&
3411 (used
& CEPH_CAP_FILE_CACHE
) && !(used
& CEPH_CAP_FILE_BUFFER
))
3414 if (!in
->cap_snaps
.empty())
3417 if (flags
& CHECK_CAPS_NODELAY
)
3418 in
->hold_caps_until
= utime_t();
3420 cap_delay_requeue(in
);
3422 utime_t now
= ceph_clock_now();
3424 map
<mds_rank_t
, Cap
*>::iterator it
= in
->caps
.begin();
3425 while (it
!= in
->caps
.end()) {
3426 mds_rank_t mds
= it
->first
;
3427 Cap
*cap
= it
->second
;
3430 MetaSession
*session
= mds_sessions
[mds
];
3434 if (in
->auth_cap
&& cap
!= in
->auth_cap
)
3435 cap_used
&= ~in
->auth_cap
->issued
;
3437 revoking
= cap
->implemented
& ~cap
->issued
;
3439 ldout(cct
, 10) << " cap mds." << mds
3440 << " issued " << ccap_string(cap
->issued
)
3441 << " implemented " << ccap_string(cap
->implemented
)
3442 << " revoking " << ccap_string(revoking
) << dendl
;
3444 if (in
->wanted_max_size
> in
->max_size
&&
3445 in
->wanted_max_size
> in
->requested_max_size
&&
3446 cap
== in
->auth_cap
)
3449 /* approaching file_max? */
3450 if ((cap
->issued
& CEPH_CAP_FILE_WR
) &&
3451 cap
== in
->auth_cap
&&
3452 is_max_size_approaching(in
)) {
3453 ldout(cct
, 10) << "size " << in
->size
<< " approaching max_size " << in
->max_size
3454 << ", reported " << in
->reported_size
<< dendl
;
3458 /* completed revocation? */
3459 if (revoking
&& (revoking
& cap_used
) == 0) {
3460 ldout(cct
, 10) << "completed revocation of " << ccap_string(cap
->implemented
& ~cap
->issued
) << dendl
;
3464 /* want more caps from mds? */
3465 if (wanted
& ~(cap
->wanted
| cap
->issued
))
3468 if (!revoking
&& unmounting
&& (cap_used
== 0))
3471 if (wanted
== cap
->wanted
&& // mds knows what we want.
3472 ((cap
->issued
& ~retain
) == 0) &&// and we don't have anything we wouldn't like
3473 !in
->dirty_caps
) // and we have no dirty caps
3476 if (now
< in
->hold_caps_until
) {
3477 ldout(cct
, 10) << "delaying cap release" << dendl
;
3482 // re-send old cap/snapcap flushes first.
3483 if (session
->mds_state
>= MDSMap::STATE_RECONNECT
&&
3484 session
->mds_state
< MDSMap::STATE_ACTIVE
&&
3485 session
->early_flushing_caps
.count(in
) == 0) {
3486 ldout(cct
, 20) << " reflushing caps (check_caps) on " << *in
3487 << " to mds." << session
->mds_num
<< dendl
;
3488 session
->early_flushing_caps
.insert(in
);
3489 if (in
->cap_snaps
.size())
3490 flush_snaps(in
, true);
3491 if (in
->flushing_caps
)
3492 flush_caps(in
, session
, flags
& CHECK_CAPS_SYNCHRONOUS
);
3496 ceph_tid_t flush_tid
;
3497 if (in
->auth_cap
== cap
&& in
->dirty_caps
) {
3498 flushing
= mark_caps_flushing(in
, &flush_tid
);
3504 send_cap(in
, session
, cap
, flags
& CHECK_CAPS_SYNCHRONOUS
, cap_used
, wanted
,
3505 retain
, flushing
, flush_tid
);
3510 void Client::queue_cap_snap(Inode
*in
, SnapContext
& old_snapc
)
3512 int used
= get_caps_used(in
);
3513 int dirty
= in
->caps_dirty();
3514 ldout(cct
, 10) << "queue_cap_snap " << *in
<< " snapc " << old_snapc
<< " used " << ccap_string(used
) << dendl
;
3516 if (in
->cap_snaps
.size() &&
3517 in
->cap_snaps
.rbegin()->second
.writing
) {
3518 ldout(cct
, 10) << "queue_cap_snap already have pending cap_snap on " << *in
<< dendl
;
3520 } else if (in
->caps_dirty() ||
3521 (used
& CEPH_CAP_FILE_WR
) ||
3522 (dirty
& CEPH_CAP_ANY_WR
)) {
3523 const auto &capsnapem
= in
->cap_snaps
.emplace(std::piecewise_construct
, std::make_tuple(old_snapc
.seq
), std::make_tuple(in
));
3524 assert(capsnapem
.second
== true); /* element inserted */
3525 CapSnap
&capsnap
= capsnapem
.first
->second
;
3526 capsnap
.context
= old_snapc
;
3527 capsnap
.issued
= in
->caps_issued();
3528 capsnap
.dirty
= in
->caps_dirty();
3530 capsnap
.dirty_data
= (used
& CEPH_CAP_FILE_BUFFER
);
3532 capsnap
.uid
= in
->uid
;
3533 capsnap
.gid
= in
->gid
;
3534 capsnap
.mode
= in
->mode
;
3535 capsnap
.btime
= in
->btime
;
3536 capsnap
.xattrs
= in
->xattrs
;
3537 capsnap
.xattr_version
= in
->xattr_version
;
3539 if (used
& CEPH_CAP_FILE_WR
) {
3540 ldout(cct
, 10) << "queue_cap_snap WR used on " << *in
<< dendl
;
3541 capsnap
.writing
= 1;
3543 finish_cap_snap(in
, capsnap
, used
);
3546 ldout(cct
, 10) << "queue_cap_snap not dirty|writing on " << *in
<< dendl
;
3550 void Client::finish_cap_snap(Inode
*in
, CapSnap
&capsnap
, int used
)
3552 ldout(cct
, 10) << "finish_cap_snap " << *in
<< " capsnap " << (void *)&capsnap
<< " used " << ccap_string(used
) << dendl
;
3553 capsnap
.size
= in
->size
;
3554 capsnap
.mtime
= in
->mtime
;
3555 capsnap
.atime
= in
->atime
;
3556 capsnap
.ctime
= in
->ctime
;
3557 capsnap
.time_warp_seq
= in
->time_warp_seq
;
3558 capsnap
.change_attr
= in
->change_attr
;
3560 capsnap
.dirty
|= in
->caps_dirty();
3562 if (capsnap
.dirty
& CEPH_CAP_FILE_WR
) {
3563 capsnap
.inline_data
= in
->inline_data
;
3564 capsnap
.inline_version
= in
->inline_version
;
3567 if (used
& CEPH_CAP_FILE_BUFFER
) {
3568 ldout(cct
, 10) << "finish_cap_snap " << *in
<< " cap_snap " << &capsnap
<< " used " << used
3569 << " WRBUFFER, delaying" << dendl
;
3571 capsnap
.dirty_data
= 0;
3576 void Client::_flushed_cap_snap(Inode
*in
, snapid_t seq
)
3578 ldout(cct
, 10) << "_flushed_cap_snap seq " << seq
<< " on " << *in
<< dendl
;
3579 in
->cap_snaps
.at(seq
).dirty_data
= 0;
3583 void Client::flush_snaps(Inode
*in
, bool all_again
)
3585 ldout(cct
, 10) << "flush_snaps on " << *in
<< " all_again " << all_again
<< dendl
;
3586 assert(in
->cap_snaps
.size());
3589 assert(in
->auth_cap
);
3590 MetaSession
*session
= in
->auth_cap
->session
;
3591 int mseq
= in
->auth_cap
->mseq
;
3593 for (auto &p
: in
->cap_snaps
) {
3594 CapSnap
&capsnap
= p
.second
;
3596 // only flush once per session
3597 if (capsnap
.flush_tid
> 0)
3601 ldout(cct
, 10) << "flush_snaps mds." << session
->mds_num
3602 << " follows " << p
.first
3603 << " size " << capsnap
.size
3604 << " mtime " << capsnap
.mtime
3605 << " dirty_data=" << capsnap
.dirty_data
3606 << " writing=" << capsnap
.writing
3607 << " on " << *in
<< dendl
;
3608 if (capsnap
.dirty_data
|| capsnap
.writing
)
3611 if (capsnap
.flush_tid
== 0) {
3612 capsnap
.flush_tid
= ++last_flush_tid
;
3613 if (!in
->flushing_cap_item
.is_on_list())
3614 session
->flushing_caps
.push_back(&in
->flushing_cap_item
);
3615 session
->flushing_caps_tids
.insert(capsnap
.flush_tid
);
3618 MClientCaps
*m
= new MClientCaps(CEPH_CAP_OP_FLUSHSNAP
, in
->ino
, in
->snaprealm
->ino
, 0, mseq
,
3621 m
->caller_uid
= user_id
;
3623 m
->caller_gid
= group_id
;
3625 m
->set_client_tid(capsnap
.flush_tid
);
3626 m
->head
.snap_follows
= p
.first
;
3628 m
->head
.caps
= capsnap
.issued
;
3629 m
->head
.dirty
= capsnap
.dirty
;
3631 m
->head
.uid
= capsnap
.uid
;
3632 m
->head
.gid
= capsnap
.gid
;
3633 m
->head
.mode
= capsnap
.mode
;
3634 m
->btime
= capsnap
.btime
;
3636 m
->size
= capsnap
.size
;
3638 m
->head
.xattr_version
= capsnap
.xattr_version
;
3639 ::encode(capsnap
.xattrs
, m
->xattrbl
);
3641 m
->ctime
= capsnap
.ctime
;
3642 m
->btime
= capsnap
.btime
;
3643 m
->mtime
= capsnap
.mtime
;
3644 m
->atime
= capsnap
.atime
;
3645 m
->time_warp_seq
= capsnap
.time_warp_seq
;
3646 m
->change_attr
= capsnap
.change_attr
;
3648 if (capsnap
.dirty
& CEPH_CAP_FILE_WR
) {
3649 m
->inline_version
= in
->inline_version
;
3650 m
->inline_data
= in
->inline_data
;
3653 assert(!session
->flushing_caps_tids
.empty());
3654 m
->set_oldest_flush_tid(*session
->flushing_caps_tids
.begin());
3656 session
->con
->send_message(m
);
3662 void Client::wait_on_list(list
<Cond
*>& ls
)
3665 ls
.push_back(&cond
);
3666 cond
.Wait(client_lock
);
3670 void Client::signal_cond_list(list
<Cond
*>& ls
)
3672 for (list
<Cond
*>::iterator it
= ls
.begin(); it
!= ls
.end(); ++it
)
3676 void Client::wait_on_context_list(list
<Context
*>& ls
)
3681 ls
.push_back(new C_Cond(&cond
, &done
, &r
));
3683 cond
.Wait(client_lock
);
3686 void Client::signal_context_list(list
<Context
*>& ls
)
3688 while (!ls
.empty()) {
3689 ls
.front()->complete(0);
3694 void Client::wake_inode_waiters(MetaSession
*s
)
3696 xlist
<Cap
*>::iterator iter
= s
->caps
.begin();
3697 while (!iter
.end()){
3698 signal_cond_list((*iter
)->inode
->waitfor_caps
);
3704 // flush dirty data (from objectcache)
3706 class C_Client_CacheInvalidate
: public Context
{
3710 int64_t offset
, length
;
3712 C_Client_CacheInvalidate(Client
*c
, Inode
*in
, int64_t off
, int64_t len
) :
3713 client(c
), offset(off
), length(len
) {
3714 if (client
->use_faked_inos())
3715 ino
= vinodeno_t(in
->faked_ino
, CEPH_NOSNAP
);
3719 void finish(int r
) override
{
3720 // _async_invalidate takes the lock when it needs to, call this back from outside of lock.
3721 assert(!client
->client_lock
.is_locked_by_me());
3722 client
->_async_invalidate(ino
, offset
, length
);
3726 void Client::_async_invalidate(vinodeno_t ino
, int64_t off
, int64_t len
)
3730 ldout(cct
, 10) << "_async_invalidate " << ino
<< " " << off
<< "~" << len
<< dendl
;
3731 ino_invalidate_cb(callback_handle
, ino
, off
, len
);
3734 void Client::_schedule_invalidate_callback(Inode
*in
, int64_t off
, int64_t len
) {
3736 if (ino_invalidate_cb
)
3737 // we queue the invalidate, which calls the callback and decrements the ref
3738 async_ino_invalidator
.queue(new C_Client_CacheInvalidate(this, in
, off
, len
));
3741 void Client::_invalidate_inode_cache(Inode
*in
)
3743 ldout(cct
, 10) << "_invalidate_inode_cache " << *in
<< dendl
;
3745 // invalidate our userspace inode cache
3746 if (cct
->_conf
->client_oc
)
3747 objectcacher
->release_set(&in
->oset
);
3749 _schedule_invalidate_callback(in
, 0, 0);
3752 void Client::_invalidate_inode_cache(Inode
*in
, int64_t off
, int64_t len
)
3754 ldout(cct
, 10) << "_invalidate_inode_cache " << *in
<< " " << off
<< "~" << len
<< dendl
;
3756 // invalidate our userspace inode cache
3757 if (cct
->_conf
->client_oc
) {
3758 vector
<ObjectExtent
> ls
;
3759 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, off
, len
, in
->truncate_size
, ls
);
3760 objectcacher
->discard_set(&in
->oset
, ls
);
3763 _schedule_invalidate_callback(in
, off
, len
);
3766 bool Client::_release(Inode
*in
)
3768 ldout(cct
, 20) << "_release " << *in
<< dendl
;
3769 if (in
->cap_refs
[CEPH_CAP_FILE_CACHE
] == 0) {
3770 _invalidate_inode_cache(in
);
3776 bool Client::_flush(Inode
*in
, Context
*onfinish
)
3778 ldout(cct
, 10) << "_flush " << *in
<< dendl
;
3780 if (!in
->oset
.dirty_or_tx
) {
3781 ldout(cct
, 10) << " nothing to flush" << dendl
;
3782 onfinish
->complete(0);
3786 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
)) {
3787 ldout(cct
, 1) << __func__
<< ": FULL, purging for ENOSPC" << dendl
;
3788 objectcacher
->purge_set(&in
->oset
);
3790 onfinish
->complete(-ENOSPC
);
3795 return objectcacher
->flush_set(&in
->oset
, onfinish
);
3798 void Client::_flush_range(Inode
*in
, int64_t offset
, uint64_t size
)
3800 assert(client_lock
.is_locked());
3801 if (!in
->oset
.dirty_or_tx
) {
3802 ldout(cct
, 10) << " nothing to flush" << dendl
;
3806 Mutex
flock("Client::_flush_range flock");
3809 Context
*onflush
= new C_SafeCond(&flock
, &cond
, &safe
);
3810 bool ret
= objectcacher
->file_flush(&in
->oset
, &in
->layout
, in
->snaprealm
->get_snap_context(),
3811 offset
, size
, onflush
);
3814 client_lock
.Unlock();
3823 void Client::flush_set_callback(ObjectCacher::ObjectSet
*oset
)
3825 // Mutex::Locker l(client_lock);
3826 assert(client_lock
.is_locked()); // will be called via dispatch() -> objecter -> ...
3827 Inode
*in
= static_cast<Inode
*>(oset
->parent
);
3832 void Client::_flushed(Inode
*in
)
3834 ldout(cct
, 10) << "_flushed " << *in
<< dendl
;
3836 put_cap_ref(in
, CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_BUFFER
);
3841 // checks common to add_update_cap, handle_cap_grant
3842 void Client::check_cap_issue(Inode
*in
, Cap
*cap
, unsigned issued
)
3844 unsigned had
= in
->caps_issued();
3846 if ((issued
& CEPH_CAP_FILE_CACHE
) &&
3847 !(had
& CEPH_CAP_FILE_CACHE
))
3850 if ((issued
& CEPH_CAP_FILE_SHARED
) &&
3851 !(had
& CEPH_CAP_FILE_SHARED
)) {
3855 clear_dir_complete_and_ordered(in
, true);
3859 void Client::add_update_cap(Inode
*in
, MetaSession
*mds_session
, uint64_t cap_id
,
3860 unsigned issued
, unsigned seq
, unsigned mseq
, inodeno_t realm
,
3861 int flags
, const UserPerm
& cap_perms
)
3864 mds_rank_t mds
= mds_session
->mds_num
;
3865 if (in
->caps
.count(mds
)) {
3866 cap
= in
->caps
[mds
];
3869 * auth mds of the inode changed. we received the cap export
3870 * message, but still haven't received the cap import message.
3871 * handle_cap_export() updated the new auth MDS' cap.
3873 * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
3874 * a message that was send before the cap import message. So
3875 * don't remove caps.
3877 if (ceph_seq_cmp(seq
, cap
->seq
) <= 0) {
3878 assert(cap
== in
->auth_cap
);
3879 assert(cap
->cap_id
== cap_id
);
3882 issued
|= cap
->issued
;
3883 flags
|= CEPH_CAP_FLAG_AUTH
;
3886 mds_session
->num_caps
++;
3887 if (!in
->is_any_caps()) {
3888 assert(in
->snaprealm
== 0);
3889 in
->snaprealm
= get_snap_realm(realm
);
3890 in
->snaprealm
->inodes_with_caps
.push_back(&in
->snaprealm_item
);
3891 ldout(cct
, 15) << "add_update_cap first one, opened snaprealm " << in
->snaprealm
<< dendl
;
3893 in
->caps
[mds
] = cap
= new Cap
;
3895 mds_session
->caps
.push_back(&cap
->cap_item
);
3896 cap
->session
= mds_session
;
3898 cap
->gen
= mds_session
->cap_gen
;
3899 cap_list
.push_back(&in
->cap_item
);
3902 check_cap_issue(in
, cap
, issued
);
3904 if (flags
& CEPH_CAP_FLAG_AUTH
) {
3905 if (in
->auth_cap
!= cap
&&
3906 (!in
->auth_cap
|| ceph_seq_cmp(in
->auth_cap
->mseq
, mseq
) < 0)) {
3907 if (in
->auth_cap
&& in
->flushing_cap_item
.is_on_list()) {
3908 ldout(cct
, 10) << "add_update_cap changing auth cap: "
3909 << "add myself to new auth MDS' flushing caps list" << dendl
;
3910 adjust_session_flushing_caps(in
, in
->auth_cap
->session
, mds_session
);
3916 unsigned old_caps
= cap
->issued
;
3917 cap
->cap_id
= cap_id
;
3918 cap
->issued
|= issued
;
3919 cap
->implemented
|= issued
;
3921 cap
->issue_seq
= seq
;
3923 cap
->latest_perms
= cap_perms
;
3924 ldout(cct
, 10) << "add_update_cap issued " << ccap_string(old_caps
) << " -> " << ccap_string(cap
->issued
)
3925 << " from mds." << mds
3929 if ((issued
& ~old_caps
) && in
->auth_cap
== cap
) {
3930 // non-auth MDS is revoking the newly grant caps ?
3931 for (map
<mds_rank_t
,Cap
*>::iterator it
= in
->caps
.begin(); it
!= in
->caps
.end(); ++it
) {
3932 if (it
->second
== cap
)
3934 if (it
->second
->implemented
& ~it
->second
->issued
& issued
) {
3935 check_caps(in
, CHECK_CAPS_NODELAY
);
3941 if (issued
& ~old_caps
)
3942 signal_cond_list(in
->waitfor_caps
);
3945 void Client::remove_cap(Cap
*cap
, bool queue_release
)
3947 Inode
*in
= cap
->inode
;
3948 MetaSession
*session
= cap
->session
;
3949 mds_rank_t mds
= cap
->session
->mds_num
;
3951 ldout(cct
, 10) << "remove_cap mds." << mds
<< " on " << *in
<< dendl
;
3953 if (queue_release
) {
3954 session
->enqueue_cap_release(
3962 if (in
->auth_cap
== cap
) {
3963 if (in
->flushing_cap_item
.is_on_list()) {
3964 ldout(cct
, 10) << " removing myself from flushing_cap list" << dendl
;
3965 in
->flushing_cap_item
.remove_myself();
3967 in
->auth_cap
= NULL
;
3969 assert(in
->caps
.count(mds
));
3970 in
->caps
.erase(mds
);
3972 cap
->cap_item
.remove_myself();
3976 if (!in
->is_any_caps()) {
3977 ldout(cct
, 15) << "remove_cap last one, closing snaprealm " << in
->snaprealm
<< dendl
;
3978 in
->snaprealm_item
.remove_myself();
3979 put_snap_realm(in
->snaprealm
);
3984 void Client::remove_all_caps(Inode
*in
)
3986 while (!in
->caps
.empty())
3987 remove_cap(in
->caps
.begin()->second
, true);
3990 void Client::remove_session_caps(MetaSession
*s
)
3992 ldout(cct
, 10) << "remove_session_caps mds." << s
->mds_num
<< dendl
;
3994 while (s
->caps
.size()) {
3995 Cap
*cap
= *s
->caps
.begin();
3996 Inode
*in
= cap
->inode
;
3997 bool dirty_caps
= false, cap_snaps
= false;
3998 if (in
->auth_cap
== cap
) {
3999 cap_snaps
= !in
->cap_snaps
.empty();
4000 dirty_caps
= in
->dirty_caps
| in
->flushing_caps
;
4001 in
->wanted_max_size
= 0;
4002 in
->requested_max_size
= 0;
4003 in
->flags
|= I_CAP_DROPPED
;
4005 remove_cap(cap
, false);
4006 signal_cond_list(in
->waitfor_caps
);
4008 InodeRef
tmp_ref(in
);
4009 in
->cap_snaps
.clear();
4012 lderr(cct
) << "remove_session_caps still has dirty|flushing caps on " << *in
<< dendl
;
4013 if (in
->flushing_caps
) {
4014 num_flushing_caps
--;
4015 in
->flushing_cap_tids
.clear();
4017 in
->flushing_caps
= 0;
4022 s
->flushing_caps_tids
.clear();
4026 class C_Client_Remount
: public Context
{
4030 explicit C_Client_Remount(Client
*c
) : client(c
) {}
4031 void finish(int r
) override
{
4033 r
= client
->remount_cb(client
->callback_handle
);
4035 client_t whoami
= client
->get_nodeid();
4036 lderr(client
->cct
) << "tried to remount (to trim kernel dentries) and got error "
4038 if (client
->require_remount
&& !client
->unmounting
) {
4039 assert(0 == "failed to remount for kernel dentry trimming");
4045 void Client::_invalidate_kernel_dcache()
4049 if (can_invalidate_dentries
&& dentry_invalidate_cb
&& root
->dir
) {
4050 for (ceph::unordered_map
<string
, Dentry
*>::iterator p
= root
->dir
->dentries
.begin();
4051 p
!= root
->dir
->dentries
.end();
4053 if (p
->second
->inode
)
4054 _schedule_invalidate_dentry_callback(p
->second
, false);
4056 } else if (remount_cb
) {
4058 // when remounting a file system, linux kernel trims all unused dentries in the fs
4059 remount_finisher
.queue(new C_Client_Remount(this));
4063 void Client::trim_caps(MetaSession
*s
, int max
)
4065 mds_rank_t mds
= s
->mds_num
;
4066 int caps_size
= s
->caps
.size();
4067 ldout(cct
, 10) << "trim_caps mds." << mds
<< " max " << max
4068 << " caps " << caps_size
<< dendl
;
4071 xlist
<Cap
*>::iterator p
= s
->caps
.begin();
4072 while ((caps_size
- trimmed
) > max
&& !p
.end()) {
4074 Inode
*in
= cap
->inode
;
4076 // Increment p early because it will be invalidated if cap
4077 // is deleted inside remove_cap
4080 if (in
->caps
.size() > 1 && cap
!= in
->auth_cap
) {
4081 int mine
= cap
->issued
| cap
->implemented
;
4082 int oissued
= in
->auth_cap
? in
->auth_cap
->issued
: 0;
4083 // disposable non-auth cap
4084 if (!(get_caps_used(in
) & ~oissued
& mine
)) {
4085 ldout(cct
, 20) << " removing unused, unneeded non-auth cap on " << *in
<< dendl
;
4086 remove_cap(cap
, true);
4090 ldout(cct
, 20) << " trying to trim dentries for " << *in
<< dendl
;
4092 set
<Dentry
*>::iterator q
= in
->dn_set
.begin();
4093 InodeRef
tmp_ref(in
);
4094 while (q
!= in
->dn_set
.end()) {
4096 if (dn
->lru_is_expireable()) {
4097 if (can_invalidate_dentries
&&
4098 dn
->dir
->parent_inode
->ino
== MDS_INO_ROOT
) {
4099 // Only issue one of these per DN for inodes in root: handle
4100 // others more efficiently by calling for root-child DNs at
4101 // the end of this function.
4102 _schedule_invalidate_dentry_callback(dn
, true);
4106 ldout(cct
, 20) << " not expirable: " << dn
->name
<< dendl
;
4110 if (all
&& in
->ino
!= MDS_INO_ROOT
) {
4111 ldout(cct
, 20) << __func__
<< " counting as trimmed: " << *in
<< dendl
;
4117 if (s
->caps
.size() > max
)
4118 _invalidate_kernel_dcache();
4121 void Client::force_session_readonly(MetaSession
*s
)
4124 for (xlist
<Cap
*>::iterator p
= s
->caps
.begin(); !p
.end(); ++p
) {
4125 Inode
*in
= (*p
)->inode
;
4126 if (in
->caps_wanted() & CEPH_CAP_FILE_WR
)
4127 signal_cond_list(in
->waitfor_caps
);
4131 void Client::mark_caps_dirty(Inode
*in
, int caps
)
4133 ldout(cct
, 10) << "mark_caps_dirty " << *in
<< " " << ccap_string(in
->dirty_caps
) << " -> "
4134 << ccap_string(in
->dirty_caps
| caps
) << dendl
;
4135 if (caps
&& !in
->caps_dirty())
4137 in
->dirty_caps
|= caps
;
4140 int Client::mark_caps_flushing(Inode
*in
, ceph_tid_t
* ptid
)
4142 MetaSession
*session
= in
->auth_cap
->session
;
4144 int flushing
= in
->dirty_caps
;
4147 ceph_tid_t flush_tid
= ++last_flush_tid
;
4148 in
->flushing_cap_tids
[flush_tid
] = flushing
;
4150 if (!in
->flushing_caps
) {
4151 ldout(cct
, 10) << "mark_caps_flushing " << ccap_string(flushing
) << " " << *in
<< dendl
;
4152 num_flushing_caps
++;
4154 ldout(cct
, 10) << "mark_caps_flushing (more) " << ccap_string(flushing
) << " " << *in
<< dendl
;
4157 in
->flushing_caps
|= flushing
;
4160 if (!in
->flushing_cap_item
.is_on_list())
4161 session
->flushing_caps
.push_back(&in
->flushing_cap_item
);
4162 session
->flushing_caps_tids
.insert(flush_tid
);
4168 void Client::adjust_session_flushing_caps(Inode
*in
, MetaSession
*old_s
, MetaSession
*new_s
)
4170 for (auto &p
: in
->cap_snaps
) {
4171 CapSnap
&capsnap
= p
.second
;
4172 if (capsnap
.flush_tid
> 0) {
4173 old_s
->flushing_caps_tids
.erase(capsnap
.flush_tid
);
4174 new_s
->flushing_caps_tids
.insert(capsnap
.flush_tid
);
4177 for (map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4178 it
!= in
->flushing_cap_tids
.end();
4180 old_s
->flushing_caps_tids
.erase(it
->first
);
4181 new_s
->flushing_caps_tids
.insert(it
->first
);
4183 new_s
->flushing_caps
.push_back(&in
->flushing_cap_item
);
4187 * Flush all caps back to the MDS. Because the callers generally wait on the
4188 * result of this function (syncfs and umount cases), we set
4189 * CHECK_CAPS_SYNCHRONOUS on the last check_caps call.
4191 void Client::flush_caps_sync()
4193 ldout(cct
, 10) << __func__
<< dendl
;
4194 xlist
<Inode
*>::iterator p
= delayed_caps
.begin();
4196 unsigned flags
= CHECK_CAPS_NODELAY
;
4200 delayed_caps
.pop_front();
4201 if (p
.end() && cap_list
.empty())
4202 flags
|= CHECK_CAPS_SYNCHRONOUS
;
4203 check_caps(in
, flags
);
4207 p
= cap_list
.begin();
4209 unsigned flags
= CHECK_CAPS_NODELAY
;
4214 flags
|= CHECK_CAPS_SYNCHRONOUS
;
4215 check_caps(in
, flags
);
4219 void Client::flush_caps(Inode
*in
, MetaSession
*session
, bool sync
)
4221 ldout(cct
, 10) << "flush_caps " << in
<< " mds." << session
->mds_num
<< dendl
;
4222 Cap
*cap
= in
->auth_cap
;
4223 assert(cap
->session
== session
);
4225 for (map
<ceph_tid_t
,int>::iterator p
= in
->flushing_cap_tids
.begin();
4226 p
!= in
->flushing_cap_tids
.end();
4228 bool req_sync
= false;
4230 /* If this is a synchronous request, then flush the journal on last one */
4231 if (sync
&& (p
->first
== in
->flushing_cap_tids
.rbegin()->first
))
4234 send_cap(in
, session
, cap
, req_sync
,
4235 (get_caps_used(in
) | in
->caps_dirty()),
4236 in
->caps_wanted(), (cap
->issued
| cap
->implemented
),
4237 p
->second
, p
->first
);
4241 void Client::wait_sync_caps(Inode
*in
, ceph_tid_t want
)
4243 while (in
->flushing_caps
) {
4244 map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4245 assert(it
!= in
->flushing_cap_tids
.end());
4246 if (it
->first
> want
)
4248 ldout(cct
, 10) << "wait_sync_caps on " << *in
<< " flushing "
4249 << ccap_string(it
->second
) << " want " << want
4250 << " last " << it
->first
<< dendl
;
4251 wait_on_list(in
->waitfor_caps
);
4255 void Client::wait_sync_caps(ceph_tid_t want
)
4258 ldout(cct
, 10) << "wait_sync_caps want " << want
<< " (last is " << last_flush_tid
<< ", "
4259 << num_flushing_caps
<< " total flushing)" << dendl
;
4260 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
4261 p
!= mds_sessions
.end();
4263 MetaSession
*s
= p
->second
;
4264 if (s
->flushing_caps_tids
.empty())
4266 ceph_tid_t oldest_tid
= *s
->flushing_caps_tids
.begin();
4267 if (oldest_tid
<= want
) {
4268 ldout(cct
, 10) << " waiting on mds." << p
->first
<< " tid " << oldest_tid
4269 << " (want " << want
<< ")" << dendl
;
4270 sync_cond
.Wait(client_lock
);
4276 void Client::kick_flushing_caps(MetaSession
*session
)
4278 mds_rank_t mds
= session
->mds_num
;
4279 ldout(cct
, 10) << "kick_flushing_caps mds." << mds
<< dendl
;
4281 for (xlist
<Inode
*>::iterator p
= session
->flushing_caps
.begin(); !p
.end(); ++p
) {
4283 if (session
->early_flushing_caps
.count(in
))
4285 ldout(cct
, 20) << " reflushing caps on " << *in
<< " to mds." << mds
<< dendl
;
4286 if (in
->cap_snaps
.size())
4287 flush_snaps(in
, true);
4288 if (in
->flushing_caps
)
4289 flush_caps(in
, session
);
4292 session
->early_flushing_caps
.clear();
4295 void Client::early_kick_flushing_caps(MetaSession
*session
)
4297 session
->early_flushing_caps
.clear();
4299 for (xlist
<Inode
*>::iterator p
= session
->flushing_caps
.begin(); !p
.end(); ++p
) {
4301 assert(in
->auth_cap
);
4303 // if flushing caps were revoked, we re-send the cap flush in client reconnect
4304 // stage. This guarantees that MDS processes the cap flush message before issuing
4305 // the flushing caps to other client.
4306 if ((in
->flushing_caps
& in
->auth_cap
->issued
) == in
->flushing_caps
)
4309 ldout(cct
, 20) << " reflushing caps (early_kick) on " << *in
4310 << " to mds." << session
->mds_num
<< dendl
;
4312 session
->early_flushing_caps
.insert(in
);
4314 if (in
->cap_snaps
.size())
4315 flush_snaps(in
, true);
4316 if (in
->flushing_caps
)
4317 flush_caps(in
, session
);
4322 void Client::kick_maxsize_requests(MetaSession
*session
)
4324 xlist
<Cap
*>::iterator iter
= session
->caps
.begin();
4325 while (!iter
.end()){
4326 (*iter
)->inode
->requested_max_size
= 0;
4327 (*iter
)->inode
->wanted_max_size
= 0;
4328 signal_cond_list((*iter
)->inode
->waitfor_caps
);
4333 void SnapRealm::build_snap_context()
4335 set
<snapid_t
> snaps
;
4336 snapid_t max_seq
= seq
;
4338 // start with prior_parents?
4339 for (unsigned i
=0; i
<prior_parent_snaps
.size(); i
++)
4340 snaps
.insert(prior_parent_snaps
[i
]);
4342 // current parent's snaps
4344 const SnapContext
& psnapc
= pparent
->get_snap_context();
4345 for (unsigned i
=0; i
<psnapc
.snaps
.size(); i
++)
4346 if (psnapc
.snaps
[i
] >= parent_since
)
4347 snaps
.insert(psnapc
.snaps
[i
]);
4348 if (psnapc
.seq
> max_seq
)
4349 max_seq
= psnapc
.seq
;
4353 for (unsigned i
=0; i
<my_snaps
.size(); i
++)
4354 snaps
.insert(my_snaps
[i
]);
4357 cached_snap_context
.seq
= max_seq
;
4358 cached_snap_context
.snaps
.resize(0);
4359 cached_snap_context
.snaps
.reserve(snaps
.size());
4360 for (set
<snapid_t
>::reverse_iterator p
= snaps
.rbegin(); p
!= snaps
.rend(); ++p
)
4361 cached_snap_context
.snaps
.push_back(*p
);
4364 void Client::invalidate_snaprealm_and_children(SnapRealm
*realm
)
4369 while (!q
.empty()) {
4373 ldout(cct
, 10) << "invalidate_snaprealm_and_children " << *realm
<< dendl
;
4374 realm
->invalidate_cache();
4376 for (set
<SnapRealm
*>::iterator p
= realm
->pchildren
.begin();
4377 p
!= realm
->pchildren
.end();
4383 SnapRealm
*Client::get_snap_realm(inodeno_t r
)
4385 SnapRealm
*realm
= snap_realms
[r
];
4387 snap_realms
[r
] = realm
= new SnapRealm(r
);
4388 ldout(cct
, 20) << "get_snap_realm " << r
<< " " << realm
<< " " << realm
->nref
<< " -> " << (realm
->nref
+ 1) << dendl
;
4393 SnapRealm
*Client::get_snap_realm_maybe(inodeno_t r
)
4395 if (snap_realms
.count(r
) == 0) {
4396 ldout(cct
, 20) << "get_snap_realm_maybe " << r
<< " fail" << dendl
;
4399 SnapRealm
*realm
= snap_realms
[r
];
4400 ldout(cct
, 20) << "get_snap_realm_maybe " << r
<< " " << realm
<< " " << realm
->nref
<< " -> " << (realm
->nref
+ 1) << dendl
;
4405 void Client::put_snap_realm(SnapRealm
*realm
)
4407 ldout(cct
, 20) << "put_snap_realm " << realm
->ino
<< " " << realm
4408 << " " << realm
->nref
<< " -> " << (realm
->nref
- 1) << dendl
;
4409 if (--realm
->nref
== 0) {
4410 snap_realms
.erase(realm
->ino
);
4411 if (realm
->pparent
) {
4412 realm
->pparent
->pchildren
.erase(realm
);
4413 put_snap_realm(realm
->pparent
);
4419 bool Client::adjust_realm_parent(SnapRealm
*realm
, inodeno_t parent
)
4421 if (realm
->parent
!= parent
) {
4422 ldout(cct
, 10) << "adjust_realm_parent " << *realm
4423 << " " << realm
->parent
<< " -> " << parent
<< dendl
;
4424 realm
->parent
= parent
;
4425 if (realm
->pparent
) {
4426 realm
->pparent
->pchildren
.erase(realm
);
4427 put_snap_realm(realm
->pparent
);
4429 realm
->pparent
= get_snap_realm(parent
);
4430 realm
->pparent
->pchildren
.insert(realm
);
4436 static bool has_new_snaps(const SnapContext
& old_snapc
,
4437 const SnapContext
& new_snapc
)
4439 return !new_snapc
.snaps
.empty() && new_snapc
.snaps
[0] > old_snapc
.seq
;
4443 void Client::update_snap_trace(bufferlist
& bl
, SnapRealm
**realm_ret
, bool flush
)
4445 SnapRealm
*first_realm
= NULL
;
4446 ldout(cct
, 10) << "update_snap_trace len " << bl
.length() << dendl
;
4448 map
<SnapRealm
*, SnapContext
> dirty_realms
;
4450 bufferlist::iterator p
= bl
.begin();
4454 SnapRealm
*realm
= get_snap_realm(info
.ino());
4456 bool invalidate
= false;
4458 if (info
.seq() > realm
->seq
) {
4459 ldout(cct
, 10) << "update_snap_trace " << *realm
<< " seq " << info
.seq() << " > " << realm
->seq
4463 // writeback any dirty caps _before_ updating snap list (i.e. with old snap info)
4464 // flush me + children
4467 while (!q
.empty()) {
4468 SnapRealm
*realm
= q
.front();
4471 for (set
<SnapRealm
*>::iterator p
= realm
->pchildren
.begin();
4472 p
!= realm
->pchildren
.end();
4476 if (dirty_realms
.count(realm
) == 0) {
4478 dirty_realms
[realm
] = realm
->get_snap_context();
4484 realm
->seq
= info
.seq();
4485 realm
->created
= info
.created();
4486 realm
->parent_since
= info
.parent_since();
4487 realm
->prior_parent_snaps
= info
.prior_parent_snaps
;
4488 realm
->my_snaps
= info
.my_snaps
;
4492 // _always_ verify parent
4493 if (adjust_realm_parent(realm
, info
.parent()))
4497 invalidate_snaprealm_and_children(realm
);
4498 ldout(cct
, 15) << "update_snap_trace " << *realm
<< " self|parent updated" << dendl
;
4499 ldout(cct
, 15) << " snapc " << realm
->get_snap_context() << dendl
;
4501 ldout(cct
, 10) << "update_snap_trace " << *realm
<< " seq " << info
.seq()
4502 << " <= " << realm
->seq
<< " and same parent, SKIPPING" << dendl
;
4506 first_realm
= realm
;
4508 put_snap_realm(realm
);
4511 for (map
<SnapRealm
*, SnapContext
>::iterator q
= dirty_realms
.begin();
4512 q
!= dirty_realms
.end();
4514 SnapRealm
*realm
= q
->first
;
4515 // if there are new snaps ?
4516 if (has_new_snaps(q
->second
, realm
->get_snap_context())) {
4517 ldout(cct
, 10) << " flushing caps on " << *realm
<< dendl
;
4518 xlist
<Inode
*>::iterator r
= realm
->inodes_with_caps
.begin();
4522 queue_cap_snap(in
, q
->second
);
4525 ldout(cct
, 10) << " no new snap on " << *realm
<< dendl
;
4527 put_snap_realm(realm
);
4531 *realm_ret
= first_realm
;
4533 put_snap_realm(first_realm
);
4536 void Client::handle_snap(MClientSnap
*m
)
4538 ldout(cct
, 10) << "handle_snap " << *m
<< dendl
;
4539 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4540 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4546 got_mds_push(session
);
4548 map
<Inode
*, SnapContext
> to_move
;
4549 SnapRealm
*realm
= 0;
4551 if (m
->head
.op
== CEPH_SNAP_OP_SPLIT
) {
4552 assert(m
->head
.split
);
4554 bufferlist::iterator p
= m
->bl
.begin();
4556 assert(info
.ino() == m
->head
.split
);
4558 // flush, then move, ino's.
4559 realm
= get_snap_realm(info
.ino());
4560 ldout(cct
, 10) << " splitting off " << *realm
<< dendl
;
4561 for (vector
<inodeno_t
>::iterator p
= m
->split_inos
.begin();
4562 p
!= m
->split_inos
.end();
4564 vinodeno_t
vino(*p
, CEPH_NOSNAP
);
4565 if (inode_map
.count(vino
)) {
4566 Inode
*in
= inode_map
[vino
];
4567 if (!in
->snaprealm
|| in
->snaprealm
== realm
)
4569 if (in
->snaprealm
->created
> info
.created()) {
4570 ldout(cct
, 10) << " NOT moving " << *in
<< " from _newer_ realm "
4571 << *in
->snaprealm
<< dendl
;
4574 ldout(cct
, 10) << " moving " << *in
<< " from " << *in
->snaprealm
<< dendl
;
4577 in
->snaprealm_item
.remove_myself();
4578 to_move
[in
] = in
->snaprealm
->get_snap_context();
4579 put_snap_realm(in
->snaprealm
);
4583 // move child snaprealms, too
4584 for (vector
<inodeno_t
>::iterator p
= m
->split_realms
.begin();
4585 p
!= m
->split_realms
.end();
4587 ldout(cct
, 10) << "adjusting snaprealm " << *p
<< " parent" << dendl
;
4588 SnapRealm
*child
= get_snap_realm_maybe(*p
);
4591 adjust_realm_parent(child
, realm
->ino
);
4592 put_snap_realm(child
);
4596 update_snap_trace(m
->bl
, NULL
, m
->head
.op
!= CEPH_SNAP_OP_DESTROY
);
4599 for (auto p
= to_move
.begin(); p
!= to_move
.end(); ++p
) {
4600 Inode
*in
= p
->first
;
4601 in
->snaprealm
= realm
;
4602 realm
->inodes_with_caps
.push_back(&in
->snaprealm_item
);
4604 // queue for snap writeback
4605 if (has_new_snaps(p
->second
, realm
->get_snap_context()))
4606 queue_cap_snap(in
, p
->second
);
4608 put_snap_realm(realm
);
4614 void Client::handle_quota(MClientQuota
*m
)
4616 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4617 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4623 got_mds_push(session
);
4625 ldout(cct
, 10) << "handle_quota " << *m
<< " from mds." << mds
<< dendl
;
4627 vinodeno_t
vino(m
->ino
, CEPH_NOSNAP
);
4628 if (inode_map
.count(vino
)) {
4630 in
= inode_map
[vino
];
4633 in
->quota
= m
->quota
;
4634 in
->rstat
= m
->rstat
;
4641 void Client::handle_caps(MClientCaps
*m
)
4643 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4644 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4650 if (m
->osd_epoch_barrier
&& !objecter
->have_map(m
->osd_epoch_barrier
)) {
4651 // Pause RADOS operations until we see the required epoch
4652 objecter
->set_epoch_barrier(m
->osd_epoch_barrier
);
4655 if (m
->osd_epoch_barrier
> cap_epoch_barrier
) {
4656 // Record the barrier so that we will transmit it to MDS when releasing
4657 set_cap_epoch_barrier(m
->osd_epoch_barrier
);
4660 got_mds_push(session
);
4662 m
->clear_payload(); // for if/when we send back to MDS
4665 vinodeno_t
vino(m
->get_ino(), CEPH_NOSNAP
);
4666 if (inode_map
.count(vino
))
4667 in
= inode_map
[vino
];
4669 if (m
->get_op() == CEPH_CAP_OP_IMPORT
) {
4670 ldout(cct
, 5) << "handle_caps don't have vino " << vino
<< " on IMPORT, immediately releasing" << dendl
;
4671 session
->enqueue_cap_release(
4678 ldout(cct
, 5) << "handle_caps don't have vino " << vino
<< ", dropping" << dendl
;
4682 // in case the mds is waiting on e.g. a revocation
4683 flush_cap_releases();
4687 switch (m
->get_op()) {
4688 case CEPH_CAP_OP_EXPORT
:
4689 return handle_cap_export(session
, in
, m
);
4690 case CEPH_CAP_OP_FLUSHSNAP_ACK
:
4691 return handle_cap_flushsnap_ack(session
, in
, m
);
4692 case CEPH_CAP_OP_IMPORT
:
4693 handle_cap_import(session
, in
, m
);
4696 if (in
->caps
.count(mds
) == 0) {
4697 ldout(cct
, 5) << "handle_caps don't have " << *in
<< " cap on mds." << mds
<< dendl
;
4702 Cap
*cap
= in
->caps
[mds
];
4704 switch (m
->get_op()) {
4705 case CEPH_CAP_OP_TRUNC
: return handle_cap_trunc(session
, in
, m
);
4706 case CEPH_CAP_OP_IMPORT
:
4707 case CEPH_CAP_OP_REVOKE
:
4708 case CEPH_CAP_OP_GRANT
: return handle_cap_grant(session
, in
, cap
, m
);
4709 case CEPH_CAP_OP_FLUSH_ACK
: return handle_cap_flush_ack(session
, in
, cap
, m
);
4715 void Client::handle_cap_import(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4717 mds_rank_t mds
= session
->mds_num
;
4719 ldout(cct
, 5) << "handle_cap_import ino " << m
->get_ino() << " mseq " << m
->get_mseq()
4720 << " IMPORT from mds." << mds
<< dendl
;
4722 const mds_rank_t peer_mds
= mds_rank_t(m
->peer
.mds
);
4725 if (m
->peer
.cap_id
&& in
->caps
.count(peer_mds
)) {
4726 cap
= in
->caps
[peer_mds
];
4728 cap_perms
= cap
->latest_perms
;
4733 SnapRealm
*realm
= NULL
;
4734 update_snap_trace(m
->snapbl
, &realm
);
4736 add_update_cap(in
, session
, m
->get_cap_id(),
4737 m
->get_caps(), m
->get_seq(), m
->get_mseq(), m
->get_realm(),
4738 CEPH_CAP_FLAG_AUTH
, cap_perms
);
4740 if (cap
&& cap
->cap_id
== m
->peer
.cap_id
) {
4741 remove_cap(cap
, (m
->peer
.flags
& CEPH_CAP_FLAG_RELEASE
));
4745 put_snap_realm(realm
);
4747 if (in
->auth_cap
&& in
->auth_cap
->session
->mds_num
== mds
) {
4748 // reflush any/all caps (if we are now the auth_cap)
4749 if (in
->cap_snaps
.size())
4750 flush_snaps(in
, true);
4751 if (in
->flushing_caps
)
4752 flush_caps(in
, session
);
4756 void Client::handle_cap_export(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4758 mds_rank_t mds
= session
->mds_num
;
4760 ldout(cct
, 5) << "handle_cap_export ino " << m
->get_ino() << " mseq " << m
->get_mseq()
4761 << " EXPORT from mds." << mds
<< dendl
;
4764 if (in
->caps
.count(mds
))
4765 cap
= in
->caps
[mds
];
4767 const mds_rank_t peer_mds
= mds_rank_t(m
->peer
.mds
);
4769 if (cap
&& cap
->cap_id
== m
->get_cap_id()) {
4770 if (m
->peer
.cap_id
) {
4771 MetaSession
*tsession
= _get_or_open_mds_session(peer_mds
);
4772 if (in
->caps
.count(peer_mds
)) {
4773 Cap
*tcap
= in
->caps
[peer_mds
];
4774 if (tcap
->cap_id
!= m
->peer
.cap_id
||
4775 ceph_seq_cmp(tcap
->seq
, m
->peer
.seq
) < 0) {
4776 tcap
->cap_id
= m
->peer
.cap_id
;
4777 tcap
->seq
= m
->peer
.seq
- 1;
4778 tcap
->issue_seq
= tcap
->seq
;
4779 tcap
->mseq
= m
->peer
.mseq
;
4780 tcap
->issued
|= cap
->issued
;
4781 tcap
->implemented
|= cap
->issued
;
4782 if (cap
== in
->auth_cap
)
4783 in
->auth_cap
= tcap
;
4784 if (in
->auth_cap
== tcap
&& in
->flushing_cap_item
.is_on_list())
4785 adjust_session_flushing_caps(in
, session
, tsession
);
4788 add_update_cap(in
, tsession
, m
->peer
.cap_id
, cap
->issued
,
4789 m
->peer
.seq
- 1, m
->peer
.mseq
, (uint64_t)-1,
4790 cap
== in
->auth_cap
? CEPH_CAP_FLAG_AUTH
: 0,
4794 if (cap
== in
->auth_cap
)
4795 in
->flags
|= I_CAP_DROPPED
;
4798 remove_cap(cap
, false);
4804 void Client::handle_cap_trunc(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4806 mds_rank_t mds
= session
->mds_num
;
4807 assert(in
->caps
[mds
]);
4809 ldout(cct
, 10) << "handle_cap_trunc on ino " << *in
4810 << " size " << in
->size
<< " -> " << m
->get_size()
4813 int implemented
= 0;
4814 int issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
4815 issued
|= implemented
;
4816 update_inode_file_bits(in
, m
->get_truncate_seq(), m
->get_truncate_size(),
4817 m
->get_size(), m
->get_change_attr(), m
->get_time_warp_seq(),
4818 m
->get_ctime(), m
->get_mtime(), m
->get_atime(),
4819 m
->inline_version
, m
->inline_data
, issued
);
4823 void Client::handle_cap_flush_ack(MetaSession
*session
, Inode
*in
, Cap
*cap
, MClientCaps
*m
)
4825 ceph_tid_t flush_ack_tid
= m
->get_client_tid();
4826 int dirty
= m
->get_dirty();
4830 for (map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4831 it
!= in
->flushing_cap_tids
.end(); ) {
4832 if (it
->first
== flush_ack_tid
)
4833 cleaned
= it
->second
;
4834 if (it
->first
<= flush_ack_tid
) {
4835 session
->flushing_caps_tids
.erase(it
->first
);
4836 in
->flushing_cap_tids
.erase(it
++);
4840 cleaned
&= ~it
->second
;
4846 ldout(cct
, 5) << "handle_cap_flush_ack mds." << session
->mds_num
4847 << " cleaned " << ccap_string(cleaned
) << " on " << *in
4848 << " with " << ccap_string(dirty
) << dendl
;
4851 signal_cond_list(in
->waitfor_caps
);
4852 if (session
->flushing_caps_tids
.empty() ||
4853 *session
->flushing_caps_tids
.begin() > flush_ack_tid
)
4858 in
->cap_dirtier_uid
= -1;
4859 in
->cap_dirtier_gid
= -1;
4863 ldout(cct
, 10) << " tid " << m
->get_client_tid() << " != any cap bit tids" << dendl
;
4865 if (in
->flushing_caps
) {
4866 ldout(cct
, 5) << " flushing_caps " << ccap_string(in
->flushing_caps
)
4867 << " -> " << ccap_string(in
->flushing_caps
& ~cleaned
) << dendl
;
4868 in
->flushing_caps
&= ~cleaned
;
4869 if (in
->flushing_caps
== 0) {
4870 ldout(cct
, 10) << " " << *in
<< " !flushing" << dendl
;
4871 num_flushing_caps
--;
4872 if (in
->cap_snaps
.empty())
4873 in
->flushing_cap_item
.remove_myself();
4875 if (!in
->caps_dirty())
4884 void Client::handle_cap_flushsnap_ack(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4886 mds_rank_t mds
= session
->mds_num
;
4887 assert(in
->caps
[mds
]);
4888 snapid_t follows
= m
->get_snap_follows();
4890 if (in
->cap_snaps
.count(follows
)) {
4891 CapSnap
&capsnap
= in
->cap_snaps
.at(follows
);
4892 if (m
->get_client_tid() != capsnap
.flush_tid
) {
4893 ldout(cct
, 10) << " tid " << m
->get_client_tid() << " != " << capsnap
.flush_tid
<< dendl
;
4895 ldout(cct
, 5) << "handle_cap_flushedsnap mds." << mds
<< " flushed snap follows " << follows
4896 << " on " << *in
<< dendl
;
4898 if (in
->get_num_ref() == 1)
4899 tmp_ref
= in
; // make sure inode not get freed while erasing item from in->cap_snaps
4900 if (in
->flushing_caps
== 0 && in
->cap_snaps
.empty())
4901 in
->flushing_cap_item
.remove_myself();
4902 session
->flushing_caps_tids
.erase(capsnap
.flush_tid
);
4903 in
->cap_snaps
.erase(follows
);
4906 ldout(cct
, 5) << "handle_cap_flushedsnap DUP(?) mds." << mds
<< " flushed snap follows " << follows
4907 << " on " << *in
<< dendl
;
4908 // we may not have it if we send multiple FLUSHSNAP requests and (got multiple FLUSHEDSNAPs back)
4914 class C_Client_DentryInvalidate
: public Context
{
4921 C_Client_DentryInvalidate(Client
*c
, Dentry
*dn
, bool del
) :
4922 client(c
), name(dn
->name
) {
4923 if (client
->use_faked_inos()) {
4924 dirino
.ino
= dn
->dir
->parent_inode
->faked_ino
;
4926 ino
.ino
= dn
->inode
->faked_ino
;
4928 dirino
= dn
->dir
->parent_inode
->vino();
4930 ino
= dn
->inode
->vino();
4933 ino
.ino
= inodeno_t();
4935 void finish(int r
) override
{
4936 // _async_dentry_invalidate is responsible for its own locking
4937 assert(!client
->client_lock
.is_locked_by_me());
4938 client
->_async_dentry_invalidate(dirino
, ino
, name
);
4942 void Client::_async_dentry_invalidate(vinodeno_t dirino
, vinodeno_t ino
, string
& name
)
4946 ldout(cct
, 10) << "_async_dentry_invalidate '" << name
<< "' ino " << ino
4947 << " in dir " << dirino
<< dendl
;
4948 dentry_invalidate_cb(callback_handle
, dirino
, ino
, name
);
4951 void Client::_schedule_invalidate_dentry_callback(Dentry
*dn
, bool del
)
4953 if (dentry_invalidate_cb
&& dn
->inode
->ll_ref
> 0)
4954 async_dentry_invalidator
.queue(new C_Client_DentryInvalidate(this, dn
, del
));
4957 void Client::_try_to_trim_inode(Inode
*in
, bool sched_inval
)
4959 int ref
= in
->get_num_ref();
4961 if (in
->dir
&& !in
->dir
->dentries
.empty()) {
4962 for (auto p
= in
->dir
->dentries
.begin();
4963 p
!= in
->dir
->dentries
.end(); ) {
4964 Dentry
*dn
= p
->second
;
4966 /* rmsnap removes whole subtree, need trim inodes recursively.
4967 * we don't need to invalidate dentries recursively. because
4968 * invalidating a directory dentry effectively invalidate
4970 if (in
->snapid
!= CEPH_NOSNAP
&& dn
->inode
&& dn
->inode
->is_dir())
4971 _try_to_trim_inode(dn
->inode
.get(), false);
4973 if (dn
->lru_is_expireable())
4974 unlink(dn
, true, false); // keep dir, drop dentry
4976 if (in
->dir
->dentries
.empty()) {
4982 if (ref
> 0 && (in
->flags
& I_SNAPDIR_OPEN
)) {
4983 InodeRef snapdir
= open_snapdir(in
);
4984 _try_to_trim_inode(snapdir
.get(), false);
4988 if (ref
> 0 && in
->ll_ref
> 0 && sched_inval
) {
4989 set
<Dentry
*>::iterator q
= in
->dn_set
.begin();
4990 while (q
!= in
->dn_set
.end()) {
4992 // FIXME: we play lots of unlink/link tricks when handling MDS replies,
4993 // so in->dn_set doesn't always reflect the state of kernel's dcache.
4994 _schedule_invalidate_dentry_callback(dn
, true);
4995 unlink(dn
, true, true);
5000 void Client::handle_cap_grant(MetaSession
*session
, Inode
*in
, Cap
*cap
, MClientCaps
*m
)
5002 mds_rank_t mds
= session
->mds_num
;
5003 int used
= get_caps_used(in
);
5004 int wanted
= in
->caps_wanted();
5006 const int old_caps
= cap
->issued
;
5007 const int new_caps
= m
->get_caps();
5008 ldout(cct
, 5) << "handle_cap_grant on in " << m
->get_ino()
5009 << " mds." << mds
<< " seq " << m
->get_seq()
5010 << " caps now " << ccap_string(new_caps
)
5011 << " was " << ccap_string(old_caps
) << dendl
;
5012 cap
->seq
= m
->get_seq();
5014 in
->layout
= m
->get_layout();
5017 int implemented
= 0;
5018 int issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
5019 issued
|= implemented
;
5021 if ((issued
& CEPH_CAP_AUTH_EXCL
) == 0) {
5022 in
->mode
= m
->head
.mode
;
5023 in
->uid
= m
->head
.uid
;
5024 in
->gid
= m
->head
.gid
;
5025 in
->btime
= m
->btime
;
5027 bool deleted_inode
= false;
5028 if ((issued
& CEPH_CAP_LINK_EXCL
) == 0) {
5029 in
->nlink
= m
->head
.nlink
;
5030 if (in
->nlink
== 0 &&
5031 (new_caps
& (CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
)))
5032 deleted_inode
= true;
5034 if ((issued
& CEPH_CAP_XATTR_EXCL
) == 0 &&
5035 m
->xattrbl
.length() &&
5036 m
->head
.xattr_version
> in
->xattr_version
) {
5037 bufferlist::iterator p
= m
->xattrbl
.begin();
5038 ::decode(in
->xattrs
, p
);
5039 in
->xattr_version
= m
->head
.xattr_version
;
5041 update_inode_file_bits(in
, m
->get_truncate_seq(), m
->get_truncate_size(), m
->get_size(),
5042 m
->get_change_attr(), m
->get_time_warp_seq(), m
->get_ctime(),
5043 m
->get_mtime(), m
->get_atime(),
5044 m
->inline_version
, m
->inline_data
, issued
);
5047 if (cap
== in
->auth_cap
&&
5048 m
->get_max_size() != in
->max_size
) {
5049 ldout(cct
, 10) << "max_size " << in
->max_size
<< " -> " << m
->get_max_size() << dendl
;
5050 in
->max_size
= m
->get_max_size();
5051 if (in
->max_size
> in
->wanted_max_size
) {
5052 in
->wanted_max_size
= 0;
5053 in
->requested_max_size
= 0;
5058 if (m
->get_op() == CEPH_CAP_OP_IMPORT
&& m
->get_wanted() != wanted
)
5061 check_cap_issue(in
, cap
, new_caps
);
5064 if (old_caps
& ~new_caps
) {
5065 ldout(cct
, 10) << " revocation of " << ccap_string(~new_caps
& old_caps
) << dendl
;
5066 cap
->issued
= new_caps
;
5067 cap
->implemented
|= new_caps
;
5069 if (((used
& ~new_caps
) & CEPH_CAP_FILE_BUFFER
)
5070 && !_flush(in
, new C_Client_FlushComplete(this, in
))) {
5071 // waitin' for flush
5072 } else if ((old_caps
& ~new_caps
) & CEPH_CAP_FILE_CACHE
) {
5076 cap
->wanted
= 0; // don't let check_caps skip sending a response to MDS
5080 } else if (old_caps
== new_caps
) {
5081 ldout(cct
, 10) << " caps unchanged at " << ccap_string(old_caps
) << dendl
;
5083 ldout(cct
, 10) << " grant, new caps are " << ccap_string(new_caps
& ~old_caps
) << dendl
;
5084 cap
->issued
= new_caps
;
5085 cap
->implemented
|= new_caps
;
5087 if (cap
== in
->auth_cap
) {
5088 // non-auth MDS is revoking the newly grant caps ?
5089 for (map
<mds_rank_t
, Cap
*>::iterator it
= in
->caps
.begin(); it
!= in
->caps
.end(); ++it
) {
5090 if (it
->second
== cap
)
5092 if (it
->second
->implemented
& ~it
->second
->issued
& new_caps
) {
5105 signal_cond_list(in
->waitfor_caps
);
5107 // may drop inode's last ref
5109 _try_to_trim_inode(in
, true);
5114 int Client::_getgrouplist(gid_t
** sgids
, uid_t uid
, gid_t gid
)
5116 // cppcheck-suppress variableScope
5121 sgid_count
= getgroups_cb(callback_handle
, &sgid_buf
);
5122 if (sgid_count
> 0) {
5128 #if HAVE_GETGROUPLIST
5132 ldout(cct
, 3) << "getting user entry failed" << dendl
;
5135 //use PAM to get the group list
5136 // initial number of group entries, defaults to posix standard of 16
5137 // PAM implementations may provide more than 16 groups....
5139 sgid_buf
= (gid_t
*)malloc(sgid_count
* sizeof(gid_t
));
5140 if (sgid_buf
== NULL
) {
5141 ldout(cct
, 3) << "allocating group memory failed" << dendl
;
5146 #if defined(__APPLE__)
5147 if (getgrouplist(pw
->pw_name
, gid
, (int*)sgid_buf
, &sgid_count
) == -1) {
5149 if (getgrouplist(pw
->pw_name
, gid
, sgid_buf
, &sgid_count
) == -1) {
5151 // we need to resize the group list and try again
5152 void *_realloc
= NULL
;
5153 if ((_realloc
= realloc(sgid_buf
, sgid_count
* sizeof(gid_t
))) == NULL
) {
5154 ldout(cct
, 3) << "allocating group memory failed" << dendl
;
5158 sgid_buf
= (gid_t
*)_realloc
;
5161 // list was successfully retrieved
5171 int Client::inode_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
)
5173 if (perms
.uid() == 0)
5176 if (perms
.uid() != in
->uid
&& (in
->mode
& S_IRWXG
)) {
5177 int ret
= _posix_acl_permission(in
, perms
, want
);
5182 // check permissions before doing anything else
5183 if (!in
->check_mode(perms
, want
))
5188 int Client::xattr_permission(Inode
*in
, const char *name
, unsigned want
,
5189 const UserPerm
& perms
)
5191 int r
= _getattr_for_perm(in
, perms
);
5196 if (strncmp(name
, "system.", 7) == 0) {
5197 if ((want
& MAY_WRITE
) && (perms
.uid() != 0 && perms
.uid() != in
->uid
))
5200 r
= inode_permission(in
, perms
, want
);
5203 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5207 ostream
& operator<<(ostream
&out
, const UserPerm
& perm
) {
5208 out
<< "UserPerm(uid: " << perm
.uid() << ", gid: " << perm
.gid() << ")";
5212 int Client::may_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
5213 const UserPerm
& perms
)
5215 ldout(cct
, 20) << __func__
<< *in
<< "; " << perms
<< dendl
;
5216 int r
= _getattr_for_perm(in
, perms
);
5220 if (mask
& CEPH_SETATTR_SIZE
) {
5221 r
= inode_permission(in
, perms
, MAY_WRITE
);
5227 if (mask
& CEPH_SETATTR_UID
) {
5228 if (perms
.uid() != 0 && (perms
.uid() != in
->uid
|| stx
->stx_uid
!= in
->uid
))
5231 if (mask
& CEPH_SETATTR_GID
) {
5232 if (perms
.uid() != 0 && (perms
.uid() != in
->uid
||
5233 (!perms
.gid_in_groups(stx
->stx_gid
) && stx
->stx_gid
!= in
->gid
)))
5237 if (mask
& CEPH_SETATTR_MODE
) {
5238 if (perms
.uid() != 0 && perms
.uid() != in
->uid
)
5241 gid_t i_gid
= (mask
& CEPH_SETATTR_GID
) ? stx
->stx_gid
: in
->gid
;
5242 if (perms
.uid() != 0 && !perms
.gid_in_groups(i_gid
))
5243 stx
->stx_mode
&= ~S_ISGID
;
5246 if (mask
& (CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
|
5247 CEPH_SETATTR_MTIME
| CEPH_SETATTR_ATIME
)) {
5248 if (perms
.uid() != 0 && perms
.uid() != in
->uid
) {
5249 int check_mask
= CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
;
5250 if (!(mask
& CEPH_SETATTR_MTIME_NOW
))
5251 check_mask
|= CEPH_SETATTR_MTIME
;
5252 if (!(mask
& CEPH_SETATTR_ATIME_NOW
))
5253 check_mask
|= CEPH_SETATTR_ATIME
;
5254 if (check_mask
& mask
) {
5257 r
= inode_permission(in
, perms
, MAY_WRITE
);
5265 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5269 int Client::may_open(Inode
*in
, int flags
, const UserPerm
& perms
)
5271 ldout(cct
, 20) << __func__
<< *in
<< "; " << perms
<< dendl
;
5274 if ((flags
& O_ACCMODE
) == O_WRONLY
)
5276 else if ((flags
& O_ACCMODE
) == O_RDWR
)
5277 want
= MAY_READ
| MAY_WRITE
;
5278 else if ((flags
& O_ACCMODE
) == O_RDONLY
)
5280 if (flags
& O_TRUNC
)
5284 switch (in
->mode
& S_IFMT
) {
5289 if (want
& MAY_WRITE
) {
5296 r
= _getattr_for_perm(in
, perms
);
5300 r
= inode_permission(in
, perms
, want
);
5302 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5306 int Client::may_lookup(Inode
*dir
, const UserPerm
& perms
)
5308 ldout(cct
, 20) << __func__
<< *dir
<< "; " << perms
<< dendl
;
5309 int r
= _getattr_for_perm(dir
, perms
);
5313 r
= inode_permission(dir
, perms
, MAY_EXEC
);
5315 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5319 int Client::may_create(Inode
*dir
, const UserPerm
& perms
)
5321 ldout(cct
, 20) << __func__
<< *dir
<< "; " << perms
<< dendl
;
5322 int r
= _getattr_for_perm(dir
, perms
);
5326 r
= inode_permission(dir
, perms
, MAY_EXEC
| MAY_WRITE
);
5328 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5332 int Client::may_delete(Inode
*dir
, const char *name
, const UserPerm
& perms
)
5334 ldout(cct
, 20) << __func__
<< *dir
<< "; " << "; name " << name
<< "; " << perms
<< dendl
;
5335 int r
= _getattr_for_perm(dir
, perms
);
5339 r
= inode_permission(dir
, perms
, MAY_EXEC
| MAY_WRITE
);
5343 /* 'name == NULL' means rmsnap */
5344 if (perms
.uid() != 0 && name
&& (dir
->mode
& S_ISVTX
)) {
5346 r
= _lookup(dir
, name
, CEPH_CAP_AUTH_SHARED
, &otherin
, perms
);
5349 if (dir
->uid
!= perms
.uid() && otherin
->uid
!= perms
.uid())
5353 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5357 int Client::may_hardlink(Inode
*in
, const UserPerm
& perms
)
5359 ldout(cct
, 20) << __func__
<< *in
<< "; " << perms
<< dendl
;
5360 int r
= _getattr_for_perm(in
, perms
);
5364 if (perms
.uid() == 0 || perms
.uid() == in
->uid
) {
5370 if (!S_ISREG(in
->mode
))
5373 if (in
->mode
& S_ISUID
)
5376 if ((in
->mode
& (S_ISGID
| S_IXGRP
)) == (S_ISGID
| S_IXGRP
))
5379 r
= inode_permission(in
, perms
, MAY_READ
| MAY_WRITE
);
5381 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5385 int Client::_getattr_for_perm(Inode
*in
, const UserPerm
& perms
)
5387 int mask
= CEPH_STAT_CAP_MODE
;
5389 if (acl_type
!= NO_ACL
) {
5390 mask
|= CEPH_STAT_CAP_XATTR
;
5391 force
= in
->xattr_version
== 0;
5393 return _getattr(in
, mask
, perms
, force
);
5396 vinodeno_t
Client::_get_vino(Inode
*in
)
5398 /* The caller must hold the client lock */
5399 return vinodeno_t(in
->ino
, in
->snapid
);
5402 inodeno_t
Client::_get_inodeno(Inode
*in
)
5404 /* The caller must hold the client lock */
5410 * Resolve an MDS spec to a list of MDS daemon GIDs.
5412 * The spec is a string representing a GID, rank, filesystem:rank, or name/id.
5413 * It may be '*' in which case it matches all GIDs.
5415 * If no error is returned, the `targets` vector will be populated with at least
5418 int Client::resolve_mds(
5419 const std::string
&mds_spec
,
5420 std::vector
<mds_gid_t
> *targets
)
5423 assert(targets
!= nullptr);
5426 std::stringstream ss
;
5427 int role_r
= fsmap
->parse_role(mds_spec
, &role
, ss
);
5429 // We got a role, resolve it to a GID
5430 ldout(cct
, 10) << __func__
<< ": resolved '" << mds_spec
<< "' to role '"
5431 << role
<< "'" << dendl
;
5433 fsmap
->get_filesystem(role
.fscid
)->mds_map
.get_info(role
.rank
).global_id
);
5437 std::string strtol_err
;
5438 long long rank_or_gid
= strict_strtoll(mds_spec
.c_str(), 10, &strtol_err
);
5439 if (strtol_err
.empty()) {
5440 // It is a possible GID
5441 const mds_gid_t mds_gid
= mds_gid_t(rank_or_gid
);
5442 if (fsmap
->gid_exists(mds_gid
)) {
5443 ldout(cct
, 10) << __func__
<< ": validated GID " << mds_gid
<< dendl
;
5444 targets
->push_back(mds_gid
);
5446 lderr(cct
) << __func__
<< ": GID " << mds_gid
<< " not in MDS map"
5450 } else if (mds_spec
== "*") {
5451 // It is a wildcard: use all MDSs
5452 const auto mds_info
= fsmap
->get_mds_info();
5454 if (mds_info
.empty()) {
5455 lderr(cct
) << __func__
<< ": * passed but no MDS daemons found" << dendl
;
5459 for (const auto i
: mds_info
) {
5460 targets
->push_back(i
.first
);
5463 // It did not parse as an integer, it is not a wildcard, it must be a name
5464 const mds_gid_t mds_gid
= fsmap
->find_mds_gid_by_name(mds_spec
);
5466 lderr(cct
) << "MDS ID '" << mds_spec
<< "' not found" << dendl
;
5468 lderr(cct
) << "FSMap: " << *fsmap
<< dendl
;
5472 ldout(cct
, 10) << __func__
<< ": resolved ID '" << mds_spec
5473 << "' to GID " << mds_gid
<< dendl
;
5474 targets
->push_back(mds_gid
);
5483 * Authenticate with mon and establish global ID
5485 int Client::authenticate()
5487 assert(client_lock
.is_locked_by_me());
5489 if (monclient
->is_authenticated()) {
5493 client_lock
.Unlock();
5494 int r
= monclient
->authenticate(cct
->_conf
->client_mount_timeout
);
5500 whoami
= monclient
->get_global_id();
5501 messenger
->set_myname(entity_name_t::CLIENT(whoami
.v
));
5506 int Client::fetch_fsmap(bool user
)
5509 // Retrieve FSMap to enable looking up daemon addresses. We need FSMap
5510 // rather than MDSMap because no one MDSMap contains all the daemons, and
5511 // a `tell` can address any daemon.
5512 version_t fsmap_latest
;
5515 monclient
->get_version("fsmap", &fsmap_latest
, NULL
, &cond
);
5516 client_lock
.Unlock();
5519 } while (r
== -EAGAIN
);
5522 lderr(cct
) << "Failed to learn FSMap version: " << cpp_strerror(r
) << dendl
;
5526 ldout(cct
, 10) << __func__
<< " learned FSMap version " << fsmap_latest
<< dendl
;
5529 if (!fsmap_user
|| fsmap_user
->get_epoch() < fsmap_latest
) {
5530 monclient
->sub_want("fsmap.user", fsmap_latest
, CEPH_SUBSCRIBE_ONETIME
);
5531 monclient
->renew_subs();
5532 wait_on_list(waiting_for_fsmap
);
5535 assert(fsmap_user
->get_epoch() >= fsmap_latest
);
5537 if (!fsmap
|| fsmap
->get_epoch() < fsmap_latest
) {
5538 monclient
->sub_want("fsmap", fsmap_latest
, CEPH_SUBSCRIBE_ONETIME
);
5539 monclient
->renew_subs();
5540 wait_on_list(waiting_for_fsmap
);
5543 assert(fsmap
->get_epoch() >= fsmap_latest
);
5545 ldout(cct
, 10) << __func__
<< " finished waiting for FSMap version "
5546 << fsmap_latest
<< dendl
;
5552 * @mds_spec one of ID, rank, GID, "*"
5555 int Client::mds_command(
5556 const std::string
&mds_spec
,
5557 const vector
<string
>& cmd
,
5558 const bufferlist
& inbl
,
5563 Mutex::Locker
lock(client_lock
);
5565 assert(initialized
);
5573 r
= fetch_fsmap(false);
5578 // Look up MDS target(s) of the command
5579 std::vector
<mds_gid_t
> targets
;
5580 r
= resolve_mds(mds_spec
, &targets
);
5585 // If daemons are laggy, we won't send them commands. If all
5586 // are laggy then we fail.
5587 std::vector
<mds_gid_t
> non_laggy
;
5588 for (const auto gid
: targets
) {
5589 const auto info
= fsmap
->get_info_gid(gid
);
5590 if (!info
.laggy()) {
5591 non_laggy
.push_back(gid
);
5594 if (non_laggy
.size() == 0) {
5595 *outs
= "All targeted MDS daemons are laggy";
5599 if (metadata
.empty()) {
5600 // We are called on an unmounted client, so metadata
5601 // won't be initialized yet.
5602 populate_metadata("");
5605 // Send commands to targets
5606 C_GatherBuilder
gather(cct
, onfinish
);
5607 for (const auto target_gid
: non_laggy
) {
5608 const auto info
= fsmap
->get_info_gid(target_gid
);
5610 // Open a connection to the target MDS
5611 entity_inst_t inst
= info
.get_inst();
5612 ConnectionRef conn
= messenger
->get_connection(inst
);
5614 // Generate MDSCommandOp state
5615 auto &op
= command_table
.start_command();
5617 op
.on_finish
= gather
.new_sub();
5622 op
.mds_gid
= target_gid
;
5625 ldout(cct
, 4) << __func__
<< ": new command op to " << target_gid
5626 << " tid=" << op
.tid
<< cmd
<< dendl
;
5628 // Construct and send MCommand
5629 MCommand
*m
= op
.get_message(monclient
->get_fsid());
5630 conn
->send_message(m
);
5637 void Client::handle_command_reply(MCommandReply
*m
)
5639 ceph_tid_t
const tid
= m
->get_tid();
5641 ldout(cct
, 10) << __func__
<< ": tid=" << m
->get_tid() << dendl
;
5643 if (!command_table
.exists(tid
)) {
5644 ldout(cct
, 1) << __func__
<< ": unknown tid " << tid
<< ", dropping" << dendl
;
5649 auto &op
= command_table
.get_command(tid
);
5651 op
.outbl
->claim(m
->get_data());
5658 op
.on_finish
->complete(m
->r
);
5661 command_table
.erase(tid
);
5666 // -------------------
5669 int Client::mount(const std::string
&mount_root
, const UserPerm
& perms
,
5672 Mutex::Locker
lock(client_lock
);
5675 ldout(cct
, 5) << "already mounted" << dendl
;
5679 int r
= authenticate();
5681 lderr(cct
) << "authentication failed: " << cpp_strerror(r
) << dendl
;
5685 std::string want
= "mdsmap";
5686 const auto &mds_ns
= cct
->_conf
->client_mds_namespace
;
5687 if (!mds_ns
.empty()) {
5688 r
= fetch_fsmap(true);
5691 fs_cluster_id_t cid
= fsmap_user
->get_fs_cid(mds_ns
);
5692 if (cid
== FS_CLUSTER_ID_NONE
)
5695 std::ostringstream oss
;
5696 oss
<< want
<< "." << cid
;
5699 ldout(cct
, 10) << "Subscribing to map '" << want
<< "'" << dendl
;
5701 monclient
->sub_want(want
, 0, 0);
5702 monclient
->renew_subs();
5704 tick(); // start tick
5708 auto availability
= mdsmap
->is_cluster_available();
5709 if (availability
== MDSMap::STUCK_UNAVAILABLE
) {
5711 ldout(cct
, 10) << "mds cluster unavailable: epoch=" << mdsmap
->get_epoch() << dendl
;
5712 return CEPH_FUSE_NO_MDS_UP
;
5713 } else if (availability
== MDSMap::AVAILABLE
) {
5714 // Continue to mount
5716 } else if (availability
== MDSMap::TRANSIENT_UNAVAILABLE
) {
5717 // Else, wait. MDSMonitor will update the map to bring
5718 // us to a conclusion eventually.
5719 wait_on_list(waiting_for_mdsmap
);
5721 // Unexpected value!
5727 populate_metadata(mount_root
.empty() ? "/" : mount_root
);
5729 filepath
fp(CEPH_INO_ROOT
);
5730 if (!mount_root
.empty()) {
5731 fp
= filepath(mount_root
.c_str());
5734 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_GETATTR
);
5735 req
->set_filepath(fp
);
5736 req
->head
.args
.getattr
.mask
= CEPH_STAT_CAP_INODE_ALL
;
5737 int res
= make_request(req
, perms
);
5739 if (res
== -EACCES
&& root
) {
5740 ldout(cct
, 1) << __func__
<< " EACCES on parent of mount point; quotas may not work" << dendl
;
5758 if (!cct
->_conf
->client_trace
.empty()) {
5759 traceout
.open(cct
->_conf
->client_trace
.c_str());
5760 if (traceout
.is_open()) {
5761 ldout(cct
, 1) << "opened trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5763 ldout(cct
, 1) << "FAILED to open trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5768 ldout(cct, 3) << "op: // client trace data structs" << dendl;
5769 ldout(cct, 3) << "op: struct stat st;" << dendl;
5770 ldout(cct, 3) << "op: struct utimbuf utim;" << dendl;
5771 ldout(cct, 3) << "op: int readlinkbuf_len = 1000;" << dendl;
5772 ldout(cct, 3) << "op: char readlinkbuf[readlinkbuf_len];" << dendl;
5773 ldout(cct, 3) << "op: map<string, inode_t*> dir_contents;" << dendl;
5774 ldout(cct, 3) << "op: map<int, int> open_files;" << dendl;
5775 ldout(cct, 3) << "op: int fd;" << dendl;
5782 void Client::_close_sessions()
5784 while (!mds_sessions
.empty()) {
5785 // send session closes!
5786 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5787 p
!= mds_sessions
.end();
5789 if (p
->second
->state
!= MetaSession::STATE_CLOSING
) {
5790 _close_mds_session(p
->second
);
5794 // wait for sessions to close
5795 ldout(cct
, 2) << "waiting for " << mds_sessions
.size() << " mds sessions to close" << dendl
;
5796 mount_cond
.Wait(client_lock
);
5800 void Client::flush_mdlog_sync()
5802 if (mds_requests
.empty())
5804 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5805 p
!= mds_sessions
.end();
5807 MetaSession
*s
= p
->second
;
5812 void Client::flush_mdlog(MetaSession
*session
)
5814 // Only send this to Luminous or newer MDS daemons, older daemons
5815 // will crash if they see an unknown CEPH_SESSION_* value in this msg.
5816 const uint64_t features
= session
->con
->get_features();
5817 if (HAVE_FEATURE(features
, SERVER_LUMINOUS
)) {
5818 MClientSession
*m
= new MClientSession(CEPH_SESSION_REQUEST_FLUSH_MDLOG
);
5819 session
->con
->send_message(m
);
5824 void Client::unmount()
5826 Mutex::Locker
lock(client_lock
);
5828 assert(mounted
); // caller is confused?
5830 ldout(cct
, 2) << "unmounting" << dendl
;
5833 flush_mdlog_sync(); // flush the mdlog for pending requests, if any
5834 while (!mds_requests
.empty()) {
5835 ldout(cct
, 10) << "waiting on " << mds_requests
.size() << " requests" << dendl
;
5836 mount_cond
.Wait(client_lock
);
5840 timer
.cancel_event(tick_event
);
5845 // clean up any unclosed files
5846 while (!fd_map
.empty()) {
5847 Fh
*fh
= fd_map
.begin()->second
;
5848 fd_map
.erase(fd_map
.begin());
5849 ldout(cct
, 0) << " destroyed lost open file " << fh
<< " on " << *fh
->inode
<< dendl
;
5853 while (!ll_unclosed_fh_set
.empty()) {
5854 set
<Fh
*>::iterator it
= ll_unclosed_fh_set
.begin();
5856 ll_unclosed_fh_set
.erase(fh
);
5857 ldout(cct
, 0) << " destroyed lost open file " << fh
<< " on " << *(fh
->inode
) << dendl
;
5861 while (!opened_dirs
.empty()) {
5862 dir_result_t
*dirp
= *opened_dirs
.begin();
5863 ldout(cct
, 0) << " destroyed lost open dir " << dirp
<< " on " << *dirp
->inode
<< dendl
;
5870 ldout(cct
, 0) << " skipping clean shutdown, we are blacklisted" << dendl
;
5872 if (cct
->_conf
->client_oc
) {
5873 // Purge all cached data so that ObjectCacher doesn't get hung up
5874 // trying to flush it. ObjectCacher's behaviour on EBLACKLISTED
5875 // is to just leave things marked dirty
5876 // (http://tracker.ceph.com/issues/9105)
5877 for (const auto &i
: inode_map
) {
5878 objectcacher
->purge_set(&(i
.second
->oset
));
5886 while (unsafe_sync_write
> 0) {
5887 ldout(cct
, 0) << unsafe_sync_write
<< " unsafe_sync_writes, waiting" << dendl
;
5888 mount_cond
.Wait(client_lock
);
5891 if (cct
->_conf
->client_oc
) {
5892 // flush/release all buffered data
5893 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator next
;
5894 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
= inode_map
.begin();
5895 p
!= inode_map
.end();
5899 Inode
*in
= p
->second
;
5901 ldout(cct
, 0) << "null inode_map entry ino " << p
->first
<< dendl
;
5904 if (!in
->caps
.empty()) {
5905 InodeRef
tmp_ref(in
);
5907 _flush(in
, new C_Client_FlushComplete(this, in
));
5913 wait_sync_caps(last_flush_tid
);
5919 while (lru
.lru_get_size() > 0 ||
5920 !inode_map
.empty()) {
5921 ldout(cct
, 2) << "cache still has " << lru
.lru_get_size()
5922 << "+" << inode_map
.size() << " items"
5923 << ", waiting (for caps to release?)"
5925 utime_t until
= ceph_clock_now() + utime_t(5, 0);
5926 int r
= mount_cond
.WaitUntil(client_lock
, until
);
5927 if (r
== ETIMEDOUT
) {
5931 assert(lru
.lru_get_size() == 0);
5932 assert(inode_map
.empty());
5935 if (!cct
->_conf
->client_trace
.empty()) {
5936 ldout(cct
, 1) << "closing trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5944 ldout(cct
, 2) << "unmounted." << dendl
;
5949 class C_C_Tick
: public Context
{
5952 explicit C_C_Tick(Client
*c
) : client(c
) {}
5953 void finish(int r
) override
{
5954 // Called back via Timer, which takes client_lock for us
5955 assert(client
->client_lock
.is_locked_by_me());
5960 void Client::flush_cap_releases()
5962 // send any cap releases
5963 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5964 p
!= mds_sessions
.end();
5966 if (p
->second
->release
&& mdsmap
->is_clientreplay_or_active_or_stopping(
5968 if (cct
->_conf
->client_inject_release_failure
) {
5969 ldout(cct
, 20) << __func__
<< " injecting failure to send cap release message" << dendl
;
5970 p
->second
->release
->put();
5972 p
->second
->con
->send_message(p
->second
->release
);
5974 p
->second
->release
= 0;
5981 if (cct
->_conf
->client_debug_inject_tick_delay
> 0) {
5982 sleep(cct
->_conf
->client_debug_inject_tick_delay
);
5983 assert(0 == cct
->_conf
->set_val("client_debug_inject_tick_delay", "0"));
5984 cct
->_conf
->apply_changes(NULL
);
5987 ldout(cct
, 21) << "tick" << dendl
;
5988 tick_event
= new C_C_Tick(this);
5989 timer
.add_event_after(cct
->_conf
->client_tick_interval
, tick_event
);
5991 utime_t now
= ceph_clock_now();
5993 if (!mounted
&& !mds_requests
.empty()) {
5994 MetaRequest
*req
= mds_requests
.begin()->second
;
5995 if (req
->op_stamp
+ cct
->_conf
->client_mount_timeout
< now
) {
5996 req
->abort(-ETIMEDOUT
);
5997 if (req
->caller_cond
) {
5999 req
->caller_cond
->Signal();
6001 signal_cond_list(waiting_for_mdsmap
);
6002 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
6003 p
!= mds_sessions
.end();
6005 signal_context_list(p
->second
->waiting_for_open
);
6009 if (mdsmap
->get_epoch()) {
6011 utime_t el
= now
- last_cap_renew
;
6012 if (el
> mdsmap
->get_session_timeout() / 3.0)
6015 flush_cap_releases();
6019 xlist
<Inode
*>::iterator p
= delayed_caps
.begin();
6023 if (in
->hold_caps_until
> now
)
6025 delayed_caps
.pop_front();
6026 cap_list
.push_back(&in
->cap_item
);
6027 check_caps(in
, CHECK_CAPS_NODELAY
);
6033 void Client::renew_caps()
6035 ldout(cct
, 10) << "renew_caps()" << dendl
;
6036 last_cap_renew
= ceph_clock_now();
6038 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
6039 p
!= mds_sessions
.end();
6041 ldout(cct
, 15) << "renew_caps requesting from mds." << p
->first
<< dendl
;
6042 if (mdsmap
->get_state(p
->first
) >= MDSMap::STATE_REJOIN
)
6043 renew_caps(p
->second
);
6047 void Client::renew_caps(MetaSession
*session
)
6049 ldout(cct
, 10) << "renew_caps mds." << session
->mds_num
<< dendl
;
6050 session
->last_cap_renew_request
= ceph_clock_now();
6051 uint64_t seq
= ++session
->cap_renew_seq
;
6052 session
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_RENEWCAPS
, seq
));
6056 // ===============================================================
6057 // high level (POSIXy) interface
6059 int Client::_do_lookup(Inode
*dir
, const string
& name
, int mask
,
6060 InodeRef
*target
, const UserPerm
& perms
)
6062 int op
= dir
->snapid
== CEPH_SNAPDIR
? CEPH_MDS_OP_LOOKUPSNAP
: CEPH_MDS_OP_LOOKUP
;
6063 MetaRequest
*req
= new MetaRequest(op
);
6065 dir
->make_nosnap_relative_path(path
);
6066 path
.push_dentry(name
);
6067 req
->set_filepath(path
);
6068 req
->set_inode(dir
);
6069 if (cct
->_conf
->client_debug_getattr_caps
&& op
== CEPH_MDS_OP_LOOKUP
)
6070 mask
|= DEBUG_GETATTR_CAPS
;
6071 req
->head
.args
.getattr
.mask
= mask
;
6073 ldout(cct
, 10) << "_do_lookup on " << path
<< dendl
;
6075 int r
= make_request(req
, perms
, target
);
6076 ldout(cct
, 10) << "_do_lookup res is " << r
<< dendl
;
6080 int Client::_lookup(Inode
*dir
, const string
& dname
, int mask
, InodeRef
*target
,
6081 const UserPerm
& perms
)
6086 if (!dir
->is_dir()) {
6091 if (dname
== "..") {
6092 if (dir
->dn_set
.empty())
6095 *target
= dir
->get_first_parent()->dir
->parent_inode
; //dirs can't be hard-linked
6104 if (dname
.length() > NAME_MAX
) {
6109 if (dname
== cct
->_conf
->client_snapdir
&&
6110 dir
->snapid
== CEPH_NOSNAP
) {
6111 *target
= open_snapdir(dir
);
6116 dir
->dir
->dentries
.count(dname
)) {
6117 dn
= dir
->dir
->dentries
[dname
];
6119 ldout(cct
, 20) << "_lookup have dn " << dname
<< " mds." << dn
->lease_mds
<< " ttl " << dn
->lease_ttl
6120 << " seq " << dn
->lease_seq
6123 if (!dn
->inode
|| dn
->inode
->caps_issued_mask(mask
)) {
6124 // is dn lease valid?
6125 utime_t now
= ceph_clock_now();
6126 if (dn
->lease_mds
>= 0 &&
6127 dn
->lease_ttl
> now
&&
6128 mds_sessions
.count(dn
->lease_mds
)) {
6129 MetaSession
*s
= mds_sessions
[dn
->lease_mds
];
6130 if (s
->cap_ttl
> now
&&
6131 s
->cap_gen
== dn
->lease_gen
) {
6132 // touch this mds's dir cap too, even though we don't _explicitly_ use it here, to
6133 // make trim_caps() behave.
6134 dir
->try_touch_cap(dn
->lease_mds
);
6137 ldout(cct
, 20) << " bad lease, cap_ttl " << s
->cap_ttl
<< ", cap_gen " << s
->cap_gen
6138 << " vs lease_gen " << dn
->lease_gen
<< dendl
;
6141 if (dir
->caps_issued_mask(CEPH_CAP_FILE_SHARED
)) {
6142 if (dn
->cap_shared_gen
== dir
->shared_gen
&&
6143 (!dn
->inode
|| dn
->inode
->caps_issued_mask(mask
)))
6145 if (!dn
->inode
&& (dir
->flags
& I_COMPLETE
)) {
6146 ldout(cct
, 10) << "_lookup concluded ENOENT locally for "
6147 << *dir
<< " dn '" << dname
<< "'" << dendl
;
6152 ldout(cct
, 20) << " no cap on " << dn
->inode
->vino() << dendl
;
6155 // can we conclude ENOENT locally?
6156 if (dir
->caps_issued_mask(CEPH_CAP_FILE_SHARED
) &&
6157 (dir
->flags
& I_COMPLETE
)) {
6158 ldout(cct
, 10) << "_lookup concluded ENOENT locally for " << *dir
<< " dn '" << dname
<< "'" << dendl
;
6163 r
= _do_lookup(dir
, dname
, mask
, target
, perms
);
6168 *target
= dn
->inode
;
6176 ldout(cct
, 10) << "_lookup " << *dir
<< " " << dname
<< " = " << r
<< dendl
;
6178 ldout(cct
, 10) << "_lookup " << *dir
<< " " << dname
<< " = " << **target
<< dendl
;
6182 int Client::get_or_create(Inode
*dir
, const char* name
,
6183 Dentry
**pdn
, bool expect_null
)
6186 ldout(cct
, 20) << "get_or_create " << *dir
<< " name " << name
<< dendl
;
6188 if (dir
->dir
->dentries
.count(name
)) {
6189 Dentry
*dn
= dir
->dir
->dentries
[name
];
6191 // is dn lease valid?
6192 utime_t now
= ceph_clock_now();
6194 dn
->lease_mds
>= 0 &&
6195 dn
->lease_ttl
> now
&&
6196 mds_sessions
.count(dn
->lease_mds
)) {
6197 MetaSession
*s
= mds_sessions
[dn
->lease_mds
];
6198 if (s
->cap_ttl
> now
&&
6199 s
->cap_gen
== dn
->lease_gen
) {
6206 // otherwise link up a new one
6207 *pdn
= link(dir
->dir
, name
, NULL
, NULL
);
6214 int Client::path_walk(const filepath
& origpath
, InodeRef
*end
,
6215 const UserPerm
& perms
, bool followsym
, int mask
)
6217 filepath path
= origpath
;
6219 if (origpath
.absolute())
6225 ldout(cct
, 10) << "path_walk " << path
<< dendl
;
6230 while (i
< path
.depth() && cur
) {
6232 const string
&dname
= path
[i
];
6233 ldout(cct
, 10) << " " << i
<< " " << *cur
<< " " << dname
<< dendl
;
6234 ldout(cct
, 20) << " (path is " << path
<< ")" << dendl
;
6236 if (cct
->_conf
->client_permissions
) {
6237 int r
= may_lookup(cur
.get(), perms
);
6240 caps
= CEPH_CAP_AUTH_SHARED
;
6243 /* Get extra requested caps on the last component */
6244 if (i
== (path
.depth() - 1))
6246 int r
= _lookup(cur
.get(), dname
, caps
, &next
, perms
);
6249 // only follow trailing symlink if followsym. always follow
6250 // 'directory' symlinks.
6251 if (next
&& next
->is_symlink()) {
6253 ldout(cct
, 20) << " symlink count " << symlinks
<< ", value is '" << next
->symlink
<< "'" << dendl
;
6254 if (symlinks
> MAXSYMLINKS
) {
6258 if (i
< path
.depth() - 1) {
6260 // replace consumed components of path with symlink dir target
6261 filepath
resolved(next
->symlink
.c_str());
6262 resolved
.append(path
.postfixpath(i
+ 1));
6265 if (next
->symlink
[0] == '/') {
6269 } else if (followsym
) {
6270 if (next
->symlink
[0] == '/') {
6271 path
= next
->symlink
.c_str();
6276 filepath
more(next
->symlink
.c_str());
6277 // we need to remove the symlink component from off of the path
6278 // before adding the target that the symlink points to. remain
6279 // at the same position in the path.
6299 int Client::link(const char *relexisting
, const char *relpath
, const UserPerm
& perm
)
6301 Mutex::Locker
lock(client_lock
);
6302 tout(cct
) << "link" << std::endl
;
6303 tout(cct
) << relexisting
<< std::endl
;
6304 tout(cct
) << relpath
<< std::endl
;
6306 filepath
existing(relexisting
);
6309 int r
= path_walk(existing
, &in
, perm
, true);
6312 if (std::string(relpath
) == "/") {
6316 filepath
path(relpath
);
6317 string name
= path
.last_dentry();
6320 r
= path_walk(path
, &dir
, perm
, true);
6323 if (cct
->_conf
->client_permissions
) {
6324 if (S_ISDIR(in
->mode
)) {
6328 r
= may_hardlink(in
.get(), perm
);
6331 r
= may_create(dir
.get(), perm
);
6335 r
= _link(in
.get(), dir
.get(), name
.c_str(), perm
);
6339 int Client::unlink(const char *relpath
, const UserPerm
& perm
)
6341 Mutex::Locker
lock(client_lock
);
6342 tout(cct
) << "unlink" << std::endl
;
6343 tout(cct
) << relpath
<< std::endl
;
6345 if (std::string(relpath
) == "/")
6348 filepath
path(relpath
);
6349 string name
= path
.last_dentry();
6352 int r
= path_walk(path
, &dir
, perm
);
6355 if (cct
->_conf
->client_permissions
) {
6356 r
= may_delete(dir
.get(), name
.c_str(), perm
);
6360 return _unlink(dir
.get(), name
.c_str(), perm
);
6363 int Client::rename(const char *relfrom
, const char *relto
, const UserPerm
& perm
)
6365 Mutex::Locker
lock(client_lock
);
6366 tout(cct
) << "rename" << std::endl
;
6367 tout(cct
) << relfrom
<< std::endl
;
6368 tout(cct
) << relto
<< std::endl
;
6370 if (std::string(relfrom
) == "/" || std::string(relto
) == "/")
6373 filepath
from(relfrom
);
6375 string fromname
= from
.last_dentry();
6377 string toname
= to
.last_dentry();
6380 InodeRef fromdir
, todir
;
6381 int r
= path_walk(from
, &fromdir
, perm
);
6384 r
= path_walk(to
, &todir
, perm
);
6388 if (cct
->_conf
->client_permissions
) {
6389 int r
= may_delete(fromdir
.get(), fromname
.c_str(), perm
);
6392 r
= may_delete(todir
.get(), toname
.c_str(), perm
);
6393 if (r
< 0 && r
!= -ENOENT
)
6396 r
= _rename(fromdir
.get(), fromname
.c_str(), todir
.get(), toname
.c_str(), perm
);
6403 int Client::mkdir(const char *relpath
, mode_t mode
, const UserPerm
& perm
)
6405 Mutex::Locker
lock(client_lock
);
6406 tout(cct
) << "mkdir" << std::endl
;
6407 tout(cct
) << relpath
<< std::endl
;
6408 tout(cct
) << mode
<< std::endl
;
6409 ldout(cct
, 10) << "mkdir: " << relpath
<< dendl
;
6411 if (std::string(relpath
) == "/")
6414 filepath
path(relpath
);
6415 string name
= path
.last_dentry();
6418 int r
= path_walk(path
, &dir
, perm
);
6421 if (cct
->_conf
->client_permissions
) {
6422 r
= may_create(dir
.get(), perm
);
6426 return _mkdir(dir
.get(), name
.c_str(), mode
, perm
);
6429 int Client::mkdirs(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
6431 Mutex::Locker
lock(client_lock
);
6432 ldout(cct
, 10) << "Client::mkdirs " << relpath
<< dendl
;
6433 tout(cct
) << "mkdirs" << std::endl
;
6434 tout(cct
) << relpath
<< std::endl
;
6435 tout(cct
) << mode
<< std::endl
;
6437 //get through existing parts of path
6438 filepath
path(relpath
);
6440 int r
= 0, caps
= 0;
6443 for (i
=0; i
<path
.depth(); ++i
) {
6444 if (cct
->_conf
->client_permissions
) {
6445 r
= may_lookup(cur
.get(), perms
);
6448 caps
= CEPH_CAP_AUTH_SHARED
;
6450 r
= _lookup(cur
.get(), path
[i
].c_str(), caps
, &next
, perms
);
6455 //check that we have work left to do
6456 if (i
==path
.depth()) return -EEXIST
;
6457 if (r
!=-ENOENT
) return r
;
6458 ldout(cct
, 20) << "mkdirs got through " << i
<< " directories on path " << relpath
<< dendl
;
6459 //make new directory at each level
6460 for (; i
<path
.depth(); ++i
) {
6461 if (cct
->_conf
->client_permissions
) {
6462 r
= may_create(cur
.get(), perms
);
6467 r
= _mkdir(cur
.get(), path
[i
].c_str(), mode
, perms
, &next
);
6469 //check proper creation/existence
6470 if(-EEXIST
== r
&& i
< path
.depth() - 1) {
6471 r
= _lookup(cur
.get(), path
[i
].c_str(), CEPH_CAP_AUTH_SHARED
, &next
, perms
);
6475 //move to new dir and continue
6477 ldout(cct
, 20) << "mkdirs: successfully created directory "
6478 << filepath(cur
->ino
).get_path() << dendl
;
6483 int Client::rmdir(const char *relpath
, const UserPerm
& perms
)
6485 Mutex::Locker
lock(client_lock
);
6486 tout(cct
) << "rmdir" << std::endl
;
6487 tout(cct
) << relpath
<< std::endl
;
6489 if (std::string(relpath
) == "/")
6492 filepath
path(relpath
);
6493 string name
= path
.last_dentry();
6496 int r
= path_walk(path
, &dir
, perms
);
6499 if (cct
->_conf
->client_permissions
) {
6500 int r
= may_delete(dir
.get(), name
.c_str(), perms
);
6504 return _rmdir(dir
.get(), name
.c_str(), perms
);
6507 int Client::mknod(const char *relpath
, mode_t mode
, const UserPerm
& perms
, dev_t rdev
)
6509 Mutex::Locker
lock(client_lock
);
6510 tout(cct
) << "mknod" << std::endl
;
6511 tout(cct
) << relpath
<< std::endl
;
6512 tout(cct
) << mode
<< std::endl
;
6513 tout(cct
) << rdev
<< std::endl
;
6515 if (std::string(relpath
) == "/")
6518 filepath
path(relpath
);
6519 string name
= path
.last_dentry();
6522 int r
= path_walk(path
, &dir
, perms
);
6525 if (cct
->_conf
->client_permissions
) {
6526 int r
= may_create(dir
.get(), perms
);
6530 return _mknod(dir
.get(), name
.c_str(), mode
, rdev
, perms
);
6535 int Client::symlink(const char *target
, const char *relpath
, const UserPerm
& perms
)
6537 Mutex::Locker
lock(client_lock
);
6538 tout(cct
) << "symlink" << std::endl
;
6539 tout(cct
) << target
<< std::endl
;
6540 tout(cct
) << relpath
<< std::endl
;
6542 if (std::string(relpath
) == "/")
6545 filepath
path(relpath
);
6546 string name
= path
.last_dentry();
6549 int r
= path_walk(path
, &dir
, perms
);
6552 if (cct
->_conf
->client_permissions
) {
6553 int r
= may_create(dir
.get(), perms
);
6557 return _symlink(dir
.get(), name
.c_str(), target
, perms
);
6560 int Client::readlink(const char *relpath
, char *buf
, loff_t size
, const UserPerm
& perms
)
6562 Mutex::Locker
lock(client_lock
);
6563 tout(cct
) << "readlink" << std::endl
;
6564 tout(cct
) << relpath
<< std::endl
;
6566 filepath
path(relpath
);
6568 int r
= path_walk(path
, &in
, perms
, false);
6572 return _readlink(in
.get(), buf
, size
);
6575 int Client::_readlink(Inode
*in
, char *buf
, size_t size
)
6577 if (!in
->is_symlink())
6580 // copy into buf (at most size bytes)
6581 int r
= in
->symlink
.length();
6584 memcpy(buf
, in
->symlink
.c_str(), r
);
6591 int Client::_getattr(Inode
*in
, int mask
, const UserPerm
& perms
, bool force
)
6593 bool yes
= in
->caps_issued_mask(mask
);
6595 ldout(cct
, 10) << "_getattr mask " << ccap_string(mask
) << " issued=" << yes
<< dendl
;
6599 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_GETATTR
);
6601 in
->make_nosnap_relative_path(path
);
6602 req
->set_filepath(path
);
6604 req
->head
.args
.getattr
.mask
= mask
;
6606 int res
= make_request(req
, perms
);
6607 ldout(cct
, 10) << "_getattr result=" << res
<< dendl
;
6611 int Client::_do_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
6612 const UserPerm
& perms
, InodeRef
*inp
)
6614 int issued
= in
->caps_issued();
6616 ldout(cct
, 10) << "_setattr mask " << mask
<< " issued " <<
6617 ccap_string(issued
) << dendl
;
6619 if (in
->snapid
!= CEPH_NOSNAP
) {
6622 if ((mask
& CEPH_SETATTR_SIZE
) &&
6623 (unsigned long)stx
->stx_size
> in
->size
&&
6624 is_quota_bytes_exceeded(in
, (unsigned long)stx
->stx_size
- in
->size
,
6629 // make the change locally?
6630 if ((in
->cap_dirtier_uid
>= 0 && perms
.uid() != in
->cap_dirtier_uid
) ||
6631 (in
->cap_dirtier_gid
>= 0 && perms
.gid() != in
->cap_dirtier_gid
)) {
6632 ldout(cct
, 10) << __func__
<< " caller " << perms
.uid() << ":" << perms
.gid()
6633 << " != cap dirtier " << in
->cap_dirtier_uid
<< ":"
6634 << in
->cap_dirtier_gid
<< ", forcing sync setattr"
6637 * This works because we implicitly flush the caps as part of the
6638 * request, so the cap update check will happen with the writeback
6639 * cap context, and then the setattr check will happen with the
6642 * In reality this pattern is likely pretty rare (different users
6643 * setattr'ing the same file). If that turns out not to be the
6644 * case later, we can build a more complex pipelined cap writeback
6648 mask
|= CEPH_SETATTR_CTIME
;
6653 // caller just needs us to bump the ctime
6654 in
->ctime
= ceph_clock_now();
6655 in
->cap_dirtier_uid
= perms
.uid();
6656 in
->cap_dirtier_gid
= perms
.gid();
6657 if (issued
& CEPH_CAP_AUTH_EXCL
)
6658 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6659 else if (issued
& CEPH_CAP_FILE_EXCL
)
6660 mark_caps_dirty(in
, CEPH_CAP_FILE_EXCL
);
6661 else if (issued
& CEPH_CAP_XATTR_EXCL
)
6662 mark_caps_dirty(in
, CEPH_CAP_XATTR_EXCL
);
6664 mask
|= CEPH_SETATTR_CTIME
;
6667 if (in
->caps_issued_mask(CEPH_CAP_AUTH_EXCL
)) {
6668 bool kill_sguid
= mask
& (CEPH_SETATTR_SIZE
|CEPH_SETATTR_KILL_SGUID
);
6670 mask
&= ~CEPH_SETATTR_KILL_SGUID
;
6672 if (mask
& CEPH_SETATTR_UID
) {
6673 in
->ctime
= ceph_clock_now();
6674 in
->cap_dirtier_uid
= perms
.uid();
6675 in
->cap_dirtier_gid
= perms
.gid();
6676 in
->uid
= stx
->stx_uid
;
6677 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6678 mask
&= ~CEPH_SETATTR_UID
;
6680 ldout(cct
,10) << "changing uid to " << stx
->stx_uid
<< dendl
;
6682 if (mask
& CEPH_SETATTR_GID
) {
6683 in
->ctime
= ceph_clock_now();
6684 in
->cap_dirtier_uid
= perms
.uid();
6685 in
->cap_dirtier_gid
= perms
.gid();
6686 in
->gid
= stx
->stx_gid
;
6687 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6688 mask
&= ~CEPH_SETATTR_GID
;
6690 ldout(cct
,10) << "changing gid to " << stx
->stx_gid
<< dendl
;
6693 if (mask
& CEPH_SETATTR_MODE
) {
6694 in
->ctime
= ceph_clock_now();
6695 in
->cap_dirtier_uid
= perms
.uid();
6696 in
->cap_dirtier_gid
= perms
.gid();
6697 in
->mode
= (in
->mode
& ~07777) | (stx
->stx_mode
& 07777);
6698 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6699 mask
&= ~CEPH_SETATTR_MODE
;
6700 ldout(cct
,10) << "changing mode to " << stx
->stx_mode
<< dendl
;
6701 } else if (kill_sguid
&& S_ISREG(in
->mode
)) {
6702 /* Must squash the any setuid/setgid bits with an ownership change */
6703 in
->mode
&= ~S_ISUID
;
6704 if ((in
->mode
& (S_ISGID
|S_IXGRP
)) == (S_ISGID
|S_IXGRP
))
6705 in
->mode
&= ~S_ISGID
;
6706 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6709 if (mask
& CEPH_SETATTR_BTIME
) {
6710 in
->ctime
= ceph_clock_now();
6711 in
->cap_dirtier_uid
= perms
.uid();
6712 in
->cap_dirtier_gid
= perms
.gid();
6713 in
->btime
= utime_t(stx
->stx_btime
);
6714 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6715 mask
&= ~CEPH_SETATTR_BTIME
;
6716 ldout(cct
,10) << "changing btime to " << in
->btime
<< dendl
;
6718 } else if (mask
& CEPH_SETATTR_SIZE
) {
6719 /* If we don't have Ax, then we must ask the server to clear them on truncate */
6720 mask
|= CEPH_SETATTR_KILL_SGUID
;
6723 if (in
->caps_issued_mask(CEPH_CAP_FILE_EXCL
)) {
6724 if (mask
& (CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
)) {
6725 if (mask
& CEPH_SETATTR_MTIME
)
6726 in
->mtime
= utime_t(stx
->stx_mtime
);
6727 if (mask
& CEPH_SETATTR_ATIME
)
6728 in
->atime
= utime_t(stx
->stx_atime
);
6729 in
->ctime
= ceph_clock_now();
6730 in
->cap_dirtier_uid
= perms
.uid();
6731 in
->cap_dirtier_gid
= perms
.gid();
6732 in
->time_warp_seq
++;
6733 mark_caps_dirty(in
, CEPH_CAP_FILE_EXCL
);
6734 mask
&= ~(CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
);
6743 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SETATTR
);
6747 in
->make_nosnap_relative_path(path
);
6748 req
->set_filepath(path
);
6751 if (mask
& CEPH_SETATTR_KILL_SGUID
) {
6752 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6754 if (mask
& CEPH_SETATTR_MODE
) {
6755 req
->head
.args
.setattr
.mode
= stx
->stx_mode
;
6756 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6757 ldout(cct
,10) << "changing mode to " << stx
->stx_mode
<< dendl
;
6759 if (mask
& CEPH_SETATTR_UID
) {
6760 req
->head
.args
.setattr
.uid
= stx
->stx_uid
;
6761 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6762 ldout(cct
,10) << "changing uid to " << stx
->stx_uid
<< dendl
;
6764 if (mask
& CEPH_SETATTR_GID
) {
6765 req
->head
.args
.setattr
.gid
= stx
->stx_gid
;
6766 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6767 ldout(cct
,10) << "changing gid to " << stx
->stx_gid
<< dendl
;
6769 if (mask
& CEPH_SETATTR_BTIME
) {
6770 req
->head
.args
.setattr
.btime
= utime_t(stx
->stx_btime
);
6771 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6773 if (mask
& CEPH_SETATTR_MTIME
) {
6774 req
->head
.args
.setattr
.mtime
= utime_t(stx
->stx_mtime
);
6775 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
| CEPH_CAP_FILE_RD
|
6778 if (mask
& CEPH_SETATTR_ATIME
) {
6779 req
->head
.args
.setattr
.atime
= utime_t(stx
->stx_atime
);
6780 req
->inode_drop
|= CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_RD
|
6783 if (mask
& CEPH_SETATTR_SIZE
) {
6784 if ((unsigned long)stx
->stx_size
< mdsmap
->get_max_filesize()) {
6785 req
->head
.args
.setattr
.size
= stx
->stx_size
;
6786 ldout(cct
,10) << "changing size to " << stx
->stx_size
<< dendl
;
6789 ldout(cct
,10) << "unable to set size to " << stx
->stx_size
<< ". Too large!" << dendl
;
6792 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
| CEPH_CAP_FILE_RD
|
6795 req
->head
.args
.setattr
.mask
= mask
;
6797 req
->regetattr_mask
= mask
;
6799 int res
= make_request(req
, perms
, inp
);
6800 ldout(cct
, 10) << "_setattr result=" << res
<< dendl
;
6804 /* Note that we only care about attrs that setattr cares about */
6805 void Client::stat_to_statx(struct stat
*st
, struct ceph_statx
*stx
)
6807 stx
->stx_size
= st
->st_size
;
6808 stx
->stx_mode
= st
->st_mode
;
6809 stx
->stx_uid
= st
->st_uid
;
6810 stx
->stx_gid
= st
->st_gid
;
6811 stx
->stx_mtime
= st
->st_mtim
;
6812 stx
->stx_atime
= st
->st_atim
;
6815 int Client::__setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
6816 const UserPerm
& perms
, InodeRef
*inp
)
6818 int ret
= _do_setattr(in
, stx
, mask
, perms
, inp
);
6821 if (mask
& CEPH_SETATTR_MODE
)
6822 ret
= _posix_acl_chmod(in
, stx
->stx_mode
, perms
);
6826 int Client::_setattrx(InodeRef
&in
, struct ceph_statx
*stx
, int mask
,
6827 const UserPerm
& perms
)
6829 mask
&= (CEPH_SETATTR_MODE
| CEPH_SETATTR_UID
|
6830 CEPH_SETATTR_GID
| CEPH_SETATTR_MTIME
|
6831 CEPH_SETATTR_ATIME
| CEPH_SETATTR_SIZE
|
6832 CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
);
6833 if (cct
->_conf
->client_permissions
) {
6834 int r
= may_setattr(in
.get(), stx
, mask
, perms
);
6838 return __setattrx(in
.get(), stx
, mask
, perms
);
6841 int Client::_setattr(InodeRef
&in
, struct stat
*attr
, int mask
,
6842 const UserPerm
& perms
)
6844 struct ceph_statx stx
;
6846 stat_to_statx(attr
, &stx
);
6847 mask
&= ~CEPH_SETATTR_BTIME
;
6848 return _setattrx(in
, &stx
, mask
, perms
);
6851 int Client::setattr(const char *relpath
, struct stat
*attr
, int mask
,
6852 const UserPerm
& perms
)
6854 Mutex::Locker
lock(client_lock
);
6855 tout(cct
) << "setattr" << std::endl
;
6856 tout(cct
) << relpath
<< std::endl
;
6857 tout(cct
) << mask
<< std::endl
;
6859 filepath
path(relpath
);
6861 int r
= path_walk(path
, &in
, perms
);
6864 return _setattr(in
, attr
, mask
, perms
);
6867 int Client::setattrx(const char *relpath
, struct ceph_statx
*stx
, int mask
,
6868 const UserPerm
& perms
, int flags
)
6870 Mutex::Locker
lock(client_lock
);
6871 tout(cct
) << "setattrx" << std::endl
;
6872 tout(cct
) << relpath
<< std::endl
;
6873 tout(cct
) << mask
<< std::endl
;
6875 filepath
path(relpath
);
6877 int r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
));
6880 return _setattrx(in
, stx
, mask
, perms
);
6883 int Client::fsetattr(int fd
, struct stat
*attr
, int mask
, const UserPerm
& perms
)
6885 Mutex::Locker
lock(client_lock
);
6886 tout(cct
) << "fsetattr" << std::endl
;
6887 tout(cct
) << fd
<< std::endl
;
6888 tout(cct
) << mask
<< std::endl
;
6890 Fh
*f
= get_filehandle(fd
);
6893 #if defined(__linux__) && defined(O_PATH)
6894 if (f
->flags
& O_PATH
)
6897 return _setattr(f
->inode
, attr
, mask
, perms
);
6900 int Client::fsetattrx(int fd
, struct ceph_statx
*stx
, int mask
, const UserPerm
& perms
)
6902 Mutex::Locker
lock(client_lock
);
6903 tout(cct
) << "fsetattr" << std::endl
;
6904 tout(cct
) << fd
<< std::endl
;
6905 tout(cct
) << mask
<< std::endl
;
6907 Fh
*f
= get_filehandle(fd
);
6910 #if defined(__linux__) && defined(O_PATH)
6911 if (f
->flags
& O_PATH
)
6914 return _setattrx(f
->inode
, stx
, mask
, perms
);
6917 int Client::stat(const char *relpath
, struct stat
*stbuf
, const UserPerm
& perms
,
6918 frag_info_t
*dirstat
, int mask
)
6920 ldout(cct
, 3) << "stat enter (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
6921 Mutex::Locker
lock(client_lock
);
6922 tout(cct
) << "stat" << std::endl
;
6923 tout(cct
) << relpath
<< std::endl
;
6924 filepath
path(relpath
);
6926 int r
= path_walk(path
, &in
, perms
, true, mask
);
6929 r
= _getattr(in
, mask
, perms
);
6931 ldout(cct
, 3) << "stat exit on error!" << dendl
;
6934 fill_stat(in
, stbuf
, dirstat
);
6935 ldout(cct
, 3) << "stat exit (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
6939 unsigned Client::statx_to_mask(unsigned int flags
, unsigned int want
)
6943 /* if NO_ATTR_SYNC is set, then we don't need any -- just use what's in cache */
6944 if (flags
& AT_NO_ATTR_SYNC
)
6947 /* Always set PIN to distinguish from AT_NO_ATTR_SYNC case */
6948 mask
|= CEPH_CAP_PIN
;
6949 if (want
& (CEPH_STATX_MODE
|CEPH_STATX_UID
|CEPH_STATX_GID
|CEPH_STATX_BTIME
|CEPH_STATX_CTIME
|CEPH_STATX_VERSION
))
6950 mask
|= CEPH_CAP_AUTH_SHARED
;
6951 if (want
& (CEPH_STATX_NLINK
|CEPH_STATX_CTIME
|CEPH_STATX_VERSION
))
6952 mask
|= CEPH_CAP_LINK_SHARED
;
6953 if (want
& (CEPH_STATX_ATIME
|CEPH_STATX_MTIME
|CEPH_STATX_CTIME
|CEPH_STATX_SIZE
|CEPH_STATX_BLOCKS
|CEPH_STATX_VERSION
))
6954 mask
|= CEPH_CAP_FILE_SHARED
;
6955 if (want
& (CEPH_STATX_VERSION
|CEPH_STATX_CTIME
))
6956 mask
|= CEPH_CAP_XATTR_SHARED
;
6961 int Client::statx(const char *relpath
, struct ceph_statx
*stx
,
6962 const UserPerm
& perms
,
6963 unsigned int want
, unsigned int flags
)
6965 ldout(cct
, 3) << "statx enter (relpath " << relpath
<< " want " << want
<< ")" << dendl
;
6966 Mutex::Locker
lock(client_lock
);
6967 tout(cct
) << "statx" << std::endl
;
6968 tout(cct
) << relpath
<< std::endl
;
6969 filepath
path(relpath
);
6972 unsigned mask
= statx_to_mask(flags
, want
);
6974 int r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), mask
);
6978 r
= _getattr(in
, mask
, perms
);
6980 ldout(cct
, 3) << "statx exit on error!" << dendl
;
6984 fill_statx(in
, mask
, stx
);
6985 ldout(cct
, 3) << "statx exit (relpath " << relpath
<< " mask " << stx
->stx_mask
<< ")" << dendl
;
6989 int Client::lstat(const char *relpath
, struct stat
*stbuf
,
6990 const UserPerm
& perms
, frag_info_t
*dirstat
, int mask
)
6992 ldout(cct
, 3) << "lstat enter (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
6993 Mutex::Locker
lock(client_lock
);
6994 tout(cct
) << "lstat" << std::endl
;
6995 tout(cct
) << relpath
<< std::endl
;
6996 filepath
path(relpath
);
6998 // don't follow symlinks
6999 int r
= path_walk(path
, &in
, perms
, false, mask
);
7002 r
= _getattr(in
, mask
, perms
);
7004 ldout(cct
, 3) << "lstat exit on error!" << dendl
;
7007 fill_stat(in
, stbuf
, dirstat
);
7008 ldout(cct
, 3) << "lstat exit (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7012 int Client::fill_stat(Inode
*in
, struct stat
*st
, frag_info_t
*dirstat
, nest_info_t
*rstat
)
7014 ldout(cct
, 10) << "fill_stat on " << in
->ino
<< " snap/dev" << in
->snapid
7015 << " mode 0" << oct
<< in
->mode
<< dec
7016 << " mtime " << in
->mtime
<< " ctime " << in
->ctime
<< dendl
;
7017 memset(st
, 0, sizeof(struct stat
));
7018 if (use_faked_inos())
7019 st
->st_ino
= in
->faked_ino
;
7021 st
->st_ino
= in
->ino
;
7022 st
->st_dev
= in
->snapid
;
7023 st
->st_mode
= in
->mode
;
7024 st
->st_rdev
= in
->rdev
;
7025 st
->st_nlink
= in
->nlink
;
7026 st
->st_uid
= in
->uid
;
7027 st
->st_gid
= in
->gid
;
7028 if (in
->ctime
> in
->mtime
) {
7029 stat_set_ctime_sec(st
, in
->ctime
.sec());
7030 stat_set_ctime_nsec(st
, in
->ctime
.nsec());
7032 stat_set_ctime_sec(st
, in
->mtime
.sec());
7033 stat_set_ctime_nsec(st
, in
->mtime
.nsec());
7035 stat_set_atime_sec(st
, in
->atime
.sec());
7036 stat_set_atime_nsec(st
, in
->atime
.nsec());
7037 stat_set_mtime_sec(st
, in
->mtime
.sec());
7038 stat_set_mtime_nsec(st
, in
->mtime
.nsec());
7040 if (cct
->_conf
->client_dirsize_rbytes
)
7041 st
->st_size
= in
->rstat
.rbytes
;
7043 st
->st_size
= in
->dirstat
.size();
7046 st
->st_size
= in
->size
;
7047 st
->st_blocks
= (in
->size
+ 511) >> 9;
7049 st
->st_blksize
= MAX(in
->layout
.stripe_unit
, 4096);
7052 *dirstat
= in
->dirstat
;
7056 return in
->caps_issued();
7059 void Client::fill_statx(Inode
*in
, unsigned int mask
, struct ceph_statx
*stx
)
7061 ldout(cct
, 10) << "fill_statx on " << in
->ino
<< " snap/dev" << in
->snapid
7062 << " mode 0" << oct
<< in
->mode
<< dec
7063 << " mtime " << in
->mtime
<< " ctime " << in
->ctime
<< dendl
;
7064 memset(stx
, 0, sizeof(struct ceph_statx
));
7067 * If mask is 0, then the caller set AT_NO_ATTR_SYNC. Reset the mask
7068 * so that all bits are set.
7073 /* These are always considered to be available */
7074 stx
->stx_dev
= in
->snapid
;
7075 stx
->stx_blksize
= MAX(in
->layout
.stripe_unit
, 4096);
7077 /* Type bits are always set, even when CEPH_STATX_MODE is not */
7078 stx
->stx_mode
= S_IFMT
& in
->mode
;
7079 stx
->stx_ino
= use_faked_inos() ? in
->faked_ino
: (ino_t
)in
->ino
;
7080 stx
->stx_rdev
= in
->rdev
;
7081 stx
->stx_mask
|= (CEPH_STATX_INO
|CEPH_STATX_RDEV
);
7083 if (mask
& CEPH_CAP_AUTH_SHARED
) {
7084 stx
->stx_uid
= in
->uid
;
7085 stx
->stx_gid
= in
->gid
;
7086 stx
->stx_mode
= in
->mode
;
7087 in
->btime
.to_timespec(&stx
->stx_btime
);
7088 stx
->stx_mask
|= (CEPH_STATX_MODE
|CEPH_STATX_UID
|CEPH_STATX_GID
|CEPH_STATX_BTIME
);
7091 if (mask
& CEPH_CAP_LINK_SHARED
) {
7092 stx
->stx_nlink
= in
->nlink
;
7093 stx
->stx_mask
|= CEPH_STATX_NLINK
;
7096 if (mask
& CEPH_CAP_FILE_SHARED
) {
7098 in
->atime
.to_timespec(&stx
->stx_atime
);
7099 in
->mtime
.to_timespec(&stx
->stx_mtime
);
7102 if (cct
->_conf
->client_dirsize_rbytes
)
7103 stx
->stx_size
= in
->rstat
.rbytes
;
7105 stx
->stx_size
= in
->dirstat
.size();
7106 stx
->stx_blocks
= 1;
7108 stx
->stx_size
= in
->size
;
7109 stx
->stx_blocks
= (in
->size
+ 511) >> 9;
7111 stx
->stx_mask
|= (CEPH_STATX_ATIME
|CEPH_STATX_MTIME
|
7112 CEPH_STATX_SIZE
|CEPH_STATX_BLOCKS
);
7115 /* Change time and change_attr both require all shared caps to view */
7116 if ((mask
& CEPH_STAT_CAP_INODE_ALL
) == CEPH_STAT_CAP_INODE_ALL
) {
7117 stx
->stx_version
= in
->change_attr
;
7118 if (in
->ctime
> in
->mtime
)
7119 in
->ctime
.to_timespec(&stx
->stx_ctime
);
7121 in
->mtime
.to_timespec(&stx
->stx_ctime
);
7122 stx
->stx_mask
|= (CEPH_STATX_CTIME
|CEPH_STATX_VERSION
);
7127 void Client::touch_dn(Dentry
*dn
)
7132 int Client::chmod(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
7134 Mutex::Locker
lock(client_lock
);
7135 tout(cct
) << "chmod" << std::endl
;
7136 tout(cct
) << relpath
<< std::endl
;
7137 tout(cct
) << mode
<< std::endl
;
7138 filepath
path(relpath
);
7140 int r
= path_walk(path
, &in
, perms
);
7144 attr
.st_mode
= mode
;
7145 return _setattr(in
, &attr
, CEPH_SETATTR_MODE
, perms
);
7148 int Client::fchmod(int fd
, mode_t mode
, const UserPerm
& perms
)
7150 Mutex::Locker
lock(client_lock
);
7151 tout(cct
) << "fchmod" << std::endl
;
7152 tout(cct
) << fd
<< std::endl
;
7153 tout(cct
) << mode
<< std::endl
;
7154 Fh
*f
= get_filehandle(fd
);
7157 #if defined(__linux__) && defined(O_PATH)
7158 if (f
->flags
& O_PATH
)
7162 attr
.st_mode
= mode
;
7163 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_MODE
, perms
);
7166 int Client::lchmod(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
7168 Mutex::Locker
lock(client_lock
);
7169 tout(cct
) << "lchmod" << std::endl
;
7170 tout(cct
) << relpath
<< std::endl
;
7171 tout(cct
) << mode
<< std::endl
;
7172 filepath
path(relpath
);
7174 // don't follow symlinks
7175 int r
= path_walk(path
, &in
, perms
, false);
7179 attr
.st_mode
= mode
;
7180 return _setattr(in
, &attr
, CEPH_SETATTR_MODE
, perms
);
7183 int Client::chown(const char *relpath
, uid_t new_uid
, gid_t new_gid
,
7184 const UserPerm
& perms
)
7186 Mutex::Locker
lock(client_lock
);
7187 tout(cct
) << "chown" << std::endl
;
7188 tout(cct
) << relpath
<< std::endl
;
7189 tout(cct
) << new_uid
<< std::endl
;
7190 tout(cct
) << new_gid
<< std::endl
;
7191 filepath
path(relpath
);
7193 int r
= path_walk(path
, &in
, perms
);
7197 attr
.st_uid
= new_uid
;
7198 attr
.st_gid
= new_gid
;
7200 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
7201 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
7202 return _setattr(in
, &attr
, mask
, perms
);
7205 int Client::fchown(int fd
, uid_t new_uid
, gid_t new_gid
, const UserPerm
& perms
)
7207 Mutex::Locker
lock(client_lock
);
7208 tout(cct
) << "fchown" << std::endl
;
7209 tout(cct
) << fd
<< std::endl
;
7210 tout(cct
) << new_uid
<< std::endl
;
7211 tout(cct
) << new_gid
<< std::endl
;
7212 Fh
*f
= get_filehandle(fd
);
7215 #if defined(__linux__) && defined(O_PATH)
7216 if (f
->flags
& O_PATH
)
7220 attr
.st_uid
= new_uid
;
7221 attr
.st_gid
= new_gid
;
7223 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
7224 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
7225 return _setattr(f
->inode
, &attr
, mask
, perms
);
7228 int Client::lchown(const char *relpath
, uid_t new_uid
, gid_t new_gid
,
7229 const UserPerm
& perms
)
7231 Mutex::Locker
lock(client_lock
);
7232 tout(cct
) << "lchown" << std::endl
;
7233 tout(cct
) << relpath
<< std::endl
;
7234 tout(cct
) << new_uid
<< std::endl
;
7235 tout(cct
) << new_gid
<< std::endl
;
7236 filepath
path(relpath
);
7238 // don't follow symlinks
7239 int r
= path_walk(path
, &in
, perms
, false);
7243 attr
.st_uid
= new_uid
;
7244 attr
.st_gid
= new_gid
;
7246 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
7247 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
7248 return _setattr(in
, &attr
, mask
, perms
);
7251 int Client::utime(const char *relpath
, struct utimbuf
*buf
,
7252 const UserPerm
& perms
)
7254 Mutex::Locker
lock(client_lock
);
7255 tout(cct
) << "utime" << std::endl
;
7256 tout(cct
) << relpath
<< std::endl
;
7257 tout(cct
) << buf
->modtime
<< std::endl
;
7258 tout(cct
) << buf
->actime
<< std::endl
;
7259 filepath
path(relpath
);
7261 int r
= path_walk(path
, &in
, perms
);
7265 stat_set_mtime_sec(&attr
, buf
->modtime
);
7266 stat_set_mtime_nsec(&attr
, 0);
7267 stat_set_atime_sec(&attr
, buf
->actime
);
7268 stat_set_atime_nsec(&attr
, 0);
7269 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
7272 int Client::lutime(const char *relpath
, struct utimbuf
*buf
,
7273 const UserPerm
& perms
)
7275 Mutex::Locker
lock(client_lock
);
7276 tout(cct
) << "lutime" << std::endl
;
7277 tout(cct
) << relpath
<< std::endl
;
7278 tout(cct
) << buf
->modtime
<< std::endl
;
7279 tout(cct
) << buf
->actime
<< std::endl
;
7280 filepath
path(relpath
);
7282 // don't follow symlinks
7283 int r
= path_walk(path
, &in
, perms
, false);
7287 stat_set_mtime_sec(&attr
, buf
->modtime
);
7288 stat_set_mtime_nsec(&attr
, 0);
7289 stat_set_atime_sec(&attr
, buf
->actime
);
7290 stat_set_atime_nsec(&attr
, 0);
7291 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
7294 int Client::flock(int fd
, int operation
, uint64_t owner
)
7296 Mutex::Locker
lock(client_lock
);
7297 tout(cct
) << "flock" << std::endl
;
7298 tout(cct
) << fd
<< std::endl
;
7299 tout(cct
) << operation
<< std::endl
;
7300 tout(cct
) << owner
<< std::endl
;
7301 Fh
*f
= get_filehandle(fd
);
7305 return _flock(f
, operation
, owner
);
7308 int Client::opendir(const char *relpath
, dir_result_t
**dirpp
, const UserPerm
& perms
)
7310 Mutex::Locker
lock(client_lock
);
7311 tout(cct
) << "opendir" << std::endl
;
7312 tout(cct
) << relpath
<< std::endl
;
7313 filepath
path(relpath
);
7315 int r
= path_walk(path
, &in
, perms
, true);
7318 if (cct
->_conf
->client_permissions
) {
7319 int r
= may_open(in
.get(), O_RDONLY
, perms
);
7323 r
= _opendir(in
.get(), dirpp
, perms
);
7324 /* if ENOTDIR, dirpp will be an uninitialized point and it's very dangerous to access its value */
7326 tout(cct
) << (unsigned long)*dirpp
<< std::endl
;
7330 int Client::_opendir(Inode
*in
, dir_result_t
**dirpp
, const UserPerm
& perms
)
7334 *dirpp
= new dir_result_t(in
, perms
);
7335 opened_dirs
.insert(*dirpp
);
7336 ldout(cct
, 3) << "_opendir(" << in
->ino
<< ") = " << 0 << " (" << *dirpp
<< ")" << dendl
;
7341 int Client::closedir(dir_result_t
*dir
)
7343 Mutex::Locker
lock(client_lock
);
7344 tout(cct
) << "closedir" << std::endl
;
7345 tout(cct
) << (unsigned long)dir
<< std::endl
;
7347 ldout(cct
, 3) << "closedir(" << dir
<< ") = 0" << dendl
;
7352 void Client::_closedir(dir_result_t
*dirp
)
7354 ldout(cct
, 10) << "_closedir(" << dirp
<< ")" << dendl
;
7356 ldout(cct
, 10) << "_closedir detaching inode " << dirp
->inode
<< dendl
;
7357 dirp
->inode
.reset();
7359 _readdir_drop_dirp_buffer(dirp
);
7360 opened_dirs
.erase(dirp
);
7364 void Client::rewinddir(dir_result_t
*dirp
)
7366 Mutex::Locker
lock(client_lock
);
7368 ldout(cct
, 3) << "rewinddir(" << dirp
<< ")" << dendl
;
7369 dir_result_t
*d
= static_cast<dir_result_t
*>(dirp
);
7370 _readdir_drop_dirp_buffer(d
);
7374 loff_t
Client::telldir(dir_result_t
*dirp
)
7376 dir_result_t
*d
= static_cast<dir_result_t
*>(dirp
);
7377 ldout(cct
, 3) << "telldir(" << dirp
<< ") = " << d
->offset
<< dendl
;
7381 void Client::seekdir(dir_result_t
*dirp
, loff_t offset
)
7383 Mutex::Locker
lock(client_lock
);
7385 ldout(cct
, 3) << "seekdir(" << dirp
<< ", " << offset
<< ")" << dendl
;
7387 if (offset
== dirp
->offset
)
7390 if (offset
> dirp
->offset
)
7391 dirp
->release_count
= 0; // bump if we do a forward seek
7393 dirp
->ordered_count
= 0; // disable filling readdir cache
7395 if (dirp
->hash_order()) {
7396 if (dirp
->offset
> offset
) {
7397 _readdir_drop_dirp_buffer(dirp
);
7402 dirp
->buffer_frag
!= frag_t(dir_result_t::fpos_high(offset
)) ||
7403 dirp
->offset_low() > dir_result_t::fpos_low(offset
)) {
7404 _readdir_drop_dirp_buffer(dirp
);
7409 dirp
->offset
= offset
;
7414 // ino_t d_ino; /* inode number */
7415 // off_t d_off; /* offset to the next dirent */
7416 // unsigned short d_reclen; /* length of this record */
7417 // unsigned char d_type; /* type of file */
7418 // char d_name[256]; /* filename */
7420 void Client::fill_dirent(struct dirent
*de
, const char *name
, int type
, uint64_t ino
, loff_t next_off
)
7422 strncpy(de
->d_name
, name
, 255);
7423 de
->d_name
[255] = '\0';
7426 #if !defined(DARWIN) && !defined(__FreeBSD__)
7427 de
->d_off
= next_off
;
7430 de
->d_type
= IFTODT(type
);
7431 ldout(cct
, 10) << "fill_dirent '" << de
->d_name
<< "' -> " << inodeno_t(de
->d_ino
)
7432 << " type " << (int)de
->d_type
<< " w/ next_off " << hex
<< next_off
<< dec
<< dendl
;
7436 void Client::_readdir_next_frag(dir_result_t
*dirp
)
7438 frag_t fg
= dirp
->buffer_frag
;
7440 if (fg
.is_rightmost()) {
7441 ldout(cct
, 10) << "_readdir_next_frag advance from " << fg
<< " to END" << dendl
;
7448 ldout(cct
, 10) << "_readdir_next_frag advance from " << dirp
->buffer_frag
<< " to " << fg
<< dendl
;
7450 if (dirp
->hash_order()) {
7452 int64_t new_offset
= dir_result_t::make_fpos(fg
.value(), 2, true);
7453 if (dirp
->offset
< new_offset
) // don't decrease offset
7454 dirp
->offset
= new_offset
;
7456 dirp
->last_name
.clear();
7457 dirp
->offset
= dir_result_t::make_fpos(fg
, 2, false);
7458 _readdir_rechoose_frag(dirp
);
7462 void Client::_readdir_rechoose_frag(dir_result_t
*dirp
)
7464 assert(dirp
->inode
);
7466 if (dirp
->hash_order())
7469 frag_t cur
= frag_t(dirp
->offset_high());
7470 frag_t fg
= dirp
->inode
->dirfragtree
[cur
.value()];
7472 ldout(cct
, 10) << "_readdir_rechoose_frag frag " << cur
<< " maps to " << fg
<< dendl
;
7473 dirp
->offset
= dir_result_t::make_fpos(fg
, 2, false);
7474 dirp
->last_name
.clear();
7475 dirp
->next_offset
= 2;
7479 void Client::_readdir_drop_dirp_buffer(dir_result_t
*dirp
)
7481 ldout(cct
, 10) << "_readdir_drop_dirp_buffer " << dirp
<< dendl
;
7482 dirp
->buffer
.clear();
7485 int Client::_readdir_get_frag(dir_result_t
*dirp
)
7488 assert(dirp
->inode
);
7490 // get the current frag.
7492 if (dirp
->hash_order())
7493 fg
= dirp
->inode
->dirfragtree
[dirp
->offset_high()];
7495 fg
= frag_t(dirp
->offset_high());
7497 ldout(cct
, 10) << "_readdir_get_frag " << dirp
<< " on " << dirp
->inode
->ino
<< " fg " << fg
7498 << " offset " << hex
<< dirp
->offset
<< dec
<< dendl
;
7500 int op
= CEPH_MDS_OP_READDIR
;
7501 if (dirp
->inode
&& dirp
->inode
->snapid
== CEPH_SNAPDIR
)
7502 op
= CEPH_MDS_OP_LSSNAP
;
7504 InodeRef
& diri
= dirp
->inode
;
7506 MetaRequest
*req
= new MetaRequest(op
);
7508 diri
->make_nosnap_relative_path(path
);
7509 req
->set_filepath(path
);
7510 req
->set_inode(diri
.get());
7511 req
->head
.args
.readdir
.frag
= fg
;
7512 req
->head
.args
.readdir
.flags
= CEPH_READDIR_REPLY_BITFLAGS
;
7513 if (dirp
->last_name
.length()) {
7514 req
->path2
.set_path(dirp
->last_name
.c_str());
7515 } else if (dirp
->hash_order()) {
7516 req
->head
.args
.readdir
.offset_hash
= dirp
->offset_high();
7521 int res
= make_request(req
, dirp
->perms
, NULL
, NULL
, -1, &dirbl
);
7523 if (res
== -EAGAIN
) {
7524 ldout(cct
, 10) << "_readdir_get_frag got EAGAIN, retrying" << dendl
;
7525 _readdir_rechoose_frag(dirp
);
7526 return _readdir_get_frag(dirp
);
7530 ldout(cct
, 10) << "_readdir_get_frag " << dirp
<< " got frag " << dirp
->buffer_frag
7531 << " size " << dirp
->buffer
.size() << dendl
;
7533 ldout(cct
, 10) << "_readdir_get_frag got error " << res
<< ", setting end flag" << dendl
;
7540 struct dentry_off_lt
{
7541 bool operator()(const Dentry
* dn
, int64_t off
) const {
7542 return dir_result_t::fpos_cmp(dn
->offset
, off
) < 0;
7546 int Client::_readdir_cache_cb(dir_result_t
*dirp
, add_dirent_cb_t cb
, void *p
,
7547 int caps
, bool getref
)
7549 assert(client_lock
.is_locked());
7550 ldout(cct
, 10) << "_readdir_cache_cb " << dirp
<< " on " << dirp
->inode
->ino
7551 << " last_name " << dirp
->last_name
<< " offset " << hex
<< dirp
->offset
<< dec
7553 Dir
*dir
= dirp
->inode
->dir
;
7556 ldout(cct
, 10) << " dir is empty" << dendl
;
7561 vector
<Dentry
*>::iterator pd
= std::lower_bound(dir
->readdir_cache
.begin(),
7562 dir
->readdir_cache
.end(),
7563 dirp
->offset
, dentry_off_lt());
7567 if (!dirp
->inode
->is_complete_and_ordered())
7569 if (pd
== dir
->readdir_cache
.end())
7572 if (dn
->inode
== NULL
) {
7573 ldout(cct
, 15) << " skipping null '" << dn
->name
<< "'" << dendl
;
7577 if (dn
->cap_shared_gen
!= dir
->parent_inode
->shared_gen
) {
7578 ldout(cct
, 15) << " skipping mismatch shared gen '" << dn
->name
<< "'" << dendl
;
7583 int r
= _getattr(dn
->inode
, caps
, dirp
->perms
);
7587 struct ceph_statx stx
;
7589 fill_statx(dn
->inode
, caps
, &stx
);
7591 uint64_t next_off
= dn
->offset
+ 1;
7593 if (pd
== dir
->readdir_cache
.end())
7594 next_off
= dir_result_t::END
;
7597 fill_dirent(&de
, dn
->name
.c_str(), stx
.stx_mode
, stx
.stx_ino
, next_off
);
7599 in
= dn
->inode
.get();
7603 dn_name
= dn
->name
; // fill in name while we have lock
7605 client_lock
.Unlock();
7606 r
= cb(p
, &de
, &stx
, next_off
, in
); // _next_ offset
7608 ldout(cct
, 15) << " de " << de
.d_name
<< " off " << hex
<< dn
->offset
<< dec
7609 << " = " << r
<< dendl
;
7614 dirp
->offset
= next_off
;
7616 dirp
->next_offset
= 2;
7618 dirp
->next_offset
= dirp
->offset_low();
7619 dirp
->last_name
= dn_name
; // we successfully returned this one; update!
7624 ldout(cct
, 10) << "_readdir_cache_cb " << dirp
<< " on " << dirp
->inode
->ino
<< " at end" << dendl
;
7629 int Client::readdir_r_cb(dir_result_t
*d
, add_dirent_cb_t cb
, void *p
,
7630 unsigned want
, unsigned flags
, bool getref
)
7632 int caps
= statx_to_mask(flags
, want
);
7634 Mutex::Locker
lock(client_lock
);
7636 dir_result_t
*dirp
= static_cast<dir_result_t
*>(d
);
7638 ldout(cct
, 10) << "readdir_r_cb " << *dirp
->inode
<< " offset " << hex
<< dirp
->offset
7639 << dec
<< " at_end=" << dirp
->at_end()
7640 << " hash_order=" << dirp
->hash_order() << dendl
;
7643 struct ceph_statx stx
;
7644 memset(&de
, 0, sizeof(de
));
7645 memset(&stx
, 0, sizeof(stx
));
7647 InodeRef
& diri
= dirp
->inode
;
7652 if (dirp
->offset
== 0) {
7653 ldout(cct
, 15) << " including ." << dendl
;
7654 assert(diri
->dn_set
.size() < 2); // can't have multiple hard-links to a dir
7655 uint64_t next_off
= 1;
7658 r
= _getattr(diri
, caps
, dirp
->perms
);
7662 fill_statx(diri
, caps
, &stx
);
7663 fill_dirent(&de
, ".", S_IFDIR
, stx
.stx_ino
, next_off
);
7665 Inode
*inode
= NULL
;
7671 client_lock
.Unlock();
7672 r
= cb(p
, &de
, &stx
, next_off
, inode
);
7677 dirp
->offset
= next_off
;
7681 if (dirp
->offset
== 1) {
7682 ldout(cct
, 15) << " including .." << dendl
;
7683 uint64_t next_off
= 2;
7685 if (diri
->dn_set
.empty())
7688 in
= diri
->get_first_parent()->inode
;
7691 r
= _getattr(diri
, caps
, dirp
->perms
);
7695 fill_statx(in
, caps
, &stx
);
7696 fill_dirent(&de
, "..", S_IFDIR
, stx
.stx_ino
, next_off
);
7698 Inode
*inode
= NULL
;
7704 client_lock
.Unlock();
7705 r
= cb(p
, &de
, &stx
, next_off
, inode
);
7710 dirp
->offset
= next_off
;
7715 // can we read from our cache?
7716 ldout(cct
, 10) << "offset " << hex
<< dirp
->offset
<< dec
7717 << " snapid " << dirp
->inode
->snapid
<< " (complete && ordered) "
7718 << dirp
->inode
->is_complete_and_ordered()
7719 << " issued " << ccap_string(dirp
->inode
->caps_issued())
7721 if (dirp
->inode
->snapid
!= CEPH_SNAPDIR
&&
7722 dirp
->inode
->is_complete_and_ordered() &&
7723 dirp
->inode
->caps_issued_mask(CEPH_CAP_FILE_SHARED
)) {
7724 int err
= _readdir_cache_cb(dirp
, cb
, p
, caps
, getref
);
7733 bool check_caps
= true;
7734 if (!dirp
->is_cached()) {
7735 int r
= _readdir_get_frag(dirp
);
7738 // _readdir_get_frag() may update dirp->offset if the replied dirfrag is
7739 // different from the requested one. (our dirfragtree was outdated)
7742 frag_t fg
= dirp
->buffer_frag
;
7744 ldout(cct
, 10) << "frag " << fg
<< " buffer size " << dirp
->buffer
.size()
7745 << " offset " << hex
<< dirp
->offset
<< dendl
;
7747 for (auto it
= std::lower_bound(dirp
->buffer
.begin(), dirp
->buffer
.end(),
7748 dirp
->offset
, dir_result_t::dentry_off_lt());
7749 it
!= dirp
->buffer
.end();
7751 dir_result_t::dentry
&entry
= *it
;
7753 uint64_t next_off
= entry
.offset
+ 1;
7757 r
= _getattr(entry
.inode
, caps
, dirp
->perms
);
7762 fill_statx(entry
.inode
, caps
, &stx
);
7763 fill_dirent(&de
, entry
.name
.c_str(), stx
.stx_mode
, stx
.stx_ino
, next_off
);
7765 Inode
*inode
= NULL
;
7767 inode
= entry
.inode
.get();
7771 client_lock
.Unlock();
7772 r
= cb(p
, &de
, &stx
, next_off
, inode
); // _next_ offset
7775 ldout(cct
, 15) << " de " << de
.d_name
<< " off " << hex
<< next_off
- 1 << dec
7776 << " = " << r
<< dendl
;
7780 dirp
->offset
= next_off
;
7785 if (dirp
->next_offset
> 2) {
7786 ldout(cct
, 10) << " fetching next chunk of this frag" << dendl
;
7787 _readdir_drop_dirp_buffer(dirp
);
7791 if (!fg
.is_rightmost()) {
7793 _readdir_next_frag(dirp
);
7797 if (diri
->shared_gen
== dirp
->start_shared_gen
&&
7798 diri
->dir_release_count
== dirp
->release_count
) {
7799 if (diri
->dir_ordered_count
== dirp
->ordered_count
) {
7800 ldout(cct
, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on " << *diri
<< dendl
;
7802 assert(diri
->dir
->readdir_cache
.size() >= dirp
->cache_index
);
7803 diri
->dir
->readdir_cache
.resize(dirp
->cache_index
);
7805 diri
->flags
|= I_COMPLETE
| I_DIR_ORDERED
;
7807 ldout(cct
, 10) << " marking I_COMPLETE on " << *diri
<< dendl
;
7808 diri
->flags
|= I_COMPLETE
;
7820 int Client::readdir_r(dir_result_t
*d
, struct dirent
*de
)
7822 return readdirplus_r(d
, de
, 0, 0, 0, NULL
);
7829 * 1 if we got a dirent
7830 * 0 for end of directory
7834 struct single_readdir
{
7836 struct ceph_statx
*stx
;
7841 static int _readdir_single_dirent_cb(void *p
, struct dirent
*de
,
7842 struct ceph_statx
*stx
, off_t off
,
7845 single_readdir
*c
= static_cast<single_readdir
*>(p
);
7848 return -1; // already filled this dirent
7858 struct dirent
*Client::readdir(dir_result_t
*d
)
7861 static struct dirent de
;
7868 // our callback fills the dirent and sets sr.full=true on first
7869 // call, and returns -1 the second time around.
7870 ret
= readdir_r_cb(d
, _readdir_single_dirent_cb
, (void *)&sr
);
7872 errno
= -ret
; // this sucks.
7873 return (dirent
*) NULL
;
7878 return (dirent
*) NULL
;
7881 int Client::readdirplus_r(dir_result_t
*d
, struct dirent
*de
,
7882 struct ceph_statx
*stx
, unsigned want
,
7883 unsigned flags
, Inode
**out
)
7891 // our callback fills the dirent and sets sr.full=true on first
7892 // call, and returns -1 the second time around.
7893 int r
= readdir_r_cb(d
, _readdir_single_dirent_cb
, (void *)&sr
, want
, flags
, out
);
7905 struct getdents_result
{
7912 static int _readdir_getdent_cb(void *p
, struct dirent
*de
,
7913 struct ceph_statx
*stx
, off_t off
, Inode
*in
)
7915 struct getdents_result
*c
= static_cast<getdents_result
*>(p
);
7921 dlen
= strlen(de
->d_name
) + 1;
7923 if (c
->pos
+ dlen
> c
->buflen
)
7924 return -1; // doesn't fit
7927 memcpy(c
->buf
+ c
->pos
, de
, sizeof(*de
));
7929 memcpy(c
->buf
+ c
->pos
, de
->d_name
, dlen
);
7935 int Client::_getdents(dir_result_t
*dir
, char *buf
, int buflen
, bool fullent
)
7940 gr
.fullent
= fullent
;
7943 int r
= readdir_r_cb(dir
, _readdir_getdent_cb
, (void *)&gr
);
7945 if (r
< 0) { // some error
7946 if (r
== -1) { // buffer ran out of space
7947 if (gr
.pos
) { // but we got some entries already!
7949 } // or we need a larger buffer
7951 } else { // actual error, return it
7960 struct getdir_result
{
7961 list
<string
> *contents
;
7965 static int _getdir_cb(void *p
, struct dirent
*de
, struct ceph_statx
*stx
, off_t off
, Inode
*in
)
7967 getdir_result
*r
= static_cast<getdir_result
*>(p
);
7969 r
->contents
->push_back(de
->d_name
);
7974 int Client::getdir(const char *relpath
, list
<string
>& contents
,
7975 const UserPerm
& perms
)
7977 ldout(cct
, 3) << "getdir(" << relpath
<< ")" << dendl
;
7979 Mutex::Locker
lock(client_lock
);
7980 tout(cct
) << "getdir" << std::endl
;
7981 tout(cct
) << relpath
<< std::endl
;
7985 int r
= opendir(relpath
, &d
, perms
);
7990 gr
.contents
= &contents
;
7992 r
= readdir_r_cb(d
, _getdir_cb
, (void *)&gr
);
8002 /****** file i/o **********/
8003 int Client::open(const char *relpath
, int flags
, const UserPerm
& perms
,
8004 mode_t mode
, int stripe_unit
, int stripe_count
,
8005 int object_size
, const char *data_pool
)
8007 ldout(cct
, 3) << "open enter(" << relpath
<< ", " << ceph_flags_sys2wire(flags
) << "," << mode
<< ")" << dendl
;
8008 Mutex::Locker
lock(client_lock
);
8009 tout(cct
) << "open" << std::endl
;
8010 tout(cct
) << relpath
<< std::endl
;
8011 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
8015 #if defined(__linux__) && defined(O_PATH)
8016 /* When the O_PATH is being specified, others flags than O_DIRECTORY
8017 * and O_NOFOLLOW are ignored. Please refer do_entry_open() function
8018 * in kernel (fs/open.c). */
8020 flags
&= O_DIRECTORY
| O_NOFOLLOW
| O_PATH
;
8023 filepath
path(relpath
);
8025 bool created
= false;
8026 /* O_CREAT with O_EXCL enforces O_NOFOLLOW. */
8027 bool followsym
= !((flags
& O_NOFOLLOW
) || ((flags
& O_CREAT
) && (flags
& O_EXCL
)));
8028 int r
= path_walk(path
, &in
, perms
, followsym
, ceph_caps_for_mode(mode
));
8030 if (r
== 0 && (flags
& O_CREAT
) && (flags
& O_EXCL
))
8033 #if defined(__linux__) && defined(O_PATH)
8034 if (r
== 0 && in
->is_symlink() && (flags
& O_NOFOLLOW
) && !(flags
& O_PATH
))
8036 if (r
== 0 && in
->is_symlink() && (flags
& O_NOFOLLOW
))
8040 if (r
== -ENOENT
&& (flags
& O_CREAT
)) {
8041 filepath dirpath
= path
;
8042 string dname
= dirpath
.last_dentry();
8043 dirpath
.pop_dentry();
8045 r
= path_walk(dirpath
, &dir
, perms
, true,
8046 cct
->_conf
->client_permissions
? CEPH_CAP_AUTH_SHARED
: 0);
8049 if (cct
->_conf
->client_permissions
) {
8050 r
= may_create(dir
.get(), perms
);
8054 r
= _create(dir
.get(), dname
.c_str(), flags
, mode
, &in
, &fh
, stripe_unit
,
8055 stripe_count
, object_size
, data_pool
, &created
, perms
);
8061 // posix says we can only check permissions of existing files
8062 if (cct
->_conf
->client_permissions
) {
8063 r
= may_open(in
.get(), flags
, perms
);
8070 r
= _open(in
.get(), flags
, mode
, &fh
, perms
);
8072 // allocate an integer file descriptor
8075 assert(fd_map
.count(r
) == 0);
8080 tout(cct
) << r
<< std::endl
;
8081 ldout(cct
, 3) << "open exit(" << path
<< ", " << ceph_flags_sys2wire(flags
) << ") = " << r
<< dendl
;
8085 int Client::open(const char *relpath
, int flags
, const UserPerm
& perms
, mode_t mode
)
8087 /* Use default file striping parameters */
8088 return open(relpath
, flags
, perms
, mode
, 0, 0, 0, NULL
);
8091 int Client::lookup_hash(inodeno_t ino
, inodeno_t dirino
, const char *name
,
8092 const UserPerm
& perms
)
8094 Mutex::Locker
lock(client_lock
);
8095 ldout(cct
, 3) << "lookup_hash enter(" << ino
<< ", #" << dirino
<< "/" << name
<< ")" << dendl
;
8097 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPHASH
);
8099 req
->set_filepath(path
);
8101 uint32_t h
= ceph_str_hash(CEPH_STR_HASH_RJENKINS
, name
, strlen(name
));
8103 sprintf(f
, "%u", h
);
8104 filepath
path2(dirino
);
8105 path2
.push_dentry(string(f
));
8106 req
->set_filepath2(path2
);
8108 int r
= make_request(req
, perms
, NULL
, NULL
,
8109 rand() % mdsmap
->get_num_in_mds());
8110 ldout(cct
, 3) << "lookup_hash exit(" << ino
<< ", #" << dirino
<< "/" << name
<< ") = " << r
<< dendl
;
8116 * Load inode into local cache.
8118 * If inode pointer is non-NULL, and take a reference on
8119 * the resulting Inode object in one operation, so that caller
8120 * can safely assume inode will still be there after return.
8122 int Client::lookup_ino(inodeno_t ino
, const UserPerm
& perms
, Inode
**inode
)
8124 Mutex::Locker
lock(client_lock
);
8125 ldout(cct
, 3) << "lookup_ino enter(" << ino
<< ")" << dendl
;
8127 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPINO
);
8129 req
->set_filepath(path
);
8131 int r
= make_request(req
, perms
, NULL
, NULL
, rand() % mdsmap
->get_num_in_mds());
8132 if (r
== 0 && inode
!= NULL
) {
8133 vinodeno_t
vino(ino
, CEPH_NOSNAP
);
8134 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
8135 assert(p
!= inode_map
.end());
8139 ldout(cct
, 3) << "lookup_ino exit(" << ino
<< ") = " << r
<< dendl
;
8146 * Find the parent inode of `ino` and insert it into
8147 * our cache. Conditionally also set `parent` to a referenced
8148 * Inode* if caller provides non-NULL value.
8150 int Client::lookup_parent(Inode
*ino
, const UserPerm
& perms
, Inode
**parent
)
8152 Mutex::Locker
lock(client_lock
);
8153 ldout(cct
, 3) << "lookup_parent enter(" << ino
->ino
<< ")" << dendl
;
8155 if (!ino
->dn_set
.empty()) {
8156 // if we exposed the parent here, we'd need to check permissions,
8157 // but right now we just rely on the MDS doing so in make_request
8158 ldout(cct
, 3) << "lookup_parent dentry already present" << dendl
;
8162 if (ino
->is_root()) {
8164 ldout(cct
, 3) << "ino is root, no parent" << dendl
;
8168 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPPARENT
);
8169 filepath
path(ino
->ino
);
8170 req
->set_filepath(path
);
8173 int r
= make_request(req
, perms
, &target
, NULL
, rand() % mdsmap
->get_num_in_mds());
8174 // Give caller a reference to the parent ino if they provided a pointer.
8175 if (parent
!= NULL
) {
8177 *parent
= target
.get();
8179 ldout(cct
, 3) << "lookup_parent found parent " << (*parent
)->ino
<< dendl
;
8184 ldout(cct
, 3) << "lookup_parent exit(" << ino
->ino
<< ") = " << r
<< dendl
;
8190 * Populate the parent dentry for `ino`, provided it is
8191 * a child of `parent`.
8193 int Client::lookup_name(Inode
*ino
, Inode
*parent
, const UserPerm
& perms
)
8195 assert(parent
->is_dir());
8197 Mutex::Locker
lock(client_lock
);
8198 ldout(cct
, 3) << "lookup_name enter(" << ino
->ino
<< ")" << dendl
;
8200 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
8201 req
->set_filepath2(filepath(parent
->ino
));
8202 req
->set_filepath(filepath(ino
->ino
));
8203 req
->set_inode(ino
);
8205 int r
= make_request(req
, perms
, NULL
, NULL
, rand() % mdsmap
->get_num_in_mds());
8206 ldout(cct
, 3) << "lookup_name exit(" << ino
->ino
<< ") = " << r
<< dendl
;
8211 Fh
*Client::_create_fh(Inode
*in
, int flags
, int cmode
, const UserPerm
& perms
)
8219 f
->actor_perms
= perms
;
8221 ldout(cct
, 10) << "_create_fh " << in
->ino
<< " mode " << cmode
<< dendl
;
8223 if (in
->snapid
!= CEPH_NOSNAP
) {
8224 in
->snap_cap_refs
++;
8225 ldout(cct
, 5) << "open success, fh is " << f
<< " combined IMMUTABLE SNAP caps "
8226 << ccap_string(in
->caps_issued()) << dendl
;
8229 const md_config_t
*conf
= cct
->_conf
;
8230 f
->readahead
.set_trigger_requests(1);
8231 f
->readahead
.set_min_readahead_size(conf
->client_readahead_min
);
8232 uint64_t max_readahead
= Readahead::NO_LIMIT
;
8233 if (conf
->client_readahead_max_bytes
) {
8234 max_readahead
= MIN(max_readahead
, (uint64_t)conf
->client_readahead_max_bytes
);
8236 if (conf
->client_readahead_max_periods
) {
8237 max_readahead
= MIN(max_readahead
, in
->layout
.get_period()*(uint64_t)conf
->client_readahead_max_periods
);
8239 f
->readahead
.set_max_readahead_size(max_readahead
);
8240 vector
<uint64_t> alignments
;
8241 alignments
.push_back(in
->layout
.get_period());
8242 alignments
.push_back(in
->layout
.stripe_unit
);
8243 f
->readahead
.set_alignments(alignments
);
8248 int Client::_release_fh(Fh
*f
)
8250 //ldout(cct, 3) << "op: client->close(open_files[ " << fh << " ]);" << dendl;
8251 //ldout(cct, 3) << "op: open_files.erase( " << fh << " );" << dendl;
8252 Inode
*in
= f
->inode
.get();
8253 ldout(cct
, 5) << "_release_fh " << f
<< " mode " << f
->mode
<< " on " << *in
<< dendl
;
8255 if (in
->snapid
== CEPH_NOSNAP
) {
8256 if (in
->put_open_ref(f
->mode
)) {
8257 _flush(in
, new C_Client_FlushComplete(this, in
));
8261 assert(in
->snap_cap_refs
> 0);
8262 in
->snap_cap_refs
--;
8265 _release_filelocks(f
);
8267 // Finally, read any async err (i.e. from flushes)
8268 int err
= f
->take_async_err();
8270 ldout(cct
, 1) << "_release_fh " << f
<< " on inode " << *in
<< " caught async_err = "
8271 << cpp_strerror(err
) << dendl
;
8273 ldout(cct
, 10) << "_release_fh " << f
<< " on inode " << *in
<< " no async_err state" << dendl
;
8281 void Client::_put_fh(Fh
*f
)
8283 int left
= f
->put();
8289 int Client::_open(Inode
*in
, int flags
, mode_t mode
, Fh
**fhp
,
8290 const UserPerm
& perms
)
8292 if (in
->snapid
!= CEPH_NOSNAP
&&
8293 (flags
& (O_WRONLY
| O_RDWR
| O_CREAT
| O_TRUNC
| O_APPEND
))) {
8297 // use normalized flags to generate cmode
8298 int cmode
= ceph_flags_to_mode(ceph_flags_sys2wire(flags
));
8301 int want
= ceph_caps_for_mode(cmode
);
8304 in
->get_open_ref(cmode
); // make note of pending open, since it affects _wanted_ caps.
8306 if ((flags
& O_TRUNC
) == 0 &&
8307 in
->caps_issued_mask(want
)) {
8309 check_caps(in
, CHECK_CAPS_NODELAY
);
8311 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_OPEN
);
8313 in
->make_nosnap_relative_path(path
);
8314 req
->set_filepath(path
);
8315 req
->head
.args
.open
.flags
= ceph_flags_sys2wire(flags
& ~O_CREAT
);
8316 req
->head
.args
.open
.mode
= mode
;
8317 req
->head
.args
.open
.pool
= -1;
8318 if (cct
->_conf
->client_debug_getattr_caps
)
8319 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
8321 req
->head
.args
.open
.mask
= 0;
8322 req
->head
.args
.open
.old_size
= in
->size
; // for O_TRUNC
8324 result
= make_request(req
, perms
);
8330 *fhp
= _create_fh(in
, flags
, cmode
, perms
);
8332 in
->put_open_ref(cmode
);
8340 int Client::_renew_caps(Inode
*in
)
8342 int wanted
= in
->caps_file_wanted();
8343 if (in
->is_any_caps() &&
8344 ((wanted
& CEPH_CAP_ANY_WR
) == 0 || in
->auth_cap
)) {
8345 check_caps(in
, CHECK_CAPS_NODELAY
);
8350 if ((wanted
& CEPH_CAP_FILE_RD
) && (wanted
& CEPH_CAP_FILE_WR
))
8352 else if (wanted
& CEPH_CAP_FILE_RD
)
8354 else if (wanted
& CEPH_CAP_FILE_WR
)
8357 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_OPEN
);
8359 in
->make_nosnap_relative_path(path
);
8360 req
->set_filepath(path
);
8361 req
->head
.args
.open
.flags
= flags
;
8362 req
->head
.args
.open
.pool
= -1;
8363 if (cct
->_conf
->client_debug_getattr_caps
)
8364 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
8366 req
->head
.args
.open
.mask
= 0;
8369 // duplicate in case Cap goes away; not sure if that race is a concern?
8370 const UserPerm
*pperm
= in
->get_best_perms();
8374 int ret
= make_request(req
, perms
);
8378 int Client::close(int fd
)
8380 ldout(cct
, 3) << "close enter(" << fd
<< ")" << dendl
;
8381 Mutex::Locker
lock(client_lock
);
8382 tout(cct
) << "close" << std::endl
;
8383 tout(cct
) << fd
<< std::endl
;
8385 Fh
*fh
= get_filehandle(fd
);
8388 int err
= _release_fh(fh
);
8391 ldout(cct
, 3) << "close exit(" << fd
<< ")" << dendl
;
8399 loff_t
Client::lseek(int fd
, loff_t offset
, int whence
)
8401 Mutex::Locker
lock(client_lock
);
8402 tout(cct
) << "lseek" << std::endl
;
8403 tout(cct
) << fd
<< std::endl
;
8404 tout(cct
) << offset
<< std::endl
;
8405 tout(cct
) << whence
<< std::endl
;
8407 Fh
*f
= get_filehandle(fd
);
8410 #if defined(__linux__) && defined(O_PATH)
8411 if (f
->flags
& O_PATH
)
8414 return _lseek(f
, offset
, whence
);
8417 loff_t
Client::_lseek(Fh
*f
, loff_t offset
, int whence
)
8419 Inode
*in
= f
->inode
.get();
8432 r
= _getattr(in
, CEPH_STAT_CAP_SIZE
, f
->actor_perms
);
8435 f
->pos
= in
->size
+ offset
;
8442 ldout(cct
, 3) << "_lseek(" << f
<< ", " << offset
<< ", " << whence
<< ") = " << f
->pos
<< dendl
;
8447 void Client::lock_fh_pos(Fh
*f
)
8449 ldout(cct
, 10) << "lock_fh_pos " << f
<< dendl
;
8451 if (f
->pos_locked
|| !f
->pos_waiters
.empty()) {
8453 f
->pos_waiters
.push_back(&cond
);
8454 ldout(cct
, 10) << "lock_fh_pos BLOCKING on " << f
<< dendl
;
8455 while (f
->pos_locked
|| f
->pos_waiters
.front() != &cond
)
8456 cond
.Wait(client_lock
);
8457 ldout(cct
, 10) << "lock_fh_pos UNBLOCKING on " << f
<< dendl
;
8458 assert(f
->pos_waiters
.front() == &cond
);
8459 f
->pos_waiters
.pop_front();
8462 f
->pos_locked
= true;
8465 void Client::unlock_fh_pos(Fh
*f
)
8467 ldout(cct
, 10) << "unlock_fh_pos " << f
<< dendl
;
8468 f
->pos_locked
= false;
8471 int Client::uninline_data(Inode
*in
, Context
*onfinish
)
8473 if (!in
->inline_data
.length()) {
8474 onfinish
->complete(0);
8479 snprintf(oid_buf
, sizeof(oid_buf
), "%llx.00000000", (long long unsigned)in
->ino
);
8480 object_t oid
= oid_buf
;
8482 ObjectOperation create_ops
;
8483 create_ops
.create(false);
8485 objecter
->mutate(oid
,
8486 OSDMap::file_to_object_locator(in
->layout
),
8488 in
->snaprealm
->get_snap_context(),
8489 ceph::real_clock::now(),
8493 bufferlist inline_version_bl
;
8494 ::encode(in
->inline_version
, inline_version_bl
);
8496 ObjectOperation uninline_ops
;
8497 uninline_ops
.cmpxattr("inline_version",
8498 CEPH_OSD_CMPXATTR_OP_GT
,
8499 CEPH_OSD_CMPXATTR_MODE_U64
,
8501 bufferlist inline_data
= in
->inline_data
;
8502 uninline_ops
.write(0, inline_data
, in
->truncate_size
, in
->truncate_seq
);
8503 uninline_ops
.setxattr("inline_version", stringify(in
->inline_version
));
8505 objecter
->mutate(oid
,
8506 OSDMap::file_to_object_locator(in
->layout
),
8508 in
->snaprealm
->get_snap_context(),
8509 ceph::real_clock::now(),
8518 // blocking osd interface
8520 int Client::read(int fd
, char *buf
, loff_t size
, loff_t offset
)
8522 Mutex::Locker
lock(client_lock
);
8523 tout(cct
) << "read" << std::endl
;
8524 tout(cct
) << fd
<< std::endl
;
8525 tout(cct
) << size
<< std::endl
;
8526 tout(cct
) << offset
<< std::endl
;
8528 Fh
*f
= get_filehandle(fd
);
8531 #if defined(__linux__) && defined(O_PATH)
8532 if (f
->flags
& O_PATH
)
8536 int r
= _read(f
, offset
, size
, &bl
);
8537 ldout(cct
, 3) << "read(" << fd
<< ", " << (void*)buf
<< ", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
8539 bl
.copy(0, bl
.length(), buf
);
8545 int Client::preadv(int fd
, const struct iovec
*iov
, int iovcnt
, loff_t offset
)
8549 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, false);
8552 int Client::_read(Fh
*f
, int64_t offset
, uint64_t size
, bufferlist
*bl
)
8554 const md_config_t
*conf
= cct
->_conf
;
8555 Inode
*in
= f
->inode
.get();
8557 if ((f
->mode
& CEPH_FILE_MODE_RD
) == 0)
8559 //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
8561 bool movepos
= false;
8567 loff_t start_pos
= offset
;
8569 if (in
->inline_version
== 0) {
8570 int r
= _getattr(in
, CEPH_STAT_CAP_INLINE_DATA
, f
->actor_perms
, true);
8576 assert(in
->inline_version
> 0);
8581 int r
= get_caps(in
, CEPH_CAP_FILE_RD
, CEPH_CAP_FILE_CACHE
, &have
, -1);
8587 if (f
->flags
& O_DIRECT
)
8588 have
&= ~CEPH_CAP_FILE_CACHE
;
8590 Mutex
uninline_flock("Client::_read_uninline_data flock");
8592 bool uninline_done
= false;
8593 int uninline_ret
= 0;
8594 Context
*onuninline
= NULL
;
8596 if (in
->inline_version
< CEPH_INLINE_NONE
) {
8597 if (!(have
& CEPH_CAP_FILE_CACHE
)) {
8598 onuninline
= new C_SafeCond(&uninline_flock
,
8602 uninline_data(in
, onuninline
);
8604 uint32_t len
= in
->inline_data
.length();
8606 uint64_t endoff
= offset
+ size
;
8607 if (endoff
> in
->size
)
8611 if (endoff
<= len
) {
8612 bl
->substr_of(in
->inline_data
, offset
, endoff
- offset
);
8614 bl
->substr_of(in
->inline_data
, offset
, len
- offset
);
8615 bl
->append_zero(endoff
- len
);
8617 } else if ((uint64_t)offset
< endoff
) {
8618 bl
->append_zero(endoff
- offset
);
8625 if (!conf
->client_debug_force_sync_read
&&
8626 (conf
->client_oc
&& (have
& CEPH_CAP_FILE_CACHE
))) {
8628 if (f
->flags
& O_RSYNC
) {
8629 _flush_range(in
, offset
, size
);
8631 r
= _read_async(f
, offset
, size
, bl
);
8635 if (f
->flags
& O_DIRECT
)
8636 _flush_range(in
, offset
, size
);
8638 bool checkeof
= false;
8639 r
= _read_sync(f
, offset
, size
, bl
, &checkeof
);
8646 put_cap_ref(in
, CEPH_CAP_FILE_RD
);
8649 r
= _getattr(in
, CEPH_STAT_CAP_SIZE
, f
->actor_perms
);
8654 if ((uint64_t)offset
< in
->size
)
8662 f
->pos
= start_pos
+ bl
->length();
8670 client_lock
.Unlock();
8671 uninline_flock
.Lock();
8672 while (!uninline_done
)
8673 uninline_cond
.Wait(uninline_flock
);
8674 uninline_flock
.Unlock();
8677 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
8678 in
->inline_data
.clear();
8679 in
->inline_version
= CEPH_INLINE_NONE
;
8680 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
8687 put_cap_ref(in
, CEPH_CAP_FILE_RD
);
8693 return bl
->length();
8696 Client::C_Readahead::C_Readahead(Client
*c
, Fh
*f
) :
8699 f
->readahead
.inc_pending();
8702 Client::C_Readahead::~C_Readahead() {
8703 f
->readahead
.dec_pending();
8707 void Client::C_Readahead::finish(int r
) {
8708 lgeneric_subdout(client
->cct
, client
, 20) << "client." << client
->get_nodeid() << " " << "C_Readahead on " << f
->inode
<< dendl
;
8709 client
->put_cap_ref(f
->inode
.get(), CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_CACHE
);
8712 int Client::_read_async(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
)
8714 const md_config_t
*conf
= cct
->_conf
;
8715 Inode
*in
= f
->inode
.get();
8717 ldout(cct
, 10) << "_read_async " << *in
<< " " << off
<< "~" << len
<< dendl
;
8719 // trim read based on file size?
8720 if (off
>= in
->size
)
8724 if (off
+ len
> in
->size
) {
8725 len
= in
->size
- off
;
8728 ldout(cct
, 10) << " min_bytes=" << f
->readahead
.get_min_readahead_size()
8729 << " max_bytes=" << f
->readahead
.get_max_readahead_size()
8730 << " max_periods=" << conf
->client_readahead_max_periods
<< dendl
;
8732 // read (and possibly block)
8734 Mutex
flock("Client::_read_async flock");
8737 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
, &rvalue
);
8738 r
= objectcacher
->file_read(&in
->oset
, &in
->layout
, in
->snapid
,
8739 off
, len
, bl
, 0, onfinish
);
8741 get_cap_ref(in
, CEPH_CAP_FILE_CACHE
);
8742 client_lock
.Unlock();
8748 put_cap_ref(in
, CEPH_CAP_FILE_CACHE
);
8755 if(f
->readahead
.get_min_readahead_size() > 0) {
8756 pair
<uint64_t, uint64_t> readahead_extent
= f
->readahead
.update(off
, len
, in
->size
);
8757 if (readahead_extent
.second
> 0) {
8758 ldout(cct
, 20) << "readahead " << readahead_extent
.first
<< "~" << readahead_extent
.second
8759 << " (caller wants " << off
<< "~" << len
<< ")" << dendl
;
8760 Context
*onfinish2
= new C_Readahead(this, f
);
8761 int r2
= objectcacher
->file_read(&in
->oset
, &in
->layout
, in
->snapid
,
8762 readahead_extent
.first
, readahead_extent
.second
,
8763 NULL
, 0, onfinish2
);
8765 ldout(cct
, 20) << "readahead initiated, c " << onfinish2
<< dendl
;
8766 get_cap_ref(in
, CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_CACHE
);
8768 ldout(cct
, 20) << "readahead was no-op, already cached" << dendl
;
8777 int Client::_read_sync(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
,
8780 Inode
*in
= f
->inode
.get();
8785 ldout(cct
, 10) << "_read_sync " << *in
<< " " << off
<< "~" << len
<< dendl
;
8787 Mutex
flock("Client::_read_sync flock");
8792 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
, &r
);
8796 filer
->read_trunc(in
->ino
, &in
->layout
, in
->snapid
,
8798 in
->truncate_size
, in
->truncate_seq
,
8800 client_lock
.Unlock();
8807 // if we get ENOENT from OSD, assume 0 bytes returned
8818 bl
->claim_append(tbl
);
8821 if (r
>= 0 && r
< wanted
) {
8822 if (pos
< in
->size
) {
8823 // zero up to known EOF
8824 int64_t some
= in
->size
- pos
;
8846 * we keep count of uncommitted sync writes on the inode, so that
8849 void Client::_sync_write_commit(Inode
*in
)
8851 assert(unsafe_sync_write
> 0);
8852 unsafe_sync_write
--;
8854 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
8856 ldout(cct
, 15) << "sync_write_commit unsafe_sync_write = " << unsafe_sync_write
<< dendl
;
8857 if (unsafe_sync_write
== 0 && unmounting
) {
8858 ldout(cct
, 10) << "sync_write_commit -- no more unsafe writes, unmount can proceed" << dendl
;
8859 mount_cond
.Signal();
8863 int Client::write(int fd
, const char *buf
, loff_t size
, loff_t offset
)
8865 Mutex::Locker
lock(client_lock
);
8866 tout(cct
) << "write" << std::endl
;
8867 tout(cct
) << fd
<< std::endl
;
8868 tout(cct
) << size
<< std::endl
;
8869 tout(cct
) << offset
<< std::endl
;
8871 Fh
*fh
= get_filehandle(fd
);
8874 #if defined(__linux__) && defined(O_PATH)
8875 if (fh
->flags
& O_PATH
)
8878 int r
= _write(fh
, offset
, size
, buf
, NULL
, 0);
8879 ldout(cct
, 3) << "write(" << fd
<< ", \"...\", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
8883 int Client::pwritev(int fd
, const struct iovec
*iov
, int iovcnt
, int64_t offset
)
8887 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, true);
8890 int Client::_preadv_pwritev(int fd
, const struct iovec
*iov
, unsigned iovcnt
, int64_t offset
, bool write
)
8892 Mutex::Locker
lock(client_lock
);
8893 tout(cct
) << fd
<< std::endl
;
8894 tout(cct
) << offset
<< std::endl
;
8896 Fh
*fh
= get_filehandle(fd
);
8899 #if defined(__linux__) && defined(O_PATH)
8900 if (fh
->flags
& O_PATH
)
8903 loff_t totallen
= 0;
8904 for (unsigned i
= 0; i
< iovcnt
; i
++) {
8905 totallen
+= iov
[i
].iov_len
;
8908 int w
= _write(fh
, offset
, totallen
, NULL
, iov
, iovcnt
);
8909 ldout(cct
, 3) << "pwritev(" << fd
<< ", \"...\", " << totallen
<< ", " << offset
<< ") = " << w
<< dendl
;
8913 int r
= _read(fh
, offset
, totallen
, &bl
);
8914 ldout(cct
, 3) << "preadv(" << fd
<< ", " << offset
<< ") = " << r
<< dendl
;
8919 for (unsigned j
= 0, resid
= r
; j
< iovcnt
&& resid
> 0; j
++) {
8921 * This piece of code aims to handle the case that bufferlist does not have enough data
8922 * to fill in the iov
8924 if (resid
< iov
[j
].iov_len
) {
8925 bl
.copy(bufoff
, resid
, (char *)iov
[j
].iov_base
);
8928 bl
.copy(bufoff
, iov
[j
].iov_len
, (char *)iov
[j
].iov_base
);
8930 resid
-= iov
[j
].iov_len
;
8931 bufoff
+= iov
[j
].iov_len
;
8937 int Client::_write(Fh
*f
, int64_t offset
, uint64_t size
, const char *buf
,
8938 const struct iovec
*iov
, int iovcnt
)
8940 if ((uint64_t)(offset
+size
) > mdsmap
->get_max_filesize()) //too large!
8943 //ldout(cct, 7) << "write fh " << fh << " size " << size << " offset " << offset << dendl;
8944 Inode
*in
= f
->inode
.get();
8946 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
)) {
8950 assert(in
->snapid
== CEPH_NOSNAP
);
8952 // was Fh opened as writeable?
8953 if ((f
->mode
& CEPH_FILE_MODE_WR
) == 0)
8957 uint64_t endoff
= offset
+ size
;
8958 if (endoff
> in
->size
&& is_quota_bytes_exceeded(in
, endoff
- in
->size
,
8963 // use/adjust fd pos?
8967 * FIXME: this is racy in that we may block _after_ this point waiting for caps, and size may
8968 * change out from under us.
8970 if (f
->flags
& O_APPEND
) {
8971 int r
= _lseek(f
, 0, SEEK_END
);
8978 f
->pos
= offset
+size
;
8982 //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
8984 ldout(cct
, 10) << "cur file size is " << in
->size
<< dendl
;
8987 utime_t start
= ceph_clock_now();
8989 if (in
->inline_version
== 0) {
8990 int r
= _getattr(in
, CEPH_STAT_CAP_INLINE_DATA
, f
->actor_perms
, true);
8993 assert(in
->inline_version
> 0);
8996 // copy into fresh buffer (since our write may be resub, async)
9000 bl
.append(buf
, size
);
9002 for (int i
= 0; i
< iovcnt
; i
++) {
9003 if (iov
[i
].iov_len
> 0) {
9004 bl
.append((const char *)iov
[i
].iov_base
, iov
[i
].iov_len
);
9010 uint64_t totalwritten
;
9012 int r
= get_caps(in
, CEPH_CAP_FILE_WR
|CEPH_CAP_AUTH_SHARED
,
9013 CEPH_CAP_FILE_BUFFER
, &have
, endoff
);
9017 /* clear the setuid/setgid bits, if any */
9018 if (unlikely((in
->mode
& S_ISUID
) ||
9019 (in
->mode
& (S_ISGID
| S_IXGRP
)) == (S_ISGID
| S_IXGRP
))) {
9020 struct ceph_statx stx
= { 0 };
9022 put_cap_ref(in
, CEPH_CAP_AUTH_SHARED
);
9023 r
= __setattrx(in
, &stx
, CEPH_SETATTR_KILL_SGUID
, f
->actor_perms
);
9027 put_cap_ref(in
, CEPH_CAP_AUTH_SHARED
);
9030 if (f
->flags
& O_DIRECT
)
9031 have
&= ~CEPH_CAP_FILE_BUFFER
;
9033 ldout(cct
, 10) << " snaprealm " << *in
->snaprealm
<< dendl
;
9035 Mutex
uninline_flock("Client::_write_uninline_data flock");
9037 bool uninline_done
= false;
9038 int uninline_ret
= 0;
9039 Context
*onuninline
= NULL
;
9041 if (in
->inline_version
< CEPH_INLINE_NONE
) {
9042 if (endoff
> cct
->_conf
->client_max_inline_size
||
9043 endoff
> CEPH_INLINE_MAX_SIZE
||
9044 !(have
& CEPH_CAP_FILE_BUFFER
)) {
9045 onuninline
= new C_SafeCond(&uninline_flock
,
9049 uninline_data(in
, onuninline
);
9051 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9053 uint32_t len
= in
->inline_data
.length();
9056 in
->inline_data
.copy(endoff
, len
- endoff
, bl
);
9059 in
->inline_data
.splice(offset
, len
- offset
);
9060 else if (offset
> len
)
9061 in
->inline_data
.append_zero(offset
- len
);
9063 in
->inline_data
.append(bl
);
9064 in
->inline_version
++;
9066 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9072 if (cct
->_conf
->client_oc
&& (have
& CEPH_CAP_FILE_BUFFER
)) {
9073 // do buffered write
9074 if (!in
->oset
.dirty_or_tx
)
9075 get_cap_ref(in
, CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_BUFFER
);
9077 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9079 // async, caching, non-blocking.
9080 r
= objectcacher
->file_write(&in
->oset
, &in
->layout
,
9081 in
->snaprealm
->get_snap_context(),
9082 offset
, size
, bl
, ceph::real_clock::now(),
9084 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9089 // flush cached write if O_SYNC is set on file fh
9090 // O_DSYNC == O_SYNC on linux < 2.6.33
9091 // O_SYNC = __O_SYNC | O_DSYNC on linux >= 2.6.33
9092 if ((f
->flags
& O_SYNC
) || (f
->flags
& O_DSYNC
)) {
9093 _flush_range(in
, offset
, size
);
9096 if (f
->flags
& O_DIRECT
)
9097 _flush_range(in
, offset
, size
);
9099 // simple, non-atomic sync write
9100 Mutex
flock("Client::_write flock");
9103 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
);
9105 unsafe_sync_write
++;
9106 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
); // released by onsafe callback
9108 filer
->write_trunc(in
->ino
, &in
->layout
, in
->snaprealm
->get_snap_context(),
9109 offset
, size
, bl
, ceph::real_clock::now(), 0,
9110 in
->truncate_size
, in
->truncate_seq
,
9112 client_lock
.Unlock();
9119 _sync_write_commit(in
);
9122 // if we get here, write was successful, update client metadata
9125 lat
= ceph_clock_now();
9127 logger
->tinc(l_c_wrlat
, lat
);
9129 totalwritten
= size
;
9130 r
= (int)totalwritten
;
9133 if (totalwritten
+ offset
> in
->size
) {
9134 in
->size
= totalwritten
+ offset
;
9135 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
9137 if (is_quota_bytes_approaching(in
, f
->actor_perms
)) {
9138 check_caps(in
, CHECK_CAPS_NODELAY
);
9139 } else if (is_max_size_approaching(in
)) {
9143 ldout(cct
, 7) << "wrote to " << totalwritten
+offset
<< ", extending file size" << dendl
;
9145 ldout(cct
, 7) << "wrote to " << totalwritten
+offset
<< ", leaving file size at " << in
->size
<< dendl
;
9149 in
->mtime
= ceph_clock_now();
9151 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
9156 client_lock
.Unlock();
9157 uninline_flock
.Lock();
9158 while (!uninline_done
)
9159 uninline_cond
.Wait(uninline_flock
);
9160 uninline_flock
.Unlock();
9163 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
9164 in
->inline_data
.clear();
9165 in
->inline_version
= CEPH_INLINE_NONE
;
9166 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
9172 put_cap_ref(in
, CEPH_CAP_FILE_WR
);
9176 int Client::_flush(Fh
*f
)
9178 Inode
*in
= f
->inode
.get();
9179 int err
= f
->take_async_err();
9181 ldout(cct
, 1) << __func__
<< ": " << f
<< " on inode " << *in
<< " caught async_err = "
9182 << cpp_strerror(err
) << dendl
;
9184 ldout(cct
, 10) << __func__
<< ": " << f
<< " on inode " << *in
<< " no async_err state" << dendl
;
9190 int Client::truncate(const char *relpath
, loff_t length
, const UserPerm
& perms
)
9192 struct ceph_statx stx
;
9193 stx
.stx_size
= length
;
9194 return setattrx(relpath
, &stx
, CEPH_SETATTR_SIZE
, perms
);
9197 int Client::ftruncate(int fd
, loff_t length
, const UserPerm
& perms
)
9199 Mutex::Locker
lock(client_lock
);
9200 tout(cct
) << "ftruncate" << std::endl
;
9201 tout(cct
) << fd
<< std::endl
;
9202 tout(cct
) << length
<< std::endl
;
9204 Fh
*f
= get_filehandle(fd
);
9207 #if defined(__linux__) && defined(O_PATH)
9208 if (f
->flags
& O_PATH
)
9212 attr
.st_size
= length
;
9213 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_SIZE
, perms
);
9216 int Client::fsync(int fd
, bool syncdataonly
)
9218 Mutex::Locker
lock(client_lock
);
9219 tout(cct
) << "fsync" << std::endl
;
9220 tout(cct
) << fd
<< std::endl
;
9221 tout(cct
) << syncdataonly
<< std::endl
;
9223 Fh
*f
= get_filehandle(fd
);
9226 #if defined(__linux__) && defined(O_PATH)
9227 if (f
->flags
& O_PATH
)
9230 int r
= _fsync(f
, syncdataonly
);
9232 // The IOs in this fsync were okay, but maybe something happened
9233 // in the background that we shoudl be reporting?
9234 r
= f
->take_async_err();
9235 ldout(cct
, 3) << "fsync(" << fd
<< ", " << syncdataonly
9236 << ") = 0, async_err = " << r
<< dendl
;
9238 // Assume that an error we encountered during fsync, even reported
9239 // synchronously, would also have applied the error to the Fh, and we
9240 // should clear it here to avoid returning the same error again on next
9242 ldout(cct
, 3) << "fsync(" << fd
<< ", " << syncdataonly
<< ") = "
9244 f
->take_async_err();
9249 int Client::_fsync(Inode
*in
, bool syncdataonly
)
9252 Mutex
lock("Client::_fsync::lock");
9255 C_SafeCond
*object_cacher_completion
= NULL
;
9256 ceph_tid_t flush_tid
= 0;
9259 ldout(cct
, 3) << "_fsync on " << *in
<< " " << (syncdataonly
? "(dataonly)":"(data+metadata)") << dendl
;
9261 if (cct
->_conf
->client_oc
) {
9262 object_cacher_completion
= new C_SafeCond(&lock
, &cond
, &done
, &r
);
9263 tmp_ref
= in
; // take a reference; C_SafeCond doesn't and _flush won't either
9264 _flush(in
, object_cacher_completion
);
9265 ldout(cct
, 15) << "using return-valued form of _fsync" << dendl
;
9268 if (!syncdataonly
&& in
->dirty_caps
) {
9269 check_caps(in
, CHECK_CAPS_NODELAY
|CHECK_CAPS_SYNCHRONOUS
);
9270 if (in
->flushing_caps
)
9271 flush_tid
= last_flush_tid
;
9272 } else ldout(cct
, 10) << "no metadata needs to commit" << dendl
;
9274 if (!syncdataonly
&& !in
->unsafe_ops
.empty()) {
9275 MetaRequest
*req
= in
->unsafe_ops
.back();
9276 ldout(cct
, 15) << "waiting on unsafe requests, last tid " << req
->get_tid() << dendl
;
9279 wait_on_list(req
->waitfor_safe
);
9283 if (object_cacher_completion
) { // wait on a real reply instead of guessing
9284 client_lock
.Unlock();
9286 ldout(cct
, 15) << "waiting on data to flush" << dendl
;
9291 ldout(cct
, 15) << "got " << r
<< " from flush writeback" << dendl
;
9293 // FIXME: this can starve
9294 while (in
->cap_refs
[CEPH_CAP_FILE_BUFFER
] > 0) {
9295 ldout(cct
, 10) << "ino " << in
->ino
<< " has " << in
->cap_refs
[CEPH_CAP_FILE_BUFFER
]
9296 << " uncommitted, waiting" << dendl
;
9297 wait_on_list(in
->waitfor_commit
);
9303 wait_sync_caps(in
, flush_tid
);
9305 ldout(cct
, 10) << "ino " << in
->ino
<< " has no uncommitted writes" << dendl
;
9307 ldout(cct
, 1) << "ino " << in
->ino
<< " failed to commit to disk! "
9308 << cpp_strerror(-r
) << dendl
;
9314 int Client::_fsync(Fh
*f
, bool syncdataonly
)
9316 ldout(cct
, 3) << "_fsync(" << f
<< ", " << (syncdataonly
? "dataonly)":"data+metadata)") << dendl
;
9317 return _fsync(f
->inode
.get(), syncdataonly
);
9320 int Client::fstat(int fd
, struct stat
*stbuf
, const UserPerm
& perms
, int mask
)
9322 Mutex::Locker
lock(client_lock
);
9323 tout(cct
) << "fstat mask " << hex
<< mask
<< dec
<< std::endl
;
9324 tout(cct
) << fd
<< std::endl
;
9326 Fh
*f
= get_filehandle(fd
);
9329 int r
= _getattr(f
->inode
, mask
, perms
);
9332 fill_stat(f
->inode
, stbuf
, NULL
);
9333 ldout(cct
, 3) << "fstat(" << fd
<< ", " << stbuf
<< ") = " << r
<< dendl
;
9337 int Client::fstatx(int fd
, struct ceph_statx
*stx
, const UserPerm
& perms
,
9338 unsigned int want
, unsigned int flags
)
9340 Mutex::Locker
lock(client_lock
);
9341 tout(cct
) << "fstatx flags " << hex
<< flags
<< " want " << want
<< dec
<< std::endl
;
9342 tout(cct
) << fd
<< std::endl
;
9344 Fh
*f
= get_filehandle(fd
);
9348 unsigned mask
= statx_to_mask(flags
, want
);
9351 if (mask
&& !f
->inode
->caps_issued_mask(mask
)) {
9352 r
= _getattr(f
->inode
, mask
, perms
);
9354 ldout(cct
, 3) << "fstatx exit on error!" << dendl
;
9359 fill_statx(f
->inode
, mask
, stx
);
9360 ldout(cct
, 3) << "fstatx(" << fd
<< ", " << stx
<< ") = " << r
<< dendl
;
9364 // not written yet, but i want to link!
9366 int Client::chdir(const char *relpath
, std::string
&new_cwd
,
9367 const UserPerm
& perms
)
9369 Mutex::Locker
lock(client_lock
);
9370 tout(cct
) << "chdir" << std::endl
;
9371 tout(cct
) << relpath
<< std::endl
;
9372 filepath
path(relpath
);
9374 int r
= path_walk(path
, &in
, perms
);
9379 ldout(cct
, 3) << "chdir(" << relpath
<< ") cwd now " << cwd
->ino
<< dendl
;
9381 getcwd(new_cwd
, perms
);
9385 void Client::getcwd(string
& dir
, const UserPerm
& perms
)
9388 ldout(cct
, 10) << "getcwd " << *cwd
<< dendl
;
9390 Inode
*in
= cwd
.get();
9391 while (in
!= root
) {
9392 assert(in
->dn_set
.size() < 2); // dirs can't be hard-linked
9394 // A cwd or ancester is unlinked
9395 if (in
->dn_set
.empty()) {
9399 Dentry
*dn
= in
->get_first_parent();
9404 ldout(cct
, 10) << "getcwd looking up parent for " << *in
<< dendl
;
9405 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
9406 filepath
path(in
->ino
);
9407 req
->set_filepath(path
);
9409 int res
= make_request(req
, perms
);
9418 path
.push_front_dentry(dn
->name
);
9419 in
= dn
->dir
->parent_inode
;
9422 dir
+= path
.get_path();
9425 int Client::statfs(const char *path
, struct statvfs
*stbuf
,
9426 const UserPerm
& perms
)
9428 Mutex::Locker
l(client_lock
);
9429 tout(cct
) << "statfs" << std::endl
;
9434 const vector
<int64_t> &data_pools
= mdsmap
->get_data_pools();
9435 if (data_pools
.size() == 1) {
9436 objecter
->get_fs_stats(stats
, data_pools
[0], &cond
);
9438 objecter
->get_fs_stats(stats
, boost::optional
<int64_t>(), &cond
);
9441 client_lock
.Unlock();
9442 int rval
= cond
.wait();
9446 ldout(cct
, 1) << "underlying call to statfs returned error: "
9447 << cpp_strerror(rval
)
9452 memset(stbuf
, 0, sizeof(*stbuf
));
9455 * we're going to set a block size of 4MB so we can represent larger
9456 * FSes without overflowing. Additionally convert the space
9457 * measurements from KB to bytes while making them in terms of
9458 * blocks. We use 4MB only because it is big enough, and because it
9459 * actually *is* the (ceph) default block size.
9461 const int CEPH_BLOCK_SHIFT
= 22;
9462 stbuf
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
9463 stbuf
->f_bsize
= 1 << CEPH_BLOCK_SHIFT
;
9464 stbuf
->f_files
= stats
.num_objects
;
9465 stbuf
->f_ffree
= -1;
9466 stbuf
->f_favail
= -1;
9467 stbuf
->f_fsid
= -1; // ??
9468 stbuf
->f_flag
= 0; // ??
9469 stbuf
->f_namemax
= NAME_MAX
;
9471 // Usually quota_root will == root_ancestor, but if the mount root has no
9472 // quota but we can see a parent of it that does have a quota, we'll
9473 // respect that one instead.
9474 assert(root
!= nullptr);
9475 Inode
*quota_root
= root
->quota
.is_enable() ? root
: get_quota_root(root
, perms
);
9477 // get_quota_root should always give us something
9478 // because client quotas are always enabled
9479 assert(quota_root
!= nullptr);
9481 if (quota_root
&& cct
->_conf
->client_quota_df
&& quota_root
->quota
.max_bytes
) {
9483 // Skip the getattr if any sessions are stale, as we don't want to
9484 // block `df` if this client has e.g. been evicted, or if the MDS cluster
9486 if (!_any_stale_sessions()) {
9487 int r
= _getattr(quota_root
, 0, perms
, true);
9489 // Ignore return value: error getting latest inode metadata is not a good
9490 // reason to break "df".
9491 lderr(cct
) << "Error in getattr on quota root 0x"
9492 << std::hex
<< quota_root
->ino
<< std::dec
9493 << " statfs result may be outdated" << dendl
;
9497 // Special case: if there is a size quota set on the Inode acting
9498 // as the root for this client mount, then report the quota status
9499 // as the filesystem statistics.
9500 const fsblkcnt_t total
= quota_root
->quota
.max_bytes
>> CEPH_BLOCK_SHIFT
;
9501 const fsblkcnt_t used
= quota_root
->rstat
.rbytes
>> CEPH_BLOCK_SHIFT
;
9502 // It is possible for a quota to be exceeded: arithmetic here must
9503 // handle case where used > total.
9504 const fsblkcnt_t free
= total
> used
? total
- used
: 0;
9506 stbuf
->f_blocks
= total
;
9507 stbuf
->f_bfree
= free
;
9508 stbuf
->f_bavail
= free
;
9510 // General case: report the cluster statistics returned from RADOS. Because
9511 // multiple pools may be used without one filesystem namespace via
9512 // layouts, this is the most correct thing we can do.
9513 stbuf
->f_blocks
= stats
.kb
>> (CEPH_BLOCK_SHIFT
- 10);
9514 stbuf
->f_bfree
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
9515 stbuf
->f_bavail
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
9521 int Client::_do_filelock(Inode
*in
, Fh
*fh
, int lock_type
, int op
, int sleep
,
9522 struct flock
*fl
, uint64_t owner
, bool removing
)
9524 ldout(cct
, 10) << "_do_filelock ino " << in
->ino
9525 << (lock_type
== CEPH_LOCK_FCNTL
? " fcntl" : " flock")
9526 << " type " << fl
->l_type
<< " owner " << owner
9527 << " " << fl
->l_start
<< "~" << fl
->l_len
<< dendl
;
9530 if (F_RDLCK
== fl
->l_type
)
9531 lock_cmd
= CEPH_LOCK_SHARED
;
9532 else if (F_WRLCK
== fl
->l_type
)
9533 lock_cmd
= CEPH_LOCK_EXCL
;
9534 else if (F_UNLCK
== fl
->l_type
)
9535 lock_cmd
= CEPH_LOCK_UNLOCK
;
9539 if (op
!= CEPH_MDS_OP_SETFILELOCK
|| lock_cmd
== CEPH_LOCK_UNLOCK
)
9543 * Set the most significant bit, so that MDS knows the 'owner'
9544 * is sufficient to identify the owner of lock. (old code uses
9545 * both 'owner' and 'pid')
9547 owner
|= (1ULL << 63);
9549 MetaRequest
*req
= new MetaRequest(op
);
9551 in
->make_nosnap_relative_path(path
);
9552 req
->set_filepath(path
);
9555 req
->head
.args
.filelock_change
.rule
= lock_type
;
9556 req
->head
.args
.filelock_change
.type
= lock_cmd
;
9557 req
->head
.args
.filelock_change
.owner
= owner
;
9558 req
->head
.args
.filelock_change
.pid
= fl
->l_pid
;
9559 req
->head
.args
.filelock_change
.start
= fl
->l_start
;
9560 req
->head
.args
.filelock_change
.length
= fl
->l_len
;
9561 req
->head
.args
.filelock_change
.wait
= sleep
;
9566 if (sleep
&& switch_interrupt_cb
) {
9568 switch_interrupt_cb(callback_handle
, req
->get());
9569 ret
= make_request(req
, fh
->actor_perms
, NULL
, NULL
, -1, &bl
);
9570 // disable interrupt
9571 switch_interrupt_cb(callback_handle
, NULL
);
9572 if (ret
== 0 && req
->aborted()) {
9573 // effect of this lock request has been revoked by the 'lock intr' request
9574 ret
= req
->get_abort_code();
9578 ret
= make_request(req
, fh
->actor_perms
, NULL
, NULL
, -1, &bl
);
9582 if (op
== CEPH_MDS_OP_GETFILELOCK
) {
9583 ceph_filelock filelock
;
9584 bufferlist::iterator p
= bl
.begin();
9585 ::decode(filelock
, p
);
9587 if (CEPH_LOCK_SHARED
== filelock
.type
)
9588 fl
->l_type
= F_RDLCK
;
9589 else if (CEPH_LOCK_EXCL
== filelock
.type
)
9590 fl
->l_type
= F_WRLCK
;
9592 fl
->l_type
= F_UNLCK
;
9594 fl
->l_whence
= SEEK_SET
;
9595 fl
->l_start
= filelock
.start
;
9596 fl
->l_len
= filelock
.length
;
9597 fl
->l_pid
= filelock
.pid
;
9598 } else if (op
== CEPH_MDS_OP_SETFILELOCK
) {
9599 ceph_lock_state_t
*lock_state
;
9600 if (lock_type
== CEPH_LOCK_FCNTL
) {
9601 if (!in
->fcntl_locks
)
9602 in
->fcntl_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FCNTL
);
9603 lock_state
= in
->fcntl_locks
;
9604 } else if (lock_type
== CEPH_LOCK_FLOCK
) {
9605 if (!in
->flock_locks
)
9606 in
->flock_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FLOCK
);
9607 lock_state
= in
->flock_locks
;
9612 _update_lock_state(fl
, owner
, lock_state
);
9615 if (lock_type
== CEPH_LOCK_FCNTL
) {
9616 if (!fh
->fcntl_locks
)
9617 fh
->fcntl_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FCNTL
);
9618 lock_state
= fh
->fcntl_locks
;
9620 if (!fh
->flock_locks
)
9621 fh
->flock_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FLOCK
);
9622 lock_state
= fh
->flock_locks
;
9624 _update_lock_state(fl
, owner
, lock_state
);
9632 int Client::_interrupt_filelock(MetaRequest
*req
)
9634 // Set abort code, but do not kick. The abort code prevents the request
9635 // from being re-sent.
9638 return 0; // haven't sent the request
9640 Inode
*in
= req
->inode();
9643 if (req
->head
.args
.filelock_change
.rule
== CEPH_LOCK_FLOCK
)
9644 lock_type
= CEPH_LOCK_FLOCK_INTR
;
9645 else if (req
->head
.args
.filelock_change
.rule
== CEPH_LOCK_FCNTL
)
9646 lock_type
= CEPH_LOCK_FCNTL_INTR
;
9652 MetaRequest
*intr_req
= new MetaRequest(CEPH_MDS_OP_SETFILELOCK
);
9654 in
->make_nosnap_relative_path(path
);
9655 intr_req
->set_filepath(path
);
9656 intr_req
->set_inode(in
);
9657 intr_req
->head
.args
.filelock_change
= req
->head
.args
.filelock_change
;
9658 intr_req
->head
.args
.filelock_change
.rule
= lock_type
;
9659 intr_req
->head
.args
.filelock_change
.type
= CEPH_LOCK_UNLOCK
;
9661 UserPerm
perms(req
->get_uid(), req
->get_gid());
9662 return make_request(intr_req
, perms
, NULL
, NULL
, -1);
9665 void Client::_encode_filelocks(Inode
*in
, bufferlist
& bl
)
9667 if (!in
->fcntl_locks
&& !in
->flock_locks
)
9670 unsigned nr_fcntl_locks
= in
->fcntl_locks
? in
->fcntl_locks
->held_locks
.size() : 0;
9671 ::encode(nr_fcntl_locks
, bl
);
9672 if (nr_fcntl_locks
) {
9673 ceph_lock_state_t
* lock_state
= in
->fcntl_locks
;
9674 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9675 p
!= lock_state
->held_locks
.end();
9677 ::encode(p
->second
, bl
);
9680 unsigned nr_flock_locks
= in
->flock_locks
? in
->flock_locks
->held_locks
.size() : 0;
9681 ::encode(nr_flock_locks
, bl
);
9682 if (nr_flock_locks
) {
9683 ceph_lock_state_t
* lock_state
= in
->flock_locks
;
9684 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9685 p
!= lock_state
->held_locks
.end();
9687 ::encode(p
->second
, bl
);
9690 ldout(cct
, 10) << "_encode_filelocks ino " << in
->ino
<< ", " << nr_fcntl_locks
9691 << " fcntl locks, " << nr_flock_locks
<< " flock locks" << dendl
;
9694 void Client::_release_filelocks(Fh
*fh
)
9696 if (!fh
->fcntl_locks
&& !fh
->flock_locks
)
9699 Inode
*in
= fh
->inode
.get();
9700 ldout(cct
, 10) << "_release_filelocks " << fh
<< " ino " << in
->ino
<< dendl
;
9702 list
<pair
<int, ceph_filelock
> > to_release
;
9704 if (fh
->fcntl_locks
) {
9705 ceph_lock_state_t
* lock_state
= fh
->fcntl_locks
;
9706 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9707 p
!= lock_state
->held_locks
.end();
9709 to_release
.push_back(pair
<int, ceph_filelock
>(CEPH_LOCK_FCNTL
, p
->second
));
9710 delete fh
->fcntl_locks
;
9712 if (fh
->flock_locks
) {
9713 ceph_lock_state_t
* lock_state
= fh
->flock_locks
;
9714 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9715 p
!= lock_state
->held_locks
.end();
9717 to_release
.push_back(pair
<int, ceph_filelock
>(CEPH_LOCK_FLOCK
, p
->second
));
9718 delete fh
->flock_locks
;
9721 if (to_release
.empty())
9725 memset(&fl
, 0, sizeof(fl
));
9726 fl
.l_whence
= SEEK_SET
;
9727 fl
.l_type
= F_UNLCK
;
9729 for (list
<pair
<int, ceph_filelock
> >::iterator p
= to_release
.begin();
9730 p
!= to_release
.end();
9732 fl
.l_start
= p
->second
.start
;
9733 fl
.l_len
= p
->second
.length
;
9734 fl
.l_pid
= p
->second
.pid
;
9735 _do_filelock(in
, fh
, p
->first
, CEPH_MDS_OP_SETFILELOCK
, 0, &fl
,
9736 p
->second
.owner
, true);
9740 void Client::_update_lock_state(struct flock
*fl
, uint64_t owner
,
9741 ceph_lock_state_t
*lock_state
)
9744 if (F_RDLCK
== fl
->l_type
)
9745 lock_cmd
= CEPH_LOCK_SHARED
;
9746 else if (F_WRLCK
== fl
->l_type
)
9747 lock_cmd
= CEPH_LOCK_EXCL
;
9749 lock_cmd
= CEPH_LOCK_UNLOCK
;;
9751 ceph_filelock filelock
;
9752 filelock
.start
= fl
->l_start
;
9753 filelock
.length
= fl
->l_len
;
9754 filelock
.client
= 0;
9755 // see comment in _do_filelock()
9756 filelock
.owner
= owner
| (1ULL << 63);
9757 filelock
.pid
= fl
->l_pid
;
9758 filelock
.type
= lock_cmd
;
9760 if (filelock
.type
== CEPH_LOCK_UNLOCK
) {
9761 list
<ceph_filelock
> activated_locks
;
9762 lock_state
->remove_lock(filelock
, activated_locks
);
9764 bool r
= lock_state
->add_lock(filelock
, false, false, NULL
);
9769 int Client::_getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
)
9771 Inode
*in
= fh
->inode
.get();
9772 ldout(cct
, 10) << "_getlk " << fh
<< " ino " << in
->ino
<< dendl
;
9773 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FCNTL
, CEPH_MDS_OP_GETFILELOCK
, 0, fl
, owner
);
9777 int Client::_setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
)
9779 Inode
*in
= fh
->inode
.get();
9780 ldout(cct
, 10) << "_setlk " << fh
<< " ino " << in
->ino
<< dendl
;
9781 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FCNTL
, CEPH_MDS_OP_SETFILELOCK
, sleep
, fl
, owner
);
9782 ldout(cct
, 10) << "_setlk " << fh
<< " ino " << in
->ino
<< " result=" << ret
<< dendl
;
9786 int Client::_flock(Fh
*fh
, int cmd
, uint64_t owner
)
9788 Inode
*in
= fh
->inode
.get();
9789 ldout(cct
, 10) << "_flock " << fh
<< " ino " << in
->ino
<< dendl
;
9791 int sleep
= !(cmd
& LOCK_NB
);
9810 memset(&fl
, 0, sizeof(fl
));
9812 fl
.l_whence
= SEEK_SET
;
9814 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FLOCK
, CEPH_MDS_OP_SETFILELOCK
, sleep
, &fl
, owner
);
9815 ldout(cct
, 10) << "_flock " << fh
<< " ino " << in
->ino
<< " result=" << ret
<< dendl
;
9819 int Client::ll_statfs(Inode
*in
, struct statvfs
*stbuf
, const UserPerm
& perms
)
9821 /* Since the only thing this does is wrap a call to statfs, and
9822 statfs takes a lock, it doesn't seem we have a need to split it
9824 return statfs(0, stbuf
, perms
);
9827 void Client::ll_register_callbacks(struct client_callback_args
*args
)
9831 Mutex::Locker
l(client_lock
);
9832 ldout(cct
, 10) << "ll_register_callbacks cb " << args
->handle
9833 << " invalidate_ino_cb " << args
->ino_cb
9834 << " invalidate_dentry_cb " << args
->dentry_cb
9835 << " getgroups_cb" << args
->getgroups_cb
9836 << " switch_interrupt_cb " << args
->switch_intr_cb
9837 << " remount_cb " << args
->remount_cb
9839 callback_handle
= args
->handle
;
9841 ino_invalidate_cb
= args
->ino_cb
;
9842 async_ino_invalidator
.start();
9844 if (args
->dentry_cb
) {
9845 dentry_invalidate_cb
= args
->dentry_cb
;
9846 async_dentry_invalidator
.start();
9848 if (args
->switch_intr_cb
) {
9849 switch_interrupt_cb
= args
->switch_intr_cb
;
9850 interrupt_finisher
.start();
9852 if (args
->remount_cb
) {
9853 remount_cb
= args
->remount_cb
;
9854 remount_finisher
.start();
9856 getgroups_cb
= args
->getgroups_cb
;
9857 umask_cb
= args
->umask_cb
;
9860 int Client::test_dentry_handling(bool can_invalidate
)
9864 can_invalidate_dentries
= can_invalidate
;
9866 if (can_invalidate_dentries
) {
9867 assert(dentry_invalidate_cb
);
9868 ldout(cct
, 1) << "using dentry_invalidate_cb" << dendl
;
9869 } else if (remount_cb
) {
9870 ldout(cct
, 1) << "using remount_cb" << dendl
;
9871 int s
= remount_cb(callback_handle
);
9873 lderr(cct
) << "Failed to invoke remount, needed to ensure kernel dcache consistency"
9876 if (cct
->_conf
->client_die_on_failed_remount
) {
9877 require_remount
= true;
9881 lderr(cct
) << "no method to invalidate kernel dentry cache; expect issues!" << dendl
;
9882 if (cct
->_conf
->client_die_on_failed_remount
)
9888 int Client::_sync_fs()
9890 ldout(cct
, 10) << "_sync_fs" << dendl
;
9893 Mutex
lock("Client::_fsync::lock");
9895 bool flush_done
= false;
9896 if (cct
->_conf
->client_oc
)
9897 objectcacher
->flush_all(new C_SafeCond(&lock
, &cond
, &flush_done
));
9903 ceph_tid_t flush_tid
= last_flush_tid
;
9905 // wait for unsafe mds requests
9906 wait_unsafe_requests();
9908 wait_sync_caps(flush_tid
);
9911 client_lock
.Unlock();
9913 ldout(cct
, 15) << "waiting on data to flush" << dendl
;
9923 int Client::sync_fs()
9925 Mutex::Locker
l(client_lock
);
9929 int64_t Client::drop_caches()
9931 Mutex::Locker
l(client_lock
);
9932 return objectcacher
->release_all();
9936 int Client::lazyio_propogate(int fd
, loff_t offset
, size_t count
)
9938 Mutex::Locker
l(client_lock
);
9939 ldout(cct
, 3) << "op: client->lazyio_propogate(" << fd
9940 << ", " << offset
<< ", " << count
<< ")" << dendl
;
9942 Fh
*f
= get_filehandle(fd
);
9952 int Client::lazyio_synchronize(int fd
, loff_t offset
, size_t count
)
9954 Mutex::Locker
l(client_lock
);
9955 ldout(cct
, 3) << "op: client->lazyio_synchronize(" << fd
9956 << ", " << offset
<< ", " << count
<< ")" << dendl
;
9958 Fh
*f
= get_filehandle(fd
);
9961 Inode
*in
= f
->inode
.get();
9970 // =============================
9973 int Client::mksnap(const char *relpath
, const char *name
, const UserPerm
& perm
)
9975 Mutex::Locker
l(client_lock
);
9976 filepath
path(relpath
);
9978 int r
= path_walk(path
, &in
, perm
);
9981 if (cct
->_conf
->client_permissions
) {
9982 r
= may_create(in
.get(), perm
);
9986 Inode
*snapdir
= open_snapdir(in
.get());
9987 return _mkdir(snapdir
, name
, 0, perm
);
9989 int Client::rmsnap(const char *relpath
, const char *name
, const UserPerm
& perms
)
9991 Mutex::Locker
l(client_lock
);
9992 filepath
path(relpath
);
9994 int r
= path_walk(path
, &in
, perms
);
9997 if (cct
->_conf
->client_permissions
) {
9998 r
= may_delete(in
.get(), NULL
, perms
);
10002 Inode
*snapdir
= open_snapdir(in
.get());
10003 return _rmdir(snapdir
, name
, perms
);
10006 // =============================
10009 int Client::get_caps_issued(int fd
) {
10011 Mutex::Locker
lock(client_lock
);
10013 Fh
*f
= get_filehandle(fd
);
10017 return f
->inode
->caps_issued();
10020 int Client::get_caps_issued(const char *path
, const UserPerm
& perms
)
10022 Mutex::Locker
lock(client_lock
);
10025 int r
= path_walk(p
, &in
, perms
, true);
10028 return in
->caps_issued();
10031 // =========================================
10034 Inode
*Client::open_snapdir(Inode
*diri
)
10037 vinodeno_t
vino(diri
->ino
, CEPH_SNAPDIR
);
10038 if (!inode_map
.count(vino
)) {
10039 in
= new Inode(this, vino
, &diri
->layout
);
10041 in
->ino
= diri
->ino
;
10042 in
->snapid
= CEPH_SNAPDIR
;
10043 in
->mode
= diri
->mode
;
10044 in
->uid
= diri
->uid
;
10045 in
->gid
= diri
->gid
;
10046 in
->mtime
= diri
->mtime
;
10047 in
->ctime
= diri
->ctime
;
10048 in
->btime
= diri
->btime
;
10049 in
->size
= diri
->size
;
10050 in
->change_attr
= diri
->change_attr
;
10052 in
->dirfragtree
.clear();
10053 in
->snapdir_parent
= diri
;
10054 diri
->flags
|= I_SNAPDIR_OPEN
;
10055 inode_map
[vino
] = in
;
10056 if (use_faked_inos())
10057 _assign_faked_ino(in
);
10058 ldout(cct
, 10) << "open_snapdir created snapshot inode " << *in
<< dendl
;
10060 in
= inode_map
[vino
];
10061 ldout(cct
, 10) << "open_snapdir had snapshot inode " << *in
<< dendl
;
10066 int Client::ll_lookup(Inode
*parent
, const char *name
, struct stat
*attr
,
10067 Inode
**out
, const UserPerm
& perms
)
10069 Mutex::Locker
lock(client_lock
);
10070 vinodeno_t vparent
= _get_vino(parent
);
10071 ldout(cct
, 3) << "ll_lookup " << vparent
<< " " << name
<< dendl
;
10072 tout(cct
) << "ll_lookup" << std::endl
;
10073 tout(cct
) << name
<< std::endl
;
10076 if (!cct
->_conf
->fuse_default_permissions
) {
10077 r
= may_lookup(parent
, perms
);
10082 string
dname(name
);
10085 r
= _lookup(parent
, dname
, CEPH_STAT_CAP_INODE_ALL
, &in
, perms
);
10092 fill_stat(in
, attr
);
10096 ldout(cct
, 3) << "ll_lookup " << vparent
<< " " << name
10097 << " -> " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
10098 tout(cct
) << attr
->st_ino
<< std::endl
;
10103 int Client::ll_lookupx(Inode
*parent
, const char *name
, Inode
**out
,
10104 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
10105 const UserPerm
& perms
)
10107 Mutex::Locker
lock(client_lock
);
10108 vinodeno_t vparent
= _get_vino(parent
);
10109 ldout(cct
, 3) << "ll_lookupx " << vparent
<< " " << name
<< dendl
;
10110 tout(cct
) << "ll_lookupx" << std::endl
;
10111 tout(cct
) << name
<< std::endl
;
10114 if (!cct
->_conf
->fuse_default_permissions
) {
10115 r
= may_lookup(parent
, perms
);
10120 string
dname(name
);
10123 unsigned mask
= statx_to_mask(flags
, want
);
10124 r
= _lookup(parent
, dname
, mask
, &in
, perms
);
10130 fill_statx(in
, mask
, stx
);
10134 ldout(cct
, 3) << "ll_lookupx " << vparent
<< " " << name
10135 << " -> " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
10136 tout(cct
) << stx
->stx_ino
<< std::endl
;
10141 int Client::ll_walk(const char* name
, Inode
**out
, struct ceph_statx
*stx
,
10142 unsigned int want
, unsigned int flags
, const UserPerm
& perms
)
10144 Mutex::Locker
lock(client_lock
);
10145 filepath
fp(name
, 0);
10148 unsigned mask
= statx_to_mask(flags
, want
);
10150 ldout(cct
, 3) << "ll_walk" << name
<< dendl
;
10151 tout(cct
) << "ll_walk" << std::endl
;
10152 tout(cct
) << name
<< std::endl
;
10154 rc
= path_walk(fp
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), mask
);
10156 /* zero out mask, just in case... */
10163 fill_statx(in
, mask
, stx
);
10170 void Client::_ll_get(Inode
*in
)
10172 if (in
->ll_ref
== 0) {
10174 if (in
->is_dir() && !in
->dn_set
.empty()) {
10175 assert(in
->dn_set
.size() == 1); // dirs can't be hard-linked
10176 in
->get_first_parent()->get(); // pin dentry
10180 ldout(cct
, 20) << "_ll_get " << in
<< " " << in
->ino
<< " -> " << in
->ll_ref
<< dendl
;
10183 int Client::_ll_put(Inode
*in
, int num
)
10186 ldout(cct
, 20) << "_ll_put " << in
<< " " << in
->ino
<< " " << num
<< " -> " << in
->ll_ref
<< dendl
;
10187 if (in
->ll_ref
== 0) {
10188 if (in
->is_dir() && !in
->dn_set
.empty()) {
10189 assert(in
->dn_set
.size() == 1); // dirs can't be hard-linked
10190 in
->get_first_parent()->put(); // unpin dentry
10199 void Client::_ll_drop_pins()
10201 ldout(cct
, 10) << "_ll_drop_pins" << dendl
;
10202 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator next
;
10203 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator it
= inode_map
.begin();
10204 it
!= inode_map
.end();
10206 Inode
*in
= it
->second
;
10210 _ll_put(in
, in
->ll_ref
);
10214 bool Client::ll_forget(Inode
*in
, int count
)
10216 Mutex::Locker
lock(client_lock
);
10217 inodeno_t ino
= _get_inodeno(in
);
10219 ldout(cct
, 3) << "ll_forget " << ino
<< " " << count
<< dendl
;
10220 tout(cct
) << "ll_forget" << std::endl
;
10221 tout(cct
) << ino
.val
<< std::endl
;
10222 tout(cct
) << count
<< std::endl
;
10224 if (ino
== 1) return true; // ignore forget on root.
10227 if (in
->ll_ref
< count
) {
10228 ldout(cct
, 1) << "WARNING: ll_forget on " << ino
<< " " << count
10229 << ", which only has ll_ref=" << in
->ll_ref
<< dendl
;
10230 _ll_put(in
, in
->ll_ref
);
10233 if (_ll_put(in
, count
) == 0)
10240 bool Client::ll_put(Inode
*in
)
10242 /* ll_forget already takes the lock */
10243 return ll_forget(in
, 1);
10246 snapid_t
Client::ll_get_snapid(Inode
*in
)
10248 Mutex::Locker
lock(client_lock
);
10252 Inode
*Client::ll_get_inode(ino_t ino
)
10254 Mutex::Locker
lock(client_lock
);
10255 vinodeno_t vino
= _map_faked_ino(ino
);
10256 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
10257 if (p
== inode_map
.end())
10259 Inode
*in
= p
->second
;
10264 Inode
*Client::ll_get_inode(vinodeno_t vino
)
10266 Mutex::Locker
lock(client_lock
);
10267 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
10268 if (p
== inode_map
.end())
10270 Inode
*in
= p
->second
;
10275 int Client::_ll_getattr(Inode
*in
, int caps
, const UserPerm
& perms
)
10277 vinodeno_t vino
= _get_vino(in
);
10279 ldout(cct
, 3) << "ll_getattr " << vino
<< dendl
;
10280 tout(cct
) << "ll_getattr" << std::endl
;
10281 tout(cct
) << vino
.ino
.val
<< std::endl
;
10283 if (vino
.snapid
< CEPH_NOSNAP
)
10286 return _getattr(in
, caps
, perms
);
10289 int Client::ll_getattr(Inode
*in
, struct stat
*attr
, const UserPerm
& perms
)
10291 Mutex::Locker
lock(client_lock
);
10293 int res
= _ll_getattr(in
, CEPH_STAT_CAP_INODE_ALL
, perms
);
10296 fill_stat(in
, attr
);
10297 ldout(cct
, 3) << "ll_getattr " << _get_vino(in
) << " = " << res
<< dendl
;
10301 int Client::ll_getattrx(Inode
*in
, struct ceph_statx
*stx
, unsigned int want
,
10302 unsigned int flags
, const UserPerm
& perms
)
10304 Mutex::Locker
lock(client_lock
);
10307 unsigned mask
= statx_to_mask(flags
, want
);
10309 if (mask
&& !in
->caps_issued_mask(mask
))
10310 res
= _ll_getattr(in
, mask
, perms
);
10313 fill_statx(in
, mask
, stx
);
10314 ldout(cct
, 3) << "ll_getattrx " << _get_vino(in
) << " = " << res
<< dendl
;
10318 int Client::_ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
10319 const UserPerm
& perms
, InodeRef
*inp
)
10321 vinodeno_t vino
= _get_vino(in
);
10323 ldout(cct
, 3) << "ll_setattrx " << vino
<< " mask " << hex
<< mask
<< dec
10325 tout(cct
) << "ll_setattrx" << std::endl
;
10326 tout(cct
) << vino
.ino
.val
<< std::endl
;
10327 tout(cct
) << stx
->stx_mode
<< std::endl
;
10328 tout(cct
) << stx
->stx_uid
<< std::endl
;
10329 tout(cct
) << stx
->stx_gid
<< std::endl
;
10330 tout(cct
) << stx
->stx_size
<< std::endl
;
10331 tout(cct
) << stx
->stx_mtime
<< std::endl
;
10332 tout(cct
) << stx
->stx_atime
<< std::endl
;
10333 tout(cct
) << stx
->stx_btime
<< std::endl
;
10334 tout(cct
) << mask
<< std::endl
;
10336 if (!cct
->_conf
->fuse_default_permissions
) {
10337 int res
= may_setattr(in
, stx
, mask
, perms
);
10342 mask
&= ~(CEPH_SETATTR_MTIME_NOW
| CEPH_SETATTR_ATIME_NOW
);
10344 return __setattrx(in
, stx
, mask
, perms
, inp
);
10347 int Client::ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
10348 const UserPerm
& perms
)
10350 Mutex::Locker
lock(client_lock
);
10351 InodeRef
target(in
);
10352 int res
= _ll_setattrx(in
, stx
, mask
, perms
, &target
);
10354 assert(in
== target
.get());
10355 fill_statx(in
, in
->caps_issued(), stx
);
10358 ldout(cct
, 3) << "ll_setattrx " << _get_vino(in
) << " = " << res
<< dendl
;
10362 int Client::ll_setattr(Inode
*in
, struct stat
*attr
, int mask
,
10363 const UserPerm
& perms
)
10365 struct ceph_statx stx
;
10366 stat_to_statx(attr
, &stx
);
10368 Mutex::Locker
lock(client_lock
);
10369 InodeRef
target(in
);
10370 int res
= _ll_setattrx(in
, &stx
, mask
, perms
, &target
);
10372 assert(in
== target
.get());
10373 fill_stat(in
, attr
);
10376 ldout(cct
, 3) << "ll_setattr " << _get_vino(in
) << " = " << res
<< dendl
;
10384 int Client::getxattr(const char *path
, const char *name
, void *value
, size_t size
,
10385 const UserPerm
& perms
)
10387 Mutex::Locker
lock(client_lock
);
10389 int r
= Client::path_walk(path
, &in
, perms
, true, CEPH_STAT_CAP_XATTR
);
10392 return _getxattr(in
, name
, value
, size
, perms
);
10395 int Client::lgetxattr(const char *path
, const char *name
, void *value
, size_t size
,
10396 const UserPerm
& perms
)
10398 Mutex::Locker
lock(client_lock
);
10400 int r
= Client::path_walk(path
, &in
, perms
, false, CEPH_STAT_CAP_XATTR
);
10403 return _getxattr(in
, name
, value
, size
, perms
);
10406 int Client::fgetxattr(int fd
, const char *name
, void *value
, size_t size
,
10407 const UserPerm
& perms
)
10409 Mutex::Locker
lock(client_lock
);
10410 Fh
*f
= get_filehandle(fd
);
10413 return _getxattr(f
->inode
, name
, value
, size
, perms
);
10416 int Client::listxattr(const char *path
, char *list
, size_t size
,
10417 const UserPerm
& perms
)
10419 Mutex::Locker
lock(client_lock
);
10421 int r
= Client::path_walk(path
, &in
, perms
, true, CEPH_STAT_CAP_XATTR
);
10424 return Client::_listxattr(in
.get(), list
, size
, perms
);
10427 int Client::llistxattr(const char *path
, char *list
, size_t size
,
10428 const UserPerm
& perms
)
10430 Mutex::Locker
lock(client_lock
);
10432 int r
= Client::path_walk(path
, &in
, perms
, false, CEPH_STAT_CAP_XATTR
);
10435 return Client::_listxattr(in
.get(), list
, size
, perms
);
10438 int Client::flistxattr(int fd
, char *list
, size_t size
, const UserPerm
& perms
)
10440 Mutex::Locker
lock(client_lock
);
10441 Fh
*f
= get_filehandle(fd
);
10444 return Client::_listxattr(f
->inode
.get(), list
, size
, perms
);
10447 int Client::removexattr(const char *path
, const char *name
,
10448 const UserPerm
& perms
)
10450 Mutex::Locker
lock(client_lock
);
10452 int r
= Client::path_walk(path
, &in
, perms
, true);
10455 return _removexattr(in
, name
, perms
);
10458 int Client::lremovexattr(const char *path
, const char *name
,
10459 const UserPerm
& perms
)
10461 Mutex::Locker
lock(client_lock
);
10463 int r
= Client::path_walk(path
, &in
, perms
, false);
10466 return _removexattr(in
, name
, perms
);
10469 int Client::fremovexattr(int fd
, const char *name
, const UserPerm
& perms
)
10471 Mutex::Locker
lock(client_lock
);
10472 Fh
*f
= get_filehandle(fd
);
10475 return _removexattr(f
->inode
, name
, perms
);
10478 int Client::setxattr(const char *path
, const char *name
, const void *value
,
10479 size_t size
, int flags
, const UserPerm
& perms
)
10481 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10483 Mutex::Locker
lock(client_lock
);
10485 int r
= Client::path_walk(path
, &in
, perms
, true);
10488 return _setxattr(in
, name
, value
, size
, flags
, perms
);
10491 int Client::lsetxattr(const char *path
, const char *name
, const void *value
,
10492 size_t size
, int flags
, const UserPerm
& perms
)
10494 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10496 Mutex::Locker
lock(client_lock
);
10498 int r
= Client::path_walk(path
, &in
, perms
, false);
10501 return _setxattr(in
, name
, value
, size
, flags
, perms
);
10504 int Client::fsetxattr(int fd
, const char *name
, const void *value
, size_t size
,
10505 int flags
, const UserPerm
& perms
)
10507 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10509 Mutex::Locker
lock(client_lock
);
10510 Fh
*f
= get_filehandle(fd
);
10513 return _setxattr(f
->inode
, name
, value
, size
, flags
, perms
);
10516 int Client::_getxattr(Inode
*in
, const char *name
, void *value
, size_t size
,
10517 const UserPerm
& perms
)
10521 const VXattr
*vxattr
= _match_vxattr(in
, name
);
10525 // Do a force getattr to get the latest quota before returning
10526 // a value to userspace.
10527 r
= _getattr(in
, 0, perms
, true);
10529 // Error from getattr!
10533 // call pointer-to-member function
10535 if (!(vxattr
->exists_cb
&& !(this->*(vxattr
->exists_cb
))(in
))) {
10536 r
= (this->*(vxattr
->getxattr_cb
))(in
, buf
, sizeof(buf
));
10542 if (r
> (int)size
) {
10544 } else if (r
> 0) {
10545 memcpy(value
, buf
, r
);
10551 if (acl_type
== NO_ACL
&& !strncmp(name
, "system.", 7)) {
10556 r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
10560 if (in
->xattrs
.count(n
)) {
10561 r
= in
->xattrs
[n
].length();
10562 if (r
> 0 && size
!= 0) {
10563 if (size
>= (unsigned)r
)
10564 memcpy(value
, in
->xattrs
[n
].c_str(), r
);
10571 ldout(cct
, 3) << "_getxattr(" << in
->ino
<< ", \"" << name
<< "\", " << size
<< ") = " << r
<< dendl
;
10575 int Client::_getxattr(InodeRef
&in
, const char *name
, void *value
, size_t size
,
10576 const UserPerm
& perms
)
10578 if (cct
->_conf
->client_permissions
) {
10579 int r
= xattr_permission(in
.get(), name
, MAY_READ
, perms
);
10583 return _getxattr(in
.get(), name
, value
, size
, perms
);
10586 int Client::ll_getxattr(Inode
*in
, const char *name
, void *value
,
10587 size_t size
, const UserPerm
& perms
)
10589 Mutex::Locker
lock(client_lock
);
10591 vinodeno_t vino
= _get_vino(in
);
10593 ldout(cct
, 3) << "ll_getxattr " << vino
<< " " << name
<< " size " << size
<< dendl
;
10594 tout(cct
) << "ll_getxattr" << std::endl
;
10595 tout(cct
) << vino
.ino
.val
<< std::endl
;
10596 tout(cct
) << name
<< std::endl
;
10598 if (!cct
->_conf
->fuse_default_permissions
) {
10599 int r
= xattr_permission(in
, name
, MAY_READ
, perms
);
10604 return _getxattr(in
, name
, value
, size
, perms
);
10607 int Client::_listxattr(Inode
*in
, char *name
, size_t size
,
10608 const UserPerm
& perms
)
10610 int r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
10612 for (map
<string
,bufferptr
>::iterator p
= in
->xattrs
.begin();
10613 p
!= in
->xattrs
.end();
10615 r
+= p
->first
.length() + 1;
10617 const VXattr
*vxattrs
= _get_vxattrs(in
);
10618 r
+= _vxattrs_name_size(vxattrs
);
10621 if (size
>= (unsigned)r
) {
10622 for (map
<string
,bufferptr
>::iterator p
= in
->xattrs
.begin();
10623 p
!= in
->xattrs
.end();
10625 memcpy(name
, p
->first
.c_str(), p
->first
.length());
10626 name
+= p
->first
.length();
10631 for (int i
= 0; !vxattrs
[i
].name
.empty(); i
++) {
10632 const VXattr
& vxattr
= vxattrs
[i
];
10635 // call pointer-to-member function
10636 if(vxattr
.exists_cb
&& !(this->*(vxattr
.exists_cb
))(in
))
10638 memcpy(name
, vxattr
.name
.c_str(), vxattr
.name
.length());
10639 name
+= vxattr
.name
.length();
10648 ldout(cct
, 3) << "_listxattr(" << in
->ino
<< ", " << size
<< ") = " << r
<< dendl
;
10652 int Client::ll_listxattr(Inode
*in
, char *names
, size_t size
,
10653 const UserPerm
& perms
)
10655 Mutex::Locker
lock(client_lock
);
10657 vinodeno_t vino
= _get_vino(in
);
10659 ldout(cct
, 3) << "ll_listxattr " << vino
<< " size " << size
<< dendl
;
10660 tout(cct
) << "ll_listxattr" << std::endl
;
10661 tout(cct
) << vino
.ino
.val
<< std::endl
;
10662 tout(cct
) << size
<< std::endl
;
10664 return _listxattr(in
, names
, size
, perms
);
10667 int Client::_do_setxattr(Inode
*in
, const char *name
, const void *value
,
10668 size_t size
, int flags
, const UserPerm
& perms
)
10671 int xattr_flags
= 0;
10673 xattr_flags
|= CEPH_XATTR_REMOVE
;
10674 if (flags
& XATTR_CREATE
)
10675 xattr_flags
|= CEPH_XATTR_CREATE
;
10676 if (flags
& XATTR_REPLACE
)
10677 xattr_flags
|= CEPH_XATTR_REPLACE
;
10679 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SETXATTR
);
10681 in
->make_nosnap_relative_path(path
);
10682 req
->set_filepath(path
);
10683 req
->set_string2(name
);
10684 req
->set_inode(in
);
10685 req
->head
.args
.setxattr
.flags
= xattr_flags
;
10688 bl
.append((const char*)value
, size
);
10691 int res
= make_request(req
, perms
);
10694 ldout(cct
, 3) << "_setxattr(" << in
->ino
<< ", \"" << name
<< "\") = " <<
10699 int Client::_setxattr(Inode
*in
, const char *name
, const void *value
,
10700 size_t size
, int flags
, const UserPerm
& perms
)
10702 if (in
->snapid
!= CEPH_NOSNAP
) {
10706 bool posix_acl_xattr
= false;
10707 if (acl_type
== POSIX_ACL
)
10708 posix_acl_xattr
= !strncmp(name
, "system.", 7);
10710 if (strncmp(name
, "user.", 5) &&
10711 strncmp(name
, "security.", 9) &&
10712 strncmp(name
, "trusted.", 8) &&
10713 strncmp(name
, "ceph.", 5) &&
10715 return -EOPNOTSUPP
;
10717 if (posix_acl_xattr
) {
10718 if (!strcmp(name
, ACL_EA_ACCESS
)) {
10719 mode_t new_mode
= in
->mode
;
10721 int ret
= posix_acl_equiv_mode(value
, size
, &new_mode
);
10728 if (new_mode
!= in
->mode
) {
10729 struct ceph_statx stx
;
10730 stx
.stx_mode
= new_mode
;
10731 ret
= _do_setattr(in
, &stx
, CEPH_SETATTR_MODE
, perms
, NULL
);
10736 } else if (!strcmp(name
, ACL_EA_DEFAULT
)) {
10738 if (!S_ISDIR(in
->mode
))
10740 int ret
= posix_acl_check(value
, size
);
10749 return -EOPNOTSUPP
;
10752 const VXattr
*vxattr
= _match_vxattr(in
, name
);
10753 if (vxattr
&& vxattr
->readonly
)
10754 return -EOPNOTSUPP
;
10757 return _do_setxattr(in
, name
, value
, size
, flags
, perms
);
10760 int Client::_setxattr(InodeRef
&in
, const char *name
, const void *value
,
10761 size_t size
, int flags
, const UserPerm
& perms
)
10763 if (cct
->_conf
->client_permissions
) {
10764 int r
= xattr_permission(in
.get(), name
, MAY_WRITE
, perms
);
10768 return _setxattr(in
.get(), name
, value
, size
, flags
, perms
);
10771 int Client::_setxattr_check_data_pool(string
& name
, string
& value
, const OSDMap
*osdmap
)
10774 if (name
== "layout") {
10775 string::iterator begin
= value
.begin();
10776 string::iterator end
= value
.end();
10777 keys_and_values
<string::iterator
> p
; // create instance of parser
10778 std::map
<string
, string
> m
; // map to receive results
10779 if (!qi::parse(begin
, end
, p
, m
)) { // returns true if successful
10784 for (map
<string
,string
>::iterator q
= m
.begin(); q
!= m
.end(); ++q
) {
10785 if (q
->first
== "pool") {
10790 } else if (name
== "layout.pool") {
10794 if (tmp
.length()) {
10797 pool
= boost::lexical_cast
<unsigned>(tmp
);
10798 if (!osdmap
->have_pg_pool(pool
))
10800 } catch (boost::bad_lexical_cast
const&) {
10801 pool
= osdmap
->lookup_pg_pool_name(tmp
);
10811 void Client::_setxattr_maybe_wait_for_osdmap(const char *name
, const void *value
, size_t size
)
10813 // For setting pool of layout, MetaRequest need osdmap epoch.
10814 // There is a race which create a new data pool but client and mds both don't have.
10815 // Make client got the latest osdmap which make mds quickly judge whether get newer osdmap.
10816 if (strcmp(name
, "ceph.file.layout.pool") == 0 || strcmp(name
, "ceph.dir.layout.pool") == 0 ||
10817 strcmp(name
, "ceph.file.layout") == 0 || strcmp(name
, "ceph.dir.layout") == 0) {
10818 string
rest(strstr(name
, "layout"));
10819 string
v((const char*)value
, size
);
10820 int r
= objecter
->with_osdmap([&](const OSDMap
& o
) {
10821 return _setxattr_check_data_pool(rest
, v
, &o
);
10824 if (r
== -ENOENT
) {
10826 objecter
->wait_for_latest_osdmap(&ctx
);
10832 int Client::ll_setxattr(Inode
*in
, const char *name
, const void *value
,
10833 size_t size
, int flags
, const UserPerm
& perms
)
10835 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10837 Mutex::Locker
lock(client_lock
);
10839 vinodeno_t vino
= _get_vino(in
);
10841 ldout(cct
, 3) << "ll_setxattr " << vino
<< " " << name
<< " size " << size
<< dendl
;
10842 tout(cct
) << "ll_setxattr" << std::endl
;
10843 tout(cct
) << vino
.ino
.val
<< std::endl
;
10844 tout(cct
) << name
<< std::endl
;
10846 if (!cct
->_conf
->fuse_default_permissions
) {
10847 int r
= xattr_permission(in
, name
, MAY_WRITE
, perms
);
10851 return _setxattr(in
, name
, value
, size
, flags
, perms
);
10854 int Client::_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
)
10856 if (in
->snapid
!= CEPH_NOSNAP
) {
10860 // same xattrs supported by kernel client
10861 if (strncmp(name
, "user.", 5) &&
10862 strncmp(name
, "system.", 7) &&
10863 strncmp(name
, "security.", 9) &&
10864 strncmp(name
, "trusted.", 8) &&
10865 strncmp(name
, "ceph.", 5))
10866 return -EOPNOTSUPP
;
10868 const VXattr
*vxattr
= _match_vxattr(in
, name
);
10869 if (vxattr
&& vxattr
->readonly
)
10870 return -EOPNOTSUPP
;
10872 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_RMXATTR
);
10874 in
->make_nosnap_relative_path(path
);
10875 req
->set_filepath(path
);
10876 req
->set_filepath2(name
);
10877 req
->set_inode(in
);
10879 int res
= make_request(req
, perms
);
10882 ldout(cct
, 3) << "_removexattr(" << in
->ino
<< ", \"" << name
<< "\") = " << res
<< dendl
;
10886 int Client::_removexattr(InodeRef
&in
, const char *name
, const UserPerm
& perms
)
10888 if (cct
->_conf
->client_permissions
) {
10889 int r
= xattr_permission(in
.get(), name
, MAY_WRITE
, perms
);
10893 return _removexattr(in
.get(), name
, perms
);
10896 int Client::ll_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
)
10898 Mutex::Locker
lock(client_lock
);
10900 vinodeno_t vino
= _get_vino(in
);
10902 ldout(cct
, 3) << "ll_removexattr " << vino
<< " " << name
<< dendl
;
10903 tout(cct
) << "ll_removexattr" << std::endl
;
10904 tout(cct
) << vino
.ino
.val
<< std::endl
;
10905 tout(cct
) << name
<< std::endl
;
10907 if (!cct
->_conf
->fuse_default_permissions
) {
10908 int r
= xattr_permission(in
, name
, MAY_WRITE
, perms
);
10913 return _removexattr(in
, name
, perms
);
10916 bool Client::_vxattrcb_quota_exists(Inode
*in
)
10918 return in
->quota
.is_enable();
10920 size_t Client::_vxattrcb_quota(Inode
*in
, char *val
, size_t size
)
10922 return snprintf(val
, size
,
10923 "max_bytes=%lld max_files=%lld",
10924 (long long int)in
->quota
.max_bytes
,
10925 (long long int)in
->quota
.max_files
);
10927 size_t Client::_vxattrcb_quota_max_bytes(Inode
*in
, char *val
, size_t size
)
10929 return snprintf(val
, size
, "%lld", (long long int)in
->quota
.max_bytes
);
10931 size_t Client::_vxattrcb_quota_max_files(Inode
*in
, char *val
, size_t size
)
10933 return snprintf(val
, size
, "%lld", (long long int)in
->quota
.max_files
);
10936 bool Client::_vxattrcb_layout_exists(Inode
*in
)
10938 return in
->layout
!= file_layout_t();
10940 size_t Client::_vxattrcb_layout(Inode
*in
, char *val
, size_t size
)
10942 int r
= snprintf(val
, size
,
10943 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
10944 (unsigned long long)in
->layout
.stripe_unit
,
10945 (unsigned long long)in
->layout
.stripe_count
,
10946 (unsigned long long)in
->layout
.object_size
);
10947 objecter
->with_osdmap([&](const OSDMap
& o
) {
10948 if (o
.have_pg_pool(in
->layout
.pool_id
))
10949 r
+= snprintf(val
+ r
, size
- r
, "%s",
10950 o
.get_pool_name(in
->layout
.pool_id
).c_str());
10952 r
+= snprintf(val
+ r
, size
- r
, "%" PRIu64
,
10953 (uint64_t)in
->layout
.pool_id
);
10955 if (in
->layout
.pool_ns
.length())
10956 r
+= snprintf(val
+ r
, size
- r
, " pool_namespace=%s",
10957 in
->layout
.pool_ns
.c_str());
10960 size_t Client::_vxattrcb_layout_stripe_unit(Inode
*in
, char *val
, size_t size
)
10962 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.stripe_unit
);
10964 size_t Client::_vxattrcb_layout_stripe_count(Inode
*in
, char *val
, size_t size
)
10966 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.stripe_count
);
10968 size_t Client::_vxattrcb_layout_object_size(Inode
*in
, char *val
, size_t size
)
10970 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.object_size
);
10972 size_t Client::_vxattrcb_layout_pool(Inode
*in
, char *val
, size_t size
)
10975 objecter
->with_osdmap([&](const OSDMap
& o
) {
10976 if (o
.have_pg_pool(in
->layout
.pool_id
))
10977 r
= snprintf(val
, size
, "%s", o
.get_pool_name(
10978 in
->layout
.pool_id
).c_str());
10980 r
= snprintf(val
, size
, "%" PRIu64
, (uint64_t)in
->layout
.pool_id
);
10984 size_t Client::_vxattrcb_layout_pool_namespace(Inode
*in
, char *val
, size_t size
)
10986 return snprintf(val
, size
, "%s", in
->layout
.pool_ns
.c_str());
10988 size_t Client::_vxattrcb_dir_entries(Inode
*in
, char *val
, size_t size
)
10990 return snprintf(val
, size
, "%lld", (unsigned long long)(in
->dirstat
.nfiles
+ in
->dirstat
.nsubdirs
));
10992 size_t Client::_vxattrcb_dir_files(Inode
*in
, char *val
, size_t size
)
10994 return snprintf(val
, size
, "%lld", (unsigned long long)in
->dirstat
.nfiles
);
10996 size_t Client::_vxattrcb_dir_subdirs(Inode
*in
, char *val
, size_t size
)
10998 return snprintf(val
, size
, "%lld", (unsigned long long)in
->dirstat
.nsubdirs
);
11000 size_t Client::_vxattrcb_dir_rentries(Inode
*in
, char *val
, size_t size
)
11002 return snprintf(val
, size
, "%lld", (unsigned long long)(in
->rstat
.rfiles
+ in
->rstat
.rsubdirs
));
11004 size_t Client::_vxattrcb_dir_rfiles(Inode
*in
, char *val
, size_t size
)
11006 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rfiles
);
11008 size_t Client::_vxattrcb_dir_rsubdirs(Inode
*in
, char *val
, size_t size
)
11010 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rsubdirs
);
11012 size_t Client::_vxattrcb_dir_rbytes(Inode
*in
, char *val
, size_t size
)
11014 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rbytes
);
11016 size_t Client::_vxattrcb_dir_rctime(Inode
*in
, char *val
, size_t size
)
11018 return snprintf(val
, size
, "%ld.09%ld", (long)in
->rstat
.rctime
.sec(),
11019 (long)in
->rstat
.rctime
.nsec());
// Helper macros for building the VXattr tables below: CEPH_XATTR_NAME*
// compose the "ceph.<type>.<name>" strings; the XATTR_*_FIELD macros expand
// to VXattr initializers wired to the matching _vxattrcb_* member callbacks.
11022 #define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name
11023 #define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." #_name2
11025 #define XATTR_NAME_CEPH(_type, _name) \
11027 name: CEPH_XATTR_NAME(_type, _name), \
11028 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
11033 #define XATTR_LAYOUT_FIELD(_type, _name, _field) \
11035 name: CEPH_XATTR_NAME2(_type, _name, _field), \
11036 getxattr_cb: &Client::_vxattrcb_ ## _name ## _ ## _field, \
11039 exists_cb: &Client::_vxattrcb_layout_exists, \
11041 #define XATTR_QUOTA_FIELD(_type, _name) \
11043 name: CEPH_XATTR_NAME(_type, _name), \
11044 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
11047 exists_cb: &Client::_vxattrcb_quota_exists, \
11050 const Client::VXattr
Client::_dir_vxattrs
[] = {
11052 name
: "ceph.dir.layout",
11053 getxattr_cb
: &Client::_vxattrcb_layout
,
11056 exists_cb
: &Client::_vxattrcb_layout_exists
,
11058 XATTR_LAYOUT_FIELD(dir
, layout
, stripe_unit
),
11059 XATTR_LAYOUT_FIELD(dir
, layout
, stripe_count
),
11060 XATTR_LAYOUT_FIELD(dir
, layout
, object_size
),
11061 XATTR_LAYOUT_FIELD(dir
, layout
, pool
),
11062 XATTR_LAYOUT_FIELD(dir
, layout
, pool_namespace
),
11063 XATTR_NAME_CEPH(dir
, entries
),
11064 XATTR_NAME_CEPH(dir
, files
),
11065 XATTR_NAME_CEPH(dir
, subdirs
),
11066 XATTR_NAME_CEPH(dir
, rentries
),
11067 XATTR_NAME_CEPH(dir
, rfiles
),
11068 XATTR_NAME_CEPH(dir
, rsubdirs
),
11069 XATTR_NAME_CEPH(dir
, rbytes
),
11070 XATTR_NAME_CEPH(dir
, rctime
),
11072 name
: "ceph.quota",
11073 getxattr_cb
: &Client::_vxattrcb_quota
,
11076 exists_cb
: &Client::_vxattrcb_quota_exists
,
11078 XATTR_QUOTA_FIELD(quota
, max_bytes
),
11079 XATTR_QUOTA_FIELD(quota
, max_files
),
11080 { name
: "" } /* Required table terminator */
11083 const Client::VXattr
Client::_file_vxattrs
[] = {
11085 name
: "ceph.file.layout",
11086 getxattr_cb
: &Client::_vxattrcb_layout
,
11089 exists_cb
: &Client::_vxattrcb_layout_exists
,
11091 XATTR_LAYOUT_FIELD(file
, layout
, stripe_unit
),
11092 XATTR_LAYOUT_FIELD(file
, layout
, stripe_count
),
11093 XATTR_LAYOUT_FIELD(file
, layout
, object_size
),
11094 XATTR_LAYOUT_FIELD(file
, layout
, pool
),
11095 XATTR_LAYOUT_FIELD(file
, layout
, pool_namespace
),
11096 { name
: "" } /* Required table terminator */
11099 const Client::VXattr
*Client::_get_vxattrs(Inode
*in
)
11102 return _dir_vxattrs
;
11103 else if (in
->is_file())
11104 return _file_vxattrs
;
11108 const Client::VXattr
*Client::_match_vxattr(Inode
*in
, const char *name
)
11110 if (strncmp(name
, "ceph.", 5) == 0) {
11111 const VXattr
*vxattr
= _get_vxattrs(in
);
11113 while (!vxattr
->name
.empty()) {
11114 if (vxattr
->name
== name
)
11123 size_t Client::_vxattrs_calcu_name_size(const VXattr
*vxattr
)
11126 while (!vxattr
->name
.empty()) {
11127 if (!vxattr
->hidden
)
11128 len
+= vxattr
->name
.length() + 1;
11134 int Client::ll_readlink(Inode
*in
, char *buf
, size_t buflen
, const UserPerm
& perms
)
11136 Mutex::Locker
lock(client_lock
);
11138 vinodeno_t vino
= _get_vino(in
);
11140 ldout(cct
, 3) << "ll_readlink " << vino
<< dendl
;
11141 tout(cct
) << "ll_readlink" << std::endl
;
11142 tout(cct
) << vino
.ino
.val
<< std::endl
;
11144 set
<Dentry
*>::iterator dn
= in
->dn_set
.begin();
11145 while (dn
!= in
->dn_set
.end()) {
11150 int r
= _readlink(in
, buf
, buflen
); // FIXME: no permission checking!
11151 ldout(cct
, 3) << "ll_readlink " << vino
<< " = " << r
<< dendl
;
11155 int Client::_mknod(Inode
*dir
, const char *name
, mode_t mode
, dev_t rdev
,
11156 const UserPerm
& perms
, InodeRef
*inp
)
11158 ldout(cct
, 3) << "_mknod(" << dir
->ino
<< " " << name
<< ", 0" << oct
11159 << mode
<< dec
<< ", " << rdev
<< ", uid " << perms
.uid()
11160 << ", gid " << perms
.gid() << ")" << dendl
;
11162 if (strlen(name
) > NAME_MAX
)
11163 return -ENAMETOOLONG
;
11165 if (dir
->snapid
!= CEPH_NOSNAP
) {
11168 if (is_quota_files_exceeded(dir
, perms
)) {
11172 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_MKNOD
);
11175 dir
->make_nosnap_relative_path(path
);
11176 path
.push_dentry(name
);
11177 req
->set_filepath(path
);
11178 req
->set_inode(dir
);
11179 req
->head
.args
.mknod
.rdev
= rdev
;
11180 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11181 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11183 bufferlist xattrs_bl
;
11184 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perms
);
11187 req
->head
.args
.mknod
.mode
= mode
;
11188 if (xattrs_bl
.length() > 0)
11189 req
->set_data(xattrs_bl
);
11192 res
= get_or_create(dir
, name
, &de
);
11195 req
->set_dentry(de
);
11197 res
= make_request(req
, perms
, inp
);
11201 ldout(cct
, 3) << "mknod(" << path
<< ", 0" << oct
<< mode
<< dec
<< ") = " << res
<< dendl
;
11209 int Client::ll_mknod(Inode
*parent
, const char *name
, mode_t mode
,
11210 dev_t rdev
, struct stat
*attr
, Inode
**out
,
11211 const UserPerm
& perms
)
11213 Mutex::Locker
lock(client_lock
);
11215 vinodeno_t vparent
= _get_vino(parent
);
11217 ldout(cct
, 3) << "ll_mknod " << vparent
<< " " << name
<< dendl
;
11218 tout(cct
) << "ll_mknod" << std::endl
;
11219 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11220 tout(cct
) << name
<< std::endl
;
11221 tout(cct
) << mode
<< std::endl
;
11222 tout(cct
) << rdev
<< std::endl
;
11224 if (!cct
->_conf
->fuse_default_permissions
) {
11225 int r
= may_create(parent
, perms
);
11231 int r
= _mknod(parent
, name
, mode
, rdev
, perms
, &in
);
11233 fill_stat(in
, attr
);
11236 tout(cct
) << attr
->st_ino
<< std::endl
;
11237 ldout(cct
, 3) << "ll_mknod " << vparent
<< " " << name
11238 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11243 int Client::ll_mknodx(Inode
*parent
, const char *name
, mode_t mode
,
11244 dev_t rdev
, Inode
**out
,
11245 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
11246 const UserPerm
& perms
)
11248 unsigned caps
= statx_to_mask(flags
, want
);
11249 Mutex::Locker
lock(client_lock
);
11251 vinodeno_t vparent
= _get_vino(parent
);
11253 ldout(cct
, 3) << "ll_mknodx " << vparent
<< " " << name
<< dendl
;
11254 tout(cct
) << "ll_mknodx" << std::endl
;
11255 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11256 tout(cct
) << name
<< std::endl
;
11257 tout(cct
) << mode
<< std::endl
;
11258 tout(cct
) << rdev
<< std::endl
;
11260 if (!cct
->_conf
->fuse_default_permissions
) {
11261 int r
= may_create(parent
, perms
);
11267 int r
= _mknod(parent
, name
, mode
, rdev
, perms
, &in
);
11269 fill_statx(in
, caps
, stx
);
11272 tout(cct
) << stx
->stx_ino
<< std::endl
;
11273 ldout(cct
, 3) << "ll_mknodx " << vparent
<< " " << name
11274 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11279 int Client::_create(Inode
*dir
, const char *name
, int flags
, mode_t mode
,
11280 InodeRef
*inp
, Fh
**fhp
, int stripe_unit
, int stripe_count
,
11281 int object_size
, const char *data_pool
, bool *created
,
11282 const UserPerm
& perms
)
11284 ldout(cct
, 3) << "_create(" << dir
->ino
<< " " << name
<< ", 0" << oct
<<
11285 mode
<< dec
<< ")" << dendl
;
11287 if (strlen(name
) > NAME_MAX
)
11288 return -ENAMETOOLONG
;
11289 if (dir
->snapid
!= CEPH_NOSNAP
) {
11292 if (is_quota_files_exceeded(dir
, perms
)) {
11296 // use normalized flags to generate cmode
11297 int cmode
= ceph_flags_to_mode(ceph_flags_sys2wire(flags
));
11301 int64_t pool_id
= -1;
11302 if (data_pool
&& *data_pool
) {
11303 pool_id
= objecter
->with_osdmap(
11304 std::mem_fn(&OSDMap::lookup_pg_pool_name
), data_pool
);
11307 if (pool_id
> 0xffffffffll
)
11308 return -ERANGE
; // bummer!
11311 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_CREATE
);
11314 dir
->make_nosnap_relative_path(path
);
11315 path
.push_dentry(name
);
11316 req
->set_filepath(path
);
11317 req
->set_inode(dir
);
11318 req
->head
.args
.open
.flags
= ceph_flags_sys2wire(flags
| O_CREAT
);
11320 req
->head
.args
.open
.stripe_unit
= stripe_unit
;
11321 req
->head
.args
.open
.stripe_count
= stripe_count
;
11322 req
->head
.args
.open
.object_size
= object_size
;
11323 if (cct
->_conf
->client_debug_getattr_caps
)
11324 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
11326 req
->head
.args
.open
.mask
= 0;
11327 req
->head
.args
.open
.pool
= pool_id
;
11328 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11329 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11332 bufferlist xattrs_bl
;
11333 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perms
);
11336 req
->head
.args
.open
.mode
= mode
;
11337 if (xattrs_bl
.length() > 0)
11338 req
->set_data(xattrs_bl
);
11341 res
= get_or_create(dir
, name
, &de
);
11344 req
->set_dentry(de
);
11346 res
= make_request(req
, perms
, inp
, created
);
11351 /* If the caller passed a value in fhp, do the open */
11353 (*inp
)->get_open_ref(cmode
);
11354 *fhp
= _create_fh(inp
->get(), flags
, cmode
, perms
);
11360 ldout(cct
, 3) << "create(" << path
<< ", 0" << oct
<< mode
<< dec
11361 << " layout " << stripe_unit
11362 << ' ' << stripe_count
11363 << ' ' << object_size
11364 <<") = " << res
<< dendl
;
11373 int Client::_mkdir(Inode
*dir
, const char *name
, mode_t mode
, const UserPerm
& perm
,
11376 ldout(cct
, 3) << "_mkdir(" << dir
->ino
<< " " << name
<< ", 0" << oct
11377 << mode
<< dec
<< ", uid " << perm
.uid()
11378 << ", gid " << perm
.gid() << ")" << dendl
;
11380 if (strlen(name
) > NAME_MAX
)
11381 return -ENAMETOOLONG
;
11383 if (dir
->snapid
!= CEPH_NOSNAP
&& dir
->snapid
!= CEPH_SNAPDIR
) {
11386 if (is_quota_files_exceeded(dir
, perm
)) {
11389 MetaRequest
*req
= new MetaRequest(dir
->snapid
== CEPH_SNAPDIR
?
11390 CEPH_MDS_OP_MKSNAP
: CEPH_MDS_OP_MKDIR
);
11393 dir
->make_nosnap_relative_path(path
);
11394 path
.push_dentry(name
);
11395 req
->set_filepath(path
);
11396 req
->set_inode(dir
);
11397 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11398 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11401 bufferlist xattrs_bl
;
11402 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perm
);
11405 req
->head
.args
.mkdir
.mode
= mode
;
11406 if (xattrs_bl
.length() > 0)
11407 req
->set_data(xattrs_bl
);
11410 res
= get_or_create(dir
, name
, &de
);
11413 req
->set_dentry(de
);
11415 ldout(cct
, 10) << "_mkdir: making request" << dendl
;
11416 res
= make_request(req
, perm
, inp
);
11417 ldout(cct
, 10) << "_mkdir result is " << res
<< dendl
;
11421 ldout(cct
, 3) << "_mkdir(" << path
<< ", 0" << oct
<< mode
<< dec
<< ") = " << res
<< dendl
;
11429 int Client::ll_mkdir(Inode
*parent
, const char *name
, mode_t mode
,
11430 struct stat
*attr
, Inode
**out
, const UserPerm
& perm
)
11432 Mutex::Locker
lock(client_lock
);
11434 vinodeno_t vparent
= _get_vino(parent
);
11436 ldout(cct
, 3) << "ll_mkdir " << vparent
<< " " << name
<< dendl
;
11437 tout(cct
) << "ll_mkdir" << std::endl
;
11438 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11439 tout(cct
) << name
<< std::endl
;
11440 tout(cct
) << mode
<< std::endl
;
11442 if (!cct
->_conf
->fuse_default_permissions
) {
11443 int r
= may_create(parent
, perm
);
11449 int r
= _mkdir(parent
, name
, mode
, perm
, &in
);
11451 fill_stat(in
, attr
);
11454 tout(cct
) << attr
->st_ino
<< std::endl
;
11455 ldout(cct
, 3) << "ll_mkdir " << vparent
<< " " << name
11456 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11461 int Client::ll_mkdirx(Inode
*parent
, const char *name
, mode_t mode
, Inode
**out
,
11462 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
11463 const UserPerm
& perms
)
11465 Mutex::Locker
lock(client_lock
);
11467 vinodeno_t vparent
= _get_vino(parent
);
11469 ldout(cct
, 3) << "ll_mkdirx " << vparent
<< " " << name
<< dendl
;
11470 tout(cct
) << "ll_mkdirx" << std::endl
;
11471 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11472 tout(cct
) << name
<< std::endl
;
11473 tout(cct
) << mode
<< std::endl
;
11475 if (!cct
->_conf
->fuse_default_permissions
) {
11476 int r
= may_create(parent
, perms
);
11482 int r
= _mkdir(parent
, name
, mode
, perms
, &in
);
11484 fill_statx(in
, statx_to_mask(flags
, want
), stx
);
11490 tout(cct
) << stx
->stx_ino
<< std::endl
;
11491 ldout(cct
, 3) << "ll_mkdirx " << vparent
<< " " << name
11492 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11497 int Client::_symlink(Inode
*dir
, const char *name
, const char *target
,
11498 const UserPerm
& perms
, InodeRef
*inp
)
11500 ldout(cct
, 3) << "_symlink(" << dir
->ino
<< " " << name
<< ", " << target
11501 << ", uid " << perms
.uid() << ", gid " << perms
.gid() << ")"
11504 if (strlen(name
) > NAME_MAX
)
11505 return -ENAMETOOLONG
;
11507 if (dir
->snapid
!= CEPH_NOSNAP
) {
11510 if (is_quota_files_exceeded(dir
, perms
)) {
11514 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SYMLINK
);
11517 dir
->make_nosnap_relative_path(path
);
11518 path
.push_dentry(name
);
11519 req
->set_filepath(path
);
11520 req
->set_inode(dir
);
11521 req
->set_string2(target
);
11522 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11523 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11526 int res
= get_or_create(dir
, name
, &de
);
11529 req
->set_dentry(de
);
11531 res
= make_request(req
, perms
, inp
);
11534 ldout(cct
, 3) << "_symlink(\"" << path
<< "\", \"" << target
<< "\") = " <<
11543 int Client::ll_symlink(Inode
*parent
, const char *name
, const char *value
,
11544 struct stat
*attr
, Inode
**out
, const UserPerm
& perms
)
11546 Mutex::Locker
lock(client_lock
);
11548 vinodeno_t vparent
= _get_vino(parent
);
11550 ldout(cct
, 3) << "ll_symlink " << vparent
<< " " << name
<< " -> " << value
11552 tout(cct
) << "ll_symlink" << std::endl
;
11553 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11554 tout(cct
) << name
<< std::endl
;
11555 tout(cct
) << value
<< std::endl
;
11557 if (!cct
->_conf
->fuse_default_permissions
) {
11558 int r
= may_create(parent
, perms
);
11564 int r
= _symlink(parent
, name
, value
, perms
, &in
);
11566 fill_stat(in
, attr
);
11569 tout(cct
) << attr
->st_ino
<< std::endl
;
11570 ldout(cct
, 3) << "ll_symlink " << vparent
<< " " << name
11571 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11576 int Client::ll_symlinkx(Inode
*parent
, const char *name
, const char *value
,
11577 Inode
**out
, struct ceph_statx
*stx
, unsigned want
,
11578 unsigned flags
, const UserPerm
& perms
)
11580 Mutex::Locker
lock(client_lock
);
11582 vinodeno_t vparent
= _get_vino(parent
);
11584 ldout(cct
, 3) << "ll_symlinkx " << vparent
<< " " << name
<< " -> " << value
11586 tout(cct
) << "ll_symlinkx" << std::endl
;
11587 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11588 tout(cct
) << name
<< std::endl
;
11589 tout(cct
) << value
<< std::endl
;
11591 if (!cct
->_conf
->fuse_default_permissions
) {
11592 int r
= may_create(parent
, perms
);
11598 int r
= _symlink(parent
, name
, value
, perms
, &in
);
11600 fill_statx(in
, statx_to_mask(flags
, want
), stx
);
11603 tout(cct
) << stx
->stx_ino
<< std::endl
;
11604 ldout(cct
, 3) << "ll_symlinkx " << vparent
<< " " << name
11605 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11610 int Client::_unlink(Inode
*dir
, const char *name
, const UserPerm
& perm
)
11612 ldout(cct
, 3) << "_unlink(" << dir
->ino
<< " " << name
11613 << " uid " << perm
.uid() << " gid " << perm
.gid()
11616 if (dir
->snapid
!= CEPH_NOSNAP
) {
11620 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_UNLINK
);
11623 dir
->make_nosnap_relative_path(path
);
11624 path
.push_dentry(name
);
11625 req
->set_filepath(path
);
11630 int res
= get_or_create(dir
, name
, &de
);
11633 req
->set_dentry(de
);
11634 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11635 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11637 res
= _lookup(dir
, name
, 0, &otherin
, perm
);
11640 req
->set_other_inode(otherin
.get());
11641 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
11643 req
->set_inode(dir
);
11645 res
= make_request(req
, perm
);
11648 ldout(cct
, 3) << "unlink(" << path
<< ") = " << res
<< dendl
;
11656 int Client::ll_unlink(Inode
*in
, const char *name
, const UserPerm
& perm
)
11658 Mutex::Locker
lock(client_lock
);
11660 vinodeno_t vino
= _get_vino(in
);
11662 ldout(cct
, 3) << "ll_unlink " << vino
<< " " << name
<< dendl
;
11663 tout(cct
) << "ll_unlink" << std::endl
;
11664 tout(cct
) << vino
.ino
.val
<< std::endl
;
11665 tout(cct
) << name
<< std::endl
;
11667 if (!cct
->_conf
->fuse_default_permissions
) {
11668 int r
= may_delete(in
, name
, perm
);
11672 return _unlink(in
, name
, perm
);
11675 int Client::_rmdir(Inode
*dir
, const char *name
, const UserPerm
& perms
)
11677 ldout(cct
, 3) << "_rmdir(" << dir
->ino
<< " " << name
<< " uid "
11678 << perms
.uid() << " gid " << perms
.gid() << ")" << dendl
;
11680 if (dir
->snapid
!= CEPH_NOSNAP
&& dir
->snapid
!= CEPH_SNAPDIR
) {
11684 MetaRequest
*req
= new MetaRequest(dir
->snapid
== CEPH_SNAPDIR
? CEPH_MDS_OP_RMSNAP
:CEPH_MDS_OP_RMDIR
);
11686 dir
->make_nosnap_relative_path(path
);
11687 path
.push_dentry(name
);
11688 req
->set_filepath(path
);
11690 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11691 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11692 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
11697 int res
= get_or_create(dir
, name
, &de
);
11700 res
= _lookup(dir
, name
, 0, &in
, perms
);
11703 if (req
->get_op() == CEPH_MDS_OP_RMDIR
) {
11704 req
->set_inode(dir
);
11705 req
->set_dentry(de
);
11706 req
->set_other_inode(in
.get());
11708 unlink(de
, true, true);
11709 req
->set_other_inode(in
.get());
11712 res
= make_request(req
, perms
);
11715 ldout(cct
, 3) << "rmdir(" << path
<< ") = " << res
<< dendl
;
11723 int Client::ll_rmdir(Inode
*in
, const char *name
, const UserPerm
& perms
)
11725 Mutex::Locker
lock(client_lock
);
11727 vinodeno_t vino
= _get_vino(in
);
11729 ldout(cct
, 3) << "ll_rmdir " << vino
<< " " << name
<< dendl
;
11730 tout(cct
) << "ll_rmdir" << std::endl
;
11731 tout(cct
) << vino
.ino
.val
<< std::endl
;
11732 tout(cct
) << name
<< std::endl
;
11734 if (!cct
->_conf
->fuse_default_permissions
) {
11735 int r
= may_delete(in
, name
, perms
);
11740 return _rmdir(in
, name
, perms
);
11743 int Client::_rename(Inode
*fromdir
, const char *fromname
, Inode
*todir
, const char *toname
, const UserPerm
& perm
)
11745 ldout(cct
, 3) << "_rename(" << fromdir
->ino
<< " " << fromname
<< " to "
11746 << todir
->ino
<< " " << toname
11747 << " uid " << perm
.uid() << " gid " << perm
.gid() << ")"
11750 if (fromdir
->snapid
!= todir
->snapid
)
11753 int op
= CEPH_MDS_OP_RENAME
;
11754 if (fromdir
->snapid
!= CEPH_NOSNAP
) {
11755 if (fromdir
== todir
&& fromdir
->snapid
== CEPH_SNAPDIR
)
11756 op
= CEPH_MDS_OP_RENAMESNAP
;
11760 if (fromdir
!= todir
) {
11761 Inode
*fromdir_root
=
11762 fromdir
->quota
.is_enable() ? fromdir
: get_quota_root(fromdir
, perm
);
11763 Inode
*todir_root
=
11764 todir
->quota
.is_enable() ? todir
: get_quota_root(todir
, perm
);
11765 if (fromdir_root
!= todir_root
) {
11771 MetaRequest
*req
= new MetaRequest(op
);
11774 fromdir
->make_nosnap_relative_path(from
);
11775 from
.push_dentry(fromname
);
11777 todir
->make_nosnap_relative_path(to
);
11778 to
.push_dentry(toname
);
11779 req
->set_filepath(to
);
11780 req
->set_filepath2(from
);
11783 int res
= get_or_create(fromdir
, fromname
, &oldde
);
11787 res
= get_or_create(todir
, toname
, &de
);
11791 if (op
== CEPH_MDS_OP_RENAME
) {
11792 req
->set_old_dentry(oldde
);
11793 req
->old_dentry_drop
= CEPH_CAP_FILE_SHARED
;
11794 req
->old_dentry_unless
= CEPH_CAP_FILE_EXCL
;
11796 req
->set_dentry(de
);
11797 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11798 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11800 InodeRef oldin
, otherin
;
11801 res
= _lookup(fromdir
, fromname
, 0, &oldin
, perm
);
11804 req
->set_old_inode(oldin
.get());
11805 req
->old_inode_drop
= CEPH_CAP_LINK_SHARED
;
11807 res
= _lookup(todir
, toname
, 0, &otherin
, perm
);
11808 if (res
!= 0 && res
!= -ENOENT
) {
11810 } else if (res
== 0) {
11811 req
->set_other_inode(otherin
.get());
11812 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
11815 req
->set_inode(todir
);
11817 // renamesnap reply contains no tracedn, so we need to invalidate
11819 unlink(oldde
, true, true);
11820 unlink(de
, true, true);
11823 res
= make_request(req
, perm
, &target
);
11824 ldout(cct
, 10) << "rename result is " << res
<< dendl
;
11826 // renamed item from our cache
11829 ldout(cct
, 3) << "_rename(" << from
<< ", " << to
<< ") = " << res
<< dendl
;
11837 int Client::ll_rename(Inode
*parent
, const char *name
, Inode
*newparent
,
11838 const char *newname
, const UserPerm
& perm
)
11840 Mutex::Locker
lock(client_lock
);
11842 vinodeno_t vparent
= _get_vino(parent
);
11843 vinodeno_t vnewparent
= _get_vino(newparent
);
11845 ldout(cct
, 3) << "ll_rename " << vparent
<< " " << name
<< " to "
11846 << vnewparent
<< " " << newname
<< dendl
;
11847 tout(cct
) << "ll_rename" << std::endl
;
11848 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11849 tout(cct
) << name
<< std::endl
;
11850 tout(cct
) << vnewparent
.ino
.val
<< std::endl
;
11851 tout(cct
) << newname
<< std::endl
;
11853 if (!cct
->_conf
->fuse_default_permissions
) {
11854 int r
= may_delete(parent
, name
, perm
);
11857 r
= may_delete(newparent
, newname
, perm
);
11858 if (r
< 0 && r
!= -ENOENT
)
11862 return _rename(parent
, name
, newparent
, newname
, perm
);
11865 int Client::_link(Inode
*in
, Inode
*dir
, const char *newname
, const UserPerm
& perm
, InodeRef
*inp
)
11867 ldout(cct
, 3) << "_link(" << in
->ino
<< " to " << dir
->ino
<< " " << newname
11868 << " uid " << perm
.uid() << " gid " << perm
.gid() << ")" << dendl
;
11870 if (strlen(newname
) > NAME_MAX
)
11871 return -ENAMETOOLONG
;
11873 if (in
->snapid
!= CEPH_NOSNAP
|| dir
->snapid
!= CEPH_NOSNAP
) {
11876 if (is_quota_files_exceeded(dir
, perm
)) {
11880 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LINK
);
11882 filepath
path(newname
, dir
->ino
);
11883 req
->set_filepath(path
);
11884 filepath
existing(in
->ino
);
11885 req
->set_filepath2(existing
);
11887 req
->set_inode(dir
);
11888 req
->inode_drop
= CEPH_CAP_FILE_SHARED
;
11889 req
->inode_unless
= CEPH_CAP_FILE_EXCL
;
11892 int res
= get_or_create(dir
, newname
, &de
);
11895 req
->set_dentry(de
);
11897 res
= make_request(req
, perm
, inp
);
11898 ldout(cct
, 10) << "link result is " << res
<< dendl
;
11901 ldout(cct
, 3) << "link(" << existing
<< ", " << path
<< ") = " << res
<< dendl
;
11909 int Client::ll_link(Inode
*in
, Inode
*newparent
, const char *newname
,
11910 const UserPerm
& perm
)
11912 Mutex::Locker
lock(client_lock
);
11914 vinodeno_t vino
= _get_vino(in
);
11915 vinodeno_t vnewparent
= _get_vino(newparent
);
11917 ldout(cct
, 3) << "ll_link " << vino
<< " to " << vnewparent
<< " " <<
11919 tout(cct
) << "ll_link" << std::endl
;
11920 tout(cct
) << vino
.ino
.val
<< std::endl
;
11921 tout(cct
) << vnewparent
<< std::endl
;
11922 tout(cct
) << newname
<< std::endl
;
11927 if (!cct
->_conf
->fuse_default_permissions
) {
11928 if (S_ISDIR(in
->mode
))
11931 r
= may_hardlink(in
, perm
);
11935 r
= may_create(newparent
, perm
);
11940 return _link(in
, newparent
, newname
, perm
, &target
);
11943 int Client::ll_num_osds(void)
11945 Mutex::Locker
lock(client_lock
);
11946 return objecter
->with_osdmap(std::mem_fn(&OSDMap::get_num_osds
));
11949 int Client::ll_osdaddr(int osd
, uint32_t *addr
)
11951 Mutex::Locker
lock(client_lock
);
11953 bool exists
= objecter
->with_osdmap([&](const OSDMap
& o
) {
11954 if (!o
.exists(osd
))
11956 g
= o
.get_addr(osd
);
11961 uint32_t nb_addr
= (g
.in4_addr()).sin_addr
.s_addr
;
11962 *addr
= ntohl(nb_addr
);
11965 uint32_t Client::ll_stripe_unit(Inode
*in
)
11967 Mutex::Locker
lock(client_lock
);
11968 return in
->layout
.stripe_unit
;
11971 uint64_t Client::ll_snap_seq(Inode
*in
)
11973 Mutex::Locker
lock(client_lock
);
11974 return in
->snaprealm
->seq
;
11977 int Client::ll_file_layout(Inode
*in
, file_layout_t
*layout
)
11979 Mutex::Locker
lock(client_lock
);
11980 *layout
= in
->layout
;
11984 int Client::ll_file_layout(Fh
*fh
, file_layout_t
*layout
)
11986 return ll_file_layout(fh
->inode
.get(), layout
);
11989 /* Currently we cannot take advantage of redundancy in reads, since we
11990 would have to go through all possible placement groups (a
11991 potentially quite large number determined by a hash), and use CRUSH
11992 to calculate the appropriate set of OSDs for each placement group,
11993 then index into that. An array with one entry per OSD is much more
11994 tractable and works for demonstration purposes. */
11996 int Client::ll_get_stripe_osd(Inode
*in
, uint64_t blockno
,
11997 file_layout_t
* layout
)
11999 Mutex::Locker
lock(client_lock
);
12000 inodeno_t ino
= ll_get_inodeno(in
);
12001 uint32_t object_size
= layout
->object_size
;
12002 uint32_t su
= layout
->stripe_unit
;
12003 uint32_t stripe_count
= layout
->stripe_count
;
12004 uint64_t stripes_per_object
= object_size
/ su
;
12006 uint64_t stripeno
= blockno
/ stripe_count
; // which horizontal stripe (Y)
12007 uint64_t stripepos
= blockno
% stripe_count
; // which object in the object set (X)
12008 uint64_t objectsetno
= stripeno
/ stripes_per_object
; // which object set
12009 uint64_t objectno
= objectsetno
* stripe_count
+ stripepos
; // object id
12011 object_t oid
= file_object_t(ino
, objectno
);
12012 return objecter
->with_osdmap([&](const OSDMap
& o
) {
12013 ceph_object_layout olayout
=
12014 o
.file_to_object_layout(oid
, *layout
);
12015 pg_t pg
= (pg_t
)olayout
.ol_pgid
;
12018 o
.pg_to_acting_osds(pg
, &osds
, &primary
);
12023 /* Return the offset of the block, internal to the object */
12025 uint64_t Client::ll_get_internal_offset(Inode
*in
, uint64_t blockno
)
12027 Mutex::Locker
lock(client_lock
);
12028 file_layout_t
*layout
=&(in
->layout
);
12029 uint32_t object_size
= layout
->object_size
;
12030 uint32_t su
= layout
->stripe_unit
;
12031 uint64_t stripes_per_object
= object_size
/ su
;
12033 return (blockno
% stripes_per_object
) * su
;
12036 int Client::ll_opendir(Inode
*in
, int flags
, dir_result_t
** dirpp
,
12037 const UserPerm
& perms
)
12039 Mutex::Locker
lock(client_lock
);
12041 vinodeno_t vino
= _get_vino(in
);
12043 ldout(cct
, 3) << "ll_opendir " << vino
<< dendl
;
12044 tout(cct
) << "ll_opendir" << std::endl
;
12045 tout(cct
) << vino
.ino
.val
<< std::endl
;
12047 if (!cct
->_conf
->fuse_default_permissions
) {
12048 int r
= may_open(in
, flags
, perms
);
12053 int r
= _opendir(in
, dirpp
, perms
);
12054 tout(cct
) << (unsigned long)*dirpp
<< std::endl
;
12056 ldout(cct
, 3) << "ll_opendir " << vino
<< " = " << r
<< " (" << *dirpp
<< ")"
12061 int Client::ll_releasedir(dir_result_t
*dirp
)
12063 Mutex::Locker
lock(client_lock
);
12064 ldout(cct
, 3) << "ll_releasedir " << dirp
<< dendl
;
12065 tout(cct
) << "ll_releasedir" << std::endl
;
12066 tout(cct
) << (unsigned long)dirp
<< std::endl
;
12071 int Client::ll_fsyncdir(dir_result_t
*dirp
)
12073 Mutex::Locker
lock(client_lock
);
12074 ldout(cct
, 3) << "ll_fsyncdir " << dirp
<< dendl
;
12075 tout(cct
) << "ll_fsyncdir" << std::endl
;
12076 tout(cct
) << (unsigned long)dirp
<< std::endl
;
12078 return _fsync(dirp
->inode
.get(), false);
12081 int Client::ll_open(Inode
*in
, int flags
, Fh
**fhp
, const UserPerm
& perms
)
12083 assert(!(flags
& O_CREAT
));
12085 Mutex::Locker
lock(client_lock
);
12087 vinodeno_t vino
= _get_vino(in
);
12089 ldout(cct
, 3) << "ll_open " << vino
<< " " << ceph_flags_sys2wire(flags
) << dendl
;
12090 tout(cct
) << "ll_open" << std::endl
;
12091 tout(cct
) << vino
.ino
.val
<< std::endl
;
12092 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
12095 if (!cct
->_conf
->fuse_default_permissions
) {
12096 r
= may_open(in
, flags
, perms
);
12101 r
= _open(in
, flags
, 0, fhp
/* may be NULL */, perms
);
12104 Fh
*fhptr
= fhp
? *fhp
: NULL
;
12106 ll_unclosed_fh_set
.insert(fhptr
);
12108 tout(cct
) << (unsigned long)fhptr
<< std::endl
;
12109 ldout(cct
, 3) << "ll_open " << vino
<< " " << ceph_flags_sys2wire(flags
) <<
12110 " = " << r
<< " (" << fhptr
<< ")" << dendl
;
12114 int Client::_ll_create(Inode
*parent
, const char *name
, mode_t mode
,
12115 int flags
, InodeRef
*in
, int caps
, Fh
**fhp
,
12116 const UserPerm
& perms
)
12120 vinodeno_t vparent
= _get_vino(parent
);
12122 ldout(cct
, 3) << "_ll_create " << vparent
<< " " << name
<< " 0" << oct
<<
12123 mode
<< dec
<< " " << ceph_flags_sys2wire(flags
) << ", uid " << perms
.uid()
12124 << ", gid " << perms
.gid() << dendl
;
12125 tout(cct
) << "ll_create" << std::endl
;
12126 tout(cct
) << vparent
.ino
.val
<< std::endl
;
12127 tout(cct
) << name
<< std::endl
;
12128 tout(cct
) << mode
<< std::endl
;
12129 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
12131 bool created
= false;
12132 int r
= _lookup(parent
, name
, caps
, in
, perms
);
12134 if (r
== 0 && (flags
& O_CREAT
) && (flags
& O_EXCL
))
12137 if (r
== -ENOENT
&& (flags
& O_CREAT
)) {
12138 if (!cct
->_conf
->fuse_default_permissions
) {
12139 r
= may_create(parent
, perms
);
12143 r
= _create(parent
, name
, flags
, mode
, in
, fhp
, 0, 0, 0, NULL
, &created
,
12154 ldout(cct
, 20) << "_ll_create created = " << created
<< dendl
;
12156 if (!cct
->_conf
->fuse_default_permissions
) {
12157 r
= may_open(in
->get(), flags
, perms
);
12160 int release_r
= _release_fh(*fhp
);
12161 assert(release_r
== 0); // during create, no async data ops should have happened
12166 if (*fhp
== NULL
) {
12167 r
= _open(in
->get(), flags
, mode
, fhp
, perms
);
12175 ll_unclosed_fh_set
.insert(*fhp
);
12180 Inode
*inode
= in
->get();
12181 if (use_faked_inos())
12182 ino
= inode
->faked_ino
;
12187 tout(cct
) << (unsigned long)*fhp
<< std::endl
;
12188 tout(cct
) << ino
<< std::endl
;
12189 ldout(cct
, 3) << "_ll_create " << vparent
<< " " << name
<< " 0" << oct
<<
12190 mode
<< dec
<< " " << ceph_flags_sys2wire(flags
) << " = " << r
<< " (" <<
12191 *fhp
<< " " << hex
<< ino
<< dec
<< ")" << dendl
;
12196 int Client::ll_create(Inode
*parent
, const char *name
, mode_t mode
,
12197 int flags
, struct stat
*attr
, Inode
**outp
, Fh
**fhp
,
12198 const UserPerm
& perms
)
12200 Mutex::Locker
lock(client_lock
);
12203 int r
= _ll_create(parent
, name
, mode
, flags
, &in
, CEPH_STAT_CAP_INODE_ALL
,
12208 // passing an Inode in outp requires an additional ref
12213 fill_stat(in
, attr
);
12221 int Client::ll_createx(Inode
*parent
, const char *name
, mode_t mode
,
12222 int oflags
, Inode
**outp
, Fh
**fhp
,
12223 struct ceph_statx
*stx
, unsigned want
, unsigned lflags
,
12224 const UserPerm
& perms
)
12226 unsigned caps
= statx_to_mask(lflags
, want
);
12227 Mutex::Locker
lock(client_lock
);
12231 int r
= _ll_create(parent
, name
, mode
, oflags
, &in
, caps
, fhp
, perms
);
12235 // passing an Inode in outp requires an additional ref
12240 fill_statx(in
, caps
, stx
);
12249 loff_t
Client::ll_lseek(Fh
*fh
, loff_t offset
, int whence
)
12251 Mutex::Locker
lock(client_lock
);
12252 tout(cct
) << "ll_lseek" << std::endl
;
12253 tout(cct
) << offset
<< std::endl
;
12254 tout(cct
) << whence
<< std::endl
;
12256 return _lseek(fh
, offset
, whence
);
12259 int Client::ll_read(Fh
*fh
, loff_t off
, loff_t len
, bufferlist
*bl
)
12261 Mutex::Locker
lock(client_lock
);
12262 ldout(cct
, 3) << "ll_read " << fh
<< " " << fh
->inode
->ino
<< " " << " " << off
<< "~" << len
<< dendl
;
12263 tout(cct
) << "ll_read" << std::endl
;
12264 tout(cct
) << (unsigned long)fh
<< std::endl
;
12265 tout(cct
) << off
<< std::endl
;
12266 tout(cct
) << len
<< std::endl
;
12268 return _read(fh
, off
, len
, bl
);
12271 int Client::ll_read_block(Inode
*in
, uint64_t blockid
,
12275 file_layout_t
* layout
)
12277 Mutex::Locker
lock(client_lock
);
12278 vinodeno_t vino
= ll_get_vino(in
);
12279 object_t oid
= file_object_t(vino
.ino
, blockid
);
12280 C_SaferCond onfinish
;
12283 objecter
->read(oid
,
12284 object_locator_t(layout
->pool_id
),
12289 CEPH_OSD_FLAG_READ
,
12292 client_lock
.Unlock();
12293 int r
= onfinish
.wait();
12294 client_lock
.Lock();
12297 bl
.copy(0, bl
.length(), buf
);
12304 /* It appears that the OSD doesn't return success unless the entire
12305 buffer was written, return the write length on success. */
12307 int Client::ll_write_block(Inode
*in
, uint64_t blockid
,
12308 char* buf
, uint64_t offset
,
12309 uint64_t length
, file_layout_t
* layout
,
12310 uint64_t snapseq
, uint32_t sync
)
12312 Mutex
flock("Client::ll_write_block flock");
12313 vinodeno_t vino
= ll_get_vino(in
);
12322 if (true || sync
) {
12323 /* if write is stable, the epilogue is waiting on
12325 onsafe
= new C_SafeCond(&flock
, &cond
, &done
, &r
);
12328 /* if write is unstable, we just place a barrier for
12329 * future commits to wait on */
12330 /*onsafe = new C_Block_Sync(this, vino.ino,
12331 barrier_interval(offset, offset + length), &r);
12335 object_t oid
= file_object_t(vino
.ino
, blockid
);
12336 SnapContext fakesnap
;
12338 if (length
> 0) bp
= buffer::copy(buf
, length
);
12342 ldout(cct
, 1) << "ll_block_write for " << vino
.ino
<< "." << blockid
12345 fakesnap
.seq
= snapseq
;
12347 /* lock just in time */
12348 client_lock
.Lock();
12350 objecter
->write(oid
,
12351 object_locator_t(layout
->pool_id
),
12356 ceph::real_clock::now(),
12360 client_lock
.Unlock();
12361 if (!done
/* also !sync */) {
12375 int Client::ll_commit_blocks(Inode
*in
,
12379 Mutex::Locker
lock(client_lock
);
12381 BarrierContext *bctx;
12382 vinodeno_t vino = ll_get_vino(in);
12383 uint64_t ino = vino.ino;
12385 ldout(cct, 1) << "ll_commit_blocks for " << vino.ino << " from "
12386 << offset << " to " << length << dendl;
12392 map<uint64_t, BarrierContext*>::iterator p = barriers.find(ino);
12393 if (p != barriers.end()) {
12394 barrier_interval civ(offset, offset + length);
12395 p->second->commit_barrier(civ);
12401 int Client::ll_write(Fh
*fh
, loff_t off
, loff_t len
, const char *data
)
12403 Mutex::Locker
lock(client_lock
);
12404 ldout(cct
, 3) << "ll_write " << fh
<< " " << fh
->inode
->ino
<< " " << off
<<
12405 "~" << len
<< dendl
;
12406 tout(cct
) << "ll_write" << std::endl
;
12407 tout(cct
) << (unsigned long)fh
<< std::endl
;
12408 tout(cct
) << off
<< std::endl
;
12409 tout(cct
) << len
<< std::endl
;
12411 int r
= _write(fh
, off
, len
, data
, NULL
, 0);
12412 ldout(cct
, 3) << "ll_write " << fh
<< " " << off
<< "~" << len
<< " = " << r
12417 int Client::ll_flush(Fh
*fh
)
12419 Mutex::Locker
lock(client_lock
);
12420 ldout(cct
, 3) << "ll_flush " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
12421 tout(cct
) << "ll_flush" << std::endl
;
12422 tout(cct
) << (unsigned long)fh
<< std::endl
;
12427 int Client::ll_fsync(Fh
*fh
, bool syncdataonly
)
12429 Mutex::Locker
lock(client_lock
);
12430 ldout(cct
, 3) << "ll_fsync " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
12431 tout(cct
) << "ll_fsync" << std::endl
;
12432 tout(cct
) << (unsigned long)fh
<< std::endl
;
12434 int r
= _fsync(fh
, syncdataonly
);
12436 // If we're returning an error, clear it from the FH
12437 fh
->take_async_err();
12442 #ifdef FALLOC_FL_PUNCH_HOLE
12444 int Client::_fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
)
12446 if (offset
< 0 || length
<= 0)
12449 if (mode
& ~(FALLOC_FL_KEEP_SIZE
| FALLOC_FL_PUNCH_HOLE
))
12450 return -EOPNOTSUPP
;
12452 if ((mode
& FALLOC_FL_PUNCH_HOLE
) && !(mode
& FALLOC_FL_KEEP_SIZE
))
12453 return -EOPNOTSUPP
;
12455 Inode
*in
= fh
->inode
.get();
12457 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
) &&
12458 !(mode
& FALLOC_FL_PUNCH_HOLE
)) {
12462 if (in
->snapid
!= CEPH_NOSNAP
)
12465 if ((fh
->mode
& CEPH_FILE_MODE_WR
) == 0)
12468 uint64_t size
= offset
+ length
;
12469 if (!(mode
& (FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
)) &&
12471 is_quota_bytes_exceeded(in
, size
- in
->size
, fh
->actor_perms
)) {
12476 int r
= get_caps(in
, CEPH_CAP_FILE_WR
, CEPH_CAP_FILE_BUFFER
, &have
, -1);
12480 Mutex
uninline_flock("Client::_fallocate_uninline_data flock");
12481 Cond uninline_cond
;
12482 bool uninline_done
= false;
12483 int uninline_ret
= 0;
12484 Context
*onuninline
= NULL
;
12486 if (mode
& FALLOC_FL_PUNCH_HOLE
) {
12487 if (in
->inline_version
< CEPH_INLINE_NONE
&&
12488 (have
& CEPH_CAP_FILE_BUFFER
)) {
12490 int len
= in
->inline_data
.length();
12491 if (offset
< len
) {
12493 in
->inline_data
.copy(0, offset
, bl
);
12495 if (offset
+ size
> len
)
12496 size
= len
- offset
;
12498 bl
.append_zero(size
);
12499 if (offset
+ size
< len
)
12500 in
->inline_data
.copy(offset
+ size
, len
- offset
- size
, bl
);
12501 in
->inline_data
= bl
;
12502 in
->inline_version
++;
12504 in
->mtime
= ceph_clock_now();
12506 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
12508 if (in
->inline_version
< CEPH_INLINE_NONE
) {
12509 onuninline
= new C_SafeCond(&uninline_flock
,
12513 uninline_data(in
, onuninline
);
12516 Mutex
flock("Client::_punch_hole flock");
12519 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
);
12521 unsafe_sync_write
++;
12522 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
12524 _invalidate_inode_cache(in
, offset
, length
);
12525 filer
->zero(in
->ino
, &in
->layout
,
12526 in
->snaprealm
->get_snap_context(),
12528 ceph::real_clock::now(),
12529 0, true, onfinish
);
12530 in
->mtime
= ceph_clock_now();
12532 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
12534 client_lock
.Unlock();
12539 client_lock
.Lock();
12540 _sync_write_commit(in
);
12542 } else if (!(mode
& FALLOC_FL_KEEP_SIZE
)) {
12543 uint64_t size
= offset
+ length
;
12544 if (size
> in
->size
) {
12546 in
->mtime
= ceph_clock_now();
12548 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
12550 if (is_quota_bytes_approaching(in
, fh
->actor_perms
)) {
12551 check_caps(in
, CHECK_CAPS_NODELAY
);
12552 } else if (is_max_size_approaching(in
)) {
12559 client_lock
.Unlock();
12560 uninline_flock
.Lock();
12561 while (!uninline_done
)
12562 uninline_cond
.Wait(uninline_flock
);
12563 uninline_flock
.Unlock();
12564 client_lock
.Lock();
12566 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
12567 in
->inline_data
.clear();
12568 in
->inline_version
= CEPH_INLINE_NONE
;
12569 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
12575 put_cap_ref(in
, CEPH_CAP_FILE_WR
);
12580 int Client::_fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
)
12582 return -EOPNOTSUPP
;
12588 int Client::ll_fallocate(Fh
*fh
, int mode
, loff_t offset
, loff_t length
)
12590 Mutex::Locker
lock(client_lock
);
12591 ldout(cct
, 3) << "ll_fallocate " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
12592 tout(cct
) << "ll_fallocate " << mode
<< " " << offset
<< " " << length
<< std::endl
;
12593 tout(cct
) << (unsigned long)fh
<< std::endl
;
12595 return _fallocate(fh
, mode
, offset
, length
);
12598 int Client::fallocate(int fd
, int mode
, loff_t offset
, loff_t length
)
12600 Mutex::Locker
lock(client_lock
);
12601 tout(cct
) << "fallocate " << " " << fd
<< mode
<< " " << offset
<< " " << length
<< std::endl
;
12603 Fh
*fh
= get_filehandle(fd
);
12606 #if defined(__linux__) && defined(O_PATH)
12607 if (fh
->flags
& O_PATH
)
12610 return _fallocate(fh
, mode
, offset
, length
);
12613 int Client::ll_release(Fh
*fh
)
12615 Mutex::Locker
lock(client_lock
);
12616 ldout(cct
, 3) << "ll_release (fh)" << fh
<< " " << fh
->inode
->ino
<< " " <<
12618 tout(cct
) << "ll_release (fh)" << std::endl
;
12619 tout(cct
) << (unsigned long)fh
<< std::endl
;
12621 if (ll_unclosed_fh_set
.count(fh
))
12622 ll_unclosed_fh_set
.erase(fh
);
12623 return _release_fh(fh
);
12626 int Client::ll_getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
)
12628 Mutex::Locker
lock(client_lock
);
12630 ldout(cct
, 3) << "ll_getlk (fh)" << fh
<< " " << fh
->inode
->ino
<< dendl
;
12631 tout(cct
) << "ll_getk (fh)" << (unsigned long)fh
<< std::endl
;
12633 return _getlk(fh
, fl
, owner
);
12636 int Client::ll_setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
)
12638 Mutex::Locker
lock(client_lock
);
12640 ldout(cct
, 3) << "ll_setlk (fh) " << fh
<< " " << fh
->inode
->ino
<< dendl
;
12641 tout(cct
) << "ll_setk (fh)" << (unsigned long)fh
<< std::endl
;
12643 return _setlk(fh
, fl
, owner
, sleep
);
12646 int Client::ll_flock(Fh
*fh
, int cmd
, uint64_t owner
)
12648 Mutex::Locker
lock(client_lock
);
12650 ldout(cct
, 3) << "ll_flock (fh) " << fh
<< " " << fh
->inode
->ino
<< dendl
;
12651 tout(cct
) << "ll_flock (fh)" << (unsigned long)fh
<< std::endl
;
12653 return _flock(fh
, cmd
, owner
);
12656 class C_Client_RequestInterrupt
: public Context
{
12661 C_Client_RequestInterrupt(Client
*c
, MetaRequest
*r
) : client(c
), req(r
) {
12664 void finish(int r
) override
{
12665 Mutex::Locker
l(client
->client_lock
);
12666 assert(req
->head
.op
== CEPH_MDS_OP_SETFILELOCK
);
12667 client
->_interrupt_filelock(req
);
12668 client
->put_request(req
);
12672 void Client::ll_interrupt(void *d
)
12674 MetaRequest
*req
= static_cast<MetaRequest
*>(d
);
12675 ldout(cct
, 3) << "ll_interrupt tid " << req
->get_tid() << dendl
;
12676 tout(cct
) << "ll_interrupt tid " << req
->get_tid() << std::endl
;
12677 interrupt_finisher
.queue(new C_Client_RequestInterrupt(this, req
));
12680 // =========================================
12683 // expose file layouts
12685 int Client::describe_layout(const char *relpath
, file_layout_t
*lp
,
12686 const UserPerm
& perms
)
12688 Mutex::Locker
lock(client_lock
);
12690 filepath
path(relpath
);
12692 int r
= path_walk(path
, &in
, perms
);
12698 ldout(cct
, 3) << "describe_layout(" << relpath
<< ") = 0" << dendl
;
12702 int Client::fdescribe_layout(int fd
, file_layout_t
*lp
)
12704 Mutex::Locker
lock(client_lock
);
12706 Fh
*f
= get_filehandle(fd
);
12709 Inode
*in
= f
->inode
.get();
12713 ldout(cct
, 3) << "fdescribe_layout(" << fd
<< ") = 0" << dendl
;
12717 int64_t Client::get_default_pool_id()
12719 Mutex::Locker
lock(client_lock
);
12720 /* first data pool is the default */
12721 return mdsmap
->get_first_data_pool();
12726 int64_t Client::get_pool_id(const char *pool_name
)
12728 Mutex::Locker
lock(client_lock
);
12729 return objecter
->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name
),
12733 string
Client::get_pool_name(int64_t pool
)
12735 Mutex::Locker
lock(client_lock
);
12736 return objecter
->with_osdmap([pool
](const OSDMap
& o
) {
12737 return o
.have_pg_pool(pool
) ? o
.get_pool_name(pool
) : string();
12741 int Client::get_pool_replication(int64_t pool
)
12743 Mutex::Locker
lock(client_lock
);
12744 return objecter
->with_osdmap([pool
](const OSDMap
& o
) {
12745 return o
.have_pg_pool(pool
) ? o
.get_pg_pool(pool
)->get_size() : -ENOENT
;
12749 int Client::get_file_extent_osds(int fd
, loff_t off
, loff_t
*len
, vector
<int>& osds
)
12751 Mutex::Locker
lock(client_lock
);
12753 Fh
*f
= get_filehandle(fd
);
12756 Inode
*in
= f
->inode
.get();
12758 vector
<ObjectExtent
> extents
;
12759 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, off
, 1, in
->truncate_size
, extents
);
12760 assert(extents
.size() == 1);
12762 objecter
->with_osdmap([&](const OSDMap
& o
) {
12763 pg_t pg
= o
.object_locator_to_pg(extents
[0].oid
, extents
[0].oloc
);
12764 o
.pg_to_acting_osds(pg
, osds
);
12771 * Return the remainder of the extent (stripe unit)
12773 * If length = 1 is passed to Striper::file_to_extents we get a single
12774 * extent back, but its length is one so we still need to compute the length
12775 * to the end of the stripe unit.
12777 * If length = su then we may get 1 or 2 objects back in the extents vector
12778 * which would have to be examined. Even then, the offsets are local to the
12779 * object, so matching up to the file offset is extra work.
12781 * It seems simpler to stick with length = 1 and manually compute the
12785 uint64_t su
= in
->layout
.stripe_unit
;
12786 *len
= su
- (off
% su
);
12792 int Client::get_osd_crush_location(int id
, vector
<pair
<string
, string
> >& path
)
12794 Mutex::Locker
lock(client_lock
);
12797 return objecter
->with_osdmap([&](const OSDMap
& o
) {
12798 return o
.crush
->get_full_location_ordered(id
, path
);
12802 int Client::get_file_stripe_address(int fd
, loff_t offset
,
12803 vector
<entity_addr_t
>& address
)
12805 Mutex::Locker
lock(client_lock
);
12807 Fh
*f
= get_filehandle(fd
);
12810 Inode
*in
= f
->inode
.get();
12813 vector
<ObjectExtent
> extents
;
12814 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, offset
, 1,
12815 in
->truncate_size
, extents
);
12816 assert(extents
.size() == 1);
12818 // now we have the object and its 'layout'
12819 return objecter
->with_osdmap([&](const OSDMap
& o
) {
12820 pg_t pg
= o
.object_locator_to_pg(extents
[0].oid
, extents
[0].oloc
);
12822 o
.pg_to_acting_osds(pg
, osds
);
12825 for (unsigned i
= 0; i
< osds
.size(); i
++) {
12826 entity_addr_t addr
= o
.get_addr(osds
[i
]);
12827 address
.push_back(addr
);
12833 int Client::get_osd_addr(int osd
, entity_addr_t
& addr
)
12835 Mutex::Locker
lock(client_lock
);
12836 return objecter
->with_osdmap([&](const OSDMap
& o
) {
12837 if (!o
.exists(osd
))
12840 addr
= o
.get_addr(osd
);
12845 int Client::enumerate_layout(int fd
, vector
<ObjectExtent
>& result
,
12846 loff_t length
, loff_t offset
)
12848 Mutex::Locker
lock(client_lock
);
12850 Fh
*f
= get_filehandle(fd
);
12853 Inode
*in
= f
->inode
.get();
12855 // map to a list of extents
12856 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, offset
, length
, in
->truncate_size
, result
);
12858 ldout(cct
, 3) << "enumerate_layout(" << fd
<< ", " << length
<< ", " << offset
<< ") = 0" << dendl
;
12864 * find an osd with the same ip. -1 if none.
12866 int Client::get_local_osd()
12868 Mutex::Locker
lock(client_lock
);
12869 objecter
->with_osdmap([this](const OSDMap
& o
) {
12870 if (o
.get_epoch() != local_osd_epoch
) {
12871 local_osd
= o
.find_osd_on_ip(messenger
->get_myaddr());
12872 local_osd_epoch
= o
.get_epoch();
12883 // ===============================
12885 void Client::ms_handle_connect(Connection
*con
)
12887 ldout(cct
, 10) << "ms_handle_connect on " << con
->get_peer_addr() << dendl
;
12890 bool Client::ms_handle_reset(Connection
*con
)
12892 ldout(cct
, 0) << "ms_handle_reset on " << con
->get_peer_addr() << dendl
;
12896 void Client::ms_handle_remote_reset(Connection
*con
)
12898 ldout(cct
, 0) << "ms_handle_remote_reset on " << con
->get_peer_addr() << dendl
;
12899 Mutex::Locker
l(client_lock
);
12900 switch (con
->get_peer_type()) {
12901 case CEPH_ENTITY_TYPE_MDS
:
12903 // kludge to figure out which mds this is; fixme with a Connection* state
12904 mds_rank_t mds
= MDS_RANK_NONE
;
12905 MetaSession
*s
= NULL
;
12906 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
12907 p
!= mds_sessions
.end();
12909 if (mdsmap
->get_addr(p
->first
) == con
->get_peer_addr()) {
12915 assert (s
!= NULL
);
12916 switch (s
->state
) {
12917 case MetaSession::STATE_CLOSING
:
12918 ldout(cct
, 1) << "reset from mds we were closing; we'll call that closed" << dendl
;
12919 _closed_mds_session(s
);
12922 case MetaSession::STATE_OPENING
:
12924 ldout(cct
, 1) << "reset from mds we were opening; retrying" << dendl
;
12925 list
<Context
*> waiters
;
12926 waiters
.swap(s
->waiting_for_open
);
12927 _closed_mds_session(s
);
12928 MetaSession
*news
= _get_or_open_mds_session(mds
);
12929 news
->waiting_for_open
.swap(waiters
);
12933 case MetaSession::STATE_OPEN
:
12935 const md_config_t
*conf
= cct
->_conf
;
12936 if (conf
->client_reconnect_stale
) {
12937 ldout(cct
, 1) << "reset from mds we were open; close mds session for reconnect" << dendl
;
12938 _closed_mds_session(s
);
12940 ldout(cct
, 1) << "reset from mds we were open; mark session as stale" << dendl
;
12941 s
->state
= MetaSession::STATE_STALE
;
12946 case MetaSession::STATE_NEW
:
12947 case MetaSession::STATE_CLOSED
:
12957 bool Client::ms_handle_refused(Connection
*con
)
12959 ldout(cct
, 1) << "ms_handle_refused on " << con
->get_peer_addr() << dendl
;
12963 bool Client::ms_get_authorizer(int dest_type
, AuthAuthorizer
**authorizer
, bool force_new
)
12965 if (dest_type
== CEPH_ENTITY_TYPE_MON
)
12967 *authorizer
= monclient
->build_authorizer(dest_type
);
12971 Inode
*Client::get_quota_root(Inode
*in
, const UserPerm
& perms
)
12974 utime_t now
= ceph_clock_now();
12977 if (cur
!= in
&& cur
->quota
.is_enable())
12980 Inode
*parent_in
= NULL
;
12981 if (!cur
->dn_set
.empty()) {
12982 for (auto p
= cur
->dn_set
.begin(); p
!= cur
->dn_set
.end(); ++p
) {
12984 if (dn
->lease_mds
>= 0 &&
12985 dn
->lease_ttl
> now
&&
12986 mds_sessions
.count(dn
->lease_mds
)) {
12987 parent_in
= dn
->dir
->parent_inode
;
12989 Inode
*diri
= dn
->dir
->parent_inode
;
12990 if (diri
->caps_issued_mask(CEPH_CAP_FILE_SHARED
) &&
12991 diri
->shared_gen
== dn
->cap_shared_gen
) {
12992 parent_in
= dn
->dir
->parent_inode
;
12998 } else if (root_parents
.count(cur
)) {
12999 parent_in
= root_parents
[cur
].get();
13007 if (cur
== root_ancestor
)
13010 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
13011 filepath
path(cur
->ino
);
13012 req
->set_filepath(path
);
13013 req
->set_inode(cur
);
13015 InodeRef parent_ref
;
13016 int ret
= make_request(req
, perms
, &parent_ref
);
13018 ldout(cct
, 1) << __func__
<< " " << in
->vino()
13019 << " failed to find parent of " << cur
->vino()
13020 << " err " << ret
<< dendl
;
13021 // FIXME: what to do?
13022 cur
= root_ancestor
;
13026 now
= ceph_clock_now();
13028 cur
= parent_ref
.get();
13030 cur
= in
; // start over
13033 ldout(cct
, 10) << __func__
<< " " << in
->vino() << " -> " << cur
->vino() << dendl
;
13038 * Traverse quota ancestors of the Inode, return true
13039 * if any of them passes the passed function
13041 bool Client::check_quota_condition(Inode
*in
, const UserPerm
& perms
,
13042 std::function
<bool (const Inode
&in
)> test
)
13045 assert(in
!= NULL
);
13050 if (in
== root_ancestor
) {
13051 // We're done traversing, drop out
13054 // Continue up the tree
13055 in
= get_quota_root(in
, perms
);
13062 bool Client::is_quota_files_exceeded(Inode
*in
, const UserPerm
& perms
)
13064 return check_quota_condition(in
, perms
,
13065 [](const Inode
&in
) {
13066 return in
.quota
.max_files
&& in
.rstat
.rsize() >= in
.quota
.max_files
;
13070 bool Client::is_quota_bytes_exceeded(Inode
*in
, int64_t new_bytes
,
13071 const UserPerm
& perms
)
13073 return check_quota_condition(in
, perms
,
13074 [&new_bytes
](const Inode
&in
) {
13075 return in
.quota
.max_bytes
&& (in
.rstat
.rbytes
+ new_bytes
)
13076 > in
.quota
.max_bytes
;
13080 bool Client::is_quota_bytes_approaching(Inode
*in
, const UserPerm
& perms
)
13082 return check_quota_condition(in
, perms
,
13083 [](const Inode
&in
) {
13084 if (in
.quota
.max_bytes
) {
13085 if (in
.rstat
.rbytes
>= in
.quota
.max_bytes
) {
13089 assert(in
.size
>= in
.reported_size
);
13090 const uint64_t space
= in
.quota
.max_bytes
- in
.rstat
.rbytes
;
13091 const uint64_t size
= in
.size
- in
.reported_size
;
13092 return (space
>> 4) < size
;
13106 int Client::check_pool_perm(Inode
*in
, int need
)
13108 if (!cct
->_conf
->client_check_pool_perm
)
13111 int64_t pool_id
= in
->layout
.pool_id
;
13112 std::string pool_ns
= in
->layout
.pool_ns
;
13113 std::pair
<int64_t, std::string
> perm_key(pool_id
, pool_ns
);
13116 auto it
= pool_perms
.find(perm_key
);
13117 if (it
== pool_perms
.end())
13119 if (it
->second
== POOL_CHECKING
) {
13120 // avoid concurrent checkings
13121 wait_on_list(waiting_for_pool_perm
);
13124 assert(have
& POOL_CHECKED
);
13130 if (in
->snapid
!= CEPH_NOSNAP
) {
13131 // pool permission check needs to write to the first object. But for snapshot,
13132 // head of the first object may have alread been deleted. To avoid creating
13133 // orphan object, skip the check for now.
13137 pool_perms
[perm_key
] = POOL_CHECKING
;
13140 snprintf(oid_buf
, sizeof(oid_buf
), "%llx.00000000", (unsigned long long)in
->ino
);
13141 object_t oid
= oid_buf
;
13143 SnapContext nullsnapc
;
13145 C_SaferCond rd_cond
;
13146 ObjectOperation rd_op
;
13147 rd_op
.stat(NULL
, (ceph::real_time
*)nullptr, NULL
);
13149 objecter
->mutate(oid
, OSDMap::file_to_object_locator(in
->layout
), rd_op
,
13150 nullsnapc
, ceph::real_clock::now(), 0, &rd_cond
);
13152 C_SaferCond wr_cond
;
13153 ObjectOperation wr_op
;
13154 wr_op
.create(true);
13156 objecter
->mutate(oid
, OSDMap::file_to_object_locator(in
->layout
), wr_op
,
13157 nullsnapc
, ceph::real_clock::now(), 0, &wr_cond
);
13159 client_lock
.Unlock();
13160 int rd_ret
= rd_cond
.wait();
13161 int wr_ret
= wr_cond
.wait();
13162 client_lock
.Lock();
13164 bool errored
= false;
13166 if (rd_ret
== 0 || rd_ret
== -ENOENT
)
13168 else if (rd_ret
!= -EPERM
) {
13169 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13170 << " rd_err = " << rd_ret
<< " wr_err = " << wr_ret
<< dendl
;
13174 if (wr_ret
== 0 || wr_ret
== -EEXIST
)
13175 have
|= POOL_WRITE
;
13176 else if (wr_ret
!= -EPERM
) {
13177 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13178 << " rd_err = " << rd_ret
<< " wr_err = " << wr_ret
<< dendl
;
13183 // Indeterminate: erase CHECKING state so that subsequent calls re-check.
13184 // Raise EIO because actual error code might be misleading for
13185 // userspace filesystem user.
13186 pool_perms
.erase(perm_key
);
13187 signal_cond_list(waiting_for_pool_perm
);
13191 pool_perms
[perm_key
] = have
| POOL_CHECKED
;
13192 signal_cond_list(waiting_for_pool_perm
);
13195 if ((need
& CEPH_CAP_FILE_RD
) && !(have
& POOL_READ
)) {
13196 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13197 << " need " << ccap_string(need
) << ", but no read perm" << dendl
;
13200 if ((need
& CEPH_CAP_FILE_WR
) && !(have
& POOL_WRITE
)) {
13201 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13202 << " need " << ccap_string(need
) << ", but no write perm" << dendl
;
13209 int Client::_posix_acl_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
)
13211 if (acl_type
== POSIX_ACL
) {
13212 if (in
->xattrs
.count(ACL_EA_ACCESS
)) {
13213 const bufferptr
& access_acl
= in
->xattrs
[ACL_EA_ACCESS
];
13215 return posix_acl_permits(access_acl
, in
->uid
, in
->gid
, perms
, want
);
13221 int Client::_posix_acl_chmod(Inode
*in
, mode_t mode
, const UserPerm
& perms
)
13223 if (acl_type
== NO_ACL
)
13226 int r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
13230 if (acl_type
== POSIX_ACL
) {
13231 if (in
->xattrs
.count(ACL_EA_ACCESS
)) {
13232 const bufferptr
& access_acl
= in
->xattrs
[ACL_EA_ACCESS
];
13233 bufferptr
acl(access_acl
.c_str(), access_acl
.length());
13234 r
= posix_acl_access_chmod(acl
, mode
);
13237 r
= _do_setxattr(in
, ACL_EA_ACCESS
, acl
.c_str(), acl
.length(), 0, perms
);
13243 ldout(cct
, 10) << __func__
<< " ino " << in
->ino
<< " result=" << r
<< dendl
;
13247 int Client::_posix_acl_create(Inode
*dir
, mode_t
*mode
, bufferlist
& xattrs_bl
,
13248 const UserPerm
& perms
)
13250 if (acl_type
== NO_ACL
)
13253 if (S_ISLNK(*mode
))
13256 int r
= _getattr(dir
, CEPH_STAT_CAP_XATTR
, perms
, dir
->xattr_version
== 0);
13260 if (acl_type
== POSIX_ACL
) {
13261 if (dir
->xattrs
.count(ACL_EA_DEFAULT
)) {
13262 map
<string
, bufferptr
> xattrs
;
13264 const bufferptr
& default_acl
= dir
->xattrs
[ACL_EA_DEFAULT
];
13265 bufferptr
acl(default_acl
.c_str(), default_acl
.length());
13266 r
= posix_acl_inherit_mode(acl
, mode
);
13271 r
= posix_acl_equiv_mode(acl
.c_str(), acl
.length(), mode
);
13275 xattrs
[ACL_EA_ACCESS
] = acl
;
13278 if (S_ISDIR(*mode
))
13279 xattrs
[ACL_EA_DEFAULT
] = dir
->xattrs
[ACL_EA_DEFAULT
];
13283 ::encode(xattrs
, xattrs_bl
);
13286 *mode
&= ~umask_cb(callback_handle
);
13291 ldout(cct
, 10) << __func__
<< " dir ino " << dir
->ino
<< " result=" << r
<< dendl
;
13295 void Client::set_filer_flags(int flags
)
13297 Mutex::Locker
l(client_lock
);
13298 assert(flags
== 0 ||
13299 flags
== CEPH_OSD_FLAG_LOCALIZE_READS
);
13300 objecter
->add_global_op_flags(flags
);
13303 void Client::clear_filer_flags(int flags
)
13305 Mutex::Locker
l(client_lock
);
13306 assert(flags
== CEPH_OSD_FLAG_LOCALIZE_READS
);
13307 objecter
->clear_global_op_flag(flags
);
13311 * This is included in cap release messages, to cause
13312 * the MDS to wait until this OSD map epoch. It is necessary
13313 * in corner cases where we cancel RADOS ops, so that
13314 * nobody else tries to do IO to the same objects in
13315 * the same epoch as the cancelled ops.
13317 void Client::set_cap_epoch_barrier(epoch_t e
)
13319 ldout(cct
, 5) << __func__
<< " epoch = " << e
<< dendl
;
13320 cap_epoch_barrier
= e
;
13323 const char** Client::get_tracked_conf_keys() const
13325 static const char* keys
[] = {
13326 "client_cache_size",
13327 "client_cache_mid",
13334 void Client::handle_conf_change(const struct md_config_t
*conf
,
13335 const std::set
<std::string
> &changed
)
13337 Mutex::Locker
lock(client_lock
);
13339 if (changed
.count("client_cache_size") ||
13340 changed
.count("client_cache_mid")) {
13341 lru
.lru_set_max(cct
->_conf
->client_cache_size
);
13342 lru
.lru_set_midpoint(cct
->_conf
->client_cache_mid
);
13344 if (changed
.count("client_acl_type")) {
13346 if (cct
->_conf
->client_acl_type
== "posix_acl")
13347 acl_type
= POSIX_ACL
;
13351 void Client::init_groups(UserPerm
*perms
)
13354 int count
= _getgrouplist(&sgids
, perms
->uid(), perms
->gid());
13355 perms
->init_gids(sgids
, count
);
13358 void intrusive_ptr_add_ref(Inode
*in
)
13363 void intrusive_ptr_release(Inode
*in
)
13365 in
->client
->put_inode(in
);
13368 mds_rank_t
Client::_get_random_up_mds() const
13370 assert(client_lock
.is_locked_by_me());
13372 std::set
<mds_rank_t
> up
;
13373 mdsmap
->get_up_mds_set(up
);
13376 return MDS_RANK_NONE
;
13377 std::set
<mds_rank_t
>::const_iterator p
= up
.begin();
13378 for (int n
= rand() % up
.size(); n
; n
--)
13384 StandaloneClient::StandaloneClient(Messenger
*m
, MonClient
*mc
)
13385 : Client(m
, mc
, new Objecter(m
->cct
, m
, mc
, NULL
, 0, 0))
13387 monclient
->set_messenger(m
);
13388 objecter
->set_client_incarnation(0);
13391 StandaloneClient::~StandaloneClient()
13394 objecter
= nullptr;
13397 int StandaloneClient::init()
13400 objectcacher
->start();
13403 client_lock
.Lock();
13404 assert(!initialized
);
13406 messenger
->add_dispatcher_tail(objecter
);
13407 messenger
->add_dispatcher_tail(this);
13409 monclient
->set_want_keys(CEPH_ENTITY_TYPE_MDS
| CEPH_ENTITY_TYPE_OSD
);
13410 int r
= monclient
->init();
13412 // need to do cleanup because we're in an intermediate init state
13414 client_lock
.Unlock();
13415 objecter
->shutdown();
13416 objectcacher
->stop();
13417 monclient
->shutdown();
13422 client_lock
.Unlock();
13428 void StandaloneClient::shutdown()
13430 Client::shutdown();
13431 objecter
->shutdown();
13432 monclient
->shutdown();