1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
18 #include <sys/types.h>
22 #include <sys/param.h>
25 #include <sys/utsname.h>
28 #include <boost/lexical_cast.hpp>
29 #include <boost/fusion/include/std_pair.hpp>
31 #if defined(__FreeBSD__)
32 #define XATTR_CREATE 0x1
33 #define XATTR_REPLACE 0x2
35 #include <sys/xattr.h>
38 #if defined(__linux__)
39 #include <linux/falloc.h>
42 #include <sys/statvfs.h>
44 #include "common/config.h"
45 #include "common/version.h"
48 #include "messages/MClientSession.h"
49 #include "messages/MClientReconnect.h"
50 #include "messages/MClientRequest.h"
51 #include "messages/MClientRequestForward.h"
52 #include "messages/MClientReply.h"
53 #include "messages/MClientCaps.h"
54 #include "messages/MClientLease.h"
55 #include "messages/MClientSnap.h"
56 #include "messages/MCommandReply.h"
57 #include "messages/MOSDMap.h"
58 #include "messages/MClientQuota.h"
59 #include "messages/MClientCapRelease.h"
60 #include "messages/MMDSMap.h"
61 #include "messages/MFSMap.h"
62 #include "messages/MFSMapUser.h"
64 #include "mon/MonClient.h"
66 #include "mds/flock.h"
67 #include "osd/OSDMap.h"
68 #include "osdc/Filer.h"
70 #include "common/Cond.h"
71 #include "common/Mutex.h"
72 #include "common/perf_counters.h"
73 #include "common/admin_socket.h"
74 #include "common/errno.h"
75 #include "include/str_list.h"
77 #define dout_subsys ceph_subsys_client
79 #include "include/lru.h"
80 #include "include/compat.h"
81 #include "include/stringify.h"
86 #include "Delegation.h"
88 #include "ClientSnapRealm.h"
90 #include "MetaSession.h"
91 #include "MetaRequest.h"
92 #include "ObjecterWriteback.h"
93 #include "posix_acl.h"
95 #include "include/assert.h"
96 #include "include/stat.h"
98 #include "include/cephfs/ceph_statx.h"
100 #if HAVE_GETGROUPLIST
107 #define dout_prefix *_dout << "client." << whoami << " "
109 #define tout(cct) if (!cct->_conf->client_trace.empty()) traceout
111 // FreeBSD fails to define this
115 // Darwin fails to define this
124 #define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED)
126 void client_flush_set_callback(void *p
, ObjectCacher::ObjectSet
*oset
)
128 Client
*client
= static_cast<Client
*>(p
);
129 client
->flush_set_callback(oset
);
135 Client::CommandHook::CommandHook(Client
*client
) :
140 bool Client::CommandHook::call(std::string command
, cmdmap_t
& cmdmap
,
141 std::string format
, bufferlist
& out
)
143 Formatter
*f
= Formatter::create(format
);
144 f
->open_object_section("result");
145 m_client
->client_lock
.Lock();
146 if (command
== "mds_requests")
147 m_client
->dump_mds_requests(f
);
148 else if (command
== "mds_sessions")
149 m_client
->dump_mds_sessions(f
);
150 else if (command
== "dump_cache")
151 m_client
->dump_cache(f
);
152 else if (command
== "kick_stale_sessions")
153 m_client
->_kick_stale_sessions();
154 else if (command
== "status")
155 m_client
->dump_status(f
);
157 assert(0 == "bad command registered");
158 m_client
->client_lock
.Unlock();
168 dir_result_t::dir_result_t(Inode
*in
, const UserPerm
& perms
)
169 : inode(in
), offset(0), next_offset(2),
170 release_count(0), ordered_count(0), cache_index(0), start_shared_gen(0),
174 void Client::_reset_faked_inos()
177 free_faked_inos
.clear();
178 free_faked_inos
.insert(start
, (uint32_t)-1 - start
+ 1);
179 last_used_faked_ino
= 0;
180 _use_faked_inos
= sizeof(ino_t
) < 8 || cct
->_conf
->client_use_faked_inos
;
183 void Client::_assign_faked_ino(Inode
*in
)
185 interval_set
<ino_t
>::const_iterator it
= free_faked_inos
.lower_bound(last_used_faked_ino
+ 1);
186 if (it
== free_faked_inos
.end() && last_used_faked_ino
> 0) {
187 last_used_faked_ino
= 0;
188 it
= free_faked_inos
.lower_bound(last_used_faked_ino
+ 1);
190 assert(it
!= free_faked_inos
.end());
191 if (last_used_faked_ino
< it
.get_start()) {
192 assert(it
.get_len() > 0);
193 last_used_faked_ino
= it
.get_start();
195 ++last_used_faked_ino
;
196 assert(it
.get_start() + it
.get_len() > last_used_faked_ino
);
198 in
->faked_ino
= last_used_faked_ino
;
199 free_faked_inos
.erase(in
->faked_ino
);
200 faked_ino_map
[in
->faked_ino
] = in
->vino();
203 void Client::_release_faked_ino(Inode
*in
)
205 free_faked_inos
.insert(in
->faked_ino
);
206 faked_ino_map
.erase(in
->faked_ino
);
209 vinodeno_t
Client::_map_faked_ino(ino_t ino
)
214 else if (faked_ino_map
.count(ino
))
215 vino
= faked_ino_map
[ino
];
217 vino
= vinodeno_t(0, CEPH_NOSNAP
);
218 ldout(cct
, 10) << "map_faked_ino " << ino
<< " -> " << vino
<< dendl
;
222 vinodeno_t
Client::map_faked_ino(ino_t ino
)
224 Mutex::Locker
lock(client_lock
);
225 return _map_faked_ino(ino
);
230 Client::Client(Messenger
*m
, MonClient
*mc
, Objecter
*objecter_
)
231 : Dispatcher(m
->cct
),
232 m_command_hook(this),
233 timer(m
->cct
, client_lock
),
234 callback_handle(NULL
),
235 switch_interrupt_cb(NULL
),
237 ino_invalidate_cb(NULL
),
238 dentry_invalidate_cb(NULL
),
241 can_invalidate_dentries(false),
242 async_ino_invalidator(m
->cct
),
243 async_dentry_invalidator(m
->cct
),
244 interrupt_finisher(m
->cct
),
245 remount_finisher(m
->cct
),
246 objecter_finisher(m
->cct
),
248 messenger(m
), monclient(mc
),
250 whoami(mc
->get_global_id()), cap_epoch_barrier(0),
251 last_tid(0), oldest_tid(0), last_flush_tid(1),
253 mounted(false), unmounting(false), blacklisted(false),
254 local_osd(-ENXIO
), local_osd_epoch(0),
255 unsafe_sync_write(0),
256 client_lock("Client::client_lock"),
263 num_flushing_caps
= 0;
265 _dir_vxattrs_name_size
= _vxattrs_calcu_name_size(_dir_vxattrs
);
266 _file_vxattrs_name_size
= _vxattrs_calcu_name_size(_file_vxattrs
);
268 user_id
= cct
->_conf
->client_mount_uid
;
269 group_id
= cct
->_conf
->client_mount_gid
;
272 if (cct
->_conf
->client_acl_type
== "posix_acl")
273 acl_type
= POSIX_ACL
;
275 lru
.lru_set_midpoint(cct
->_conf
->client_cache_mid
);
278 free_fd_set
.insert(10, 1<<30);
280 mdsmap
.reset(new MDSMap
);
283 writeback_handler
.reset(new ObjecterWriteback(objecter
, &objecter_finisher
,
285 objectcacher
.reset(new ObjectCacher(cct
, "libcephfs", *writeback_handler
, client_lock
,
286 client_flush_set_callback
, // all commit callback
288 cct
->_conf
->client_oc_size
,
289 cct
->_conf
->client_oc_max_objects
,
290 cct
->_conf
->client_oc_max_dirty
,
291 cct
->_conf
->client_oc_target_dirty
,
292 cct
->_conf
->client_oc_max_dirty_age
,
294 objecter_finisher
.start();
295 filer
.reset(new Filer(objecter
, &objecter_finisher
));
296 objecter
->enable_blacklist_events();
302 assert(!client_lock
.is_locked());
304 // It is necessary to hold client_lock, because any inode destruction
305 // may call into ObjectCacher, which asserts that its lock (which is
306 // client_lock) is held.
309 client_lock
.Unlock();
312 void Client::tear_down_cache()
315 for (ceph::unordered_map
<int, Fh
*>::iterator it
= fd_map
.begin();
319 ldout(cct
, 1) << "tear_down_cache forcing close of fh " << it
->first
<< " ino " << fh
->inode
->ino
<< dendl
;
324 while (!opened_dirs
.empty()) {
325 dir_result_t
*dirp
= *opened_dirs
.begin();
326 ldout(cct
, 1) << "tear_down_cache forcing close of dir " << dirp
<< " ino " << dirp
->inode
->ino
<< dendl
;
335 assert(lru
.lru_get_size() == 0);
338 assert(inode_map
.size() <= 1 + root_parents
.size());
339 if (root
&& inode_map
.size() == 1 + root_parents
.size()) {
343 while (!root_parents
.empty())
344 root_parents
.erase(root_parents
.begin());
349 assert(inode_map
.empty());
352 inodeno_t
Client::get_root_ino()
354 Mutex::Locker
l(client_lock
);
355 if (use_faked_inos())
356 return root
->faked_ino
;
361 Inode
*Client::get_root()
363 Mutex::Locker
l(client_lock
);
371 void Client::dump_inode(Formatter
*f
, Inode
*in
, set
<Inode
*>& did
, bool disconnected
)
374 in
->make_long_path(path
);
375 ldout(cct
, 1) << "dump_inode: "
376 << (disconnected
? "DISCONNECTED ":"")
377 << "inode " << in
->ino
379 << " ref " << in
->get_num_ref()
383 f
->open_object_section("inode");
384 f
->dump_stream("path") << path
;
386 f
->dump_int("disconnected", 1);
393 ldout(cct
, 1) << " dir " << in
->dir
<< " size " << in
->dir
->dentries
.size() << dendl
;
394 for (ceph::unordered_map
<string
, Dentry
*>::iterator it
= in
->dir
->dentries
.begin();
395 it
!= in
->dir
->dentries
.end();
397 ldout(cct
, 1) << " " << in
->ino
<< " dn " << it
->first
<< " " << it
->second
<< " ref " << it
->second
->ref
<< dendl
;
399 f
->open_object_section("dentry");
403 if (it
->second
->inode
)
404 dump_inode(f
, it
->second
->inode
.get(), did
, false);
409 void Client::dump_cache(Formatter
*f
)
413 ldout(cct
, 1) << "dump_cache" << dendl
;
416 f
->open_array_section("cache");
419 dump_inode(f
, root
, did
, true);
421 // make a second pass to catch anything disconnected
422 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator it
= inode_map
.begin();
423 it
!= inode_map
.end();
425 if (did
.count(it
->second
))
427 dump_inode(f
, it
->second
, did
, true);
434 void Client::dump_status(Formatter
*f
)
436 assert(client_lock
.is_locked_by_me());
438 ldout(cct
, 1) << __func__
<< dendl
;
440 const epoch_t osd_epoch
441 = objecter
->with_osdmap(std::mem_fn(&OSDMap::get_epoch
));
444 f
->open_object_section("metadata");
445 for (const auto& kv
: metadata
)
446 f
->dump_string(kv
.first
.c_str(), kv
.second
);
449 f
->dump_int("dentry_count", lru
.lru_get_size());
450 f
->dump_int("dentry_pinned_count", lru
.lru_get_num_pinned());
451 f
->dump_int("id", get_nodeid().v
);
452 f
->dump_int("inode_count", inode_map
.size());
453 f
->dump_int("mds_epoch", mdsmap
->get_epoch());
454 f
->dump_int("osd_epoch", osd_epoch
);
455 f
->dump_int("osd_epoch_barrier", cap_epoch_barrier
);
462 objectcacher
->start();
465 assert(!initialized
);
467 messenger
->add_dispatcher_tail(this);
468 client_lock
.Unlock();
474 void Client::_finish_init()
478 PerfCountersBuilder
plb(cct
, "client", l_c_first
, l_c_last
);
479 plb
.add_time_avg(l_c_reply
, "reply", "Latency of receiving a reply on metadata request");
480 plb
.add_time_avg(l_c_lat
, "lat", "Latency of processing a metadata request");
481 plb
.add_time_avg(l_c_wrlat
, "wrlat", "Latency of a file data write operation");
482 logger
.reset(plb
.create_perf_counters());
483 cct
->get_perfcounters_collection()->add(logger
.get());
485 client_lock
.Unlock();
487 cct
->_conf
->add_observer(this);
489 AdminSocket
* admin_socket
= cct
->get_admin_socket();
490 int ret
= admin_socket
->register_command("mds_requests",
493 "show in-progress mds requests");
495 lderr(cct
) << "error registering admin socket command: "
496 << cpp_strerror(-ret
) << dendl
;
498 ret
= admin_socket
->register_command("mds_sessions",
501 "show mds session state");
503 lderr(cct
) << "error registering admin socket command: "
504 << cpp_strerror(-ret
) << dendl
;
506 ret
= admin_socket
->register_command("dump_cache",
509 "show in-memory metadata cache contents");
511 lderr(cct
) << "error registering admin socket command: "
512 << cpp_strerror(-ret
) << dendl
;
514 ret
= admin_socket
->register_command("kick_stale_sessions",
515 "kick_stale_sessions",
517 "kick sessions that were remote reset");
519 lderr(cct
) << "error registering admin socket command: "
520 << cpp_strerror(-ret
) << dendl
;
522 ret
= admin_socket
->register_command("status",
525 "show overall client status");
527 lderr(cct
) << "error registering admin socket command: "
528 << cpp_strerror(-ret
) << dendl
;
533 client_lock
.Unlock();
536 void Client::shutdown()
538 ldout(cct
, 1) << "shutdown" << dendl
;
540 // If we were not mounted, but were being used for sending
541 // MDS commands, we may have sessions that need closing.
544 client_lock
.Unlock();
546 cct
->_conf
->remove_observer(this);
548 AdminSocket
* admin_socket
= cct
->get_admin_socket();
549 admin_socket
->unregister_command("mds_requests");
550 admin_socket
->unregister_command("mds_sessions");
551 admin_socket
->unregister_command("dump_cache");
552 admin_socket
->unregister_command("kick_stale_sessions");
553 admin_socket
->unregister_command("status");
555 if (ino_invalidate_cb
) {
556 ldout(cct
, 10) << "shutdown stopping cache invalidator finisher" << dendl
;
557 async_ino_invalidator
.wait_for_empty();
558 async_ino_invalidator
.stop();
561 if (dentry_invalidate_cb
) {
562 ldout(cct
, 10) << "shutdown stopping dentry invalidator finisher" << dendl
;
563 async_dentry_invalidator
.wait_for_empty();
564 async_dentry_invalidator
.stop();
567 if (switch_interrupt_cb
) {
568 ldout(cct
, 10) << "shutdown stopping interrupt finisher" << dendl
;
569 interrupt_finisher
.wait_for_empty();
570 interrupt_finisher
.stop();
574 ldout(cct
, 10) << "shutdown stopping remount finisher" << dendl
;
575 remount_finisher
.wait_for_empty();
576 remount_finisher
.stop();
579 objectcacher
->stop(); // outside of client_lock! this does a join.
585 client_lock
.Unlock();
587 objecter_finisher
.wait_for_empty();
588 objecter_finisher
.stop();
591 cct
->get_perfcounters_collection()->remove(logger
.get());
597 // ===================
598 // metadata cache stuff
600 void Client::trim_cache(bool trim_kernel_dcache
)
602 uint64_t max
= cct
->_conf
->client_cache_size
;
603 ldout(cct
, 20) << "trim_cache size " << lru
.lru_get_size() << " max " << max
<< dendl
;
605 while (lru
.lru_get_size() != last
) {
606 last
= lru
.lru_get_size();
608 if (!unmounting
&& lru
.lru_get_size() <= max
) break;
611 Dentry
*dn
= static_cast<Dentry
*>(lru
.lru_get_next_expire());
618 if (trim_kernel_dcache
&& lru
.lru_get_size() > max
)
619 _invalidate_kernel_dcache();
622 if (lru
.lru_get_size() == 0 && root
&& root
->get_num_ref() == 0 && inode_map
.size() == 1 + root_parents
.size()) {
623 ldout(cct
, 15) << "trim_cache trimmed root " << root
<< dendl
;
627 while (!root_parents
.empty())
628 root_parents
.erase(root_parents
.begin());
634 void Client::trim_cache_for_reconnect(MetaSession
*s
)
636 mds_rank_t mds
= s
->mds_num
;
637 ldout(cct
, 20) << "trim_cache_for_reconnect mds." << mds
<< dendl
;
640 list
<Dentry
*> skipped
;
641 while (lru
.lru_get_size() > 0) {
642 Dentry
*dn
= static_cast<Dentry
*>(lru
.lru_expire());
646 if ((dn
->inode
&& dn
->inode
->caps
.count(mds
)) ||
647 dn
->dir
->parent_inode
->caps
.count(mds
)) {
651 skipped
.push_back(dn
);
654 for(list
<Dentry
*>::iterator p
= skipped
.begin(); p
!= skipped
.end(); ++p
)
655 lru
.lru_insert_mid(*p
);
657 ldout(cct
, 20) << "trim_cache_for_reconnect mds." << mds
658 << " trimmed " << trimmed
<< " dentries" << dendl
;
660 if (s
->caps
.size() > 0)
661 _invalidate_kernel_dcache();
664 void Client::trim_dentry(Dentry
*dn
)
666 ldout(cct
, 15) << "trim_dentry unlinking dn " << dn
->name
667 << " in dir " << hex
<< dn
->dir
->parent_inode
->ino
670 Inode
*diri
= dn
->dir
->parent_inode
;
671 diri
->dir_release_count
++;
672 clear_dir_complete_and_ordered(diri
, true);
674 unlink(dn
, false, false); // drop dir, drop dentry
678 void Client::update_inode_file_bits(Inode
*in
,
679 uint64_t truncate_seq
, uint64_t truncate_size
,
680 uint64_t size
, uint64_t change_attr
,
681 uint64_t time_warp_seq
, utime_t ctime
,
684 version_t inline_version
,
685 bufferlist
& inline_data
,
689 ldout(cct
, 10) << "update_inode_file_bits " << *in
<< " " << ccap_string(issued
)
690 << " mtime " << mtime
<< dendl
;
691 ldout(cct
, 25) << "truncate_seq: mds " << truncate_seq
<< " local "
692 << in
->truncate_seq
<< " time_warp_seq: mds " << time_warp_seq
693 << " local " << in
->time_warp_seq
<< dendl
;
694 uint64_t prior_size
= in
->size
;
696 if (inline_version
> in
->inline_version
) {
697 in
->inline_data
= inline_data
;
698 in
->inline_version
= inline_version
;
701 /* always take a newer change attr */
702 if (change_attr
> in
->change_attr
)
703 in
->change_attr
= change_attr
;
705 if (truncate_seq
> in
->truncate_seq
||
706 (truncate_seq
== in
->truncate_seq
&& size
> in
->size
)) {
707 ldout(cct
, 10) << "size " << in
->size
<< " -> " << size
<< dendl
;
709 in
->reported_size
= size
;
710 if (truncate_seq
!= in
->truncate_seq
) {
711 ldout(cct
, 10) << "truncate_seq " << in
->truncate_seq
<< " -> "
712 << truncate_seq
<< dendl
;
713 in
->truncate_seq
= truncate_seq
;
714 in
->oset
.truncate_seq
= truncate_seq
;
716 // truncate cached file data
717 if (prior_size
> size
) {
718 _invalidate_inode_cache(in
, truncate_size
, prior_size
- truncate_size
);
722 // truncate inline data
723 if (in
->inline_version
< CEPH_INLINE_NONE
) {
724 uint32_t len
= in
->inline_data
.length();
726 in
->inline_data
.splice(size
, len
- size
);
729 if (truncate_seq
>= in
->truncate_seq
&&
730 in
->truncate_size
!= truncate_size
) {
732 ldout(cct
, 10) << "truncate_size " << in
->truncate_size
<< " -> "
733 << truncate_size
<< dendl
;
734 in
->truncate_size
= truncate_size
;
735 in
->oset
.truncate_size
= truncate_size
;
737 ldout(cct
, 0) << "Hmmm, truncate_seq && truncate_size changed on non-file inode!" << dendl
;
741 // be careful with size, mtime, atime
742 if (issued
& (CEPH_CAP_FILE_EXCL
|
744 CEPH_CAP_FILE_BUFFER
|
746 CEPH_CAP_XATTR_EXCL
)) {
747 ldout(cct
, 30) << "Yay have enough caps to look at our times" << dendl
;
748 if (ctime
> in
->ctime
)
750 if (time_warp_seq
> in
->time_warp_seq
) {
751 ldout(cct
, 10) << "mds time_warp_seq " << time_warp_seq
<< " on inode " << *in
752 << " is higher than local time_warp_seq "
753 << in
->time_warp_seq
<< dendl
;
754 //the mds updated times, so take those!
757 in
->time_warp_seq
= time_warp_seq
;
758 } else if (time_warp_seq
== in
->time_warp_seq
) {
760 if (mtime
> in
->mtime
)
762 if (atime
> in
->atime
)
764 } else if (issued
& CEPH_CAP_FILE_EXCL
) {
765 //ignore mds values as we have a higher seq
768 ldout(cct
, 30) << "Don't have enough caps, just taking mds' time values" << dendl
;
769 if (time_warp_seq
>= in
->time_warp_seq
) {
773 in
->time_warp_seq
= time_warp_seq
;
777 ldout(cct
, 0) << "WARNING: " << *in
<< " mds time_warp_seq "
778 << time_warp_seq
<< " is lower than local time_warp_seq "
784 void Client::_fragmap_remove_non_leaves(Inode
*in
)
786 for (map
<frag_t
,int>::iterator p
= in
->fragmap
.begin(); p
!= in
->fragmap
.end(); )
787 if (!in
->dirfragtree
.is_leaf(p
->first
))
788 in
->fragmap
.erase(p
++);
793 void Client::_fragmap_remove_stopped_mds(Inode
*in
, mds_rank_t mds
)
795 for (auto p
= in
->fragmap
.begin(); p
!= in
->fragmap
.end(); )
796 if (p
->second
== mds
)
797 in
->fragmap
.erase(p
++);
802 Inode
* Client::add_update_inode(InodeStat
*st
, utime_t from
,
803 MetaSession
*session
,
804 const UserPerm
& request_perms
)
807 bool was_new
= false;
808 if (inode_map
.count(st
->vino
)) {
809 in
= inode_map
[st
->vino
];
810 ldout(cct
, 12) << "add_update_inode had " << *in
<< " caps " << ccap_string(st
->cap
.caps
) << dendl
;
812 in
= new Inode(this, st
->vino
, &st
->layout
);
813 inode_map
[st
->vino
] = in
;
815 if (use_faked_inos())
816 _assign_faked_ino(in
);
822 } else if (!mounted
) {
823 root_parents
[root_ancestor
] = in
;
828 in
->ino
= st
->vino
.ino
;
829 in
->snapid
= st
->vino
.snapid
;
830 in
->mode
= st
->mode
& S_IFMT
;
835 if (in
->is_symlink())
836 in
->symlink
= st
->symlink
;
839 ldout(cct
, 12) << "add_update_inode adding " << *in
<< " caps " << ccap_string(st
->cap
.caps
) << dendl
;
842 return in
; // as with readdir returning inodes in different snaprealms (no caps!)
844 // only update inode if mds info is strictly newer, or it is the same and projected (odd).
845 bool updating_inode
= false;
847 if (st
->version
== 0 ||
848 (in
->version
& ~1) < st
->version
) {
849 updating_inode
= true;
852 issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
853 issued
|= implemented
;
855 in
->version
= st
->version
;
857 if ((issued
& CEPH_CAP_AUTH_EXCL
) == 0) {
861 in
->btime
= st
->btime
;
864 if ((issued
& CEPH_CAP_LINK_EXCL
) == 0) {
865 in
->nlink
= st
->nlink
;
868 in
->dirstat
= st
->dirstat
;
869 in
->rstat
= st
->rstat
;
870 in
->quota
= st
->quota
;
871 in
->layout
= st
->layout
;
874 in
->dir_layout
= st
->dir_layout
;
875 ldout(cct
, 20) << " dir hash is " << (int)in
->dir_layout
.dl_dir_hash
<< dendl
;
878 update_inode_file_bits(in
, st
->truncate_seq
, st
->truncate_size
, st
->size
,
879 st
->change_attr
, st
->time_warp_seq
, st
->ctime
,
880 st
->mtime
, st
->atime
, st
->inline_version
,
881 st
->inline_data
, issued
);
882 } else if (st
->inline_version
> in
->inline_version
) {
883 in
->inline_data
= st
->inline_data
;
884 in
->inline_version
= st
->inline_version
;
887 if ((in
->xattr_version
== 0 || !(issued
& CEPH_CAP_XATTR_EXCL
)) &&
888 st
->xattrbl
.length() &&
889 st
->xattr_version
> in
->xattr_version
) {
890 bufferlist::iterator p
= st
->xattrbl
.begin();
891 ::decode(in
->xattrs
, p
);
892 in
->xattr_version
= st
->xattr_version
;
895 // move me if/when version reflects fragtree changes.
896 if (in
->dirfragtree
!= st
->dirfragtree
) {
897 in
->dirfragtree
= st
->dirfragtree
;
898 _fragmap_remove_non_leaves(in
);
901 if (in
->snapid
== CEPH_NOSNAP
) {
902 add_update_cap(in
, session
, st
->cap
.cap_id
, st
->cap
.caps
, st
->cap
.seq
,
903 st
->cap
.mseq
, inodeno_t(st
->cap
.realm
), st
->cap
.flags
,
905 if (in
->auth_cap
&& in
->auth_cap
->session
== session
)
906 in
->max_size
= st
->max_size
;
908 in
->snap_caps
|= st
->cap
.caps
;
910 // setting I_COMPLETE needs to happen after adding the cap
911 if (updating_inode
&&
913 (st
->cap
.caps
& CEPH_CAP_FILE_SHARED
) &&
914 (issued
& CEPH_CAP_FILE_EXCL
) == 0 &&
915 in
->dirstat
.nfiles
== 0 &&
916 in
->dirstat
.nsubdirs
== 0) {
917 ldout(cct
, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on empty dir " << *in
<< dendl
;
918 in
->flags
|= I_COMPLETE
| I_DIR_ORDERED
;
920 ldout(cct
, 10) << " dir is open on empty dir " << in
->ino
<< " with "
921 << in
->dir
->dentries
.size() << " entries, marking all dentries null" << dendl
;
922 in
->dir
->readdir_cache
.clear();
923 for (auto p
= in
->dir
->dentries
.begin();
924 p
!= in
->dir
->dentries
.end();
926 unlink(p
->second
, true, true); // keep dir, keep dentry
928 if (in
->dir
->dentries
.empty())
938 * insert_dentry_inode - insert + link a single dentry + inode into the metadata cache.
940 Dentry
*Client::insert_dentry_inode(Dir
*dir
, const string
& dname
, LeaseStat
*dlease
,
941 Inode
*in
, utime_t from
, MetaSession
*session
,
945 if (dir
->dentries
.count(dname
))
946 dn
= dir
->dentries
[dname
];
948 ldout(cct
, 12) << "insert_dentry_inode '" << dname
<< "' vino " << in
->vino()
949 << " in dir " << dir
->parent_inode
->vino() << " dn " << dn
952 if (dn
&& dn
->inode
) {
953 if (dn
->inode
->vino() == in
->vino()) {
955 ldout(cct
, 12) << " had dentry " << dname
956 << " with correct vino " << dn
->inode
->vino()
959 ldout(cct
, 12) << " had dentry " << dname
960 << " with WRONG vino " << dn
->inode
->vino()
962 unlink(dn
, true, true); // keep dir, keep dentry
966 if (!dn
|| !dn
->inode
) {
967 InodeRef
tmp_ref(in
);
969 if (old_dentry
->dir
!= dir
) {
970 Inode
*old_diri
= old_dentry
->dir
->parent_inode
;
971 old_diri
->dir_ordered_count
++;
972 clear_dir_complete_and_ordered(old_diri
, false);
974 unlink(old_dentry
, dir
== old_dentry
->dir
, false); // drop dentry, keep dir open if its the same dir
976 Inode
*diri
= dir
->parent_inode
;
977 diri
->dir_ordered_count
++;
978 clear_dir_complete_and_ordered(diri
, false);
979 dn
= link(dir
, dname
, in
, dn
);
982 update_dentry_lease(dn
, dlease
, from
, session
);
986 void Client::update_dentry_lease(Dentry
*dn
, LeaseStat
*dlease
, utime_t from
, MetaSession
*session
)
989 dttl
+= (float)dlease
->duration_ms
/ 1000.0;
993 if (dlease
->mask
& CEPH_LOCK_DN
) {
994 if (dttl
> dn
->lease_ttl
) {
995 ldout(cct
, 10) << "got dentry lease on " << dn
->name
996 << " dur " << dlease
->duration_ms
<< "ms ttl " << dttl
<< dendl
;
997 dn
->lease_ttl
= dttl
;
998 dn
->lease_mds
= session
->mds_num
;
999 dn
->lease_seq
= dlease
->seq
;
1000 dn
->lease_gen
= session
->cap_gen
;
1003 dn
->cap_shared_gen
= dn
->dir
->parent_inode
->shared_gen
;
1008 * update MDS location cache for a single inode
1010 void Client::update_dir_dist(Inode
*in
, DirStat
*dst
)
1013 ldout(cct
, 20) << "got dirfrag map for " << in
->ino
<< " frag " << dst
->frag
<< " to mds " << dst
->auth
<< dendl
;
1014 if (dst
->auth
>= 0) {
1015 in
->fragmap
[dst
->frag
] = dst
->auth
;
1017 in
->fragmap
.erase(dst
->frag
);
1019 if (!in
->dirfragtree
.is_leaf(dst
->frag
)) {
1020 in
->dirfragtree
.force_to_leaf(cct
, dst
->frag
);
1021 _fragmap_remove_non_leaves(in
);
1025 in
->dir_replicated
= !dst
->dist
.empty(); // FIXME that's just one frag!
1029 if (!st->dirfrag_dist.empty()) { // FIXME
1030 set<int> dist = st->dirfrag_dist.begin()->second;
1031 if (dist.empty() && !in->dir_contacts.empty())
1032 ldout(cct, 9) << "lost dist spec for " << in->ino
1033 << " " << dist << dendl;
1034 if (!dist.empty() && in->dir_contacts.empty())
1035 ldout(cct, 9) << "got dist spec for " << in->ino
1036 << " " << dist << dendl;
1037 in->dir_contacts = dist;
1042 void Client::clear_dir_complete_and_ordered(Inode
*diri
, bool complete
)
1044 if (diri
->flags
& I_COMPLETE
) {
1046 ldout(cct
, 10) << " clearing (I_COMPLETE|I_DIR_ORDERED) on " << *diri
<< dendl
;
1047 diri
->flags
&= ~(I_COMPLETE
| I_DIR_ORDERED
);
1049 if (diri
->flags
& I_DIR_ORDERED
) {
1050 ldout(cct
, 10) << " clearing I_DIR_ORDERED on " << *diri
<< dendl
;
1051 diri
->flags
&= ~I_DIR_ORDERED
;
1055 diri
->dir
->readdir_cache
.clear();
1060 * insert results from readdir or lssnap into the metadata cache.
1062 void Client::insert_readdir_results(MetaRequest
*request
, MetaSession
*session
, Inode
*diri
) {
1064 MClientReply
*reply
= request
->reply
;
1065 ConnectionRef con
= request
->reply
->get_connection();
1066 uint64_t features
= con
->get_features();
1068 dir_result_t
*dirp
= request
->dirp
;
1071 // the extra buffer list is only set for readdir and lssnap replies
1072 bufferlist::iterator p
= reply
->get_extra_bl().begin();
1075 if (request
->head
.op
== CEPH_MDS_OP_LSSNAP
) {
1077 diri
= open_snapdir(diri
);
1080 // only open dir if we're actually adding stuff to it!
1081 Dir
*dir
= diri
->open_dir();
1091 bool end
= ((unsigned)flags
& CEPH_READDIR_FRAG_END
);
1092 bool hash_order
= ((unsigned)flags
& CEPH_READDIR_HASH_ORDER
);
1094 frag_t fg
= (unsigned)request
->head
.args
.readdir
.frag
;
1095 unsigned readdir_offset
= dirp
->next_offset
;
1096 string readdir_start
= dirp
->last_name
;
1097 assert(!readdir_start
.empty() || readdir_offset
== 2);
1099 unsigned last_hash
= 0;
1101 if (!readdir_start
.empty()) {
1102 last_hash
= ceph_frag_value(diri
->hash_dentry_name(readdir_start
));
1103 } else if (flags
& CEPH_READDIR_OFFSET_HASH
) {
1104 /* mds understands offset_hash */
1105 last_hash
= (unsigned)request
->head
.args
.readdir
.offset_hash
;
1109 if (fg
!= dst
.frag
) {
1110 ldout(cct
, 10) << "insert_trace got new frag " << fg
<< " -> " << dst
.frag
<< dendl
;
1114 readdir_start
.clear();
1115 dirp
->offset
= dir_result_t::make_fpos(fg
, readdir_offset
, false);
1119 ldout(cct
, 10) << __func__
<< " " << numdn
<< " readdir items, end=" << end
1120 << ", hash_order=" << hash_order
1121 << ", readdir_start " << readdir_start
1122 << ", last_hash " << last_hash
1123 << ", next_offset " << readdir_offset
<< dendl
;
1125 if (diri
->snapid
!= CEPH_SNAPDIR
&&
1126 fg
.is_leftmost() && readdir_offset
== 2 &&
1127 !(hash_order
&& last_hash
)) {
1128 dirp
->release_count
= diri
->dir_release_count
;
1129 dirp
->ordered_count
= diri
->dir_ordered_count
;
1130 dirp
->start_shared_gen
= diri
->shared_gen
;
1131 dirp
->cache_index
= 0;
1134 dirp
->buffer_frag
= fg
;
1136 _readdir_drop_dirp_buffer(dirp
);
1137 dirp
->buffer
.reserve(numdn
);
1141 for (unsigned i
=0; i
<numdn
; i
++) {
1143 ::decode(dlease
, p
);
1144 InodeStat
ist(p
, features
);
1146 ldout(cct
, 15) << "" << i
<< ": '" << dname
<< "'" << dendl
;
1148 Inode
*in
= add_update_inode(&ist
, request
->sent_stamp
, session
,
1151 if (diri
->dir
->dentries
.count(dname
)) {
1152 Dentry
*olddn
= diri
->dir
->dentries
[dname
];
1153 if (olddn
->inode
!= in
) {
1154 // replace incorrect dentry
1155 unlink(olddn
, true, true); // keep dir, dentry
1156 dn
= link(dir
, dname
, in
, olddn
);
1157 assert(dn
== olddn
);
1165 dn
= link(dir
, dname
, in
, NULL
);
1168 update_dentry_lease(dn
, &dlease
, request
->sent_stamp
, session
);
1170 unsigned hash
= ceph_frag_value(diri
->hash_dentry_name(dname
));
1171 if (hash
!= last_hash
)
1174 dn
->offset
= dir_result_t::make_fpos(hash
, readdir_offset
++, true);
1176 dn
->offset
= dir_result_t::make_fpos(fg
, readdir_offset
++, false);
1178 // add to readdir cache
1179 if (dirp
->release_count
== diri
->dir_release_count
&&
1180 dirp
->ordered_count
== diri
->dir_ordered_count
&&
1181 dirp
->start_shared_gen
== diri
->shared_gen
) {
1182 if (dirp
->cache_index
== dir
->readdir_cache
.size()) {
1184 assert(!dirp
->inode
->is_complete_and_ordered());
1185 dir
->readdir_cache
.reserve(dirp
->cache_index
+ numdn
);
1187 dir
->readdir_cache
.push_back(dn
);
1188 } else if (dirp
->cache_index
< dir
->readdir_cache
.size()) {
1189 if (dirp
->inode
->is_complete_and_ordered())
1190 assert(dir
->readdir_cache
[dirp
->cache_index
] == dn
);
1192 dir
->readdir_cache
[dirp
->cache_index
] = dn
;
1194 assert(0 == "unexpected readdir buffer idx");
1196 dirp
->cache_index
++;
1198 // add to cached result list
1199 dirp
->buffer
.push_back(dir_result_t::dentry(dn
->offset
, dname
, in
));
1200 ldout(cct
, 15) << __func__
<< " " << hex
<< dn
->offset
<< dec
<< ": '" << dname
<< "' -> " << in
->ino
<< dendl
;
1204 dirp
->last_name
= dname
;
1206 dirp
->next_offset
= 2;
1208 dirp
->next_offset
= readdir_offset
;
1210 if (dir
->is_empty())
1217 * insert a trace from a MDS reply into the cache.
1219 Inode
* Client::insert_trace(MetaRequest
*request
, MetaSession
*session
)
1221 MClientReply
*reply
= request
->reply
;
1222 int op
= request
->get_op();
1224 ldout(cct
, 10) << "insert_trace from " << request
->sent_stamp
<< " mds." << session
->mds_num
1225 << " is_target=" << (int)reply
->head
.is_target
1226 << " is_dentry=" << (int)reply
->head
.is_dentry
1229 bufferlist::iterator p
= reply
->get_trace_bl().begin();
1230 if (request
->got_unsafe
) {
1231 ldout(cct
, 10) << "insert_trace -- already got unsafe; ignoring" << dendl
;
1237 ldout(cct
, 10) << "insert_trace -- no trace" << dendl
;
1239 Dentry
*d
= request
->dentry();
1241 Inode
*diri
= d
->dir
->parent_inode
;
1242 diri
->dir_release_count
++;
1243 clear_dir_complete_and_ordered(diri
, true);
1246 if (d
&& reply
->get_result() == 0) {
1247 if (op
== CEPH_MDS_OP_RENAME
) {
1249 Dentry
*od
= request
->old_dentry();
1250 ldout(cct
, 10) << " unlinking rename src dn " << od
<< " for traceless reply" << dendl
;
1252 unlink(od
, true, true); // keep dir, dentry
1253 } else if (op
== CEPH_MDS_OP_RMDIR
||
1254 op
== CEPH_MDS_OP_UNLINK
) {
1256 ldout(cct
, 10) << " unlinking unlink/rmdir dn " << d
<< " for traceless reply" << dendl
;
1257 unlink(d
, true, true); // keep dir, dentry
1263 ConnectionRef con
= request
->reply
->get_connection();
1264 uint64_t features
= con
->get_features();
1265 ldout(cct
, 10) << " features 0x" << hex
<< features
<< dec
<< dendl
;
1268 SnapRealm
*realm
= NULL
;
1269 if (reply
->snapbl
.length())
1270 update_snap_trace(reply
->snapbl
, &realm
);
1272 ldout(cct
, 10) << " hrm "
1273 << " is_target=" << (int)reply
->head
.is_target
1274 << " is_dentry=" << (int)reply
->head
.is_dentry
1283 if (reply
->head
.is_dentry
) {
1284 dirst
.decode(p
, features
);
1287 ::decode(dlease
, p
);
1291 if (reply
->head
.is_target
) {
1292 ist
.decode(p
, features
);
1293 if (cct
->_conf
->client_debug_getattr_caps
) {
1294 unsigned wanted
= 0;
1295 if (op
== CEPH_MDS_OP_GETATTR
|| op
== CEPH_MDS_OP_LOOKUP
)
1296 wanted
= request
->head
.args
.getattr
.mask
;
1297 else if (op
== CEPH_MDS_OP_OPEN
|| op
== CEPH_MDS_OP_CREATE
)
1298 wanted
= request
->head
.args
.open
.mask
;
1300 if ((wanted
& CEPH_CAP_XATTR_SHARED
) &&
1301 !(ist
.xattr_version
> 0 && ist
.xattrbl
.length() > 0))
1302 assert(0 == "MDS reply does not contain xattrs");
1305 in
= add_update_inode(&ist
, request
->sent_stamp
, session
,
1310 if (reply
->head
.is_dentry
) {
1311 diri
= add_update_inode(&dirst
, request
->sent_stamp
, session
,
1313 update_dir_dist(diri
, &dst
); // dir stat info is attached to ..
1316 Dir
*dir
= diri
->open_dir();
1317 insert_dentry_inode(dir
, dname
, &dlease
, in
, request
->sent_stamp
, session
,
1318 (op
== CEPH_MDS_OP_RENAME
) ? request
->old_dentry() : NULL
);
1321 if (diri
->dir
&& diri
->dir
->dentries
.count(dname
)) {
1322 dn
= diri
->dir
->dentries
[dname
];
1324 diri
->dir_ordered_count
++;
1325 clear_dir_complete_and_ordered(diri
, false);
1326 unlink(dn
, true, true); // keep dir, dentry
1329 if (dlease
.duration_ms
> 0) {
1331 Dir
*dir
= diri
->open_dir();
1332 dn
= link(dir
, dname
, NULL
, NULL
);
1334 update_dentry_lease(dn
, &dlease
, request
->sent_stamp
, session
);
1337 } else if (op
== CEPH_MDS_OP_LOOKUPSNAP
||
1338 op
== CEPH_MDS_OP_MKSNAP
) {
1339 ldout(cct
, 10) << " faking snap lookup weirdness" << dendl
;
1340 // fake it for snap lookup
1341 vinodeno_t vino
= ist
.vino
;
1342 vino
.snapid
= CEPH_SNAPDIR
;
1343 assert(inode_map
.count(vino
));
1344 diri
= inode_map
[vino
];
1346 string dname
= request
->path
.last_dentry();
1349 dlease
.duration_ms
= 0;
1352 Dir
*dir
= diri
->open_dir();
1353 insert_dentry_inode(dir
, dname
, &dlease
, in
, request
->sent_stamp
, session
);
1355 if (diri
->dir
&& diri
->dir
->dentries
.count(dname
)) {
1356 Dentry
*dn
= diri
->dir
->dentries
[dname
];
1358 unlink(dn
, true, true); // keep dir, dentry
1364 if (op
== CEPH_MDS_OP_READDIR
||
1365 op
== CEPH_MDS_OP_LSSNAP
) {
1366 insert_readdir_results(request
, session
, in
);
1367 } else if (op
== CEPH_MDS_OP_LOOKUPNAME
) {
1368 // hack: return parent inode instead
1372 if (request
->dentry() == NULL
&& in
!= request
->inode()) {
1373 // pin the target inode if its parent dentry is not pinned
1374 request
->set_other_inode(in
);
1379 put_snap_realm(realm
);
1381 request
->target
= in
;
// Pick the MDS rank that request `req` should be sent to.
//
// As far as this listing shows: an explicit req->resend_mds (>= 0) is taken
// and reset to -1; otherwise the choice is derived from the request's inode or
// dentry -- through the dentry-name hash and the inode's dirfragtree/fragmap,
// or through the session behind one of the inode's caps -- with
// _get_random_up_mds() as the fallback.
//
// @param req         request to route
// @param phash_diri  out-parameter. NOTE(review): the statement that stores
//                    into it is not visible in this listing -- confirm against
//                    the full source before relying on it.
// NOTE(review): the return statement is not visible; presumably `mds`.
1387 mds_rank_t
Client::choose_target_mds(MetaRequest
*req
, Inode
** phash_diri
)
1389 mds_rank_t mds
= MDS_RANK_NONE
;
1391 bool is_hash
= false;
// an explicitly requested resend target wins and is consumed (reset to -1)
1397 if (req
->resend_mds
>= 0) {
1398 mds
= req
->resend_mds
;
1399 req
->resend_mds
= -1;
1400 ldout(cct
, 10) << "choose_target_mds resend_mds specified as mds." << mds
<< dendl
;
1404 if (cct
->_conf
->client_use_random_mds
)
1410 ldout(cct
, 20) << "choose_target_mds starting with req->inode " << *in
<< dendl
;
// hash the first path component against the request's inode
1411 if (req
->path
.depth()) {
1412 hash
= in
->hash_dentry_name(req
->path
[0]);
1413 ldout(cct
, 20) << "choose_target_mds inode dir hash is " << (int)in
->dir_layout
.dl_dir_hash
1414 << " on " << req
->path
[0]
1415 << " => " << hash
<< dendl
;
1420 in
= de
->inode
.get();
1421 ldout(cct
, 20) << "choose_target_mds starting with req->dentry inode " << *in
<< dendl
;
// this path starts from the dentry's parent directory and hashes the dentry name
1423 in
= de
->dir
->parent_inode
;
1424 hash
= in
->hash_dentry_name(de
->name
);
1425 ldout(cct
, 20) << "choose_target_mds dentry dir hash is " << (int)in
->dir_layout
.dl_dir_hash
1426 << " on " << de
->name
1427 << " => " << hash
<< dendl
;
// snapshot inodes: climb via snapdir_parent or the first parent dentry until
// a non-snapshot inode is reached
1432 if (in
->snapid
!= CEPH_NOSNAP
) {
1433 ldout(cct
, 10) << "choose_target_mds " << *in
<< " is snapped, using nonsnap parent" << dendl
;
1434 while (in
->snapid
!= CEPH_NOSNAP
) {
1435 if (in
->snapid
== CEPH_SNAPDIR
)
1436 in
= in
->snapdir_parent
.get();
1437 else if (!in
->dn_set
.empty())
1438 /* In most cases there will only be one dentry, so getting it
1439 * will be the correct action. If there are multiple hard links,
1440 * I think the MDS should be able to redirect as needed*/
1441 in
= in
->get_first_parent()->dir
->parent_inode
;
1443 ldout(cct
, 10) << "got unlinked inode, can't look at parent" << dendl
;
1450 ldout(cct
, 20) << "choose_target_mds " << *in
<< " is_hash=" << is_hash
1451 << " hash=" << hash
<< dendl
;
// hashed lookup: hash -> frag via dirfragtree, then frag -> rank via fragmap
1453 if (is_hash
&& S_ISDIR(in
->mode
) && !in
->fragmap
.empty()) {
1454 frag_t fg
= in
->dirfragtree
[hash
];
1455 if (in
->fragmap
.count(fg
)) {
1456 mds
= in
->fragmap
[fg
];
1459 ldout(cct
, 10) << "choose_target_mds from dirfragtree hash" << dendl
;
// otherwise use the session behind one of the inode's caps
1464 if (req
->auth_is_best())
1466 if (!cap
&& !in
->caps
.empty())
1467 cap
= in
->caps
.begin()->second
;
1470 mds
= cap
->session
->mds_num
;
1471 ldout(cct
, 10) << "choose_target_mds from caps on inode " << *in
<< dendl
;
// last resort: a random MDS that is up
1478 mds
= _get_random_up_mds();
1479 ldout(cct
, 10) << "did not get mds through better means, so chose random mds " << mds
<< dendl
;
1483 ldout(cct
, 20) << "mds is " << mds
<< dendl
;
// Open sessions to the export targets of MDS rank `mds`.
//
// Asserts that a session to `mds` itself exists, then walks the
// export_targets set in that rank's mdsmap info and calls
// _open_mds_session() for every target that has no entry in mds_sessions and
// that the mdsmap reports as clientreplay, active or stopping.
1488 void Client::connect_mds_targets(mds_rank_t mds
)
1490 ldout(cct
, 10) << "connect_mds_targets for mds." << mds
<< dendl
;
1491 assert(mds_sessions
.count(mds
));
1492 const MDSMap::mds_info_t
& info
= mdsmap
->get_mds_info(mds
);
1493 for (set
<mds_rank_t
>::const_iterator q
= info
.export_targets
.begin();
1494 q
!= info
.export_targets
.end();
// only targets without a session that are in a usable state get one opened
1496 if (mds_sessions
.count(*q
) == 0 &&
1497 mdsmap
->is_clientreplay_or_active_or_stopping(*q
)) {
1498 ldout(cct
, 10) << "check_mds_sessions opening mds." << mds
1499 << " export target mds." << *q
<< dendl
;
1500 _open_mds_session(*q
);
// Dump MDS session state to formatter `f`: the client's node id under "id",
// an array section "sessions" holding one "session" object per entry of
// mds_sessions, and the current mdsmap epoch under "mdsmap_epoch".
// NOTE(review): what is written inside each "session" object is not visible
// in this listing.
1505 void Client::dump_mds_sessions(Formatter
*f
)
1507 f
->dump_int("id", get_nodeid().v
);
1508 f
->open_array_section("sessions");
1509 for (map
<mds_rank_t
,MetaSession
*>::const_iterator p
= mds_sessions
.begin(); p
!= mds_sessions
.end(); ++p
) {
1510 f
->open_object_section("session");
1515 f
->dump_int("mdsmap_epoch", mdsmap
->get_epoch());
// Walk mds_requests and open a "request" object section on formatter `f` for
// each pending request.
// NOTE(review): the per-request dump call and the closing of the section are
// not visible in this listing.
1517 void Client::dump_mds_requests(Formatter
*f
)
1519 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
1520 p
!= mds_requests
.end();
1522 f
->open_object_section("request");
// Resolve the target inode of a completed request and report whether this
// request was the one that performed the create.
//
// Visible behavior:
//  - the reply's extra bufferlist is claimed; if it holds at least 8 bytes it
//    is decoded as the created inode number and got_created_ino is set;
//  - *pcreated receives got_created_ino;
//  - the target is taken from request->target when set, else from inode_map
//    using the created ino, else (traceless reply) it is fetched with
//    _do_lookup() on the request's dentry or _getattr() on the request's inode;
//  - a created ino that differs from the fetched target's ino is logged at
//    level 5;
//  - the fetched target is swapped into *ptarget.
//
// @param r        result so far; overwritten by the lookup/getattr result
// @param request  the request that was replied to
// @param reply    its reply message
// @param ptarget  receives the target inode reference
// @param pcreated receives the "we created it" flag
// @param perms    caller permissions forwarded to the lookup/getattr
// NOTE(review): the return statement and any null checks on pcreated are not
// visible in this listing.
1528 int Client::verify_reply_trace(int r
,
1529 MetaRequest
*request
, MClientReply
*reply
,
1530 InodeRef
*ptarget
, bool *pcreated
,
1531 const UserPerm
& perms
)
1533 // check whether this request actually did the create, and set created flag
1534 bufferlist extra_bl
;
1535 inodeno_t created_ino
;
1536 bool got_created_ino
= false;
1537 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
;
1539 extra_bl
.claim(reply
->get_extra_bl());
1540 if (extra_bl
.length() >= 8) {
1541 // if the extra bufferlist has a buffer, we assume it's the created inode
1542 // and that this request to create succeeded in actually creating
1543 // the inode (won the race with other create requests)
1544 ::decode(created_ino
, extra_bl
);
1545 got_created_ino
= true;
1546 ldout(cct
, 10) << "make_request created ino " << created_ino
<< dendl
;
1550 *pcreated
= got_created_ino
;
// easiest case: insert_trace already recorded a target on the request
1552 if (request
->target
) {
1553 *ptarget
= request
->target
;
1554 ldout(cct
, 20) << "make_request target is " << *ptarget
->get() << dendl
;
// next: the created ino is already present in the inode cache
1556 if (got_created_ino
&& (p
= inode_map
.find(vinodeno_t(created_ino
, CEPH_NOSNAP
))) != inode_map
.end()) {
1557 (*ptarget
) = p
->second
;
1558 ldout(cct
, 20) << "make_request created, target is " << *ptarget
->get() << dendl
;
1560 // we got a traceless reply, and need to look up what we just
1561 // created. for now, do this by name. someday, do this by the
1562 // ino... which we know! FIXME.
1564 Dentry
*d
= request
->dentry();
1567 ldout(cct
, 10) << "make_request got traceless reply, looking up #"
1568 << d
->dir
->parent_inode
->ino
<< "/" << d
->name
1569 << " got_ino " << got_created_ino
1570 << " ino " << created_ino
1572 r
= _do_lookup(d
->dir
->parent_inode
, d
->name
, request
->regetattr_mask
,
1575 // if the dentry is not linked, just do our best. see #5021.
1576 assert(0 == "how did this happen? i want logs!");
1579 Inode
*in
= request
->inode();
1580 ldout(cct
, 10) << "make_request got traceless reply, forcing getattr on #"
1581 << in
->ino
<< dendl
;
1582 r
= _getattr(in
, request
->regetattr_mask
, perms
, true);
1586 // verify ino returned in reply and trace_dist are the same
1587 if (got_created_ino
&&
1588 created_ino
.val
!= target
->ino
.val
) {
1589 ldout(cct
, 5) << "create got ino " << created_ino
<< " but then failed on lookup; EINTR?" << dendl
;
1593 ptarget
->swap(target
);
/**
 * Blocking helper to make an MDS request.
 *
 * If the ptarget flag is set, behavior changes slightly: the caller
 * expects to get a pointer to the inode we are creating or operating
 * on. As a result, we will follow up any traceless mutation reply
 * with a getattr or lookup to transparently handle a traceless reply
 * from the MDS (as when the MDS restarts and the client has to replay
 * a request).
 *
 * @param request the MetaRequest to execute
 * @param perms The user uid/gid to execute as (eventually, full group lists?)
 * @param ptarget [optional] address to store a pointer to the target inode we want to create or operate on
 * @param pcreated [optional; required if ptarget] where to store a bool of whether our create atomically created a file
 * @param use_mds [optional] prefer a specific mds (-1 for default)
 * @param pdirbl [optional; disallowed if ptarget] where to pass extra reply payload to the caller
 */
// Register `request`, send it to an MDS and block until it is answered or
// aborted.
//
// Visible sequence:
//  1. assign a tid (++last_tid), stamp op_stamp, store a reference in
//     mds_requests, record caller perms and the oldest client tid (forced to 1
//     when client_inject_fixed_oldest_tid is set), seed resend_mds from use_mds;
//  2. pick a target with choose_target_mds(); if that rank is neither ACTIVE
//     nor STOPPING, either retry with another rank or wait for a new mdsmap;
//  3. get or open the session, waiting while it is OPENING and aborting with
//     -EPERM if the MDS rejected us;
//  4. send_request(), then wait on caller_cond until a reply or a forward
//     arrives;
//  5. with no reply the request must have been aborted and its abort code is
//     the result; with a reply the result is reply->get_result(), the
//     dispatcher is woken through dispatch_cond, verify_reply_trace() runs
//     when ptarget is set and r >= 0, the reply's extra payload is handed to
//     pdirbl, and the latency is recorded under l_c_lat and l_c_reply.
//
// NOTE(review): the use_mds/pdirbl parameter declarations, the loop structure
// around steps 2-4, the condition guarding the -EBLACKLISTED abort and the
// return statement are on lines not visible in this listing.
1621 int Client::make_request(MetaRequest
*request
,
1622 const UserPerm
& perms
,
1623 InodeRef
*ptarget
, bool *pcreated
,
1629 // assign a unique tid
1630 ceph_tid_t tid
= ++last_tid
;
1631 request
->set_tid(tid
);
1634 request
->op_stamp
= ceph_clock_now();
// mds_requests keeps its own reference (request->get())
1637 mds_requests
[tid
] = request
->get();
1638 if (oldest_tid
== 0 && request
->get_op() != CEPH_MDS_OP_SETFILELOCK
)
1641 request
->set_caller_perms(perms
);
1643 if (cct
->_conf
->client_inject_fixed_oldest_tid
) {
1644 ldout(cct
, 20) << __func__
<< " injecting fixed oldest_client_tid(1)" << dendl
;
1645 request
->set_oldest_client_tid(1);
1647 request
->set_oldest_client_tid(oldest_tid
);
1652 request
->resend_mds
= use_mds
;
1655 if (request
->aborted())
1659 request
->abort(-EBLACKLISTED
);
1665 request
->caller_cond
= &caller_cond
;
1668 Inode
*hash_diri
= NULL
;
1669 mds_rank_t mds
= choose_target_mds(request
, &hash_diri
);
// a rank of MDS_RANK_NONE is treated as STATE_NULL without consulting the mdsmap
1670 int mds_state
= (mds
== MDS_RANK_NONE
) ? MDSMap::STATE_NULL
: mdsmap
->get_state(mds
);
1671 if (mds_state
!= MDSMap::STATE_ACTIVE
&& mds_state
!= MDSMap::STATE_STOPPING
) {
1672 if (mds_state
== MDSMap::STATE_NULL
&& mds
>= mdsmap
->get_max_mds()) {
1674 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, remove it from fragmap" << dendl
;
1675 _fragmap_remove_stopped_mds(hash_diri
, mds
);
1677 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, trying a random mds" << dendl
;
1678 request
->resend_mds
= _get_random_up_mds();
1681 ldout(cct
, 10) << " target mds." << mds
<< " not active, waiting for new mdsmap" << dendl
;
1682 wait_on_list(waiting_for_mdsmap
);
1688 MetaSession
*session
= NULL
;
1689 if (!have_open_session(mds
)) {
1690 session
= _get_or_open_mds_session(mds
);
1693 if (session
->state
== MetaSession::STATE_OPENING
) {
1694 ldout(cct
, 10) << "waiting for session to mds." << mds
<< " to open" << dendl
;
1695 wait_on_context_list(session
->waiting_for_open
);
1696 // Abort requests on REJECT from MDS
1697 if (rejected_by_mds
.count(mds
)) {
1698 request
->abort(-EPERM
);
1704 if (!have_open_session(mds
))
1707 session
= mds_sessions
[mds
];
1711 send_request(request
, session
);
1714 ldout(cct
, 20) << "awaiting reply|forward|kick on " << &caller_cond
<< dendl
;
1715 request
->kick
= false;
1716 while (!request
->reply
&& // reply
1717 request
->resend_mds
< 0 && // forward
1719 caller_cond
.Wait(client_lock
);
1720 request
->caller_cond
= NULL
;
1722 // did we get a reply?
// no reply: the request was aborted; unlink and unregister it, drop our reference
1727 if (!request
->reply
) {
1728 assert(request
->aborted());
1729 assert(!request
->got_unsafe
);
1730 r
= request
->get_abort_code();
1731 request
->item
.remove_myself();
1732 unregister_request(request
);
1733 put_request(request
); // ours
// detach the reply from the request before using it
1738 MClientReply
*reply
= request
->reply
;
1739 request
->reply
= NULL
;
1740 r
= reply
->get_result();
1742 request
->success
= true;
1744 // kick dispatcher (we've got it!)
1745 assert(request
->dispatch_cond
);
1746 request
->dispatch_cond
->Signal();
1747 ldout(cct
, 20) << "sendrecv kickback on tid " << tid
<< " " << request
->dispatch_cond
<< dendl
;
1748 request
->dispatch_cond
= 0;
1750 if (r
>= 0 && ptarget
)
1751 r
= verify_reply_trace(r
, request
, reply
, ptarget
, pcreated
, perms
);
1754 pdirbl
->claim(reply
->get_extra_bl());
// latency is measured from sent_stamp and recorded under two counters
1757 utime_t lat
= ceph_clock_now();
1758 lat
-= request
->sent_stamp
;
1759 ldout(cct
, 20) << "lat " << lat
<< dendl
;
1760 logger
->tinc(l_c_lat
, lat
);
1761 logger
->tinc(l_c_reply
, lat
);
1763 put_request(request
);
// Remove `req` from mds_requests. If it carried the oldest tid, look at the
// requests after it (upper_bound) and adopt as the new oldest_tid the tid of a
// remaining request whose op is not CEPH_MDS_OP_SETFILELOCK.
// NOTE(review): the loop around the op check, what oldest_tid becomes when no
// request remains, and the release of the map's reference are not visible in
// this listing.
1769 void Client::unregister_request(MetaRequest
*req
)
1771 mds_requests
.erase(req
->tid
);
1772 if (req
->tid
== oldest_tid
) {
1773 map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.upper_bound(oldest_tid
);
1775 if (p
== mds_requests
.end()) {
1779 if (p
->second
->get_op() != CEPH_MDS_OP_SETFILELOCK
) {
1780 oldest_tid
= p
->first
;
// Drop one reference on `request` via _put(). When _put() returns true the
// visible code reads the op (only if the request succeeded), takes over the
// request's "other inode" reference, and for RMDIR, RENAME and RMSNAP calls
// _try_to_trim_inode() on that inode.
// NOTE(review): the first half of the trim condition and the deletion of the
// request are on lines not visible in this listing.
1789 void Client::put_request(MetaRequest
*request
)
1791 if (request
->_put()) {
1793 if (request
->success
)
1794 op
= request
->get_op();
1796 request
->take_other_inode(&other_in
);
1800 (op
== CEPH_MDS_OP_RMDIR
||
1801 op
== CEPH_MDS_OP_RENAME
||
1802 op
== CEPH_MDS_OP_RMSNAP
)) {
1803 _try_to_trim_inode(other_in
.get(), false);
// Release caps on inode `in` held from `mds` as part of sending `req`.
//
// If the inode has a cap from `mds`: dirty and in-use bits are masked out of
// `drop`; when some bit of `drop` is issued and no bit of `unless` is issued,
// those bits are cleared from the cap's issued and implemented masks; a
// ceph_mds_request_release is filled from the cap (cap_id, seq, issue_seq,
// mseq, caps = implemented, wanted) and appended to req->cap_releases with an
// empty dentry name.
//
// @param drop    cap bits the caller is willing to give up
// @param unless  cap bits that, if issued, suppress the drop
// @param force   NOTE(review): its effect is on lines not visible here
// NOTE(review): the `released` counter logged on exit is presumably the return
// value; its declaration, updates and the return are not visible.
1808 int Client::encode_inode_release(Inode
*in
, MetaRequest
*req
,
1809 mds_rank_t mds
, int drop
,
1810 int unless
, int force
)
1812 ldout(cct
, 20) << "encode_inode_release enter(in:" << *in
<< ", req:" << req
1813 << " mds:" << mds
<< ", drop:" << drop
<< ", unless:" << unless
1814 << ", have:" << ", force:" << force
<< ")" << dendl
;
1816 if (in
->caps
.count(mds
)) {
1817 Cap
*caps
= in
->caps
[mds
];
// never drop bits that are dirty or currently in use
1818 drop
&= ~(in
->dirty_caps
| get_caps_used(in
));
1819 if ((drop
& caps
->issued
) &&
1820 !(unless
& caps
->issued
)) {
1821 ldout(cct
, 25) << "Dropping caps. Initial " << ccap_string(caps
->issued
) << dendl
;
1822 caps
->issued
&= ~drop
;
1823 caps
->implemented
&= ~drop
;
1825 ldout(cct
, 25) << "Now have: " << ccap_string(caps
->issued
) << dendl
;
// describe the cap's current state in the release record
1830 ceph_mds_request_release rel
;
1832 rel
.cap_id
= caps
->cap_id
;
1833 rel
.seq
= caps
->seq
;
1834 rel
.issue_seq
= caps
->issue_seq
;
1835 rel
.mseq
= caps
->mseq
;
1836 rel
.caps
= caps
->implemented
;
1837 rel
.wanted
= caps
->wanted
;
1840 req
->cap_releases
.push_back(MClientRequest::Release(rel
,""));
1843 ldout(cct
, 25) << "encode_inode_release exit(in:" << *in
<< ") released:"
1844 << released
<< dendl
;
// Release caps on the directory that holds dentry `dn` and, when possible,
// give back the dentry lease in the same record.
//
// Calls encode_inode_release() on dn's parent directory inode with force = 1.
// If that reports a release and the dentry lease is held from `mds`, the
// release entry just appended to req->cap_releases is extended with the dentry
// name, its length and the lease sequence number.
// NOTE(review): the declaration of `released` and any guard on dn->dir are on
// lines not visible in this listing.
1848 void Client::encode_dentry_release(Dentry
*dn
, MetaRequest
*req
,
1849 mds_rank_t mds
, int drop
, int unless
)
1851 ldout(cct
, 20) << "encode_dentry_release enter(dn:"
1852 << dn
<< ")" << dendl
;
1855 released
= encode_inode_release(dn
->dir
->parent_inode
, req
,
1856 mds
, drop
, unless
, 1);
1857 if (released
&& dn
->lease_mds
== mds
) {
1858 ldout(cct
, 25) << "preemptively releasing dn to mds" << dendl
;
// patch the entry that encode_inode_release() just pushed
1859 MClientRequest::Release
& rel
= req
->cap_releases
.back();
1860 rel
.item
.dname_len
= dn
->name
.length();
1861 rel
.item
.dname_seq
= dn
->lease_seq
;
1862 rel
.dname
= dn
->name
;
1864 ldout(cct
, 25) << "encode_dentry_release exit(dn:"
1865 << dn
<< ")" << dendl
;
/**
 * This requires the MClientRequest *request member to be set.
 * It will error out horribly without one.
 * Additionally, if you set any *drop member, you'd better have
 * set the corresponding dentry!
 */
// Collect the cap releases `req` asks for, targeted at `mds`.
//
// For each of the request's inode, old inode, other inode, dentry and old
// dentry: when the matching *_drop mask is non-zero and the object is set,
// encode_inode_release() or encode_dentry_release() is called with that
// object's drop/unless masks.
// NOTE(review): the last argument of the first encode_inode_release() call is
// on a line not visible in this listing.
1875 void Client::encode_cap_releases(MetaRequest
*req
, mds_rank_t mds
)
1877 ldout(cct
, 20) << "encode_cap_releases enter (req: "
1878 << req
<< ", mds: " << mds
<< ")" << dendl
;
1879 if (req
->inode_drop
&& req
->inode())
1880 encode_inode_release(req
->inode(), req
,
1881 mds
, req
->inode_drop
,
1884 if (req
->old_inode_drop
&& req
->old_inode())
1885 encode_inode_release(req
->old_inode(), req
,
1886 mds
, req
->old_inode_drop
,
1887 req
->old_inode_unless
);
1888 if (req
->other_inode_drop
&& req
->other_inode())
1889 encode_inode_release(req
->other_inode(), req
,
1890 mds
, req
->other_inode_drop
,
1891 req
->other_inode_unless
);
1893 if (req
->dentry_drop
&& req
->dentry())
1894 encode_dentry_release(req
->dentry(), req
,
1895 mds
, req
->dentry_drop
,
1896 req
->dentry_unless
);
1898 if (req
->old_dentry_drop
&& req
->old_dentry())
1899 encode_dentry_release(req
->old_dentry(), req
,
1900 mds
, req
->old_dentry_drop
,
1901 req
->old_dentry_unless
);
1902 ldout(cct
, 25) << "encode_cap_releases exit (req: "
1903 << req
<< ", mds " << mds
<<dendl
;
// Evaluates whether a usable session to rank `mds` exists: there is an entry
// in mds_sessions and its state is STATE_OPEN or STATE_STALE.
// NOTE(review): the `return` keyword itself sits on a line not visible in this
// listing.
1906 bool Client::have_open_session(mds_rank_t mds
)
1909 mds_sessions
.count(mds
) &&
1910 (mds_sessions
[mds
]->state
== MetaSession::STATE_OPEN
||
1911 mds_sessions
[mds
]->state
== MetaSession::STATE_STALE
);
// Look up the session for rank `mds`. The visible code tests for a missing
// entry in mds_sessions and otherwise fetches the stored session pointer.
// NOTE(review): how `con` is checked against the session, and both return
// statements, are on lines not visible in this listing.
1914 MetaSession
*Client::_get_mds_session(mds_rank_t mds
, Connection
*con
)
1916 if (mds_sessions
.count(mds
) == 0)
1918 MetaSession
*s
= mds_sessions
[mds
];
// Return the existing session for rank `mds` if mds_sessions has one,
// otherwise start opening a new one with _open_mds_session() and return that.
1924 MetaSession
*Client::_get_or_open_mds_session(mds_rank_t mds
)
1926 if (mds_sessions
.count(mds
))
1927 return mds_sessions
[mds
];
1928 return _open_mds_session(mds
);
/**
 * Populate a map of strings with client-identifying metadata,
 * such as the hostname. Call this once at initialization.
 */
// Fill the `metadata` map with identifying information about this client:
// "hostname" (from u.nodename), "pid", "entity_id", "root" (only when
// mount_root is non-empty), "ceph_version" and "ceph_sha1", followed by any
// "key=value" pairs from the comma-separated client_metadata config option.
//
// NOTE(review): the declaration of `u` and `r` and the branch that selects
// between the two hostname log messages are not visible in this listing.
// NOTE(review): `r` is presumably the return value of uname(); if so,
// cpp_strerror(r) formats that return value rather than errno -- confirm.
1935 void Client::populate_metadata(const std::string
&mount_root
)
1941 metadata
["hostname"] = u
.nodename
;
1942 ldout(cct
, 20) << __func__
<< " read hostname '" << u
.nodename
<< "'" << dendl
;
1944 ldout(cct
, 1) << __func__
<< " failed to read hostname (" << cpp_strerror(r
) << ")" << dendl
;
1947 metadata
["pid"] = stringify(getpid());
1949 // Ceph entity id (the '0' in "client.0")
1950 metadata
["entity_id"] = cct
->_conf
->name
.get_id();
1952 // Our mount position
1953 if (!mount_root
.empty()) {
1954 metadata
["root"] = mount_root
;
1958 metadata
["ceph_version"] = pretty_version_to_str();
1959 metadata
["ceph_sha1"] = git_version_to_str();
1961 // Apply any metadata from the user's configured overrides
1962 std::vector
<std::string
> tokens
;
1963 get_str_vec(cct
->_conf
->client_metadata
, ",", tokens
);
1964 for (const auto &i
: tokens
) {
1965 auto eqpos
= i
.find("=");
1966 // Throw out anything that isn't of the form "<str>=<str>"
// NOTE(review): std::string::find() returns either npos or an index below
// size(), so the `eqpos == i.size()` test below can never be true; a token
// ending in '=' (empty value) is therefore accepted. Comparing against
// i.size() - 1 may have been intended -- confirm.
1967 if (eqpos
== 0 || eqpos
== std::string::npos
|| eqpos
== i
.size()) {
1968 lderr(cct
) << "Invalid metadata keyval pair: '" << i
<< "'" << dendl
;
1971 metadata
[i
.substr(0, eqpos
)] = i
.substr(eqpos
+ 1);
/**
 * Optionally add or override client metadata fields.
 */
// Set metadata field `k` to `v`. Takes client_lock for the duration of the
// call, asserts `initialized`, and logs a level-1 warning showing the old and
// new value when `k` is already present.
// NOTE(review): the assignment into `metadata` is on a line not visible in
// this listing.
1978 void Client::update_metadata(std::string
const &k
, std::string
const &v
)
1980 Mutex::Locker
l(client_lock
);
1981 assert(initialized
);
1983 if (metadata
.count(k
)) {
1984 ldout(cct
, 1) << __func__
<< " warning, overriding metadata field '" << k
1985 << "' from '" << metadata
[k
] << "' to '" << v
<< "'" << dendl
;
// Create a session object for rank `mds` and start opening it.
//
// Asserts that no session exists yet, allocates a MetaSession, fills in the
// rank, the instance address from the mdsmap and a messenger connection, sets
// state to STATE_OPENING, and stores it in mds_sessions. If this rank rejected
// us before from the same instance the open request is skipped; if the
// instance changed, the rejection record is erased. Otherwise a
// CEPH_SESSION_REQUEST_OPEN message carrying the client metadata is sent.
// NOTE(review): the return statements are not visible in this listing;
// presumably `session`.
1991 MetaSession
*Client::_open_mds_session(mds_rank_t mds
)
1993 ldout(cct
, 10) << "_open_mds_session mds." << mds
<< dendl
;
1994 assert(mds_sessions
.count(mds
) == 0);
1995 MetaSession
*session
= new MetaSession
;
1996 session
->mds_num
= mds
;
1998 session
->inst
= mdsmap
->get_inst(mds
);
1999 session
->con
= messenger
->get_connection(session
->inst
);
2000 session
->state
= MetaSession::STATE_OPENING
;
2001 session
->mds_state
= MDSMap::STATE_NULL
;
2002 mds_sessions
[mds
] = session
;
2004 // Maybe skip sending a request to open if this MDS daemon
2005 // has previously sent us a REJECT.
2006 if (rejected_by_mds
.count(mds
)) {
2007 if (rejected_by_mds
[mds
] == session
->inst
) {
2008 ldout(cct
, 4) << "_open_mds_session mds." << mds
<< " skipping "
2009 "because we were rejected" << dendl
;
2012 ldout(cct
, 4) << "_open_mds_session mds." << mds
<< " old inst "
2013 "rejected us, trying with new inst" << dendl
;
2014 rejected_by_mds
.erase(mds
);
// the open request carries the client metadata map
2018 MClientSession
*m
= new MClientSession(CEPH_SESSION_REQUEST_OPEN
);
2019 m
->client_meta
= metadata
;
2020 session
->con
->send_message(m
);
// Ask the MDS to close session `s`: log it, move the session to
// STATE_CLOSING and send a CEPH_SESSION_REQUEST_CLOSE message carrying the
// session's current seq.
2024 void Client::_close_mds_session(MetaSession
*s
)
2026 ldout(cct
, 2) << "_close_mds_session mds." << s
->mds_num
<< " seq " << s
->seq
<< dendl
;
2027 s
->state
= MetaSession::STATE_CLOSING
;
2028 s
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE
, s
->seq
));
// Tear down session `s` locally: mark it STATE_CLOSED, mark its connection
// down, wake anything waiting for the session to open, signal mount_cond,
// remove the session's caps, kick its requests via kick_requests_closed(),
// and erase it from mds_sessions.
// NOTE(review): whether and where the MetaSession object is freed is not
// visible in this listing.
2031 void Client::_closed_mds_session(MetaSession
*s
)
2033 s
->state
= MetaSession::STATE_CLOSED
;
2034 s
->con
->mark_down();
2035 signal_context_list(s
->waiting_for_open
);
2036 mount_cond
.Signal();
2037 remove_session_caps(s
);
2038 kick_requests_closed(s
);
2039 mds_sessions
.erase(s
->mds_num
);
// Dispatch an MClientSession message from an MDS.
//
// The session is looked up from the sender's rank and connection; messages
// from an MDS without a session are logged and discarded. Per op:
//   OPEN         renew caps, mark the session STATE_OPEN, signal mount_cond,
//                connect to export targets, wake waiting_for_open waiters
//   CLOSE        _closed_mds_session()
//   RENEWCAPS    when the seq matches cap_renew_seq, compute from
//                last_cap_renew_request + session timeout and wake inode waiters
//   STALE        renew caps
//   RECALL_STATE trim caps down to the message's max_caps
//   FLUSHMSG     reply with FLUSHMSG_ACK carrying the same seq
//   FORCE_RO     force_session_readonly()
//   REJECT       remember the rejecting instance, then _closed_mds_session()
// NOTE(review): the `break` statements, the target of the RENEWCAPS
// assignment, any default case and the release of `m` are on lines not visible
// in this listing.
2043 void Client::handle_client_session(MClientSession
*m
)
2045 mds_rank_t from
= mds_rank_t(m
->get_source().num());
2046 ldout(cct
, 10) << "handle_client_session " << *m
<< " from mds." << from
<< dendl
;
2048 MetaSession
*session
= _get_mds_session(from
, m
->get_connection().get());
2050 ldout(cct
, 10) << " discarding session message from sessionless mds " << m
->get_source_inst() << dendl
;
2055 switch (m
->get_op()) {
2056 case CEPH_SESSION_OPEN
:
2057 renew_caps(session
);
2058 session
->state
= MetaSession::STATE_OPEN
;
2060 mount_cond
.Signal();
2062 connect_mds_targets(from
);
2063 signal_context_list(session
->waiting_for_open
);
2066 case CEPH_SESSION_CLOSE
:
2067 _closed_mds_session(session
);
2070 case CEPH_SESSION_RENEWCAPS
:
2071 if (session
->cap_renew_seq
== m
->get_seq()) {
2073 session
->last_cap_renew_request
+ mdsmap
->get_session_timeout();
2074 wake_inode_waiters(session
);
2078 case CEPH_SESSION_STALE
:
2079 renew_caps(session
);
2082 case CEPH_SESSION_RECALL_STATE
:
2083 trim_caps(session
, m
->get_max_caps());
2086 case CEPH_SESSION_FLUSHMSG
:
2087 session
->con
->send_message(new MClientSession(CEPH_SESSION_FLUSHMSG_ACK
, m
->get_seq()));
2090 case CEPH_SESSION_FORCE_RO
:
2091 force_session_readonly(session
);
2094 case CEPH_SESSION_REJECT
:
// remember which instance rejected us before tearing the session down
2095 rejected_by_mds
[session
->mds_num
] = session
->inst
;
2096 _closed_mds_session(session
);
// Scan mds_sessions for a session in STATE_STALE. Asserts that the calling
// thread holds client_lock.
// NOTE(review): the return statements are not visible in this listing;
// presumably true on the first stale session and false otherwise.
2107 bool Client::_any_stale_sessions() const
2109 assert(client_lock
.is_locked_by_me());
2111 for (const auto &i
: mds_sessions
) {
2112 if (i
.second
->state
== MetaSession::STATE_STALE
) {
// Close every session that is in STATE_STALE by calling
// _closed_mds_session() on it.
// NOTE(review): the for header has no increment and _closed_mds_session()
// erases the entry from mds_sessions, so the iterator has to be advanced
// before that call; the advancing statement is on a line not visible in this
// listing -- confirm.
2120 void Client::_kick_stale_sessions()
2122 ldout(cct
, 1) << "kick_stale_sessions" << dendl
;
2124 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2125 p
!= mds_sessions
.end(); ) {
2126 MetaSession
*s
= p
->second
;
2128 if (s
->state
== MetaSession::STATE_STALE
)
2129 _closed_mds_session(s
);
// Build the wire message for `request` and send it over `session`.
//
// Visible steps: build the MClientRequest; flag "dentry wanted" when the
// request has a dentry; for a request that already got an unsafe reply, mark
// the message as a replayed op and carry the target's ino; encode cap releases
// (cleared again when drop_cap_releases is set) and move them into the
// message; stamp the mdsmap epoch, and for SETXATTR also the osdmap epoch;
// set sent_stamp on first send (request->mds == -1); remember the cap
// migration seq of the request inode's cap on this MDS; queue the request on
// session->requests; send.
//
// @param drop_cap_releases  discard the encoded cap releases instead of
//                           sending them
// NOTE(review): the assignment of request->mds is on a line not visible in
// this listing.
2133 void Client::send_request(MetaRequest
*request
, MetaSession
*session
,
2134 bool drop_cap_releases
)
2137 mds_rank_t mds
= session
->mds_num
;
2138 ldout(cct
, 10) << "send_request rebuilding request " << request
->get_tid()
2139 << " for mds." << mds
<< dendl
;
2140 MClientRequest
*r
= build_client_request(request
);
2141 if (request
->dentry()) {
2142 r
->set_dentry_wanted();
// resending after an unsafe reply: mark as replay and name the target ino
2144 if (request
->got_unsafe
) {
2145 r
->set_replayed_op();
2146 if (request
->target
)
2147 r
->head
.ino
= request
->target
->ino
;
2149 encode_cap_releases(request
, mds
);
2150 if (drop_cap_releases
) // we haven't sent cap reconnect yet, drop cap releases
2151 request
->cap_releases
.clear();
2153 r
->releases
.swap(request
->cap_releases
);
2155 r
->set_mdsmap_epoch(mdsmap
->get_epoch());
2156 if (r
->head
.op
== CEPH_MDS_OP_SETXATTR
) {
2157 objecter
->with_osdmap([r
](const OSDMap
& o
) {
2158 r
->set_osdmap_epoch(o
.get_epoch());
// sent_stamp is only set while request->mds is still -1
2162 if (request
->mds
== -1) {
2163 request
->sent_stamp
= ceph_clock_now();
2164 ldout(cct
, 20) << "send_request set sent_stamp to " << request
->sent_stamp
<< dendl
;
2168 Inode
*in
= request
->inode();
2169 if (in
&& in
->caps
.count(mds
))
2170 request
->sent_on_mseq
= in
->caps
[mds
]->mseq
;
2172 session
->requests
.push_back(&request
->item
);
2174 ldout(cct
, 10) << "send_request " << *r
<< " to mds." << mds
<< dendl
;
2175 session
->con
->send_message(r
);
// Allocate and fill an MClientRequest for `request`.
//
// Copies tid, op stamp and the raw request head; if the request has no path
// yet, derives one from its inode, or from its dentry (the dentry's inode, or
// the parent directory's path plus the dentry name), logging a level-1 warning
// when neither is usable; then copies both filepaths, the data payload, the
// retry attempt (post-incremented on the request), num_fwd and the caller's
// gid list.
// NOTE(review): the conditions selecting between the path sources, the
// declaration of `_gids` and the return statement are on lines not visible in
// this listing.
2178 MClientRequest
* Client::build_client_request(MetaRequest
*request
)
2180 MClientRequest
*req
= new MClientRequest(request
->get_op());
2181 req
->set_tid(request
->tid
);
2182 req
->set_stamp(request
->op_stamp
);
2183 memcpy(&req
->head
, &request
->head
, sizeof(ceph_mds_request_head
));
2185 // if the filepath's haven't been set, set them!
2186 if (request
->path
.empty()) {
2187 Inode
*in
= request
->inode();
2188 Dentry
*de
= request
->dentry();
2190 in
->make_nosnap_relative_path(request
->path
);
2193 de
->inode
->make_nosnap_relative_path(request
->path
);
2195 de
->dir
->parent_inode
->make_nosnap_relative_path(request
->path
);
2196 request
->path
.push_dentry(de
->name
);
2198 else ldout(cct
, 1) << "Warning -- unable to construct a filepath!"
2199 << " No path, inode, or appropriately-endowed dentry given!"
2201 } else ldout(cct
, 1) << "Warning -- unable to construct a filepath!"
2202 << " No path, inode, or dentry given!"
2205 req
->set_filepath(request
->get_filepath());
2206 req
->set_filepath2(request
->get_filepath2());
2207 req
->set_data(request
->data
);
// the request's retry counter is bumped as a side effect
2208 req
->set_retry_attempt(request
->retry_attempt
++);
2209 req
->head
.num_fwd
= request
->num_fwd
;
2211 int gid_count
= request
->perms
.get_gids(&_gids
);
2212 req
->set_gid_list(gid_count
, _gids
);
// Handle an MDS telling us it forwarded one of our requests to another rank.
//
// Looks up the sender's session and the pending request by tid (logging when
// none is pending), resets the request's retry counter, unlinks it from its
// current session list, records the forward count and the destination rank in
// resend_mds, and signals caller_cond.
// NOTE(review): the early returns, the release of `fwd`, and whether
// caller_cond is checked for null before Signal() are on lines not visible in
// this listing.
2218 void Client::handle_client_request_forward(MClientRequestForward
*fwd
)
2220 mds_rank_t mds
= mds_rank_t(fwd
->get_source().num());
2221 MetaSession
*session
= _get_mds_session(mds
, fwd
->get_connection().get());
2226 ceph_tid_t tid
= fwd
->get_tid();
2228 if (mds_requests
.count(tid
) == 0) {
2229 ldout(cct
, 10) << "handle_client_request_forward no pending request on tid " << tid
<< dendl
;
2234 MetaRequest
*request
= mds_requests
[tid
];
2237 // reset retry counter
2238 request
->retry_attempt
= 0;
2240 // request not forwarded, or dest mds has no session.
2242 ldout(cct
, 10) << "handle_client_request tid " << tid
2243 << " fwd " << fwd
->get_num_fwd()
2244 << " to mds." << fwd
->get_dest_mds()
2245 << ", resending to " << fwd
->get_dest_mds()
// detach from the old session and record where to resend
2249 request
->item
.remove_myself();
2250 request
->num_fwd
= fwd
->get_num_fwd();
2251 request
->resend_mds
= fwd
->get_dest_mds();
2252 request
->caller_cond
->Signal();
// Tests whether the op of `req` is one of MKNOD, LINK, UNLINK, RENAME, MKDIR,
// RMDIR, SYMLINK or CREATE.
// NOTE(review): the return statements are not visible in this listing;
// presumably true when the op matches and false otherwise.
2257 bool Client::is_dir_operation(MetaRequest
*req
)
2259 int op
= req
->get_op();
2260 if (op
== CEPH_MDS_OP_MKNOD
|| op
== CEPH_MDS_OP_LINK
||
2261 op
== CEPH_MDS_OP_UNLINK
|| op
== CEPH_MDS_OP_RENAME
||
2262 op
== CEPH_MDS_OP_MKDIR
|| op
== CEPH_MDS_OP_RMDIR
||
2263 op
== CEPH_MDS_OP_SYMLINK
|| op
== CEPH_MDS_OP_CREATE
)
// Handle an MClientReply from an MDS.
//
// Visible flow:
//  - find the sender's session and the pending request by tid; an unknown tid
//    is logged as an error;
//  - a second unsafe reply for a request that already got one is logged as a
//    duplicate;
//  - on -ESTALE: set send_to_auth, re-run choose_target_mds(), and either
//    give up ("have to return ESTALE") when the choice is the same MDS under
//    the cap conditions tested below, or signal the caller;
//  - store the reply on the request and run insert_trace();
//  - for an unsafe reply: mark got_unsafe and link the request into the
//    session's unsafe_requests, and (for directory operations) the directory
//    inode's and the target inode's unsafe_ops;
//  - signal the caller once, then wait on a local cond until the caller clears
//    dispatch_cond;
//  - for a safe reply: unlink the unsafe items, wake waitfor_safe, unlink the
//    request from its session and unregister it;
//  - finally signal mount_cond.
// NOTE(review): early returns, the is_safe branches' exact structure, the
// declaration of `cond` and the release of reply/request references are on
// lines not visible in this listing.
2268 void Client::handle_client_reply(MClientReply
*reply
)
2270 mds_rank_t mds_num
= mds_rank_t(reply
->get_source().num());
2271 MetaSession
*session
= _get_mds_session(mds_num
, reply
->get_connection().get());
2277 ceph_tid_t tid
= reply
->get_tid();
2278 bool is_safe
= reply
->is_safe();
2280 if (mds_requests
.count(tid
) == 0) {
2281 lderr(cct
) << "handle_client_reply no pending request on tid " << tid
2282 << " safe is:" << is_safe
<< dendl
;
2286 MetaRequest
*request
= mds_requests
.at(tid
);
2288 ldout(cct
, 20) << "handle_client_reply got a reply. Safe:" << is_safe
2289 << " tid " << tid
<< dendl
;
2291 if (request
->got_unsafe
&& !is_safe
) {
2292 //duplicate response
2293 ldout(cct
, 0) << "got a duplicate reply on tid " << tid
<< " from mds "
2294 << mds_num
<< " safe:" << is_safe
<< dendl
;
2299 if (-ESTALE
== reply
->get_result()) { // see if we can get to proper MDS
2300 ldout(cct
, 20) << "got ESTALE on tid " << request
->tid
2301 << " from mds." << request
->mds
<< dendl
;
2302 request
->send_to_auth
= true;
2303 request
->resend_mds
= choose_target_mds(request
);
2304 Inode
*in
= request
->inode();
2305 if (request
->resend_mds
>= 0 &&
2306 request
->resend_mds
== request
->mds
&&
2308 in
->caps
.count(request
->resend_mds
) == 0 ||
2309 request
->sent_on_mseq
== in
->caps
[request
->resend_mds
]->mseq
)) {
2310 // have to return ESTALE
2312 request
->caller_cond
->Signal();
2316 ldout(cct
, 20) << "have to return ESTALE" << dendl
;
// attach the reply to the request and fold its trace into the cache
2319 assert(request
->reply
== NULL
);
2320 request
->reply
= reply
;
2321 insert_trace(request
, session
);
2323 // Handle unsafe reply
2325 request
->got_unsafe
= true;
2326 session
->unsafe_requests
.push_back(&request
->unsafe_item
);
2327 if (is_dir_operation(request
)) {
2328 Inode
*dir
= request
->inode();
2330 dir
->unsafe_ops
.push_back(&request
->unsafe_dir_item
);
2332 if (request
->target
) {
2333 InodeRef
&in
= request
->target
;
2334 in
->unsafe_ops
.push_back(&request
->unsafe_target_item
);
2338 // Only signal the caller once (on the first reply):
2339 // Either it's an unsafe reply, or it's a safe reply and no unsafe reply was sent.
2340 if (!is_safe
|| !request
->got_unsafe
) {
2342 request
->dispatch_cond
= &cond
;
2345 ldout(cct
, 20) << "handle_client_reply signalling caller " << (void*)request
->caller_cond
<< dendl
;
2346 request
->caller_cond
->Signal();
2348 // wake for kick back
2349 while (request
->dispatch_cond
) {
2350 ldout(cct
, 20) << "handle_client_reply awaiting kickback on tid " << tid
<< " " << &cond
<< dendl
;
2351 cond
.Wait(client_lock
);
2356 // the filesystem change is committed to disk
2357 // we're done, clean up
2358 if (request
->got_unsafe
) {
2359 request
->unsafe_item
.remove_myself();
2360 request
->unsafe_dir_item
.remove_myself();
2361 request
->unsafe_target_item
.remove_myself();
2362 signal_cond_list(request
->waitfor_safe
);
2364 request
->item
.remove_myself();
2365 unregister_request(request
);
2368 mount_cond
.Signal();
// React to a "full" condition on `pool` (-1 matches every pool in the inode
// check below).
//
// Cancels outstanding writes for the pool with -ENOSPC through the objecter,
// then, for every cached inode with dirty or in-flight data whose layout is in
// that pool, purges its object set from the object cacher and records -ENOSPC
// as its async error. If the objecter returned an epoch other than
// (epoch_t)-1, that epoch is passed to set_cap_epoch_barrier().
2371 void Client::_handle_full_flag(int64_t pool
)
2373 ldout(cct
, 1) << __func__
<< ": FULL: cancelling outstanding operations "
2374 << "on " << pool
<< dendl
;
2375 // Cancel all outstanding ops in this pool with -ENOSPC: it is necessary
2376 // to do this rather than blocking, because otherwise when we fill up we
2377 // potentially lock caps forever on files with dirty pages, and we need
2378 // to be able to release those caps to the MDS so that it can delete files
2379 // and free up space.
2380 epoch_t cancelled_epoch
= objecter
->op_cancel_writes(-ENOSPC
, pool
);
2382 // For all inodes with layouts in this pool and a pending flush write op
2383 // (i.e. one of the ones we will cancel), we've got to purge_set their data
2384 // from ObjectCacher so that it doesn't re-issue the write in response to
2385 // the ENOSPC error.
2386 // Fortunately since we're cancelling everything in a given pool, we don't
2387 // need to know which ops belong to which ObjectSet, we can just blow all
2388 // the un-flushed cached data away and mark any dirty inodes' async_err
2389 // field with -ENOSPC as long as we're sure all the ops we cancelled were
2390 // affecting this pool, and all the objectsets we're purging were also
2392 for (unordered_map
<vinodeno_t
,Inode
*>::iterator i
= inode_map
.begin();
2393 i
!= inode_map
.end(); ++i
)
2395 Inode
*inode
= i
->second
;
2396 if (inode
->oset
.dirty_or_tx
2397 && (pool
== -1 || inode
->layout
.pool_id
== pool
)) {
2398 ldout(cct
, 4) << __func__
<< ": FULL: inode 0x" << std::hex
<< i
->first
<< std::dec
2399 << " has dirty objects, purging and setting ENOSPC" << dendl
;
2400 objectcacher
->purge_set(&inode
->oset
);
2401 inode
->set_async_err(-ENOSPC
);
// (epoch_t)-1 is treated as "nothing to barrier on"
2405 if (cancelled_epoch
!= (epoch_t
)-1) {
2406 set_cap_epoch_barrier(cancelled_epoch
);
// React to a new OSD map.
//
// Blacklist handling: newly reported blacklist entries are pulled from the
// objecter; if we were not blacklisted and our own address is among them, the
// event is logged, every pending MDS request is aborted with -EBLACKLISTED
// (signalling its caller when one is waiting), waiting_for_mdsmap waiters are
// woken, all MDS sessions are force-closed and all OSD writes are cancelled
// with -EBLACKLISTED. If we were already blacklisted, the flag is refreshed
// from the current map.
//
// Full handling: with the global full flag set, _handle_full_flag(-1) is
// called; otherwise the pools flagged FLAG_FULL are collected under the osdmap
// accessor and handled one by one afterwards, and if any pool was full a
// further map is requested from the objecter.
// NOTE(review): the statement that sets `blacklisted`, the iterator advance in
// the request loop and the release of `m` are on lines not visible in this
// listing.
2410 void Client::handle_osd_map(MOSDMap
*m
)
2412 std::set
<entity_addr_t
> new_blacklists
;
2413 objecter
->consume_blacklist_events(&new_blacklists
);
2415 const auto myaddr
= messenger
->get_myaddr();
2416 if (!blacklisted
&& new_blacklists
.count(myaddr
)) {
2417 auto epoch
= objecter
->with_osdmap([](const OSDMap
&o
){
2418 return o
.get_epoch();
2420 lderr(cct
) << "I was blacklisted at osd epoch " << epoch
<< dendl
;
2422 for (std::map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2423 p
!= mds_requests
.end(); ) {
2424 auto req
= p
->second
;
2426 req
->abort(-EBLACKLISTED
);
2427 if (req
->caller_cond
) {
2429 req
->caller_cond
->Signal();
2433 // Progress aborts on any requests that were on this waitlist. Any
2434 // requests that were on a waiting_for_open session waitlist
2435 // will get kicked during close session below.
2436 signal_cond_list(waiting_for_mdsmap
);
2438 // Force-close all sessions: assume this is not abandoning any state
2439 // on the MDS side because the MDS will have seen the blacklist too.
2440 while(!mds_sessions
.empty()) {
2441 auto i
= mds_sessions
.begin();
2442 auto session
= i
->second
;
2443 _closed_mds_session(session
);
2446 // Since we know all our OSD ops will fail, cancel them all preemptively,
2447 // so that on an unhealthy cluster we can umount promptly even if e.g.
2448 // some PGs were inaccessible.
2449 objecter
->op_cancel_writes(-EBLACKLISTED
);
2451 } else if (blacklisted
) {
2452 // Handle case where we were blacklisted but no longer are
2453 blacklisted
= objecter
->with_osdmap([myaddr
](const OSDMap
&o
){
2454 return o
.is_blacklisted(myaddr
);});
2457 if (objecter
->osdmap_full_flag()) {
2458 _handle_full_flag(-1);
2460 // Accumulate local list of full pools so that I can drop
2461 // the objecter lock before re-entering objecter in
2463 std::vector
<int64_t> full_pools
;
2465 objecter
->with_osdmap([&full_pools
](const OSDMap
&o
) {
2466 for (const auto& kv
: o
.get_pools()) {
2467 if (kv
.second
.has_flag(pg_pool_t::FLAG_FULL
)) {
2468 full_pools
.push_back(kv
.first
);
2473 for (auto p
: full_pools
)
2474 _handle_full_flag(p
);
2476 // Subscribe to subsequent maps to watch for the full flag going
2477 // away. For the global full flag objecter does this for us, but
2478 // it pays no attention to the per-pool full flag so in this branch
2479 // we do it ourselves.
2480 if (!full_pools
.empty()) {
2481 objecter
->maybe_request_map();
2489 // ------------------------
2490 // incoming messages
// Messenger dispatch entry point for the client.
//
// Takes client_lock for the duration of the call, then switches on the
// message type and forwards the message, cast to its concrete class, to the
// matching handle_*() method. MSG_COMMAND_REPLY is forwarded only when the
// sender is an MDS. The trailing section logs an "unmounting: trim pass",
// compares the lru + inode_map size against a value sampled earlier and
// signals mount_cond when the cache shrank.
//
// @param m the incoming message
2493 bool Client::ms_dispatch(Message
*m
)
2495 Mutex::Locker
l(client_lock
);
// NOTE(review): the condition guarding this "inactive" discard is not
// visible here -- confirm against the full function.
2497 ldout(cct
, 10) << "inactive, discarding " << *m
<< dendl
;
2502 switch (m
->get_type()) {
2503 // mounting and mds sessions
2504 case CEPH_MSG_MDS_MAP
:
2505 handle_mds_map(static_cast<MMDSMap
*>(m
));
2507 case CEPH_MSG_FS_MAP
:
2508 handle_fs_map(static_cast<MFSMap
*>(m
));
2510 case CEPH_MSG_FS_MAP_USER
:
2511 handle_fs_map_user(static_cast<MFSMapUser
*>(m
));
2513 case CEPH_MSG_CLIENT_SESSION
:
2514 handle_client_session(static_cast<MClientSession
*>(m
));
2517 case CEPH_MSG_OSD_MAP
:
2518 handle_osd_map(static_cast<MOSDMap
*>(m
));
2522 case CEPH_MSG_CLIENT_REQUEST_FORWARD
:
2523 handle_client_request_forward(static_cast<MClientRequestForward
*>(m
));
2525 case CEPH_MSG_CLIENT_REPLY
:
2526 handle_client_reply(static_cast<MClientReply
*>(m
));
2529 case CEPH_MSG_CLIENT_SNAP
:
2530 handle_snap(static_cast<MClientSnap
*>(m
));
2532 case CEPH_MSG_CLIENT_CAPS
:
2533 handle_caps(static_cast<MClientCaps
*>(m
));
2535 case CEPH_MSG_CLIENT_LEASE
:
2536 handle_lease(static_cast<MClientLease
*>(m
));
2538 case MSG_COMMAND_REPLY
:
// Command replies are only accepted from MDS daemons.
2539 if (m
->get_source().type() == CEPH_ENTITY_TYPE_MDS
) {
2540 handle_command_reply(static_cast<MCommandReply
*>(m
));
2545 case CEPH_MSG_CLIENT_QUOTA
:
2546 handle_quota(static_cast<MClientQuota
*>(m
));
2555 ldout(cct
, 10) << "unmounting: trim pass, size was " << lru
.lru_get_size()
2556 << "+" << inode_map
.size() << dendl
;
// Remember the combined cache size so shrinkage can be detected below.
2557 long unsigned size
= lru
.lru_get_size() + inode_map
.size();
2559 if (size
< lru
.lru_get_size() + inode_map
.size()) {
2560 ldout(cct
, 10) << "unmounting: trim pass, cache shrank, poking unmount()" << dendl
;
2561 mount_cond
.Signal();
2563 ldout(cct
, 10) << "unmounting: trim pass, size still " << lru
.lru_get_size()
2564 << "+" << inode_map
.size() << dendl
;
// Install the FSMap carried by the message as the client's current fsmap,
// wake everything on waiting_for_fsmap, and report the new epoch to the
// MonClient for the "fsmap" subscription.
//
// @param m the FSMap message
2571 void Client::handle_fs_map(MFSMap
*m
)
2573 fsmap
.reset(new FSMap(m
->get_fsmap()));
2576 signal_cond_list(waiting_for_fsmap
);
2578 monclient
->sub_got("fsmap", fsmap
->get_epoch());
// Replace fsmap_user with a copy of the map carried by the message, report
// the new epoch to the MonClient for the "fsmap.user" subscription, and wake
// everything on waiting_for_fsmap.
//
// @param m the FSMapUser message
2581 void Client::handle_fs_map_user(MFSMapUser
*m
)
2583 fsmap_user
.reset(new FSMapUser
);
2584 *fsmap_user
= m
->get_fsmap();
2587 monclient
->sub_got("fsmap.user", fsmap_user
->get_epoch());
2588 signal_cond_list(waiting_for_fsmap
);
// Handle a new MDSMap from the monitor.
//
// Maps whose epoch is not newer than the current one are only logged.
// Otherwise the current map is swapped into `oldmap`, the new one is decoded
// from the message, and then:
//  - commands addressed to an MDS GID that no longer exists or is laggy are
//    failed ("MDS <gid> went away", -ETIMEDOUT) and erased from the table;
//  - each MDS session is compared against old/new state: connections are
//    marked down when the rank is not up or its address changed, a
//    reconnect is sent when the rank enters STATE_RECONNECT, requests and
//    caps are kicked when it becomes active, and the session is closed when
//    the rank is STATE_NULL and beyond max_mds;
//  - mdsmap waiters are woken and the "mdsmap" subscription is acknowledged.
//
// @param m the MDSMap message
2591 void Client::handle_mds_map(MMDSMap
* m
)
2593 if (m
->get_epoch() <= mdsmap
->get_epoch()) {
2594 ldout(cct
, 1) << "handle_mds_map epoch " << m
->get_epoch()
2595 << " is identical to or older than our "
2596 << mdsmap
->get_epoch() << dendl
;
2601 ldout(cct
, 1) << "handle_mds_map epoch " << m
->get_epoch() << dendl
;
// After the swap `oldmap` owns the previous map and `mdsmap` a fresh one.
2603 std::unique_ptr
<MDSMap
> oldmap(new MDSMap
);
2604 oldmap
.swap(mdsmap
);
2606 mdsmap
->decode(m
->get_encoded());
2608 // Cancel any commands for missing or laggy GIDs
2609 std::list
<ceph_tid_t
> cancel_ops
;
2610 auto &commands
= command_table
.get_commands();
2611 for (const auto &i
: commands
) {
2612 auto &op
= i
.second
;
2613 const mds_gid_t op_mds_gid
= op
.mds_gid
;
2614 if (mdsmap
->is_dne_gid(op_mds_gid
) || mdsmap
->is_laggy_gid(op_mds_gid
)) {
2615 ldout(cct
, 1) << __func__
<< ": cancelling command op " << i
.first
<< dendl
;
2616 cancel_ops
.push_back(i
.first
);
2618 std::ostringstream ss
;
2619 ss
<< "MDS " << op_mds_gid
<< " went away";
2620 *(op
.outs
) = ss
.str();
2622 op
.con
->mark_down();
2624 op
.on_finish
->complete(-ETIMEDOUT
);
// Erase after the walk: the tids were collected instead of erasing while
// iterating over the command table.
2629 for (std::list
<ceph_tid_t
>::iterator i
= cancel_ops
.begin();
2630 i
!= cancel_ops
.end(); ++i
) {
2631 command_table
.erase(*i
);
2635 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2636 p
!= mds_sessions
.end(); ) {
2637 mds_rank_t mds
= p
->first
;
2638 MetaSession
*session
= p
->second
;
2641 int oldstate
= oldmap
->get_state(mds
);
2642 int newstate
= mdsmap
->get_state(mds
);
2643 if (!mdsmap
->is_up(mds
)) {
2644 session
->con
->mark_down();
2645 } else if (mdsmap
->get_inst(mds
) != session
->inst
) {
2646 session
->con
->mark_down();
2647 session
->inst
= mdsmap
->get_inst(mds
);
2648 // When new MDS starts to take over, notify kernel to trim unused entries
2649 // in its dcache/icache. Hopefully, the kernel will release some unused
2650 // inodes before the new MDS enters reconnect state.
2651 trim_cache_for_reconnect(session
);
2652 } else if (oldstate
== newstate
)
2653 continue; // no change
2655 session
->mds_state
= newstate
;
2656 if (newstate
== MDSMap::STATE_RECONNECT
) {
2657 session
->con
= messenger
->get_connection(session
->inst
);
2658 send_reconnect(session
);
2659 } else if (newstate
>= MDSMap::STATE_ACTIVE
) {
2660 if (oldstate
< MDSMap::STATE_ACTIVE
) {
2661 // kick new requests
2662 kick_requests(session
);
2663 kick_flushing_caps(session
);
2664 signal_context_list(session
->waiting_for_open
);
2665 kick_maxsize_requests(session
);
2666 wake_inode_waiters(session
);
2668 connect_mds_targets(mds
);
2669 } else if (newstate
== MDSMap::STATE_NULL
&&
2670 mds
>= mdsmap
->get_max_mds()) {
2671 _closed_mds_session(session
);
2675 // kick any waiting threads
2676 signal_cond_list(waiting_for_mdsmap
);
2680 monclient
->sub_got("mdsmap", mdsmap
->get_epoch());
// Build and send an MClientReconnect to the MDS behind `session`.
//
// Before building the message the cache is trimmed, the session's readonly
// flag is cleared, any pending release message is dropped, connections to
// the MDS's targets are opened and unsafe requests are resent. Then, for
// every inode in inode_map holding a cap from this MDS, the cap's seq,
// issue_seq and mseq are reset to 0, issued is set to implemented, and the
// cap is added to the message; each snaprealm is added once, tracked via
// did_snaprealm. Finally early_kick_flushing_caps() runs, the message is
// sent on the session connection and mount_cond is signalled.
//
// @param session the MDS session being reconnected
2683 void Client::send_reconnect(MetaSession
*session
)
2685 mds_rank_t mds
= session
->mds_num
;
2686 ldout(cct
, 10) << "send_reconnect to mds." << mds
<< dendl
;
2688 // trim unused caps to reduce MDS's cache rejoin time
2689 trim_cache_for_reconnect(session
);
2691 session
->readonly
= false;
// Drop any queued release message held by the session.
2693 if (session
->release
) {
2694 session
->release
->put();
2695 session
->release
= NULL
;
2698 // reset my cap seq number
2700 //connect to the mds' offload targets
2701 connect_mds_targets(mds
);
2702 //make sure unsafe requests get saved
2703 resend_unsafe_requests(session
);
2705 MClientReconnect
*m
= new MClientReconnect
;
2707 // i have an open session.
2708 ceph::unordered_set
<inodeno_t
> did_snaprealm
;
2709 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
= inode_map
.begin();
2710 p
!= inode_map
.end();
2712 Inode
*in
= p
->second
;
2713 if (in
->caps
.count(mds
)) {
2714 ldout(cct
, 10) << " caps on " << p
->first
2715 << " " << ccap_string(in
->caps
[mds
]->issued
)
2716 << " wants " << ccap_string(in
->caps_wanted())
2719 in
->make_long_path(path
);
2720 ldout(cct
, 10) << " path " << path
<< dendl
;
2723 _encode_filelocks(in
, flockbl
);
2725 Cap
*cap
= in
->caps
[mds
];
2726 cap
->seq
= 0; // reset seq.
2727 cap
->issue_seq
= 0; // reset seq.
2728 cap
->mseq
= 0; // reset seq.
2729 cap
->issued
= cap
->implemented
;
// snap_follows stays 0 unless the inode has cap snaps, in which case the
// key of the first one is used.
2731 snapid_t snap_follows
= 0;
2732 if (!in
->cap_snaps
.empty())
2733 snap_follows
= in
->cap_snaps
.begin()->first
;
2735 m
->add_cap(p
->first
.ino
,
2737 path
.get_ino(), path
.get_path(), // ino
2738 in
->caps_wanted(), // wanted
2739 cap
->issued
, // issued
// Add each snaprealm to the message only once.
2744 if (did_snaprealm
.count(in
->snaprealm
->ino
) == 0) {
2745 ldout(cct
, 10) << " snaprealm " << *in
->snaprealm
<< dendl
;
2746 m
->add_snaprealm(in
->snaprealm
->ino
, in
->snaprealm
->seq
, in
->snaprealm
->parent
);
2747 did_snaprealm
.insert(in
->snaprealm
->ino
);
2752 early_kick_flushing_caps(session
);
2754 session
->con
->send_message(m
);
2756 mount_cond
.Signal();
// Walk mds_requests after the MDS behind `session` became usable.
//
// Callers of aborted requests are woken through their caller_cond. Requests
// with retry_attempt > 0 are skipped ("new requests only"); the remaining
// ones whose target MDS matches session->mds_num are sent on this session.
//
// @param session the MDS session whose requests are kicked
2760 void Client::kick_requests(MetaSession
*session
)
2762 ldout(cct
, 10) << "kick_requests for mds." << session
->mds_num
<< dendl
;
2763 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2764 p
!= mds_requests
.end();
2766 MetaRequest
*req
= p
->second
;
// NOTE(review): the statement guarded by got_unsafe is not visible here;
// presumably such requests are skipped -- confirm.
2767 if (req
->got_unsafe
)
2769 if (req
->aborted()) {
2770 if (req
->caller_cond
) {
2772 req
->caller_cond
->Signal();
2776 if (req
->retry_attempt
> 0)
2777 continue; // new requests only
2778 if (req
->mds
== session
->mds_num
) {
2779 send_request(p
->second
, session
);
// Resend requests to the MDS behind `session`.
//
// First every entry on session->unsafe_requests is resent. Then
// mds_requests is walked and requests that have already been attempted
// (retry_attempt != 0) and target this MDS are resent with `true` as the
// third send_request() argument.
//
// @param session the MDS session to resend on
2784 void Client::resend_unsafe_requests(MetaSession
*session
)
2786 for (xlist
<MetaRequest
*>::iterator iter
= session
->unsafe_requests
.begin();
2789 send_request(*iter
, session
);
2791 // also re-send old requests when MDS enters reconnect stage. So that MDS can
2792 // process completed requests in clientreplay stage.
2793 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2794 p
!= mds_requests
.end();
2796 MetaRequest
*req
= p
->second
;
// NOTE(review): the statement guarded by got_unsafe is not visible here.
2797 if (req
->got_unsafe
)
2801 if (req
->retry_attempt
== 0)
2802 continue; // old requests only
2803 if (req
->mds
== session
->mds_num
)
2804 send_request(req
, session
, true);
// Wait for outstanding unsafe requests.
//
// Pass 1 collects, for every MDS session with a non-empty unsafe_requests
// list, the request at the back of that list. Pass 2 blocks on the
// waitfor_safe list of each collected request that is still on an unsafe
// list.
2808 void Client::wait_unsafe_requests()
2810 list
<MetaRequest
*> last_unsafe_reqs
;
2811 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2812 p
!= mds_sessions
.end();
2814 MetaSession
*s
= p
->second
;
2815 if (!s
->unsafe_requests
.empty()) {
2816 MetaRequest
*req
= s
->unsafe_requests
.back();
2818 last_unsafe_reqs
.push_back(req
);
2822 for (list
<MetaRequest
*>::iterator p
= last_unsafe_reqs
.begin();
2823 p
!= last_unsafe_reqs
.end();
2825 MetaRequest
*req
= *p
;
// Only wait if the request has not already left its unsafe list.
2826 if (req
->unsafe_item
.is_on_list())
2827 wait_on_list(req
->waitfor_safe
);
// Clean up requests that target an MDS whose session has been closed.
//
// For each request in mds_requests aimed at session->mds_num: wake the
// caller if one is waiting and detach the request from the session's list.
// Requests that already got an unsafe reply are additionally logged, removed
// from the unsafe lists, have their waitfor_safe waiters signalled, and are
// unregistered. On exit the session's request lists must be empty.
//
// @param session the closed MDS session
2832 void Client::kick_requests_closed(MetaSession
*session
)
2834 ldout(cct
, 10) << "kick_requests_closed for mds." << session
->mds_num
<< dendl
;
2835 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2836 p
!= mds_requests
.end(); ) {
2837 MetaRequest
*req
= p
->second
;
2839 if (req
->mds
== session
->mds_num
) {
2840 if (req
->caller_cond
) {
2842 req
->caller_cond
->Signal();
2844 req
->item
.remove_myself();
2845 if (req
->got_unsafe
) {
2846 lderr(cct
) << "kick_requests_closed removing unsafe request " << req
->get_tid() << dendl
;
2847 req
->unsafe_item
.remove_myself();
2848 req
->unsafe_dir_item
.remove_myself();
2849 req
->unsafe_target_item
.remove_myself();
2850 signal_cond_list(req
->waitfor_safe
);
2851 unregister_request(req
);
// Every request of this session must have been detached by the loop above.
2855 assert(session
->requests
.empty());
2856 assert(session
->unsafe_requests
.empty());
// Bookkeeping for a message pushed to us by an MDS.
//
// Logs the session's current seq and, when the session is in STATE_CLOSING,
// sends a CEPH_SESSION_REQUEST_CLOSE carrying that seq.
//
// @param s the MDS session the push arrived on
2866 void Client::got_mds_push(MetaSession
*s
)
2869 ldout(cct
, 10) << " mds." << s
->mds_num
<< " seq now " << s
->seq
<< dendl
;
2870 if (s
->state
== MetaSession::STATE_CLOSING
) {
2871 s
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE
, s
->seq
));
// Handle a lease message from an MDS.
//
// Only CEPH_MDS_LEASE_REVOKE is accepted (asserted). The session is looked
// up from the sender's rank and connection, got_mds_push() is run, and the
// inode named by the message (at CEPH_NOSNAP) is looked up in inode_map.
// For a CEPH_LOCK_DN mask the dentry m->dname is looked up in the inode's
// dir and the revocation is logged. A CEPH_MDS_LEASE_RELEASE reply with the
// same seq, mask, ino, first/last and dname is sent on the message's
// connection.
//
// @param m the lease message
2875 void Client::handle_lease(MClientLease
*m
)
2877 ldout(cct
, 10) << "handle_lease " << *m
<< dendl
;
2879 assert(m
->get_action() == CEPH_MDS_LEASE_REVOKE
);
2881 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
2882 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
2888 got_mds_push(session
);
2890 ceph_seq_t seq
= m
->get_seq();
2893 vinodeno_t
vino(m
->get_ino(), CEPH_NOSNAP
);
2894 if (inode_map
.count(vino
) == 0) {
2895 ldout(cct
, 10) << " don't have vino " << vino
<< dendl
;
2898 in
= inode_map
[vino
];
2900 if (m
->get_mask() & CEPH_LOCK_DN
) {
2901 if (!in
->dir
|| in
->dir
->dentries
.count(m
->dname
) == 0) {
2902 ldout(cct
, 10) << " don't have dir|dentry " << m
->get_ino() << "/" << m
->dname
<<dendl
;
2905 Dentry
*dn
= in
->dir
->dentries
[m
->dname
];
2906 ldout(cct
, 10) << " revoked DN lease on " << dn
<< dendl
;
// Reply with a release on the connection the revoke arrived on.
2911 m
->get_connection()->send_message(
2913 CEPH_MDS_LEASE_RELEASE
, seq
,
2914 m
->get_mask(), m
->get_ino(), m
->get_first(), m
->get_last(), m
->dname
));
// Drop `n` references on an inode via Inode::_put().
//
// The deletion path visible here removes all caps, releases the inode's
// object set from the object cacher, erases the inode from inode_map,
// releases its faked ino when faked inos are in use, and empties
// root_parents.
// NOTE(review): the conditions guarding these steps are not visible in this
// block; presumably they run only when no references remain -- confirm.
//
// @param in the inode to release
// @param n  number of references to drop
2918 void Client::put_inode(Inode
*in
, int n
)
2920 ldout(cct
, 10) << "put_inode on " << *in
<< dendl
;
2921 int left
= in
->_put(n
);
2924 remove_all_caps(in
);
2926 ldout(cct
, 10) << "put_inode deleting " << *in
<< dendl
;
2927 bool unclean
= objectcacher
->release_set(&in
->oset
);
2929 inode_map
.erase(in
->vino());
2930 if (use_faked_inos())
2931 _release_faked_ino(in
);
2936 while (!root_parents
.empty())
2937 root_parents
.erase(root_parents
.begin());
// Close an empty Dir attached to an inode.
//
// Preconditions (asserted): the dir is empty, it is the dir of its
// parent_inode, and that inode has fewer than two parent dentries. The
// inode's first parent dentry, if any, is unpinned, and a reference on the
// inode is dropped.
//
// @param dir the directory to close
2944 void Client::close_dir(Dir
*dir
)
2946 Inode
*in
= dir
->parent_inode
;
2947 ldout(cct
, 15) << "close_dir dir " << dir
<< " on " << in
<< dendl
;
2948 assert(dir
->is_empty());
2949 assert(in
->dir
== dir
);
2950 assert(in
->dn_set
.size() < 2); // dirs can't be hard-linked
2951 if (!in
->dn_set
.empty())
2952 in
->get_first_parent()->put(); // unpin dentry
2956 put_inode(in
); // unpin inode
2960 * Don't call this with in==NULL, use get_or_create for that
2961 * leave dn set to default NULL unless you're trying to add
2962 * a new inode to a pre-created Dentry
// Link the name `name` in `dir` to inode `in`, creating or reusing a Dentry.
//
// A newly created dentry is stored in dir->dentries and inserted into the
// LRU at mid position; both the new-dn and old-dn paths are logged. When an
// inode is supplied, references are taken on the dentry and the dentry is
// added to in->dn_set. A directory inode that already has a parent is first
// unlinked from its old dentry (keeping dir and dentry) after bumping the
// old parent's dir_release_count and clearing its complete/ordered state.
//
// @param dir  directory receiving the link
// @param name dentry name
// @param in   inode to link
// @param dn   existing dentry to link through (not visible here how a null
//             dn is distinguished -- see the "create a new Dentry" branch)
2964 Dentry
* Client::link(Dir
*dir
, const string
& name
, Inode
*in
, Dentry
*dn
)
2967 // create a new Dentry
2973 dir
->dentries
[dn
->name
] = dn
;
2974 lru
.lru_insert_mid(dn
); // mid or top?
2976 ldout(cct
, 15) << "link dir " << dir
->parent_inode
<< " '" << name
<< "' to inode " << in
2977 << " dn " << dn
<< " (new dn)" << dendl
;
2979 ldout(cct
, 15) << "link dir " << dir
->parent_inode
<< " '" << name
<< "' to inode " << in
2980 << " dn " << dn
<< " (old dn)" << dendl
;
2983 if (in
) { // link to inode
2987 dn
->get(); // dir -> dn pin
2989 dn
->get(); // ll_ref -> dn pin
// The dentry must not already be registered as a parent of this inode.
2992 assert(in
->dn_set
.count(dn
) == 0);
2994 // only one parent for directories!
2995 if (in
->is_dir() && !in
->dn_set
.empty()) {
2996 Dentry
*olddn
= in
->get_first_parent();
2997 assert(olddn
->dir
!= dir
|| olddn
->name
!= name
);
2998 Inode
*old_diri
= olddn
->dir
->parent_inode
;
2999 old_diri
->dir_release_count
++;
3000 clear_dir_complete_and_ordered(old_diri
, true);
3001 unlink(olddn
, true, true); // keep dir, dentry
3004 in
->dn_set
.insert(dn
);
3006 ldout(cct
, 20) << "link inode " << in
<< " parents now " << in
->dn_set
<< dendl
;
// Detach a dentry from its inode and, optionally, from its directory.
//
// The inode side drops references on the dentry and removes the dentry from
// in->dn_set (asserting it was present). The removal path erases the dentry
// from its directory's dentries map; the final visible check tests whether
// the directory became empty and keepdir is false.
//
// @param dn         dentry to unlink
// @param keepdir    when true, an emptied directory is not acted on
// @param keepdentry NOTE(review): its use is not visible in this block;
//                   presumably it suppresses removal of the dentry itself --
//                   confirm against the full function
3012 void Client::unlink(Dentry
*dn
, bool keepdir
, bool keepdentry
)
3016 ldout(cct
, 15) << "unlink dir " << dn
->dir
->parent_inode
<< " '" << dn
->name
<< "' dn " << dn
3017 << " inode " << dn
->inode
<< dendl
;
3019 // unlink from inode
3023 dn
->put(); // dir -> dn pin
3025 dn
->put(); // ll_ref -> dn pin
3028 assert(in
->dn_set
.count(dn
));
3029 in
->dn_set
.erase(dn
);
3030 ldout(cct
, 20) << "unlink inode " << in
<< " parents now " << in
->dn_set
<< dendl
;
3036 ldout(cct
, 15) << "unlink removing '" << dn
->name
<< "' dn " << dn
<< dendl
;
3039 dn
->dir
->dentries
.erase(dn
->name
);
3040 if (dn
->dir
->is_empty() && !keepdir
)
3051 * For asynchronous flushes, check for errors from the IO and
3052 * update the inode if necessary
// Completion context for an asynchronous flush of an inode.
//
// finish() asserts that the caller holds client_lock, logs the I/O error
// (inode pointer, ino in hex, numeric result and its strerror text) and
// records the result on the inode with set_async_err().
// NOTE(review): any check on `r` before logging is not visible here.
3054 class C_Client_FlushComplete
: public Context
{
3059 C_Client_FlushComplete(Client
*c
, Inode
*in
) : client(c
), inode(in
) { }
3060 void finish(int r
) override
{
3061 assert(client
->client_lock
.is_locked_by_me());
3063 client_t
const whoami
= client
->whoami
; // For the benefit of ldout prefix
3064 ldout(client
->cct
, 1) << "I/O error from flush on inode " << inode
3065 << " 0x" << std::hex
<< inode
->ino
<< std::dec
3066 << ": " << r
<< "(" << cpp_strerror(r
) << ")" << dendl
;
3067 inode
->set_async_err(r
);
// Take a reference on the given cap bits of an inode.
//
// Logs when this is the first CEPH_CAP_FILE_BUFFER or CEPH_CAP_FILE_CACHE
// reference (the inode's cap_refs count for that bit is still 0), then
// forwards to Inode::get_cap_ref().
//
// @param in  inode whose cap reference count is bumped
// @param cap cap bits being referenced
3077 void Client::get_cap_ref(Inode
*in
, int cap
)
3079 if ((cap
& CEPH_CAP_FILE_BUFFER
) &&
3080 in
->cap_refs
[CEPH_CAP_FILE_BUFFER
] == 0) {
3081 ldout(cct
, 5) << "get_cap_ref got first FILE_BUFFER ref on " << *in
<< dendl
;
3084 if ((cap
& CEPH_CAP_FILE_CACHE
) &&
3085 in
->cap_refs
[CEPH_CAP_FILE_CACHE
] == 0) {
3086 ldout(cct
, 5) << "get_cap_ref got first FILE_CACHE ref on " << *in
<< dendl
;
3089 in
->get_cap_ref(cap
);
// Release a reference on the given cap bits of an inode.
//
// `last` holds the value returned by Inode::put_cap_ref(), which the code
// below tests bit by bit. For head inodes (snapid == CEPH_NOSNAP):
//  - dropping FILE_WR while the newest cap_snap is still marked `writing`
//    finishes that cap_snap and wakes waitfor_caps;
//  - dropping FILE_BUFFER clears dirty_data on every cap_snap and wakes
//    waitfor_commit.
// Dropping the last FILE_CACHE reference is logged. Inode references
// accumulated in put_nref are released at the end.
//
// @param in  inode whose cap reference is dropped
// @param cap cap bits being released
3092 void Client::put_cap_ref(Inode
*in
, int cap
)
3094 int last
= in
->put_cap_ref(cap
);
// Bits whose last reference went away and that are no longer issued.
3097 int drop
= last
& ~in
->caps_issued();
3098 if (in
->snapid
== CEPH_NOSNAP
) {
3099 if ((last
& CEPH_CAP_FILE_WR
) &&
3100 !in
->cap_snaps
.empty() &&
3101 in
->cap_snaps
.rbegin()->second
.writing
) {
3102 ldout(cct
, 10) << "put_cap_ref finishing pending cap_snap on " << *in
<< dendl
;
3103 in
->cap_snaps
.rbegin()->second
.writing
= 0;
3104 finish_cap_snap(in
, in
->cap_snaps
.rbegin()->second
, get_caps_used(in
));
3105 signal_cond_list(in
->waitfor_caps
); // wake up blocked sync writers
3107 if (last
& CEPH_CAP_FILE_BUFFER
) {
3108 for (auto &p
: in
->cap_snaps
)
3109 p
.second
.dirty_data
= 0;
3110 signal_cond_list(in
->waitfor_commit
);
3111 ldout(cct
, 5) << "put_cap_ref dropped last FILE_BUFFER ref on " << *in
<< dendl
;
3115 if (last
& CEPH_CAP_FILE_CACHE
) {
3116 ldout(cct
, 5) << "put_cap_ref dropped last FILE_CACHE ref on " << *in
<< dendl
;
3122 put_inode(in
, put_nref
);
// Acquire the caps `need` (and opportunistically `want`) on an inode.
//
// Visible flow:
//  - check pool permissions for `need`;
//  - fail the EBADF path when the caps wanted by open files do not cover
//    `need`;
//  - for writes, raise wanted_max_size when endoff approaches or exceeds
//    max_size, and wait when endoff is beyond max_size, when a cap_snap
//    write is in progress, or (after kicking a flush) when a cap_snap still
//    has dirty data;
//  - when nothing needs waiting and all needed caps are held, and none of
//    the wanted caps are being revoked, store need | (have & want) in
//    *phave and take a cap reference;
//  - otherwise consider read-only sessions and dropped caps
//    (I_CAP_DROPPED, _renew_caps) and block on waitfor_caps or
//    waitfor_commit.
//
// @param in     inode to get caps on
// @param need   caps that must be held
// @param want   additional caps to pick up if available
// @param phave  out: the caps actually granted to the caller
// @param endoff end offset of the intended write; the visible code only
//               acts on it when it is >= 0
3126 int Client::get_caps(Inode
*in
, int need
, int want
, int *phave
, loff_t endoff
)
3128 int r
= check_pool_perm(in
, need
);
3133 int file_wanted
= in
->caps_file_wanted();
3134 if ((file_wanted
& need
) != need
) {
3135 ldout(cct
, 10) << "get_caps " << *in
<< " need " << ccap_string(need
)
3136 << " file_wanted " << ccap_string(file_wanted
) << ", EBADF "
3142 int have
= in
->caps_issued(&implemented
);
3144 bool waitfor_caps
= false;
3145 bool waitfor_commit
= false;
3147 if (have
& need
& CEPH_CAP_FILE_WR
) {
// Ask for a larger max_size once endoff reaches max_size or exceeds twice
// the current size, and only if it exceeds what was already wanted.
3149 (endoff
>= (loff_t
)in
->max_size
||
3150 endoff
> (loff_t
)(in
->size
<< 1)) &&
3151 endoff
> (loff_t
)in
->wanted_max_size
) {
3152 ldout(cct
, 10) << "wanted_max_size " << in
->wanted_max_size
<< " -> " << endoff
<< dendl
;
3153 in
->wanted_max_size
= endoff
;
3157 if (endoff
>= 0 && endoff
> (loff_t
)in
->max_size
) {
3158 ldout(cct
, 10) << "waiting on max_size, endoff " << endoff
<< " max_size " << in
->max_size
<< " on " << *in
<< dendl
;
3159 waitfor_caps
= true;
3161 if (!in
->cap_snaps
.empty()) {
3162 if (in
->cap_snaps
.rbegin()->second
.writing
) {
3163 ldout(cct
, 10) << "waiting on cap_snap write to complete" << dendl
;
3164 waitfor_caps
= true;
3166 for (auto &p
: in
->cap_snaps
) {
3167 if (p
.second
.dirty_data
) {
3168 waitfor_commit
= true;
3172 if (waitfor_commit
) {
3173 _flush(in
, new C_Client_FlushComplete(this, in
));
3174 ldout(cct
, 10) << "waiting for WRBUFFER to get dropped" << dendl
;
3179 if (!waitfor_caps
&& !waitfor_commit
) {
3180 if ((have
& need
) == need
) {
3181 int revoking
= implemented
& ~have
;
3182 ldout(cct
, 10) << "get_caps " << *in
<< " have " << ccap_string(have
)
3183 << " need " << ccap_string(need
) << " want " << ccap_string(want
)
3184 << " revoking " << ccap_string(revoking
)
// Succeed only if none of the wanted caps are in the middle of revocation.
3186 if ((revoking
& want
) == 0) {
3187 *phave
= need
| (have
& want
);
3188 in
->get_cap_ref(need
);
3192 ldout(cct
, 10) << "waiting for caps " << *in
<< " need " << ccap_string(need
) << " want " << ccap_string(want
) << dendl
;
3193 waitfor_caps
= true;
3196 if ((need
& CEPH_CAP_FILE_WR
) && in
->auth_cap
&&
3197 in
->auth_cap
->session
->readonly
)
3200 if (in
->flags
& I_CAP_DROPPED
) {
3201 int mds_wanted
= in
->caps_mds_wanted();
3202 if ((mds_wanted
& need
) != need
) {
3203 int ret
= _renew_caps(in
);
3208 if ((mds_wanted
& file_wanted
) ==
3209 (file_wanted
& (CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_WR
))) {
3210 in
->flags
&= ~I_CAP_DROPPED
;
3215 wait_on_list(in
->waitfor_caps
);
3216 else if (waitfor_commit
)
3217 wait_on_list(in
->waitfor_commit
);
// Compute the caps currently in use on an inode.
//
// Starts from Inode::caps_used() and additionally counts
// CEPH_CAP_FILE_CACHE as used when the object cacher still holds data for
// the inode's object set.
//
// @param in inode to inspect
3221 int Client::get_caps_used(Inode
*in
)
3223 unsigned used
= in
->caps_used();
3224 if (!(used
& CEPH_CAP_FILE_CACHE
) &&
3225 !objectcacher
->set_is_empty(&in
->oset
))
3226 used
|= CEPH_CAP_FILE_CACHE
;
// Postpone cap release for an inode.
//
// Sets hold_caps_until to the current time plus the
// client_caps_release_delay config value and appends the inode's cap_item to
// delayed_caps.
//
// @param in inode whose cap release is delayed
3230 void Client::cap_delay_requeue(Inode
*in
)
3232 ldout(cct
, 10) << "cap_delay_requeue on " << *in
<< dendl
;
3233 in
->hold_caps_until
= ceph_clock_now();
3234 in
->hold_caps_until
+= cct
->_conf
->client_caps_release_delay
;
3235 delayed_caps
.push_back(&in
->cap_item
);
// Send an MClientCaps update for one cap of an inode to its MDS.
//
// Derives held / revoking / dropping masks from the cap, updates
// cap->issued and cap->implemented to reflect what is retained (or, with
// client_inject_release_failure set, deliberately keeps them, except for
// xattr caps), then fills the message from the inode's metadata and sends it
// on the session connection. For the auth cap the wanted max_size is
// requested and remembered in requested_max_size.
//
// @param in        inode the cap belongs to
// @param session   MDS session to send on
// @param cap       the cap being updated
// @param sync      logged as sync/async (NOTE(review): presumably also
//                  guards CLIENT_CAPS_SYNC -- the condition is not visible)
// @param used      caps currently in use
// @param want      caps wanted
// @param retain    caps to keep; anything being revoked is masked out
// @param flush     dirty caps being flushed
// @param flush_tid tid set on the message
3238 void Client::send_cap(Inode
*in
, MetaSession
*session
, Cap
*cap
,
3239 bool sync
, int used
, int want
, int retain
,
3240 int flush
, ceph_tid_t flush_tid
)
3242 int held
= cap
->issued
| cap
->implemented
;
3243 int revoking
= cap
->implemented
& ~cap
->issued
;
// Never retain something the MDS is revoking.
3244 retain
&= ~revoking
;
3245 int dropping
= cap
->issued
& ~retain
;
3246 int op
= CEPH_CAP_OP_UPDATE
;
3248 ldout(cct
, 10) << "send_cap " << *in
3249 << " mds." << session
->mds_num
<< " seq " << cap
->seq
3250 << (sync
? " sync " : " async ")
3251 << " used " << ccap_string(used
)
3252 << " want " << ccap_string(want
)
3253 << " flush " << ccap_string(flush
)
3254 << " retain " << ccap_string(retain
)
3255 << " held "<< ccap_string(held
)
3256 << " revoking " << ccap_string(revoking
)
3257 << " dropping " << ccap_string(dropping
)
3260 if (cct
->_conf
->client_inject_release_failure
&& revoking
) {
// What issued/implemented would have become without the injected failure;
// only used for the debug output below.
3261 const int would_have_issued
= cap
->issued
& retain
;
3262 const int would_have_implemented
= cap
->implemented
& (cap
->issued
| used
);
3264 // - tell the server we think issued is whatever they issued plus whatever we implemented
3265 // - leave what we have implemented in place
3266 ldout(cct
, 20) << __func__
<< " injecting failure to release caps" << dendl
;
3267 cap
->issued
= cap
->issued
| cap
->implemented
;
3269 // Make an exception for revoking xattr caps: we are injecting
3270 // failure to release other caps, but allow xattr because client
3271 // will block on xattr ops if it can't release these to MDS (#9800)
3272 const int xattr_mask
= CEPH_CAP_XATTR_SHARED
| CEPH_CAP_XATTR_EXCL
;
3273 cap
->issued
^= xattr_mask
& revoking
;
3274 cap
->implemented
^= xattr_mask
& revoking
;
3276 ldout(cct
, 20) << __func__
<< " issued " << ccap_string(cap
->issued
) << " vs " << ccap_string(would_have_issued
) << dendl
;
3277 ldout(cct
, 20) << __func__
<< " implemented " << ccap_string(cap
->implemented
) << " vs " << ccap_string(would_have_implemented
) << dendl
;
// Normal path: shrink issued to what is retained, and implemented to what is
// still issued or in use.
3280 cap
->issued
&= retain
;
3281 cap
->implemented
&= cap
->issued
| used
;
3284 snapid_t follows
= 0;
3287 follows
= in
->snaprealm
->get_snap_context().seq
;
3289 MClientCaps
*m
= new MClientCaps(op
,
3292 cap
->cap_id
, cap
->seq
,
3298 m
->caller_uid
= in
->cap_dirtier_uid
;
3299 m
->caller_gid
= in
->cap_dirtier_gid
;
3301 m
->head
.issue_seq
= cap
->issue_seq
;
3302 m
->set_tid(flush_tid
);
3304 m
->head
.uid
= in
->uid
;
3305 m
->head
.gid
= in
->gid
;
3306 m
->head
.mode
= in
->mode
;
3308 m
->head
.nlink
= in
->nlink
;
// xattrs are only encoded when the exclusive xattr cap is being flushed.
3310 if (flush
& CEPH_CAP_XATTR_EXCL
) {
3311 ::encode(in
->xattrs
, m
->xattrbl
);
3312 m
->head
.xattr_version
= in
->xattr_version
;
3316 m
->max_size
= in
->max_size
;
3317 m
->truncate_seq
= in
->truncate_seq
;
3318 m
->truncate_size
= in
->truncate_size
;
3319 m
->mtime
= in
->mtime
;
3320 m
->atime
= in
->atime
;
3321 m
->ctime
= in
->ctime
;
3322 m
->btime
= in
->btime
;
3323 m
->time_warp_seq
= in
->time_warp_seq
;
3324 m
->change_attr
= in
->change_attr
;
3326 m
->flags
|= CLIENT_CAPS_SYNC
;
3328 if (flush
& CEPH_CAP_FILE_WR
) {
3329 m
->inline_version
= in
->inline_version
;
3330 m
->inline_data
= in
->inline_data
;
// Record the size reported to the MDS.
3333 in
->reported_size
= in
->size
;
3334 m
->set_snap_follows(follows
);
3336 if (cap
== in
->auth_cap
) {
3337 m
->set_max_size(in
->wanted_max_size
);
3338 in
->requested_max_size
= in
->wanted_max_size
;
3339 ldout(cct
, 15) << "auth cap, setting max_size = " << in
->requested_max_size
<< dendl
;
3342 if (!session
->flushing_caps_tids
.empty())
3343 m
->set_oldest_flush_tid(*session
->flushing_caps_tids
.begin());
3345 session
->con
->send_message(m
);
// Decide whether an inode's size is getting close to its max_size.
//
// Three conditions are examined in order: whether a FILE_WR flush is already
// in flight, whether size has reached max_size, and whether at least half of
// the span between reported_size and max_size has been consumed
// (2 * size >= max_size + reported_size).
// NOTE(review): the value returned for each condition is not visible in this
// block -- confirm against the full function.
//
// @param in inode to inspect
3348 static bool is_max_size_approaching(Inode
*in
)
3350 /* mds will adjust max size according to the reported size */
3351 if (in
->flushing_caps
& CEPH_CAP_FILE_WR
)
3353 if (in
->size
>= in
->max_size
)
3355 /* half of previous max_size increment has been used */
3356 if (in
->max_size
> in
->reported_size
&&
3357 (in
->size
<< 1) >= in
->max_size
+ in
->reported_size
)
3365 * Examine currently used and wanted versus held caps. Release, flush or ack
3366 * revoked caps to the MDS as appropriate.
3368 * @param in the inode to check
3369 * @param flags flags to apply to cap check
// Compare used/wanted caps with what each MDS has issued for an inode and
// send cap updates where needed.
//
// After computing wanted/used/issued/revoking and the `retain` mask, the
// function returns early for snapshot inodes and for inodes without caps.
// It then walks in->caps; for each cap it checks, in order: a pending
// max_size request on the auth cap, size approaching max_size, a completed
// revocation, more caps wanted than the MDS knows about, and unmount-time
// release. A cap whose state the MDS already matches is left alone and a
// release may be delayed until hold_caps_until. While the MDS is between
// RECONNECT and ACTIVE, old cap/snap flushes are re-sent first. Finally
// dirty caps on the auth cap are marked flushing and send_cap() is called.
//
// @param in    the inode to check
// @param flags CHECK_CAPS_* flags (NODELAY and SYNCHRONOUS are tested here)
3371 void Client::check_caps(Inode
*in
, unsigned flags
)
3373 unsigned wanted
= in
->caps_wanted();
3374 unsigned used
= get_caps_used(in
);
3377 if (in
->is_dir() && (in
->flags
& I_COMPLETE
)) {
3378 // we do this here because we don't want to drop to Fs (and then
3379 // drop the Fs if we do a create!) if that alone makes us send lookups
3380 // to the MDS. Doing it in in->caps_wanted() has knock-on effects elsewhere
3381 wanted
|= CEPH_CAP_FILE_EXCL
;
3385 int issued
= in
->caps_issued(&implemented
);
3386 int revoking
= implemented
& ~issued
;
// Always keep what is wanted or in use, plus the pin cap.
3388 int retain
= wanted
| used
| CEPH_CAP_PIN
;
3391 retain
|= CEPH_CAP_ANY
;
3393 retain
|= CEPH_CAP_ANY_SHARED
;
3396 ldout(cct
, 10) << "check_caps on " << *in
3397 << " wanted " << ccap_string(wanted
)
3398 << " used " << ccap_string(used
)
3399 << " issued " << ccap_string(issued
)
3400 << " revoking " << ccap_string(revoking
)
3401 << " flags=" << flags
3404 if (in
->snapid
!= CEPH_NOSNAP
)
3405 return; //snap caps last forever, can't write
3407 if (in
->caps
.empty())
3408 return; // guard if at end of func
// Cache caps are being revoked while cached data is in use but nothing is
// buffered for write: stop counting the cache as used.
3410 if ((revoking
& (CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_LAZYIO
)) &&
3411 (used
& CEPH_CAP_FILE_CACHE
) && !(used
& CEPH_CAP_FILE_BUFFER
)) {
3413 used
&= ~CEPH_CAP_FILE_CACHE
;
3416 if (!in
->cap_snaps
.empty())
3419 if (flags
& CHECK_CAPS_NODELAY
)
3420 in
->hold_caps_until
= utime_t();
3422 cap_delay_requeue(in
);
3424 utime_t now
= ceph_clock_now();
3426 map
<mds_rank_t
, Cap
*>::iterator it
= in
->caps
.begin();
3427 while (it
!= in
->caps
.end()) {
3428 mds_rank_t mds
= it
->first
;
3429 Cap
*cap
= it
->second
;
3432 MetaSession
*session
= mds_sessions
[mds
];
// For non-auth caps, do not count bits that the auth cap already provides.
3436 if (in
->auth_cap
&& cap
!= in
->auth_cap
)
3437 cap_used
&= ~in
->auth_cap
->issued
;
3439 revoking
= cap
->implemented
& ~cap
->issued
;
3441 ldout(cct
, 10) << " cap mds." << mds
3442 << " issued " << ccap_string(cap
->issued
)
3443 << " implemented " << ccap_string(cap
->implemented
)
3444 << " revoking " << ccap_string(revoking
) << dendl
;
3446 if (in
->wanted_max_size
> in
->max_size
&&
3447 in
->wanted_max_size
> in
->requested_max_size
&&
3448 cap
== in
->auth_cap
)
3451 /* approaching file_max? */
3452 if ((cap
->issued
& CEPH_CAP_FILE_WR
) &&
3453 cap
== in
->auth_cap
&&
3454 is_max_size_approaching(in
)) {
3455 ldout(cct
, 10) << "size " << in
->size
<< " approaching max_size " << in
->max_size
3456 << ", reported " << in
->reported_size
<< dendl
;
3460 /* completed revocation? */
3461 if (revoking
&& (revoking
& cap_used
) == 0) {
3462 ldout(cct
, 10) << "completed revocation of " << ccap_string(cap
->implemented
& ~cap
->issued
) << dendl
;
3466 /* want more caps from mds? */
3467 if (wanted
& ~(cap
->wanted
| cap
->issued
))
3470 if (!revoking
&& unmounting
&& (cap_used
== 0))
3473 if (wanted
== cap
->wanted
&& // mds knows what we want.
3474 ((cap
->issued
& ~retain
) == 0) &&// and we don't have anything we wouldn't like
3475 !in
->dirty_caps
) // and we have no dirty caps
3478 if (now
< in
->hold_caps_until
) {
3479 ldout(cct
, 10) << "delaying cap release" << dendl
;
3484 // re-send old cap/snapcap flushes first.
3485 if (session
->mds_state
>= MDSMap::STATE_RECONNECT
&&
3486 session
->mds_state
< MDSMap::STATE_ACTIVE
&&
3487 session
->early_flushing_caps
.count(in
) == 0) {
3488 ldout(cct
, 20) << " reflushing caps (check_caps) on " << *in
3489 << " to mds." << session
->mds_num
<< dendl
;
3490 session
->early_flushing_caps
.insert(in
);
3491 if (in
->cap_snaps
.size())
3492 flush_snaps(in
, true);
3493 if (in
->flushing_caps
)
3494 flush_caps(in
, session
, flags
& CHECK_CAPS_SYNCHRONOUS
);
3498 ceph_tid_t flush_tid
;
// Only the auth cap carries dirty-cap flushes.
3499 if (in
->auth_cap
== cap
&& in
->dirty_caps
) {
3500 flushing
= mark_caps_flushing(in
, &flush_tid
);
3506 send_cap(in
, session
, cap
, flags
& CHECK_CAPS_SYNCHRONOUS
, cap_used
, wanted
,
3507 retain
, flushing
, flush_tid
);
// Queue a CapSnap capturing the inode's state under the old snap context.
//
// Nothing is queued when the newest existing cap_snap is still being
// written. Otherwise, if the inode has dirty caps, is being written
// (FILE_WR in use) or has any dirty write cap, a CapSnap keyed by
// old_snapc.seq is emplaced (insertion is asserted) and filled with the
// snap context, issued/dirty caps, dirty-data flag, ownership, mode, btime
// and xattrs. If FILE_WR is in use the snap is marked `writing`; the
// finish_cap_snap() call is also visible. If none of the conditions hold,
// only a log line is emitted.
//
// @param in        inode being snapshotted
// @param old_snapc snap context in effect before the new snapshot
3512 void Client::queue_cap_snap(Inode
*in
, SnapContext
& old_snapc
)
3514 int used
= get_caps_used(in
);
3515 int dirty
= in
->caps_dirty();
3516 ldout(cct
, 10) << "queue_cap_snap " << *in
<< " snapc " << old_snapc
<< " used " << ccap_string(used
) << dendl
;
3518 if (in
->cap_snaps
.size() &&
3519 in
->cap_snaps
.rbegin()->second
.writing
) {
3520 ldout(cct
, 10) << "queue_cap_snap already have pending cap_snap on " << *in
<< dendl
;
3522 } else if (in
->caps_dirty() ||
3523 (used
& CEPH_CAP_FILE_WR
) ||
3524 (dirty
& CEPH_CAP_ANY_WR
)) {
// Construct the CapSnap in place, keyed by the old snap context's seq.
3525 const auto &capsnapem
= in
->cap_snaps
.emplace(std::piecewise_construct
, std::make_tuple(old_snapc
.seq
), std::make_tuple(in
));
3526 assert(capsnapem
.second
== true); /* element inserted */
3527 CapSnap
&capsnap
= capsnapem
.first
->second
;
3528 capsnap
.context
= old_snapc
;
3529 capsnap
.issued
= in
->caps_issued();
3530 capsnap
.dirty
= in
->caps_dirty();
3532 capsnap
.dirty_data
= (used
& CEPH_CAP_FILE_BUFFER
);
3534 capsnap
.uid
= in
->uid
;
3535 capsnap
.gid
= in
->gid
;
3536 capsnap
.mode
= in
->mode
;
3537 capsnap
.btime
= in
->btime
;
3538 capsnap
.xattrs
= in
->xattrs
;
3539 capsnap
.xattr_version
= in
->xattr_version
;
3541 if (used
& CEPH_CAP_FILE_WR
) {
3542 ldout(cct
, 10) << "queue_cap_snap WR used on " << *in
<< dendl
;
3543 capsnap
.writing
= 1;
3545 finish_cap_snap(in
, capsnap
, used
);
3548 ldout(cct
, 10) << "queue_cap_snap not dirty|writing on " << *in
<< dendl
;
// Complete a CapSnap by copying the inode's current size, timestamps,
// time_warp_seq and change_attr into it and merging the inode's dirty caps.
//
// Inline data and version are copied when FILE_WR is among the snap's dirty
// caps. If FILE_BUFFER is in use the snap is logged as delayed; the
// dirty_data reset at the end is the other visible outcome.
//
// @param in      inode the snap belongs to
// @param capsnap the CapSnap to fill in
// @param used    caps currently in use on the inode
3552 void Client::finish_cap_snap(Inode
*in
, CapSnap
&capsnap
, int used
)
3554 ldout(cct
, 10) << "finish_cap_snap " << *in
<< " capsnap " << (void *)&capsnap
<< " used " << ccap_string(used
) << dendl
;
3555 capsnap
.size
= in
->size
;
3556 capsnap
.mtime
= in
->mtime
;
3557 capsnap
.atime
= in
->atime
;
3558 capsnap
.ctime
= in
->ctime
;
3559 capsnap
.time_warp_seq
= in
->time_warp_seq
;
3560 capsnap
.change_attr
= in
->change_attr
;
3562 capsnap
.dirty
|= in
->caps_dirty();
3564 if (capsnap
.dirty
& CEPH_CAP_FILE_WR
) {
3565 capsnap
.inline_data
= in
->inline_data
;
3566 capsnap
.inline_version
= in
->inline_version
;
3569 if (used
& CEPH_CAP_FILE_BUFFER
) {
3570 ldout(cct
, 10) << "finish_cap_snap " << *in
<< " cap_snap " << &capsnap
<< " used " << used
3571 << " WRBUFFER, delaying" << dendl
;
3573 capsnap
.dirty_data
= 0;
// Note that the buffered data belonging to the cap_snap keyed by `seq` has
// been flushed: clears its dirty_data flag. cap_snaps.at() is used, so the
// entry is expected to exist.
//
// @param in  inode owning the cap_snap
// @param seq key of the cap_snap in in->cap_snaps
3578 void Client::_flushed_cap_snap(Inode
*in
, snapid_t seq
)
3580 ldout(cct
, 10) << "_flushed_cap_snap seq " << seq
<< " on " << *in
<< dendl
;
3581 in
->cap_snaps
.at(seq
).dirty_data
= 0;
// Send CEPH_CAP_OP_FLUSHSNAP messages for an inode's cap_snaps to the MDS
// holding its auth cap.
//
// Requires a non-empty cap_snaps and an auth cap (both asserted). For each
// cap_snap: snaps still carrying dirty data or being written are tested
// for; a snap without a flush tid gets a fresh one from last_flush_tid and
// is registered on the session's flushing lists; then a message is built
// from the state saved in the CapSnap and sent on the session connection.
//
// @param in        inode whose snaps are flushed
// @param all_again logged here (NOTE(review): presumably forces re-sending
//                  snaps that already have a flush tid -- the guard under
//                  "only flush once per session" is not fully visible)
3585 void Client::flush_snaps(Inode
*in
, bool all_again
)
3587 ldout(cct
, 10) << "flush_snaps on " << *in
<< " all_again " << all_again
<< dendl
;
3588 assert(in
->cap_snaps
.size());
3591 assert(in
->auth_cap
);
3592 MetaSession
*session
= in
->auth_cap
->session
;
3593 int mseq
= in
->auth_cap
->mseq
;
3595 for (auto &p
: in
->cap_snaps
) {
3596 CapSnap
&capsnap
= p
.second
;
3598 // only flush once per session
3599 if (capsnap
.flush_tid
> 0)
3603 ldout(cct
, 10) << "flush_snaps mds." << session
->mds_num
3604 << " follows " << p
.first
3605 << " size " << capsnap
.size
3606 << " mtime " << capsnap
.mtime
3607 << " dirty_data=" << capsnap
.dirty_data
3608 << " writing=" << capsnap
.writing
3609 << " on " << *in
<< dendl
;
3610 if (capsnap
.dirty_data
|| capsnap
.writing
)
// First flush of this snap: allocate a tid and register it on the session.
3613 if (capsnap
.flush_tid
== 0) {
3614 capsnap
.flush_tid
= ++last_flush_tid
;
3615 if (!in
->flushing_cap_item
.is_on_list())
3616 session
->flushing_caps
.push_back(&in
->flushing_cap_item
);
3617 session
->flushing_caps_tids
.insert(capsnap
.flush_tid
);
3620 MClientCaps
*m
= new MClientCaps(CEPH_CAP_OP_FLUSHSNAP
, in
->ino
, in
->snaprealm
->ino
, 0, mseq
,
3623 m
->caller_uid
= user_id
;
3625 m
->caller_gid
= group_id
;
3627 m
->set_client_tid(capsnap
.flush_tid
);
3628 m
->head
.snap_follows
= p
.first
;
3630 m
->head
.caps
= capsnap
.issued
;
3631 m
->head
.dirty
= capsnap
.dirty
;
3633 m
->head
.uid
= capsnap
.uid
;
3634 m
->head
.gid
= capsnap
.gid
;
3635 m
->head
.mode
= capsnap
.mode
;
3636 m
->btime
= capsnap
.btime
;
3638 m
->size
= capsnap
.size
;
3640 m
->head
.xattr_version
= capsnap
.xattr_version
;
3641 ::encode(capsnap
.xattrs
, m
->xattrbl
);
3643 m
->ctime
= capsnap
.ctime
;
3644 m
->btime
= capsnap
.btime
;
3645 m
->mtime
= capsnap
.mtime
;
3646 m
->atime
= capsnap
.atime
;
3647 m
->time_warp_seq
= capsnap
.time_warp_seq
;
3648 m
->change_attr
= capsnap
.change_attr
;
// NOTE(review): inline data is taken from the live inode here, although
// finish_cap_snap() saves a copy in the CapSnap -- confirm this is intended.
3650 if (capsnap
.dirty
& CEPH_CAP_FILE_WR
) {
3651 m
->inline_version
= in
->inline_version
;
3652 m
->inline_data
= in
->inline_data
;
3655 assert(!session
->flushing_caps_tids
.empty());
3656 m
->set_oldest_flush_tid(*session
->flushing_caps_tids
.begin());
3658 session
->con
->send_message(m
);
3664 void Client::wait_on_list(list
<Cond
*>& ls
)
3667 ls
.push_back(&cond
);
3668 cond
.Wait(client_lock
);
3672 void Client::signal_cond_list(list
<Cond
*>& ls
)
3674 for (list
<Cond
*>::iterator it
= ls
.begin(); it
!= ls
.end(); ++it
)
3678 void Client::wait_on_context_list(list
<Context
*>& ls
)
3683 ls
.push_back(new C_Cond(&cond
, &done
, &r
));
3685 cond
.Wait(client_lock
);
3688 void Client::signal_context_list(list
<Context
*>& ls
)
3690 while (!ls
.empty()) {
3691 ls
.front()->complete(0);
3696 void Client::wake_inode_waiters(MetaSession
*s
)
3698 xlist
<Cap
*>::iterator iter
= s
->caps
.begin();
3699 while (!iter
.end()){
3700 signal_cond_list((*iter
)->inode
->waitfor_caps
);
3706 // flush dirty data (from objectcache)
3708 class C_Client_CacheInvalidate
: public Context
{
3712 int64_t offset
, length
;
3714 C_Client_CacheInvalidate(Client
*c
, Inode
*in
, int64_t off
, int64_t len
) :
3715 client(c
), offset(off
), length(len
) {
3716 if (client
->use_faked_inos())
3717 ino
= vinodeno_t(in
->faked_ino
, CEPH_NOSNAP
);
3721 void finish(int r
) override
{
3722 // _async_invalidate takes the lock when it needs to, call this back from outside of lock.
3723 assert(!client
->client_lock
.is_locked_by_me());
3724 client
->_async_invalidate(ino
, offset
, length
);
3728 void Client::_async_invalidate(vinodeno_t ino
, int64_t off
, int64_t len
)
3732 ldout(cct
, 10) << "_async_invalidate " << ino
<< " " << off
<< "~" << len
<< dendl
;
3733 ino_invalidate_cb(callback_handle
, ino
, off
, len
);
3736 void Client::_schedule_invalidate_callback(Inode
*in
, int64_t off
, int64_t len
) {
3738 if (ino_invalidate_cb
)
3739 // we queue the invalidate, which calls the callback and decrements the ref
3740 async_ino_invalidator
.queue(new C_Client_CacheInvalidate(this, in
, off
, len
));
3743 void Client::_invalidate_inode_cache(Inode
*in
)
3745 ldout(cct
, 10) << "_invalidate_inode_cache " << *in
<< dendl
;
3747 // invalidate our userspace inode cache
3748 if (cct
->_conf
->client_oc
) {
3749 objectcacher
->release_set(&in
->oset
);
3750 if (!objectcacher
->set_is_empty(&in
->oset
))
3751 lderr(cct
) << "failed to invalidate cache for " << *in
<< dendl
;
3754 _schedule_invalidate_callback(in
, 0, 0);
3757 void Client::_invalidate_inode_cache(Inode
*in
, int64_t off
, int64_t len
)
3759 ldout(cct
, 10) << "_invalidate_inode_cache " << *in
<< " " << off
<< "~" << len
<< dendl
;
3761 // invalidate our userspace inode cache
3762 if (cct
->_conf
->client_oc
) {
3763 vector
<ObjectExtent
> ls
;
3764 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, off
, len
, in
->truncate_size
, ls
);
3765 objectcacher
->discard_set(&in
->oset
, ls
);
3768 _schedule_invalidate_callback(in
, off
, len
);
3771 bool Client::_release(Inode
*in
)
3773 ldout(cct
, 20) << "_release " << *in
<< dendl
;
3774 if (in
->cap_refs
[CEPH_CAP_FILE_CACHE
] == 0) {
3775 _invalidate_inode_cache(in
);
3781 bool Client::_flush(Inode
*in
, Context
*onfinish
)
3783 ldout(cct
, 10) << "_flush " << *in
<< dendl
;
3785 if (!in
->oset
.dirty_or_tx
) {
3786 ldout(cct
, 10) << " nothing to flush" << dendl
;
3787 onfinish
->complete(0);
3791 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
)) {
3792 ldout(cct
, 1) << __func__
<< ": FULL, purging for ENOSPC" << dendl
;
3793 objectcacher
->purge_set(&in
->oset
);
3795 onfinish
->complete(-ENOSPC
);
3800 return objectcacher
->flush_set(&in
->oset
, onfinish
);
3803 void Client::_flush_range(Inode
*in
, int64_t offset
, uint64_t size
)
3805 assert(client_lock
.is_locked());
3806 if (!in
->oset
.dirty_or_tx
) {
3807 ldout(cct
, 10) << " nothing to flush" << dendl
;
3811 Mutex
flock("Client::_flush_range flock");
3814 Context
*onflush
= new C_SafeCond(&flock
, &cond
, &safe
);
3815 bool ret
= objectcacher
->file_flush(&in
->oset
, &in
->layout
, in
->snaprealm
->get_snap_context(),
3816 offset
, size
, onflush
);
3819 client_lock
.Unlock();
3828 void Client::flush_set_callback(ObjectCacher::ObjectSet
*oset
)
3830 // Mutex::Locker l(client_lock);
3831 assert(client_lock
.is_locked()); // will be called via dispatch() -> objecter -> ...
3832 Inode
*in
= static_cast<Inode
*>(oset
->parent
);
3837 void Client::_flushed(Inode
*in
)
3839 ldout(cct
, 10) << "_flushed " << *in
<< dendl
;
3841 put_cap_ref(in
, CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_BUFFER
);
3846 // checks common to add_update_cap, handle_cap_grant
3847 void Client::check_cap_issue(Inode
*in
, Cap
*cap
, unsigned issued
)
3849 unsigned had
= in
->caps_issued();
3851 if ((issued
& CEPH_CAP_FILE_CACHE
) &&
3852 !(had
& CEPH_CAP_FILE_CACHE
))
3855 if ((issued
& CEPH_CAP_FILE_SHARED
) &&
3856 !(had
& CEPH_CAP_FILE_SHARED
)) {
3860 clear_dir_complete_and_ordered(in
, true);
3864 void Client::add_update_cap(Inode
*in
, MetaSession
*mds_session
, uint64_t cap_id
,
3865 unsigned issued
, unsigned seq
, unsigned mseq
, inodeno_t realm
,
3866 int flags
, const UserPerm
& cap_perms
)
3869 mds_rank_t mds
= mds_session
->mds_num
;
3870 if (in
->caps
.count(mds
)) {
3871 cap
= in
->caps
[mds
];
3874 * auth mds of the inode changed. we received the cap export
3875 * message, but still haven't received the cap import message.
3876 * handle_cap_export() updated the new auth MDS' cap.
3878 * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
3879 * a message that was send before the cap import message. So
3880 * don't remove caps.
3882 if (ceph_seq_cmp(seq
, cap
->seq
) <= 0) {
3883 assert(cap
== in
->auth_cap
);
3884 assert(cap
->cap_id
== cap_id
);
3887 issued
|= cap
->issued
;
3888 flags
|= CEPH_CAP_FLAG_AUTH
;
3891 mds_session
->num_caps
++;
3892 if (!in
->is_any_caps()) {
3893 assert(in
->snaprealm
== 0);
3894 in
->snaprealm
= get_snap_realm(realm
);
3895 in
->snaprealm
->inodes_with_caps
.push_back(&in
->snaprealm_item
);
3896 ldout(cct
, 15) << "add_update_cap first one, opened snaprealm " << in
->snaprealm
<< dendl
;
3898 in
->caps
[mds
] = cap
= new Cap
;
3900 mds_session
->caps
.push_back(&cap
->cap_item
);
3901 cap
->session
= mds_session
;
3903 cap
->gen
= mds_session
->cap_gen
;
3904 cap_list
.push_back(&in
->cap_item
);
3907 check_cap_issue(in
, cap
, issued
);
3909 if (flags
& CEPH_CAP_FLAG_AUTH
) {
3910 if (in
->auth_cap
!= cap
&&
3911 (!in
->auth_cap
|| ceph_seq_cmp(in
->auth_cap
->mseq
, mseq
) < 0)) {
3912 if (in
->auth_cap
&& in
->flushing_cap_item
.is_on_list()) {
3913 ldout(cct
, 10) << "add_update_cap changing auth cap: "
3914 << "add myself to new auth MDS' flushing caps list" << dendl
;
3915 adjust_session_flushing_caps(in
, in
->auth_cap
->session
, mds_session
);
3921 unsigned old_caps
= cap
->issued
;
3922 cap
->cap_id
= cap_id
;
3923 cap
->issued
|= issued
;
3924 cap
->implemented
|= issued
;
3926 cap
->issue_seq
= seq
;
3928 cap
->latest_perms
= cap_perms
;
3929 ldout(cct
, 10) << "add_update_cap issued " << ccap_string(old_caps
) << " -> " << ccap_string(cap
->issued
)
3930 << " from mds." << mds
3934 if ((issued
& ~old_caps
) && in
->auth_cap
== cap
) {
3935 // non-auth MDS is revoking the newly grant caps ?
3936 for (map
<mds_rank_t
,Cap
*>::iterator it
= in
->caps
.begin(); it
!= in
->caps
.end(); ++it
) {
3937 if (it
->second
== cap
)
3939 if (it
->second
->implemented
& ~it
->second
->issued
& issued
) {
3940 check_caps(in
, CHECK_CAPS_NODELAY
);
3946 if (issued
& ~old_caps
)
3947 signal_cond_list(in
->waitfor_caps
);
3950 void Client::remove_cap(Cap
*cap
, bool queue_release
)
3952 Inode
*in
= cap
->inode
;
3953 MetaSession
*session
= cap
->session
;
3954 mds_rank_t mds
= cap
->session
->mds_num
;
3956 ldout(cct
, 10) << "remove_cap mds." << mds
<< " on " << *in
<< dendl
;
3958 if (queue_release
) {
3959 session
->enqueue_cap_release(
3967 if (in
->auth_cap
== cap
) {
3968 if (in
->flushing_cap_item
.is_on_list()) {
3969 ldout(cct
, 10) << " removing myself from flushing_cap list" << dendl
;
3970 in
->flushing_cap_item
.remove_myself();
3972 in
->auth_cap
= NULL
;
3974 assert(in
->caps
.count(mds
));
3975 in
->caps
.erase(mds
);
3977 cap
->cap_item
.remove_myself();
3981 if (!in
->is_any_caps()) {
3982 ldout(cct
, 15) << "remove_cap last one, closing snaprealm " << in
->snaprealm
<< dendl
;
3983 in
->snaprealm_item
.remove_myself();
3984 put_snap_realm(in
->snaprealm
);
3989 void Client::remove_all_caps(Inode
*in
)
3991 while (!in
->caps
.empty())
3992 remove_cap(in
->caps
.begin()->second
, true);
3995 void Client::remove_session_caps(MetaSession
*s
)
3997 ldout(cct
, 10) << "remove_session_caps mds." << s
->mds_num
<< dendl
;
3999 while (s
->caps
.size()) {
4000 Cap
*cap
= *s
->caps
.begin();
4001 Inode
*in
= cap
->inode
;
4002 bool dirty_caps
= false, cap_snaps
= false;
4003 if (in
->auth_cap
== cap
) {
4004 cap_snaps
= !in
->cap_snaps
.empty();
4005 dirty_caps
= in
->dirty_caps
| in
->flushing_caps
;
4006 in
->wanted_max_size
= 0;
4007 in
->requested_max_size
= 0;
4008 in
->flags
|= I_CAP_DROPPED
;
4010 remove_cap(cap
, false);
4011 signal_cond_list(in
->waitfor_caps
);
4013 InodeRef
tmp_ref(in
);
4014 in
->cap_snaps
.clear();
4017 lderr(cct
) << "remove_session_caps still has dirty|flushing caps on " << *in
<< dendl
;
4018 if (in
->flushing_caps
) {
4019 num_flushing_caps
--;
4020 in
->flushing_cap_tids
.clear();
4022 in
->flushing_caps
= 0;
4027 s
->flushing_caps_tids
.clear();
4031 int Client::_do_remount(void)
4034 int r
= remount_cb(callback_handle
);
4037 client_t whoami
= get_nodeid();
4040 "failed to remount (to trim kernel dentries): "
4041 "errno = " << e
<< " (" << strerror(e
) << ")" << dendl
;
4044 "failed to remount (to trim kernel dentries): "
4045 "return code = " << r
<< dendl
;
4047 bool should_abort
= cct
->_conf
->get_val
<bool>("client_die_on_failed_remount") ||
4048 cct
->_conf
->get_val
<bool>("client_die_on_failed_dentry_invalidate");
4049 if (should_abort
&& !unmounting
) {
4050 lderr(cct
) << "failed to remount for kernel dentry trimming; quitting!" << dendl
;
4057 class C_Client_Remount
: public Context
{
4061 explicit C_Client_Remount(Client
*c
) : client(c
) {}
4062 void finish(int r
) override
{
4064 client
->_do_remount();
4068 void Client::_invalidate_kernel_dcache()
4072 if (can_invalidate_dentries
) {
4073 if (dentry_invalidate_cb
&& root
->dir
) {
4074 for (ceph::unordered_map
<string
, Dentry
*>::iterator p
= root
->dir
->dentries
.begin();
4075 p
!= root
->dir
->dentries
.end();
4077 if (p
->second
->inode
)
4078 _schedule_invalidate_dentry_callback(p
->second
, false);
4081 } else if (remount_cb
) {
4083 // when remounting a file system, linux kernel trims all unused dentries in the fs
4084 remount_finisher
.queue(new C_Client_Remount(this));
4088 void Client::trim_caps(MetaSession
*s
, int max
)
4090 mds_rank_t mds
= s
->mds_num
;
4091 int caps_size
= s
->caps
.size();
4092 ldout(cct
, 10) << "trim_caps mds." << mds
<< " max " << max
4093 << " caps " << caps_size
<< dendl
;
4096 xlist
<Cap
*>::iterator p
= s
->caps
.begin();
4097 std::set
<InodeRef
> anchor
; /* prevent put_inode from deleting all caps during traversal */
4098 while ((caps_size
- trimmed
) > max
&& !p
.end()) {
4100 InodeRef
in(cap
->inode
);
4102 // Increment p early because it will be invalidated if cap
4103 // is deleted inside remove_cap
4106 if (in
->caps
.size() > 1 && cap
!= in
->auth_cap
) {
4107 int mine
= cap
->issued
| cap
->implemented
;
4108 int oissued
= in
->auth_cap
? in
->auth_cap
->issued
: 0;
4109 // disposable non-auth cap
4110 if (!(get_caps_used(in
.get()) & ~oissued
& mine
)) {
4111 ldout(cct
, 20) << " removing unused, unneeded non-auth cap on " << *in
<< dendl
;
4112 remove_cap(cap
, true);
4113 /* N.B. no need to push onto anchor, as we are only removing one cap */
4117 ldout(cct
, 20) << " trying to trim dentries for " << *in
<< dendl
;
4119 set
<Dentry
*>::iterator q
= in
->dn_set
.begin();
4120 while (q
!= in
->dn_set
.end()) {
4122 if (dn
->lru_is_expireable()) {
4123 if (can_invalidate_dentries
&&
4124 dn
->dir
->parent_inode
->ino
== MDS_INO_ROOT
) {
4125 // Only issue one of these per DN for inodes in root: handle
4126 // others more efficiently by calling for root-child DNs at
4127 // the end of this function.
4128 _schedule_invalidate_dentry_callback(dn
, true);
4130 ldout(cct
, 20) << " anchoring inode: " << in
->ino
<< dendl
;
4134 ldout(cct
, 20) << " not expirable: " << dn
->name
<< dendl
;
4138 if (all
&& in
->ino
!= MDS_INO_ROOT
) {
4139 ldout(cct
, 20) << __func__
<< " counting as trimmed: " << *in
<< dendl
;
4144 ldout(cct
, 20) << " clearing anchored inodes" << dendl
;
4147 caps_size
= s
->caps
.size();
4148 if (caps_size
> max
)
4149 _invalidate_kernel_dcache();
4152 void Client::force_session_readonly(MetaSession
*s
)
4155 for (xlist
<Cap
*>::iterator p
= s
->caps
.begin(); !p
.end(); ++p
) {
4156 Inode
*in
= (*p
)->inode
;
4157 if (in
->caps_wanted() & CEPH_CAP_FILE_WR
)
4158 signal_cond_list(in
->waitfor_caps
);
4162 void Client::mark_caps_dirty(Inode
*in
, int caps
)
4164 ldout(cct
, 10) << "mark_caps_dirty " << *in
<< " " << ccap_string(in
->dirty_caps
) << " -> "
4165 << ccap_string(in
->dirty_caps
| caps
) << dendl
;
4166 if (caps
&& !in
->caps_dirty())
4168 in
->dirty_caps
|= caps
;
4171 int Client::mark_caps_flushing(Inode
*in
, ceph_tid_t
* ptid
)
4173 MetaSession
*session
= in
->auth_cap
->session
;
4175 int flushing
= in
->dirty_caps
;
4178 ceph_tid_t flush_tid
= ++last_flush_tid
;
4179 in
->flushing_cap_tids
[flush_tid
] = flushing
;
4181 if (!in
->flushing_caps
) {
4182 ldout(cct
, 10) << "mark_caps_flushing " << ccap_string(flushing
) << " " << *in
<< dendl
;
4183 num_flushing_caps
++;
4185 ldout(cct
, 10) << "mark_caps_flushing (more) " << ccap_string(flushing
) << " " << *in
<< dendl
;
4188 in
->flushing_caps
|= flushing
;
4191 if (!in
->flushing_cap_item
.is_on_list())
4192 session
->flushing_caps
.push_back(&in
->flushing_cap_item
);
4193 session
->flushing_caps_tids
.insert(flush_tid
);
4199 void Client::adjust_session_flushing_caps(Inode
*in
, MetaSession
*old_s
, MetaSession
*new_s
)
4201 for (auto &p
: in
->cap_snaps
) {
4202 CapSnap
&capsnap
= p
.second
;
4203 if (capsnap
.flush_tid
> 0) {
4204 old_s
->flushing_caps_tids
.erase(capsnap
.flush_tid
);
4205 new_s
->flushing_caps_tids
.insert(capsnap
.flush_tid
);
4208 for (map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4209 it
!= in
->flushing_cap_tids
.end();
4211 old_s
->flushing_caps_tids
.erase(it
->first
);
4212 new_s
->flushing_caps_tids
.insert(it
->first
);
4214 new_s
->flushing_caps
.push_back(&in
->flushing_cap_item
);
4218 * Flush all caps back to the MDS. Because the callers generally wait on the
4219 * result of this function (syncfs and umount cases), we set
4220 * CHECK_CAPS_SYNCHRONOUS on the last check_caps call.
4222 void Client::flush_caps_sync()
4224 ldout(cct
, 10) << __func__
<< dendl
;
4225 xlist
<Inode
*>::iterator p
= delayed_caps
.begin();
4227 unsigned flags
= CHECK_CAPS_NODELAY
;
4231 delayed_caps
.pop_front();
4232 if (p
.end() && cap_list
.empty())
4233 flags
|= CHECK_CAPS_SYNCHRONOUS
;
4234 check_caps(in
, flags
);
4238 p
= cap_list
.begin();
4240 unsigned flags
= CHECK_CAPS_NODELAY
;
4245 flags
|= CHECK_CAPS_SYNCHRONOUS
;
4246 check_caps(in
, flags
);
4250 void Client::flush_caps(Inode
*in
, MetaSession
*session
, bool sync
)
4252 ldout(cct
, 10) << "flush_caps " << in
<< " mds." << session
->mds_num
<< dendl
;
4253 Cap
*cap
= in
->auth_cap
;
4254 assert(cap
->session
== session
);
4256 for (map
<ceph_tid_t
,int>::iterator p
= in
->flushing_cap_tids
.begin();
4257 p
!= in
->flushing_cap_tids
.end();
4259 bool req_sync
= false;
4261 /* If this is a synchronous request, then flush the journal on last one */
4262 if (sync
&& (p
->first
== in
->flushing_cap_tids
.rbegin()->first
))
4265 send_cap(in
, session
, cap
, req_sync
,
4266 (get_caps_used(in
) | in
->caps_dirty()),
4267 in
->caps_wanted(), (cap
->issued
| cap
->implemented
),
4268 p
->second
, p
->first
);
4272 void Client::wait_sync_caps(Inode
*in
, ceph_tid_t want
)
4274 while (in
->flushing_caps
) {
4275 map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4276 assert(it
!= in
->flushing_cap_tids
.end());
4277 if (it
->first
> want
)
4279 ldout(cct
, 10) << "wait_sync_caps on " << *in
<< " flushing "
4280 << ccap_string(it
->second
) << " want " << want
4281 << " last " << it
->first
<< dendl
;
4282 wait_on_list(in
->waitfor_caps
);
4286 void Client::wait_sync_caps(ceph_tid_t want
)
4289 ldout(cct
, 10) << "wait_sync_caps want " << want
<< " (last is " << last_flush_tid
<< ", "
4290 << num_flushing_caps
<< " total flushing)" << dendl
;
4291 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
4292 p
!= mds_sessions
.end();
4294 MetaSession
*s
= p
->second
;
4295 if (s
->flushing_caps_tids
.empty())
4297 ceph_tid_t oldest_tid
= *s
->flushing_caps_tids
.begin();
4298 if (oldest_tid
<= want
) {
4299 ldout(cct
, 10) << " waiting on mds." << p
->first
<< " tid " << oldest_tid
4300 << " (want " << want
<< ")" << dendl
;
4301 sync_cond
.Wait(client_lock
);
4307 void Client::kick_flushing_caps(MetaSession
*session
)
4309 mds_rank_t mds
= session
->mds_num
;
4310 ldout(cct
, 10) << "kick_flushing_caps mds." << mds
<< dendl
;
4312 for (xlist
<Inode
*>::iterator p
= session
->flushing_caps
.begin(); !p
.end(); ++p
) {
4314 if (session
->early_flushing_caps
.count(in
))
4316 ldout(cct
, 20) << " reflushing caps on " << *in
<< " to mds." << mds
<< dendl
;
4317 if (in
->cap_snaps
.size())
4318 flush_snaps(in
, true);
4319 if (in
->flushing_caps
)
4320 flush_caps(in
, session
);
4323 session
->early_flushing_caps
.clear();
4326 void Client::early_kick_flushing_caps(MetaSession
*session
)
4328 session
->early_flushing_caps
.clear();
4330 for (xlist
<Inode
*>::iterator p
= session
->flushing_caps
.begin(); !p
.end(); ++p
) {
4332 assert(in
->auth_cap
);
4334 // if flushing caps were revoked, we re-send the cap flush in client reconnect
4335 // stage. This guarantees that MDS processes the cap flush message before issuing
4336 // the flushing caps to other client.
4337 if ((in
->flushing_caps
& in
->auth_cap
->issued
) == in
->flushing_caps
)
4340 ldout(cct
, 20) << " reflushing caps (early_kick) on " << *in
4341 << " to mds." << session
->mds_num
<< dendl
;
4343 session
->early_flushing_caps
.insert(in
);
4345 if (in
->cap_snaps
.size())
4346 flush_snaps(in
, true);
4347 if (in
->flushing_caps
)
4348 flush_caps(in
, session
);
4353 void Client::kick_maxsize_requests(MetaSession
*session
)
4355 xlist
<Cap
*>::iterator iter
= session
->caps
.begin();
4356 while (!iter
.end()){
4357 (*iter
)->inode
->requested_max_size
= 0;
4358 (*iter
)->inode
->wanted_max_size
= 0;
4359 signal_cond_list((*iter
)->inode
->waitfor_caps
);
4364 void SnapRealm::build_snap_context()
4366 set
<snapid_t
> snaps
;
4367 snapid_t max_seq
= seq
;
4369 // start with prior_parents?
4370 for (unsigned i
=0; i
<prior_parent_snaps
.size(); i
++)
4371 snaps
.insert(prior_parent_snaps
[i
]);
4373 // current parent's snaps
4375 const SnapContext
& psnapc
= pparent
->get_snap_context();
4376 for (unsigned i
=0; i
<psnapc
.snaps
.size(); i
++)
4377 if (psnapc
.snaps
[i
] >= parent_since
)
4378 snaps
.insert(psnapc
.snaps
[i
]);
4379 if (psnapc
.seq
> max_seq
)
4380 max_seq
= psnapc
.seq
;
4384 for (unsigned i
=0; i
<my_snaps
.size(); i
++)
4385 snaps
.insert(my_snaps
[i
]);
4388 cached_snap_context
.seq
= max_seq
;
4389 cached_snap_context
.snaps
.resize(0);
4390 cached_snap_context
.snaps
.reserve(snaps
.size());
4391 for (set
<snapid_t
>::reverse_iterator p
= snaps
.rbegin(); p
!= snaps
.rend(); ++p
)
4392 cached_snap_context
.snaps
.push_back(*p
);
4395 void Client::invalidate_snaprealm_and_children(SnapRealm
*realm
)
4400 while (!q
.empty()) {
4404 ldout(cct
, 10) << "invalidate_snaprealm_and_children " << *realm
<< dendl
;
4405 realm
->invalidate_cache();
4407 for (set
<SnapRealm
*>::iterator p
= realm
->pchildren
.begin();
4408 p
!= realm
->pchildren
.end();
4414 SnapRealm
*Client::get_snap_realm(inodeno_t r
)
4416 SnapRealm
*realm
= snap_realms
[r
];
4418 snap_realms
[r
] = realm
= new SnapRealm(r
);
4419 ldout(cct
, 20) << "get_snap_realm " << r
<< " " << realm
<< " " << realm
->nref
<< " -> " << (realm
->nref
+ 1) << dendl
;
4424 SnapRealm
*Client::get_snap_realm_maybe(inodeno_t r
)
4426 if (snap_realms
.count(r
) == 0) {
4427 ldout(cct
, 20) << "get_snap_realm_maybe " << r
<< " fail" << dendl
;
4430 SnapRealm
*realm
= snap_realms
[r
];
4431 ldout(cct
, 20) << "get_snap_realm_maybe " << r
<< " " << realm
<< " " << realm
->nref
<< " -> " << (realm
->nref
+ 1) << dendl
;
4436 void Client::put_snap_realm(SnapRealm
*realm
)
4438 ldout(cct
, 20) << "put_snap_realm " << realm
->ino
<< " " << realm
4439 << " " << realm
->nref
<< " -> " << (realm
->nref
- 1) << dendl
;
4440 if (--realm
->nref
== 0) {
4441 snap_realms
.erase(realm
->ino
);
4442 if (realm
->pparent
) {
4443 realm
->pparent
->pchildren
.erase(realm
);
4444 put_snap_realm(realm
->pparent
);
4450 bool Client::adjust_realm_parent(SnapRealm
*realm
, inodeno_t parent
)
4452 if (realm
->parent
!= parent
) {
4453 ldout(cct
, 10) << "adjust_realm_parent " << *realm
4454 << " " << realm
->parent
<< " -> " << parent
<< dendl
;
4455 realm
->parent
= parent
;
4456 if (realm
->pparent
) {
4457 realm
->pparent
->pchildren
.erase(realm
);
4458 put_snap_realm(realm
->pparent
);
4460 realm
->pparent
= get_snap_realm(parent
);
4461 realm
->pparent
->pchildren
.insert(realm
);
4467 static bool has_new_snaps(const SnapContext
& old_snapc
,
4468 const SnapContext
& new_snapc
)
4470 return !new_snapc
.snaps
.empty() && new_snapc
.snaps
[0] > old_snapc
.seq
;
4474 void Client::update_snap_trace(bufferlist
& bl
, SnapRealm
**realm_ret
, bool flush
)
4476 SnapRealm
*first_realm
= NULL
;
4477 ldout(cct
, 10) << "update_snap_trace len " << bl
.length() << dendl
;
4479 map
<SnapRealm
*, SnapContext
> dirty_realms
;
4481 bufferlist::iterator p
= bl
.begin();
4485 SnapRealm
*realm
= get_snap_realm(info
.ino());
4487 bool invalidate
= false;
4489 if (info
.seq() > realm
->seq
) {
4490 ldout(cct
, 10) << "update_snap_trace " << *realm
<< " seq " << info
.seq() << " > " << realm
->seq
4494 // writeback any dirty caps _before_ updating snap list (i.e. with old snap info)
4495 // flush me + children
4498 while (!q
.empty()) {
4499 SnapRealm
*realm
= q
.front();
4502 for (set
<SnapRealm
*>::iterator p
= realm
->pchildren
.begin();
4503 p
!= realm
->pchildren
.end();
4507 if (dirty_realms
.count(realm
) == 0) {
4509 dirty_realms
[realm
] = realm
->get_snap_context();
4515 realm
->seq
= info
.seq();
4516 realm
->created
= info
.created();
4517 realm
->parent_since
= info
.parent_since();
4518 realm
->prior_parent_snaps
= info
.prior_parent_snaps
;
4519 realm
->my_snaps
= info
.my_snaps
;
4523 // _always_ verify parent
4524 if (adjust_realm_parent(realm
, info
.parent()))
4528 invalidate_snaprealm_and_children(realm
);
4529 ldout(cct
, 15) << "update_snap_trace " << *realm
<< " self|parent updated" << dendl
;
4530 ldout(cct
, 15) << " snapc " << realm
->get_snap_context() << dendl
;
4532 ldout(cct
, 10) << "update_snap_trace " << *realm
<< " seq " << info
.seq()
4533 << " <= " << realm
->seq
<< " and same parent, SKIPPING" << dendl
;
4537 first_realm
= realm
;
4539 put_snap_realm(realm
);
4542 for (map
<SnapRealm
*, SnapContext
>::iterator q
= dirty_realms
.begin();
4543 q
!= dirty_realms
.end();
4545 SnapRealm
*realm
= q
->first
;
4546 // if there are new snaps ?
4547 if (has_new_snaps(q
->second
, realm
->get_snap_context())) {
4548 ldout(cct
, 10) << " flushing caps on " << *realm
<< dendl
;
4549 xlist
<Inode
*>::iterator r
= realm
->inodes_with_caps
.begin();
4553 queue_cap_snap(in
, q
->second
);
4556 ldout(cct
, 10) << " no new snap on " << *realm
<< dendl
;
4558 put_snap_realm(realm
);
4562 *realm_ret
= first_realm
;
4564 put_snap_realm(first_realm
);
4567 void Client::handle_snap(MClientSnap
*m
)
4569 ldout(cct
, 10) << "handle_snap " << *m
<< dendl
;
4570 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4571 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4577 got_mds_push(session
);
4579 map
<Inode
*, SnapContext
> to_move
;
4580 SnapRealm
*realm
= 0;
4582 if (m
->head
.op
== CEPH_SNAP_OP_SPLIT
) {
4583 assert(m
->head
.split
);
4585 bufferlist::iterator p
= m
->bl
.begin();
4587 assert(info
.ino() == m
->head
.split
);
4589 // flush, then move, ino's.
4590 realm
= get_snap_realm(info
.ino());
4591 ldout(cct
, 10) << " splitting off " << *realm
<< dendl
;
4592 for (vector
<inodeno_t
>::iterator p
= m
->split_inos
.begin();
4593 p
!= m
->split_inos
.end();
4595 vinodeno_t
vino(*p
, CEPH_NOSNAP
);
4596 if (inode_map
.count(vino
)) {
4597 Inode
*in
= inode_map
[vino
];
4598 if (!in
->snaprealm
|| in
->snaprealm
== realm
)
4600 if (in
->snaprealm
->created
> info
.created()) {
4601 ldout(cct
, 10) << " NOT moving " << *in
<< " from _newer_ realm "
4602 << *in
->snaprealm
<< dendl
;
4605 ldout(cct
, 10) << " moving " << *in
<< " from " << *in
->snaprealm
<< dendl
;
4608 in
->snaprealm_item
.remove_myself();
4609 to_move
[in
] = in
->snaprealm
->get_snap_context();
4610 put_snap_realm(in
->snaprealm
);
4614 // move child snaprealms, too
4615 for (vector
<inodeno_t
>::iterator p
= m
->split_realms
.begin();
4616 p
!= m
->split_realms
.end();
4618 ldout(cct
, 10) << "adjusting snaprealm " << *p
<< " parent" << dendl
;
4619 SnapRealm
*child
= get_snap_realm_maybe(*p
);
4622 adjust_realm_parent(child
, realm
->ino
);
4623 put_snap_realm(child
);
4627 update_snap_trace(m
->bl
, NULL
, m
->head
.op
!= CEPH_SNAP_OP_DESTROY
);
4630 for (auto p
= to_move
.begin(); p
!= to_move
.end(); ++p
) {
4631 Inode
*in
= p
->first
;
4632 in
->snaprealm
= realm
;
4633 realm
->inodes_with_caps
.push_back(&in
->snaprealm_item
);
4635 // queue for snap writeback
4636 if (has_new_snaps(p
->second
, realm
->get_snap_context()))
4637 queue_cap_snap(in
, p
->second
);
4639 put_snap_realm(realm
);
4645 void Client::handle_quota(MClientQuota
*m
)
4647 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4648 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4654 got_mds_push(session
);
4656 ldout(cct
, 10) << "handle_quota " << *m
<< " from mds." << mds
<< dendl
;
4658 vinodeno_t
vino(m
->ino
, CEPH_NOSNAP
);
4659 if (inode_map
.count(vino
)) {
4661 in
= inode_map
[vino
];
4664 in
->quota
= m
->quota
;
4665 in
->rstat
= m
->rstat
;
4672 void Client::handle_caps(MClientCaps
*m
)
4674 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4675 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4681 if (m
->osd_epoch_barrier
&& !objecter
->have_map(m
->osd_epoch_barrier
)) {
4682 // Pause RADOS operations until we see the required epoch
4683 objecter
->set_epoch_barrier(m
->osd_epoch_barrier
);
4686 if (m
->osd_epoch_barrier
> cap_epoch_barrier
) {
4687 // Record the barrier so that we will transmit it to MDS when releasing
4688 set_cap_epoch_barrier(m
->osd_epoch_barrier
);
4691 got_mds_push(session
);
4693 m
->clear_payload(); // for if/when we send back to MDS
4696 vinodeno_t
vino(m
->get_ino(), CEPH_NOSNAP
);
4697 if (inode_map
.count(vino
))
4698 in
= inode_map
[vino
];
4700 if (m
->get_op() == CEPH_CAP_OP_IMPORT
) {
4701 ldout(cct
, 5) << "handle_caps don't have vino " << vino
<< " on IMPORT, immediately releasing" << dendl
;
4702 session
->enqueue_cap_release(
4709 ldout(cct
, 5) << "handle_caps don't have vino " << vino
<< ", dropping" << dendl
;
4713 // in case the mds is waiting on e.g. a revocation
4714 flush_cap_releases();
4718 switch (m
->get_op()) {
4719 case CEPH_CAP_OP_EXPORT
:
4720 return handle_cap_export(session
, in
, m
);
4721 case CEPH_CAP_OP_FLUSHSNAP_ACK
:
4722 return handle_cap_flushsnap_ack(session
, in
, m
);
4723 case CEPH_CAP_OP_IMPORT
:
4724 handle_cap_import(session
, in
, m
);
4727 if (in
->caps
.count(mds
) == 0) {
4728 ldout(cct
, 5) << "handle_caps don't have " << *in
<< " cap on mds." << mds
<< dendl
;
4733 Cap
*cap
= in
->caps
[mds
];
4735 switch (m
->get_op()) {
4736 case CEPH_CAP_OP_TRUNC
: return handle_cap_trunc(session
, in
, m
);
4737 case CEPH_CAP_OP_IMPORT
:
4738 case CEPH_CAP_OP_REVOKE
:
4739 case CEPH_CAP_OP_GRANT
: return handle_cap_grant(session
, in
, cap
, m
);
4740 case CEPH_CAP_OP_FLUSH_ACK
: return handle_cap_flush_ack(session
, in
, cap
, m
);
4746 void Client::handle_cap_import(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4748 mds_rank_t mds
= session
->mds_num
;
4750 ldout(cct
, 5) << "handle_cap_import ino " << m
->get_ino() << " mseq " << m
->get_mseq()
4751 << " IMPORT from mds." << mds
<< dendl
;
4753 const mds_rank_t peer_mds
= mds_rank_t(m
->peer
.mds
);
4756 if (m
->peer
.cap_id
&& in
->caps
.count(peer_mds
)) {
4757 cap
= in
->caps
[peer_mds
];
4759 cap_perms
= cap
->latest_perms
;
4764 SnapRealm
*realm
= NULL
;
4765 update_snap_trace(m
->snapbl
, &realm
);
4767 add_update_cap(in
, session
, m
->get_cap_id(),
4768 m
->get_caps(), m
->get_seq(), m
->get_mseq(), m
->get_realm(),
4769 CEPH_CAP_FLAG_AUTH
, cap_perms
);
4771 if (cap
&& cap
->cap_id
== m
->peer
.cap_id
) {
4772 remove_cap(cap
, (m
->peer
.flags
& CEPH_CAP_FLAG_RELEASE
));
4776 put_snap_realm(realm
);
4778 if (in
->auth_cap
&& in
->auth_cap
->session
->mds_num
== mds
) {
4779 // reflush any/all caps (if we are now the auth_cap)
4780 if (in
->cap_snaps
.size())
4781 flush_snaps(in
, true);
4782 if (in
->flushing_caps
)
4783 flush_caps(in
, session
);
4787 void Client::handle_cap_export(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4789 mds_rank_t mds
= session
->mds_num
;
4791 ldout(cct
, 5) << "handle_cap_export ino " << m
->get_ino() << " mseq " << m
->get_mseq()
4792 << " EXPORT from mds." << mds
<< dendl
;
4795 if (in
->caps
.count(mds
))
4796 cap
= in
->caps
[mds
];
4798 const mds_rank_t peer_mds
= mds_rank_t(m
->peer
.mds
);
4800 if (cap
&& cap
->cap_id
== m
->get_cap_id()) {
4801 if (m
->peer
.cap_id
) {
4802 MetaSession
*tsession
= _get_or_open_mds_session(peer_mds
);
4803 if (in
->caps
.count(peer_mds
)) {
4804 Cap
*tcap
= in
->caps
[peer_mds
];
4805 if (tcap
->cap_id
== m
->peer
.cap_id
&&
4806 ceph_seq_cmp(tcap
->seq
, m
->peer
.seq
) < 0) {
4807 tcap
->cap_id
= m
->peer
.cap_id
;
4808 tcap
->seq
= m
->peer
.seq
- 1;
4809 tcap
->issue_seq
= tcap
->seq
;
4810 tcap
->mseq
= m
->peer
.mseq
;
4811 tcap
->issued
|= cap
->issued
;
4812 tcap
->implemented
|= cap
->issued
;
4813 if (cap
== in
->auth_cap
)
4814 in
->auth_cap
= tcap
;
4815 if (in
->auth_cap
== tcap
&& in
->flushing_cap_item
.is_on_list())
4816 adjust_session_flushing_caps(in
, session
, tsession
);
4819 add_update_cap(in
, tsession
, m
->peer
.cap_id
, cap
->issued
,
4820 m
->peer
.seq
- 1, m
->peer
.mseq
, (uint64_t)-1,
4821 cap
== in
->auth_cap
? CEPH_CAP_FLAG_AUTH
: 0,
4825 if (cap
== in
->auth_cap
)
4826 in
->flags
|= I_CAP_DROPPED
;
4829 remove_cap(cap
, false);
4835 void Client::handle_cap_trunc(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4837 mds_rank_t mds
= session
->mds_num
;
4838 assert(in
->caps
[mds
]);
4840 ldout(cct
, 10) << "handle_cap_trunc on ino " << *in
4841 << " size " << in
->size
<< " -> " << m
->get_size()
4844 int implemented
= 0;
4845 int issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
4846 issued
|= implemented
;
4847 update_inode_file_bits(in
, m
->get_truncate_seq(), m
->get_truncate_size(),
4848 m
->get_size(), m
->get_change_attr(), m
->get_time_warp_seq(),
4849 m
->get_ctime(), m
->get_mtime(), m
->get_atime(),
4850 m
->inline_version
, m
->inline_data
, issued
);
4854 void Client::handle_cap_flush_ack(MetaSession
*session
, Inode
*in
, Cap
*cap
, MClientCaps
*m
)
4856 ceph_tid_t flush_ack_tid
= m
->get_client_tid();
4857 int dirty
= m
->get_dirty();
4861 for (map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4862 it
!= in
->flushing_cap_tids
.end(); ) {
4863 if (it
->first
== flush_ack_tid
)
4864 cleaned
= it
->second
;
4865 if (it
->first
<= flush_ack_tid
) {
4866 session
->flushing_caps_tids
.erase(it
->first
);
4867 in
->flushing_cap_tids
.erase(it
++);
4871 cleaned
&= ~it
->second
;
4877 ldout(cct
, 5) << "handle_cap_flush_ack mds." << session
->mds_num
4878 << " cleaned " << ccap_string(cleaned
) << " on " << *in
4879 << " with " << ccap_string(dirty
) << dendl
;
4882 signal_cond_list(in
->waitfor_caps
);
4883 if (session
->flushing_caps_tids
.empty() ||
4884 *session
->flushing_caps_tids
.begin() > flush_ack_tid
)
4889 in
->cap_dirtier_uid
= -1;
4890 in
->cap_dirtier_gid
= -1;
4894 ldout(cct
, 10) << " tid " << m
->get_client_tid() << " != any cap bit tids" << dendl
;
4896 if (in
->flushing_caps
) {
4897 ldout(cct
, 5) << " flushing_caps " << ccap_string(in
->flushing_caps
)
4898 << " -> " << ccap_string(in
->flushing_caps
& ~cleaned
) << dendl
;
4899 in
->flushing_caps
&= ~cleaned
;
4900 if (in
->flushing_caps
== 0) {
4901 ldout(cct
, 10) << " " << *in
<< " !flushing" << dendl
;
4902 num_flushing_caps
--;
4903 if (in
->cap_snaps
.empty())
4904 in
->flushing_cap_item
.remove_myself();
4906 if (!in
->caps_dirty())
4915 void Client::handle_cap_flushsnap_ack(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4917 mds_rank_t mds
= session
->mds_num
;
4918 assert(in
->caps
[mds
]);
4919 snapid_t follows
= m
->get_snap_follows();
4921 if (in
->cap_snaps
.count(follows
)) {
4922 CapSnap
&capsnap
= in
->cap_snaps
.at(follows
);
4923 if (m
->get_client_tid() != capsnap
.flush_tid
) {
4924 ldout(cct
, 10) << " tid " << m
->get_client_tid() << " != " << capsnap
.flush_tid
<< dendl
;
4926 ldout(cct
, 5) << "handle_cap_flushedsnap mds." << mds
<< " flushed snap follows " << follows
4927 << " on " << *in
<< dendl
;
4929 if (in
->get_num_ref() == 1)
4930 tmp_ref
= in
; // make sure inode not get freed while erasing item from in->cap_snaps
4931 if (in
->flushing_caps
== 0 && in
->cap_snaps
.empty())
4932 in
->flushing_cap_item
.remove_myself();
4933 session
->flushing_caps_tids
.erase(capsnap
.flush_tid
);
4934 in
->cap_snaps
.erase(follows
);
4937 ldout(cct
, 5) << "handle_cap_flushedsnap DUP(?) mds." << mds
<< " flushed snap follows " << follows
4938 << " on " << *in
<< dendl
;
4939 // we may not have it if we send multiple FLUSHSNAP requests and (got multiple FLUSHEDSNAPs back)
4945 class C_Client_DentryInvalidate
: public Context
{
4952 C_Client_DentryInvalidate(Client
*c
, Dentry
*dn
, bool del
) :
4953 client(c
), name(dn
->name
) {
4954 if (client
->use_faked_inos()) {
4955 dirino
.ino
= dn
->dir
->parent_inode
->faked_ino
;
4957 ino
.ino
= dn
->inode
->faked_ino
;
4959 dirino
= dn
->dir
->parent_inode
->vino();
4961 ino
= dn
->inode
->vino();
4964 ino
.ino
= inodeno_t();
4966 void finish(int r
) override
{
4967 // _async_dentry_invalidate is responsible for its own locking
4968 assert(!client
->client_lock
.is_locked_by_me());
4969 client
->_async_dentry_invalidate(dirino
, ino
, name
);
4973 void Client::_async_dentry_invalidate(vinodeno_t dirino
, vinodeno_t ino
, string
& name
)
4977 ldout(cct
, 10) << "_async_dentry_invalidate '" << name
<< "' ino " << ino
4978 << " in dir " << dirino
<< dendl
;
4979 dentry_invalidate_cb(callback_handle
, dirino
, ino
, name
);
4982 void Client::_schedule_invalidate_dentry_callback(Dentry
*dn
, bool del
)
4984 if (dentry_invalidate_cb
&& dn
->inode
->ll_ref
> 0)
4985 async_dentry_invalidator
.queue(new C_Client_DentryInvalidate(this, dn
, del
));
4988 void Client::_try_to_trim_inode(Inode
*in
, bool sched_inval
)
4990 int ref
= in
->get_num_ref();
4992 if (in
->dir
&& !in
->dir
->dentries
.empty()) {
4993 for (auto p
= in
->dir
->dentries
.begin();
4994 p
!= in
->dir
->dentries
.end(); ) {
4995 Dentry
*dn
= p
->second
;
4997 /* rmsnap removes whole subtree, need trim inodes recursively.
4998 * we don't need to invalidate dentries recursively. because
4999 * invalidating a directory dentry effectively invalidate
5001 if (in
->snapid
!= CEPH_NOSNAP
&& dn
->inode
&& dn
->inode
->is_dir())
5002 _try_to_trim_inode(dn
->inode
.get(), false);
5004 if (dn
->lru_is_expireable())
5005 unlink(dn
, true, false); // keep dir, drop dentry
5007 if (in
->dir
->dentries
.empty()) {
5013 if (ref
> 0 && (in
->flags
& I_SNAPDIR_OPEN
)) {
5014 InodeRef snapdir
= open_snapdir(in
);
5015 _try_to_trim_inode(snapdir
.get(), false);
5019 if (ref
> 0 && in
->ll_ref
> 0 && sched_inval
) {
5020 set
<Dentry
*>::iterator q
= in
->dn_set
.begin();
5021 while (q
!= in
->dn_set
.end()) {
5023 // FIXME: we play lots of unlink/link tricks when handling MDS replies,
5024 // so in->dn_set doesn't always reflect the state of kernel's dcache.
5025 _schedule_invalidate_dentry_callback(dn
, true);
5026 unlink(dn
, true, true);
5031 void Client::handle_cap_grant(MetaSession
*session
, Inode
*in
, Cap
*cap
, MClientCaps
*m
)
5033 mds_rank_t mds
= session
->mds_num
;
5034 int used
= get_caps_used(in
);
5035 int wanted
= in
->caps_wanted();
5037 const int old_caps
= cap
->issued
;
5038 const int new_caps
= m
->get_caps();
5039 ldout(cct
, 5) << "handle_cap_grant on in " << m
->get_ino()
5040 << " mds." << mds
<< " seq " << m
->get_seq()
5041 << " caps now " << ccap_string(new_caps
)
5042 << " was " << ccap_string(old_caps
) << dendl
;
5043 cap
->seq
= m
->get_seq();
5045 in
->layout
= m
->get_layout();
5048 int implemented
= 0;
5049 int issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
5050 issued
|= implemented
;
5052 if ((issued
& CEPH_CAP_AUTH_EXCL
) == 0) {
5053 in
->mode
= m
->head
.mode
;
5054 in
->uid
= m
->head
.uid
;
5055 in
->gid
= m
->head
.gid
;
5056 in
->btime
= m
->btime
;
5058 bool deleted_inode
= false;
5059 if ((issued
& CEPH_CAP_LINK_EXCL
) == 0) {
5060 in
->nlink
= m
->head
.nlink
;
5061 if (in
->nlink
== 0 &&
5062 (new_caps
& (CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
)))
5063 deleted_inode
= true;
5065 if ((issued
& CEPH_CAP_XATTR_EXCL
) == 0 &&
5066 m
->xattrbl
.length() &&
5067 m
->head
.xattr_version
> in
->xattr_version
) {
5068 bufferlist::iterator p
= m
->xattrbl
.begin();
5069 ::decode(in
->xattrs
, p
);
5070 in
->xattr_version
= m
->head
.xattr_version
;
5072 update_inode_file_bits(in
, m
->get_truncate_seq(), m
->get_truncate_size(), m
->get_size(),
5073 m
->get_change_attr(), m
->get_time_warp_seq(), m
->get_ctime(),
5074 m
->get_mtime(), m
->get_atime(),
5075 m
->inline_version
, m
->inline_data
, issued
);
5078 if (cap
== in
->auth_cap
&&
5079 m
->get_max_size() != in
->max_size
) {
5080 ldout(cct
, 10) << "max_size " << in
->max_size
<< " -> " << m
->get_max_size() << dendl
;
5081 in
->max_size
= m
->get_max_size();
5082 if (in
->max_size
> in
->wanted_max_size
) {
5083 in
->wanted_max_size
= 0;
5084 in
->requested_max_size
= 0;
5089 if (m
->get_op() == CEPH_CAP_OP_IMPORT
&& m
->get_wanted() != wanted
)
5092 check_cap_issue(in
, cap
, new_caps
);
5095 int revoked
= old_caps
& ~new_caps
;
5097 ldout(cct
, 10) << " revocation of " << ccap_string(revoked
) << dendl
;
5098 cap
->issued
= new_caps
;
5099 cap
->implemented
|= new_caps
;
5101 // recall delegations if we're losing caps necessary for them
5102 if (revoked
& ceph_deleg_caps_for_type(CEPH_DELEGATION_RD
))
5103 in
->recall_deleg(false);
5104 else if (revoked
& ceph_deleg_caps_for_type(CEPH_DELEGATION_WR
))
5105 in
->recall_deleg(true);
5107 if (((used
& ~new_caps
) & CEPH_CAP_FILE_BUFFER
)
5108 && !_flush(in
, new C_Client_FlushComplete(this, in
))) {
5109 // waitin' for flush
5110 } else if ((old_caps
& ~new_caps
) & CEPH_CAP_FILE_CACHE
) {
5114 cap
->wanted
= 0; // don't let check_caps skip sending a response to MDS
5118 } else if (old_caps
== new_caps
) {
5119 ldout(cct
, 10) << " caps unchanged at " << ccap_string(old_caps
) << dendl
;
5121 ldout(cct
, 10) << " grant, new caps are " << ccap_string(new_caps
& ~old_caps
) << dendl
;
5122 cap
->issued
= new_caps
;
5123 cap
->implemented
|= new_caps
;
5125 if (cap
== in
->auth_cap
) {
5126 // non-auth MDS is revoking the newly grant caps ?
5127 for (map
<mds_rank_t
, Cap
*>::iterator it
= in
->caps
.begin(); it
!= in
->caps
.end(); ++it
) {
5128 if (it
->second
== cap
)
5130 if (it
->second
->implemented
& ~it
->second
->issued
& new_caps
) {
5143 signal_cond_list(in
->waitfor_caps
);
5145 // may drop inode's last ref
5147 _try_to_trim_inode(in
, true);
5152 int Client::_getgrouplist(gid_t
** sgids
, uid_t uid
, gid_t gid
)
5154 // cppcheck-suppress variableScope
5159 sgid_count
= getgroups_cb(callback_handle
, &sgid_buf
);
5160 if (sgid_count
> 0) {
5166 #if HAVE_GETGROUPLIST
5170 ldout(cct
, 3) << "getting user entry failed" << dendl
;
5173 //use PAM to get the group list
5174 // initial number of group entries, defaults to posix standard of 16
5175 // PAM implementations may provide more than 16 groups....
5177 sgid_buf
= (gid_t
*)malloc(sgid_count
* sizeof(gid_t
));
5178 if (sgid_buf
== NULL
) {
5179 ldout(cct
, 3) << "allocating group memory failed" << dendl
;
5184 #if defined(__APPLE__)
5185 if (getgrouplist(pw
->pw_name
, gid
, (int*)sgid_buf
, &sgid_count
) == -1) {
5187 if (getgrouplist(pw
->pw_name
, gid
, sgid_buf
, &sgid_count
) == -1) {
5189 // we need to resize the group list and try again
5190 void *_realloc
= NULL
;
5191 if ((_realloc
= realloc(sgid_buf
, sgid_count
* sizeof(gid_t
))) == NULL
) {
5192 ldout(cct
, 3) << "allocating group memory failed" << dendl
;
5196 sgid_buf
= (gid_t
*)_realloc
;
5199 // list was successfully retrieved
5209 int Client::inode_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
)
5211 if (perms
.uid() == 0)
5214 if (perms
.uid() != in
->uid
&& (in
->mode
& S_IRWXG
)) {
5215 int ret
= _posix_acl_permission(in
, perms
, want
);
5220 // check permissions before doing anything else
5221 if (!in
->check_mode(perms
, want
))
5226 int Client::xattr_permission(Inode
*in
, const char *name
, unsigned want
,
5227 const UserPerm
& perms
)
5229 int r
= _getattr_for_perm(in
, perms
);
5234 if (strncmp(name
, "system.", 7) == 0) {
5235 if ((want
& MAY_WRITE
) && (perms
.uid() != 0 && perms
.uid() != in
->uid
))
5238 r
= inode_permission(in
, perms
, want
);
5241 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5245 ostream
& operator<<(ostream
&out
, const UserPerm
& perm
) {
5246 out
<< "UserPerm(uid: " << perm
.uid() << ", gid: " << perm
.gid() << ")";
5250 int Client::may_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
5251 const UserPerm
& perms
)
5253 ldout(cct
, 20) << __func__
<< " " << *in
<< "; " << perms
<< dendl
;
5254 int r
= _getattr_for_perm(in
, perms
);
5258 if (mask
& CEPH_SETATTR_SIZE
) {
5259 r
= inode_permission(in
, perms
, MAY_WRITE
);
5265 if (mask
& CEPH_SETATTR_UID
) {
5266 if (perms
.uid() != 0 && (perms
.uid() != in
->uid
|| stx
->stx_uid
!= in
->uid
))
5269 if (mask
& CEPH_SETATTR_GID
) {
5270 if (perms
.uid() != 0 && (perms
.uid() != in
->uid
||
5271 (!perms
.gid_in_groups(stx
->stx_gid
) && stx
->stx_gid
!= in
->gid
)))
5275 if (mask
& CEPH_SETATTR_MODE
) {
5276 if (perms
.uid() != 0 && perms
.uid() != in
->uid
)
5279 gid_t i_gid
= (mask
& CEPH_SETATTR_GID
) ? stx
->stx_gid
: in
->gid
;
5280 if (perms
.uid() != 0 && !perms
.gid_in_groups(i_gid
))
5281 stx
->stx_mode
&= ~S_ISGID
;
5284 if (mask
& (CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
|
5285 CEPH_SETATTR_MTIME
| CEPH_SETATTR_ATIME
)) {
5286 if (perms
.uid() != 0 && perms
.uid() != in
->uid
) {
5287 int check_mask
= CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
;
5288 if (!(mask
& CEPH_SETATTR_MTIME_NOW
))
5289 check_mask
|= CEPH_SETATTR_MTIME
;
5290 if (!(mask
& CEPH_SETATTR_ATIME_NOW
))
5291 check_mask
|= CEPH_SETATTR_ATIME
;
5292 if (check_mask
& mask
) {
5295 r
= inode_permission(in
, perms
, MAY_WRITE
);
5303 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5307 int Client::may_open(Inode
*in
, int flags
, const UserPerm
& perms
)
5309 ldout(cct
, 20) << __func__
<< " " << *in
<< "; " << perms
<< dendl
;
5312 if ((flags
& O_ACCMODE
) == O_WRONLY
)
5314 else if ((flags
& O_ACCMODE
) == O_RDWR
)
5315 want
= MAY_READ
| MAY_WRITE
;
5316 else if ((flags
& O_ACCMODE
) == O_RDONLY
)
5318 if (flags
& O_TRUNC
)
5322 switch (in
->mode
& S_IFMT
) {
5327 if (want
& MAY_WRITE
) {
5334 r
= _getattr_for_perm(in
, perms
);
5338 r
= inode_permission(in
, perms
, want
);
5340 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5344 int Client::may_lookup(Inode
*dir
, const UserPerm
& perms
)
5346 ldout(cct
, 20) << __func__
<< " " << *dir
<< "; " << perms
<< dendl
;
5347 int r
= _getattr_for_perm(dir
, perms
);
5351 r
= inode_permission(dir
, perms
, MAY_EXEC
);
5353 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5357 int Client::may_create(Inode
*dir
, const UserPerm
& perms
)
5359 ldout(cct
, 20) << __func__
<< " " << *dir
<< "; " << perms
<< dendl
;
5360 int r
= _getattr_for_perm(dir
, perms
);
5364 r
= inode_permission(dir
, perms
, MAY_EXEC
| MAY_WRITE
);
5366 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5370 int Client::may_delete(Inode
*dir
, const char *name
, const UserPerm
& perms
)
5372 ldout(cct
, 20) << __func__
<< " " << *dir
<< "; " << "; name " << name
<< "; " << perms
<< dendl
;
5373 int r
= _getattr_for_perm(dir
, perms
);
5377 r
= inode_permission(dir
, perms
, MAY_EXEC
| MAY_WRITE
);
5381 /* 'name == NULL' means rmsnap */
5382 if (perms
.uid() != 0 && name
&& (dir
->mode
& S_ISVTX
)) {
5384 r
= _lookup(dir
, name
, CEPH_CAP_AUTH_SHARED
, &otherin
, perms
);
5387 if (dir
->uid
!= perms
.uid() && otherin
->uid
!= perms
.uid())
5391 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5395 int Client::may_hardlink(Inode
*in
, const UserPerm
& perms
)
5397 ldout(cct
, 20) << __func__
<< " " << *in
<< "; " << perms
<< dendl
;
5398 int r
= _getattr_for_perm(in
, perms
);
5402 if (perms
.uid() == 0 || perms
.uid() == in
->uid
) {
5408 if (!S_ISREG(in
->mode
))
5411 if (in
->mode
& S_ISUID
)
5414 if ((in
->mode
& (S_ISGID
| S_IXGRP
)) == (S_ISGID
| S_IXGRP
))
5417 r
= inode_permission(in
, perms
, MAY_READ
| MAY_WRITE
);
5419 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5423 int Client::_getattr_for_perm(Inode
*in
, const UserPerm
& perms
)
5425 int mask
= CEPH_STAT_CAP_MODE
;
5427 if (acl_type
!= NO_ACL
) {
5428 mask
|= CEPH_STAT_CAP_XATTR
;
5429 force
= in
->xattr_version
== 0;
5431 return _getattr(in
, mask
, perms
, force
);
5434 vinodeno_t
Client::_get_vino(Inode
*in
)
5436 /* The caller must hold the client lock */
5437 return vinodeno_t(in
->ino
, in
->snapid
);
5440 inodeno_t
Client::_get_inodeno(Inode
*in
)
5442 /* The caller must hold the client lock */
5448 * Resolve an MDS spec to a list of MDS daemon GIDs.
5450 * The spec is a string representing a GID, rank, filesystem:rank, or name/id.
5451 * It may be '*' in which case it matches all GIDs.
5453 * If no error is returned, the `targets` vector will be populated with at least
5456 int Client::resolve_mds(
5457 const std::string
&mds_spec
,
5458 std::vector
<mds_gid_t
> *targets
)
5461 assert(targets
!= nullptr);
5464 std::stringstream ss
;
5465 int role_r
= fsmap
->parse_role(mds_spec
, &role
, ss
);
5467 // We got a role, resolve it to a GID
5468 ldout(cct
, 10) << __func__
<< ": resolved '" << mds_spec
<< "' to role '"
5469 << role
<< "'" << dendl
;
5471 fsmap
->get_filesystem(role
.fscid
)->mds_map
.get_info(role
.rank
).global_id
);
5475 std::string strtol_err
;
5476 long long rank_or_gid
= strict_strtoll(mds_spec
.c_str(), 10, &strtol_err
);
5477 if (strtol_err
.empty()) {
5478 // It is a possible GID
5479 const mds_gid_t mds_gid
= mds_gid_t(rank_or_gid
);
5480 if (fsmap
->gid_exists(mds_gid
)) {
5481 ldout(cct
, 10) << __func__
<< ": validated GID " << mds_gid
<< dendl
;
5482 targets
->push_back(mds_gid
);
5484 lderr(cct
) << __func__
<< ": GID " << mds_gid
<< " not in MDS map"
5488 } else if (mds_spec
== "*") {
5489 // It is a wildcard: use all MDSs
5490 const auto mds_info
= fsmap
->get_mds_info();
5492 if (mds_info
.empty()) {
5493 lderr(cct
) << __func__
<< ": * passed but no MDS daemons found" << dendl
;
5497 for (const auto i
: mds_info
) {
5498 targets
->push_back(i
.first
);
5501 // It did not parse as an integer, it is not a wildcard, it must be a name
5502 const mds_gid_t mds_gid
= fsmap
->find_mds_gid_by_name(mds_spec
);
5504 lderr(cct
) << "MDS ID '" << mds_spec
<< "' not found" << dendl
;
5506 lderr(cct
) << "FSMap: " << *fsmap
<< dendl
;
5510 ldout(cct
, 10) << __func__
<< ": resolved ID '" << mds_spec
5511 << "' to GID " << mds_gid
<< dendl
;
5512 targets
->push_back(mds_gid
);
5521 * Authenticate with mon and establish global ID
5523 int Client::authenticate()
5525 assert(client_lock
.is_locked_by_me());
5527 if (monclient
->is_authenticated()) {
5531 client_lock
.Unlock();
5532 int r
= monclient
->authenticate(cct
->_conf
->client_mount_timeout
);
5538 whoami
= monclient
->get_global_id();
5539 messenger
->set_myname(entity_name_t::CLIENT(whoami
.v
));
5544 int Client::fetch_fsmap(bool user
)
5547 // Retrieve FSMap to enable looking up daemon addresses. We need FSMap
5548 // rather than MDSMap because no one MDSMap contains all the daemons, and
5549 // a `tell` can address any daemon.
5550 version_t fsmap_latest
;
5553 monclient
->get_version("fsmap", &fsmap_latest
, NULL
, &cond
);
5554 client_lock
.Unlock();
5557 } while (r
== -EAGAIN
);
5560 lderr(cct
) << "Failed to learn FSMap version: " << cpp_strerror(r
) << dendl
;
5564 ldout(cct
, 10) << __func__
<< " learned FSMap version " << fsmap_latest
<< dendl
;
5567 if (!fsmap_user
|| fsmap_user
->get_epoch() < fsmap_latest
) {
5568 monclient
->sub_want("fsmap.user", fsmap_latest
, CEPH_SUBSCRIBE_ONETIME
);
5569 monclient
->renew_subs();
5570 wait_on_list(waiting_for_fsmap
);
5573 assert(fsmap_user
->get_epoch() >= fsmap_latest
);
5575 if (!fsmap
|| fsmap
->get_epoch() < fsmap_latest
) {
5576 monclient
->sub_want("fsmap", fsmap_latest
, CEPH_SUBSCRIBE_ONETIME
);
5577 monclient
->renew_subs();
5578 wait_on_list(waiting_for_fsmap
);
5581 assert(fsmap
->get_epoch() >= fsmap_latest
);
5583 ldout(cct
, 10) << __func__
<< " finished waiting for FSMap version "
5584 << fsmap_latest
<< dendl
;
5590 * @mds_spec one of ID, rank, GID, "*"
5593 int Client::mds_command(
5594 const std::string
&mds_spec
,
5595 const vector
<string
>& cmd
,
5596 const bufferlist
& inbl
,
5601 Mutex::Locker
lock(client_lock
);
5612 r
= fetch_fsmap(false);
5617 // Look up MDS target(s) of the command
5618 std::vector
<mds_gid_t
> targets
;
5619 r
= resolve_mds(mds_spec
, &targets
);
5624 // If daemons are laggy, we won't send them commands. If all
5625 // are laggy then we fail.
5626 std::vector
<mds_gid_t
> non_laggy
;
5627 for (const auto gid
: targets
) {
5628 const auto info
= fsmap
->get_info_gid(gid
);
5629 if (!info
.laggy()) {
5630 non_laggy
.push_back(gid
);
5633 if (non_laggy
.size() == 0) {
5634 *outs
= "All targeted MDS daemons are laggy";
5638 if (metadata
.empty()) {
5639 // We are called on an unmounted client, so metadata
5640 // won't be initialized yet.
5641 populate_metadata("");
5644 // Send commands to targets
5645 C_GatherBuilder
gather(cct
, onfinish
);
5646 for (const auto target_gid
: non_laggy
) {
5647 const auto info
= fsmap
->get_info_gid(target_gid
);
5649 // Open a connection to the target MDS
5650 entity_inst_t inst
= info
.get_inst();
5651 ConnectionRef conn
= messenger
->get_connection(inst
);
5653 // Generate MDSCommandOp state
5654 auto &op
= command_table
.start_command();
5656 op
.on_finish
= gather
.new_sub();
5661 op
.mds_gid
= target_gid
;
5664 ldout(cct
, 4) << __func__
<< ": new command op to " << target_gid
5665 << " tid=" << op
.tid
<< cmd
<< dendl
;
5667 // Construct and send MCommand
5668 MCommand
*m
= op
.get_message(monclient
->get_fsid());
5669 conn
->send_message(m
);
5676 void Client::handle_command_reply(MCommandReply
*m
)
5678 ceph_tid_t
const tid
= m
->get_tid();
5680 ldout(cct
, 10) << __func__
<< ": tid=" << m
->get_tid() << dendl
;
5682 if (!command_table
.exists(tid
)) {
5683 ldout(cct
, 1) << __func__
<< ": unknown tid " << tid
<< ", dropping" << dendl
;
5688 auto &op
= command_table
.get_command(tid
);
5690 op
.outbl
->claim(m
->get_data());
5697 op
.on_finish
->complete(m
->r
);
5700 command_table
.erase(tid
);
5705 // -------------------
5708 int Client::mount(const std::string
&mount_root
, const UserPerm
& perms
,
5711 Mutex::Locker
lock(client_lock
);
5714 ldout(cct
, 5) << "already mounted" << dendl
;
5720 int r
= authenticate();
5722 lderr(cct
) << "authentication failed: " << cpp_strerror(r
) << dendl
;
5726 std::string want
= "mdsmap";
5727 const auto &mds_ns
= cct
->_conf
->client_mds_namespace
;
5728 if (!mds_ns
.empty()) {
5729 r
= fetch_fsmap(true);
5732 fs_cluster_id_t cid
= fsmap_user
->get_fs_cid(mds_ns
);
5733 if (cid
== FS_CLUSTER_ID_NONE
)
5736 std::ostringstream oss
;
5737 oss
<< want
<< "." << cid
;
5740 ldout(cct
, 10) << "Subscribing to map '" << want
<< "'" << dendl
;
5742 monclient
->sub_want(want
, 0, 0);
5743 monclient
->renew_subs();
5745 tick(); // start tick
5749 auto availability
= mdsmap
->is_cluster_available();
5750 if (availability
== MDSMap::STUCK_UNAVAILABLE
) {
5752 ldout(cct
, 10) << "mds cluster unavailable: epoch=" << mdsmap
->get_epoch() << dendl
;
5753 return CEPH_FUSE_NO_MDS_UP
;
5754 } else if (availability
== MDSMap::AVAILABLE
) {
5755 // Continue to mount
5757 } else if (availability
== MDSMap::TRANSIENT_UNAVAILABLE
) {
5758 // Else, wait. MDSMonitor will update the map to bring
5759 // us to a conclusion eventually.
5760 wait_on_list(waiting_for_mdsmap
);
5762 // Unexpected value!
5768 populate_metadata(mount_root
.empty() ? "/" : mount_root
);
5770 filepath
fp(CEPH_INO_ROOT
);
5771 if (!mount_root
.empty()) {
5772 fp
= filepath(mount_root
.c_str());
5775 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_GETATTR
);
5776 req
->set_filepath(fp
);
5777 req
->head
.args
.getattr
.mask
= CEPH_STAT_CAP_INODE_ALL
;
5778 int res
= make_request(req
, perms
);
5780 if (res
== -EACCES
&& root
) {
5781 ldout(cct
, 1) << __func__
<< " EACCES on parent of mount point; quotas may not work" << dendl
;
5799 if (!cct
->_conf
->client_trace
.empty()) {
5800 traceout
.open(cct
->_conf
->client_trace
.c_str());
5801 if (traceout
.is_open()) {
5802 ldout(cct
, 1) << "opened trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5804 ldout(cct
, 1) << "FAILED to open trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5809 ldout(cct, 3) << "op: // client trace data structs" << dendl;
5810 ldout(cct, 3) << "op: struct stat st;" << dendl;
5811 ldout(cct, 3) << "op: struct utimbuf utim;" << dendl;
5812 ldout(cct, 3) << "op: int readlinkbuf_len = 1000;" << dendl;
5813 ldout(cct, 3) << "op: char readlinkbuf[readlinkbuf_len];" << dendl;
5814 ldout(cct, 3) << "op: map<string, inode_t*> dir_contents;" << dendl;
5815 ldout(cct, 3) << "op: map<int, int> open_files;" << dendl;
5816 ldout(cct, 3) << "op: int fd;" << dendl;
5823 void Client::_close_sessions()
5825 while (!mds_sessions
.empty()) {
5826 // send session closes!
5827 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5828 p
!= mds_sessions
.end();
5830 if (p
->second
->state
!= MetaSession::STATE_CLOSING
) {
5831 _close_mds_session(p
->second
);
5835 // wait for sessions to close
5836 ldout(cct
, 2) << "waiting for " << mds_sessions
.size() << " mds sessions to close" << dendl
;
5837 mount_cond
.Wait(client_lock
);
5841 void Client::flush_mdlog_sync()
5843 if (mds_requests
.empty())
5845 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5846 p
!= mds_sessions
.end();
5848 MetaSession
*s
= p
->second
;
5853 void Client::flush_mdlog(MetaSession
*session
)
5855 // Only send this to Luminous or newer MDS daemons, older daemons
5856 // will crash if they see an unknown CEPH_SESSION_* value in this msg.
5857 const uint64_t features
= session
->con
->get_features();
5858 if (HAVE_FEATURE(features
, SERVER_LUMINOUS
)) {
5859 MClientSession
*m
= new MClientSession(CEPH_SESSION_REQUEST_FLUSH_MDLOG
);
5860 session
->con
->send_message(m
);
5865 void Client::_unmount()
5870 ldout(cct
, 2) << "unmounting" << dendl
;
5875 flush_mdlog_sync(); // flush the mdlog for pending requests, if any
5876 while (!mds_requests
.empty()) {
5877 ldout(cct
, 10) << "waiting on " << mds_requests
.size() << " requests" << dendl
;
5878 mount_cond
.Wait(client_lock
);
5882 timer
.cancel_event(tick_event
);
5887 // clean up any unclosed files
5888 while (!fd_map
.empty()) {
5889 Fh
*fh
= fd_map
.begin()->second
;
5890 fd_map
.erase(fd_map
.begin());
5891 ldout(cct
, 0) << " destroyed lost open file " << fh
<< " on " << *fh
->inode
<< dendl
;
5895 while (!ll_unclosed_fh_set
.empty()) {
5896 set
<Fh
*>::iterator it
= ll_unclosed_fh_set
.begin();
5898 ll_unclosed_fh_set
.erase(fh
);
5899 ldout(cct
, 0) << " destroyed lost open file " << fh
<< " on " << *(fh
->inode
) << dendl
;
5903 while (!opened_dirs
.empty()) {
5904 dir_result_t
*dirp
= *opened_dirs
.begin();
5905 ldout(cct
, 0) << " destroyed lost open dir " << dirp
<< " on " << *dirp
->inode
<< dendl
;
5912 ldout(cct
, 0) << " skipping clean shutdown, we are blacklisted" << dendl
;
5914 if (cct
->_conf
->client_oc
) {
5915 // Purge all cached data so that ObjectCacher doesn't get hung up
5916 // trying to flush it. ObjectCacher's behaviour on EBLACKLISTED
5917 // is to just leave things marked dirty
5918 // (http://tracker.ceph.com/issues/9105)
5919 for (const auto &i
: inode_map
) {
5920 objectcacher
->purge_set(&(i
.second
->oset
));
5928 while (unsafe_sync_write
> 0) {
5929 ldout(cct
, 0) << unsafe_sync_write
<< " unsafe_sync_writes, waiting" << dendl
;
5930 mount_cond
.Wait(client_lock
);
5933 if (cct
->_conf
->client_oc
) {
5934 // flush/release all buffered data
5935 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator next
;
5936 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
= inode_map
.begin();
5937 p
!= inode_map
.end();
5941 Inode
*in
= p
->second
;
5943 ldout(cct
, 0) << "null inode_map entry ino " << p
->first
<< dendl
;
5946 if (!in
->caps
.empty()) {
5947 InodeRef
tmp_ref(in
);
5949 _flush(in
, new C_Client_FlushComplete(this, in
));
5955 wait_sync_caps(last_flush_tid
);
5960 while (lru
.lru_get_size() > 0 ||
5961 !inode_map
.empty()) {
5962 ldout(cct
, 2) << "cache still has " << lru
.lru_get_size()
5963 << "+" << inode_map
.size() << " items"
5964 << ", waiting (for caps to release?)"
5966 utime_t until
= ceph_clock_now() + utime_t(5, 0);
5967 int r
= mount_cond
.WaitUntil(client_lock
, until
);
5968 if (r
== ETIMEDOUT
) {
5972 assert(lru
.lru_get_size() == 0);
5973 assert(inode_map
.empty());
5976 if (!cct
->_conf
->client_trace
.empty()) {
5977 ldout(cct
, 1) << "closing trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5985 ldout(cct
, 2) << "unmounted." << dendl
;
5988 void Client::unmount()
5990 Mutex::Locker
lock(client_lock
);
5994 void Client::flush_cap_releases()
5996 // send any cap releases
5997 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5998 p
!= mds_sessions
.end();
6000 if (p
->second
->release
&& mdsmap
->is_clientreplay_or_active_or_stopping(
6002 if (cct
->_conf
->client_inject_release_failure
) {
6003 ldout(cct
, 20) << __func__
<< " injecting failure to send cap release message" << dendl
;
6004 p
->second
->release
->put();
6006 p
->second
->con
->send_message(p
->second
->release
);
6008 p
->second
->release
= 0;
6015 if (cct
->_conf
->client_debug_inject_tick_delay
> 0) {
6016 sleep(cct
->_conf
->client_debug_inject_tick_delay
);
6017 assert(0 == cct
->_conf
->set_val("client_debug_inject_tick_delay", "0"));
6018 cct
->_conf
->apply_changes(NULL
);
6021 ldout(cct
, 21) << "tick" << dendl
;
6022 tick_event
= timer
.add_event_after(
6023 cct
->_conf
->client_tick_interval
,
6024 new FunctionContext([this](int) {
6025 // Called back via Timer, which takes client_lock for us
6026 assert(client_lock
.is_locked_by_me());
6029 utime_t now
= ceph_clock_now();
6031 if (!mounted
&& !mds_requests
.empty()) {
6032 MetaRequest
*req
= mds_requests
.begin()->second
;
6033 if (req
->op_stamp
+ cct
->_conf
->client_mount_timeout
< now
) {
6034 req
->abort(-ETIMEDOUT
);
6035 if (req
->caller_cond
) {
6037 req
->caller_cond
->Signal();
6039 signal_cond_list(waiting_for_mdsmap
);
6040 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
6041 p
!= mds_sessions
.end();
6043 signal_context_list(p
->second
->waiting_for_open
);
6047 if (mdsmap
->get_epoch()) {
6049 utime_t el
= now
- last_cap_renew
;
6050 if (el
> mdsmap
->get_session_timeout() / 3.0)
6053 flush_cap_releases();
6057 xlist
<Inode
*>::iterator p
= delayed_caps
.begin();
6061 if (in
->hold_caps_until
> now
)
6063 delayed_caps
.pop_front();
6064 cap_list
.push_back(&in
->cap_item
);
6065 check_caps(in
, CHECK_CAPS_NODELAY
);
6071 void Client::renew_caps()
6073 ldout(cct
, 10) << "renew_caps()" << dendl
;
6074 last_cap_renew
= ceph_clock_now();
6076 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
6077 p
!= mds_sessions
.end();
6079 ldout(cct
, 15) << "renew_caps requesting from mds." << p
->first
<< dendl
;
6080 if (mdsmap
->get_state(p
->first
) >= MDSMap::STATE_REJOIN
)
6081 renew_caps(p
->second
);
6085 void Client::renew_caps(MetaSession
*session
)
6087 ldout(cct
, 10) << "renew_caps mds." << session
->mds_num
<< dendl
;
6088 session
->last_cap_renew_request
= ceph_clock_now();
6089 uint64_t seq
= ++session
->cap_renew_seq
;
6090 session
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_RENEWCAPS
, seq
));
6094 // ===============================================================
6095 // high level (POSIXy) interface
6097 int Client::_do_lookup(Inode
*dir
, const string
& name
, int mask
,
6098 InodeRef
*target
, const UserPerm
& perms
)
6100 int op
= dir
->snapid
== CEPH_SNAPDIR
? CEPH_MDS_OP_LOOKUPSNAP
: CEPH_MDS_OP_LOOKUP
;
6101 MetaRequest
*req
= new MetaRequest(op
);
6103 dir
->make_nosnap_relative_path(path
);
6104 path
.push_dentry(name
);
6105 req
->set_filepath(path
);
6106 req
->set_inode(dir
);
6107 if (cct
->_conf
->client_debug_getattr_caps
&& op
== CEPH_MDS_OP_LOOKUP
)
6108 mask
|= DEBUG_GETATTR_CAPS
;
6109 req
->head
.args
.getattr
.mask
= mask
;
6111 ldout(cct
, 10) << "_do_lookup on " << path
<< dendl
;
6113 int r
= make_request(req
, perms
, target
);
6114 ldout(cct
, 10) << "_do_lookup res is " << r
<< dendl
;
6118 int Client::_lookup(Inode
*dir
, const string
& dname
, int mask
, InodeRef
*target
,
6119 const UserPerm
& perms
)
6124 if (!dir
->is_dir()) {
6129 if (dname
== "..") {
6130 if (dir
->dn_set
.empty())
6133 *target
= dir
->get_first_parent()->dir
->parent_inode
; //dirs can't be hard-linked
6142 if (dname
.length() > NAME_MAX
) {
6147 if (dname
== cct
->_conf
->client_snapdir
&&
6148 dir
->snapid
== CEPH_NOSNAP
) {
6149 *target
= open_snapdir(dir
);
6154 dir
->dir
->dentries
.count(dname
)) {
6155 dn
= dir
->dir
->dentries
[dname
];
6157 ldout(cct
, 20) << "_lookup have dn " << dname
<< " mds." << dn
->lease_mds
<< " ttl " << dn
->lease_ttl
6158 << " seq " << dn
->lease_seq
6161 if (!dn
->inode
|| dn
->inode
->caps_issued_mask(mask
, true)) {
6162 // is dn lease valid?
6163 utime_t now
= ceph_clock_now();
6164 if (dn
->lease_mds
>= 0 &&
6165 dn
->lease_ttl
> now
&&
6166 mds_sessions
.count(dn
->lease_mds
)) {
6167 MetaSession
*s
= mds_sessions
[dn
->lease_mds
];
6168 if (s
->cap_ttl
> now
&&
6169 s
->cap_gen
== dn
->lease_gen
) {
6170 // touch this mds's dir cap too, even though we don't _explicitly_ use it here, to
6171 // make trim_caps() behave.
6172 dir
->try_touch_cap(dn
->lease_mds
);
6175 ldout(cct
, 20) << " bad lease, cap_ttl " << s
->cap_ttl
<< ", cap_gen " << s
->cap_gen
6176 << " vs lease_gen " << dn
->lease_gen
<< dendl
;
6179 if (dir
->caps_issued_mask(CEPH_CAP_FILE_SHARED
, true)) {
6180 if (dn
->cap_shared_gen
== dir
->shared_gen
&&
6181 (!dn
->inode
|| dn
->inode
->caps_issued_mask(mask
, true)))
6183 if (!dn
->inode
&& (dir
->flags
& I_COMPLETE
)) {
6184 ldout(cct
, 10) << "_lookup concluded ENOENT locally for "
6185 << *dir
<< " dn '" << dname
<< "'" << dendl
;
6190 ldout(cct
, 20) << " no cap on " << dn
->inode
->vino() << dendl
;
6193 // can we conclude ENOENT locally?
6194 if (dir
->caps_issued_mask(CEPH_CAP_FILE_SHARED
, true) &&
6195 (dir
->flags
& I_COMPLETE
)) {
6196 ldout(cct
, 10) << "_lookup concluded ENOENT locally for " << *dir
<< " dn '" << dname
<< "'" << dendl
;
6201 r
= _do_lookup(dir
, dname
, mask
, target
, perms
);
6206 *target
= dn
->inode
;
6214 ldout(cct
, 10) << "_lookup " << *dir
<< " " << dname
<< " = " << r
<< dendl
;
6216 ldout(cct
, 10) << "_lookup " << *dir
<< " " << dname
<< " = " << **target
<< dendl
;
6220 int Client::get_or_create(Inode
*dir
, const char* name
,
6221 Dentry
**pdn
, bool expect_null
)
6224 ldout(cct
, 20) << "get_or_create " << *dir
<< " name " << name
<< dendl
;
6226 if (dir
->dir
->dentries
.count(name
)) {
6227 Dentry
*dn
= dir
->dir
->dentries
[name
];
6229 // is dn lease valid?
6230 utime_t now
= ceph_clock_now();
6232 dn
->lease_mds
>= 0 &&
6233 dn
->lease_ttl
> now
&&
6234 mds_sessions
.count(dn
->lease_mds
)) {
6235 MetaSession
*s
= mds_sessions
[dn
->lease_mds
];
6236 if (s
->cap_ttl
> now
&&
6237 s
->cap_gen
== dn
->lease_gen
) {
6244 // otherwise link up a new one
6245 *pdn
= link(dir
->dir
, name
, NULL
, NULL
);
6252 int Client::path_walk(const filepath
& origpath
, InodeRef
*end
,
6253 const UserPerm
& perms
, bool followsym
, int mask
)
6255 filepath path
= origpath
;
6257 if (origpath
.absolute())
6263 ldout(cct
, 10) << "path_walk " << path
<< dendl
;
6268 while (i
< path
.depth() && cur
) {
6270 const string
&dname
= path
[i
];
6271 ldout(cct
, 10) << " " << i
<< " " << *cur
<< " " << dname
<< dendl
;
6272 ldout(cct
, 20) << " (path is " << path
<< ")" << dendl
;
6274 if (cct
->_conf
->client_permissions
) {
6275 int r
= may_lookup(cur
.get(), perms
);
6278 caps
= CEPH_CAP_AUTH_SHARED
;
6281 /* Get extra requested caps on the last component */
6282 if (i
== (path
.depth() - 1))
6284 int r
= _lookup(cur
.get(), dname
, caps
, &next
, perms
);
6287 // only follow trailing symlink if followsym. always follow
6288 // 'directory' symlinks.
6289 if (next
&& next
->is_symlink()) {
6291 ldout(cct
, 20) << " symlink count " << symlinks
<< ", value is '" << next
->symlink
<< "'" << dendl
;
6292 if (symlinks
> MAXSYMLINKS
) {
6296 if (i
< path
.depth() - 1) {
6298 // replace consumed components of path with symlink dir target
6299 filepath
resolved(next
->symlink
.c_str());
6300 resolved
.append(path
.postfixpath(i
+ 1));
6303 if (next
->symlink
[0] == '/') {
6307 } else if (followsym
) {
6308 if (next
->symlink
[0] == '/') {
6309 path
= next
->symlink
.c_str();
6314 filepath
more(next
->symlink
.c_str());
6315 // we need to remove the symlink component from off of the path
6316 // before adding the target that the symlink points to. remain
6317 // at the same position in the path.
6337 int Client::link(const char *relexisting
, const char *relpath
, const UserPerm
& perm
)
6339 Mutex::Locker
lock(client_lock
);
6340 tout(cct
) << "link" << std::endl
;
6341 tout(cct
) << relexisting
<< std::endl
;
6342 tout(cct
) << relpath
<< std::endl
;
6347 filepath
existing(relexisting
);
6350 int r
= path_walk(existing
, &in
, perm
, true);
6353 if (std::string(relpath
) == "/") {
6357 filepath
path(relpath
);
6358 string name
= path
.last_dentry();
6361 r
= path_walk(path
, &dir
, perm
, true);
6364 if (cct
->_conf
->client_permissions
) {
6365 if (S_ISDIR(in
->mode
)) {
6369 r
= may_hardlink(in
.get(), perm
);
6372 r
= may_create(dir
.get(), perm
);
6376 r
= _link(in
.get(), dir
.get(), name
.c_str(), perm
);
6380 int Client::unlink(const char *relpath
, const UserPerm
& perm
)
6382 Mutex::Locker
lock(client_lock
);
6383 tout(cct
) << "unlink" << std::endl
;
6384 tout(cct
) << relpath
<< std::endl
;
6389 if (std::string(relpath
) == "/")
6392 filepath
path(relpath
);
6393 string name
= path
.last_dentry();
6396 int r
= path_walk(path
, &dir
, perm
);
6399 if (cct
->_conf
->client_permissions
) {
6400 r
= may_delete(dir
.get(), name
.c_str(), perm
);
6404 return _unlink(dir
.get(), name
.c_str(), perm
);
6407 int Client::rename(const char *relfrom
, const char *relto
, const UserPerm
& perm
)
6409 Mutex::Locker
lock(client_lock
);
6410 tout(cct
) << "rename" << std::endl
;
6411 tout(cct
) << relfrom
<< std::endl
;
6412 tout(cct
) << relto
<< std::endl
;
6417 if (std::string(relfrom
) == "/" || std::string(relto
) == "/")
6420 filepath
from(relfrom
);
6422 string fromname
= from
.last_dentry();
6424 string toname
= to
.last_dentry();
6427 InodeRef fromdir
, todir
;
6428 int r
= path_walk(from
, &fromdir
, perm
);
6431 r
= path_walk(to
, &todir
, perm
);
6435 if (cct
->_conf
->client_permissions
) {
6436 int r
= may_delete(fromdir
.get(), fromname
.c_str(), perm
);
6439 r
= may_delete(todir
.get(), toname
.c_str(), perm
);
6440 if (r
< 0 && r
!= -ENOENT
)
6443 r
= _rename(fromdir
.get(), fromname
.c_str(), todir
.get(), toname
.c_str(), perm
);
6450 int Client::mkdir(const char *relpath
, mode_t mode
, const UserPerm
& perm
)
6452 Mutex::Locker
lock(client_lock
);
6453 tout(cct
) << "mkdir" << std::endl
;
6454 tout(cct
) << relpath
<< std::endl
;
6455 tout(cct
) << mode
<< std::endl
;
6456 ldout(cct
, 10) << "mkdir: " << relpath
<< dendl
;
6461 if (std::string(relpath
) == "/")
6464 filepath
path(relpath
);
6465 string name
= path
.last_dentry();
6468 int r
= path_walk(path
, &dir
, perm
);
6471 if (cct
->_conf
->client_permissions
) {
6472 r
= may_create(dir
.get(), perm
);
6476 return _mkdir(dir
.get(), name
.c_str(), mode
, perm
);
6479 int Client::mkdirs(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
6481 Mutex::Locker
lock(client_lock
);
6482 ldout(cct
, 10) << "Client::mkdirs " << relpath
<< dendl
;
6483 tout(cct
) << "mkdirs" << std::endl
;
6484 tout(cct
) << relpath
<< std::endl
;
6485 tout(cct
) << mode
<< std::endl
;
6490 //get through existing parts of path
6491 filepath
path(relpath
);
6493 int r
= 0, caps
= 0;
6496 for (i
=0; i
<path
.depth(); ++i
) {
6497 if (cct
->_conf
->client_permissions
) {
6498 r
= may_lookup(cur
.get(), perms
);
6501 caps
= CEPH_CAP_AUTH_SHARED
;
6503 r
= _lookup(cur
.get(), path
[i
].c_str(), caps
, &next
, perms
);
6508 //check that we have work left to do
6509 if (i
==path
.depth()) return -EEXIST
;
6510 if (r
!=-ENOENT
) return r
;
6511 ldout(cct
, 20) << "mkdirs got through " << i
<< " directories on path " << relpath
<< dendl
;
6512 //make new directory at each level
6513 for (; i
<path
.depth(); ++i
) {
6514 if (cct
->_conf
->client_permissions
) {
6515 r
= may_create(cur
.get(), perms
);
6520 r
= _mkdir(cur
.get(), path
[i
].c_str(), mode
, perms
, &next
);
6522 //check proper creation/existence
6523 if(-EEXIST
== r
&& i
< path
.depth() - 1) {
6524 r
= _lookup(cur
.get(), path
[i
].c_str(), CEPH_CAP_AUTH_SHARED
, &next
, perms
);
6528 //move to new dir and continue
6530 ldout(cct
, 20) << "mkdirs: successfully created directory "
6531 << filepath(cur
->ino
).get_path() << dendl
;
6536 int Client::rmdir(const char *relpath
, const UserPerm
& perms
)
6538 Mutex::Locker
lock(client_lock
);
6539 tout(cct
) << "rmdir" << std::endl
;
6540 tout(cct
) << relpath
<< std::endl
;
6545 if (std::string(relpath
) == "/")
6548 filepath
path(relpath
);
6549 string name
= path
.last_dentry();
6552 int r
= path_walk(path
, &dir
, perms
);
6555 if (cct
->_conf
->client_permissions
) {
6556 int r
= may_delete(dir
.get(), name
.c_str(), perms
);
6560 return _rmdir(dir
.get(), name
.c_str(), perms
);
6563 int Client::mknod(const char *relpath
, mode_t mode
, const UserPerm
& perms
, dev_t rdev
)
6565 Mutex::Locker
lock(client_lock
);
6566 tout(cct
) << "mknod" << std::endl
;
6567 tout(cct
) << relpath
<< std::endl
;
6568 tout(cct
) << mode
<< std::endl
;
6569 tout(cct
) << rdev
<< std::endl
;
6574 if (std::string(relpath
) == "/")
6577 filepath
path(relpath
);
6578 string name
= path
.last_dentry();
6581 int r
= path_walk(path
, &dir
, perms
);
6584 if (cct
->_conf
->client_permissions
) {
6585 int r
= may_create(dir
.get(), perms
);
6589 return _mknod(dir
.get(), name
.c_str(), mode
, rdev
, perms
);
6594 int Client::symlink(const char *target
, const char *relpath
, const UserPerm
& perms
)
6596 Mutex::Locker
lock(client_lock
);
6597 tout(cct
) << "symlink" << std::endl
;
6598 tout(cct
) << target
<< std::endl
;
6599 tout(cct
) << relpath
<< std::endl
;
6604 if (std::string(relpath
) == "/")
6607 filepath
path(relpath
);
6608 string name
= path
.last_dentry();
6611 int r
= path_walk(path
, &dir
, perms
);
6614 if (cct
->_conf
->client_permissions
) {
6615 int r
= may_create(dir
.get(), perms
);
6619 return _symlink(dir
.get(), name
.c_str(), target
, perms
);
6622 int Client::readlink(const char *relpath
, char *buf
, loff_t size
, const UserPerm
& perms
)
6624 Mutex::Locker
lock(client_lock
);
6625 tout(cct
) << "readlink" << std::endl
;
6626 tout(cct
) << relpath
<< std::endl
;
6631 filepath
path(relpath
);
6633 int r
= path_walk(path
, &in
, perms
, false);
6637 return _readlink(in
.get(), buf
, size
);
6640 int Client::_readlink(Inode
*in
, char *buf
, size_t size
)
6642 if (!in
->is_symlink())
6645 // copy into buf (at most size bytes)
6646 int r
= in
->symlink
.length();
6649 memcpy(buf
, in
->symlink
.c_str(), r
);
6656 int Client::_getattr(Inode
*in
, int mask
, const UserPerm
& perms
, bool force
)
6658 bool yes
= in
->caps_issued_mask(mask
, true);
6660 ldout(cct
, 10) << "_getattr mask " << ccap_string(mask
) << " issued=" << yes
<< dendl
;
6664 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_GETATTR
);
6666 in
->make_nosnap_relative_path(path
);
6667 req
->set_filepath(path
);
6669 req
->head
.args
.getattr
.mask
= mask
;
6671 int res
= make_request(req
, perms
);
6672 ldout(cct
, 10) << "_getattr result=" << res
<< dendl
;
6676 int Client::_do_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
6677 const UserPerm
& perms
, InodeRef
*inp
)
6679 int issued
= in
->caps_issued();
6681 ldout(cct
, 10) << "_setattr mask " << mask
<< " issued " <<
6682 ccap_string(issued
) << dendl
;
6684 if (in
->snapid
!= CEPH_NOSNAP
) {
6687 if ((mask
& CEPH_SETATTR_SIZE
) &&
6688 (unsigned long)stx
->stx_size
> in
->size
&&
6689 is_quota_bytes_exceeded(in
, (unsigned long)stx
->stx_size
- in
->size
,
6694 // make the change locally?
6695 if ((in
->cap_dirtier_uid
>= 0 && perms
.uid() != in
->cap_dirtier_uid
) ||
6696 (in
->cap_dirtier_gid
>= 0 && perms
.gid() != in
->cap_dirtier_gid
)) {
6697 ldout(cct
, 10) << __func__
<< " caller " << perms
.uid() << ":" << perms
.gid()
6698 << " != cap dirtier " << in
->cap_dirtier_uid
<< ":"
6699 << in
->cap_dirtier_gid
<< ", forcing sync setattr"
6702 * This works because we implicitly flush the caps as part of the
6703 * request, so the cap update check will happen with the writeback
6704 * cap context, and then the setattr check will happen with the
6707 * In reality this pattern is likely pretty rare (different users
6708 * setattr'ing the same file). If that turns out not to be the
6709 * case later, we can build a more complex pipelined cap writeback
6713 mask
|= CEPH_SETATTR_CTIME
;
6718 // caller just needs us to bump the ctime
6719 in
->ctime
= ceph_clock_now();
6720 in
->cap_dirtier_uid
= perms
.uid();
6721 in
->cap_dirtier_gid
= perms
.gid();
6722 if (issued
& CEPH_CAP_AUTH_EXCL
)
6723 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6724 else if (issued
& CEPH_CAP_FILE_EXCL
)
6725 mark_caps_dirty(in
, CEPH_CAP_FILE_EXCL
);
6726 else if (issued
& CEPH_CAP_XATTR_EXCL
)
6727 mark_caps_dirty(in
, CEPH_CAP_XATTR_EXCL
);
6729 mask
|= CEPH_SETATTR_CTIME
;
6732 if (in
->caps_issued_mask(CEPH_CAP_AUTH_EXCL
)) {
6733 bool kill_sguid
= mask
& (CEPH_SETATTR_SIZE
|CEPH_SETATTR_KILL_SGUID
);
6735 mask
&= ~CEPH_SETATTR_KILL_SGUID
;
6737 if (mask
& CEPH_SETATTR_UID
) {
6738 in
->ctime
= ceph_clock_now();
6739 in
->cap_dirtier_uid
= perms
.uid();
6740 in
->cap_dirtier_gid
= perms
.gid();
6741 in
->uid
= stx
->stx_uid
;
6742 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6743 mask
&= ~CEPH_SETATTR_UID
;
6745 ldout(cct
,10) << "changing uid to " << stx
->stx_uid
<< dendl
;
6747 if (mask
& CEPH_SETATTR_GID
) {
6748 in
->ctime
= ceph_clock_now();
6749 in
->cap_dirtier_uid
= perms
.uid();
6750 in
->cap_dirtier_gid
= perms
.gid();
6751 in
->gid
= stx
->stx_gid
;
6752 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6753 mask
&= ~CEPH_SETATTR_GID
;
6755 ldout(cct
,10) << "changing gid to " << stx
->stx_gid
<< dendl
;
6758 if (mask
& CEPH_SETATTR_MODE
) {
6759 in
->ctime
= ceph_clock_now();
6760 in
->cap_dirtier_uid
= perms
.uid();
6761 in
->cap_dirtier_gid
= perms
.gid();
6762 in
->mode
= (in
->mode
& ~07777) | (stx
->stx_mode
& 07777);
6763 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6764 mask
&= ~CEPH_SETATTR_MODE
;
6765 ldout(cct
,10) << "changing mode to " << stx
->stx_mode
<< dendl
;
6766 } else if (kill_sguid
&& S_ISREG(in
->mode
) && (in
->mode
& (S_IXUSR
|S_IXGRP
|S_IXOTH
))) {
6767 /* Must squash the any setuid/setgid bits with an ownership change */
6768 in
->mode
&= ~(S_ISUID
|S_ISGID
);
6769 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6772 if (mask
& CEPH_SETATTR_BTIME
) {
6773 in
->ctime
= ceph_clock_now();
6774 in
->cap_dirtier_uid
= perms
.uid();
6775 in
->cap_dirtier_gid
= perms
.gid();
6776 in
->btime
= utime_t(stx
->stx_btime
);
6777 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6778 mask
&= ~CEPH_SETATTR_BTIME
;
6779 ldout(cct
,10) << "changing btime to " << in
->btime
<< dendl
;
6781 } else if (mask
& CEPH_SETATTR_SIZE
) {
6782 /* If we don't have Ax, then we must ask the server to clear them on truncate */
6783 mask
|= CEPH_SETATTR_KILL_SGUID
;
6786 if (in
->caps_issued_mask(CEPH_CAP_FILE_EXCL
)) {
6787 if (mask
& (CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
)) {
6788 if (mask
& CEPH_SETATTR_MTIME
)
6789 in
->mtime
= utime_t(stx
->stx_mtime
);
6790 if (mask
& CEPH_SETATTR_ATIME
)
6791 in
->atime
= utime_t(stx
->stx_atime
);
6792 in
->ctime
= ceph_clock_now();
6793 in
->cap_dirtier_uid
= perms
.uid();
6794 in
->cap_dirtier_gid
= perms
.gid();
6795 in
->time_warp_seq
++;
6796 mark_caps_dirty(in
, CEPH_CAP_FILE_EXCL
);
6797 mask
&= ~(CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
);
6806 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SETATTR
);
6810 in
->make_nosnap_relative_path(path
);
6811 req
->set_filepath(path
);
6814 if (mask
& CEPH_SETATTR_KILL_SGUID
) {
6815 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6817 if (mask
& CEPH_SETATTR_MODE
) {
6818 req
->head
.args
.setattr
.mode
= stx
->stx_mode
;
6819 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6820 ldout(cct
,10) << "changing mode to " << stx
->stx_mode
<< dendl
;
6822 if (mask
& CEPH_SETATTR_UID
) {
6823 req
->head
.args
.setattr
.uid
= stx
->stx_uid
;
6824 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6825 ldout(cct
,10) << "changing uid to " << stx
->stx_uid
<< dendl
;
6827 if (mask
& CEPH_SETATTR_GID
) {
6828 req
->head
.args
.setattr
.gid
= stx
->stx_gid
;
6829 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6830 ldout(cct
,10) << "changing gid to " << stx
->stx_gid
<< dendl
;
6832 if (mask
& CEPH_SETATTR_BTIME
) {
6833 req
->head
.args
.setattr
.btime
= utime_t(stx
->stx_btime
);
6834 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6836 if (mask
& CEPH_SETATTR_MTIME
) {
6837 req
->head
.args
.setattr
.mtime
= utime_t(stx
->stx_mtime
);
6838 req
->inode_drop
|= CEPH_CAP_FILE_SHARED
| CEPH_CAP_FILE_RD
|
6841 if (mask
& CEPH_SETATTR_ATIME
) {
6842 req
->head
.args
.setattr
.atime
= utime_t(stx
->stx_atime
);
6843 req
->inode_drop
|= CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_RD
|
6846 if (mask
& CEPH_SETATTR_SIZE
) {
6847 if ((unsigned long)stx
->stx_size
< mdsmap
->get_max_filesize()) {
6848 req
->head
.args
.setattr
.size
= stx
->stx_size
;
6849 ldout(cct
,10) << "changing size to " << stx
->stx_size
<< dendl
;
6852 ldout(cct
,10) << "unable to set size to " << stx
->stx_size
<< ". Too large!" << dendl
;
6855 req
->inode_drop
|= CEPH_CAP_FILE_SHARED
| CEPH_CAP_FILE_RD
|
6858 req
->head
.args
.setattr
.mask
= mask
;
6860 req
->regetattr_mask
= mask
;
6862 int res
= make_request(req
, perms
, inp
);
6863 ldout(cct
, 10) << "_setattr result=" << res
<< dendl
;
6867 /* Note that we only care about attrs that setattr cares about */
6868 void Client::stat_to_statx(struct stat
*st
, struct ceph_statx
*stx
)
6870 stx
->stx_size
= st
->st_size
;
6871 stx
->stx_mode
= st
->st_mode
;
6872 stx
->stx_uid
= st
->st_uid
;
6873 stx
->stx_gid
= st
->st_gid
;
6874 stx
->stx_mtime
= st
->st_mtim
;
6875 stx
->stx_atime
= st
->st_atim
;
6878 int Client::__setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
6879 const UserPerm
& perms
, InodeRef
*inp
)
6881 int ret
= _do_setattr(in
, stx
, mask
, perms
, inp
);
6884 if (mask
& CEPH_SETATTR_MODE
)
6885 ret
= _posix_acl_chmod(in
, stx
->stx_mode
, perms
);
6889 int Client::_setattrx(InodeRef
&in
, struct ceph_statx
*stx
, int mask
,
6890 const UserPerm
& perms
)
6892 mask
&= (CEPH_SETATTR_MODE
| CEPH_SETATTR_UID
|
6893 CEPH_SETATTR_GID
| CEPH_SETATTR_MTIME
|
6894 CEPH_SETATTR_ATIME
| CEPH_SETATTR_SIZE
|
6895 CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
);
6896 if (cct
->_conf
->client_permissions
) {
6897 int r
= may_setattr(in
.get(), stx
, mask
, perms
);
6901 return __setattrx(in
.get(), stx
, mask
, perms
);
6904 int Client::_setattr(InodeRef
&in
, struct stat
*attr
, int mask
,
6905 const UserPerm
& perms
)
6907 struct ceph_statx stx
;
6909 stat_to_statx(attr
, &stx
);
6910 mask
&= ~CEPH_SETATTR_BTIME
;
6912 if ((mask
& CEPH_SETATTR_UID
) && attr
->st_uid
== static_cast<uid_t
>(-1)) {
6913 mask
&= ~CEPH_SETATTR_UID
;
6915 if ((mask
& CEPH_SETATTR_GID
) && attr
->st_gid
== static_cast<uid_t
>(-1)) {
6916 mask
&= ~CEPH_SETATTR_GID
;
6919 return _setattrx(in
, &stx
, mask
, perms
);
6922 int Client::setattr(const char *relpath
, struct stat
*attr
, int mask
,
6923 const UserPerm
& perms
)
6925 Mutex::Locker
lock(client_lock
);
6926 tout(cct
) << "setattr" << std::endl
;
6927 tout(cct
) << relpath
<< std::endl
;
6928 tout(cct
) << mask
<< std::endl
;
6933 filepath
path(relpath
);
6935 int r
= path_walk(path
, &in
, perms
);
6938 return _setattr(in
, attr
, mask
, perms
);
6941 int Client::setattrx(const char *relpath
, struct ceph_statx
*stx
, int mask
,
6942 const UserPerm
& perms
, int flags
)
6944 Mutex::Locker
lock(client_lock
);
6945 tout(cct
) << "setattrx" << std::endl
;
6946 tout(cct
) << relpath
<< std::endl
;
6947 tout(cct
) << mask
<< std::endl
;
6952 filepath
path(relpath
);
6954 int r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
));
6957 return _setattrx(in
, stx
, mask
, perms
);
6960 int Client::fsetattr(int fd
, struct stat
*attr
, int mask
, const UserPerm
& perms
)
6962 Mutex::Locker
lock(client_lock
);
6963 tout(cct
) << "fsetattr" << std::endl
;
6964 tout(cct
) << fd
<< std::endl
;
6965 tout(cct
) << mask
<< std::endl
;
6970 Fh
*f
= get_filehandle(fd
);
6973 #if defined(__linux__) && defined(O_PATH)
6974 if (f
->flags
& O_PATH
)
6977 return _setattr(f
->inode
, attr
, mask
, perms
);
6980 int Client::fsetattrx(int fd
, struct ceph_statx
*stx
, int mask
, const UserPerm
& perms
)
6982 Mutex::Locker
lock(client_lock
);
6983 tout(cct
) << "fsetattr" << std::endl
;
6984 tout(cct
) << fd
<< std::endl
;
6985 tout(cct
) << mask
<< std::endl
;
6990 Fh
*f
= get_filehandle(fd
);
6993 #if defined(__linux__) && defined(O_PATH)
6994 if (f
->flags
& O_PATH
)
6997 return _setattrx(f
->inode
, stx
, mask
, perms
);
7000 int Client::stat(const char *relpath
, struct stat
*stbuf
, const UserPerm
& perms
,
7001 frag_info_t
*dirstat
, int mask
)
7003 ldout(cct
, 3) << "stat enter (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7004 Mutex::Locker
lock(client_lock
);
7005 tout(cct
) << "stat" << std::endl
;
7006 tout(cct
) << relpath
<< std::endl
;
7011 filepath
path(relpath
);
7013 int r
= path_walk(path
, &in
, perms
, true, mask
);
7016 r
= _getattr(in
, mask
, perms
);
7018 ldout(cct
, 3) << "stat exit on error!" << dendl
;
7021 fill_stat(in
, stbuf
, dirstat
);
7022 ldout(cct
, 3) << "stat exit (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7026 unsigned Client::statx_to_mask(unsigned int flags
, unsigned int want
)
7030 /* if NO_ATTR_SYNC is set, then we don't need any -- just use what's in cache */
7031 if (flags
& AT_NO_ATTR_SYNC
)
7034 /* Always set PIN to distinguish from AT_NO_ATTR_SYNC case */
7035 mask
|= CEPH_CAP_PIN
;
7036 if (want
& (CEPH_STATX_MODE
|CEPH_STATX_UID
|CEPH_STATX_GID
|CEPH_STATX_BTIME
|CEPH_STATX_CTIME
|CEPH_STATX_VERSION
))
7037 mask
|= CEPH_CAP_AUTH_SHARED
;
7038 if (want
& (CEPH_STATX_NLINK
|CEPH_STATX_CTIME
|CEPH_STATX_VERSION
))
7039 mask
|= CEPH_CAP_LINK_SHARED
;
7040 if (want
& (CEPH_STATX_ATIME
|CEPH_STATX_MTIME
|CEPH_STATX_CTIME
|CEPH_STATX_SIZE
|CEPH_STATX_BLOCKS
|CEPH_STATX_VERSION
))
7041 mask
|= CEPH_CAP_FILE_SHARED
;
7042 if (want
& (CEPH_STATX_VERSION
|CEPH_STATX_CTIME
))
7043 mask
|= CEPH_CAP_XATTR_SHARED
;
7048 int Client::statx(const char *relpath
, struct ceph_statx
*stx
,
7049 const UserPerm
& perms
,
7050 unsigned int want
, unsigned int flags
)
7052 ldout(cct
, 3) << "statx enter (relpath " << relpath
<< " want " << want
<< ")" << dendl
;
7053 Mutex::Locker
lock(client_lock
);
7054 tout(cct
) << "statx" << std::endl
;
7055 tout(cct
) << relpath
<< std::endl
;
7060 filepath
path(relpath
);
7063 unsigned mask
= statx_to_mask(flags
, want
);
7065 int r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), mask
);
7069 r
= _getattr(in
, mask
, perms
);
7071 ldout(cct
, 3) << "statx exit on error!" << dendl
;
7075 fill_statx(in
, mask
, stx
);
7076 ldout(cct
, 3) << "statx exit (relpath " << relpath
<< " mask " << stx
->stx_mask
<< ")" << dendl
;
7080 int Client::lstat(const char *relpath
, struct stat
*stbuf
,
7081 const UserPerm
& perms
, frag_info_t
*dirstat
, int mask
)
7083 ldout(cct
, 3) << "lstat enter (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7084 Mutex::Locker
lock(client_lock
);
7085 tout(cct
) << "lstat" << std::endl
;
7086 tout(cct
) << relpath
<< std::endl
;
7091 filepath
path(relpath
);
7093 // don't follow symlinks
7094 int r
= path_walk(path
, &in
, perms
, false, mask
);
7097 r
= _getattr(in
, mask
, perms
);
7099 ldout(cct
, 3) << "lstat exit on error!" << dendl
;
7102 fill_stat(in
, stbuf
, dirstat
);
7103 ldout(cct
, 3) << "lstat exit (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7107 int Client::fill_stat(Inode
*in
, struct stat
*st
, frag_info_t
*dirstat
, nest_info_t
*rstat
)
7109 ldout(cct
, 10) << "fill_stat on " << in
->ino
<< " snap/dev" << in
->snapid
7110 << " mode 0" << oct
<< in
->mode
<< dec
7111 << " mtime " << in
->mtime
<< " ctime " << in
->ctime
<< dendl
;
7112 memset(st
, 0, sizeof(struct stat
));
7113 if (use_faked_inos())
7114 st
->st_ino
= in
->faked_ino
;
7116 st
->st_ino
= in
->ino
;
7117 st
->st_dev
= in
->snapid
;
7118 st
->st_mode
= in
->mode
;
7119 st
->st_rdev
= in
->rdev
;
7120 st
->st_nlink
= in
->nlink
;
7121 st
->st_uid
= in
->uid
;
7122 st
->st_gid
= in
->gid
;
7123 if (in
->ctime
> in
->mtime
) {
7124 stat_set_ctime_sec(st
, in
->ctime
.sec());
7125 stat_set_ctime_nsec(st
, in
->ctime
.nsec());
7127 stat_set_ctime_sec(st
, in
->mtime
.sec());
7128 stat_set_ctime_nsec(st
, in
->mtime
.nsec());
7130 stat_set_atime_sec(st
, in
->atime
.sec());
7131 stat_set_atime_nsec(st
, in
->atime
.nsec());
7132 stat_set_mtime_sec(st
, in
->mtime
.sec());
7133 stat_set_mtime_nsec(st
, in
->mtime
.nsec());
7135 if (cct
->_conf
->client_dirsize_rbytes
)
7136 st
->st_size
= in
->rstat
.rbytes
;
7138 st
->st_size
= in
->dirstat
.size();
7141 st
->st_size
= in
->size
;
7142 st
->st_blocks
= (in
->size
+ 511) >> 9;
7144 st
->st_blksize
= MAX(in
->layout
.stripe_unit
, 4096);
7147 *dirstat
= in
->dirstat
;
7151 return in
->caps_issued();
7154 void Client::fill_statx(Inode
*in
, unsigned int mask
, struct ceph_statx
*stx
)
7156 ldout(cct
, 10) << "fill_statx on " << in
->ino
<< " snap/dev" << in
->snapid
7157 << " mode 0" << oct
<< in
->mode
<< dec
7158 << " mtime " << in
->mtime
<< " ctime " << in
->ctime
<< dendl
;
7159 memset(stx
, 0, sizeof(struct ceph_statx
));
7162 * If mask is 0, then the caller set AT_NO_ATTR_SYNC. Reset the mask
7163 * so that all bits are set.
7168 /* These are always considered to be available */
7169 stx
->stx_dev
= in
->snapid
;
7170 stx
->stx_blksize
= MAX(in
->layout
.stripe_unit
, 4096);
7172 /* Type bits are always set, even when CEPH_STATX_MODE is not */
7173 stx
->stx_mode
= S_IFMT
& in
->mode
;
7174 stx
->stx_ino
= use_faked_inos() ? in
->faked_ino
: (ino_t
)in
->ino
;
7175 stx
->stx_rdev
= in
->rdev
;
7176 stx
->stx_mask
|= (CEPH_STATX_INO
|CEPH_STATX_RDEV
);
7178 if (mask
& CEPH_CAP_AUTH_SHARED
) {
7179 stx
->stx_uid
= in
->uid
;
7180 stx
->stx_gid
= in
->gid
;
7181 stx
->stx_mode
= in
->mode
;
7182 in
->btime
.to_timespec(&stx
->stx_btime
);
7183 stx
->stx_mask
|= (CEPH_STATX_MODE
|CEPH_STATX_UID
|CEPH_STATX_GID
|CEPH_STATX_BTIME
);
7186 if (mask
& CEPH_CAP_LINK_SHARED
) {
7187 stx
->stx_nlink
= in
->nlink
;
7188 stx
->stx_mask
|= CEPH_STATX_NLINK
;
7191 if (mask
& CEPH_CAP_FILE_SHARED
) {
7193 in
->atime
.to_timespec(&stx
->stx_atime
);
7194 in
->mtime
.to_timespec(&stx
->stx_mtime
);
7197 if (cct
->_conf
->client_dirsize_rbytes
)
7198 stx
->stx_size
= in
->rstat
.rbytes
;
7200 stx
->stx_size
= in
->dirstat
.size();
7201 stx
->stx_blocks
= 1;
7203 stx
->stx_size
= in
->size
;
7204 stx
->stx_blocks
= (in
->size
+ 511) >> 9;
7206 stx
->stx_mask
|= (CEPH_STATX_ATIME
|CEPH_STATX_MTIME
|
7207 CEPH_STATX_SIZE
|CEPH_STATX_BLOCKS
);
7210 /* Change time and change_attr both require all shared caps to view */
7211 if ((mask
& CEPH_STAT_CAP_INODE_ALL
) == CEPH_STAT_CAP_INODE_ALL
) {
7212 stx
->stx_version
= in
->change_attr
;
7213 if (in
->ctime
> in
->mtime
)
7214 in
->ctime
.to_timespec(&stx
->stx_ctime
);
7216 in
->mtime
.to_timespec(&stx
->stx_ctime
);
7217 stx
->stx_mask
|= (CEPH_STATX_CTIME
|CEPH_STATX_VERSION
);
7222 void Client::touch_dn(Dentry
*dn
)
7227 int Client::chmod(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
7229 Mutex::Locker
lock(client_lock
);
7230 tout(cct
) << "chmod" << std::endl
;
7231 tout(cct
) << relpath
<< std::endl
;
7232 tout(cct
) << mode
<< std::endl
;
7237 filepath
path(relpath
);
7239 int r
= path_walk(path
, &in
, perms
);
7243 attr
.st_mode
= mode
;
7244 return _setattr(in
, &attr
, CEPH_SETATTR_MODE
, perms
);
7247 int Client::fchmod(int fd
, mode_t mode
, const UserPerm
& perms
)
7249 Mutex::Locker
lock(client_lock
);
7250 tout(cct
) << "fchmod" << std::endl
;
7251 tout(cct
) << fd
<< std::endl
;
7252 tout(cct
) << mode
<< std::endl
;
7257 Fh
*f
= get_filehandle(fd
);
7260 #if defined(__linux__) && defined(O_PATH)
7261 if (f
->flags
& O_PATH
)
7265 attr
.st_mode
= mode
;
7266 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_MODE
, perms
);
7269 int Client::lchmod(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
7271 Mutex::Locker
lock(client_lock
);
7272 tout(cct
) << "lchmod" << std::endl
;
7273 tout(cct
) << relpath
<< std::endl
;
7274 tout(cct
) << mode
<< std::endl
;
7279 filepath
path(relpath
);
7281 // don't follow symlinks
7282 int r
= path_walk(path
, &in
, perms
, false);
7286 attr
.st_mode
= mode
;
7287 return _setattr(in
, &attr
, CEPH_SETATTR_MODE
, perms
);
7290 int Client::chown(const char *relpath
, uid_t new_uid
, gid_t new_gid
,
7291 const UserPerm
& perms
)
7293 Mutex::Locker
lock(client_lock
);
7294 tout(cct
) << "chown" << std::endl
;
7295 tout(cct
) << relpath
<< std::endl
;
7296 tout(cct
) << new_uid
<< std::endl
;
7297 tout(cct
) << new_gid
<< std::endl
;
7302 filepath
path(relpath
);
7304 int r
= path_walk(path
, &in
, perms
);
7308 attr
.st_uid
= new_uid
;
7309 attr
.st_gid
= new_gid
;
7310 return _setattr(in
, &attr
, CEPH_SETATTR_UID
|CEPH_SETATTR_GID
, perms
);
7313 int Client::fchown(int fd
, uid_t new_uid
, gid_t new_gid
, const UserPerm
& perms
)
7315 Mutex::Locker
lock(client_lock
);
7316 tout(cct
) << "fchown" << std::endl
;
7317 tout(cct
) << fd
<< std::endl
;
7318 tout(cct
) << new_uid
<< std::endl
;
7319 tout(cct
) << new_gid
<< std::endl
;
7324 Fh
*f
= get_filehandle(fd
);
7327 #if defined(__linux__) && defined(O_PATH)
7328 if (f
->flags
& O_PATH
)
7332 attr
.st_uid
= new_uid
;
7333 attr
.st_gid
= new_gid
;
7335 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
7336 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
7337 return _setattr(f
->inode
, &attr
, mask
, perms
);
7340 int Client::lchown(const char *relpath
, uid_t new_uid
, gid_t new_gid
,
7341 const UserPerm
& perms
)
7343 Mutex::Locker
lock(client_lock
);
7344 tout(cct
) << "lchown" << std::endl
;
7345 tout(cct
) << relpath
<< std::endl
;
7346 tout(cct
) << new_uid
<< std::endl
;
7347 tout(cct
) << new_gid
<< std::endl
;
7352 filepath
path(relpath
);
7354 // don't follow symlinks
7355 int r
= path_walk(path
, &in
, perms
, false);
7359 attr
.st_uid
= new_uid
;
7360 attr
.st_gid
= new_gid
;
7362 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
7363 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
7364 return _setattr(in
, &attr
, mask
, perms
);
7367 int Client::utime(const char *relpath
, struct utimbuf
*buf
,
7368 const UserPerm
& perms
)
7370 Mutex::Locker
lock(client_lock
);
7371 tout(cct
) << "utime" << std::endl
;
7372 tout(cct
) << relpath
<< std::endl
;
7373 tout(cct
) << buf
->modtime
<< std::endl
;
7374 tout(cct
) << buf
->actime
<< std::endl
;
7379 filepath
path(relpath
);
7381 int r
= path_walk(path
, &in
, perms
);
7385 stat_set_mtime_sec(&attr
, buf
->modtime
);
7386 stat_set_mtime_nsec(&attr
, 0);
7387 stat_set_atime_sec(&attr
, buf
->actime
);
7388 stat_set_atime_nsec(&attr
, 0);
7389 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
7392 int Client::lutime(const char *relpath
, struct utimbuf
*buf
,
7393 const UserPerm
& perms
)
7395 Mutex::Locker
lock(client_lock
);
7396 tout(cct
) << "lutime" << std::endl
;
7397 tout(cct
) << relpath
<< std::endl
;
7398 tout(cct
) << buf
->modtime
<< std::endl
;
7399 tout(cct
) << buf
->actime
<< std::endl
;
7404 filepath
path(relpath
);
7406 // don't follow symlinks
7407 int r
= path_walk(path
, &in
, perms
, false);
7411 stat_set_mtime_sec(&attr
, buf
->modtime
);
7412 stat_set_mtime_nsec(&attr
, 0);
7413 stat_set_atime_sec(&attr
, buf
->actime
);
7414 stat_set_atime_nsec(&attr
, 0);
7415 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
7418 int Client::flock(int fd
, int operation
, uint64_t owner
)
7420 Mutex::Locker
lock(client_lock
);
7421 tout(cct
) << "flock" << std::endl
;
7422 tout(cct
) << fd
<< std::endl
;
7423 tout(cct
) << operation
<< std::endl
;
7424 tout(cct
) << owner
<< std::endl
;
7429 Fh
*f
= get_filehandle(fd
);
7433 return _flock(f
, operation
, owner
);
// Open the directory named by relpath and hand back a handle through dirpp.
// The path is resolved with path_walk() (following symlinks); when the
// client_permissions option is set, may_open() is consulted with O_RDONLY;
// the handle itself is created by _opendir(). The resulting handle value is
// written to the trace stream.
7436 int Client::opendir(const char *relpath
, dir_result_t
**dirpp
, const UserPerm
& perms
)
7438 Mutex::Locker
lock(client_lock
);
7439 tout(cct
) << "opendir" << std::endl
;
7440 tout(cct
) << relpath
<< std::endl
;
7445 filepath
path(relpath
);
7447 int r
= path_walk(path
, &in
, perms
, true);
7450 if (cct
->_conf
->client_permissions
) {
// NOTE(review): this is a second `int r`, declared inside the if-block; it is
// distinct from the `r` that holds the path_walk() result above.
7451 int r
= may_open(in
.get(), O_RDONLY
, perms
);
7455 r
= _opendir(in
.get(), dirpp
, perms
);
7456 /* if ENOTDIR, dirpp will be an uninitialized pointer and it's very dangerous to access its value */
7458 tout(cct
) << (unsigned long)*dirpp
<< std::endl
;
// Create a directory handle for inode `in`.
// Allocates a dir_result_t bound to the inode and the caller's permissions,
// stores it in *dirpp, records it in opened_dirs and logs the new handle.
// NOTE(review): any validation of `in` and the return statement are not
// visible in this excerpt.
7462 int Client::_opendir(Inode
*in
, dir_result_t
**dirpp
, const UserPerm
& perms
)
7466 *dirpp
= new dir_result_t(in
, perms
);
// Tracked so the handle can be found again; _closedir() erases it.
7467 opened_dirs
.insert(*dirpp
);
7468 ldout(cct
, 3) << "_opendir(" << in
->ino
<< ") = " << 0 << " (" << *dirpp
<< ")" << dendl
;
// Public entry point for closing a directory handle.
// Takes client_lock, traces the call with the handle value and logs
// "closedir(...) = 0".
// NOTE(review): the statement that actually releases the handle is not
// visible in this excerpt.
7473 int Client::closedir(dir_result_t
*dir
)
7475 Mutex::Locker
lock(client_lock
);
7476 tout(cct
) << "closedir" << std::endl
;
7477 tout(cct
) << (unsigned long)dir
<< std::endl
;
7479 ldout(cct
, 3) << "closedir(" << dir
<< ") = 0" << dendl
;
// Tear down a directory handle: drop its inode reference, discard its
// buffered entries and remove it from opened_dirs.
// NOTE(review): the condition guarding the inode detach and the final
// disposal of dirp are not visible in this excerpt.
7484 void Client::_closedir(dir_result_t
*dirp
)
7486 ldout(cct
, 10) << "_closedir(" << dirp
<< ")" << dendl
;
7488 ldout(cct
, 10) << "_closedir detaching inode " << dirp
->inode
<< dendl
;
// Release the handle's reference on the directory inode.
7489 dirp
->inode
.reset();
7491 _readdir_drop_dirp_buffer(dirp
);
7492 opened_dirs
.erase(dirp
);
// Reset a directory handle so reading can start over.
// Under client_lock, logs the call and discards the handle's buffered
// entries.
// NOTE(review): the statement that resets the handle's position is not
// visible in this excerpt.
7496 void Client::rewinddir(dir_result_t
*dirp
)
7498 Mutex::Locker
lock(client_lock
);
7499 ldout(cct
, 3) << "rewinddir(" << dirp
<< ")" << dendl
;
7504 dir_result_t
*d
= static_cast<dir_result_t
*>(dirp
);
7505 _readdir_drop_dirp_buffer(d
);
// Report the current position of a directory handle; the value logged is
// d->offset.
// NOTE(review): no client_lock acquisition is visible in this function,
// unlike seekdir()/rewinddir(); the return statement is not visible in this
// excerpt.
7509 loff_t
Client::telldir(dir_result_t
*dirp
)
7511 dir_result_t
*d
= static_cast<dir_result_t
*>(dirp
);
7512 ldout(cct
, 3) << "telldir(" << dirp
<< ") = " << d
->offset
<< dendl
;
// Move a directory handle to `offset`.
// A forward seek zeroes release_count, and ordered_count is zeroed to stop
// this handle from filling the readdir cache. The buffered entries are
// dropped when they can no longer serve the new position: in hash order when
// seeking backwards, otherwise when the buffered frag or the low offset does
// not match the target. Finally dirp->offset is set to the new value.
// NOTE(review): several branch bodies and conditions are only partly visible
// in this excerpt.
7516 void Client::seekdir(dir_result_t
*dirp
, loff_t offset
)
7518 Mutex::Locker
lock(client_lock
);
7520 ldout(cct
, 3) << "seekdir(" << dirp
<< ", " << offset
<< ")" << dendl
;
7525 if (offset
== dirp
->offset
)
7528 if (offset
> dirp
->offset
)
7529 dirp
->release_count
= 0; // bump if we do a forward seek
7531 dirp
->ordered_count
= 0; // disable filling readdir cache
7533 if (dirp
->hash_order()) {
7534 if (dirp
->offset
> offset
) {
7535 _readdir_drop_dirp_buffer(dirp
);
7540 dirp
->buffer_frag
!= frag_t(dir_result_t::fpos_high(offset
)) ||
7541 dirp
->offset_low() > dir_result_t::fpos_low(offset
)) {
7542 _readdir_drop_dirp_buffer(dirp
);
7547 dirp
->offset
= offset
;
7552 // ino_t d_ino; /* inode number */
7553 // off_t d_off; /* offset to the next dirent */
7554 // unsigned short d_reclen; /* length of this record */
7555 // unsigned char d_type; /* type of file */
7556 // char d_name[256]; /* filename */
// Populate a struct dirent from a name, a mode-style type value, an inode
// number and the offset of the following entry.
// The name is copied with strncpy() limited to 255 bytes and explicitly
// NUL-terminated at index 255, so longer names are truncated. d_off is only
// assigned on platforms other than DARWIN and FreeBSD. d_type is derived from
// `type` with IFTODT().
// NOTE(review): the assignment of d_ino from `ino` is not visible in this
// excerpt although d_ino is logged below.
7558 void Client::fill_dirent(struct dirent
*de
, const char *name
, int type
, uint64_t ino
, loff_t next_off
)
7560 strncpy(de
->d_name
, name
, 255);
7561 de
->d_name
[255] = '\0';
7564 #if !defined(DARWIN) && !defined(__FreeBSD__)
7565 de
->d_off
= next_off
;
7568 de
->d_type
= IFTODT(type
);
7569 ldout(cct
, 10) << "fill_dirent '" << de
->d_name
<< "' -> " << inodeno_t(de
->d_ino
)
7570 << " type " << (int)de
->d_type
<< " w/ next_off " << hex
<< next_off
<< dec
<< dendl
;
// Advance a directory handle past the frag it has just finished reading.
// If the buffered frag is the rightmost one the handle moves to END.
// Otherwise, in hash order the offset is raised to the start of the next
// frag (never lowered); in frag order last_name is cleared, the offset is set
// to the start of the next frag and the frag is re-chosen against the
// inode's dirfragtree.
// NOTE(review): the statements that compute the next frag and set the end
// state are not visible in this excerpt.
7574 void Client::_readdir_next_frag(dir_result_t
*dirp
)
7576 frag_t fg
= dirp
->buffer_frag
;
7578 if (fg
.is_rightmost()) {
7579 ldout(cct
, 10) << "_readdir_next_frag advance from " << fg
<< " to END" << dendl
;
7586 ldout(cct
, 10) << "_readdir_next_frag advance from " << dirp
->buffer_frag
<< " to " << fg
<< dendl
;
7588 if (dirp
->hash_order()) {
7590 int64_t new_offset
= dir_result_t::make_fpos(fg
.value(), 2, true);
7591 if (dirp
->offset
< new_offset
) // don't decrease offset
7592 dirp
->offset
= new_offset
;
7594 dirp
->last_name
.clear();
7595 dirp
->offset
= dir_result_t::make_fpos(fg
, 2, false);
7596 _readdir_rechoose_frag(dirp
);
// Re-map the handle's current frag through the inode's dirfragtree.
// Requires dirp->inode to be set. The frag taken from the high part of the
// offset is looked up in dirfragtree; the offset is then reset to the start
// of the mapped frag, last_name is cleared and next_offset is set to 2.
// NOTE(review): the hash_order() branch body and the condition under which
// the reset happens are not visible in this excerpt.
7600 void Client::_readdir_rechoose_frag(dir_result_t
*dirp
)
7602 assert(dirp
->inode
);
7604 if (dirp
->hash_order())
7607 frag_t cur
= frag_t(dirp
->offset_high());
7608 frag_t fg
= dirp
->inode
->dirfragtree
[cur
.value()];
7610 ldout(cct
, 10) << "_readdir_rechoose_frag frag " << cur
<< " maps to " << fg
<< dendl
;
7611 dirp
->offset
= dir_result_t::make_fpos(fg
, 2, false);
7612 dirp
->last_name
.clear();
7613 dirp
->next_offset
= 2;
7617 void Client::_readdir_drop_dirp_buffer(dir_result_t
*dirp
)
7619 ldout(cct
, 10) << "_readdir_drop_dirp_buffer " << dirp
<< dendl
;
7620 dirp
->buffer
.clear();
// Fetch the entries for the handle's current position from the MDS.
// The frag is chosen through the inode's dirfragtree in hash order, or taken
// directly from the high part of the offset otherwise. A READDIR request
// (LSSNAP when the inode is the snapdir) is built for the directory inode;
// the continuation point is passed either as last_name in path2 or, in hash
// order, as offset_hash. On -EAGAIN the frag is re-chosen and the function
// calls itself again.
// NOTE(review): the retry is a direct recursive call with no visible bound;
// the success/error branch structure is only partly visible in this excerpt.
7623 int Client::_readdir_get_frag(dir_result_t
*dirp
)
7626 assert(dirp
->inode
);
7628 // get the current frag.
7630 if (dirp
->hash_order())
7631 fg
= dirp
->inode
->dirfragtree
[dirp
->offset_high()];
7633 fg
= frag_t(dirp
->offset_high());
7635 ldout(cct
, 10) << "_readdir_get_frag " << dirp
<< " on " << dirp
->inode
->ino
<< " fg " << fg
7636 << " offset " << hex
<< dirp
->offset
<< dec
<< dendl
;
// Listing the snapdir uses a dedicated op instead of a plain readdir.
7638 int op
= CEPH_MDS_OP_READDIR
;
7639 if (dirp
->inode
&& dirp
->inode
->snapid
== CEPH_SNAPDIR
)
7640 op
= CEPH_MDS_OP_LSSNAP
;
7642 InodeRef
& diri
= dirp
->inode
;
7644 MetaRequest
*req
= new MetaRequest(op
);
7646 diri
->make_nosnap_relative_path(path
);
7647 req
->set_filepath(path
);
7648 req
->set_inode(diri
.get());
7649 req
->head
.args
.readdir
.frag
= fg
;
7650 req
->head
.args
.readdir
.flags
= CEPH_READDIR_REPLY_BITFLAGS
;
// Continuation point: the last returned name if there is one, otherwise the
// hash position when reading in hash order.
7651 if (dirp
->last_name
.length()) {
7652 req
->path2
.set_path(dirp
->last_name
);
7653 } else if (dirp
->hash_order()) {
7654 req
->head
.args
.readdir
.offset_hash
= dirp
->offset_high();
7659 int res
= make_request(req
, dirp
->perms
, NULL
, NULL
, -1, &dirbl
);
7661 if (res
== -EAGAIN
) {
7662 ldout(cct
, 10) << "_readdir_get_frag got EAGAIN, retrying" << dendl
;
7663 _readdir_rechoose_frag(dirp
);
7664 return _readdir_get_frag(dirp
);
7668 ldout(cct
, 10) << "_readdir_get_frag " << dirp
<< " got frag " << dirp
->buffer_frag
7669 << " size " << dirp
->buffer
.size() << dendl
;
7671 ldout(cct
, 10) << "_readdir_get_frag got error " << res
<< ", setting end flag" << dendl
;
7678 struct dentry_off_lt
{
7679 bool operator()(const Dentry
* dn
, int64_t off
) const {
7680 return dir_result_t::fpos_cmp(dn
->offset
, off
) < 0;
// Serve readdir entries from the directory's in-memory readdir_cache.
// Must be called with client_lock held (asserted). Starting at the first
// cached dentry whose offset is not below dirp->offset, each dentry is
// checked (null inodes and dentries whose cap_shared_gen differs from the
// directory's shared_gen are skipped), its attributes are refreshed with
// _getattr(), a dirent/statx pair is filled in, and the callback is invoked
// with client_lock released. After each delivered entry the handle's offset,
// next_offset and last_name are advanced.
// NOTE(review): dn->offset is read in the log statement after the callback
// ran with the lock dropped, while the name was copied beforehand "while we
// have lock"; confirm dn (and the iterator pd) are still valid at that point.
// The loop header, the re-lock and the return statements are not visible in
// this excerpt.
7684 int Client::_readdir_cache_cb(dir_result_t
*dirp
, add_dirent_cb_t cb
, void *p
,
7685 int caps
, bool getref
)
7687 assert(client_lock
.is_locked());
7688 ldout(cct
, 10) << "_readdir_cache_cb " << dirp
<< " on " << dirp
->inode
->ino
7689 << " last_name " << dirp
->last_name
<< " offset " << hex
<< dirp
->offset
<< dec
7691 Dir
*dir
= dirp
->inode
->dir
;
7694 ldout(cct
, 10) << " dir is empty" << dendl
;
// Binary search for the first cached dentry at or after the current offset.
7699 vector
<Dentry
*>::iterator pd
= std::lower_bound(dir
->readdir_cache
.begin(),
7700 dir
->readdir_cache
.end(),
7701 dirp
->offset
, dentry_off_lt());
7705 if (!dirp
->inode
->is_complete_and_ordered())
7707 if (pd
== dir
->readdir_cache
.end())
7710 if (dn
->inode
== NULL
) {
7711 ldout(cct
, 15) << " skipping null '" << dn
->name
<< "'" << dendl
;
7715 if (dn
->cap_shared_gen
!= dir
->parent_inode
->shared_gen
) {
7716 ldout(cct
, 15) << " skipping mismatch shared gen '" << dn
->name
<< "'" << dendl
;
7721 int r
= _getattr(dn
->inode
, caps
, dirp
->perms
);
7725 struct ceph_statx stx
;
7727 fill_statx(dn
->inode
, caps
, &stx
);
7729 uint64_t next_off
= dn
->offset
+ 1;
// The last cached entry reports END as the offset that follows it.
7731 if (pd
== dir
->readdir_cache
.end())
7732 next_off
= dir_result_t::END
;
7735 fill_dirent(&de
, dn
->name
.c_str(), stx
.stx_mode
, stx
.stx_ino
, next_off
);
7737 in
= dn
->inode
.get();
7741 dn_name
= dn
->name
; // fill in name while we have lock
// The callback runs without client_lock held.
7743 client_lock
.Unlock();
7744 r
= cb(p
, &de
, &stx
, next_off
, in
); // _next_ offset
7746 ldout(cct
, 15) << " de " << de
.d_name
<< " off " << hex
<< dn
->offset
<< dec
7747 << " = " << r
<< dendl
;
7752 dirp
->offset
= next_off
;
7754 dirp
->next_offset
= 2;
7756 dirp
->next_offset
= dirp
->offset_low();
7757 dirp
->last_name
= dn_name
; // we successfully returned this one; update!
7762 ldout(cct
, 10) << "_readdir_cache_cb " << dirp
<< " on " << dirp
->inode
->ino
<< " at end" << dendl
;
// Core directory iteration: deliver entries to `cb` starting at the handle's
// current offset.
// Stages visible here, in order:
//   1. offset 0 yields "." (attributes of the directory itself);
//   2. offset 1 yields ".." (attributes of the first parent's directory
//      inode; the empty-dn_set case is handled by a branch not shown here);
//   3. if the directory is not the snapdir, is complete-and-ordered and
//      CEPH_CAP_FILE_SHARED is issued, entries come from _readdir_cache_cb();
//   4. otherwise frags are fetched with _readdir_get_frag() and the buffered
//      entries from the current offset onward are delivered one by one;
//   5. when the end is reached and the directory's shared_gen and
//      dir_release_count still match the values recorded on the handle, the
//      inode is marked I_COMPLETE (plus I_DIR_ORDERED when dir_ordered_count
//      also matches, after trimming readdir_cache to cache_index).
// Every callback invocation is made with client_lock released. `caps` is the
// cap mask derived from flags/want by statx_to_mask().
// NOTE(review): loop headers, error checks, re-locking and return statements
// are not visible in this excerpt.
7767 int Client::readdir_r_cb(dir_result_t
*d
, add_dirent_cb_t cb
, void *p
,
7768 unsigned want
, unsigned flags
, bool getref
)
7770 int caps
= statx_to_mask(flags
, want
);
7772 Mutex::Locker
lock(client_lock
);
7777 dir_result_t
*dirp
= static_cast<dir_result_t
*>(d
);
7779 ldout(cct
, 10) << "readdir_r_cb " << *dirp
->inode
<< " offset " << hex
<< dirp
->offset
7780 << dec
<< " at_end=" << dirp
->at_end()
7781 << " hash_order=" << dirp
->hash_order() << dendl
;
7784 struct ceph_statx stx
;
7785 memset(&de
, 0, sizeof(de
));
7786 memset(&stx
, 0, sizeof(stx
));
7788 InodeRef
& diri
= dirp
->inode
;
// Stage 1: synthesize the "." entry.
7793 if (dirp
->offset
== 0) {
7794 ldout(cct
, 15) << " including ." << dendl
;
7795 assert(diri
->dn_set
.size() < 2); // can't have multiple hard-links to a dir
7796 uint64_t next_off
= 1;
7799 r
= _getattr(diri
, caps
, dirp
->perms
);
7803 fill_statx(diri
, caps
, &stx
);
7804 fill_dirent(&de
, ".", S_IFDIR
, stx
.stx_ino
, next_off
);
7806 Inode
*inode
= NULL
;
7812 client_lock
.Unlock();
7813 r
= cb(p
, &de
, &stx
, next_off
, inode
);
7818 dirp
->offset
= next_off
;
// Stage 2: synthesize the ".." entry.
7822 if (dirp
->offset
== 1) {
7823 ldout(cct
, 15) << " including .." << dendl
;
7824 uint64_t next_off
= 2;
7826 if (diri
->dn_set
.empty())
7829 in
= diri
->get_first_parent()->dir
->parent_inode
;
7832 r
= _getattr(in
, caps
, dirp
->perms
);
7836 fill_statx(in
, caps
, &stx
);
7837 fill_dirent(&de
, "..", S_IFDIR
, stx
.stx_ino
, next_off
);
7839 Inode
*inode
= NULL
;
7845 client_lock
.Unlock();
7846 r
= cb(p
, &de
, &stx
, next_off
, inode
);
7851 dirp
->offset
= next_off
;
7856 // can we read from our cache?
7857 ldout(cct
, 10) << "offset " << hex
<< dirp
->offset
<< dec
7858 << " snapid " << dirp
->inode
->snapid
<< " (complete && ordered) "
7859 << dirp
->inode
->is_complete_and_ordered()
7860 << " issued " << ccap_string(dirp
->inode
->caps_issued())
// Stage 3: serve from the readdir cache when it is known to be usable.
7862 if (dirp
->inode
->snapid
!= CEPH_SNAPDIR
&&
7863 dirp
->inode
->is_complete_and_ordered() &&
7864 dirp
->inode
->caps_issued_mask(CEPH_CAP_FILE_SHARED
, true)) {
7865 int err
= _readdir_cache_cb(dirp
, cb
, p
, caps
, getref
);
// Stage 4: fetch frags from the MDS and walk the buffered entries.
7874 bool check_caps
= true;
7875 if (!dirp
->is_cached()) {
7876 int r
= _readdir_get_frag(dirp
);
7879 // _readdir_get_frag () may update dirp->offset if the replied dirfrag is
7880 // different than the requested one. (our dirfragtree was outdated)
7883 frag_t fg
= dirp
->buffer_frag
;
7885 ldout(cct
, 10) << "frag " << fg
<< " buffer size " << dirp
->buffer
.size()
7886 << " offset " << hex
<< dirp
->offset
<< dendl
;
7888 for (auto it
= std::lower_bound(dirp
->buffer
.begin(), dirp
->buffer
.end(),
7889 dirp
->offset
, dir_result_t::dentry_off_lt());
7890 it
!= dirp
->buffer
.end();
7892 dir_result_t::dentry
&entry
= *it
;
7894 uint64_t next_off
= entry
.offset
+ 1;
7898 r
= _getattr(entry
.inode
, caps
, dirp
->perms
);
7903 fill_statx(entry
.inode
, caps
, &stx
);
7904 fill_dirent(&de
, entry
.name
.c_str(), stx
.stx_mode
, stx
.stx_ino
, next_off
);
7906 Inode
*inode
= NULL
;
7908 inode
= entry
.inode
.get();
7912 client_lock
.Unlock();
7913 r
= cb(p
, &de
, &stx
, next_off
, inode
); // _next_ offset
7916 ldout(cct
, 15) << " de " << de
.d_name
<< " off " << hex
<< next_off
- 1 << dec
7917 << " = " << r
<< dendl
;
7921 dirp
->offset
= next_off
;
7926 if (dirp
->next_offset
> 2) {
7927 ldout(cct
, 10) << " fetching next chunk of this frag" << dendl
;
7928 _readdir_drop_dirp_buffer(dirp
);
7932 if (!fg
.is_rightmost()) {
7934 _readdir_next_frag(dirp
);
// Stage 5: the whole directory was read; mark it complete only if nothing
// changed underneath this handle while it was iterating.
7938 if (diri
->shared_gen
== dirp
->start_shared_gen
&&
7939 diri
->dir_release_count
== dirp
->release_count
) {
7940 if (diri
->dir_ordered_count
== dirp
->ordered_count
) {
7941 ldout(cct
, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on " << *diri
<< dendl
;
7943 assert(diri
->dir
->readdir_cache
.size() >= dirp
->cache_index
);
7944 diri
->dir
->readdir_cache
.resize(dirp
->cache_index
);
7946 diri
->flags
|= I_COMPLETE
| I_DIR_ORDERED
;
7948 ldout(cct
, 10) << " marking I_COMPLETE on " << *diri
<< dendl
;
7949 diri
->flags
|= I_COMPLETE
;
7961 int Client::readdir_r(dir_result_t
*d
, struct dirent
*de
)
7963 return readdirplus_r(d
, de
, 0, 0, 0, NULL
);
7970 * 1 if we got a dirent
7971 * 0 for end of directory
// State passed through the opaque callback pointer when exactly one entry is
// wanted (see _readdir_single_dirent_cb()).
// NOTE(review): only the statx output pointer is visible in this excerpt; the
// remaining members are not shown.
7975 struct single_readdir
{
7977 struct ceph_statx
*stx
;
// readdir_r_cb() callback that accepts a single entry: `p` is a
// single_readdir, and -1 is returned once an entry has already been stored so
// the iteration stops.
// NOTE(review): the code that stores the entry is not visible in this
// excerpt.
7982 static int _readdir_single_dirent_cb(void *p
, struct dirent
*de
,
7983 struct ceph_statx
*stx
, off_t off
,
7986 single_readdir
*c
= static_cast<single_readdir
*>(p
);
7989 return -1; // already filled this dirent
// readdir(3)-style interface: return a pointer to the next entry, or NULL.
// On failure errno is set to the negated return code of readdir_r_cb().
// NOTE(review): the dirent used here is a function-local static, so the
// storage is shared by every caller of this function; the setup of `sr` and
// the success return are not visible in this excerpt.
7999 struct dirent
*Client::readdir(dir_result_t
*d
)
8002 static struct dirent de
;
8009 // our callback fills the dirent and sets sr.full=true on first
8010 // call, and returns -1 the second time around.
8011 ret
= readdir_r_cb(d
, _readdir_single_dirent_cb
, (void *)&sr
);
8013 errno
= -ret
; // this sucks.
8014 return (dirent
*) NULL
;
8019 return (dirent
*) NULL
;
// Read one directory entry together with its statx data.
// Drives readdir_r_cb() with _readdir_single_dirent_cb so that at most one
// entry is accepted; want/flags are forwarded, and `out` is passed in the
// position of readdir_r_cb()'s `getref` parameter.
// NOTE(review): the setup of `sr` and the translation of `r` into the return
// value are not visible in this excerpt.
8022 int Client::readdirplus_r(dir_result_t
*d
, struct dirent
*de
,
8023 struct ceph_statx
*stx
, unsigned want
,
8024 unsigned flags
, Inode
**out
)
8032 // our callback fills the dirent and sets sr.full=true on first
8033 // call, and returns -1 the second time around.
8034 int r
= readdir_r_cb(d
, _readdir_single_dirent_cb
, (void *)&sr
, want
, flags
, out
);
// Accumulator handed to _readdir_getdent_cb() through the opaque callback
// pointer. The callback and _getdents() use its buf, buflen, pos and fullent
// members.
// NOTE(review): the member declarations are not visible in this excerpt.
8046 struct getdents_result
{
// readdir_r_cb() callback that packs entries into the caller's buffer.
// Two record forms appear here: a whole struct dirent, or just the
// NUL-terminated name (strlen + 1 bytes). If the record would not fit in the
// remaining space (pos + dlen > buflen) the callback returns -1 to stop the
// iteration.
// NOTE(review): the branch selecting between the two forms and the update of
// c->pos are not visible in this excerpt.
8053 static int _readdir_getdent_cb(void *p
, struct dirent
*de
,
8054 struct ceph_statx
*stx
, off_t off
, Inode
*in
)
8056 struct getdents_result
*c
= static_cast<getdents_result
*>(p
);
8062 dlen
= strlen(de
->d_name
) + 1;
8064 if (c
->pos
+ dlen
> c
->buflen
)
8065 return -1; // doesn't fit
8068 memcpy(c
->buf
+ c
->pos
, de
, sizeof(*de
));
8070 memcpy(c
->buf
+ c
->pos
, de
->d_name
, dlen
);
// Fill `buf` with as many directory entries as fit, using
// _readdir_getdent_cb. `fullent` is stored in the accumulator and read by the
// callback. A result of -1 from readdir_r_cb() means the buffer ran out of
// space, which is not an error if some entries were already stored; other
// negative values are real errors.
// NOTE(review): the values returned in each branch are not visible in this
// excerpt.
8076 int Client::_getdents(dir_result_t
*dir
, char *buf
, int buflen
, bool fullent
)
8081 gr
.fullent
= fullent
;
8084 int r
= readdir_r_cb(dir
, _readdir_getdent_cb
, (void *)&gr
);
8086 if (r
< 0) { // some error
8087 if (r
== -1) { // buffer ran out of space
8088 if (gr
.pos
) { // but we got some entries already!
8090 } // or we need a larger buffer
8092 } else { // actual error, return it
// Accumulator for getdir(): points at the caller's list of names that
// _getdir_cb() appends to.
// NOTE(review): any further members are not visible in this excerpt.
8101 struct getdir_result
{
8102 list
<string
> *contents
;
// readdir_r_cb() callback for getdir(): appends the entry's name to the list
// referenced by the getdir_result passed in `p`. The statx, offset and inode
// arguments are not used in the lines shown.
// NOTE(review): the return statement is not visible in this excerpt.
8106 static int _getdir_cb(void *p
, struct dirent
*de
, struct ceph_statx
*stx
, off_t off
, Inode
*in
)
8108 getdir_result
*r
= static_cast<getdir_result
*>(p
);
8110 r
->contents
->push_back(de
->d_name
);
// List a directory by path: open it with opendir(), collect every entry name
// into `contents` through _getdir_cb, using readdir_r_cb().
// NOTE(review): opendir() acquires client_lock itself, so the Locker below is
// presumably confined to a nested scope whose braces are not visible in this
// excerpt — confirm. Error checks, the closing of the handle and the return
// value are not visible either.
8115 int Client::getdir(const char *relpath
, list
<string
>& contents
,
8116 const UserPerm
& perms
)
8118 ldout(cct
, 3) << "getdir(" << relpath
<< ")" << dendl
;
8120 Mutex::Locker
lock(client_lock
);
8121 tout(cct
) << "getdir" << std::endl
;
8122 tout(cct
) << relpath
<< std::endl
;
8126 int r
= opendir(relpath
, &d
, perms
);
8131 gr
.contents
= &contents
;
8133 r
= readdir_r_cb(d
, _getdir_cb
, (void *)&gr
);
8143 /****** file i/o **********/
// Open (and optionally create) a file by path and return a file descriptor.
// Flow visible here:
//   - with O_PATH (Linux), flags are reduced to O_DIRECTORY|O_NOFOLLOW|O_PATH;
//   - the path is walked, following a trailing symlink unless O_NOFOLLOW or
//     O_CREAT|O_EXCL is given;
//   - on -ENOENT with O_CREAT, the parent directory is walked, may_create()
//     is consulted when client_permissions is set, and _create() is called
//     with the striping parameters and data pool;
//   - for an existing file, may_open() is consulted when client_permissions
//     is set, then _open() produces the Fh;
//   - the result is traced and logged.
// NOTE(review): the error codes chosen for the O_EXCL / symlink cases, the fd
// allocation itself and the return statement are not visible in this excerpt.
8144 int Client::open(const char *relpath
, int flags
, const UserPerm
& perms
,
8145 mode_t mode
, int stripe_unit
, int stripe_count
,
8146 int object_size
, const char *data_pool
)
8148 ldout(cct
, 3) << "open enter(" << relpath
<< ", " << ceph_flags_sys2wire(flags
) << "," << mode
<< ")" << dendl
;
8149 Mutex::Locker
lock(client_lock
);
8150 tout(cct
) << "open" << std::endl
;
8151 tout(cct
) << relpath
<< std::endl
;
8152 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
8159 #if defined(__linux__) && defined(O_PATH)
8160 /* When O_PATH is specified, flags other than O_DIRECTORY
8161 * and O_NOFOLLOW are ignored. Please refer to the do_entry_open() function
8162 * in kernel (fs/open.c). */
8164 flags
&= O_DIRECTORY
| O_NOFOLLOW
| O_PATH
;
8167 filepath
path(relpath
);
8169 bool created
= false;
8170 /* O_CREAT with O_EXCL enforces O_NOFOLLOW. */
8171 bool followsym
= !((flags
& O_NOFOLLOW
) || ((flags
& O_CREAT
) && (flags
& O_EXCL
)));
8172 int r
= path_walk(path
, &in
, perms
, followsym
, ceph_caps_for_mode(mode
));
8174 if (r
== 0 && (flags
& O_CREAT
) && (flags
& O_EXCL
))
8177 #if defined(__linux__) && defined(O_PATH)
8178 if (r
== 0 && in
->is_symlink() && (flags
& O_NOFOLLOW
) && !(flags
& O_PATH
))
8180 if (r
== 0 && in
->is_symlink() && (flags
& O_NOFOLLOW
))
// Creation path: the target does not exist and the caller asked for O_CREAT.
8184 if (r
== -ENOENT
&& (flags
& O_CREAT
)) {
8185 filepath dirpath
= path
;
8186 string dname
= dirpath
.last_dentry();
8187 dirpath
.pop_dentry();
8189 r
= path_walk(dirpath
, &dir
, perms
, true,
8190 cct
->_conf
->client_permissions
? CEPH_CAP_AUTH_SHARED
: 0);
8193 if (cct
->_conf
->client_permissions
) {
8194 r
= may_create(dir
.get(), perms
);
8198 r
= _create(dir
.get(), dname
.c_str(), flags
, mode
, &in
, &fh
, stripe_unit
,
8199 stripe_count
, object_size
, data_pool
, &created
, perms
);
8205 // posix says we can only check permissions of existing files
8206 if (cct
->_conf
->client_permissions
) {
8207 r
= may_open(in
.get(), flags
, perms
);
8214 r
= _open(in
.get(), flags
, mode
, &fh
, perms
);
8216 // allocate an integer file descriptor
8219 assert(fd_map
.count(r
) == 0);
8224 tout(cct
) << r
<< std::endl
;
8225 ldout(cct
, 3) << "open exit(" << path
<< ", " << ceph_flags_sys2wire(flags
) << ") = " << r
<< dendl
;
8229 int Client::open(const char *relpath
, int flags
, const UserPerm
& perms
, mode_t mode
)
8231 /* Use default file striping parameters */
8232 return open(relpath
, flags
, perms
, mode
, 0, 0, 0, NULL
);
// Ask the MDS to look up inode `ino` given its parent directory inode and
// dentry name (CEPH_MDS_OP_LOOKUPHASH).
// The second filepath is the parent directory inode followed by the decimal
// RJenkins hash of `name`. The request is sent to an MDS rank picked with
// rand() modulo the number of "in" MDS ranks.
// NOTE(review): the modulo divides by get_num_in_mds(); no guard against a
// zero result is visible in this excerpt. The declarations of `path` and `f`
// and the return statement are not visible either.
8235 int Client::lookup_hash(inodeno_t ino
, inodeno_t dirino
, const char *name
,
8236 const UserPerm
& perms
)
8238 Mutex::Locker
lock(client_lock
);
8239 ldout(cct
, 3) << "lookup_hash enter(" << ino
<< ", #" << dirino
<< "/" << name
<< ")" << dendl
;
8244 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPHASH
);
8246 req
->set_filepath(path
);
8248 uint32_t h
= ceph_str_hash(CEPH_STR_HASH_RJENKINS
, name
, strlen(name
));
8250 sprintf(f
, "%u", h
);
8251 filepath
path2(dirino
);
8252 path2
.push_dentry(string(f
));
8253 req
->set_filepath2(path2
);
8255 int r
= make_request(req
, perms
, NULL
, NULL
,
8256 rand() % mdsmap
->get_num_in_mds());
8257 ldout(cct
, 3) << "lookup_hash exit(" << ino
<< ", #" << dirino
<< "/" << name
<< ") = " << r
<< dendl
;
8263 * Load inode into local cache.
8265 * If inode pointer is non-NULL, also take a reference on
8266 * the resulting Inode object in one operation, so that caller
8267 * can safely assume inode will still be there after return.
// Ask the MDS for inode `ino` (CEPH_MDS_OP_LOOKUPINO), sending the request to
// an MDS rank picked with rand() modulo the number of "in" ranks.
// When the request succeeds and `inode` is non-NULL, the inode is looked up
// in inode_map under CEPH_NOSNAP and is asserted to be present.
// NOTE(review): the modulo divides by get_num_in_mds(); no guard against a
// zero result is visible in this excerpt. The declaration of `path`, the
// assignment to *inode and the return statement are not visible either.
8269 int Client::lookup_ino(inodeno_t ino
, const UserPerm
& perms
, Inode
**inode
)
8271 Mutex::Locker
lock(client_lock
);
8272 ldout(cct
, 3) << "lookup_ino enter(" << ino
<< ")" << dendl
;
8277 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPINO
);
8279 req
->set_filepath(path
);
8281 int r
= make_request(req
, perms
, NULL
, NULL
, rand() % mdsmap
->get_num_in_mds());
8282 if (r
== 0 && inode
!= NULL
) {
8283 vinodeno_t
vino(ino
, CEPH_NOSNAP
);
8284 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
8285 assert(p
!= inode_map
.end());
8289 ldout(cct
, 3) << "lookup_ino exit(" << ino
<< ") = " << r
<< dendl
;
8296 * Find the parent inode of `ino` and insert it into
8297 * our cache. Conditionally also set `parent` to a referenced
8298 * Inode* if caller provides non-NULL value.
// Look up the parent of inode `ino` via the MDS (CEPH_MDS_OP_LOOKUPPARENT).
// Two early cases are logged before any request is made: the inode already
// has a dentry (dn_set not empty), or the inode is the root and so has no
// parent. Otherwise the request is sent to an MDS rank picked with rand()
// modulo the number of "in" ranks, and the resulting target inode is handed
// back through `parent` when that pointer is non-NULL.
// NOTE(review): the return values of the early cases, the failure branch for
// *parent and the reference handling are not visible in this excerpt.
8300 int Client::lookup_parent(Inode
*ino
, const UserPerm
& perms
, Inode
**parent
)
8302 Mutex::Locker
lock(client_lock
);
8303 ldout(cct
, 3) << "lookup_parent enter(" << ino
->ino
<< ")" << dendl
;
8308 if (!ino
->dn_set
.empty()) {
8309 // if we exposed the parent here, we'd need to check permissions,
8310 // but right now we just rely on the MDS doing so in make_request
8311 ldout(cct
, 3) << "lookup_parent dentry already present" << dendl
;
8315 if (ino
->is_root()) {
8317 ldout(cct
, 3) << "ino is root, no parent" << dendl
;
8321 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPPARENT
);
8322 filepath
path(ino
->ino
);
8323 req
->set_filepath(path
);
8326 int r
= make_request(req
, perms
, &target
, NULL
, rand() % mdsmap
->get_num_in_mds());
8327 // Give caller a reference to the parent ino if they provided a pointer.
8328 if (parent
!= NULL
) {
8330 *parent
= target
.get();
8332 ldout(cct
, 3) << "lookup_parent found parent " << (*parent
)->ino
<< dendl
;
8337 ldout(cct
, 3) << "lookup_parent exit(" << ino
->ino
<< ") = " << r
<< dendl
;
8343 * Populate the parent dentry for `ino`, provided it is
8344 * a child of `parent`.
// Ask the MDS for the dentry name of `ino` within directory `parent`
// (CEPH_MDS_OP_LOOKUPNAME). `parent` must be a directory (asserted before the
// lock is taken). filepath2 carries the parent's inode number and filepath
// the child's; the request is sent to an MDS rank picked with rand() modulo
// the number of "in" ranks.
// NOTE(review): the return statement is not visible in this excerpt.
8346 int Client::lookup_name(Inode
*ino
, Inode
*parent
, const UserPerm
& perms
)
8348 assert(parent
->is_dir());
8350 Mutex::Locker
lock(client_lock
);
8351 ldout(cct
, 3) << "lookup_name enter(" << ino
->ino
<< ")" << dendl
;
8356 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
8357 req
->set_filepath2(filepath(parent
->ino
));
8358 req
->set_filepath(filepath(ino
->ino
));
8359 req
->set_inode(ino
);
8361 int r
= make_request(req
, perms
, NULL
, NULL
, rand() % mdsmap
->get_num_in_mds());
8362 ldout(cct
, 3) << "lookup_name exit(" << ino
->ino
<< ") = " << r
<< dendl
;
// Build a file handle for inode `in` opened with the given flags and cap
// mode.
// The caller's permissions are recorded on the handle. For snapshot inodes
// (snapid != CEPH_NOSNAP) snap_cap_refs is incremented. Readahead is then
// configured from the client options: one trigger request, the configured
// minimum size, a maximum that is the smaller of client_readahead_max_bytes
// and client_readahead_max_periods layout periods (each only when non-zero),
// and alignment hints at the layout period and the stripe unit.
// NOTE(review): the allocation of `f`, its remaining initialisation and the
// return statement are not visible in this excerpt.
8367 Fh
*Client::_create_fh(Inode
*in
, int flags
, int cmode
, const UserPerm
& perms
)
8375 f
->actor_perms
= perms
;
8377 ldout(cct
, 10) << "_create_fh " << in
->ino
<< " mode " << cmode
<< dendl
;
8379 if (in
->snapid
!= CEPH_NOSNAP
) {
8380 in
->snap_cap_refs
++;
8381 ldout(cct
, 5) << "open success, fh is " << f
<< " combined IMMUTABLE SNAP caps "
8382 << ccap_string(in
->caps_issued()) << dendl
;
8385 const md_config_t
*conf
= cct
->_conf
;
8386 f
->readahead
.set_trigger_requests(1);
8387 f
->readahead
.set_min_readahead_size(conf
->client_readahead_min
);
// Start unlimited and tighten with whichever limits are configured.
8388 uint64_t max_readahead
= Readahead::NO_LIMIT
;
8389 if (conf
->client_readahead_max_bytes
) {
8390 max_readahead
= MIN(max_readahead
, (uint64_t)conf
->client_readahead_max_bytes
);
8392 if (conf
->client_readahead_max_periods
) {
8393 max_readahead
= MIN(max_readahead
, in
->layout
.get_period()*(uint64_t)conf
->client_readahead_max_periods
);
8395 f
->readahead
.set_max_readahead_size(max_readahead
);
8396 vector
<uint64_t> alignments
;
8397 alignments
.push_back(in
->layout
.get_period());
8398 alignments
.push_back(in
->layout
.stripe_unit
);
8399 f
->readahead
.set_alignments(alignments
);
// Release a file handle.
// For a non-snapshot inode the open reference for the handle's mode is
// dropped, and when put_open_ref() reports true a flush is started with a
// C_Client_FlushComplete completion. For a snapshot inode snap_cap_refs is
// decremented (asserted positive). File locks held through the handle are
// released, and any asynchronous error recorded on the handle is collected
// and logged.
// NOTE(review): the else-structure, the final disposal of `f` and the return
// statement are not visible in this excerpt.
8404 int Client::_release_fh(Fh
*f
)
8406 //ldout(cct, 3) << "op: client->close(open_files[ " << fh << " ]);" << dendl;
8407 //ldout(cct, 3) << "op: open_files.erase( " << fh << " );" << dendl;
8408 Inode
*in
= f
->inode
.get();
8409 ldout(cct
, 5) << "_release_fh " << f
<< " mode " << f
->mode
<< " on " << *in
<< dendl
;
8413 if (in
->snapid
== CEPH_NOSNAP
) {
8414 if (in
->put_open_ref(f
->mode
)) {
8415 _flush(in
, new C_Client_FlushComplete(this, in
));
8419 assert(in
->snap_cap_refs
> 0);
8420 in
->snap_cap_refs
--;
8423 _release_filelocks(f
);
8425 // Finally, read any async err (i.e. from flushes)
8426 int err
= f
->take_async_err();
8428 ldout(cct
, 1) << "_release_fh " << f
<< " on inode " << *in
<< " caught async_err = "
8429 << cpp_strerror(err
) << dendl
;
8431 ldout(cct
, 10) << "_release_fh " << f
<< " on inode " << *in
<< " no async_err state" << dendl
;
// Drop one reference on a file handle; f->put() yields the remaining count.
// NOTE(review): what happens when the count reaches zero is not visible in
// this excerpt.
8439 void Client::_put_fh(Fh
*f
)
8441 int left
= f
->put();
// Open inode `in` and, on success, return a new Fh through fhp.
// Visible flow:
//   - opening a snapshot inode with any write-type flag is rejected by the
//     first check (the error code is not shown);
//   - the cap mode is derived from the wire-normalised flags and an open
//     reference is taken for it;
//   - if O_TRUNC is not set and the wanted caps are already issued, only
//     check_caps() is run; otherwise a CEPH_MDS_OP_OPEN request is sent
//     (O_CREAT stripped, pool -1, old_size supplied for O_TRUNC);
//   - when delegations are enabled (deleg_timeout) and the open succeeded,
//     the needed RD/WR caps are acquired with get_caps() and released again
//     with put_cap_ref(), so that conflicting delegations are recalled first;
//   - on success the Fh is created with _create_fh(); on failure the open
//     reference is dropped.
// NOTE(review): branch structure, declarations of `result`/`need`/`have` and
// the return statement are only partly visible in this excerpt.
8447 int Client::_open(Inode
*in
, int flags
, mode_t mode
, Fh
**fhp
,
8448 const UserPerm
& perms
)
8450 if (in
->snapid
!= CEPH_NOSNAP
&&
8451 (flags
& (O_WRONLY
| O_RDWR
| O_CREAT
| O_TRUNC
| O_APPEND
))) {
8455 // use normalized flags to generate cmode
8456 int cmode
= ceph_flags_to_mode(ceph_flags_sys2wire(flags
));
8459 int want
= ceph_caps_for_mode(cmode
);
8462 in
->get_open_ref(cmode
); // make note of pending open, since it affects _wanted_ caps.
8464 if ((flags
& O_TRUNC
) == 0 && in
->caps_issued_mask(want
)) {
8466 check_caps(in
, CHECK_CAPS_NODELAY
);
8469 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_OPEN
);
8471 in
->make_nosnap_relative_path(path
);
8472 req
->set_filepath(path
);
8473 req
->head
.args
.open
.flags
= ceph_flags_sys2wire(flags
& ~O_CREAT
);
8474 req
->head
.args
.open
.mode
= mode
;
8475 req
->head
.args
.open
.pool
= -1;
8476 if (cct
->_conf
->client_debug_getattr_caps
)
8477 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
8479 req
->head
.args
.open
.mask
= 0;
8480 req
->head
.args
.open
.old_size
= in
->size
; // for O_TRUNC
8482 result
= make_request(req
, perms
);
8485 * NFS expects that delegations will be broken on a conflicting open,
8486 * not just when there is actual conflicting access to the file. SMB leases
8487 * and oplocks also have similar semantics.
8489 * Ensure that clients that have delegations enabled will wait on minimal
8490 * caps during open, just to ensure that other clients holding delegations
8491 * return theirs first.
8493 if (deleg_timeout
&& result
== 0) {
8496 if (cmode
& CEPH_FILE_MODE_WR
)
8497 need
|= CEPH_CAP_FILE_WR
;
8498 if (cmode
& CEPH_FILE_MODE_RD
)
8499 need
|= CEPH_CAP_FILE_RD
;
8501 result
= get_caps(in
, need
, want
, &have
, -1);
8503 ldout(cct
, 1) << "Unable to get caps after open of inode " << *in
<<
8504 " . Denying open: " <<
8505 cpp_strerror(result
) << dendl
;
8506 in
->put_open_ref(cmode
);
8508 put_cap_ref(in
, need
);
8516 *fhp
= _create_fh(in
, flags
, cmode
, perms
);
8518 in
->put_open_ref(cmode
);
// Re-request the file caps this client wants on inode `in`.
// If the inode still has caps and either no write caps are wanted or an auth
// cap exists, check_caps() is sufficient. Otherwise an open request
// (CEPH_MDS_OP_OPEN, pool -1) is sent with flags derived from the wanted
// RD/WR caps, using the permissions returned by in->get_best_perms().
// NOTE(review): the flag values chosen per branch, the copy of the permissions
// into `perms` and the return statement are not visible in this excerpt.
8526 int Client::_renew_caps(Inode
*in
)
8528 int wanted
= in
->caps_file_wanted();
8529 if (in
->is_any_caps() &&
8530 ((wanted
& CEPH_CAP_ANY_WR
) == 0 || in
->auth_cap
)) {
8531 check_caps(in
, CHECK_CAPS_NODELAY
);
8536 if ((wanted
& CEPH_CAP_FILE_RD
) && (wanted
& CEPH_CAP_FILE_WR
))
8538 else if (wanted
& CEPH_CAP_FILE_RD
)
8540 else if (wanted
& CEPH_CAP_FILE_WR
)
8543 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_OPEN
);
8545 in
->make_nosnap_relative_path(path
);
8546 req
->set_filepath(path
);
8547 req
->head
.args
.open
.flags
= flags
;
8548 req
->head
.args
.open
.pool
= -1;
8549 if (cct
->_conf
->client_debug_getattr_caps
)
8550 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
8552 req
->head
.args
.open
.mask
= 0;
8555 // duplicate in case Cap goes away; not sure if that race is a concern?
8556 const UserPerm
*pperm
= in
->get_best_perms();
8560 int ret
= make_request(req
, perms
);
// Close a file descriptor: map fd to its Fh with get_filehandle() and release
// it with _release_fh(), whose result is kept in `err`.
// NOTE(review): handling of an unknown fd, removal of the fd from the
// descriptor table and the return statement are not visible in this excerpt.
8564 int Client::close(int fd
)
8566 ldout(cct
, 3) << "close enter(" << fd
<< ")" << dendl
;
8567 Mutex::Locker
lock(client_lock
);
8568 tout(cct
) << "close" << std::endl
;
8569 tout(cct
) << fd
<< std::endl
;
8574 Fh
*fh
= get_filehandle(fd
);
8577 int err
= _release_fh(fh
);
8580 ldout(cct
, 3) << "close exit(" << fd
<< ")" << dendl
;
// Reposition the file offset of an open descriptor.
// Traces the call, maps fd to its Fh and delegates to _lseek(). On Linux with
// O_PATH available, handles opened with O_PATH take a separate branch.
// NOTE(review): the O_PATH branch body and the unknown-fd handling are not
// visible in this excerpt.
8588 loff_t
Client::lseek(int fd
, loff_t offset
, int whence
)
8590 Mutex::Locker
lock(client_lock
);
8591 tout(cct
) << "lseek" << std::endl
;
8592 tout(cct
) << fd
<< std::endl
;
8593 tout(cct
) << offset
<< std::endl
;
8594 tout(cct
) << whence
<< std::endl
;
8599 Fh
*f
= get_filehandle(fd
);
8602 #if defined(__linux__) && defined(O_PATH)
8603 if (f
->flags
& O_PATH
)
8606 return _lseek(f
, offset
, whence
);
// Compute and store the new position f->pos for a seek on handle `f`.
// The lines shown cover the end-relative case: the size is refreshed with
// _getattr(CEPH_STAT_CAP_SIZE) and the position becomes in->size + offset.
// The resulting position is logged.
// NOTE(review): the dispatch on `whence`, the other cases and the return
// statement are not visible in this excerpt.
8609 loff_t
Client::_lseek(Fh
*f
, loff_t offset
, int whence
)
8611 Inode
*in
= f
->inode
.get();
8624 r
= _getattr(in
, CEPH_STAT_CAP_SIZE
, f
->actor_perms
);
8627 f
->pos
= in
->size
+ offset
;
8634 ldout(cct
, 3) << "_lseek(" << f
<< ", " << offset
<< ", " << whence
<< ") = " << f
->pos
<< dendl
;
// Acquire exclusive use of a file handle's position.
// If the position is already locked or other callers are queued, this caller
// appends its own condition variable to f->pos_waiters and waits on
// client_lock until the position is unlocked AND its condition is at the
// front of the queue, which gives first-come-first-served ordering. It then
// removes itself from the queue and marks the position locked.
// NOTE(review): the wait relies on something signalling `cond`; the
// declaration of `cond` is not visible in this excerpt.
8639 void Client::lock_fh_pos(Fh
*f
)
8641 ldout(cct
, 10) << "lock_fh_pos " << f
<< dendl
;
8643 if (f
->pos_locked
|| !f
->pos_waiters
.empty()) {
8645 f
->pos_waiters
.push_back(&cond
);
8646 ldout(cct
, 10) << "lock_fh_pos BLOCKING on " << f
<< dendl
;
8647 while (f
->pos_locked
|| f
->pos_waiters
.front() != &cond
)
8648 cond
.Wait(client_lock
);
8649 ldout(cct
, 10) << "lock_fh_pos UNBLOCKING on " << f
<< dendl
;
8650 assert(f
->pos_waiters
.front() == &cond
);
8651 f
->pos_waiters
.pop_front();
8654 f
->pos_locked
= true;
8657 void Client::unlock_fh_pos(Fh
*f
)
8659 ldout(cct
, 10) << "unlock_fh_pos " << f
<< dendl
;
8660 f
->pos_locked
= false;
// Move an inode's inline data out into its first RADOS object.
// With no inline data the completion is finished immediately with 0.
// Otherwise two mutations are issued against object "<ino hex>.00000000":
//   1. a non-exclusive create;
//   2. a guarded write: compare the object's "inline_version" xattr
//      (CEPH_OSD_CMPXATTR_OP_GT, u64 mode), write the inline data at offset 0
//      with the inode's truncate_size/truncate_seq, and set "inline_version"
//      to the inode's current value.
// NOTE(review): several mutate() arguments (including where `onfinish` is
// passed), the oid_buf declaration and the return statements are not visible
// in this excerpt.
8663 int Client::uninline_data(Inode
*in
, Context
*onfinish
)
8665 if (!in
->inline_data
.length()) {
8666 onfinish
->complete(0);
8671 snprintf(oid_buf
, sizeof(oid_buf
), "%llx.00000000", (long long unsigned)in
->ino
);
8672 object_t oid
= oid_buf
;
8674 ObjectOperation create_ops
;
8675 create_ops
.create(false);
8677 objecter
->mutate(oid
,
8678 OSDMap::file_to_object_locator(in
->layout
),
8680 in
->snaprealm
->get_snap_context(),
8681 ceph::real_clock::now(),
8685 bufferlist inline_version_bl
;
8686 ::encode(in
->inline_version
, inline_version_bl
);
8688 ObjectOperation uninline_ops
;
8689 uninline_ops
.cmpxattr("inline_version",
8690 CEPH_OSD_CMPXATTR_OP_GT
,
8691 CEPH_OSD_CMPXATTR_MODE_U64
,
8693 bufferlist inline_data
= in
->inline_data
;
8694 uninline_ops
.write(0, inline_data
, in
->truncate_size
, in
->truncate_seq
);
8695 uninline_ops
.setxattr("inline_version", stringify(in
->inline_version
));
8697 objecter
->mutate(oid
,
8698 OSDMap::file_to_object_locator(in
->layout
),
8700 in
->snaprealm
->get_snap_context(),
8701 ceph::real_clock::now(),
8710 // blocking osd interface
// Read up to `size` bytes at `offset` from descriptor fd into buf.
// Traces the call, maps fd to its Fh, reads into a bufferlist with _read()
// and copies the bufferlist contents into buf. On Linux with O_PATH
// available, handles opened with O_PATH take a separate branch.
// NOTE(review): the O_PATH branch body, the condition guarding the copy into
// buf and the return statement are not visible in this excerpt.
8712 int Client::read(int fd
, char *buf
, loff_t size
, loff_t offset
)
8714 Mutex::Locker
lock(client_lock
);
8715 tout(cct
) << "read" << std::endl
;
8716 tout(cct
) << fd
<< std::endl
;
8717 tout(cct
) << size
<< std::endl
;
8718 tout(cct
) << offset
<< std::endl
;
8723 Fh
*f
= get_filehandle(fd
);
8726 #if defined(__linux__) && defined(O_PATH)
8727 if (f
->flags
& O_PATH
)
8731 int r
= _read(f
, offset
, size
, &bl
);
8732 ldout(cct
, 3) << "read(" << fd
<< ", " << (void*)buf
<< ", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
8734 bl
.copy(0, bl
.length(), buf
);
// Vectored positional read: forwards to _preadv_pwritev() with the final
// argument `false`, which selects the read direction.
// NOTE(review): any argument validation before the call is not visible in
// this excerpt.
8740 int Client::preadv(int fd
, const struct iovec
*iov
, int iovcnt
, loff_t offset
)
8744 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, false);
// Read `size` bytes at `offset` from handle `f` into *bl.
// Visible flow:
//   - the handle must have been opened with read mode;
//   - if the inline state is unknown (inline_version == 0) it is fetched with
//     _getattr(CEPH_STAT_CAP_INLINE_DATA);
//   - CEPH_CAP_FILE_RD is acquired (CEPH_CAP_FILE_CACHE wanted); O_DIRECT
//     handles ignore the cache cap;
//   - if the inode still carries inline data: without the cache cap the data
//     is first migrated out with uninline_data(); with it, the read is
//     answered directly from the inline buffer, zero-filling up to the
//     requested end within the file size;
//   - otherwise the read goes through the object cache (_read_async) when the
//     cache is enabled and the cache cap is held, else through _read_sync;
//   - f->pos is advanced by the number of bytes read when the position is
//     being tracked;
//   - if an uninline was started, client_lock is dropped while waiting for
//     it; on success or -ECANCELED the inline data is cleared, inline_version
//     becomes CEPH_INLINE_NONE and the caps are marked dirty.
// Returns the length of *bl on the path shown at the end.
// NOTE(review): error returns, the EOF re-check loop structure and several
// declarations are only partly visible in this excerpt.
8747 int Client::_read(Fh
*f
, int64_t offset
, uint64_t size
, bufferlist
*bl
)
8749 const md_config_t
*conf
= cct
->_conf
;
8750 Inode
*in
= f
->inode
.get();
8752 if ((f
->mode
& CEPH_FILE_MODE_RD
) == 0)
8754 //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
8756 bool movepos
= false;
8762 loff_t start_pos
= offset
;
8764 if (in
->inline_version
== 0) {
8765 int r
= _getattr(in
, CEPH_STAT_CAP_INLINE_DATA
, f
->actor_perms
, true);
8771 assert(in
->inline_version
> 0);
8776 int r
= get_caps(in
, CEPH_CAP_FILE_RD
, CEPH_CAP_FILE_CACHE
, &have
, -1);
// O_DIRECT reads must not be served from the cache.
8782 if (f
->flags
& O_DIRECT
)
8783 have
&= ~CEPH_CAP_FILE_CACHE
;
8785 Mutex
uninline_flock("Client::_read_uninline_data flock");
8787 bool uninline_done
= false;
8788 int uninline_ret
= 0;
8789 Context
*onuninline
= NULL
;
8791 if (in
->inline_version
< CEPH_INLINE_NONE
) {
8792 if (!(have
& CEPH_CAP_FILE_CACHE
)) {
8793 onuninline
= new C_SafeCond(&uninline_flock
,
8797 uninline_data(in
, onuninline
);
8799 uint32_t len
= in
->inline_data
.length();
8801 uint64_t endoff
= offset
+ size
;
8802 if (endoff
> in
->size
)
8806 if (endoff
<= len
) {
8807 bl
->substr_of(in
->inline_data
, offset
, endoff
- offset
);
8809 bl
->substr_of(in
->inline_data
, offset
, len
- offset
);
8810 bl
->append_zero(endoff
- len
);
8812 } else if ((uint64_t)offset
< endoff
) {
8813 bl
->append_zero(endoff
- offset
);
8820 if (!conf
->client_debug_force_sync_read
&&
8821 (conf
->client_oc
&& (have
& CEPH_CAP_FILE_CACHE
))) {
8823 if (f
->flags
& O_RSYNC
) {
8824 _flush_range(in
, offset
, size
);
8826 r
= _read_async(f
, offset
, size
, bl
);
8830 if (f
->flags
& O_DIRECT
)
8831 _flush_range(in
, offset
, size
);
8833 bool checkeof
= false;
8834 r
= _read_sync(f
, offset
, size
, bl
, &checkeof
);
8841 put_cap_ref(in
, CEPH_CAP_FILE_RD
);
8844 r
= _getattr(in
, CEPH_STAT_CAP_SIZE
, f
->actor_perms
);
8849 if ((uint64_t)offset
< in
->size
)
8857 f
->pos
= start_pos
+ bl
->length();
// Wait for the uninline completion without holding client_lock.
8865 client_lock
.Unlock();
8866 uninline_flock
.Lock();
8867 while (!uninline_done
)
8868 uninline_cond
.Wait(uninline_flock
);
8869 uninline_flock
.Unlock();
8872 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
8873 in
->inline_data
.clear();
8874 in
->inline_version
= CEPH_INLINE_NONE
;
8875 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
8882 put_cap_ref(in
, CEPH_CAP_FILE_RD
);
8888 return bl
->length();
// Completion context for a readahead request on file handle `f`.
// Construction registers one more pending readahead on the handle.
// NOTE(review): the member initialiser list and any other statements in the
// body are not visible in this excerpt.
8891 Client::C_Readahead::C_Readahead(Client
*c
, Fh
*f
) :
8894 f
->readahead
.inc_pending();
// Destruction balances the constructor by removing this request from the
// handle's pending-readahead count.
// NOTE(review): any further cleanup in the body is not visible in this
// excerpt.
8897 Client::C_Readahead::~C_Readahead() {
8898 f
->readahead
.dec_pending();
// Completion callback for a readahead read. Logs at debug level 20 and then
// drops the FILE_RD | FILE_CACHE cap references on the handle's inode.
// _read_async takes the same pair of references after it issues the readahead.
8902 void Client::C_Readahead::finish(int r
) {
8903 lgeneric_subdout(client
->cct
, client
, 20) << "client." << client
->get_nodeid() << " " << "C_Readahead on " << f
->inode
<< dendl
;
8904 client
->put_cap_ref(f
->inode
.get(), CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_CACHE
);
// Read through the object cacher.
//   f   - open file handle; its inode supplies oset, layout, snapid and size
//   off - starting byte offset
//   len - requested length; clamped so that off + len does not exceed in->size
//   bl  - destination bufferlist handed to objectcacher->file_read()
// After the caller's read, optionally issues a readahead read sized by
// f->readahead.update().
// NOTE(review): several lines (local declarations, early returns, the wait
// loop, the final return) are missing from this listing; comments below only
// describe what is visible.
8907 int Client::_read_async(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
)
8909 const md_config_t
*conf
= cct
->_conf
;
8910 Inode
*in
= f
->inode
.get();
8912 ldout(cct
, 10) << "_read_async " << *in
<< " " << off
<< "~" << len
<< dendl
;
8914 // trim read based on file size?
8915 if (off
>= in
->size
)
8919 if (off
+ len
> in
->size
) {
8920 len
= in
->size
- off
;
8923 ldout(cct
, 10) << " min_bytes=" << f
->readahead
.get_min_readahead_size()
8924 << " max_bytes=" << f
->readahead
.get_max_readahead_size()
8925 << " max_periods=" << conf
->client_readahead_max_periods
<< dendl
;
8927 // read (and possibly block)
8929 Mutex
flock("Client::_read_async flock");
// The completion is built from a private mutex, a condition, a done flag and
// a result slot; the result of file_read() itself is stored in r.
8932 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
, &rvalue
);
8933 r
= objectcacher
->file_read(&in
->oset
, &in
->layout
, in
->snapid
,
8934 off
, len
, bl
, 0, onfinish
);
// A FILE_CACHE cap reference is taken before client_lock is released and
// dropped again afterwards.
8936 get_cap_ref(in
, CEPH_CAP_FILE_CACHE
);
8937 client_lock
.Unlock();
8943 put_cap_ref(in
, CEPH_CAP_FILE_CACHE
);
// Readahead is only considered when the handle's minimum readahead size is
// non-zero.
8950 if(f
->readahead
.get_min_readahead_size() > 0) {
8951 pair
<uint64_t, uint64_t> readahead_extent
= f
->readahead
.update(off
, len
, in
->size
);
8952 if (readahead_extent
.second
> 0) {
8953 ldout(cct
, 20) << "readahead " << readahead_extent
.first
<< "~" << readahead_extent
.second
8954 << " (caller wants " << off
<< "~" << len
<< ")" << dendl
;
// The readahead passes a NULL destination buffer to file_read().
8955 Context
*onfinish2
= new C_Readahead(this, f
);
8956 int r2
= objectcacher
->file_read(&in
->oset
, &in
->layout
, in
->snapid
,
8957 readahead_extent
.first
, readahead_extent
.second
,
8958 NULL
, 0, onfinish2
);
8960 ldout(cct
, 20) << "readahead initiated, c " << onfinish2
<< dendl
;
// These references are the ones C_Readahead::finish() releases.
8961 get_cap_ref(in
, CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_CACHE
);
8963 ldout(cct
, 20) << "readahead was no-op, already cached" << dendl
;
// Synchronous read via filer->read_trunc() (not the object cacher).
//   f   - open file handle; its inode supplies ino, layout, snapid and the
//         truncate_size / truncate_seq pair passed to read_trunc()
//   off - starting byte offset
//   len - requested length
//   bl  - destination; data read into a temporary list is claim_append()ed
// NOTE(review): the remaining parameter(s), the read loop and the return are
// missing from this listing.
8972 int Client::_read_sync(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
,
8975 Inode
*in
= f
->inode
.get();
8980 ldout(cct
, 10) << "_read_sync " << *in
<< " " << off
<< "~" << len
<< dendl
;
8982 Mutex
flock("Client::_read_sync flock");
// The completion writes its result straight into r.
8987 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
, &r
);
8991 filer
->read_trunc(in
->ino
, &in
->layout
, in
->snapid
,
8993 in
->truncate_size
, in
->truncate_seq
,
8995 client_lock
.Unlock();
9002 // if we get ENOENT from OSD, assume 0 bytes returned
9013 bl
->claim_append(tbl
);
// Short read: fewer bytes than wanted came back. If the position is still
// inside the known file size, the gap up to in->size is computed below.
9016 if (r
>= 0 && r
< wanted
) {
9017 if (pos
< in
->size
) {
9018 // zero up to known EOF
9019 int64_t some
= in
->size
- pos
;
9041 * we keep count of uncommitted sync writes on the inode, so that
// Called when one synchronous write has committed. Decrements the
// unsafe_sync_write counter (asserting it was positive), drops the
// FILE_BUFFER cap reference on the inode, and signals mount_cond once the
// counter reaches zero while unmounting is set.
9044 void Client::_sync_write_commit(Inode
*in
)
9046 assert(unsafe_sync_write
> 0);
9047 unsafe_sync_write
--;
9049 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9051 ldout(cct
, 15) << "sync_write_commit unsafe_sync_write = " << unsafe_sync_write
<< dendl
;
9052 if (unsafe_sync_write
== 0 && unmounting
) {
9053 ldout(cct
, 10) << "sync_write_commit -- no more unsafe writes, unmount can proceed" << dendl
;
9054 mount_cond
.Signal();
// fd-based write entry point.
//   fd     - descriptor resolved through get_filehandle()
//   buf    - source bytes
//   size   - number of bytes to write
//   offset - file offset, passed straight through to _write()
// Holds client_lock for the whole call (Mutex::Locker), records the call and
// its arguments on the trace stream, then delegates to _write() with a flat
// buffer (iov = NULL, iovcnt = 0) and logs the result.
// NOTE(review): the handling of a failed fd lookup, the action taken for
// O_PATH handles and the return statement are missing from this listing.
9058 int Client::write(int fd
, const char *buf
, loff_t size
, loff_t offset
)
9060 Mutex::Locker
lock(client_lock
);
9061 tout(cct
) << "write" << std::endl
;
9062 tout(cct
) << fd
<< std::endl
;
9063 tout(cct
) << size
<< std::endl
;
9064 tout(cct
) << offset
<< std::endl
;
9069 Fh
*fh
= get_filehandle(fd
);
9072 #if defined(__linux__) && defined(O_PATH)
9073 if (fh
->flags
& O_PATH
)
9076 int r
= _write(fh
, offset
, size
, buf
, NULL
, 0);
9077 ldout(cct
, 3) << "write(" << fd
<< ", \"...\", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
// Vectored positional write: forwards to _preadv_pwritev() with write = true.
// NOTE(review): lines between the signature and the return are missing from
// this listing.
9081 int Client::pwritev(int fd
, const struct iovec
*iov
, int iovcnt
, int64_t offset
)
9085 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, true);
// Shared implementation of the vectored read and write entry points.
//   fd     - descriptor resolved through get_filehandle()
//   iov    - array of iovcnt buffers
//   offset - file offset passed to _write() / _read()
//   write  - selects the write path (pwritev passes true)
// Holds client_lock for the whole call. The total length is the sum of all
// iov_len values. The write path hands the iovec array to _write(); the read
// path reads into a bufferlist and then copies it out into the iovec entries.
// NOTE(review): the branch on `write`, the declarations of bl / bufoff and
// the return statements are missing from this listing.
9088 int Client::_preadv_pwritev(int fd
, const struct iovec
*iov
, unsigned iovcnt
, int64_t offset
, bool write
)
9090 Mutex::Locker
lock(client_lock
);
9091 tout(cct
) << fd
<< std::endl
;
9092 tout(cct
) << offset
<< std::endl
;
9097 Fh
*fh
= get_filehandle(fd
);
9100 #if defined(__linux__) && defined(O_PATH)
9101 if (fh
->flags
& O_PATH
)
9104 loff_t totallen
= 0;
9105 for (unsigned i
= 0; i
< iovcnt
; i
++) {
9106 totallen
+= iov
[i
].iov_len
;
9109 int w
= _write(fh
, offset
, totallen
, NULL
, iov
, iovcnt
);
9110 ldout(cct
, 3) << "pwritev(" << fd
<< ", \"...\", " << totallen
<< ", " << offset
<< ") = " << w
<< dendl
;
9114 int r
= _read(fh
, offset
, totallen
, &bl
);
9115 ldout(cct
, 3) << "preadv(" << fd
<< ", " << offset
<< ") = " << r
<< dendl
;
// Scatter the bytes that were read across the iovec entries; resid starts at
// the read result and the loop stops once it reaches zero.
9120 for (unsigned j
= 0, resid
= r
; j
< iovcnt
&& resid
> 0; j
++) {
9122 * This piece of code aims to handle the case that bufferlist does not have enough data
9123 * to fill in the iov
9125 if (resid
< iov
[j
].iov_len
) {
9126 bl
.copy(bufoff
, resid
, (char *)iov
[j
].iov_base
);
9129 bl
.copy(bufoff
, iov
[j
].iov_len
, (char *)iov
[j
].iov_base
);
9131 resid
-= iov
[j
].iov_len
;
9132 bufoff
+= iov
[j
].iov_len
;
// Core write path shared by write() and the vectored write.
//   f      - open file handle (mode, flags, pos, actor_perms, inode)
//   offset - file offset of the write
//   size   - number of bytes
//   buf    - flat source buffer, or NULL when iov is used
//   iov / iovcnt - scatter list appended entry by entry when buf is not used
// Visible stages, in order: size / pool-full / open-mode / quota checks,
// O_APPEND positioning, inline-data version fetch, copy of the payload into
// a fresh bufferlist, cap acquisition, setuid/setgid clearing, inline-data
// handling or uninlining, buffered (object cacher) or synchronous (filer)
// write, size and mtime update, and finally waiting for an uninline request.
// NOTE(review): many lines (error returns, else branches, local declarations
// such as bl / have / cond / done / lat) are missing from this listing, so
// the comments below describe only what is visible.
9138 int Client::_write(Fh
*f
, int64_t offset
, uint64_t size
, const char *buf
,
9139 const struct iovec
*iov
, int iovcnt
)
9141 if ((uint64_t)(offset
+size
) > mdsmap
->get_max_filesize()) //too large!
9144 //ldout(cct, 7) << "write fh " << fh << " size " << size << " offset " << offset << dendl;
9145 Inode
*in
= f
->inode
.get();
9147 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
)) {
9151 assert(in
->snapid
== CEPH_NOSNAP
);
9153 // was Fh opened as writeable?
9154 if ((f
->mode
& CEPH_FILE_MODE_WR
) == 0)
// Quota is only consulted when the write would extend the file.
9158 uint64_t endoff
= offset
+ size
;
9159 if (endoff
> in
->size
&& is_quota_bytes_exceeded(in
, endoff
- in
->size
,
9164 // use/adjust fd pos?
9168 * FIXME: this is racy in that we may block _after_ this point waiting for caps, and size may
9169 * change out from under us.
9171 if (f
->flags
& O_APPEND
) {
9172 int r
= _lseek(f
, 0, SEEK_END
);
9179 f
->pos
= offset
+size
;
9183 //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
9185 ldout(cct
, 10) << "cur file size is " << in
->size
<< dendl
;
9188 utime_t start
= ceph_clock_now();
// inline_version == 0 triggers a getattr for the inline data; afterwards the
// version is asserted to be positive.
9190 if (in
->inline_version
== 0) {
9191 int r
= _getattr(in
, CEPH_STAT_CAP_INLINE_DATA
, f
->actor_perms
, true);
9194 assert(in
->inline_version
> 0);
9197 // copy into fresh buffer (since our write may be resub, async)
9201 bl
.append(buf
, size
);
9203 for (int i
= 0; i
< iovcnt
; i
++) {
9204 if (iov
[i
].iov_len
> 0) {
9205 bl
.append((const char *)iov
[i
].iov_base
, iov
[i
].iov_len
);
9211 uint64_t totalwritten
;
9213 int r
= get_caps(in
, CEPH_CAP_FILE_WR
|CEPH_CAP_AUTH_SHARED
,
9214 CEPH_CAP_FILE_BUFFER
, &have
, endoff
);
9218 /* clear the setuid/setgid bits, if any */
9219 if (unlikely(in
->mode
& (S_ISUID
|S_ISGID
)) && size
> 0) {
9220 struct ceph_statx stx
= { 0 };
9222 put_cap_ref(in
, CEPH_CAP_AUTH_SHARED
);
9223 r
= __setattrx(in
, &stx
, CEPH_SETATTR_KILL_SGUID
, f
->actor_perms
);
9227 put_cap_ref(in
, CEPH_CAP_AUTH_SHARED
);
// With O_DIRECT the FILE_BUFFER bit is masked out of the held caps.
9230 if (f
->flags
& O_DIRECT
)
9231 have
&= ~CEPH_CAP_FILE_BUFFER
;
9233 ldout(cct
, 10) << " snaprealm " << *in
->snaprealm
<< dendl
;
9235 Mutex
uninline_flock("Client::_write_uninline_data flock");
9237 bool uninline_done
= false;
9238 int uninline_ret
= 0;
9239 Context
*onuninline
= NULL
;
// Inline data present: uninline when the write ends beyond either inline size
// limit or FILE_BUFFER is not held.
9241 if (in
->inline_version
< CEPH_INLINE_NONE
) {
9242 if (endoff
> cct
->_conf
->client_max_inline_size
||
9243 endoff
> CEPH_INLINE_MAX_SIZE
||
9244 !(have
& CEPH_CAP_FILE_BUFFER
)) {
9245 onuninline
= new C_SafeCond(&uninline_flock
,
9249 uninline_data(in
, onuninline
);
9251 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
// The lines below rewrite in->inline_data in place and bump inline_version.
9253 uint32_t len
= in
->inline_data
.length();
9256 in
->inline_data
.copy(endoff
, len
- endoff
, bl
);
9259 in
->inline_data
.splice(offset
, len
- offset
);
9260 else if (offset
> len
)
9261 in
->inline_data
.append_zero(offset
- len
);
9263 in
->inline_data
.append(bl
);
9264 in
->inline_version
++;
9266 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9272 if (cct
->_conf
->client_oc
&& (have
& CEPH_CAP_FILE_BUFFER
)) {
9273 // do buffered write
9274 if (!in
->oset
.dirty_or_tx
)
9275 get_cap_ref(in
, CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_BUFFER
);
9277 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9279 // async, caching, non-blocking.
9280 r
= objectcacher
->file_write(&in
->oset
, &in
->layout
,
9281 in
->snaprealm
->get_snap_context(),
9282 offset
, size
, bl
, ceph::real_clock::now(),
9284 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9289 // flush cached write if O_SYNC is set on file fh
9290 // O_DSYNC == O_SYNC on linux < 2.6.33
9291 // O_SYNC = __O_SYNC | O_DSYNC on linux >= 2.6.33
9292 if ((f
->flags
& O_SYNC
) || (f
->flags
& O_DSYNC
)) {
9293 _flush_range(in
, offset
, size
);
9296 if (f
->flags
& O_DIRECT
)
9297 _flush_range(in
, offset
, size
);
9299 // simple, non-atomic sync write
9300 Mutex
flock("Client::_write flock");
9303 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
);
// Counterpart of _sync_write_commit(), which decrements the counter and drops
// the FILE_BUFFER reference.
9305 unsafe_sync_write
++;
9306 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
); // released by onsafe callback
9308 filer
->write_trunc(in
->ino
, &in
->layout
, in
->snaprealm
->get_snap_context(),
9309 offset
, size
, bl
, ceph::real_clock::now(), 0,
9310 in
->truncate_size
, in
->truncate_seq
,
9312 client_lock
.Unlock();
9319 _sync_write_commit(in
);
9322 // if we get here, write was successful, update client metadata
9325 lat
= ceph_clock_now();
9327 logger
->tinc(l_c_wrlat
, lat
);
// NOTE(review): totalwritten is uint64_t and is narrowed to int here; a size
// above INT_MAX would not round-trip. Confirm callers bound `size`.
9329 totalwritten
= size
;
9330 r
= (int)totalwritten
;
9333 if (totalwritten
+ offset
> in
->size
) {
9334 in
->size
= totalwritten
+ offset
;
9335 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
9337 if (is_quota_bytes_approaching(in
, f
->actor_perms
)) {
9338 check_caps(in
, CHECK_CAPS_NODELAY
);
9339 } else if (is_max_size_approaching(in
)) {
9343 ldout(cct
, 7) << "wrote to " << totalwritten
+offset
<< ", extending file size" << dendl
;
9345 ldout(cct
, 7) << "wrote to " << totalwritten
+offset
<< ", leaving file size at " << in
->size
<< dendl
;
9349 in
->mtime
= ceph_clock_now();
9351 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
// Wait for the uninline request with client_lock released. A non-negative
// result or -ECANCELED clears the cached inline data and marks the inode as
// no longer inline.
9356 client_lock
.Unlock();
9357 uninline_flock
.Lock();
9358 while (!uninline_done
)
9359 uninline_cond
.Wait(uninline_flock
);
9360 uninline_flock
.Unlock();
9363 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
9364 in
->inline_data
.clear();
9365 in
->inline_version
= CEPH_INLINE_NONE
;
9366 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
9372 put_cap_ref(in
, CEPH_CAP_FILE_WR
);
// Handle-level flush: collects (and thereby clears) the asynchronous error
// recorded on the file handle via take_async_err() and logs which case
// applied.
// NOTE(review): the branch condition and the return statement are missing
// from this listing; presumably err is returned - confirm.
9376 int Client::_flush(Fh
*f
)
9378 Inode
*in
= f
->inode
.get();
9379 int err
= f
->take_async_err();
9381 ldout(cct
, 1) << __func__
<< ": " << f
<< " on inode " << *in
<< " caught async_err = "
9382 << cpp_strerror(err
) << dendl
;
9384 ldout(cct
, 10) << __func__
<< ": " << f
<< " on inode " << *in
<< " no async_err state" << dendl
;
// Path-based truncate: fills only stx_size of a ceph_statx and calls
// setattrx() with the CEPH_SETATTR_SIZE mask.
//   relpath - path to the file
//   length  - new size in bytes
//   perms   - caller credentials, forwarded to setattrx()
// Returns whatever setattrx() returns.
// NOTE(review): stx is not zero-initialised; only stx_size is set. This
// assumes setattrx() reads nothing else under CEPH_SETATTR_SIZE - confirm.
9390 int Client::truncate(const char *relpath
, loff_t length
, const UserPerm
& perms
)
9392 struct ceph_statx stx
;
9393 stx
.stx_size
= length
;
9394 return setattrx(relpath
, &stx
, CEPH_SETATTR_SIZE
, perms
);
// fd-based truncate: under client_lock, traces the call, resolves the handle
// and calls _setattr() on its inode with st_size = length and the
// CEPH_SETATTR_SIZE mask.
// NOTE(review): the declaration of attr, the failed-lookup handling and the
// action taken for O_PATH handles are missing from this listing.
9397 int Client::ftruncate(int fd
, loff_t length
, const UserPerm
& perms
)
9399 Mutex::Locker
lock(client_lock
);
9400 tout(cct
) << "ftruncate" << std::endl
;
9401 tout(cct
) << fd
<< std::endl
;
9402 tout(cct
) << length
<< std::endl
;
9407 Fh
*f
= get_filehandle(fd
);
9410 #if defined(__linux__) && defined(O_PATH)
9411 if (f
->flags
& O_PATH
)
9415 attr
.st_size
= length
;
9416 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_SIZE
, perms
);
// fd-based fsync.
//   fd           - descriptor resolved through get_filehandle()
//   syncdataonly - forwarded to _fsync(); also written to the trace stream
// Runs _fsync() under client_lock. Both visible outcomes call
// f->take_async_err(): one assigns the result to r, the other discards it
// (see the comments at each call).
// NOTE(review): the condition separating the two outcomes and the return
// statement are missing from this listing.
9419 int Client::fsync(int fd
, bool syncdataonly
)
9421 Mutex::Locker
lock(client_lock
);
9422 tout(cct
) << "fsync" << std::endl
;
9423 tout(cct
) << fd
<< std::endl
;
9424 tout(cct
) << syncdataonly
<< std::endl
;
9429 Fh
*f
= get_filehandle(fd
);
9432 #if defined(__linux__) && defined(O_PATH)
9433 if (f
->flags
& O_PATH
)
9436 int r
= _fsync(f
, syncdataonly
);
9438 // The IOs in this fsync were okay, but maybe something happened
9439 // in the background that we should be reporting?
9440 r
= f
->take_async_err();
9441 ldout(cct
, 3) << "fsync(" << fd
<< ", " << syncdataonly
9442 << ") = 0, async_err = " << r
<< dendl
;
9444 // Assume that an error we encountered during fsync, even reported
9445 // synchronously, would also have applied the error to the Fh, and we
9446 // should clear it here to avoid returning the same error again on next
9448 ldout(cct
, 3) << "fsync(" << fd
<< ", " << syncdataonly
<< ") = "
9450 f
->take_async_err();
// Inode-level fsync.
//   in           - inode to flush
//   syncdataonly - when true, the metadata steps (dirty caps, unsafe MDS
//                  requests) are skipped
// Visible steps: start an object-cacher flush when client_oc is enabled,
// push dirty caps with check_caps(), wait for the newest unsafe MDS request,
// wait for the data flush with client_lock released, otherwise wait until no
// FILE_BUFFER cap references remain, then wait_sync_caps() on the flush tid.
// NOTE(review): local declarations (cond, done, r, tmp_ref), else branches
// and the return are missing from this listing.
9455 int Client::_fsync(Inode
*in
, bool syncdataonly
)
9458 Mutex
lock("Client::_fsync::lock");
9461 C_SafeCond
*object_cacher_completion
= NULL
;
9462 ceph_tid_t flush_tid
= 0;
9465 ldout(cct
, 3) << "_fsync on " << *in
<< " " << (syncdataonly
? "(dataonly)":"(data+metadata)") << dendl
;
9467 if (cct
->_conf
->client_oc
) {
9468 object_cacher_completion
= new C_SafeCond(&lock
, &cond
, &done
, &r
);
9469 tmp_ref
= in
; // take a reference; C_SafeCond doesn't and _flush won't either
9470 _flush(in
, object_cacher_completion
);
9471 ldout(cct
, 15) << "using return-valued form of _fsync" << dendl
;
9474 if (!syncdataonly
&& in
->dirty_caps
) {
9475 check_caps(in
, CHECK_CAPS_NODELAY
|CHECK_CAPS_SYNCHRONOUS
);
9476 if (in
->flushing_caps
)
9477 flush_tid
= last_flush_tid
;
9478 } else ldout(cct
, 10) << "no metadata needs to commit" << dendl
;
// Only the last entry of unsafe_ops is waited on.
9480 if (!syncdataonly
&& !in
->unsafe_ops
.empty()) {
9481 MetaRequest
*req
= in
->unsafe_ops
.back();
9482 ldout(cct
, 15) << "waiting on unsafe requests, last tid " << req
->get_tid() << dendl
;
9485 wait_on_list(req
->waitfor_safe
);
9489 if (object_cacher_completion
) { // wait on a real reply instead of guessing
9490 client_lock
.Unlock();
9492 ldout(cct
, 15) << "waiting on data to flush" << dendl
;
9497 ldout(cct
, 15) << "got " << r
<< " from flush writeback" << dendl
;
9499 // FIXME: this can starve
9500 while (in
->cap_refs
[CEPH_CAP_FILE_BUFFER
] > 0) {
9501 ldout(cct
, 10) << "ino " << in
->ino
<< " has " << in
->cap_refs
[CEPH_CAP_FILE_BUFFER
]
9502 << " uncommitted, waiting" << dendl
;
9503 wait_on_list(in
->waitfor_commit
);
9509 wait_sync_caps(in
, flush_tid
);
9511 ldout(cct
, 10) << "ino " << in
->ino
<< " has no uncommitted writes" << dendl
;
9513 ldout(cct
, 1) << "ino " << in
->ino
<< " failed to commit to disk! "
9514 << cpp_strerror(-r
) << dendl
;
// Handle-level fsync: logs the request and forwards to the Inode overload
// with the handle's inode, returning its result.
9520 int Client::_fsync(Fh
*f
, bool syncdataonly
)
9522 ldout(cct
, 3) << "_fsync(" << f
<< ", " << (syncdataonly
? "dataonly)":"data+metadata)") << dendl
;
9523 return _fsync(f
->inode
.get(), syncdataonly
);
// fd-based stat.
//   fd    - descriptor resolved through get_filehandle()
//   stbuf - filled by fill_stat() from the handle's inode
//   perms - caller credentials for the getattr
//   mask  - attribute mask passed to _getattr(); traced in hex
// Runs under client_lock.
// NOTE(review): the failed-lookup handling, the check on r and the return
// statement are missing from this listing.
9526 int Client::fstat(int fd
, struct stat
*stbuf
, const UserPerm
& perms
, int mask
)
9528 Mutex::Locker
lock(client_lock
);
9529 tout(cct
) << "fstat mask " << hex
<< mask
<< dec
<< std::endl
;
9530 tout(cct
) << fd
<< std::endl
;
9535 Fh
*f
= get_filehandle(fd
);
9538 int r
= _getattr(f
->inode
, mask
, perms
);
9541 fill_stat(f
->inode
, stbuf
, NULL
);
9542 ldout(cct
, 3) << "fstat(" << fd
<< ", " << stbuf
<< ") = " << r
<< dendl
;
// fd-based statx.
//   fd    - descriptor resolved through get_filehandle()
//   stx   - filled by fill_statx() from the handle's inode
//   perms - caller credentials for the getattr
//   want / flags - converted to a cap mask by statx_to_mask()
// Runs under client_lock. The getattr round-trip is skipped when the mask is
// zero or the inode reports the caps as already issued
// (caps_issued_mask(mask, true)).
// NOTE(review): the declaration of r, the error check guarding the
// "exit on error" log line and the return are missing from this listing.
9546 int Client::fstatx(int fd
, struct ceph_statx
*stx
, const UserPerm
& perms
,
9547 unsigned int want
, unsigned int flags
)
9549 Mutex::Locker
lock(client_lock
);
9550 tout(cct
) << "fstatx flags " << hex
<< flags
<< " want " << want
<< dec
<< std::endl
;
9551 tout(cct
) << fd
<< std::endl
;
9556 Fh
*f
= get_filehandle(fd
);
9560 unsigned mask
= statx_to_mask(flags
, want
);
9563 if (mask
&& !f
->inode
->caps_issued_mask(mask
, true)) {
9564 r
= _getattr(f
->inode
, mask
, perms
);
9566 ldout(cct
, 3) << "fstatx exit on error!" << dendl
;
9571 fill_statx(f
->inode
, mask
, stx
);
9572 ldout(cct
, 3) << "fstatx(" << fd
<< ", " << stx
<< ") = " << r
<< dendl
;
9576 // not written yet, but i want to link!
// Change the client's working directory.
//   relpath - path to walk with path_walk()
//   new_cwd - receives the resulting cwd path via _getcwd()
//   perms   - caller credentials for the path walk and _getcwd()
// Runs under client_lock and traces the call.
// NOTE(review): the declaration of `in`, the error check on r, the cwd
// assignment and the return are missing from this listing.
9578 int Client::chdir(const char *relpath
, std::string
&new_cwd
,
9579 const UserPerm
& perms
)
9581 Mutex::Locker
lock(client_lock
);
9582 tout(cct
) << "chdir" << std::endl
;
9583 tout(cct
) << relpath
<< std::endl
;
9588 filepath
path(relpath
);
9590 int r
= path_walk(path
, &in
, perms
);
9595 ldout(cct
, 3) << "chdir(" << relpath
<< ") cwd now " << cwd
->ino
<< dendl
;
9597 _getcwd(new_cwd
, perms
);
// Build the path of the current working directory by walking from cwd up to
// root, pushing each dentry name onto the front of a filepath, and append
// the result to `dir`.
//   dir   - output string; the computed path is appended with +=
//   perms - credentials used for the LOOKUPNAME request
// The visible code issues a CEPH_MDS_OP_LOOKUPNAME request for the inode
// being walked.
// NOTE(review): the condition that triggers the lookup, the handling of an
// unlinked directory and the declaration of the outer `path` are missing
// from this listing.
9601 void Client::_getcwd(string
& dir
, const UserPerm
& perms
)
9604 ldout(cct
, 10) << "getcwd " << *cwd
<< dendl
;
9606 Inode
*in
= cwd
.get();
9607 while (in
!= root
) {
9608 assert(in
->dn_set
.size() < 2); // dirs can't be hard-linked
9610 // A cwd or ancestor is unlinked
9611 if (in
->dn_set
.empty()) {
9615 Dentry
*dn
= in
->get_first_parent();
9620 ldout(cct
, 10) << "getcwd looking up parent for " << *in
<< dendl
;
9621 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
9622 filepath
path(in
->ino
);
9623 req
->set_filepath(path
);
9625 int res
= make_request(req
, perms
);
// Step one level up: record this component, then continue from the inode
// that owns the parent directory.
9634 path
.push_front_dentry(dn
->name
);
9635 in
= dn
->dir
->parent_inode
;
9638 dir
+= path
.get_path();
// Public, locked wrapper: takes client_lock and calls _getcwd().
// NOTE(review): a line between the lock and the call is missing from this
// listing, so there may be a guard that is not shown.
9641 void Client::getcwd(string
& dir
, const UserPerm
& perms
)
9643 Mutex::Locker
l(client_lock
);
9645 _getcwd(dir
, perms
);
// Fill a statvfs structure for the mounted filesystem.
//   path  - accepted but not referenced in the visible lines
//   stbuf - zeroed with memset, then filled field by field
//   perms - credentials for the quota-root lookup and its getattr
// Cluster stats are requested from the objecter: for the single data pool
// when there is exactly one, otherwise without a pool filter. client_lock is
// released while waiting for the reply. Block counts use a 4 MB block
// (CEPH_BLOCK_SHIFT = 22). With a byte quota on the quota root and
// client_quota_df enabled, the quota is reported as the filesystem size;
// otherwise the cluster-wide kb / kb_avail figures are used.
// NOTE(review): the declarations of stats / cond, the error return after the
// wait, the else keyword between the two reporting cases and the final
// return are missing from this listing.
9648 int Client::statfs(const char *path
, struct statvfs
*stbuf
,
9649 const UserPerm
& perms
)
9651 Mutex::Locker
l(client_lock
);
9652 tout(cct
) << "statfs" << std::endl
;
9660 const vector
<int64_t> &data_pools
= mdsmap
->get_data_pools();
9661 if (data_pools
.size() == 1) {
9662 objecter
->get_fs_stats(stats
, data_pools
[0], &cond
);
9664 objecter
->get_fs_stats(stats
, boost::optional
<int64_t>(), &cond
);
9667 client_lock
.Unlock();
9668 int rval
= cond
.wait();
9672 ldout(cct
, 1) << "underlying call to statfs returned error: "
9673 << cpp_strerror(rval
)
9678 memset(stbuf
, 0, sizeof(*stbuf
));
9681 * we're going to set a block size of 4MB so we can represent larger
9682 * FSes without overflowing. Additionally convert the space
9683 * measurements from KB to bytes while making them in terms of
9684 * blocks. We use 4MB only because it is big enough, and because it
9685 * actually *is* the (ceph) default block size.
9687 const int CEPH_BLOCK_SHIFT
= 22;
9688 stbuf
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
9689 stbuf
->f_bsize
= 1 << CEPH_BLOCK_SHIFT
;
9690 stbuf
->f_files
= stats
.num_objects
;
9691 stbuf
->f_ffree
= -1;
9692 stbuf
->f_favail
= -1;
9693 stbuf
->f_fsid
= -1; // ??
9694 stbuf
->f_flag
= 0; // ??
9695 stbuf
->f_namemax
= NAME_MAX
;
9697 // Usually quota_root will == root_ancestor, but if the mount root has no
9698 // quota but we can see a parent of it that does have a quota, we'll
9699 // respect that one instead.
9700 assert(root
!= nullptr);
9701 Inode
*quota_root
= root
->quota
.is_enable() ? root
: get_quota_root(root
, perms
);
9703 // get_quota_root should always give us something
9704 // because client quotas are always enabled
9705 assert(quota_root
!= nullptr);
9707 if (quota_root
&& cct
->_conf
->client_quota_df
&& quota_root
->quota
.max_bytes
) {
9709 // Skip the getattr if any sessions are stale, as we don't want to
9710 // block `df` if this client has e.g. been evicted, or if the MDS cluster
9712 if (!_any_stale_sessions()) {
9713 int r
= _getattr(quota_root
, 0, perms
, true);
9715 // Ignore return value: error getting latest inode metadata is not a good
9716 // reason to break "df".
9717 lderr(cct
) << "Error in getattr on quota root 0x"
9718 << std::hex
<< quota_root
->ino
<< std::dec
9719 << " statfs result may be outdated" << dendl
;
9723 // Special case: if there is a size quota set on the Inode acting
9724 // as the root for this client mount, then report the quota status
9725 // as the filesystem statistics.
9726 const fsblkcnt_t total
= quota_root
->quota
.max_bytes
>> CEPH_BLOCK_SHIFT
;
9727 const fsblkcnt_t used
= quota_root
->rstat
.rbytes
>> CEPH_BLOCK_SHIFT
;
9728 // It is possible for a quota to be exceeded: arithmetic here must
9729 // handle case where used > total.
9730 const fsblkcnt_t free
= total
> used
? total
- used
: 0;
9732 stbuf
->f_blocks
= total
;
9733 stbuf
->f_bfree
= free
;
9734 stbuf
->f_bavail
= free
;
9736 // General case: report the cluster statistics returned from RADOS. Because
9737 // multiple pools may be used within one filesystem namespace via
9738 // layouts, this is the most correct thing we can do.
9739 stbuf
->f_blocks
= stats
.kb
>> (CEPH_BLOCK_SHIFT
- 10);
9740 stbuf
->f_bfree
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
9741 stbuf
->f_bavail
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
// Send a file-lock request to the MDS and mirror the result locally.
//   in        - inode the lock applies to
//   fh        - file handle; supplies actor_perms and the per-handle lock state
//   lock_type - CEPH_LOCK_FCNTL or CEPH_LOCK_FLOCK (stored as the lock "rule")
//   op        - CEPH_MDS_OP_SETFILELOCK or CEPH_MDS_OP_GETFILELOCK
//   sleep     - non-zero to wait for the lock; stored in the request's wait field
//   fl        - lock description; overwritten with the reply for GETFILELOCK
//   owner     - lock owner id; the top bit is set before sending
//   removing  - passed as true by _release_filelocks(); its use is not
//               visible in this listing
// For SETFILELOCK the lock is recorded in both the inode's and the handle's
// ceph_lock_state_t (created on first use) via _update_lock_state().
// NOTE(review): declarations (lock_cmd, path, bl, ret), several else
// branches and all return statements are missing from this listing.
9747 int Client::_do_filelock(Inode
*in
, Fh
*fh
, int lock_type
, int op
, int sleep
,
9748 struct flock
*fl
, uint64_t owner
, bool removing
)
9750 ldout(cct
, 10) << "_do_filelock ino " << in
->ino
9751 << (lock_type
== CEPH_LOCK_FCNTL
? " fcntl" : " flock")
9752 << " type " << fl
->l_type
<< " owner " << owner
9753 << " " << fl
->l_start
<< "~" << fl
->l_len
<< dendl
;
// Translate the POSIX lock type into the Ceph lock command.
9756 if (F_RDLCK
== fl
->l_type
)
9757 lock_cmd
= CEPH_LOCK_SHARED
;
9758 else if (F_WRLCK
== fl
->l_type
)
9759 lock_cmd
= CEPH_LOCK_EXCL
;
9760 else if (F_UNLCK
== fl
->l_type
)
9761 lock_cmd
= CEPH_LOCK_UNLOCK
;
9765 if (op
!= CEPH_MDS_OP_SETFILELOCK
|| lock_cmd
== CEPH_LOCK_UNLOCK
)
9769 * Set the most significant bit, so that MDS knows the 'owner'
9770 * is sufficient to identify the owner of lock. (old code uses
9771 * both 'owner' and 'pid')
9773 owner
|= (1ULL << 63);
9775 MetaRequest
*req
= new MetaRequest(op
);
9777 in
->make_nosnap_relative_path(path
);
9778 req
->set_filepath(path
);
9781 req
->head
.args
.filelock_change
.rule
= lock_type
;
9782 req
->head
.args
.filelock_change
.type
= lock_cmd
;
9783 req
->head
.args
.filelock_change
.owner
= owner
;
9784 req
->head
.args
.filelock_change
.pid
= fl
->l_pid
;
9785 req
->head
.args
.filelock_change
.start
= fl
->l_start
;
9786 req
->head
.args
.filelock_change
.length
= fl
->l_len
;
9787 req
->head
.args
.filelock_change
.wait
= sleep
;
// Blocking request with an interrupt callback registered: the callback is
// armed with the request for the duration of make_request() and disarmed
// afterwards.
9792 if (sleep
&& switch_interrupt_cb
) {
9794 switch_interrupt_cb(callback_handle
, req
->get());
9795 ret
= make_request(req
, fh
->actor_perms
, NULL
, NULL
, -1, &bl
);
9796 // disable interrupt
9797 switch_interrupt_cb(callback_handle
, NULL
);
9798 if (ret
== 0 && req
->aborted()) {
9799 // effect of this lock request has been revoked by the 'lock intr' request
9800 ret
= req
->get_abort_code();
9804 ret
= make_request(req
, fh
->actor_perms
, NULL
, NULL
, -1, &bl
);
// GETFILELOCK: decode the reply and translate it back into *fl.
9808 if (op
== CEPH_MDS_OP_GETFILELOCK
) {
9809 ceph_filelock filelock
;
9810 bufferlist::iterator p
= bl
.begin();
9811 ::decode(filelock
, p
);
9813 if (CEPH_LOCK_SHARED
== filelock
.type
)
9814 fl
->l_type
= F_RDLCK
;
9815 else if (CEPH_LOCK_EXCL
== filelock
.type
)
9816 fl
->l_type
= F_WRLCK
;
9818 fl
->l_type
= F_UNLCK
;
9820 fl
->l_whence
= SEEK_SET
;
9821 fl
->l_start
= filelock
.start
;
9822 fl
->l_len
= filelock
.length
;
9823 fl
->l_pid
= filelock
.pid
;
// SETFILELOCK: update the inode-level lock state first.
9824 } else if (op
== CEPH_MDS_OP_SETFILELOCK
) {
9825 ceph_lock_state_t
*lock_state
;
9826 if (lock_type
== CEPH_LOCK_FCNTL
) {
9827 if (!in
->fcntl_locks
)
9828 in
->fcntl_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FCNTL
);
9829 lock_state
= in
->fcntl_locks
;
9830 } else if (lock_type
== CEPH_LOCK_FLOCK
) {
9831 if (!in
->flock_locks
)
9832 in
->flock_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FLOCK
);
9833 lock_state
= in
->flock_locks
;
9838 _update_lock_state(fl
, owner
, lock_state
);
// Then the handle-level lock state.
9841 if (lock_type
== CEPH_LOCK_FCNTL
) {
9842 if (!fh
->fcntl_locks
)
9843 fh
->fcntl_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FCNTL
);
9844 lock_state
= fh
->fcntl_locks
;
9846 if (!fh
->flock_locks
)
9847 fh
->flock_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FLOCK
);
9848 lock_state
= fh
->flock_locks
;
9850 _update_lock_state(fl
, owner
, lock_state
);
// Interrupt a pending blocking lock request.
//   req - the original SETFILELOCK request
// Builds a second SETFILELOCK request that copies the original's
// filelock_change arguments, switches the rule to the matching *_INTR
// variant and the type to CEPH_LOCK_UNLOCK, then sends it with the uid/gid
// taken from the original request. Returns the make_request() result, or 0
// on the early "haven't sent the request" path.
// NOTE(review): the abort call, the condition guarding the early return and
// the declarations of lock_type / path are missing from this listing.
9858 int Client::_interrupt_filelock(MetaRequest
*req
)
9860 // Set abort code, but do not kick. The abort code prevents the request
9861 // from being re-sent.
9864 return 0; // haven't sent the request
9866 Inode
*in
= req
->inode();
9869 if (req
->head
.args
.filelock_change
.rule
== CEPH_LOCK_FLOCK
)
9870 lock_type
= CEPH_LOCK_FLOCK_INTR
;
9871 else if (req
->head
.args
.filelock_change
.rule
== CEPH_LOCK_FCNTL
)
9872 lock_type
= CEPH_LOCK_FCNTL_INTR
;
9878 MetaRequest
*intr_req
= new MetaRequest(CEPH_MDS_OP_SETFILELOCK
);
9880 in
->make_nosnap_relative_path(path
);
9881 intr_req
->set_filepath(path
);
9882 intr_req
->set_inode(in
);
9883 intr_req
->head
.args
.filelock_change
= req
->head
.args
.filelock_change
;
9884 intr_req
->head
.args
.filelock_change
.rule
= lock_type
;
9885 intr_req
->head
.args
.filelock_change
.type
= CEPH_LOCK_UNLOCK
;
9887 UserPerm
perms(req
->get_uid(), req
->get_gid());
9888 return make_request(intr_req
, perms
, NULL
, NULL
, -1);
// Serialise the inode's held file locks into `bl`.
// Layout written: count of fcntl locks, each fcntl ceph_filelock, count of
// flock locks, each flock ceph_filelock. A missing lock-state object counts
// as zero locks.
// NOTE(review): the action taken when the inode has neither lock state, and
// the loop increment lines, are missing from this listing.
9891 void Client::_encode_filelocks(Inode
*in
, bufferlist
& bl
)
9893 if (!in
->fcntl_locks
&& !in
->flock_locks
)
9896 unsigned nr_fcntl_locks
= in
->fcntl_locks
? in
->fcntl_locks
->held_locks
.size() : 0;
9897 ::encode(nr_fcntl_locks
, bl
);
9898 if (nr_fcntl_locks
) {
9899 ceph_lock_state_t
* lock_state
= in
->fcntl_locks
;
9900 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9901 p
!= lock_state
->held_locks
.end();
9903 ::encode(p
->second
, bl
);
9906 unsigned nr_flock_locks
= in
->flock_locks
? in
->flock_locks
->held_locks
.size() : 0;
9907 ::encode(nr_flock_locks
, bl
);
9908 if (nr_flock_locks
) {
9909 ceph_lock_state_t
* lock_state
= in
->flock_locks
;
9910 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9911 p
!= lock_state
->held_locks
.end();
9913 ::encode(p
->second
, bl
);
9916 ldout(cct
, 10) << "_encode_filelocks ino " << in
->ino
<< ", " << nr_fcntl_locks
9917 << " fcntl locks, " << nr_flock_locks
<< " flock locks" << dendl
;
// Release every lock recorded on a file handle.
// Collects (lock type, ceph_filelock) pairs from the handle's fcntl and
// flock lock states, deletes both state objects, then sends one F_UNLCK
// SETFILELOCK per collected lock through _do_filelock() with sleep = 0 and
// removing = true.
// NOTE(review): the early-return bodies, the loop increments, the
// declaration of fl and any reset of the deleted pointers are missing from
// this listing.
9920 void Client::_release_filelocks(Fh
*fh
)
9922 if (!fh
->fcntl_locks
&& !fh
->flock_locks
)
9925 Inode
*in
= fh
->inode
.get();
9926 ldout(cct
, 10) << "_release_filelocks " << fh
<< " ino " << in
->ino
<< dendl
;
9928 list
<pair
<int, ceph_filelock
> > to_release
;
9930 if (fh
->fcntl_locks
) {
9931 ceph_lock_state_t
* lock_state
= fh
->fcntl_locks
;
9932 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9933 p
!= lock_state
->held_locks
.end();
9935 to_release
.push_back(pair
<int, ceph_filelock
>(CEPH_LOCK_FCNTL
, p
->second
));
9936 delete fh
->fcntl_locks
;
9938 if (fh
->flock_locks
) {
9939 ceph_lock_state_t
* lock_state
= fh
->flock_locks
;
9940 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9941 p
!= lock_state
->held_locks
.end();
9943 to_release
.push_back(pair
<int, ceph_filelock
>(CEPH_LOCK_FLOCK
, p
->second
));
9944 delete fh
->flock_locks
;
9947 if (to_release
.empty())
// One unlock template is reused; only start, length and pid change per lock.
9951 memset(&fl
, 0, sizeof(fl
));
9952 fl
.l_whence
= SEEK_SET
;
9953 fl
.l_type
= F_UNLCK
;
9955 for (list
<pair
<int, ceph_filelock
> >::iterator p
= to_release
.begin();
9956 p
!= to_release
.end();
9958 fl
.l_start
= p
->second
.start
;
9959 fl
.l_len
= p
->second
.length
;
9960 fl
.l_pid
= p
->second
.pid
;
9961 _do_filelock(in
, fh
, p
->first
, CEPH_MDS_OP_SETFILELOCK
, 0, &fl
,
9962 p
->second
.owner
, true);
// Apply one lock change to a local ceph_lock_state_t.
//   fl         - POSIX lock description (type, start, len, pid)
//   owner      - lock owner; the top bit is OR-ed in, as in _do_filelock()
//   lock_state - state object to update
// An unlock removes the range with remove_lock(); any other type is added
// with add_lock().
// NOTE(review): the declaration of lock_cmd, the else keywords and the use
// of the add_lock() result are missing from this listing.
9966 void Client::_update_lock_state(struct flock
*fl
, uint64_t owner
,
9967 ceph_lock_state_t
*lock_state
)
9970 if (F_RDLCK
== fl
->l_type
)
9971 lock_cmd
= CEPH_LOCK_SHARED
;
9972 else if (F_WRLCK
== fl
->l_type
)
9973 lock_cmd
= CEPH_LOCK_EXCL
;
// NOTE(review): the doubled semicolon below is a harmless empty statement.
9975 lock_cmd
= CEPH_LOCK_UNLOCK
;;
9977 ceph_filelock filelock
;
9978 filelock
.start
= fl
->l_start
;
9979 filelock
.length
= fl
->l_len
;
9980 filelock
.client
= 0;
9981 // see comment in _do_filelock()
9982 filelock
.owner
= owner
| (1ULL << 63);
9983 filelock
.pid
= fl
->l_pid
;
9984 filelock
.type
= lock_cmd
;
9986 if (filelock
.type
== CEPH_LOCK_UNLOCK
) {
9987 list
<ceph_filelock
> activated_locks
;
9988 lock_state
->remove_lock(filelock
, activated_locks
);
9990 bool r
= lock_state
->add_lock(filelock
, false, false, NULL
);
// Query an fcntl-style lock: calls _do_filelock() with CEPH_LOCK_FCNTL,
// CEPH_MDS_OP_GETFILELOCK and sleep = 0; *fl receives the reply.
// NOTE(review): the lines after the call (including the return) are missing
// from this listing.
9995 int Client::_getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
)
9997 Inode
*in
= fh
->inode
.get();
9998 ldout(cct
, 10) << "_getlk " << fh
<< " ino " << in
->ino
<< dendl
;
9999 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FCNTL
, CEPH_MDS_OP_GETFILELOCK
, 0, fl
, owner
);
// Set or clear an fcntl-style lock: calls _do_filelock() with
// CEPH_LOCK_FCNTL and CEPH_MDS_OP_SETFILELOCK, passing `sleep` through, and
// logs the result.
// NOTE(review): the return statement is missing from this listing.
10003 int Client::_setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
)
10005 Inode
*in
= fh
->inode
.get();
10006 ldout(cct
, 10) << "_setlk " << fh
<< " ino " << in
->ino
<< dendl
;
10007 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FCNTL
, CEPH_MDS_OP_SETFILELOCK
, sleep
, fl
, owner
);
10008 ldout(cct
, 10) << "_setlk " << fh
<< " ino " << in
->ino
<< " result=" << ret
<< dendl
;
// BSD flock() emulation on top of _do_filelock().
//   cmd   - flock command; LOCK_NB clear means the request may sleep
//   owner - lock owner id
// Builds a zeroed struct flock with l_whence = SEEK_SET and sends it as a
// CEPH_LOCK_FLOCK SETFILELOCK request.
// NOTE(review): the translation of cmd into fl.l_type, the declaration of fl
// and the return are missing from this listing.
10012 int Client::_flock(Fh
*fh
, int cmd
, uint64_t owner
)
10014 Inode
*in
= fh
->inode
.get();
10015 ldout(cct
, 10) << "_flock " << fh
<< " ino " << in
->ino
<< dendl
;
10017 int sleep
= !(cmd
& LOCK_NB
);
10036 memset(&fl
, 0, sizeof(fl
));
10038 fl
.l_whence
= SEEK_SET
;
10040 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FLOCK
, CEPH_MDS_OP_SETFILELOCK
, sleep
, &fl
, owner
);
10041 ldout(cct
, 10) << "_flock " << fh
<< " ino " << in
->ino
<< " result=" << ret
<< dendl
;
// Low-level statfs: ignores `in` and forwards to statfs() with a null path.
10045 int Client::ll_statfs(Inode
*in
, struct statvfs
*stbuf
, const UserPerm
& perms
)
10047 /* Since the only thing this does is wrap a call to statfs, and
10048 statfs takes a lock, it doesn't seem we have a need to split it
10050 return statfs(0, stbuf
, perms
);
// Install the callbacks supplied by the embedding layer.
// Under client_lock: stores the opaque handle, and for each of ino_cb,
// dentry_cb, switch_intr_cb and remount_cb that is set, stores it and starts
// the matching finisher. getgroups_cb and umask_cb are stored
// unconditionally.
// NOTE(review): the " getgroups_cb" log label lacks the trailing space the
// other labels have, so its value runs into the label in the log output.
// NOTE(review): lines before the lock and the end of the log statement are
// missing from this listing.
10053 void Client::ll_register_callbacks(struct client_callback_args
*args
)
10057 Mutex::Locker
l(client_lock
);
10058 ldout(cct
, 10) << "ll_register_callbacks cb " << args
->handle
10059 << " invalidate_ino_cb " << args
->ino_cb
10060 << " invalidate_dentry_cb " << args
->dentry_cb
10061 << " getgroups_cb" << args
->getgroups_cb
10062 << " switch_interrupt_cb " << args
->switch_intr_cb
10063 << " remount_cb " << args
->remount_cb
10065 callback_handle
= args
->handle
;
10066 if (args
->ino_cb
) {
10067 ino_invalidate_cb
= args
->ino_cb
;
10068 async_ino_invalidator
.start();
10070 if (args
->dentry_cb
) {
10071 dentry_invalidate_cb
= args
->dentry_cb
;
10072 async_dentry_invalidator
.start();
10074 if (args
->switch_intr_cb
) {
10075 switch_interrupt_cb
= args
->switch_intr_cb
;
10076 interrupt_finisher
.start();
10078 if (args
->remount_cb
) {
10079 remount_cb
= args
->remount_cb
;
10080 remount_finisher
.start();
10082 getgroups_cb
= args
->getgroups_cb
;
10083 umask_cb
= args
->umask_cb
;
// Decide how kernel dentries will be invalidated.
//   can_invalidate - stored in can_invalidate_dentries
// Order of preference visible here: the dentry-invalidate callback (asserted
// to be set), then the remount callback; with neither, an error is logged
// and the client_die_on_failed_dentry_invalidate option selects between the
// "quitting" and "expect issues" messages.
// NOTE(review): the actions taken in each branch beyond logging, and the
// return value, are missing from this listing.
10086 int Client::test_dentry_handling(bool can_invalidate
)
10090 can_invalidate_dentries
= can_invalidate
;
10092 if (can_invalidate_dentries
) {
10093 assert(dentry_invalidate_cb
);
10094 ldout(cct
, 1) << "using dentry_invalidate_cb" << dendl
;
10096 } else if (remount_cb
) {
10097 ldout(cct
, 1) << "using remount_cb" << dendl
;
10101 bool should_abort
= cct
->_conf
->get_val
<bool>("client_die_on_failed_dentry_invalidate");
10102 if (should_abort
) {
10103 lderr(cct
) << "no method to invalidate kernel dentry cache; quitting!" << dendl
;
10106 lderr(cct
) << "no method to invalidate kernel dentry cache; expect issues!" << dendl
;
// Filesystem-wide sync. Visible steps: start an object-cacher flush_all()
// when client_oc is enabled, snapshot last_flush_tid, wait for unsafe MDS
// requests, wait for cap flushes up to that tid, then release client_lock
// while waiting for the data flush and re-take it afterwards.
// NOTE(review): the mutex label "Client::_fsync::lock" is the same string
// _fsync uses and looks copied from there; confirm whether a distinct label
// was intended.
// NOTE(review): the cond declaration, the cap-flush kick, the wait body and
// the return are missing from this listing.
10112 int Client::_sync_fs()
10114 ldout(cct
, 10) << "_sync_fs" << dendl
;
10117 Mutex
lock("Client::_fsync::lock");
10119 bool flush_done
= false;
10120 if (cct
->_conf
->client_oc
)
10121 objectcacher
->flush_all(new C_SafeCond(&lock
, &cond
, &flush_done
));
10127 ceph_tid_t flush_tid
= last_flush_tid
;
10129 // wait for unsafe mds requests
10130 wait_unsafe_requests();
10132 wait_sync_caps(flush_tid
);
10135 client_lock
.Unlock();
10137 ldout(cct
, 15) << "waiting on data to flush" << dendl
;
10138 while (!flush_done
)
10141 client_lock
.Lock();
// Public sync entry point; takes client_lock.
// NOTE(review): the rest of the body is missing from this listing;
// presumably it delegates to _sync_fs() - confirm.
10147 int Client::sync_fs()
10149 Mutex::Locker
l(client_lock
);
// Under client_lock, asks the object cacher to release everything and
// returns the value release_all() reports.
10157 int64_t Client::drop_caches()
10159 Mutex::Locker
l(client_lock
);
10160 return objectcacher
->release_all();
// Lazy-IO propagate entry point: under client_lock, logs the call and
// resolves the file handle for fd.
// NOTE(review): the remainder of the body (use of offset / count, return) is
// missing from this listing.
10164 int Client::lazyio_propogate(int fd
, loff_t offset
, size_t count
)
10166 Mutex::Locker
l(client_lock
);
10167 ldout(cct
, 3) << "op: client->lazyio_propogate(" << fd
10168 << ", " << offset
<< ", " << count
<< ")" << dendl
;
10170 Fh
*f
= get_filehandle(fd
);
// Lazy-IO synchronize entry point: under client_lock, logs the call,
// resolves the file handle for fd and fetches its inode.
// NOTE(review): the remainder of the body (what is done with the inode, the
// return) is missing from this listing.
10180 int Client::lazyio_synchronize(int fd
, loff_t offset
, size_t count
)
10182 Mutex::Locker
l(client_lock
);
10183 ldout(cct
, 3) << "op: client->lazyio_synchronize(" << fd
10184 << ", " << offset
<< ", " << count
<< ")" << dendl
;
10186 Fh
*f
= get_filehandle(fd
);
10189 Inode
*in
= f
->inode
.get();
10198 // =============================
10201 int Client::mksnap(const char *relpath
, const char *name
, const UserPerm
& perm
)
10203 Mutex::Locker
l(client_lock
);
10208 filepath
path(relpath
);
10210 int r
= path_walk(path
, &in
, perm
);
10213 if (cct
->_conf
->client_permissions
) {
10214 r
= may_create(in
.get(), perm
);
10218 Inode
*snapdir
= open_snapdir(in
.get());
10219 return _mkdir(snapdir
, name
, 0, perm
);
10222 int Client::rmsnap(const char *relpath
, const char *name
, const UserPerm
& perms
)
10224 Mutex::Locker
l(client_lock
);
10229 filepath
path(relpath
);
10231 int r
= path_walk(path
, &in
, perms
);
10234 if (cct
->_conf
->client_permissions
) {
10235 r
= may_delete(in
.get(), NULL
, perms
);
10239 Inode
*snapdir
= open_snapdir(in
.get());
10240 return _rmdir(snapdir
, name
, perms
);
10243 // =============================
10246 int Client::get_caps_issued(int fd
) {
10248 Mutex::Locker
lock(client_lock
);
10253 Fh
*f
= get_filehandle(fd
);
10257 return f
->inode
->caps_issued();
10260 int Client::get_caps_issued(const char *path
, const UserPerm
& perms
)
10262 Mutex::Locker
lock(client_lock
);
10269 int r
= path_walk(p
, &in
, perms
, true);
10272 return in
->caps_issued();
10275 // =========================================
10278 Inode
*Client::open_snapdir(Inode
*diri
)
10281 vinodeno_t
vino(diri
->ino
, CEPH_SNAPDIR
);
10282 if (!inode_map
.count(vino
)) {
10283 in
= new Inode(this, vino
, &diri
->layout
);
10285 in
->ino
= diri
->ino
;
10286 in
->snapid
= CEPH_SNAPDIR
;
10287 in
->mode
= diri
->mode
;
10288 in
->uid
= diri
->uid
;
10289 in
->gid
= diri
->gid
;
10290 in
->mtime
= diri
->mtime
;
10291 in
->ctime
= diri
->ctime
;
10292 in
->btime
= diri
->btime
;
10293 in
->size
= diri
->size
;
10294 in
->change_attr
= diri
->change_attr
;
10296 in
->dirfragtree
.clear();
10297 in
->snapdir_parent
= diri
;
10298 diri
->flags
|= I_SNAPDIR_OPEN
;
10299 inode_map
[vino
] = in
;
10300 if (use_faked_inos())
10301 _assign_faked_ino(in
);
10302 ldout(cct
, 10) << "open_snapdir created snapshot inode " << *in
<< dendl
;
10304 in
= inode_map
[vino
];
10305 ldout(cct
, 10) << "open_snapdir had snapshot inode " << *in
<< dendl
;
10310 int Client::ll_lookup(Inode
*parent
, const char *name
, struct stat
*attr
,
10311 Inode
**out
, const UserPerm
& perms
)
10313 Mutex::Locker
lock(client_lock
);
10314 vinodeno_t vparent
= _get_vino(parent
);
10315 ldout(cct
, 3) << "ll_lookup " << vparent
<< " " << name
<< dendl
;
10316 tout(cct
) << "ll_lookup" << std::endl
;
10317 tout(cct
) << name
<< std::endl
;
10323 if (!cct
->_conf
->fuse_default_permissions
) {
10324 r
= may_lookup(parent
, perms
);
10329 string
dname(name
);
10332 r
= _lookup(parent
, dname
, CEPH_STAT_CAP_INODE_ALL
, &in
, perms
);
10339 fill_stat(in
, attr
);
10343 ldout(cct
, 3) << "ll_lookup " << vparent
<< " " << name
10344 << " -> " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
10345 tout(cct
) << attr
->st_ino
<< std::endl
;
10350 int Client::ll_lookupx(Inode
*parent
, const char *name
, Inode
**out
,
10351 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
10352 const UserPerm
& perms
)
10354 Mutex::Locker
lock(client_lock
);
10355 vinodeno_t vparent
= _get_vino(parent
);
10356 ldout(cct
, 3) << "ll_lookupx " << vparent
<< " " << name
<< dendl
;
10357 tout(cct
) << "ll_lookupx" << std::endl
;
10358 tout(cct
) << name
<< std::endl
;
10364 if (!cct
->_conf
->fuse_default_permissions
) {
10365 r
= may_lookup(parent
, perms
);
10370 string
dname(name
);
10373 unsigned mask
= statx_to_mask(flags
, want
);
10374 r
= _lookup(parent
, dname
, mask
, &in
, perms
);
10380 fill_statx(in
, mask
, stx
);
10384 ldout(cct
, 3) << "ll_lookupx " << vparent
<< " " << name
10385 << " -> " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
10386 tout(cct
) << stx
->stx_ino
<< std::endl
;
10391 int Client::ll_walk(const char* name
, Inode
**out
, struct ceph_statx
*stx
,
10392 unsigned int want
, unsigned int flags
, const UserPerm
& perms
)
10394 Mutex::Locker
lock(client_lock
);
10399 filepath
fp(name
, 0);
10402 unsigned mask
= statx_to_mask(flags
, want
);
10404 ldout(cct
, 3) << "ll_walk" << name
<< dendl
;
10405 tout(cct
) << "ll_walk" << std::endl
;
10406 tout(cct
) << name
<< std::endl
;
10408 rc
= path_walk(fp
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), mask
);
10410 /* zero out mask, just in case... */
10417 fill_statx(in
, mask
, stx
);
// Take a low-level (ll) reference on an inode.
// NOTE(review): the statements that actually bump the reference counts are
// not visible in this listing; only the visible lines are described.
10424 void Client::_ll_get(Inode
*in
)
// First ll reference on this inode.
10426 if (in
->ll_ref
== 0) {
// A directory that has a dentry also pins that (single) parent dentry.
10428 if (in
->is_dir() && !in
->dn_set
.empty()) {
10429 assert(in
->dn_set
.size() == 1); // dirs can't be hard-linked
10430 in
->get_first_parent()->get(); // pin dentry
// Log the inode pointer, its ino and the ll_ref value at this point.
10434 ldout(cct
, 20) << "_ll_get " << in
<< " " << in
->ino
<< " -> " << in
->ll_ref
<< dendl
;
// Drop `num` low-level (ll) references from an inode.
// NOTE(review): the statement that decrements ll_ref and the return are not
// visible in this listing; presumably the remaining ll_ref is returned,
// since ll_forget() compares the result against 0 -- confirm.
10437 int Client::_ll_put(Inode
*in
, int num
)
// Log the inode pointer, its ino, the count being dropped and ll_ref.
10440 ldout(cct
, 20) << "_ll_put " << in
<< " " << in
->ino
<< " " << num
<< " -> " << in
->ll_ref
<< dendl
;
// No ll references left.
10441 if (in
->ll_ref
== 0) {
// Mirror of _ll_get(): a directory with a dentry releases the pin on its
// single parent dentry.
10442 if (in
->is_dir() && !in
->dn_set
.empty()) {
10443 assert(in
->dn_set
.size() == 1); // dirs can't be hard-linked
10444 in
->get_first_parent()->put(); // unpin dentry
10453 void Client::_ll_drop_pins()
10455 ldout(cct
, 10) << "_ll_drop_pins" << dendl
;
10456 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator next
;
10457 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator it
= inode_map
.begin();
10458 it
!= inode_map
.end();
10460 Inode
*in
= it
->second
;
10464 _ll_put(in
, in
->ll_ref
);
// Drop `count` low-level references from inode `in`, under client_lock.
// NOTE(review): the unmount check, the handling of the final result and the
// return statement are not visible in this listing.
10468 bool Client::ll_forget(Inode
*in
, int count
)
10470 Mutex::Locker
lock(client_lock
);
10471 inodeno_t ino
= _get_inodeno(in
);
// Debug log plus trace output (operation name, inode number, count).
10473 ldout(cct
, 3) << "ll_forget " << ino
<< " " << count
<< dendl
;
10474 tout(cct
) << "ll_forget" << std::endl
;
10475 tout(cct
) << ino
.val
<< std::endl
;
10476 tout(cct
) << count
<< std::endl
;
10478 // Ignore forget if we're no longer mounted
10482 if (ino
== 1) return true; // ignore forget on root.
// Caller asked to drop more references than are held: warn and drop only
// what the inode actually has.
10485 if (in
->ll_ref
< count
) {
10486 ldout(cct
, 1) << "WARNING: ll_forget on " << ino
<< " " << count
10487 << ", which only has ll_ref=" << in
->ll_ref
<< dendl
;
10488 _ll_put(in
, in
->ll_ref
);
// Normal case: drop exactly `count`; a result of 0 is treated specially.
10491 if (_ll_put(in
, count
) == 0)
10498 bool Client::ll_put(Inode
*in
)
10500 /* ll_forget already takes the lock */
10501 return ll_forget(in
, 1);
10504 snapid_t
Client::ll_get_snapid(Inode
*in
)
10506 Mutex::Locker
lock(client_lock
);
10510 Inode
*Client::ll_get_inode(ino_t ino
)
10512 Mutex::Locker
lock(client_lock
);
10517 vinodeno_t vino
= _map_faked_ino(ino
);
10518 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
10519 if (p
== inode_map
.end())
10521 Inode
*in
= p
->second
;
10526 Inode
*Client::ll_get_inode(vinodeno_t vino
)
10528 Mutex::Locker
lock(client_lock
);
10533 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
10534 if (p
== inode_map
.end())
10536 Inode
*in
= p
->second
;
10541 int Client::_ll_getattr(Inode
*in
, int caps
, const UserPerm
& perms
)
10543 vinodeno_t vino
= _get_vino(in
);
10545 ldout(cct
, 3) << "ll_getattr " << vino
<< dendl
;
10546 tout(cct
) << "ll_getattr" << std::endl
;
10547 tout(cct
) << vino
.ino
.val
<< std::endl
;
10549 if (vino
.snapid
< CEPH_NOSNAP
)
10552 return _getattr(in
, caps
, perms
);
10555 int Client::ll_getattr(Inode
*in
, struct stat
*attr
, const UserPerm
& perms
)
10557 Mutex::Locker
lock(client_lock
);
10562 int res
= _ll_getattr(in
, CEPH_STAT_CAP_INODE_ALL
, perms
);
10565 fill_stat(in
, attr
);
10566 ldout(cct
, 3) << "ll_getattr " << _get_vino(in
) << " = " << res
<< dendl
;
10570 int Client::ll_getattrx(Inode
*in
, struct ceph_statx
*stx
, unsigned int want
,
10571 unsigned int flags
, const UserPerm
& perms
)
10573 Mutex::Locker
lock(client_lock
);
10579 unsigned mask
= statx_to_mask(flags
, want
);
10581 if (mask
&& !in
->caps_issued_mask(mask
, true))
10582 res
= _ll_getattr(in
, mask
, perms
);
10585 fill_statx(in
, mask
, stx
);
10586 ldout(cct
, 3) << "ll_getattrx " << _get_vino(in
) << " = " << res
<< dendl
;
10590 int Client::_ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
10591 const UserPerm
& perms
, InodeRef
*inp
)
10593 vinodeno_t vino
= _get_vino(in
);
10595 ldout(cct
, 3) << "ll_setattrx " << vino
<< " mask " << hex
<< mask
<< dec
10597 tout(cct
) << "ll_setattrx" << std::endl
;
10598 tout(cct
) << vino
.ino
.val
<< std::endl
;
10599 tout(cct
) << stx
->stx_mode
<< std::endl
;
10600 tout(cct
) << stx
->stx_uid
<< std::endl
;
10601 tout(cct
) << stx
->stx_gid
<< std::endl
;
10602 tout(cct
) << stx
->stx_size
<< std::endl
;
10603 tout(cct
) << stx
->stx_mtime
<< std::endl
;
10604 tout(cct
) << stx
->stx_atime
<< std::endl
;
10605 tout(cct
) << stx
->stx_btime
<< std::endl
;
10606 tout(cct
) << mask
<< std::endl
;
10608 if (!cct
->_conf
->fuse_default_permissions
) {
10609 int res
= may_setattr(in
, stx
, mask
, perms
);
10614 mask
&= ~(CEPH_SETATTR_MTIME_NOW
| CEPH_SETATTR_ATIME_NOW
);
10616 return __setattrx(in
, stx
, mask
, perms
, inp
);
10619 int Client::ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
10620 const UserPerm
& perms
)
10622 Mutex::Locker
lock(client_lock
);
10627 InodeRef
target(in
);
10628 int res
= _ll_setattrx(in
, stx
, mask
, perms
, &target
);
10630 assert(in
== target
.get());
10631 fill_statx(in
, in
->caps_issued(), stx
);
10634 ldout(cct
, 3) << "ll_setattrx " << _get_vino(in
) << " = " << res
<< dendl
;
10638 int Client::ll_setattr(Inode
*in
, struct stat
*attr
, int mask
,
10639 const UserPerm
& perms
)
10641 struct ceph_statx stx
;
10642 stat_to_statx(attr
, &stx
);
10644 Mutex::Locker
lock(client_lock
);
10649 InodeRef
target(in
);
10650 int res
= _ll_setattrx(in
, &stx
, mask
, perms
, &target
);
10652 assert(in
== target
.get());
10653 fill_stat(in
, attr
);
10656 ldout(cct
, 3) << "ll_setattr " << _get_vino(in
) << " = " << res
<< dendl
;
10664 int Client::getxattr(const char *path
, const char *name
, void *value
, size_t size
,
10665 const UserPerm
& perms
)
10667 Mutex::Locker
lock(client_lock
);
10673 int r
= Client::path_walk(path
, &in
, perms
, true, CEPH_STAT_CAP_XATTR
);
10676 return _getxattr(in
, name
, value
, size
, perms
);
10679 int Client::lgetxattr(const char *path
, const char *name
, void *value
, size_t size
,
10680 const UserPerm
& perms
)
10682 Mutex::Locker
lock(client_lock
);
10688 int r
= Client::path_walk(path
, &in
, perms
, false, CEPH_STAT_CAP_XATTR
);
10691 return _getxattr(in
, name
, value
, size
, perms
);
10694 int Client::fgetxattr(int fd
, const char *name
, void *value
, size_t size
,
10695 const UserPerm
& perms
)
10697 Mutex::Locker
lock(client_lock
);
10702 Fh
*f
= get_filehandle(fd
);
10705 return _getxattr(f
->inode
, name
, value
, size
, perms
);
10708 int Client::listxattr(const char *path
, char *list
, size_t size
,
10709 const UserPerm
& perms
)
10711 Mutex::Locker
lock(client_lock
);
10717 int r
= Client::path_walk(path
, &in
, perms
, true, CEPH_STAT_CAP_XATTR
);
10720 return Client::_listxattr(in
.get(), list
, size
, perms
);
10723 int Client::llistxattr(const char *path
, char *list
, size_t size
,
10724 const UserPerm
& perms
)
10726 Mutex::Locker
lock(client_lock
);
10732 int r
= Client::path_walk(path
, &in
, perms
, false, CEPH_STAT_CAP_XATTR
);
10735 return Client::_listxattr(in
.get(), list
, size
, perms
);
10738 int Client::flistxattr(int fd
, char *list
, size_t size
, const UserPerm
& perms
)
10740 Mutex::Locker
lock(client_lock
);
10745 Fh
*f
= get_filehandle(fd
);
10748 return Client::_listxattr(f
->inode
.get(), list
, size
, perms
);
10751 int Client::removexattr(const char *path
, const char *name
,
10752 const UserPerm
& perms
)
10754 Mutex::Locker
lock(client_lock
);
10760 int r
= Client::path_walk(path
, &in
, perms
, true);
10763 return _removexattr(in
, name
, perms
);
10766 int Client::lremovexattr(const char *path
, const char *name
,
10767 const UserPerm
& perms
)
10769 Mutex::Locker
lock(client_lock
);
10775 int r
= Client::path_walk(path
, &in
, perms
, false);
10778 return _removexattr(in
, name
, perms
);
10781 int Client::fremovexattr(int fd
, const char *name
, const UserPerm
& perms
)
10783 Mutex::Locker
lock(client_lock
);
10788 Fh
*f
= get_filehandle(fd
);
10791 return _removexattr(f
->inode
, name
, perms
);
10794 int Client::setxattr(const char *path
, const char *name
, const void *value
,
10795 size_t size
, int flags
, const UserPerm
& perms
)
10797 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10799 Mutex::Locker
lock(client_lock
);
10805 int r
= Client::path_walk(path
, &in
, perms
, true);
10808 return _setxattr(in
, name
, value
, size
, flags
, perms
);
10811 int Client::lsetxattr(const char *path
, const char *name
, const void *value
,
10812 size_t size
, int flags
, const UserPerm
& perms
)
10814 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10816 Mutex::Locker
lock(client_lock
);
10822 int r
= Client::path_walk(path
, &in
, perms
, false);
10825 return _setxattr(in
, name
, value
, size
, flags
, perms
);
10828 int Client::fsetxattr(int fd
, const char *name
, const void *value
, size_t size
,
10829 int flags
, const UserPerm
& perms
)
10831 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10833 Mutex::Locker
lock(client_lock
);
10838 Fh
*f
= get_filehandle(fd
);
10841 return _setxattr(f
->inode
, name
, value
, size
, flags
, perms
);
10844 int Client::_getxattr(Inode
*in
, const char *name
, void *value
, size_t size
,
10845 const UserPerm
& perms
)
10849 const VXattr
*vxattr
= _match_vxattr(in
, name
);
10853 // Do a force getattr to get the latest quota before returning
10854 // a value to userspace.
10855 r
= _getattr(in
, 0, perms
, true);
10857 // Error from getattr!
10861 // call pointer-to-member function
10863 if (!(vxattr
->exists_cb
&& !(this->*(vxattr
->exists_cb
))(in
))) {
10864 r
= (this->*(vxattr
->getxattr_cb
))(in
, buf
, sizeof(buf
));
10870 if (r
> (int)size
) {
10872 } else if (r
> 0) {
10873 memcpy(value
, buf
, r
);
10879 if (acl_type
== NO_ACL
&& !strncmp(name
, "system.", 7)) {
10884 r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
10888 if (in
->xattrs
.count(n
)) {
10889 r
= in
->xattrs
[n
].length();
10890 if (r
> 0 && size
!= 0) {
10891 if (size
>= (unsigned)r
)
10892 memcpy(value
, in
->xattrs
[n
].c_str(), r
);
10899 ldout(cct
, 3) << "_getxattr(" << in
->ino
<< ", \"" << name
<< "\", " << size
<< ") = " << r
<< dendl
;
10903 int Client::_getxattr(InodeRef
&in
, const char *name
, void *value
, size_t size
,
10904 const UserPerm
& perms
)
10906 if (cct
->_conf
->client_permissions
) {
10907 int r
= xattr_permission(in
.get(), name
, MAY_READ
, perms
);
10911 return _getxattr(in
.get(), name
, value
, size
, perms
);
10914 int Client::ll_getxattr(Inode
*in
, const char *name
, void *value
,
10915 size_t size
, const UserPerm
& perms
)
10917 Mutex::Locker
lock(client_lock
);
10922 vinodeno_t vino
= _get_vino(in
);
10924 ldout(cct
, 3) << "ll_getxattr " << vino
<< " " << name
<< " size " << size
<< dendl
;
10925 tout(cct
) << "ll_getxattr" << std::endl
;
10926 tout(cct
) << vino
.ino
.val
<< std::endl
;
10927 tout(cct
) << name
<< std::endl
;
10929 if (!cct
->_conf
->fuse_default_permissions
) {
10930 int r
= xattr_permission(in
, name
, MAY_READ
, perms
);
10935 return _getxattr(in
, name
, value
, size
, perms
);
10938 int Client::_listxattr(Inode
*in
, char *name
, size_t size
,
10939 const UserPerm
& perms
)
10941 int r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
10943 for (map
<string
,bufferptr
>::iterator p
= in
->xattrs
.begin();
10944 p
!= in
->xattrs
.end();
10946 r
+= p
->first
.length() + 1;
10948 const VXattr
*vxattrs
= _get_vxattrs(in
);
10949 r
+= _vxattrs_name_size(vxattrs
);
10952 if (size
>= (unsigned)r
) {
10953 for (map
<string
,bufferptr
>::iterator p
= in
->xattrs
.begin();
10954 p
!= in
->xattrs
.end();
10956 memcpy(name
, p
->first
.c_str(), p
->first
.length());
10957 name
+= p
->first
.length();
10962 for (int i
= 0; !vxattrs
[i
].name
.empty(); i
++) {
10963 const VXattr
& vxattr
= vxattrs
[i
];
10966 // call pointer-to-member function
10967 if(vxattr
.exists_cb
&& !(this->*(vxattr
.exists_cb
))(in
))
10969 memcpy(name
, vxattr
.name
.c_str(), vxattr
.name
.length());
10970 name
+= vxattr
.name
.length();
10979 ldout(cct
, 3) << "_listxattr(" << in
->ino
<< ", " << size
<< ") = " << r
<< dendl
;
10983 int Client::ll_listxattr(Inode
*in
, char *names
, size_t size
,
10984 const UserPerm
& perms
)
10986 Mutex::Locker
lock(client_lock
);
10991 vinodeno_t vino
= _get_vino(in
);
10993 ldout(cct
, 3) << "ll_listxattr " << vino
<< " size " << size
<< dendl
;
10994 tout(cct
) << "ll_listxattr" << std::endl
;
10995 tout(cct
) << vino
.ino
.val
<< std::endl
;
10996 tout(cct
) << size
<< std::endl
;
10998 return _listxattr(in
, names
, size
, perms
);
11001 int Client::_do_setxattr(Inode
*in
, const char *name
, const void *value
,
11002 size_t size
, int flags
, const UserPerm
& perms
)
11005 int xattr_flags
= 0;
11007 xattr_flags
|= CEPH_XATTR_REMOVE
;
11008 if (flags
& XATTR_CREATE
)
11009 xattr_flags
|= CEPH_XATTR_CREATE
;
11010 if (flags
& XATTR_REPLACE
)
11011 xattr_flags
|= CEPH_XATTR_REPLACE
;
11013 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SETXATTR
);
11015 in
->make_nosnap_relative_path(path
);
11016 req
->set_filepath(path
);
11017 req
->set_string2(name
);
11018 req
->set_inode(in
);
11019 req
->head
.args
.setxattr
.flags
= xattr_flags
;
11022 bl
.append((const char*)value
, size
);
11025 int res
= make_request(req
, perms
);
11028 ldout(cct
, 3) << "_setxattr(" << in
->ino
<< ", \"" << name
<< "\") = " <<
11033 int Client::_setxattr(Inode
*in
, const char *name
, const void *value
,
11034 size_t size
, int flags
, const UserPerm
& perms
)
11036 if (in
->snapid
!= CEPH_NOSNAP
) {
11040 bool posix_acl_xattr
= false;
11041 if (acl_type
== POSIX_ACL
)
11042 posix_acl_xattr
= !strncmp(name
, "system.", 7);
11044 if (strncmp(name
, "user.", 5) &&
11045 strncmp(name
, "security.", 9) &&
11046 strncmp(name
, "trusted.", 8) &&
11047 strncmp(name
, "ceph.", 5) &&
11049 return -EOPNOTSUPP
;
11051 if (posix_acl_xattr
) {
11052 if (!strcmp(name
, ACL_EA_ACCESS
)) {
11053 mode_t new_mode
= in
->mode
;
11055 int ret
= posix_acl_equiv_mode(value
, size
, &new_mode
);
11062 if (new_mode
!= in
->mode
) {
11063 struct ceph_statx stx
;
11064 stx
.stx_mode
= new_mode
;
11065 ret
= _do_setattr(in
, &stx
, CEPH_SETATTR_MODE
, perms
, NULL
);
11070 } else if (!strcmp(name
, ACL_EA_DEFAULT
)) {
11072 if (!S_ISDIR(in
->mode
))
11074 int ret
= posix_acl_check(value
, size
);
11083 return -EOPNOTSUPP
;
11086 const VXattr
*vxattr
= _match_vxattr(in
, name
);
11087 if (vxattr
&& vxattr
->readonly
)
11088 return -EOPNOTSUPP
;
11091 return _do_setxattr(in
, name
, value
, size
, flags
, perms
);
11094 int Client::_setxattr(InodeRef
&in
, const char *name
, const void *value
,
11095 size_t size
, int flags
, const UserPerm
& perms
)
11097 if (cct
->_conf
->client_permissions
) {
11098 int r
= xattr_permission(in
.get(), name
, MAY_WRITE
, perms
);
11102 return _setxattr(in
.get(), name
, value
, size
, flags
, perms
);
11105 int Client::_setxattr_check_data_pool(string
& name
, string
& value
, const OSDMap
*osdmap
)
11108 if (name
== "layout") {
11109 string::iterator begin
= value
.begin();
11110 string::iterator end
= value
.end();
11111 keys_and_values
<string::iterator
> p
; // create instance of parser
11112 std::map
<string
, string
> m
; // map to receive results
11113 if (!qi::parse(begin
, end
, p
, m
)) { // returns true if successful
11118 for (map
<string
,string
>::iterator q
= m
.begin(); q
!= m
.end(); ++q
) {
11119 if (q
->first
== "pool") {
11124 } else if (name
== "layout.pool") {
11128 if (tmp
.length()) {
11131 pool
= boost::lexical_cast
<unsigned>(tmp
);
11132 if (!osdmap
->have_pg_pool(pool
))
11134 } catch (boost::bad_lexical_cast
const&) {
11135 pool
= osdmap
->lookup_pg_pool_name(tmp
);
// For the layout-related ceph.* xattr names, check the requested data pool
// against the current osdmap and, when the check reports -ENOENT, wait for
// the latest osdmap.
// NOTE(review): the lines that declare `ctx`, close the lambda and wait on
// the context are not visible in this listing.
11145 void Client::_setxattr_maybe_wait_for_osdmap(const char *name
, const void *value
, size_t size
)
11147 // Setting the pool of a layout requires the MetaRequest to carry an osdmap epoch.
11148 // A newly created data pool may not yet be known to either the client or the MDS.
11149 // Fetch the latest osdmap on the client so the MDS can quickly tell whether it needs a newer one.
11150 if (strcmp(name
, "ceph.file.layout.pool") == 0 || strcmp(name
, "ceph.dir.layout.pool") == 0 ||
11151 strcmp(name
, "ceph.file.layout") == 0 || strcmp(name
, "ceph.dir.layout") == 0) {
// `rest` is the name from "layout" onwards, i.e. "layout" or "layout.pool".
11152 string
rest(strstr(name
, "layout"));
// Copy the raw value bytes into a string for parsing.
11153 string
v((const char*)value
, size
);
// Run the pool check against the osdmap held by the objecter.
11154 int r
= objecter
->with_osdmap([&](const OSDMap
& o
) {
11155 return _setxattr_check_data_pool(rest
, v
, &o
);
// -ENOENT from the check: request the latest osdmap.
11158 if (r
== -ENOENT
) {
11160 objecter
->wait_for_latest_osdmap(&ctx
);
11166 int Client::ll_setxattr(Inode
*in
, const char *name
, const void *value
,
11167 size_t size
, int flags
, const UserPerm
& perms
)
11169 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
11171 Mutex::Locker
lock(client_lock
);
11176 vinodeno_t vino
= _get_vino(in
);
11178 ldout(cct
, 3) << "ll_setxattr " << vino
<< " " << name
<< " size " << size
<< dendl
;
11179 tout(cct
) << "ll_setxattr" << std::endl
;
11180 tout(cct
) << vino
.ino
.val
<< std::endl
;
11181 tout(cct
) << name
<< std::endl
;
11183 if (!cct
->_conf
->fuse_default_permissions
) {
11184 int r
= xattr_permission(in
, name
, MAY_WRITE
, perms
);
11188 return _setxattr(in
, name
, value
, size
, flags
, perms
);
11191 int Client::_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
)
11193 if (in
->snapid
!= CEPH_NOSNAP
) {
11197 // same xattrs supported by kernel client
11198 if (strncmp(name
, "user.", 5) &&
11199 strncmp(name
, "system.", 7) &&
11200 strncmp(name
, "security.", 9) &&
11201 strncmp(name
, "trusted.", 8) &&
11202 strncmp(name
, "ceph.", 5))
11203 return -EOPNOTSUPP
;
11205 const VXattr
*vxattr
= _match_vxattr(in
, name
);
11206 if (vxattr
&& vxattr
->readonly
)
11207 return -EOPNOTSUPP
;
11209 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_RMXATTR
);
11211 in
->make_nosnap_relative_path(path
);
11212 req
->set_filepath(path
);
11213 req
->set_filepath2(name
);
11214 req
->set_inode(in
);
11216 int res
= make_request(req
, perms
);
11219 ldout(cct
, 3) << "_removexattr(" << in
->ino
<< ", \"" << name
<< "\") = " << res
<< dendl
;
11223 int Client::_removexattr(InodeRef
&in
, const char *name
, const UserPerm
& perms
)
11225 if (cct
->_conf
->client_permissions
) {
11226 int r
= xattr_permission(in
.get(), name
, MAY_WRITE
, perms
);
11230 return _removexattr(in
.get(), name
, perms
);
11233 int Client::ll_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
)
11235 Mutex::Locker
lock(client_lock
);
11240 vinodeno_t vino
= _get_vino(in
);
11242 ldout(cct
, 3) << "ll_removexattr " << vino
<< " " << name
<< dendl
;
11243 tout(cct
) << "ll_removexattr" << std::endl
;
11244 tout(cct
) << vino
.ino
.val
<< std::endl
;
11245 tout(cct
) << name
<< std::endl
;
11247 if (!cct
->_conf
->fuse_default_permissions
) {
11248 int r
= xattr_permission(in
, name
, MAY_WRITE
, perms
);
11253 return _removexattr(in
, name
, perms
);
11256 bool Client::_vxattrcb_quota_exists(Inode
*in
)
11258 return in
->quota
.is_enable();
11260 size_t Client::_vxattrcb_quota(Inode
*in
, char *val
, size_t size
)
11262 return snprintf(val
, size
,
11263 "max_bytes=%lld max_files=%lld",
11264 (long long int)in
->quota
.max_bytes
,
11265 (long long int)in
->quota
.max_files
);
11267 size_t Client::_vxattrcb_quota_max_bytes(Inode
*in
, char *val
, size_t size
)
11269 return snprintf(val
, size
, "%lld", (long long int)in
->quota
.max_bytes
);
11271 size_t Client::_vxattrcb_quota_max_files(Inode
*in
, char *val
, size_t size
)
11273 return snprintf(val
, size
, "%lld", (long long int)in
->quota
.max_files
);
11276 bool Client::_vxattrcb_layout_exists(Inode
*in
)
11278 return in
->layout
!= file_layout_t();
// getxattr_cb for the whole-layout virtual xattr ("ceph.dir.layout" /
// "ceph.file.layout"): builds
// "stripe_unit=.. stripe_count=.. object_size=.. pool=<name-or-id>"
// in val, optionally followed by " pool_namespace=<ns>".
// NOTE(review): the `else`, the end of the lambda and the return statement
// are not visible in this listing.
// NOTE(review): the arguments are cast to unsigned long long but formatted
// with "%lld"; "%llu" would match the casts.
// NOTE(review): each append uses `size - r`; if an earlier snprintf()
// truncated, r can exceed size and the unsigned subtraction would wrap --
// confirm callers always pass a large enough buffer.
11280 size_t Client::_vxattrcb_layout(Inode
*in
, char *val
, size_t size
)
// Fixed numeric part; r tracks how much has been written so far.
11282 int r
= snprintf(val
, size
,
11283 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
11284 (unsigned long long)in
->layout
.stripe_unit
,
11285 (unsigned long long)in
->layout
.stripe_count
,
11286 (unsigned long long)in
->layout
.object_size
);
// Look the pool up in the osdmap: append its name when the map knows the
// pool id, otherwise append the numeric id.
11287 objecter
->with_osdmap([&](const OSDMap
& o
) {
11288 if (o
.have_pg_pool(in
->layout
.pool_id
))
11289 r
+= snprintf(val
+ r
, size
- r
, "%s",
11290 o
.get_pool_name(in
->layout
.pool_id
).c_str());
11292 r
+= snprintf(val
+ r
, size
- r
, "%" PRIu64
,
11293 (uint64_t)in
->layout
.pool_id
);
// The namespace suffix is emitted only when a pool namespace is set.
11295 if (in
->layout
.pool_ns
.length())
11296 r
+= snprintf(val
+ r
, size
- r
, " pool_namespace=%s",
11297 in
->layout
.pool_ns
.c_str());
11300 size_t Client::_vxattrcb_layout_stripe_unit(Inode
*in
, char *val
, size_t size
)
11302 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.stripe_unit
);
11304 size_t Client::_vxattrcb_layout_stripe_count(Inode
*in
, char *val
, size_t size
)
11306 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.stripe_count
);
11308 size_t Client::_vxattrcb_layout_object_size(Inode
*in
, char *val
, size_t size
)
11310 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.object_size
);
// getxattr_cb for the layout pool virtual xattr: writes the pool's name
// into val when the osdmap knows the pool id, otherwise the numeric id.
// NOTE(review): the declaration of `r`, the `else`, the end of the lambda
// and the return statement are not visible in this listing.
11312 size_t Client::_vxattrcb_layout_pool(Inode
*in
, char *val
, size_t size
)
// The lookup runs inside with_osdmap() so it sees the objecter's osdmap.
11315 objecter
->with_osdmap([&](const OSDMap
& o
) {
// Known pool id: emit the pool name.
11316 if (o
.have_pg_pool(in
->layout
.pool_id
))
11317 r
= snprintf(val
, size
, "%s", o
.get_pool_name(
11318 in
->layout
.pool_id
).c_str());
// Fallback: emit the pool id as an unsigned 64-bit decimal.
11320 r
= snprintf(val
, size
, "%" PRIu64
, (uint64_t)in
->layout
.pool_id
);
11324 size_t Client::_vxattrcb_layout_pool_namespace(Inode
*in
, char *val
, size_t size
)
11326 return snprintf(val
, size
, "%s", in
->layout
.pool_ns
.c_str());
11328 size_t Client::_vxattrcb_dir_entries(Inode
*in
, char *val
, size_t size
)
11330 return snprintf(val
, size
, "%lld", (unsigned long long)(in
->dirstat
.nfiles
+ in
->dirstat
.nsubdirs
));
11332 size_t Client::_vxattrcb_dir_files(Inode
*in
, char *val
, size_t size
)
11334 return snprintf(val
, size
, "%lld", (unsigned long long)in
->dirstat
.nfiles
);
11336 size_t Client::_vxattrcb_dir_subdirs(Inode
*in
, char *val
, size_t size
)
11338 return snprintf(val
, size
, "%lld", (unsigned long long)in
->dirstat
.nsubdirs
);
11340 size_t Client::_vxattrcb_dir_rentries(Inode
*in
, char *val
, size_t size
)
11342 return snprintf(val
, size
, "%lld", (unsigned long long)(in
->rstat
.rfiles
+ in
->rstat
.rsubdirs
));
11344 size_t Client::_vxattrcb_dir_rfiles(Inode
*in
, char *val
, size_t size
)
11346 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rfiles
);
11348 size_t Client::_vxattrcb_dir_rsubdirs(Inode
*in
, char *val
, size_t size
)
11350 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rsubdirs
);
11352 size_t Client::_vxattrcb_dir_rbytes(Inode
*in
, char *val
, size_t size
)
11354 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rbytes
);
11356 size_t Client::_vxattrcb_dir_rctime(Inode
*in
, char *val
, size_t size
)
11358 return snprintf(val
, size
, "%ld.09%ld", (long)in
->rstat
.rctime
.sec(),
11359 (long)in
->rstat
.rctime
.nsec());
11362 #define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name
11363 #define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." #_name2
11365 #define XATTR_NAME_CEPH(_type, _name) \
11367 name: CEPH_XATTR_NAME(_type, _name), \
11368 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
11373 #define XATTR_LAYOUT_FIELD(_type, _name, _field) \
11375 name: CEPH_XATTR_NAME2(_type, _name, _field), \
11376 getxattr_cb: &Client::_vxattrcb_ ## _name ## _ ## _field, \
11379 exists_cb: &Client::_vxattrcb_layout_exists, \
11381 #define XATTR_QUOTA_FIELD(_type, _name) \
11383 name: CEPH_XATTR_NAME(_type, _name), \
11384 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
11387 exists_cb: &Client::_vxattrcb_quota_exists, \
11390 const Client::VXattr
Client::_dir_vxattrs
[] = {
11392 name
: "ceph.dir.layout",
11393 getxattr_cb
: &Client::_vxattrcb_layout
,
11396 exists_cb
: &Client::_vxattrcb_layout_exists
,
11398 XATTR_LAYOUT_FIELD(dir
, layout
, stripe_unit
),
11399 XATTR_LAYOUT_FIELD(dir
, layout
, stripe_count
),
11400 XATTR_LAYOUT_FIELD(dir
, layout
, object_size
),
11401 XATTR_LAYOUT_FIELD(dir
, layout
, pool
),
11402 XATTR_LAYOUT_FIELD(dir
, layout
, pool_namespace
),
11403 XATTR_NAME_CEPH(dir
, entries
),
11404 XATTR_NAME_CEPH(dir
, files
),
11405 XATTR_NAME_CEPH(dir
, subdirs
),
11406 XATTR_NAME_CEPH(dir
, rentries
),
11407 XATTR_NAME_CEPH(dir
, rfiles
),
11408 XATTR_NAME_CEPH(dir
, rsubdirs
),
11409 XATTR_NAME_CEPH(dir
, rbytes
),
11410 XATTR_NAME_CEPH(dir
, rctime
),
11412 name
: "ceph.quota",
11413 getxattr_cb
: &Client::_vxattrcb_quota
,
11416 exists_cb
: &Client::_vxattrcb_quota_exists
,
11418 XATTR_QUOTA_FIELD(quota
, max_bytes
),
11419 XATTR_QUOTA_FIELD(quota
, max_files
),
11420 { name
: "" } /* Required table terminator */
11423 const Client::VXattr
Client::_file_vxattrs
[] = {
11425 name
: "ceph.file.layout",
11426 getxattr_cb
: &Client::_vxattrcb_layout
,
11429 exists_cb
: &Client::_vxattrcb_layout_exists
,
11431 XATTR_LAYOUT_FIELD(file
, layout
, stripe_unit
),
11432 XATTR_LAYOUT_FIELD(file
, layout
, stripe_count
),
11433 XATTR_LAYOUT_FIELD(file
, layout
, object_size
),
11434 XATTR_LAYOUT_FIELD(file
, layout
, pool
),
11435 XATTR_LAYOUT_FIELD(file
, layout
, pool_namespace
),
11436 { name
: "" } /* Required table terminator */
// Select the virtual-xattr table that applies to an inode: _dir_vxattrs or
// _file_vxattrs (for in->is_file()).
// NOTE(review): the first condition (presumably a directory test) and the
// fall-through return are not visible in this listing.
11439 const Client::VXattr
*Client::_get_vxattrs(Inode
*in
)
11442 return _dir_vxattrs
;
11443 else if (in
->is_file())
11444 return _file_vxattrs
;
// Look up a virtual xattr by exact name in the table returned by
// _get_vxattrs(in). Only names beginning with "ceph." are considered.
// NOTE(review): the null check on the table, the match return, the iterator
// advance and the not-found return are not visible in this listing.
11448 const Client::VXattr
*Client::_match_vxattr(Inode
*in
, const char *name
)
// Cheap prefix filter before scanning the table.
11450 if (strncmp(name
, "ceph.", 5) == 0) {
11451 const VXattr
*vxattr
= _get_vxattrs(in
);
// Tables end with an entry whose name is empty.
11453 while (!vxattr
->name
.empty()) {
11454 if (vxattr
->name
== name
)
// Sum the space needed for the names of all non-hidden entries in a
// virtual-xattr table, counting one extra byte per name.
// NOTE(review): the declaration of `len`, the iterator advance and the
// return are not visible in this listing.
11463 size_t Client::_vxattrs_calcu_name_size(const VXattr
*vxattr
)
// Walk until the terminating entry with an empty name.
11466 while (!vxattr
->name
.empty()) {
// Hidden entries contribute nothing.
11467 if (!vxattr
->hidden
)
11468 len
+= vxattr
->name
.length() + 1;
11474 int Client::ll_readlink(Inode
*in
, char *buf
, size_t buflen
, const UserPerm
& perms
)
11476 Mutex::Locker
lock(client_lock
);
11481 vinodeno_t vino
= _get_vino(in
);
11483 ldout(cct
, 3) << "ll_readlink " << vino
<< dendl
;
11484 tout(cct
) << "ll_readlink" << std::endl
;
11485 tout(cct
) << vino
.ino
.val
<< std::endl
;
11487 set
<Dentry
*>::iterator dn
= in
->dn_set
.begin();
11488 while (dn
!= in
->dn_set
.end()) {
11493 int r
= _readlink(in
, buf
, buflen
); // FIXME: no permission checking!
11494 ldout(cct
, 3) << "ll_readlink " << vino
<< " = " << r
<< dendl
;
11498 int Client::_mknod(Inode
*dir
, const char *name
, mode_t mode
, dev_t rdev
,
11499 const UserPerm
& perms
, InodeRef
*inp
)
11501 ldout(cct
, 3) << "_mknod(" << dir
->ino
<< " " << name
<< ", 0" << oct
11502 << mode
<< dec
<< ", " << rdev
<< ", uid " << perms
.uid()
11503 << ", gid " << perms
.gid() << ")" << dendl
;
11505 if (strlen(name
) > NAME_MAX
)
11506 return -ENAMETOOLONG
;
11508 if (dir
->snapid
!= CEPH_NOSNAP
) {
11511 if (is_quota_files_exceeded(dir
, perms
)) {
11515 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_MKNOD
);
11518 dir
->make_nosnap_relative_path(path
);
11519 path
.push_dentry(name
);
11520 req
->set_filepath(path
);
11521 req
->set_inode(dir
);
11522 req
->head
.args
.mknod
.rdev
= rdev
;
11523 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11524 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11526 bufferlist xattrs_bl
;
11527 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perms
);
11530 req
->head
.args
.mknod
.mode
= mode
;
11531 if (xattrs_bl
.length() > 0)
11532 req
->set_data(xattrs_bl
);
11535 res
= get_or_create(dir
, name
, &de
);
11538 req
->set_dentry(de
);
11540 res
= make_request(req
, perms
, inp
);
11544 ldout(cct
, 3) << "mknod(" << path
<< ", 0" << oct
<< mode
<< dec
<< ") = " << res
<< dendl
;
11552 int Client::ll_mknod(Inode
*parent
, const char *name
, mode_t mode
,
11553 dev_t rdev
, struct stat
*attr
, Inode
**out
,
11554 const UserPerm
& perms
)
11556 Mutex::Locker
lock(client_lock
);
11561 vinodeno_t vparent
= _get_vino(parent
);
11563 ldout(cct
, 3) << "ll_mknod " << vparent
<< " " << name
<< dendl
;
11564 tout(cct
) << "ll_mknod" << std::endl
;
11565 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11566 tout(cct
) << name
<< std::endl
;
11567 tout(cct
) << mode
<< std::endl
;
11568 tout(cct
) << rdev
<< std::endl
;
11570 if (!cct
->_conf
->fuse_default_permissions
) {
11571 int r
= may_create(parent
, perms
);
11577 int r
= _mknod(parent
, name
, mode
, rdev
, perms
, &in
);
11579 fill_stat(in
, attr
);
11582 tout(cct
) << attr
->st_ino
<< std::endl
;
11583 ldout(cct
, 3) << "ll_mknod " << vparent
<< " " << name
11584 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11589 int Client::ll_mknodx(Inode
*parent
, const char *name
, mode_t mode
,
11590 dev_t rdev
, Inode
**out
,
11591 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
11592 const UserPerm
& perms
)
11594 unsigned caps
= statx_to_mask(flags
, want
);
11595 Mutex::Locker
lock(client_lock
);
11600 vinodeno_t vparent
= _get_vino(parent
);
11602 ldout(cct
, 3) << "ll_mknodx " << vparent
<< " " << name
<< dendl
;
11603 tout(cct
) << "ll_mknodx" << std::endl
;
11604 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11605 tout(cct
) << name
<< std::endl
;
11606 tout(cct
) << mode
<< std::endl
;
11607 tout(cct
) << rdev
<< std::endl
;
11609 if (!cct
->_conf
->fuse_default_permissions
) {
11610 int r
= may_create(parent
, perms
);
11616 int r
= _mknod(parent
, name
, mode
, rdev
, perms
, &in
);
11618 fill_statx(in
, caps
, stx
);
11621 tout(cct
) << stx
->stx_ino
<< std::endl
;
11622 ldout(cct
, 3) << "ll_mknodx " << vparent
<< " " << name
11623 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11628 int Client::_create(Inode
*dir
, const char *name
, int flags
, mode_t mode
,
11629 InodeRef
*inp
, Fh
**fhp
, int stripe_unit
, int stripe_count
,
11630 int object_size
, const char *data_pool
, bool *created
,
11631 const UserPerm
& perms
)
11633 ldout(cct
, 3) << "_create(" << dir
->ino
<< " " << name
<< ", 0" << oct
<<
11634 mode
<< dec
<< ")" << dendl
;
11636 if (strlen(name
) > NAME_MAX
)
11637 return -ENAMETOOLONG
;
11638 if (dir
->snapid
!= CEPH_NOSNAP
) {
11641 if (is_quota_files_exceeded(dir
, perms
)) {
11645 // use normalized flags to generate cmode
11646 int cmode
= ceph_flags_to_mode(ceph_flags_sys2wire(flags
));
11650 int64_t pool_id
= -1;
11651 if (data_pool
&& *data_pool
) {
11652 pool_id
= objecter
->with_osdmap(
11653 std::mem_fn(&OSDMap::lookup_pg_pool_name
), data_pool
);
11656 if (pool_id
> 0xffffffffll
)
11657 return -ERANGE
; // bummer!
11660 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_CREATE
);
11663 dir
->make_nosnap_relative_path(path
);
11664 path
.push_dentry(name
);
11665 req
->set_filepath(path
);
11666 req
->set_inode(dir
);
11667 req
->head
.args
.open
.flags
= ceph_flags_sys2wire(flags
| O_CREAT
);
11669 req
->head
.args
.open
.stripe_unit
= stripe_unit
;
11670 req
->head
.args
.open
.stripe_count
= stripe_count
;
11671 req
->head
.args
.open
.object_size
= object_size
;
11672 if (cct
->_conf
->client_debug_getattr_caps
)
11673 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
11675 req
->head
.args
.open
.mask
= 0;
11676 req
->head
.args
.open
.pool
= pool_id
;
11677 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11678 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11681 bufferlist xattrs_bl
;
11682 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perms
);
11685 req
->head
.args
.open
.mode
= mode
;
11686 if (xattrs_bl
.length() > 0)
11687 req
->set_data(xattrs_bl
);
11690 res
= get_or_create(dir
, name
, &de
);
11693 req
->set_dentry(de
);
11695 res
= make_request(req
, perms
, inp
, created
);
11700 /* If the caller passed a value in fhp, do the open */
11702 (*inp
)->get_open_ref(cmode
);
11703 *fhp
= _create_fh(inp
->get(), flags
, cmode
, perms
);
11709 ldout(cct
, 3) << "create(" << path
<< ", 0" << oct
<< mode
<< dec
11710 << " layout " << stripe_unit
11711 << ' ' << stripe_count
11712 << ' ' << object_size
11713 <<") = " << res
<< dendl
;
11722 int Client::_mkdir(Inode
*dir
, const char *name
, mode_t mode
, const UserPerm
& perm
,
11725 ldout(cct
, 3) << "_mkdir(" << dir
->ino
<< " " << name
<< ", 0" << oct
11726 << mode
<< dec
<< ", uid " << perm
.uid()
11727 << ", gid " << perm
.gid() << ")" << dendl
;
11729 if (strlen(name
) > NAME_MAX
)
11730 return -ENAMETOOLONG
;
11732 if (dir
->snapid
!= CEPH_NOSNAP
&& dir
->snapid
!= CEPH_SNAPDIR
) {
11735 if (is_quota_files_exceeded(dir
, perm
)) {
11738 MetaRequest
*req
= new MetaRequest(dir
->snapid
== CEPH_SNAPDIR
?
11739 CEPH_MDS_OP_MKSNAP
: CEPH_MDS_OP_MKDIR
);
11742 dir
->make_nosnap_relative_path(path
);
11743 path
.push_dentry(name
);
11744 req
->set_filepath(path
);
11745 req
->set_inode(dir
);
11746 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11747 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11750 bufferlist xattrs_bl
;
11751 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perm
);
11754 req
->head
.args
.mkdir
.mode
= mode
;
11755 if (xattrs_bl
.length() > 0)
11756 req
->set_data(xattrs_bl
);
11759 res
= get_or_create(dir
, name
, &de
);
11762 req
->set_dentry(de
);
11764 ldout(cct
, 10) << "_mkdir: making request" << dendl
;
11765 res
= make_request(req
, perm
, inp
);
11766 ldout(cct
, 10) << "_mkdir result is " << res
<< dendl
;
11770 ldout(cct
, 3) << "_mkdir(" << path
<< ", 0" << oct
<< mode
<< dec
<< ") = " << res
<< dendl
;
11778 int Client::ll_mkdir(Inode
*parent
, const char *name
, mode_t mode
,
11779 struct stat
*attr
, Inode
**out
, const UserPerm
& perm
)
11781 Mutex::Locker
lock(client_lock
);
11786 vinodeno_t vparent
= _get_vino(parent
);
11788 ldout(cct
, 3) << "ll_mkdir " << vparent
<< " " << name
<< dendl
;
11789 tout(cct
) << "ll_mkdir" << std::endl
;
11790 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11791 tout(cct
) << name
<< std::endl
;
11792 tout(cct
) << mode
<< std::endl
;
11794 if (!cct
->_conf
->fuse_default_permissions
) {
11795 int r
= may_create(parent
, perm
);
11801 int r
= _mkdir(parent
, name
, mode
, perm
, &in
);
11803 fill_stat(in
, attr
);
11806 tout(cct
) << attr
->st_ino
<< std::endl
;
11807 ldout(cct
, 3) << "ll_mkdir " << vparent
<< " " << name
11808 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11813 int Client::ll_mkdirx(Inode
*parent
, const char *name
, mode_t mode
, Inode
**out
,
11814 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
11815 const UserPerm
& perms
)
11817 Mutex::Locker
lock(client_lock
);
11822 vinodeno_t vparent
= _get_vino(parent
);
11824 ldout(cct
, 3) << "ll_mkdirx " << vparent
<< " " << name
<< dendl
;
11825 tout(cct
) << "ll_mkdirx" << std::endl
;
11826 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11827 tout(cct
) << name
<< std::endl
;
11828 tout(cct
) << mode
<< std::endl
;
11830 if (!cct
->_conf
->fuse_default_permissions
) {
11831 int r
= may_create(parent
, perms
);
11837 int r
= _mkdir(parent
, name
, mode
, perms
, &in
);
11839 fill_statx(in
, statx_to_mask(flags
, want
), stx
);
11845 tout(cct
) << stx
->stx_ino
<< std::endl
;
11846 ldout(cct
, 3) << "ll_mkdirx " << vparent
<< " " << name
11847 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11852 int Client::_symlink(Inode
*dir
, const char *name
, const char *target
,
11853 const UserPerm
& perms
, InodeRef
*inp
)
11855 ldout(cct
, 3) << "_symlink(" << dir
->ino
<< " " << name
<< ", " << target
11856 << ", uid " << perms
.uid() << ", gid " << perms
.gid() << ")"
11859 if (strlen(name
) > NAME_MAX
)
11860 return -ENAMETOOLONG
;
11862 if (dir
->snapid
!= CEPH_NOSNAP
) {
11865 if (is_quota_files_exceeded(dir
, perms
)) {
11869 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SYMLINK
);
11872 dir
->make_nosnap_relative_path(path
);
11873 path
.push_dentry(name
);
11874 req
->set_filepath(path
);
11875 req
->set_inode(dir
);
11876 req
->set_string2(target
);
11877 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11878 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11881 int res
= get_or_create(dir
, name
, &de
);
11884 req
->set_dentry(de
);
11886 res
= make_request(req
, perms
, inp
);
11889 ldout(cct
, 3) << "_symlink(\"" << path
<< "\", \"" << target
<< "\") = " <<
11898 int Client::ll_symlink(Inode
*parent
, const char *name
, const char *value
,
11899 struct stat
*attr
, Inode
**out
, const UserPerm
& perms
)
11901 Mutex::Locker
lock(client_lock
);
11906 vinodeno_t vparent
= _get_vino(parent
);
11908 ldout(cct
, 3) << "ll_symlink " << vparent
<< " " << name
<< " -> " << value
11910 tout(cct
) << "ll_symlink" << std::endl
;
11911 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11912 tout(cct
) << name
<< std::endl
;
11913 tout(cct
) << value
<< std::endl
;
11915 if (!cct
->_conf
->fuse_default_permissions
) {
11916 int r
= may_create(parent
, perms
);
11922 int r
= _symlink(parent
, name
, value
, perms
, &in
);
11924 fill_stat(in
, attr
);
11927 tout(cct
) << attr
->st_ino
<< std::endl
;
11928 ldout(cct
, 3) << "ll_symlink " << vparent
<< " " << name
11929 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11934 int Client::ll_symlinkx(Inode
*parent
, const char *name
, const char *value
,
11935 Inode
**out
, struct ceph_statx
*stx
, unsigned want
,
11936 unsigned flags
, const UserPerm
& perms
)
11938 Mutex::Locker
lock(client_lock
);
11943 vinodeno_t vparent
= _get_vino(parent
);
11945 ldout(cct
, 3) << "ll_symlinkx " << vparent
<< " " << name
<< " -> " << value
11947 tout(cct
) << "ll_symlinkx" << std::endl
;
11948 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11949 tout(cct
) << name
<< std::endl
;
11950 tout(cct
) << value
<< std::endl
;
11952 if (!cct
->_conf
->fuse_default_permissions
) {
11953 int r
= may_create(parent
, perms
);
11959 int r
= _symlink(parent
, name
, value
, perms
, &in
);
11961 fill_statx(in
, statx_to_mask(flags
, want
), stx
);
11964 tout(cct
) << stx
->stx_ino
<< std::endl
;
11965 ldout(cct
, 3) << "ll_symlinkx " << vparent
<< " " << name
11966 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11971 int Client::_unlink(Inode
*dir
, const char *name
, const UserPerm
& perm
)
11973 ldout(cct
, 3) << "_unlink(" << dir
->ino
<< " " << name
11974 << " uid " << perm
.uid() << " gid " << perm
.gid()
11977 if (dir
->snapid
!= CEPH_NOSNAP
) {
11981 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_UNLINK
);
11984 dir
->make_nosnap_relative_path(path
);
11985 path
.push_dentry(name
);
11986 req
->set_filepath(path
);
11992 int res
= get_or_create(dir
, name
, &de
);
11995 req
->set_dentry(de
);
11996 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11997 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11999 res
= _lookup(dir
, name
, 0, &otherin
, perm
);
12003 in
= otherin
.get();
12004 req
->set_other_inode(in
);
12005 in
->break_all_delegs();
12006 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
12008 req
->set_inode(dir
);
12010 res
= make_request(req
, perm
);
12013 ldout(cct
, 3) << "unlink(" << path
<< ") = " << res
<< dendl
;
12021 int Client::ll_unlink(Inode
*in
, const char *name
, const UserPerm
& perm
)
12023 Mutex::Locker
lock(client_lock
);
12028 vinodeno_t vino
= _get_vino(in
);
12030 ldout(cct
, 3) << "ll_unlink " << vino
<< " " << name
<< dendl
;
12031 tout(cct
) << "ll_unlink" << std::endl
;
12032 tout(cct
) << vino
.ino
.val
<< std::endl
;
12033 tout(cct
) << name
<< std::endl
;
12035 if (!cct
->_conf
->fuse_default_permissions
) {
12036 int r
= may_delete(in
, name
, perm
);
12040 return _unlink(in
, name
, perm
);
12043 int Client::_rmdir(Inode
*dir
, const char *name
, const UserPerm
& perms
)
12045 ldout(cct
, 3) << "_rmdir(" << dir
->ino
<< " " << name
<< " uid "
12046 << perms
.uid() << " gid " << perms
.gid() << ")" << dendl
;
12048 if (dir
->snapid
!= CEPH_NOSNAP
&& dir
->snapid
!= CEPH_SNAPDIR
) {
12052 int op
= dir
->snapid
== CEPH_SNAPDIR
? CEPH_MDS_OP_RMSNAP
: CEPH_MDS_OP_RMDIR
;
12053 MetaRequest
*req
= new MetaRequest(op
);
12055 dir
->make_nosnap_relative_path(path
);
12056 path
.push_dentry(name
);
12057 req
->set_filepath(path
);
12059 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
12060 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
12061 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
12066 int res
= get_or_create(dir
, name
, &de
);
12069 if (op
== CEPH_MDS_OP_RMDIR
)
12070 req
->set_dentry(de
);
12074 res
= _lookup(dir
, name
, 0, &in
, perms
);
12077 if (op
== CEPH_MDS_OP_RMDIR
) {
12078 req
->set_inode(dir
);
12079 req
->set_other_inode(in
.get());
12081 unlink(de
, true, true);
12083 req
->set_other_inode(in
.get());
12086 res
= make_request(req
, perms
);
12089 ldout(cct
, 3) << "rmdir(" << path
<< ") = " << res
<< dendl
;
12097 int Client::ll_rmdir(Inode
*in
, const char *name
, const UserPerm
& perms
)
12099 Mutex::Locker
lock(client_lock
);
12104 vinodeno_t vino
= _get_vino(in
);
12106 ldout(cct
, 3) << "ll_rmdir " << vino
<< " " << name
<< dendl
;
12107 tout(cct
) << "ll_rmdir" << std::endl
;
12108 tout(cct
) << vino
.ino
.val
<< std::endl
;
12109 tout(cct
) << name
<< std::endl
;
12111 if (!cct
->_conf
->fuse_default_permissions
) {
12112 int r
= may_delete(in
, name
, perms
);
12117 return _rmdir(in
, name
, perms
);
12120 int Client::_rename(Inode
*fromdir
, const char *fromname
, Inode
*todir
, const char *toname
, const UserPerm
& perm
)
12122 ldout(cct
, 3) << "_rename(" << fromdir
->ino
<< " " << fromname
<< " to "
12123 << todir
->ino
<< " " << toname
12124 << " uid " << perm
.uid() << " gid " << perm
.gid() << ")"
12127 if (fromdir
->snapid
!= todir
->snapid
)
12130 int op
= CEPH_MDS_OP_RENAME
;
12131 if (fromdir
->snapid
!= CEPH_NOSNAP
) {
12132 if (fromdir
== todir
&& fromdir
->snapid
== CEPH_SNAPDIR
)
12133 op
= CEPH_MDS_OP_RENAMESNAP
;
12137 if (fromdir
!= todir
) {
12138 Inode
*fromdir_root
=
12139 fromdir
->quota
.is_enable() ? fromdir
: get_quota_root(fromdir
, perm
);
12140 Inode
*todir_root
=
12141 todir
->quota
.is_enable() ? todir
: get_quota_root(todir
, perm
);
12142 if (fromdir_root
!= todir_root
) {
12148 MetaRequest
*req
= new MetaRequest(op
);
12151 fromdir
->make_nosnap_relative_path(from
);
12152 from
.push_dentry(fromname
);
12154 todir
->make_nosnap_relative_path(to
);
12155 to
.push_dentry(toname
);
12156 req
->set_filepath(to
);
12157 req
->set_filepath2(from
);
12160 int res
= get_or_create(fromdir
, fromname
, &oldde
);
12164 res
= get_or_create(todir
, toname
, &de
);
12168 if (op
== CEPH_MDS_OP_RENAME
) {
12169 req
->set_old_dentry(oldde
);
12170 req
->old_dentry_drop
= CEPH_CAP_FILE_SHARED
;
12171 req
->old_dentry_unless
= CEPH_CAP_FILE_EXCL
;
12173 req
->set_dentry(de
);
12174 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
12175 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
12177 InodeRef oldin
, otherin
;
12178 res
= _lookup(fromdir
, fromname
, 0, &oldin
, perm
);
12182 Inode
*oldinode
= oldin
.get();
12183 oldinode
->break_all_delegs();
12184 req
->set_old_inode(oldinode
);
12185 req
->old_inode_drop
= CEPH_CAP_LINK_SHARED
;
12187 res
= _lookup(todir
, toname
, 0, &otherin
, perm
);
12191 Inode
*in
= otherin
.get();
12192 req
->set_other_inode(in
);
12193 in
->break_all_delegs();
12195 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
12203 req
->set_inode(todir
);
12205 // renamesnap reply contains no tracedn, so we need to invalidate
12207 unlink(oldde
, true, true);
12208 unlink(de
, true, true);
12211 res
= make_request(req
, perm
, &target
);
12212 ldout(cct
, 10) << "rename result is " << res
<< dendl
;
12214 // renamed item from our cache
12217 ldout(cct
, 3) << "_rename(" << from
<< ", " << to
<< ") = " << res
<< dendl
;
12225 int Client::ll_rename(Inode
*parent
, const char *name
, Inode
*newparent
,
12226 const char *newname
, const UserPerm
& perm
)
12228 Mutex::Locker
lock(client_lock
);
12233 vinodeno_t vparent
= _get_vino(parent
);
12234 vinodeno_t vnewparent
= _get_vino(newparent
);
12236 ldout(cct
, 3) << "ll_rename " << vparent
<< " " << name
<< " to "
12237 << vnewparent
<< " " << newname
<< dendl
;
12238 tout(cct
) << "ll_rename" << std::endl
;
12239 tout(cct
) << vparent
.ino
.val
<< std::endl
;
12240 tout(cct
) << name
<< std::endl
;
12241 tout(cct
) << vnewparent
.ino
.val
<< std::endl
;
12242 tout(cct
) << newname
<< std::endl
;
12244 if (!cct
->_conf
->fuse_default_permissions
) {
12245 int r
= may_delete(parent
, name
, perm
);
12248 r
= may_delete(newparent
, newname
, perm
);
12249 if (r
< 0 && r
!= -ENOENT
)
12253 return _rename(parent
, name
, newparent
, newname
, perm
);
12256 int Client::_link(Inode
*in
, Inode
*dir
, const char *newname
, const UserPerm
& perm
, InodeRef
*inp
)
12258 ldout(cct
, 3) << "_link(" << in
->ino
<< " to " << dir
->ino
<< " " << newname
12259 << " uid " << perm
.uid() << " gid " << perm
.gid() << ")" << dendl
;
12261 if (strlen(newname
) > NAME_MAX
)
12262 return -ENAMETOOLONG
;
12264 if (in
->snapid
!= CEPH_NOSNAP
|| dir
->snapid
!= CEPH_NOSNAP
) {
12267 if (is_quota_files_exceeded(dir
, perm
)) {
12271 in
->break_all_delegs();
12272 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LINK
);
12274 filepath
path(newname
, dir
->ino
);
12275 req
->set_filepath(path
);
12276 filepath
existing(in
->ino
);
12277 req
->set_filepath2(existing
);
12279 req
->set_inode(dir
);
12280 req
->inode_drop
= CEPH_CAP_FILE_SHARED
;
12281 req
->inode_unless
= CEPH_CAP_FILE_EXCL
;
12284 int res
= get_or_create(dir
, newname
, &de
);
12287 req
->set_dentry(de
);
12289 res
= make_request(req
, perm
, inp
);
12290 ldout(cct
, 10) << "link result is " << res
<< dendl
;
12293 ldout(cct
, 3) << "link(" << existing
<< ", " << path
<< ") = " << res
<< dendl
;
12301 int Client::ll_link(Inode
*in
, Inode
*newparent
, const char *newname
,
12302 const UserPerm
& perm
)
12304 Mutex::Locker
lock(client_lock
);
12309 vinodeno_t vino
= _get_vino(in
);
12310 vinodeno_t vnewparent
= _get_vino(newparent
);
12312 ldout(cct
, 3) << "ll_link " << vino
<< " to " << vnewparent
<< " " <<
12314 tout(cct
) << "ll_link" << std::endl
;
12315 tout(cct
) << vino
.ino
.val
<< std::endl
;
12316 tout(cct
) << vnewparent
<< std::endl
;
12317 tout(cct
) << newname
<< std::endl
;
12322 if (!cct
->_conf
->fuse_default_permissions
) {
12323 if (S_ISDIR(in
->mode
))
12326 r
= may_hardlink(in
, perm
);
12330 r
= may_create(newparent
, perm
);
12335 return _link(in
, newparent
, newname
, perm
, &target
);
12338 int Client::ll_num_osds(void)
12340 Mutex::Locker
lock(client_lock
);
12341 return objecter
->with_osdmap(std::mem_fn(&OSDMap::get_num_osds
));
12344 int Client::ll_osdaddr(int osd
, uint32_t *addr
)
12346 Mutex::Locker
lock(client_lock
);
12349 bool exists
= objecter
->with_osdmap([&](const OSDMap
& o
) {
12350 if (!o
.exists(osd
))
12352 g
= o
.get_addr(osd
);
12357 uint32_t nb_addr
= (g
.in4_addr()).sin_addr
.s_addr
;
12358 *addr
= ntohl(nb_addr
);
12362 uint32_t Client::ll_stripe_unit(Inode
*in
)
12364 Mutex::Locker
lock(client_lock
);
12365 return in
->layout
.stripe_unit
;
12368 uint64_t Client::ll_snap_seq(Inode
*in
)
12370 Mutex::Locker
lock(client_lock
);
12371 return in
->snaprealm
->seq
;
12374 int Client::ll_file_layout(Inode
*in
, file_layout_t
*layout
)
12376 Mutex::Locker
lock(client_lock
);
12377 *layout
= in
->layout
;
12381 int Client::ll_file_layout(Fh
*fh
, file_layout_t
*layout
)
12383 return ll_file_layout(fh
->inode
.get(), layout
);
12386 /* Currently we cannot take advantage of redundancy in reads, since we
12387 would have to go through all possible placement groups (a
12388 potentially quite large number determined by a hash), and use CRUSH
12389 to calculate the appropriate set of OSDs for each placement group,
12390 then index into that. An array with one entry per OSD is much more
12391 tractable and works for demonstration purposes. */
12393 int Client::ll_get_stripe_osd(Inode
*in
, uint64_t blockno
,
12394 file_layout_t
* layout
)
12396 Mutex::Locker
lock(client_lock
);
12398 inodeno_t ino
= ll_get_inodeno(in
);
12399 uint32_t object_size
= layout
->object_size
;
12400 uint32_t su
= layout
->stripe_unit
;
12401 uint32_t stripe_count
= layout
->stripe_count
;
12402 uint64_t stripes_per_object
= object_size
/ su
;
12404 uint64_t stripeno
= blockno
/ stripe_count
; // which horizontal stripe (Y)
12405 uint64_t stripepos
= blockno
% stripe_count
; // which object in the object set (X)
12406 uint64_t objectsetno
= stripeno
/ stripes_per_object
; // which object set
12407 uint64_t objectno
= objectsetno
* stripe_count
+ stripepos
; // object id
12409 object_t oid
= file_object_t(ino
, objectno
);
12410 return objecter
->with_osdmap([&](const OSDMap
& o
) {
12411 ceph_object_layout olayout
=
12412 o
.file_to_object_layout(oid
, *layout
);
12413 pg_t pg
= (pg_t
)olayout
.ol_pgid
;
12416 o
.pg_to_acting_osds(pg
, &osds
, &primary
);
12421 /* Return the offset of the block, internal to the object */
12423 uint64_t Client::ll_get_internal_offset(Inode
*in
, uint64_t blockno
)
12425 Mutex::Locker
lock(client_lock
);
12426 file_layout_t
*layout
=&(in
->layout
);
12427 uint32_t object_size
= layout
->object_size
;
12428 uint32_t su
= layout
->stripe_unit
;
12429 uint64_t stripes_per_object
= object_size
/ su
;
12431 return (blockno
% stripes_per_object
) * su
;
12434 int Client::ll_opendir(Inode
*in
, int flags
, dir_result_t
** dirpp
,
12435 const UserPerm
& perms
)
12437 Mutex::Locker
lock(client_lock
);
12442 vinodeno_t vino
= _get_vino(in
);
12444 ldout(cct
, 3) << "ll_opendir " << vino
<< dendl
;
12445 tout(cct
) << "ll_opendir" << std::endl
;
12446 tout(cct
) << vino
.ino
.val
<< std::endl
;
12448 if (!cct
->_conf
->fuse_default_permissions
) {
12449 int r
= may_open(in
, flags
, perms
);
12454 int r
= _opendir(in
, dirpp
, perms
);
12455 tout(cct
) << (unsigned long)*dirpp
<< std::endl
;
12457 ldout(cct
, 3) << "ll_opendir " << vino
<< " = " << r
<< " (" << *dirpp
<< ")"
12462 int Client::ll_releasedir(dir_result_t
*dirp
)
12464 Mutex::Locker
lock(client_lock
);
12465 ldout(cct
, 3) << "ll_releasedir " << dirp
<< dendl
;
12466 tout(cct
) << "ll_releasedir" << std::endl
;
12467 tout(cct
) << (unsigned long)dirp
<< std::endl
;
12476 int Client::ll_fsyncdir(dir_result_t
*dirp
)
12478 Mutex::Locker
lock(client_lock
);
12479 ldout(cct
, 3) << "ll_fsyncdir " << dirp
<< dendl
;
12480 tout(cct
) << "ll_fsyncdir" << std::endl
;
12481 tout(cct
) << (unsigned long)dirp
<< std::endl
;
12486 return _fsync(dirp
->inode
.get(), false);
12489 int Client::ll_open(Inode
*in
, int flags
, Fh
**fhp
, const UserPerm
& perms
)
12491 assert(!(flags
& O_CREAT
));
12493 Mutex::Locker
lock(client_lock
);
12498 vinodeno_t vino
= _get_vino(in
);
12500 ldout(cct
, 3) << "ll_open " << vino
<< " " << ceph_flags_sys2wire(flags
) << dendl
;
12501 tout(cct
) << "ll_open" << std::endl
;
12502 tout(cct
) << vino
.ino
.val
<< std::endl
;
12503 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
12506 if (!cct
->_conf
->fuse_default_permissions
) {
12507 r
= may_open(in
, flags
, perms
);
12512 r
= _open(in
, flags
, 0, fhp
/* may be NULL */, perms
);
12515 Fh
*fhptr
= fhp
? *fhp
: NULL
;
12517 ll_unclosed_fh_set
.insert(fhptr
);
12519 tout(cct
) << (unsigned long)fhptr
<< std::endl
;
12520 ldout(cct
, 3) << "ll_open " << vino
<< " " << ceph_flags_sys2wire(flags
) <<
12521 " = " << r
<< " (" << fhptr
<< ")" << dendl
;
12525 int Client::_ll_create(Inode
*parent
, const char *name
, mode_t mode
,
12526 int flags
, InodeRef
*in
, int caps
, Fh
**fhp
,
12527 const UserPerm
& perms
)
12531 vinodeno_t vparent
= _get_vino(parent
);
12533 ldout(cct
, 3) << "_ll_create " << vparent
<< " " << name
<< " 0" << oct
<<
12534 mode
<< dec
<< " " << ceph_flags_sys2wire(flags
) << ", uid " << perms
.uid()
12535 << ", gid " << perms
.gid() << dendl
;
12536 tout(cct
) << "ll_create" << std::endl
;
12537 tout(cct
) << vparent
.ino
.val
<< std::endl
;
12538 tout(cct
) << name
<< std::endl
;
12539 tout(cct
) << mode
<< std::endl
;
12540 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
12542 bool created
= false;
12543 int r
= _lookup(parent
, name
, caps
, in
, perms
);
12545 if (r
== 0 && (flags
& O_CREAT
) && (flags
& O_EXCL
))
12548 if (r
== -ENOENT
&& (flags
& O_CREAT
)) {
12549 if (!cct
->_conf
->fuse_default_permissions
) {
12550 r
= may_create(parent
, perms
);
12554 r
= _create(parent
, name
, flags
, mode
, in
, fhp
, 0, 0, 0, NULL
, &created
,
12565 ldout(cct
, 20) << "_ll_create created = " << created
<< dendl
;
12567 if (!cct
->_conf
->fuse_default_permissions
) {
12568 r
= may_open(in
->get(), flags
, perms
);
12571 int release_r
= _release_fh(*fhp
);
12572 assert(release_r
== 0); // during create, no async data ops should have happened
12577 if (*fhp
== NULL
) {
12578 r
= _open(in
->get(), flags
, mode
, fhp
, perms
);
12586 ll_unclosed_fh_set
.insert(*fhp
);
12591 Inode
*inode
= in
->get();
12592 if (use_faked_inos())
12593 ino
= inode
->faked_ino
;
12598 tout(cct
) << (unsigned long)*fhp
<< std::endl
;
12599 tout(cct
) << ino
<< std::endl
;
12600 ldout(cct
, 3) << "_ll_create " << vparent
<< " " << name
<< " 0" << oct
<<
12601 mode
<< dec
<< " " << ceph_flags_sys2wire(flags
) << " = " << r
<< " (" <<
12602 *fhp
<< " " << hex
<< ino
<< dec
<< ")" << dendl
;
12607 int Client::ll_create(Inode
*parent
, const char *name
, mode_t mode
,
12608 int flags
, struct stat
*attr
, Inode
**outp
, Fh
**fhp
,
12609 const UserPerm
& perms
)
12611 Mutex::Locker
lock(client_lock
);
12617 int r
= _ll_create(parent
, name
, mode
, flags
, &in
, CEPH_STAT_CAP_INODE_ALL
,
12622 // passing an Inode in outp requires an additional ref
12627 fill_stat(in
, attr
);
12635 int Client::ll_createx(Inode
*parent
, const char *name
, mode_t mode
,
12636 int oflags
, Inode
**outp
, Fh
**fhp
,
12637 struct ceph_statx
*stx
, unsigned want
, unsigned lflags
,
12638 const UserPerm
& perms
)
12640 unsigned caps
= statx_to_mask(lflags
, want
);
12641 Mutex::Locker
lock(client_lock
);
12647 int r
= _ll_create(parent
, name
, mode
, oflags
, &in
, caps
, fhp
, perms
);
12651 // passing an Inode in outp requires an additional ref
12656 fill_statx(in
, caps
, stx
);
12665 loff_t
Client::ll_lseek(Fh
*fh
, loff_t offset
, int whence
)
12667 Mutex::Locker
lock(client_lock
);
12668 tout(cct
) << "ll_lseek" << std::endl
;
12669 tout(cct
) << offset
<< std::endl
;
12670 tout(cct
) << whence
<< std::endl
;
12675 return _lseek(fh
, offset
, whence
);
12678 int Client::ll_read(Fh
*fh
, loff_t off
, loff_t len
, bufferlist
*bl
)
12680 Mutex::Locker
lock(client_lock
);
12681 ldout(cct
, 3) << "ll_read " << fh
<< " " << fh
->inode
->ino
<< " " << " " << off
<< "~" << len
<< dendl
;
12682 tout(cct
) << "ll_read" << std::endl
;
12683 tout(cct
) << (unsigned long)fh
<< std::endl
;
12684 tout(cct
) << off
<< std::endl
;
12685 tout(cct
) << len
<< std::endl
;
12690 return _read(fh
, off
, len
, bl
);
12693 int Client::ll_read_block(Inode
*in
, uint64_t blockid
,
12697 file_layout_t
* layout
)
12699 Mutex::Locker
lock(client_lock
);
12704 vinodeno_t vino
= _get_vino(in
);
12705 object_t oid
= file_object_t(vino
.ino
, blockid
);
12706 C_SaferCond onfinish
;
12709 objecter
->read(oid
,
12710 object_locator_t(layout
->pool_id
),
12715 CEPH_OSD_FLAG_READ
,
12718 client_lock
.Unlock();
12719 int r
= onfinish
.wait();
12720 client_lock
.Lock();
12723 bl
.copy(0, bl
.length(), buf
);
12730 /* It appears that the OSD doesn't return success unless the entire
12731 buffer was written, return the write length on success. */
12733 int Client::ll_write_block(Inode
*in
, uint64_t blockid
,
12734 char* buf
, uint64_t offset
,
12735 uint64_t length
, file_layout_t
* layout
,
12736 uint64_t snapseq
, uint32_t sync
)
12738 Mutex
flock("Client::ll_write_block flock");
12739 vinodeno_t vino
= ll_get_vino(in
);
12743 Context
*onsafe
= nullptr;
12748 if (true || sync
) {
12749 /* if write is stable, the epilogue is waiting on
12751 onsafe
= new C_SafeCond(&flock
, &cond
, &done
, &r
);
12754 /* if write is unstable, we just place a barrier for
12755 * future commits to wait on */
12756 /*onsafe = new C_Block_Sync(this, vino.ino,
12757 barrier_interval(offset, offset + length), &r);
12761 object_t oid
= file_object_t(vino
.ino
, blockid
);
12762 SnapContext fakesnap
;
12764 if (length
> 0) bp
= buffer::copy(buf
, length
);
12768 ldout(cct
, 1) << "ll_block_write for " << vino
.ino
<< "." << blockid
12771 fakesnap
.seq
= snapseq
;
12773 /* lock just in time */
12774 client_lock
.Lock();
12776 client_lock
.Unlock();
12781 objecter
->write(oid
,
12782 object_locator_t(layout
->pool_id
),
12787 ceph::real_clock::now(),
12791 client_lock
.Unlock();
12792 if (!done
/* also !sync */) {
12806 int Client::ll_commit_blocks(Inode
*in
,
12810 Mutex::Locker
lock(client_lock
);
12812 BarrierContext *bctx;
12813 vinodeno_t vino = _get_vino(in);
12814 uint64_t ino = vino.ino;
12816 ldout(cct, 1) << "ll_commit_blocks for " << vino.ino << " from "
12817 << offset << " to " << length << dendl;
12823 map<uint64_t, BarrierContext*>::iterator p = barriers.find(ino);
12824 if (p != barriers.end()) {
12825 barrier_interval civ(offset, offset + length);
12826 p->second->commit_barrier(civ);
12832 int Client::ll_write(Fh
*fh
, loff_t off
, loff_t len
, const char *data
)
12834 Mutex::Locker
lock(client_lock
);
12835 ldout(cct
, 3) << "ll_write " << fh
<< " " << fh
->inode
->ino
<< " " << off
<<
12836 "~" << len
<< dendl
;
12837 tout(cct
) << "ll_write" << std::endl
;
12838 tout(cct
) << (unsigned long)fh
<< std::endl
;
12839 tout(cct
) << off
<< std::endl
;
12840 tout(cct
) << len
<< std::endl
;
12845 int r
= _write(fh
, off
, len
, data
, NULL
, 0);
12846 ldout(cct
, 3) << "ll_write " << fh
<< " " << off
<< "~" << len
<< " = " << r
12851 int Client::ll_flush(Fh
*fh
)
12853 Mutex::Locker
lock(client_lock
);
12854 ldout(cct
, 3) << "ll_flush " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
12855 tout(cct
) << "ll_flush" << std::endl
;
12856 tout(cct
) << (unsigned long)fh
<< std::endl
;
12864 int Client::ll_fsync(Fh
*fh
, bool syncdataonly
)
12866 Mutex::Locker
lock(client_lock
);
12867 ldout(cct
, 3) << "ll_fsync " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
12868 tout(cct
) << "ll_fsync" << std::endl
;
12869 tout(cct
) << (unsigned long)fh
<< std::endl
;
12874 int r
= _fsync(fh
, syncdataonly
);
12876 // If we're returning an error, clear it from the FH
12877 fh
->take_async_err();
12882 #ifdef FALLOC_FL_PUNCH_HOLE
// Core fallocate implementation for an open file handle (built only when
// FALLOC_FL_PUNCH_HOLE is defined, per the #ifdef immediately above).
//
// Visible flow:
//   - validates offset/length and the mode bits (only KEEP_SIZE and
//     PUNCH_HOLE are accepted, and PUNCH_HOLE requires KEEP_SIZE);
//   - checks pool-full state, snapshot inodes, the handle's write mode and
//     the byte quota;
//   - acquires CEPH_CAP_FILE_WR (wanting CEPH_CAP_FILE_BUFFER) via get_caps();
//   - PUNCH_HOLE: either zeroes the affected range inside the inline data, or
//     starts uninlining (if needed) and issues filer->zero() on the range;
//   - without KEEP_SIZE: handles growth past in->size, marks caps dirty and
//     checks whether quota / max_size is being approached;
//   - waits for any pending uninline to complete, then drops the FILE_WR ref.
//
// NOTE(review): the embedded line numbers skip in many places, so several
// statements (most early returns, some declarations, closing braces) are not
// shown in this listing. The comments below describe only what is visible.
12884 int Client::_fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
)
12886 if (offset
< 0 || length
<= 0)
// Any mode bit other than KEEP_SIZE / PUNCH_HOLE is unsupported.
12889 if (mode
& ~(FALLOC_FL_KEEP_SIZE
| FALLOC_FL_PUNCH_HOLE
))
12890 return -EOPNOTSUPP
;
// PUNCH_HOLE is only accepted together with KEEP_SIZE.
12892 if ((mode
& FALLOC_FL_PUNCH_HOLE
) && !(mode
& FALLOC_FL_KEEP_SIZE
))
12893 return -EOPNOTSUPP
;
12895 Inode
*in
= fh
->inode
.get();
// A full pool only matters when we are not punching a hole.
12897 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
) &&
12898 !(mode
& FALLOC_FL_PUNCH_HOLE
)) {
12902 if (in
->snapid
!= CEPH_NOSNAP
)
12905 if ((fh
->mode
& CEPH_FILE_MODE_WR
) == 0)
12908 uint64_t size
= offset
+ length
;
// Quota is checked only for a plain (size-extending) allocation.
12909 if (!(mode
& (FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
)) &&
12911 is_quota_bytes_exceeded(in
, size
- in
->size
, fh
->actor_perms
)) {
12916 int r
= get_caps(in
, CEPH_CAP_FILE_WR
, CEPH_CAP_FILE_BUFFER
, &have
, -1);
// State used to wait for an asynchronous uninline_data() to finish.
12920 Mutex
uninline_flock("Client::_fallocate_uninline_data flock");
12921 Cond uninline_cond
;
12922 bool uninline_done
= false;
12923 int uninline_ret
= 0;
12924 Context
*onuninline
= NULL
;
12926 if (mode
& FALLOC_FL_PUNCH_HOLE
) {
// Inline data and we hold FILE_BUFFER: rewrite the inline buffer locally.
12927 if (in
->inline_version
< CEPH_INLINE_NONE
&&
12928 (have
& CEPH_CAP_FILE_BUFFER
)) {
12930 int len
= in
->inline_data
.length();
12931 if (offset
< len
) {
12933 in
->inline_data
.copy(0, offset
, bl
);
12935 if (offset
+ size
> len
)
12936 size
= len
- offset
;
12938 bl
.append_zero(size
);
12939 if (offset
+ size
< len
)
12940 in
->inline_data
.copy(offset
+ size
, len
- offset
- size
, bl
);
12941 in
->inline_data
= bl
;
12942 in
->inline_version
++;
12944 in
->mtime
= ceph_clock_now();
12946 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
// Still inline at this point: kick off uninlining before zeroing objects.
12948 if (in
->inline_version
< CEPH_INLINE_NONE
) {
12949 onuninline
= new C_SafeCond(&uninline_flock
,
12953 uninline_data(in
, onuninline
);
12956 Mutex
flock("Client::_punch_hole flock");
12959 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
);
12961 unsafe_sync_write
++;
12962 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
12964 _invalidate_inode_cache(in
, offset
, length
);
12965 filer
->zero(in
->ino
, &in
->layout
,
12966 in
->snaprealm
->get_snap_context(),
12968 ceph::real_clock::now(),
12969 0, true, onfinish
);
12970 in
->mtime
= ceph_clock_now();
12972 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
// client_lock is released here and re-taken below; presumably the wait on
// onfinish happens in between (those lines are not visible in this listing).
12974 client_lock
.Unlock();
12979 client_lock
.Lock();
12980 _sync_write_commit(in
);
12982 } else if (!(mode
& FALLOC_FL_KEEP_SIZE
)) {
12983 uint64_t size
= offset
+ length
;
12984 if (size
> in
->size
) {
12986 in
->mtime
= ceph_clock_now();
12988 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
12990 if (is_quota_bytes_approaching(in
, fh
->actor_perms
)) {
12991 check_caps(in
, CHECK_CAPS_NODELAY
);
12992 } else if (is_max_size_approaching(in
)) {
// Block (without client_lock held) until the uninline callback has fired.
12999 client_lock
.Unlock();
13000 uninline_flock
.Lock();
13001 while (!uninline_done
)
13002 uninline_cond
.Wait(uninline_flock
);
13003 uninline_flock
.Unlock();
13004 client_lock
.Lock();
// Success or -ECANCELED: the inode no longer carries inline data.
13006 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
13007 in
->inline_data
.clear();
13008 in
->inline_version
= CEPH_INLINE_NONE
;
13009 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
13015 put_cap_ref(in
, CEPH_CAP_FILE_WR
);
13020 int Client::_fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
)
13022 return -EOPNOTSUPP
;
// Low-level (Fh-based) fallocate entry point: takes client_lock, logs and
// traces the call, then delegates to _fallocate() and returns its result.
// NOTE(review): the embedded line numbers jump from 13033 to 13038, so some
// statements ahead of the final return are not shown in this listing.
13028 int Client::ll_fallocate(Fh
*fh
, int mode
, loff_t offset
, loff_t length
)
13030 Mutex::Locker
lock(client_lock
);
13031 ldout(cct
, 3) << "ll_fallocate " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
13032 tout(cct
) << "ll_fallocate " << mode
<< " " << offset
<< " " << length
<< std::endl
;
13033 tout(cct
) << (unsigned long)fh
<< std::endl
;
13038 return _fallocate(fh
, mode
, offset
, length
);
// fd-based fallocate entry point: takes client_lock, traces the call, resolves
// the descriptor with get_filehandle() and delegates to _fallocate().
// On Linux builds with O_PATH defined, the handle's flags are tested for
// O_PATH before delegating (the action taken on a match is not visible here).
// NOTE(review): the trace line emits `fd` immediately followed by `mode` with
// no separator between them (and two spaces after "fallocate"), so the two
// numbers run together in the trace output. Confirm whether any trace consumer
// depends on the current format before changing it.
13041 int Client::fallocate(int fd
, int mode
, loff_t offset
, loff_t length
)
13043 Mutex::Locker
lock(client_lock
);
13044 tout(cct
) << "fallocate " << " " << fd
<< mode
<< " " << offset
<< " " << length
<< std::endl
;
13049 Fh
*fh
= get_filehandle(fd
);
13052 #if defined(__linux__) && defined(O_PATH)
13053 if (fh
->flags
& O_PATH
)
13056 return _fallocate(fh
, mode
, offset
, length
);
// Low-level release of a file handle: takes client_lock, logs and traces the
// call, removes the handle from ll_unclosed_fh_set if it is tracked there, and
// returns the result of _release_fh().
// NOTE(review): some lines of this function are not shown in this listing
// (the embedded line numbers skip, e.g. 13063 and 13066-13069).
13059 int Client::ll_release(Fh
*fh
)
13061 Mutex::Locker
lock(client_lock
);
13062 ldout(cct
, 3) << "ll_release (fh)" << fh
<< " " << fh
->inode
->ino
<< " " <<
13064 tout(cct
) << "ll_release (fh)" << std::endl
;
13065 tout(cct
) << (unsigned long)fh
<< std::endl
;
13070 if (ll_unclosed_fh_set
.count(fh
))
13071 ll_unclosed_fh_set
.erase(fh
);
13072 return _release_fh(fh
);
// Low-level lock query: takes client_lock, logs and traces the call, then
// returns the result of _getlk() for the given handle, flock description and
// owner.
// NOTE(review): the trace tag is spelled "ll_getk" while the function is
// ll_getlk; looks like a typo, but changing it alters trace output -- confirm.
// NOTE(review): lines 13081-13084 are not shown in this listing.
13075 int Client::ll_getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
)
13077 Mutex::Locker
lock(client_lock
);
13079 ldout(cct
, 3) << "ll_getlk (fh)" << fh
<< " " << fh
->inode
->ino
<< dendl
;
13080 tout(cct
) << "ll_getk (fh)" << (unsigned long)fh
<< std::endl
;
13085 return _getlk(fh
, fl
, owner
);
// Low-level lock set: takes client_lock, logs and traces the call, then
// returns the result of _setlk() for the given handle, flock description,
// owner and sleep flag.
// NOTE(review): the trace tag is spelled "ll_setk" while the function is
// ll_setlk; looks like a typo, but changing it alters trace output -- confirm.
// NOTE(review): lines 13094-13097 are not shown in this listing.
13088 int Client::ll_setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
)
13090 Mutex::Locker
lock(client_lock
);
13092 ldout(cct
, 3) << "ll_setlk (fh) " << fh
<< " " << fh
->inode
->ino
<< dendl
;
13093 tout(cct
) << "ll_setk (fh)" << (unsigned long)fh
<< std::endl
;
13098 return _setlk(fh
, fl
, owner
, sleep
);
// Low-level flock: takes client_lock, logs and traces the call, then returns
// the result of _flock() for the given handle, command and owner.
// NOTE(review): lines 13107-13110 are not shown in this listing.
13101 int Client::ll_flock(Fh
*fh
, int cmd
, uint64_t owner
)
13103 Mutex::Locker
lock(client_lock
);
13105 ldout(cct
, 3) << "ll_flock (fh) " << fh
<< " " << fh
->inode
->ino
<< dendl
;
13106 tout(cct
) << "ll_flock (fh)" << (unsigned long)fh
<< std::endl
;
13111 return _flock(fh
, cmd
, owner
);
// Sets deleg_timeout under client_lock. The requested value is first compared
// against the MDS map's session autoclose timeout (see the in-body comment for
// the rationale); the statement executed when timeout >= autoclose is not
// visible in this listing, nor is the function's final return.
13114 int Client::set_deleg_timeout(uint32_t timeout
)
13116 Mutex::Locker
lock(client_lock
);
13119 * The whole point is to prevent blacklisting so we must time out the
13120 * delegation before the session autoclose timeout kicks in.
13122 if (timeout
>= mdsmap
->get_session_autoclose())
13125 deleg_timeout
= timeout
;
// Sets or clears a delegation on the inode behind a file handle, under
// client_lock. Visible behaviour: CEPH_DELEGATION_NONE calls
// inode->unset_deleg(fh); otherwise inode->set_deleg(fh, cmd, cb, priv) is
// attempted inside a try block that catches std::bad_alloc.
// NOTE(review): std::bad_alloc is caught by value; catching by const reference
// is the usual recommendation.
// NOTE(review): the switch header, the remaining case labels, the catch body
// and the return are not shown in this listing.
13129 int Client::ll_delegation(Fh
*fh
, unsigned cmd
, ceph_deleg_cb_t cb
, void *priv
)
13133 Mutex::Locker
lock(client_lock
);
13138 Inode
*inode
= fh
->inode
.get();
13141 case CEPH_DELEGATION_NONE
:
13142 inode
->unset_deleg(fh
);
13147 ret
= inode
->set_deleg(fh
, cmd
, cb
, priv
);
13148 } catch (std::bad_alloc
) {
// Completion context that interrupts an in-flight file-lock request.
// The constructor stores the owning Client and the MetaRequest. finish() takes
// the client's client_lock, asserts that the request is a
// CEPH_MDS_OP_SETFILELOCK, calls Client::_interrupt_filelock() on it and then
// drops a request reference via put_request().
// NOTE(review): the member declarations and the constructor body are not shown
// in this listing; put_request() here presumably balances a reference taken in
// the constructor -- confirm against the full source.
13156 class C_Client_RequestInterrupt
: public Context
{
13161 C_Client_RequestInterrupt(Client
*c
, MetaRequest
*r
) : client(c
), req(r
) {
13164 void finish(int r
) override
{
13165 Mutex::Locker
l(client
->client_lock
);
13166 assert(req
->head
.op
== CEPH_MDS_OP_SETFILELOCK
);
13167 client
->_interrupt_filelock(req
);
13168 client
->put_request(req
);
13172 void Client::ll_interrupt(void *d
)
13174 MetaRequest
*req
= static_cast<MetaRequest
*>(d
);
13175 ldout(cct
, 3) << "ll_interrupt tid " << req
->get_tid() << dendl
;
13176 tout(cct
) << "ll_interrupt tid " << req
->get_tid() << std::endl
;
13177 interrupt_finisher
.queue(new C_Client_RequestInterrupt(this, req
));
13180 // =========================================
13183 // expose file layouts
// Looks up `relpath` with path_walk() under client_lock, using the caller's
// permissions, and logs completion at level 3.
// NOTE(review): the declaration of `in`, the error handling after path_walk()
// and the statement that fills *lp are not shown in this listing; presumably
// *lp receives the inode's layout -- confirm against the full source.
13185 int Client::describe_layout(const char *relpath
, file_layout_t
*lp
,
13186 const UserPerm
& perms
)
13188 Mutex::Locker
lock(client_lock
);
13193 filepath
path(relpath
);
13195 int r
= path_walk(path
, &in
, perms
);
13201 ldout(cct
, 3) << "describe_layout(" << relpath
<< ") = 0" << dendl
;
// fd-based counterpart of describe_layout(): under client_lock, resolves the
// descriptor with get_filehandle(), fetches the handle's inode and logs
// completion at level 3.
// NOTE(review): the null-handle check and the statement that fills *lp are not
// shown in this listing.
13205 int Client::fdescribe_layout(int fd
, file_layout_t
*lp
)
13207 Mutex::Locker
lock(client_lock
);
13212 Fh
*f
= get_filehandle(fd
);
13215 Inode
*in
= f
->inode
.get();
13219 ldout(cct
, 3) << "fdescribe_layout(" << fd
<< ") = 0" << dendl
;
// Returns the id of the default data pool, which is the first data pool
// recorded in the MDS map. Runs under client_lock.
// NOTE(review): lines 13226-13229 are not shown in this listing.
13223 int64_t Client::get_default_pool_id()
13225 Mutex::Locker
lock(client_lock
);
13230 /* first data pool is the default */
13231 return mdsmap
->get_first_data_pool();
// Resolves a pool name to its id by invoking OSDMap::lookup_pg_pool_name on
// the objecter's current OSD map. Runs under client_lock.
// NOTE(review): the tail of the with_osdmap() call (its remaining argument) is
// not shown in this listing.
13236 int64_t Client::get_pool_id(const char *pool_name
)
13238 Mutex::Locker
lock(client_lock
);
13243 return objecter
->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name
),
// Returns the name of pool `pool` according to the objecter's current OSD map,
// or an empty string when the map has no such pool. Runs under client_lock.
// NOTE(review): lines 13250-13253 are not shown in this listing.
13247 string
Client::get_pool_name(int64_t pool
)
13249 Mutex::Locker
lock(client_lock
);
13254 return objecter
->with_osdmap([pool
](const OSDMap
& o
) {
13255 return o
.have_pg_pool(pool
) ? o
.get_pool_name(pool
) : string();
// Returns the size (get_size()) of pool `pool` from the objecter's current OSD
// map, or -ENOENT when the map has no such pool. Runs under client_lock.
// NOTE(review): lines 13262-13265 are not shown in this listing.
13259 int Client::get_pool_replication(int64_t pool
)
13261 Mutex::Locker
lock(client_lock
);
13266 return objecter
->with_osdmap([pool
](const OSDMap
& o
) {
13267 return o
.have_pg_pool(pool
) ? o
.get_pg_pool(pool
)->get_size() : -ENOENT
;
// For the file open on `fd`, finds the acting OSDs of the object that holds
// file offset `off` and stores them in `osds`. The offset is mapped with
// Striper::file_to_extents() using a length of 1 (exactly one extent is
// asserted), the extent's object is mapped to a PG, and the PG's acting set is
// read from the OSD map.
// *len is then set to the distance from `off` to the end of its stripe unit,
// i.e. su - (off % su).
// NOTE(review): the null-handle check, the guard around the *len assignment
// and the return are not shown in this listing.
13271 int Client::get_file_extent_osds(int fd
, loff_t off
, loff_t
*len
, vector
<int>& osds
)
13273 Mutex::Locker
lock(client_lock
);
13278 Fh
*f
= get_filehandle(fd
);
13281 Inode
*in
= f
->inode
.get();
13283 vector
<ObjectExtent
> extents
;
13284 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, off
, 1, in
->truncate_size
, extents
);
13285 assert(extents
.size() == 1);
13287 objecter
->with_osdmap([&](const OSDMap
& o
) {
13288 pg_t pg
= o
.object_locator_to_pg(extents
[0].oid
, extents
[0].oloc
);
13289 o
.pg_to_acting_osds(pg
, osds
);
13296 * Return the remainder of the extent (stripe unit)
13298 * If length = 1 is passed to Striper::file_to_extents we get a single
13299 * extent back, but its length is one so we still need to compute the length
13300 * to the end of the stripe unit.
13302 * If length = su then we may get 1 or 2 objects back in the extents vector
13303 * which would have to be examined. Even then, the offsets are local to the
13304 * object, so matching up to the file offset is extra work.
13306 * It seems simpler to stick with length = 1 and manually compute the
13310 uint64_t su
= in
->layout
.stripe_unit
;
13311 *len
= su
- (off
% su
);
// Fills `path` with the CRUSH location of OSD `id` as (type, name) pairs, by
// calling get_full_location_ordered() on the OSD map's crush object, and
// returns that call's result. Runs under client_lock.
// NOTE(review): lines 13320-13325 (presumably argument / state checks) are not
// shown in this listing.
13317 int Client::get_osd_crush_location(int id
, vector
<pair
<string
, string
> >& path
)
13319 Mutex::Locker
lock(client_lock
);
13326 return objecter
->with_osdmap([&](const OSDMap
& o
) {
13327 return o
.crush
->get_full_location_ordered(id
, path
);
// For the file open on `fd`, appends to `address` the network address of every
// acting OSD of the object that holds file offset `offset`. The offset is
// mapped with Striper::file_to_extents() using a length of 1 (exactly one
// extent is asserted), then resolved to a PG and its acting OSD set via the
// OSD map.
// NOTE(review): the null-handle check, the declaration of `osds` and the
// lambda's return value are not shown in this listing.
13331 int Client::get_file_stripe_address(int fd
, loff_t offset
,
13332 vector
<entity_addr_t
>& address
)
13334 Mutex::Locker
lock(client_lock
);
13339 Fh
*f
= get_filehandle(fd
);
13342 Inode
*in
= f
->inode
.get();
13345 vector
<ObjectExtent
> extents
;
13346 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, offset
, 1,
13347 in
->truncate_size
, extents
);
13348 assert(extents
.size() == 1);
13350 // now we have the object and its 'layout'
13351 return objecter
->with_osdmap([&](const OSDMap
& o
) {
13352 pg_t pg
= o
.object_locator_to_pg(extents
[0].oid
, extents
[0].oloc
);
13354 o
.pg_to_acting_osds(pg
, osds
);
13357 for (unsigned i
= 0; i
< osds
.size(); i
++) {
13358 entity_addr_t addr
= o
.get_addr(osds
[i
]);
13359 address
.push_back(addr
);
// Looks up the address of OSD `osd` in the current OSD map and stores it in
// `addr`. The map is first asked whether the OSD exists; the value returned
// for a missing OSD and for success is not visible in this listing.
// Runs under client_lock.
13365 int Client::get_osd_addr(int osd
, entity_addr_t
& addr
)
13367 Mutex::Locker
lock(client_lock
);
13372 return objecter
->with_osdmap([&](const OSDMap
& o
) {
13373 if (!o
.exists(osd
))
13376 addr
= o
.get_addr(osd
);
// Maps the byte range [offset, offset + length) of the file open on `fd` to
// object extents with Striper::file_to_extents(), appending them to `result`,
// and logs completion at level 3. Runs under client_lock.
// Note the parameter order: `length` precedes `offset`.
// NOTE(review): the null-handle check and the return are not shown in this
// listing.
13381 int Client::enumerate_layout(int fd
, vector
<ObjectExtent
>& result
,
13382 loff_t length
, loff_t offset
)
13384 Mutex::Locker
lock(client_lock
);
13389 Fh
*f
= get_filehandle(fd
);
13392 Inode
*in
= f
->inode
.get();
13394 // map to a list of extents
13395 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, offset
, length
, in
->truncate_size
, result
);
13397 ldout(cct
, 3) << "enumerate_layout(" << fd
<< ", " << length
<< ", " << offset
<< ") = 0" << dendl
;
13402 /* find an osd with the same ip. -ENXIO if none. */
// The result is cached per OSD map epoch: when the map's epoch differs from
// local_osd_epoch, local_osd is recomputed with find_osd_on_ip() for this
// messenger's own address and local_osd_epoch is updated. Runs under
// client_lock.
// NOTE(review): the return statement is not shown in this listing; presumably
// it returns local_osd -- confirm against the full source.
13403 int Client::get_local_osd()
13405 Mutex::Locker
lock(client_lock
);
13410 objecter
->with_osdmap([this](const OSDMap
& o
) {
13411 if (o
.get_epoch() != local_osd_epoch
) {
13412 local_osd
= o
.find_osd_on_ip(messenger
->get_myaddr());
13413 local_osd_epoch
= o
.get_epoch();
13424 // ===============================
13426 void Client::ms_handle_connect(Connection
*con
)
13428 ldout(cct
, 10) << "ms_handle_connect on " << con
->get_peer_addr() << dendl
;
// Messenger callback for a connection reset: logs the peer address at level 0.
// NOTE(review): the return statement is not shown in this listing.
13431 bool Client::ms_handle_reset(Connection
*con
)
13433 ldout(cct
, 0) << "ms_handle_reset on " << con
->get_peer_addr() << dendl
;
// Messenger callback for a reset initiated by the remote peer. Logs the peer,
// takes client_lock and, for MDS peers, locates the matching MetaSession by
// comparing each session's MDS address (from the MDS map) with the
// connection's peer address. The reaction then depends on the session state:
//   - STATE_CLOSING: the session is treated as closed;
//   - STATE_OPENING: the session is closed and re-opened, carrying the
//     waiting_for_open waiters over to the new session;
//   - STATE_OPEN: closed for reconnect when client_reconnect_stale is set,
//     otherwise (per the log message) marked STATE_STALE;
//   - STATE_NEW / STATE_CLOSED: case labels only; their bodies are not visible.
// NOTE(review): many lines (loop body details, break statements, braces,
// default branches) are not shown in this listing.
13437 void Client::ms_handle_remote_reset(Connection
*con
)
13439 ldout(cct
, 0) << "ms_handle_remote_reset on " << con
->get_peer_addr() << dendl
;
13440 Mutex::Locker
l(client_lock
);
13441 switch (con
->get_peer_type()) {
13442 case CEPH_ENTITY_TYPE_MDS
:
13444 // kludge to figure out which mds this is; fixme with a Connection* state
13445 mds_rank_t mds
= MDS_RANK_NONE
;
13446 MetaSession
*s
= NULL
;
13447 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
13448 p
!= mds_sessions
.end();
13450 if (mdsmap
->get_addr(p
->first
) == con
->get_peer_addr()) {
13456 assert (s
!= NULL
);
13457 switch (s
->state
) {
13458 case MetaSession::STATE_CLOSING
:
13459 ldout(cct
, 1) << "reset from mds we were closing; we'll call that closed" << dendl
;
13460 _closed_mds_session(s
);
13463 case MetaSession::STATE_OPENING
:
13465 ldout(cct
, 1) << "reset from mds we were opening; retrying" << dendl
;
// Preserve the waiters across the close / re-open of the session.
13466 list
<Context
*> waiters
;
13467 waiters
.swap(s
->waiting_for_open
);
13468 _closed_mds_session(s
);
13469 MetaSession
*news
= _get_or_open_mds_session(mds
);
13470 news
->waiting_for_open
.swap(waiters
);
13474 case MetaSession::STATE_OPEN
:
13476 const md_config_t
*conf
= cct
->_conf
;
13477 if (conf
->client_reconnect_stale
) {
13478 ldout(cct
, 1) << "reset from mds we were open; close mds session for reconnect" << dendl
;
13479 _closed_mds_session(s
);
13481 ldout(cct
, 1) << "reset from mds we were open; mark session as stale" << dendl
;
13482 s
->state
= MetaSession::STATE_STALE
;
13487 case MetaSession::STATE_NEW
:
13488 case MetaSession::STATE_CLOSED
:
// Messenger callback for a refused connection: logs the peer address at
// level 1.
// NOTE(review): the return statement is not shown in this listing.
13498 bool Client::ms_handle_refused(Connection
*con
)
13500 ldout(cct
, 1) << "ms_handle_refused on " << con
->get_peer_addr() << dendl
;
// Messenger callback that supplies an authorizer for an outgoing connection:
// *authorizer is built by the MonClient for the destination entity type.
// Monitor destinations are special-cased first; the statement executed for
// them and the function's return value are not visible in this listing.
// `force_new` is not referenced in the visible lines.
13504 bool Client::ms_get_authorizer(int dest_type
, AuthAuthorizer
**authorizer
, bool force_new
)
13506 if (dest_type
== CEPH_ENTITY_TYPE_MON
)
13508 *authorizer
= monclient
->build_authorizer(dest_type
);
// Walks up from `in` looking for an ancestor (other than `in` itself) whose
// quota is enabled. The visible logic for choosing the parent of the current
// inode `cur` is:
//   - if cur has dentries, use the parent directory of a dentry that is
//     backed by a still-valid lease from a known MDS session, or whose
//     directory holds CEPH_CAP_FILE_SHARED with a matching shared_gen;
//   - otherwise, if cur is listed in root_parents, use that entry;
//   - the walk stops at root_ancestor; an inode with nlink == 0 resolves to
//     root_ancestor;
//   - when no parent is known locally, a CEPH_MDS_OP_LOOKUPNAME request is
//     sent to the MDS; on failure the error is logged and root_ancestor is
//     used.
// NOTE(review): the enclosing loop, the initialisation of `cur`, several
// break/continue statements and the return are not shown in this listing.
// make_request() is issued from inside this function, so the walk can be
// restarted ("start over") -- confirm the exact restart condition against the
// full source.
13512 Inode
*Client::get_quota_root(Inode
*in
, const UserPerm
& perms
)
13515 utime_t now
= ceph_clock_now();
13518 if (cur
!= in
&& cur
->quota
.is_enable())
13521 Inode
*parent_in
= NULL
;
13522 if (!cur
->dn_set
.empty()) {
13523 for (auto p
= cur
->dn_set
.begin(); p
!= cur
->dn_set
.end(); ++p
) {
13525 if (dn
->lease_mds
>= 0 &&
13526 dn
->lease_ttl
> now
&&
13527 mds_sessions
.count(dn
->lease_mds
)) {
13528 parent_in
= dn
->dir
->parent_inode
;
13530 Inode
*diri
= dn
->dir
->parent_inode
;
13531 if (diri
->caps_issued_mask(CEPH_CAP_FILE_SHARED
) &&
13532 diri
->shared_gen
== dn
->cap_shared_gen
) {
13533 parent_in
= dn
->dir
->parent_inode
;
13539 } else if (root_parents
.count(cur
)) {
13540 parent_in
= root_parents
[cur
].get();
13548 if (cur
== root_ancestor
)
13552 if (cur
->nlink
== 0) {
13553 cur
= root_ancestor
;
// No locally known parent: ask the MDS for the parent by name lookup.
13557 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
13558 filepath
path(cur
->ino
);
13559 req
->set_filepath(path
);
13560 req
->set_inode(cur
);
13562 InodeRef parent_ref
;
13563 int ret
= make_request(req
, perms
, &parent_ref
);
13565 ldout(cct
, 1) << __func__
<< " " << in
->vino()
13566 << " failed to find parent of " << cur
->vino()
13567 << " err " << ret
<< dendl
;
13568 // FIXME: what to do?
13569 cur
= root_ancestor
;
13573 now
= ceph_clock_now();
13575 cur
= parent_ref
.get();
13577 cur
= in
; // start over
13580 ldout(cct
, 10) << __func__
<< " " << in
->vino() << " -> " << cur
->vino() << dendl
;
13585 * Traverse quota ancestors of the Inode, return true
13586 * if any of them passes the passed function
// Applies the predicate `test` while climbing the quota hierarchy of `in`.
// `in` must be non-null (asserted). Each step moves to get_quota_root(in,
// perms), and the traversal ends once root_ancestor is reached.
// NOTE(review): the loop header, the invocation of `test` and the return
// statements are not shown in this listing.
13588 bool Client::check_quota_condition(Inode
*in
, const UserPerm
& perms
,
13589 std::function
<bool (const Inode
&in
)> test
)
13592 assert(in
!= NULL
);
13597 if (in
== root_ancestor
) {
13598 // We're done traversing, drop out
13601 // Continue up the tree
13602 in
= get_quota_root(in
, perms
);
13609 bool Client::is_quota_files_exceeded(Inode
*in
, const UserPerm
& perms
)
13611 return check_quota_condition(in
, perms
,
13612 [](const Inode
&in
) {
13613 return in
.quota
.max_files
&& in
.rstat
.rsize() >= in
.quota
.max_files
;
13617 bool Client::is_quota_bytes_exceeded(Inode
*in
, int64_t new_bytes
,
13618 const UserPerm
& perms
)
13620 return check_quota_condition(in
, perms
,
13621 [&new_bytes
](const Inode
&in
) {
13622 return in
.quota
.max_bytes
&& (in
.rstat
.rbytes
+ new_bytes
)
13623 > in
.quota
.max_bytes
;
// Reports, via check_quota_condition(), whether `in` is getting close to a
// byte quota. For an inode with a non-zero quota.max_bytes the predicate:
//   - special-cases rbytes >= max_bytes (the outcome is not visible here);
//   - asserts size >= reported_size;
//   - computes space = max_bytes - rbytes and size = size - reported_size;
//   - returns true when size exceeds space / 16 (space >> 4).
// NOTE(review): the branch bodies for the special case and for inodes without
// a byte quota are not shown in this listing.
13627 bool Client::is_quota_bytes_approaching(Inode
*in
, const UserPerm
& perms
)
13629 return check_quota_condition(in
, perms
,
13630 [](const Inode
&in
) {
13631 if (in
.quota
.max_bytes
) {
13632 if (in
.rstat
.rbytes
>= in
.quota
.max_bytes
) {
13636 assert(in
.size
>= in
.reported_size
);
13637 const uint64_t space
= in
.quota
.max_bytes
- in
.rstat
.rbytes
;
13638 const uint64_t size
= in
.size
- in
.reported_size
;
13639 return (space
>> 4) < size
;
// Checks whether this client may read and/or write the data pool (and
// namespace) used by `in`, as requested by the cap bits in `need`.
//
// Results are cached in pool_perms, keyed by (pool_id, pool_ns). If another
// caller is already probing the same key (POOL_CHECKING), this call waits on
// waiting_for_pool_perm. Otherwise the key is marked POOL_CHECKING and two
// operations are sent to the inode's first object ("<ino>.00000000"): a stat
// (read probe) and an exclusive create (write probe). client_lock is dropped
// while waiting for both.
//
// Interpretation of the probe results, as far as visible here:
//   - write: 0 or -EEXIST grants POOL_WRITE;
//   - read: 0 or -ENOENT is accepted (the flag assignment itself is not
//     visible);
//   - any error other than -EPERM is logged and treated as indeterminate: the
//     cache entry is erased so that later calls re-check, and waiters are
//     woken.
// On a definite result the entry is stored with POOL_CHECKED and waiters are
// woken. Missing read or write permission relative to `need` is logged at
// level 10.
//
// NOTE(review): the early return when client_check_pool_perm is off, the
// enclosing loop, the declarations of `have` and `oid_buf`, and all return
// statements are not shown in this listing.
13653 int Client::check_pool_perm(Inode
*in
, int need
)
13655 if (!cct
->_conf
->client_check_pool_perm
)
13658 int64_t pool_id
= in
->layout
.pool_id
;
13659 std::string pool_ns
= in
->layout
.pool_ns
;
13660 std::pair
<int64_t, std::string
> perm_key(pool_id
, pool_ns
);
13663 auto it
= pool_perms
.find(perm_key
);
13664 if (it
== pool_perms
.end())
13666 if (it
->second
== POOL_CHECKING
) {
13667 // avoid concurrent checkings
13668 wait_on_list(waiting_for_pool_perm
);
13671 assert(have
& POOL_CHECKED
);
13677 if (in
->snapid
!= CEPH_NOSNAP
) {
13678 // pool permission check needs to write to the first object. But for snapshot,
13679 // head of the first object may have already been deleted. To avoid creating
13680 // orphan object, skip the check for now.
13684 pool_perms
[perm_key
] = POOL_CHECKING
;
13687 snprintf(oid_buf
, sizeof(oid_buf
), "%llx.00000000", (unsigned long long)in
->ino
);
13688 object_t oid
= oid_buf
;
13690 SnapContext nullsnapc
;
// Read probe: stat the first object.
13692 C_SaferCond rd_cond
;
13693 ObjectOperation rd_op
;
13694 rd_op
.stat(NULL
, (ceph::real_time
*)nullptr, NULL
);
13696 objecter
->mutate(oid
, OSDMap::file_to_object_locator(in
->layout
), rd_op
,
13697 nullsnapc
, ceph::real_clock::now(), 0, &rd_cond
);
// Write probe: exclusive create of the first object.
13699 C_SaferCond wr_cond
;
13700 ObjectOperation wr_op
;
13701 wr_op
.create(true);
13703 objecter
->mutate(oid
, OSDMap::file_to_object_locator(in
->layout
), wr_op
,
13704 nullsnapc
, ceph::real_clock::now(), 0, &wr_cond
);
// Wait for both probes without holding client_lock.
13706 client_lock
.Unlock();
13707 int rd_ret
= rd_cond
.wait();
13708 int wr_ret
= wr_cond
.wait();
13709 client_lock
.Lock();
13711 bool errored
= false;
13713 if (rd_ret
== 0 || rd_ret
== -ENOENT
)
13715 else if (rd_ret
!= -EPERM
) {
13716 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13717 << " rd_err = " << rd_ret
<< " wr_err = " << wr_ret
<< dendl
;
13721 if (wr_ret
== 0 || wr_ret
== -EEXIST
)
13722 have
|= POOL_WRITE
;
13723 else if (wr_ret
!= -EPERM
) {
13724 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13725 << " rd_err = " << rd_ret
<< " wr_err = " << wr_ret
<< dendl
;
13730 // Indeterminate: erase CHECKING state so that subsequent calls re-check.
13731 // Raise EIO because actual error code might be misleading for
13732 // userspace filesystem user.
13733 pool_perms
.erase(perm_key
);
13734 signal_cond_list(waiting_for_pool_perm
);
13738 pool_perms
[perm_key
] = have
| POOL_CHECKED
;
13739 signal_cond_list(waiting_for_pool_perm
);
13742 if ((need
& CEPH_CAP_FILE_RD
) && !(have
& POOL_READ
)) {
13743 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13744 << " need " << ccap_string(need
) << ", but no read perm" << dendl
;
13747 if ((need
& CEPH_CAP_FILE_WR
) && !(have
& POOL_WRITE
)) {
13748 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13749 << " need " << ccap_string(need
) << ", but no write perm" << dendl
;
// Evaluates the inode's POSIX access ACL for the requested access `want`.
// When acl_type is POSIX_ACL and the inode carries an ACL_EA_ACCESS xattr, the
// decision is delegated to posix_acl_permits() with the inode's uid/gid and
// the caller's permissions.
// NOTE(review): the value returned when no ACL applies is not shown in this
// listing.
13756 int Client::_posix_acl_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
)
13758 if (acl_type
== POSIX_ACL
) {
13759 if (in
->xattrs
.count(ACL_EA_ACCESS
)) {
13760 const bufferptr
& access_acl
= in
->xattrs
[ACL_EA_ACCESS
];
13762 return posix_acl_permits(access_acl
, in
->uid
, in
->gid
, perms
, want
);
// Keeps the inode's POSIX access ACL consistent with a mode change.
// Visible flow: nothing ACL-specific is done when acl_type is NO_ACL; xattrs
// are fetched with _getattr() (forced when xattr_version is 0); if an
// ACL_EA_ACCESS xattr exists, a private copy of it is adjusted with
// posix_acl_access_chmod() and written back through _do_setxattr(). The result
// is logged at level 10.
// NOTE(review): the error checks between the steps and the return statements
// are not shown in this listing.
13768 int Client::_posix_acl_chmod(Inode
*in
, mode_t mode
, const UserPerm
& perms
)
13770 if (acl_type
== NO_ACL
)
13773 int r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
13777 if (acl_type
== POSIX_ACL
) {
13778 if (in
->xattrs
.count(ACL_EA_ACCESS
)) {
13779 const bufferptr
& access_acl
= in
->xattrs
[ACL_EA_ACCESS
];
// Work on a copy so the cached xattr is not modified in place.
13780 bufferptr
acl(access_acl
.c_str(), access_acl
.length());
13781 r
= posix_acl_access_chmod(acl
, mode
);
13784 r
= _do_setxattr(in
, ACL_EA_ACCESS
, acl
.c_str(), acl
.length(), 0, perms
);
13790 ldout(cct
, 10) << __func__
<< " ino " << in
->ino
<< " result=" << r
<< dendl
;
// Prepares ACL xattrs (and adjusts *mode) for a new inode created in `dir`.
// Visible flow: skipped when acl_type is NO_ACL and for symlinks; the parent's
// xattrs are fetched with _getattr(); if the parent has an ACL_EA_DEFAULT
// xattr, a copy of it is run through posix_acl_inherit_mode() and
// posix_acl_equiv_mode(), stored as the new inode's ACL_EA_ACCESS, and -- for
// directories -- the parent's default ACL is also passed on as
// ACL_EA_DEFAULT; the xattr map is encoded into xattrs_bl. The umask obtained
// from umask_cb() is applied to *mode on a path whose condition is not visible
// here. The result is logged at level 10.
// NOTE(review): the error checks, several conditions (e.g. around storing
// ACL_EA_ACCESS and applying the umask) and the return statements are not
// shown in this listing.
13794 int Client::_posix_acl_create(Inode
*dir
, mode_t
*mode
, bufferlist
& xattrs_bl
,
13795 const UserPerm
& perms
)
13797 if (acl_type
== NO_ACL
)
13800 if (S_ISLNK(*mode
))
13803 int r
= _getattr(dir
, CEPH_STAT_CAP_XATTR
, perms
, dir
->xattr_version
== 0);
13807 if (acl_type
== POSIX_ACL
) {
13808 if (dir
->xattrs
.count(ACL_EA_DEFAULT
)) {
13809 map
<string
, bufferptr
> xattrs
;
13811 const bufferptr
& default_acl
= dir
->xattrs
[ACL_EA_DEFAULT
];
13812 bufferptr
acl(default_acl
.c_str(), default_acl
.length());
13813 r
= posix_acl_inherit_mode(acl
, mode
);
13818 r
= posix_acl_equiv_mode(acl
.c_str(), acl
.length(), mode
);
13822 xattrs
[ACL_EA_ACCESS
] = acl
;
13825 if (S_ISDIR(*mode
))
13826 xattrs
[ACL_EA_DEFAULT
] = dir
->xattrs
[ACL_EA_DEFAULT
];
13830 ::encode(xattrs
, xattrs_bl
);
13833 *mode
&= ~umask_cb(callback_handle
);
13838 ldout(cct
, 10) << __func__
<< " dir ino " << dir
->ino
<< " result=" << r
<< dendl
;
13842 void Client::set_filer_flags(int flags
)
13844 Mutex::Locker
l(client_lock
);
13845 assert(flags
== 0 ||
13846 flags
== CEPH_OSD_FLAG_LOCALIZE_READS
);
13847 objecter
->add_global_op_flags(flags
);
13850 void Client::clear_filer_flags(int flags
)
13852 Mutex::Locker
l(client_lock
);
13853 assert(flags
== CEPH_OSD_FLAG_LOCALIZE_READS
);
13854 objecter
->clear_global_op_flag(flags
);
13858 * This is included in cap release messages, to cause
13859 * the MDS to wait until this OSD map epoch. It is necessary
13860 * in corner cases where we cancel RADOS ops, so that
13861 * nobody else tries to do IO to the same objects in
13862 * the same epoch as the cancelled ops.
13864 void Client::set_cap_epoch_barrier(epoch_t e
)
13866 ldout(cct
, 5) << __func__
<< " epoch = " << e
<< dendl
;
13867 cap_epoch_barrier
= e
;
// Returns the configuration keys this client observes, held in a
// function-local static array of string literals.
// NOTE(review): not every array entry is shown in this listing (line 13875 is
// missing), nor are the array terminator and the return statement.
13870 const char** Client::get_tracked_conf_keys() const
13872 static const char* keys
[] = {
13873 "client_cache_size",
13874 "client_cache_mid",
13876 "client_deleg_timeout",
13877 "client_deleg_break_on_open",
// Applies runtime configuration changes under client_lock.
//   - "client_cache_mid": pushes the new midpoint into the LRU.
//   - "client_acl_type": selects POSIX_ACL when the configured value is
//     "posix_acl" (the value assigned otherwise is not visible here).
// Values are read from cct->_conf; the `conf` parameter is not referenced in
// the visible lines.
13883 void Client::handle_conf_change(const struct md_config_t
*conf
,
13884 const std::set
<std::string
> &changed
)
13886 Mutex::Locker
lock(client_lock
);
13888 if (changed
.count("client_cache_mid")) {
13889 lru
.lru_set_midpoint(cct
->_conf
->client_cache_mid
);
13891 if (changed
.count("client_acl_type")) {
13893 if (cct
->_conf
->client_acl_type
== "posix_acl")
13894 acl_type
= POSIX_ACL
;
// Populates the supplementary group list of `perms`: _getgrouplist() is
// called for the uid/gid stored in perms, and the resulting array and count
// are handed to perms->init_gids().
// NOTE(review): the declaration of `sgids` is not shown in this listing;
// ownership of the array presumably passes to perms -- confirm against
// UserPerm::init_gids().
13898 void Client::init_groups(UserPerm
*perms
)
13901 int count
= _getgrouplist(&sgids
, perms
->uid(), perms
->gid());
13902 perms
->init_gids(sgids
, count
);
// boost::intrusive_ptr hook for Inode: called when a new reference is taken.
// NOTE(review): the function body is not shown in this listing.
13905 void intrusive_ptr_add_ref(Inode
*in
)
13910 void intrusive_ptr_release(Inode
*in
)
13912 in
->client
->put_inode(in
);
// Picks an MDS rank at random from the set of up ranks in the MDS map.
// The caller must hold client_lock (asserted). MDS_RANK_NONE is returned on a
// path whose condition is not visible here; otherwise an iterator is advanced
// rand() % up.size() steps from the start of the set.
// NOTE(review): the guard before `return MDS_RANK_NONE`, the loop body and the
// final return are not shown in this listing.
13915 mds_rank_t
Client::_get_random_up_mds() const
13917 assert(client_lock
.is_locked_by_me());
13919 std::set
<mds_rank_t
> up
;
13920 mdsmap
->get_up_mds_set(up
);
13923 return MDS_RANK_NONE
;
13924 std::set
<mds_rank_t
>::const_iterator p
= up
.begin();
13925 for (int n
= rand() % up
.size(); n
; n
--)
13931 StandaloneClient::StandaloneClient(Messenger
*m
, MonClient
*mc
)
13932 : Client(m
, mc
, new Objecter(m
->cct
, m
, mc
, NULL
, 0, 0))
13934 monclient
->set_messenger(m
);
13935 objecter
->set_client_incarnation(0);
// Destructor: resets the objecter pointer to nullptr.
// NOTE(review): line 13940 is not shown in this listing; presumably the
// Objecter allocated in the constructor is released there -- confirm.
13938 StandaloneClient::~StandaloneClient()
13941 objecter
= nullptr;
// Brings up a standalone client: starts the object cacher, then under
// client_lock (asserting the client is not yet initialized) registers the
// objecter and this client as messenger dispatchers, requests MDS and OSD
// service keys from the MonClient and initializes it.
// If MonClient::init() fails, the partially initialized state is torn down:
// client_lock is released and the objecter, object cacher and MonClient are
// shut down.
// NOTE(review): several lines are not shown in this listing, including the
// error test on `r`, the remaining start-up calls and the return statements.
13944 int StandaloneClient::init()
13947 objectcacher
->start();
13950 client_lock
.Lock();
13951 assert(!initialized
);
13953 messenger
->add_dispatcher_tail(objecter
);
13954 messenger
->add_dispatcher_tail(this);
13956 monclient
->set_want_keys(CEPH_ENTITY_TYPE_MDS
| CEPH_ENTITY_TYPE_OSD
);
13957 int r
= monclient
->init();
13959 // need to do cleanup because we're in an intermediate init state
13961 client_lock
.Unlock();
13962 objecter
->shutdown();
13963 objectcacher
->stop();
13964 monclient
->shutdown();
13969 client_lock
.Unlock();
13975 void StandaloneClient::shutdown()
13977 Client::shutdown();
13978 objecter
->shutdown();
13979 monclient
->shutdown();