1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
18 #include <sys/types.h>
22 #include <sys/param.h>
25 #include <sys/utsname.h>
28 #include <boost/lexical_cast.hpp>
29 #include <boost/fusion/include/std_pair.hpp>
31 #if defined(__FreeBSD__)
32 #define XATTR_CREATE 0x1
33 #define XATTR_REPLACE 0x2
35 #include <sys/xattr.h>
38 #if defined(__linux__)
39 #include <linux/falloc.h>
42 #include <sys/statvfs.h>
44 #include "common/config.h"
45 #include "common/version.h"
48 #include "messages/MClientSession.h"
49 #include "messages/MClientReconnect.h"
50 #include "messages/MClientRequest.h"
51 #include "messages/MClientRequestForward.h"
52 #include "messages/MClientReply.h"
53 #include "messages/MClientCaps.h"
54 #include "messages/MClientLease.h"
55 #include "messages/MClientSnap.h"
56 #include "messages/MCommandReply.h"
57 #include "messages/MOSDMap.h"
58 #include "messages/MClientQuota.h"
59 #include "messages/MClientCapRelease.h"
60 #include "messages/MMDSMap.h"
61 #include "messages/MFSMap.h"
62 #include "messages/MFSMapUser.h"
64 #include "mon/MonClient.h"
66 #include "mds/flock.h"
67 #include "osd/OSDMap.h"
68 #include "osdc/Filer.h"
70 #include "common/Cond.h"
71 #include "common/Mutex.h"
72 #include "common/perf_counters.h"
73 #include "common/admin_socket.h"
74 #include "common/errno.h"
75 #include "include/str_list.h"
77 #define dout_subsys ceph_subsys_client
79 #include "include/lru.h"
80 #include "include/compat.h"
81 #include "include/stringify.h"
86 #include "Delegation.h"
88 #include "ClientSnapRealm.h"
90 #include "MetaSession.h"
91 #include "MetaRequest.h"
92 #include "ObjecterWriteback.h"
93 #include "posix_acl.h"
95 #include "include/assert.h"
96 #include "include/stat.h"
98 #include "include/cephfs/ceph_statx.h"
100 #if HAVE_GETGROUPLIST
107 #define dout_prefix *_dout << "client." << whoami << " "
109 #define tout(cct) if (!cct->_conf->client_trace.empty()) traceout
111 // FreeBSD fails to define this
115 // Darwin fails to define this
124 #define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED)
// Trampoline registered with ObjectCacher as its flush callback: recovers
// the owning Client from the opaque pointer and forwards the flushed
// ObjectSet to it.
// NOTE(review): this extract is line-numbered and line-sampled; the body's
// braces are on lines not visible here.
126 void client_flush_set_callback(void *p
, ObjectCacher::ObjectSet
*oset
)
128 Client
*client
= static_cast<Client
*>(p
);
129 client
->flush_set_callback(oset
);
135 Client::CommandHook::CommandHook(Client
*client
) :
// Admin-socket command dispatcher for the client ("mds_requests",
// "mds_sessions", "dump_cache", "kick_stale_sessions", "status").
// Creates a Formatter for the requested output format and runs the chosen
// dump while holding client_lock.
140 bool Client::CommandHook::call(std::string command
, cmdmap_t
& cmdmap
,
141 std::string format
, bufferlist
& out
)
143 Formatter
*f
= Formatter::create(format
);
144 f
->open_object_section("result");
// Serialize against all other accesses to client state.
145 m_client
->client_lock
.Lock();
146 if (command
== "mds_requests")
147 m_client
->dump_mds_requests(f
);
148 else if (command
== "mds_sessions")
149 m_client
->dump_mds_sessions(f
);
150 else if (command
== "dump_cache")
151 m_client
->dump_cache(f
);
152 else if (command
== "kick_stale_sessions")
153 m_client
->_kick_stale_sessions();
154 else if (command
== "status")
155 m_client
->dump_status(f
);
// Registration and dispatch must stay in sync; reaching this assert means a
// command was registered without a matching branch above.
// NOTE(review): the "else" introducing this branch is on a line not visible
// in this extract.
157 assert(0 == "bad command registered");
158 m_client
->client_lock
.Unlock();
168 dir_result_t::dir_result_t(Inode
*in
, const UserPerm
& perms
)
169 : inode(in
), offset(0), next_offset(2),
170 release_count(0), ordered_count(0), cache_index(0), start_shared_gen(0),
// Reset the pool of "faked" inode numbers (used to present 64-bit cephfs
// inos through a narrower ino_t): everything from `start` upward becomes
// free again.
// NOTE(review): the declaration/initialization of `start` is on a line not
// visible in this extract.
174 void Client::_reset_faked_inos()
177 free_faked_inos
.clear();
178 free_faked_inos
.insert(start
, (uint32_t)-1 - start
+ 1);
179 last_used_faked_ino
= 0;
// Faked inos are enabled when ino_t is narrower than 8 bytes, or when
// forced on via the client_use_faked_inos config option.
180 _use_faked_inos
= sizeof(ino_t
) < 8 || cct
->_conf
->client_use_faked_inos
;
// Allocate the next free faked ino — scanning upward from
// last_used_faked_ino and wrapping to the start of the free set once — and
// bind it to inode `in` (both on the inode and in faked_ino_map).
183 void Client::_assign_faked_ino(Inode
*in
)
185 interval_set
<ino_t
>::const_iterator it
= free_faked_inos
.lower_bound(last_used_faked_ino
+ 1);
// Wrap around: nothing free above the last handed-out value.
186 if (it
== free_faked_inos
.end() && last_used_faked_ino
> 0) {
187 last_used_faked_ino
= 0;
188 it
= free_faked_inos
.lower_bound(last_used_faked_ino
+ 1);
// The free pool must never be exhausted.
190 assert(it
!= free_faked_inos
.end());
// Jump forward to the start of the free interval if there is a gap.
191 if (last_used_faked_ino
< it
.get_start()) {
192 assert(it
.get_len() > 0);
193 last_used_faked_ino
= it
.get_start();
195 ++last_used_faked_ino
;
// The chosen value must still lie inside the free interval.
196 assert(it
.get_start() + it
.get_len() > last_used_faked_ino
);
// Record the mapping and remove the ino from the free pool.
198 in
->faked_ino
= last_used_faked_ino
;
199 free_faked_inos
.erase(in
->faked_ino
);
200 faked_ino_map
[in
->faked_ino
] = in
->vino();
// Return an inode's faked ino to the free pool and drop its reverse-map
// entry. Inverse of _assign_faked_ino().
203 void Client::_release_faked_ino(Inode
*in
)
205 free_faked_inos
.insert(in
->faked_ino
);
206 faked_ino_map
.erase(in
->faked_ino
);
// Translate a faked ino back to the real vinodeno_t via faked_ino_map;
// unknown inos fall through to (0, CEPH_NOSNAP).
// NOTE(review): the leading branch(es) and the declaration of `vino` are on
// lines not visible in this extract — confirm against the full file.
209 vinodeno_t
Client::_map_faked_ino(ino_t ino
)
214 else if (faked_ino_map
.count(ino
))
215 vino
= faked_ino_map
[ino
];
217 vino
= vinodeno_t(0, CEPH_NOSNAP
);
218 ldout(cct
, 10) << "map_faked_ino " << ino
<< " -> " << vino
<< dendl
;
// Public, locked wrapper around _map_faked_ino().
222 vinodeno_t
Client::map_faked_ino(ino_t ino
)
224 Mutex::Locker
lock(client_lock
);
225 return _map_faked_ino(ino
);
230 Client::Client(Messenger
*m
, MonClient
*mc
, Objecter
*objecter_
)
231 : Dispatcher(m
->cct
),
232 m_command_hook(this),
233 timer(m
->cct
, client_lock
),
234 callback_handle(NULL
),
235 switch_interrupt_cb(NULL
),
237 ino_invalidate_cb(NULL
),
238 dentry_invalidate_cb(NULL
),
240 can_invalidate_dentries(false),
241 async_ino_invalidator(m
->cct
),
242 async_dentry_invalidator(m
->cct
),
243 interrupt_finisher(m
->cct
),
244 remount_finisher(m
->cct
),
245 objecter_finisher(m
->cct
),
247 messenger(m
), monclient(mc
),
249 whoami(mc
->get_global_id()), cap_epoch_barrier(0),
250 last_tid(0), oldest_tid(0), last_flush_tid(1),
252 mounted(false), unmounting(false), blacklisted(false),
253 local_osd(-ENXIO
), local_osd_epoch(0),
254 unsafe_sync_write(0),
255 client_lock("Client::client_lock"),
262 num_flushing_caps
= 0;
264 _dir_vxattrs_name_size
= _vxattrs_calcu_name_size(_dir_vxattrs
);
265 _file_vxattrs_name_size
= _vxattrs_calcu_name_size(_file_vxattrs
);
267 user_id
= cct
->_conf
->client_mount_uid
;
268 group_id
= cct
->_conf
->client_mount_gid
;
271 if (cct
->_conf
->client_acl_type
== "posix_acl")
272 acl_type
= POSIX_ACL
;
274 lru
.lru_set_midpoint(cct
->_conf
->client_cache_mid
);
277 free_fd_set
.insert(10, 1<<30);
279 mdsmap
.reset(new MDSMap
);
282 writeback_handler
.reset(new ObjecterWriteback(objecter
, &objecter_finisher
,
284 objectcacher
.reset(new ObjectCacher(cct
, "libcephfs", *writeback_handler
, client_lock
,
285 client_flush_set_callback
, // all commit callback
287 cct
->_conf
->client_oc_size
,
288 cct
->_conf
->client_oc_max_objects
,
289 cct
->_conf
->client_oc_max_dirty
,
290 cct
->_conf
->client_oc_target_dirty
,
291 cct
->_conf
->client_oc_max_dirty_age
,
293 objecter_finisher
.start();
294 filer
.reset(new Filer(objecter
, &objecter_finisher
));
295 objecter
->enable_blacklist_events();
301 assert(!client_lock
.is_locked());
303 // It is necessary to hold client_lock, because any inode destruction
304 // may call into ObjectCacher, which asserts that it's lock (which is
305 // client_lock) is held.
308 client_lock
.Unlock();
// Force-close every open file handle and directory handle, then verify the
// metadata cache is fully drained: the dentry LRU must be empty and only
// the root inode plus its recorded parents may remain mapped; those are
// released here as well.
311 void Client::tear_down_cache()
// Close all open file handles.
314 for (ceph::unordered_map
<int, Fh
*>::iterator it
= fd_map
.begin();
318 ldout(cct
, 1) << "tear_down_cache forcing close of fh " << it
->first
<< " ino " << fh
->inode
->ino
<< dendl
;
// Close all open directory handles.
323 while (!opened_dirs
.empty()) {
324 dir_result_t
*dirp
= *opened_dirs
.begin();
325 ldout(cct
, 1) << "tear_down_cache forcing close of dir " << dirp
<< " ino " << dirp
->inode
->ino
<< dendl
;
// By now the dentry LRU must be empty ...
334 assert(lru
.lru_get_size() == 0);
// ... and only root plus its parent links may still be in inode_map.
337 assert(inode_map
.size() <= 1 + root_parents
.size());
338 if (root
&& inode_map
.size() == 1 + root_parents
.size()) {
342 while (!root_parents
.empty())
343 root_parents
.erase(root_parents
.begin());
348 assert(inode_map
.empty());
// Return the root inode number, honoring faked-ino mode; taken under
// client_lock.
// NOTE(review): the non-faked return path is on lines not visible here.
351 inodeno_t
Client::get_root_ino()
353 Mutex::Locker
l(client_lock
);
354 if (use_faked_inos())
355 return root
->faked_ino
;
360 Inode
*Client::get_root()
362 Mutex::Locker
l(client_lock
);
370 void Client::dump_inode(Formatter
*f
, Inode
*in
, set
<Inode
*>& did
, bool disconnected
)
373 in
->make_long_path(path
);
374 ldout(cct
, 1) << "dump_inode: "
375 << (disconnected
? "DISCONNECTED ":"")
376 << "inode " << in
->ino
378 << " ref " << in
->get_num_ref()
382 f
->open_object_section("inode");
383 f
->dump_stream("path") << path
;
385 f
->dump_int("disconnected", 1);
392 ldout(cct
, 1) << " dir " << in
->dir
<< " size " << in
->dir
->dentries
.size() << dendl
;
393 for (ceph::unordered_map
<string
, Dentry
*>::iterator it
= in
->dir
->dentries
.begin();
394 it
!= in
->dir
->dentries
.end();
396 ldout(cct
, 1) << " " << in
->ino
<< " dn " << it
->first
<< " " << it
->second
<< " ref " << it
->second
->ref
<< dendl
;
398 f
->open_object_section("dentry");
402 if (it
->second
->inode
)
403 dump_inode(f
, it
->second
->inode
.get(), did
, false);
// Dump the whole in-memory metadata cache to a Formatter: first the tree
// reachable from root, then a second sweep over inode_map for anything
// disconnected from the root.
408 void Client::dump_cache(Formatter
*f
)
412 ldout(cct
, 1) << "dump_cache" << dendl
;
415 f
->open_array_section("cache");
418 dump_inode(f
, root
, did
, true);
420 // make a second pass to catch anything disconnected
421 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator it
= inode_map
.begin();
422 it
!= inode_map
.end();
// Skip inodes already emitted during the rooted walk (tracked in `did`).
424 if (did
.count(it
->second
))
426 dump_inode(f
, it
->second
, did
, true);
// Emit a summary of client state (session metadata, dentry/inode counters,
// map epochs) for the "status" admin-socket command. The caller must
// already hold client_lock, as asserted below.
433 void Client::dump_status(Formatter
*f
)
435 assert(client_lock
.is_locked_by_me());
437 ldout(cct
, 1) << __func__
<< dendl
;
// Snapshot the current osdmap epoch via the objecter's osdmap accessor.
439 const epoch_t osd_epoch
440 = objecter
->with_osdmap(std::mem_fn(&OSDMap::get_epoch
));
// Dump the client's session-metadata key/value pairs.
443 f
->open_object_section("metadata");
444 for (const auto& kv
: metadata
)
445 f
->dump_string(kv
.first
.c_str(), kv
.second
);
448 f
->dump_int("dentry_count", lru
.lru_get_size());
449 f
->dump_int("dentry_pinned_count", lru
.lru_get_num_pinned());
450 f
->dump_int("id", get_nodeid().v
);
451 f
->dump_int("inode_count", inode_map
.size());
452 f
->dump_int("mds_epoch", mdsmap
->get_epoch());
453 f
->dump_int("osd_epoch", osd_epoch
);
454 f
->dump_int("osd_epoch_barrier", cap_epoch_barrier
);
461 objectcacher
->start();
464 assert(!initialized
);
466 messenger
->add_dispatcher_tail(this);
467 client_lock
.Unlock();
473 void Client::_finish_init()
477 PerfCountersBuilder
plb(cct
, "client", l_c_first
, l_c_last
);
478 plb
.add_time_avg(l_c_reply
, "reply", "Latency of receiving a reply on metadata request");
479 plb
.add_time_avg(l_c_lat
, "lat", "Latency of processing a metadata request");
480 plb
.add_time_avg(l_c_wrlat
, "wrlat", "Latency of a file data write operation");
481 logger
.reset(plb
.create_perf_counters());
482 cct
->get_perfcounters_collection()->add(logger
.get());
484 client_lock
.Unlock();
486 cct
->_conf
->add_observer(this);
488 AdminSocket
* admin_socket
= cct
->get_admin_socket();
489 int ret
= admin_socket
->register_command("mds_requests",
492 "show in-progress mds requests");
494 lderr(cct
) << "error registering admin socket command: "
495 << cpp_strerror(-ret
) << dendl
;
497 ret
= admin_socket
->register_command("mds_sessions",
500 "show mds session state");
502 lderr(cct
) << "error registering admin socket command: "
503 << cpp_strerror(-ret
) << dendl
;
505 ret
= admin_socket
->register_command("dump_cache",
508 "show in-memory metadata cache contents");
510 lderr(cct
) << "error registering admin socket command: "
511 << cpp_strerror(-ret
) << dendl
;
513 ret
= admin_socket
->register_command("kick_stale_sessions",
514 "kick_stale_sessions",
516 "kick sessions that were remote reset");
518 lderr(cct
) << "error registering admin socket command: "
519 << cpp_strerror(-ret
) << dendl
;
521 ret
= admin_socket
->register_command("status",
524 "show overall client status");
526 lderr(cct
) << "error registering admin socket command: "
527 << cpp_strerror(-ret
) << dendl
;
532 client_lock
.Unlock();
// Tear the client down: unregister the admin-socket commands and the config
// observer, stop each callback finisher that was started, stop the object
// cacher and the objecter finisher, and detach the perf counters.
535 void Client::shutdown()
537 ldout(cct
, 1) << "shutdown" << dendl
;
539 // If we were not mounted, but were being used for sending
540 // MDS commands, we may have sessions that need closing.
543 client_lock
.Unlock();
545 cct
->_conf
->remove_observer(this);
// Unregister the five commands registered in _finish_init().
547 AdminSocket
* admin_socket
= cct
->get_admin_socket();
548 admin_socket
->unregister_command("mds_requests");
549 admin_socket
->unregister_command("mds_sessions");
550 admin_socket
->unregister_command("dump_cache");
551 admin_socket
->unregister_command("kick_stale_sessions");
552 admin_socket
->unregister_command("status");
// Each finisher is stopped only if its callback was ever registered,
// mirroring how they are conditionally started.
554 if (ino_invalidate_cb
) {
555 ldout(cct
, 10) << "shutdown stopping cache invalidator finisher" << dendl
;
556 async_ino_invalidator
.wait_for_empty();
557 async_ino_invalidator
.stop();
560 if (dentry_invalidate_cb
) {
561 ldout(cct
, 10) << "shutdown stopping dentry invalidator finisher" << dendl
;
562 async_dentry_invalidator
.wait_for_empty();
563 async_dentry_invalidator
.stop();
566 if (switch_interrupt_cb
) {
567 ldout(cct
, 10) << "shutdown stopping interrupt finisher" << dendl
;
568 interrupt_finisher
.wait_for_empty();
569 interrupt_finisher
.stop();
573 ldout(cct
, 10) << "shutdown stopping remount finisher" << dendl
;
574 remount_finisher
.wait_for_empty();
575 remount_finisher
.stop();
578 objectcacher
->stop(); // outside of client_lock! this does a join.
584 client_lock
.Unlock();
586 objecter_finisher
.wait_for_empty();
587 objecter_finisher
.stop();
590 cct
->get_perfcounters_collection()->remove(logger
.get());
596 // ===================
597 // metadata cache stuff
// Trim the dentry LRU down to client_cache_size, iterating until a pass
// stops shrinking the LRU. Optionally asks the kernel to drop its dcache
// entries, and finally releases the root inode itself once nothing else is
// left.
599 void Client::trim_cache(bool trim_kernel_dcache
)
601 uint64_t max
= cct
->_conf
->client_cache_size
;
602 ldout(cct
, 20) << "trim_cache size " << lru
.lru_get_size() << " max " << max
<< dendl
;
// Repeat passes until the LRU size stops changing between iterations.
604 while (lru
.lru_get_size() != last
) {
605 last
= lru
.lru_get_size();
// During unmount we trim everything, not just down to `max`.
607 if (!unmounting
&& lru
.lru_get_size() <= max
) break;
610 Dentry
*dn
= static_cast<Dentry
*>(lru
.lru_get_next_expire());
// Still over budget: invalidating the kernel dcache can release pins so
// more of our dentries become expirable.
617 if (trim_kernel_dcache
&& lru
.lru_get_size() > max
)
618 _invalidate_kernel_dcache();
// Cache is empty and root is unreferenced: drop root and its parents too.
621 if (lru
.lru_get_size() == 0 && root
&& root
->get_num_ref() == 0 && inode_map
.size() == 1 + root_parents
.size()) {
622 ldout(cct
, 15) << "trim_cache trimmed root " << root
<< dendl
;
626 while (!root_parents
.empty())
627 root_parents
.erase(root_parents
.begin());
// Before reconnecting to an MDS, expire every dentry that does NOT depend
// on caps from that MDS; dentries that do are kept and re-inserted into the
// middle of the LRU afterwards.
633 void Client::trim_cache_for_reconnect(MetaSession
*s
)
635 mds_rank_t mds
= s
->mds_num
;
636 ldout(cct
, 20) << "trim_cache_for_reconnect mds." << mds
<< dendl
;
639 list
<Dentry
*> skipped
;
640 while (lru
.lru_get_size() > 0) {
641 Dentry
*dn
= static_cast<Dentry
*>(lru
.lru_expire());
// Keep dentries whose inode — or whose parent directory inode — holds
// caps from this MDS.
645 if ((dn
->inode
&& dn
->inode
->caps
.count(mds
)) ||
646 dn
->dir
->parent_inode
->caps
.count(mds
)) {
650 skipped
.push_back(dn
);
// Re-insert the survivors into the LRU.
653 for(list
<Dentry
*>::iterator p
= skipped
.begin(); p
!= skipped
.end(); ++p
)
654 lru
.lru_insert_mid(*p
);
656 ldout(cct
, 20) << "trim_cache_for_reconnect mds." << mds
657 << " trimmed " << trimmed
<< " dentries" << dendl
;
// If this session still holds any caps, invalidate the kernel dcache too.
659 if (s
->caps
.size() > 0)
660 _invalidate_kernel_dcache();
// Drop a single dentry from the cache: bump the parent directory's release
// count, clear its completeness flags, and unlink the dentry (dropping both
// the dir ref and the dentry itself, per the unlink flags).
663 void Client::trim_dentry(Dentry
*dn
)
665 ldout(cct
, 15) << "trim_dentry unlinking dn " << dn
->name
666 << " in dir " << hex
<< dn
->dir
->parent_inode
->ino
669 Inode
*diri
= dn
->dir
->parent_inode
;
// Removing a dentry invalidates any cached "complete directory" view.
670 diri
->dir_release_count
++;
671 clear_dir_complete_and_ordered(diri
, true);
673 unlink(dn
, false, false); // drop dir, drop dentry
677 void Client::update_inode_file_bits(Inode
*in
,
678 uint64_t truncate_seq
, uint64_t truncate_size
,
679 uint64_t size
, uint64_t change_attr
,
680 uint64_t time_warp_seq
, utime_t ctime
,
683 version_t inline_version
,
684 bufferlist
& inline_data
,
688 ldout(cct
, 10) << "update_inode_file_bits " << *in
<< " " << ccap_string(issued
)
689 << " mtime " << mtime
<< dendl
;
690 ldout(cct
, 25) << "truncate_seq: mds " << truncate_seq
<< " local "
691 << in
->truncate_seq
<< " time_warp_seq: mds " << time_warp_seq
692 << " local " << in
->time_warp_seq
<< dendl
;
693 uint64_t prior_size
= in
->size
;
695 if (inline_version
> in
->inline_version
) {
696 in
->inline_data
= inline_data
;
697 in
->inline_version
= inline_version
;
700 /* always take a newer change attr */
701 if (change_attr
> in
->change_attr
)
702 in
->change_attr
= change_attr
;
704 if (truncate_seq
> in
->truncate_seq
||
705 (truncate_seq
== in
->truncate_seq
&& size
> in
->size
)) {
706 ldout(cct
, 10) << "size " << in
->size
<< " -> " << size
<< dendl
;
708 in
->reported_size
= size
;
709 if (truncate_seq
!= in
->truncate_seq
) {
710 ldout(cct
, 10) << "truncate_seq " << in
->truncate_seq
<< " -> "
711 << truncate_seq
<< dendl
;
712 in
->truncate_seq
= truncate_seq
;
713 in
->oset
.truncate_seq
= truncate_seq
;
715 // truncate cached file data
716 if (prior_size
> size
) {
717 _invalidate_inode_cache(in
, truncate_size
, prior_size
- truncate_size
);
721 // truncate inline data
722 if (in
->inline_version
< CEPH_INLINE_NONE
) {
723 uint32_t len
= in
->inline_data
.length();
725 in
->inline_data
.splice(size
, len
- size
);
728 if (truncate_seq
>= in
->truncate_seq
&&
729 in
->truncate_size
!= truncate_size
) {
731 ldout(cct
, 10) << "truncate_size " << in
->truncate_size
<< " -> "
732 << truncate_size
<< dendl
;
733 in
->truncate_size
= truncate_size
;
734 in
->oset
.truncate_size
= truncate_size
;
736 ldout(cct
, 0) << "Hmmm, truncate_seq && truncate_size changed on non-file inode!" << dendl
;
740 // be careful with size, mtime, atime
741 if (issued
& (CEPH_CAP_FILE_EXCL
|
743 CEPH_CAP_FILE_BUFFER
|
745 CEPH_CAP_XATTR_EXCL
)) {
746 ldout(cct
, 30) << "Yay have enough caps to look at our times" << dendl
;
747 if (ctime
> in
->ctime
)
749 if (time_warp_seq
> in
->time_warp_seq
) {
750 ldout(cct
, 10) << "mds time_warp_seq " << time_warp_seq
<< " on inode " << *in
751 << " is higher than local time_warp_seq "
752 << in
->time_warp_seq
<< dendl
;
753 //the mds updated times, so take those!
756 in
->time_warp_seq
= time_warp_seq
;
757 } else if (time_warp_seq
== in
->time_warp_seq
) {
759 if (mtime
> in
->mtime
)
761 if (atime
> in
->atime
)
763 } else if (issued
& CEPH_CAP_FILE_EXCL
) {
764 //ignore mds values as we have a higher seq
767 ldout(cct
, 30) << "Don't have enough caps, just taking mds' time values" << dendl
;
768 if (time_warp_seq
>= in
->time_warp_seq
) {
772 in
->time_warp_seq
= time_warp_seq
;
776 ldout(cct
, 0) << "WARNING: " << *in
<< " mds time_warp_seq "
777 << time_warp_seq
<< " is lower than local time_warp_seq "
// After a dirfragtree change, drop fragmap entries whose frag is no longer
// a leaf of the tree.
// NOTE(review): the loop's advancing else-branch is on lines not visible in
// this extract; the post-increment on erase keeps the iterator valid.
783 void Client::_fragmap_remove_non_leaves(Inode
*in
)
785 for (map
<frag_t
,int>::iterator p
= in
->fragmap
.begin(); p
!= in
->fragmap
.end(); )
786 if (!in
->dirfragtree
.is_leaf(p
->first
))
787 in
->fragmap
.erase(p
++);
// Remove fragmap entries that point at a stopped MDS rank.
// NOTE(review): the loop's advancing else-branch is on lines not visible in
// this extract; the post-increment on erase keeps the iterator valid.
792 void Client::_fragmap_remove_stopped_mds(Inode
*in
, mds_rank_t mds
)
794 for (auto p
= in
->fragmap
.begin(); p
!= in
->fragmap
.end(); )
795 if (p
->second
== mds
)
796 in
->fragmap
.erase(p
++);
801 Inode
* Client::add_update_inode(InodeStat
*st
, utime_t from
,
802 MetaSession
*session
,
803 const UserPerm
& request_perms
)
806 bool was_new
= false;
807 if (inode_map
.count(st
->vino
)) {
808 in
= inode_map
[st
->vino
];
809 ldout(cct
, 12) << "add_update_inode had " << *in
<< " caps " << ccap_string(st
->cap
.caps
) << dendl
;
811 in
= new Inode(this, st
->vino
, &st
->layout
);
812 inode_map
[st
->vino
] = in
;
814 if (use_faked_inos())
815 _assign_faked_ino(in
);
821 } else if (!mounted
) {
822 root_parents
[root_ancestor
] = in
;
827 in
->ino
= st
->vino
.ino
;
828 in
->snapid
= st
->vino
.snapid
;
829 in
->mode
= st
->mode
& S_IFMT
;
834 if (in
->is_symlink())
835 in
->symlink
= st
->symlink
;
838 ldout(cct
, 12) << "add_update_inode adding " << *in
<< " caps " << ccap_string(st
->cap
.caps
) << dendl
;
841 return in
; // as with readdir returning indoes in different snaprealms (no caps!)
843 // only update inode if mds info is strictly newer, or it is the same and projected (odd).
844 bool updating_inode
= false;
846 if (st
->version
== 0 ||
847 (in
->version
& ~1) < st
->version
) {
848 updating_inode
= true;
851 issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
852 issued
|= implemented
;
854 in
->version
= st
->version
;
856 if ((issued
& CEPH_CAP_AUTH_EXCL
) == 0) {
860 in
->btime
= st
->btime
;
863 if ((issued
& CEPH_CAP_LINK_EXCL
) == 0) {
864 in
->nlink
= st
->nlink
;
867 in
->dirstat
= st
->dirstat
;
868 in
->rstat
= st
->rstat
;
869 in
->quota
= st
->quota
;
870 in
->layout
= st
->layout
;
873 in
->dir_layout
= st
->dir_layout
;
874 ldout(cct
, 20) << " dir hash is " << (int)in
->dir_layout
.dl_dir_hash
<< dendl
;
877 update_inode_file_bits(in
, st
->truncate_seq
, st
->truncate_size
, st
->size
,
878 st
->change_attr
, st
->time_warp_seq
, st
->ctime
,
879 st
->mtime
, st
->atime
, st
->inline_version
,
880 st
->inline_data
, issued
);
881 } else if (st
->inline_version
> in
->inline_version
) {
882 in
->inline_data
= st
->inline_data
;
883 in
->inline_version
= st
->inline_version
;
886 if ((in
->xattr_version
== 0 || !(issued
& CEPH_CAP_XATTR_EXCL
)) &&
887 st
->xattrbl
.length() &&
888 st
->xattr_version
> in
->xattr_version
) {
889 bufferlist::iterator p
= st
->xattrbl
.begin();
890 ::decode(in
->xattrs
, p
);
891 in
->xattr_version
= st
->xattr_version
;
894 // move me if/when version reflects fragtree changes.
895 if (in
->dirfragtree
!= st
->dirfragtree
) {
896 in
->dirfragtree
= st
->dirfragtree
;
897 _fragmap_remove_non_leaves(in
);
900 if (in
->snapid
== CEPH_NOSNAP
) {
901 add_update_cap(in
, session
, st
->cap
.cap_id
, st
->cap
.caps
, st
->cap
.seq
,
902 st
->cap
.mseq
, inodeno_t(st
->cap
.realm
), st
->cap
.flags
,
904 if (in
->auth_cap
&& in
->auth_cap
->session
== session
) {
905 in
->max_size
= st
->max_size
;
906 in
->rstat
= st
->rstat
;
909 in
->snap_caps
|= st
->cap
.caps
;
911 // setting I_COMPLETE needs to happen after adding the cap
912 if (updating_inode
&&
914 (st
->cap
.caps
& CEPH_CAP_FILE_SHARED
) &&
915 (issued
& CEPH_CAP_FILE_EXCL
) == 0 &&
916 in
->dirstat
.nfiles
== 0 &&
917 in
->dirstat
.nsubdirs
== 0) {
918 ldout(cct
, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on empty dir " << *in
<< dendl
;
919 in
->flags
|= I_COMPLETE
| I_DIR_ORDERED
;
921 ldout(cct
, 10) << " dir is open on empty dir " << in
->ino
<< " with "
922 << in
->dir
->dentries
.size() << " entries, marking all dentries null" << dendl
;
923 in
->dir
->readdir_cache
.clear();
924 for (auto p
= in
->dir
->dentries
.begin();
925 p
!= in
->dir
->dentries
.end();
927 unlink(p
->second
, true, true); // keep dir, keep dentry
929 if (in
->dir
->dentries
.empty())
939 * insert_dentry_inode - insert + link a single dentry + inode into the metadata cache.
941 Dentry
*Client::insert_dentry_inode(Dir
*dir
, const string
& dname
, LeaseStat
*dlease
,
942 Inode
*in
, utime_t from
, MetaSession
*session
,
946 if (dir
->dentries
.count(dname
))
947 dn
= dir
->dentries
[dname
];
949 ldout(cct
, 12) << "insert_dentry_inode '" << dname
<< "' vino " << in
->vino()
950 << " in dir " << dir
->parent_inode
->vino() << " dn " << dn
953 if (dn
&& dn
->inode
) {
954 if (dn
->inode
->vino() == in
->vino()) {
956 ldout(cct
, 12) << " had dentry " << dname
957 << " with correct vino " << dn
->inode
->vino()
960 ldout(cct
, 12) << " had dentry " << dname
961 << " with WRONG vino " << dn
->inode
->vino()
963 unlink(dn
, true, true); // keep dir, keep dentry
967 if (!dn
|| !dn
->inode
) {
968 InodeRef
tmp_ref(in
);
970 if (old_dentry
->dir
!= dir
) {
971 Inode
*old_diri
= old_dentry
->dir
->parent_inode
;
972 old_diri
->dir_ordered_count
++;
973 clear_dir_complete_and_ordered(old_diri
, false);
975 unlink(old_dentry
, dir
== old_dentry
->dir
, false); // drop dentry, keep dir open if its the same dir
977 Inode
*diri
= dir
->parent_inode
;
978 diri
->dir_ordered_count
++;
979 clear_dir_complete_and_ordered(diri
, false);
980 dn
= link(dir
, dname
, in
, dn
);
983 update_dentry_lease(dn
, dlease
, from
, session
);
// Apply a dentry lease from an MDS reply: if the lease covers the dentry
// lock (CEPH_LOCK_DN) and extends the current ttl, record the new
// ttl/mds/seq/gen on the dentry. The dentry's cap_shared_gen is refreshed
// from the parent directory regardless.
987 void Client::update_dentry_lease(Dentry
*dn
, LeaseStat
*dlease
, utime_t from
, MetaSession
*session
)
// Lease duration arrives in milliseconds; dttl is `from` plus that span.
// NOTE(review): the declaration of `dttl` is on a line not visible here.
990 dttl
+= (float)dlease
->duration_ms
/ 1000.0;
994 if (dlease
->mask
& CEPH_LOCK_DN
) {
// Only ever extend an existing lease, never shorten it.
995 if (dttl
> dn
->lease_ttl
) {
996 ldout(cct
, 10) << "got dentry lease on " << dn
->name
997 << " dur " << dlease
->duration_ms
<< "ms ttl " << dttl
<< dendl
;
998 dn
->lease_ttl
= dttl
;
999 dn
->lease_mds
= session
->mds_num
;
1000 dn
->lease_seq
= dlease
->seq
;
// Tie the lease to the session's current cap generation so a session
// reset invalidates it.
1001 dn
->lease_gen
= session
->cap_gen
;
1004 dn
->cap_shared_gen
= dn
->dir
->parent_inode
->shared_gen
;
// Update the per-frag MDS location cache for one directory inode from the
// DirStat carried in an MDS reply.
1009 * update MDS location cache for a single inode
1011 void Client::update_dir_dist(Inode
*in
, DirStat
*dst
)
1014 ldout(cct
, 20) << "got dirfrag map for " << in
->ino
<< " frag " << dst
->frag
<< " to mds " << dst
->auth
<< dendl
;
// Record (or forget, for auth < 0) which MDS is authoritative for this
// frag.
1015 if (dst
->auth
>= 0) {
1016 in
->fragmap
[dst
->frag
] = dst
->auth
;
1018 in
->fragmap
.erase(dst
->frag
);
// Force the local dirfragtree to treat this frag as a leaf, then prune
// fragmap entries that are no longer leaves.
1020 if (!in
->dirfragtree
.is_leaf(dst
->frag
)) {
1021 in
->dirfragtree
.force_to_leaf(cct
, dst
->frag
);
1022 _fragmap_remove_non_leaves(in
);
1026 in
->dir_replicated
= !dst
->dist
.empty(); // FIXME that's just one frag!
// NOTE(review): the lines below reference `st` and `in->dir_contacts`,
// neither of which is in scope in this function; this looks like a
// disabled (commented-out) legacy branch whose comment delimiters are on
// lines not visible in this extract — confirm against the full file.
1030 if (!st->dirfrag_dist.empty()) { // FIXME
1031 set<int> dist = st->dirfrag_dist.begin()->second;
1032 if (dist.empty() && !in->dir_contacts.empty())
1033 ldout(cct, 9) << "lost dist spec for " << in->ino
1034 << " " << dist << dendl;
1035 if (!dist.empty() && in->dir_contacts.empty())
1036 ldout(cct, 9) << "got dist spec for " << in->ino
1037 << " " << dist << dendl;
1038 in->dir_contacts = dist;
// Clear the I_COMPLETE / I_DIR_ORDERED flags on a directory inode and wipe
// its cached readdir results.
// NOTE(review): the branch that consults the `complete` parameter to choose
// between the two clearing paths is on lines not visible in this extract.
1043 void Client::clear_dir_complete_and_ordered(Inode
*diri
, bool complete
)
1045 if (diri
->flags
& I_COMPLETE
) {
1047 ldout(cct
, 10) << " clearing (I_COMPLETE|I_DIR_ORDERED) on " << *diri
<< dendl
;
1048 diri
->flags
&= ~(I_COMPLETE
| I_DIR_ORDERED
);
1050 if (diri
->flags
& I_DIR_ORDERED
) {
1051 ldout(cct
, 10) << " clearing I_DIR_ORDERED on " << *diri
<< dendl
;
1052 diri
->flags
&= ~I_DIR_ORDERED
;
// The readdir cache is only valid while the dir is complete & ordered.
1056 diri
->dir
->readdir_cache
.clear();
1061 * insert results from readdir or lssnap into the metadata cache.
1063 void Client::insert_readdir_results(MetaRequest
*request
, MetaSession
*session
, Inode
*diri
) {
1065 MClientReply
*reply
= request
->reply
;
1066 ConnectionRef con
= request
->reply
->get_connection();
1067 uint64_t features
= con
->get_features();
1069 dir_result_t
*dirp
= request
->dirp
;
1072 // the extra buffer list is only set for readdir and lssnap replies
1073 bufferlist::iterator p
= reply
->get_extra_bl().begin();
1076 if (request
->head
.op
== CEPH_MDS_OP_LSSNAP
) {
1078 diri
= open_snapdir(diri
);
1081 // only open dir if we're actually adding stuff to it!
1082 Dir
*dir
= diri
->open_dir();
1092 bool end
= ((unsigned)flags
& CEPH_READDIR_FRAG_END
);
1093 bool hash_order
= ((unsigned)flags
& CEPH_READDIR_HASH_ORDER
);
1095 frag_t fg
= (unsigned)request
->head
.args
.readdir
.frag
;
1096 unsigned readdir_offset
= dirp
->next_offset
;
1097 string readdir_start
= dirp
->last_name
;
1098 assert(!readdir_start
.empty() || readdir_offset
== 2);
1100 unsigned last_hash
= 0;
1102 if (!readdir_start
.empty()) {
1103 last_hash
= ceph_frag_value(diri
->hash_dentry_name(readdir_start
));
1104 } else if (flags
& CEPH_READDIR_OFFSET_HASH
) {
1105 /* mds understands offset_hash */
1106 last_hash
= (unsigned)request
->head
.args
.readdir
.offset_hash
;
1110 if (fg
!= dst
.frag
) {
1111 ldout(cct
, 10) << "insert_trace got new frag " << fg
<< " -> " << dst
.frag
<< dendl
;
1115 readdir_start
.clear();
1116 dirp
->offset
= dir_result_t::make_fpos(fg
, readdir_offset
, false);
1120 ldout(cct
, 10) << __func__
<< " " << numdn
<< " readdir items, end=" << end
1121 << ", hash_order=" << hash_order
1122 << ", readdir_start " << readdir_start
1123 << ", last_hash " << last_hash
1124 << ", next_offset " << readdir_offset
<< dendl
;
1126 if (diri
->snapid
!= CEPH_SNAPDIR
&&
1127 fg
.is_leftmost() && readdir_offset
== 2 &&
1128 !(hash_order
&& last_hash
)) {
1129 dirp
->release_count
= diri
->dir_release_count
;
1130 dirp
->ordered_count
= diri
->dir_ordered_count
;
1131 dirp
->start_shared_gen
= diri
->shared_gen
;
1132 dirp
->cache_index
= 0;
1135 dirp
->buffer_frag
= fg
;
1137 _readdir_drop_dirp_buffer(dirp
);
1138 dirp
->buffer
.reserve(numdn
);
1142 for (unsigned i
=0; i
<numdn
; i
++) {
1144 ::decode(dlease
, p
);
1145 InodeStat
ist(p
, features
);
1147 ldout(cct
, 15) << "" << i
<< ": '" << dname
<< "'" << dendl
;
1149 Inode
*in
= add_update_inode(&ist
, request
->sent_stamp
, session
,
1152 if (diri
->dir
->dentries
.count(dname
)) {
1153 Dentry
*olddn
= diri
->dir
->dentries
[dname
];
1154 if (olddn
->inode
!= in
) {
1155 // replace incorrect dentry
1156 unlink(olddn
, true, true); // keep dir, dentry
1157 dn
= link(dir
, dname
, in
, olddn
);
1158 assert(dn
== olddn
);
1166 dn
= link(dir
, dname
, in
, NULL
);
1169 update_dentry_lease(dn
, &dlease
, request
->sent_stamp
, session
);
1171 unsigned hash
= ceph_frag_value(diri
->hash_dentry_name(dname
));
1172 if (hash
!= last_hash
)
1175 dn
->offset
= dir_result_t::make_fpos(hash
, readdir_offset
++, true);
1177 dn
->offset
= dir_result_t::make_fpos(fg
, readdir_offset
++, false);
1179 // add to readdir cache
1180 if (dirp
->release_count
== diri
->dir_release_count
&&
1181 dirp
->ordered_count
== diri
->dir_ordered_count
&&
1182 dirp
->start_shared_gen
== diri
->shared_gen
) {
1183 if (dirp
->cache_index
== dir
->readdir_cache
.size()) {
1185 assert(!dirp
->inode
->is_complete_and_ordered());
1186 dir
->readdir_cache
.reserve(dirp
->cache_index
+ numdn
);
1188 dir
->readdir_cache
.push_back(dn
);
1189 } else if (dirp
->cache_index
< dir
->readdir_cache
.size()) {
1190 if (dirp
->inode
->is_complete_and_ordered())
1191 assert(dir
->readdir_cache
[dirp
->cache_index
] == dn
);
1193 dir
->readdir_cache
[dirp
->cache_index
] = dn
;
1195 assert(0 == "unexpected readdir buffer idx");
1197 dirp
->cache_index
++;
1199 // add to cached result list
1200 dirp
->buffer
.push_back(dir_result_t::dentry(dn
->offset
, dname
, in
));
1201 ldout(cct
, 15) << __func__
<< " " << hex
<< dn
->offset
<< dec
<< ": '" << dname
<< "' -> " << in
->ino
<< dendl
;
1205 dirp
->last_name
= dname
;
1207 dirp
->next_offset
= 2;
1209 dirp
->next_offset
= readdir_offset
;
1211 if (dir
->is_empty())
1218 * insert a trace from a MDS reply into the cache.
1220 Inode
* Client::insert_trace(MetaRequest
*request
, MetaSession
*session
)
1222 MClientReply
*reply
= request
->reply
;
1223 int op
= request
->get_op();
1225 ldout(cct
, 10) << "insert_trace from " << request
->sent_stamp
<< " mds." << session
->mds_num
1226 << " is_target=" << (int)reply
->head
.is_target
1227 << " is_dentry=" << (int)reply
->head
.is_dentry
1230 bufferlist::iterator p
= reply
->get_trace_bl().begin();
1231 if (request
->got_unsafe
) {
1232 ldout(cct
, 10) << "insert_trace -- already got unsafe; ignoring" << dendl
;
1238 ldout(cct
, 10) << "insert_trace -- no trace" << dendl
;
1240 Dentry
*d
= request
->dentry();
1242 Inode
*diri
= d
->dir
->parent_inode
;
1243 diri
->dir_release_count
++;
1244 clear_dir_complete_and_ordered(diri
, true);
1247 if (d
&& reply
->get_result() == 0) {
1248 if (op
== CEPH_MDS_OP_RENAME
) {
1250 Dentry
*od
= request
->old_dentry();
1251 ldout(cct
, 10) << " unlinking rename src dn " << od
<< " for traceless reply" << dendl
;
1253 unlink(od
, true, true); // keep dir, dentry
1254 } else if (op
== CEPH_MDS_OP_RMDIR
||
1255 op
== CEPH_MDS_OP_UNLINK
) {
1257 ldout(cct
, 10) << " unlinking unlink/rmdir dn " << d
<< " for traceless reply" << dendl
;
1258 unlink(d
, true, true); // keep dir, dentry
1264 ConnectionRef con
= request
->reply
->get_connection();
1265 uint64_t features
= con
->get_features();
1266 ldout(cct
, 10) << " features 0x" << hex
<< features
<< dec
<< dendl
;
1269 SnapRealm
*realm
= NULL
;
1270 if (reply
->snapbl
.length())
1271 update_snap_trace(reply
->snapbl
, &realm
);
1273 ldout(cct
, 10) << " hrm "
1274 << " is_target=" << (int)reply
->head
.is_target
1275 << " is_dentry=" << (int)reply
->head
.is_dentry
1284 if (reply
->head
.is_dentry
) {
1285 dirst
.decode(p
, features
);
1288 ::decode(dlease
, p
);
1292 if (reply
->head
.is_target
) {
1293 ist
.decode(p
, features
);
1294 if (cct
->_conf
->client_debug_getattr_caps
) {
1295 unsigned wanted
= 0;
1296 if (op
== CEPH_MDS_OP_GETATTR
|| op
== CEPH_MDS_OP_LOOKUP
)
1297 wanted
= request
->head
.args
.getattr
.mask
;
1298 else if (op
== CEPH_MDS_OP_OPEN
|| op
== CEPH_MDS_OP_CREATE
)
1299 wanted
= request
->head
.args
.open
.mask
;
1301 if ((wanted
& CEPH_CAP_XATTR_SHARED
) &&
1302 !(ist
.xattr_version
> 0 && ist
.xattrbl
.length() > 0))
1303 assert(0 == "MDS reply does not contain xattrs");
1306 in
= add_update_inode(&ist
, request
->sent_stamp
, session
,
1311 if (reply
->head
.is_dentry
) {
1312 diri
= add_update_inode(&dirst
, request
->sent_stamp
, session
,
1314 update_dir_dist(diri
, &dst
); // dir stat info is attached to ..
1317 Dir
*dir
= diri
->open_dir();
1318 insert_dentry_inode(dir
, dname
, &dlease
, in
, request
->sent_stamp
, session
,
1319 (op
== CEPH_MDS_OP_RENAME
) ? request
->old_dentry() : NULL
);
1322 if (diri
->dir
&& diri
->dir
->dentries
.count(dname
)) {
1323 dn
= diri
->dir
->dentries
[dname
];
1325 diri
->dir_ordered_count
++;
1326 clear_dir_complete_and_ordered(diri
, false);
1327 unlink(dn
, true, true); // keep dir, dentry
1330 if (dlease
.duration_ms
> 0) {
1332 Dir
*dir
= diri
->open_dir();
1333 dn
= link(dir
, dname
, NULL
, NULL
);
1335 update_dentry_lease(dn
, &dlease
, request
->sent_stamp
, session
);
1338 } else if (op
== CEPH_MDS_OP_LOOKUPSNAP
||
1339 op
== CEPH_MDS_OP_MKSNAP
) {
1340 ldout(cct
, 10) << " faking snap lookup weirdness" << dendl
;
1341 // fake it for snap lookup
1342 vinodeno_t vino
= ist
.vino
;
1343 vino
.snapid
= CEPH_SNAPDIR
;
1344 assert(inode_map
.count(vino
));
1345 diri
= inode_map
[vino
];
1347 string dname
= request
->path
.last_dentry();
1350 dlease
.duration_ms
= 0;
1353 Dir
*dir
= diri
->open_dir();
1354 insert_dentry_inode(dir
, dname
, &dlease
, in
, request
->sent_stamp
, session
);
1356 if (diri
->dir
&& diri
->dir
->dentries
.count(dname
)) {
1357 Dentry
*dn
= diri
->dir
->dentries
[dname
];
1359 unlink(dn
, true, true); // keep dir, dentry
1365 if (op
== CEPH_MDS_OP_READDIR
||
1366 op
== CEPH_MDS_OP_LSSNAP
) {
1367 insert_readdir_results(request
, session
, in
);
1368 } else if (op
== CEPH_MDS_OP_LOOKUPNAME
) {
1369 // hack: return parent inode instead
1373 if (request
->dentry() == NULL
&& in
!= request
->inode()) {
1374 // pin the target inode if its parent dentry is not pinned
1375 request
->set_other_inode(in
);
1380 put_snap_realm(realm
);
1382 request
->target
= in
;
1388 mds_rank_t
Client::choose_target_mds(MetaRequest
*req
, Inode
** phash_diri
)
1390 mds_rank_t mds
= MDS_RANK_NONE
;
1392 bool is_hash
= false;
1398 if (req
->resend_mds
>= 0) {
1399 mds
= req
->resend_mds
;
1400 req
->resend_mds
= -1;
1401 ldout(cct
, 10) << "choose_target_mds resend_mds specified as mds." << mds
<< dendl
;
1405 if (cct
->_conf
->client_use_random_mds
)
1411 ldout(cct
, 20) << "choose_target_mds starting with req->inode " << *in
<< dendl
;
1412 if (req
->path
.depth()) {
1413 hash
= in
->hash_dentry_name(req
->path
[0]);
1414 ldout(cct
, 20) << "choose_target_mds inode dir hash is " << (int)in
->dir_layout
.dl_dir_hash
1415 << " on " << req
->path
[0]
1416 << " => " << hash
<< dendl
;
1421 in
= de
->inode
.get();
1422 ldout(cct
, 20) << "choose_target_mds starting with req->dentry inode " << *in
<< dendl
;
1424 in
= de
->dir
->parent_inode
;
1425 hash
= in
->hash_dentry_name(de
->name
);
1426 ldout(cct
, 20) << "choose_target_mds dentry dir hash is " << (int)in
->dir_layout
.dl_dir_hash
1427 << " on " << de
->name
1428 << " => " << hash
<< dendl
;
1433 if (in
->snapid
!= CEPH_NOSNAP
) {
1434 ldout(cct
, 10) << "choose_target_mds " << *in
<< " is snapped, using nonsnap parent" << dendl
;
1435 while (in
->snapid
!= CEPH_NOSNAP
) {
1436 if (in
->snapid
== CEPH_SNAPDIR
)
1437 in
= in
->snapdir_parent
.get();
1438 else if (!in
->dn_set
.empty())
1439 /* In most cases there will only be one dentry, so getting it
1440 * will be the correct action. If there are multiple hard links,
1441 * I think the MDS should be able to redirect as needed*/
1442 in
= in
->get_first_parent()->dir
->parent_inode
;
1444 ldout(cct
, 10) << "got unlinked inode, can't look at parent" << dendl
;
1451 ldout(cct
, 20) << "choose_target_mds " << *in
<< " is_hash=" << is_hash
1452 << " hash=" << hash
<< dendl
;
1454 if (is_hash
&& S_ISDIR(in
->mode
) && !in
->fragmap
.empty()) {
1455 frag_t fg
= in
->dirfragtree
[hash
];
1456 if (in
->fragmap
.count(fg
)) {
1457 mds
= in
->fragmap
[fg
];
1460 ldout(cct
, 10) << "choose_target_mds from dirfragtree hash" << dendl
;
1465 if (req
->auth_is_best())
1467 if (!cap
&& !in
->caps
.empty())
1468 cap
= in
->caps
.begin()->second
;
1471 mds
= cap
->session
->mds_num
;
1472 ldout(cct
, 10) << "choose_target_mds from caps on inode " << *in
<< dendl
;
1479 mds
= _get_random_up_mds();
1480 ldout(cct
, 10) << "did not get mds through better means, so chose random mds " << mds
<< dendl
;
1484 ldout(cct
, 20) << "mds is " << mds
<< dendl
;
1489 void Client::connect_mds_targets(mds_rank_t mds
)
1491 ldout(cct
, 10) << "connect_mds_targets for mds." << mds
<< dendl
;
1492 assert(mds_sessions
.count(mds
));
1493 const MDSMap::mds_info_t
& info
= mdsmap
->get_mds_info(mds
);
1494 for (set
<mds_rank_t
>::const_iterator q
= info
.export_targets
.begin();
1495 q
!= info
.export_targets
.end();
1497 if (mds_sessions
.count(*q
) == 0 &&
1498 mdsmap
->is_clientreplay_or_active_or_stopping(*q
)) {
1499 ldout(cct
, 10) << "check_mds_sessions opening mds." << mds
1500 << " export target mds." << *q
<< dendl
;
1501 _open_mds_session(*q
);
1506 void Client::dump_mds_sessions(Formatter
*f
)
1508 f
->dump_int("id", get_nodeid().v
);
1509 f
->open_array_section("sessions");
1510 for (map
<mds_rank_t
,MetaSession
*>::const_iterator p
= mds_sessions
.begin(); p
!= mds_sessions
.end(); ++p
) {
1511 f
->open_object_section("session");
1516 f
->dump_int("mdsmap_epoch", mdsmap
->get_epoch());
1518 void Client::dump_mds_requests(Formatter
*f
)
1520 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
1521 p
!= mds_requests
.end();
1523 f
->open_object_section("request");
1529 int Client::verify_reply_trace(int r
,
1530 MetaRequest
*request
, MClientReply
*reply
,
1531 InodeRef
*ptarget
, bool *pcreated
,
1532 const UserPerm
& perms
)
1534 // check whether this request actually did the create, and set created flag
1535 bufferlist extra_bl
;
1536 inodeno_t created_ino
;
1537 bool got_created_ino
= false;
1538 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
;
1540 extra_bl
.claim(reply
->get_extra_bl());
1541 if (extra_bl
.length() >= 8) {
1542 // if the extra bufferlist has a buffer, we assume its the created inode
1543 // and that this request to create succeeded in actually creating
1544 // the inode (won the race with other create requests)
1545 ::decode(created_ino
, extra_bl
);
1546 got_created_ino
= true;
1547 ldout(cct
, 10) << "make_request created ino " << created_ino
<< dendl
;
1551 *pcreated
= got_created_ino
;
1553 if (request
->target
) {
1554 *ptarget
= request
->target
;
1555 ldout(cct
, 20) << "make_request target is " << *ptarget
->get() << dendl
;
1557 if (got_created_ino
&& (p
= inode_map
.find(vinodeno_t(created_ino
, CEPH_NOSNAP
))) != inode_map
.end()) {
1558 (*ptarget
) = p
->second
;
1559 ldout(cct
, 20) << "make_request created, target is " << *ptarget
->get() << dendl
;
1561 // we got a traceless reply, and need to look up what we just
1562 // created. for now, do this by name. someday, do this by the
1563 // ino... which we know! FIXME.
1565 Dentry
*d
= request
->dentry();
1568 ldout(cct
, 10) << "make_request got traceless reply, looking up #"
1569 << d
->dir
->parent_inode
->ino
<< "/" << d
->name
1570 << " got_ino " << got_created_ino
1571 << " ino " << created_ino
1573 r
= _do_lookup(d
->dir
->parent_inode
, d
->name
, request
->regetattr_mask
,
1576 // if the dentry is not linked, just do our best. see #5021.
1577 assert(0 == "how did this happen? i want logs!");
1580 Inode
*in
= request
->inode();
1581 ldout(cct
, 10) << "make_request got traceless reply, forcing getattr on #"
1582 << in
->ino
<< dendl
;
1583 r
= _getattr(in
, request
->regetattr_mask
, perms
, true);
1587 // verify ino returned in reply and trace_dist are the same
1588 if (got_created_ino
&&
1589 created_ino
.val
!= target
->ino
.val
) {
1590 ldout(cct
, 5) << "create got ino " << created_ino
<< " but then failed on lookup; EINTR?" << dendl
;
1594 ptarget
->swap(target
);
1606 * Blocking helper to make an MDS request.
1608 * If the ptarget flag is set, behavior changes slightly: the caller
1609 * expects to get a pointer to the inode we are creating or operating
1610 * on. As a result, we will follow up any traceless mutation reply
1611 * with a getattr or lookup to transparently handle a traceless reply
1612 * from the MDS (as when the MDS restarts and the client has to replay
1615 * @param request the MetaRequest to execute
1616 * @param perms The user uid/gid to execute as (eventually, full group lists?)
1617 * @param ptarget [optional] address to store a pointer to the target inode we want to create or operate on
1618 * @param pcreated [optional; required if ptarget] where to store a bool of whether our create atomically created a file
1619 * @param use_mds [optional] prefer a specific mds (-1 for default)
1620 * @param pdirbl [optional; disallowed if ptarget] where to pass extra reply payload to the caller
1622 int Client::make_request(MetaRequest
*request
,
1623 const UserPerm
& perms
,
1624 InodeRef
*ptarget
, bool *pcreated
,
1630 // assign a unique tid
1631 ceph_tid_t tid
= ++last_tid
;
1632 request
->set_tid(tid
);
1635 request
->op_stamp
= ceph_clock_now();
1638 mds_requests
[tid
] = request
->get();
1639 if (oldest_tid
== 0 && request
->get_op() != CEPH_MDS_OP_SETFILELOCK
)
1642 request
->set_caller_perms(perms
);
1644 if (cct
->_conf
->client_inject_fixed_oldest_tid
) {
1645 ldout(cct
, 20) << __func__
<< " injecting fixed oldest_client_tid(1)" << dendl
;
1646 request
->set_oldest_client_tid(1);
1648 request
->set_oldest_client_tid(oldest_tid
);
1653 request
->resend_mds
= use_mds
;
1656 if (request
->aborted())
1660 request
->abort(-EBLACKLISTED
);
1666 request
->caller_cond
= &caller_cond
;
1669 Inode
*hash_diri
= NULL
;
1670 mds_rank_t mds
= choose_target_mds(request
, &hash_diri
);
1671 int mds_state
= (mds
== MDS_RANK_NONE
) ? MDSMap::STATE_NULL
: mdsmap
->get_state(mds
);
1672 if (mds_state
!= MDSMap::STATE_ACTIVE
&& mds_state
!= MDSMap::STATE_STOPPING
) {
1673 if (mds_state
== MDSMap::STATE_NULL
&& mds
>= mdsmap
->get_max_mds()) {
1675 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, remove it from fragmap" << dendl
;
1676 _fragmap_remove_stopped_mds(hash_diri
, mds
);
1678 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, trying a random mds" << dendl
;
1679 request
->resend_mds
= _get_random_up_mds();
1682 ldout(cct
, 10) << " target mds." << mds
<< " not active, waiting for new mdsmap" << dendl
;
1683 wait_on_list(waiting_for_mdsmap
);
1689 MetaSession
*session
= NULL
;
1690 if (!have_open_session(mds
)) {
1691 session
= _get_or_open_mds_session(mds
);
1694 if (session
->state
== MetaSession::STATE_OPENING
) {
1695 ldout(cct
, 10) << "waiting for session to mds." << mds
<< " to open" << dendl
;
1696 wait_on_context_list(session
->waiting_for_open
);
1697 // Abort requests on REJECT from MDS
1698 if (rejected_by_mds
.count(mds
)) {
1699 request
->abort(-EPERM
);
1705 if (!have_open_session(mds
))
1708 session
= mds_sessions
[mds
];
1712 send_request(request
, session
);
1715 ldout(cct
, 20) << "awaiting reply|forward|kick on " << &caller_cond
<< dendl
;
1716 request
->kick
= false;
1717 while (!request
->reply
&& // reply
1718 request
->resend_mds
< 0 && // forward
1720 caller_cond
.Wait(client_lock
);
1721 request
->caller_cond
= NULL
;
1723 // did we get a reply?
1728 if (!request
->reply
) {
1729 assert(request
->aborted());
1730 assert(!request
->got_unsafe
);
1731 r
= request
->get_abort_code();
1732 request
->item
.remove_myself();
1733 unregister_request(request
);
1734 put_request(request
); // ours
1739 MClientReply
*reply
= request
->reply
;
1740 request
->reply
= NULL
;
1741 r
= reply
->get_result();
1743 request
->success
= true;
1745 // kick dispatcher (we've got it!)
1746 assert(request
->dispatch_cond
);
1747 request
->dispatch_cond
->Signal();
1748 ldout(cct
, 20) << "sendrecv kickback on tid " << tid
<< " " << request
->dispatch_cond
<< dendl
;
1749 request
->dispatch_cond
= 0;
1751 if (r
>= 0 && ptarget
)
1752 r
= verify_reply_trace(r
, request
, reply
, ptarget
, pcreated
, perms
);
1755 pdirbl
->claim(reply
->get_extra_bl());
1758 utime_t lat
= ceph_clock_now();
1759 lat
-= request
->sent_stamp
;
1760 ldout(cct
, 20) << "lat " << lat
<< dendl
;
1761 logger
->tinc(l_c_lat
, lat
);
1762 logger
->tinc(l_c_reply
, lat
);
1764 put_request(request
);
1770 void Client::unregister_request(MetaRequest
*req
)
1772 mds_requests
.erase(req
->tid
);
1773 if (req
->tid
== oldest_tid
) {
1774 map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.upper_bound(oldest_tid
);
1776 if (p
== mds_requests
.end()) {
1780 if (p
->second
->get_op() != CEPH_MDS_OP_SETFILELOCK
) {
1781 oldest_tid
= p
->first
;
1790 void Client::put_request(MetaRequest
*request
)
1792 if (request
->_put()) {
1794 if (request
->success
)
1795 op
= request
->get_op();
1797 request
->take_other_inode(&other_in
);
1801 (op
== CEPH_MDS_OP_RMDIR
||
1802 op
== CEPH_MDS_OP_RENAME
||
1803 op
== CEPH_MDS_OP_RMSNAP
)) {
1804 _try_to_trim_inode(other_in
.get(), false);
1809 int Client::encode_inode_release(Inode
*in
, MetaRequest
*req
,
1810 mds_rank_t mds
, int drop
,
1811 int unless
, int force
)
1813 ldout(cct
, 20) << "encode_inode_release enter(in:" << *in
<< ", req:" << req
1814 << " mds:" << mds
<< ", drop:" << drop
<< ", unless:" << unless
1815 << ", have:" << ", force:" << force
<< ")" << dendl
;
1817 if (in
->caps
.count(mds
)) {
1818 Cap
*caps
= in
->caps
[mds
];
1819 drop
&= ~(in
->dirty_caps
| get_caps_used(in
));
1820 if ((drop
& caps
->issued
) &&
1821 !(unless
& caps
->issued
)) {
1822 ldout(cct
, 25) << "Dropping caps. Initial " << ccap_string(caps
->issued
) << dendl
;
1823 caps
->issued
&= ~drop
;
1824 caps
->implemented
&= ~drop
;
1826 ldout(cct
, 25) << "Now have: " << ccap_string(caps
->issued
) << dendl
;
1831 ceph_mds_request_release rel
;
1833 rel
.cap_id
= caps
->cap_id
;
1834 rel
.seq
= caps
->seq
;
1835 rel
.issue_seq
= caps
->issue_seq
;
1836 rel
.mseq
= caps
->mseq
;
1837 rel
.caps
= caps
->implemented
;
1838 rel
.wanted
= caps
->wanted
;
1841 req
->cap_releases
.push_back(MClientRequest::Release(rel
,""));
1844 ldout(cct
, 25) << "encode_inode_release exit(in:" << *in
<< ") released:"
1845 << released
<< dendl
;
1849 void Client::encode_dentry_release(Dentry
*dn
, MetaRequest
*req
,
1850 mds_rank_t mds
, int drop
, int unless
)
1852 ldout(cct
, 20) << "encode_dentry_release enter(dn:"
1853 << dn
<< ")" << dendl
;
1856 released
= encode_inode_release(dn
->dir
->parent_inode
, req
,
1857 mds
, drop
, unless
, 1);
1858 if (released
&& dn
->lease_mds
== mds
) {
1859 ldout(cct
, 25) << "preemptively releasing dn to mds" << dendl
;
1860 MClientRequest::Release
& rel
= req
->cap_releases
.back();
1861 rel
.item
.dname_len
= dn
->name
.length();
1862 rel
.item
.dname_seq
= dn
->lease_seq
;
1863 rel
.dname
= dn
->name
;
1865 ldout(cct
, 25) << "encode_dentry_release exit(dn:"
1866 << dn
<< ")" << dendl
;
1871 * This requires the MClientRequest *request member to be set.
1872 * It will error out horribly without one.
1873 * Additionally, if you set any *drop member, you'd better have
1874 * set the corresponding dentry!
1876 void Client::encode_cap_releases(MetaRequest
*req
, mds_rank_t mds
)
1878 ldout(cct
, 20) << "encode_cap_releases enter (req: "
1879 << req
<< ", mds: " << mds
<< ")" << dendl
;
1880 if (req
->inode_drop
&& req
->inode())
1881 encode_inode_release(req
->inode(), req
,
1882 mds
, req
->inode_drop
,
1885 if (req
->old_inode_drop
&& req
->old_inode())
1886 encode_inode_release(req
->old_inode(), req
,
1887 mds
, req
->old_inode_drop
,
1888 req
->old_inode_unless
);
1889 if (req
->other_inode_drop
&& req
->other_inode())
1890 encode_inode_release(req
->other_inode(), req
,
1891 mds
, req
->other_inode_drop
,
1892 req
->other_inode_unless
);
1894 if (req
->dentry_drop
&& req
->dentry())
1895 encode_dentry_release(req
->dentry(), req
,
1896 mds
, req
->dentry_drop
,
1897 req
->dentry_unless
);
1899 if (req
->old_dentry_drop
&& req
->old_dentry())
1900 encode_dentry_release(req
->old_dentry(), req
,
1901 mds
, req
->old_dentry_drop
,
1902 req
->old_dentry_unless
);
1903 ldout(cct
, 25) << "encode_cap_releases exit (req: "
1904 << req
<< ", mds " << mds
<<dendl
;
1907 bool Client::have_open_session(mds_rank_t mds
)
1910 mds_sessions
.count(mds
) &&
1911 (mds_sessions
[mds
]->state
== MetaSession::STATE_OPEN
||
1912 mds_sessions
[mds
]->state
== MetaSession::STATE_STALE
);
1915 MetaSession
*Client::_get_mds_session(mds_rank_t mds
, Connection
*con
)
1917 if (mds_sessions
.count(mds
) == 0)
1919 MetaSession
*s
= mds_sessions
[mds
];
1925 MetaSession
*Client::_get_or_open_mds_session(mds_rank_t mds
)
1927 if (mds_sessions
.count(mds
))
1928 return mds_sessions
[mds
];
1929 return _open_mds_session(mds
);
1933 * Populate a map of strings with client-identifying metadata,
1934 * such as the hostname. Call this once at initialization.
1936 void Client::populate_metadata(const std::string
&mount_root
)
1942 metadata
["hostname"] = u
.nodename
;
1943 ldout(cct
, 20) << __func__
<< " read hostname '" << u
.nodename
<< "'" << dendl
;
1945 ldout(cct
, 1) << __func__
<< " failed to read hostname (" << cpp_strerror(r
) << ")" << dendl
;
1948 metadata
["pid"] = stringify(getpid());
1950 // Ceph entity id (the '0' in "client.0")
1951 metadata
["entity_id"] = cct
->_conf
->name
.get_id();
1953 // Our mount position
1954 if (!mount_root
.empty()) {
1955 metadata
["root"] = mount_root
;
1959 metadata
["ceph_version"] = pretty_version_to_str();
1960 metadata
["ceph_sha1"] = git_version_to_str();
1962 // Apply any metadata from the user's configured overrides
1963 std::vector
<std::string
> tokens
;
1964 get_str_vec(cct
->_conf
->client_metadata
, ",", tokens
);
1965 for (const auto &i
: tokens
) {
1966 auto eqpos
= i
.find("=");
1967 // Throw out anything that isn't of the form "<str>=<str>"
1968 if (eqpos
== 0 || eqpos
== std::string::npos
|| eqpos
== i
.size()) {
1969 lderr(cct
) << "Invalid metadata keyval pair: '" << i
<< "'" << dendl
;
1972 metadata
[i
.substr(0, eqpos
)] = i
.substr(eqpos
+ 1);
1977 * Optionally add or override client metadata fields.
1979 void Client::update_metadata(std::string
const &k
, std::string
const &v
)
1981 Mutex::Locker
l(client_lock
);
1982 assert(initialized
);
1984 if (metadata
.count(k
)) {
1985 ldout(cct
, 1) << __func__
<< " warning, overriding metadata field '" << k
1986 << "' from '" << metadata
[k
] << "' to '" << v
<< "'" << dendl
;
1992 MetaSession
*Client::_open_mds_session(mds_rank_t mds
)
1994 ldout(cct
, 10) << "_open_mds_session mds." << mds
<< dendl
;
1995 assert(mds_sessions
.count(mds
) == 0);
1996 MetaSession
*session
= new MetaSession
;
1997 session
->mds_num
= mds
;
1999 session
->inst
= mdsmap
->get_inst(mds
);
2000 session
->con
= messenger
->get_connection(session
->inst
);
2001 session
->state
= MetaSession::STATE_OPENING
;
2002 session
->mds_state
= MDSMap::STATE_NULL
;
2003 mds_sessions
[mds
] = session
;
2005 // Maybe skip sending a request to open if this MDS daemon
2006 // has previously sent us a REJECT.
2007 if (rejected_by_mds
.count(mds
)) {
2008 if (rejected_by_mds
[mds
] == session
->inst
) {
2009 ldout(cct
, 4) << "_open_mds_session mds." << mds
<< " skipping "
2010 "because we were rejected" << dendl
;
2013 ldout(cct
, 4) << "_open_mds_session mds." << mds
<< " old inst "
2014 "rejected us, trying with new inst" << dendl
;
2015 rejected_by_mds
.erase(mds
);
2019 MClientSession
*m
= new MClientSession(CEPH_SESSION_REQUEST_OPEN
);
2020 m
->client_meta
= metadata
;
2021 session
->con
->send_message(m
);
2025 void Client::_close_mds_session(MetaSession
*s
)
2027 ldout(cct
, 2) << "_close_mds_session mds." << s
->mds_num
<< " seq " << s
->seq
<< dendl
;
2028 s
->state
= MetaSession::STATE_CLOSING
;
2029 s
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE
, s
->seq
));
2032 void Client::_closed_mds_session(MetaSession
*s
)
2034 s
->state
= MetaSession::STATE_CLOSED
;
2035 s
->con
->mark_down();
2036 signal_context_list(s
->waiting_for_open
);
2037 mount_cond
.Signal();
2038 remove_session_caps(s
);
2039 kick_requests_closed(s
);
2040 mds_sessions
.erase(s
->mds_num
);
2044 void Client::handle_client_session(MClientSession
*m
)
2046 mds_rank_t from
= mds_rank_t(m
->get_source().num());
2047 ldout(cct
, 10) << "handle_client_session " << *m
<< " from mds." << from
<< dendl
;
2049 MetaSession
*session
= _get_mds_session(from
, m
->get_connection().get());
2051 ldout(cct
, 10) << " discarding session message from sessionless mds " << m
->get_source_inst() << dendl
;
2056 switch (m
->get_op()) {
2057 case CEPH_SESSION_OPEN
:
2058 renew_caps(session
);
2059 session
->state
= MetaSession::STATE_OPEN
;
2061 mount_cond
.Signal();
2063 connect_mds_targets(from
);
2064 signal_context_list(session
->waiting_for_open
);
2067 case CEPH_SESSION_CLOSE
:
2068 _closed_mds_session(session
);
2071 case CEPH_SESSION_RENEWCAPS
:
2072 if (session
->cap_renew_seq
== m
->get_seq()) {
2074 session
->last_cap_renew_request
+ mdsmap
->get_session_timeout();
2075 wake_inode_waiters(session
);
2079 case CEPH_SESSION_STALE
:
2080 // invalidate session caps/leases
2082 session
->cap_ttl
= ceph_clock_now();
2083 session
->cap_ttl
-= 1;
2084 renew_caps(session
);
2087 case CEPH_SESSION_RECALL_STATE
:
2088 trim_caps(session
, m
->get_max_caps());
2091 case CEPH_SESSION_FLUSHMSG
:
2092 session
->con
->send_message(new MClientSession(CEPH_SESSION_FLUSHMSG_ACK
, m
->get_seq()));
2095 case CEPH_SESSION_FORCE_RO
:
2096 force_session_readonly(session
);
2099 case CEPH_SESSION_REJECT
:
2100 rejected_by_mds
[session
->mds_num
] = session
->inst
;
2101 _closed_mds_session(session
);
2112 bool Client::_any_stale_sessions() const
2114 assert(client_lock
.is_locked_by_me());
2116 for (const auto &i
: mds_sessions
) {
2117 if (i
.second
->state
== MetaSession::STATE_STALE
) {
2125 void Client::_kick_stale_sessions()
2127 ldout(cct
, 1) << "kick_stale_sessions" << dendl
;
2129 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2130 p
!= mds_sessions
.end(); ) {
2131 MetaSession
*s
= p
->second
;
2133 if (s
->state
== MetaSession::STATE_STALE
)
2134 _closed_mds_session(s
);
2138 void Client::send_request(MetaRequest
*request
, MetaSession
*session
,
2139 bool drop_cap_releases
)
2142 mds_rank_t mds
= session
->mds_num
;
2143 ldout(cct
, 10) << "send_request rebuilding request " << request
->get_tid()
2144 << " for mds." << mds
<< dendl
;
2145 MClientRequest
*r
= build_client_request(request
);
2146 if (request
->dentry()) {
2147 r
->set_dentry_wanted();
2149 if (request
->got_unsafe
) {
2150 r
->set_replayed_op();
2151 if (request
->target
)
2152 r
->head
.ino
= request
->target
->ino
;
2154 encode_cap_releases(request
, mds
);
2155 if (drop_cap_releases
) // we haven't send cap reconnect yet, drop cap releases
2156 request
->cap_releases
.clear();
2158 r
->releases
.swap(request
->cap_releases
);
2160 r
->set_mdsmap_epoch(mdsmap
->get_epoch());
2161 if (r
->head
.op
== CEPH_MDS_OP_SETXATTR
) {
2162 objecter
->with_osdmap([r
](const OSDMap
& o
) {
2163 r
->set_osdmap_epoch(o
.get_epoch());
2167 if (request
->mds
== -1) {
2168 request
->sent_stamp
= ceph_clock_now();
2169 ldout(cct
, 20) << "send_request set sent_stamp to " << request
->sent_stamp
<< dendl
;
2173 Inode
*in
= request
->inode();
2174 if (in
&& in
->caps
.count(mds
))
2175 request
->sent_on_mseq
= in
->caps
[mds
]->mseq
;
2177 session
->requests
.push_back(&request
->item
);
2179 ldout(cct
, 10) << "send_request " << *r
<< " to mds." << mds
<< dendl
;
2180 session
->con
->send_message(r
);
2183 MClientRequest
* Client::build_client_request(MetaRequest
*request
)
2185 MClientRequest
*req
= new MClientRequest(request
->get_op());
2186 req
->set_tid(request
->tid
);
2187 req
->set_stamp(request
->op_stamp
);
2188 memcpy(&req
->head
, &request
->head
, sizeof(ceph_mds_request_head
));
2190 // if the filepath's haven't been set, set them!
2191 if (request
->path
.empty()) {
2192 Inode
*in
= request
->inode();
2193 Dentry
*de
= request
->dentry();
2195 in
->make_nosnap_relative_path(request
->path
);
2198 de
->inode
->make_nosnap_relative_path(request
->path
);
2200 de
->dir
->parent_inode
->make_nosnap_relative_path(request
->path
);
2201 request
->path
.push_dentry(de
->name
);
2203 else ldout(cct
, 1) << "Warning -- unable to construct a filepath!"
2204 << " No path, inode, or appropriately-endowed dentry given!"
2206 } else ldout(cct
, 1) << "Warning -- unable to construct a filepath!"
2207 << " No path, inode, or dentry given!"
2210 req
->set_filepath(request
->get_filepath());
2211 req
->set_filepath2(request
->get_filepath2());
2212 req
->set_data(request
->data
);
2213 req
->set_retry_attempt(request
->retry_attempt
++);
2214 req
->head
.num_fwd
= request
->num_fwd
;
2216 int gid_count
= request
->perms
.get_gids(&_gids
);
2217 req
->set_gid_list(gid_count
, _gids
);
2223 void Client::handle_client_request_forward(MClientRequestForward
*fwd
)
2225 mds_rank_t mds
= mds_rank_t(fwd
->get_source().num());
2226 MetaSession
*session
= _get_mds_session(mds
, fwd
->get_connection().get());
2231 ceph_tid_t tid
= fwd
->get_tid();
2233 if (mds_requests
.count(tid
) == 0) {
2234 ldout(cct
, 10) << "handle_client_request_forward no pending request on tid " << tid
<< dendl
;
2239 MetaRequest
*request
= mds_requests
[tid
];
2242 // reset retry counter
2243 request
->retry_attempt
= 0;
2245 // request not forwarded, or dest mds has no session.
2247 ldout(cct
, 10) << "handle_client_request tid " << tid
2248 << " fwd " << fwd
->get_num_fwd()
2249 << " to mds." << fwd
->get_dest_mds()
2250 << ", resending to " << fwd
->get_dest_mds()
2254 request
->item
.remove_myself();
2255 request
->num_fwd
= fwd
->get_num_fwd();
2256 request
->resend_mds
= fwd
->get_dest_mds();
2257 request
->caller_cond
->Signal();
2262 bool Client::is_dir_operation(MetaRequest
*req
)
2264 int op
= req
->get_op();
2265 if (op
== CEPH_MDS_OP_MKNOD
|| op
== CEPH_MDS_OP_LINK
||
2266 op
== CEPH_MDS_OP_UNLINK
|| op
== CEPH_MDS_OP_RENAME
||
2267 op
== CEPH_MDS_OP_MKDIR
|| op
== CEPH_MDS_OP_RMDIR
||
2268 op
== CEPH_MDS_OP_SYMLINK
|| op
== CEPH_MDS_OP_CREATE
)
2273 void Client::handle_client_reply(MClientReply
*reply
)
2275 mds_rank_t mds_num
= mds_rank_t(reply
->get_source().num());
2276 MetaSession
*session
= _get_mds_session(mds_num
, reply
->get_connection().get());
2282 ceph_tid_t tid
= reply
->get_tid();
2283 bool is_safe
= reply
->is_safe();
2285 if (mds_requests
.count(tid
) == 0) {
2286 lderr(cct
) << "handle_client_reply no pending request on tid " << tid
2287 << " safe is:" << is_safe
<< dendl
;
2291 MetaRequest
*request
= mds_requests
.at(tid
);
2293 ldout(cct
, 20) << "handle_client_reply got a reply. Safe:" << is_safe
2294 << " tid " << tid
<< dendl
;
2296 if (request
->got_unsafe
&& !is_safe
) {
2297 //duplicate response
2298 ldout(cct
, 0) << "got a duplicate reply on tid " << tid
<< " from mds "
2299 << mds_num
<< " safe:" << is_safe
<< dendl
;
2304 if (-ESTALE
== reply
->get_result()) { // see if we can get to proper MDS
2305 ldout(cct
, 20) << "got ESTALE on tid " << request
->tid
2306 << " from mds." << request
->mds
<< dendl
;
2307 request
->send_to_auth
= true;
2308 request
->resend_mds
= choose_target_mds(request
);
2309 Inode
*in
= request
->inode();
2310 if (request
->resend_mds
>= 0 &&
2311 request
->resend_mds
== request
->mds
&&
2313 in
->caps
.count(request
->resend_mds
) == 0 ||
2314 request
->sent_on_mseq
== in
->caps
[request
->resend_mds
]->mseq
)) {
2315 // have to return ESTALE
2317 request
->caller_cond
->Signal();
2321 ldout(cct
, 20) << "have to return ESTALE" << dendl
;
2324 assert(request
->reply
== NULL
);
2325 request
->reply
= reply
;
2326 insert_trace(request
, session
);
2328 // Handle unsafe reply
2330 request
->got_unsafe
= true;
2331 session
->unsafe_requests
.push_back(&request
->unsafe_item
);
2332 if (is_dir_operation(request
)) {
2333 Inode
*dir
= request
->inode();
2335 dir
->unsafe_ops
.push_back(&request
->unsafe_dir_item
);
2337 if (request
->target
) {
2338 InodeRef
&in
= request
->target
;
2339 in
->unsafe_ops
.push_back(&request
->unsafe_target_item
);
2343 // Only signal the caller once (on the first reply):
2344 // Either its an unsafe reply, or its a safe reply and no unsafe reply was sent.
2345 if (!is_safe
|| !request
->got_unsafe
) {
2347 request
->dispatch_cond
= &cond
;
2350 ldout(cct
, 20) << "handle_client_reply signalling caller " << (void*)request
->caller_cond
<< dendl
;
2351 request
->caller_cond
->Signal();
2353 // wake for kick back
2354 while (request
->dispatch_cond
) {
2355 ldout(cct
, 20) << "handle_client_reply awaiting kickback on tid " << tid
<< " " << &cond
<< dendl
;
2356 cond
.Wait(client_lock
);
2361 // the filesystem change is committed to disk
2362 // we're done, clean up
2363 if (request
->got_unsafe
) {
2364 request
->unsafe_item
.remove_myself();
2365 request
->unsafe_dir_item
.remove_myself();
2366 request
->unsafe_target_item
.remove_myself();
2367 signal_cond_list(request
->waitfor_safe
);
2369 request
->item
.remove_myself();
2370 unregister_request(request
);
2373 mount_cond
.Signal();
2376 void Client::_handle_full_flag(int64_t pool
)
2378 ldout(cct
, 1) << __func__
<< ": FULL: cancelling outstanding operations "
2379 << "on " << pool
<< dendl
;
2380 // Cancel all outstanding ops in this pool with -ENOSPC: it is necessary
2381 // to do this rather than blocking, because otherwise when we fill up we
2382 // potentially lock caps forever on files with dirty pages, and we need
2383 // to be able to release those caps to the MDS so that it can delete files
2384 // and free up space.
2385 epoch_t cancelled_epoch
= objecter
->op_cancel_writes(-ENOSPC
, pool
);
2387 // For all inodes with layouts in this pool and a pending flush write op
2388 // (i.e. one of the ones we will cancel), we've got to purge_set their data
2389 // from ObjectCacher so that it doesn't re-issue the write in response to
2390 // the ENOSPC error.
2391 // Fortunately since we're cancelling everything in a given pool, we don't
2392 // need to know which ops belong to which ObjectSet, we can just blow all
2393 // the un-flushed cached data away and mark any dirty inodes' async_err
2394 // field with -ENOSPC as long as we're sure all the ops we cancelled were
2395 // affecting this pool, and all the objectsets we're purging were also
2397 for (unordered_map
<vinodeno_t
,Inode
*>::iterator i
= inode_map
.begin();
2398 i
!= inode_map
.end(); ++i
)
2400 Inode
*inode
= i
->second
;
2401 if (inode
->oset
.dirty_or_tx
2402 && (pool
== -1 || inode
->layout
.pool_id
== pool
)) {
2403 ldout(cct
, 4) << __func__
<< ": FULL: inode 0x" << std::hex
<< i
->first
<< std::dec
2404 << " has dirty objects, purging and setting ENOSPC" << dendl
;
2405 objectcacher
->purge_set(&inode
->oset
);
2406 inode
->set_async_err(-ENOSPC
);
2410 if (cancelled_epoch
!= (epoch_t
)-1) {
2411 set_cap_epoch_barrier(cancelled_epoch
);
2415 void Client::handle_osd_map(MOSDMap
*m
)
2417 std::set
<entity_addr_t
> new_blacklists
;
2418 objecter
->consume_blacklist_events(&new_blacklists
);
2420 const auto myaddr
= messenger
->get_myaddr();
2421 if (!blacklisted
&& new_blacklists
.count(myaddr
)) {
2422 auto epoch
= objecter
->with_osdmap([](const OSDMap
&o
){
2423 return o
.get_epoch();
2425 lderr(cct
) << "I was blacklisted at osd epoch " << epoch
<< dendl
;
2427 for (std::map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2428 p
!= mds_requests
.end(); ) {
2429 auto req
= p
->second
;
2431 req
->abort(-EBLACKLISTED
);
2432 if (req
->caller_cond
) {
2434 req
->caller_cond
->Signal();
2438 // Progress aborts on any requests that were on this waitlist. Any
2439 // requests that were on a waiting_for_open session waitlist
2440 // will get kicked during close session below.
2441 signal_cond_list(waiting_for_mdsmap
);
2443 // Force-close all sessions: assume this is not abandoning any state
2444 // on the MDS side because the MDS will have seen the blacklist too.
2445 while(!mds_sessions
.empty()) {
2446 auto i
= mds_sessions
.begin();
2447 auto session
= i
->second
;
2448 _closed_mds_session(session
);
2451 // Since we know all our OSD ops will fail, cancel them all preemtively,
2452 // so that on an unhealthy cluster we can umount promptly even if e.g.
2453 // some PGs were inaccessible.
2454 objecter
->op_cancel_writes(-EBLACKLISTED
);
2456 } else if (blacklisted
) {
2457 // Handle case where we were blacklisted but no longer are
2458 blacklisted
= objecter
->with_osdmap([myaddr
](const OSDMap
&o
){
2459 return o
.is_blacklisted(myaddr
);});
2462 if (objecter
->osdmap_full_flag()) {
2463 _handle_full_flag(-1);
2465 // Accumulate local list of full pools so that I can drop
2466 // the objecter lock before re-entering objecter in
2468 std::vector
<int64_t> full_pools
;
2470 objecter
->with_osdmap([&full_pools
](const OSDMap
&o
) {
2471 for (const auto& kv
: o
.get_pools()) {
2472 if (kv
.second
.has_flag(pg_pool_t::FLAG_FULL
)) {
2473 full_pools
.push_back(kv
.first
);
2478 for (auto p
: full_pools
)
2479 _handle_full_flag(p
);
2481 // Subscribe to subsequent maps to watch for the full flag going
2482 // away. For the global full flag objecter does this for us, but
2483 // it pays no attention to the per-pool full flag so in this branch
2484 // we do it ourselves.
2485 if (!full_pools
.empty()) {
2486 objecter
->maybe_request_map();
2494 // ------------------------
2495 // incoming messages
2498 bool Client::ms_dispatch(Message
*m
)
2500 Mutex::Locker
l(client_lock
);
2502 ldout(cct
, 10) << "inactive, discarding " << *m
<< dendl
;
2507 switch (m
->get_type()) {
2508 // mounting and mds sessions
2509 case CEPH_MSG_MDS_MAP
:
2510 handle_mds_map(static_cast<MMDSMap
*>(m
));
2512 case CEPH_MSG_FS_MAP
:
2513 handle_fs_map(static_cast<MFSMap
*>(m
));
2515 case CEPH_MSG_FS_MAP_USER
:
2516 handle_fs_map_user(static_cast<MFSMapUser
*>(m
));
2518 case CEPH_MSG_CLIENT_SESSION
:
2519 handle_client_session(static_cast<MClientSession
*>(m
));
2522 case CEPH_MSG_OSD_MAP
:
2523 handle_osd_map(static_cast<MOSDMap
*>(m
));
2527 case CEPH_MSG_CLIENT_REQUEST_FORWARD
:
2528 handle_client_request_forward(static_cast<MClientRequestForward
*>(m
));
2530 case CEPH_MSG_CLIENT_REPLY
:
2531 handle_client_reply(static_cast<MClientReply
*>(m
));
2534 case CEPH_MSG_CLIENT_SNAP
:
2535 handle_snap(static_cast<MClientSnap
*>(m
));
2537 case CEPH_MSG_CLIENT_CAPS
:
2538 handle_caps(static_cast<MClientCaps
*>(m
));
2540 case CEPH_MSG_CLIENT_LEASE
:
2541 handle_lease(static_cast<MClientLease
*>(m
));
2543 case MSG_COMMAND_REPLY
:
2544 if (m
->get_source().type() == CEPH_ENTITY_TYPE_MDS
) {
2545 handle_command_reply(static_cast<MCommandReply
*>(m
));
2550 case CEPH_MSG_CLIENT_QUOTA
:
2551 handle_quota(static_cast<MClientQuota
*>(m
));
2560 ldout(cct
, 10) << "unmounting: trim pass, size was " << lru
.lru_get_size()
2561 << "+" << inode_map
.size() << dendl
;
2562 long unsigned size
= lru
.lru_get_size() + inode_map
.size();
2564 if (size
< lru
.lru_get_size() + inode_map
.size()) {
2565 ldout(cct
, 10) << "unmounting: trim pass, cache shrank, poking unmount()" << dendl
;
2566 mount_cond
.Signal();
2568 ldout(cct
, 10) << "unmounting: trim pass, size still " << lru
.lru_get_size()
2569 << "+" << inode_map
.size() << dendl
;
// Handle an incoming full FSMap from the monitor: replace the cached map,
// wake any threads blocked waiting for an fsmap, and acknowledge the
// subscription epoch to the monitor client.
// NOTE(review): extraction gaps — braces and some statements between the
// original lines are missing from this view.
2576 void Client::handle_fs_map(MFSMap
*m
)
// Take a fresh copy of the map carried by the message.
2578 fsmap
.reset(new FSMap(m
->get_fsmap()));
// Wake waiters blocked in wait_on_list(waiting_for_fsmap).
2581 signal_cond_list(waiting_for_fsmap
);
// Tell the MonClient which epoch we now have, so it can manage the sub.
2583 monclient
->sub_got("fsmap", fsmap
->get_epoch());
// Handle the user-visible (compact) FSMap variant: rebuild the cached
// fsmap_user from the message, ack the subscription, and wake waiters.
// NOTE(review): extraction gaps — braces/lines between original line
// numbers are missing from this view.
2586 void Client::handle_fs_map_user(MFSMapUser
*m
)
2588 fsmap_user
.reset(new FSMapUser
);
// Copy-assign the map payload out of the message.
2589 *fsmap_user
= m
->get_fsmap();
// Ack the "fsmap.user" subscription at the epoch we just received.
2592 monclient
->sub_got("fsmap.user", fsmap_user
->get_epoch());
// Wake any threads waiting for an fsmap (shared cond list with handle_fs_map).
2593 signal_cond_list(waiting_for_fsmap
);
2596 void Client::handle_mds_map(MMDSMap
* m
)
2598 if (m
->get_epoch() <= mdsmap
->get_epoch()) {
2599 ldout(cct
, 1) << "handle_mds_map epoch " << m
->get_epoch()
2600 << " is identical to or older than our "
2601 << mdsmap
->get_epoch() << dendl
;
2606 ldout(cct
, 1) << "handle_mds_map epoch " << m
->get_epoch() << dendl
;
2608 std::unique_ptr
<MDSMap
> oldmap(new MDSMap
);
2609 oldmap
.swap(mdsmap
);
2611 mdsmap
->decode(m
->get_encoded());
2613 // Cancel any commands for missing or laggy GIDs
2614 std::list
<ceph_tid_t
> cancel_ops
;
2615 auto &commands
= command_table
.get_commands();
2616 for (const auto &i
: commands
) {
2617 auto &op
= i
.second
;
2618 const mds_gid_t op_mds_gid
= op
.mds_gid
;
2619 if (mdsmap
->is_dne_gid(op_mds_gid
) || mdsmap
->is_laggy_gid(op_mds_gid
)) {
2620 ldout(cct
, 1) << __func__
<< ": cancelling command op " << i
.first
<< dendl
;
2621 cancel_ops
.push_back(i
.first
);
2623 std::ostringstream ss
;
2624 ss
<< "MDS " << op_mds_gid
<< " went away";
2625 *(op
.outs
) = ss
.str();
2627 op
.con
->mark_down();
2629 op
.on_finish
->complete(-ETIMEDOUT
);
2634 for (std::list
<ceph_tid_t
>::iterator i
= cancel_ops
.begin();
2635 i
!= cancel_ops
.end(); ++i
) {
2636 command_table
.erase(*i
);
2640 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2641 p
!= mds_sessions
.end(); ) {
2642 mds_rank_t mds
= p
->first
;
2643 MetaSession
*session
= p
->second
;
2646 int oldstate
= oldmap
->get_state(mds
);
2647 int newstate
= mdsmap
->get_state(mds
);
2648 if (!mdsmap
->is_up(mds
)) {
2649 session
->con
->mark_down();
2650 } else if (mdsmap
->get_inst(mds
) != session
->inst
) {
2651 session
->con
->mark_down();
2652 session
->inst
= mdsmap
->get_inst(mds
);
2653 // When new MDS starts to take over, notify kernel to trim unused entries
2654 // in its dcache/icache. Hopefully, the kernel will release some unused
2655 // inodes before the new MDS enters reconnect state.
2656 trim_cache_for_reconnect(session
);
2657 } else if (oldstate
== newstate
)
2658 continue; // no change
2660 session
->mds_state
= newstate
;
2661 if (newstate
== MDSMap::STATE_RECONNECT
) {
2662 session
->con
= messenger
->get_connection(session
->inst
);
2663 send_reconnect(session
);
2664 } else if (newstate
>= MDSMap::STATE_ACTIVE
) {
2665 if (oldstate
< MDSMap::STATE_ACTIVE
) {
2666 // kick new requests
2667 kick_requests(session
);
2668 kick_flushing_caps(session
);
2669 signal_context_list(session
->waiting_for_open
);
2670 kick_maxsize_requests(session
);
2671 wake_inode_waiters(session
);
2673 connect_mds_targets(mds
);
2674 } else if (newstate
== MDSMap::STATE_NULL
&&
2675 mds
>= mdsmap
->get_max_mds()) {
2676 _closed_mds_session(session
);
2680 // kick any waiting threads
2681 signal_cond_list(waiting_for_mdsmap
);
2685 monclient
->sub_got("mdsmap", mdsmap
->get_epoch());
2688 void Client::send_reconnect(MetaSession
*session
)
2690 mds_rank_t mds
= session
->mds_num
;
2691 ldout(cct
, 10) << "send_reconnect to mds." << mds
<< dendl
;
2693 // trim unused caps to reduce MDS's cache rejoin time
2694 trim_cache_for_reconnect(session
);
2696 session
->readonly
= false;
2698 if (session
->release
) {
2699 session
->release
->put();
2700 session
->release
= NULL
;
2703 // reset my cap seq number
2705 //connect to the mds' offload targets
2706 connect_mds_targets(mds
);
2707 //make sure unsafe requests get saved
2708 resend_unsafe_requests(session
);
2710 MClientReconnect
*m
= new MClientReconnect
;
2712 // i have an open session.
2713 ceph::unordered_set
<inodeno_t
> did_snaprealm
;
2714 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
= inode_map
.begin();
2715 p
!= inode_map
.end();
2717 Inode
*in
= p
->second
;
2718 if (in
->caps
.count(mds
)) {
2719 ldout(cct
, 10) << " caps on " << p
->first
2720 << " " << ccap_string(in
->caps
[mds
]->issued
)
2721 << " wants " << ccap_string(in
->caps_wanted())
2724 in
->make_long_path(path
);
2725 ldout(cct
, 10) << " path " << path
<< dendl
;
2728 _encode_filelocks(in
, flockbl
);
2730 Cap
*cap
= in
->caps
[mds
];
2731 cap
->seq
= 0; // reset seq.
2732 cap
->issue_seq
= 0; // reset seq.
2733 cap
->mseq
= 0; // reset seq.
2734 cap
->issued
= cap
->implemented
;
2736 snapid_t snap_follows
= 0;
2737 if (!in
->cap_snaps
.empty())
2738 snap_follows
= in
->cap_snaps
.begin()->first
;
2740 m
->add_cap(p
->first
.ino
,
2742 path
.get_ino(), path
.get_path(), // ino
2743 in
->caps_wanted(), // wanted
2744 cap
->issued
, // issued
2749 if (did_snaprealm
.count(in
->snaprealm
->ino
) == 0) {
2750 ldout(cct
, 10) << " snaprealm " << *in
->snaprealm
<< dendl
;
2751 m
->add_snaprealm(in
->snaprealm
->ino
, in
->snaprealm
->seq
, in
->snaprealm
->parent
);
2752 did_snaprealm
.insert(in
->snaprealm
->ino
);
2757 early_kick_flushing_caps(session
);
2759 session
->con
->send_message(m
);
2761 mount_cond
.Signal();
// Re-drive outstanding metadata requests at the given session's MDS, e.g.
// after the MDS becomes active. Aborted requests get their callers woken;
// only new (never-retried) requests targeted at this MDS are resent.
// NOTE(review): extraction gaps — loop body braces, 'continue's and the
// aborted-request cleanup between original lines are missing from this view.
2765 void Client::kick_requests(MetaSession
*session
)
2767 ldout(cct
, 10) << "kick_requests for mds." << session
->mds_num
<< dendl
;
// Walk every in-flight metadata request.
2768 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2769 p
!= mds_requests
.end();
2771 MetaRequest
*req
= p
->second
;
// Requests that already got an unsafe reply are handled by the
// resend_unsafe_requests path, not here.
2772 if (req
->got_unsafe
)
2774 if (req
->aborted()) {
// Wake the blocked caller so it can observe the abort.
2775 if (req
->caller_cond
) {
2777 req
->caller_cond
->Signal();
// Only brand-new requests are (re)sent by this function.
2781 if (req
->retry_attempt
> 0)
2782 continue; // new requests only
// Resend if this request is targeted at the kicked session's MDS.
2783 if (req
->mds
== session
->mds_num
) {
2784 send_request(p
->second
, session
);
// Resend requests that received an unsafe (unjournaled) reply so the MDS
// does not lose them across a failover; also resend previously-retried
// requests so the MDS can replay completed ones during clientreplay.
// NOTE(review): extraction gaps — braces, iterator increments and some
// 'continue' lines between original line numbers are missing from this view.
2789 void Client::resend_unsafe_requests(MetaSession
*session
)
// First pass: everything on this session's unsafe-request list.
2791 for (xlist
<MetaRequest
*>::iterator iter
= session
->unsafe_requests
.begin();
2794 send_request(*iter
, session
);
2796 // also re-send old requests when MDS enters reconnect stage. So that MDS can
2797 // process completed requests in clientreplay stage.
2798 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2799 p
!= mds_requests
.end();
2801 MetaRequest
*req
= p
->second
;
// Unsafe-replied requests were already covered by the first loop.
2802 if (req
->got_unsafe
)
// Skip never-sent requests; kick_requests handles those.
2806 if (req
->retry_attempt
== 0)
2807 continue; // old requests only
2808 if (req
->mds
== session
->mds_num
)
// 'true' presumably flags this as a resend — TODO confirm against
// send_request's signature (not visible here).
2809 send_request(req
, session
, true);
// Block until every session's outstanding unsafe request has been made
// safe. Collecting only the *last* unsafe request per session is enough:
// requests on a session's list complete in order, so when the tail is
// safe the whole list is.
// NOTE(review): extraction gaps — braces, loop increments and any ref
// counting between original line numbers are missing from this view.
2813 void Client::wait_unsafe_requests()
2815 list
<MetaRequest
*> last_unsafe_reqs
;
// Gather the tail unsafe request from each MDS session.
2816 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2817 p
!= mds_sessions
.end();
2819 MetaSession
*s
= p
->second
;
2820 if (!s
->unsafe_requests
.empty()) {
2821 MetaRequest
*req
= s
->unsafe_requests
.back();
2823 last_unsafe_reqs
.push_back(req
);
// Now wait on each collected request that is still on an unsafe list
// (it may have become safe between the two loops).
2827 for (list
<MetaRequest
*>::iterator p
= last_unsafe_reqs
.begin();
2828 p
!= last_unsafe_reqs
.end();
2830 MetaRequest
*req
= *p
;
2831 if (req
->unsafe_item
.is_on_list())
2832 wait_on_list(req
->waitfor_safe
);
// Abort/clean up all metadata requests bound to a session that is being
// force-closed: wake blocked callers, drop the requests from the session
// and unsafe lists, and unregister unsafe ones outright. Ends by asserting
// the session holds no requests at all.
// NOTE(review): extraction gaps — braces and iterator advancement between
// original line numbers are missing from this view.
2837 void Client::kick_requests_closed(MetaSession
*session
)
2839 ldout(cct
, 10) << "kick_requests_closed for mds." << session
->mds_num
<< dendl
;
2840 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2841 p
!= mds_requests
.end(); ) {
2842 MetaRequest
*req
= p
->second
;
2844 if (req
->mds
== session
->mds_num
) {
// Wake the thread blocked on this request so it can see the failure.
2845 if (req
->caller_cond
) {
2847 req
->caller_cond
->Signal();
// Detach the request from the session's request list.
2849 req
->item
.remove_myself();
2850 if (req
->got_unsafe
) {
// An unsafe request being dropped is noteworthy: its effects may
// be lost on the MDS side, hence lderr rather than ldout.
2851 lderr(cct
) << "kick_requests_closed removing unsafe request " << req
->get_tid() << dendl
;
// Remove from every unsafe tracking list it may sit on.
2852 req
->unsafe_item
.remove_myself();
2853 req
->unsafe_dir_item
.remove_myself();
2854 req
->unsafe_target_item
.remove_myself();
// Wake anyone in wait_unsafe_requests() waiting on this request.
2855 signal_cond_list(req
->waitfor_safe
);
2856 unregister_request(req
);
// After the sweep the session must be empty of both kinds of requests.
2860 assert(session
->requests
.empty());
2861 assert(session
->unsafe_requests
.empty());
// Account for an unsolicited (push) message from an MDS on this session.
// If the session is mid-close, re-send the close request at the current
// sequence number so the MDS's close ack covers this message too.
// NOTE(review): extraction gaps — the seq increment implied by the
// "seq now" log line is not visible in this extraction; confirm upstream.
2871 void Client::got_mds_push(MetaSession
*s
)
2874 ldout(cct
, 10) << " mds." << s
->mds_num
<< " seq now " << s
->seq
<< dendl
;
2875 if (s
->state
== MetaSession::STATE_CLOSING
) {
2876 s
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE
, s
->seq
));
2880 void Client::handle_lease(MClientLease
*m
)
2882 ldout(cct
, 10) << "handle_lease " << *m
<< dendl
;
2884 assert(m
->get_action() == CEPH_MDS_LEASE_REVOKE
);
2886 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
2887 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
2893 got_mds_push(session
);
2895 ceph_seq_t seq
= m
->get_seq();
2898 vinodeno_t
vino(m
->get_ino(), CEPH_NOSNAP
);
2899 if (inode_map
.count(vino
) == 0) {
2900 ldout(cct
, 10) << " don't have vino " << vino
<< dendl
;
2903 in
= inode_map
[vino
];
2905 if (m
->get_mask() & CEPH_LOCK_DN
) {
2906 if (!in
->dir
|| in
->dir
->dentries
.count(m
->dname
) == 0) {
2907 ldout(cct
, 10) << " don't have dir|dentry " << m
->get_ino() << "/" << m
->dname
<<dendl
;
2910 Dentry
*dn
= in
->dir
->dentries
[m
->dname
];
2911 ldout(cct
, 10) << " revoked DN lease on " << dn
<< dendl
;
2916 m
->get_connection()->send_message(
2918 CEPH_MDS_LEASE_RELEASE
, seq
,
2919 m
->get_mask(), m
->get_ino(), m
->get_first(), m
->get_last(), m
->dname
));
// Drop n references on an inode. When the count reaches zero the inode is
// torn down: caps removed, cached object data released, the inode_map
// entry erased, any faked ino released, and (for the root) parent refs
// cleared.
// NOTE(review): extraction gaps — the 'if (left == 0)' guard and the final
// delete implied by these lines are missing from this view.
2923 void Client::put_inode(Inode
*in
, int n
)
2925 ldout(cct
, 10) << "put_inode on " << *in
<< dendl
;
// _put returns the remaining reference count after dropping n.
2926 int left
= in
->_put(n
);
2929 remove_all_caps(in
);
2931 ldout(cct
, 10) << "put_inode deleting " << *in
<< dendl
;
// Release any cached object data; 'unclean' presumably reports dirty
// data being thrown away — TODO confirm against ObjectCacher::release_set.
2932 bool unclean
= objectcacher
->release_set(&in
->oset
);
2934 inode_map
.erase(in
->vino());
2935 if (use_faked_inos())
2936 _release_faked_ino(in
);
// Root teardown: drop all recorded root parent links.
2941 while (!root_parents
.empty())
2942 root_parents
.erase(root_parents
.begin());
// Tear down an (empty) Dir object: unpin the dentry that was pinning the
// parent inode open as a directory, then drop the inode ref held by the
// Dir. Asserts document the invariants: the dir is empty, it is the
// inode's dir, and directories never have more than one parent dentry.
// NOTE(review): extraction gaps — the lines deleting/detaching the Dir
// between the asserts and put_inode are missing from this view.
2949 void Client::close_dir(Dir
*dir
)
2951 Inode
*in
= dir
->parent_inode
;
2952 ldout(cct
, 15) << "close_dir dir " << dir
<< " on " << in
<< dendl
;
2953 assert(dir
->is_empty());
2954 assert(in
->dir
== dir
);
2955 assert(in
->dn_set
.size() < 2); // dirs can't be hard-linked
2956 if (!in
->dn_set
.empty())
2957 in
->get_first_parent()->put(); // unpin dentry
2961 put_inode(in
); // unpin inode
2965 * Don't call this with in==NULL, use get_or_create for that
2966 * leave dn set to default NULL unless you're trying to add
2967 * a new inode to a pre-created Dentry
2969 Dentry
* Client::link(Dir
*dir
, const string
& name
, Inode
*in
, Dentry
*dn
)
2972 // create a new Dentry
2978 dir
->dentries
[dn
->name
] = dn
;
2979 lru
.lru_insert_mid(dn
); // mid or top?
2981 ldout(cct
, 15) << "link dir " << dir
->parent_inode
<< " '" << name
<< "' to inode " << in
2982 << " dn " << dn
<< " (new dn)" << dendl
;
2984 ldout(cct
, 15) << "link dir " << dir
->parent_inode
<< " '" << name
<< "' to inode " << in
2985 << " dn " << dn
<< " (old dn)" << dendl
;
2988 if (in
) { // link to inode
2992 dn
->get(); // dir -> dn pin
2994 dn
->get(); // ll_ref -> dn pin
2997 assert(in
->dn_set
.count(dn
) == 0);
2999 // only one parent for directories!
3000 if (in
->is_dir() && !in
->dn_set
.empty()) {
3001 Dentry
*olddn
= in
->get_first_parent();
3002 assert(olddn
->dir
!= dir
|| olddn
->name
!= name
);
3003 Inode
*old_diri
= olddn
->dir
->parent_inode
;
3004 old_diri
->dir_release_count
++;
3005 clear_dir_complete_and_ordered(old_diri
, true);
3006 unlink(olddn
, true, true); // keep dir, dentry
3009 in
->dn_set
.insert(dn
);
3011 ldout(cct
, 20) << "link inode " << in
<< " parents now " << in
->dn_set
<< dendl
;
// Detach a dentry from its inode and (optionally) remove the dentry from
// its directory. keepdentry leaves the (now-null) dentry in place;
// keepdir prevents closing the directory when it becomes empty.
// NOTE(review): extraction gaps — the ll_ref conditional, null-ing of
// dn->inode, and the keepdentry branch between original line numbers are
// missing from this view.
3017 void Client::unlink(Dentry
*dn
, bool keepdir
, bool keepdentry
)
3021 ldout(cct
, 15) << "unlink dir " << dn
->dir
->parent_inode
<< " '" << dn
->name
<< "' dn " << dn
3022 << " inode " << dn
->inode
<< dendl
;
3024 // unlink from inode
// Drop the pins the link() call took on this dentry.
3028 dn
->put(); // dir -> dn pin
3030 dn
->put(); // ll_ref -> dn pin
// Remove the dentry from the inode's parent set.
3033 assert(in
->dn_set
.count(dn
));
3034 in
->dn_set
.erase(dn
);
3035 ldout(cct
, 20) << "unlink inode " << in
<< " parents now " << in
->dn_set
<< dendl
;
3041 ldout(cct
, 15) << "unlink removing '" << dn
->name
<< "' dn " << dn
<< dendl
;
// Physically remove the dentry from its directory's name map.
3044 dn
->dir
->dentries
.erase(dn
->name
);
// Presumably closes the dir when it empties and keepdir is false —
// the call on the following (missing) line is not visible here.
3045 if (dn
->dir
->is_empty() && !keepdir
)
3056 * For asynchronous flushes, check for errors from the IO and
3057 * update the inode if necessary
// Completion context for async ObjectCacher flushes: on error it logs the
// failure and latches the errno on the inode (set_async_err) so a later
// fsync/close can report it to the application.
// NOTE(review): extraction gaps — member declarations, access specifiers
// and the success-path guard around the error logging are missing from
// this view.
3059 class C_Client_FlushComplete
: public Context
{
3064 C_Client_FlushComplete(Client
*c
, Inode
*in
) : client(c
), inode(in
) { }
3065 void finish(int r
) override
{
// Completions run under the client lock; assert the invariant.
3066 assert(client
->client_lock
.is_locked_by_me());
3068 client_t
const whoami
= client
->whoami
; // For the benefit of ldout prefix
3069 ldout(client
->cct
, 1) << "I/O error from flush on inode " << inode
3070 << " 0x" << std::hex
<< inode
->ino
<< std::dec
3071 << ": " << r
<< "(" << cpp_strerror(r
) << ")" << dendl
;
// Latch the error on the inode for later reporting to callers.
3072 inode
->set_async_err(r
);
// Take a reference on one or more capability bits for an inode, logging
// when the first FILE_BUFFER or FILE_CACHE ref is taken (those firsts
// gate writeback/caching behavior elsewhere).
// NOTE(review): extraction gaps — the bodies of the two 'first ref'
// branches beyond the log lines are missing from this view.
3082 void Client::get_cap_ref(Inode
*in
, int cap
)
// First buffered-write (Fb) reference on this inode?
3084 if ((cap
& CEPH_CAP_FILE_BUFFER
) &&
3085 in
->cap_refs
[CEPH_CAP_FILE_BUFFER
] == 0) {
3086 ldout(cct
, 5) << "get_cap_ref got first FILE_BUFFER ref on " << *in
<< dendl
;
// First cached-read (Fc) reference on this inode?
3089 if ((cap
& CEPH_CAP_FILE_CACHE
) &&
3090 in
->cap_refs
[CEPH_CAP_FILE_CACHE
] == 0) {
3091 ldout(cct
, 5) << "get_cap_ref got first FILE_CACHE ref on " << *in
<< dendl
;
// Actual ref-count bump lives on the inode itself.
3094 in
->get_cap_ref(cap
);
3097 void Client::put_cap_ref(Inode
*in
, int cap
)
3099 int last
= in
->put_cap_ref(cap
);
3102 int drop
= last
& ~in
->caps_issued();
3103 if (in
->snapid
== CEPH_NOSNAP
) {
3104 if ((last
& CEPH_CAP_FILE_WR
) &&
3105 !in
->cap_snaps
.empty() &&
3106 in
->cap_snaps
.rbegin()->second
.writing
) {
3107 ldout(cct
, 10) << "put_cap_ref finishing pending cap_snap on " << *in
<< dendl
;
3108 in
->cap_snaps
.rbegin()->second
.writing
= 0;
3109 finish_cap_snap(in
, in
->cap_snaps
.rbegin()->second
, get_caps_used(in
));
3110 signal_cond_list(in
->waitfor_caps
); // wake up blocked sync writers
3112 if (last
& CEPH_CAP_FILE_BUFFER
) {
3113 for (auto &p
: in
->cap_snaps
)
3114 p
.second
.dirty_data
= 0;
3115 signal_cond_list(in
->waitfor_commit
);
3116 ldout(cct
, 5) << "put_cap_ref dropped last FILE_BUFFER ref on " << *in
<< dendl
;
3120 if (last
& CEPH_CAP_FILE_CACHE
) {
3121 ldout(cct
, 5) << "put_cap_ref dropped last FILE_CACHE ref on " << *in
<< dendl
;
3127 put_inode(in
, put_nref
);
3131 int Client::get_caps(Inode
*in
, int need
, int want
, int *phave
, loff_t endoff
)
3133 int r
= check_pool_perm(in
, need
);
3138 int file_wanted
= in
->caps_file_wanted();
3139 if ((file_wanted
& need
) != need
) {
3140 ldout(cct
, 10) << "get_caps " << *in
<< " need " << ccap_string(need
)
3141 << " file_wanted " << ccap_string(file_wanted
) << ", EBADF "
3147 int have
= in
->caps_issued(&implemented
);
3149 bool waitfor_caps
= false;
3150 bool waitfor_commit
= false;
3152 if (have
& need
& CEPH_CAP_FILE_WR
) {
3154 (endoff
>= (loff_t
)in
->max_size
||
3155 endoff
> (loff_t
)(in
->size
<< 1)) &&
3156 endoff
> (loff_t
)in
->wanted_max_size
) {
3157 ldout(cct
, 10) << "wanted_max_size " << in
->wanted_max_size
<< " -> " << endoff
<< dendl
;
3158 in
->wanted_max_size
= endoff
;
3162 if (endoff
>= 0 && endoff
> (loff_t
)in
->max_size
) {
3163 ldout(cct
, 10) << "waiting on max_size, endoff " << endoff
<< " max_size " << in
->max_size
<< " on " << *in
<< dendl
;
3164 waitfor_caps
= true;
3166 if (!in
->cap_snaps
.empty()) {
3167 if (in
->cap_snaps
.rbegin()->second
.writing
) {
3168 ldout(cct
, 10) << "waiting on cap_snap write to complete" << dendl
;
3169 waitfor_caps
= true;
3171 for (auto &p
: in
->cap_snaps
) {
3172 if (p
.second
.dirty_data
) {
3173 waitfor_commit
= true;
3177 if (waitfor_commit
) {
3178 _flush(in
, new C_Client_FlushComplete(this, in
));
3179 ldout(cct
, 10) << "waiting for WRBUFFER to get dropped" << dendl
;
3184 if (!waitfor_caps
&& !waitfor_commit
) {
3185 if ((have
& need
) == need
) {
3186 int revoking
= implemented
& ~have
;
3187 ldout(cct
, 10) << "get_caps " << *in
<< " have " << ccap_string(have
)
3188 << " need " << ccap_string(need
) << " want " << ccap_string(want
)
3189 << " revoking " << ccap_string(revoking
)
3191 if ((revoking
& want
) == 0) {
3192 *phave
= need
| (have
& want
);
3193 in
->get_cap_ref(need
);
3197 ldout(cct
, 10) << "waiting for caps " << *in
<< " need " << ccap_string(need
) << " want " << ccap_string(want
) << dendl
;
3198 waitfor_caps
= true;
3201 if ((need
& CEPH_CAP_FILE_WR
) && in
->auth_cap
&&
3202 in
->auth_cap
->session
->readonly
)
3205 if (in
->flags
& I_CAP_DROPPED
) {
3206 int mds_wanted
= in
->caps_mds_wanted();
3207 if ((mds_wanted
& need
) != need
) {
3208 int ret
= _renew_caps(in
);
3213 if ((mds_wanted
& file_wanted
) ==
3214 (file_wanted
& (CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_WR
))) {
3215 in
->flags
&= ~I_CAP_DROPPED
;
3220 wait_on_list(in
->waitfor_caps
);
3221 else if (waitfor_commit
)
3222 wait_on_list(in
->waitfor_commit
);
// Return the set of capability bits this inode is actively using. Beyond
// the inode's own accounting, FILE_CACHE is considered in use as long as
// the ObjectCacher still holds data for the inode's object set.
// NOTE(review): the 'return used;' implied by the signature is missing
// from this extraction.
3226 int Client::get_caps_used(Inode
*in
)
3228 unsigned used
= in
->caps_used();
3229 if (!(used
& CEPH_CAP_FILE_CACHE
) &&
3230 !objectcacher
->set_is_empty(&in
->oset
))
3231 used
|= CEPH_CAP_FILE_CACHE
;
// Defer releasing this inode's caps: push the hold deadline out by the
// configured client_caps_release_delay and (re)queue the inode on the
// delayed-release list.
3235 void Client::cap_delay_requeue(Inode
*in
)
3237 ldout(cct
, 10) << "cap_delay_requeue on " << *in
<< dendl
;
// hold_caps_until = now + client_caps_release_delay.
3238 in
->hold_caps_until
= ceph_clock_now();
3239 in
->hold_caps_until
+= cct
->_conf
->client_caps_release_delay
;
// xlist push_back re-links the item if it is already queued.
3240 delayed_list
.push_back(&in
->delay_cap_item
);
3243 void Client::send_cap(Inode
*in
, MetaSession
*session
, Cap
*cap
,
3244 bool sync
, int used
, int want
, int retain
,
3245 int flush
, ceph_tid_t flush_tid
)
3247 int held
= cap
->issued
| cap
->implemented
;
3248 int revoking
= cap
->implemented
& ~cap
->issued
;
3249 retain
&= ~revoking
;
3250 int dropping
= cap
->issued
& ~retain
;
3251 int op
= CEPH_CAP_OP_UPDATE
;
3253 ldout(cct
, 10) << "send_cap " << *in
3254 << " mds." << session
->mds_num
<< " seq " << cap
->seq
3255 << (sync
? " sync " : " async ")
3256 << " used " << ccap_string(used
)
3257 << " want " << ccap_string(want
)
3258 << " flush " << ccap_string(flush
)
3259 << " retain " << ccap_string(retain
)
3260 << " held "<< ccap_string(held
)
3261 << " revoking " << ccap_string(revoking
)
3262 << " dropping " << ccap_string(dropping
)
3265 if (cct
->_conf
->client_inject_release_failure
&& revoking
) {
3266 const int would_have_issued
= cap
->issued
& retain
;
3267 const int would_have_implemented
= cap
->implemented
& (cap
->issued
| used
);
3269 // - tell the server we think issued is whatever they issued plus whatever we implemented
3270 // - leave what we have implemented in place
3271 ldout(cct
, 20) << __func__
<< " injecting failure to release caps" << dendl
;
3272 cap
->issued
= cap
->issued
| cap
->implemented
;
3274 // Make an exception for revoking xattr caps: we are injecting
3275 // failure to release other caps, but allow xattr because client
3276 // will block on xattr ops if it can't release these to MDS (#9800)
3277 const int xattr_mask
= CEPH_CAP_XATTR_SHARED
| CEPH_CAP_XATTR_EXCL
;
3278 cap
->issued
^= xattr_mask
& revoking
;
3279 cap
->implemented
^= xattr_mask
& revoking
;
3281 ldout(cct
, 20) << __func__
<< " issued " << ccap_string(cap
->issued
) << " vs " << ccap_string(would_have_issued
) << dendl
;
3282 ldout(cct
, 20) << __func__
<< " implemented " << ccap_string(cap
->implemented
) << " vs " << ccap_string(would_have_implemented
) << dendl
;
3285 cap
->issued
&= retain
;
3286 cap
->implemented
&= cap
->issued
| used
;
3289 snapid_t follows
= 0;
3292 follows
= in
->snaprealm
->get_snap_context().seq
;
3294 MClientCaps
*m
= new MClientCaps(op
,
3297 cap
->cap_id
, cap
->seq
,
3303 m
->caller_uid
= in
->cap_dirtier_uid
;
3304 m
->caller_gid
= in
->cap_dirtier_gid
;
3306 m
->head
.issue_seq
= cap
->issue_seq
;
3307 m
->set_tid(flush_tid
);
3309 m
->head
.uid
= in
->uid
;
3310 m
->head
.gid
= in
->gid
;
3311 m
->head
.mode
= in
->mode
;
3313 m
->head
.nlink
= in
->nlink
;
3315 if (flush
& CEPH_CAP_XATTR_EXCL
) {
3316 ::encode(in
->xattrs
, m
->xattrbl
);
3317 m
->head
.xattr_version
= in
->xattr_version
;
3321 m
->max_size
= in
->max_size
;
3322 m
->truncate_seq
= in
->truncate_seq
;
3323 m
->truncate_size
= in
->truncate_size
;
3324 m
->mtime
= in
->mtime
;
3325 m
->atime
= in
->atime
;
3326 m
->ctime
= in
->ctime
;
3327 m
->btime
= in
->btime
;
3328 m
->time_warp_seq
= in
->time_warp_seq
;
3329 m
->change_attr
= in
->change_attr
;
3331 m
->flags
|= CLIENT_CAPS_SYNC
;
3333 if (flush
& CEPH_CAP_FILE_WR
) {
3334 m
->inline_version
= in
->inline_version
;
3335 m
->inline_data
= in
->inline_data
;
3338 in
->reported_size
= in
->size
;
3339 m
->set_snap_follows(follows
);
3341 if (cap
== in
->auth_cap
) {
3342 m
->set_max_size(in
->wanted_max_size
);
3343 in
->requested_max_size
= in
->wanted_max_size
;
3344 ldout(cct
, 15) << "auth cap, setting max_size = " << in
->requested_max_size
<< dendl
;
3347 if (!session
->flushing_caps_tids
.empty())
3348 m
->set_oldest_flush_tid(*session
->flushing_caps_tids
.begin());
3350 session
->con
->send_message(m
);
// Heuristic: should we report our size to the MDS so it can grant a larger
// max_size before writers stall? Skipped while a WR cap flush is already
// in flight (the MDS will adjust from that report).
// NOTE(review): extraction gaps — the 'return true/false' lines implied by
// each condition are missing from this view.
3353 static bool is_max_size_approaching(Inode
*in
)
3355 /* mds will adjust max size according to the reported size */
3356 if (in
->flushing_caps
& CEPH_CAP_FILE_WR
)
// Already at/over the granted limit.
3358 if (in
->size
>= in
->max_size
)
3360 /* half of previous max_size increment has been used */
3361 if (in
->max_size
> in
->reported_size
&&
3362 (in
->size
<< 1) >= in
->max_size
+ in
->reported_size
)
3370 * Examine currently used and wanted versus held caps. Release, flush or ack
3371 * revoked caps to the MDS as appropriate.
3373 * @param in the inode to check
3374 * @param flags flags to apply to cap check
3376 void Client::check_caps(Inode
*in
, unsigned flags
)
3378 unsigned wanted
= in
->caps_wanted();
3379 unsigned used
= get_caps_used(in
);
3382 if (in
->is_dir() && (in
->flags
& I_COMPLETE
)) {
3383 // we do this here because we don't want to drop to Fs (and then
3384 // drop the Fs if we do a create!) if that alone makes us send lookups
3385 // to the MDS. Doing it in in->caps_wanted() has knock-on effects elsewhere
3386 wanted
|= CEPH_CAP_FILE_EXCL
;
3390 int issued
= in
->caps_issued(&implemented
);
3391 int revoking
= implemented
& ~issued
;
3393 int retain
= wanted
| used
| CEPH_CAP_PIN
;
3396 retain
|= CEPH_CAP_ANY
;
3398 retain
|= CEPH_CAP_ANY_SHARED
;
3401 ldout(cct
, 10) << "check_caps on " << *in
3402 << " wanted " << ccap_string(wanted
)
3403 << " used " << ccap_string(used
)
3404 << " issued " << ccap_string(issued
)
3405 << " revoking " << ccap_string(revoking
)
3406 << " flags=" << flags
3409 if (in
->snapid
!= CEPH_NOSNAP
)
3410 return; //snap caps last forever, can't write
3412 if (in
->caps
.empty())
3413 return; // guard if at end of func
3415 if ((revoking
& (CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_LAZYIO
)) &&
3416 (used
& CEPH_CAP_FILE_CACHE
) && !(used
& CEPH_CAP_FILE_BUFFER
)) {
3418 used
&= ~CEPH_CAP_FILE_CACHE
;
3421 if (!in
->cap_snaps
.empty())
3424 if (flags
& CHECK_CAPS_NODELAY
)
3425 in
->hold_caps_until
= utime_t();
3427 cap_delay_requeue(in
);
3429 utime_t now
= ceph_clock_now();
3431 map
<mds_rank_t
, Cap
*>::iterator it
= in
->caps
.begin();
3432 while (it
!= in
->caps
.end()) {
3433 mds_rank_t mds
= it
->first
;
3434 Cap
*cap
= it
->second
;
3437 MetaSession
*session
= mds_sessions
[mds
];
3441 if (in
->auth_cap
&& cap
!= in
->auth_cap
)
3442 cap_used
&= ~in
->auth_cap
->issued
;
3444 revoking
= cap
->implemented
& ~cap
->issued
;
3446 ldout(cct
, 10) << " cap mds." << mds
3447 << " issued " << ccap_string(cap
->issued
)
3448 << " implemented " << ccap_string(cap
->implemented
)
3449 << " revoking " << ccap_string(revoking
) << dendl
;
3451 if (in
->wanted_max_size
> in
->max_size
&&
3452 in
->wanted_max_size
> in
->requested_max_size
&&
3453 cap
== in
->auth_cap
)
3456 /* approaching file_max? */
3457 if ((cap
->issued
& CEPH_CAP_FILE_WR
) &&
3458 cap
== in
->auth_cap
&&
3459 is_max_size_approaching(in
)) {
3460 ldout(cct
, 10) << "size " << in
->size
<< " approaching max_size " << in
->max_size
3461 << ", reported " << in
->reported_size
<< dendl
;
3465 /* completed revocation? */
3466 if (revoking
&& (revoking
& cap_used
) == 0) {
3467 ldout(cct
, 10) << "completed revocation of " << ccap_string(cap
->implemented
& ~cap
->issued
) << dendl
;
3471 /* want more caps from mds? */
3472 if (wanted
& ~(cap
->wanted
| cap
->issued
))
3475 if (!revoking
&& unmounting
&& (cap_used
== 0))
3478 if (wanted
== cap
->wanted
&& // mds knows what we want.
3479 ((cap
->issued
& ~retain
) == 0) &&// and we don't have anything we wouldn't like
3480 !in
->dirty_caps
) // and we have no dirty caps
3483 if (now
< in
->hold_caps_until
) {
3484 ldout(cct
, 10) << "delaying cap release" << dendl
;
3489 // re-send old cap/snapcap flushes first.
3490 if (session
->mds_state
>= MDSMap::STATE_RECONNECT
&&
3491 session
->mds_state
< MDSMap::STATE_ACTIVE
&&
3492 session
->early_flushing_caps
.count(in
) == 0) {
3493 ldout(cct
, 20) << " reflushing caps (check_caps) on " << *in
3494 << " to mds." << session
->mds_num
<< dendl
;
3495 session
->early_flushing_caps
.insert(in
);
3496 if (in
->cap_snaps
.size())
3497 flush_snaps(in
, true);
3498 if (in
->flushing_caps
)
3499 flush_caps(in
, session
, flags
& CHECK_CAPS_SYNCHRONOUS
);
3503 ceph_tid_t flush_tid
;
3504 if (in
->auth_cap
== cap
&& in
->dirty_caps
) {
3505 flushing
= mark_caps_flushing(in
, &flush_tid
);
3511 send_cap(in
, session
, cap
, flags
& CHECK_CAPS_SYNCHRONOUS
, cap_used
, wanted
,
3512 retain
, flushing
, flush_tid
);
3517 void Client::queue_cap_snap(Inode
*in
, SnapContext
& old_snapc
)
3519 int used
= get_caps_used(in
);
3520 int dirty
= in
->caps_dirty();
3521 ldout(cct
, 10) << "queue_cap_snap " << *in
<< " snapc " << old_snapc
<< " used " << ccap_string(used
) << dendl
;
3523 if (in
->cap_snaps
.size() &&
3524 in
->cap_snaps
.rbegin()->second
.writing
) {
3525 ldout(cct
, 10) << "queue_cap_snap already have pending cap_snap on " << *in
<< dendl
;
3527 } else if (in
->caps_dirty() ||
3528 (used
& CEPH_CAP_FILE_WR
) ||
3529 (dirty
& CEPH_CAP_ANY_WR
)) {
3530 const auto &capsnapem
= in
->cap_snaps
.emplace(std::piecewise_construct
, std::make_tuple(old_snapc
.seq
), std::make_tuple(in
));
3531 assert(capsnapem
.second
== true); /* element inserted */
3532 CapSnap
&capsnap
= capsnapem
.first
->second
;
3533 capsnap
.context
= old_snapc
;
3534 capsnap
.issued
= in
->caps_issued();
3535 capsnap
.dirty
= in
->caps_dirty();
3537 capsnap
.dirty_data
= (used
& CEPH_CAP_FILE_BUFFER
);
3539 capsnap
.uid
= in
->uid
;
3540 capsnap
.gid
= in
->gid
;
3541 capsnap
.mode
= in
->mode
;
3542 capsnap
.btime
= in
->btime
;
3543 capsnap
.xattrs
= in
->xattrs
;
3544 capsnap
.xattr_version
= in
->xattr_version
;
3546 if (used
& CEPH_CAP_FILE_WR
) {
3547 ldout(cct
, 10) << "queue_cap_snap WR used on " << *in
<< dendl
;
3548 capsnap
.writing
= 1;
3550 finish_cap_snap(in
, capsnap
, used
);
3553 ldout(cct
, 10) << "queue_cap_snap not dirty|writing on " << *in
<< dendl
;
3557 void Client::finish_cap_snap(Inode
*in
, CapSnap
&capsnap
, int used
)
3559 ldout(cct
, 10) << "finish_cap_snap " << *in
<< " capsnap " << (void *)&capsnap
<< " used " << ccap_string(used
) << dendl
;
3560 capsnap
.size
= in
->size
;
3561 capsnap
.mtime
= in
->mtime
;
3562 capsnap
.atime
= in
->atime
;
3563 capsnap
.ctime
= in
->ctime
;
3564 capsnap
.time_warp_seq
= in
->time_warp_seq
;
3565 capsnap
.change_attr
= in
->change_attr
;
3567 capsnap
.dirty
|= in
->caps_dirty();
3569 if (capsnap
.dirty
& CEPH_CAP_FILE_WR
) {
3570 capsnap
.inline_data
= in
->inline_data
;
3571 capsnap
.inline_version
= in
->inline_version
;
3574 if (used
& CEPH_CAP_FILE_BUFFER
) {
3575 ldout(cct
, 10) << "finish_cap_snap " << *in
<< " cap_snap " << &capsnap
<< " used " << used
3576 << " WRBUFFER, delaying" << dendl
;
3578 capsnap
.dirty_data
= 0;
// Callback for when the snapped data for cap_snap 'seq' has been flushed:
// clear its dirty_data flag so the snap cap flush can proceed.
// NOTE(review): a follow-up call (e.g. flush_snaps) implied by the
// surrounding code is not visible in this extraction.
3583 void Client::_flushed_cap_snap(Inode
*in
, snapid_t seq
)
3585 ldout(cct
, 10) << "_flushed_cap_snap seq " << seq
<< " on " << *in
<< dendl
;
// .at() asserts the cap_snap for this seq still exists.
3586 in
->cap_snaps
.at(seq
).dirty_data
= 0;
3590 void Client::flush_snaps(Inode
*in
, bool all_again
)
3592 ldout(cct
, 10) << "flush_snaps on " << *in
<< " all_again " << all_again
<< dendl
;
3593 assert(in
->cap_snaps
.size());
3596 assert(in
->auth_cap
);
3597 MetaSession
*session
= in
->auth_cap
->session
;
3598 int mseq
= in
->auth_cap
->mseq
;
3600 for (auto &p
: in
->cap_snaps
) {
3601 CapSnap
&capsnap
= p
.second
;
3603 // only flush once per session
3604 if (capsnap
.flush_tid
> 0)
3608 ldout(cct
, 10) << "flush_snaps mds." << session
->mds_num
3609 << " follows " << p
.first
3610 << " size " << capsnap
.size
3611 << " mtime " << capsnap
.mtime
3612 << " dirty_data=" << capsnap
.dirty_data
3613 << " writing=" << capsnap
.writing
3614 << " on " << *in
<< dendl
;
3615 if (capsnap
.dirty_data
|| capsnap
.writing
)
3618 if (capsnap
.flush_tid
== 0) {
3619 capsnap
.flush_tid
= ++last_flush_tid
;
3620 if (!in
->flushing_cap_item
.is_on_list())
3621 session
->flushing_caps
.push_back(&in
->flushing_cap_item
);
3622 session
->flushing_caps_tids
.insert(capsnap
.flush_tid
);
3625 MClientCaps
*m
= new MClientCaps(CEPH_CAP_OP_FLUSHSNAP
, in
->ino
, in
->snaprealm
->ino
, 0, mseq
,
3628 m
->caller_uid
= user_id
;
3630 m
->caller_gid
= group_id
;
3632 m
->set_client_tid(capsnap
.flush_tid
);
3633 m
->head
.snap_follows
= p
.first
;
3635 m
->head
.caps
= capsnap
.issued
;
3636 m
->head
.dirty
= capsnap
.dirty
;
3638 m
->head
.uid
= capsnap
.uid
;
3639 m
->head
.gid
= capsnap
.gid
;
3640 m
->head
.mode
= capsnap
.mode
;
3641 m
->btime
= capsnap
.btime
;
3643 m
->size
= capsnap
.size
;
3645 m
->head
.xattr_version
= capsnap
.xattr_version
;
3646 ::encode(capsnap
.xattrs
, m
->xattrbl
);
3648 m
->ctime
= capsnap
.ctime
;
3649 m
->btime
= capsnap
.btime
;
3650 m
->mtime
= capsnap
.mtime
;
3651 m
->atime
= capsnap
.atime
;
3652 m
->time_warp_seq
= capsnap
.time_warp_seq
;
3653 m
->change_attr
= capsnap
.change_attr
;
3655 if (capsnap
.dirty
& CEPH_CAP_FILE_WR
) {
3656 m
->inline_version
= in
->inline_version
;
3657 m
->inline_data
= in
->inline_data
;
3660 assert(!session
->flushing_caps_tids
.empty());
3661 m
->set_oldest_flush_tid(*session
->flushing_caps_tids
.begin());
3663 session
->con
->send_message(m
);
3669 void Client::wait_on_list(list
<Cond
*>& ls
)
3672 ls
.push_back(&cond
);
3673 cond
.Wait(client_lock
);
3677 void Client::signal_cond_list(list
<Cond
*>& ls
)
3679 for (list
<Cond
*>::iterator it
= ls
.begin(); it
!= ls
.end(); ++it
)
3683 void Client::wait_on_context_list(list
<Context
*>& ls
)
3688 ls
.push_back(new C_Cond(&cond
, &done
, &r
));
3690 cond
.Wait(client_lock
);
3693 void Client::signal_context_list(list
<Context
*>& ls
)
3695 while (!ls
.empty()) {
3696 ls
.front()->complete(0);
3701 void Client::wake_inode_waiters(MetaSession
*s
)
3703 xlist
<Cap
*>::iterator iter
= s
->caps
.begin();
3704 while (!iter
.end()){
3705 signal_cond_list((*iter
)->inode
->waitfor_caps
);
3711 // flush dirty data (from objectcache)
3713 class C_Client_CacheInvalidate
: public Context
{
3717 int64_t offset
, length
;
3719 C_Client_CacheInvalidate(Client
*c
, Inode
*in
, int64_t off
, int64_t len
) :
3720 client(c
), offset(off
), length(len
) {
3721 if (client
->use_faked_inos())
3722 ino
= vinodeno_t(in
->faked_ino
, CEPH_NOSNAP
);
3726 void finish(int r
) override
{
3727 // _async_invalidate takes the lock when it needs to, call this back from outside of lock.
3728 assert(!client
->client_lock
.is_locked_by_me());
3729 client
->_async_invalidate(ino
, offset
, length
);
3733 void Client::_async_invalidate(vinodeno_t ino
, int64_t off
, int64_t len
)
3737 ldout(cct
, 10) << "_async_invalidate " << ino
<< " " << off
<< "~" << len
<< dendl
;
3738 ino_invalidate_cb(callback_handle
, ino
, off
, len
);
3741 void Client::_schedule_invalidate_callback(Inode
*in
, int64_t off
, int64_t len
) {
3743 if (ino_invalidate_cb
)
3744 // we queue the invalidate, which calls the callback and decrements the ref
3745 async_ino_invalidator
.queue(new C_Client_CacheInvalidate(this, in
, off
, len
));
3748 void Client::_invalidate_inode_cache(Inode
*in
)
3750 ldout(cct
, 10) << "_invalidate_inode_cache " << *in
<< dendl
;
3752 // invalidate our userspace inode cache
3753 if (cct
->_conf
->client_oc
) {
3754 objectcacher
->release_set(&in
->oset
);
3755 if (!objectcacher
->set_is_empty(&in
->oset
))
3756 lderr(cct
) << "failed to invalidate cache for " << *in
<< dendl
;
3759 _schedule_invalidate_callback(in
, 0, 0);
3762 void Client::_invalidate_inode_cache(Inode
*in
, int64_t off
, int64_t len
)
3764 ldout(cct
, 10) << "_invalidate_inode_cache " << *in
<< " " << off
<< "~" << len
<< dendl
;
3766 // invalidate our userspace inode cache
3767 if (cct
->_conf
->client_oc
) {
3768 vector
<ObjectExtent
> ls
;
3769 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, off
, len
, in
->truncate_size
, ls
);
3770 objectcacher
->discard_writeback(&in
->oset
, ls
, nullptr);
3773 _schedule_invalidate_callback(in
, off
, len
);
3776 bool Client::_release(Inode
*in
)
3778 ldout(cct
, 20) << "_release " << *in
<< dendl
;
3779 if (in
->cap_refs
[CEPH_CAP_FILE_CACHE
] == 0) {
3780 _invalidate_inode_cache(in
);
3786 bool Client::_flush(Inode
*in
, Context
*onfinish
)
3788 ldout(cct
, 10) << "_flush " << *in
<< dendl
;
3790 if (!in
->oset
.dirty_or_tx
) {
3791 ldout(cct
, 10) << " nothing to flush" << dendl
;
3792 onfinish
->complete(0);
3796 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
)) {
3797 ldout(cct
, 1) << __func__
<< ": FULL, purging for ENOSPC" << dendl
;
3798 objectcacher
->purge_set(&in
->oset
);
3800 onfinish
->complete(-ENOSPC
);
3805 return objectcacher
->flush_set(&in
->oset
, onfinish
);
3808 void Client::_flush_range(Inode
*in
, int64_t offset
, uint64_t size
)
3810 assert(client_lock
.is_locked());
3811 if (!in
->oset
.dirty_or_tx
) {
3812 ldout(cct
, 10) << " nothing to flush" << dendl
;
3816 Mutex
flock("Client::_flush_range flock");
3819 Context
*onflush
= new C_SafeCond(&flock
, &cond
, &safe
);
3820 bool ret
= objectcacher
->file_flush(&in
->oset
, &in
->layout
, in
->snaprealm
->get_snap_context(),
3821 offset
, size
, onflush
);
3824 client_lock
.Unlock();
3833 void Client::flush_set_callback(ObjectCacher::ObjectSet
*oset
)
3835 // Mutex::Locker l(client_lock);
3836 assert(client_lock
.is_locked()); // will be called via dispatch() -> objecter -> ...
3837 Inode
*in
= static_cast<Inode
*>(oset
->parent
);
3842 void Client::_flushed(Inode
*in
)
3844 ldout(cct
, 10) << "_flushed " << *in
<< dendl
;
3846 put_cap_ref(in
, CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_BUFFER
);
3851 // checks common to add_update_cap, handle_cap_grant
3852 void Client::check_cap_issue(Inode
*in
, Cap
*cap
, unsigned issued
)
3854 unsigned had
= in
->caps_issued();
3856 if ((issued
& CEPH_CAP_FILE_CACHE
) &&
3857 !(had
& CEPH_CAP_FILE_CACHE
))
3860 if ((issued
& CEPH_CAP_FILE_SHARED
) &&
3861 !(had
& CEPH_CAP_FILE_SHARED
)) {
3865 clear_dir_complete_and_ordered(in
, true);
3869 void Client::add_update_cap(Inode
*in
, MetaSession
*mds_session
, uint64_t cap_id
,
3870 unsigned issued
, unsigned seq
, unsigned mseq
, inodeno_t realm
,
3871 int flags
, const UserPerm
& cap_perms
)
3874 mds_rank_t mds
= mds_session
->mds_num
;
3875 if (in
->caps
.count(mds
)) {
3876 cap
= in
->caps
[mds
];
3879 * auth mds of the inode changed. we received the cap export
3880 * message, but still haven't received the cap import message.
3881 * handle_cap_export() updated the new auth MDS' cap.
3883 * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
3884 * a message that was send before the cap import message. So
3885 * don't remove caps.
3887 if (ceph_seq_cmp(seq
, cap
->seq
) <= 0) {
3888 assert(cap
== in
->auth_cap
);
3889 assert(cap
->cap_id
== cap_id
);
3892 issued
|= cap
->issued
;
3893 flags
|= CEPH_CAP_FLAG_AUTH
;
3896 mds_session
->num_caps
++;
3897 if (!in
->is_any_caps()) {
3898 assert(in
->snaprealm
== 0);
3899 in
->snaprealm
= get_snap_realm(realm
);
3900 in
->snaprealm
->inodes_with_caps
.push_back(&in
->snaprealm_item
);
3901 ldout(cct
, 15) << "add_update_cap first one, opened snaprealm " << in
->snaprealm
<< dendl
;
3903 in
->caps
[mds
] = cap
= new Cap
;
3905 mds_session
->caps
.push_back(&cap
->cap_item
);
3906 cap
->session
= mds_session
;
3908 cap
->gen
= mds_session
->cap_gen
;
3911 check_cap_issue(in
, cap
, issued
);
3913 if (flags
& CEPH_CAP_FLAG_AUTH
) {
3914 if (in
->auth_cap
!= cap
&&
3915 (!in
->auth_cap
|| ceph_seq_cmp(in
->auth_cap
->mseq
, mseq
) < 0)) {
3916 if (in
->auth_cap
&& in
->flushing_cap_item
.is_on_list()) {
3917 ldout(cct
, 10) << "add_update_cap changing auth cap: "
3918 << "add myself to new auth MDS' flushing caps list" << dendl
;
3919 adjust_session_flushing_caps(in
, in
->auth_cap
->session
, mds_session
);
3925 unsigned old_caps
= cap
->issued
;
3926 cap
->cap_id
= cap_id
;
3927 cap
->issued
|= issued
;
3928 cap
->implemented
|= issued
;
3930 cap
->issue_seq
= seq
;
3932 cap
->gen
= mds_session
->cap_gen
;
3933 cap
->latest_perms
= cap_perms
;
3934 ldout(cct
, 10) << "add_update_cap issued " << ccap_string(old_caps
) << " -> " << ccap_string(cap
->issued
)
3935 << " from mds." << mds
3939 if ((issued
& ~old_caps
) && in
->auth_cap
== cap
) {
3940 // non-auth MDS is revoking the newly grant caps ?
3941 for (map
<mds_rank_t
,Cap
*>::iterator it
= in
->caps
.begin(); it
!= in
->caps
.end(); ++it
) {
3942 if (it
->second
== cap
)
3944 if (it
->second
->implemented
& ~it
->second
->issued
& issued
) {
3945 check_caps(in
, CHECK_CAPS_NODELAY
);
3951 if (issued
& ~old_caps
)
3952 signal_cond_list(in
->waitfor_caps
);
3955 void Client::remove_cap(Cap
*cap
, bool queue_release
)
3957 Inode
*in
= cap
->inode
;
3958 MetaSession
*session
= cap
->session
;
3959 mds_rank_t mds
= cap
->session
->mds_num
;
3961 ldout(cct
, 10) << "remove_cap mds." << mds
<< " on " << *in
<< dendl
;
3963 if (queue_release
) {
3964 session
->enqueue_cap_release(
3972 if (in
->auth_cap
== cap
) {
3973 if (in
->flushing_cap_item
.is_on_list()) {
3974 ldout(cct
, 10) << " removing myself from flushing_cap list" << dendl
;
3975 in
->flushing_cap_item
.remove_myself();
3977 in
->auth_cap
= NULL
;
3979 assert(in
->caps
.count(mds
));
3980 in
->caps
.erase(mds
);
3982 cap
->cap_item
.remove_myself();
3986 if (!in
->is_any_caps()) {
3987 ldout(cct
, 15) << "remove_cap last one, closing snaprealm " << in
->snaprealm
<< dendl
;
3988 in
->snaprealm_item
.remove_myself();
3989 put_snap_realm(in
->snaprealm
);
3994 void Client::remove_all_caps(Inode
*in
)
3996 while (!in
->caps
.empty())
3997 remove_cap(in
->caps
.begin()->second
, true);
4000 void Client::remove_session_caps(MetaSession
*s
)
4002 ldout(cct
, 10) << "remove_session_caps mds." << s
->mds_num
<< dendl
;
4004 while (s
->caps
.size()) {
4005 Cap
*cap
= *s
->caps
.begin();
4006 Inode
*in
= cap
->inode
;
4007 bool dirty_caps
= false, cap_snaps
= false;
4008 if (in
->auth_cap
== cap
) {
4009 cap_snaps
= !in
->cap_snaps
.empty();
4010 dirty_caps
= in
->dirty_caps
| in
->flushing_caps
;
4011 in
->wanted_max_size
= 0;
4012 in
->requested_max_size
= 0;
4013 in
->flags
|= I_CAP_DROPPED
;
4015 remove_cap(cap
, false);
4016 signal_cond_list(in
->waitfor_caps
);
4018 InodeRef
tmp_ref(in
);
4019 in
->cap_snaps
.clear();
4022 lderr(cct
) << "remove_session_caps still has dirty|flushing caps on " << *in
<< dendl
;
4023 if (in
->flushing_caps
) {
4024 num_flushing_caps
--;
4025 in
->flushing_cap_tids
.clear();
4027 in
->flushing_caps
= 0;
4028 in
->mark_caps_clean();
4032 s
->flushing_caps_tids
.clear();
4036 int Client::_do_remount(void)
4039 int r
= remount_cb(callback_handle
);
4042 client_t whoami
= get_nodeid();
4045 "failed to remount (to trim kernel dentries): "
4046 "errno = " << e
<< " (" << strerror(e
) << ")" << dendl
;
4049 "failed to remount (to trim kernel dentries): "
4050 "return code = " << r
<< dendl
;
4052 bool should_abort
= cct
->_conf
->get_val
<bool>("client_die_on_failed_remount") ||
4053 cct
->_conf
->get_val
<bool>("client_die_on_failed_dentry_invalidate");
4054 if (should_abort
&& !unmounting
) {
4055 lderr(cct
) << "failed to remount for kernel dentry trimming; quitting!" << dendl
;
4062 class C_Client_Remount
: public Context
{
4066 explicit C_Client_Remount(Client
*c
) : client(c
) {}
4067 void finish(int r
) override
{
4069 client
->_do_remount();
4073 void Client::_invalidate_kernel_dcache()
4077 if (can_invalidate_dentries
) {
4078 if (dentry_invalidate_cb
&& root
->dir
) {
4079 for (ceph::unordered_map
<string
, Dentry
*>::iterator p
= root
->dir
->dentries
.begin();
4080 p
!= root
->dir
->dentries
.end();
4082 if (p
->second
->inode
)
4083 _schedule_invalidate_dentry_callback(p
->second
, false);
4086 } else if (remount_cb
) {
4088 // when remounting a file system, linux kernel trims all unused dentries in the fs
4089 remount_finisher
.queue(new C_Client_Remount(this));
4093 void Client::trim_caps(MetaSession
*s
, uint64_t max
)
4095 mds_rank_t mds
= s
->mds_num
;
4096 size_t caps_size
= s
->caps
.size();
4097 ldout(cct
, 10) << "trim_caps mds." << mds
<< " max " << max
4098 << " caps " << caps_size
<< dendl
;
4100 uint64_t trimmed
= 0;
4101 auto p
= s
->caps
.begin();
4102 std::set
<Dentry
*> to_trim
; /* this avoids caps other than the one we're
4103 * looking at from getting deleted during traversal. */
4104 while ((caps_size
- trimmed
) > max
&& !p
.end()) {
4106 InodeRef
in(cap
->inode
);
4108 // Increment p early because it will be invalidated if cap
4109 // is deleted inside remove_cap
4112 if (in
->caps
.size() > 1 && cap
!= in
->auth_cap
) {
4113 int mine
= cap
->issued
| cap
->implemented
;
4114 int oissued
= in
->auth_cap
? in
->auth_cap
->issued
: 0;
4115 // disposable non-auth cap
4116 if (!(get_caps_used(in
.get()) & ~oissued
& mine
)) {
4117 ldout(cct
, 20) << " removing unused, unneeded non-auth cap on " << *in
<< dendl
;
4118 cap
= (remove_cap(cap
, true), nullptr);
4122 ldout(cct
, 20) << " trying to trim dentries for " << *in
<< dendl
;
4124 set
<Dentry
*>::iterator q
= in
->dn_set
.begin();
4125 while (q
!= in
->dn_set
.end()) {
4127 if (dn
->lru_is_expireable()) {
4128 if (can_invalidate_dentries
&&
4129 dn
->dir
->parent_inode
->ino
== MDS_INO_ROOT
) {
4130 // Only issue one of these per DN for inodes in root: handle
4131 // others more efficiently by calling for root-child DNs at
4132 // the end of this function.
4133 _schedule_invalidate_dentry_callback(dn
, true);
4135 ldout(cct
, 20) << " queueing dentry for trimming: " << dn
->name
<< dendl
;
4138 ldout(cct
, 20) << " not expirable: " << dn
->name
<< dendl
;
4142 if (all
&& in
->ino
!= MDS_INO_ROOT
) {
4143 ldout(cct
, 20) << __func__
<< " counting as trimmed: " << *in
<< dendl
;
4148 ldout(cct
, 20) << " trimming queued dentries: " << dendl
;
4149 for (const auto &dn
: to_trim
) {
4154 caps_size
= s
->caps
.size();
4155 if (caps_size
> max
)
4156 _invalidate_kernel_dcache();
4159 void Client::force_session_readonly(MetaSession
*s
)
4162 for (xlist
<Cap
*>::iterator p
= s
->caps
.begin(); !p
.end(); ++p
) {
4163 Inode
*in
= (*p
)->inode
;
4164 if (in
->caps_wanted() & CEPH_CAP_FILE_WR
)
4165 signal_cond_list(in
->waitfor_caps
);
4169 int Client::mark_caps_flushing(Inode
*in
, ceph_tid_t
* ptid
)
4171 MetaSession
*session
= in
->auth_cap
->session
;
4173 int flushing
= in
->dirty_caps
;
4176 ceph_tid_t flush_tid
= ++last_flush_tid
;
4177 in
->flushing_cap_tids
[flush_tid
] = flushing
;
4179 if (!in
->flushing_caps
) {
4180 ldout(cct
, 10) << "mark_caps_flushing " << ccap_string(flushing
) << " " << *in
<< dendl
;
4181 num_flushing_caps
++;
4183 ldout(cct
, 10) << "mark_caps_flushing (more) " << ccap_string(flushing
) << " " << *in
<< dendl
;
4186 in
->flushing_caps
|= flushing
;
4187 in
->mark_caps_clean();
4189 if (!in
->flushing_cap_item
.is_on_list())
4190 session
->flushing_caps
.push_back(&in
->flushing_cap_item
);
4191 session
->flushing_caps_tids
.insert(flush_tid
);
4197 void Client::adjust_session_flushing_caps(Inode
*in
, MetaSession
*old_s
, MetaSession
*new_s
)
4199 for (auto &p
: in
->cap_snaps
) {
4200 CapSnap
&capsnap
= p
.second
;
4201 if (capsnap
.flush_tid
> 0) {
4202 old_s
->flushing_caps_tids
.erase(capsnap
.flush_tid
);
4203 new_s
->flushing_caps_tids
.insert(capsnap
.flush_tid
);
4206 for (map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4207 it
!= in
->flushing_cap_tids
.end();
4209 old_s
->flushing_caps_tids
.erase(it
->first
);
4210 new_s
->flushing_caps_tids
.insert(it
->first
);
4212 new_s
->flushing_caps
.push_back(&in
->flushing_cap_item
);
4216 * Flush all caps back to the MDS. Because the callers generally wait on the
4217 * result of this function (syncfs and umount cases), we set
4218 * CHECK_CAPS_SYNCHRONOUS on the last check_caps call.
4220 void Client::flush_caps_sync()
4222 ldout(cct
, 10) << __func__
<< dendl
;
4223 xlist
<Inode
*>::iterator p
= delayed_list
.begin();
4225 unsigned flags
= CHECK_CAPS_NODELAY
;
4229 delayed_list
.pop_front();
4230 if (p
.end() && dirty_list
.empty())
4231 flags
|= CHECK_CAPS_SYNCHRONOUS
;
4232 check_caps(in
, flags
);
4236 p
= dirty_list
.begin();
4238 unsigned flags
= CHECK_CAPS_NODELAY
;
4243 flags
|= CHECK_CAPS_SYNCHRONOUS
;
4244 check_caps(in
, flags
);
4248 void Client::flush_caps(Inode
*in
, MetaSession
*session
, bool sync
)
4250 ldout(cct
, 10) << "flush_caps " << in
<< " mds." << session
->mds_num
<< dendl
;
4251 Cap
*cap
= in
->auth_cap
;
4252 assert(cap
->session
== session
);
4254 for (map
<ceph_tid_t
,int>::iterator p
= in
->flushing_cap_tids
.begin();
4255 p
!= in
->flushing_cap_tids
.end();
4257 bool req_sync
= false;
4259 /* If this is a synchronous request, then flush the journal on last one */
4260 if (sync
&& (p
->first
== in
->flushing_cap_tids
.rbegin()->first
))
4263 send_cap(in
, session
, cap
, req_sync
,
4264 (get_caps_used(in
) | in
->caps_dirty()),
4265 in
->caps_wanted(), (cap
->issued
| cap
->implemented
),
4266 p
->second
, p
->first
);
4270 void Client::wait_sync_caps(Inode
*in
, ceph_tid_t want
)
4272 while (in
->flushing_caps
) {
4273 map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4274 assert(it
!= in
->flushing_cap_tids
.end());
4275 if (it
->first
> want
)
4277 ldout(cct
, 10) << "wait_sync_caps on " << *in
<< " flushing "
4278 << ccap_string(it
->second
) << " want " << want
4279 << " last " << it
->first
<< dendl
;
4280 wait_on_list(in
->waitfor_caps
);
4284 void Client::wait_sync_caps(ceph_tid_t want
)
4287 ldout(cct
, 10) << "wait_sync_caps want " << want
<< " (last is " << last_flush_tid
<< ", "
4288 << num_flushing_caps
<< " total flushing)" << dendl
;
4289 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
4290 p
!= mds_sessions
.end();
4292 MetaSession
*s
= p
->second
;
4293 if (s
->flushing_caps_tids
.empty())
4295 ceph_tid_t oldest_tid
= *s
->flushing_caps_tids
.begin();
4296 if (oldest_tid
<= want
) {
4297 ldout(cct
, 10) << " waiting on mds." << p
->first
<< " tid " << oldest_tid
4298 << " (want " << want
<< ")" << dendl
;
4299 sync_cond
.Wait(client_lock
);
4305 void Client::kick_flushing_caps(MetaSession
*session
)
4307 mds_rank_t mds
= session
->mds_num
;
4308 ldout(cct
, 10) << "kick_flushing_caps mds." << mds
<< dendl
;
4310 for (xlist
<Inode
*>::iterator p
= session
->flushing_caps
.begin(); !p
.end(); ++p
) {
4312 if (session
->early_flushing_caps
.count(in
))
4314 ldout(cct
, 20) << " reflushing caps on " << *in
<< " to mds." << mds
<< dendl
;
4315 if (in
->cap_snaps
.size())
4316 flush_snaps(in
, true);
4317 if (in
->flushing_caps
)
4318 flush_caps(in
, session
);
4321 session
->early_flushing_caps
.clear();
4324 void Client::early_kick_flushing_caps(MetaSession
*session
)
4326 session
->early_flushing_caps
.clear();
4328 for (xlist
<Inode
*>::iterator p
= session
->flushing_caps
.begin(); !p
.end(); ++p
) {
4330 assert(in
->auth_cap
);
4332 // if flushing caps were revoked, we re-send the cap flush in client reconnect
4333 // stage. This guarantees that MDS processes the cap flush message before issuing
4334 // the flushing caps to other client.
4335 if ((in
->flushing_caps
& in
->auth_cap
->issued
) == in
->flushing_caps
)
4338 ldout(cct
, 20) << " reflushing caps (early_kick) on " << *in
4339 << " to mds." << session
->mds_num
<< dendl
;
4341 session
->early_flushing_caps
.insert(in
);
4343 if (in
->cap_snaps
.size())
4344 flush_snaps(in
, true);
4345 if (in
->flushing_caps
)
4346 flush_caps(in
, session
);
4351 void Client::kick_maxsize_requests(MetaSession
*session
)
4353 xlist
<Cap
*>::iterator iter
= session
->caps
.begin();
4354 while (!iter
.end()){
4355 (*iter
)->inode
->requested_max_size
= 0;
4356 (*iter
)->inode
->wanted_max_size
= 0;
4357 signal_cond_list((*iter
)->inode
->waitfor_caps
);
4362 void SnapRealm::build_snap_context()
4364 set
<snapid_t
> snaps
;
4365 snapid_t max_seq
= seq
;
4367 // start with prior_parents?
4368 for (unsigned i
=0; i
<prior_parent_snaps
.size(); i
++)
4369 snaps
.insert(prior_parent_snaps
[i
]);
4371 // current parent's snaps
4373 const SnapContext
& psnapc
= pparent
->get_snap_context();
4374 for (unsigned i
=0; i
<psnapc
.snaps
.size(); i
++)
4375 if (psnapc
.snaps
[i
] >= parent_since
)
4376 snaps
.insert(psnapc
.snaps
[i
]);
4377 if (psnapc
.seq
> max_seq
)
4378 max_seq
= psnapc
.seq
;
4382 for (unsigned i
=0; i
<my_snaps
.size(); i
++)
4383 snaps
.insert(my_snaps
[i
]);
4386 cached_snap_context
.seq
= max_seq
;
4387 cached_snap_context
.snaps
.resize(0);
4388 cached_snap_context
.snaps
.reserve(snaps
.size());
4389 for (set
<snapid_t
>::reverse_iterator p
= snaps
.rbegin(); p
!= snaps
.rend(); ++p
)
4390 cached_snap_context
.snaps
.push_back(*p
);
4393 void Client::invalidate_snaprealm_and_children(SnapRealm
*realm
)
4398 while (!q
.empty()) {
4402 ldout(cct
, 10) << "invalidate_snaprealm_and_children " << *realm
<< dendl
;
4403 realm
->invalidate_cache();
4405 for (set
<SnapRealm
*>::iterator p
= realm
->pchildren
.begin();
4406 p
!= realm
->pchildren
.end();
4412 SnapRealm
*Client::get_snap_realm(inodeno_t r
)
4414 SnapRealm
*realm
= snap_realms
[r
];
4416 snap_realms
[r
] = realm
= new SnapRealm(r
);
4417 ldout(cct
, 20) << "get_snap_realm " << r
<< " " << realm
<< " " << realm
->nref
<< " -> " << (realm
->nref
+ 1) << dendl
;
4422 SnapRealm
*Client::get_snap_realm_maybe(inodeno_t r
)
4424 if (snap_realms
.count(r
) == 0) {
4425 ldout(cct
, 20) << "get_snap_realm_maybe " << r
<< " fail" << dendl
;
4428 SnapRealm
*realm
= snap_realms
[r
];
4429 ldout(cct
, 20) << "get_snap_realm_maybe " << r
<< " " << realm
<< " " << realm
->nref
<< " -> " << (realm
->nref
+ 1) << dendl
;
4434 void Client::put_snap_realm(SnapRealm
*realm
)
4436 ldout(cct
, 20) << "put_snap_realm " << realm
->ino
<< " " << realm
4437 << " " << realm
->nref
<< " -> " << (realm
->nref
- 1) << dendl
;
4438 if (--realm
->nref
== 0) {
4439 snap_realms
.erase(realm
->ino
);
4440 if (realm
->pparent
) {
4441 realm
->pparent
->pchildren
.erase(realm
);
4442 put_snap_realm(realm
->pparent
);
4448 bool Client::adjust_realm_parent(SnapRealm
*realm
, inodeno_t parent
)
4450 if (realm
->parent
!= parent
) {
4451 ldout(cct
, 10) << "adjust_realm_parent " << *realm
4452 << " " << realm
->parent
<< " -> " << parent
<< dendl
;
4453 realm
->parent
= parent
;
4454 if (realm
->pparent
) {
4455 realm
->pparent
->pchildren
.erase(realm
);
4456 put_snap_realm(realm
->pparent
);
4458 realm
->pparent
= get_snap_realm(parent
);
4459 realm
->pparent
->pchildren
.insert(realm
);
4465 static bool has_new_snaps(const SnapContext
& old_snapc
,
4466 const SnapContext
& new_snapc
)
4468 return !new_snapc
.snaps
.empty() && new_snapc
.snaps
[0] > old_snapc
.seq
;
4472 void Client::update_snap_trace(bufferlist
& bl
, SnapRealm
**realm_ret
, bool flush
)
4474 SnapRealm
*first_realm
= NULL
;
4475 ldout(cct
, 10) << "update_snap_trace len " << bl
.length() << dendl
;
4477 map
<SnapRealm
*, SnapContext
> dirty_realms
;
4479 bufferlist::iterator p
= bl
.begin();
4483 SnapRealm
*realm
= get_snap_realm(info
.ino());
4485 bool invalidate
= false;
4487 if (info
.seq() > realm
->seq
) {
4488 ldout(cct
, 10) << "update_snap_trace " << *realm
<< " seq " << info
.seq() << " > " << realm
->seq
4492 // writeback any dirty caps _before_ updating snap list (i.e. with old snap info)
4493 // flush me + children
4496 while (!q
.empty()) {
4497 SnapRealm
*realm
= q
.front();
4500 for (set
<SnapRealm
*>::iterator p
= realm
->pchildren
.begin();
4501 p
!= realm
->pchildren
.end();
4505 if (dirty_realms
.count(realm
) == 0) {
4507 dirty_realms
[realm
] = realm
->get_snap_context();
4513 realm
->seq
= info
.seq();
4514 realm
->created
= info
.created();
4515 realm
->parent_since
= info
.parent_since();
4516 realm
->prior_parent_snaps
= info
.prior_parent_snaps
;
4517 realm
->my_snaps
= info
.my_snaps
;
4521 // _always_ verify parent
4522 if (adjust_realm_parent(realm
, info
.parent()))
4526 invalidate_snaprealm_and_children(realm
);
4527 ldout(cct
, 15) << "update_snap_trace " << *realm
<< " self|parent updated" << dendl
;
4528 ldout(cct
, 15) << " snapc " << realm
->get_snap_context() << dendl
;
4530 ldout(cct
, 10) << "update_snap_trace " << *realm
<< " seq " << info
.seq()
4531 << " <= " << realm
->seq
<< " and same parent, SKIPPING" << dendl
;
4535 first_realm
= realm
;
4537 put_snap_realm(realm
);
4540 for (map
<SnapRealm
*, SnapContext
>::iterator q
= dirty_realms
.begin();
4541 q
!= dirty_realms
.end();
4543 SnapRealm
*realm
= q
->first
;
4544 // if there are new snaps ?
4545 if (has_new_snaps(q
->second
, realm
->get_snap_context())) {
4546 ldout(cct
, 10) << " flushing caps on " << *realm
<< dendl
;
4547 xlist
<Inode
*>::iterator r
= realm
->inodes_with_caps
.begin();
4551 queue_cap_snap(in
, q
->second
);
4554 ldout(cct
, 10) << " no new snap on " << *realm
<< dendl
;
4556 put_snap_realm(realm
);
4560 *realm_ret
= first_realm
;
4562 put_snap_realm(first_realm
);
4565 void Client::handle_snap(MClientSnap
*m
)
4567 ldout(cct
, 10) << "handle_snap " << *m
<< dendl
;
4568 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4569 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4575 got_mds_push(session
);
4577 map
<Inode
*, SnapContext
> to_move
;
4578 SnapRealm
*realm
= 0;
4580 if (m
->head
.op
== CEPH_SNAP_OP_SPLIT
) {
4581 assert(m
->head
.split
);
4583 bufferlist::iterator p
= m
->bl
.begin();
4585 assert(info
.ino() == m
->head
.split
);
4587 // flush, then move, ino's.
4588 realm
= get_snap_realm(info
.ino());
4589 ldout(cct
, 10) << " splitting off " << *realm
<< dendl
;
4590 for (vector
<inodeno_t
>::iterator p
= m
->split_inos
.begin();
4591 p
!= m
->split_inos
.end();
4593 vinodeno_t
vino(*p
, CEPH_NOSNAP
);
4594 if (inode_map
.count(vino
)) {
4595 Inode
*in
= inode_map
[vino
];
4596 if (!in
->snaprealm
|| in
->snaprealm
== realm
)
4598 if (in
->snaprealm
->created
> info
.created()) {
4599 ldout(cct
, 10) << " NOT moving " << *in
<< " from _newer_ realm "
4600 << *in
->snaprealm
<< dendl
;
4603 ldout(cct
, 10) << " moving " << *in
<< " from " << *in
->snaprealm
<< dendl
;
4606 in
->snaprealm_item
.remove_myself();
4607 to_move
[in
] = in
->snaprealm
->get_snap_context();
4608 put_snap_realm(in
->snaprealm
);
4612 // move child snaprealms, too
4613 for (vector
<inodeno_t
>::iterator p
= m
->split_realms
.begin();
4614 p
!= m
->split_realms
.end();
4616 ldout(cct
, 10) << "adjusting snaprealm " << *p
<< " parent" << dendl
;
4617 SnapRealm
*child
= get_snap_realm_maybe(*p
);
4620 adjust_realm_parent(child
, realm
->ino
);
4621 put_snap_realm(child
);
4625 update_snap_trace(m
->bl
, NULL
, m
->head
.op
!= CEPH_SNAP_OP_DESTROY
);
4628 for (auto p
= to_move
.begin(); p
!= to_move
.end(); ++p
) {
4629 Inode
*in
= p
->first
;
4630 in
->snaprealm
= realm
;
4631 realm
->inodes_with_caps
.push_back(&in
->snaprealm_item
);
4633 // queue for snap writeback
4634 if (has_new_snaps(p
->second
, realm
->get_snap_context()))
4635 queue_cap_snap(in
, p
->second
);
4637 put_snap_realm(realm
);
4643 void Client::handle_quota(MClientQuota
*m
)
4645 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4646 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4652 got_mds_push(session
);
4654 ldout(cct
, 10) << "handle_quota " << *m
<< " from mds." << mds
<< dendl
;
4656 vinodeno_t
vino(m
->ino
, CEPH_NOSNAP
);
4657 if (inode_map
.count(vino
)) {
4659 in
= inode_map
[vino
];
4662 in
->quota
= m
->quota
;
4663 in
->rstat
= m
->rstat
;
4670 void Client::handle_caps(MClientCaps
*m
)
4672 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4673 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4679 if (m
->osd_epoch_barrier
&& !objecter
->have_map(m
->osd_epoch_barrier
)) {
4680 // Pause RADOS operations until we see the required epoch
4681 objecter
->set_epoch_barrier(m
->osd_epoch_barrier
);
4684 if (m
->osd_epoch_barrier
> cap_epoch_barrier
) {
4685 // Record the barrier so that we will transmit it to MDS when releasing
4686 set_cap_epoch_barrier(m
->osd_epoch_barrier
);
4689 got_mds_push(session
);
4691 m
->clear_payload(); // for if/when we send back to MDS
4694 vinodeno_t
vino(m
->get_ino(), CEPH_NOSNAP
);
4695 if (inode_map
.count(vino
))
4696 in
= inode_map
[vino
];
4698 if (m
->get_op() == CEPH_CAP_OP_IMPORT
) {
4699 ldout(cct
, 5) << "handle_caps don't have vino " << vino
<< " on IMPORT, immediately releasing" << dendl
;
4700 session
->enqueue_cap_release(
4707 ldout(cct
, 5) << "handle_caps don't have vino " << vino
<< ", dropping" << dendl
;
4711 // in case the mds is waiting on e.g. a revocation
4712 flush_cap_releases();
4716 switch (m
->get_op()) {
4717 case CEPH_CAP_OP_EXPORT
:
4718 return handle_cap_export(session
, in
, m
);
4719 case CEPH_CAP_OP_FLUSHSNAP_ACK
:
4720 return handle_cap_flushsnap_ack(session
, in
, m
);
4721 case CEPH_CAP_OP_IMPORT
:
4722 handle_cap_import(session
, in
, m
);
4725 if (in
->caps
.count(mds
) == 0) {
4726 ldout(cct
, 5) << "handle_caps don't have " << *in
<< " cap on mds." << mds
<< dendl
;
4731 Cap
*cap
= in
->caps
[mds
];
4733 switch (m
->get_op()) {
4734 case CEPH_CAP_OP_TRUNC
: return handle_cap_trunc(session
, in
, m
);
4735 case CEPH_CAP_OP_IMPORT
:
4736 case CEPH_CAP_OP_REVOKE
:
4737 case CEPH_CAP_OP_GRANT
: return handle_cap_grant(session
, in
, cap
, m
);
4738 case CEPH_CAP_OP_FLUSH_ACK
: return handle_cap_flush_ack(session
, in
, cap
, m
);
4744 void Client::handle_cap_import(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4746 mds_rank_t mds
= session
->mds_num
;
4748 ldout(cct
, 5) << "handle_cap_import ino " << m
->get_ino() << " mseq " << m
->get_mseq()
4749 << " IMPORT from mds." << mds
<< dendl
;
4751 const mds_rank_t peer_mds
= mds_rank_t(m
->peer
.mds
);
4754 if (m
->peer
.cap_id
&& in
->caps
.count(peer_mds
)) {
4755 cap
= in
->caps
[peer_mds
];
4757 cap_perms
= cap
->latest_perms
;
4762 SnapRealm
*realm
= NULL
;
4763 update_snap_trace(m
->snapbl
, &realm
);
4765 add_update_cap(in
, session
, m
->get_cap_id(),
4766 m
->get_caps(), m
->get_seq(), m
->get_mseq(), m
->get_realm(),
4767 CEPH_CAP_FLAG_AUTH
, cap_perms
);
4769 if (cap
&& cap
->cap_id
== m
->peer
.cap_id
) {
4770 remove_cap(cap
, (m
->peer
.flags
& CEPH_CAP_FLAG_RELEASE
));
4774 put_snap_realm(realm
);
4776 if (in
->auth_cap
&& in
->auth_cap
->session
->mds_num
== mds
) {
4777 // reflush any/all caps (if we are now the auth_cap)
4778 if (in
->cap_snaps
.size())
4779 flush_snaps(in
, true);
4780 if (in
->flushing_caps
)
4781 flush_caps(in
, session
);
4785 void Client::handle_cap_export(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4787 mds_rank_t mds
= session
->mds_num
;
4789 ldout(cct
, 5) << "handle_cap_export ino " << m
->get_ino() << " mseq " << m
->get_mseq()
4790 << " EXPORT from mds." << mds
<< dendl
;
4793 if (in
->caps
.count(mds
))
4794 cap
= in
->caps
[mds
];
4796 const mds_rank_t peer_mds
= mds_rank_t(m
->peer
.mds
);
4798 if (cap
&& cap
->cap_id
== m
->get_cap_id()) {
4799 if (m
->peer
.cap_id
) {
4800 MetaSession
*tsession
= _get_or_open_mds_session(peer_mds
);
4801 if (in
->caps
.count(peer_mds
)) {
4802 Cap
*tcap
= in
->caps
[peer_mds
];
4803 if (tcap
->cap_id
== m
->peer
.cap_id
&&
4804 ceph_seq_cmp(tcap
->seq
, m
->peer
.seq
) < 0) {
4805 tcap
->cap_id
= m
->peer
.cap_id
;
4806 tcap
->seq
= m
->peer
.seq
- 1;
4807 tcap
->issue_seq
= tcap
->seq
;
4808 tcap
->mseq
= m
->peer
.mseq
;
4809 tcap
->issued
|= cap
->issued
;
4810 tcap
->implemented
|= cap
->issued
;
4811 if (cap
== in
->auth_cap
)
4812 in
->auth_cap
= tcap
;
4813 if (in
->auth_cap
== tcap
&& in
->flushing_cap_item
.is_on_list())
4814 adjust_session_flushing_caps(in
, session
, tsession
);
4817 add_update_cap(in
, tsession
, m
->peer
.cap_id
, cap
->issued
,
4818 m
->peer
.seq
- 1, m
->peer
.mseq
, (uint64_t)-1,
4819 cap
== in
->auth_cap
? CEPH_CAP_FLAG_AUTH
: 0,
4823 if (cap
== in
->auth_cap
)
4824 in
->flags
|= I_CAP_DROPPED
;
4827 remove_cap(cap
, false);
4833 void Client::handle_cap_trunc(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4835 mds_rank_t mds
= session
->mds_num
;
4836 assert(in
->caps
[mds
]);
4838 ldout(cct
, 10) << "handle_cap_trunc on ino " << *in
4839 << " size " << in
->size
<< " -> " << m
->get_size()
4842 int implemented
= 0;
4843 int issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
4844 issued
|= implemented
;
4845 update_inode_file_bits(in
, m
->get_truncate_seq(), m
->get_truncate_size(),
4846 m
->get_size(), m
->get_change_attr(), m
->get_time_warp_seq(),
4847 m
->get_ctime(), m
->get_mtime(), m
->get_atime(),
4848 m
->inline_version
, m
->inline_data
, issued
);
4852 void Client::handle_cap_flush_ack(MetaSession
*session
, Inode
*in
, Cap
*cap
, MClientCaps
*m
)
4854 ceph_tid_t flush_ack_tid
= m
->get_client_tid();
4855 int dirty
= m
->get_dirty();
4859 for (map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4860 it
!= in
->flushing_cap_tids
.end(); ) {
4861 if (it
->first
== flush_ack_tid
)
4862 cleaned
= it
->second
;
4863 if (it
->first
<= flush_ack_tid
) {
4864 session
->flushing_caps_tids
.erase(it
->first
);
4865 in
->flushing_cap_tids
.erase(it
++);
4869 cleaned
&= ~it
->second
;
4875 ldout(cct
, 5) << "handle_cap_flush_ack mds." << session
->mds_num
4876 << " cleaned " << ccap_string(cleaned
) << " on " << *in
4877 << " with " << ccap_string(dirty
) << dendl
;
4880 signal_cond_list(in
->waitfor_caps
);
4881 if (session
->flushing_caps_tids
.empty() ||
4882 *session
->flushing_caps_tids
.begin() > flush_ack_tid
)
4887 in
->cap_dirtier_uid
= -1;
4888 in
->cap_dirtier_gid
= -1;
4892 ldout(cct
, 10) << " tid " << m
->get_client_tid() << " != any cap bit tids" << dendl
;
4894 if (in
->flushing_caps
) {
4895 ldout(cct
, 5) << " flushing_caps " << ccap_string(in
->flushing_caps
)
4896 << " -> " << ccap_string(in
->flushing_caps
& ~cleaned
) << dendl
;
4897 in
->flushing_caps
&= ~cleaned
;
4898 if (in
->flushing_caps
== 0) {
4899 ldout(cct
, 10) << " " << *in
<< " !flushing" << dendl
;
4900 num_flushing_caps
--;
4901 if (in
->cap_snaps
.empty())
4902 in
->flushing_cap_item
.remove_myself();
4904 if (!in
->caps_dirty())
4913 void Client::handle_cap_flushsnap_ack(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4915 mds_rank_t mds
= session
->mds_num
;
4916 assert(in
->caps
[mds
]);
4917 snapid_t follows
= m
->get_snap_follows();
4919 if (in
->cap_snaps
.count(follows
)) {
4920 CapSnap
&capsnap
= in
->cap_snaps
.at(follows
);
4921 if (m
->get_client_tid() != capsnap
.flush_tid
) {
4922 ldout(cct
, 10) << " tid " << m
->get_client_tid() << " != " << capsnap
.flush_tid
<< dendl
;
4924 ldout(cct
, 5) << "handle_cap_flushedsnap mds." << mds
<< " flushed snap follows " << follows
4925 << " on " << *in
<< dendl
;
4927 if (in
->get_num_ref() == 1)
4928 tmp_ref
= in
; // make sure inode not get freed while erasing item from in->cap_snaps
4929 if (in
->flushing_caps
== 0 && in
->cap_snaps
.empty())
4930 in
->flushing_cap_item
.remove_myself();
4931 session
->flushing_caps_tids
.erase(capsnap
.flush_tid
);
4932 in
->cap_snaps
.erase(follows
);
4935 ldout(cct
, 5) << "handle_cap_flushedsnap DUP(?) mds." << mds
<< " flushed snap follows " << follows
4936 << " on " << *in
<< dendl
;
4937 // we may not have it if we send multiple FLUSHSNAP requests and (got multiple FLUSHEDSNAPs back)
4943 class C_Client_DentryInvalidate
: public Context
{
4950 C_Client_DentryInvalidate(Client
*c
, Dentry
*dn
, bool del
) :
4951 client(c
), name(dn
->name
) {
4952 if (client
->use_faked_inos()) {
4953 dirino
.ino
= dn
->dir
->parent_inode
->faked_ino
;
4955 ino
.ino
= dn
->inode
->faked_ino
;
4957 dirino
= dn
->dir
->parent_inode
->vino();
4959 ino
= dn
->inode
->vino();
4962 ino
.ino
= inodeno_t();
4964 void finish(int r
) override
{
4965 // _async_dentry_invalidate is responsible for its own locking
4966 assert(!client
->client_lock
.is_locked_by_me());
4967 client
->_async_dentry_invalidate(dirino
, ino
, name
);
4971 void Client::_async_dentry_invalidate(vinodeno_t dirino
, vinodeno_t ino
, string
& name
)
4975 ldout(cct
, 10) << "_async_dentry_invalidate '" << name
<< "' ino " << ino
4976 << " in dir " << dirino
<< dendl
;
4977 dentry_invalidate_cb(callback_handle
, dirino
, ino
, name
);
4980 void Client::_schedule_invalidate_dentry_callback(Dentry
*dn
, bool del
)
4982 if (dentry_invalidate_cb
&& dn
->inode
->ll_ref
> 0)
4983 async_dentry_invalidator
.queue(new C_Client_DentryInvalidate(this, dn
, del
));
4986 void Client::_try_to_trim_inode(Inode
*in
, bool sched_inval
)
4988 int ref
= in
->get_num_ref();
4990 if (in
->dir
&& !in
->dir
->dentries
.empty()) {
4991 for (auto p
= in
->dir
->dentries
.begin();
4992 p
!= in
->dir
->dentries
.end(); ) {
4993 Dentry
*dn
= p
->second
;
4995 /* rmsnap removes whole subtree, need trim inodes recursively.
4996 * we don't need to invalidate dentries recursively. because
4997 * invalidating a directory dentry effectively invalidate
4999 if (in
->snapid
!= CEPH_NOSNAP
&& dn
->inode
&& dn
->inode
->is_dir())
5000 _try_to_trim_inode(dn
->inode
.get(), false);
5002 if (dn
->lru_is_expireable())
5003 unlink(dn
, true, false); // keep dir, drop dentry
5005 if (in
->dir
->dentries
.empty()) {
5011 if (ref
> 0 && (in
->flags
& I_SNAPDIR_OPEN
)) {
5012 InodeRef snapdir
= open_snapdir(in
);
5013 _try_to_trim_inode(snapdir
.get(), false);
5017 if (ref
> 0 && in
->ll_ref
> 0 && sched_inval
) {
5018 set
<Dentry
*>::iterator q
= in
->dn_set
.begin();
5019 while (q
!= in
->dn_set
.end()) {
5021 // FIXME: we play lots of unlink/link tricks when handling MDS replies,
5022 // so in->dn_set doesn't always reflect the state of kernel's dcache.
5023 _schedule_invalidate_dentry_callback(dn
, true);
5024 unlink(dn
, true, true);
5029 void Client::handle_cap_grant(MetaSession
*session
, Inode
*in
, Cap
*cap
, MClientCaps
*m
)
5031 mds_rank_t mds
= session
->mds_num
;
5032 int used
= get_caps_used(in
);
5033 int wanted
= in
->caps_wanted();
5035 const int old_caps
= cap
->issued
;
5036 const int new_caps
= m
->get_caps();
5037 ldout(cct
, 5) << "handle_cap_grant on in " << m
->get_ino()
5038 << " mds." << mds
<< " seq " << m
->get_seq()
5039 << " caps now " << ccap_string(new_caps
)
5040 << " was " << ccap_string(old_caps
) << dendl
;
5041 cap
->seq
= m
->get_seq();
5042 cap
->gen
= session
->cap_gen
;
5044 in
->layout
= m
->get_layout();
5047 int implemented
= 0;
5048 int issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
5049 issued
|= implemented
;
5051 if ((issued
& CEPH_CAP_AUTH_EXCL
) == 0) {
5052 in
->mode
= m
->head
.mode
;
5053 in
->uid
= m
->head
.uid
;
5054 in
->gid
= m
->head
.gid
;
5055 in
->btime
= m
->btime
;
5057 bool deleted_inode
= false;
5058 if ((issued
& CEPH_CAP_LINK_EXCL
) == 0) {
5059 in
->nlink
= m
->head
.nlink
;
5060 if (in
->nlink
== 0 &&
5061 (new_caps
& (CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
)))
5062 deleted_inode
= true;
5064 if ((issued
& CEPH_CAP_XATTR_EXCL
) == 0 &&
5065 m
->xattrbl
.length() &&
5066 m
->head
.xattr_version
> in
->xattr_version
) {
5067 bufferlist::iterator p
= m
->xattrbl
.begin();
5068 ::decode(in
->xattrs
, p
);
5069 in
->xattr_version
= m
->head
.xattr_version
;
5072 if ((new_caps
& CEPH_CAP_FILE_SHARED
) && m
->dirstat_is_valid()) {
5073 in
->dirstat
.nfiles
= m
->get_nfiles();
5074 in
->dirstat
.nsubdirs
= m
->get_nsubdirs();
5077 update_inode_file_bits(in
, m
->get_truncate_seq(), m
->get_truncate_size(), m
->get_size(),
5078 m
->get_change_attr(), m
->get_time_warp_seq(), m
->get_ctime(),
5079 m
->get_mtime(), m
->get_atime(),
5080 m
->inline_version
, m
->inline_data
, issued
);
5083 if (cap
== in
->auth_cap
&&
5084 m
->get_max_size() != in
->max_size
) {
5085 ldout(cct
, 10) << "max_size " << in
->max_size
<< " -> " << m
->get_max_size() << dendl
;
5086 in
->max_size
= m
->get_max_size();
5087 if (in
->max_size
> in
->wanted_max_size
) {
5088 in
->wanted_max_size
= 0;
5089 in
->requested_max_size
= 0;
5094 if (m
->get_op() == CEPH_CAP_OP_IMPORT
&& m
->get_wanted() != wanted
)
5097 check_cap_issue(in
, cap
, new_caps
);
5100 int revoked
= old_caps
& ~new_caps
;
5102 ldout(cct
, 10) << " revocation of " << ccap_string(revoked
) << dendl
;
5103 cap
->issued
= new_caps
;
5104 cap
->implemented
|= new_caps
;
5106 // recall delegations if we're losing caps necessary for them
5107 if (revoked
& ceph_deleg_caps_for_type(CEPH_DELEGATION_RD
))
5108 in
->recall_deleg(false);
5109 else if (revoked
& ceph_deleg_caps_for_type(CEPH_DELEGATION_WR
))
5110 in
->recall_deleg(true);
5112 if ((used
& revoked
& CEPH_CAP_FILE_BUFFER
) &&
5113 !_flush(in
, new C_Client_FlushComplete(this, in
))) {
5114 // waitin' for flush
5115 } else if (revoked
& CEPH_CAP_FILE_CACHE
) {
5119 cap
->wanted
= 0; // don't let check_caps skip sending a response to MDS
5122 } else if (old_caps
== new_caps
) {
5123 ldout(cct
, 10) << " caps unchanged at " << ccap_string(old_caps
) << dendl
;
5125 ldout(cct
, 10) << " grant, new caps are " << ccap_string(new_caps
& ~old_caps
) << dendl
;
5126 cap
->issued
= new_caps
;
5127 cap
->implemented
|= new_caps
;
5129 if (cap
== in
->auth_cap
) {
5130 // non-auth MDS is revoking the newly grant caps ?
5131 for (map
<mds_rank_t
, Cap
*>::iterator it
= in
->caps
.begin(); it
!= in
->caps
.end(); ++it
) {
5132 if (it
->second
== cap
)
5134 if (it
->second
->implemented
& ~it
->second
->issued
& new_caps
) {
5147 signal_cond_list(in
->waitfor_caps
);
5149 // may drop inode's last ref
5151 _try_to_trim_inode(in
, true);
5156 int Client::inode_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
)
5158 if (perms
.uid() == 0)
5161 if (perms
.uid() != in
->uid
&& (in
->mode
& S_IRWXG
)) {
5162 int ret
= _posix_acl_permission(in
, perms
, want
);
5167 // check permissions before doing anything else
5168 if (!in
->check_mode(perms
, want
))
5173 int Client::xattr_permission(Inode
*in
, const char *name
, unsigned want
,
5174 const UserPerm
& perms
)
5176 int r
= _getattr_for_perm(in
, perms
);
5181 if (strncmp(name
, "system.", 7) == 0) {
5182 if ((want
& MAY_WRITE
) && (perms
.uid() != 0 && perms
.uid() != in
->uid
))
5185 r
= inode_permission(in
, perms
, want
);
5188 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5192 ostream
& operator<<(ostream
&out
, const UserPerm
& perm
) {
5193 out
<< "UserPerm(uid: " << perm
.uid() << ", gid: " << perm
.gid() << ")";
5197 int Client::may_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
5198 const UserPerm
& perms
)
5200 ldout(cct
, 20) << __func__
<< " " << *in
<< "; " << perms
<< dendl
;
5201 int r
= _getattr_for_perm(in
, perms
);
5205 if (mask
& CEPH_SETATTR_SIZE
) {
5206 r
= inode_permission(in
, perms
, MAY_WRITE
);
5212 if (mask
& CEPH_SETATTR_UID
) {
5213 if (perms
.uid() != 0 && (perms
.uid() != in
->uid
|| stx
->stx_uid
!= in
->uid
))
5216 if (mask
& CEPH_SETATTR_GID
) {
5217 if (perms
.uid() != 0 && (perms
.uid() != in
->uid
||
5218 (!perms
.gid_in_groups(stx
->stx_gid
) && stx
->stx_gid
!= in
->gid
)))
5222 if (mask
& CEPH_SETATTR_MODE
) {
5223 if (perms
.uid() != 0 && perms
.uid() != in
->uid
)
5226 gid_t i_gid
= (mask
& CEPH_SETATTR_GID
) ? stx
->stx_gid
: in
->gid
;
5227 if (perms
.uid() != 0 && !perms
.gid_in_groups(i_gid
))
5228 stx
->stx_mode
&= ~S_ISGID
;
5231 if (mask
& (CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
|
5232 CEPH_SETATTR_MTIME
| CEPH_SETATTR_ATIME
)) {
5233 if (perms
.uid() != 0 && perms
.uid() != in
->uid
) {
5234 int check_mask
= CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
;
5235 if (!(mask
& CEPH_SETATTR_MTIME_NOW
))
5236 check_mask
|= CEPH_SETATTR_MTIME
;
5237 if (!(mask
& CEPH_SETATTR_ATIME_NOW
))
5238 check_mask
|= CEPH_SETATTR_ATIME
;
5239 if (check_mask
& mask
) {
5242 r
= inode_permission(in
, perms
, MAY_WRITE
);
5250 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5254 int Client::may_open(Inode
*in
, int flags
, const UserPerm
& perms
)
5256 ldout(cct
, 20) << __func__
<< " " << *in
<< "; " << perms
<< dendl
;
5259 if ((flags
& O_ACCMODE
) == O_WRONLY
)
5261 else if ((flags
& O_ACCMODE
) == O_RDWR
)
5262 want
= MAY_READ
| MAY_WRITE
;
5263 else if ((flags
& O_ACCMODE
) == O_RDONLY
)
5265 if (flags
& O_TRUNC
)
5269 switch (in
->mode
& S_IFMT
) {
5274 if (want
& MAY_WRITE
) {
5281 r
= _getattr_for_perm(in
, perms
);
5285 r
= inode_permission(in
, perms
, want
);
5287 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5291 int Client::may_lookup(Inode
*dir
, const UserPerm
& perms
)
5293 ldout(cct
, 20) << __func__
<< " " << *dir
<< "; " << perms
<< dendl
;
5294 int r
= _getattr_for_perm(dir
, perms
);
5298 r
= inode_permission(dir
, perms
, MAY_EXEC
);
5300 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5304 int Client::may_create(Inode
*dir
, const UserPerm
& perms
)
5306 ldout(cct
, 20) << __func__
<< " " << *dir
<< "; " << perms
<< dendl
;
5307 int r
= _getattr_for_perm(dir
, perms
);
5311 r
= inode_permission(dir
, perms
, MAY_EXEC
| MAY_WRITE
);
5313 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5317 int Client::may_delete(Inode
*dir
, const char *name
, const UserPerm
& perms
)
5319 ldout(cct
, 20) << __func__
<< " " << *dir
<< "; " << "; name " << name
<< "; " << perms
<< dendl
;
5320 int r
= _getattr_for_perm(dir
, perms
);
5324 r
= inode_permission(dir
, perms
, MAY_EXEC
| MAY_WRITE
);
5328 /* 'name == NULL' means rmsnap */
5329 if (perms
.uid() != 0 && name
&& (dir
->mode
& S_ISVTX
)) {
5331 r
= _lookup(dir
, name
, CEPH_CAP_AUTH_SHARED
, &otherin
, perms
);
5334 if (dir
->uid
!= perms
.uid() && otherin
->uid
!= perms
.uid())
5338 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5342 int Client::may_hardlink(Inode
*in
, const UserPerm
& perms
)
5344 ldout(cct
, 20) << __func__
<< " " << *in
<< "; " << perms
<< dendl
;
5345 int r
= _getattr_for_perm(in
, perms
);
5349 if (perms
.uid() == 0 || perms
.uid() == in
->uid
) {
5355 if (!S_ISREG(in
->mode
))
5358 if (in
->mode
& S_ISUID
)
5361 if ((in
->mode
& (S_ISGID
| S_IXGRP
)) == (S_ISGID
| S_IXGRP
))
5364 r
= inode_permission(in
, perms
, MAY_READ
| MAY_WRITE
);
5366 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5370 int Client::_getattr_for_perm(Inode
*in
, const UserPerm
& perms
)
5372 int mask
= CEPH_STAT_CAP_MODE
;
5374 if (acl_type
!= NO_ACL
) {
5375 mask
|= CEPH_STAT_CAP_XATTR
;
5376 force
= in
->xattr_version
== 0;
5378 return _getattr(in
, mask
, perms
, force
);
5381 vinodeno_t
Client::_get_vino(Inode
*in
)
5383 /* The caller must hold the client lock */
5384 return vinodeno_t(in
->ino
, in
->snapid
);
5387 inodeno_t
Client::_get_inodeno(Inode
*in
)
5389 /* The caller must hold the client lock */
5395 * Resolve an MDS spec to a list of MDS daemon GIDs.
5397 * The spec is a string representing a GID, rank, filesystem:rank, or name/id.
5398 * It may be '*' in which case it matches all GIDs.
5400 * If no error is returned, the `targets` vector will be populated with at least
5403 int Client::resolve_mds(
5404 const std::string
&mds_spec
,
5405 std::vector
<mds_gid_t
> *targets
)
5408 assert(targets
!= nullptr);
5411 std::stringstream ss
;
5412 int role_r
= fsmap
->parse_role(mds_spec
, &role
, ss
);
5414 // We got a role, resolve it to a GID
5415 ldout(cct
, 10) << __func__
<< ": resolved '" << mds_spec
<< "' to role '"
5416 << role
<< "'" << dendl
;
5418 fsmap
->get_filesystem(role
.fscid
)->mds_map
.get_info(role
.rank
).global_id
);
5422 std::string strtol_err
;
5423 long long rank_or_gid
= strict_strtoll(mds_spec
.c_str(), 10, &strtol_err
);
5424 if (strtol_err
.empty()) {
5425 // It is a possible GID
5426 const mds_gid_t mds_gid
= mds_gid_t(rank_or_gid
);
5427 if (fsmap
->gid_exists(mds_gid
)) {
5428 ldout(cct
, 10) << __func__
<< ": validated GID " << mds_gid
<< dendl
;
5429 targets
->push_back(mds_gid
);
5431 lderr(cct
) << __func__
<< ": GID " << mds_gid
<< " not in MDS map"
5435 } else if (mds_spec
== "*") {
5436 // It is a wildcard: use all MDSs
5437 const auto mds_info
= fsmap
->get_mds_info();
5439 if (mds_info
.empty()) {
5440 lderr(cct
) << __func__
<< ": * passed but no MDS daemons found" << dendl
;
5444 for (const auto i
: mds_info
) {
5445 targets
->push_back(i
.first
);
5448 // It did not parse as an integer, it is not a wildcard, it must be a name
5449 const mds_gid_t mds_gid
= fsmap
->find_mds_gid_by_name(mds_spec
);
5451 lderr(cct
) << "MDS ID '" << mds_spec
<< "' not found" << dendl
;
5453 lderr(cct
) << "FSMap: " << *fsmap
<< dendl
;
5457 ldout(cct
, 10) << __func__
<< ": resolved ID '" << mds_spec
5458 << "' to GID " << mds_gid
<< dendl
;
5459 targets
->push_back(mds_gid
);
5468 * Authenticate with mon and establish global ID
5470 int Client::authenticate()
5472 assert(client_lock
.is_locked_by_me());
5474 if (monclient
->is_authenticated()) {
5478 client_lock
.Unlock();
5479 int r
= monclient
->authenticate(cct
->_conf
->client_mount_timeout
);
5485 whoami
= monclient
->get_global_id();
5486 messenger
->set_myname(entity_name_t::CLIENT(whoami
.v
));
5491 int Client::fetch_fsmap(bool user
)
5494 // Retrieve FSMap to enable looking up daemon addresses. We need FSMap
5495 // rather than MDSMap because no one MDSMap contains all the daemons, and
5496 // a `tell` can address any daemon.
5497 version_t fsmap_latest
;
5500 monclient
->get_version("fsmap", &fsmap_latest
, NULL
, &cond
);
5501 client_lock
.Unlock();
5504 } while (r
== -EAGAIN
);
5507 lderr(cct
) << "Failed to learn FSMap version: " << cpp_strerror(r
) << dendl
;
5511 ldout(cct
, 10) << __func__
<< " learned FSMap version " << fsmap_latest
<< dendl
;
5514 if (!fsmap_user
|| fsmap_user
->get_epoch() < fsmap_latest
) {
5515 monclient
->sub_want("fsmap.user", fsmap_latest
, CEPH_SUBSCRIBE_ONETIME
);
5516 monclient
->renew_subs();
5517 wait_on_list(waiting_for_fsmap
);
5520 assert(fsmap_user
->get_epoch() >= fsmap_latest
);
5522 if (!fsmap
|| fsmap
->get_epoch() < fsmap_latest
) {
5523 monclient
->sub_want("fsmap", fsmap_latest
, CEPH_SUBSCRIBE_ONETIME
);
5524 monclient
->renew_subs();
5525 wait_on_list(waiting_for_fsmap
);
5528 assert(fsmap
->get_epoch() >= fsmap_latest
);
5530 ldout(cct
, 10) << __func__
<< " finished waiting for FSMap version "
5531 << fsmap_latest
<< dendl
;
5537 * @mds_spec one of ID, rank, GID, "*"
5540 int Client::mds_command(
5541 const std::string
&mds_spec
,
5542 const vector
<string
>& cmd
,
5543 const bufferlist
& inbl
,
5548 Mutex::Locker
lock(client_lock
);
5559 r
= fetch_fsmap(false);
5564 // Look up MDS target(s) of the command
5565 std::vector
<mds_gid_t
> targets
;
5566 r
= resolve_mds(mds_spec
, &targets
);
5571 // If daemons are laggy, we won't send them commands. If all
5572 // are laggy then we fail.
5573 std::vector
<mds_gid_t
> non_laggy
;
5574 for (const auto gid
: targets
) {
5575 const auto info
= fsmap
->get_info_gid(gid
);
5576 if (!info
.laggy()) {
5577 non_laggy
.push_back(gid
);
5580 if (non_laggy
.size() == 0) {
5581 *outs
= "All targeted MDS daemons are laggy";
5585 if (metadata
.empty()) {
5586 // We are called on an unmounted client, so metadata
5587 // won't be initialized yet.
5588 populate_metadata("");
5591 // Send commands to targets
5592 C_GatherBuilder
gather(cct
, onfinish
);
5593 for (const auto target_gid
: non_laggy
) {
5594 const auto info
= fsmap
->get_info_gid(target_gid
);
5596 // Open a connection to the target MDS
5597 entity_inst_t inst
= info
.get_inst();
5598 ConnectionRef conn
= messenger
->get_connection(inst
);
5600 // Generate MDSCommandOp state
5601 auto &op
= command_table
.start_command();
5603 op
.on_finish
= gather
.new_sub();
5608 op
.mds_gid
= target_gid
;
5611 ldout(cct
, 4) << __func__
<< ": new command op to " << target_gid
5612 << " tid=" << op
.tid
<< cmd
<< dendl
;
5614 // Construct and send MCommand
5615 MCommand
*m
= op
.get_message(monclient
->get_fsid());
5616 conn
->send_message(m
);
5623 void Client::handle_command_reply(MCommandReply
*m
)
5625 ceph_tid_t
const tid
= m
->get_tid();
5627 ldout(cct
, 10) << __func__
<< ": tid=" << m
->get_tid() << dendl
;
5629 if (!command_table
.exists(tid
)) {
5630 ldout(cct
, 1) << __func__
<< ": unknown tid " << tid
<< ", dropping" << dendl
;
5635 auto &op
= command_table
.get_command(tid
);
5637 op
.outbl
->claim(m
->get_data());
5644 op
.on_finish
->complete(m
->r
);
5647 command_table
.erase(tid
);
5652 // -------------------
5655 int Client::mount(const std::string
&mount_root
, const UserPerm
& perms
,
5658 Mutex::Locker
lock(client_lock
);
5661 ldout(cct
, 5) << "already mounted" << dendl
;
5667 int r
= authenticate();
5669 lderr(cct
) << "authentication failed: " << cpp_strerror(r
) << dendl
;
5673 std::string want
= "mdsmap";
5674 const auto &mds_ns
= cct
->_conf
->client_mds_namespace
;
5675 if (!mds_ns
.empty()) {
5676 r
= fetch_fsmap(true);
5679 fs_cluster_id_t cid
= fsmap_user
->get_fs_cid(mds_ns
);
5680 if (cid
== FS_CLUSTER_ID_NONE
)
5683 std::ostringstream oss
;
5684 oss
<< want
<< "." << cid
;
5687 ldout(cct
, 10) << "Subscribing to map '" << want
<< "'" << dendl
;
5689 monclient
->sub_want(want
, 0, 0);
5690 monclient
->renew_subs();
5692 tick(); // start tick
5696 auto availability
= mdsmap
->is_cluster_available();
5697 if (availability
== MDSMap::STUCK_UNAVAILABLE
) {
5699 ldout(cct
, 10) << "mds cluster unavailable: epoch=" << mdsmap
->get_epoch() << dendl
;
5700 return CEPH_FUSE_NO_MDS_UP
;
5701 } else if (availability
== MDSMap::AVAILABLE
) {
5702 // Continue to mount
5704 } else if (availability
== MDSMap::TRANSIENT_UNAVAILABLE
) {
5705 // Else, wait. MDSMonitor will update the map to bring
5706 // us to a conclusion eventually.
5707 wait_on_list(waiting_for_mdsmap
);
5709 // Unexpected value!
5715 populate_metadata(mount_root
.empty() ? "/" : mount_root
);
5717 filepath
fp(CEPH_INO_ROOT
);
5718 if (!mount_root
.empty()) {
5719 fp
= filepath(mount_root
.c_str());
5722 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_GETATTR
);
5723 req
->set_filepath(fp
);
5724 req
->head
.args
.getattr
.mask
= CEPH_STAT_CAP_INODE_ALL
;
5725 int res
= make_request(req
, perms
);
5727 if (res
== -EACCES
&& root
) {
5728 ldout(cct
, 1) << __func__
<< " EACCES on parent of mount point; quotas may not work" << dendl
;
5746 if (!cct
->_conf
->client_trace
.empty()) {
5747 traceout
.open(cct
->_conf
->client_trace
.c_str());
5748 if (traceout
.is_open()) {
5749 ldout(cct
, 1) << "opened trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5751 ldout(cct
, 1) << "FAILED to open trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5756 ldout(cct, 3) << "op: // client trace data structs" << dendl;
5757 ldout(cct, 3) << "op: struct stat st;" << dendl;
5758 ldout(cct, 3) << "op: struct utimbuf utim;" << dendl;
5759 ldout(cct, 3) << "op: int readlinkbuf_len = 1000;" << dendl;
5760 ldout(cct, 3) << "op: char readlinkbuf[readlinkbuf_len];" << dendl;
5761 ldout(cct, 3) << "op: map<string, inode_t*> dir_contents;" << dendl;
5762 ldout(cct, 3) << "op: map<int, int> open_files;" << dendl;
5763 ldout(cct, 3) << "op: int fd;" << dendl;
5770 void Client::_close_sessions()
5772 while (!mds_sessions
.empty()) {
5773 // send session closes!
5774 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5775 p
!= mds_sessions
.end();
5777 if (p
->second
->state
!= MetaSession::STATE_CLOSING
) {
5778 _close_mds_session(p
->second
);
5782 // wait for sessions to close
5783 ldout(cct
, 2) << "waiting for " << mds_sessions
.size() << " mds sessions to close" << dendl
;
5784 mount_cond
.Wait(client_lock
);
5788 void Client::flush_mdlog_sync()
5790 if (mds_requests
.empty())
5792 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5793 p
!= mds_sessions
.end();
5795 MetaSession
*s
= p
->second
;
5800 void Client::flush_mdlog(MetaSession
*session
)
5802 // Only send this to Luminous or newer MDS daemons, older daemons
5803 // will crash if they see an unknown CEPH_SESSION_* value in this msg.
5804 const uint64_t features
= session
->con
->get_features();
5805 if (HAVE_FEATURE(features
, SERVER_LUMINOUS
)) {
5806 MClientSession
*m
= new MClientSession(CEPH_SESSION_REQUEST_FLUSH_MDLOG
);
5807 session
->con
->send_message(m
);
5812 void Client::_unmount()
5817 ldout(cct
, 2) << "unmounting" << dendl
;
5822 flush_mdlog_sync(); // flush the mdlog for pending requests, if any
5823 while (!mds_requests
.empty()) {
5824 ldout(cct
, 10) << "waiting on " << mds_requests
.size() << " requests" << dendl
;
5825 mount_cond
.Wait(client_lock
);
5829 timer
.cancel_event(tick_event
);
5834 // clean up any unclosed files
5835 while (!fd_map
.empty()) {
5836 Fh
*fh
= fd_map
.begin()->second
;
5837 fd_map
.erase(fd_map
.begin());
5838 ldout(cct
, 0) << " destroyed lost open file " << fh
<< " on " << *fh
->inode
<< dendl
;
5842 while (!ll_unclosed_fh_set
.empty()) {
5843 set
<Fh
*>::iterator it
= ll_unclosed_fh_set
.begin();
5845 ll_unclosed_fh_set
.erase(fh
);
5846 ldout(cct
, 0) << " destroyed lost open file " << fh
<< " on " << *(fh
->inode
) << dendl
;
5850 while (!opened_dirs
.empty()) {
5851 dir_result_t
*dirp
= *opened_dirs
.begin();
5852 ldout(cct
, 0) << " destroyed lost open dir " << dirp
<< " on " << *dirp
->inode
<< dendl
;
5859 ldout(cct
, 0) << " skipping clean shutdown, we are blacklisted" << dendl
;
5861 if (cct
->_conf
->client_oc
) {
5862 // Purge all cached data so that ObjectCacher doesn't get hung up
5863 // trying to flush it. ObjectCacher's behaviour on EBLACKLISTED
5864 // is to just leave things marked dirty
5865 // (http://tracker.ceph.com/issues/9105)
5866 for (const auto &i
: inode_map
) {
5867 objectcacher
->purge_set(&(i
.second
->oset
));
5875 while (unsafe_sync_write
> 0) {
5876 ldout(cct
, 0) << unsafe_sync_write
<< " unsafe_sync_writes, waiting" << dendl
;
5877 mount_cond
.Wait(client_lock
);
5880 if (cct
->_conf
->client_oc
) {
5881 // flush/release all buffered data
5882 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator next
;
5883 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
= inode_map
.begin();
5884 p
!= inode_map
.end();
5888 Inode
*in
= p
->second
;
5890 ldout(cct
, 0) << "null inode_map entry ino " << p
->first
<< dendl
;
5893 if (!in
->caps
.empty()) {
5894 InodeRef
tmp_ref(in
);
5896 _flush(in
, new C_Client_FlushComplete(this, in
));
5902 wait_sync_caps(last_flush_tid
);
5907 while (lru
.lru_get_size() > 0 ||
5908 !inode_map
.empty()) {
5909 ldout(cct
, 2) << "cache still has " << lru
.lru_get_size()
5910 << "+" << inode_map
.size() << " items"
5911 << ", waiting (for caps to release?)"
5913 utime_t until
= ceph_clock_now() + utime_t(5, 0);
5914 int r
= mount_cond
.WaitUntil(client_lock
, until
);
5915 if (r
== ETIMEDOUT
) {
5919 assert(lru
.lru_get_size() == 0);
5920 assert(inode_map
.empty());
5923 if (!cct
->_conf
->client_trace
.empty()) {
5924 ldout(cct
, 1) << "closing trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5932 ldout(cct
, 2) << "unmounted." << dendl
;
5935 void Client::unmount()
5937 Mutex::Locker
lock(client_lock
);
5941 void Client::flush_cap_releases()
5943 // send any cap releases
5944 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5945 p
!= mds_sessions
.end();
5947 if (p
->second
->release
&& mdsmap
->is_clientreplay_or_active_or_stopping(
5949 if (cct
->_conf
->client_inject_release_failure
) {
5950 ldout(cct
, 20) << __func__
<< " injecting failure to send cap release message" << dendl
;
5951 p
->second
->release
->put();
5953 p
->second
->con
->send_message(p
->second
->release
);
5955 p
->second
->release
= 0;
5962 if (cct
->_conf
->client_debug_inject_tick_delay
> 0) {
5963 sleep(cct
->_conf
->client_debug_inject_tick_delay
);
5964 assert(0 == cct
->_conf
->set_val("client_debug_inject_tick_delay", "0"));
5965 cct
->_conf
->apply_changes(NULL
);
5968 ldout(cct
, 21) << "tick" << dendl
;
5969 tick_event
= timer
.add_event_after(
5970 cct
->_conf
->client_tick_interval
,
5971 new FunctionContext([this](int) {
5972 // Called back via Timer, which takes client_lock for us
5973 assert(client_lock
.is_locked_by_me());
5976 utime_t now
= ceph_clock_now();
5978 if (!mounted
&& !mds_requests
.empty()) {
5979 MetaRequest
*req
= mds_requests
.begin()->second
;
5980 if (req
->op_stamp
+ cct
->_conf
->client_mount_timeout
< now
) {
5981 req
->abort(-ETIMEDOUT
);
5982 if (req
->caller_cond
) {
5984 req
->caller_cond
->Signal();
5986 signal_cond_list(waiting_for_mdsmap
);
5987 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5988 p
!= mds_sessions
.end();
5990 signal_context_list(p
->second
->waiting_for_open
);
5994 if (mdsmap
->get_epoch()) {
5996 utime_t el
= now
- last_cap_renew
;
5997 if (el
> mdsmap
->get_session_timeout() / 3.0)
6000 flush_cap_releases();
6004 xlist
<Inode
*>::iterator p
= delayed_list
.begin();
6008 if (in
->hold_caps_until
> now
)
6010 delayed_list
.pop_front();
6011 check_caps(in
, CHECK_CAPS_NODELAY
);
6017 void Client::renew_caps()
6019 ldout(cct
, 10) << "renew_caps()" << dendl
;
6020 last_cap_renew
= ceph_clock_now();
6022 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
6023 p
!= mds_sessions
.end();
6025 ldout(cct
, 15) << "renew_caps requesting from mds." << p
->first
<< dendl
;
6026 if (mdsmap
->get_state(p
->first
) >= MDSMap::STATE_REJOIN
)
6027 renew_caps(p
->second
);
6031 void Client::renew_caps(MetaSession
*session
)
6033 ldout(cct
, 10) << "renew_caps mds." << session
->mds_num
<< dendl
;
6034 session
->last_cap_renew_request
= ceph_clock_now();
6035 uint64_t seq
= ++session
->cap_renew_seq
;
6036 session
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_RENEWCAPS
, seq
));
6040 // ===============================================================
6041 // high level (POSIXy) interface
6043 int Client::_do_lookup(Inode
*dir
, const string
& name
, int mask
,
6044 InodeRef
*target
, const UserPerm
& perms
)
6046 int op
= dir
->snapid
== CEPH_SNAPDIR
? CEPH_MDS_OP_LOOKUPSNAP
: CEPH_MDS_OP_LOOKUP
;
6047 MetaRequest
*req
= new MetaRequest(op
);
6049 dir
->make_nosnap_relative_path(path
);
6050 path
.push_dentry(name
);
6051 req
->set_filepath(path
);
6052 req
->set_inode(dir
);
6053 if (cct
->_conf
->client_debug_getattr_caps
&& op
== CEPH_MDS_OP_LOOKUP
)
6054 mask
|= DEBUG_GETATTR_CAPS
;
6055 req
->head
.args
.getattr
.mask
= mask
;
6057 ldout(cct
, 10) << "_do_lookup on " << path
<< dendl
;
6059 int r
= make_request(req
, perms
, target
);
6060 ldout(cct
, 10) << "_do_lookup res is " << r
<< dendl
;
6064 int Client::_lookup(Inode
*dir
, const string
& dname
, int mask
, InodeRef
*target
,
6065 const UserPerm
& perms
)
6070 if (!dir
->is_dir()) {
6075 if (dname
== "..") {
6076 if (dir
->dn_set
.empty())
6079 *target
= dir
->get_first_parent()->dir
->parent_inode
; //dirs can't be hard-linked
6088 if (dname
.length() > NAME_MAX
) {
6093 if (dname
== cct
->_conf
->client_snapdir
&&
6094 dir
->snapid
== CEPH_NOSNAP
) {
6095 *target
= open_snapdir(dir
);
6100 dir
->dir
->dentries
.count(dname
)) {
6101 dn
= dir
->dir
->dentries
[dname
];
6103 ldout(cct
, 20) << "_lookup have dn " << dname
<< " mds." << dn
->lease_mds
<< " ttl " << dn
->lease_ttl
6104 << " seq " << dn
->lease_seq
6107 if (!dn
->inode
|| dn
->inode
->caps_issued_mask(mask
, true)) {
6108 // is dn lease valid?
6109 utime_t now
= ceph_clock_now();
6110 if (dn
->lease_mds
>= 0 &&
6111 dn
->lease_ttl
> now
&&
6112 mds_sessions
.count(dn
->lease_mds
)) {
6113 MetaSession
*s
= mds_sessions
[dn
->lease_mds
];
6114 if (s
->cap_ttl
> now
&&
6115 s
->cap_gen
== dn
->lease_gen
) {
6116 // touch this mds's dir cap too, even though we don't _explicitly_ use it here, to
6117 // make trim_caps() behave.
6118 dir
->try_touch_cap(dn
->lease_mds
);
6121 ldout(cct
, 20) << " bad lease, cap_ttl " << s
->cap_ttl
<< ", cap_gen " << s
->cap_gen
6122 << " vs lease_gen " << dn
->lease_gen
<< dendl
;
6125 if (dir
->caps_issued_mask(CEPH_CAP_FILE_SHARED
, true)) {
6126 if (dn
->cap_shared_gen
== dir
->shared_gen
&&
6127 (!dn
->inode
|| dn
->inode
->caps_issued_mask(mask
, true)))
6129 if (!dn
->inode
&& (dir
->flags
& I_COMPLETE
)) {
6130 ldout(cct
, 10) << "_lookup concluded ENOENT locally for "
6131 << *dir
<< " dn '" << dname
<< "'" << dendl
;
6136 ldout(cct
, 20) << " no cap on " << dn
->inode
->vino() << dendl
;
6139 // can we conclude ENOENT locally?
6140 if (dir
->caps_issued_mask(CEPH_CAP_FILE_SHARED
, true) &&
6141 (dir
->flags
& I_COMPLETE
)) {
6142 ldout(cct
, 10) << "_lookup concluded ENOENT locally for " << *dir
<< " dn '" << dname
<< "'" << dendl
;
6147 r
= _do_lookup(dir
, dname
, mask
, target
, perms
);
6152 *target
= dn
->inode
;
6160 ldout(cct
, 10) << "_lookup " << *dir
<< " " << dname
<< " = " << r
<< dendl
;
6162 ldout(cct
, 10) << "_lookup " << *dir
<< " " << dname
<< " = " << **target
<< dendl
;
6166 int Client::get_or_create(Inode
*dir
, const char* name
,
6167 Dentry
**pdn
, bool expect_null
)
6170 ldout(cct
, 20) << "get_or_create " << *dir
<< " name " << name
<< dendl
;
6172 if (dir
->dir
->dentries
.count(name
)) {
6173 Dentry
*dn
= dir
->dir
->dentries
[name
];
6175 // is dn lease valid?
6176 utime_t now
= ceph_clock_now();
6178 dn
->lease_mds
>= 0 &&
6179 dn
->lease_ttl
> now
&&
6180 mds_sessions
.count(dn
->lease_mds
)) {
6181 MetaSession
*s
= mds_sessions
[dn
->lease_mds
];
6182 if (s
->cap_ttl
> now
&&
6183 s
->cap_gen
== dn
->lease_gen
) {
6190 // otherwise link up a new one
6191 *pdn
= link(dir
->dir
, name
, NULL
, NULL
);
6198 int Client::path_walk(const filepath
& origpath
, InodeRef
*end
,
6199 const UserPerm
& perms
, bool followsym
, int mask
)
6201 filepath path
= origpath
;
6203 if (origpath
.absolute())
6209 ldout(cct
, 10) << "path_walk " << path
<< dendl
;
6214 while (i
< path
.depth() && cur
) {
6216 const string
&dname
= path
[i
];
6217 ldout(cct
, 10) << " " << i
<< " " << *cur
<< " " << dname
<< dendl
;
6218 ldout(cct
, 20) << " (path is " << path
<< ")" << dendl
;
6220 if (cct
->_conf
->client_permissions
) {
6221 int r
= may_lookup(cur
.get(), perms
);
6224 caps
= CEPH_CAP_AUTH_SHARED
;
6227 /* Get extra requested caps on the last component */
6228 if (i
== (path
.depth() - 1))
6230 int r
= _lookup(cur
.get(), dname
, caps
, &next
, perms
);
6233 // only follow trailing symlink if followsym. always follow
6234 // 'directory' symlinks.
6235 if (next
&& next
->is_symlink()) {
6237 ldout(cct
, 20) << " symlink count " << symlinks
<< ", value is '" << next
->symlink
<< "'" << dendl
;
6238 if (symlinks
> MAXSYMLINKS
) {
6242 if (i
< path
.depth() - 1) {
6244 // replace consumed components of path with symlink dir target
6245 filepath
resolved(next
->symlink
.c_str());
6246 resolved
.append(path
.postfixpath(i
+ 1));
6249 if (next
->symlink
[0] == '/') {
6253 } else if (followsym
) {
6254 if (next
->symlink
[0] == '/') {
6255 path
= next
->symlink
.c_str();
6260 filepath
more(next
->symlink
.c_str());
6261 // we need to remove the symlink component from off of the path
6262 // before adding the target that the symlink points to. remain
6263 // at the same position in the path.
6283 int Client::link(const char *relexisting
, const char *relpath
, const UserPerm
& perm
)
6285 Mutex::Locker
lock(client_lock
);
6286 tout(cct
) << "link" << std::endl
;
6287 tout(cct
) << relexisting
<< std::endl
;
6288 tout(cct
) << relpath
<< std::endl
;
6293 filepath
existing(relexisting
);
6296 int r
= path_walk(existing
, &in
, perm
, true);
6299 if (std::string(relpath
) == "/") {
6303 filepath
path(relpath
);
6304 string name
= path
.last_dentry();
6307 r
= path_walk(path
, &dir
, perm
, true);
6310 if (cct
->_conf
->client_permissions
) {
6311 if (S_ISDIR(in
->mode
)) {
6315 r
= may_hardlink(in
.get(), perm
);
6318 r
= may_create(dir
.get(), perm
);
6322 r
= _link(in
.get(), dir
.get(), name
.c_str(), perm
);
6326 int Client::unlink(const char *relpath
, const UserPerm
& perm
)
6328 Mutex::Locker
lock(client_lock
);
6329 tout(cct
) << "unlink" << std::endl
;
6330 tout(cct
) << relpath
<< std::endl
;
6335 if (std::string(relpath
) == "/")
6338 filepath
path(relpath
);
6339 string name
= path
.last_dentry();
6342 int r
= path_walk(path
, &dir
, perm
);
6345 if (cct
->_conf
->client_permissions
) {
6346 r
= may_delete(dir
.get(), name
.c_str(), perm
);
6350 return _unlink(dir
.get(), name
.c_str(), perm
);
6353 int Client::rename(const char *relfrom
, const char *relto
, const UserPerm
& perm
)
6355 Mutex::Locker
lock(client_lock
);
6356 tout(cct
) << "rename" << std::endl
;
6357 tout(cct
) << relfrom
<< std::endl
;
6358 tout(cct
) << relto
<< std::endl
;
6363 if (std::string(relfrom
) == "/" || std::string(relto
) == "/")
6366 filepath
from(relfrom
);
6368 string fromname
= from
.last_dentry();
6370 string toname
= to
.last_dentry();
6373 InodeRef fromdir
, todir
;
6374 int r
= path_walk(from
, &fromdir
, perm
);
6377 r
= path_walk(to
, &todir
, perm
);
6381 if (cct
->_conf
->client_permissions
) {
6382 int r
= may_delete(fromdir
.get(), fromname
.c_str(), perm
);
6385 r
= may_delete(todir
.get(), toname
.c_str(), perm
);
6386 if (r
< 0 && r
!= -ENOENT
)
6389 r
= _rename(fromdir
.get(), fromname
.c_str(), todir
.get(), toname
.c_str(), perm
);
6396 int Client::mkdir(const char *relpath
, mode_t mode
, const UserPerm
& perm
)
6398 Mutex::Locker
lock(client_lock
);
6399 tout(cct
) << "mkdir" << std::endl
;
6400 tout(cct
) << relpath
<< std::endl
;
6401 tout(cct
) << mode
<< std::endl
;
6402 ldout(cct
, 10) << "mkdir: " << relpath
<< dendl
;
6407 if (std::string(relpath
) == "/")
6410 filepath
path(relpath
);
6411 string name
= path
.last_dentry();
6414 int r
= path_walk(path
, &dir
, perm
);
6417 if (cct
->_conf
->client_permissions
) {
6418 r
= may_create(dir
.get(), perm
);
6422 return _mkdir(dir
.get(), name
.c_str(), mode
, perm
);
6425 int Client::mkdirs(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
6427 Mutex::Locker
lock(client_lock
);
6428 ldout(cct
, 10) << "Client::mkdirs " << relpath
<< dendl
;
6429 tout(cct
) << "mkdirs" << std::endl
;
6430 tout(cct
) << relpath
<< std::endl
;
6431 tout(cct
) << mode
<< std::endl
;
6436 //get through existing parts of path
6437 filepath
path(relpath
);
6439 int r
= 0, caps
= 0;
6442 for (i
=0; i
<path
.depth(); ++i
) {
6443 if (cct
->_conf
->client_permissions
) {
6444 r
= may_lookup(cur
.get(), perms
);
6447 caps
= CEPH_CAP_AUTH_SHARED
;
6449 r
= _lookup(cur
.get(), path
[i
].c_str(), caps
, &next
, perms
);
6454 //check that we have work left to do
6455 if (i
==path
.depth()) return -EEXIST
;
6456 if (r
!=-ENOENT
) return r
;
6457 ldout(cct
, 20) << "mkdirs got through " << i
<< " directories on path " << relpath
<< dendl
;
6458 //make new directory at each level
6459 for (; i
<path
.depth(); ++i
) {
6460 if (cct
->_conf
->client_permissions
) {
6461 r
= may_create(cur
.get(), perms
);
6466 r
= _mkdir(cur
.get(), path
[i
].c_str(), mode
, perms
, &next
);
6468 //check proper creation/existence
6469 if(-EEXIST
== r
&& i
< path
.depth() - 1) {
6470 r
= _lookup(cur
.get(), path
[i
].c_str(), CEPH_CAP_AUTH_SHARED
, &next
, perms
);
6474 //move to new dir and continue
6476 ldout(cct
, 20) << "mkdirs: successfully created directory "
6477 << filepath(cur
->ino
).get_path() << dendl
;
6482 int Client::rmdir(const char *relpath
, const UserPerm
& perms
)
6484 Mutex::Locker
lock(client_lock
);
6485 tout(cct
) << "rmdir" << std::endl
;
6486 tout(cct
) << relpath
<< std::endl
;
6491 if (std::string(relpath
) == "/")
6494 filepath
path(relpath
);
6495 string name
= path
.last_dentry();
6498 int r
= path_walk(path
, &dir
, perms
);
6501 if (cct
->_conf
->client_permissions
) {
6502 int r
= may_delete(dir
.get(), name
.c_str(), perms
);
6506 return _rmdir(dir
.get(), name
.c_str(), perms
);
6509 int Client::mknod(const char *relpath
, mode_t mode
, const UserPerm
& perms
, dev_t rdev
)
6511 Mutex::Locker
lock(client_lock
);
6512 tout(cct
) << "mknod" << std::endl
;
6513 tout(cct
) << relpath
<< std::endl
;
6514 tout(cct
) << mode
<< std::endl
;
6515 tout(cct
) << rdev
<< std::endl
;
6520 if (std::string(relpath
) == "/")
6523 filepath
path(relpath
);
6524 string name
= path
.last_dentry();
6527 int r
= path_walk(path
, &dir
, perms
);
6530 if (cct
->_conf
->client_permissions
) {
6531 int r
= may_create(dir
.get(), perms
);
6535 return _mknod(dir
.get(), name
.c_str(), mode
, rdev
, perms
);
6540 int Client::symlink(const char *target
, const char *relpath
, const UserPerm
& perms
)
6542 Mutex::Locker
lock(client_lock
);
6543 tout(cct
) << "symlink" << std::endl
;
6544 tout(cct
) << target
<< std::endl
;
6545 tout(cct
) << relpath
<< std::endl
;
6550 if (std::string(relpath
) == "/")
6553 filepath
path(relpath
);
6554 string name
= path
.last_dentry();
6557 int r
= path_walk(path
, &dir
, perms
);
6560 if (cct
->_conf
->client_permissions
) {
6561 int r
= may_create(dir
.get(), perms
);
6565 return _symlink(dir
.get(), name
.c_str(), target
, perms
);
6568 int Client::readlink(const char *relpath
, char *buf
, loff_t size
, const UserPerm
& perms
)
6570 Mutex::Locker
lock(client_lock
);
6571 tout(cct
) << "readlink" << std::endl
;
6572 tout(cct
) << relpath
<< std::endl
;
6577 filepath
path(relpath
);
6579 int r
= path_walk(path
, &in
, perms
, false);
6583 return _readlink(in
.get(), buf
, size
);
6586 int Client::_readlink(Inode
*in
, char *buf
, size_t size
)
6588 if (!in
->is_symlink())
6591 // copy into buf (at most size bytes)
6592 int r
= in
->symlink
.length();
6595 memcpy(buf
, in
->symlink
.c_str(), r
);
6602 int Client::_getattr(Inode
*in
, int mask
, const UserPerm
& perms
, bool force
)
6604 bool yes
= in
->caps_issued_mask(mask
, true);
6606 ldout(cct
, 10) << "_getattr mask " << ccap_string(mask
) << " issued=" << yes
<< dendl
;
6610 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_GETATTR
);
6612 in
->make_nosnap_relative_path(path
);
6613 req
->set_filepath(path
);
6615 req
->head
.args
.getattr
.mask
= mask
;
6617 int res
= make_request(req
, perms
);
6618 ldout(cct
, 10) << "_getattr result=" << res
<< dendl
;
6622 int Client::_do_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
6623 const UserPerm
& perms
, InodeRef
*inp
)
6625 int issued
= in
->caps_issued();
6627 ldout(cct
, 10) << "_setattr mask " << mask
<< " issued " <<
6628 ccap_string(issued
) << dendl
;
6630 if (in
->snapid
!= CEPH_NOSNAP
) {
6633 if ((mask
& CEPH_SETATTR_SIZE
) &&
6634 (unsigned long)stx
->stx_size
> in
->size
&&
6635 is_quota_bytes_exceeded(in
, (unsigned long)stx
->stx_size
- in
->size
,
6640 // make the change locally?
6641 if ((in
->cap_dirtier_uid
>= 0 && perms
.uid() != in
->cap_dirtier_uid
) ||
6642 (in
->cap_dirtier_gid
>= 0 && perms
.gid() != in
->cap_dirtier_gid
)) {
6643 ldout(cct
, 10) << __func__
<< " caller " << perms
.uid() << ":" << perms
.gid()
6644 << " != cap dirtier " << in
->cap_dirtier_uid
<< ":"
6645 << in
->cap_dirtier_gid
<< ", forcing sync setattr"
6648 * This works because we implicitly flush the caps as part of the
6649 * request, so the cap update check will happen with the writeback
6650 * cap context, and then the setattr check will happen with the
6653 * In reality this pattern is likely pretty rare (different users
6654 * setattr'ing the same file). If that turns out not to be the
6655 * case later, we can build a more complex pipelined cap writeback
6659 mask
|= CEPH_SETATTR_CTIME
;
6664 // caller just needs us to bump the ctime
6665 in
->ctime
= ceph_clock_now();
6666 in
->cap_dirtier_uid
= perms
.uid();
6667 in
->cap_dirtier_gid
= perms
.gid();
6668 if (issued
& CEPH_CAP_AUTH_EXCL
)
6669 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
6670 else if (issued
& CEPH_CAP_FILE_EXCL
)
6671 in
->mark_caps_dirty(CEPH_CAP_FILE_EXCL
);
6672 else if (issued
& CEPH_CAP_XATTR_EXCL
)
6673 in
->mark_caps_dirty(CEPH_CAP_XATTR_EXCL
);
6675 mask
|= CEPH_SETATTR_CTIME
;
6678 if (in
->caps_issued_mask(CEPH_CAP_AUTH_EXCL
)) {
6679 bool kill_sguid
= mask
& (CEPH_SETATTR_SIZE
|CEPH_SETATTR_KILL_SGUID
);
6681 mask
&= ~CEPH_SETATTR_KILL_SGUID
;
6683 if (mask
& CEPH_SETATTR_UID
) {
6684 in
->ctime
= ceph_clock_now();
6685 in
->cap_dirtier_uid
= perms
.uid();
6686 in
->cap_dirtier_gid
= perms
.gid();
6687 in
->uid
= stx
->stx_uid
;
6688 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
6689 mask
&= ~CEPH_SETATTR_UID
;
6691 ldout(cct
,10) << "changing uid to " << stx
->stx_uid
<< dendl
;
6693 if (mask
& CEPH_SETATTR_GID
) {
6694 in
->ctime
= ceph_clock_now();
6695 in
->cap_dirtier_uid
= perms
.uid();
6696 in
->cap_dirtier_gid
= perms
.gid();
6697 in
->gid
= stx
->stx_gid
;
6698 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
6699 mask
&= ~CEPH_SETATTR_GID
;
6701 ldout(cct
,10) << "changing gid to " << stx
->stx_gid
<< dendl
;
6704 if (mask
& CEPH_SETATTR_MODE
) {
6705 in
->ctime
= ceph_clock_now();
6706 in
->cap_dirtier_uid
= perms
.uid();
6707 in
->cap_dirtier_gid
= perms
.gid();
6708 in
->mode
= (in
->mode
& ~07777) | (stx
->stx_mode
& 07777);
6709 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
6710 mask
&= ~CEPH_SETATTR_MODE
;
6711 ldout(cct
,10) << "changing mode to " << stx
->stx_mode
<< dendl
;
6712 } else if (kill_sguid
&& S_ISREG(in
->mode
) && (in
->mode
& (S_IXUSR
|S_IXGRP
|S_IXOTH
))) {
6713 /* Must squash the any setuid/setgid bits with an ownership change */
6714 in
->mode
&= ~(S_ISUID
|S_ISGID
);
6715 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
6718 if (mask
& CEPH_SETATTR_BTIME
) {
6719 in
->ctime
= ceph_clock_now();
6720 in
->cap_dirtier_uid
= perms
.uid();
6721 in
->cap_dirtier_gid
= perms
.gid();
6722 in
->btime
= utime_t(stx
->stx_btime
);
6723 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
6724 mask
&= ~CEPH_SETATTR_BTIME
;
6725 ldout(cct
,10) << "changing btime to " << in
->btime
<< dendl
;
6727 } else if (mask
& CEPH_SETATTR_SIZE
) {
6728 /* If we don't have Ax, then we must ask the server to clear them on truncate */
6729 mask
|= CEPH_SETATTR_KILL_SGUID
;
6732 if (in
->caps_issued_mask(CEPH_CAP_FILE_EXCL
)) {
6733 if (mask
& (CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
)) {
6734 if (mask
& CEPH_SETATTR_MTIME
)
6735 in
->mtime
= utime_t(stx
->stx_mtime
);
6736 if (mask
& CEPH_SETATTR_ATIME
)
6737 in
->atime
= utime_t(stx
->stx_atime
);
6738 in
->ctime
= ceph_clock_now();
6739 in
->cap_dirtier_uid
= perms
.uid();
6740 in
->cap_dirtier_gid
= perms
.gid();
6741 in
->time_warp_seq
++;
6742 in
->mark_caps_dirty(CEPH_CAP_FILE_EXCL
);
6743 mask
&= ~(CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
);
6752 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SETATTR
);
6756 in
->make_nosnap_relative_path(path
);
6757 req
->set_filepath(path
);
6760 if (mask
& CEPH_SETATTR_KILL_SGUID
) {
6761 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6763 if (mask
& CEPH_SETATTR_MODE
) {
6764 req
->head
.args
.setattr
.mode
= stx
->stx_mode
;
6765 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6766 ldout(cct
,10) << "changing mode to " << stx
->stx_mode
<< dendl
;
6768 if (mask
& CEPH_SETATTR_UID
) {
6769 req
->head
.args
.setattr
.uid
= stx
->stx_uid
;
6770 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6771 ldout(cct
,10) << "changing uid to " << stx
->stx_uid
<< dendl
;
6773 if (mask
& CEPH_SETATTR_GID
) {
6774 req
->head
.args
.setattr
.gid
= stx
->stx_gid
;
6775 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6776 ldout(cct
,10) << "changing gid to " << stx
->stx_gid
<< dendl
;
6778 if (mask
& CEPH_SETATTR_BTIME
) {
6779 req
->head
.args
.setattr
.btime
= utime_t(stx
->stx_btime
);
6780 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6782 if (mask
& CEPH_SETATTR_MTIME
) {
6783 req
->head
.args
.setattr
.mtime
= utime_t(stx
->stx_mtime
);
6784 req
->inode_drop
|= CEPH_CAP_FILE_SHARED
| CEPH_CAP_FILE_RD
|
6787 if (mask
& CEPH_SETATTR_ATIME
) {
6788 req
->head
.args
.setattr
.atime
= utime_t(stx
->stx_atime
);
6789 req
->inode_drop
|= CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_RD
|
6792 if (mask
& CEPH_SETATTR_SIZE
) {
6793 if ((unsigned long)stx
->stx_size
< mdsmap
->get_max_filesize()) {
6794 req
->head
.args
.setattr
.size
= stx
->stx_size
;
6795 ldout(cct
,10) << "changing size to " << stx
->stx_size
<< dendl
;
6798 ldout(cct
,10) << "unable to set size to " << stx
->stx_size
<< ". Too large!" << dendl
;
6801 req
->inode_drop
|= CEPH_CAP_FILE_SHARED
| CEPH_CAP_FILE_RD
|
6804 req
->head
.args
.setattr
.mask
= mask
;
6806 req
->regetattr_mask
= mask
;
6808 int res
= make_request(req
, perms
, inp
);
6809 ldout(cct
, 10) << "_setattr result=" << res
<< dendl
;
6813 /* Note that we only care about attrs that setattr cares about */
6814 void Client::stat_to_statx(struct stat
*st
, struct ceph_statx
*stx
)
6816 stx
->stx_size
= st
->st_size
;
6817 stx
->stx_mode
= st
->st_mode
;
6818 stx
->stx_uid
= st
->st_uid
;
6819 stx
->stx_gid
= st
->st_gid
;
6820 stx
->stx_mtime
= st
->st_mtim
;
6821 stx
->stx_atime
= st
->st_atim
;
6824 int Client::__setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
6825 const UserPerm
& perms
, InodeRef
*inp
)
6827 int ret
= _do_setattr(in
, stx
, mask
, perms
, inp
);
6830 if (mask
& CEPH_SETATTR_MODE
)
6831 ret
= _posix_acl_chmod(in
, stx
->stx_mode
, perms
);
6835 int Client::_setattrx(InodeRef
&in
, struct ceph_statx
*stx
, int mask
,
6836 const UserPerm
& perms
)
6838 mask
&= (CEPH_SETATTR_MODE
| CEPH_SETATTR_UID
|
6839 CEPH_SETATTR_GID
| CEPH_SETATTR_MTIME
|
6840 CEPH_SETATTR_ATIME
| CEPH_SETATTR_SIZE
|
6841 CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
);
6842 if (cct
->_conf
->client_permissions
) {
6843 int r
= may_setattr(in
.get(), stx
, mask
, perms
);
6847 return __setattrx(in
.get(), stx
, mask
, perms
);
6850 int Client::_setattr(InodeRef
&in
, struct stat
*attr
, int mask
,
6851 const UserPerm
& perms
)
6853 struct ceph_statx stx
;
6855 stat_to_statx(attr
, &stx
);
6856 mask
&= ~CEPH_SETATTR_BTIME
;
6858 if ((mask
& CEPH_SETATTR_UID
) && attr
->st_uid
== static_cast<uid_t
>(-1)) {
6859 mask
&= ~CEPH_SETATTR_UID
;
6861 if ((mask
& CEPH_SETATTR_GID
) && attr
->st_gid
== static_cast<uid_t
>(-1)) {
6862 mask
&= ~CEPH_SETATTR_GID
;
6865 return _setattrx(in
, &stx
, mask
, perms
);
6868 int Client::setattr(const char *relpath
, struct stat
*attr
, int mask
,
6869 const UserPerm
& perms
)
6871 Mutex::Locker
lock(client_lock
);
6872 tout(cct
) << "setattr" << std::endl
;
6873 tout(cct
) << relpath
<< std::endl
;
6874 tout(cct
) << mask
<< std::endl
;
6879 filepath
path(relpath
);
6881 int r
= path_walk(path
, &in
, perms
);
6884 return _setattr(in
, attr
, mask
, perms
);
6887 int Client::setattrx(const char *relpath
, struct ceph_statx
*stx
, int mask
,
6888 const UserPerm
& perms
, int flags
)
6890 Mutex::Locker
lock(client_lock
);
6891 tout(cct
) << "setattrx" << std::endl
;
6892 tout(cct
) << relpath
<< std::endl
;
6893 tout(cct
) << mask
<< std::endl
;
6898 filepath
path(relpath
);
6900 int r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
));
6903 return _setattrx(in
, stx
, mask
, perms
);
6906 int Client::fsetattr(int fd
, struct stat
*attr
, int mask
, const UserPerm
& perms
)
6908 Mutex::Locker
lock(client_lock
);
6909 tout(cct
) << "fsetattr" << std::endl
;
6910 tout(cct
) << fd
<< std::endl
;
6911 tout(cct
) << mask
<< std::endl
;
6916 Fh
*f
= get_filehandle(fd
);
6919 #if defined(__linux__) && defined(O_PATH)
6920 if (f
->flags
& O_PATH
)
6923 return _setattr(f
->inode
, attr
, mask
, perms
);
6926 int Client::fsetattrx(int fd
, struct ceph_statx
*stx
, int mask
, const UserPerm
& perms
)
6928 Mutex::Locker
lock(client_lock
);
6929 tout(cct
) << "fsetattr" << std::endl
;
6930 tout(cct
) << fd
<< std::endl
;
6931 tout(cct
) << mask
<< std::endl
;
6936 Fh
*f
= get_filehandle(fd
);
6939 #if defined(__linux__) && defined(O_PATH)
6940 if (f
->flags
& O_PATH
)
6943 return _setattrx(f
->inode
, stx
, mask
, perms
);
6946 int Client::stat(const char *relpath
, struct stat
*stbuf
, const UserPerm
& perms
,
6947 frag_info_t
*dirstat
, int mask
)
6949 ldout(cct
, 3) << "stat enter (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
6950 Mutex::Locker
lock(client_lock
);
6951 tout(cct
) << "stat" << std::endl
;
6952 tout(cct
) << relpath
<< std::endl
;
6957 filepath
path(relpath
);
6959 int r
= path_walk(path
, &in
, perms
, true, mask
);
6962 r
= _getattr(in
, mask
, perms
);
6964 ldout(cct
, 3) << "stat exit on error!" << dendl
;
6967 fill_stat(in
, stbuf
, dirstat
);
6968 ldout(cct
, 3) << "stat exit (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
6972 unsigned Client::statx_to_mask(unsigned int flags
, unsigned int want
)
6976 /* if NO_ATTR_SYNC is set, then we don't need any -- just use what's in cache */
6977 if (flags
& AT_NO_ATTR_SYNC
)
6980 /* Always set PIN to distinguish from AT_NO_ATTR_SYNC case */
6981 mask
|= CEPH_CAP_PIN
;
6982 if (want
& (CEPH_STATX_MODE
|CEPH_STATX_UID
|CEPH_STATX_GID
|CEPH_STATX_BTIME
|CEPH_STATX_CTIME
|CEPH_STATX_VERSION
))
6983 mask
|= CEPH_CAP_AUTH_SHARED
;
6984 if (want
& (CEPH_STATX_NLINK
|CEPH_STATX_CTIME
|CEPH_STATX_VERSION
))
6985 mask
|= CEPH_CAP_LINK_SHARED
;
6986 if (want
& (CEPH_STATX_ATIME
|CEPH_STATX_MTIME
|CEPH_STATX_CTIME
|CEPH_STATX_SIZE
|CEPH_STATX_BLOCKS
|CEPH_STATX_VERSION
))
6987 mask
|= CEPH_CAP_FILE_SHARED
;
6988 if (want
& (CEPH_STATX_VERSION
|CEPH_STATX_CTIME
))
6989 mask
|= CEPH_CAP_XATTR_SHARED
;
6994 int Client::statx(const char *relpath
, struct ceph_statx
*stx
,
6995 const UserPerm
& perms
,
6996 unsigned int want
, unsigned int flags
)
6998 ldout(cct
, 3) << "statx enter (relpath " << relpath
<< " want " << want
<< ")" << dendl
;
6999 Mutex::Locker
lock(client_lock
);
7000 tout(cct
) << "statx" << std::endl
;
7001 tout(cct
) << relpath
<< std::endl
;
7006 filepath
path(relpath
);
7009 unsigned mask
= statx_to_mask(flags
, want
);
7011 int r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), mask
);
7015 r
= _getattr(in
, mask
, perms
);
7017 ldout(cct
, 3) << "statx exit on error!" << dendl
;
7021 fill_statx(in
, mask
, stx
);
7022 ldout(cct
, 3) << "statx exit (relpath " << relpath
<< " mask " << stx
->stx_mask
<< ")" << dendl
;
7026 int Client::lstat(const char *relpath
, struct stat
*stbuf
,
7027 const UserPerm
& perms
, frag_info_t
*dirstat
, int mask
)
7029 ldout(cct
, 3) << "lstat enter (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7030 Mutex::Locker
lock(client_lock
);
7031 tout(cct
) << "lstat" << std::endl
;
7032 tout(cct
) << relpath
<< std::endl
;
7037 filepath
path(relpath
);
7039 // don't follow symlinks
7040 int r
= path_walk(path
, &in
, perms
, false, mask
);
7043 r
= _getattr(in
, mask
, perms
);
7045 ldout(cct
, 3) << "lstat exit on error!" << dendl
;
7048 fill_stat(in
, stbuf
, dirstat
);
7049 ldout(cct
, 3) << "lstat exit (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7053 int Client::fill_stat(Inode
*in
, struct stat
*st
, frag_info_t
*dirstat
, nest_info_t
*rstat
)
7055 ldout(cct
, 10) << "fill_stat on " << in
->ino
<< " snap/dev" << in
->snapid
7056 << " mode 0" << oct
<< in
->mode
<< dec
7057 << " mtime " << in
->mtime
<< " ctime " << in
->ctime
<< dendl
;
7058 memset(st
, 0, sizeof(struct stat
));
7059 if (use_faked_inos())
7060 st
->st_ino
= in
->faked_ino
;
7062 st
->st_ino
= in
->ino
;
7063 st
->st_dev
= in
->snapid
;
7064 st
->st_mode
= in
->mode
;
7065 st
->st_rdev
= in
->rdev
;
7067 switch (in
->nlink
) {
7069 st
->st_nlink
= 0; /* dir is unlinked */
7072 st
->st_nlink
= 1 /* parent dentry */
7074 + in
->dirstat
.nsubdirs
; /* include <dir>/. self-reference */
7080 st
->st_nlink
= in
->nlink
;
7082 st
->st_uid
= in
->uid
;
7083 st
->st_gid
= in
->gid
;
7084 if (in
->ctime
> in
->mtime
) {
7085 stat_set_ctime_sec(st
, in
->ctime
.sec());
7086 stat_set_ctime_nsec(st
, in
->ctime
.nsec());
7088 stat_set_ctime_sec(st
, in
->mtime
.sec());
7089 stat_set_ctime_nsec(st
, in
->mtime
.nsec());
7091 stat_set_atime_sec(st
, in
->atime
.sec());
7092 stat_set_atime_nsec(st
, in
->atime
.nsec());
7093 stat_set_mtime_sec(st
, in
->mtime
.sec());
7094 stat_set_mtime_nsec(st
, in
->mtime
.nsec());
7096 if (cct
->_conf
->client_dirsize_rbytes
)
7097 st
->st_size
= in
->rstat
.rbytes
;
7099 st
->st_size
= in
->dirstat
.size();
7102 st
->st_size
= in
->size
;
7103 st
->st_blocks
= (in
->size
+ 511) >> 9;
7105 st
->st_blksize
= MAX(in
->layout
.stripe_unit
, 4096);
7108 *dirstat
= in
->dirstat
;
7112 return in
->caps_issued();
7115 void Client::fill_statx(Inode
*in
, unsigned int mask
, struct ceph_statx
*stx
)
7117 ldout(cct
, 10) << "fill_statx on " << in
->ino
<< " snap/dev" << in
->snapid
7118 << " mode 0" << oct
<< in
->mode
<< dec
7119 << " mtime " << in
->mtime
<< " ctime " << in
->ctime
<< dendl
;
7120 memset(stx
, 0, sizeof(struct ceph_statx
));
7123 * If mask is 0, then the caller set AT_NO_ATTR_SYNC. Reset the mask
7124 * so that all bits are set.
7129 /* These are always considered to be available */
7130 stx
->stx_dev
= in
->snapid
;
7131 stx
->stx_blksize
= MAX(in
->layout
.stripe_unit
, 4096);
7133 /* Type bits are always set, even when CEPH_STATX_MODE is not */
7134 stx
->stx_mode
= S_IFMT
& in
->mode
;
7135 stx
->stx_ino
= use_faked_inos() ? in
->faked_ino
: (ino_t
)in
->ino
;
7136 stx
->stx_rdev
= in
->rdev
;
7137 stx
->stx_mask
|= (CEPH_STATX_INO
|CEPH_STATX_RDEV
);
7139 if (mask
& CEPH_CAP_AUTH_SHARED
) {
7140 stx
->stx_uid
= in
->uid
;
7141 stx
->stx_gid
= in
->gid
;
7142 stx
->stx_mode
= in
->mode
;
7143 in
->btime
.to_timespec(&stx
->stx_btime
);
7144 stx
->stx_mask
|= (CEPH_STATX_MODE
|CEPH_STATX_UID
|CEPH_STATX_GID
|CEPH_STATX_BTIME
);
7147 if (mask
& CEPH_CAP_LINK_SHARED
) {
7149 switch (in
->nlink
) {
7151 stx
->stx_nlink
= 0; /* dir is unlinked */
7154 stx
->stx_nlink
= 1 /* parent dentry */
7156 + in
->dirstat
.nsubdirs
; /* include <dir>/. self-reference */
7162 stx
->stx_nlink
= in
->nlink
;
7164 stx
->stx_mask
|= CEPH_STATX_NLINK
;
7167 if (mask
& CEPH_CAP_FILE_SHARED
) {
7169 in
->atime
.to_timespec(&stx
->stx_atime
);
7170 in
->mtime
.to_timespec(&stx
->stx_mtime
);
7173 if (cct
->_conf
->client_dirsize_rbytes
)
7174 stx
->stx_size
= in
->rstat
.rbytes
;
7176 stx
->stx_size
= in
->dirstat
.size();
7177 stx
->stx_blocks
= 1;
7179 stx
->stx_size
= in
->size
;
7180 stx
->stx_blocks
= (in
->size
+ 511) >> 9;
7182 stx
->stx_mask
|= (CEPH_STATX_ATIME
|CEPH_STATX_MTIME
|
7183 CEPH_STATX_SIZE
|CEPH_STATX_BLOCKS
);
7186 /* Change time and change_attr both require all shared caps to view */
7187 if ((mask
& CEPH_STAT_CAP_INODE_ALL
) == CEPH_STAT_CAP_INODE_ALL
) {
7188 stx
->stx_version
= in
->change_attr
;
7189 if (in
->ctime
> in
->mtime
)
7190 in
->ctime
.to_timespec(&stx
->stx_ctime
);
7192 in
->mtime
.to_timespec(&stx
->stx_ctime
);
7193 stx
->stx_mask
|= (CEPH_STATX_CTIME
|CEPH_STATX_VERSION
);
7198 void Client::touch_dn(Dentry
*dn
)
7203 int Client::chmod(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
7205 Mutex::Locker
lock(client_lock
);
7206 tout(cct
) << "chmod" << std::endl
;
7207 tout(cct
) << relpath
<< std::endl
;
7208 tout(cct
) << mode
<< std::endl
;
7213 filepath
path(relpath
);
7215 int r
= path_walk(path
, &in
, perms
);
7219 attr
.st_mode
= mode
;
7220 return _setattr(in
, &attr
, CEPH_SETATTR_MODE
, perms
);
7223 int Client::fchmod(int fd
, mode_t mode
, const UserPerm
& perms
)
7225 Mutex::Locker
lock(client_lock
);
7226 tout(cct
) << "fchmod" << std::endl
;
7227 tout(cct
) << fd
<< std::endl
;
7228 tout(cct
) << mode
<< std::endl
;
7233 Fh
*f
= get_filehandle(fd
);
7236 #if defined(__linux__) && defined(O_PATH)
7237 if (f
->flags
& O_PATH
)
7241 attr
.st_mode
= mode
;
7242 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_MODE
, perms
);
7245 int Client::lchmod(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
7247 Mutex::Locker
lock(client_lock
);
7248 tout(cct
) << "lchmod" << std::endl
;
7249 tout(cct
) << relpath
<< std::endl
;
7250 tout(cct
) << mode
<< std::endl
;
7255 filepath
path(relpath
);
7257 // don't follow symlinks
7258 int r
= path_walk(path
, &in
, perms
, false);
7262 attr
.st_mode
= mode
;
7263 return _setattr(in
, &attr
, CEPH_SETATTR_MODE
, perms
);
7266 int Client::chown(const char *relpath
, uid_t new_uid
, gid_t new_gid
,
7267 const UserPerm
& perms
)
7269 Mutex::Locker
lock(client_lock
);
7270 tout(cct
) << "chown" << std::endl
;
7271 tout(cct
) << relpath
<< std::endl
;
7272 tout(cct
) << new_uid
<< std::endl
;
7273 tout(cct
) << new_gid
<< std::endl
;
7278 filepath
path(relpath
);
7280 int r
= path_walk(path
, &in
, perms
);
7284 attr
.st_uid
= new_uid
;
7285 attr
.st_gid
= new_gid
;
7286 return _setattr(in
, &attr
, CEPH_SETATTR_UID
|CEPH_SETATTR_GID
, perms
);
7289 int Client::fchown(int fd
, uid_t new_uid
, gid_t new_gid
, const UserPerm
& perms
)
7291 Mutex::Locker
lock(client_lock
);
7292 tout(cct
) << "fchown" << std::endl
;
7293 tout(cct
) << fd
<< std::endl
;
7294 tout(cct
) << new_uid
<< std::endl
;
7295 tout(cct
) << new_gid
<< std::endl
;
7300 Fh
*f
= get_filehandle(fd
);
7303 #if defined(__linux__) && defined(O_PATH)
7304 if (f
->flags
& O_PATH
)
7308 attr
.st_uid
= new_uid
;
7309 attr
.st_gid
= new_gid
;
7311 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
7312 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
7313 return _setattr(f
->inode
, &attr
, mask
, perms
);
7316 int Client::lchown(const char *relpath
, uid_t new_uid
, gid_t new_gid
,
7317 const UserPerm
& perms
)
7319 Mutex::Locker
lock(client_lock
);
7320 tout(cct
) << "lchown" << std::endl
;
7321 tout(cct
) << relpath
<< std::endl
;
7322 tout(cct
) << new_uid
<< std::endl
;
7323 tout(cct
) << new_gid
<< std::endl
;
7328 filepath
path(relpath
);
7330 // don't follow symlinks
7331 int r
= path_walk(path
, &in
, perms
, false);
7335 attr
.st_uid
= new_uid
;
7336 attr
.st_gid
= new_gid
;
7338 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
7339 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
7340 return _setattr(in
, &attr
, mask
, perms
);
7343 int Client::utime(const char *relpath
, struct utimbuf
*buf
,
7344 const UserPerm
& perms
)
7346 Mutex::Locker
lock(client_lock
);
7347 tout(cct
) << "utime" << std::endl
;
7348 tout(cct
) << relpath
<< std::endl
;
7349 tout(cct
) << buf
->modtime
<< std::endl
;
7350 tout(cct
) << buf
->actime
<< std::endl
;
7355 filepath
path(relpath
);
7357 int r
= path_walk(path
, &in
, perms
);
7361 stat_set_mtime_sec(&attr
, buf
->modtime
);
7362 stat_set_mtime_nsec(&attr
, 0);
7363 stat_set_atime_sec(&attr
, buf
->actime
);
7364 stat_set_atime_nsec(&attr
, 0);
7365 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
7368 int Client::lutime(const char *relpath
, struct utimbuf
*buf
,
7369 const UserPerm
& perms
)
7371 Mutex::Locker
lock(client_lock
);
7372 tout(cct
) << "lutime" << std::endl
;
7373 tout(cct
) << relpath
<< std::endl
;
7374 tout(cct
) << buf
->modtime
<< std::endl
;
7375 tout(cct
) << buf
->actime
<< std::endl
;
7380 filepath
path(relpath
);
7382 // don't follow symlinks
7383 int r
= path_walk(path
, &in
, perms
, false);
7387 stat_set_mtime_sec(&attr
, buf
->modtime
);
7388 stat_set_mtime_nsec(&attr
, 0);
7389 stat_set_atime_sec(&attr
, buf
->actime
);
7390 stat_set_atime_nsec(&attr
, 0);
7391 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
7394 int Client::flock(int fd
, int operation
, uint64_t owner
)
7396 Mutex::Locker
lock(client_lock
);
7397 tout(cct
) << "flock" << std::endl
;
7398 tout(cct
) << fd
<< std::endl
;
7399 tout(cct
) << operation
<< std::endl
;
7400 tout(cct
) << owner
<< std::endl
;
7405 Fh
*f
= get_filehandle(fd
);
7409 return _flock(f
, operation
, owner
);
7412 int Client::opendir(const char *relpath
, dir_result_t
**dirpp
, const UserPerm
& perms
)
7414 Mutex::Locker
lock(client_lock
);
7415 tout(cct
) << "opendir" << std::endl
;
7416 tout(cct
) << relpath
<< std::endl
;
7421 filepath
path(relpath
);
7423 int r
= path_walk(path
, &in
, perms
, true);
7426 if (cct
->_conf
->client_permissions
) {
7427 int r
= may_open(in
.get(), O_RDONLY
, perms
);
7431 r
= _opendir(in
.get(), dirpp
, perms
);
7432 /* if ENOTDIR, dirpp will be an uninitialized point and it's very dangerous to access its value */
7434 tout(cct
) << (unsigned long)*dirpp
<< std::endl
;
7438 int Client::_opendir(Inode
*in
, dir_result_t
**dirpp
, const UserPerm
& perms
)
7442 *dirpp
= new dir_result_t(in
, perms
);
7443 opened_dirs
.insert(*dirpp
);
7444 ldout(cct
, 3) << "_opendir(" << in
->ino
<< ") = " << 0 << " (" << *dirpp
<< ")" << dendl
;
7449 int Client::closedir(dir_result_t
*dir
)
7451 Mutex::Locker
lock(client_lock
);
7452 tout(cct
) << "closedir" << std::endl
;
7453 tout(cct
) << (unsigned long)dir
<< std::endl
;
7455 ldout(cct
, 3) << "closedir(" << dir
<< ") = 0" << dendl
;
7460 void Client::_closedir(dir_result_t
*dirp
)
7462 ldout(cct
, 10) << "_closedir(" << dirp
<< ")" << dendl
;
7464 ldout(cct
, 10) << "_closedir detaching inode " << dirp
->inode
<< dendl
;
7465 dirp
->inode
.reset();
7467 _readdir_drop_dirp_buffer(dirp
);
7468 opened_dirs
.erase(dirp
);
7472 void Client::rewinddir(dir_result_t
*dirp
)
7474 Mutex::Locker
lock(client_lock
);
7475 ldout(cct
, 3) << "rewinddir(" << dirp
<< ")" << dendl
;
7480 dir_result_t
*d
= static_cast<dir_result_t
*>(dirp
);
7481 _readdir_drop_dirp_buffer(d
);
7485 loff_t
Client::telldir(dir_result_t
*dirp
)
7487 dir_result_t
*d
= static_cast<dir_result_t
*>(dirp
);
7488 ldout(cct
, 3) << "telldir(" << dirp
<< ") = " << d
->offset
<< dendl
;
7492 void Client::seekdir(dir_result_t
*dirp
, loff_t offset
)
7494 Mutex::Locker
lock(client_lock
);
7496 ldout(cct
, 3) << "seekdir(" << dirp
<< ", " << offset
<< ")" << dendl
;
7501 if (offset
== dirp
->offset
)
7504 if (offset
> dirp
->offset
)
7505 dirp
->release_count
= 0; // bump if we do a forward seek
7507 dirp
->ordered_count
= 0; // disable filling readdir cache
7509 if (dirp
->hash_order()) {
7510 if (dirp
->offset
> offset
) {
7511 _readdir_drop_dirp_buffer(dirp
);
7516 dirp
->buffer_frag
!= frag_t(dir_result_t::fpos_high(offset
)) ||
7517 dirp
->offset_low() > dir_result_t::fpos_low(offset
)) {
7518 _readdir_drop_dirp_buffer(dirp
);
7523 dirp
->offset
= offset
;
7528 // ino_t d_ino; /* inode number */
7529 // off_t d_off; /* offset to the next dirent */
7530 // unsigned short d_reclen; /* length of this record */
7531 // unsigned char d_type; /* type of file */
7532 // char d_name[256]; /* filename */
7534 void Client::fill_dirent(struct dirent
*de
, const char *name
, int type
, uint64_t ino
, loff_t next_off
)
7536 strncpy(de
->d_name
, name
, 255);
7537 de
->d_name
[255] = '\0';
7540 #if !defined(DARWIN) && !defined(__FreeBSD__)
7541 de
->d_off
= next_off
;
7544 de
->d_type
= IFTODT(type
);
7545 ldout(cct
, 10) << "fill_dirent '" << de
->d_name
<< "' -> " << inodeno_t(de
->d_ino
)
7546 << " type " << (int)de
->d_type
<< " w/ next_off " << hex
<< next_off
<< dec
<< dendl
;
7550 void Client::_readdir_next_frag(dir_result_t
*dirp
)
7552 frag_t fg
= dirp
->buffer_frag
;
7554 if (fg
.is_rightmost()) {
7555 ldout(cct
, 10) << "_readdir_next_frag advance from " << fg
<< " to END" << dendl
;
7562 ldout(cct
, 10) << "_readdir_next_frag advance from " << dirp
->buffer_frag
<< " to " << fg
<< dendl
;
7564 if (dirp
->hash_order()) {
7566 int64_t new_offset
= dir_result_t::make_fpos(fg
.value(), 2, true);
7567 if (dirp
->offset
< new_offset
) // don't decrease offset
7568 dirp
->offset
= new_offset
;
7570 dirp
->last_name
.clear();
7571 dirp
->offset
= dir_result_t::make_fpos(fg
, 2, false);
7572 _readdir_rechoose_frag(dirp
);
7576 void Client::_readdir_rechoose_frag(dir_result_t
*dirp
)
7578 assert(dirp
->inode
);
7580 if (dirp
->hash_order())
7583 frag_t cur
= frag_t(dirp
->offset_high());
7584 frag_t fg
= dirp
->inode
->dirfragtree
[cur
.value()];
7586 ldout(cct
, 10) << "_readdir_rechoose_frag frag " << cur
<< " maps to " << fg
<< dendl
;
7587 dirp
->offset
= dir_result_t::make_fpos(fg
, 2, false);
7588 dirp
->last_name
.clear();
7589 dirp
->next_offset
= 2;
7593 void Client::_readdir_drop_dirp_buffer(dir_result_t
*dirp
)
7595 ldout(cct
, 10) << "_readdir_drop_dirp_buffer " << dirp
<< dendl
;
7596 dirp
->buffer
.clear();
7599 int Client::_readdir_get_frag(dir_result_t
*dirp
)
7602 assert(dirp
->inode
);
7604 // get the current frag.
7606 if (dirp
->hash_order())
7607 fg
= dirp
->inode
->dirfragtree
[dirp
->offset_high()];
7609 fg
= frag_t(dirp
->offset_high());
7611 ldout(cct
, 10) << "_readdir_get_frag " << dirp
<< " on " << dirp
->inode
->ino
<< " fg " << fg
7612 << " offset " << hex
<< dirp
->offset
<< dec
<< dendl
;
7614 int op
= CEPH_MDS_OP_READDIR
;
7615 if (dirp
->inode
&& dirp
->inode
->snapid
== CEPH_SNAPDIR
)
7616 op
= CEPH_MDS_OP_LSSNAP
;
7618 InodeRef
& diri
= dirp
->inode
;
7620 MetaRequest
*req
= new MetaRequest(op
);
7622 diri
->make_nosnap_relative_path(path
);
7623 req
->set_filepath(path
);
7624 req
->set_inode(diri
.get());
7625 req
->head
.args
.readdir
.frag
= fg
;
7626 req
->head
.args
.readdir
.flags
= CEPH_READDIR_REPLY_BITFLAGS
;
7627 if (dirp
->last_name
.length()) {
7628 req
->path2
.set_path(dirp
->last_name
);
7629 } else if (dirp
->hash_order()) {
7630 req
->head
.args
.readdir
.offset_hash
= dirp
->offset_high();
7635 int res
= make_request(req
, dirp
->perms
, NULL
, NULL
, -1, &dirbl
);
7637 if (res
== -EAGAIN
) {
7638 ldout(cct
, 10) << "_readdir_get_frag got EAGAIN, retrying" << dendl
;
7639 _readdir_rechoose_frag(dirp
);
7640 return _readdir_get_frag(dirp
);
7644 ldout(cct
, 10) << "_readdir_get_frag " << dirp
<< " got frag " << dirp
->buffer_frag
7645 << " size " << dirp
->buffer
.size() << dendl
;
7647 ldout(cct
, 10) << "_readdir_get_frag got error " << res
<< ", setting end flag" << dendl
;
7654 struct dentry_off_lt
{
7655 bool operator()(const Dentry
* dn
, int64_t off
) const {
7656 return dir_result_t::fpos_cmp(dn
->offset
, off
) < 0;
7660 int Client::_readdir_cache_cb(dir_result_t
*dirp
, add_dirent_cb_t cb
, void *p
,
7661 int caps
, bool getref
)
7663 assert(client_lock
.is_locked());
7664 ldout(cct
, 10) << "_readdir_cache_cb " << dirp
<< " on " << dirp
->inode
->ino
7665 << " last_name " << dirp
->last_name
<< " offset " << hex
<< dirp
->offset
<< dec
7667 Dir
*dir
= dirp
->inode
->dir
;
7670 ldout(cct
, 10) << " dir is empty" << dendl
;
7675 vector
<Dentry
*>::iterator pd
= std::lower_bound(dir
->readdir_cache
.begin(),
7676 dir
->readdir_cache
.end(),
7677 dirp
->offset
, dentry_off_lt());
7681 if (!dirp
->inode
->is_complete_and_ordered())
7683 if (pd
== dir
->readdir_cache
.end())
7686 if (dn
->inode
== NULL
) {
7687 ldout(cct
, 15) << " skipping null '" << dn
->name
<< "'" << dendl
;
7691 if (dn
->cap_shared_gen
!= dir
->parent_inode
->shared_gen
) {
7692 ldout(cct
, 15) << " skipping mismatch shared gen '" << dn
->name
<< "'" << dendl
;
7697 int r
= _getattr(dn
->inode
, caps
, dirp
->perms
);
7701 struct ceph_statx stx
;
7703 fill_statx(dn
->inode
, caps
, &stx
);
7705 uint64_t next_off
= dn
->offset
+ 1;
7707 if (pd
== dir
->readdir_cache
.end())
7708 next_off
= dir_result_t::END
;
7711 fill_dirent(&de
, dn
->name
.c_str(), stx
.stx_mode
, stx
.stx_ino
, next_off
);
7713 in
= dn
->inode
.get();
7717 dn_name
= dn
->name
; // fill in name while we have lock
7719 client_lock
.Unlock();
7720 r
= cb(p
, &de
, &stx
, next_off
, in
); // _next_ offset
7722 ldout(cct
, 15) << " de " << de
.d_name
<< " off " << hex
<< dn
->offset
<< dec
7723 << " = " << r
<< dendl
;
7728 dirp
->offset
= next_off
;
7730 dirp
->next_offset
= 2;
7732 dirp
->next_offset
= dirp
->offset_low();
7733 dirp
->last_name
= dn_name
; // we successfully returned this one; update!
7734 dirp
->release_count
= 0; // last_name no longer match cache index
7739 ldout(cct
, 10) << "_readdir_cache_cb " << dirp
<< " on " << dirp
->inode
->ino
<< " at end" << dendl
;
7744 int Client::readdir_r_cb(dir_result_t
*d
, add_dirent_cb_t cb
, void *p
,
7745 unsigned want
, unsigned flags
, bool getref
)
7747 int caps
= statx_to_mask(flags
, want
);
7749 Mutex::Locker
lock(client_lock
);
7754 dir_result_t
*dirp
= static_cast<dir_result_t
*>(d
);
7756 ldout(cct
, 10) << "readdir_r_cb " << *dirp
->inode
<< " offset " << hex
<< dirp
->offset
7757 << dec
<< " at_end=" << dirp
->at_end()
7758 << " hash_order=" << dirp
->hash_order() << dendl
;
7761 struct ceph_statx stx
;
7762 memset(&de
, 0, sizeof(de
));
7763 memset(&stx
, 0, sizeof(stx
));
7765 InodeRef
& diri
= dirp
->inode
;
7770 if (dirp
->offset
== 0) {
7771 ldout(cct
, 15) << " including ." << dendl
;
7772 assert(diri
->dn_set
.size() < 2); // can't have multiple hard-links to a dir
7773 uint64_t next_off
= 1;
7776 r
= _getattr(diri
, caps
, dirp
->perms
);
7780 fill_statx(diri
, caps
, &stx
);
7781 fill_dirent(&de
, ".", S_IFDIR
, stx
.stx_ino
, next_off
);
7783 Inode
*inode
= NULL
;
7789 client_lock
.Unlock();
7790 r
= cb(p
, &de
, &stx
, next_off
, inode
);
7795 dirp
->offset
= next_off
;
7799 if (dirp
->offset
== 1) {
7800 ldout(cct
, 15) << " including .." << dendl
;
7801 uint64_t next_off
= 2;
7803 if (diri
->dn_set
.empty())
7806 in
= diri
->get_first_parent()->dir
->parent_inode
;
7809 r
= _getattr(in
, caps
, dirp
->perms
);
7813 fill_statx(in
, caps
, &stx
);
7814 fill_dirent(&de
, "..", S_IFDIR
, stx
.stx_ino
, next_off
);
7816 Inode
*inode
= NULL
;
7822 client_lock
.Unlock();
7823 r
= cb(p
, &de
, &stx
, next_off
, inode
);
7828 dirp
->offset
= next_off
;
7833 // can we read from our cache?
7834 ldout(cct
, 10) << "offset " << hex
<< dirp
->offset
<< dec
7835 << " snapid " << dirp
->inode
->snapid
<< " (complete && ordered) "
7836 << dirp
->inode
->is_complete_and_ordered()
7837 << " issued " << ccap_string(dirp
->inode
->caps_issued())
7839 if (dirp
->inode
->snapid
!= CEPH_SNAPDIR
&&
7840 dirp
->inode
->is_complete_and_ordered() &&
7841 dirp
->inode
->caps_issued_mask(CEPH_CAP_FILE_SHARED
, true)) {
7842 int err
= _readdir_cache_cb(dirp
, cb
, p
, caps
, getref
);
7851 bool check_caps
= true;
7852 if (!dirp
->is_cached()) {
7853 int r
= _readdir_get_frag(dirp
);
7856 // _readdir_get_frag () may updates dirp->offset if the replied dirfrag is
7857 // different than the requested one. (our dirfragtree was outdated)
7860 frag_t fg
= dirp
->buffer_frag
;
7862 ldout(cct
, 10) << "frag " << fg
<< " buffer size " << dirp
->buffer
.size()
7863 << " offset " << hex
<< dirp
->offset
<< dendl
;
7865 for (auto it
= std::lower_bound(dirp
->buffer
.begin(), dirp
->buffer
.end(),
7866 dirp
->offset
, dir_result_t::dentry_off_lt());
7867 it
!= dirp
->buffer
.end();
7869 dir_result_t::dentry
&entry
= *it
;
7871 uint64_t next_off
= entry
.offset
+ 1;
7875 r
= _getattr(entry
.inode
, caps
, dirp
->perms
);
7880 fill_statx(entry
.inode
, caps
, &stx
);
7881 fill_dirent(&de
, entry
.name
.c_str(), stx
.stx_mode
, stx
.stx_ino
, next_off
);
7883 Inode
*inode
= NULL
;
7885 inode
= entry
.inode
.get();
7889 client_lock
.Unlock();
7890 r
= cb(p
, &de
, &stx
, next_off
, inode
); // _next_ offset
7893 ldout(cct
, 15) << " de " << de
.d_name
<< " off " << hex
<< next_off
- 1 << dec
7894 << " = " << r
<< dendl
;
7898 dirp
->offset
= next_off
;
7903 if (dirp
->next_offset
> 2) {
7904 ldout(cct
, 10) << " fetching next chunk of this frag" << dendl
;
7905 _readdir_drop_dirp_buffer(dirp
);
7909 if (!fg
.is_rightmost()) {
7911 _readdir_next_frag(dirp
);
7915 if (diri
->shared_gen
== dirp
->start_shared_gen
&&
7916 diri
->dir_release_count
== dirp
->release_count
) {
7917 if (diri
->dir_ordered_count
== dirp
->ordered_count
) {
7918 ldout(cct
, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on " << *diri
<< dendl
;
7920 assert(diri
->dir
->readdir_cache
.size() >= dirp
->cache_index
);
7921 diri
->dir
->readdir_cache
.resize(dirp
->cache_index
);
7923 diri
->flags
|= I_COMPLETE
| I_DIR_ORDERED
;
7925 ldout(cct
, 10) << " marking I_COMPLETE on " << *diri
<< dendl
;
7926 diri
->flags
|= I_COMPLETE
;
7938 int Client::readdir_r(dir_result_t
*d
, struct dirent
*de
)
7940 return readdirplus_r(d
, de
, 0, 0, 0, NULL
);
7947 * 1 if we got a dirent
7948 * 0 for end of directory
7952 struct single_readdir
{
7954 struct ceph_statx
*stx
;
7959 static int _readdir_single_dirent_cb(void *p
, struct dirent
*de
,
7960 struct ceph_statx
*stx
, off_t off
,
7963 single_readdir
*c
= static_cast<single_readdir
*>(p
);
7966 return -1; // already filled this dirent
7976 struct dirent
*Client::readdir(dir_result_t
*d
)
7979 static struct dirent de
;
7986 // our callback fills the dirent and sets sr.full=true on first
7987 // call, and returns -1 the second time around.
7988 ret
= readdir_r_cb(d
, _readdir_single_dirent_cb
, (void *)&sr
);
7990 errno
= -ret
; // this sucks.
7991 return (dirent
*) NULL
;
7996 return (dirent
*) NULL
;
7999 int Client::readdirplus_r(dir_result_t
*d
, struct dirent
*de
,
8000 struct ceph_statx
*stx
, unsigned want
,
8001 unsigned flags
, Inode
**out
)
8009 // our callback fills the dirent and sets sr.full=true on first
8010 // call, and returns -1 the second time around.
8011 int r
= readdir_r_cb(d
, _readdir_single_dirent_cb
, (void *)&sr
, want
, flags
, out
);
8023 struct getdents_result
{
8030 static int _readdir_getdent_cb(void *p
, struct dirent
*de
,
8031 struct ceph_statx
*stx
, off_t off
, Inode
*in
)
8033 struct getdents_result
*c
= static_cast<getdents_result
*>(p
);
8039 dlen
= strlen(de
->d_name
) + 1;
8041 if (c
->pos
+ dlen
> c
->buflen
)
8042 return -1; // doesn't fit
8045 memcpy(c
->buf
+ c
->pos
, de
, sizeof(*de
));
8047 memcpy(c
->buf
+ c
->pos
, de
->d_name
, dlen
);
8053 int Client::_getdents(dir_result_t
*dir
, char *buf
, int buflen
, bool fullent
)
8058 gr
.fullent
= fullent
;
8061 int r
= readdir_r_cb(dir
, _readdir_getdent_cb
, (void *)&gr
);
8063 if (r
< 0) { // some error
8064 if (r
== -1) { // buffer ran out of space
8065 if (gr
.pos
) { // but we got some entries already!
8067 } // or we need a larger buffer
8069 } else { // actual error, return it
8078 struct getdir_result
{
8079 list
<string
> *contents
;
8083 static int _getdir_cb(void *p
, struct dirent
*de
, struct ceph_statx
*stx
, off_t off
, Inode
*in
)
8085 getdir_result
*r
= static_cast<getdir_result
*>(p
);
8087 r
->contents
->push_back(de
->d_name
);
8092 int Client::getdir(const char *relpath
, list
<string
>& contents
,
8093 const UserPerm
& perms
)
8095 ldout(cct
, 3) << "getdir(" << relpath
<< ")" << dendl
;
8097 Mutex::Locker
lock(client_lock
);
8098 tout(cct
) << "getdir" << std::endl
;
8099 tout(cct
) << relpath
<< std::endl
;
8103 int r
= opendir(relpath
, &d
, perms
);
8108 gr
.contents
= &contents
;
8110 r
= readdir_r_cb(d
, _getdir_cb
, (void *)&gr
);
8120 /****** file i/o **********/
8121 int Client::open(const char *relpath
, int flags
, const UserPerm
& perms
,
8122 mode_t mode
, int stripe_unit
, int stripe_count
,
8123 int object_size
, const char *data_pool
)
8125 ldout(cct
, 3) << "open enter(" << relpath
<< ", " << ceph_flags_sys2wire(flags
) << "," << mode
<< ")" << dendl
;
8126 Mutex::Locker
lock(client_lock
);
8127 tout(cct
) << "open" << std::endl
;
8128 tout(cct
) << relpath
<< std::endl
;
8129 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
8136 #if defined(__linux__) && defined(O_PATH)
8137 /* When the O_PATH is being specified, others flags than O_DIRECTORY
8138 * and O_NOFOLLOW are ignored. Please refer do_entry_open() function
8139 * in kernel (fs/open.c). */
8141 flags
&= O_DIRECTORY
| O_NOFOLLOW
| O_PATH
;
8144 filepath
path(relpath
);
8146 bool created
= false;
8147 /* O_CREATE with O_EXCL enforces O_NOFOLLOW. */
8148 bool followsym
= !((flags
& O_NOFOLLOW
) || ((flags
& O_CREAT
) && (flags
& O_EXCL
)));
8149 int r
= path_walk(path
, &in
, perms
, followsym
, ceph_caps_for_mode(mode
));
8151 if (r
== 0 && (flags
& O_CREAT
) && (flags
& O_EXCL
))
8154 #if defined(__linux__) && defined(O_PATH)
8155 if (r
== 0 && in
->is_symlink() && (flags
& O_NOFOLLOW
) && !(flags
& O_PATH
))
8157 if (r
== 0 && in
->is_symlink() && (flags
& O_NOFOLLOW
))
8161 if (r
== -ENOENT
&& (flags
& O_CREAT
)) {
8162 filepath dirpath
= path
;
8163 string dname
= dirpath
.last_dentry();
8164 dirpath
.pop_dentry();
8166 r
= path_walk(dirpath
, &dir
, perms
, true,
8167 cct
->_conf
->client_permissions
? CEPH_CAP_AUTH_SHARED
: 0);
8170 if (cct
->_conf
->client_permissions
) {
8171 r
= may_create(dir
.get(), perms
);
8175 r
= _create(dir
.get(), dname
.c_str(), flags
, mode
, &in
, &fh
, stripe_unit
,
8176 stripe_count
, object_size
, data_pool
, &created
, perms
);
8182 // posix says we can only check permissions of existing files
8183 if (cct
->_conf
->client_permissions
) {
8184 r
= may_open(in
.get(), flags
, perms
);
8191 r
= _open(in
.get(), flags
, mode
, &fh
, perms
);
8193 // allocate a integer file descriptor
8196 assert(fd_map
.count(r
) == 0);
8201 tout(cct
) << r
<< std::endl
;
8202 ldout(cct
, 3) << "open exit(" << path
<< ", " << ceph_flags_sys2wire(flags
) << ") = " << r
<< dendl
;
8206 int Client::open(const char *relpath
, int flags
, const UserPerm
& perms
, mode_t mode
)
8208 /* Use default file striping parameters */
8209 return open(relpath
, flags
, perms
, mode
, 0, 0, 0, NULL
);
8212 int Client::lookup_hash(inodeno_t ino
, inodeno_t dirino
, const char *name
,
8213 const UserPerm
& perms
)
8215 Mutex::Locker
lock(client_lock
);
8216 ldout(cct
, 3) << "lookup_hash enter(" << ino
<< ", #" << dirino
<< "/" << name
<< ")" << dendl
;
8221 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPHASH
);
8223 req
->set_filepath(path
);
8225 uint32_t h
= ceph_str_hash(CEPH_STR_HASH_RJENKINS
, name
, strlen(name
));
8227 sprintf(f
, "%u", h
);
8228 filepath
path2(dirino
);
8229 path2
.push_dentry(string(f
));
8230 req
->set_filepath2(path2
);
8232 int r
= make_request(req
, perms
, NULL
, NULL
,
8233 rand() % mdsmap
->get_num_in_mds());
8234 ldout(cct
, 3) << "lookup_hash exit(" << ino
<< ", #" << dirino
<< "/" << name
<< ") = " << r
<< dendl
;
8240 * Load inode into local cache.
8242 * If inode pointer is non-NULL, and take a reference on
8243 * the resulting Inode object in one operation, so that caller
8244 * can safely assume inode will still be there after return.
8246 int Client::lookup_ino(inodeno_t ino
, const UserPerm
& perms
, Inode
**inode
)
8248 Mutex::Locker
lock(client_lock
);
8249 ldout(cct
, 3) << "lookup_ino enter(" << ino
<< ")" << dendl
;
8254 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPINO
);
8256 req
->set_filepath(path
);
8258 int r
= make_request(req
, perms
, NULL
, NULL
, rand() % mdsmap
->get_num_in_mds());
8259 if (r
== 0 && inode
!= NULL
) {
8260 vinodeno_t
vino(ino
, CEPH_NOSNAP
);
8261 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
8262 assert(p
!= inode_map
.end());
8266 ldout(cct
, 3) << "lookup_ino exit(" << ino
<< ") = " << r
<< dendl
;
8273 * Find the parent inode of `ino` and insert it into
8274 * our cache. Conditionally also set `parent` to a referenced
8275 * Inode* if caller provides non-NULL value.
8277 int Client::lookup_parent(Inode
*ino
, const UserPerm
& perms
, Inode
**parent
)
8279 Mutex::Locker
lock(client_lock
);
8280 ldout(cct
, 3) << "lookup_parent enter(" << ino
->ino
<< ")" << dendl
;
8285 if (!ino
->dn_set
.empty()) {
8286 // if we exposed the parent here, we'd need to check permissions,
8287 // but right now we just rely on the MDS doing so in make_request
8288 ldout(cct
, 3) << "lookup_parent dentry already present" << dendl
;
8292 if (ino
->is_root()) {
8294 ldout(cct
, 3) << "ino is root, no parent" << dendl
;
8298 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPPARENT
);
8299 filepath
path(ino
->ino
);
8300 req
->set_filepath(path
);
8303 int r
= make_request(req
, perms
, &target
, NULL
, rand() % mdsmap
->get_num_in_mds());
8304 // Give caller a reference to the parent ino if they provided a pointer.
8305 if (parent
!= NULL
) {
8307 *parent
= target
.get();
8309 ldout(cct
, 3) << "lookup_parent found parent " << (*parent
)->ino
<< dendl
;
8314 ldout(cct
, 3) << "lookup_parent exit(" << ino
->ino
<< ") = " << r
<< dendl
;
8320 * Populate the parent dentry for `ino`, provided it is
8321 * a child of `parent`.
8323 int Client::lookup_name(Inode
*ino
, Inode
*parent
, const UserPerm
& perms
)
8325 assert(parent
->is_dir());
8327 Mutex::Locker
lock(client_lock
);
8328 ldout(cct
, 3) << "lookup_name enter(" << ino
->ino
<< ")" << dendl
;
8333 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
8334 req
->set_filepath2(filepath(parent
->ino
));
8335 req
->set_filepath(filepath(ino
->ino
));
8336 req
->set_inode(ino
);
8338 int r
= make_request(req
, perms
, NULL
, NULL
, rand() % mdsmap
->get_num_in_mds());
8339 ldout(cct
, 3) << "lookup_name exit(" << ino
->ino
<< ") = " << r
<< dendl
;
8344 Fh
*Client::_create_fh(Inode
*in
, int flags
, int cmode
, const UserPerm
& perms
)
8352 f
->actor_perms
= perms
;
8354 ldout(cct
, 10) << "_create_fh " << in
->ino
<< " mode " << cmode
<< dendl
;
8356 if (in
->snapid
!= CEPH_NOSNAP
) {
8357 in
->snap_cap_refs
++;
8358 ldout(cct
, 5) << "open success, fh is " << f
<< " combined IMMUTABLE SNAP caps "
8359 << ccap_string(in
->caps_issued()) << dendl
;
8362 const md_config_t
*conf
= cct
->_conf
;
8363 f
->readahead
.set_trigger_requests(1);
8364 f
->readahead
.set_min_readahead_size(conf
->client_readahead_min
);
8365 uint64_t max_readahead
= Readahead::NO_LIMIT
;
8366 if (conf
->client_readahead_max_bytes
) {
8367 max_readahead
= MIN(max_readahead
, (uint64_t)conf
->client_readahead_max_bytes
);
8369 if (conf
->client_readahead_max_periods
) {
8370 max_readahead
= MIN(max_readahead
, in
->layout
.get_period()*(uint64_t)conf
->client_readahead_max_periods
);
8372 f
->readahead
.set_max_readahead_size(max_readahead
);
8373 vector
<uint64_t> alignments
;
8374 alignments
.push_back(in
->layout
.get_period());
8375 alignments
.push_back(in
->layout
.stripe_unit
);
8376 f
->readahead
.set_alignments(alignments
);
8381 int Client::_release_fh(Fh
*f
)
8383 //ldout(cct, 3) << "op: client->close(open_files[ " << fh << " ]);" << dendl;
8384 //ldout(cct, 3) << "op: open_files.erase( " << fh << " );" << dendl;
8385 Inode
*in
= f
->inode
.get();
8386 ldout(cct
, 5) << "_release_fh " << f
<< " mode " << f
->mode
<< " on " << *in
<< dendl
;
8390 if (in
->snapid
== CEPH_NOSNAP
) {
8391 if (in
->put_open_ref(f
->mode
)) {
8392 _flush(in
, new C_Client_FlushComplete(this, in
));
8396 assert(in
->snap_cap_refs
> 0);
8397 in
->snap_cap_refs
--;
8400 _release_filelocks(f
);
8402 // Finally, read any async err (i.e. from flushes)
8403 int err
= f
->take_async_err();
8405 ldout(cct
, 1) << "_release_fh " << f
<< " on inode " << *in
<< " caught async_err = "
8406 << cpp_strerror(err
) << dendl
;
8408 ldout(cct
, 10) << "_release_fh " << f
<< " on inode " << *in
<< " no async_err state" << dendl
;
8416 void Client::_put_fh(Fh
*f
)
8418 int left
= f
->put();
8424 int Client::_open(Inode
*in
, int flags
, mode_t mode
, Fh
**fhp
,
8425 const UserPerm
& perms
)
8427 if (in
->snapid
!= CEPH_NOSNAP
&&
8428 (flags
& (O_WRONLY
| O_RDWR
| O_CREAT
| O_TRUNC
| O_APPEND
))) {
8432 // use normalized flags to generate cmode
8433 int cmode
= ceph_flags_to_mode(ceph_flags_sys2wire(flags
));
8436 int want
= ceph_caps_for_mode(cmode
);
8439 in
->get_open_ref(cmode
); // make note of pending open, since it effects _wanted_ caps.
8441 if ((flags
& O_TRUNC
) == 0 && in
->caps_issued_mask(want
)) {
8443 check_caps(in
, CHECK_CAPS_NODELAY
);
8446 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_OPEN
);
8448 in
->make_nosnap_relative_path(path
);
8449 req
->set_filepath(path
);
8450 req
->head
.args
.open
.flags
= ceph_flags_sys2wire(flags
& ~O_CREAT
);
8451 req
->head
.args
.open
.mode
= mode
;
8452 req
->head
.args
.open
.pool
= -1;
8453 if (cct
->_conf
->client_debug_getattr_caps
)
8454 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
8456 req
->head
.args
.open
.mask
= 0;
8457 req
->head
.args
.open
.old_size
= in
->size
; // for O_TRUNC
8459 result
= make_request(req
, perms
);
8462 * NFS expects that delegations will be broken on a conflicting open,
8463 * not just when there is actual conflicting access to the file. SMB leases
8464 * and oplocks also have similar semantics.
8466 * Ensure that clients that have delegations enabled will wait on minimal
8467 * caps during open, just to ensure that other clients holding delegations
8468 * return theirs first.
8470 if (deleg_timeout
&& result
== 0) {
8473 if (cmode
& CEPH_FILE_MODE_WR
)
8474 need
|= CEPH_CAP_FILE_WR
;
8475 if (cmode
& CEPH_FILE_MODE_RD
)
8476 need
|= CEPH_CAP_FILE_RD
;
8478 result
= get_caps(in
, need
, want
, &have
, -1);
8480 ldout(cct
, 1) << "Unable to get caps after open of inode " << *in
<<
8481 " . Denying open: " <<
8482 cpp_strerror(result
) << dendl
;
8483 in
->put_open_ref(cmode
);
8485 put_cap_ref(in
, need
);
8493 *fhp
= _create_fh(in
, flags
, cmode
, perms
);
8495 in
->put_open_ref(cmode
);
8503 int Client::_renew_caps(Inode
*in
)
8505 int wanted
= in
->caps_file_wanted();
8506 if (in
->is_any_caps() &&
8507 ((wanted
& CEPH_CAP_ANY_WR
) == 0 || in
->auth_cap
)) {
8508 check_caps(in
, CHECK_CAPS_NODELAY
);
8513 if ((wanted
& CEPH_CAP_FILE_RD
) && (wanted
& CEPH_CAP_FILE_WR
))
8515 else if (wanted
& CEPH_CAP_FILE_RD
)
8517 else if (wanted
& CEPH_CAP_FILE_WR
)
8520 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_OPEN
);
8522 in
->make_nosnap_relative_path(path
);
8523 req
->set_filepath(path
);
8524 req
->head
.args
.open
.flags
= flags
;
8525 req
->head
.args
.open
.pool
= -1;
8526 if (cct
->_conf
->client_debug_getattr_caps
)
8527 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
8529 req
->head
.args
.open
.mask
= 0;
8532 // duplicate in case Cap goes away; not sure if that race is a concern?
8533 const UserPerm
*pperm
= in
->get_best_perms();
8537 int ret
= make_request(req
, perms
);
8541 int Client::close(int fd
)
8543 ldout(cct
, 3) << "close enter(" << fd
<< ")" << dendl
;
8544 Mutex::Locker
lock(client_lock
);
8545 tout(cct
) << "close" << std::endl
;
8546 tout(cct
) << fd
<< std::endl
;
8551 Fh
*fh
= get_filehandle(fd
);
8554 int err
= _release_fh(fh
);
8557 ldout(cct
, 3) << "close exit(" << fd
<< ")" << dendl
;
8565 loff_t
Client::lseek(int fd
, loff_t offset
, int whence
)
8567 Mutex::Locker
lock(client_lock
);
8568 tout(cct
) << "lseek" << std::endl
;
8569 tout(cct
) << fd
<< std::endl
;
8570 tout(cct
) << offset
<< std::endl
;
8571 tout(cct
) << whence
<< std::endl
;
8576 Fh
*f
= get_filehandle(fd
);
8579 #if defined(__linux__) && defined(O_PATH)
8580 if (f
->flags
& O_PATH
)
8583 return _lseek(f
, offset
, whence
);
8586 loff_t
Client::_lseek(Fh
*f
, loff_t offset
, int whence
)
8588 Inode
*in
= f
->inode
.get();
8601 r
= _getattr(in
, CEPH_STAT_CAP_SIZE
, f
->actor_perms
);
8604 f
->pos
= in
->size
+ offset
;
8611 ldout(cct
, 3) << "_lseek(" << f
<< ", " << offset
<< ", " << whence
<< ") = " << f
->pos
<< dendl
;
8616 void Client::lock_fh_pos(Fh
*f
)
8618 ldout(cct
, 10) << "lock_fh_pos " << f
<< dendl
;
8620 if (f
->pos_locked
|| !f
->pos_waiters
.empty()) {
8622 f
->pos_waiters
.push_back(&cond
);
8623 ldout(cct
, 10) << "lock_fh_pos BLOCKING on " << f
<< dendl
;
8624 while (f
->pos_locked
|| f
->pos_waiters
.front() != &cond
)
8625 cond
.Wait(client_lock
);
8626 ldout(cct
, 10) << "lock_fh_pos UNBLOCKING on " << f
<< dendl
;
8627 assert(f
->pos_waiters
.front() == &cond
);
8628 f
->pos_waiters
.pop_front();
8631 f
->pos_locked
= true;
8634 void Client::unlock_fh_pos(Fh
*f
)
8636 ldout(cct
, 10) << "unlock_fh_pos " << f
<< dendl
;
8637 f
->pos_locked
= false;
8640 int Client::uninline_data(Inode
*in
, Context
*onfinish
)
8642 if (!in
->inline_data
.length()) {
8643 onfinish
->complete(0);
8648 snprintf(oid_buf
, sizeof(oid_buf
), "%llx.00000000", (long long unsigned)in
->ino
);
8649 object_t oid
= oid_buf
;
8651 ObjectOperation create_ops
;
8652 create_ops
.create(false);
8654 objecter
->mutate(oid
,
8655 OSDMap::file_to_object_locator(in
->layout
),
8657 in
->snaprealm
->get_snap_context(),
8658 ceph::real_clock::now(),
8662 bufferlist inline_version_bl
;
8663 ::encode(in
->inline_version
, inline_version_bl
);
8665 ObjectOperation uninline_ops
;
8666 uninline_ops
.cmpxattr("inline_version",
8667 CEPH_OSD_CMPXATTR_OP_GT
,
8668 CEPH_OSD_CMPXATTR_MODE_U64
,
8670 bufferlist inline_data
= in
->inline_data
;
8671 uninline_ops
.write(0, inline_data
, in
->truncate_size
, in
->truncate_seq
);
8672 uninline_ops
.setxattr("inline_version", stringify(in
->inline_version
));
8674 objecter
->mutate(oid
,
8675 OSDMap::file_to_object_locator(in
->layout
),
8677 in
->snaprealm
->get_snap_context(),
8678 ceph::real_clock::now(),
8687 // blocking osd interface
8689 int Client::read(int fd
, char *buf
, loff_t size
, loff_t offset
)
8691 Mutex::Locker
lock(client_lock
);
8692 tout(cct
) << "read" << std::endl
;
8693 tout(cct
) << fd
<< std::endl
;
8694 tout(cct
) << size
<< std::endl
;
8695 tout(cct
) << offset
<< std::endl
;
8700 Fh
*f
= get_filehandle(fd
);
8703 #if defined(__linux__) && defined(O_PATH)
8704 if (f
->flags
& O_PATH
)
8708 int r
= _read(f
, offset
, size
, &bl
);
8709 ldout(cct
, 3) << "read(" << fd
<< ", " << (void*)buf
<< ", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
8711 bl
.copy(0, bl
.length(), buf
);
8717 int Client::preadv(int fd
, const struct iovec
*iov
, int iovcnt
, loff_t offset
)
8721 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, false);
8724 int Client::_read(Fh
*f
, int64_t offset
, uint64_t size
, bufferlist
*bl
)
8726 const md_config_t
*conf
= cct
->_conf
;
8727 Inode
*in
= f
->inode
.get();
8729 if ((f
->mode
& CEPH_FILE_MODE_RD
) == 0)
8731 //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
8733 bool movepos
= false;
8739 loff_t start_pos
= offset
;
8741 if (in
->inline_version
== 0) {
8742 int r
= _getattr(in
, CEPH_STAT_CAP_INLINE_DATA
, f
->actor_perms
, true);
8748 assert(in
->inline_version
> 0);
8753 int r
= get_caps(in
, CEPH_CAP_FILE_RD
, CEPH_CAP_FILE_CACHE
, &have
, -1);
8759 if (f
->flags
& O_DIRECT
)
8760 have
&= ~CEPH_CAP_FILE_CACHE
;
8762 Mutex
uninline_flock("Client::_read_uninline_data flock");
8764 bool uninline_done
= false;
8765 int uninline_ret
= 0;
8766 Context
*onuninline
= NULL
;
8768 if (in
->inline_version
< CEPH_INLINE_NONE
) {
8769 if (!(have
& CEPH_CAP_FILE_CACHE
)) {
8770 onuninline
= new C_SafeCond(&uninline_flock
,
8774 uninline_data(in
, onuninline
);
8776 uint32_t len
= in
->inline_data
.length();
8778 uint64_t endoff
= offset
+ size
;
8779 if (endoff
> in
->size
)
8783 if (endoff
<= len
) {
8784 bl
->substr_of(in
->inline_data
, offset
, endoff
- offset
);
8786 bl
->substr_of(in
->inline_data
, offset
, len
- offset
);
8787 bl
->append_zero(endoff
- len
);
8789 } else if ((uint64_t)offset
< endoff
) {
8790 bl
->append_zero(endoff
- offset
);
8797 if (!conf
->client_debug_force_sync_read
&&
8798 (conf
->client_oc
&& (have
& CEPH_CAP_FILE_CACHE
))) {
8800 if (f
->flags
& O_RSYNC
) {
8801 _flush_range(in
, offset
, size
);
8803 r
= _read_async(f
, offset
, size
, bl
);
8807 if (f
->flags
& O_DIRECT
)
8808 _flush_range(in
, offset
, size
);
8810 bool checkeof
= false;
8811 r
= _read_sync(f
, offset
, size
, bl
, &checkeof
);
8818 put_cap_ref(in
, CEPH_CAP_FILE_RD
);
8821 r
= _getattr(in
, CEPH_STAT_CAP_SIZE
, f
->actor_perms
);
8826 if ((uint64_t)offset
< in
->size
)
8834 f
->pos
= start_pos
+ bl
->length();
8842 client_lock
.Unlock();
8843 uninline_flock
.Lock();
8844 while (!uninline_done
)
8845 uninline_cond
.Wait(uninline_flock
);
8846 uninline_flock
.Unlock();
8849 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
8850 in
->inline_data
.clear();
8851 in
->inline_version
= CEPH_INLINE_NONE
;
8852 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
8859 put_cap_ref(in
, CEPH_CAP_FILE_RD
);
8865 return bl
->length();
8868 Client::C_Readahead::C_Readahead(Client
*c
, Fh
*f
) :
8871 f
->readahead
.inc_pending();
8874 Client::C_Readahead::~C_Readahead() {
8875 f
->readahead
.dec_pending();
8879 void Client::C_Readahead::finish(int r
) {
8880 lgeneric_subdout(client
->cct
, client
, 20) << "client." << client
->get_nodeid() << " " << "C_Readahead on " << f
->inode
<< dendl
;
8881 client
->put_cap_ref(f
->inode
.get(), CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_CACHE
);
8884 int Client::_read_async(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
)
8886 const md_config_t
*conf
= cct
->_conf
;
8887 Inode
*in
= f
->inode
.get();
8889 ldout(cct
, 10) << "_read_async " << *in
<< " " << off
<< "~" << len
<< dendl
;
8891 // trim read based on file size?
8892 if (off
>= in
->size
)
8896 if (off
+ len
> in
->size
) {
8897 len
= in
->size
- off
;
8900 ldout(cct
, 10) << " min_bytes=" << f
->readahead
.get_min_readahead_size()
8901 << " max_bytes=" << f
->readahead
.get_max_readahead_size()
8902 << " max_periods=" << conf
->client_readahead_max_periods
<< dendl
;
8904 // read (and possibly block)
8906 Mutex
flock("Client::_read_async flock");
8909 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
, &rvalue
);
8910 r
= objectcacher
->file_read(&in
->oset
, &in
->layout
, in
->snapid
,
8911 off
, len
, bl
, 0, onfinish
);
8913 get_cap_ref(in
, CEPH_CAP_FILE_CACHE
);
8914 client_lock
.Unlock();
8920 put_cap_ref(in
, CEPH_CAP_FILE_CACHE
);
8927 if(f
->readahead
.get_min_readahead_size() > 0) {
8928 pair
<uint64_t, uint64_t> readahead_extent
= f
->readahead
.update(off
, len
, in
->size
);
8929 if (readahead_extent
.second
> 0) {
8930 ldout(cct
, 20) << "readahead " << readahead_extent
.first
<< "~" << readahead_extent
.second
8931 << " (caller wants " << off
<< "~" << len
<< ")" << dendl
;
8932 Context
*onfinish2
= new C_Readahead(this, f
);
8933 int r2
= objectcacher
->file_read(&in
->oset
, &in
->layout
, in
->snapid
,
8934 readahead_extent
.first
, readahead_extent
.second
,
8935 NULL
, 0, onfinish2
);
8937 ldout(cct
, 20) << "readahead initiated, c " << onfinish2
<< dendl
;
8938 get_cap_ref(in
, CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_CACHE
);
8940 ldout(cct
, 20) << "readahead was no-op, already cached" << dendl
;
8949 int Client::_read_sync(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
,
8952 Inode
*in
= f
->inode
.get();
8957 ldout(cct
, 10) << "_read_sync " << *in
<< " " << off
<< "~" << len
<< dendl
;
8959 Mutex
flock("Client::_read_sync flock");
8964 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
, &r
);
8968 filer
->read_trunc(in
->ino
, &in
->layout
, in
->snapid
,
8970 in
->truncate_size
, in
->truncate_seq
,
8972 client_lock
.Unlock();
8979 // if we get ENOENT from OSD, assume 0 bytes returned
8990 bl
->claim_append(tbl
);
8993 if (r
>= 0 && r
< wanted
) {
8994 if (pos
< in
->size
) {
8995 // zero up to known EOF
8996 int64_t some
= in
->size
- pos
;
9018 * we keep count of uncommitted sync writes on the inode, so that
9021 void Client::_sync_write_commit(Inode
*in
)
9023 assert(unsafe_sync_write
> 0);
9024 unsafe_sync_write
--;
9026 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9028 ldout(cct
, 15) << "sync_write_commit unsafe_sync_write = " << unsafe_sync_write
<< dendl
;
9029 if (unsafe_sync_write
== 0 && unmounting
) {
9030 ldout(cct
, 10) << "sync_write_commit -- no more unsafe writes, unmount can proceed" << dendl
;
9031 mount_cond
.Signal();
9035 int Client::write(int fd
, const char *buf
, loff_t size
, loff_t offset
)
9037 Mutex::Locker
lock(client_lock
);
9038 tout(cct
) << "write" << std::endl
;
9039 tout(cct
) << fd
<< std::endl
;
9040 tout(cct
) << size
<< std::endl
;
9041 tout(cct
) << offset
<< std::endl
;
9046 Fh
*fh
= get_filehandle(fd
);
9049 #if defined(__linux__) && defined(O_PATH)
9050 if (fh
->flags
& O_PATH
)
9053 int r
= _write(fh
, offset
, size
, buf
, NULL
, 0);
9054 ldout(cct
, 3) << "write(" << fd
<< ", \"...\", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
9058 int Client::pwritev(int fd
, const struct iovec
*iov
, int iovcnt
, int64_t offset
)
9062 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, true);
9065 int Client::_preadv_pwritev(int fd
, const struct iovec
*iov
, unsigned iovcnt
, int64_t offset
, bool write
)
9067 Mutex::Locker
lock(client_lock
);
9068 tout(cct
) << fd
<< std::endl
;
9069 tout(cct
) << offset
<< std::endl
;
9074 Fh
*fh
= get_filehandle(fd
);
9077 #if defined(__linux__) && defined(O_PATH)
9078 if (fh
->flags
& O_PATH
)
9081 loff_t totallen
= 0;
9082 for (unsigned i
= 0; i
< iovcnt
; i
++) {
9083 totallen
+= iov
[i
].iov_len
;
9086 int w
= _write(fh
, offset
, totallen
, NULL
, iov
, iovcnt
);
9087 ldout(cct
, 3) << "pwritev(" << fd
<< ", \"...\", " << totallen
<< ", " << offset
<< ") = " << w
<< dendl
;
9091 int r
= _read(fh
, offset
, totallen
, &bl
);
9092 ldout(cct
, 3) << "preadv(" << fd
<< ", " << offset
<< ") = " << r
<< dendl
;
9097 for (unsigned j
= 0, resid
= r
; j
< iovcnt
&& resid
> 0; j
++) {
9099 * This piece of code aims to handle the case that bufferlist does not have enough data
9100 * to fill in the iov
9102 if (resid
< iov
[j
].iov_len
) {
9103 bl
.copy(bufoff
, resid
, (char *)iov
[j
].iov_base
);
9106 bl
.copy(bufoff
, iov
[j
].iov_len
, (char *)iov
[j
].iov_base
);
9108 resid
-= iov
[j
].iov_len
;
9109 bufoff
+= iov
[j
].iov_len
;
9115 int Client::_write(Fh
*f
, int64_t offset
, uint64_t size
, const char *buf
,
9116 const struct iovec
*iov
, int iovcnt
)
9118 if ((uint64_t)(offset
+size
) > mdsmap
->get_max_filesize()) //too large!
9121 //ldout(cct, 7) << "write fh " << fh << " size " << size << " offset " << offset << dendl;
9122 Inode
*in
= f
->inode
.get();
9124 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
)) {
9128 assert(in
->snapid
== CEPH_NOSNAP
);
9130 // was Fh opened as writeable?
9131 if ((f
->mode
& CEPH_FILE_MODE_WR
) == 0)
9135 uint64_t endoff
= offset
+ size
;
9136 std::list
<InodeRef
> quota_roots
;
9137 if (endoff
> in
->size
&&
9138 is_quota_bytes_exceeded(in
, endoff
- in
->size
, f
->actor_perms
, "a_roots
)) {
9142 // use/adjust fd pos?
9146 * FIXME: this is racy in that we may block _after_ this point waiting for caps, and size may
9147 * change out from under us.
9149 if (f
->flags
& O_APPEND
) {
9150 int r
= _lseek(f
, 0, SEEK_END
);
9157 f
->pos
= offset
+size
;
9161 //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
9163 ldout(cct
, 10) << "cur file size is " << in
->size
<< dendl
;
9166 utime_t start
= ceph_clock_now();
9168 if (in
->inline_version
== 0) {
9169 int r
= _getattr(in
, CEPH_STAT_CAP_INLINE_DATA
, f
->actor_perms
, true);
9172 assert(in
->inline_version
> 0);
9175 // copy into fresh buffer (since our write may be resub, async)
9179 bl
.append(buf
, size
);
9181 for (int i
= 0; i
< iovcnt
; i
++) {
9182 if (iov
[i
].iov_len
> 0) {
9183 bl
.append((const char *)iov
[i
].iov_base
, iov
[i
].iov_len
);
9189 uint64_t totalwritten
;
9191 int r
= get_caps(in
, CEPH_CAP_FILE_WR
|CEPH_CAP_AUTH_SHARED
,
9192 CEPH_CAP_FILE_BUFFER
, &have
, endoff
);
9196 /* clear the setuid/setgid bits, if any */
9197 if (unlikely(in
->mode
& (S_ISUID
|S_ISGID
)) && size
> 0) {
9198 struct ceph_statx stx
= { 0 };
9200 put_cap_ref(in
, CEPH_CAP_AUTH_SHARED
);
9201 r
= __setattrx(in
, &stx
, CEPH_SETATTR_KILL_SGUID
, f
->actor_perms
);
9205 put_cap_ref(in
, CEPH_CAP_AUTH_SHARED
);
9208 if (f
->flags
& O_DIRECT
)
9209 have
&= ~CEPH_CAP_FILE_BUFFER
;
9211 ldout(cct
, 10) << " snaprealm " << *in
->snaprealm
<< dendl
;
9213 Mutex
uninline_flock("Client::_write_uninline_data flock");
9215 bool uninline_done
= false;
9216 int uninline_ret
= 0;
9217 Context
*onuninline
= NULL
;
9219 if (in
->inline_version
< CEPH_INLINE_NONE
) {
9220 if (endoff
> cct
->_conf
->client_max_inline_size
||
9221 endoff
> CEPH_INLINE_MAX_SIZE
||
9222 !(have
& CEPH_CAP_FILE_BUFFER
)) {
9223 onuninline
= new C_SafeCond(&uninline_flock
,
9227 uninline_data(in
, onuninline
);
9229 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9231 uint32_t len
= in
->inline_data
.length();
9234 in
->inline_data
.copy(endoff
, len
- endoff
, bl
);
9237 in
->inline_data
.splice(offset
, len
- offset
);
9238 else if (offset
> len
)
9239 in
->inline_data
.append_zero(offset
- len
);
9241 in
->inline_data
.append(bl
);
9242 in
->inline_version
++;
9244 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9250 if (cct
->_conf
->client_oc
&& (have
& CEPH_CAP_FILE_BUFFER
)) {
9251 // do buffered write
9252 if (!in
->oset
.dirty_or_tx
)
9253 get_cap_ref(in
, CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_BUFFER
);
9255 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9257 // async, caching, non-blocking.
9258 r
= objectcacher
->file_write(&in
->oset
, &in
->layout
,
9259 in
->snaprealm
->get_snap_context(),
9260 offset
, size
, bl
, ceph::real_clock::now(),
9262 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9267 // flush cached write if O_SYNC is set on file fh
9268 // O_DSYNC == O_SYNC on linux < 2.6.33
9269 // O_SYNC = __O_SYNC | O_DSYNC on linux >= 2.6.33
9270 if ((f
->flags
& O_SYNC
) || (f
->flags
& O_DSYNC
)) {
9271 _flush_range(in
, offset
, size
);
9274 if (f
->flags
& O_DIRECT
)
9275 _flush_range(in
, offset
, size
);
9277 // simple, non-atomic sync write
9278 Mutex
flock("Client::_write flock");
9281 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
);
9283 unsafe_sync_write
++;
9284 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
); // released by onsafe callback
9286 filer
->write_trunc(in
->ino
, &in
->layout
, in
->snaprealm
->get_snap_context(),
9287 offset
, size
, bl
, ceph::real_clock::now(), 0,
9288 in
->truncate_size
, in
->truncate_seq
,
9290 client_lock
.Unlock();
9297 _sync_write_commit(in
);
9300 // if we get here, write was successful, update client metadata
9303 lat
= ceph_clock_now();
9305 logger
->tinc(l_c_wrlat
, lat
);
9307 totalwritten
= size
;
9308 r
= (int)totalwritten
;
9311 if (totalwritten
+ offset
> in
->size
) {
9312 in
->size
= totalwritten
+ offset
;
9313 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
9315 if (is_quota_bytes_approaching(in
, quota_roots
)) {
9316 check_caps(in
, CHECK_CAPS_NODELAY
);
9317 } else if (is_max_size_approaching(in
)) {
9321 ldout(cct
, 7) << "wrote to " << totalwritten
+offset
<< ", extending file size" << dendl
;
9323 ldout(cct
, 7) << "wrote to " << totalwritten
+offset
<< ", leaving file size at " << in
->size
<< dendl
;
9327 in
->mtime
= ceph_clock_now();
9329 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
9334 client_lock
.Unlock();
9335 uninline_flock
.Lock();
9336 while (!uninline_done
)
9337 uninline_cond
.Wait(uninline_flock
);
9338 uninline_flock
.Unlock();
9341 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
9342 in
->inline_data
.clear();
9343 in
->inline_version
= CEPH_INLINE_NONE
;
9344 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
9350 put_cap_ref(in
, CEPH_CAP_FILE_WR
);
9354 int Client::_flush(Fh
*f
)
9356 Inode
*in
= f
->inode
.get();
9357 int err
= f
->take_async_err();
9359 ldout(cct
, 1) << __func__
<< ": " << f
<< " on inode " << *in
<< " caught async_err = "
9360 << cpp_strerror(err
) << dendl
;
9362 ldout(cct
, 10) << __func__
<< ": " << f
<< " on inode " << *in
<< " no async_err state" << dendl
;
9368 int Client::truncate(const char *relpath
, loff_t length
, const UserPerm
& perms
)
9370 struct ceph_statx stx
;
9371 stx
.stx_size
= length
;
9372 return setattrx(relpath
, &stx
, CEPH_SETATTR_SIZE
, perms
);
9375 int Client::ftruncate(int fd
, loff_t length
, const UserPerm
& perms
)
9377 Mutex::Locker
lock(client_lock
);
9378 tout(cct
) << "ftruncate" << std::endl
;
9379 tout(cct
) << fd
<< std::endl
;
9380 tout(cct
) << length
<< std::endl
;
9385 Fh
*f
= get_filehandle(fd
);
9388 #if defined(__linux__) && defined(O_PATH)
9389 if (f
->flags
& O_PATH
)
9393 attr
.st_size
= length
;
9394 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_SIZE
, perms
);
9397 int Client::fsync(int fd
, bool syncdataonly
)
9399 Mutex::Locker
lock(client_lock
);
9400 tout(cct
) << "fsync" << std::endl
;
9401 tout(cct
) << fd
<< std::endl
;
9402 tout(cct
) << syncdataonly
<< std::endl
;
9407 Fh
*f
= get_filehandle(fd
);
9410 #if defined(__linux__) && defined(O_PATH)
9411 if (f
->flags
& O_PATH
)
9414 int r
= _fsync(f
, syncdataonly
);
9416 // The IOs in this fsync were okay, but maybe something happened
9417 // in the background that we shoudl be reporting?
9418 r
= f
->take_async_err();
9419 ldout(cct
, 3) << "fsync(" << fd
<< ", " << syncdataonly
9420 << ") = 0, async_err = " << r
<< dendl
;
9422 // Assume that an error we encountered during fsync, even reported
9423 // synchronously, would also have applied the error to the Fh, and we
9424 // should clear it here to avoid returning the same error again on next
9426 ldout(cct
, 3) << "fsync(" << fd
<< ", " << syncdataonly
<< ") = "
9428 f
->take_async_err();
9433 int Client::_fsync(Inode
*in
, bool syncdataonly
)
9436 Mutex
lock("Client::_fsync::lock");
9439 C_SafeCond
*object_cacher_completion
= NULL
;
9440 ceph_tid_t flush_tid
= 0;
9443 ldout(cct
, 3) << "_fsync on " << *in
<< " " << (syncdataonly
? "(dataonly)":"(data+metadata)") << dendl
;
9445 if (cct
->_conf
->client_oc
) {
9446 object_cacher_completion
= new C_SafeCond(&lock
, &cond
, &done
, &r
);
9447 tmp_ref
= in
; // take a reference; C_SafeCond doesn't and _flush won't either
9448 _flush(in
, object_cacher_completion
);
9449 ldout(cct
, 15) << "using return-valued form of _fsync" << dendl
;
9452 if (!syncdataonly
&& in
->dirty_caps
) {
9453 check_caps(in
, CHECK_CAPS_NODELAY
|CHECK_CAPS_SYNCHRONOUS
);
9454 if (in
->flushing_caps
)
9455 flush_tid
= last_flush_tid
;
9456 } else ldout(cct
, 10) << "no metadata needs to commit" << dendl
;
9458 if (!syncdataonly
&& !in
->unsafe_ops
.empty()) {
9461 MetaRequest
*req
= in
->unsafe_ops
.back();
9462 ldout(cct
, 15) << "waiting on unsafe requests, last tid " << req
->get_tid() << dendl
;
9465 wait_on_list(req
->waitfor_safe
);
9469 if (object_cacher_completion
) { // wait on a real reply instead of guessing
9470 client_lock
.Unlock();
9472 ldout(cct
, 15) << "waiting on data to flush" << dendl
;
9477 ldout(cct
, 15) << "got " << r
<< " from flush writeback" << dendl
;
9479 // FIXME: this can starve
9480 while (in
->cap_refs
[CEPH_CAP_FILE_BUFFER
] > 0) {
9481 ldout(cct
, 10) << "ino " << in
->ino
<< " has " << in
->cap_refs
[CEPH_CAP_FILE_BUFFER
]
9482 << " uncommitted, waiting" << dendl
;
9483 wait_on_list(in
->waitfor_commit
);
9489 wait_sync_caps(in
, flush_tid
);
9491 ldout(cct
, 10) << "ino " << in
->ino
<< " has no uncommitted writes" << dendl
;
9493 ldout(cct
, 1) << "ino " << in
->ino
<< " failed to commit to disk! "
9494 << cpp_strerror(-r
) << dendl
;
9500 int Client::_fsync(Fh
*f
, bool syncdataonly
)
9502 ldout(cct
, 3) << "_fsync(" << f
<< ", " << (syncdataonly
? "dataonly)":"data+metadata)") << dendl
;
9503 return _fsync(f
->inode
.get(), syncdataonly
);
9506 int Client::fstat(int fd
, struct stat
*stbuf
, const UserPerm
& perms
, int mask
)
9508 Mutex::Locker
lock(client_lock
);
9509 tout(cct
) << "fstat mask " << hex
<< mask
<< dec
<< std::endl
;
9510 tout(cct
) << fd
<< std::endl
;
9515 Fh
*f
= get_filehandle(fd
);
9518 int r
= _getattr(f
->inode
, mask
, perms
);
9521 fill_stat(f
->inode
, stbuf
, NULL
);
9522 ldout(cct
, 3) << "fstat(" << fd
<< ", " << stbuf
<< ") = " << r
<< dendl
;
9526 int Client::fstatx(int fd
, struct ceph_statx
*stx
, const UserPerm
& perms
,
9527 unsigned int want
, unsigned int flags
)
9529 Mutex::Locker
lock(client_lock
);
9530 tout(cct
) << "fstatx flags " << hex
<< flags
<< " want " << want
<< dec
<< std::endl
;
9531 tout(cct
) << fd
<< std::endl
;
9536 Fh
*f
= get_filehandle(fd
);
9540 unsigned mask
= statx_to_mask(flags
, want
);
9543 if (mask
&& !f
->inode
->caps_issued_mask(mask
, true)) {
9544 r
= _getattr(f
->inode
, mask
, perms
);
9546 ldout(cct
, 3) << "fstatx exit on error!" << dendl
;
9551 fill_statx(f
->inode
, mask
, stx
);
9552 ldout(cct
, 3) << "fstatx(" << fd
<< ", " << stx
<< ") = " << r
<< dendl
;
9556 // not written yet, but i want to link!
9558 int Client::chdir(const char *relpath
, std::string
&new_cwd
,
9559 const UserPerm
& perms
)
9561 Mutex::Locker
lock(client_lock
);
9562 tout(cct
) << "chdir" << std::endl
;
9563 tout(cct
) << relpath
<< std::endl
;
9568 filepath
path(relpath
);
9570 int r
= path_walk(path
, &in
, perms
);
9575 ldout(cct
, 3) << "chdir(" << relpath
<< ") cwd now " << cwd
->ino
<< dendl
;
9577 _getcwd(new_cwd
, perms
);
9581 void Client::_getcwd(string
& dir
, const UserPerm
& perms
)
9584 ldout(cct
, 10) << "getcwd " << *cwd
<< dendl
;
9586 Inode
*in
= cwd
.get();
9587 while (in
!= root
) {
9588 assert(in
->dn_set
.size() < 2); // dirs can't be hard-linked
9590 // A cwd or ancester is unlinked
9591 if (in
->dn_set
.empty()) {
9595 Dentry
*dn
= in
->get_first_parent();
9600 ldout(cct
, 10) << "getcwd looking up parent for " << *in
<< dendl
;
9601 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
9602 filepath
path(in
->ino
);
9603 req
->set_filepath(path
);
9605 int res
= make_request(req
, perms
);
9614 path
.push_front_dentry(dn
->name
);
9615 in
= dn
->dir
->parent_inode
;
9618 dir
+= path
.get_path();
9621 void Client::getcwd(string
& dir
, const UserPerm
& perms
)
9623 Mutex::Locker
l(client_lock
);
9625 _getcwd(dir
, perms
);
9628 int Client::statfs(const char *path
, struct statvfs
*stbuf
,
9629 const UserPerm
& perms
)
9631 Mutex::Locker
l(client_lock
);
9632 tout(cct
) << "statfs" << std::endl
;
9640 const vector
<int64_t> &data_pools
= mdsmap
->get_data_pools();
9641 if (data_pools
.size() == 1) {
9642 objecter
->get_fs_stats(stats
, data_pools
[0], &cond
);
9644 objecter
->get_fs_stats(stats
, boost::optional
<int64_t>(), &cond
);
9647 client_lock
.Unlock();
9648 int rval
= cond
.wait();
9652 ldout(cct
, 1) << "underlying call to statfs returned error: "
9653 << cpp_strerror(rval
)
9658 memset(stbuf
, 0, sizeof(*stbuf
));
9661 * we're going to set a block size of 4MB so we can represent larger
9662 * FSes without overflowing. Additionally convert the space
9663 * measurements from KB to bytes while making them in terms of
9664 * blocks. We use 4MB only because it is big enough, and because it
9665 * actually *is* the (ceph) default block size.
9667 const int CEPH_BLOCK_SHIFT
= 22;
9668 stbuf
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
9669 stbuf
->f_bsize
= 1 << CEPH_BLOCK_SHIFT
;
9670 stbuf
->f_files
= stats
.num_objects
;
9671 stbuf
->f_ffree
= -1;
9672 stbuf
->f_favail
= -1;
9673 stbuf
->f_fsid
= -1; // ??
9674 stbuf
->f_flag
= 0; // ??
9675 stbuf
->f_namemax
= NAME_MAX
;
9677 // Usually quota_root will == root_ancestor, but if the mount root has no
9678 // quota but we can see a parent of it that does have a quota, we'll
9679 // respect that one instead.
9680 assert(root
!= nullptr);
9681 Inode
*quota_root
= root
->quota
.is_enable() ? root
: get_quota_root(root
, perms
);
9683 // get_quota_root should always give us something
9684 // because client quotas are always enabled
9685 assert(quota_root
!= nullptr);
9687 if (quota_root
&& cct
->_conf
->client_quota_df
&& quota_root
->quota
.max_bytes
) {
9689 // Skip the getattr if any sessions are stale, as we don't want to
9690 // block `df` if this client has e.g. been evicted, or if the MDS cluster
9692 if (!_any_stale_sessions()) {
9693 int r
= _getattr(quota_root
, 0, perms
, true);
9695 // Ignore return value: error getting latest inode metadata is not a good
9696 // reason to break "df".
9697 lderr(cct
) << "Error in getattr on quota root 0x"
9698 << std::hex
<< quota_root
->ino
<< std::dec
9699 << " statfs result may be outdated" << dendl
;
9703 // Special case: if there is a size quota set on the Inode acting
9704 // as the root for this client mount, then report the quota status
9705 // as the filesystem statistics.
9706 const fsblkcnt_t total
= quota_root
->quota
.max_bytes
>> CEPH_BLOCK_SHIFT
;
9707 const fsblkcnt_t used
= quota_root
->rstat
.rbytes
>> CEPH_BLOCK_SHIFT
;
9708 // It is possible for a quota to be exceeded: arithmetic here must
9709 // handle case where used > total.
9710 const fsblkcnt_t free
= total
> used
? total
- used
: 0;
9712 stbuf
->f_blocks
= total
;
9713 stbuf
->f_bfree
= free
;
9714 stbuf
->f_bavail
= free
;
9716 // General case: report the cluster statistics returned from RADOS. Because
9717 // multiple pools may be used without one filesystem namespace via
9718 // layouts, this is the most correct thing we can do.
9719 stbuf
->f_blocks
= stats
.kb
>> (CEPH_BLOCK_SHIFT
- 10);
9720 stbuf
->f_bfree
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
9721 stbuf
->f_bavail
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
9727 int Client::_do_filelock(Inode
*in
, Fh
*fh
, int lock_type
, int op
, int sleep
,
9728 struct flock
*fl
, uint64_t owner
, bool removing
)
9730 ldout(cct
, 10) << "_do_filelock ino " << in
->ino
9731 << (lock_type
== CEPH_LOCK_FCNTL
? " fcntl" : " flock")
9732 << " type " << fl
->l_type
<< " owner " << owner
9733 << " " << fl
->l_start
<< "~" << fl
->l_len
<< dendl
;
9736 if (F_RDLCK
== fl
->l_type
)
9737 lock_cmd
= CEPH_LOCK_SHARED
;
9738 else if (F_WRLCK
== fl
->l_type
)
9739 lock_cmd
= CEPH_LOCK_EXCL
;
9740 else if (F_UNLCK
== fl
->l_type
)
9741 lock_cmd
= CEPH_LOCK_UNLOCK
;
9745 if (op
!= CEPH_MDS_OP_SETFILELOCK
|| lock_cmd
== CEPH_LOCK_UNLOCK
)
9749 * Set the most significant bit, so that MDS knows the 'owner'
9750 * is sufficient to identify the owner of lock. (old code uses
9751 * both 'owner' and 'pid')
9753 owner
|= (1ULL << 63);
9755 MetaRequest
*req
= new MetaRequest(op
);
9757 in
->make_nosnap_relative_path(path
);
9758 req
->set_filepath(path
);
9761 req
->head
.args
.filelock_change
.rule
= lock_type
;
9762 req
->head
.args
.filelock_change
.type
= lock_cmd
;
9763 req
->head
.args
.filelock_change
.owner
= owner
;
9764 req
->head
.args
.filelock_change
.pid
= fl
->l_pid
;
9765 req
->head
.args
.filelock_change
.start
= fl
->l_start
;
9766 req
->head
.args
.filelock_change
.length
= fl
->l_len
;
9767 req
->head
.args
.filelock_change
.wait
= sleep
;
9772 if (sleep
&& switch_interrupt_cb
) {
9774 switch_interrupt_cb(callback_handle
, req
->get());
9775 ret
= make_request(req
, fh
->actor_perms
, NULL
, NULL
, -1, &bl
);
9776 // disable interrupt
9777 switch_interrupt_cb(callback_handle
, NULL
);
9778 if (ret
== 0 && req
->aborted()) {
9779 // effect of this lock request has been revoked by the 'lock intr' request
9780 ret
= req
->get_abort_code();
9784 ret
= make_request(req
, fh
->actor_perms
, NULL
, NULL
, -1, &bl
);
9788 if (op
== CEPH_MDS_OP_GETFILELOCK
) {
9789 ceph_filelock filelock
;
9790 bufferlist::iterator p
= bl
.begin();
9791 ::decode(filelock
, p
);
9793 if (CEPH_LOCK_SHARED
== filelock
.type
)
9794 fl
->l_type
= F_RDLCK
;
9795 else if (CEPH_LOCK_EXCL
== filelock
.type
)
9796 fl
->l_type
= F_WRLCK
;
9798 fl
->l_type
= F_UNLCK
;
9800 fl
->l_whence
= SEEK_SET
;
9801 fl
->l_start
= filelock
.start
;
9802 fl
->l_len
= filelock
.length
;
9803 fl
->l_pid
= filelock
.pid
;
9804 } else if (op
== CEPH_MDS_OP_SETFILELOCK
) {
9805 ceph_lock_state_t
*lock_state
;
9806 if (lock_type
== CEPH_LOCK_FCNTL
) {
9807 if (!in
->fcntl_locks
)
9808 in
->fcntl_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FCNTL
);
9809 lock_state
= in
->fcntl_locks
;
9810 } else if (lock_type
== CEPH_LOCK_FLOCK
) {
9811 if (!in
->flock_locks
)
9812 in
->flock_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FLOCK
);
9813 lock_state
= in
->flock_locks
;
9818 _update_lock_state(fl
, owner
, lock_state
);
9821 if (lock_type
== CEPH_LOCK_FCNTL
) {
9822 if (!fh
->fcntl_locks
)
9823 fh
->fcntl_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FCNTL
);
9824 lock_state
= fh
->fcntl_locks
;
9826 if (!fh
->flock_locks
)
9827 fh
->flock_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FLOCK
);
9828 lock_state
= fh
->flock_locks
;
9830 _update_lock_state(fl
, owner
, lock_state
);
9838 int Client::_interrupt_filelock(MetaRequest
*req
)
9840 // Set abort code, but do not kick. The abort code prevents the request
9841 // from being re-sent.
9844 return 0; // haven't sent the request
9846 Inode
*in
= req
->inode();
9849 if (req
->head
.args
.filelock_change
.rule
== CEPH_LOCK_FLOCK
)
9850 lock_type
= CEPH_LOCK_FLOCK_INTR
;
9851 else if (req
->head
.args
.filelock_change
.rule
== CEPH_LOCK_FCNTL
)
9852 lock_type
= CEPH_LOCK_FCNTL_INTR
;
9858 MetaRequest
*intr_req
= new MetaRequest(CEPH_MDS_OP_SETFILELOCK
);
9860 in
->make_nosnap_relative_path(path
);
9861 intr_req
->set_filepath(path
);
9862 intr_req
->set_inode(in
);
9863 intr_req
->head
.args
.filelock_change
= req
->head
.args
.filelock_change
;
9864 intr_req
->head
.args
.filelock_change
.rule
= lock_type
;
9865 intr_req
->head
.args
.filelock_change
.type
= CEPH_LOCK_UNLOCK
;
9867 UserPerm
perms(req
->get_uid(), req
->get_gid());
9868 return make_request(intr_req
, perms
, NULL
, NULL
, -1);
9871 void Client::_encode_filelocks(Inode
*in
, bufferlist
& bl
)
9873 if (!in
->fcntl_locks
&& !in
->flock_locks
)
9876 unsigned nr_fcntl_locks
= in
->fcntl_locks
? in
->fcntl_locks
->held_locks
.size() : 0;
9877 ::encode(nr_fcntl_locks
, bl
);
9878 if (nr_fcntl_locks
) {
9879 ceph_lock_state_t
* lock_state
= in
->fcntl_locks
;
9880 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9881 p
!= lock_state
->held_locks
.end();
9883 ::encode(p
->second
, bl
);
9886 unsigned nr_flock_locks
= in
->flock_locks
? in
->flock_locks
->held_locks
.size() : 0;
9887 ::encode(nr_flock_locks
, bl
);
9888 if (nr_flock_locks
) {
9889 ceph_lock_state_t
* lock_state
= in
->flock_locks
;
9890 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9891 p
!= lock_state
->held_locks
.end();
9893 ::encode(p
->second
, bl
);
9896 ldout(cct
, 10) << "_encode_filelocks ino " << in
->ino
<< ", " << nr_fcntl_locks
9897 << " fcntl locks, " << nr_flock_locks
<< " flock locks" << dendl
;
9900 void Client::_release_filelocks(Fh
*fh
)
9902 if (!fh
->fcntl_locks
&& !fh
->flock_locks
)
9905 Inode
*in
= fh
->inode
.get();
9906 ldout(cct
, 10) << "_release_filelocks " << fh
<< " ino " << in
->ino
<< dendl
;
9908 list
<pair
<int, ceph_filelock
> > to_release
;
9910 if (fh
->fcntl_locks
) {
9911 ceph_lock_state_t
* lock_state
= fh
->fcntl_locks
;
9912 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9913 p
!= lock_state
->held_locks
.end();
9915 to_release
.push_back(pair
<int, ceph_filelock
>(CEPH_LOCK_FCNTL
, p
->second
));
9916 delete fh
->fcntl_locks
;
9918 if (fh
->flock_locks
) {
9919 ceph_lock_state_t
* lock_state
= fh
->flock_locks
;
9920 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9921 p
!= lock_state
->held_locks
.end();
9923 to_release
.push_back(pair
<int, ceph_filelock
>(CEPH_LOCK_FLOCK
, p
->second
));
9924 delete fh
->flock_locks
;
9927 if (to_release
.empty())
9931 memset(&fl
, 0, sizeof(fl
));
9932 fl
.l_whence
= SEEK_SET
;
9933 fl
.l_type
= F_UNLCK
;
9935 for (list
<pair
<int, ceph_filelock
> >::iterator p
= to_release
.begin();
9936 p
!= to_release
.end();
9938 fl
.l_start
= p
->second
.start
;
9939 fl
.l_len
= p
->second
.length
;
9940 fl
.l_pid
= p
->second
.pid
;
9941 _do_filelock(in
, fh
, p
->first
, CEPH_MDS_OP_SETFILELOCK
, 0, &fl
,
9942 p
->second
.owner
, true);
9946 void Client::_update_lock_state(struct flock
*fl
, uint64_t owner
,
9947 ceph_lock_state_t
*lock_state
)
9950 if (F_RDLCK
== fl
->l_type
)
9951 lock_cmd
= CEPH_LOCK_SHARED
;
9952 else if (F_WRLCK
== fl
->l_type
)
9953 lock_cmd
= CEPH_LOCK_EXCL
;
9955 lock_cmd
= CEPH_LOCK_UNLOCK
;;
9957 ceph_filelock filelock
;
9958 filelock
.start
= fl
->l_start
;
9959 filelock
.length
= fl
->l_len
;
9960 filelock
.client
= 0;
9961 // see comment in _do_filelock()
9962 filelock
.owner
= owner
| (1ULL << 63);
9963 filelock
.pid
= fl
->l_pid
;
9964 filelock
.type
= lock_cmd
;
9966 if (filelock
.type
== CEPH_LOCK_UNLOCK
) {
9967 list
<ceph_filelock
> activated_locks
;
9968 lock_state
->remove_lock(filelock
, activated_locks
);
9970 bool r
= lock_state
->add_lock(filelock
, false, false, NULL
);
9975 int Client::_getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
)
9977 Inode
*in
= fh
->inode
.get();
9978 ldout(cct
, 10) << "_getlk " << fh
<< " ino " << in
->ino
<< dendl
;
9979 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FCNTL
, CEPH_MDS_OP_GETFILELOCK
, 0, fl
, owner
);
9983 int Client::_setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
)
9985 Inode
*in
= fh
->inode
.get();
9986 ldout(cct
, 10) << "_setlk " << fh
<< " ino " << in
->ino
<< dendl
;
9987 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FCNTL
, CEPH_MDS_OP_SETFILELOCK
, sleep
, fl
, owner
);
9988 ldout(cct
, 10) << "_setlk " << fh
<< " ino " << in
->ino
<< " result=" << ret
<< dendl
;
9992 int Client::_flock(Fh
*fh
, int cmd
, uint64_t owner
)
9994 Inode
*in
= fh
->inode
.get();
9995 ldout(cct
, 10) << "_flock " << fh
<< " ino " << in
->ino
<< dendl
;
9997 int sleep
= !(cmd
& LOCK_NB
);
10016 memset(&fl
, 0, sizeof(fl
));
10018 fl
.l_whence
= SEEK_SET
;
10020 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FLOCK
, CEPH_MDS_OP_SETFILELOCK
, sleep
, &fl
, owner
);
10021 ldout(cct
, 10) << "_flock " << fh
<< " ino " << in
->ino
<< " result=" << ret
<< dendl
;
10025 int Client::ll_statfs(Inode
*in
, struct statvfs
*stbuf
, const UserPerm
& perms
)
10027 /* Since the only thing this does is wrap a call to statfs, and
10028 statfs takes a lock, it doesn't seem we have a need to split it
10030 return statfs(0, stbuf
, perms
);
10033 void Client::ll_register_callbacks(struct client_callback_args
*args
)
10037 Mutex::Locker
l(client_lock
);
10038 ldout(cct
, 10) << "ll_register_callbacks cb " << args
->handle
10039 << " invalidate_ino_cb " << args
->ino_cb
10040 << " invalidate_dentry_cb " << args
->dentry_cb
10041 << " switch_interrupt_cb " << args
->switch_intr_cb
10042 << " remount_cb " << args
->remount_cb
10044 callback_handle
= args
->handle
;
10045 if (args
->ino_cb
) {
10046 ino_invalidate_cb
= args
->ino_cb
;
10047 async_ino_invalidator
.start();
10049 if (args
->dentry_cb
) {
10050 dentry_invalidate_cb
= args
->dentry_cb
;
10051 async_dentry_invalidator
.start();
10053 if (args
->switch_intr_cb
) {
10054 switch_interrupt_cb
= args
->switch_intr_cb
;
10055 interrupt_finisher
.start();
10057 if (args
->remount_cb
) {
10058 remount_cb
= args
->remount_cb
;
10059 remount_finisher
.start();
10061 umask_cb
= args
->umask_cb
;
10064 int Client::test_dentry_handling(bool can_invalidate
)
10068 can_invalidate_dentries
= can_invalidate
;
10070 if (can_invalidate_dentries
) {
10071 assert(dentry_invalidate_cb
);
10072 ldout(cct
, 1) << "using dentry_invalidate_cb" << dendl
;
10074 } else if (remount_cb
) {
10075 ldout(cct
, 1) << "using remount_cb" << dendl
;
10079 bool should_abort
= cct
->_conf
->get_val
<bool>("client_die_on_failed_dentry_invalidate");
10080 if (should_abort
) {
10081 lderr(cct
) << "no method to invalidate kernel dentry cache; quitting!" << dendl
;
10084 lderr(cct
) << "no method to invalidate kernel dentry cache; expect issues!" << dendl
;
10090 int Client::_sync_fs()
10092 ldout(cct
, 10) << "_sync_fs" << dendl
;
10095 Mutex
lock("Client::_fsync::lock");
10097 bool flush_done
= false;
10098 if (cct
->_conf
->client_oc
)
10099 objectcacher
->flush_all(new C_SafeCond(&lock
, &cond
, &flush_done
));
10105 ceph_tid_t flush_tid
= last_flush_tid
;
10107 // wait for unsafe mds requests
10108 wait_unsafe_requests();
10110 wait_sync_caps(flush_tid
);
10113 client_lock
.Unlock();
10115 ldout(cct
, 15) << "waiting on data to flush" << dendl
;
10116 while (!flush_done
)
10119 client_lock
.Lock();
10125 int Client::sync_fs()
10127 Mutex::Locker
l(client_lock
);
10135 int64_t Client::drop_caches()
10137 Mutex::Locker
l(client_lock
);
10138 return objectcacher
->release_all();
10142 int Client::lazyio_propogate(int fd
, loff_t offset
, size_t count
)
10144 Mutex::Locker
l(client_lock
);
10145 ldout(cct
, 3) << "op: client->lazyio_propogate(" << fd
10146 << ", " << offset
<< ", " << count
<< ")" << dendl
;
10148 Fh
*f
= get_filehandle(fd
);
10158 int Client::lazyio_synchronize(int fd
, loff_t offset
, size_t count
)
10160 Mutex::Locker
l(client_lock
);
10161 ldout(cct
, 3) << "op: client->lazyio_synchronize(" << fd
10162 << ", " << offset
<< ", " << count
<< ")" << dendl
;
10164 Fh
*f
= get_filehandle(fd
);
10167 Inode
*in
= f
->inode
.get();
10176 // =============================
10179 int Client::mksnap(const char *relpath
, const char *name
, const UserPerm
& perm
)
10181 Mutex::Locker
l(client_lock
);
10186 filepath
path(relpath
);
10188 int r
= path_walk(path
, &in
, perm
);
10191 if (cct
->_conf
->client_permissions
) {
10192 r
= may_create(in
.get(), perm
);
10196 Inode
*snapdir
= open_snapdir(in
.get());
10197 return _mkdir(snapdir
, name
, 0, perm
);
10200 int Client::rmsnap(const char *relpath
, const char *name
, const UserPerm
& perms
)
10202 Mutex::Locker
l(client_lock
);
10207 filepath
path(relpath
);
10209 int r
= path_walk(path
, &in
, perms
);
10212 if (cct
->_conf
->client_permissions
) {
10213 r
= may_delete(in
.get(), NULL
, perms
);
10217 Inode
*snapdir
= open_snapdir(in
.get());
10218 return _rmdir(snapdir
, name
, perms
);
10221 // =============================
10224 int Client::get_caps_issued(int fd
) {
10226 Mutex::Locker
lock(client_lock
);
10231 Fh
*f
= get_filehandle(fd
);
10235 return f
->inode
->caps_issued();
10238 int Client::get_caps_issued(const char *path
, const UserPerm
& perms
)
10240 Mutex::Locker
lock(client_lock
);
10247 int r
= path_walk(p
, &in
, perms
, true);
10250 return in
->caps_issued();
10253 // =========================================
10256 Inode
*Client::open_snapdir(Inode
*diri
)
10259 vinodeno_t
vino(diri
->ino
, CEPH_SNAPDIR
);
10260 if (!inode_map
.count(vino
)) {
10261 in
= new Inode(this, vino
, &diri
->layout
);
10263 in
->ino
= diri
->ino
;
10264 in
->snapid
= CEPH_SNAPDIR
;
10265 in
->mode
= diri
->mode
;
10266 in
->uid
= diri
->uid
;
10267 in
->gid
= diri
->gid
;
10268 in
->mtime
= diri
->mtime
;
10269 in
->ctime
= diri
->ctime
;
10270 in
->btime
= diri
->btime
;
10271 in
->size
= diri
->size
;
10272 in
->change_attr
= diri
->change_attr
;
10274 in
->dirfragtree
.clear();
10275 in
->snapdir_parent
= diri
;
10276 diri
->flags
|= I_SNAPDIR_OPEN
;
10277 inode_map
[vino
] = in
;
10278 if (use_faked_inos())
10279 _assign_faked_ino(in
);
10280 ldout(cct
, 10) << "open_snapdir created snapshot inode " << *in
<< dendl
;
10282 in
= inode_map
[vino
];
10283 ldout(cct
, 10) << "open_snapdir had snapshot inode " << *in
<< dendl
;
10288 int Client::ll_lookup(Inode
*parent
, const char *name
, struct stat
*attr
,
10289 Inode
**out
, const UserPerm
& perms
)
10291 Mutex::Locker
lock(client_lock
);
10292 vinodeno_t vparent
= _get_vino(parent
);
10293 ldout(cct
, 3) << "ll_lookup " << vparent
<< " " << name
<< dendl
;
10294 tout(cct
) << "ll_lookup" << std::endl
;
10295 tout(cct
) << name
<< std::endl
;
10301 if (!cct
->_conf
->fuse_default_permissions
) {
10302 r
= may_lookup(parent
, perms
);
10307 string
dname(name
);
10310 r
= _lookup(parent
, dname
, CEPH_STAT_CAP_INODE_ALL
, &in
, perms
);
10317 fill_stat(in
, attr
);
10321 ldout(cct
, 3) << "ll_lookup " << vparent
<< " " << name
10322 << " -> " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
10323 tout(cct
) << attr
->st_ino
<< std::endl
;
10328 int Client::ll_lookupx(Inode
*parent
, const char *name
, Inode
**out
,
10329 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
10330 const UserPerm
& perms
)
10332 Mutex::Locker
lock(client_lock
);
10333 vinodeno_t vparent
= _get_vino(parent
);
10334 ldout(cct
, 3) << "ll_lookupx " << vparent
<< " " << name
<< dendl
;
10335 tout(cct
) << "ll_lookupx" << std::endl
;
10336 tout(cct
) << name
<< std::endl
;
10342 if (!cct
->_conf
->fuse_default_permissions
) {
10343 r
= may_lookup(parent
, perms
);
10348 string
dname(name
);
10351 unsigned mask
= statx_to_mask(flags
, want
);
10352 r
= _lookup(parent
, dname
, mask
, &in
, perms
);
10358 fill_statx(in
, mask
, stx
);
10362 ldout(cct
, 3) << "ll_lookupx " << vparent
<< " " << name
10363 << " -> " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
10364 tout(cct
) << stx
->stx_ino
<< std::endl
;
10369 int Client::ll_walk(const char* name
, Inode
**out
, struct ceph_statx
*stx
,
10370 unsigned int want
, unsigned int flags
, const UserPerm
& perms
)
10372 Mutex::Locker
lock(client_lock
);
10377 filepath
fp(name
, 0);
10380 unsigned mask
= statx_to_mask(flags
, want
);
10382 ldout(cct
, 3) << "ll_walk" << name
<< dendl
;
10383 tout(cct
) << "ll_walk" << std::endl
;
10384 tout(cct
) << name
<< std::endl
;
10386 rc
= path_walk(fp
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), mask
);
10388 /* zero out mask, just in case... */
10395 fill_statx(in
, mask
, stx
);
10402 void Client::_ll_get(Inode
*in
)
10404 if (in
->ll_ref
== 0) {
10406 if (in
->is_dir() && !in
->dn_set
.empty()) {
10407 assert(in
->dn_set
.size() == 1); // dirs can't be hard-linked
10408 in
->get_first_parent()->get(); // pin dentry
10412 ldout(cct
, 20) << "_ll_get " << in
<< " " << in
->ino
<< " -> " << in
->ll_ref
<< dendl
;
10415 int Client::_ll_put(Inode
*in
, int num
)
10418 ldout(cct
, 20) << "_ll_put " << in
<< " " << in
->ino
<< " " << num
<< " -> " << in
->ll_ref
<< dendl
;
10419 if (in
->ll_ref
== 0) {
10420 if (in
->is_dir() && !in
->dn_set
.empty()) {
10421 assert(in
->dn_set
.size() == 1); // dirs can't be hard-linked
10422 in
->get_first_parent()->put(); // unpin dentry
10431 void Client::_ll_drop_pins()
10433 ldout(cct
, 10) << "_ll_drop_pins" << dendl
;
10434 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator next
;
10435 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator it
= inode_map
.begin();
10436 it
!= inode_map
.end();
10438 Inode
*in
= it
->second
;
10442 _ll_put(in
, in
->ll_ref
);
10446 bool Client::ll_forget(Inode
*in
, int count
)
10448 Mutex::Locker
lock(client_lock
);
10449 inodeno_t ino
= _get_inodeno(in
);
10451 ldout(cct
, 3) << "ll_forget " << ino
<< " " << count
<< dendl
;
10452 tout(cct
) << "ll_forget" << std::endl
;
10453 tout(cct
) << ino
.val
<< std::endl
;
10454 tout(cct
) << count
<< std::endl
;
10456 // Ignore forget if we're no longer mounted
10460 if (ino
== 1) return true; // ignore forget on root.
10463 if (in
->ll_ref
< count
) {
10464 ldout(cct
, 1) << "WARNING: ll_forget on " << ino
<< " " << count
10465 << ", which only has ll_ref=" << in
->ll_ref
<< dendl
;
10466 _ll_put(in
, in
->ll_ref
);
10469 if (_ll_put(in
, count
) == 0)
10476 bool Client::ll_put(Inode
*in
)
10478 /* ll_forget already takes the lock */
10479 return ll_forget(in
, 1);
10482 snapid_t
Client::ll_get_snapid(Inode
*in
)
10484 Mutex::Locker
lock(client_lock
);
10488 Inode
*Client::ll_get_inode(ino_t ino
)
10490 Mutex::Locker
lock(client_lock
);
10495 vinodeno_t vino
= _map_faked_ino(ino
);
10496 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
10497 if (p
== inode_map
.end())
10499 Inode
*in
= p
->second
;
10504 Inode
*Client::ll_get_inode(vinodeno_t vino
)
10506 Mutex::Locker
lock(client_lock
);
10511 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
10512 if (p
== inode_map
.end())
10514 Inode
*in
= p
->second
;
10519 int Client::_ll_getattr(Inode
*in
, int caps
, const UserPerm
& perms
)
10521 vinodeno_t vino
= _get_vino(in
);
10523 ldout(cct
, 3) << "ll_getattr " << vino
<< dendl
;
10524 tout(cct
) << "ll_getattr" << std::endl
;
10525 tout(cct
) << vino
.ino
.val
<< std::endl
;
10527 if (vino
.snapid
< CEPH_NOSNAP
)
10530 return _getattr(in
, caps
, perms
);
10533 int Client::ll_getattr(Inode
*in
, struct stat
*attr
, const UserPerm
& perms
)
10535 Mutex::Locker
lock(client_lock
);
10540 int res
= _ll_getattr(in
, CEPH_STAT_CAP_INODE_ALL
, perms
);
10543 fill_stat(in
, attr
);
10544 ldout(cct
, 3) << "ll_getattr " << _get_vino(in
) << " = " << res
<< dendl
;
10548 int Client::ll_getattrx(Inode
*in
, struct ceph_statx
*stx
, unsigned int want
,
10549 unsigned int flags
, const UserPerm
& perms
)
10551 Mutex::Locker
lock(client_lock
);
10557 unsigned mask
= statx_to_mask(flags
, want
);
10559 if (mask
&& !in
->caps_issued_mask(mask
, true))
10560 res
= _ll_getattr(in
, mask
, perms
);
10563 fill_statx(in
, mask
, stx
);
10564 ldout(cct
, 3) << "ll_getattrx " << _get_vino(in
) << " = " << res
<< dendl
;
10568 int Client::_ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
10569 const UserPerm
& perms
, InodeRef
*inp
)
10571 vinodeno_t vino
= _get_vino(in
);
10573 ldout(cct
, 3) << "ll_setattrx " << vino
<< " mask " << hex
<< mask
<< dec
10575 tout(cct
) << "ll_setattrx" << std::endl
;
10576 tout(cct
) << vino
.ino
.val
<< std::endl
;
10577 tout(cct
) << stx
->stx_mode
<< std::endl
;
10578 tout(cct
) << stx
->stx_uid
<< std::endl
;
10579 tout(cct
) << stx
->stx_gid
<< std::endl
;
10580 tout(cct
) << stx
->stx_size
<< std::endl
;
10581 tout(cct
) << stx
->stx_mtime
<< std::endl
;
10582 tout(cct
) << stx
->stx_atime
<< std::endl
;
10583 tout(cct
) << stx
->stx_btime
<< std::endl
;
10584 tout(cct
) << mask
<< std::endl
;
10586 if (!cct
->_conf
->fuse_default_permissions
) {
10587 int res
= may_setattr(in
, stx
, mask
, perms
);
10592 mask
&= ~(CEPH_SETATTR_MTIME_NOW
| CEPH_SETATTR_ATIME_NOW
);
10594 return __setattrx(in
, stx
, mask
, perms
, inp
);
10597 int Client::ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
10598 const UserPerm
& perms
)
10600 Mutex::Locker
lock(client_lock
);
10605 InodeRef
target(in
);
10606 int res
= _ll_setattrx(in
, stx
, mask
, perms
, &target
);
10608 assert(in
== target
.get());
10609 fill_statx(in
, in
->caps_issued(), stx
);
10612 ldout(cct
, 3) << "ll_setattrx " << _get_vino(in
) << " = " << res
<< dendl
;
10616 int Client::ll_setattr(Inode
*in
, struct stat
*attr
, int mask
,
10617 const UserPerm
& perms
)
10619 struct ceph_statx stx
;
10620 stat_to_statx(attr
, &stx
);
10622 Mutex::Locker
lock(client_lock
);
10627 InodeRef
target(in
);
10628 int res
= _ll_setattrx(in
, &stx
, mask
, perms
, &target
);
10630 assert(in
== target
.get());
10631 fill_stat(in
, attr
);
10634 ldout(cct
, 3) << "ll_setattr " << _get_vino(in
) << " = " << res
<< dendl
;
10642 int Client::getxattr(const char *path
, const char *name
, void *value
, size_t size
,
10643 const UserPerm
& perms
)
10645 Mutex::Locker
lock(client_lock
);
10651 int r
= Client::path_walk(path
, &in
, perms
, true, CEPH_STAT_CAP_XATTR
);
10654 return _getxattr(in
, name
, value
, size
, perms
);
10657 int Client::lgetxattr(const char *path
, const char *name
, void *value
, size_t size
,
10658 const UserPerm
& perms
)
10660 Mutex::Locker
lock(client_lock
);
10666 int r
= Client::path_walk(path
, &in
, perms
, false, CEPH_STAT_CAP_XATTR
);
10669 return _getxattr(in
, name
, value
, size
, perms
);
10672 int Client::fgetxattr(int fd
, const char *name
, void *value
, size_t size
,
10673 const UserPerm
& perms
)
10675 Mutex::Locker
lock(client_lock
);
10680 Fh
*f
= get_filehandle(fd
);
10683 return _getxattr(f
->inode
, name
, value
, size
, perms
);
10686 int Client::listxattr(const char *path
, char *list
, size_t size
,
10687 const UserPerm
& perms
)
10689 Mutex::Locker
lock(client_lock
);
10695 int r
= Client::path_walk(path
, &in
, perms
, true, CEPH_STAT_CAP_XATTR
);
10698 return Client::_listxattr(in
.get(), list
, size
, perms
);
10701 int Client::llistxattr(const char *path
, char *list
, size_t size
,
10702 const UserPerm
& perms
)
10704 Mutex::Locker
lock(client_lock
);
10710 int r
= Client::path_walk(path
, &in
, perms
, false, CEPH_STAT_CAP_XATTR
);
10713 return Client::_listxattr(in
.get(), list
, size
, perms
);
10716 int Client::flistxattr(int fd
, char *list
, size_t size
, const UserPerm
& perms
)
10718 Mutex::Locker
lock(client_lock
);
10723 Fh
*f
= get_filehandle(fd
);
10726 return Client::_listxattr(f
->inode
.get(), list
, size
, perms
);
10729 int Client::removexattr(const char *path
, const char *name
,
10730 const UserPerm
& perms
)
10732 Mutex::Locker
lock(client_lock
);
10738 int r
= Client::path_walk(path
, &in
, perms
, true);
10741 return _removexattr(in
, name
, perms
);
10744 int Client::lremovexattr(const char *path
, const char *name
,
10745 const UserPerm
& perms
)
10747 Mutex::Locker
lock(client_lock
);
10753 int r
= Client::path_walk(path
, &in
, perms
, false);
10756 return _removexattr(in
, name
, perms
);
10759 int Client::fremovexattr(int fd
, const char *name
, const UserPerm
& perms
)
10761 Mutex::Locker
lock(client_lock
);
10766 Fh
*f
= get_filehandle(fd
);
10769 return _removexattr(f
->inode
, name
, perms
);
10772 int Client::setxattr(const char *path
, const char *name
, const void *value
,
10773 size_t size
, int flags
, const UserPerm
& perms
)
10775 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10777 Mutex::Locker
lock(client_lock
);
10783 int r
= Client::path_walk(path
, &in
, perms
, true);
10786 return _setxattr(in
, name
, value
, size
, flags
, perms
);
10789 int Client::lsetxattr(const char *path
, const char *name
, const void *value
,
10790 size_t size
, int flags
, const UserPerm
& perms
)
10792 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10794 Mutex::Locker
lock(client_lock
);
10800 int r
= Client::path_walk(path
, &in
, perms
, false);
10803 return _setxattr(in
, name
, value
, size
, flags
, perms
);
10806 int Client::fsetxattr(int fd
, const char *name
, const void *value
, size_t size
,
10807 int flags
, const UserPerm
& perms
)
10809 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10811 Mutex::Locker
lock(client_lock
);
10816 Fh
*f
= get_filehandle(fd
);
10819 return _setxattr(f
->inode
, name
, value
, size
, flags
, perms
);
10822 int Client::_getxattr(Inode
*in
, const char *name
, void *value
, size_t size
,
10823 const UserPerm
& perms
)
10827 const VXattr
*vxattr
= _match_vxattr(in
, name
);
10831 // Do a force getattr to get the latest quota before returning
10832 // a value to userspace.
10834 if (vxattr
->flags
& VXATTR_RSTAT
) {
10835 flags
|= CEPH_STAT_RSTAT
;
10837 r
= _getattr(in
, flags
, perms
, true);
10839 // Error from getattr!
10843 // call pointer-to-member function
10845 if (!(vxattr
->exists_cb
&& !(this->*(vxattr
->exists_cb
))(in
))) {
10846 r
= (this->*(vxattr
->getxattr_cb
))(in
, buf
, sizeof(buf
));
10852 if (r
> (int)size
) {
10854 } else if (r
> 0) {
10855 memcpy(value
, buf
, r
);
10861 if (acl_type
== NO_ACL
&& !strncmp(name
, "system.", 7)) {
10866 r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
10870 if (in
->xattrs
.count(n
)) {
10871 r
= in
->xattrs
[n
].length();
10872 if (r
> 0 && size
!= 0) {
10873 if (size
>= (unsigned)r
)
10874 memcpy(value
, in
->xattrs
[n
].c_str(), r
);
10881 ldout(cct
, 3) << "_getxattr(" << in
->ino
<< ", \"" << name
<< "\", " << size
<< ") = " << r
<< dendl
;
10885 int Client::_getxattr(InodeRef
&in
, const char *name
, void *value
, size_t size
,
10886 const UserPerm
& perms
)
10888 if (cct
->_conf
->client_permissions
) {
10889 int r
= xattr_permission(in
.get(), name
, MAY_READ
, perms
);
10893 return _getxattr(in
.get(), name
, value
, size
, perms
);
10896 int Client::ll_getxattr(Inode
*in
, const char *name
, void *value
,
10897 size_t size
, const UserPerm
& perms
)
10899 Mutex::Locker
lock(client_lock
);
10904 vinodeno_t vino
= _get_vino(in
);
10906 ldout(cct
, 3) << "ll_getxattr " << vino
<< " " << name
<< " size " << size
<< dendl
;
10907 tout(cct
) << "ll_getxattr" << std::endl
;
10908 tout(cct
) << vino
.ino
.val
<< std::endl
;
10909 tout(cct
) << name
<< std::endl
;
10911 if (!cct
->_conf
->fuse_default_permissions
) {
10912 int r
= xattr_permission(in
, name
, MAY_READ
, perms
);
10917 return _getxattr(in
, name
, value
, size
, perms
);
10920 int Client::_listxattr(Inode
*in
, char *name
, size_t size
,
10921 const UserPerm
& perms
)
10923 int r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
10925 for (map
<string
,bufferptr
>::iterator p
= in
->xattrs
.begin();
10926 p
!= in
->xattrs
.end();
10928 r
+= p
->first
.length() + 1;
10930 const VXattr
*vxattrs
= _get_vxattrs(in
);
10931 r
+= _vxattrs_name_size(vxattrs
);
10934 if (size
>= (unsigned)r
) {
10935 for (map
<string
,bufferptr
>::iterator p
= in
->xattrs
.begin();
10936 p
!= in
->xattrs
.end();
10938 memcpy(name
, p
->first
.c_str(), p
->first
.length());
10939 name
+= p
->first
.length();
10944 for (int i
= 0; !vxattrs
[i
].name
.empty(); i
++) {
10945 const VXattr
& vxattr
= vxattrs
[i
];
10948 // call pointer-to-member function
10949 if(vxattr
.exists_cb
&& !(this->*(vxattr
.exists_cb
))(in
))
10951 memcpy(name
, vxattr
.name
.c_str(), vxattr
.name
.length());
10952 name
+= vxattr
.name
.length();
10961 ldout(cct
, 3) << "_listxattr(" << in
->ino
<< ", " << size
<< ") = " << r
<< dendl
;
10965 int Client::ll_listxattr(Inode
*in
, char *names
, size_t size
,
10966 const UserPerm
& perms
)
10968 Mutex::Locker
lock(client_lock
);
10973 vinodeno_t vino
= _get_vino(in
);
10975 ldout(cct
, 3) << "ll_listxattr " << vino
<< " size " << size
<< dendl
;
10976 tout(cct
) << "ll_listxattr" << std::endl
;
10977 tout(cct
) << vino
.ino
.val
<< std::endl
;
10978 tout(cct
) << size
<< std::endl
;
10980 return _listxattr(in
, names
, size
, perms
);
10983 int Client::_do_setxattr(Inode
*in
, const char *name
, const void *value
,
10984 size_t size
, int flags
, const UserPerm
& perms
)
10987 int xattr_flags
= 0;
10989 xattr_flags
|= CEPH_XATTR_REMOVE
;
10990 if (flags
& XATTR_CREATE
)
10991 xattr_flags
|= CEPH_XATTR_CREATE
;
10992 if (flags
& XATTR_REPLACE
)
10993 xattr_flags
|= CEPH_XATTR_REPLACE
;
10995 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SETXATTR
);
10997 in
->make_nosnap_relative_path(path
);
10998 req
->set_filepath(path
);
10999 req
->set_string2(name
);
11000 req
->set_inode(in
);
11001 req
->head
.args
.setxattr
.flags
= xattr_flags
;
11004 bl
.append((const char*)value
, size
);
11007 int res
= make_request(req
, perms
);
11010 ldout(cct
, 3) << "_setxattr(" << in
->ino
<< ", \"" << name
<< "\") = " <<
11015 int Client::_setxattr(Inode
*in
, const char *name
, const void *value
,
11016 size_t size
, int flags
, const UserPerm
& perms
)
11018 if (in
->snapid
!= CEPH_NOSNAP
) {
11022 bool posix_acl_xattr
= false;
11023 if (acl_type
== POSIX_ACL
)
11024 posix_acl_xattr
= !strncmp(name
, "system.", 7);
11026 if (strncmp(name
, "user.", 5) &&
11027 strncmp(name
, "security.", 9) &&
11028 strncmp(name
, "trusted.", 8) &&
11029 strncmp(name
, "ceph.", 5) &&
11031 return -EOPNOTSUPP
;
11033 if (posix_acl_xattr
) {
11034 if (!strcmp(name
, ACL_EA_ACCESS
)) {
11035 mode_t new_mode
= in
->mode
;
11037 int ret
= posix_acl_equiv_mode(value
, size
, &new_mode
);
11044 if (new_mode
!= in
->mode
) {
11045 struct ceph_statx stx
;
11046 stx
.stx_mode
= new_mode
;
11047 ret
= _do_setattr(in
, &stx
, CEPH_SETATTR_MODE
, perms
, NULL
);
11052 } else if (!strcmp(name
, ACL_EA_DEFAULT
)) {
11054 if (!S_ISDIR(in
->mode
))
11056 int ret
= posix_acl_check(value
, size
);
11065 return -EOPNOTSUPP
;
11068 const VXattr
*vxattr
= _match_vxattr(in
, name
);
11069 if (vxattr
&& vxattr
->readonly
)
11070 return -EOPNOTSUPP
;
11073 return _do_setxattr(in
, name
, value
, size
, flags
, perms
);
11076 int Client::_setxattr(InodeRef
&in
, const char *name
, const void *value
,
11077 size_t size
, int flags
, const UserPerm
& perms
)
11079 if (cct
->_conf
->client_permissions
) {
11080 int r
= xattr_permission(in
.get(), name
, MAY_WRITE
, perms
);
11084 return _setxattr(in
.get(), name
, value
, size
, flags
, perms
);
11087 int Client::_setxattr_check_data_pool(string
& name
, string
& value
, const OSDMap
*osdmap
)
11090 if (name
== "layout") {
11091 string::iterator begin
= value
.begin();
11092 string::iterator end
= value
.end();
11093 keys_and_values
<string::iterator
> p
; // create instance of parser
11094 std::map
<string
, string
> m
; // map to receive results
11095 if (!qi::parse(begin
, end
, p
, m
)) { // returns true if successful
11100 for (map
<string
,string
>::iterator q
= m
.begin(); q
!= m
.end(); ++q
) {
11101 if (q
->first
== "pool") {
11106 } else if (name
== "layout.pool") {
11110 if (tmp
.length()) {
11113 pool
= boost::lexical_cast
<unsigned>(tmp
);
11114 if (!osdmap
->have_pg_pool(pool
))
11116 } catch (boost::bad_lexical_cast
const&) {
11117 pool
= osdmap
->lookup_pg_pool_name(tmp
);
11127 void Client::_setxattr_maybe_wait_for_osdmap(const char *name
, const void *value
, size_t size
)
11129 // For setting pool of layout, MetaRequest need osdmap epoch.
11130 // There is a race which create a new data pool but client and mds both don't have.
11131 // Make client got the latest osdmap which make mds quickly judge whether get newer osdmap.
11132 if (strcmp(name
, "ceph.file.layout.pool") == 0 || strcmp(name
, "ceph.dir.layout.pool") == 0 ||
11133 strcmp(name
, "ceph.file.layout") == 0 || strcmp(name
, "ceph.dir.layout") == 0) {
11134 string
rest(strstr(name
, "layout"));
11135 string
v((const char*)value
, size
);
11136 int r
= objecter
->with_osdmap([&](const OSDMap
& o
) {
11137 return _setxattr_check_data_pool(rest
, v
, &o
);
11140 if (r
== -ENOENT
) {
11142 objecter
->wait_for_latest_osdmap(&ctx
);
11148 int Client::ll_setxattr(Inode
*in
, const char *name
, const void *value
,
11149 size_t size
, int flags
, const UserPerm
& perms
)
11151 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
11153 Mutex::Locker
lock(client_lock
);
11158 vinodeno_t vino
= _get_vino(in
);
11160 ldout(cct
, 3) << "ll_setxattr " << vino
<< " " << name
<< " size " << size
<< dendl
;
11161 tout(cct
) << "ll_setxattr" << std::endl
;
11162 tout(cct
) << vino
.ino
.val
<< std::endl
;
11163 tout(cct
) << name
<< std::endl
;
11165 if (!cct
->_conf
->fuse_default_permissions
) {
11166 int r
= xattr_permission(in
, name
, MAY_WRITE
, perms
);
11170 return _setxattr(in
, name
, value
, size
, flags
, perms
);
11173 int Client::_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
)
11175 if (in
->snapid
!= CEPH_NOSNAP
) {
11179 // same xattrs supported by kernel client
11180 if (strncmp(name
, "user.", 5) &&
11181 strncmp(name
, "system.", 7) &&
11182 strncmp(name
, "security.", 9) &&
11183 strncmp(name
, "trusted.", 8) &&
11184 strncmp(name
, "ceph.", 5))
11185 return -EOPNOTSUPP
;
11187 const VXattr
*vxattr
= _match_vxattr(in
, name
);
11188 if (vxattr
&& vxattr
->readonly
)
11189 return -EOPNOTSUPP
;
11191 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_RMXATTR
);
11193 in
->make_nosnap_relative_path(path
);
11194 req
->set_filepath(path
);
11195 req
->set_filepath2(name
);
11196 req
->set_inode(in
);
11198 int res
= make_request(req
, perms
);
11201 ldout(cct
, 3) << "_removexattr(" << in
->ino
<< ", \"" << name
<< "\") = " << res
<< dendl
;
11205 int Client::_removexattr(InodeRef
&in
, const char *name
, const UserPerm
& perms
)
11207 if (cct
->_conf
->client_permissions
) {
11208 int r
= xattr_permission(in
.get(), name
, MAY_WRITE
, perms
);
11212 return _removexattr(in
.get(), name
, perms
);
11215 int Client::ll_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
)
11217 Mutex::Locker
lock(client_lock
);
11222 vinodeno_t vino
= _get_vino(in
);
11224 ldout(cct
, 3) << "ll_removexattr " << vino
<< " " << name
<< dendl
;
11225 tout(cct
) << "ll_removexattr" << std::endl
;
11226 tout(cct
) << vino
.ino
.val
<< std::endl
;
11227 tout(cct
) << name
<< std::endl
;
11229 if (!cct
->_conf
->fuse_default_permissions
) {
11230 int r
= xattr_permission(in
, name
, MAY_WRITE
, perms
);
11235 return _removexattr(in
, name
, perms
);
11238 bool Client::_vxattrcb_quota_exists(Inode
*in
)
11240 return in
->quota
.is_enable();
11242 size_t Client::_vxattrcb_quota(Inode
*in
, char *val
, size_t size
)
11244 return snprintf(val
, size
,
11245 "max_bytes=%lld max_files=%lld",
11246 (long long int)in
->quota
.max_bytes
,
11247 (long long int)in
->quota
.max_files
);
11249 size_t Client::_vxattrcb_quota_max_bytes(Inode
*in
, char *val
, size_t size
)
11251 return snprintf(val
, size
, "%lld", (long long int)in
->quota
.max_bytes
);
11253 size_t Client::_vxattrcb_quota_max_files(Inode
*in
, char *val
, size_t size
)
11255 return snprintf(val
, size
, "%lld", (long long int)in
->quota
.max_files
);
11258 bool Client::_vxattrcb_layout_exists(Inode
*in
)
11260 return in
->layout
!= file_layout_t();
11262 size_t Client::_vxattrcb_layout(Inode
*in
, char *val
, size_t size
)
11264 int r
= snprintf(val
, size
,
11265 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
11266 (unsigned long long)in
->layout
.stripe_unit
,
11267 (unsigned long long)in
->layout
.stripe_count
,
11268 (unsigned long long)in
->layout
.object_size
);
11269 objecter
->with_osdmap([&](const OSDMap
& o
) {
11270 if (o
.have_pg_pool(in
->layout
.pool_id
))
11271 r
+= snprintf(val
+ r
, size
- r
, "%s",
11272 o
.get_pool_name(in
->layout
.pool_id
).c_str());
11274 r
+= snprintf(val
+ r
, size
- r
, "%" PRIu64
,
11275 (uint64_t)in
->layout
.pool_id
);
11277 if (in
->layout
.pool_ns
.length())
11278 r
+= snprintf(val
+ r
, size
- r
, " pool_namespace=%s",
11279 in
->layout
.pool_ns
.c_str());
11282 size_t Client::_vxattrcb_layout_stripe_unit(Inode
*in
, char *val
, size_t size
)
11284 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.stripe_unit
);
11286 size_t Client::_vxattrcb_layout_stripe_count(Inode
*in
, char *val
, size_t size
)
11288 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.stripe_count
);
11290 size_t Client::_vxattrcb_layout_object_size(Inode
*in
, char *val
, size_t size
)
11292 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.object_size
);
11294 size_t Client::_vxattrcb_layout_pool(Inode
*in
, char *val
, size_t size
)
11297 objecter
->with_osdmap([&](const OSDMap
& o
) {
11298 if (o
.have_pg_pool(in
->layout
.pool_id
))
11299 r
= snprintf(val
, size
, "%s", o
.get_pool_name(
11300 in
->layout
.pool_id
).c_str());
11302 r
= snprintf(val
, size
, "%" PRIu64
, (uint64_t)in
->layout
.pool_id
);
11306 size_t Client::_vxattrcb_layout_pool_namespace(Inode
*in
, char *val
, size_t size
)
11308 return snprintf(val
, size
, "%s", in
->layout
.pool_ns
.c_str());
11310 size_t Client::_vxattrcb_dir_entries(Inode
*in
, char *val
, size_t size
)
11312 return snprintf(val
, size
, "%lld", (unsigned long long)(in
->dirstat
.nfiles
+ in
->dirstat
.nsubdirs
));
11314 size_t Client::_vxattrcb_dir_files(Inode
*in
, char *val
, size_t size
)
11316 return snprintf(val
, size
, "%lld", (unsigned long long)in
->dirstat
.nfiles
);
11318 size_t Client::_vxattrcb_dir_subdirs(Inode
*in
, char *val
, size_t size
)
11320 return snprintf(val
, size
, "%lld", (unsigned long long)in
->dirstat
.nsubdirs
);
11322 size_t Client::_vxattrcb_dir_rentries(Inode
*in
, char *val
, size_t size
)
11324 return snprintf(val
, size
, "%lld", (unsigned long long)(in
->rstat
.rfiles
+ in
->rstat
.rsubdirs
));
11326 size_t Client::_vxattrcb_dir_rfiles(Inode
*in
, char *val
, size_t size
)
11328 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rfiles
);
11330 size_t Client::_vxattrcb_dir_rsubdirs(Inode
*in
, char *val
, size_t size
)
11332 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rsubdirs
);
11334 size_t Client::_vxattrcb_dir_rbytes(Inode
*in
, char *val
, size_t size
)
11336 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rbytes
);
11338 size_t Client::_vxattrcb_dir_rctime(Inode
*in
, char *val
, size_t size
)
11340 return snprintf(val
, size
, "%ld.09%ld", (long)in
->rstat
.rctime
.sec(),
11341 (long)in
->rstat
.rctime
.nsec());
11344 #define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name
11345 #define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." #_name2
11347 #define XATTR_NAME_CEPH(_type, _name) \
11349 name: CEPH_XATTR_NAME(_type, _name), \
11350 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
11356 #define XATTR_NAME_CEPH2(_type, _name, _flags) \
11358 name: CEPH_XATTR_NAME(_type, _name), \
11359 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
11365 #define XATTR_LAYOUT_FIELD(_type, _name, _field) \
11367 name: CEPH_XATTR_NAME2(_type, _name, _field), \
11368 getxattr_cb: &Client::_vxattrcb_ ## _name ## _ ## _field, \
11371 exists_cb: &Client::_vxattrcb_layout_exists, \
11374 #define XATTR_QUOTA_FIELD(_type, _name) \
11376 name: CEPH_XATTR_NAME(_type, _name), \
11377 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
11380 exists_cb: &Client::_vxattrcb_quota_exists, \
11384 const Client::VXattr
Client::_dir_vxattrs
[] = {
11386 name
: "ceph.dir.layout",
11387 getxattr_cb
: &Client::_vxattrcb_layout
,
11390 exists_cb
: &Client::_vxattrcb_layout_exists
,
11393 XATTR_LAYOUT_FIELD(dir
, layout
, stripe_unit
),
11394 XATTR_LAYOUT_FIELD(dir
, layout
, stripe_count
),
11395 XATTR_LAYOUT_FIELD(dir
, layout
, object_size
),
11396 XATTR_LAYOUT_FIELD(dir
, layout
, pool
),
11397 XATTR_LAYOUT_FIELD(dir
, layout
, pool_namespace
),
11398 XATTR_NAME_CEPH(dir
, entries
),
11399 XATTR_NAME_CEPH(dir
, files
),
11400 XATTR_NAME_CEPH(dir
, subdirs
),
11401 XATTR_NAME_CEPH2(dir
, rentries
, VXATTR_RSTAT
),
11402 XATTR_NAME_CEPH2(dir
, rfiles
, VXATTR_RSTAT
),
11403 XATTR_NAME_CEPH2(dir
, rsubdirs
, VXATTR_RSTAT
),
11404 XATTR_NAME_CEPH2(dir
, rbytes
, VXATTR_RSTAT
),
11405 XATTR_NAME_CEPH2(dir
, rctime
, VXATTR_RSTAT
),
11407 name
: "ceph.quota",
11408 getxattr_cb
: &Client::_vxattrcb_quota
,
11411 exists_cb
: &Client::_vxattrcb_quota_exists
,
11414 XATTR_QUOTA_FIELD(quota
, max_bytes
),
11415 XATTR_QUOTA_FIELD(quota
, max_files
),
11416 { name
: "" } /* Required table terminator */
11419 const Client::VXattr
Client::_file_vxattrs
[] = {
11421 name
: "ceph.file.layout",
11422 getxattr_cb
: &Client::_vxattrcb_layout
,
11425 exists_cb
: &Client::_vxattrcb_layout_exists
,
11428 XATTR_LAYOUT_FIELD(file
, layout
, stripe_unit
),
11429 XATTR_LAYOUT_FIELD(file
, layout
, stripe_count
),
11430 XATTR_LAYOUT_FIELD(file
, layout
, object_size
),
11431 XATTR_LAYOUT_FIELD(file
, layout
, pool
),
11432 XATTR_LAYOUT_FIELD(file
, layout
, pool_namespace
),
11433 { name
: "" } /* Required table terminator */
11436 const Client::VXattr
*Client::_get_vxattrs(Inode
*in
)
11439 return _dir_vxattrs
;
11440 else if (in
->is_file())
11441 return _file_vxattrs
;
11445 const Client::VXattr
*Client::_match_vxattr(Inode
*in
, const char *name
)
11447 if (strncmp(name
, "ceph.", 5) == 0) {
11448 const VXattr
*vxattr
= _get_vxattrs(in
);
11450 while (!vxattr
->name
.empty()) {
11451 if (vxattr
->name
== name
)
11460 size_t Client::_vxattrs_calcu_name_size(const VXattr
*vxattr
)
11463 while (!vxattr
->name
.empty()) {
11464 if (!vxattr
->hidden
)
11465 len
+= vxattr
->name
.length() + 1;
11471 int Client::ll_readlink(Inode
*in
, char *buf
, size_t buflen
, const UserPerm
& perms
)
11473 Mutex::Locker
lock(client_lock
);
11478 vinodeno_t vino
= _get_vino(in
);
11480 ldout(cct
, 3) << "ll_readlink " << vino
<< dendl
;
11481 tout(cct
) << "ll_readlink" << std::endl
;
11482 tout(cct
) << vino
.ino
.val
<< std::endl
;
11484 set
<Dentry
*>::iterator dn
= in
->dn_set
.begin();
11485 while (dn
!= in
->dn_set
.end()) {
11490 int r
= _readlink(in
, buf
, buflen
); // FIXME: no permission checking!
11491 ldout(cct
, 3) << "ll_readlink " << vino
<< " = " << r
<< dendl
;
11495 int Client::_mknod(Inode
*dir
, const char *name
, mode_t mode
, dev_t rdev
,
11496 const UserPerm
& perms
, InodeRef
*inp
)
11498 ldout(cct
, 3) << "_mknod(" << dir
->ino
<< " " << name
<< ", 0" << oct
11499 << mode
<< dec
<< ", " << rdev
<< ", uid " << perms
.uid()
11500 << ", gid " << perms
.gid() << ")" << dendl
;
11502 if (strlen(name
) > NAME_MAX
)
11503 return -ENAMETOOLONG
;
11505 if (dir
->snapid
!= CEPH_NOSNAP
) {
11508 if (is_quota_files_exceeded(dir
, perms
)) {
11512 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_MKNOD
);
11515 dir
->make_nosnap_relative_path(path
);
11516 path
.push_dentry(name
);
11517 req
->set_filepath(path
);
11518 req
->set_inode(dir
);
11519 req
->head
.args
.mknod
.rdev
= rdev
;
11520 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11521 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11523 bufferlist xattrs_bl
;
11524 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perms
);
11527 req
->head
.args
.mknod
.mode
= mode
;
11528 if (xattrs_bl
.length() > 0)
11529 req
->set_data(xattrs_bl
);
11532 res
= get_or_create(dir
, name
, &de
);
11535 req
->set_dentry(de
);
11537 res
= make_request(req
, perms
, inp
);
11541 ldout(cct
, 3) << "mknod(" << path
<< ", 0" << oct
<< mode
<< dec
<< ") = " << res
<< dendl
;
11549 int Client::ll_mknod(Inode
*parent
, const char *name
, mode_t mode
,
11550 dev_t rdev
, struct stat
*attr
, Inode
**out
,
11551 const UserPerm
& perms
)
11553 Mutex::Locker
lock(client_lock
);
11558 vinodeno_t vparent
= _get_vino(parent
);
11560 ldout(cct
, 3) << "ll_mknod " << vparent
<< " " << name
<< dendl
;
11561 tout(cct
) << "ll_mknod" << std::endl
;
11562 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11563 tout(cct
) << name
<< std::endl
;
11564 tout(cct
) << mode
<< std::endl
;
11565 tout(cct
) << rdev
<< std::endl
;
11567 if (!cct
->_conf
->fuse_default_permissions
) {
11568 int r
= may_create(parent
, perms
);
11574 int r
= _mknod(parent
, name
, mode
, rdev
, perms
, &in
);
11576 fill_stat(in
, attr
);
11579 tout(cct
) << attr
->st_ino
<< std::endl
;
11580 ldout(cct
, 3) << "ll_mknod " << vparent
<< " " << name
11581 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11586 int Client::ll_mknodx(Inode
*parent
, const char *name
, mode_t mode
,
11587 dev_t rdev
, Inode
**out
,
11588 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
11589 const UserPerm
& perms
)
11591 unsigned caps
= statx_to_mask(flags
, want
);
11592 Mutex::Locker
lock(client_lock
);
11597 vinodeno_t vparent
= _get_vino(parent
);
11599 ldout(cct
, 3) << "ll_mknodx " << vparent
<< " " << name
<< dendl
;
11600 tout(cct
) << "ll_mknodx" << std::endl
;
11601 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11602 tout(cct
) << name
<< std::endl
;
11603 tout(cct
) << mode
<< std::endl
;
11604 tout(cct
) << rdev
<< std::endl
;
11606 if (!cct
->_conf
->fuse_default_permissions
) {
11607 int r
= may_create(parent
, perms
);
11613 int r
= _mknod(parent
, name
, mode
, rdev
, perms
, &in
);
11615 fill_statx(in
, caps
, stx
);
11618 tout(cct
) << stx
->stx_ino
<< std::endl
;
11619 ldout(cct
, 3) << "ll_mknodx " << vparent
<< " " << name
11620 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11625 int Client::_create(Inode
*dir
, const char *name
, int flags
, mode_t mode
,
11626 InodeRef
*inp
, Fh
**fhp
, int stripe_unit
, int stripe_count
,
11627 int object_size
, const char *data_pool
, bool *created
,
11628 const UserPerm
& perms
)
11630 ldout(cct
, 3) << "_create(" << dir
->ino
<< " " << name
<< ", 0" << oct
<<
11631 mode
<< dec
<< ")" << dendl
;
11633 if (strlen(name
) > NAME_MAX
)
11634 return -ENAMETOOLONG
;
11635 if (dir
->snapid
!= CEPH_NOSNAP
) {
11638 if (is_quota_files_exceeded(dir
, perms
)) {
11642 // use normalized flags to generate cmode
11643 int cmode
= ceph_flags_to_mode(ceph_flags_sys2wire(flags
));
11647 int64_t pool_id
= -1;
11648 if (data_pool
&& *data_pool
) {
11649 pool_id
= objecter
->with_osdmap(
11650 std::mem_fn(&OSDMap::lookup_pg_pool_name
), data_pool
);
11653 if (pool_id
> 0xffffffffll
)
11654 return -ERANGE
; // bummer!
11657 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_CREATE
);
11660 dir
->make_nosnap_relative_path(path
);
11661 path
.push_dentry(name
);
11662 req
->set_filepath(path
);
11663 req
->set_inode(dir
);
11664 req
->head
.args
.open
.flags
= ceph_flags_sys2wire(flags
| O_CREAT
);
11666 req
->head
.args
.open
.stripe_unit
= stripe_unit
;
11667 req
->head
.args
.open
.stripe_count
= stripe_count
;
11668 req
->head
.args
.open
.object_size
= object_size
;
11669 if (cct
->_conf
->client_debug_getattr_caps
)
11670 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
11672 req
->head
.args
.open
.mask
= 0;
11673 req
->head
.args
.open
.pool
= pool_id
;
11674 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11675 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11678 bufferlist xattrs_bl
;
11679 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perms
);
11682 req
->head
.args
.open
.mode
= mode
;
11683 if (xattrs_bl
.length() > 0)
11684 req
->set_data(xattrs_bl
);
11687 res
= get_or_create(dir
, name
, &de
);
11690 req
->set_dentry(de
);
11692 res
= make_request(req
, perms
, inp
, created
);
11697 /* If the caller passed a value in fhp, do the open */
11699 (*inp
)->get_open_ref(cmode
);
11700 *fhp
= _create_fh(inp
->get(), flags
, cmode
, perms
);
11706 ldout(cct
, 3) << "create(" << path
<< ", 0" << oct
<< mode
<< dec
11707 << " layout " << stripe_unit
11708 << ' ' << stripe_count
11709 << ' ' << object_size
11710 <<") = " << res
<< dendl
;
11719 int Client::_mkdir(Inode
*dir
, const char *name
, mode_t mode
, const UserPerm
& perm
,
11722 ldout(cct
, 3) << "_mkdir(" << dir
->ino
<< " " << name
<< ", 0" << oct
11723 << mode
<< dec
<< ", uid " << perm
.uid()
11724 << ", gid " << perm
.gid() << ")" << dendl
;
11726 if (strlen(name
) > NAME_MAX
)
11727 return -ENAMETOOLONG
;
11729 if (dir
->snapid
!= CEPH_NOSNAP
&& dir
->snapid
!= CEPH_SNAPDIR
) {
11732 if (is_quota_files_exceeded(dir
, perm
)) {
11735 MetaRequest
*req
= new MetaRequest(dir
->snapid
== CEPH_SNAPDIR
?
11736 CEPH_MDS_OP_MKSNAP
: CEPH_MDS_OP_MKDIR
);
11739 dir
->make_nosnap_relative_path(path
);
11740 path
.push_dentry(name
);
11741 req
->set_filepath(path
);
11742 req
->set_inode(dir
);
11743 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11744 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11747 bufferlist xattrs_bl
;
11748 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perm
);
11751 req
->head
.args
.mkdir
.mode
= mode
;
11752 if (xattrs_bl
.length() > 0)
11753 req
->set_data(xattrs_bl
);
11756 res
= get_or_create(dir
, name
, &de
);
11759 req
->set_dentry(de
);
11761 ldout(cct
, 10) << "_mkdir: making request" << dendl
;
11762 res
= make_request(req
, perm
, inp
);
11763 ldout(cct
, 10) << "_mkdir result is " << res
<< dendl
;
11767 ldout(cct
, 3) << "_mkdir(" << path
<< ", 0" << oct
<< mode
<< dec
<< ") = " << res
<< dendl
;
11775 int Client::ll_mkdir(Inode
*parent
, const char *name
, mode_t mode
,
11776 struct stat
*attr
, Inode
**out
, const UserPerm
& perm
)
11778 Mutex::Locker
lock(client_lock
);
11783 vinodeno_t vparent
= _get_vino(parent
);
11785 ldout(cct
, 3) << "ll_mkdir " << vparent
<< " " << name
<< dendl
;
11786 tout(cct
) << "ll_mkdir" << std::endl
;
11787 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11788 tout(cct
) << name
<< std::endl
;
11789 tout(cct
) << mode
<< std::endl
;
11791 if (!cct
->_conf
->fuse_default_permissions
) {
11792 int r
= may_create(parent
, perm
);
11798 int r
= _mkdir(parent
, name
, mode
, perm
, &in
);
11800 fill_stat(in
, attr
);
11803 tout(cct
) << attr
->st_ino
<< std::endl
;
11804 ldout(cct
, 3) << "ll_mkdir " << vparent
<< " " << name
11805 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11810 int Client::ll_mkdirx(Inode
*parent
, const char *name
, mode_t mode
, Inode
**out
,
11811 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
11812 const UserPerm
& perms
)
11814 Mutex::Locker
lock(client_lock
);
11819 vinodeno_t vparent
= _get_vino(parent
);
11821 ldout(cct
, 3) << "ll_mkdirx " << vparent
<< " " << name
<< dendl
;
11822 tout(cct
) << "ll_mkdirx" << std::endl
;
11823 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11824 tout(cct
) << name
<< std::endl
;
11825 tout(cct
) << mode
<< std::endl
;
11827 if (!cct
->_conf
->fuse_default_permissions
) {
11828 int r
= may_create(parent
, perms
);
11834 int r
= _mkdir(parent
, name
, mode
, perms
, &in
);
11836 fill_statx(in
, statx_to_mask(flags
, want
), stx
);
11842 tout(cct
) << stx
->stx_ino
<< std::endl
;
11843 ldout(cct
, 3) << "ll_mkdirx " << vparent
<< " " << name
11844 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11849 int Client::_symlink(Inode
*dir
, const char *name
, const char *target
,
11850 const UserPerm
& perms
, InodeRef
*inp
)
11852 ldout(cct
, 3) << "_symlink(" << dir
->ino
<< " " << name
<< ", " << target
11853 << ", uid " << perms
.uid() << ", gid " << perms
.gid() << ")"
11856 if (strlen(name
) > NAME_MAX
)
11857 return -ENAMETOOLONG
;
11859 if (dir
->snapid
!= CEPH_NOSNAP
) {
11862 if (is_quota_files_exceeded(dir
, perms
)) {
11866 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SYMLINK
);
11869 dir
->make_nosnap_relative_path(path
);
11870 path
.push_dentry(name
);
11871 req
->set_filepath(path
);
11872 req
->set_inode(dir
);
11873 req
->set_string2(target
);
11874 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11875 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11878 int res
= get_or_create(dir
, name
, &de
);
11881 req
->set_dentry(de
);
11883 res
= make_request(req
, perms
, inp
);
11886 ldout(cct
, 3) << "_symlink(\"" << path
<< "\", \"" << target
<< "\") = " <<
11895 int Client::ll_symlink(Inode
*parent
, const char *name
, const char *value
,
11896 struct stat
*attr
, Inode
**out
, const UserPerm
& perms
)
11898 Mutex::Locker
lock(client_lock
);
11903 vinodeno_t vparent
= _get_vino(parent
);
11905 ldout(cct
, 3) << "ll_symlink " << vparent
<< " " << name
<< " -> " << value
11907 tout(cct
) << "ll_symlink" << std::endl
;
11908 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11909 tout(cct
) << name
<< std::endl
;
11910 tout(cct
) << value
<< std::endl
;
11912 if (!cct
->_conf
->fuse_default_permissions
) {
11913 int r
= may_create(parent
, perms
);
11919 int r
= _symlink(parent
, name
, value
, perms
, &in
);
11921 fill_stat(in
, attr
);
11924 tout(cct
) << attr
->st_ino
<< std::endl
;
11925 ldout(cct
, 3) << "ll_symlink " << vparent
<< " " << name
11926 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11931 int Client::ll_symlinkx(Inode
*parent
, const char *name
, const char *value
,
11932 Inode
**out
, struct ceph_statx
*stx
, unsigned want
,
11933 unsigned flags
, const UserPerm
& perms
)
11935 Mutex::Locker
lock(client_lock
);
11940 vinodeno_t vparent
= _get_vino(parent
);
11942 ldout(cct
, 3) << "ll_symlinkx " << vparent
<< " " << name
<< " -> " << value
11944 tout(cct
) << "ll_symlinkx" << std::endl
;
11945 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11946 tout(cct
) << name
<< std::endl
;
11947 tout(cct
) << value
<< std::endl
;
11949 if (!cct
->_conf
->fuse_default_permissions
) {
11950 int r
= may_create(parent
, perms
);
11956 int r
= _symlink(parent
, name
, value
, perms
, &in
);
11958 fill_statx(in
, statx_to_mask(flags
, want
), stx
);
11961 tout(cct
) << stx
->stx_ino
<< std::endl
;
11962 ldout(cct
, 3) << "ll_symlinkx " << vparent
<< " " << name
11963 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11968 int Client::_unlink(Inode
*dir
, const char *name
, const UserPerm
& perm
)
11970 ldout(cct
, 3) << "_unlink(" << dir
->ino
<< " " << name
11971 << " uid " << perm
.uid() << " gid " << perm
.gid()
11974 if (dir
->snapid
!= CEPH_NOSNAP
) {
11978 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_UNLINK
);
11981 dir
->make_nosnap_relative_path(path
);
11982 path
.push_dentry(name
);
11983 req
->set_filepath(path
);
11989 int res
= get_or_create(dir
, name
, &de
);
11992 req
->set_dentry(de
);
11993 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11994 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11996 res
= _lookup(dir
, name
, 0, &otherin
, perm
);
12000 in
= otherin
.get();
12001 req
->set_other_inode(in
);
12002 in
->break_all_delegs();
12003 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
12005 req
->set_inode(dir
);
12007 res
= make_request(req
, perm
);
12010 ldout(cct
, 3) << "unlink(" << path
<< ") = " << res
<< dendl
;
12018 int Client::ll_unlink(Inode
*in
, const char *name
, const UserPerm
& perm
)
12020 Mutex::Locker
lock(client_lock
);
12025 vinodeno_t vino
= _get_vino(in
);
12027 ldout(cct
, 3) << "ll_unlink " << vino
<< " " << name
<< dendl
;
12028 tout(cct
) << "ll_unlink" << std::endl
;
12029 tout(cct
) << vino
.ino
.val
<< std::endl
;
12030 tout(cct
) << name
<< std::endl
;
12032 if (!cct
->_conf
->fuse_default_permissions
) {
12033 int r
= may_delete(in
, name
, perm
);
12037 return _unlink(in
, name
, perm
);
12040 int Client::_rmdir(Inode
*dir
, const char *name
, const UserPerm
& perms
)
12042 ldout(cct
, 3) << "_rmdir(" << dir
->ino
<< " " << name
<< " uid "
12043 << perms
.uid() << " gid " << perms
.gid() << ")" << dendl
;
12045 if (dir
->snapid
!= CEPH_NOSNAP
&& dir
->snapid
!= CEPH_SNAPDIR
) {
12049 int op
= dir
->snapid
== CEPH_SNAPDIR
? CEPH_MDS_OP_RMSNAP
: CEPH_MDS_OP_RMDIR
;
12050 MetaRequest
*req
= new MetaRequest(op
);
12052 dir
->make_nosnap_relative_path(path
);
12053 path
.push_dentry(name
);
12054 req
->set_filepath(path
);
12056 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
12057 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
12058 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
12063 int res
= get_or_create(dir
, name
, &de
);
12066 if (op
== CEPH_MDS_OP_RMDIR
)
12067 req
->set_dentry(de
);
12071 res
= _lookup(dir
, name
, 0, &in
, perms
);
12074 if (op
== CEPH_MDS_OP_RMDIR
) {
12075 req
->set_inode(dir
);
12076 req
->set_other_inode(in
.get());
12078 unlink(de
, true, true);
12080 req
->set_other_inode(in
.get());
12083 res
= make_request(req
, perms
);
12086 ldout(cct
, 3) << "rmdir(" << path
<< ") = " << res
<< dendl
;
12094 int Client::ll_rmdir(Inode
*in
, const char *name
, const UserPerm
& perms
)
12096 Mutex::Locker
lock(client_lock
);
12101 vinodeno_t vino
= _get_vino(in
);
12103 ldout(cct
, 3) << "ll_rmdir " << vino
<< " " << name
<< dendl
;
12104 tout(cct
) << "ll_rmdir" << std::endl
;
12105 tout(cct
) << vino
.ino
.val
<< std::endl
;
12106 tout(cct
) << name
<< std::endl
;
12108 if (!cct
->_conf
->fuse_default_permissions
) {
12109 int r
= may_delete(in
, name
, perms
);
12114 return _rmdir(in
, name
, perms
);
12117 int Client::_rename(Inode
*fromdir
, const char *fromname
, Inode
*todir
, const char *toname
, const UserPerm
& perm
)
12119 ldout(cct
, 3) << "_rename(" << fromdir
->ino
<< " " << fromname
<< " to "
12120 << todir
->ino
<< " " << toname
12121 << " uid " << perm
.uid() << " gid " << perm
.gid() << ")"
12124 if (fromdir
->snapid
!= todir
->snapid
)
12127 int op
= CEPH_MDS_OP_RENAME
;
12128 if (fromdir
->snapid
!= CEPH_NOSNAP
) {
12129 if (fromdir
== todir
&& fromdir
->snapid
== CEPH_SNAPDIR
)
12130 op
= CEPH_MDS_OP_RENAMESNAP
;
12134 if (fromdir
!= todir
) {
12135 Inode
*fromdir_root
=
12136 fromdir
->quota
.is_enable() ? fromdir
: get_quota_root(fromdir
, perm
);
12137 Inode
*todir_root
=
12138 todir
->quota
.is_enable() ? todir
: get_quota_root(todir
, perm
);
12139 if (fromdir_root
!= todir_root
) {
12145 MetaRequest
*req
= new MetaRequest(op
);
12148 fromdir
->make_nosnap_relative_path(from
);
12149 from
.push_dentry(fromname
);
12151 todir
->make_nosnap_relative_path(to
);
12152 to
.push_dentry(toname
);
12153 req
->set_filepath(to
);
12154 req
->set_filepath2(from
);
12157 int res
= get_or_create(fromdir
, fromname
, &oldde
);
12161 res
= get_or_create(todir
, toname
, &de
);
12165 if (op
== CEPH_MDS_OP_RENAME
) {
12166 req
->set_old_dentry(oldde
);
12167 req
->old_dentry_drop
= CEPH_CAP_FILE_SHARED
;
12168 req
->old_dentry_unless
= CEPH_CAP_FILE_EXCL
;
12170 req
->set_dentry(de
);
12171 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
12172 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
12174 InodeRef oldin
, otherin
;
12175 res
= _lookup(fromdir
, fromname
, 0, &oldin
, perm
);
12179 Inode
*oldinode
= oldin
.get();
12180 oldinode
->break_all_delegs();
12181 req
->set_old_inode(oldinode
);
12182 req
->old_inode_drop
= CEPH_CAP_LINK_SHARED
;
12184 res
= _lookup(todir
, toname
, 0, &otherin
, perm
);
12188 Inode
*in
= otherin
.get();
12189 req
->set_other_inode(in
);
12190 in
->break_all_delegs();
12192 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
12200 req
->set_inode(todir
);
12202 // renamesnap reply contains no tracedn, so we need to invalidate
12204 unlink(oldde
, true, true);
12205 unlink(de
, true, true);
12208 res
= make_request(req
, perm
, &target
);
12209 ldout(cct
, 10) << "rename result is " << res
<< dendl
;
12211 // renamed item from our cache
12214 ldout(cct
, 3) << "_rename(" << from
<< ", " << to
<< ") = " << res
<< dendl
;
12222 int Client::ll_rename(Inode
*parent
, const char *name
, Inode
*newparent
,
12223 const char *newname
, const UserPerm
& perm
)
12225 Mutex::Locker
lock(client_lock
);
12230 vinodeno_t vparent
= _get_vino(parent
);
12231 vinodeno_t vnewparent
= _get_vino(newparent
);
12233 ldout(cct
, 3) << "ll_rename " << vparent
<< " " << name
<< " to "
12234 << vnewparent
<< " " << newname
<< dendl
;
12235 tout(cct
) << "ll_rename" << std::endl
;
12236 tout(cct
) << vparent
.ino
.val
<< std::endl
;
12237 tout(cct
) << name
<< std::endl
;
12238 tout(cct
) << vnewparent
.ino
.val
<< std::endl
;
12239 tout(cct
) << newname
<< std::endl
;
12241 if (!cct
->_conf
->fuse_default_permissions
) {
12242 int r
= may_delete(parent
, name
, perm
);
12245 r
= may_delete(newparent
, newname
, perm
);
12246 if (r
< 0 && r
!= -ENOENT
)
12250 return _rename(parent
, name
, newparent
, newname
, perm
);
12253 int Client::_link(Inode
*in
, Inode
*dir
, const char *newname
, const UserPerm
& perm
, InodeRef
*inp
)
12255 ldout(cct
, 3) << "_link(" << in
->ino
<< " to " << dir
->ino
<< " " << newname
12256 << " uid " << perm
.uid() << " gid " << perm
.gid() << ")" << dendl
;
12258 if (strlen(newname
) > NAME_MAX
)
12259 return -ENAMETOOLONG
;
12261 if (in
->snapid
!= CEPH_NOSNAP
|| dir
->snapid
!= CEPH_NOSNAP
) {
12264 if (is_quota_files_exceeded(dir
, perm
)) {
12268 in
->break_all_delegs();
12269 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LINK
);
12271 filepath
path(newname
, dir
->ino
);
12272 req
->set_filepath(path
);
12273 filepath
existing(in
->ino
);
12274 req
->set_filepath2(existing
);
12276 req
->set_inode(dir
);
12277 req
->inode_drop
= CEPH_CAP_FILE_SHARED
;
12278 req
->inode_unless
= CEPH_CAP_FILE_EXCL
;
12281 int res
= get_or_create(dir
, newname
, &de
);
12284 req
->set_dentry(de
);
12286 res
= make_request(req
, perm
, inp
);
12287 ldout(cct
, 10) << "link result is " << res
<< dendl
;
12290 ldout(cct
, 3) << "link(" << existing
<< ", " << path
<< ") = " << res
<< dendl
;
12298 int Client::ll_link(Inode
*in
, Inode
*newparent
, const char *newname
,
12299 const UserPerm
& perm
)
12301 Mutex::Locker
lock(client_lock
);
12306 vinodeno_t vino
= _get_vino(in
);
12307 vinodeno_t vnewparent
= _get_vino(newparent
);
12309 ldout(cct
, 3) << "ll_link " << vino
<< " to " << vnewparent
<< " " <<
12311 tout(cct
) << "ll_link" << std::endl
;
12312 tout(cct
) << vino
.ino
.val
<< std::endl
;
12313 tout(cct
) << vnewparent
<< std::endl
;
12314 tout(cct
) << newname
<< std::endl
;
12319 if (!cct
->_conf
->fuse_default_permissions
) {
12320 if (S_ISDIR(in
->mode
))
12323 r
= may_hardlink(in
, perm
);
12327 r
= may_create(newparent
, perm
);
12332 return _link(in
, newparent
, newname
, perm
, &target
);
12335 int Client::ll_num_osds(void)
12337 Mutex::Locker
lock(client_lock
);
12338 return objecter
->with_osdmap(std::mem_fn(&OSDMap::get_num_osds
));
12341 int Client::ll_osdaddr(int osd
, uint32_t *addr
)
12343 Mutex::Locker
lock(client_lock
);
12346 bool exists
= objecter
->with_osdmap([&](const OSDMap
& o
) {
12347 if (!o
.exists(osd
))
12349 g
= o
.get_addr(osd
);
12354 uint32_t nb_addr
= (g
.in4_addr()).sin_addr
.s_addr
;
12355 *addr
= ntohl(nb_addr
);
12359 uint32_t Client::ll_stripe_unit(Inode
*in
)
12361 Mutex::Locker
lock(client_lock
);
12362 return in
->layout
.stripe_unit
;
12365 uint64_t Client::ll_snap_seq(Inode
*in
)
12367 Mutex::Locker
lock(client_lock
);
12368 return in
->snaprealm
->seq
;
12371 int Client::ll_file_layout(Inode
*in
, file_layout_t
*layout
)
12373 Mutex::Locker
lock(client_lock
);
12374 *layout
= in
->layout
;
12378 int Client::ll_file_layout(Fh
*fh
, file_layout_t
*layout
)
12380 return ll_file_layout(fh
->inode
.get(), layout
);
12383 /* Currently we cannot take advantage of redundancy in reads, since we
12384 would have to go through all possible placement groups (a
12385 potentially quite large number determined by a hash), and use CRUSH
12386 to calculate the appropriate set of OSDs for each placement group,
12387 then index into that. An array with one entry per OSD is much more
12388 tractable and works for demonstration purposes. */
12390 int Client::ll_get_stripe_osd(Inode
*in
, uint64_t blockno
,
12391 file_layout_t
* layout
)
12393 Mutex::Locker
lock(client_lock
);
12395 inodeno_t ino
= in
->ino
;
12396 uint32_t object_size
= layout
->object_size
;
12397 uint32_t su
= layout
->stripe_unit
;
12398 uint32_t stripe_count
= layout
->stripe_count
;
12399 uint64_t stripes_per_object
= object_size
/ su
;
12401 uint64_t stripeno
= blockno
/ stripe_count
; // which horizontal stripe (Y)
12402 uint64_t stripepos
= blockno
% stripe_count
; // which object in the object set (X)
12403 uint64_t objectsetno
= stripeno
/ stripes_per_object
; // which object set
12404 uint64_t objectno
= objectsetno
* stripe_count
+ stripepos
; // object id
12406 object_t oid
= file_object_t(ino
, objectno
);
12407 return objecter
->with_osdmap([&](const OSDMap
& o
) {
12408 ceph_object_layout olayout
=
12409 o
.file_to_object_layout(oid
, *layout
);
12410 pg_t pg
= (pg_t
)olayout
.ol_pgid
;
12413 o
.pg_to_acting_osds(pg
, &osds
, &primary
);
12418 /* Return the offset of the block, internal to the object */
12420 uint64_t Client::ll_get_internal_offset(Inode
*in
, uint64_t blockno
)
12422 Mutex::Locker
lock(client_lock
);
12423 file_layout_t
*layout
=&(in
->layout
);
12424 uint32_t object_size
= layout
->object_size
;
12425 uint32_t su
= layout
->stripe_unit
;
12426 uint64_t stripes_per_object
= object_size
/ su
;
12428 return (blockno
% stripes_per_object
) * su
;
12431 int Client::ll_opendir(Inode
*in
, int flags
, dir_result_t
** dirpp
,
12432 const UserPerm
& perms
)
12434 Mutex::Locker
lock(client_lock
);
12439 vinodeno_t vino
= _get_vino(in
);
12441 ldout(cct
, 3) << "ll_opendir " << vino
<< dendl
;
12442 tout(cct
) << "ll_opendir" << std::endl
;
12443 tout(cct
) << vino
.ino
.val
<< std::endl
;
12445 if (!cct
->_conf
->fuse_default_permissions
) {
12446 int r
= may_open(in
, flags
, perms
);
12451 int r
= _opendir(in
, dirpp
, perms
);
12452 tout(cct
) << (unsigned long)*dirpp
<< std::endl
;
12454 ldout(cct
, 3) << "ll_opendir " << vino
<< " = " << r
<< " (" << *dirpp
<< ")"
12459 int Client::ll_releasedir(dir_result_t
*dirp
)
12461 Mutex::Locker
lock(client_lock
);
12462 ldout(cct
, 3) << "ll_releasedir " << dirp
<< dendl
;
12463 tout(cct
) << "ll_releasedir" << std::endl
;
12464 tout(cct
) << (unsigned long)dirp
<< std::endl
;
12473 int Client::ll_fsyncdir(dir_result_t
*dirp
)
12475 Mutex::Locker
lock(client_lock
);
12476 ldout(cct
, 3) << "ll_fsyncdir " << dirp
<< dendl
;
12477 tout(cct
) << "ll_fsyncdir" << std::endl
;
12478 tout(cct
) << (unsigned long)dirp
<< std::endl
;
12483 return _fsync(dirp
->inode
.get(), false);
12486 int Client::ll_open(Inode
*in
, int flags
, Fh
**fhp
, const UserPerm
& perms
)
12488 assert(!(flags
& O_CREAT
));
12490 Mutex::Locker
lock(client_lock
);
12495 vinodeno_t vino
= _get_vino(in
);
12497 ldout(cct
, 3) << "ll_open " << vino
<< " " << ceph_flags_sys2wire(flags
) << dendl
;
12498 tout(cct
) << "ll_open" << std::endl
;
12499 tout(cct
) << vino
.ino
.val
<< std::endl
;
12500 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
12503 if (!cct
->_conf
->fuse_default_permissions
) {
12504 r
= may_open(in
, flags
, perms
);
12509 r
= _open(in
, flags
, 0, fhp
/* may be NULL */, perms
);
12512 Fh
*fhptr
= fhp
? *fhp
: NULL
;
12514 ll_unclosed_fh_set
.insert(fhptr
);
12516 tout(cct
) << (unsigned long)fhptr
<< std::endl
;
12517 ldout(cct
, 3) << "ll_open " << vino
<< " " << ceph_flags_sys2wire(flags
) <<
12518 " = " << r
<< " (" << fhptr
<< ")" << dendl
;
12522 int Client::_ll_create(Inode
*parent
, const char *name
, mode_t mode
,
12523 int flags
, InodeRef
*in
, int caps
, Fh
**fhp
,
12524 const UserPerm
& perms
)
12528 vinodeno_t vparent
= _get_vino(parent
);
12530 ldout(cct
, 3) << "_ll_create " << vparent
<< " " << name
<< " 0" << oct
<<
12531 mode
<< dec
<< " " << ceph_flags_sys2wire(flags
) << ", uid " << perms
.uid()
12532 << ", gid " << perms
.gid() << dendl
;
12533 tout(cct
) << "ll_create" << std::endl
;
12534 tout(cct
) << vparent
.ino
.val
<< std::endl
;
12535 tout(cct
) << name
<< std::endl
;
12536 tout(cct
) << mode
<< std::endl
;
12537 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
12539 bool created
= false;
12540 int r
= _lookup(parent
, name
, caps
, in
, perms
);
12542 if (r
== 0 && (flags
& O_CREAT
) && (flags
& O_EXCL
))
12545 if (r
== -ENOENT
&& (flags
& O_CREAT
)) {
12546 if (!cct
->_conf
->fuse_default_permissions
) {
12547 r
= may_create(parent
, perms
);
12551 r
= _create(parent
, name
, flags
, mode
, in
, fhp
, 0, 0, 0, NULL
, &created
,
12562 ldout(cct
, 20) << "_ll_create created = " << created
<< dendl
;
12564 if (!cct
->_conf
->fuse_default_permissions
) {
12565 r
= may_open(in
->get(), flags
, perms
);
12568 int release_r
= _release_fh(*fhp
);
12569 assert(release_r
== 0); // during create, no async data ops should have happened
12574 if (*fhp
== NULL
) {
12575 r
= _open(in
->get(), flags
, mode
, fhp
, perms
);
12583 ll_unclosed_fh_set
.insert(*fhp
);
12588 Inode
*inode
= in
->get();
12589 if (use_faked_inos())
12590 ino
= inode
->faked_ino
;
12595 tout(cct
) << (unsigned long)*fhp
<< std::endl
;
12596 tout(cct
) << ino
<< std::endl
;
12597 ldout(cct
, 3) << "_ll_create " << vparent
<< " " << name
<< " 0" << oct
<<
12598 mode
<< dec
<< " " << ceph_flags_sys2wire(flags
) << " = " << r
<< " (" <<
12599 *fhp
<< " " << hex
<< ino
<< dec
<< ")" << dendl
;
12604 int Client::ll_create(Inode
*parent
, const char *name
, mode_t mode
,
12605 int flags
, struct stat
*attr
, Inode
**outp
, Fh
**fhp
,
12606 const UserPerm
& perms
)
12608 Mutex::Locker
lock(client_lock
);
12614 int r
= _ll_create(parent
, name
, mode
, flags
, &in
, CEPH_STAT_CAP_INODE_ALL
,
12619 // passing an Inode in outp requires an additional ref
12624 fill_stat(in
, attr
);
12632 int Client::ll_createx(Inode
*parent
, const char *name
, mode_t mode
,
12633 int oflags
, Inode
**outp
, Fh
**fhp
,
12634 struct ceph_statx
*stx
, unsigned want
, unsigned lflags
,
12635 const UserPerm
& perms
)
12637 unsigned caps
= statx_to_mask(lflags
, want
);
12638 Mutex::Locker
lock(client_lock
);
12644 int r
= _ll_create(parent
, name
, mode
, oflags
, &in
, caps
, fhp
, perms
);
12648 // passing an Inode in outp requires an additional ref
12653 fill_statx(in
, caps
, stx
);
12662 loff_t
Client::ll_lseek(Fh
*fh
, loff_t offset
, int whence
)
12664 Mutex::Locker
lock(client_lock
);
12665 tout(cct
) << "ll_lseek" << std::endl
;
12666 tout(cct
) << offset
<< std::endl
;
12667 tout(cct
) << whence
<< std::endl
;
12672 return _lseek(fh
, offset
, whence
);
12675 int Client::ll_read(Fh
*fh
, loff_t off
, loff_t len
, bufferlist
*bl
)
12677 Mutex::Locker
lock(client_lock
);
12678 ldout(cct
, 3) << "ll_read " << fh
<< " " << fh
->inode
->ino
<< " " << " " << off
<< "~" << len
<< dendl
;
12679 tout(cct
) << "ll_read" << std::endl
;
12680 tout(cct
) << (unsigned long)fh
<< std::endl
;
12681 tout(cct
) << off
<< std::endl
;
12682 tout(cct
) << len
<< std::endl
;
12687 return _read(fh
, off
, len
, bl
);
12690 int Client::ll_read_block(Inode
*in
, uint64_t blockid
,
12694 file_layout_t
* layout
)
12696 Mutex::Locker
lock(client_lock
);
12701 vinodeno_t vino
= _get_vino(in
);
12702 object_t oid
= file_object_t(vino
.ino
, blockid
);
12703 C_SaferCond onfinish
;
12706 objecter
->read(oid
,
12707 object_locator_t(layout
->pool_id
),
12712 CEPH_OSD_FLAG_READ
,
12715 client_lock
.Unlock();
12716 int r
= onfinish
.wait();
12717 client_lock
.Lock();
12720 bl
.copy(0, bl
.length(), buf
);
12727 /* It appears that the OSD doesn't return success unless the entire
12728 buffer was written, return the write length on success. */
12730 int Client::ll_write_block(Inode
*in
, uint64_t blockid
,
12731 char* buf
, uint64_t offset
,
12732 uint64_t length
, file_layout_t
* layout
,
12733 uint64_t snapseq
, uint32_t sync
)
12735 Mutex
flock("Client::ll_write_block flock");
12736 vinodeno_t vino
= ll_get_vino(in
);
12740 Context
*onsafe
= nullptr;
12745 if (true || sync
) {
12746 /* if write is stable, the epilogue is waiting on
12748 onsafe
= new C_SafeCond(&flock
, &cond
, &done
, &r
);
12751 /* if write is unstable, we just place a barrier for
12752 * future commits to wait on */
12753 /*onsafe = new C_Block_Sync(this, vino.ino,
12754 barrier_interval(offset, offset + length), &r);
12758 object_t oid
= file_object_t(vino
.ino
, blockid
);
12759 SnapContext fakesnap
;
12761 if (length
> 0) bp
= buffer::copy(buf
, length
);
12765 ldout(cct
, 1) << "ll_block_write for " << vino
.ino
<< "." << blockid
12768 fakesnap
.seq
= snapseq
;
12770 /* lock just in time */
12771 client_lock
.Lock();
12773 client_lock
.Unlock();
12778 objecter
->write(oid
,
12779 object_locator_t(layout
->pool_id
),
12784 ceph::real_clock::now(),
12788 client_lock
.Unlock();
12789 if (!done
/* also !sync */) {
12803 int Client::ll_commit_blocks(Inode
*in
,
12807 Mutex::Locker
lock(client_lock
);
12809 BarrierContext *bctx;
12810 vinodeno_t vino = _get_vino(in);
12811 uint64_t ino = vino.ino;
12813 ldout(cct, 1) << "ll_commit_blocks for " << vino.ino << " from "
12814 << offset << " to " << length << dendl;
12820 map<uint64_t, BarrierContext*>::iterator p = barriers.find(ino);
12821 if (p != barriers.end()) {
12822 barrier_interval civ(offset, offset + length);
12823 p->second->commit_barrier(civ);
12829 int Client::ll_write(Fh
*fh
, loff_t off
, loff_t len
, const char *data
)
12831 Mutex::Locker
lock(client_lock
);
12832 ldout(cct
, 3) << "ll_write " << fh
<< " " << fh
->inode
->ino
<< " " << off
<<
12833 "~" << len
<< dendl
;
12834 tout(cct
) << "ll_write" << std::endl
;
12835 tout(cct
) << (unsigned long)fh
<< std::endl
;
12836 tout(cct
) << off
<< std::endl
;
12837 tout(cct
) << len
<< std::endl
;
12842 int r
= _write(fh
, off
, len
, data
, NULL
, 0);
12843 ldout(cct
, 3) << "ll_write " << fh
<< " " << off
<< "~" << len
<< " = " << r
12848 int Client::ll_flush(Fh
*fh
)
12850 Mutex::Locker
lock(client_lock
);
12851 ldout(cct
, 3) << "ll_flush " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
12852 tout(cct
) << "ll_flush" << std::endl
;
12853 tout(cct
) << (unsigned long)fh
<< std::endl
;
12861 int Client::ll_fsync(Fh
*fh
, bool syncdataonly
)
12863 Mutex::Locker
lock(client_lock
);
12864 ldout(cct
, 3) << "ll_fsync " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
12865 tout(cct
) << "ll_fsync" << std::endl
;
12866 tout(cct
) << (unsigned long)fh
<< std::endl
;
12871 int r
= _fsync(fh
, syncdataonly
);
12873 // If we're returning an error, clear it from the FH
12874 fh
->take_async_err();
12879 int Client::ll_sync_inode(Inode
*in
, bool syncdataonly
)
12881 Mutex::Locker
lock(client_lock
);
12882 ldout(cct
, 3) << "ll_sync_inode " << *in
<< " " << dendl
;
12883 tout(cct
) << "ll_sync_inode" << std::endl
;
12884 tout(cct
) << (unsigned long)in
<< std::endl
;
12889 return _fsync(in
, syncdataonly
);
12892 #ifdef FALLOC_FL_PUNCH_HOLE
12894 int Client::_fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
)
12896 if (offset
< 0 || length
<= 0)
12899 if (mode
& ~(FALLOC_FL_KEEP_SIZE
| FALLOC_FL_PUNCH_HOLE
))
12900 return -EOPNOTSUPP
;
12902 if ((mode
& FALLOC_FL_PUNCH_HOLE
) && !(mode
& FALLOC_FL_KEEP_SIZE
))
12903 return -EOPNOTSUPP
;
12905 Inode
*in
= fh
->inode
.get();
12907 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
) &&
12908 !(mode
& FALLOC_FL_PUNCH_HOLE
)) {
12912 if (in
->snapid
!= CEPH_NOSNAP
)
12915 if ((fh
->mode
& CEPH_FILE_MODE_WR
) == 0)
12918 uint64_t size
= offset
+ length
;
12919 std::list
<InodeRef
> quota_roots
;
12920 if (!(mode
& (FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
)) &&
12922 is_quota_bytes_exceeded(in
, size
- in
->size
, fh
->actor_perms
, "a_roots
)) {
12927 int r
= get_caps(in
, CEPH_CAP_FILE_WR
, CEPH_CAP_FILE_BUFFER
, &have
, -1);
12931 Mutex
uninline_flock("Client::_fallocate_uninline_data flock");
12932 Cond uninline_cond
;
12933 bool uninline_done
= false;
12934 int uninline_ret
= 0;
12935 Context
*onuninline
= NULL
;
12937 if (mode
& FALLOC_FL_PUNCH_HOLE
) {
12938 if (in
->inline_version
< CEPH_INLINE_NONE
&&
12939 (have
& CEPH_CAP_FILE_BUFFER
)) {
12941 int len
= in
->inline_data
.length();
12942 if (offset
< len
) {
12944 in
->inline_data
.copy(0, offset
, bl
);
12946 if (offset
+ size
> len
)
12947 size
= len
- offset
;
12949 bl
.append_zero(size
);
12950 if (offset
+ size
< len
)
12951 in
->inline_data
.copy(offset
+ size
, len
- offset
- size
, bl
);
12952 in
->inline_data
= bl
;
12953 in
->inline_version
++;
12955 in
->mtime
= ceph_clock_now();
12957 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
12959 if (in
->inline_version
< CEPH_INLINE_NONE
) {
12960 onuninline
= new C_SafeCond(&uninline_flock
,
12964 uninline_data(in
, onuninline
);
12967 Mutex
flock("Client::_punch_hole flock");
12970 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
);
12972 unsafe_sync_write
++;
12973 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
12975 _invalidate_inode_cache(in
, offset
, length
);
12976 filer
->zero(in
->ino
, &in
->layout
,
12977 in
->snaprealm
->get_snap_context(),
12979 ceph::real_clock::now(),
12980 0, true, onfinish
);
12981 in
->mtime
= ceph_clock_now();
12983 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
12985 client_lock
.Unlock();
12990 client_lock
.Lock();
12991 _sync_write_commit(in
);
12993 } else if (!(mode
& FALLOC_FL_KEEP_SIZE
)) {
12994 uint64_t size
= offset
+ length
;
12995 if (size
> in
->size
) {
12997 in
->mtime
= ceph_clock_now();
12999 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
13001 if (is_quota_bytes_approaching(in
, quota_roots
)) {
13002 check_caps(in
, CHECK_CAPS_NODELAY
);
13003 } else if (is_max_size_approaching(in
)) {
13010 client_lock
.Unlock();
13011 uninline_flock
.Lock();
13012 while (!uninline_done
)
13013 uninline_cond
.Wait(uninline_flock
);
13014 uninline_flock
.Unlock();
13015 client_lock
.Lock();
13017 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
13018 in
->inline_data
.clear();
13019 in
->inline_version
= CEPH_INLINE_NONE
;
13020 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
13026 put_cap_ref(in
, CEPH_CAP_FILE_WR
);
13031 int Client::_fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
)
13033 return -EOPNOTSUPP
;
13039 int Client::ll_fallocate(Fh
*fh
, int mode
, loff_t offset
, loff_t length
)
13041 Mutex::Locker
lock(client_lock
);
13042 ldout(cct
, 3) << "ll_fallocate " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
13043 tout(cct
) << "ll_fallocate " << mode
<< " " << offset
<< " " << length
<< std::endl
;
13044 tout(cct
) << (unsigned long)fh
<< std::endl
;
13049 return _fallocate(fh
, mode
, offset
, length
);
13052 int Client::fallocate(int fd
, int mode
, loff_t offset
, loff_t length
)
13054 Mutex::Locker
lock(client_lock
);
13055 tout(cct
) << "fallocate " << " " << fd
<< mode
<< " " << offset
<< " " << length
<< std::endl
;
13060 Fh
*fh
= get_filehandle(fd
);
13063 #if defined(__linux__) && defined(O_PATH)
13064 if (fh
->flags
& O_PATH
)
13067 return _fallocate(fh
, mode
, offset
, length
);
13070 int Client::ll_release(Fh
*fh
)
13072 Mutex::Locker
lock(client_lock
);
13073 ldout(cct
, 3) << "ll_release (fh)" << fh
<< " " << fh
->inode
->ino
<< " " <<
13075 tout(cct
) << "ll_release (fh)" << std::endl
;
13076 tout(cct
) << (unsigned long)fh
<< std::endl
;
13081 if (ll_unclosed_fh_set
.count(fh
))
13082 ll_unclosed_fh_set
.erase(fh
);
13083 return _release_fh(fh
);
13086 int Client::ll_getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
)
13088 Mutex::Locker
lock(client_lock
);
13090 ldout(cct
, 3) << "ll_getlk (fh)" << fh
<< " " << fh
->inode
->ino
<< dendl
;
13091 tout(cct
) << "ll_getk (fh)" << (unsigned long)fh
<< std::endl
;
13096 return _getlk(fh
, fl
, owner
);
13099 int Client::ll_setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
)
13101 Mutex::Locker
lock(client_lock
);
13103 ldout(cct
, 3) << "ll_setlk (fh) " << fh
<< " " << fh
->inode
->ino
<< dendl
;
13104 tout(cct
) << "ll_setk (fh)" << (unsigned long)fh
<< std::endl
;
13109 return _setlk(fh
, fl
, owner
, sleep
);
13112 int Client::ll_flock(Fh
*fh
, int cmd
, uint64_t owner
)
13114 Mutex::Locker
lock(client_lock
);
13116 ldout(cct
, 3) << "ll_flock (fh) " << fh
<< " " << fh
->inode
->ino
<< dendl
;
13117 tout(cct
) << "ll_flock (fh)" << (unsigned long)fh
<< std::endl
;
13122 return _flock(fh
, cmd
, owner
);
13125 int Client::set_deleg_timeout(uint32_t timeout
)
13127 Mutex::Locker
lock(client_lock
);
13130 * The whole point is to prevent blacklisting so we must time out the
13131 * delegation before the session autoclose timeout kicks in.
13133 if (timeout
>= mdsmap
->get_session_autoclose())
13136 deleg_timeout
= timeout
;
13140 int Client::ll_delegation(Fh
*fh
, unsigned cmd
, ceph_deleg_cb_t cb
, void *priv
)
13144 Mutex::Locker
lock(client_lock
);
13149 Inode
*inode
= fh
->inode
.get();
13152 case CEPH_DELEGATION_NONE
:
13153 inode
->unset_deleg(fh
);
13158 ret
= inode
->set_deleg(fh
, cmd
, cb
, priv
);
13159 } catch (std::bad_alloc
) {
13167 class C_Client_RequestInterrupt
: public Context
{
13172 C_Client_RequestInterrupt(Client
*c
, MetaRequest
*r
) : client(c
), req(r
) {
13175 void finish(int r
) override
{
13176 Mutex::Locker
l(client
->client_lock
);
13177 assert(req
->head
.op
== CEPH_MDS_OP_SETFILELOCK
);
13178 client
->_interrupt_filelock(req
);
13179 client
->put_request(req
);
13183 void Client::ll_interrupt(void *d
)
13185 MetaRequest
*req
= static_cast<MetaRequest
*>(d
);
13186 ldout(cct
, 3) << "ll_interrupt tid " << req
->get_tid() << dendl
;
13187 tout(cct
) << "ll_interrupt tid " << req
->get_tid() << std::endl
;
13188 interrupt_finisher
.queue(new C_Client_RequestInterrupt(this, req
));
13191 // =========================================
13194 // expose file layouts
13196 int Client::describe_layout(const char *relpath
, file_layout_t
*lp
,
13197 const UserPerm
& perms
)
13199 Mutex::Locker
lock(client_lock
);
13204 filepath
path(relpath
);
13206 int r
= path_walk(path
, &in
, perms
);
13212 ldout(cct
, 3) << "describe_layout(" << relpath
<< ") = 0" << dendl
;
13216 int Client::fdescribe_layout(int fd
, file_layout_t
*lp
)
13218 Mutex::Locker
lock(client_lock
);
13223 Fh
*f
= get_filehandle(fd
);
13226 Inode
*in
= f
->inode
.get();
13230 ldout(cct
, 3) << "fdescribe_layout(" << fd
<< ") = 0" << dendl
;
13234 int64_t Client::get_default_pool_id()
13236 Mutex::Locker
lock(client_lock
);
13241 /* first data pool is the default */
13242 return mdsmap
->get_first_data_pool();
13247 int64_t Client::get_pool_id(const char *pool_name
)
13249 Mutex::Locker
lock(client_lock
);
13254 return objecter
->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name
),
13258 string
Client::get_pool_name(int64_t pool
)
13260 Mutex::Locker
lock(client_lock
);
13265 return objecter
->with_osdmap([pool
](const OSDMap
& o
) {
13266 return o
.have_pg_pool(pool
) ? o
.get_pool_name(pool
) : string();
13270 int Client::get_pool_replication(int64_t pool
)
13272 Mutex::Locker
lock(client_lock
);
13277 return objecter
->with_osdmap([pool
](const OSDMap
& o
) {
13278 return o
.have_pg_pool(pool
) ? o
.get_pg_pool(pool
)->get_size() : -ENOENT
;
13282 int Client::get_file_extent_osds(int fd
, loff_t off
, loff_t
*len
, vector
<int>& osds
)
13284 Mutex::Locker
lock(client_lock
);
13289 Fh
*f
= get_filehandle(fd
);
13292 Inode
*in
= f
->inode
.get();
13294 vector
<ObjectExtent
> extents
;
13295 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, off
, 1, in
->truncate_size
, extents
);
13296 assert(extents
.size() == 1);
13298 objecter
->with_osdmap([&](const OSDMap
& o
) {
13299 pg_t pg
= o
.object_locator_to_pg(extents
[0].oid
, extents
[0].oloc
);
13300 o
.pg_to_acting_osds(pg
, osds
);
13307 * Return the remainder of the extent (stripe unit)
13309 * If length = 1 is passed to Striper::file_to_extents we get a single
13310 * extent back, but its length is one so we still need to compute the length
13311 * to the end of the stripe unit.
13313 * If length = su then we may get 1 or 2 objects back in the extents vector
13314 * which would have to be examined. Even then, the offsets are local to the
13315 * object, so matching up to the file offset is extra work.
13317 * It seems simpler to stick with length = 1 and manually compute the
13321 uint64_t su
= in
->layout
.stripe_unit
;
13322 *len
= su
- (off
% su
);
13328 int Client::get_osd_crush_location(int id
, vector
<pair
<string
, string
> >& path
)
13330 Mutex::Locker
lock(client_lock
);
13337 return objecter
->with_osdmap([&](const OSDMap
& o
) {
13338 return o
.crush
->get_full_location_ordered(id
, path
);
13342 int Client::get_file_stripe_address(int fd
, loff_t offset
,
13343 vector
<entity_addr_t
>& address
)
13345 Mutex::Locker
lock(client_lock
);
13350 Fh
*f
= get_filehandle(fd
);
13353 Inode
*in
= f
->inode
.get();
13356 vector
<ObjectExtent
> extents
;
13357 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, offset
, 1,
13358 in
->truncate_size
, extents
);
13359 assert(extents
.size() == 1);
13361 // now we have the object and its 'layout'
13362 return objecter
->with_osdmap([&](const OSDMap
& o
) {
13363 pg_t pg
= o
.object_locator_to_pg(extents
[0].oid
, extents
[0].oloc
);
13365 o
.pg_to_acting_osds(pg
, osds
);
13368 for (unsigned i
= 0; i
< osds
.size(); i
++) {
13369 entity_addr_t addr
= o
.get_addr(osds
[i
]);
13370 address
.push_back(addr
);
13376 int Client::get_osd_addr(int osd
, entity_addr_t
& addr
)
13378 Mutex::Locker
lock(client_lock
);
13383 return objecter
->with_osdmap([&](const OSDMap
& o
) {
13384 if (!o
.exists(osd
))
13387 addr
= o
.get_addr(osd
);
13392 int Client::enumerate_layout(int fd
, vector
<ObjectExtent
>& result
,
13393 loff_t length
, loff_t offset
)
13395 Mutex::Locker
lock(client_lock
);
13400 Fh
*f
= get_filehandle(fd
);
13403 Inode
*in
= f
->inode
.get();
13405 // map to a list of extents
13406 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, offset
, length
, in
->truncate_size
, result
);
13408 ldout(cct
, 3) << "enumerate_layout(" << fd
<< ", " << length
<< ", " << offset
<< ") = 0" << dendl
;
13413 /* find an osd with the same ip. -ENXIO if none. */
13414 int Client::get_local_osd()
13416 Mutex::Locker
lock(client_lock
);
13421 objecter
->with_osdmap([this](const OSDMap
& o
) {
13422 if (o
.get_epoch() != local_osd_epoch
) {
13423 local_osd
= o
.find_osd_on_ip(messenger
->get_myaddr());
13424 local_osd_epoch
= o
.get_epoch();
13435 // ===============================
13437 void Client::ms_handle_connect(Connection
*con
)
13439 ldout(cct
, 10) << "ms_handle_connect on " << con
->get_peer_addr() << dendl
;
13442 bool Client::ms_handle_reset(Connection
*con
)
13444 ldout(cct
, 0) << "ms_handle_reset on " << con
->get_peer_addr() << dendl
;
13448 void Client::ms_handle_remote_reset(Connection
*con
)
13450 ldout(cct
, 0) << "ms_handle_remote_reset on " << con
->get_peer_addr() << dendl
;
13451 Mutex::Locker
l(client_lock
);
13452 switch (con
->get_peer_type()) {
13453 case CEPH_ENTITY_TYPE_MDS
:
13455 // kludge to figure out which mds this is; fixme with a Connection* state
13456 mds_rank_t mds
= MDS_RANK_NONE
;
13457 MetaSession
*s
= NULL
;
13458 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
13459 p
!= mds_sessions
.end();
13461 if (mdsmap
->get_addr(p
->first
) == con
->get_peer_addr()) {
13467 assert (s
!= NULL
);
13468 switch (s
->state
) {
13469 case MetaSession::STATE_CLOSING
:
13470 ldout(cct
, 1) << "reset from mds we were closing; we'll call that closed" << dendl
;
13471 _closed_mds_session(s
);
13474 case MetaSession::STATE_OPENING
:
13476 ldout(cct
, 1) << "reset from mds we were opening; retrying" << dendl
;
13477 list
<Context
*> waiters
;
13478 waiters
.swap(s
->waiting_for_open
);
13479 _closed_mds_session(s
);
13480 MetaSession
*news
= _get_or_open_mds_session(mds
);
13481 news
->waiting_for_open
.swap(waiters
);
13485 case MetaSession::STATE_OPEN
:
13487 objecter
->maybe_request_map(); /* to check if we are blacklisted */
13488 const md_config_t
*conf
= cct
->_conf
;
13489 if (conf
->client_reconnect_stale
) {
13490 ldout(cct
, 1) << "reset from mds we were open; close mds session for reconnect" << dendl
;
13491 _closed_mds_session(s
);
13493 ldout(cct
, 1) << "reset from mds we were open; mark session as stale" << dendl
;
13494 s
->state
= MetaSession::STATE_STALE
;
13499 case MetaSession::STATE_NEW
:
13500 case MetaSession::STATE_CLOSED
:
13510 bool Client::ms_handle_refused(Connection
*con
)
13512 ldout(cct
, 1) << "ms_handle_refused on " << con
->get_peer_addr() << dendl
;
13516 bool Client::ms_get_authorizer(int dest_type
, AuthAuthorizer
**authorizer
, bool force_new
)
13518 if (dest_type
== CEPH_ENTITY_TYPE_MON
)
13520 *authorizer
= monclient
->build_authorizer(dest_type
);
13524 Inode
*Client::get_quota_root(Inode
*in
, const UserPerm
& perms
)
13527 utime_t now
= ceph_clock_now();
13530 if (cur
!= in
&& cur
->quota
.is_enable())
13533 Inode
*parent_in
= NULL
;
13534 if (!cur
->dn_set
.empty()) {
13535 for (auto p
= cur
->dn_set
.begin(); p
!= cur
->dn_set
.end(); ++p
) {
13537 if (dn
->lease_mds
>= 0 &&
13538 dn
->lease_ttl
> now
&&
13539 mds_sessions
.count(dn
->lease_mds
)) {
13540 parent_in
= dn
->dir
->parent_inode
;
13542 Inode
*diri
= dn
->dir
->parent_inode
;
13543 if (diri
->caps_issued_mask(CEPH_CAP_FILE_SHARED
) &&
13544 diri
->shared_gen
== dn
->cap_shared_gen
) {
13545 parent_in
= dn
->dir
->parent_inode
;
13551 } else if (root_parents
.count(cur
)) {
13552 parent_in
= root_parents
[cur
].get();
13560 if (cur
== root_ancestor
)
13564 if (cur
->nlink
== 0) {
13565 cur
= root_ancestor
;
13569 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
13570 filepath
path(cur
->ino
);
13571 req
->set_filepath(path
);
13572 req
->set_inode(cur
);
13574 InodeRef parent_ref
;
13575 int ret
= make_request(req
, perms
, &parent_ref
);
13577 ldout(cct
, 1) << __func__
<< " " << in
->vino()
13578 << " failed to find parent of " << cur
->vino()
13579 << " err " << ret
<< dendl
;
13580 // FIXME: what to do?
13581 cur
= root_ancestor
;
13585 now
= ceph_clock_now();
13587 cur
= parent_ref
.get();
13589 cur
= in
; // start over
13592 ldout(cct
, 10) << __func__
<< " " << in
->vino() << " -> " << cur
->vino() << dendl
;
13597 * Traverse quota ancestors of the Inode, return true
13598 * if any of them passes the passed function
13600 bool Client::check_quota_condition(Inode
*in
, const UserPerm
& perms
,
13601 std::function
<bool (const Inode
&in
)> test
)
13604 assert(in
!= NULL
);
13609 if (in
== root_ancestor
) {
13610 // We're done traversing, drop out
13613 // Continue up the tree
13614 in
= get_quota_root(in
, perms
);
13621 bool Client::is_quota_files_exceeded(Inode
*in
, const UserPerm
& perms
)
13623 return check_quota_condition(in
, perms
,
13624 [](const Inode
&in
) {
13625 return in
.quota
.max_files
&& in
.rstat
.rsize() >= in
.quota
.max_files
;
13629 bool Client::is_quota_bytes_exceeded(Inode
*in
, int64_t new_bytes
,
13630 const UserPerm
& perms
,
13631 std::list
<InodeRef
>* quota_roots
)
13633 return check_quota_condition(in
, perms
,
13634 [&new_bytes
, quota_roots
](const Inode
&in
) {
13636 quota_roots
->emplace_back(const_cast<Inode
*>(&in
));
13637 return in
.quota
.max_bytes
&& (in
.rstat
.rbytes
+ new_bytes
)
13638 > in
.quota
.max_bytes
;
13642 bool Client::is_quota_bytes_approaching(Inode
*in
, std::list
<InodeRef
>& quota_roots
)
13644 assert(in
->size
>= in
->reported_size
);
13645 const uint64_t size
= in
->size
- in
->reported_size
;
13647 for (auto& diri
: quota_roots
) {
13648 if (diri
->quota
.max_bytes
) {
13649 if (diri
->rstat
.rbytes
>= diri
->quota
.max_bytes
)
13652 uint64_t space
= diri
->quota
.max_bytes
- diri
->rstat
.rbytes
;
13653 if ((space
>> 4) < size
)
13667 int Client::check_pool_perm(Inode
*in
, int need
)
13669 if (!cct
->_conf
->client_check_pool_perm
)
13672 int64_t pool_id
= in
->layout
.pool_id
;
13673 std::string pool_ns
= in
->layout
.pool_ns
;
13674 std::pair
<int64_t, std::string
> perm_key(pool_id
, pool_ns
);
13677 auto it
= pool_perms
.find(perm_key
);
13678 if (it
== pool_perms
.end())
13680 if (it
->second
== POOL_CHECKING
) {
13681 // avoid concurrent checkings
13682 wait_on_list(waiting_for_pool_perm
);
13685 assert(have
& POOL_CHECKED
);
13691 if (in
->snapid
!= CEPH_NOSNAP
) {
13692 // pool permission check needs to write to the first object. But for snapshot,
13693 // head of the first object may have alread been deleted. To avoid creating
13694 // orphan object, skip the check for now.
13698 pool_perms
[perm_key
] = POOL_CHECKING
;
13701 snprintf(oid_buf
, sizeof(oid_buf
), "%llx.00000000", (unsigned long long)in
->ino
);
13702 object_t oid
= oid_buf
;
13704 SnapContext nullsnapc
;
13706 C_SaferCond rd_cond
;
13707 ObjectOperation rd_op
;
13708 rd_op
.stat(NULL
, (ceph::real_time
*)nullptr, NULL
);
13710 objecter
->mutate(oid
, OSDMap::file_to_object_locator(in
->layout
), rd_op
,
13711 nullsnapc
, ceph::real_clock::now(), 0, &rd_cond
);
13713 C_SaferCond wr_cond
;
13714 ObjectOperation wr_op
;
13715 wr_op
.create(true);
13717 objecter
->mutate(oid
, OSDMap::file_to_object_locator(in
->layout
), wr_op
,
13718 nullsnapc
, ceph::real_clock::now(), 0, &wr_cond
);
13720 client_lock
.Unlock();
13721 int rd_ret
= rd_cond
.wait();
13722 int wr_ret
= wr_cond
.wait();
13723 client_lock
.Lock();
13725 bool errored
= false;
13727 if (rd_ret
== 0 || rd_ret
== -ENOENT
)
13729 else if (rd_ret
!= -EPERM
) {
13730 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13731 << " rd_err = " << rd_ret
<< " wr_err = " << wr_ret
<< dendl
;
13735 if (wr_ret
== 0 || wr_ret
== -EEXIST
)
13736 have
|= POOL_WRITE
;
13737 else if (wr_ret
!= -EPERM
) {
13738 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13739 << " rd_err = " << rd_ret
<< " wr_err = " << wr_ret
<< dendl
;
13744 // Indeterminate: erase CHECKING state so that subsequent calls re-check.
13745 // Raise EIO because actual error code might be misleading for
13746 // userspace filesystem user.
13747 pool_perms
.erase(perm_key
);
13748 signal_cond_list(waiting_for_pool_perm
);
13752 pool_perms
[perm_key
] = have
| POOL_CHECKED
;
13753 signal_cond_list(waiting_for_pool_perm
);
13756 if ((need
& CEPH_CAP_FILE_RD
) && !(have
& POOL_READ
)) {
13757 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13758 << " need " << ccap_string(need
) << ", but no read perm" << dendl
;
13761 if ((need
& CEPH_CAP_FILE_WR
) && !(have
& POOL_WRITE
)) {
13762 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13763 << " need " << ccap_string(need
) << ", but no write perm" << dendl
;
13770 int Client::_posix_acl_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
)
13772 if (acl_type
== POSIX_ACL
) {
13773 if (in
->xattrs
.count(ACL_EA_ACCESS
)) {
13774 const bufferptr
& access_acl
= in
->xattrs
[ACL_EA_ACCESS
];
13776 return posix_acl_permits(access_acl
, in
->uid
, in
->gid
, perms
, want
);
13782 int Client::_posix_acl_chmod(Inode
*in
, mode_t mode
, const UserPerm
& perms
)
13784 if (acl_type
== NO_ACL
)
13787 int r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
13791 if (acl_type
== POSIX_ACL
) {
13792 if (in
->xattrs
.count(ACL_EA_ACCESS
)) {
13793 const bufferptr
& access_acl
= in
->xattrs
[ACL_EA_ACCESS
];
13794 bufferptr
acl(access_acl
.c_str(), access_acl
.length());
13795 r
= posix_acl_access_chmod(acl
, mode
);
13798 r
= _do_setxattr(in
, ACL_EA_ACCESS
, acl
.c_str(), acl
.length(), 0, perms
);
13804 ldout(cct
, 10) << __func__
<< " ino " << in
->ino
<< " result=" << r
<< dendl
;
13808 int Client::_posix_acl_create(Inode
*dir
, mode_t
*mode
, bufferlist
& xattrs_bl
,
13809 const UserPerm
& perms
)
13811 if (acl_type
== NO_ACL
)
13814 if (S_ISLNK(*mode
))
13817 int r
= _getattr(dir
, CEPH_STAT_CAP_XATTR
, perms
, dir
->xattr_version
== 0);
13821 if (acl_type
== POSIX_ACL
) {
13822 if (dir
->xattrs
.count(ACL_EA_DEFAULT
)) {
13823 map
<string
, bufferptr
> xattrs
;
13825 const bufferptr
& default_acl
= dir
->xattrs
[ACL_EA_DEFAULT
];
13826 bufferptr
acl(default_acl
.c_str(), default_acl
.length());
13827 r
= posix_acl_inherit_mode(acl
, mode
);
13832 r
= posix_acl_equiv_mode(acl
.c_str(), acl
.length(), mode
);
13836 xattrs
[ACL_EA_ACCESS
] = acl
;
13839 if (S_ISDIR(*mode
))
13840 xattrs
[ACL_EA_DEFAULT
] = dir
->xattrs
[ACL_EA_DEFAULT
];
13844 ::encode(xattrs
, xattrs_bl
);
13847 *mode
&= ~umask_cb(callback_handle
);
13852 ldout(cct
, 10) << __func__
<< " dir ino " << dir
->ino
<< " result=" << r
<< dendl
;
13856 void Client::set_filer_flags(int flags
)
13858 Mutex::Locker
l(client_lock
);
13859 assert(flags
== 0 ||
13860 flags
== CEPH_OSD_FLAG_LOCALIZE_READS
);
13861 objecter
->add_global_op_flags(flags
);
13864 void Client::clear_filer_flags(int flags
)
13866 Mutex::Locker
l(client_lock
);
13867 assert(flags
== CEPH_OSD_FLAG_LOCALIZE_READS
);
13868 objecter
->clear_global_op_flag(flags
);
/*
 * This is included in cap release messages, to cause
 * the MDS to wait until this OSD map epoch. It is necessary
 * in corner cases where we cancel RADOS ops, so that
 * nobody else tries to do IO to the same objects in
 * the same epoch as the cancelled ops.
 */
13878 void Client::set_cap_epoch_barrier(epoch_t e
)
13880 ldout(cct
, 5) << __func__
<< " epoch = " << e
<< dendl
;
13881 cap_epoch_barrier
= e
;
13884 const char** Client::get_tracked_conf_keys() const
13886 static const char* keys
[] = {
13887 "client_cache_size",
13888 "client_cache_mid",
13890 "client_deleg_timeout",
13891 "client_deleg_break_on_open",
13897 void Client::handle_conf_change(const struct md_config_t
*conf
,
13898 const std::set
<std::string
> &changed
)
13900 Mutex::Locker
lock(client_lock
);
13902 if (changed
.count("client_cache_mid")) {
13903 lru
.lru_set_midpoint(cct
->_conf
->client_cache_mid
);
13905 if (changed
.count("client_acl_type")) {
13907 if (cct
->_conf
->client_acl_type
== "posix_acl")
13908 acl_type
= POSIX_ACL
;
13912 void intrusive_ptr_add_ref(Inode
*in
)
13917 void intrusive_ptr_release(Inode
*in
)
13919 in
->client
->put_inode(in
);
13922 mds_rank_t
Client::_get_random_up_mds() const
13924 assert(client_lock
.is_locked_by_me());
13926 std::set
<mds_rank_t
> up
;
13927 mdsmap
->get_up_mds_set(up
);
13930 return MDS_RANK_NONE
;
13931 std::set
<mds_rank_t
>::const_iterator p
= up
.begin();
13932 for (int n
= rand() % up
.size(); n
; n
--)
13938 StandaloneClient::StandaloneClient(Messenger
*m
, MonClient
*mc
)
13939 : Client(m
, mc
, new Objecter(m
->cct
, m
, mc
, NULL
, 0, 0))
13941 monclient
->set_messenger(m
);
13942 objecter
->set_client_incarnation(0);
13945 StandaloneClient::~StandaloneClient()
13948 objecter
= nullptr;
13951 int StandaloneClient::init()
13954 objectcacher
->start();
13957 client_lock
.Lock();
13958 assert(!initialized
);
13960 messenger
->add_dispatcher_tail(objecter
);
13961 messenger
->add_dispatcher_tail(this);
13963 monclient
->set_want_keys(CEPH_ENTITY_TYPE_MDS
| CEPH_ENTITY_TYPE_OSD
);
13964 int r
= monclient
->init();
13966 // need to do cleanup because we're in an intermediate init state
13968 client_lock
.Unlock();
13969 objecter
->shutdown();
13970 objectcacher
->stop();
13971 monclient
->shutdown();
13976 client_lock
.Unlock();
13982 void StandaloneClient::shutdown()
13984 Client::shutdown();
13985 objecter
->shutdown();
13986 monclient
->shutdown();