// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/utsname.h>

#include <boost/lexical_cast.hpp>
#include <boost/fusion/include/std_pair.hpp>

#if defined(__FreeBSD__)
#define XATTR_CREATE  0x1
#define XATTR_REPLACE 0x2
#else
#include <sys/xattr.h>
#endif

#if defined(__linux__)
#include <linux/falloc.h>
#endif

#include <sys/statvfs.h>
44 #include "common/config.h"
45 #include "common/version.h"
48 #include "messages/MClientSession.h"
49 #include "messages/MClientReconnect.h"
50 #include "messages/MClientRequest.h"
51 #include "messages/MClientRequestForward.h"
52 #include "messages/MClientReply.h"
53 #include "messages/MClientCaps.h"
54 #include "messages/MClientLease.h"
55 #include "messages/MClientSnap.h"
56 #include "messages/MCommandReply.h"
57 #include "messages/MOSDMap.h"
58 #include "messages/MClientQuota.h"
59 #include "messages/MClientCapRelease.h"
60 #include "messages/MMDSMap.h"
61 #include "messages/MFSMap.h"
62 #include "messages/MFSMapUser.h"
64 #include "mon/MonClient.h"
66 #include "mds/flock.h"
67 #include "osd/OSDMap.h"
68 #include "osdc/Filer.h"
70 #include "common/Cond.h"
71 #include "common/Mutex.h"
72 #include "common/perf_counters.h"
73 #include "common/admin_socket.h"
74 #include "common/errno.h"
75 #include "include/str_list.h"
77 #define dout_subsys ceph_subsys_client
79 #include "include/lru.h"
80 #include "include/compat.h"
81 #include "include/stringify.h"
86 #include "Delegation.h"
88 #include "ClientSnapRealm.h"
90 #include "MetaSession.h"
91 #include "MetaRequest.h"
92 #include "ObjecterWriteback.h"
93 #include "posix_acl.h"
95 #include "include/assert.h"
96 #include "include/stat.h"
98 #include "include/cephfs/ceph_statx.h"
100 #if HAVE_GETGROUPLIST
#define dout_prefix *_dout << "client." << whoami << " "

#define tout(cct) if (!cct->_conf->client_trace.empty()) traceout

// FreeBSD fails to define this
#ifndef O_DSYNC
#define O_DSYNC 0x0
#endif

// Darwin fails to define this
#ifndef O_RSYNC
#define O_RSYNC 0x0
#endif

#define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED)
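
// Trampoline handed to ObjectCacher, which takes a plain C-style callback:
// it recovers the owning Client from the opaque pointer and forwards
// flush-set completions to it.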
void client_flush_set_callback(void *p, ObjectCacher::ObjectSet *oset)
{
  Client *client = static_cast<Client*>(p);
  client->flush_set_callback(oset);
}
Client::CommandHook::CommandHook(Client *client) :
  m_client(client)
{
}
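
// Admin-socket entry point: each command registered in _finish_init() lands
// here and is dispatched to the matching dump helper under client_lock.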
bool Client::CommandHook::call(std::string command, cmdmap_t& cmdmap,
                               std::string format, bufferlist& out)
{
  Formatter *f = Formatter::create(format);
  f->open_object_section("result");
  m_client->client_lock.Lock();
  if (command == "mds_requests")
    m_client->dump_mds_requests(f);
  else if (command == "mds_sessions")
    m_client->dump_mds_sessions(f);
  else if (command == "dump_cache")
    m_client->dump_cache(f);
  else if (command == "kick_stale_sessions")
    m_client->_kick_stale_sessions();
  else if (command == "status")
    m_client->dump_status(f);
  else
    assert(0 == "bad command registered");
  m_client->client_lock.Unlock();
  f->close_section();
  f->flush(out);
  delete f;
  return true;
}
dir_result_t::dir_result_t(Inode *in, const UserPerm& perms)
  : inode(in), offset(0), next_offset(2),
    release_count(0), ordered_count(0), cache_index(0), start_shared_gen(0),
    perms(perms)
{ }
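
// "Faked" inode numbers: when the platform ino_t is narrower than 64 bits
// (or client_use_faked_inos is set), real 64-bit inode numbers are mapped
// onto locally allocated 32-bit values tracked by free_faked_inos and
// faked_ino_map.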
void Client::_reset_faked_inos()
{
  ino_t start = 1024;
  free_faked_inos.clear();
  free_faked_inos.insert(start, (uint32_t)-1 - start + 1);
  last_used_faked_ino = 0;
  _use_faked_inos = sizeof(ino_t) < 8 || cct->_conf->client_use_faked_inos;
}
void Client::_assign_faked_ino(Inode *in)
{
  interval_set<ino_t>::const_iterator it = free_faked_inos.lower_bound(last_used_faked_ino + 1);
  if (it == free_faked_inos.end() && last_used_faked_ino > 0) {
    last_used_faked_ino = 0;
    it = free_faked_inos.lower_bound(last_used_faked_ino + 1);
  }
  assert(it != free_faked_inos.end());
  if (last_used_faked_ino < it.get_start()) {
    assert(it.get_len() > 0);
    last_used_faked_ino = it.get_start();
  } else {
    ++last_used_faked_ino;
    assert(it.get_start() + it.get_len() > last_used_faked_ino);
  }
  in->faked_ino = last_used_faked_ino;
  free_faked_inos.erase(in->faked_ino);
  faked_ino_map[in->faked_ino] = in->vino();
}
void Client::_release_faked_ino(Inode *in)
{
  free_faked_inos.insert(in->faked_ino);
  faked_ino_map.erase(in->faked_ino);
}
vinodeno_t Client::_map_faked_ino(ino_t ino)
{
  vinodeno_t vino;
  if (ino == root->faked_ino)
    vino = root->vino();
  else if (faked_ino_map.count(ino))
    vino = faked_ino_map[ino];
  else
    vino = vinodeno_t(0, CEPH_NOSNAP);
  ldout(cct, 10) << "map_faked_ino " << ino << " -> " << vino << dendl;
  return vino;
}
vinodeno_t Client::map_faked_ino(ino_t ino)
{
  Mutex::Locker lock(client_lock);
  return _map_faked_ino(ino);
}
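
// A Client is assembled from externally owned pieces: the Messenger carries
// the wire protocol, the MonClient supplies our global id, and the Objecter
// (wrapped below by ObjecterWriteback/ObjectCacher) performs the data I/O
// against the OSDs. Construction only wires these together; the client must
// still be initialized and mounted before filesystem operations.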
Client::Client(Messenger *m, MonClient *mc, Objecter *objecter_)
  : Dispatcher(m->cct),
    m_command_hook(this),
    timer(m->cct, client_lock),
    callback_handle(NULL),
    switch_interrupt_cb(NULL),
    ino_invalidate_cb(NULL),
    dentry_invalidate_cb(NULL),
    can_invalidate_dentries(false),
    async_ino_invalidator(m->cct),
    async_dentry_invalidator(m->cct),
    interrupt_finisher(m->cct),
    remount_finisher(m->cct),
    objecter_finisher(m->cct),
    messenger(m), monclient(mc),
    whoami(mc->get_global_id()), cap_epoch_barrier(0),
    last_tid(0), oldest_tid(0), last_flush_tid(1),
    mounted(false), unmounting(false), blacklisted(false),
    local_osd(-ENXIO), local_osd_epoch(0),
    unsafe_sync_write(0),
    client_lock("Client::client_lock")
{
  num_flushing_caps = 0;

  _dir_vxattrs_name_size = _vxattrs_calcu_name_size(_dir_vxattrs);
  _file_vxattrs_name_size = _vxattrs_calcu_name_size(_file_vxattrs);

  user_id = cct->_conf->client_mount_uid;
  group_id = cct->_conf->client_mount_gid;

  if (cct->_conf->client_acl_type == "posix_acl")
    acl_type = POSIX_ACL;

  lru.lru_set_midpoint(cct->_conf->client_cache_mid);

  // file handles
  free_fd_set.insert(10, 1<<30);

  mdsmap.reset(new MDSMap);

  // osd interfaces
  writeback_handler.reset(new ObjecterWriteback(objecter, &objecter_finisher,
                                                &client_lock));
  objectcacher.reset(new ObjectCacher(cct, "libcephfs", *writeback_handler, client_lock,
                                      client_flush_set_callback,    // all commit callback
                                      (void*)this,
                                      cct->_conf->client_oc_size,
                                      cct->_conf->client_oc_max_objects,
                                      cct->_conf->client_oc_max_dirty,
                                      cct->_conf->client_oc_target_dirty,
                                      cct->_conf->client_oc_max_dirty_age,
                                      true));
  objecter_finisher.start();
  filer.reset(new Filer(objecter, &objecter_finisher));
  objecter->enable_blacklist_events();
}
Client::~Client()
{
  assert(!client_lock.is_locked());

  // It is necessary to hold client_lock, because any inode destruction
  // may call into ObjectCacher, which asserts that its lock (which is
  // client_lock) is held.
  client_lock.Lock();
  tear_down_cache();
  client_lock.Unlock();
}
void Client::tear_down_cache()
{
  // fd's
  for (ceph::unordered_map<int, Fh*>::iterator it = fd_map.begin();
       it != fd_map.end();
       ++it) {
    Fh *fh = it->second;
    ldout(cct, 1) << "tear_down_cache forcing close of fh " << it->first << " ino " << fh->inode->ino << dendl;
    _release_fh(fh);
  }
  fd_map.clear();

  while (!opened_dirs.empty()) {
    dir_result_t *dirp = *opened_dirs.begin();
    ldout(cct, 1) << "tear_down_cache forcing close of dir " << dirp << " ino " << dirp->inode->ino << dendl;
    _closedir(dirp);
  }

  // empty lru
  trim_cache();
  assert(lru.lru_get_size() == 0);

  // close root ino
  assert(inode_map.size() <= 1 + root_parents.size());
  if (root && inode_map.size() == 1 + root_parents.size()) {
    delete root;
    root = 0;
    root_ancestor = 0;
    while (!root_parents.empty())
      root_parents.erase(root_parents.begin());
    inode_map.clear();
    _reset_faked_inos();
  }

  assert(inode_map.empty());
}
inodeno_t Client::get_root_ino()
{
  Mutex::Locker l(client_lock);
  if (use_faked_inos())
    return root->faked_ino;
  else
    return root->ino;
}
Inode *Client::get_root()
{
  Mutex::Locker l(client_lock);
  root->ll_get();
  return root;
}
void Client::dump_inode(Formatter *f, Inode *in, set<Inode*>& did, bool disconnected)
{
  filepath path;
  in->make_long_path(path);
  ldout(cct, 1) << "dump_inode: "
                << (disconnected ? "DISCONNECTED " : "")
                << "inode " << in->ino
                << " " << path
                << " ref " << in->get_num_ref()
                << *in << dendl;

  if (f) {
    f->open_object_section("inode");
    f->dump_stream("path") << path;
    if (disconnected)
      f->dump_int("disconnected", 1);
    in->dump(f);
    f->close_section();
  }

  did.insert(in);
  if (in->dir) {
    ldout(cct, 1) << "  dir " << in->dir << " size " << in->dir->dentries.size() << dendl;
    for (ceph::unordered_map<string, Dentry*>::iterator it = in->dir->dentries.begin();
         it != in->dir->dentries.end();
         ++it) {
      ldout(cct, 1) << "   " << in->ino << " dn " << it->first << " " << it->second << " ref " << it->second->ref << dendl;
      if (f) {
        f->open_object_section("dentry");
        it->second->dump(f);
        f->close_section();
      }
      if (it->second->inode)
        dump_inode(f, it->second->inode.get(), did, false);
    }
  }
}
void Client::dump_cache(Formatter *f)
{
  set<Inode*> did;

  ldout(cct, 1) << "dump_cache" << dendl;

  if (f)
    f->open_array_section("cache");

  if (root)
    dump_inode(f, root, did, true);

  // make a second pass to catch anything disconnected
  for (ceph::unordered_map<vinodeno_t, Inode*>::iterator it = inode_map.begin();
       it != inode_map.end();
       ++it) {
    if (did.count(it->second))
      continue;
    dump_inode(f, it->second, did, true);
  }

  if (f)
    f->close_section();
}
void Client::dump_status(Formatter *f)
{
  assert(client_lock.is_locked_by_me());

  ldout(cct, 1) << __func__ << dendl;

  const epoch_t osd_epoch
    = objecter->with_osdmap(std::mem_fn(&OSDMap::get_epoch));

  if (f) {
    f->open_object_section("metadata");
    for (const auto& kv : metadata)
      f->dump_string(kv.first.c_str(), kv.second);
    f->close_section();

    f->dump_int("dentry_count", lru.lru_get_size());
    f->dump_int("dentry_pinned_count", lru.lru_get_num_pinned());
    f->dump_int("id", get_nodeid().v);
    entity_inst_t inst(messenger->get_myname(), messenger->get_myaddr());
    f->dump_object("inst", inst);
    f->dump_stream("inst_str") << inst;
    f->dump_stream("addr_str") << inst.addr;
    f->dump_int("inode_count", inode_map.size());
    f->dump_int("mds_epoch", mdsmap->get_epoch());
    f->dump_int("osd_epoch", osd_epoch);
    f->dump_int("osd_epoch_barrier", cap_epoch_barrier);
  }
}
int Client::init()
{
  timer.init();
  objectcacher->start();

  client_lock.Lock();
  assert(!initialized);

  messenger->add_dispatcher_tail(this);
  client_lock.Unlock();

  _finish_init();
  return 0;
}
void Client::_finish_init()
{
  client_lock.Lock();
  // logger
  PerfCountersBuilder plb(cct, "client", l_c_first, l_c_last);
  plb.add_time_avg(l_c_reply, "reply", "Latency of receiving a reply on metadata request");
  plb.add_time_avg(l_c_lat, "lat", "Latency of processing a metadata request");
  plb.add_time_avg(l_c_wrlat, "wrlat", "Latency of a file data write operation");
  logger.reset(plb.create_perf_counters());
  cct->get_perfcounters_collection()->add(logger.get());

  client_lock.Unlock();

  cct->_conf->add_observer(this);

  AdminSocket* admin_socket = cct->get_admin_socket();
  int ret = admin_socket->register_command("mds_requests",
                                           "mds_requests",
                                           &m_command_hook,
                                           "show in-progress mds requests");
  if (ret < 0) {
    lderr(cct) << "error registering admin socket command: "
               << cpp_strerror(-ret) << dendl;
  }
  ret = admin_socket->register_command("mds_sessions",
                                       "mds_sessions",
                                       &m_command_hook,
                                       "show mds session state");
  if (ret < 0) {
    lderr(cct) << "error registering admin socket command: "
               << cpp_strerror(-ret) << dendl;
  }
  ret = admin_socket->register_command("dump_cache",
                                       "dump_cache",
                                       &m_command_hook,
                                       "show in-memory metadata cache contents");
  if (ret < 0) {
    lderr(cct) << "error registering admin socket command: "
               << cpp_strerror(-ret) << dendl;
  }
  ret = admin_socket->register_command("kick_stale_sessions",
                                       "kick_stale_sessions",
                                       &m_command_hook,
                                       "kick sessions that were remote reset");
  if (ret < 0) {
    lderr(cct) << "error registering admin socket command: "
               << cpp_strerror(-ret) << dendl;
  }
  ret = admin_socket->register_command("status",
                                       "status",
                                       &m_command_hook,
                                       "show overall client status");
  if (ret < 0) {
    lderr(cct) << "error registering admin socket command: "
               << cpp_strerror(-ret) << dendl;
  }

  client_lock.Lock();
  initialized = true;
  client_lock.Unlock();
}
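
// The commands registered above are reachable through the admin socket; for
// example (the socket path depends on local configuration):
//
//   ceph --admin-daemon /var/run/ceph/ceph-client.admin.<pid>.asok status
//   ceph --admin-daemon /var/run/ceph/ceph-client.admin.<pid>.asok dump_cache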
void Client::shutdown()
{
  ldout(cct, 1) << "shutdown" << dendl;

  // If we were not mounted, but were being used for sending
  // MDS commands, we may have sessions that need closing.
  client_lock.Lock();
  _close_sessions();
  client_lock.Unlock();

  cct->_conf->remove_observer(this);

  AdminSocket* admin_socket = cct->get_admin_socket();
  admin_socket->unregister_command("mds_requests");
  admin_socket->unregister_command("mds_sessions");
  admin_socket->unregister_command("dump_cache");
  admin_socket->unregister_command("kick_stale_sessions");
  admin_socket->unregister_command("status");

  if (ino_invalidate_cb) {
    ldout(cct, 10) << "shutdown stopping cache invalidator finisher" << dendl;
    async_ino_invalidator.wait_for_empty();
    async_ino_invalidator.stop();
  }

  if (dentry_invalidate_cb) {
    ldout(cct, 10) << "shutdown stopping dentry invalidator finisher" << dendl;
    async_dentry_invalidator.wait_for_empty();
    async_dentry_invalidator.stop();
  }

  if (switch_interrupt_cb) {
    ldout(cct, 10) << "shutdown stopping interrupt finisher" << dendl;
    interrupt_finisher.wait_for_empty();
    interrupt_finisher.stop();
  }

  if (remount_cb) {
    ldout(cct, 10) << "shutdown stopping remount finisher" << dendl;
    remount_finisher.wait_for_empty();
    remount_finisher.stop();
  }

  objectcacher->stop();  // outside of client_lock!  this does a join.

  client_lock.Lock();
  assert(initialized);
  initialized = false;
  timer.shutdown();
  client_lock.Unlock();

  objecter_finisher.wait_for_empty();
  objecter_finisher.stop();

  if (logger) {
    cct->get_perfcounters_collection()->remove(logger.get());
    logger.reset();
  }
}
// ===================
// metadata cache stuff

void Client::trim_cache(bool trim_kernel_dcache)
{
  uint64_t max = cct->_conf->client_cache_size;
  ldout(cct, 20) << "trim_cache size " << lru.lru_get_size() << " max " << max << dendl;
  unsigned last = 0;
  while (lru.lru_get_size() != last) {
    last = lru.lru_get_size();

    if (!unmounting && lru.lru_get_size() <= max)
      break;

    // trim!
    Dentry *dn = static_cast<Dentry*>(lru.lru_get_next_expire());
    if (!dn)
      break;  // done

    trim_dentry(dn);
  }

  if (trim_kernel_dcache && lru.lru_get_size() > max)
    _invalidate_kernel_dcache();

  // hose root?
  if (lru.lru_get_size() == 0 && root && root->get_num_ref() == 0 && inode_map.size() == 1 + root_parents.size()) {
    ldout(cct, 15) << "trim_cache trimmed root " << root << dendl;
    delete root;
    root = 0;
    root_ancestor = 0;
    while (!root_parents.empty())
      root_parents.erase(root_parents.begin());
    inode_map.clear();
    _reset_faked_inos();
  }
}
void Client::trim_cache_for_reconnect(MetaSession *s)
{
  mds_rank_t mds = s->mds_num;
  ldout(cct, 20) << "trim_cache_for_reconnect mds." << mds << dendl;

  int trimmed = 0;
  list<Dentry*> skipped;
  while (lru.lru_get_size() > 0) {
    Dentry *dn = static_cast<Dentry*>(lru.lru_expire());
    if (!dn)
      break;

    if ((dn->inode && dn->inode->caps.count(mds)) ||
        dn->dir->parent_inode->caps.count(mds)) {
      trim_dentry(dn);
      trimmed++;
    } else
      skipped.push_back(dn);
  }

  for (list<Dentry*>::iterator p = skipped.begin(); p != skipped.end(); ++p)
    lru.lru_insert_mid(*p);

  ldout(cct, 20) << "trim_cache_for_reconnect mds." << mds
                 << " trimmed " << trimmed << " dentries" << dendl;

  if (s->caps.size() > 0)
    _invalidate_kernel_dcache();
}
void Client::trim_dentry(Dentry *dn)
{
  ldout(cct, 15) << "trim_dentry unlinking dn " << dn->name
                 << " in dir " << hex << dn->dir->parent_inode->ino
                 << dendl;
  if (dn->inode) {
    Inode *diri = dn->dir->parent_inode;
    diri->dir_release_count++;
    clear_dir_complete_and_ordered(diri, true);
  }
  unlink(dn, false, false);  // drop dir, drop dentry
}
void Client::update_inode_file_size(Inode *in, int issued, uint64_t size,
                                    uint64_t truncate_seq, uint64_t truncate_size)
{
  uint64_t prior_size = in->size;

  if (truncate_seq > in->truncate_seq ||
      (truncate_seq == in->truncate_seq && size > in->size)) {
    ldout(cct, 10) << "size " << in->size << " -> " << size << dendl;
    in->size = size;
    in->reported_size = size;
    if (truncate_seq != in->truncate_seq) {
      ldout(cct, 10) << "truncate_seq " << in->truncate_seq << " -> "
                     << truncate_seq << dendl;
      in->truncate_seq = truncate_seq;
      in->oset.truncate_seq = truncate_seq;

      // truncate cached file data
      if (prior_size > size) {
        _invalidate_inode_cache(in, truncate_size, prior_size - truncate_size);
      }
    }

    // truncate inline data
    if (in->inline_version < CEPH_INLINE_NONE) {
      uint32_t len = in->inline_data.length();
      if (size < len)
        in->inline_data.splice(size, len - size);
    }
  }
  if (truncate_seq >= in->truncate_seq &&
      in->truncate_size != truncate_size) {
    if (in->is_file()) {
      ldout(cct, 10) << "truncate_size " << in->truncate_size << " -> "
                     << truncate_size << dendl;
      in->truncate_size = truncate_size;
      in->oset.truncate_size = truncate_size;
    } else {
      ldout(cct, 0) << "Hmmm, truncate_seq && truncate_size changed on non-file inode!" << dendl;
    }
  }
}
void Client::update_inode_file_time(Inode *in, int issued, uint64_t time_warp_seq,
                                    utime_t ctime, utime_t mtime, utime_t atime)
{
  ldout(cct, 10) << __func__ << " " << *in << " " << ccap_string(issued)
                 << " ctime " << ctime << " mtime " << mtime << dendl;

  if (time_warp_seq > in->time_warp_seq)
    ldout(cct, 10) << " mds time_warp_seq " << time_warp_seq
                   << " is higher than local time_warp_seq "
                   << in->time_warp_seq << dendl;

  int warn = false;
  // be careful with size, mtime, atime
  if (issued & (CEPH_CAP_FILE_EXCL|
                CEPH_CAP_FILE_WR|
                CEPH_CAP_FILE_BUFFER|
                CEPH_CAP_AUTH_EXCL|
                CEPH_CAP_XATTR_EXCL)) {
    ldout(cct, 30) << "Yay have enough caps to look at our times" << dendl;
    if (ctime > in->ctime)
      in->ctime = ctime;
    if (time_warp_seq > in->time_warp_seq) {
      //the mds updated times, so take those!
      in->mtime = mtime;
      in->atime = atime;
      in->time_warp_seq = time_warp_seq;
    } else if (time_warp_seq == in->time_warp_seq) {
      //take max times
      if (mtime > in->mtime)
        in->mtime = mtime;
      if (atime > in->atime)
        in->atime = atime;
    } else if (issued & CEPH_CAP_FILE_EXCL) {
      //ignore mds values as we have a higher seq
      if (time_warp_seq != in->time_warp_seq)
        warn = true;
    } else warn = true;
  } else {
    ldout(cct, 30) << "Don't have enough caps, just taking mds' time values" << dendl;
    if (time_warp_seq >= in->time_warp_seq) {
      in->ctime = ctime;
      in->mtime = mtime;
      in->atime = atime;
      in->time_warp_seq = time_warp_seq;
    } else
      warn = true;
  }
  if (warn) {
    ldout(cct, 0) << "WARNING: " << *in << " mds time_warp_seq "
                  << time_warp_seq << " is lower than local time_warp_seq "
                  << in->time_warp_seq
                  << dendl;
  }
}
void Client::_fragmap_remove_non_leaves(Inode *in)
{
  for (map<frag_t,int>::iterator p = in->fragmap.begin(); p != in->fragmap.end(); )
    if (!in->dirfragtree.is_leaf(p->first))
      in->fragmap.erase(p++);
    else
      ++p;
}
void Client::_fragmap_remove_stopped_mds(Inode *in, mds_rank_t mds)
{
  for (auto p = in->fragmap.begin(); p != in->fragmap.end(); )
    if (p->second == mds)
      in->fragmap.erase(p++);
    else
      ++p;
}
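
// add_update_inode applies an InodeStat decoded from an MDS reply to the
// local cache. A field is only overwritten when the reply is strictly newer
// (version check) or when the matching SHARED cap bit is newly issued, and
// never while this client holds the corresponding EXCL cap, so locally
// dirty state is not clobbered.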
Inode * Client::add_update_inode(InodeStat *st, utime_t from,
                                 MetaSession *session,
                                 const UserPerm& request_perms)
{
  Inode *in;
  bool was_new = false;
  if (inode_map.count(st->vino)) {
    in = inode_map[st->vino];
    ldout(cct, 12) << "add_update_inode had " << *in << " caps " << ccap_string(st->cap.caps) << dendl;
  } else {
    in = new Inode(this, st->vino, &st->layout);
    inode_map[st->vino] = in;

    if (use_faked_inos())
      _assign_faked_ino(in);

    if (!root) {
      root = in;
      root_ancestor = in;
      cwd = root;
    } else if (!mounted) {
      root_parents[root_ancestor] = in;
      root_ancestor = in;
    }

    // immutable bits
    in->ino = st->vino.ino;
    in->snapid = st->vino.snapid;
    in->mode = st->mode & S_IFMT;
    was_new = true;
  }

  if (in->is_symlink())
    in->symlink = st->symlink;

  // only update inode if mds info is strictly newer, or it is the same and projected (odd).
  bool new_version = false;
  if (in->version == 0 ||
      ((st->cap.flags & CEPH_CAP_FLAG_AUTH) &&
       (in->version & ~1) < st->version))
    new_version = true;

  int issued;
  in->caps_issued(&issued);
  issued |= in->caps_dirty();
  int new_issued = ~issued & (int)st->cap.caps;

  if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
      !(issued & CEPH_CAP_AUTH_EXCL)) {
    in->mode = st->mode;
    in->uid = st->uid;
    in->gid = st->gid;
    in->btime = st->btime;
  }

  if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
      !(issued & CEPH_CAP_LINK_EXCL)) {
    in->nlink = st->nlink;
  }

  if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
    update_inode_file_time(in, issued, st->time_warp_seq,
                           st->ctime, st->mtime, st->atime);
  }

  if (new_version ||
      (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
    in->layout = st->layout;
    update_inode_file_size(in, issued, st->size, st->truncate_seq, st->truncate_size);
  }

  if (in->is_dir()) {
    if (new_version || (new_issued & CEPH_CAP_FILE_SHARED)) {
      in->dirstat = st->dirstat;
    }
    // dir_layout/rstat/quota are not tracked by capability, update them only if
    // the inode stat is from auth mds
    if (new_version || (st->cap.flags & CEPH_CAP_FLAG_AUTH)) {
      in->dir_layout = st->dir_layout;
      ldout(cct, 20) << " dir hash is " << (int)in->dir_layout.dl_dir_hash << dendl;
      in->rstat = st->rstat;
      in->quota = st->quota;
    }
    // move me if/when version reflects fragtree changes.
    if (in->dirfragtree != st->dirfragtree) {
      in->dirfragtree = st->dirfragtree;
      _fragmap_remove_non_leaves(in);
    }
  }

  if ((in->xattr_version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) &&
      st->xattrbl.length() &&
      st->xattr_version > in->xattr_version) {
    bufferlist::iterator p = st->xattrbl.begin();
    ::decode(in->xattrs, p);
    in->xattr_version = st->xattr_version;
  }

  if (st->inline_version > in->inline_version) {
    in->inline_data = st->inline_data;
    in->inline_version = st->inline_version;
  }

  /* always take a newer change attr */
  if (st->change_attr > in->change_attr)
    in->change_attr = st->change_attr;

  if (st->version > in->version)
    in->version = st->version;

  if (was_new)
    ldout(cct, 12) << __func__ << " adding " << *in << " caps " << ccap_string(st->cap.caps) << dendl;

  if (!st->cap.caps)
    return in;   // as with readdir returning inodes in different snaprealms (no caps!)

  if (in->snapid == CEPH_NOSNAP) {
    add_update_cap(in, session, st->cap.cap_id, st->cap.caps, st->cap.seq,
                   st->cap.mseq, inodeno_t(st->cap.realm), st->cap.flags,
                   request_perms);
    if (in->auth_cap && in->auth_cap->session == session) {
      in->max_size = st->max_size;
      in->rstat = st->rstat;
    }

    // setting I_COMPLETE needs to happen after adding the cap
    if (in->is_dir() &&
        (st->cap.caps & CEPH_CAP_FILE_SHARED) &&
        (issued & CEPH_CAP_FILE_EXCL) == 0 &&
        in->dirstat.nfiles == 0 &&
        in->dirstat.nsubdirs == 0) {
      ldout(cct, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on empty dir " << *in << dendl;
      in->flags |= I_COMPLETE | I_DIR_ORDERED;
      if (in->dir) {
        ldout(cct, 10) << " dir is open on empty dir " << in->ino << " with "
                       << in->dir->dentries.size() << " entries, marking all dentries null" << dendl;
        in->dir->readdir_cache.clear();
        for (const auto& p : in->dir->dentries) {
          unlink(p.second, true, true);  // keep dir, keep dentry
        }
        if (in->dir->dentries.empty())
          close_dir(in->dir);
      }
    }
  } else {
    in->snap_caps |= st->cap.caps;
  }

  return in;
}
/*
 * insert_dentry_inode - insert + link a single dentry + inode into the metadata cache.
 */
Dentry *Client::insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dlease,
                                    Inode *in, utime_t from, MetaSession *session,
                                    Dentry *old_dentry)
{
  Dentry *dn = NULL;
  if (dir->dentries.count(dname))
    dn = dir->dentries[dname];

  ldout(cct, 12) << "insert_dentry_inode '" << dname << "' vino " << in->vino()
                 << " in dir " << dir->parent_inode->vino() << " dn " << dn
                 << dendl;

  if (dn && dn->inode) {
    if (dn->inode->vino() == in->vino()) {
      touch_dn(dn);
      ldout(cct, 12) << " had dentry " << dname
                     << " with correct vino " << dn->inode->vino()
                     << dendl;
    } else {
      ldout(cct, 12) << " had dentry " << dname
                     << " with WRONG vino " << dn->inode->vino()
                     << dendl;
      unlink(dn, true, true);  // keep dir, keep dentry
    }
  }

  if (!dn || !dn->inode) {
    InodeRef tmp_ref(in);
    if (old_dentry) {
      if (old_dentry->dir != dir) {
        Inode *old_diri = old_dentry->dir->parent_inode;
        old_diri->dir_ordered_count++;
        clear_dir_complete_and_ordered(old_diri, false);
      }
      unlink(old_dentry, dir == old_dentry->dir, false);  // drop dentry, keep dir open if it's the same dir
    }
    Inode *diri = dir->parent_inode;
    diri->dir_ordered_count++;
    clear_dir_complete_and_ordered(diri, false);
    dn = link(dir, dname, in, dn);
  }

  update_dentry_lease(dn, dlease, from, session);
  return dn;
}
void Client::update_dentry_lease(Dentry *dn, LeaseStat *dlease, utime_t from, MetaSession *session)
{
  utime_t dttl = from;
  dttl += (float)dlease->duration_ms / 1000.0;

  assert(dn);

  if (dlease->mask & CEPH_LOCK_DN) {
    if (dttl > dn->lease_ttl) {
      ldout(cct, 10) << "got dentry lease on " << dn->name
                     << " dur " << dlease->duration_ms << "ms ttl " << dttl << dendl;
      dn->lease_ttl = dttl;
      dn->lease_mds = session->mds_num;
      dn->lease_seq = dlease->seq;
      dn->lease_gen = session->cap_gen;
    }
  }
  dn->cap_shared_gen = dn->dir->parent_inode->shared_gen;
}
/*
 * update MDS location cache for a single inode
 */
void Client::update_dir_dist(Inode *in, DirStat *dst)
{
  // auth
  ldout(cct, 20) << "got dirfrag map for " << in->ino << " frag " << dst->frag << " to mds " << dst->auth << dendl;
  if (dst->auth >= 0) {
    in->fragmap[dst->frag] = dst->auth;
  } else {
    in->fragmap.erase(dst->frag);
  }
  if (!in->dirfragtree.is_leaf(dst->frag)) {
    in->dirfragtree.force_to_leaf(cct, dst->frag);
    _fragmap_remove_non_leaves(in);
  }

  // replicated
  in->dir_replicated = !dst->dist.empty();  // FIXME that's just one frag!

  // dist
  /*
  if (!st->dirfrag_dist.empty()) {   // FIXME
    set<int> dist = st->dirfrag_dist.begin()->second;
    if (dist.empty() && !in->dir_contacts.empty())
      ldout(cct, 9) << "lost dist spec for " << in->ino
                    << " " << dist << dendl;
    if (!dist.empty() && in->dir_contacts.empty())
      ldout(cct, 9) << "got dist spec for " << in->ino
                    << " " << dist << dendl;
    in->dir_contacts = dist;
  }
  */
}
void Client::clear_dir_complete_and_ordered(Inode *diri, bool complete)
{
  if (complete) {
    if (diri->flags & I_COMPLETE) {
      ldout(cct, 10) << " clearing (I_COMPLETE|I_DIR_ORDERED) on " << *diri << dendl;
      diri->flags &= ~(I_COMPLETE | I_DIR_ORDERED);
    }
  } else {
    if (diri->flags & I_DIR_ORDERED) {
      ldout(cct, 10) << " clearing I_DIR_ORDERED on " << *diri << dendl;
      diri->flags &= ~I_DIR_ORDERED;
    }
  }
  if (diri->dir)
    diri->dir->readdir_cache.clear();
}
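
// insert_readdir_results decodes the "extra" bufferlist of a readdir/lssnap
// reply: a DirStat for the fragment followed by numdn (dname, lease,
// InodeStat) triples. Each entry is linked into the Dir and, while the
// directory's release/ordered counts still match, also appended to the
// shared readdir_cache so later readdirs can be served locally.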
/*
 * insert results from readdir or lssnap into the metadata cache.
 */
void Client::insert_readdir_results(MetaRequest *request, MetaSession *session, Inode *diri) {

  MClientReply *reply = request->reply;
  ConnectionRef con = request->reply->get_connection();
  uint64_t features = con->get_features();

  dir_result_t *dirp = request->dirp;
  assert(dirp);

  // the extra buffer list is only set for readdir and lssnap replies
  bufferlist::iterator p = reply->get_extra_bl().begin();
  if (!p.end()) {
    // snapdir?
    if (request->head.op == CEPH_MDS_OP_LSSNAP) {
      assert(diri);
      diri = open_snapdir(diri);
    }

    // only open dir if we're actually adding stuff to it!
    Dir *dir = diri->open_dir();
    assert(dir);

    // dirstat
    DirStat dst(p);
    __u32 numdn;
    __u16 flags;
    ::decode(numdn, p);
    ::decode(flags, p);

    bool end = ((unsigned)flags & CEPH_READDIR_FRAG_END);
    bool hash_order = ((unsigned)flags & CEPH_READDIR_HASH_ORDER);

    frag_t fg = (unsigned)request->head.args.readdir.frag;
    unsigned readdir_offset = dirp->next_offset;
    string readdir_start = dirp->last_name;
    assert(!readdir_start.empty() || readdir_offset == 2);

    unsigned last_hash = 0;
    if (hash_order) {
      if (!readdir_start.empty()) {
        last_hash = ceph_frag_value(diri->hash_dentry_name(readdir_start));
      } else if (flags & CEPH_READDIR_OFFSET_HASH) {
        /* mds understands offset_hash */
        last_hash = (unsigned)request->head.args.readdir.offset_hash;
      }
    }

    if (fg != dst.frag) {
      ldout(cct, 10) << "insert_trace got new frag " << fg << " -> " << dst.frag << dendl;
      fg = dst.frag;
      if (!hash_order) {
        readdir_offset = 2;
        readdir_start.clear();
        dirp->offset = dir_result_t::make_fpos(fg, readdir_offset, false);
      }
    }

    ldout(cct, 10) << __func__ << " " << numdn << " readdir items, end=" << end
                   << ", hash_order=" << hash_order
                   << ", readdir_start " << readdir_start
                   << ", last_hash " << last_hash
                   << ", next_offset " << readdir_offset << dendl;

    if (diri->snapid != CEPH_SNAPDIR &&
        fg.is_leftmost() && readdir_offset == 2 &&
        !(hash_order && last_hash)) {
      dirp->release_count = diri->dir_release_count;
      dirp->ordered_count = diri->dir_ordered_count;
      dirp->start_shared_gen = diri->shared_gen;
      dirp->cache_index = 0;
    }

    dirp->buffer_frag = fg;

    _readdir_drop_dirp_buffer(dirp);
    dirp->buffer.reserve(numdn);

    string dname;
    LeaseStat dlease;
    for (unsigned i = 0; i < numdn; i++) {
      ::decode(dname, p);
      ::decode(dlease, p);
      InodeStat ist(p, features);

      ldout(cct, 15) << "" << i << ": '" << dname << "'" << dendl;

      Inode *in = add_update_inode(&ist, request->sent_stamp, session,
                                   request->perms);
      Dentry *dn;
      if (diri->dir->dentries.count(dname)) {
        Dentry *olddn = diri->dir->dentries[dname];
        if (olddn->inode != in) {
          // replace incorrect dentry
          unlink(olddn, true, true);  // keep dir, dentry
          dn = link(dir, dname, in, olddn);
          assert(dn == olddn);
        } else {
          // keep existing dn
          dn = olddn;
          touch_dn(dn);
        }
      } else {
        // new dn
        dn = link(dir, dname, in, NULL);
      }

      update_dentry_lease(dn, &dlease, request->sent_stamp, session);
      if (hash_order) {
        unsigned hash = ceph_frag_value(diri->hash_dentry_name(dname));
        if (hash != last_hash)
          readdir_offset = 2;
        last_hash = hash;
        dn->offset = dir_result_t::make_fpos(hash, readdir_offset++, true);
      } else {
        dn->offset = dir_result_t::make_fpos(fg, readdir_offset++, false);
      }

      // add to readdir cache
      if (dirp->release_count == diri->dir_release_count &&
          dirp->ordered_count == diri->dir_ordered_count &&
          dirp->start_shared_gen == diri->shared_gen) {
        if (dirp->cache_index == dir->readdir_cache.size()) {
          if (i == 0) {
            assert(!dirp->inode->is_complete_and_ordered());
            dir->readdir_cache.reserve(dirp->cache_index + numdn);
          }
          dir->readdir_cache.push_back(dn);
        } else if (dirp->cache_index < dir->readdir_cache.size()) {
          if (dirp->inode->is_complete_and_ordered())
            assert(dir->readdir_cache[dirp->cache_index] == dn);
          else
            dir->readdir_cache[dirp->cache_index] = dn;
        } else {
          assert(0 == "unexpected readdir buffer idx");
        }
        dirp->cache_index++;
      }
      // add to cached result list
      dirp->buffer.push_back(dir_result_t::dentry(dn->offset, dname, in));
      ldout(cct, 15) << __func__ << " " << hex << dn->offset << dec << ": '" << dname << "' -> " << in->ino << dendl;
    }

    if (numdn > 0)
      dirp->last_name = dname;
    if (end)
      dirp->next_offset = 2;
    else
      dirp->next_offset = readdir_offset;

    if (dir->is_empty())
      close_dir(dir);
  }
}
/*
 * insert a trace from a MDS reply into the cache.
 */
Inode* Client::insert_trace(MetaRequest *request, MetaSession *session)
{
  MClientReply *reply = request->reply;
  int op = request->get_op();

  ldout(cct, 10) << "insert_trace from " << request->sent_stamp << " mds." << session->mds_num
                 << " is_target=" << (int)reply->head.is_target
                 << " is_dentry=" << (int)reply->head.is_dentry
                 << dendl;

  bufferlist::iterator p = reply->get_trace_bl().begin();
  if (request->got_unsafe) {
    ldout(cct, 10) << "insert_trace -- already got unsafe; ignoring" << dendl;
    assert(p.end());
    return NULL;
  }

  if (p.end()) {
    ldout(cct, 10) << "insert_trace -- no trace" << dendl;

    Dentry *d = request->dentry();
    if (d) {
      Inode *diri = d->dir->parent_inode;
      diri->dir_release_count++;
      clear_dir_complete_and_ordered(diri, true);
    }

    if (d && reply->get_result() == 0) {
      if (op == CEPH_MDS_OP_RENAME) {
        // rename
        Dentry *od = request->old_dentry();
        ldout(cct, 10) << " unlinking rename src dn " << od << " for traceless reply" << dendl;
        assert(od);
        unlink(od, true, true);  // keep dir, dentry
      } else if (op == CEPH_MDS_OP_RMDIR ||
                 op == CEPH_MDS_OP_UNLINK) {
        // unlink, rmdir
        ldout(cct, 10) << " unlinking unlink/rmdir dn " << d << " for traceless reply" << dendl;
        unlink(d, true, true);  // keep dir, dentry
      }
    }
    return NULL;
  }

  ConnectionRef con = request->reply->get_connection();
  uint64_t features = con->get_features();
  ldout(cct, 10) << " features 0x" << hex << features << dec << dendl;

  // snap trace
  SnapRealm *realm = NULL;
  if (reply->snapbl.length())
    update_snap_trace(reply->snapbl, &realm);

  ldout(cct, 10) << " hrm "
                 << " is_target=" << (int)reply->head.is_target
                 << " is_dentry=" << (int)reply->head.is_dentry
                 << dendl;

  InodeStat dirst;
  DirStat dst;
  string dname;
  LeaseStat dlease;
  InodeStat ist;

  if (reply->head.is_dentry) {
    dirst.decode(p, features);
    dst.decode(p);
    ::decode(dname, p);
    ::decode(dlease, p);
  }

  Inode *in = 0;
  if (reply->head.is_target) {
    ist.decode(p, features);
    if (cct->_conf->client_debug_getattr_caps) {
      unsigned wanted = 0;
      if (op == CEPH_MDS_OP_GETATTR || op == CEPH_MDS_OP_LOOKUP)
        wanted = request->head.args.getattr.mask;
      else if (op == CEPH_MDS_OP_OPEN || op == CEPH_MDS_OP_CREATE)
        wanted = request->head.args.open.mask;

      if ((wanted & CEPH_CAP_XATTR_SHARED) &&
          !(ist.xattr_version > 0 && ist.xattrbl.length() > 0))
        assert(0 == "MDS reply does not contain xattrs");
    }

    in = add_update_inode(&ist, request->sent_stamp, session,
                          request->perms);
  }

  Inode *diri = NULL;
  if (reply->head.is_dentry) {
    diri = add_update_inode(&dirst, request->sent_stamp, session,
                            request->perms);
    update_dir_dist(diri, &dst);  // dir stat info is attached to ..

    if (in) {
      Dir *dir = diri->open_dir();
      insert_dentry_inode(dir, dname, &dlease, in, request->sent_stamp, session,
                          (op == CEPH_MDS_OP_RENAME) ? request->old_dentry() : NULL);
    } else {
      Dentry *dn = NULL;
      if (diri->dir && diri->dir->dentries.count(dname)) {
        dn = diri->dir->dentries[dname];
        if (dn->inode) {
          diri->dir_ordered_count++;
          clear_dir_complete_and_ordered(diri, false);
          unlink(dn, true, true);  // keep dir, dentry
        }
      }
      if (dlease.duration_ms > 0) {
        if (!dn) {
          Dir *dir = diri->open_dir();
          dn = link(dir, dname, NULL, NULL);
        }
        update_dentry_lease(dn, &dlease, request->sent_stamp, session);
      }
    }
  } else if (op == CEPH_MDS_OP_LOOKUPSNAP ||
             op == CEPH_MDS_OP_MKSNAP) {
    ldout(cct, 10) << " faking snap lookup weirdness" << dendl;
    // fake it for snap lookup
    vinodeno_t vino = ist.vino;
    vino.snapid = CEPH_SNAPDIR;
    assert(inode_map.count(vino));
    diri = inode_map[vino];

    string dname = request->path.last_dentry();

    LeaseStat dlease;
    dlease.duration_ms = 0;

    if (in) {
      Dir *dir = diri->open_dir();
      insert_dentry_inode(dir, dname, &dlease, in, request->sent_stamp, session);
    } else {
      if (diri->dir && diri->dir->dentries.count(dname)) {
        Dentry *dn = diri->dir->dentries[dname];
        if (dn->inode)
          unlink(dn, true, true);  // keep dir, dentry
      }
    }
  }

  if (in) {
    if (op == CEPH_MDS_OP_READDIR ||
        op == CEPH_MDS_OP_LSSNAP) {
      insert_readdir_results(request, session, in);
    } else if (op == CEPH_MDS_OP_LOOKUPNAME) {
      // hack: return parent inode instead
      in = diri;
    }

    if (request->dentry() == NULL && in != request->inode()) {
      // pin the target inode if its parent dentry is not pinned
      request->set_other_inode(in);
    }
  }

  if (realm)
    put_snap_realm(realm);

  request->target = in;
  return in;
}
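
// Target-MDS selection, in decreasing priority: an explicit resend_mds, the
// dirfrag->mds map when the dentry being operated on can be hashed, the
// inode's auth cap (or any cap), and finally a random up MDS. Snapped
// inodes are first walked back to a non-snap parent.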
mds_rank_t Client::choose_target_mds(MetaRequest *req, Inode** phash_diri)
{
  mds_rank_t mds = MDS_RANK_NONE;
  __u32 hash = 0;
  bool is_hash = false;

  Inode *in = NULL;
  Dentry *de = NULL;
  Cap *cap = NULL;

  if (req->resend_mds >= 0) {
    mds = req->resend_mds;
    req->resend_mds = -1;
    ldout(cct, 10) << "choose_target_mds resend_mds specified as mds." << mds << dendl;
    goto out;
  }

  if (cct->_conf->client_use_random_mds)
    goto random_mds;

  in = req->inode();
  de = req->dentry();
  if (in) {
    ldout(cct, 20) << "choose_target_mds starting with req->inode " << *in << dendl;
    if (req->path.depth()) {
      hash = in->hash_dentry_name(req->path[0]);
      ldout(cct, 20) << "choose_target_mds inode dir hash is " << (int)in->dir_layout.dl_dir_hash
                     << " on " << req->path[0]
                     << " => " << hash << dendl;
      is_hash = true;
    }
  } else if (de) {
    if (de->inode) {
      in = de->inode.get();
      ldout(cct, 20) << "choose_target_mds starting with req->dentry inode " << *in << dendl;
    } else {
      in = de->dir->parent_inode;
      hash = in->hash_dentry_name(de->name);
      ldout(cct, 20) << "choose_target_mds dentry dir hash is " << (int)in->dir_layout.dl_dir_hash
                     << " on " << de->name
                     << " => " << hash << dendl;
      is_hash = true;
    }
  }
  if (in) {
    if (in->snapid != CEPH_NOSNAP) {
      ldout(cct, 10) << "choose_target_mds " << *in << " is snapped, using nonsnap parent" << dendl;
      while (in->snapid != CEPH_NOSNAP) {
        if (in->snapid == CEPH_SNAPDIR)
          in = in->snapdir_parent.get();
        else if (!in->dn_set.empty())
          /* In most cases there will only be one dentry, so getting it
           * will be the correct action. If there are multiple hard links,
           * I think the MDS should be able to redirect as needed */
          in = in->get_first_parent()->dir->parent_inode;
        else {
          ldout(cct, 10) << "got unlinked inode, can't look at parent" << dendl;
          break;
        }
      }
      is_hash = false;
    }

    ldout(cct, 20) << "choose_target_mds " << *in << " is_hash=" << is_hash
                   << " hash=" << hash << dendl;

    if (is_hash && S_ISDIR(in->mode) && !in->fragmap.empty()) {
      frag_t fg = in->dirfragtree[hash];
      if (in->fragmap.count(fg)) {
        mds = in->fragmap[fg];
        if (phash_diri)
          *phash_diri = in;
      } else if (in->auth_cap) {
        mds = in->auth_cap->session->mds_num;
      }
      if (mds >= 0) {
        ldout(cct, 10) << "choose_target_mds from dirfragtree hash" << dendl;
        goto out;
      }
    }

    if (req->auth_is_best())
      cap = in->auth_cap;
    if (!cap && !in->caps.empty())
      cap = in->caps.begin()->second;
    if (!cap)
      goto random_mds;
    mds = cap->session->mds_num;
    ldout(cct, 10) << "choose_target_mds from caps on inode " << *in << dendl;

    goto out;
  }

random_mds:
  if (mds < 0) {
    mds = _get_random_up_mds();
    ldout(cct, 10) << "did not get mds through better means, so chose random mds " << mds << dendl;
  }

out:
  ldout(cct, 20) << "mds is " << mds << dendl;
  return mds;
}
void Client::connect_mds_targets(mds_rank_t mds)
{
  ldout(cct, 10) << "connect_mds_targets for mds." << mds << dendl;
  assert(mds_sessions.count(mds));
  const MDSMap::mds_info_t& info = mdsmap->get_mds_info(mds);
  for (set<mds_rank_t>::const_iterator q = info.export_targets.begin();
       q != info.export_targets.end();
       ++q) {
    if (mds_sessions.count(*q) == 0 &&
        mdsmap->is_clientreplay_or_active_or_stopping(*q)) {
      ldout(cct, 10) << "check_mds_sessions opening mds." << mds
                     << " export target mds." << *q << dendl;
      _open_mds_session(*q);
    }
  }
}
void Client::dump_mds_sessions(Formatter *f)
{
  f->dump_int("id", get_nodeid().v);
  entity_inst_t inst(messenger->get_myname(), messenger->get_myaddr());
  f->dump_object("inst", inst);
  f->dump_stream("inst_str") << inst;
  f->dump_stream("addr_str") << inst.addr;
  f->open_array_section("sessions");
  for (map<mds_rank_t,MetaSession*>::const_iterator p = mds_sessions.begin(); p != mds_sessions.end(); ++p) {
    f->open_object_section("session");
    p->second->dump(f);
    f->close_section();
  }
  f->close_section();
  f->dump_int("mdsmap_epoch", mdsmap->get_epoch());
}
void Client::dump_mds_requests(Formatter *f)
{
  for (map<ceph_tid_t, MetaRequest*>::iterator p = mds_requests.begin();
       p != mds_requests.end();
       ++p) {
    f->open_object_section("request");
    p->second->dump(f);
    f->close_section();
  }
}
int Client::verify_reply_trace(int r,
                               MetaRequest *request, MClientReply *reply,
                               InodeRef *ptarget, bool *pcreated,
                               const UserPerm& perms)
{
  // check whether this request actually did the create, and set created flag
  bufferlist extra_bl;
  inodeno_t created_ino;
  bool got_created_ino = false;
  ceph::unordered_map<vinodeno_t, Inode*>::iterator p;

  extra_bl.claim(reply->get_extra_bl());
  if (extra_bl.length() >= 8) {
    // if the extra bufferlist has a buffer, we assume it's the created inode
    // and that this request to create succeeded in actually creating
    // the inode (won the race with other create requests)
    ::decode(created_ino, extra_bl);
    got_created_ino = true;
    ldout(cct, 10) << "make_request created ino " << created_ino << dendl;
  }

  if (pcreated)
    *pcreated = got_created_ino;

  if (request->target) {
    *ptarget = request->target;
    ldout(cct, 20) << "make_request target is " << *ptarget->get() << dendl;
  } else {
    if (got_created_ino && (p = inode_map.find(vinodeno_t(created_ino, CEPH_NOSNAP))) != inode_map.end()) {
      (*ptarget) = p->second;
      ldout(cct, 20) << "make_request created, target is " << *ptarget->get() << dendl;
    } else {
      // we got a traceless reply, and need to look up what we just
      // created. for now, do this by name.  someday, do this by the
      // ino... which we know!  FIXME.
      InodeRef target;
      Dentry *d = request->dentry();
      if (d) {
        if (d->dir) {
          ldout(cct, 10) << "make_request got traceless reply, looking up #"
                         << d->dir->parent_inode->ino << "/" << d->name
                         << " got_ino " << got_created_ino
                         << " ino " << created_ino
                         << dendl;
          r = _do_lookup(d->dir->parent_inode, d->name, request->regetattr_mask,
                         &target, perms);
        } else {
          // if the dentry is not linked, just do our best. see #5021.
          assert(0 == "how did this happen?  i want logs!");
        }
      } else {
        Inode *in = request->inode();
        ldout(cct, 10) << "make_request got traceless reply, forcing getattr on #"
                       << in->ino << dendl;
        r = _getattr(in, request->regetattr_mask, perms, true);
        target = in;
      }
      if (r >= 0) {
        // verify ino returned in reply and trace_dist are the same
        if (got_created_ino &&
            created_ino.val != target->ino.val) {
          ldout(cct, 5) << "create got ino " << created_ino << " but then failed on lookup; EINTR?" << dendl;
          r = -EINTR;
        }
        if (ptarget)
          ptarget->swap(target);
      }
    }
  }

  return r;
}
/**
 * make a request
 *
 * Blocking helper to make an MDS request.
 *
 * If the ptarget flag is set, behavior changes slightly: the caller
 * expects to get a pointer to the inode we are creating or operating
 * on.  As a result, we will follow up any traceless mutation reply
 * with a getattr or lookup to transparently handle a traceless reply
 * from the MDS (as when the MDS restarts and the client has to replay
 * a request).
 *
 * @param request the MetaRequest to execute
 * @param perms The user uid/gid to execute as (eventually, full group lists?)
 * @param ptarget [optional] address to store a pointer to the target inode we want to create or operate on
 * @param pcreated [optional; required if ptarget] where to store a bool of whether our create atomically created a file
 * @param use_mds [optional] prefer a specific mds (-1 for default)
 * @param pdirbl [optional; disallowed if ptarget] where to pass extra reply payload to the caller
 */
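// Typical caller pattern elsewhere in this file (sketch only; real callers
// also fill in inode refs and cap drop/unless masks):
//
//   MetaRequest *req = new MetaRequest(CEPH_MDS_OP_GETATTR);
//   req->set_filepath(path);
//   req->head.args.getattr.mask = mask;
//   InodeRef target;
//   int r = make_request(req, perms, &target);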
1638 int Client::make_request(MetaRequest
*request
,
1639 const UserPerm
& perms
,
1640 InodeRef
*ptarget
, bool *pcreated
,
1646 // assign a unique tid
1647 ceph_tid_t tid
= ++last_tid
;
1648 request
->set_tid(tid
);
1651 request
->op_stamp
= ceph_clock_now();
1654 mds_requests
[tid
] = request
->get();
1655 if (oldest_tid
== 0 && request
->get_op() != CEPH_MDS_OP_SETFILELOCK
)
1658 request
->set_caller_perms(perms
);
1660 if (cct
->_conf
->client_inject_fixed_oldest_tid
) {
1661 ldout(cct
, 20) << __func__
<< " injecting fixed oldest_client_tid(1)" << dendl
;
1662 request
->set_oldest_client_tid(1);
1664 request
->set_oldest_client_tid(oldest_tid
);
1669 request
->resend_mds
= use_mds
;
1672 if (request
->aborted())
1676 request
->abort(-EBLACKLISTED
);
1682 request
->caller_cond
= &caller_cond
;
1685 Inode
*hash_diri
= NULL
;
1686 mds_rank_t mds
= choose_target_mds(request
, &hash_diri
);
1687 int mds_state
= (mds
== MDS_RANK_NONE
) ? MDSMap::STATE_NULL
: mdsmap
->get_state(mds
);
1688 if (mds_state
!= MDSMap::STATE_ACTIVE
&& mds_state
!= MDSMap::STATE_STOPPING
) {
1689 if (mds_state
== MDSMap::STATE_NULL
&& mds
>= mdsmap
->get_max_mds()) {
1691 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, remove it from fragmap" << dendl
;
1692 _fragmap_remove_stopped_mds(hash_diri
, mds
);
1694 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, trying a random mds" << dendl
;
1695 request
->resend_mds
= _get_random_up_mds();
1698 ldout(cct
, 10) << " target mds." << mds
<< " not active, waiting for new mdsmap" << dendl
;
1699 wait_on_list(waiting_for_mdsmap
);
1705 MetaSession
*session
= NULL
;
1706 if (!have_open_session(mds
)) {
1707 session
= _get_or_open_mds_session(mds
);
1710 if (session
->state
== MetaSession::STATE_OPENING
) {
1711 ldout(cct
, 10) << "waiting for session to mds." << mds
<< " to open" << dendl
;
1712 wait_on_context_list(session
->waiting_for_open
);
1713 // Abort requests on REJECT from MDS
1714 if (rejected_by_mds
.count(mds
)) {
1715 request
->abort(-EPERM
);
1721 if (!have_open_session(mds
))
1724 session
= mds_sessions
[mds
];
1728 send_request(request
, session
);
1731 ldout(cct
, 20) << "awaiting reply|forward|kick on " << &caller_cond
<< dendl
;
1732 request
->kick
= false;
1733 while (!request
->reply
&& // reply
1734 request
->resend_mds
< 0 && // forward
1736 caller_cond
.Wait(client_lock
);
1737 request
->caller_cond
= NULL
;
1739 // did we get a reply?
1744 if (!request
->reply
) {
1745 assert(request
->aborted());
1746 assert(!request
->got_unsafe
);
1747 r
= request
->get_abort_code();
1748 request
->item
.remove_myself();
1749 unregister_request(request
);
1750 put_request(request
); // ours
1755 MClientReply
*reply
= request
->reply
;
1756 request
->reply
= NULL
;
1757 r
= reply
->get_result();
1759 request
->success
= true;
1761 // kick dispatcher (we've got it!)
1762 assert(request
->dispatch_cond
);
1763 request
->dispatch_cond
->Signal();
1764 ldout(cct
, 20) << "sendrecv kickback on tid " << tid
<< " " << request
->dispatch_cond
<< dendl
;
1765 request
->dispatch_cond
= 0;
1767 if (r
>= 0 && ptarget
)
1768 r
= verify_reply_trace(r
, request
, reply
, ptarget
, pcreated
, perms
);
1771 pdirbl
->claim(reply
->get_extra_bl());
1774 utime_t lat
= ceph_clock_now();
1775 lat
-= request
->sent_stamp
;
1776 ldout(cct
, 20) << "lat " << lat
<< dendl
;
1777 logger
->tinc(l_c_lat
, lat
);
1778 logger
->tinc(l_c_reply
, lat
);
1780 put_request(request
);
1786 void Client::unregister_request(MetaRequest
*req
)
1788 mds_requests
.erase(req
->tid
);
1789 if (req
->tid
== oldest_tid
) {
1790 map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.upper_bound(oldest_tid
);
1792 if (p
== mds_requests
.end()) {
1796 if (p
->second
->get_op() != CEPH_MDS_OP_SETFILELOCK
) {
1797 oldest_tid
= p
->first
;
1806 void Client::put_request(MetaRequest
*request
)
1808 if (request
->_put()) {
1810 if (request
->success
)
1811 op
= request
->get_op();
1813 request
->take_other_inode(&other_in
);
1817 (op
== CEPH_MDS_OP_RMDIR
||
1818 op
== CEPH_MDS_OP_RENAME
||
1819 op
== CEPH_MDS_OP_RMSNAP
)) {
1820 _try_to_trim_inode(other_in
.get(), false);
1825 int Client::encode_inode_release(Inode
*in
, MetaRequest
*req
,
1826 mds_rank_t mds
, int drop
,
1827 int unless
, int force
)
1829 ldout(cct
, 20) << "encode_inode_release enter(in:" << *in
<< ", req:" << req
1830 << " mds:" << mds
<< ", drop:" << drop
<< ", unless:" << unless
1831 << ", have:" << ", force:" << force
<< ")" << dendl
;
1833 if (in
->caps
.count(mds
)) {
1834 Cap
*caps
= in
->caps
[mds
];
1835 drop
&= ~(in
->dirty_caps
| get_caps_used(in
));
1836 if ((drop
& caps
->issued
) &&
1837 !(unless
& caps
->issued
)) {
1838 ldout(cct
, 25) << "Dropping caps. Initial " << ccap_string(caps
->issued
) << dendl
;
1839 caps
->issued
&= ~drop
;
1840 caps
->implemented
&= ~drop
;
1842 ldout(cct
, 25) << "Now have: " << ccap_string(caps
->issued
) << dendl
;
1847 ceph_mds_request_release rel
;
1849 rel
.cap_id
= caps
->cap_id
;
1850 rel
.seq
= caps
->seq
;
1851 rel
.issue_seq
= caps
->issue_seq
;
1852 rel
.mseq
= caps
->mseq
;
1853 rel
.caps
= caps
->implemented
;
1854 rel
.wanted
= caps
->wanted
;
1857 req
->cap_releases
.push_back(MClientRequest::Release(rel
,""));
1860 ldout(cct
, 25) << "encode_inode_release exit(in:" << *in
<< ") released:"
1861 << released
<< dendl
;
1865 void Client::encode_dentry_release(Dentry
*dn
, MetaRequest
*req
,
1866 mds_rank_t mds
, int drop
, int unless
)
1868 ldout(cct
, 20) << "encode_dentry_release enter(dn:"
1869 << dn
<< ")" << dendl
;
1872 released
= encode_inode_release(dn
->dir
->parent_inode
, req
,
1873 mds
, drop
, unless
, 1);
1874 if (released
&& dn
->lease_mds
== mds
) {
1875 ldout(cct
, 25) << "preemptively releasing dn to mds" << dendl
;
1876 MClientRequest::Release
& rel
= req
->cap_releases
.back();
1877 rel
.item
.dname_len
= dn
->name
.length();
1878 rel
.item
.dname_seq
= dn
->lease_seq
;
1879 rel
.dname
= dn
->name
;
1881 ldout(cct
, 25) << "encode_dentry_release exit(dn:"
1882 << dn
<< ")" << dendl
;
1887 * This requires the MClientRequest *request member to be set.
1888 * It will error out horribly without one.
1889 * Additionally, if you set any *drop member, you'd better have
1890 * set the corresponding dentry!
1892 void Client::encode_cap_releases(MetaRequest
*req
, mds_rank_t mds
)
1894 ldout(cct
, 20) << "encode_cap_releases enter (req: "
1895 << req
<< ", mds: " << mds
<< ")" << dendl
;
1896 if (req
->inode_drop
&& req
->inode())
1897 encode_inode_release(req
->inode(), req
,
1898 mds
, req
->inode_drop
,
1901 if (req
->old_inode_drop
&& req
->old_inode())
1902 encode_inode_release(req
->old_inode(), req
,
1903 mds
, req
->old_inode_drop
,
1904 req
->old_inode_unless
);
1905 if (req
->other_inode_drop
&& req
->other_inode())
1906 encode_inode_release(req
->other_inode(), req
,
1907 mds
, req
->other_inode_drop
,
1908 req
->other_inode_unless
);
1910 if (req
->dentry_drop
&& req
->dentry())
1911 encode_dentry_release(req
->dentry(), req
,
1912 mds
, req
->dentry_drop
,
1913 req
->dentry_unless
);
1915 if (req
->old_dentry_drop
&& req
->old_dentry())
1916 encode_dentry_release(req
->old_dentry(), req
,
1917 mds
, req
->old_dentry_drop
,
1918 req
->old_dentry_unless
);
1919 ldout(cct
, 25) << "encode_cap_releases exit (req: "
1920 << req
<< ", mds " << mds
<<dendl
;
1923 bool Client::have_open_session(mds_rank_t mds
)
1926 mds_sessions
.count(mds
) &&
1927 (mds_sessions
[mds
]->state
== MetaSession::STATE_OPEN
||
1928 mds_sessions
[mds
]->state
== MetaSession::STATE_STALE
);
1931 MetaSession
*Client::_get_mds_session(mds_rank_t mds
, Connection
*con
)
1933 if (mds_sessions
.count(mds
) == 0)
1935 MetaSession
*s
= mds_sessions
[mds
];
1941 MetaSession
*Client::_get_or_open_mds_session(mds_rank_t mds
)
1943 if (mds_sessions
.count(mds
))
1944 return mds_sessions
[mds
];
1945 return _open_mds_session(mds
);
1949 * Populate a map of strings with client-identifying metadata,
1950 * such as the hostname. Call this once at initialization.
1952 void Client::populate_metadata(const std::string
&mount_root
)
1958 metadata
["hostname"] = u
.nodename
;
1959 ldout(cct
, 20) << __func__
<< " read hostname '" << u
.nodename
<< "'" << dendl
;
1961 ldout(cct
, 1) << __func__
<< " failed to read hostname (" << cpp_strerror(r
) << ")" << dendl
;
1964 metadata
["pid"] = stringify(getpid());
1966 // Ceph entity id (the '0' in "client.0")
1967 metadata
["entity_id"] = cct
->_conf
->name
.get_id();
1969 // Our mount position
1970 if (!mount_root
.empty()) {
1971 metadata
["root"] = mount_root
;
1975 metadata
["ceph_version"] = pretty_version_to_str();
1976 metadata
["ceph_sha1"] = git_version_to_str();
1978 // Apply any metadata from the user's configured overrides
1979 std::vector
<std::string
> tokens
;
1980 get_str_vec(cct
->_conf
->client_metadata
, ",", tokens
);
1981 for (const auto &i
: tokens
) {
1982 auto eqpos
= i
.find("=");
1983 // Throw out anything that isn't of the form "<str>=<str>"
1984 if (eqpos
== 0 || eqpos
== std::string::npos
|| eqpos
== i
.size()) {
1985 lderr(cct
) << "Invalid metadata keyval pair: '" << i
<< "'" << dendl
;
1988 metadata
[i
.substr(0, eqpos
)] = i
.substr(eqpos
+ 1);
1993 * Optionally add or override client metadata fields.
1995 void Client::update_metadata(std::string
const &k
, std::string
const &v
)
1997 Mutex::Locker
l(client_lock
);
1998 assert(initialized
);
2000 if (metadata
.count(k
)) {
2001 ldout(cct
, 1) << __func__
<< " warning, overriding metadata field '" << k
2002 << "' from '" << metadata
[k
] << "' to '" << v
<< "'" << dendl
;
2008 MetaSession
*Client::_open_mds_session(mds_rank_t mds
)
2010 ldout(cct
, 10) << "_open_mds_session mds." << mds
<< dendl
;
2011 assert(mds_sessions
.count(mds
) == 0);
2012 MetaSession
*session
= new MetaSession
;
2013 session
->mds_num
= mds
;
2015 session
->inst
= mdsmap
->get_inst(mds
);
2016 session
->con
= messenger
->get_connection(session
->inst
);
2017 session
->state
= MetaSession::STATE_OPENING
;
2018 session
->mds_state
= MDSMap::STATE_NULL
;
2019 mds_sessions
[mds
] = session
;
2021 // Maybe skip sending a request to open if this MDS daemon
2022 // has previously sent us a REJECT.
2023 if (rejected_by_mds
.count(mds
)) {
2024 if (rejected_by_mds
[mds
] == session
->inst
) {
2025 ldout(cct
, 4) << "_open_mds_session mds." << mds
<< " skipping "
2026 "because we were rejected" << dendl
;
2029 ldout(cct
, 4) << "_open_mds_session mds." << mds
<< " old inst "
2030 "rejected us, trying with new inst" << dendl
;
2031 rejected_by_mds
.erase(mds
);
2035 MClientSession
*m
= new MClientSession(CEPH_SESSION_REQUEST_OPEN
);
2036 m
->client_meta
= metadata
;
2037 session
->con
->send_message(m
);
2041 void Client::_close_mds_session(MetaSession
*s
)
2043 ldout(cct
, 2) << "_close_mds_session mds." << s
->mds_num
<< " seq " << s
->seq
<< dendl
;
2044 s
->state
= MetaSession::STATE_CLOSING
;
2045 s
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE
, s
->seq
));
2048 void Client::_closed_mds_session(MetaSession
*s
)
2050 s
->state
= MetaSession::STATE_CLOSED
;
2051 s
->con
->mark_down();
2052 signal_context_list(s
->waiting_for_open
);
2053 mount_cond
.Signal();
2054 remove_session_caps(s
);
2055 kick_requests_closed(s
);
2056 mds_sessions
.erase(s
->mds_num
);
2060 void Client::handle_client_session(MClientSession
*m
)
2062 mds_rank_t from
= mds_rank_t(m
->get_source().num());
2063 ldout(cct
, 10) << "handle_client_session " << *m
<< " from mds." << from
<< dendl
;
2065 MetaSession
*session
= _get_mds_session(from
, m
->get_connection().get());
2067 ldout(cct
, 10) << " discarding session message from sessionless mds " << m
->get_source_inst() << dendl
;
2072 switch (m
->get_op()) {
2073 case CEPH_SESSION_OPEN
:
2074 renew_caps(session
);
2075 session
->state
= MetaSession::STATE_OPEN
;
2077 mount_cond
.Signal();
2079 connect_mds_targets(from
);
2080 signal_context_list(session
->waiting_for_open
);
2083 case CEPH_SESSION_CLOSE
:
2084 _closed_mds_session(session
);
2087 case CEPH_SESSION_RENEWCAPS
:
2088 if (session
->cap_renew_seq
== m
->get_seq()) {
2090 session
->last_cap_renew_request
+ mdsmap
->get_session_timeout();
2091 wake_inode_waiters(session
);
2095 case CEPH_SESSION_STALE
:
2096 // invalidate session caps/leases
2098 session
->cap_ttl
= ceph_clock_now();
2099 session
->cap_ttl
-= 1;
2100 renew_caps(session
);
2103 case CEPH_SESSION_RECALL_STATE
:
2104 trim_caps(session
, m
->get_max_caps());
2107 case CEPH_SESSION_FLUSHMSG
:
2108 session
->con
->send_message(new MClientSession(CEPH_SESSION_FLUSHMSG_ACK
, m
->get_seq()));
2111 case CEPH_SESSION_FORCE_RO
:
2112 force_session_readonly(session
);
2115 case CEPH_SESSION_REJECT
:
2116 rejected_by_mds
[session
->mds_num
] = session
->inst
;
2117 _closed_mds_session(session
);
2128 bool Client::_any_stale_sessions() const
2130 assert(client_lock
.is_locked_by_me());
2132 for (const auto &i
: mds_sessions
) {
2133 if (i
.second
->state
== MetaSession::STATE_STALE
) {
2141 void Client::_kick_stale_sessions()
2143 ldout(cct
, 1) << "kick_stale_sessions" << dendl
;
2145 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2146 p
!= mds_sessions
.end(); ) {
2147 MetaSession
*s
= p
->second
;
2149 if (s
->state
== MetaSession::STATE_STALE
)
2150 _closed_mds_session(s
);
2154 void Client::send_request(MetaRequest
*request
, MetaSession
*session
,
2155 bool drop_cap_releases
)
2158 mds_rank_t mds
= session
->mds_num
;
2159 ldout(cct
, 10) << "send_request rebuilding request " << request
->get_tid()
2160 << " for mds." << mds
<< dendl
;
2161 MClientRequest
*r
= build_client_request(request
);
2162 if (request
->dentry()) {
2163 r
->set_dentry_wanted();
2165 if (request
->got_unsafe
) {
2166 r
->set_replayed_op();
2167 if (request
->target
)
2168 r
->head
.ino
= request
->target
->ino
;
2170 encode_cap_releases(request
, mds
);
2171 if (drop_cap_releases
) // we haven't send cap reconnect yet, drop cap releases
2172 request
->cap_releases
.clear();
2174 r
->releases
.swap(request
->cap_releases
);
2176 r
->set_mdsmap_epoch(mdsmap
->get_epoch());
2177 if (r
->head
.op
== CEPH_MDS_OP_SETXATTR
) {
2178 objecter
->with_osdmap([r
](const OSDMap
& o
) {
2179 r
->set_osdmap_epoch(o
.get_epoch());
2183 if (request
->mds
== -1) {
2184 request
->sent_stamp
= ceph_clock_now();
2185 ldout(cct
, 20) << "send_request set sent_stamp to " << request
->sent_stamp
<< dendl
;
2189 Inode
*in
= request
->inode();
2190 if (in
&& in
->caps
.count(mds
))
2191 request
->sent_on_mseq
= in
->caps
[mds
]->mseq
;
2193 session
->requests
.push_back(&request
->item
);
2195 ldout(cct
, 10) << "send_request " << *r
<< " to mds." << mds
<< dendl
;
2196 session
->con
->send_message(r
);

MClientRequest* Client::build_client_request(MetaRequest *request)
{
  MClientRequest *req = new MClientRequest(request->get_op());
  req->set_tid(request->tid);
  req->set_stamp(request->op_stamp);
  memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head));

  // if the filepaths haven't been set, set them!
  if (request->path.empty()) {
    Inode *in = request->inode();
    Dentry *de = request->dentry();
    if (in)
      in->make_nosnap_relative_path(request->path);
    else if (de) {
      if (de->inode)
	de->inode->make_nosnap_relative_path(request->path);
      else if (de->dir) {
	de->dir->parent_inode->make_nosnap_relative_path(request->path);
	request->path.push_dentry(de->name);
      }
      else ldout(cct, 1) << "Warning -- unable to construct a filepath!"
			 << " No path, inode, or appropriately-endowed dentry given!"
			 << dendl;
    } else ldout(cct, 1) << "Warning -- unable to construct a filepath!"
			 << " No path, inode, or dentry given!"
			 << dendl;
  }
  req->set_filepath(request->get_filepath());
  req->set_filepath2(request->get_filepath2());
  req->set_data(request->data);
  req->set_retry_attempt(request->retry_attempt++);
  req->head.num_fwd = request->num_fwd;
  const gid_t *_gids;
  int gid_count = request->perms.get_gids(&_gids);
  req->set_gid_list(gid_count, _gids);
  return req;
}
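
// Note: set_retry_attempt(request->retry_attempt++) stamps the message
// with the attempt number of *this* send and then bumps the counter for
// the next one. kick_requests() below uses retry_attempt == 0 to pick out
// brand-new requests, while resend_unsafe_requests() picks the opposite
// set, so the post-increment ordering matters.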

void Client::handle_client_request_forward(MClientRequestForward *fwd)
{
  mds_rank_t mds = mds_rank_t(fwd->get_source().num());
  MetaSession *session = _get_mds_session(mds, fwd->get_connection().get());
  if (!session) {
    fwd->put();
    return;
  }
  ceph_tid_t tid = fwd->get_tid();

  if (mds_requests.count(tid) == 0) {
    ldout(cct, 10) << "handle_client_request_forward no pending request on tid " << tid << dendl;
    fwd->put();
    return;
  }

  MetaRequest *request = mds_requests[tid];
  assert(request);

  // reset retry counter
  request->retry_attempt = 0;

  // request not forwarded, or dest mds has no session.
  // resend.
  ldout(cct, 10) << "handle_client_request tid " << tid
		 << " fwd " << fwd->get_num_fwd()
		 << " to mds." << fwd->get_dest_mds()
		 << ", resending to " << fwd->get_dest_mds()
		 << dendl;

  request->mds = -1;
  request->item.remove_myself();
  request->num_fwd = fwd->get_num_fwd();
  request->resend_mds = fwd->get_dest_mds();
  request->caller_cond->Signal();

  fwd->put();
}

bool Client::is_dir_operation(MetaRequest *req)
{
  int op = req->get_op();
  if (op == CEPH_MDS_OP_MKNOD || op == CEPH_MDS_OP_LINK ||
      op == CEPH_MDS_OP_UNLINK || op == CEPH_MDS_OP_RENAME ||
      op == CEPH_MDS_OP_MKDIR || op == CEPH_MDS_OP_RMDIR ||
      op == CEPH_MDS_OP_SYMLINK || op == CEPH_MDS_OP_CREATE)
    return true;
  return false;
}

void Client::handle_client_reply(MClientReply *reply)
{
  mds_rank_t mds_num = mds_rank_t(reply->get_source().num());
  MetaSession *session = _get_mds_session(mds_num, reply->get_connection().get());
  if (!session) {
    reply->put();
    return;
  }

  ceph_tid_t tid = reply->get_tid();
  bool is_safe = reply->is_safe();

  if (mds_requests.count(tid) == 0) {
    lderr(cct) << "handle_client_reply no pending request on tid " << tid
	       << " safe is:" << is_safe << dendl;
    reply->put();
    return;
  }
  MetaRequest *request = mds_requests.at(tid);

  ldout(cct, 20) << "handle_client_reply got a reply. Safe:" << is_safe
		 << " tid " << tid << dendl;

  if (request->got_unsafe && !is_safe) {
    //duplicate response
    ldout(cct, 0) << "got a duplicate reply on tid " << tid << " from mds "
		  << mds_num << " safe:" << is_safe << dendl;
    reply->put();
    return;
  }

  if (-ESTALE == reply->get_result()) { // see if we can get to proper MDS
    ldout(cct, 20) << "got ESTALE on tid " << request->tid
		   << " from mds." << request->mds << dendl;
    request->send_to_auth = true;
    request->resend_mds = choose_target_mds(request);
    Inode *in = request->inode();
    if (request->resend_mds >= 0 &&
	request->resend_mds == request->mds &&
	(in == NULL ||
	 in->caps.count(request->resend_mds) == 0 ||
	 request->sent_on_mseq == in->caps[request->resend_mds]->mseq)) {
      // have to return ESTALE
    } else {
      request->caller_cond->Signal();
      reply->put();
      return;
    }
    ldout(cct, 20) << "have to return ESTALE" << dendl;
  }

  assert(request->reply == NULL);
  request->reply = reply;
  insert_trace(request, session);

  // Handle unsafe reply
  if (!is_safe) {
    request->got_unsafe = true;
    session->unsafe_requests.push_back(&request->unsafe_item);
    if (is_dir_operation(request)) {
      Inode *dir = request->inode();
      assert(dir);
      dir->unsafe_ops.push_back(&request->unsafe_dir_item);
    }
    if (request->target) {
      InodeRef &in = request->target;
      in->unsafe_ops.push_back(&request->unsafe_target_item);
    }
  }

  // Only signal the caller once (on the first reply):
  // Either its an unsafe reply, or its a safe reply and no unsafe reply was sent.
  if (!is_safe || !request->got_unsafe) {
    Cond cond;
    request->dispatch_cond = &cond;

    // wake up waiter
    ldout(cct, 20) << "handle_client_reply signalling caller " << (void*)request->caller_cond << dendl;
    request->caller_cond->Signal();

    // wake for kick back
    while (request->dispatch_cond) {
      ldout(cct, 20) << "handle_client_reply awaiting kickback on tid " << tid << " " << &cond << dendl;
      cond.Wait(client_lock);
    }
  }

  if (is_safe) {
    // the filesystem change is committed to disk
    // we're done, clean up
    if (request->got_unsafe) {
      request->unsafe_item.remove_myself();
      request->unsafe_dir_item.remove_myself();
      request->unsafe_target_item.remove_myself();
      signal_cond_list(request->waitfor_safe);
    }
    request->item.remove_myself();
    unregister_request(request);
  }
  if (unmounting)
    mount_cond.Signal();
}
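
// Reply lifecycle, for reference: a request may receive up to two replies,
// an "unsafe" one (applied in MDS memory) and a later "safe" one
// (journaled to disk). The caller is woken only on the first reply; the
// safe reply just cleans up bookkeeping and signals waitfor_safe, which is
// what wait_unsafe_requests() relies on during fsync/unmount.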

void Client::_handle_full_flag(int64_t pool)
{
  ldout(cct, 1) << __func__ << ": FULL: cancelling outstanding operations "
		<< "on " << pool << dendl;
  // Cancel all outstanding ops in this pool with -ENOSPC: it is necessary
  // to do this rather than blocking, because otherwise when we fill up we
  // potentially lock caps forever on files with dirty pages, and we need
  // to be able to release those caps to the MDS so that it can delete files
  // and free up space.
  epoch_t cancelled_epoch = objecter->op_cancel_writes(-ENOSPC, pool);

  // For all inodes with layouts in this pool and a pending flush write op
  // (i.e. one of the ones we will cancel), we've got to purge_set their data
  // from ObjectCacher so that it doesn't re-issue the write in response to
  // the ENOSPC error.
  // Fortunately since we're cancelling everything in a given pool, we don't
  // need to know which ops belong to which ObjectSet, we can just blow all
  // the un-flushed cached data away and mark any dirty inodes' async_err
  // field with -ENOSPC as long as we're sure all the ops we cancelled were
  // affecting this pool, and all the objectsets we're purging were also
  // in this pool.
  for (unordered_map<vinodeno_t,Inode*>::iterator i = inode_map.begin();
       i != inode_map.end(); ++i)
  {
    Inode *inode = i->second;
    if (inode->oset.dirty_or_tx
        && (pool == -1 || inode->layout.pool_id == pool)) {
      ldout(cct, 4) << __func__ << ": FULL: inode 0x" << std::hex << i->first << std::dec
		    << " has dirty objects, purging and setting ENOSPC" << dendl;
      objectcacher->purge_set(&inode->oset);
      inode->set_async_err(-ENOSPC);
    }
  }

  if (cancelled_epoch != (epoch_t)-1) {
    set_cap_epoch_barrier(cancelled_epoch);
  }
}
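
// A pool id of -1 means "every pool": handle_osd_map() below passes -1
// when the global OSDMap full flag is set, and a specific pool id when
// only that pool carries pg_pool_t::FLAG_FULL.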

void Client::handle_osd_map(MOSDMap *m)
{
  std::set<entity_addr_t> new_blacklists;
  objecter->consume_blacklist_events(&new_blacklists);

  const auto myaddr = messenger->get_myaddr();
  if (!blacklisted && new_blacklists.count(myaddr)) {
    auto epoch = objecter->with_osdmap([](const OSDMap &o){
	return o.get_epoch();
      });
    lderr(cct) << "I was blacklisted at osd epoch " << epoch << dendl;
    blacklisted = true;
    for (std::map<ceph_tid_t, MetaRequest*>::iterator p = mds_requests.begin();
	 p != mds_requests.end(); ) {
      auto req = p->second;
      ++p;
      req->abort(-EBLACKLISTED);
      if (req->caller_cond) {
	req->kick = true;
	req->caller_cond->Signal();
      }
    }

    // Progress aborts on any requests that were on this waitlist.  Any
    // requests that were on a waiting_for_open session waitlist
    // will get kicked during close session below.
    signal_cond_list(waiting_for_mdsmap);

    // Force-close all sessions: assume this is not abandoning any state
    // on the MDS side because the MDS will have seen the blacklist too.
    while(!mds_sessions.empty()) {
      auto i = mds_sessions.begin();
      auto session = i->second;
      _closed_mds_session(session);
    }

    // Since we know all our OSD ops will fail, cancel them all preemptively,
    // so that on an unhealthy cluster we can umount promptly even if e.g.
    // some PGs were inaccessible.
    objecter->op_cancel_writes(-EBLACKLISTED);

  } else if (blacklisted) {
    // Handle case where we were blacklisted but no longer are
    blacklisted = objecter->with_osdmap([myaddr](const OSDMap &o){
	return o.is_blacklisted(myaddr);});
  }

  if (objecter->osdmap_full_flag()) {
    _handle_full_flag(-1);
  } else {
    // Accumulate local list of full pools so that I can drop
    // the objecter lock before re-entering objecter in
    // _handle_full_flag
    std::vector<int64_t> full_pools;

    objecter->with_osdmap([&full_pools](const OSDMap &o) {
	for (const auto& kv : o.get_pools()) {
	  if (kv.second.has_flag(pg_pool_t::FLAG_FULL)) {
	    full_pools.push_back(kv.first);
	  }
	}
      });

    for (auto p : full_pools)
      _handle_full_flag(p);

    // Subscribe to subsequent maps to watch for the full flag going
    // away.  For the global full flag objecter does this for us, but
    // it pays no attention to the per-pool full flag so in this branch
    // we do it ourselves.
    if (!full_pools.empty()) {
      objecter->maybe_request_map();
    }
  }

  m->put();
}
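
// Blacklist recovery note: once blacklisted we re-check the OSDMap on every
// update, so "blacklisted" flips back to false as soon as our address is
// removed from the blacklist; the MDS requests aborted above are not
// replayed -- their callers see -EBLACKLISTED.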

// ------------------------
// incoming messages


bool Client::ms_dispatch(Message *m)
{
  Mutex::Locker l(client_lock);
  if (!initialized) {
    ldout(cct, 10) << "inactive, discarding " << *m << dendl;
    m->put();
    return true;
  }

  switch (m->get_type()) {
    // mounting and mds sessions
  case CEPH_MSG_MDS_MAP:
    handle_mds_map(static_cast<MMDSMap*>(m));
    break;
  case CEPH_MSG_FS_MAP:
    handle_fs_map(static_cast<MFSMap*>(m));
    break;
  case CEPH_MSG_FS_MAP_USER:
    handle_fs_map_user(static_cast<MFSMapUser*>(m));
    break;
  case CEPH_MSG_CLIENT_SESSION:
    handle_client_session(static_cast<MClientSession*>(m));
    break;

  case CEPH_MSG_OSD_MAP:
    handle_osd_map(static_cast<MOSDMap*>(m));
    break;

    // requests
  case CEPH_MSG_CLIENT_REQUEST_FORWARD:
    handle_client_request_forward(static_cast<MClientRequestForward*>(m));
    break;
  case CEPH_MSG_CLIENT_REPLY:
    handle_client_reply(static_cast<MClientReply*>(m));
    break;

  case CEPH_MSG_CLIENT_SNAP:
    handle_snap(static_cast<MClientSnap*>(m));
    break;
  case CEPH_MSG_CLIENT_CAPS:
    handle_caps(static_cast<MClientCaps*>(m));
    break;
  case CEPH_MSG_CLIENT_LEASE:
    handle_lease(static_cast<MClientLease*>(m));
    break;
  case MSG_COMMAND_REPLY:
    if (m->get_source().type() == CEPH_ENTITY_TYPE_MDS) {
      handle_command_reply(static_cast<MCommandReply*>(m));
    } else {
      return false;
    }
    break;
  case CEPH_MSG_CLIENT_QUOTA:
    handle_quota(static_cast<MClientQuota*>(m));
    break;

  default:
    return false;
  }

  // unmounting?
  if (unmounting) {
    ldout(cct, 10) << "unmounting: trim pass, size was " << lru.lru_get_size()
		   << "+" << inode_map.size() << dendl;
    long unsigned size = lru.lru_get_size() + inode_map.size();
    trim_cache();
    if (size > lru.lru_get_size() + inode_map.size()) {
      ldout(cct, 10) << "unmounting: trim pass, cache shrank, poking unmount()" << dendl;
      mount_cond.Signal();
    } else {
      ldout(cct, 10) << "unmounting: trim pass, size still " << lru.lru_get_size()
		     << "+" << inode_map.size() << dendl;
    }
  }

  return true;
}

void Client::handle_fs_map(MFSMap *m)
{
  fsmap.reset(new FSMap(m->get_fsmap()));
  m->put();

  signal_cond_list(waiting_for_fsmap);

  monclient->sub_got("fsmap", fsmap->get_epoch());
}

void Client::handle_fs_map_user(MFSMapUser *m)
{
  fsmap_user.reset(new FSMapUser);
  *fsmap_user = m->get_fsmap();
  m->put();

  monclient->sub_got("fsmap.user", fsmap_user->get_epoch());
  signal_cond_list(waiting_for_fsmap);
}

void Client::handle_mds_map(MMDSMap* m)
{
  if (m->get_epoch() <= mdsmap->get_epoch()) {
    ldout(cct, 1) << "handle_mds_map epoch " << m->get_epoch()
		  << " is identical to or older than our "
		  << mdsmap->get_epoch() << dendl;
    m->put();
    return;
  }

  ldout(cct, 1) << "handle_mds_map epoch " << m->get_epoch() << dendl;

  std::unique_ptr<MDSMap> oldmap(new MDSMap);
  oldmap.swap(mdsmap);

  mdsmap->decode(m->get_encoded());

  // Cancel any commands for missing or laggy GIDs
  std::list<ceph_tid_t> cancel_ops;
  auto &commands = command_table.get_commands();
  for (const auto &i : commands) {
    auto &op = i.second;
    const mds_gid_t op_mds_gid = op.mds_gid;
    if (mdsmap->is_dne_gid(op_mds_gid) || mdsmap->is_laggy_gid(op_mds_gid)) {
      ldout(cct, 1) << __func__ << ": cancelling command op " << i.first << dendl;
      cancel_ops.push_back(i.first);
      if (op.outs) {
	std::ostringstream ss;
	ss << "MDS " << op_mds_gid << " went away";
	*(op.outs) = ss.str();
      }
      op.con->mark_down();
      if (op.on_finish) {
	op.on_finish->complete(-ETIMEDOUT);
      }
    }
  }

  for (std::list<ceph_tid_t>::iterator i = cancel_ops.begin();
       i != cancel_ops.end(); ++i) {
    command_table.erase(*i);
  }

  // reset sessions
  for (map<mds_rank_t,MetaSession*>::iterator p = mds_sessions.begin();
       p != mds_sessions.end(); ) {
    mds_rank_t mds = p->first;
    MetaSession *session = p->second;
    ++p;

    int oldstate = oldmap->get_state(mds);
    int newstate = mdsmap->get_state(mds);
    if (!mdsmap->is_up(mds)) {
      session->con->mark_down();
    } else if (mdsmap->get_inst(mds) != session->inst) {
      session->con->mark_down();
      session->inst = mdsmap->get_inst(mds);
      // When new MDS starts to take over, notify kernel to trim unused entries
      // in its dcache/icache. Hopefully, the kernel will release some unused
      // inodes before the new MDS enters reconnect state.
      trim_cache_for_reconnect(session);
    } else if (oldstate == newstate)
      continue;  // no change

    session->mds_state = newstate;
    if (newstate == MDSMap::STATE_RECONNECT) {
      session->con = messenger->get_connection(session->inst);
      send_reconnect(session);
    } else if (newstate >= MDSMap::STATE_ACTIVE) {
      if (oldstate < MDSMap::STATE_ACTIVE) {
	// kick new requests
	kick_requests(session);
	kick_flushing_caps(session);
	signal_context_list(session->waiting_for_open);
	kick_maxsize_requests(session);
	wake_inode_waiters(session);
      }
      connect_mds_targets(mds);
    } else if (newstate == MDSMap::STATE_NULL &&
	       mds >= mdsmap->get_max_mds()) {
      _closed_mds_session(session);
    }
  }

  // kick any waiting threads
  signal_cond_list(waiting_for_mdsmap);

  m->put();

  monclient->sub_got("mdsmap", mdsmap->get_epoch());
}

void Client::send_reconnect(MetaSession *session)
{
  mds_rank_t mds = session->mds_num;
  ldout(cct, 10) << "send_reconnect to mds." << mds << dendl;

  // trim unused caps to reduce MDS's cache rejoin time
  trim_cache_for_reconnect(session);

  session->readonly = false;

  if (session->release) {
    session->release->put();
    session->release = NULL;
  }

  // reset my cap seq number
  session->seq = 0;
  //connect to the mds' offload targets
  connect_mds_targets(mds);
  //make sure unsafe requests get saved
  resend_unsafe_requests(session);

  MClientReconnect *m = new MClientReconnect;

  // i have an open session.
  ceph::unordered_set<inodeno_t> did_snaprealm;
  for (ceph::unordered_map<vinodeno_t, Inode*>::iterator p = inode_map.begin();
       p != inode_map.end();
       ++p) {
    Inode *in = p->second;
    if (in->caps.count(mds)) {
      ldout(cct, 10) << " caps on " << p->first
		     << " " << ccap_string(in->caps[mds]->issued)
		     << " wants " << ccap_string(in->caps_wanted())
		     << dendl;
      filepath path;
      in->make_long_path(path);
      ldout(cct, 10) << "    path " << path << dendl;

      bufferlist flockbl;
      _encode_filelocks(in, flockbl);

      Cap *cap = in->caps[mds];
      cap->seq = 0;  // reset seq.
      cap->issue_seq = 0;  // reset seq.
      cap->mseq = 0;  // reset seq.
      cap->issued = cap->implemented;

      snapid_t snap_follows = 0;
      if (!in->cap_snaps.empty())
	snap_follows = in->cap_snaps.begin()->first;

      m->add_cap(p->first.ino,
		 cap->cap_id,
		 path.get_ino(), path.get_path(),   // ino
		 in->caps_wanted(), // wanted
		 cap->issued,     // issued
		 in->snaprealm->ino,
		 snap_follows,
		 flockbl);

      if (did_snaprealm.count(in->snaprealm->ino) == 0) {
	ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl;
	m->add_snaprealm(in->snaprealm->ino, in->snaprealm->seq, in->snaprealm->parent);
	did_snaprealm.insert(in->snaprealm->ino);
      }
    }
  }

  early_kick_flushing_caps(session);

  session->con->send_message(m);

  mount_cond.Signal();
}

void Client::kick_requests(MetaSession *session)
{
  ldout(cct, 10) << "kick_requests for mds." << session->mds_num << dendl;
  for (map<ceph_tid_t, MetaRequest*>::iterator p = mds_requests.begin();
       p != mds_requests.end();
       ++p) {
    MetaRequest *req = p->second;
    if (req->got_unsafe)
      continue;
    if (req->aborted()) {
      if (req->caller_cond) {
	req->kick = true;
	req->caller_cond->Signal();
      }
      continue;
    }
    if (req->retry_attempt > 0)
      continue; // new requests only
    if (req->mds == session->mds_num) {
      send_request(p->second, session);
    }
  }
}

void Client::resend_unsafe_requests(MetaSession *session)
{
  for (xlist<MetaRequest*>::iterator iter = session->unsafe_requests.begin();
       !iter.end();
       ++iter)
    send_request(*iter, session);

  // also re-send old requests when MDS enters reconnect stage. So that MDS can
  // process completed requests in clientreplay stage.
  for (map<ceph_tid_t, MetaRequest*>::iterator p = mds_requests.begin();
       p != mds_requests.end();
       ++p) {
    MetaRequest *req = p->second;
    if (req->got_unsafe)
      continue;
    if (req->aborted())
      continue;
    if (req->retry_attempt == 0)
      continue; // old requests only
    if (req->mds == session->mds_num)
      send_request(req, session, true);
  }
}

void Client::wait_unsafe_requests()
{
  list<MetaRequest*> last_unsafe_reqs;
  for (map<mds_rank_t,MetaSession*>::iterator p = mds_sessions.begin();
       p != mds_sessions.end();
       ++p) {
    MetaSession *s = p->second;
    if (!s->unsafe_requests.empty()) {
      MetaRequest *req = s->unsafe_requests.back();
      req->get();
      last_unsafe_reqs.push_back(req);
    }
  }

  for (list<MetaRequest*>::iterator p = last_unsafe_reqs.begin();
       p != last_unsafe_reqs.end();
       ++p) {
    MetaRequest *req = *p;
    if (req->unsafe_item.is_on_list())
      wait_on_list(req->waitfor_safe);
    put_request(req);
  }
}

void Client::kick_requests_closed(MetaSession *session)
{
  ldout(cct, 10) << "kick_requests_closed for mds." << session->mds_num << dendl;
  for (map<ceph_tid_t, MetaRequest*>::iterator p = mds_requests.begin();
       p != mds_requests.end(); ) {
    MetaRequest *req = p->second;
    ++p;
    if (req->mds == session->mds_num) {
      if (req->caller_cond) {
	req->kick = true;
	req->caller_cond->Signal();
      }
      req->item.remove_myself();
      if (req->got_unsafe) {
	lderr(cct) << "kick_requests_closed removing unsafe request " << req->get_tid() << dendl;
	req->unsafe_item.remove_myself();
	req->unsafe_dir_item.remove_myself();
	req->unsafe_target_item.remove_myself();
	signal_cond_list(req->waitfor_safe);
	unregister_request(req);
      }
    }
  }
  assert(session->requests.empty());
  assert(session->unsafe_requests.empty());
}

void Client::got_mds_push(MetaSession *s)
{
  s->seq++;
  ldout(cct, 10) << " mds." << s->mds_num << " seq now " << s->seq << dendl;
  if (s->state == MetaSession::STATE_CLOSING) {
    s->con->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE, s->seq));
  }
}

void Client::handle_lease(MClientLease *m)
{
  ldout(cct, 10) << "handle_lease " << *m << dendl;

  assert(m->get_action() == CEPH_MDS_LEASE_REVOKE);

  mds_rank_t mds = mds_rank_t(m->get_source().num());
  MetaSession *session = _get_mds_session(mds, m->get_connection().get());
  if (!session) {
    m->put();
    return;
  }

  got_mds_push(session);

  ceph_seq_t seq = m->get_seq();

  Inode *in;
  vinodeno_t vino(m->get_ino(), CEPH_NOSNAP);
  if (inode_map.count(vino) == 0) {
    ldout(cct, 10) << " don't have vino " << vino << dendl;
    goto revoke;
  }
  in = inode_map[vino];

  if (m->get_mask() & CEPH_LOCK_DN) {
    if (!in->dir || in->dir->dentries.count(m->dname) == 0) {
      ldout(cct, 10) << " don't have dir|dentry " << m->get_ino() << "/" << m->dname <<dendl;
      goto revoke;
    }
    Dentry *dn = in->dir->dentries[m->dname];
    ldout(cct, 10) << " revoked DN lease on " << dn << dendl;
    dn->lease_mds = -1;
  }

 revoke:
  m->get_connection()->send_message(
    new MClientLease(
      CEPH_MDS_LEASE_RELEASE, seq,
      m->get_mask(), m->get_ino(), m->get_first(), m->get_last(), m->dname));
  m->put();
}

void Client::put_inode(Inode *in, int n)
{
  ldout(cct, 10) << "put_inode on " << *in << dendl;
  int left = in->_put(n);
  if (left == 0) {
    // release any caps
    remove_all_caps(in);

    ldout(cct, 10) << "put_inode deleting " << *in << dendl;
    bool unclean = objectcacher->release_set(&in->oset);
    assert(!unclean);
    inode_map.erase(in->vino());
    if (use_faked_inos())
      _release_faked_ino(in);

    if (in == root) {
      root = 0;
      root_ancestor = 0;
      while (!root_parents.empty())
	root_parents.erase(root_parents.begin());
    }

    delete in;
  }
}

void Client::close_dir(Dir *dir)
{
  Inode *in = dir->parent_inode;
  ldout(cct, 15) << "close_dir dir " << dir << " on " << in << dendl;
  assert(dir->is_empty());
  assert(in->dir == dir);
  assert(in->dn_set.size() < 2); // dirs can't be hard-linked
  if (!in->dn_set.empty())
    in->get_first_parent()->put();   // unpin dentry

  delete in->dir;
  in->dir = 0;
  put_inode(in);               // unpin inode
}

/**
 * Don't call this with in==NULL, use get_or_create for that
 * leave dn set to default NULL unless you're trying to add
 * a new inode to a pre-created Dentry
 */
Dentry* Client::link(Dir *dir, const string& name, Inode *in, Dentry *dn)
{
  if (!dn) {
    // create a new Dentry
    dn = new Dentry;
    dn->name = name;

    // link to dir
    dn->dir = dir;
    dir->dentries[dn->name] = dn;
    lru.lru_insert_mid(dn);    // mid or top?
    if (!in)
      dir->num_null_dentries++;

    ldout(cct, 15) << "link dir " << dir->parent_inode << " '" << name << "' to inode " << in
		   << " dn " << dn << " (new dn)" << dendl;
  } else {
    assert(!dn->inode);
    if (in)
      dir->num_null_dentries--;
    ldout(cct, 15) << "link dir " << dir->parent_inode << " '" << name << "' to inode " << in
		   << " dn " << dn << " (old dn)" << dendl;
  }

  if (in) {    // link to inode
    dn->inode = in;
    if (in->is_dir()) {
      if (in->dir)
	dn->get(); // dir -> dn pin
      if (in->ll_ref)
	dn->get(); // ll_ref -> dn pin
    }

    assert(in->dn_set.count(dn) == 0);

    // only one parent for directories!
    if (in->is_dir() && !in->dn_set.empty()) {
      Dentry *olddn = in->get_first_parent();
      assert(olddn->dir != dir || olddn->name != name);
      Inode *old_diri = olddn->dir->parent_inode;
      old_diri->dir_release_count++;
      clear_dir_complete_and_ordered(old_diri, true);
      unlink(olddn, true, true);  // keep dir, dentry
    }

    in->dn_set.insert(dn);

    ldout(cct, 20) << "link  inode " << in << " parents now " << in->dn_set << dendl;
  }

  return dn;
}
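
/* Sketch of how link() is typically driven (hypothetical call sites, for
 * illustration only -- real callers are the trace-insertion paths):
 *
 *   Dir *dir = open_dir(diri);               // ensure diri->dir exists
 *   Dentry *dn = link(dir, name, in, NULL);  // positive dentry for inode
 *   link(dir, other_name, NULL, NULL);       // negative (NULL) dentry
 *
 * A NULL inode creates a negative dentry (counted in num_null_dentries);
 * linking a directory inode first unlinks any previous parent, since
 * directories may have at most one parent dentry.
 */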

void Client::unlink(Dentry *dn, bool keepdir, bool keepdentry)
{
  InodeRef in;
  in.swap(dn->inode);
  ldout(cct, 15) << "unlink dir " << dn->dir->parent_inode << " '" << dn->name << "' dn " << dn
		 << " inode " << dn->inode << dendl;

  // unlink from inode
  if (in) {
    if (in->is_dir()) {
      if (in->dir)
	dn->put(); // dir -> dn pin
      if (in->ll_ref)
	dn->put(); // ll_ref -> dn pin
    }
    dn->inode = 0;
    assert(in->dn_set.count(dn));
    in->dn_set.erase(dn);
    ldout(cct, 20) << "unlink  inode " << in << " parents now " << in->dn_set << dendl;
  }

  if (keepdentry) {
    dn->lease_mds = -1;
    if (in)
      dn->dir->num_null_dentries++;
  } else {
    ldout(cct, 15) << "unlink  removing '" << dn->name << "' dn " << dn << dendl;

    // unlink from dir
    dn->dir->dentries.erase(dn->name);
    if (!in)
      dn->dir->num_null_dentries--;
    if (dn->dir->is_empty() && !keepdir)
      close_dir(dn->dir);
    dn->dir = 0;

    // delete den
    lru.lru_remove(dn);
    dn->put();
  }
}

/**
 * For asynchronous flushes, check for errors from the IO and
 * update the inode if necessary
 */
class C_Client_FlushComplete : public Context {
private:
  Client *client;
  InodeRef inode;
public:
  C_Client_FlushComplete(Client *c, Inode *in) : client(c), inode(in) { }
  void finish(int r) override {
    assert(client->client_lock.is_locked_by_me());
    if (r != 0) {
      client_t const whoami = client->whoami;  // For the benefit of ldout prefix
      ldout(client->cct, 1) << "I/O error from flush on inode " << inode
			    << " 0x" << std::hex << inode->ino << std::dec
			    << ": " << r << "(" << cpp_strerror(r) << ")" << dendl;
      inode->set_async_err(r);
    }
  }
};

void Client::get_cap_ref(Inode *in, int cap)
{
  if ((cap & CEPH_CAP_FILE_BUFFER) &&
      in->cap_refs[CEPH_CAP_FILE_BUFFER] == 0) {
    ldout(cct, 5) << "get_cap_ref got first FILE_BUFFER ref on " << *in << dendl;
    in->get();
  }
  if ((cap & CEPH_CAP_FILE_CACHE) &&
      in->cap_refs[CEPH_CAP_FILE_CACHE] == 0) {
    ldout(cct, 5) << "get_cap_ref got first FILE_CACHE ref on " << *in << dendl;
    in->get();
  }
  in->get_cap_ref(cap);
}

void Client::put_cap_ref(Inode *in, int cap)
{
  int last = in->put_cap_ref(cap);
  if (last) {
    int put_nref = 0;
    int drop = last & ~in->caps_issued();
    if (in->snapid == CEPH_NOSNAP) {
      if ((last & CEPH_CAP_FILE_WR) &&
	  !in->cap_snaps.empty() &&
	  in->cap_snaps.rbegin()->second.writing) {
	ldout(cct, 10) << "put_cap_ref finishing pending cap_snap on " << *in << dendl;
	in->cap_snaps.rbegin()->second.writing = 0;
	finish_cap_snap(in, in->cap_snaps.rbegin()->second, get_caps_used(in));
	signal_cond_list(in->waitfor_caps);  // wake up blocked sync writers
      }
      if (last & CEPH_CAP_FILE_BUFFER) {
	for (auto &p : in->cap_snaps)
	  p.second.dirty_data = 0;
	signal_cond_list(in->waitfor_commit);
	ldout(cct, 5) << "put_cap_ref dropped last FILE_BUFFER ref on " << *in << dendl;
	++put_nref;
      }
    }
    if (last & CEPH_CAP_FILE_CACHE) {
      ldout(cct, 5) << "put_cap_ref dropped last FILE_CACHE ref on " << *in << dendl;
      ++put_nref;
    }
    if (drop)
      check_caps(in, 0);
    if (put_nref)
      put_inode(in, put_nref);
  }
}

int Client::get_caps(Inode *in, int need, int want, int *phave, loff_t endoff)
{
  int r = check_pool_perm(in, need);
  if (r < 0)
    return r;

  while (1) {
    int file_wanted = in->caps_file_wanted();
    if ((file_wanted & need) != need) {
      ldout(cct, 10) << "get_caps " << *in << " need " << ccap_string(need)
		     << " file_wanted " << ccap_string(file_wanted) << ", EBADF "
		     << dendl;
      return -EBADF;
    }

    int implemented;
    int have = in->caps_issued(&implemented);

    bool waitfor_caps = false;
    bool waitfor_commit = false;

    if (have & need & CEPH_CAP_FILE_WR) {
      if (endoff > 0 &&
	  (endoff >= (loff_t)in->max_size ||
	   endoff > (loff_t)(in->size << 1)) &&
	  endoff > (loff_t)in->wanted_max_size) {
	ldout(cct, 10) << "wanted_max_size " << in->wanted_max_size << " -> " << endoff << dendl;
	in->wanted_max_size = endoff;
	check_caps(in, 0);
      }

      if (endoff >= 0 && endoff > (loff_t)in->max_size) {
	ldout(cct, 10) << "waiting on max_size, endoff " << endoff << " max_size " << in->max_size << " on " << *in << dendl;
	waitfor_caps = true;
      }
      if (!in->cap_snaps.empty()) {
	if (in->cap_snaps.rbegin()->second.writing) {
	  ldout(cct, 10) << "waiting on cap_snap write to complete" << dendl;
	  waitfor_caps = true;
	}
	for (auto &p : in->cap_snaps) {
	  if (p.second.dirty_data) {
	    waitfor_commit = true;
	    break;
	  }
	}
	if (waitfor_commit) {
	  _flush(in, new C_Client_FlushComplete(this, in));
	  ldout(cct, 10) << "waiting for WRBUFFER to get dropped" << dendl;
	}
      }
    }

    if (!waitfor_caps && !waitfor_commit) {
      if ((have & need) == need) {
	int revoking = implemented & ~have;
	ldout(cct, 10) << "get_caps " << *in << " have " << ccap_string(have)
		       << " need " << ccap_string(need) << " want " << ccap_string(want)
		       << " revoking " << ccap_string(revoking)
		       << dendl;
	if ((revoking & want) == 0) {
	  *phave = need | (have & want);
	  in->get_cap_ref(need);
	  return 0;
	}
      }
      ldout(cct, 10) << "waiting for caps " << *in << " need " << ccap_string(need) << " want " << ccap_string(want) << dendl;
      waitfor_caps = true;
    }

    if ((need & CEPH_CAP_FILE_WR) && in->auth_cap &&
	in->auth_cap->session->readonly)
      return -EROFS;

    if (in->flags & I_CAP_DROPPED) {
      int mds_wanted = in->caps_mds_wanted();
      if ((mds_wanted & need) != need) {
	int ret = _renew_caps(in);
	if (ret < 0)
	  return ret;
	continue;
      }
      if ((mds_wanted & file_wanted) ==
	  (file_wanted & (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR))) {
	in->flags &= ~I_CAP_DROPPED;
      }
    }

    if (waitfor_caps)
      wait_on_list(in->waitfor_caps);
    else if (waitfor_commit)
      wait_on_list(in->waitfor_commit);
  }
}
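
/* Sketch of the expected caller pattern (illustrative only -- the real
 * callers are the file read/write paths):
 *
 *   int have;
 *   int r = get_caps(in, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE, &have, -1);
 *   if (r < 0)
 *     return r;                          // e.g. -EBADF, -EROFS
 *   // ... read, using the object cache only if (have & CEPH_CAP_FILE_CACHE)
 *   put_cap_ref(in, CEPH_CAP_FILE_RD);
 *
 * get_caps() blocks until the needed caps are issued and not mid-revoke,
 * and takes a cap reference that put_cap_ref() must later release.
 */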

int Client::get_caps_used(Inode *in)
{
  unsigned used = in->caps_used();
  if (!(used & CEPH_CAP_FILE_CACHE) &&
      !objectcacher->set_is_empty(&in->oset))
    used |= CEPH_CAP_FILE_CACHE;
  return used;
}

void Client::cap_delay_requeue(Inode *in)
{
  ldout(cct, 10) << "cap_delay_requeue on " << *in << dendl;
  in->hold_caps_until = ceph_clock_now();
  in->hold_caps_until += cct->_conf->client_caps_release_delay;
  delayed_list.push_back(&in->delay_cap_item);
}

void Client::send_cap(Inode *in, MetaSession *session, Cap *cap,
		      bool sync, int used, int want, int retain,
		      int flush, ceph_tid_t flush_tid)
{
  int held = cap->issued | cap->implemented;
  int revoking = cap->implemented & ~cap->issued;
  retain &= ~revoking;
  int dropping = cap->issued & ~retain;
  int op = CEPH_CAP_OP_UPDATE;

  ldout(cct, 10) << "send_cap " << *in
		 << " mds." << session->mds_num << " seq " << cap->seq
		 << (sync ? " sync " : " async ")
		 << " used " << ccap_string(used)
		 << " want " << ccap_string(want)
		 << " flush " << ccap_string(flush)
		 << " retain " << ccap_string(retain)
		 << " held "<< ccap_string(held)
		 << " revoking " << ccap_string(revoking)
		 << " dropping " << ccap_string(dropping)
		 << dendl;

  if (cct->_conf->client_inject_release_failure && revoking) {
    const int would_have_issued = cap->issued & retain;
    const int would_have_implemented = cap->implemented & (cap->issued | used);
    // Simulated bug:
    //  - tell the server we think issued is whatever they issued plus whatever we implemented
    //  - leave what we have implemented in place
    ldout(cct, 20) << __func__ << " injecting failure to release caps" << dendl;
    cap->issued = cap->issued | cap->implemented;

    // Make an exception for revoking xattr caps: we are injecting
    // failure to release other caps, but allow xattr because client
    // will block on xattr ops if it can't release these to MDS (#9800)
    const int xattr_mask = CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
    cap->issued ^= xattr_mask & revoking;
    cap->implemented ^= xattr_mask & revoking;

    ldout(cct, 20) << __func__ << " issued " << ccap_string(cap->issued) << " vs " << ccap_string(would_have_issued) << dendl;
    ldout(cct, 20) << __func__ << " implemented " << ccap_string(cap->implemented) << " vs " << ccap_string(would_have_implemented) << dendl;
  } else {
    // Normal behaviour
    cap->issued &= retain;
    cap->implemented &= cap->issued | used;
  }

  snapid_t follows = 0;

  if (flush & CEPH_CAP_FILE_WR)
    follows = in->snaprealm->get_snap_context().seq;

  MClientCaps *m = new MClientCaps(op,
				   in->ino,
				   0,
				   cap->cap_id, cap->seq,
				   cap->implemented,
				   want,
				   flush,
				   cap->mseq,
				   cap_epoch_barrier);
  m->caller_uid = in->cap_dirtier_uid;
  m->caller_gid = in->cap_dirtier_gid;

  m->head.issue_seq = cap->issue_seq;
  m->set_tid(flush_tid);

  m->head.uid = in->uid;
  m->head.gid = in->gid;
  m->head.mode = in->mode;

  m->head.nlink = in->nlink;

  if (flush & CEPH_CAP_XATTR_EXCL) {
    ::encode(in->xattrs, m->xattrbl);
    m->head.xattr_version = in->xattr_version;
  }

  m->size = in->size;
  m->max_size = in->max_size;
  m->truncate_seq = in->truncate_seq;
  m->truncate_size = in->truncate_size;
  m->mtime = in->mtime;
  m->atime = in->atime;
  m->ctime = in->ctime;
  m->btime = in->btime;
  m->time_warp_seq = in->time_warp_seq;
  m->change_attr = in->change_attr;
  if (sync)
    m->flags |= CLIENT_CAPS_SYNC;

  if (flush & CEPH_CAP_FILE_WR) {
    m->inline_version = in->inline_version;
    m->inline_data = in->inline_data;
  }

  in->reported_size = in->size;
  m->set_snap_follows(follows);
  cap->wanted = want;
  if (cap == in->auth_cap) {
    m->set_max_size(in->wanted_max_size);
    in->requested_max_size = in->wanted_max_size;
    ldout(cct, 15) << "auth cap, setting max_size = " << in->requested_max_size << dendl;
  }

  if (!session->flushing_caps_tids.empty())
    m->set_oldest_flush_tid(*session->flushing_caps_tids.begin());

  session->con->send_message(m);
}

static bool is_max_size_approaching(Inode *in)
{
  /* mds will adjust max size according to the reported size */
  if (in->flushing_caps & CEPH_CAP_FILE_WR)
    return false;
  if (in->size >= in->max_size)
    return true;
  /* half of previous max_size increment has been used */
  if (in->max_size > in->reported_size &&
      (in->size << 1) >= in->max_size + in->reported_size)
    return true;
  return false;
}
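
// Worked example (illustrative numbers only): suppose the MDS last granted
// max_size = 8MB when we reported reported_size = 4MB, i.e. a 4MB
// increment. The check (size << 1) >= max_size + reported_size reduces to
// 2*size >= 12MB, so it fires once size reaches 6MB -- exactly when half
// of the 4MB increment is consumed -- letting us request more headroom
// before writers actually hit the ceiling.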

/**
 * Examine currently used and wanted versus held caps. Release, flush or ack
 * revoked caps to the MDS as appropriate.
 *
 * @param in the inode to check
 * @param flags flags to apply to cap check
 */
void Client::check_caps(Inode *in, unsigned flags)
{
  unsigned wanted = in->caps_wanted();
  unsigned used = get_caps_used(in);
  unsigned cap_used;

  if (in->is_dir() && (in->flags & I_COMPLETE)) {
    // we do this here because we don't want to drop to Fs (and then
    // drop the Fs if we do a create!) if that alone makes us send lookups
    // to the MDS. Doing it in in->caps_wanted() has knock-on effects elsewhere
    wanted |= CEPH_CAP_FILE_EXCL;
  }

  int implemented;
  int issued = in->caps_issued(&implemented);
  int revoking = implemented & ~issued;

  int retain = wanted | used | CEPH_CAP_PIN;
  if (!unmounting) {
    if (wanted)
      retain |= CEPH_CAP_ANY;
    else
      retain |= CEPH_CAP_ANY_SHARED;
  }

  ldout(cct, 10) << "check_caps on " << *in
		 << " wanted " << ccap_string(wanted)
		 << " used " << ccap_string(used)
		 << " issued " << ccap_string(issued)
		 << " revoking " << ccap_string(revoking)
		 << " flags=" << flags
		 << dendl;

  if (in->snapid != CEPH_NOSNAP)
    return; //snap caps last forever, can't write

  if (in->caps.empty())
    return;   // guard if at end of func

  if ((revoking & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) &&
      (used & CEPH_CAP_FILE_CACHE) && !(used & CEPH_CAP_FILE_BUFFER)) {
    if (_release(in))
      used &= ~CEPH_CAP_FILE_CACHE;
  }

  if (!in->cap_snaps.empty())
    flush_snaps(in);

  if (flags & CHECK_CAPS_NODELAY)
    in->hold_caps_until = utime_t();
  else
    cap_delay_requeue(in);

  utime_t now = ceph_clock_now();

  map<mds_rank_t, Cap*>::iterator it = in->caps.begin();
  while (it != in->caps.end()) {
    mds_rank_t mds = it->first;
    Cap *cap = it->second;
    ++it;

    MetaSession *session = mds_sessions[mds];
    assert(session);

    cap_used = used;
    if (in->auth_cap && cap != in->auth_cap)
      cap_used &= ~in->auth_cap->issued;

    revoking = cap->implemented & ~cap->issued;

    ldout(cct, 10) << " cap mds." << mds
		   << " issued " << ccap_string(cap->issued)
		   << " implemented " << ccap_string(cap->implemented)
		   << " revoking " << ccap_string(revoking) << dendl;

    if (in->wanted_max_size > in->max_size &&
	in->wanted_max_size > in->requested_max_size &&
	cap == in->auth_cap)
      goto ack;

    /* approaching file_max? */
    if ((cap->issued & CEPH_CAP_FILE_WR) &&
	cap == in->auth_cap &&
	is_max_size_approaching(in)) {
      ldout(cct, 10) << "size " << in->size << " approaching max_size " << in->max_size
		     << ", reported " << in->reported_size << dendl;
      goto ack;
    }

    /* completed revocation? */
    if (revoking && (revoking & cap_used) == 0) {
      ldout(cct, 10) << "completed revocation of " << ccap_string(cap->implemented & ~cap->issued) << dendl;
      goto ack;
    }

    /* want more caps from mds? */
    if (wanted & ~(cap->wanted | cap->issued))
      goto ack;

    if (!revoking && unmounting && (cap_used == 0))
      goto ack;

    if (wanted == cap->wanted &&         // mds knows what we want.
	((cap->issued & ~retain) == 0) &&// and we don't have anything we wouldn't like
	!in->dirty_caps)                 // and we have no dirty caps
      continue;

    if (now < in->hold_caps_until) {
      ldout(cct, 10) << "delaying cap release" << dendl;
      continue;
    }

  ack:
    // re-send old cap/snapcap flushes first.
    if (session->mds_state >= MDSMap::STATE_RECONNECT &&
	session->mds_state < MDSMap::STATE_ACTIVE &&
	session->early_flushing_caps.count(in) == 0) {
      ldout(cct, 20) << " reflushing caps (check_caps) on " << *in
		     << " to mds." << session->mds_num << dendl;
      session->early_flushing_caps.insert(in);
      if (in->cap_snaps.size())
	flush_snaps(in, true);
      if (in->flushing_caps)
	flush_caps(in, session, flags & CHECK_CAPS_SYNCHRONOUS);
    }

    int flushing;
    ceph_tid_t flush_tid;
    if (in->auth_cap == cap && in->dirty_caps) {
      flushing = mark_caps_flushing(in, &flush_tid);
    } else {
      flushing = 0;
      flush_tid = 0;
    }

    send_cap(in, session, cap, flags & CHECK_CAPS_SYNCHRONOUS, cap_used, wanted,
	     retain, flushing, flush_tid);
  }
}
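
// check_caps() is the single funnel for outgoing cap traffic: every path
// that dirties metadata, drops cap references, or changes wanted caps ends
// up here, and the "ack" block decides per cap whether to flush (auth cap
// with dirty state) or merely update/release via send_cap().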

void Client::queue_cap_snap(Inode *in, SnapContext& old_snapc)
{
  int used = get_caps_used(in);
  int dirty = in->caps_dirty();
  ldout(cct, 10) << "queue_cap_snap " << *in << " snapc " << old_snapc << " used " << ccap_string(used) << dendl;

  if (in->cap_snaps.size() &&
      in->cap_snaps.rbegin()->second.writing) {
    ldout(cct, 10) << "queue_cap_snap already have pending cap_snap on " << *in << dendl;
    return;
  } else if (in->caps_dirty() ||
	     (used & CEPH_CAP_FILE_WR) ||
	     (dirty & CEPH_CAP_ANY_WR)) {
    const auto &capsnapem = in->cap_snaps.emplace(std::piecewise_construct, std::make_tuple(old_snapc.seq), std::make_tuple(in));
    assert(capsnapem.second == true); /* element inserted */
    CapSnap &capsnap = capsnapem.first->second;
    capsnap.context = old_snapc;
    capsnap.issued = in->caps_issued();
    capsnap.dirty = in->caps_dirty();

    capsnap.dirty_data = (used & CEPH_CAP_FILE_BUFFER);

    capsnap.uid = in->uid;
    capsnap.gid = in->gid;
    capsnap.mode = in->mode;
    capsnap.btime = in->btime;
    capsnap.xattrs = in->xattrs;
    capsnap.xattr_version = in->xattr_version;

    if (used & CEPH_CAP_FILE_WR) {
      ldout(cct, 10) << "queue_cap_snap WR used on " << *in << dendl;
      capsnap.writing = 1;
    } else {
      finish_cap_snap(in, capsnap, used);
    }
  } else {
    ldout(cct, 10) << "queue_cap_snap not dirty|writing on " << *in << dendl;
  }
}

void Client::finish_cap_snap(Inode *in, CapSnap &capsnap, int used)
{
  ldout(cct, 10) << "finish_cap_snap " << *in << " capsnap " << (void *)&capsnap << " used " << ccap_string(used) << dendl;
  capsnap.size = in->size;
  capsnap.mtime = in->mtime;
  capsnap.atime = in->atime;
  capsnap.ctime = in->ctime;
  capsnap.time_warp_seq = in->time_warp_seq;
  capsnap.change_attr = in->change_attr;

  capsnap.dirty |= in->caps_dirty();

  if (capsnap.dirty & CEPH_CAP_FILE_WR) {
    capsnap.inline_data = in->inline_data;
    capsnap.inline_version = in->inline_version;
  }

  if (used & CEPH_CAP_FILE_BUFFER) {
    ldout(cct, 10) << "finish_cap_snap " << *in << " cap_snap " << &capsnap << " used " << used
		   << " WRBUFFER, delaying" << dendl;
  } else {
    capsnap.dirty_data = 0;
    flush_snaps(in);
  }
}

void Client::_flushed_cap_snap(Inode *in, snapid_t seq)
{
  ldout(cct, 10) << "_flushed_cap_snap seq " << seq << " on " << *in << dendl;
  in->cap_snaps.at(seq).dirty_data = 0;
  flush_snaps(in);
}

void Client::flush_snaps(Inode *in, bool all_again)
{
  ldout(cct, 10) << "flush_snaps on " << *in << " all_again " << all_again << dendl;
  assert(in->cap_snaps.size());

  // pick auth mds
  assert(in->auth_cap);
  MetaSession *session = in->auth_cap->session;
  int mseq = in->auth_cap->mseq;

  for (auto &p : in->cap_snaps) {
    CapSnap &capsnap = p.second;
    if (!all_again) {
      // only flush once per session
      if (capsnap.flush_tid > 0)
	continue;
    }

    ldout(cct, 10) << "flush_snaps mds." << session->mds_num
		   << " follows " << p.first
		   << " size " << capsnap.size
		   << " mtime " << capsnap.mtime
		   << " dirty_data=" << capsnap.dirty_data
		   << " writing=" << capsnap.writing
		   << " on " << *in << dendl;
    if (capsnap.dirty_data || capsnap.writing)
      continue;

    if (capsnap.flush_tid == 0) {
      capsnap.flush_tid = ++last_flush_tid;
      if (!in->flushing_cap_item.is_on_list())
	session->flushing_caps.push_back(&in->flushing_cap_item);
      session->flushing_caps_tids.insert(capsnap.flush_tid);
    }

    MClientCaps *m = new MClientCaps(CEPH_CAP_OP_FLUSHSNAP, in->ino, in->snaprealm->ino, 0, mseq,
				     cap_epoch_barrier);
    if (user_id >= 0)
      m->caller_uid = user_id;
    if (group_id >= 0)
      m->caller_gid = group_id;

    m->set_client_tid(capsnap.flush_tid);
    m->head.snap_follows = p.first;

    m->head.caps = capsnap.issued;
    m->head.dirty = capsnap.dirty;

    m->head.uid = capsnap.uid;
    m->head.gid = capsnap.gid;
    m->head.mode = capsnap.mode;
    m->btime = capsnap.btime;

    m->size = capsnap.size;

    m->head.xattr_version = capsnap.xattr_version;
    ::encode(capsnap.xattrs, m->xattrbl);

    m->ctime = capsnap.ctime;
    m->btime = capsnap.btime;
    m->mtime = capsnap.mtime;
    m->atime = capsnap.atime;
    m->time_warp_seq = capsnap.time_warp_seq;
    m->change_attr = capsnap.change_attr;

    if (capsnap.dirty & CEPH_CAP_FILE_WR) {
      m->inline_version = in->inline_version;
      m->inline_data = in->inline_data;
    }

    assert(!session->flushing_caps_tids.empty());
    m->set_oldest_flush_tid(*session->flushing_caps_tids.begin());

    session->con->send_message(m);
  }
}

void Client::wait_on_list(list<Cond*>& ls)
{
  Cond cond;
  ls.push_back(&cond);
  cond.Wait(client_lock);
  ls.remove(&cond);
}

void Client::signal_cond_list(list<Cond*>& ls)
{
  for (list<Cond*>::iterator it = ls.begin(); it != ls.end(); ++it)
    (*it)->Signal();
}

void Client::wait_on_context_list(list<Context*>& ls)
{
  Cond cond;
  bool done = false;
  int r;
  ls.push_back(new C_Cond(&cond, &done, &r));
  while (!done)
    cond.Wait(client_lock);
}

void Client::signal_context_list(list<Context*>& ls)
{
  while (!ls.empty()) {
    ls.front()->complete(0);
    ls.pop_front();
  }
}

void Client::wake_inode_waiters(MetaSession *s)
{
  xlist<Cap*>::iterator iter = s->caps.begin();
  while (!iter.end()){
    signal_cond_list((*iter)->inode->waitfor_caps);
    ++iter;
  }
}

// flush dirty data (from objectcache)

class C_Client_CacheInvalidate : public Context  {
private:
  Client *client;
  vinodeno_t ino;
  int64_t offset, length;
public:
  C_Client_CacheInvalidate(Client *c, Inode *in, int64_t off, int64_t len) :
    client(c), offset(off), length(len) {
    if (client->use_faked_inos())
      ino = vinodeno_t(in->faked_ino, CEPH_NOSNAP);
    else
      ino = in->vino();
  }
  void finish(int r) override {
    // _async_invalidate takes the lock when it needs to, call this back from outside of lock.
    assert(!client->client_lock.is_locked_by_me());
    client->_async_invalidate(ino, offset, length);
  }
};

void Client::_async_invalidate(vinodeno_t ino, int64_t off, int64_t len)
{
  if (unmounting)
    return;
  ldout(cct, 10) << "_async_invalidate " << ino << " " << off << "~" << len << dendl;
  ino_invalidate_cb(callback_handle, ino, off, len);
}

void Client::_schedule_invalidate_callback(Inode *in, int64_t off, int64_t len) {

  if (ino_invalidate_cb)
    // we queue the invalidate, which calls the callback and decrements the ref
    async_ino_invalidator.queue(new C_Client_CacheInvalidate(this, in, off, len));
}

void Client::_invalidate_inode_cache(Inode *in)
{
  ldout(cct, 10) << "_invalidate_inode_cache " << *in << dendl;

  // invalidate our userspace inode cache
  if (cct->_conf->client_oc) {
    objectcacher->release_set(&in->oset);
    if (!objectcacher->set_is_empty(&in->oset))
      lderr(cct) << "failed to invalidate cache for " << *in << dendl;
  }

  _schedule_invalidate_callback(in, 0, 0);
}

void Client::_invalidate_inode_cache(Inode *in, int64_t off, int64_t len)
{
  ldout(cct, 10) << "_invalidate_inode_cache " << *in << " " << off << "~" << len << dendl;

  // invalidate our userspace inode cache
  if (cct->_conf->client_oc) {
    vector<ObjectExtent> ls;
    Striper::file_to_extents(cct, in->ino, &in->layout, off, len, in->truncate_size, ls);
    objectcacher->discard_writeback(&in->oset, ls, nullptr);
  }

  _schedule_invalidate_callback(in, off, len);
}

bool Client::_release(Inode *in)
{
  ldout(cct, 20) << "_release " << *in << dendl;
  if (in->cap_refs[CEPH_CAP_FILE_CACHE] == 0) {
    _invalidate_inode_cache(in);
    return true;
  }
  return false;
}

bool Client::_flush(Inode *in, Context *onfinish)
{
  ldout(cct, 10) << "_flush " << *in << dendl;

  if (!in->oset.dirty_or_tx) {
    ldout(cct, 10) << " nothing to flush" << dendl;
    onfinish->complete(0);
    return true;
  }

  if (objecter->osdmap_pool_full(in->layout.pool_id)) {
    ldout(cct, 8) << __func__ << ": FULL, purging for ENOSPC" << dendl;
    objectcacher->purge_set(&in->oset);
    if (onfinish) {
      onfinish->complete(-ENOSPC);
    }
    return true;
  }

  return objectcacher->flush_set(&in->oset, onfinish);
}

void Client::_flush_range(Inode *in, int64_t offset, uint64_t size)
{
  assert(client_lock.is_locked());
  if (!in->oset.dirty_or_tx) {
    ldout(cct, 10) << " nothing to flush" << dendl;
    return;
  }

  Mutex flock("Client::_flush_range flock");
  Cond cond;
  bool safe = false;
  Context *onflush = new C_SafeCond(&flock, &cond, &safe);
  bool ret = objectcacher->file_flush(&in->oset, &in->layout, in->snaprealm->get_snap_context(),
				      offset, size, onflush);
  if (!ret) {
    // wait for flush
    client_lock.Unlock();
    flock.Lock();
    while (!safe)
      cond.Wait(flock);
    flock.Unlock();
    client_lock.Lock();
  }
}
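
// _flush_range() deliberately drops client_lock while waiting on its
// private flock/cond pair: the ObjectCacher completion re-enters the
// client via flush_set_callback() with client_lock held (see the assert
// below), so sleeping here while holding client_lock would deadlock.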

void Client::flush_set_callback(ObjectCacher::ObjectSet *oset)
{
  //  Mutex::Locker l(client_lock);
  assert(client_lock.is_locked());   // will be called via dispatch() -> objecter -> ...
  Inode *in = static_cast<Inode *>(oset->parent);
  assert(in);
  _flushed(in);
}

void Client::_flushed(Inode *in)
{
  ldout(cct, 10) << "_flushed " << *in << dendl;

  put_cap_ref(in, CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER);
}

// checks common to add_update_cap, handle_cap_grant
void Client::check_cap_issue(Inode *in, Cap *cap, unsigned issued)
{
  unsigned had = in->caps_issued();

  if ((issued & CEPH_CAP_FILE_CACHE) &&
      !(had & CEPH_CAP_FILE_CACHE))
    in->cache_gen++;

  if ((issued & CEPH_CAP_FILE_SHARED) &&
      !(had & CEPH_CAP_FILE_SHARED)) {
    in->shared_gen++;

    if (in->is_dir())
      clear_dir_complete_and_ordered(in, true);
  }
}

void Client::add_update_cap(Inode *in, MetaSession *mds_session, uint64_t cap_id,
			    unsigned issued, unsigned seq, unsigned mseq, inodeno_t realm,
			    int flags, const UserPerm& cap_perms)
{
  Cap *cap = 0;
  mds_rank_t mds = mds_session->mds_num;
  if (in->caps.count(mds)) {
    cap = in->caps[mds];

    /*
     * auth mds of the inode changed. we received the cap export
     * message, but still haven't received the cap import message.
     * handle_cap_export() updated the new auth MDS' cap.
     *
     * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
     * a message that was send before the cap import message. So
     * don't remove caps.
     */
    if (ceph_seq_cmp(seq, cap->seq) <= 0) {
      assert(cap == in->auth_cap);
      assert(cap->cap_id == cap_id);
      seq = cap->seq;
      mseq = cap->mseq;
      issued |= cap->issued;
      flags |= CEPH_CAP_FLAG_AUTH;
    }
  } else {
    mds_session->num_caps++;
    if (!in->is_any_caps()) {
      assert(in->snaprealm == 0);
      in->snaprealm = get_snap_realm(realm);
      in->snaprealm->inodes_with_caps.push_back(&in->snaprealm_item);
      ldout(cct, 15) << "add_update_cap first one, opened snaprealm " << in->snaprealm << dendl;
    }
    in->caps[mds] = cap = new Cap;

    mds_session->caps.push_back(&cap->cap_item);
    cap->session = mds_session;
    cap->inode = in;
    cap->gen = mds_session->cap_gen;
  }

  check_cap_issue(in, cap, issued);

  if (flags & CEPH_CAP_FLAG_AUTH) {
    if (in->auth_cap != cap &&
        (!in->auth_cap || ceph_seq_cmp(in->auth_cap->mseq, mseq) < 0)) {
      if (in->auth_cap && in->flushing_cap_item.is_on_list()) {
	ldout(cct, 10) << "add_update_cap changing auth cap: "
		       << "add myself to new auth MDS' flushing caps list" << dendl;
	adjust_session_flushing_caps(in, in->auth_cap->session, mds_session);
      }
      in->auth_cap = cap;
    }
  }

  unsigned old_caps = cap->issued;
  cap->cap_id = cap_id;
  cap->issued = issued;
  cap->implemented |= issued;
  cap->seq = seq;
  cap->issue_seq = seq;
  cap->mseq = mseq;
  cap->gen = mds_session->cap_gen;
  cap->latest_perms = cap_perms;
  ldout(cct, 10) << "add_update_cap issued " << ccap_string(old_caps) << " -> " << ccap_string(cap->issued)
		 << " from mds." << mds
		 << " on " << *in
		 << dendl;

  if ((issued & ~old_caps) && in->auth_cap == cap) {
    // non-auth MDS is revoking the newly grant caps ?
    for (map<mds_rank_t,Cap*>::iterator it = in->caps.begin(); it != in->caps.end(); ++it) {
      if (it->second == cap)
	continue;
      if (it->second->implemented & ~it->second->issued & issued) {
	check_caps(in, CHECK_CAPS_NODELAY);
	break;
      }
    }
  }

  if (issued & ~old_caps)
    signal_cond_list(in->waitfor_caps);
}

void Client::remove_cap(Cap *cap, bool queue_release)
{
  Inode *in = cap->inode;
  MetaSession *session = cap->session;
  mds_rank_t mds = cap->session->mds_num;

  ldout(cct, 10) << "remove_cap mds." << mds << " on " << *in << dendl;

  if (queue_release) {
    session->enqueue_cap_release(
      in->ino,
      cap->cap_id,
      cap->issue_seq,
      cap->mseq,
      cap_epoch_barrier);
  }

  if (in->auth_cap == cap) {
    if (in->flushing_cap_item.is_on_list()) {
      ldout(cct, 10) << " removing myself from flushing_cap list" << dendl;
      in->flushing_cap_item.remove_myself();
    }
    in->auth_cap = NULL;
  }
  assert(in->caps.count(mds));
  in->caps.erase(mds);

  cap->cap_item.remove_myself();
  delete cap;
  cap = nullptr;

  if (!in->is_any_caps()) {
    ldout(cct, 15) << "remove_cap last one, closing snaprealm " << in->snaprealm << dendl;
    in->snaprealm_item.remove_myself();
    put_snap_realm(in->snaprealm);
    in->snaprealm = 0;
  }
}

void Client::remove_all_caps(Inode *in)
{
  while (!in->caps.empty())
    remove_cap(in->caps.begin()->second, true);
}

void Client::remove_session_caps(MetaSession *s)
{
  ldout(cct, 10) << "remove_session_caps mds." << s->mds_num << dendl;

  while (s->caps.size()) {
    Cap *cap = *s->caps.begin();
    Inode *in = cap->inode;
    bool dirty_caps = false, cap_snaps = false;
    if (in->auth_cap == cap) {
      cap_snaps = !in->cap_snaps.empty();
      dirty_caps = in->dirty_caps | in->flushing_caps;
      in->wanted_max_size = 0;
      in->requested_max_size = 0;
      in->flags |= I_CAP_DROPPED;
    }
    remove_cap(cap, false);
    signal_cond_list(in->waitfor_caps);
    if (cap_snaps) {
      InodeRef tmp_ref(in);
      in->cap_snaps.clear();
    }
    if (dirty_caps) {
      lderr(cct) << "remove_session_caps still has dirty|flushing caps on " << *in << dendl;
      if (in->flushing_caps) {
	num_flushing_caps--;
	in->flushing_cap_tids.clear();
      }
      in->flushing_caps = 0;
      in->mark_caps_clean();
      put_inode(in);
    }
  }
  s->flushing_caps_tids.clear();
  sync_cond.Signal();
}

int Client::_do_remount(bool retry_on_error)
{
  uint64_t max_retries = cct->_conf->get_val<uint64_t>("mds_max_retries_on_remount_failure");

  errno = 0;
  int r = remount_cb(callback_handle);
  if (r == 0) {
    retries_on_invalidate = 0;
  } else {
    int e = errno;
    client_t whoami = get_nodeid();
    if (r == -1) {
      lderr(cct) <<
	  "failed to remount (to trim kernel dentries): "
	  "errno = " << e << " (" << strerror(e) << ")" << dendl;
    } else {
      lderr(cct) <<
	  "failed to remount (to trim kernel dentries): "
	  "return code = " << r << dendl;
    }
    bool should_abort =
      (cct->_conf->get_val<bool>("client_die_on_failed_remount") ||
       cct->_conf->get_val<bool>("client_die_on_failed_dentry_invalidate")) &&
      !(retry_on_error && (++retries_on_invalidate < max_retries));
    if (should_abort && !unmounting) {
      lderr(cct) << "failed to remount for kernel dentry trimming; quitting!" << dendl;
      ceph_abort();
    }
  }
  return r;
}

class C_Client_Remount : public Context  {
private:
  Client *client;
public:
  explicit C_Client_Remount(Client *c) : client(c) {}
  void finish(int r) override {
    assert(r == 0);
    client->_do_remount(true);
  }
};

void Client::_invalidate_kernel_dcache()
{
  if (unmounting)
    return;
  if (can_invalidate_dentries) {
    if (dentry_invalidate_cb && root->dir) {
      for (ceph::unordered_map<string, Dentry*>::iterator p = root->dir->dentries.begin();
	   p != root->dir->dentries.end();
	   ++p) {
	if (p->second->inode)
	  _schedule_invalidate_dentry_callback(p->second, false);
      }
    }
  } else if (remount_cb) {
    // Hacky:
    // when remounting a file system, linux kernel trims all unused dentries in the fs
    remount_finisher.queue(new C_Client_Remount(this));
  }
}

void Client::_trim_negative_child_dentries(InodeRef& in)
{
  if (!in->is_dir())
    return;

  Dir* dir = in->dir;
  if (dir && dir->dentries.size() == dir->num_null_dentries) {
    for (auto p = dir->dentries.begin(); p != dir->dentries.end(); ) {
      Dentry *dn = p->second;
      ++p;
      assert(!dn->inode);
      if (dn->lru_is_expireable())
	unlink(dn, true, false);  // keep dir, drop dentry
    }
    if (dir->dentries.empty()) {
      close_dir(dir);
    }
  }

  if (in->flags & I_SNAPDIR_OPEN) {
    InodeRef snapdir = open_snapdir(in.get());
    _trim_negative_child_dentries(snapdir);
  }
}

void Client::trim_caps(MetaSession *s, uint64_t max)
{
  mds_rank_t mds = s->mds_num;
  size_t caps_size = s->caps.size();
  ldout(cct, 10) << "trim_caps mds." << mds << " max " << max
		 << " caps " << caps_size << dendl;

  uint64_t trimmed = 0;
  auto p = s->caps.begin();
  std::set<Dentry *> to_trim; /* this avoids caps other than the one we're
                               * looking at from getting deleted during traversal. */
  while ((caps_size - trimmed) > max && !p.end()) {
    Cap *cap = *p;
    InodeRef in(cap->inode);

    // Increment p early because it will be invalidated if cap
    // is deleted inside remove_cap
    ++p;

    if (in->caps.size() > 1 && cap != in->auth_cap) {
      int mine = cap->issued | cap->implemented;
      int oissued = in->auth_cap ? in->auth_cap->issued : 0;
      // disposable non-auth cap
      if (!(get_caps_used(in.get()) & ~oissued & mine)) {
	ldout(cct, 20) << " removing unused, unneeded non-auth cap on " << *in << dendl;
	cap = (remove_cap(cap, true), nullptr);
	trimmed++;
      }
    } else {
      ldout(cct, 20) << " trying to trim dentries for " << *in << dendl;
      _trim_negative_child_dentries(in);
      bool all = true;
      set<Dentry*>::iterator q = in->dn_set.begin();
      while (q != in->dn_set.end()) {
	Dentry *dn = *q++;
	if (dn->lru_is_expireable()) {
	  if (can_invalidate_dentries &&
	      dn->dir->parent_inode->ino == MDS_INO_ROOT) {
	    // Only issue one of these per DN for inodes in root: handle
	    // others more efficiently by calling for root-child DNs at
	    // the end of this function.
	    _schedule_invalidate_dentry_callback(dn, true);
	  }
	  ldout(cct, 20) << " queueing dentry for trimming: " << dn->name << dendl;
	  to_trim.insert(dn);
	} else {
	  ldout(cct, 20) << "  not expirable: " << dn->name << dendl;
	  all = false;
	}
      }
      if (all && in->ino != MDS_INO_ROOT) {
	ldout(cct, 20) << __func__ << " counting as trimmed: " << *in << dendl;
	trimmed++;
      }
    }
  }
  ldout(cct, 20) << " trimming queued dentries: " << dendl;
  for (const auto &dn : to_trim) {
    trim_dentry(dn);
  }
  to_trim.clear();

  caps_size = s->caps.size();
  if (caps_size > max)
    _invalidate_kernel_dcache();
}
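
// Trimming is two-phase on purpose: candidate dentries are first collected
// into to_trim while walking s->caps, then trimmed afterwards, because
// trim_dentry() can drop the last reference on other caps and would
// otherwise invalidate the cap-list iterator mid-walk (see the comment on
// to_trim above).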

void Client::force_session_readonly(MetaSession *s)
{
  s->readonly = true;
  for (xlist<Cap*>::iterator p = s->caps.begin(); !p.end(); ++p) {
    Inode *in = (*p)->inode;
    if (in->caps_wanted() & CEPH_CAP_FILE_WR)
      signal_cond_list(in->waitfor_caps);
  }
}

int Client::mark_caps_flushing(Inode *in, ceph_tid_t* ptid)
{
  MetaSession *session = in->auth_cap->session;

  int flushing = in->dirty_caps;
  assert(flushing);

  ceph_tid_t flush_tid = ++last_flush_tid;
  in->flushing_cap_tids[flush_tid] = flushing;

  if (!in->flushing_caps) {
    ldout(cct, 10) << "mark_caps_flushing " << ccap_string(flushing) << " " << *in << dendl;
    num_flushing_caps++;
  } else {
    ldout(cct, 10) << "mark_caps_flushing (more) " << ccap_string(flushing) << " " << *in << dendl;
  }

  in->flushing_caps |= flushing;
  in->mark_caps_clean();

  if (!in->flushing_cap_item.is_on_list())
    session->flushing_caps.push_back(&in->flushing_cap_item);
  session->flushing_caps_tids.insert(flush_tid);

  *ptid = flush_tid;
  return flushing;
}

void Client::adjust_session_flushing_caps(Inode *in, MetaSession *old_s, MetaSession *new_s)
{
  for (auto &p : in->cap_snaps) {
    CapSnap &capsnap = p.second;
    if (capsnap.flush_tid > 0) {
      old_s->flushing_caps_tids.erase(capsnap.flush_tid);
      new_s->flushing_caps_tids.insert(capsnap.flush_tid);
    }
  }
  for (map<ceph_tid_t, int>::iterator it = in->flushing_cap_tids.begin();
       it != in->flushing_cap_tids.end();
       ++it) {
    old_s->flushing_caps_tids.erase(it->first);
    new_s->flushing_caps_tids.insert(it->first);
  }
  new_s->flushing_caps.push_back(&in->flushing_cap_item);
}
 * Flush all caps back to the MDS. Because the callers generally wait on the
 * result of this function (syncfs and umount cases), we set
 * CHECK_CAPS_SYNCHRONOUS on the last check_caps call.
 */
void Client::flush_caps_sync()
{
  ldout(cct, 10) << __func__ << dendl;
  xlist<Inode*>::iterator p = delayed_list.begin();
  while (!p.end()) {
    unsigned flags = CHECK_CAPS_NODELAY;
    Inode *in = *p;

    ++p;
    delayed_list.pop_front();
    if (p.end() && dirty_list.empty())
      flags |= CHECK_CAPS_SYNCHRONOUS;
    check_caps(in, flags);
  }

  // other caps, too
  p = dirty_list.begin();
  while (!p.end()) {
    unsigned flags = CHECK_CAPS_NODELAY;
    Inode *in = *p;

    ++p;
    if (p.end())
      flags |= CHECK_CAPS_SYNCHRONOUS;
    check_caps(in, flags);
  }
}
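/*
 * Sketch of the intended call pattern (drawn from the comment above, not new
 * API): syncfs- and umount-style callers combine the synchronous flush with a
 * wait on the last issued flush tid:
 *
 *   flush_caps_sync();               // CHECK_CAPS_SYNCHRONOUS on the final inode
 *   wait_sync_caps(last_flush_tid);  // wait for every tid up to the last one
 *
 * This is exactly the pairing _unmount() uses further down.
 */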
void Client::flush_caps(Inode *in, MetaSession *session, bool sync)
{
  ldout(cct, 10) << "flush_caps " << in << " mds." << session->mds_num << dendl;
  Cap *cap = in->auth_cap;
  assert(cap->session == session);

  for (map<ceph_tid_t,int>::iterator p = in->flushing_cap_tids.begin();
       p != in->flushing_cap_tids.end();
       ++p) {
    bool req_sync = false;

    /* If this is a synchronous request, then flush the journal on last one */
    if (sync && (p->first == in->flushing_cap_tids.rbegin()->first))
      req_sync = true;

    send_cap(in, session, cap, req_sync,
             (get_caps_used(in) | in->caps_dirty()),
             in->caps_wanted(), (cap->issued | cap->implemented),
             p->second, p->first);
  }
}
void Client::wait_sync_caps(Inode *in, ceph_tid_t want)
{
  while (in->flushing_caps) {
    map<ceph_tid_t, int>::iterator it = in->flushing_cap_tids.begin();
    assert(it != in->flushing_cap_tids.end());
    if (it->first > want)
      break;
    ldout(cct, 10) << "wait_sync_caps on " << *in << " flushing "
                   << ccap_string(it->second) << " want " << want
                   << " last " << it->first << dendl;
    wait_on_list(in->waitfor_caps);
  }
}

void Client::wait_sync_caps(ceph_tid_t want)
{
 retry:
  ldout(cct, 10) << "wait_sync_caps want " << want << " (last is " << last_flush_tid << ", "
                 << num_flushing_caps << " total flushing)" << dendl;
  for (map<mds_rank_t,MetaSession*>::iterator p = mds_sessions.begin();
       p != mds_sessions.end();
       ++p) {
    MetaSession *s = p->second;
    if (s->flushing_caps_tids.empty())
      continue;
    ceph_tid_t oldest_tid = *s->flushing_caps_tids.begin();
    if (oldest_tid <= want) {
      ldout(cct, 10) << " waiting on mds." << p->first << " tid " << oldest_tid
                     << " (want " << want << ")" << dendl;
      sync_cond.Wait(client_lock);
      goto retry;
    }
  }
}
void Client::kick_flushing_caps(MetaSession *session)
{
  mds_rank_t mds = session->mds_num;
  ldout(cct, 10) << "kick_flushing_caps mds." << mds << dendl;

  for (xlist<Inode*>::iterator p = session->flushing_caps.begin(); !p.end(); ++p) {
    Inode *in = *p;
    if (session->early_flushing_caps.count(in))
      continue;
    ldout(cct, 20) << " reflushing caps on " << *in << " to mds." << mds << dendl;
    if (in->cap_snaps.size())
      flush_snaps(in, true);
    if (in->flushing_caps)
      flush_caps(in, session);
  }

  session->early_flushing_caps.clear();
}

void Client::early_kick_flushing_caps(MetaSession *session)
{
  session->early_flushing_caps.clear();

  for (xlist<Inode*>::iterator p = session->flushing_caps.begin(); !p.end(); ++p) {
    Inode *in = *p;
    assert(in->auth_cap);

    // if flushing caps were revoked, we re-send the cap flush in client reconnect
    // stage. This guarantees that MDS processes the cap flush message before issuing
    // the flushing caps to other client.
    if ((in->flushing_caps & in->auth_cap->issued) == in->flushing_caps)
      continue;

    ldout(cct, 20) << " reflushing caps (early_kick) on " << *in
                   << " to mds." << session->mds_num << dendl;

    session->early_flushing_caps.insert(in);

    if (in->cap_snaps.size())
      flush_snaps(in, true);
    if (in->flushing_caps)
      flush_caps(in, session);
  }
}
void Client::kick_maxsize_requests(MetaSession *session)
{
  xlist<Cap*>::iterator iter = session->caps.begin();
  while (!iter.end()) {
    (*iter)->inode->requested_max_size = 0;
    (*iter)->inode->wanted_max_size = 0;
    signal_cond_list((*iter)->inode->waitfor_caps);
    ++iter;
  }
}
void SnapRealm::build_snap_context()
{
  set<snapid_t> snaps;
  snapid_t max_seq = seq;

  // start with prior_parents?
  for (unsigned i = 0; i < prior_parent_snaps.size(); i++)
    snaps.insert(prior_parent_snaps[i]);

  // current parent's snaps
  if (pparent) {
    const SnapContext& psnapc = pparent->get_snap_context();
    for (unsigned i = 0; i < psnapc.snaps.size(); i++)
      if (psnapc.snaps[i] >= parent_since)
        snaps.insert(psnapc.snaps[i]);
    if (psnapc.seq > max_seq)
      max_seq = psnapc.seq;
  }

  // my snaps
  for (unsigned i = 0; i < my_snaps.size(); i++)
    snaps.insert(my_snaps[i]);

  // ok!
  cached_snap_context.seq = max_seq;
  cached_snap_context.snaps.resize(0);
  cached_snap_context.snaps.reserve(snaps.size());
  for (set<snapid_t>::reverse_iterator p = snaps.rbegin(); p != snaps.rend(); ++p)
    cached_snap_context.snaps.push_back(*p);
}
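/*
 * Worked example (hypothetical values): if prior_parent_snaps = {2}, the
 * current parent's context is (seq 8, snaps [8,5]) with parent_since = 5,
 * and my_snaps = {7}, then the set built above is {2,5,7,8} (both 8 and 5
 * pass the >= parent_since filter), max_seq becomes 8, and
 * cached_snap_context ends up as (seq 8, snaps [8,7,5,2]); snapids are
 * stored newest-first because of the reverse iteration.
 */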
void Client::invalidate_snaprealm_and_children(SnapRealm *realm)
{
  list<SnapRealm*> q;
  q.push_back(realm);

  while (!q.empty()) {
    realm = q.front();
    q.pop_front();

    ldout(cct, 10) << "invalidate_snaprealm_and_children " << *realm << dendl;
    realm->invalidate_cache();

    for (set<SnapRealm*>::iterator p = realm->pchildren.begin();
         p != realm->pchildren.end();
         ++p)
      q.push_back(*p);
  }
}

SnapRealm *Client::get_snap_realm(inodeno_t r)
{
  SnapRealm *realm = snap_realms[r];
  if (!realm)
    snap_realms[r] = realm = new SnapRealm(r);
  ldout(cct, 20) << "get_snap_realm " << r << " " << realm << " " << realm->nref
                 << " -> " << (realm->nref + 1) << dendl;
  realm->nref++;
  return realm;
}

SnapRealm *Client::get_snap_realm_maybe(inodeno_t r)
{
  if (snap_realms.count(r) == 0) {
    ldout(cct, 20) << "get_snap_realm_maybe " << r << " fail" << dendl;
    return NULL;
  }
  SnapRealm *realm = snap_realms[r];
  ldout(cct, 20) << "get_snap_realm_maybe " << r << " " << realm << " " << realm->nref
                 << " -> " << (realm->nref + 1) << dendl;
  realm->nref++;
  return realm;
}

void Client::put_snap_realm(SnapRealm *realm)
{
  ldout(cct, 20) << "put_snap_realm " << realm->ino << " " << realm
                 << " " << realm->nref << " -> " << (realm->nref - 1) << dendl;
  if (--realm->nref == 0) {
    snap_realms.erase(realm->ino);
    if (realm->pparent) {
      realm->pparent->pchildren.erase(realm);
      put_snap_realm(realm->pparent);
    }
    delete realm;
  }
}
bool Client::adjust_realm_parent(SnapRealm *realm, inodeno_t parent)
{
  if (realm->parent != parent) {
    ldout(cct, 10) << "adjust_realm_parent " << *realm
                   << " " << realm->parent << " -> " << parent << dendl;
    realm->parent = parent;
    if (realm->pparent) {
      realm->pparent->pchildren.erase(realm);
      put_snap_realm(realm->pparent);
    }
    realm->pparent = get_snap_realm(parent);
    realm->pparent->pchildren.insert(realm);
    return true;
  }
  return false;
}

static bool has_new_snaps(const SnapContext& old_snapc,
                          const SnapContext& new_snapc)
{
  return !new_snapc.snaps.empty() && new_snapc.snaps[0] > old_snapc.seq;
}
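/*
 * Example of the predicate above (hypothetical values): SnapContexts store
 * snapids newest-first, so new_snapc.snaps[0] is the newest snap. With
 * old_snapc.seq = 10 and new_snapc.snaps = [12, 9, 4], the newest snap (12)
 * postdates everything the old context knew about, so has_new_snaps()
 * returns true and dirty data must be flushed against the old context first.
 */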
void Client::update_snap_trace(bufferlist& bl, SnapRealm **realm_ret, bool flush)
{
  SnapRealm *first_realm = NULL;
  ldout(cct, 10) << "update_snap_trace len " << bl.length() << dendl;

  map<SnapRealm*, SnapContext> dirty_realms;

  bufferlist::iterator p = bl.begin();
  while (!p.end()) {
    SnapRealmInfo info;
    ::decode(info, p);
    SnapRealm *realm = get_snap_realm(info.ino());

    bool invalidate = false;

    if (info.seq() > realm->seq) {
      ldout(cct, 10) << "update_snap_trace " << *realm << " seq " << info.seq() << " > " << realm->seq
                     << dendl;

      if (flush) {
        // writeback any dirty caps _before_ updating snap list (i.e. with old snap info)
        //  flush me + children
        list<SnapRealm*> q;
        q.push_back(realm);
        while (!q.empty()) {
          SnapRealm *realm = q.front();
          q.pop_front();

          for (set<SnapRealm*>::iterator p = realm->pchildren.begin();
               p != realm->pchildren.end();
               ++p)
            q.push_back(*p);

          if (dirty_realms.count(realm) == 0) {
            realm->nref++;
            dirty_realms[realm] = realm->get_snap_context();
          }
        }
      }

      // update
      realm->seq = info.seq();
      realm->created = info.created();
      realm->parent_since = info.parent_since();
      realm->prior_parent_snaps = info.prior_parent_snaps;
      realm->my_snaps = info.my_snaps;
      invalidate = true;
    }

    // _always_ verify parent
    if (adjust_realm_parent(realm, info.parent()))
      invalidate = true;

    if (invalidate) {
      invalidate_snaprealm_and_children(realm);
      ldout(cct, 15) << "update_snap_trace " << *realm << " self|parent updated" << dendl;
      ldout(cct, 15) << "  snapc " << realm->get_snap_context() << dendl;
    } else {
      ldout(cct, 10) << "update_snap_trace " << *realm << " seq " << info.seq()
                     << " <= " << realm->seq << " and same parent, SKIPPING" << dendl;
    }

    if (!first_realm)
      first_realm = realm;
    else
      put_snap_realm(realm);
  }

  for (map<SnapRealm*, SnapContext>::iterator q = dirty_realms.begin();
       q != dirty_realms.end();
       ++q) {
    SnapRealm *realm = q->first;
    // if there are new snaps ?
    if (has_new_snaps(q->second, realm->get_snap_context())) {
      ldout(cct, 10) << " flushing caps on " << *realm << dendl;
      xlist<Inode*>::iterator r = realm->inodes_with_caps.begin();
      while (!r.end()) {
        Inode *in = *r;
        ++r;
        queue_cap_snap(in, q->second);
      }
    } else {
      ldout(cct, 10) << " no new snap on " << *realm << dendl;
    }
    put_snap_realm(realm);
  }

  if (realm_ret)
    *realm_ret = first_realm;
  else
    put_snap_realm(first_realm);
}
void Client::handle_snap(MClientSnap *m)
{
  ldout(cct, 10) << "handle_snap " << *m << dendl;
  mds_rank_t mds = mds_rank_t(m->get_source().num());
  MetaSession *session = _get_mds_session(mds, m->get_connection().get());
  if (!session) {
    m->put();
    return;
  }

  got_mds_push(session);

  map<Inode*, SnapContext> to_move;
  SnapRealm *realm = 0;

  if (m->head.op == CEPH_SNAP_OP_SPLIT) {
    assert(m->head.split);
    SnapRealmInfo info;
    bufferlist::iterator p = m->bl.begin();
    ::decode(info, p);
    assert(info.ino() == m->head.split);

    // flush, then move, ino's.
    realm = get_snap_realm(info.ino());
    ldout(cct, 10) << " splitting off " << *realm << dendl;
    for (vector<inodeno_t>::iterator p = m->split_inos.begin();
         p != m->split_inos.end();
         ++p) {
      vinodeno_t vino(*p, CEPH_NOSNAP);
      if (inode_map.count(vino)) {
        Inode *in = inode_map[vino];
        if (!in->snaprealm || in->snaprealm == realm)
          continue;
        if (in->snaprealm->created > info.created()) {
          ldout(cct, 10) << " NOT moving " << *in << " from _newer_ realm "
                         << *in->snaprealm << dendl;
          continue;
        }
        ldout(cct, 10) << " moving " << *in << " from " << *in->snaprealm << dendl;

        in->snaprealm_item.remove_myself();
        to_move[in] = in->snaprealm->get_snap_context();
        put_snap_realm(in->snaprealm);
      }
    }

    // move child snaprealms, too
    for (vector<inodeno_t>::iterator p = m->split_realms.begin();
         p != m->split_realms.end();
         ++p) {
      ldout(cct, 10) << "adjusting snaprealm " << *p << " parent" << dendl;
      SnapRealm *child = get_snap_realm_maybe(*p);
      if (!child)
        continue;
      adjust_realm_parent(child, realm->ino);
      put_snap_realm(child);
    }
  }

  update_snap_trace(m->bl, NULL, m->head.op != CEPH_SNAP_OP_DESTROY);

  if (realm) {
    for (auto p = to_move.begin(); p != to_move.end(); ++p) {
      Inode *in = p->first;
      in->snaprealm = realm;
      realm->inodes_with_caps.push_back(&in->snaprealm_item);
      realm->nref++;
      // queue for snap writeback
      if (has_new_snaps(p->second, realm->get_snap_context()))
        queue_cap_snap(in, p->second);
    }
    put_snap_realm(realm);
  }

  m->put();
}
void Client::handle_quota(MClientQuota *m)
{
  mds_rank_t mds = mds_rank_t(m->get_source().num());
  MetaSession *session = _get_mds_session(mds, m->get_connection().get());
  if (!session) {
    m->put();
    return;
  }

  got_mds_push(session);

  ldout(cct, 10) << "handle_quota " << *m << " from mds." << mds << dendl;

  vinodeno_t vino(m->ino, CEPH_NOSNAP);
  if (inode_map.count(vino)) {
    Inode *in = inode_map[vino];
    if (in) {
      in->quota = m->quota;
      in->rstat = m->rstat;
    }
  }

  m->put();
}
void Client::handle_caps(MClientCaps *m)
{
  mds_rank_t mds = mds_rank_t(m->get_source().num());
  MetaSession *session = _get_mds_session(mds, m->get_connection().get());
  if (!session) {
    m->put();
    return;
  }

  if (m->osd_epoch_barrier && !objecter->have_map(m->osd_epoch_barrier)) {
    // Pause RADOS operations until we see the required epoch
    objecter->set_epoch_barrier(m->osd_epoch_barrier);
  }

  if (m->osd_epoch_barrier > cap_epoch_barrier) {
    // Record the barrier so that we will transmit it to MDS when releasing
    set_cap_epoch_barrier(m->osd_epoch_barrier);
  }

  got_mds_push(session);

  m->clear_payload();  // for if/when we send back to MDS

  Inode *in = 0;
  vinodeno_t vino(m->get_ino(), CEPH_NOSNAP);
  if (inode_map.count(vino))
    in = inode_map[vino];
  if (!in) {
    if (m->get_op() == CEPH_CAP_OP_IMPORT) {
      ldout(cct, 5) << "handle_caps don't have vino " << vino << " on IMPORT, immediately releasing" << dendl;
      session->enqueue_cap_release(
        m->get_ino(),
        m->get_cap_id(),
        m->get_seq(),
        m->get_mseq(),
        cap_epoch_barrier);
    } else {
      ldout(cct, 5) << "handle_caps don't have vino " << vino << ", dropping" << dendl;
    }
    m->put();

    // in case the mds is waiting on e.g. a revocation
    flush_cap_releases();
    return;
  }

  switch (m->get_op()) {
  case CEPH_CAP_OP_EXPORT:
    return handle_cap_export(session, in, m);
  case CEPH_CAP_OP_FLUSHSNAP_ACK:
    return handle_cap_flushsnap_ack(session, in, m);
  case CEPH_CAP_OP_IMPORT:
    handle_cap_import(session, in, m);
  }

  if (in->caps.count(mds) == 0) {
    ldout(cct, 5) << "handle_caps don't have " << *in << " cap on mds." << mds << dendl;
    m->put();
    return;
  }

  Cap *cap = in->caps[mds];

  switch (m->get_op()) {
  case CEPH_CAP_OP_TRUNC: return handle_cap_trunc(session, in, m);
  case CEPH_CAP_OP_IMPORT:
  case CEPH_CAP_OP_REVOKE:
  case CEPH_CAP_OP_GRANT: return handle_cap_grant(session, in, cap, m);
  case CEPH_CAP_OP_FLUSH_ACK: return handle_cap_flush_ack(session, in, cap, m);
  default:
    m->put();
  }
}
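/*
 * Note on the epoch-barrier handling above: the MDS stamps cap messages with
 * the OSD epoch the client must have observed before touching data objects
 * affected by the cap change. Sketch of the invariant, not extra code:
 *
 *   m->osd_epoch_barrier not yet seen by objecter -> pause new RADOS ops
 *   m->osd_epoch_barrier > cap_epoch_barrier      -> remember it, and echo
 *       it back to the MDS in subsequent cap release messages
 */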
void Client::handle_cap_import(MetaSession *session, Inode *in, MClientCaps *m)
{
  mds_rank_t mds = session->mds_num;

  ldout(cct, 5) << "handle_cap_import ino " << m->get_ino() << " mseq " << m->get_mseq()
                << " IMPORT from mds." << mds << dendl;

  const mds_rank_t peer_mds = mds_rank_t(m->peer.mds);
  Cap *cap = NULL;
  UserPerm cap_perms;
  if (m->peer.cap_id && in->caps.count(peer_mds)) {
    cap = in->caps[peer_mds];
    if (cap)
      cap_perms = cap->latest_perms;
  }

  // add/update it
  SnapRealm *realm = NULL;
  update_snap_trace(m->snapbl, &realm);

  add_update_cap(in, session, m->get_cap_id(),
                 m->get_caps(), m->get_seq(), m->get_mseq(), m->get_realm(),
                 CEPH_CAP_FLAG_AUTH, cap_perms);

  if (cap && cap->cap_id == m->peer.cap_id) {
    remove_cap(cap, (m->peer.flags & CEPH_CAP_FLAG_RELEASE));
  }

  if (realm)
    put_snap_realm(realm);

  if (in->auth_cap && in->auth_cap->session->mds_num == mds) {
    // reflush any/all caps (if we are now the auth_cap)
    if (in->cap_snaps.size())
      flush_snaps(in, true);
    if (in->flushing_caps)
      flush_caps(in, session);
  }
}
void Client::handle_cap_export(MetaSession *session, Inode *in, MClientCaps *m)
{
  mds_rank_t mds = session->mds_num;

  ldout(cct, 5) << "handle_cap_export ino " << m->get_ino() << " mseq " << m->get_mseq()
                << " EXPORT from mds." << mds << dendl;

  Cap *cap = NULL;
  if (in->caps.count(mds))
    cap = in->caps[mds];

  const mds_rank_t peer_mds = mds_rank_t(m->peer.mds);

  if (cap && cap->cap_id == m->get_cap_id()) {
    if (m->peer.cap_id) {
      MetaSession *tsession = _get_or_open_mds_session(peer_mds);
      if (in->caps.count(peer_mds)) {
        Cap *tcap = in->caps[peer_mds];
        if (tcap->cap_id == m->peer.cap_id &&
            ceph_seq_cmp(tcap->seq, m->peer.seq) < 0) {
          tcap->cap_id = m->peer.cap_id;
          tcap->seq = m->peer.seq - 1;
          tcap->issue_seq = tcap->seq;
          tcap->mseq = m->peer.mseq;
          tcap->issued |= cap->issued;
          tcap->implemented |= cap->issued;
          if (cap == in->auth_cap)
            in->auth_cap = tcap;
          if (in->auth_cap == tcap && in->flushing_cap_item.is_on_list())
            adjust_session_flushing_caps(in, session, tsession);
        }
      } else {
        add_update_cap(in, tsession, m->peer.cap_id, cap->issued,
                       m->peer.seq - 1, m->peer.mseq, (uint64_t)-1,
                       cap == in->auth_cap ? CEPH_CAP_FLAG_AUTH : 0,
                       cap->latest_perms);
      }
    } else {
      if (cap == in->auth_cap)
        in->flags |= I_CAP_DROPPED;
    }

    remove_cap(cap, false);
  }

  m->put();
}
void Client::handle_cap_trunc(MetaSession *session, Inode *in, MClientCaps *m)
{
  mds_rank_t mds = session->mds_num;
  assert(in->caps[mds]);

  ldout(cct, 10) << "handle_cap_trunc on ino " << *in
                 << " size " << in->size << " -> " << m->get_size()
                 << dendl;

  int issued;
  in->caps_issued(&issued);
  issued |= in->caps_dirty();
  update_inode_file_size(in, issued, m->get_size(),
                         m->get_truncate_seq(), m->get_truncate_size());
  m->put();
}
void Client::handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, MClientCaps *m)
{
  ceph_tid_t flush_ack_tid = m->get_client_tid();
  int dirty = m->get_dirty();
  int cleaned = 0;
  int flushed = 0;

  for (map<ceph_tid_t, int>::iterator it = in->flushing_cap_tids.begin();
       it != in->flushing_cap_tids.end(); ) {
    if (it->first == flush_ack_tid)
      cleaned = it->second;
    if (it->first <= flush_ack_tid) {
      session->flushing_caps_tids.erase(it->first);
      in->flushing_cap_tids.erase(it++);
      ++flushed;
      continue;
    }
    cleaned &= ~it->second;
    if (!cleaned)
      break;
    ++it;
  }

  ldout(cct, 5) << "handle_cap_flush_ack mds." << session->mds_num
                << " cleaned " << ccap_string(cleaned) << " on " << *in
                << " with " << ccap_string(dirty) << dendl;

  if (flushed) {
    signal_cond_list(in->waitfor_caps);
    if (session->flushing_caps_tids.empty() ||
        *session->flushing_caps_tids.begin() > flush_ack_tid)
      sync_cond.Signal();
  }

  if (!dirty) {
    in->cap_dirtier_uid = -1;
    in->cap_dirtier_gid = -1;
  }

  if (!cleaned) {
    ldout(cct, 10) << " tid " << m->get_client_tid() << " != any cap bit tids" << dendl;
  } else {
    if (in->flushing_caps) {
      ldout(cct, 5) << "  flushing_caps " << ccap_string(in->flushing_caps)
                    << " -> " << ccap_string(in->flushing_caps & ~cleaned) << dendl;
      in->flushing_caps &= ~cleaned;
      if (in->flushing_caps == 0) {
        ldout(cct, 10) << " " << *in << " !flushing" << dendl;
        num_flushing_caps--;
        if (in->cap_snaps.empty())
          in->flushing_cap_item.remove_myself();
      }
      if (!in->caps_dirty())
        put_inode(in);
    }
  }

  m->put();
}
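/*
 * Worked example of the tid scan above (hypothetical values): suppose
 * in->flushing_cap_tids = {5:AUTH, 7:FILE, 9:XATTR} and the ack carries
 * flush_ack_tid = 7. The loop records cleaned = FILE at tid 7, erases tids
 * 5 and 7 (everything <= the acked tid), and then clears from `cleaned` any
 * bits still pending at tid 9, so caps that a later, unacked flush still
 * covers are not prematurely marked clean.
 */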
void Client::handle_cap_flushsnap_ack(MetaSession *session, Inode *in, MClientCaps *m)
{
  mds_rank_t mds = session->mds_num;
  assert(in->caps[mds]);
  snapid_t follows = m->get_snap_follows();

  if (in->cap_snaps.count(follows)) {
    CapSnap &capsnap = in->cap_snaps.at(follows);
    if (m->get_client_tid() != capsnap.flush_tid) {
      ldout(cct, 10) << " tid " << m->get_client_tid() << " != " << capsnap.flush_tid << dendl;
    } else {
      ldout(cct, 5) << "handle_cap_flushedsnap mds." << mds << " flushed snap follows " << follows
                    << " on " << *in << dendl;
      InodeRef tmp_ref;
      if (in->get_num_ref() == 1)
        tmp_ref = in; // make sure inode not get freed while erasing item from in->cap_snaps
      if (in->flushing_caps == 0 && in->cap_snaps.empty())
        in->flushing_cap_item.remove_myself();
      session->flushing_caps_tids.erase(capsnap.flush_tid);
      in->cap_snaps.erase(follows);
    }
  } else {
    ldout(cct, 5) << "handle_cap_flushedsnap DUP(?) mds." << mds << " flushed snap follows " << follows
                  << " on " << *in << dendl;
    // we may not have it if we send multiple FLUSHSNAP requests and (got multiple FLUSHEDSNAPs back)
  }

  m->put();
}
class C_Client_DentryInvalidate : public Context {
private:
  Client *client;
  vinodeno_t dirino;
  vinodeno_t ino;
  string name;
public:
  C_Client_DentryInvalidate(Client *c, Dentry *dn, bool del) :
    client(c), name(dn->name) {
      if (client->use_faked_inos()) {
        dirino.ino = dn->dir->parent_inode->faked_ino;
        if (del)
          ino.ino = dn->inode->faked_ino;
      } else {
        dirino = dn->dir->parent_inode->vino();
        if (del)
          ino = dn->inode->vino();
      }
      if (!del)
        ino.ino = inodeno_t();
  }
  void finish(int r) override {
    // _async_dentry_invalidate is responsible for its own locking
    assert(!client->client_lock.is_locked_by_me());
    client->_async_dentry_invalidate(dirino, ino, name);
  }
};

void Client::_async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name)
{
  if (unmounting)
    return;
  ldout(cct, 10) << "_async_dentry_invalidate '" << name << "' ino " << ino
                 << " in dir " << dirino << dendl;
  dentry_invalidate_cb(callback_handle, dirino, ino, name);
}

void Client::_schedule_invalidate_dentry_callback(Dentry *dn, bool del)
{
  if (dentry_invalidate_cb && dn->inode->ll_ref > 0)
    async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn, del));
}
void Client::_try_to_trim_inode(Inode *in, bool sched_inval)
{
  int ref = in->get_num_ref();

  if (in->dir && !in->dir->dentries.empty()) {
    for (auto p = in->dir->dentries.begin();
         p != in->dir->dentries.end(); ) {
      Dentry *dn = p->second;
      ++p;
      /* rmsnap removes whole subtree, need trim inodes recursively.
       * we don't need to invalidate dentries recursively. because
       * invalidating a directory dentry effectively invalidate
       * whole subtree */
      if (in->snapid != CEPH_NOSNAP && dn->inode && dn->inode->is_dir())
        _try_to_trim_inode(dn->inode.get(), false);

      if (dn->lru_is_expireable())
        unlink(dn, true, false);  // keep dir, drop dentry
    }
    if (in->dir->dentries.empty()) {
      close_dir(in->dir);
      --ref;
    }
  }

  if (ref > 0 && (in->flags & I_SNAPDIR_OPEN)) {
    InodeRef snapdir = open_snapdir(in);
    _try_to_trim_inode(snapdir.get(), false);
    --ref;
  }

  if (ref > 0 && in->ll_ref > 0 && sched_inval) {
    set<Dentry*>::iterator q = in->dn_set.begin();
    while (q != in->dn_set.end()) {
      Dentry *dn = *q++;
      // FIXME: we play lots of unlink/link tricks when handling MDS replies,
      //        so in->dn_set doesn't always reflect the state of kernel's dcache.
      _schedule_invalidate_dentry_callback(dn, true);
      unlink(dn, true, true);
    }
  }
}
void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClientCaps *m)
{
  mds_rank_t mds = session->mds_num;
  int used = get_caps_used(in);
  int wanted = in->caps_wanted();

  const int old_caps = cap->issued;
  const int new_caps = m->get_caps();
  ldout(cct, 5) << "handle_cap_grant on in " << m->get_ino()
                << " mds." << mds << " seq " << m->get_seq()
                << " caps now " << ccap_string(new_caps)
                << " was " << ccap_string(old_caps) << dendl;
  cap->seq = m->get_seq();
  cap->gen = session->cap_gen;

  // update inode
  int issued;
  in->caps_issued(&issued);
  issued |= in->caps_dirty();

  if ((new_caps & CEPH_CAP_AUTH_SHARED) &&
      !(issued & CEPH_CAP_AUTH_EXCL)) {
    in->mode = m->head.mode;
    in->uid = m->head.uid;
    in->gid = m->head.gid;
    in->btime = m->btime;
  }
  bool deleted_inode = false;
  if ((new_caps & CEPH_CAP_LINK_SHARED) &&
      !(issued & CEPH_CAP_LINK_EXCL)) {
    in->nlink = m->head.nlink;
    if (in->nlink == 0 &&
        (new_caps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
      deleted_inode = true;
  }
  if (!(issued & CEPH_CAP_XATTR_EXCL) &&
      m->xattrbl.length() &&
      m->head.xattr_version > in->xattr_version) {
    bufferlist::iterator p = m->xattrbl.begin();
    ::decode(in->xattrs, p);
    in->xattr_version = m->head.xattr_version;
  }

  if ((new_caps & CEPH_CAP_FILE_SHARED) && m->dirstat_is_valid()) {
    in->dirstat.nfiles = m->get_nfiles();
    in->dirstat.nsubdirs = m->get_nsubdirs();
  }

  if (new_caps & CEPH_CAP_ANY_RD) {
    update_inode_file_time(in, issued, m->get_time_warp_seq(),
                           m->get_ctime(), m->get_mtime(), m->get_atime());
  }

  if (new_caps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
    in->layout = m->get_layout();
    update_inode_file_size(in, issued, m->get_size(),
                           m->get_truncate_seq(), m->get_truncate_size());
  }

  if (m->inline_version > in->inline_version) {
    in->inline_data = m->inline_data;
    in->inline_version = m->inline_version;
  }

  /* always take a newer change attr */
  if (m->get_change_attr() > in->change_attr)
    in->change_attr = m->get_change_attr();

  // max_size
  if (cap == in->auth_cap &&
      (new_caps & CEPH_CAP_ANY_FILE_WR) &&
      (m->get_max_size() != in->max_size)) {
    ldout(cct, 10) << "max_size " << in->max_size << " -> " << m->get_max_size() << dendl;
    in->max_size = m->get_max_size();
    if (in->max_size > in->wanted_max_size) {
      in->wanted_max_size = 0;
      in->requested_max_size = 0;
    }
  }

  bool check = false;
  if (m->get_op() == CEPH_CAP_OP_IMPORT && m->get_wanted() != wanted)
    check = true;

  check_cap_issue(in, cap, new_caps);

  // update caps
  int revoked = old_caps & ~new_caps;
  if (revoked) {
    ldout(cct, 10) << "  revocation of " << ccap_string(revoked) << dendl;
    cap->issued = new_caps;
    cap->implemented |= new_caps;

    // recall delegations if we're losing caps necessary for them
    if (revoked & ceph_deleg_caps_for_type(CEPH_DELEGATION_RD))
      in->recall_deleg(false);
    else if (revoked & ceph_deleg_caps_for_type(CEPH_DELEGATION_WR))
      in->recall_deleg(true);

    if ((used & revoked & CEPH_CAP_FILE_BUFFER) &&
        !_flush(in, new C_Client_FlushComplete(this, in))) {
      // waitin' for flush
    } else if (revoked & CEPH_CAP_FILE_CACHE) {
      if (_release(in))
        check = true;
    } else {
      cap->wanted = 0; // don't let check_caps skip sending a response to MDS
      check = true;
    }
  } else if (old_caps == new_caps) {
    ldout(cct, 10) << "  caps unchanged at " << ccap_string(old_caps) << dendl;
  } else {
    ldout(cct, 10) << "  grant, new caps are " << ccap_string(new_caps & ~old_caps) << dendl;
    cap->issued = new_caps;
    cap->implemented |= new_caps;

    if (cap == in->auth_cap) {
      // non-auth MDS is revoking the newly grant caps ?
      for (map<mds_rank_t, Cap*>::iterator it = in->caps.begin(); it != in->caps.end(); ++it) {
        if (it->second == cap)
          continue;
        if (it->second->implemented & ~it->second->issued & new_caps) {
          check = true;
          break;
        }
      }
    }
  }

  if (check)
    check_caps(in, 0);

  // wake up waiters
  if (new_caps)
    signal_cond_list(in->waitfor_caps);

  // may drop inode's last ref
  if (deleted_inode)
    _try_to_trim_inode(in, true);

  m->put();
}
int Client::inode_permission(Inode *in, const UserPerm& perms, unsigned want)
{
  if (perms.uid() == 0)
    return 0;

  if (perms.uid() != in->uid && (in->mode & S_IRWXG)) {
    int ret = _posix_acl_permission(in, perms, want);
    if (ret != -EAGAIN)
      return ret;
  }

  // check permissions before doing anything else
  if (!in->check_mode(perms, want))
    return -EACCES;
  return 0;
}
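/*
 * Example of the check order above (hypothetical caller): for uid 1000
 * opening a 0640 file owned by 1000:users with want = MAY_READ|MAY_WRITE,
 * the root short-circuit does not apply; the caller is the owner so the
 * POSIX ACL path is skipped, and check_mode() grants via the owner bits.
 * A non-owner uid in group `users` would instead go through
 * _posix_acl_permission() first and fall back to the mode bits only when
 * no ACL entry decides (the -EAGAIN case).
 */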
int Client::xattr_permission(Inode *in, const char *name, unsigned want,
                             const UserPerm& perms)
{
  int r = _getattr_for_perm(in, perms);
  if (r < 0)
    goto out;

  r = 0;
  if (strncmp(name, "system.", 7) == 0) {
    if ((want & MAY_WRITE) && (perms.uid() != 0 && perms.uid() != in->uid))
      r = -EPERM;
  } else {
    r = inode_permission(in, perms, want);
  }
out:
  ldout(cct, 5) << __func__ << " " << in << " = " << r << dendl;
  return r;
}

ostream& operator<<(ostream &out, const UserPerm& perm) {
  out << "UserPerm(uid: " << perm.uid() << ", gid: " << perm.gid() << ")";
  return out;
}
int Client::may_setattr(Inode *in, struct ceph_statx *stx, int mask,
                        const UserPerm& perms)
{
  ldout(cct, 20) << __func__ << " " << *in << "; " << perms << dendl;
  int r = _getattr_for_perm(in, perms);
  if (r < 0)
    goto out;

  if (mask & CEPH_SETATTR_SIZE) {
    r = inode_permission(in, perms, MAY_WRITE);
    if (r < 0)
      goto out;
  }

  r = -EPERM;
  if (mask & CEPH_SETATTR_UID) {
    if (perms.uid() != 0 && (perms.uid() != in->uid || stx->stx_uid != in->uid))
      goto out;
  }
  if (mask & CEPH_SETATTR_GID) {
    if (perms.uid() != 0 && (perms.uid() != in->uid ||
        (!perms.gid_in_groups(stx->stx_gid) && stx->stx_gid != in->gid)))
      goto out;
  }

  if (mask & CEPH_SETATTR_MODE) {
    if (perms.uid() != 0 && perms.uid() != in->uid)
      goto out;

    gid_t i_gid = (mask & CEPH_SETATTR_GID) ? stx->stx_gid : in->gid;
    if (perms.uid() != 0 && !perms.gid_in_groups(i_gid))
      stx->stx_mode &= ~S_ISGID;
  }

  if (mask & (CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME |
              CEPH_SETATTR_MTIME | CEPH_SETATTR_ATIME)) {
    if (perms.uid() != 0 && perms.uid() != in->uid) {
      int check_mask = CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME;
      if (!(mask & CEPH_SETATTR_MTIME_NOW))
        check_mask |= CEPH_SETATTR_MTIME;
      if (!(mask & CEPH_SETATTR_ATIME_NOW))
        check_mask |= CEPH_SETATTR_ATIME;
      if (check_mask & mask) {
        goto out;
      } else {
        r = inode_permission(in, perms, MAY_WRITE);
        if (r < 0)
          goto out;
      }
    }
  }
  r = 0;
out:
  ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl;
  return r;
}
int Client::may_open(Inode *in, int flags, const UserPerm& perms)
{
  ldout(cct, 20) << __func__ << " " << *in << "; " << perms << dendl;
  unsigned want = 0;

  if ((flags & O_ACCMODE) == O_WRONLY)
    want = MAY_WRITE;
  else if ((flags & O_ACCMODE) == O_RDWR)
    want = MAY_READ | MAY_WRITE;
  else if ((flags & O_ACCMODE) == O_RDONLY)
    want = MAY_READ;
  if (flags & O_TRUNC)
    want |= MAY_WRITE;

  int r = 0;
  switch (in->mode & S_IFMT) {
    case S_IFLNK:
      r = -ELOOP;
      goto out;
    case S_IFDIR:
      if (want & MAY_WRITE) {
        r = -EISDIR;
        goto out;
      }
      break;
  }

  r = _getattr_for_perm(in, perms);
  if (r < 0)
    goto out;

  r = inode_permission(in, perms, want);
out:
  ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl;
  return r;
}

int Client::may_lookup(Inode *dir, const UserPerm& perms)
{
  ldout(cct, 20) << __func__ << " " << *dir << "; " << perms << dendl;
  int r = _getattr_for_perm(dir, perms);
  if (r < 0)
    goto out;

  r = inode_permission(dir, perms, MAY_EXEC);
out:
  ldout(cct, 3) << __func__ << " " << dir << " = " << r << dendl;
  return r;
}

int Client::may_create(Inode *dir, const UserPerm& perms)
{
  ldout(cct, 20) << __func__ << " " << *dir << "; " << perms << dendl;
  int r = _getattr_for_perm(dir, perms);
  if (r < 0)
    goto out;

  r = inode_permission(dir, perms, MAY_EXEC | MAY_WRITE);
out:
  ldout(cct, 3) << __func__ << " " << dir << " = " << r << dendl;
  return r;
}
int Client::may_delete(Inode *dir, const char *name, const UserPerm& perms)
{
  ldout(cct, 20) << __func__ << " " << *dir << "; name " << name << "; " << perms << dendl;
  int r = _getattr_for_perm(dir, perms);
  if (r < 0)
    goto out;

  r = inode_permission(dir, perms, MAY_EXEC | MAY_WRITE);
  if (r < 0)
    goto out;

  /* 'name == NULL' means rmsnap */
  if (perms.uid() != 0 && name && (dir->mode & S_ISVTX)) {
    InodeRef otherin;
    r = _lookup(dir, name, CEPH_CAP_AUTH_SHARED, &otherin, perms);
    if (r < 0)
      goto out;
    if (dir->uid != perms.uid() && otherin->uid != perms.uid())
      r = -EPERM;
  }
out:
  ldout(cct, 3) << __func__ << " " << dir << " = " << r << dendl;
  return r;
}

int Client::may_hardlink(Inode *in, const UserPerm& perms)
{
  ldout(cct, 20) << __func__ << " " << *in << "; " << perms << dendl;
  int r = _getattr_for_perm(in, perms);
  if (r < 0)
    goto out;

  if (perms.uid() == 0 || perms.uid() == in->uid) {
    r = 0;
    goto out;
  }

  r = -EPERM;
  if (!S_ISREG(in->mode))
    goto out;

  if (in->mode & S_ISUID)
    goto out;

  if ((in->mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
    goto out;

  r = inode_permission(in, perms, MAY_READ | MAY_WRITE);
out:
  ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl;
  return r;
}
int Client::_getattr_for_perm(Inode *in, const UserPerm& perms)
{
  int mask = CEPH_STAT_CAP_MODE;
  bool force = false;
  if (acl_type != NO_ACL) {
    mask |= CEPH_STAT_CAP_XATTR;
    force = in->xattr_version == 0;
  }
  return _getattr(in, mask, perms, force);
}

vinodeno_t Client::_get_vino(Inode *in)
{
  /* The caller must hold the client lock */
  return vinodeno_t(in->ino, in->snapid);
}

inodeno_t Client::_get_inodeno(Inode *in)
{
  /* The caller must hold the client lock */
  return in->ino;
}
 * Resolve an MDS spec to a list of MDS daemon GIDs.
 *
 * The spec is a string representing a GID, rank, filesystem:rank, or name/id.
 * It may be '*' in which case it matches all GIDs.
 *
 * If no error is returned, the `targets` vector will be populated with at least
 * one MDS.
 */
int Client::resolve_mds(
    const std::string &mds_spec,
    std::vector<mds_gid_t> *targets)
{
  assert(fsmap);
  assert(targets != nullptr);

  mds_role_t role;
  std::stringstream ss;
  int role_r = fsmap->parse_role(mds_spec, &role, ss);
  if (role_r == 0) {
    // We got a role, resolve it to a GID
    ldout(cct, 10) << __func__ << ": resolved '" << mds_spec << "' to role '"
                   << role << "'" << dendl;
    targets->push_back(
        fsmap->get_filesystem(role.fscid)->mds_map.get_info(role.rank).global_id);
    return 0;
  }

  std::string strtol_err;
  long long rank_or_gid = strict_strtoll(mds_spec.c_str(), 10, &strtol_err);
  if (strtol_err.empty()) {
    // It is a possible GID
    const mds_gid_t mds_gid = mds_gid_t(rank_or_gid);
    if (fsmap->gid_exists(mds_gid)) {
      ldout(cct, 10) << __func__ << ": validated GID " << mds_gid << dendl;
      targets->push_back(mds_gid);
    } else {
      lderr(cct) << __func__ << ": GID " << mds_gid << " not in MDS map"
                 << dendl;
      return -ENOENT;
    }
  } else if (mds_spec == "*") {
    // It is a wildcard: use all MDSs
    const auto mds_info = fsmap->get_mds_info();

    if (mds_info.empty()) {
      lderr(cct) << __func__ << ": * passed but no MDS daemons found" << dendl;
      return -ENOENT;
    }

    for (const auto i : mds_info) {
      targets->push_back(i.first);
    }
  } else {
    // It did not parse as an integer, it is not a wildcard, it must be a name
    const mds_gid_t mds_gid = fsmap->find_mds_gid_by_name(mds_spec);
    if (mds_gid == 0) {
      lderr(cct) << "MDS ID '" << mds_spec << "' not found" << dendl;
      lderr(cct) << "FSMap: " << *fsmap << dendl;
      return -ENOENT;
    } else {
      ldout(cct, 10) << __func__ << ": resolved ID '" << mds_spec
                     << "' to GID " << mds_gid << dendl;
      targets->push_back(mds_gid);
    }
  }

  return 0;
}
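/*
 * Usage sketch (illustrative only; values are hypothetical). All spec forms
 * funnel through the same call:
 *
 *   std::vector<mds_gid_t> targets;
 *   resolve_mds("0", &targets);        // rank in the default filesystem
 *   resolve_mds("cephfs:0", &targets); // filesystem:rank role
 *   resolve_mds("4151", &targets);     // a raw GID, if present in the FSMap
 *   resolve_mds("a", &targets);        // by daemon name/id
 *   resolve_mds("*", &targets);        // every MDS daemon in the FSMap
 */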
 * Authenticate with mon and establish global ID
 */
int Client::authenticate()
{
  assert(client_lock.is_locked_by_me());

  if (monclient->is_authenticated()) {
    return 0;
  }

  client_lock.Unlock();
  int r = monclient->authenticate(cct->_conf->client_mount_timeout);
  client_lock.Lock();
  if (r < 0) {
    return r;
  }

  whoami = monclient->get_global_id();
  messenger->set_myname(entity_name_t::CLIENT(whoami.v));

  return 0;
}

int Client::fetch_fsmap(bool user)
{
  int r;
  // Retrieve FSMap to enable looking up daemon addresses. We need FSMap
  // rather than MDSMap because no one MDSMap contains all the daemons, and
  // a `tell` can address any daemon.
  version_t fsmap_latest;
  do {
    C_SaferCond cond;
    monclient->get_version("fsmap", &fsmap_latest, NULL, &cond);
    client_lock.Unlock();
    r = cond.wait();
    client_lock.Lock();
  } while (r == -EAGAIN);

  if (r < 0) {
    lderr(cct) << "Failed to learn FSMap version: " << cpp_strerror(r) << dendl;
    return r;
  }

  ldout(cct, 10) << __func__ << " learned FSMap version " << fsmap_latest << dendl;

  if (user) {
    if (!fsmap_user || fsmap_user->get_epoch() < fsmap_latest) {
      monclient->sub_want("fsmap.user", fsmap_latest, CEPH_SUBSCRIBE_ONETIME);
      monclient->renew_subs();
      wait_on_list(waiting_for_fsmap);
    }
    assert(fsmap_user);
    assert(fsmap_user->get_epoch() >= fsmap_latest);
  } else {
    if (!fsmap || fsmap->get_epoch() < fsmap_latest) {
      monclient->sub_want("fsmap", fsmap_latest, CEPH_SUBSCRIBE_ONETIME);
      monclient->renew_subs();
      wait_on_list(waiting_for_fsmap);
    }
    assert(fsmap);
    assert(fsmap->get_epoch() >= fsmap_latest);
  }
  ldout(cct, 10) << __func__ << " finished waiting for FSMap version "
                 << fsmap_latest << dendl;
  return 0;
}
 * @mds_spec one of ID, rank, GID, "*"
 */
int Client::mds_command(
    const std::string &mds_spec,
    const vector<string>& cmd,
    const bufferlist& inbl,
    bufferlist *outbl,
    string *outs,
    Context *onfinish)
{
  Mutex::Locker lock(client_lock);

  int r;
  r = authenticate();
  if (r < 0) {
    return r;
  }

  r = fetch_fsmap(false);
  if (r < 0) {
    return r;
  }

  // Look up MDS target(s) of the command
  std::vector<mds_gid_t> targets;
  r = resolve_mds(mds_spec, &targets);
  if (r < 0) {
    return r;
  }

  // If daemons are laggy, we won't send them commands. If all
  // are laggy then we fail.
  std::vector<mds_gid_t> non_laggy;
  for (const auto gid : targets) {
    const auto info = fsmap->get_info_gid(gid);
    if (!info.laggy()) {
      non_laggy.push_back(gid);
    }
  }
  if (non_laggy.size() == 0) {
    *outs = "All targeted MDS daemons are laggy";
    return -ENOENT;
  }

  if (metadata.empty()) {
    // We are called on an unmounted client, so metadata
    // won't be initialized yet.
    populate_metadata("");
  }

  // Send commands to targets
  C_GatherBuilder gather(cct, onfinish);
  for (const auto target_gid : non_laggy) {
    const auto info = fsmap->get_info_gid(target_gid);

    // Open a connection to the target MDS
    entity_inst_t inst = info.get_inst();
    ConnectionRef conn = messenger->get_connection(inst);

    // Generate MDSCommandOp state
    auto &op = command_table.start_command();

    op.on_finish = gather.new_sub();
    op.cmd = cmd;
    op.outbl = outbl;
    op.outs = outs;
    op.inbl = inbl;
    op.mds_gid = target_gid;
    op.con = conn;

    ldout(cct, 4) << __func__ << ": new command op to " << target_gid
                  << " tid=" << op.tid << cmd << dendl;

    // Construct and send MCommand
    MCommand *m = op.get_message(monclient->get_fsid());
    conn->send_message(m);
  }
  gather.activate();

  return 0;
}

void Client::handle_command_reply(MCommandReply *m)
{
  ceph_tid_t const tid = m->get_tid();

  ldout(cct, 10) << __func__ << ": tid=" << m->get_tid() << dendl;

  if (!command_table.exists(tid)) {
    ldout(cct, 1) << __func__ << ": unknown tid " << tid << ", dropping" << dendl;
    m->put();
    return;
  }

  auto &op = command_table.get_command(tid);
  if (op.outbl) {
    op.outbl->claim(m->get_data());
  }
  if (op.outs) {
    *op.outs = m->rs;
  }

  if (op.on_finish) {
    op.on_finish->complete(m->r);
  }

  command_table.erase(tid);

  m->put();
}
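/*
 * Usage sketch (assumes a caller holding a Client instance; C_SaferCond is
 * the stock waitable Context):
 *
 *   bufferlist inbl, outbl;
 *   std::string outs;
 *   C_SaferCond cond;
 *   int r = client->mds_command("*", {"{\"prefix\": \"session ls\"}"},
 *                               inbl, &outbl, &outs, &cond);
 *   if (r == 0)
 *     r = cond.wait();   // completes when every targeted MDS has replied
 */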
// -------------------
// MOUNT

int Client::mount(const std::string &mount_root, const UserPerm& perms,
                  bool require_mds)
{
  Mutex::Locker lock(client_lock);

  if (mounted) {
    ldout(cct, 5) << "already mounted" << dendl;
    return 0;
  }

  int r = authenticate();
  if (r < 0) {
    lderr(cct) << "authentication failed: " << cpp_strerror(r) << dendl;
    return r;
  }

  std::string want = "mdsmap";
  const auto &mds_ns = cct->_conf->client_mds_namespace;
  if (!mds_ns.empty()) {
    r = fetch_fsmap(true);
    if (r < 0)
      return r;
    fs_cluster_id_t cid = fsmap_user->get_fs_cid(mds_ns);
    if (cid == FS_CLUSTER_ID_NONE)
      return -ENOENT;

    std::ostringstream oss;
    oss << want << "." << cid;
    want = oss.str();
  }
  ldout(cct, 10) << "Subscribing to map '" << want << "'" << dendl;

  monclient->sub_want(want, 0, 0);
  monclient->renew_subs();

  tick(); // start tick

  if (require_mds) {
    while (1) {
      auto availability = mdsmap->is_cluster_available();
      if (availability == MDSMap::STUCK_UNAVAILABLE) {
        // Error out
        ldout(cct, 10) << "mds cluster unavailable: epoch=" << mdsmap->get_epoch() << dendl;
        return CEPH_FUSE_NO_MDS_UP;
      } else if (availability == MDSMap::AVAILABLE) {
        // Continue to mount
        break;
      } else if (availability == MDSMap::TRANSIENT_UNAVAILABLE) {
        // Else, wait. MDSMonitor will update the map to bring
        // us to a conclusion eventually.
        wait_on_list(waiting_for_mdsmap);
      } else {
        // Unexpected value!
        ceph_abort();
      }
    }
  }

  populate_metadata(mount_root.empty() ? "/" : mount_root);

  filepath fp(CEPH_INO_ROOT);
  if (!mount_root.empty()) {
    fp = filepath(mount_root.c_str());
  }
  while (true) {
    MetaRequest *req = new MetaRequest(CEPH_MDS_OP_GETATTR);
    req->set_filepath(fp);
    req->head.args.getattr.mask = CEPH_STAT_CAP_INODE_ALL;
    int res = make_request(req, perms);
    if (res < 0) {
      if (res == -EACCES && root) {
        ldout(cct, 1) << __func__ << " EACCES on parent of mount point; quotas may not work" << dendl;
        break;
      }
      return res;
    }

    if (fp.depth())
      fp.pop_dentry();
    else
      break;
  }

  assert(root);
  _ll_get(root);

  mounted = true;

  // trace?
  if (!cct->_conf->client_trace.empty()) {
    traceout.open(cct->_conf->client_trace.c_str());
    if (traceout.is_open()) {
      ldout(cct, 1) << "opened trace file '" << cct->_conf->client_trace << "'" << dendl;
    } else {
      ldout(cct, 1) << "FAILED to open trace file '" << cct->_conf->client_trace << "'" << dendl;
    }
  }

  /*
  ldout(cct, 3) << "op: // client trace data structs" << dendl;
  ldout(cct, 3) << "op: struct stat st;" << dendl;
  ldout(cct, 3) << "op: struct utimbuf utim;" << dendl;
  ldout(cct, 3) << "op: int readlinkbuf_len = 1000;" << dendl;
  ldout(cct, 3) << "op: char readlinkbuf[readlinkbuf_len];" << dendl;
  ldout(cct, 3) << "op: map<string, inode_t*> dir_contents;" << dendl;
  ldout(cct, 3) << "op: map<int, int> open_files;" << dendl;
  ldout(cct, 3) << "op: int fd;" << dendl;
  */
  return 0;
}
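/*
 * Usage sketch (hypothetical caller, roughly what ceph-fuse does after
 * initialization):
 *
 *   UserPerm perms(getuid(), getgid());
 *   int r = client->mount("/", perms, false); // "" or "/" mounts the root
 *   if (r == 0) {
 *     // ... issue file operations ...
 *     client->unmount();
 *   }
 *
 * Passing require_mds = true makes mount return CEPH_FUSE_NO_MDS_UP when the
 * MDS cluster is stuck unavailable, rather than proceeding and blocking
 * later.
 */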
// UNMOUNT

void Client::_close_sessions()
{
  while (!mds_sessions.empty()) {
    // send session closes!
    for (map<mds_rank_t,MetaSession*>::iterator p = mds_sessions.begin();
         p != mds_sessions.end();
         ++p) {
      if (p->second->state != MetaSession::STATE_CLOSING) {
        _close_mds_session(p->second);
      }
    }

    // wait for sessions to close
    ldout(cct, 2) << "waiting for " << mds_sessions.size() << " mds sessions to close" << dendl;
    mount_cond.Wait(client_lock);
  }
}

void Client::flush_mdlog_sync()
{
  if (mds_requests.empty())
    return;
  for (map<mds_rank_t,MetaSession*>::iterator p = mds_sessions.begin();
       p != mds_sessions.end();
       ++p) {
    MetaSession *s = p->second;
    flush_mdlog(s);
  }
}

void Client::flush_mdlog(MetaSession *session)
{
  // Only send this to Luminous or newer MDS daemons, older daemons
  // will crash if they see an unknown CEPH_SESSION_* value in this msg.
  const uint64_t features = session->con->get_features();
  if (HAVE_FEATURE(features, SERVER_LUMINOUS)) {
    MClientSession *m = new MClientSession(CEPH_SESSION_REQUEST_FLUSH_MDLOG);
    session->con->send_message(m);
  }
}
void Client::_unmount()
{
  if (unmounting)
    return;

  ldout(cct, 2) << "unmounting" << dendl;
  unmounting = true;

  flush_mdlog_sync(); // flush the mdlog for pending requests, if any
  while (!mds_requests.empty()) {
    ldout(cct, 10) << "waiting on " << mds_requests.size() << " requests" << dendl;
    mount_cond.Wait(client_lock);
  }

  if (tick_event)
    timer.cancel_event(tick_event);
  tick_event = 0;

  cwd.reset();

  // clean up any unclosed files
  while (!fd_map.empty()) {
    Fh *fh = fd_map.begin()->second;
    fd_map.erase(fd_map.begin());
    ldout(cct, 0) << " destroyed lost open file " << fh << " on " << *fh->inode << dendl;
    _release_fh(fh);
  }

  while (!ll_unclosed_fh_set.empty()) {
    set<Fh*>::iterator it = ll_unclosed_fh_set.begin();
    Fh *fh = *it;
    ll_unclosed_fh_set.erase(fh);
    ldout(cct, 0) << " destroyed lost open file " << fh << " on " << *(fh->inode) << dendl;
    _release_fh(fh);
  }

  while (!opened_dirs.empty()) {
    dir_result_t *dirp = *opened_dirs.begin();
    ldout(cct, 0) << " destroyed lost open dir " << dirp << " on " << *dirp->inode << dendl;
    _closedir(dirp);
  }

  _ll_drop_pins();

  if (blacklisted) {
    ldout(cct, 0) << " skipping clean shutdown, we are blacklisted" << dendl;

    if (cct->_conf->client_oc) {
      // Purge all cached data so that ObjectCacher doesn't get hung up
      // trying to flush it.  ObjectCacher's behaviour on EBLACKLISTED
      // is to just leave things marked dirty
      // (http://tracker.ceph.com/issues/9105)
      for (const auto &i : inode_map) {
        objectcacher->purge_set(&(i.second->oset));
      }
    }

    mounted = false;
    return;
  }

  while (unsafe_sync_write > 0) {
    ldout(cct, 0) << unsafe_sync_write << " unsafe_sync_writes, waiting" << dendl;
    mount_cond.Wait(client_lock);
  }

  if (cct->_conf->client_oc) {
    // flush/release all buffered data
    ceph::unordered_map<vinodeno_t, Inode*>::iterator next;
    for (ceph::unordered_map<vinodeno_t, Inode*>::iterator p = inode_map.begin();
         p != inode_map.end();
         p = next) {
      next = p;
      ++next;
      Inode *in = p->second;
      if (!in) {
        ldout(cct, 0) << "null inode_map entry ino " << p->first << dendl;
        continue;
      }
      if (!in->caps.empty()) {
        InodeRef tmp_ref(in);
        _release(in);
        _flush(in, new C_Client_FlushComplete(this, in));
      }
    }
  }

  flush_caps_sync();
  wait_sync_caps(last_flush_tid);

  // empty lru cache
  trim_cache();

  while (lru.lru_get_size() > 0 ||
         !inode_map.empty()) {
    ldout(cct, 2) << "cache still has " << lru.lru_get_size()
                  << "+" << inode_map.size() << " items"
                  << ", waiting (for caps to release?)"
                  << dendl;
    utime_t until = ceph_clock_now() + utime_t(5, 0);
    int r = mount_cond.WaitUntil(client_lock, until);
    if (r == ETIMEDOUT) {
      dump_cache(NULL);
    }
  }
  assert(lru.lru_get_size() == 0);
  assert(inode_map.empty());

  // stop tracing
  if (!cct->_conf->client_trace.empty()) {
    ldout(cct, 1) << "closing trace file '" << cct->_conf->client_trace << "'" << dendl;
    traceout.close();
  }

  _close_sessions();

  mounted = false;

  ldout(cct, 2) << "unmounted." << dendl;
}

void Client::unmount()
{
  Mutex::Locker lock(client_lock);
  _unmount();
}
void Client::flush_cap_releases()
{
  // send any cap releases
  for (map<mds_rank_t,MetaSession*>::iterator p = mds_sessions.begin();
       p != mds_sessions.end();
       ++p) {
    if (p->second->release && mdsmap->is_clientreplay_or_active_or_stopping(
          p->first)) {
      if (cct->_conf->client_inject_release_failure) {
        ldout(cct, 20) << __func__ << " injecting failure to send cap release message" << dendl;
        p->second->release->put();
      } else {
        p->second->con->send_message(p->second->release);
      }
      p->second->release = 0;
    }
  }
}

void Client::tick()
{
  if (cct->_conf->client_debug_inject_tick_delay > 0) {
    sleep(cct->_conf->client_debug_inject_tick_delay);
    assert(0 == cct->_conf->set_val("client_debug_inject_tick_delay", "0"));
    cct->_conf->apply_changes(NULL);
  }

  ldout(cct, 21) << "tick" << dendl;
  tick_event = timer.add_event_after(
    cct->_conf->client_tick_interval,
    new FunctionContext([this](int) {
      // Called back via Timer, which takes client_lock for us
      assert(client_lock.is_locked_by_me());
      tick();
    }));

  utime_t now = ceph_clock_now();

  if (!mounted && !mds_requests.empty()) {
    MetaRequest *req = mds_requests.begin()->second;
    if (req->op_stamp + cct->_conf->client_mount_timeout < now) {
      req->abort(-ETIMEDOUT);
      if (req->caller_cond) {
        req->kick = true;
        req->caller_cond->Signal();
      }
      signal_cond_list(waiting_for_mdsmap);
      for (map<mds_rank_t,MetaSession*>::iterator p = mds_sessions.begin();
           p != mds_sessions.end();
           ++p)
        signal_context_list(p->second->waiting_for_open);
    }
  }

  if (mdsmap->get_epoch()) {
    // renew caps?
    utime_t el = now - last_cap_renew;
    if (el > mdsmap->get_session_timeout() / 3.0)
      renew_caps();

    flush_cap_releases();
  }

  // delayed caps
  xlist<Inode*>::iterator p = delayed_list.begin();
  while (!p.end()) {
    Inode *in = *p;
    ++p;
    if (in->hold_caps_until > now)
      break;
    delayed_list.pop_front();
    check_caps(in, CHECK_CAPS_NODELAY);
  }
}
void Client::renew_caps()
{
  ldout(cct, 10) << "renew_caps()" << dendl;
  last_cap_renew = ceph_clock_now();

  for (map<mds_rank_t,MetaSession*>::iterator p = mds_sessions.begin();
       p != mds_sessions.end();
       ++p) {
    ldout(cct, 15) << "renew_caps requesting from mds." << p->first << dendl;
    if (mdsmap->get_state(p->first) >= MDSMap::STATE_REJOIN)
      renew_caps(p->second);
  }
}

void Client::renew_caps(MetaSession *session)
{
  ldout(cct, 10) << "renew_caps mds." << session->mds_num << dendl;
  session->last_cap_renew_request = ceph_clock_now();
  uint64_t seq = ++session->cap_renew_seq;
  session->con->send_message(new MClientSession(CEPH_SESSION_REQUEST_RENEWCAPS, seq));
}
// ===============================================================
// high level (POSIXy) interface

int Client::_do_lookup(Inode *dir, const string& name, int mask,
                       InodeRef *target, const UserPerm& perms)
{
  int op = dir->snapid == CEPH_SNAPDIR ? CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
  MetaRequest *req = new MetaRequest(op);
  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_inode(dir);
  if (cct->_conf->client_debug_getattr_caps && op == CEPH_MDS_OP_LOOKUP)
    mask |= DEBUG_GETATTR_CAPS;
  req->head.args.getattr.mask = mask;

  ldout(cct, 10) << "_do_lookup on " << path << dendl;

  int r = make_request(req, perms, target);
  ldout(cct, 10) << "_do_lookup res is " << r << dendl;
  return r;
}
int Client::_lookup(Inode *dir, const string& dname, int mask, InodeRef *target,
                    const UserPerm& perms)
{
  int r = 0;
  Dentry *dn = NULL;

  if (!dir->is_dir()) {
    r = -ENOTDIR;
    goto done;
  }

  if (dname == "..") {
    if (dir->dn_set.empty())
      r = -ENOENT;
    else
      *target = dir->get_first_parent()->dir->parent_inode; //dirs can't be hard-linked
    goto done;
  }

  if (dname == ".") {
    *target = dir;
    goto done;
  }

  if (dname.length() > NAME_MAX) {
    r = -ENAMETOOLONG;
    goto done;
  }

  if (dname == cct->_conf->client_snapdir &&
      dir->snapid == CEPH_NOSNAP) {
    *target = open_snapdir(dir);
    goto done;
  }

  if (dir->dir &&
      dir->dir->dentries.count(dname)) {
    dn = dir->dir->dentries[dname];

    ldout(cct, 20) << "_lookup have dn " << dname << " mds." << dn->lease_mds << " ttl " << dn->lease_ttl
                   << " seq " << dn->lease_seq
                   << dendl;

    if (!dn->inode || dn->inode->caps_issued_mask(mask, true)) {
      // is dn lease valid?
      utime_t now = ceph_clock_now();
      if (dn->lease_mds >= 0 &&
          dn->lease_ttl > now &&
          mds_sessions.count(dn->lease_mds)) {
        MetaSession *s = mds_sessions[dn->lease_mds];
        if (s->cap_ttl > now &&
            s->cap_gen == dn->lease_gen) {
          // touch this mds's dir cap too, even though we don't _explicitly_ use it here, to
          // make trim_caps() behave.
          dir->try_touch_cap(dn->lease_mds);
          goto hit_dn;
        }
        ldout(cct, 20) << " bad lease, cap_ttl " << s->cap_ttl << ", cap_gen " << s->cap_gen
                       << " vs lease_gen " << dn->lease_gen << dendl;
      }
      // dir shared caps?
      if (dir->caps_issued_mask(CEPH_CAP_FILE_SHARED, true)) {
        if (dn->cap_shared_gen == dir->shared_gen &&
            (!dn->inode || dn->inode->caps_issued_mask(mask, true)))
          goto hit_dn;
        if (!dn->inode && (dir->flags & I_COMPLETE)) {
          ldout(cct, 10) << "_lookup concluded ENOENT locally for "
                         << *dir << " dn '" << dname << "'" << dendl;
          return -ENOENT;
        }
      }
    } else {
      ldout(cct, 20) << " no cap on " << dn->inode->vino() << dendl;
    }
  } else {
    // can we conclude ENOENT locally?
    if (dir->caps_issued_mask(CEPH_CAP_FILE_SHARED, true) &&
        (dir->flags & I_COMPLETE)) {
      ldout(cct, 10) << "_lookup concluded ENOENT locally for " << *dir << " dn '" << dname << "'" << dendl;
      return -ENOENT;
    }
  }

  r = _do_lookup(dir, dname, mask, target, perms);
  goto done;

 hit_dn:
  if (dn->inode) {
    *target = dn->inode;
  } else {
    r = -ENOENT;
  }
 done:
  if (r < 0)
    ldout(cct, 10) << "_lookup " << *dir << " " << dname << " = " << r << dendl;
  else
    ldout(cct, 10) << "_lookup " << *dir << " " << dname << " = " << **target << dendl;
  return r;
}
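/*
 * Example of the lease fast path above (hypothetical state): a cached dentry
 * is trusted without an MDS round trip only when every check holds:
 *
 *   dn->lease_mds >= 0            // an MDS actually granted a lease
 *   dn->lease_ttl > now           // the dentry lease itself is live
 *   s->cap_ttl > now              // the session's caps are still valid
 *   s->cap_gen == dn->lease_gen   // no session reset since the grant
 *
 * Failing that, a shared cap on the whole directory (CEPH_CAP_FILE_SHARED
 * plus a matching shared_gen) can still satisfy the lookup, and I_COMPLETE
 * lets us return ENOENT without asking the MDS at all.
 */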
int Client::get_or_create(Inode *dir, const char* name,
                          Dentry **pdn, bool expect_null)
{
  // lookup
  ldout(cct, 20) << "get_or_create " << *dir << " name " << name << dendl;
  dir->open_dir();
  if (dir->dir->dentries.count(name)) {
    Dentry *dn = dir->dir->dentries[name];

    // is dn lease valid?
    utime_t now = ceph_clock_now();
    if (dn->inode &&
        dn->lease_mds >= 0 &&
        dn->lease_ttl > now &&
        mds_sessions.count(dn->lease_mds)) {
      MetaSession *s = mds_sessions[dn->lease_mds];
      if (s->cap_ttl > now &&
          s->cap_gen == dn->lease_gen) {
        if (expect_null)
          return -EEXIST;
      }
    }
    *pdn = dn;
  } else {
    // otherwise link up a new one
    *pdn = link(dir->dir, name, NULL, NULL);
  }

  // success
  return 0;
}
int Client::path_walk(const filepath& origpath, InodeRef *end,
                      const UserPerm& perms, bool followsym, int mask)
{
  filepath path = origpath;
  InodeRef cur;
  if (origpath.absolute())
    cur = root;
  else
    cur = cwd;
  assert(cur);

  ldout(cct, 10) << "path_walk " << path << dendl;

  int symlinks = 0;

  unsigned i = 0;
  while (i < path.depth() && cur) {
    int caps = 0;
    const string &dname = path[i];
    ldout(cct, 10) << " " << i << " " << *cur << " " << dname << dendl;
    ldout(cct, 20) << "  (path is " << path << ")" << dendl;
    InodeRef next;
    if (cct->_conf->client_permissions) {
      int r = may_lookup(cur.get(), perms);
      if (r < 0)
        return r;
      caps = CEPH_CAP_AUTH_SHARED;
    }

    /* Get extra requested caps on the last component */
    if (i == (path.depth() - 1))
      caps |= mask;
    int r = _lookup(cur.get(), dname, caps, &next, perms);
    if (r < 0)
      return r;
    // only follow trailing symlink if followsym.  always follow
    // 'directory' symlinks.
    if (next && next->is_symlink()) {
      symlinks++;
      ldout(cct, 20) << " symlink count " << symlinks << ", value is '" << next->symlink << "'" << dendl;
      if (symlinks > MAXSYMLINKS) {
        return -ELOOP;
      }

      if (i < path.depth() - 1) {
        // dir symlink
        // replace consumed components of path with symlink dir target
        filepath resolved(next->symlink.c_str());
        resolved.append(path.postfixpath(i + 1));
        path = resolved;
        i = 0;
        if (next->symlink[0] == '/') {
          cur = root;
        }
        continue;
      } else if (followsym) {
        if (next->symlink[0] == '/') {
          path = next->symlink.c_str();
          i = 0;
          // reset position
          cur = root;
        } else {
          filepath more(next->symlink.c_str());
          // we need to remove the symlink component from off of the path
          // before adding the target that the symlink points to.  remain
          // at the same position in the path.
          path.pop_dentry();
          path.append(more);
        }
        continue;
      }
    }
    cur.swap(next);
    i++;
  }
  if (!cur)
    return -ENOENT;
  if (end)
    end->swap(cur);
  return 0;
}
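/*
 * Worked example of the symlink handling above (hypothetical tree): walking
 * "a/link/c" where "link" -> "/b" hits the dir-symlink case (i < depth-1):
 * the consumed components are replaced, the remaining path becomes "/b/c"
 * (walked from root because the target is absolute), and i restarts at 0.
 * A trailing symlink is only chased when followsym is set, which is how
 * readlink() resolves the parent directories yet still lands on the link
 * itself.
 */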
int Client::link(const char *relexisting, const char *relpath, const UserPerm& perm)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "link" << std::endl;
  tout(cct) << relexisting << std::endl;
  tout(cct) << relpath << std::endl;

  if (unmounting)
    return -ENOTCONN;

  filepath existing(relexisting);

  InodeRef in, dir;
  int r = path_walk(existing, &in, perm, true);
  if (r < 0)
    return r;
  if (std::string(relpath) == "/") {
    r = -EEXIST;
    return r;
  }
  filepath path(relpath);
  string name = path.last_dentry();
  path.pop_dentry();

  r = path_walk(path, &dir, perm, true);
  if (r < 0)
    return r;
  if (cct->_conf->client_permissions) {
    if (S_ISDIR(in->mode)) {
      r = -EPERM;
      return r;
    }
    r = may_hardlink(in.get(), perm);
    if (r < 0)
      return r;
    r = may_create(dir.get(), perm);
    if (r < 0)
      return r;
  }
  r = _link(in.get(), dir.get(), name.c_str(), perm);
  return r;
}

int Client::unlink(const char *relpath, const UserPerm& perm)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "unlink" << std::endl;
  tout(cct) << relpath << std::endl;

  if (unmounting)
    return -ENOTCONN;

  if (std::string(relpath) == "/")
    return -EISDIR;

  filepath path(relpath);
  string name = path.last_dentry();
  path.pop_dentry();
  InodeRef dir;
  int r = path_walk(path, &dir, perm);
  if (r < 0)
    return r;
  if (cct->_conf->client_permissions) {
    r = may_delete(dir.get(), name.c_str(), perm);
    if (r < 0)
      return r;
  }
  return _unlink(dir.get(), name.c_str(), perm);
}
int Client::rename(const char *relfrom, const char *relto, const UserPerm& perm)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "rename" << std::endl;
  tout(cct) << relfrom << std::endl;
  tout(cct) << relto << std::endl;

  if (unmounting)
    return -ENOTCONN;

  if (std::string(relfrom) == "/" || std::string(relto) == "/")
    return -EBUSY;

  filepath from(relfrom);
  filepath to(relto);
  string fromname = from.last_dentry();
  from.pop_dentry();
  string toname = to.last_dentry();
  to.pop_dentry();

  InodeRef fromdir, todir;
  int r = path_walk(from, &fromdir, perm);
  if (r < 0)
    goto out;
  r = path_walk(to, &todir, perm);
  if (r < 0)
    goto out;

  if (cct->_conf->client_permissions) {
    int r = may_delete(fromdir.get(), fromname.c_str(), perm);
    if (r < 0)
      return r;
    r = may_delete(todir.get(), toname.c_str(), perm);
    if (r < 0 && r != -ENOENT)
      return r;
  }
  r = _rename(fromdir.get(), fromname.c_str(), todir.get(), toname.c_str(), perm);
out:
  return r;
}
// dirs

int Client::mkdir(const char *relpath, mode_t mode, const UserPerm& perm)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "mkdir" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << mode << std::endl;
  ldout(cct, 10) << "mkdir: " << relpath << dendl;

  if (unmounting)
    return -ENOTCONN;

  if (std::string(relpath) == "/")
    return -EEXIST;

  filepath path(relpath);
  string name = path.last_dentry();
  path.pop_dentry();
  InodeRef dir;
  int r = path_walk(path, &dir, perm);
  if (r < 0)
    return r;
  if (cct->_conf->client_permissions) {
    r = may_create(dir.get(), perm);
    if (r < 0)
      return r;
  }
  return _mkdir(dir.get(), name.c_str(), mode, perm);
}
int Client::mkdirs(const char *relpath, mode_t mode, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 10) << "Client::mkdirs " << relpath << dendl;
  tout(cct) << "mkdirs" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << mode << std::endl;

  if (unmounting)
    return -ENOTCONN;

  //get through existing parts of path
  filepath path(relpath);
  unsigned int i;
  int r = 0, caps = 0;
  InodeRef cur, next;
  cur = cwd;
  for (i = 0; i < path.depth(); ++i) {
    if (cct->_conf->client_permissions) {
      r = may_lookup(cur.get(), perms);
      if (r < 0)
        break;
      caps = CEPH_CAP_AUTH_SHARED;
    }
    r = _lookup(cur.get(), path[i].c_str(), caps, &next, perms);
    if (r < 0)
      break;
    cur.swap(next);
  }
  //check that we have work left to do
  if (i == path.depth()) return -EEXIST;
  if (r != -ENOENT) return r;
  ldout(cct, 20) << "mkdirs got through " << i << " directories on path " << relpath << dendl;
  //make new directory at each level
  for (; i < path.depth(); ++i) {
    if (cct->_conf->client_permissions) {
      r = may_create(cur.get(), perms);
      if (r < 0)
        return r;
    }
    //make new dir
    r = _mkdir(cur.get(), path[i].c_str(), mode, perms, &next);

    //check proper creation/existence
    if (-EEXIST == r && i < path.depth() - 1) {
      r = _lookup(cur.get(), path[i].c_str(), CEPH_CAP_AUTH_SHARED, &next, perms);
    }
    if (r < 0)
      return r;
    //move to new dir and continue
    cur.swap(next);
    ldout(cct, 20) << "mkdirs: successfully created directory "
                   << filepath(cur->ino).get_path() << dendl;
  }
  return 0;
}
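/*
 * Usage sketch (hypothetical caller): like `mkdir -p`, existing prefixes are
 * fine and only the missing suffix is created:
 *
 *   UserPerm perms(getuid(), getgid());
 *   int r = client->mkdirs("a/b/c", 0755, perms);
 *   // r == -EEXIST only if the *entire* path already existed; a racing
 *   // creator of an intermediate dir is absorbed by the -EEXIST + _lookup
 *   // retry in the second loop above.
 */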
int Client::rmdir(const char *relpath, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "rmdir" << std::endl;
  tout(cct) << relpath << std::endl;

  if (unmounting)
    return -ENOTCONN;

  if (std::string(relpath) == "/")
    return -EBUSY;

  filepath path(relpath);
  string name = path.last_dentry();
  path.pop_dentry();
  InodeRef dir;
  int r = path_walk(path, &dir, perms);
  if (r < 0)
    return r;
  if (cct->_conf->client_permissions) {
    int r = may_delete(dir.get(), name.c_str(), perms);
    if (r < 0)
      return r;
  }
  return _rmdir(dir.get(), name.c_str(), perms);
}

int Client::mknod(const char *relpath, mode_t mode, const UserPerm& perms, dev_t rdev)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "mknod" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << mode << std::endl;
  tout(cct) << rdev << std::endl;

  if (unmounting)
    return -ENOTCONN;

  if (std::string(relpath) == "/")
    return -EEXIST;

  filepath path(relpath);
  string name = path.last_dentry();
  path.pop_dentry();
  InodeRef dir;
  int r = path_walk(path, &dir, perms);
  if (r < 0)
    return r;
  if (cct->_conf->client_permissions) {
    int r = may_create(dir.get(), perms);
    if (r < 0)
      return r;
  }
  return _mknod(dir.get(), name.c_str(), mode, rdev, perms);
}
// symlinks

int Client::symlink(const char *target, const char *relpath, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "symlink" << std::endl;
  tout(cct) << target << std::endl;
  tout(cct) << relpath << std::endl;

  if (std::string(relpath) == "/")
    return -EEXIST;

  filepath path(relpath);
  string name = path.last_dentry();
  path.pop_dentry();
  InodeRef dir;
  int r = path_walk(path, &dir, perms);
  if (r < 0)
    return r;
  if (cct->_conf->client_permissions) {
    int r = may_create(dir.get(), perms);
    if (r < 0)
      return r;
  }
  return _symlink(dir.get(), name.c_str(), target, perms);
}
int Client::readlink(const char *relpath, char *buf, loff_t size, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "readlink" << std::endl;
  tout(cct) << relpath << std::endl;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms, false);
  if (r < 0)
    return r;

  return _readlink(in.get(), buf, size);
}

int Client::_readlink(Inode *in, char *buf, size_t size)
{
  if (!in->is_symlink())
    return -EINVAL;

  // copy into buf (at most size bytes)
  int r = in->symlink.length();
  if (r > (int)size)
    r = size;
  memcpy(buf, in->symlink.c_str(), r);
  return r;
}
// inode stuff

int Client::_getattr(Inode *in, int mask, const UserPerm& perms, bool force)
{
  bool yes = in->caps_issued_mask(mask, true);

  ldout(cct, 10) << "_getattr mask " << ccap_string(mask) << " issued=" << yes << dendl;
  if (yes && !force)
    return 0;

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_GETATTR);
  filepath path;
  in->make_nosnap_relative_path(path);
  req->set_filepath(path);
  req->set_inode(in);
  req->head.args.getattr.mask = mask;

  int res = make_request(req, perms);
  ldout(cct, 10) << "_getattr result=" << res << dendl;
  return res;
}
int Client::_do_setattr(Inode *in, struct ceph_statx *stx, int mask,
			const UserPerm& perms, InodeRef *inp)
{
  int issued = in->caps_issued();

  ldout(cct, 10) << "_setattr mask " << mask << " issued " <<
    ccap_string(issued) << dendl;

  if (in->snapid != CEPH_NOSNAP) {
    return -EROFS;
  }
  if ((mask & CEPH_SETATTR_SIZE) &&
      (unsigned long)stx->stx_size > in->size &&
      is_quota_bytes_exceeded(in, (unsigned long)stx->stx_size - in->size,
			      perms)) {
    return -EDQUOT;
  }

  // make the change locally?
  if ((in->cap_dirtier_uid >= 0 && perms.uid() != in->cap_dirtier_uid) ||
      (in->cap_dirtier_gid >= 0 && perms.gid() != in->cap_dirtier_gid)) {
    ldout(cct, 10) << __func__ << " caller " << perms.uid() << ":" << perms.gid()
		   << " != cap dirtier " << in->cap_dirtier_uid << ":"
		   << in->cap_dirtier_gid << ", forcing sync setattr"
		   << dendl;
    /*
     * This works because we implicitly flush the caps as part of the
     * request, so the cap update check will happen with the writeback
     * cap context, and then the setattr check will happen with the
     * caller's context.
     *
     * In reality this pattern is likely pretty rare (different users
     * setattr'ing the same file). If that turns out not to be the
     * case later, we can build a more complex pipelined cap writeback
     * infrastructure.
     */
    if (!mask)
      mask |= CEPH_SETATTR_CTIME;
    goto force_request;
  }

  if (!mask) {
    // caller just needs us to bump the ctime
    in->ctime = ceph_clock_now();
    in->cap_dirtier_uid = perms.uid();
    in->cap_dirtier_gid = perms.gid();
    if (issued & CEPH_CAP_AUTH_EXCL)
      in->mark_caps_dirty(CEPH_CAP_AUTH_EXCL);
    else if (issued & CEPH_CAP_FILE_EXCL)
      in->mark_caps_dirty(CEPH_CAP_FILE_EXCL);
    else if (issued & CEPH_CAP_XATTR_EXCL)
      in->mark_caps_dirty(CEPH_CAP_XATTR_EXCL);
    else
      mask |= CEPH_SETATTR_CTIME;
  }

  if (in->caps_issued_mask(CEPH_CAP_AUTH_EXCL)) {
    bool kill_sguid = mask & (CEPH_SETATTR_SIZE|CEPH_SETATTR_KILL_SGUID);

    mask &= ~CEPH_SETATTR_KILL_SGUID;

    if (mask & CEPH_SETATTR_UID) {
      in->ctime = ceph_clock_now();
      in->cap_dirtier_uid = perms.uid();
      in->cap_dirtier_gid = perms.gid();
      in->uid = stx->stx_uid;
      in->mark_caps_dirty(CEPH_CAP_AUTH_EXCL);
      mask &= ~CEPH_SETATTR_UID;
      kill_sguid = true;
      ldout(cct,10) << "changing uid to " << stx->stx_uid << dendl;
    }
    if (mask & CEPH_SETATTR_GID) {
      in->ctime = ceph_clock_now();
      in->cap_dirtier_uid = perms.uid();
      in->cap_dirtier_gid = perms.gid();
      in->gid = stx->stx_gid;
      in->mark_caps_dirty(CEPH_CAP_AUTH_EXCL);
      mask &= ~CEPH_SETATTR_GID;
      kill_sguid = true;
      ldout(cct,10) << "changing gid to " << stx->stx_gid << dendl;
    }

    if (mask & CEPH_SETATTR_MODE) {
      in->ctime = ceph_clock_now();
      in->cap_dirtier_uid = perms.uid();
      in->cap_dirtier_gid = perms.gid();
      in->mode = (in->mode & ~07777) | (stx->stx_mode & 07777);
      in->mark_caps_dirty(CEPH_CAP_AUTH_EXCL);
      mask &= ~CEPH_SETATTR_MODE;
      ldout(cct,10) << "changing mode to " << stx->stx_mode << dendl;
    } else if (kill_sguid && S_ISREG(in->mode) && (in->mode & (S_IXUSR|S_IXGRP|S_IXOTH))) {
      /* Must squash any setuid/setgid bits with an ownership change */
      in->mode &= ~(S_ISUID|S_ISGID);
      in->mark_caps_dirty(CEPH_CAP_AUTH_EXCL);
    }

    if (mask & CEPH_SETATTR_BTIME) {
      in->ctime = ceph_clock_now();
      in->cap_dirtier_uid = perms.uid();
      in->cap_dirtier_gid = perms.gid();
      in->btime = utime_t(stx->stx_btime);
      in->mark_caps_dirty(CEPH_CAP_AUTH_EXCL);
      mask &= ~CEPH_SETATTR_BTIME;
      ldout(cct,10) << "changing btime to " << in->btime << dendl;
    }
  } else if (mask & CEPH_SETATTR_SIZE) {
    /* If we don't have Ax, then we must ask the server to clear them on truncate */
    mask |= CEPH_SETATTR_KILL_SGUID;
  }

  if (in->caps_issued_mask(CEPH_CAP_FILE_EXCL)) {
    if (mask & (CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME)) {
      if (mask & CEPH_SETATTR_MTIME)
	in->mtime = utime_t(stx->stx_mtime);
      if (mask & CEPH_SETATTR_ATIME)
	in->atime = utime_t(stx->stx_atime);
      in->ctime = ceph_clock_now();
      in->cap_dirtier_uid = perms.uid();
      in->cap_dirtier_gid = perms.gid();
      in->time_warp_seq++;
      in->mark_caps_dirty(CEPH_CAP_FILE_EXCL);
      mask &= ~(CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME);
    }
  }
  if (!mask)
    return 0;

force_request:
  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_SETATTR);

  filepath path;

  in->make_nosnap_relative_path(path);
  req->set_filepath(path);
  req->set_inode(in);

  if (mask & CEPH_SETATTR_KILL_SGUID) {
    req->inode_drop |= CEPH_CAP_AUTH_SHARED;
  }
  if (mask & CEPH_SETATTR_MODE) {
    req->head.args.setattr.mode = stx->stx_mode;
    req->inode_drop |= CEPH_CAP_AUTH_SHARED;
    ldout(cct,10) << "changing mode to " << stx->stx_mode << dendl;
  }
  if (mask & CEPH_SETATTR_UID) {
    req->head.args.setattr.uid = stx->stx_uid;
    req->inode_drop |= CEPH_CAP_AUTH_SHARED;
    ldout(cct,10) << "changing uid to " << stx->stx_uid << dendl;
  }
  if (mask & CEPH_SETATTR_GID) {
    req->head.args.setattr.gid = stx->stx_gid;
    req->inode_drop |= CEPH_CAP_AUTH_SHARED;
    ldout(cct,10) << "changing gid to " << stx->stx_gid << dendl;
  }
  if (mask & CEPH_SETATTR_BTIME) {
    req->head.args.setattr.btime = utime_t(stx->stx_btime);
    req->inode_drop |= CEPH_CAP_AUTH_SHARED;
  }
  if (mask & CEPH_SETATTR_MTIME) {
    req->head.args.setattr.mtime = utime_t(stx->stx_mtime);
    req->inode_drop |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
      CEPH_CAP_FILE_WR;
  }
  if (mask & CEPH_SETATTR_ATIME) {
    req->head.args.setattr.atime = utime_t(stx->stx_atime);
    req->inode_drop |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD |
      CEPH_CAP_FILE_WR;
  }
  if (mask & CEPH_SETATTR_SIZE) {
    if ((unsigned long)stx->stx_size < mdsmap->get_max_filesize()) {
      req->head.args.setattr.size = stx->stx_size;
      ldout(cct,10) << "changing size to " << stx->stx_size << dendl;
    } else { //too big!
      put_request(req);
      ldout(cct,10) << "unable to set size to " << stx->stx_size << ". Too large!" << dendl;
      return -EFBIG;
    }
    req->inode_drop |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
      CEPH_CAP_FILE_WR;
  }
  req->head.args.setattr.mask = mask;

  req->regetattr_mask = mask;

  int res = make_request(req, perms, inp);
  ldout(cct, 10) << "_setattr result=" << res << dendl;
  return res;
}
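/*
 * Illustrative usage sketch (not part of the build): callers express the
 * fields they want changed via CEPH_SETATTR_* bits in `mask`; _do_setattr()
 * applies what it can locally under exclusive caps and sends the remainder
 * to the MDS.  A hypothetical truncate-to-zero through the public statx
 * entry point (names below are assumptions) would look like:
 *
 *   struct ceph_statx stx = {};
 *   stx.stx_size = 0;
 *   int r = client->setattrx("somefile", &stx, CEPH_SETATTR_SIZE, perms);
 */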
/* Note that we only care about attrs that setattr cares about */
void Client::stat_to_statx(struct stat *st, struct ceph_statx *stx)
{
  stx->stx_size = st->st_size;
  stx->stx_mode = st->st_mode;
  stx->stx_uid = st->st_uid;
  stx->stx_gid = st->st_gid;
  stx->stx_mtime = st->st_mtim;
  stx->stx_atime = st->st_atim;
}
int Client::__setattrx(Inode *in, struct ceph_statx *stx, int mask,
		       const UserPerm& perms, InodeRef *inp)
{
  int ret = _do_setattr(in, stx, mask, perms, inp);
  if (ret < 0)
    return ret;
  if (mask & CEPH_SETATTR_MODE)
    ret = _posix_acl_chmod(in, stx->stx_mode, perms);
  return ret;
}

int Client::_setattrx(InodeRef &in, struct ceph_statx *stx, int mask,
		      const UserPerm& perms)
{
  mask &= (CEPH_SETATTR_MODE | CEPH_SETATTR_UID |
	   CEPH_SETATTR_GID | CEPH_SETATTR_MTIME |
	   CEPH_SETATTR_ATIME | CEPH_SETATTR_SIZE |
	   CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME);
  if (cct->_conf->client_permissions) {
    int r = may_setattr(in.get(), stx, mask, perms);
    if (r < 0)
      return r;
  }
  return __setattrx(in.get(), stx, mask, perms);
}
int Client::_setattr(InodeRef &in, struct stat *attr, int mask,
		     const UserPerm& perms)
{
  struct ceph_statx stx;

  stat_to_statx(attr, &stx);
  mask &= ~CEPH_SETATTR_BTIME;

  if ((mask & CEPH_SETATTR_UID) && attr->st_uid == static_cast<uid_t>(-1)) {
    mask &= ~CEPH_SETATTR_UID;
  }
  if ((mask & CEPH_SETATTR_GID) && attr->st_gid == static_cast<gid_t>(-1)) {
    mask &= ~CEPH_SETATTR_GID;
  }

  return _setattrx(in, &stx, mask, perms);
}
int Client::setattr(const char *relpath, struct stat *attr, int mask,
		    const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "setattr" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << mask << std::endl;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms);
  if (r < 0)
    return r;
  return _setattr(in, attr, mask, perms);
}

int Client::setattrx(const char *relpath, struct ceph_statx *stx, int mask,
		     const UserPerm& perms, int flags)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "setattrx" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << mask << std::endl;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms, !(flags & AT_SYMLINK_NOFOLLOW));
  if (r < 0)
    return r;
  return _setattrx(in, stx, mask, perms);
}
int Client::fsetattr(int fd, struct stat *attr, int mask, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "fsetattr" << std::endl;
  tout(cct) << fd << std::endl;
  tout(cct) << mask << std::endl;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (f->flags & O_PATH)
    return -EBADF;
#endif
  return _setattr(f->inode, attr, mask, perms);
}

int Client::fsetattrx(int fd, struct ceph_statx *stx, int mask, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "fsetattrx" << std::endl;
  tout(cct) << fd << std::endl;
  tout(cct) << mask << std::endl;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (f->flags & O_PATH)
    return -EBADF;
#endif
  return _setattrx(f->inode, stx, mask, perms);
}
int Client::stat(const char *relpath, struct stat *stbuf, const UserPerm& perms,
		 frag_info_t *dirstat, int mask)
{
  ldout(cct, 3) << "stat enter (relpath " << relpath << " mask " << mask << ")" << dendl;
  Mutex::Locker lock(client_lock);
  tout(cct) << "stat" << std::endl;
  tout(cct) << relpath << std::endl;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms, true, mask);
  if (r < 0)
    return r;
  r = _getattr(in, mask, perms);
  if (r < 0) {
    ldout(cct, 3) << "stat exit on error!" << dendl;
    return r;
  }
  fill_stat(in, stbuf, dirstat);
  ldout(cct, 3) << "stat exit (relpath " << relpath << " mask " << mask << ")" << dendl;
  return r;
}
unsigned Client::statx_to_mask(unsigned int flags, unsigned int want)
{
  unsigned mask = 0;

  /* if NO_ATTR_SYNC is set, then we don't need any -- just use what's in cache */
  if (flags & AT_NO_ATTR_SYNC)
    goto out;

  /* Always set PIN to distinguish from AT_NO_ATTR_SYNC case */
  mask |= CEPH_CAP_PIN;
  if (want & (CEPH_STATX_MODE|CEPH_STATX_UID|CEPH_STATX_GID|CEPH_STATX_BTIME|CEPH_STATX_CTIME|CEPH_STATX_VERSION))
    mask |= CEPH_CAP_AUTH_SHARED;
  if (want & (CEPH_STATX_NLINK|CEPH_STATX_CTIME|CEPH_STATX_VERSION))
    mask |= CEPH_CAP_LINK_SHARED;
  if (want & (CEPH_STATX_ATIME|CEPH_STATX_MTIME|CEPH_STATX_CTIME|CEPH_STATX_SIZE|CEPH_STATX_BLOCKS|CEPH_STATX_VERSION))
    mask |= CEPH_CAP_FILE_SHARED;
  if (want & (CEPH_STATX_VERSION|CEPH_STATX_CTIME))
    mask |= CEPH_CAP_XATTR_SHARED;
out:
  return mask;
}
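/*
 * Illustrative sketch (not part of the build): statx_to_mask() is how the
 * statx() entry point decides which cap bits must be valid before answering.
 * For example, asking only for the link count:
 *
 *   unsigned mask = statx_to_mask(0, CEPH_STATX_NLINK);
 *   // mask == (CEPH_CAP_PIN | CEPH_CAP_LINK_SHARED)
 *
 * while passing AT_NO_ATTR_SYNC in flags yields 0, i.e. "serve whatever is
 * cached without revalidation".
 */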
int Client::statx(const char *relpath, struct ceph_statx *stx,
		  const UserPerm& perms,
		  unsigned int want, unsigned int flags)
{
  ldout(cct, 3) << "statx enter (relpath " << relpath << " want " << want << ")" << dendl;
  Mutex::Locker lock(client_lock);
  tout(cct) << "statx" << std::endl;
  tout(cct) << relpath << std::endl;

  filepath path(relpath);
  InodeRef in;

  unsigned mask = statx_to_mask(flags, want);

  int r = path_walk(path, &in, perms, !(flags & AT_SYMLINK_NOFOLLOW), mask);
  if (r < 0)
    return r;

  r = _getattr(in, mask, perms);
  if (r < 0) {
    ldout(cct, 3) << "statx exit on error!" << dendl;
    return r;
  }

  fill_statx(in, mask, stx);
  ldout(cct, 3) << "statx exit (relpath " << relpath << " mask " << stx->stx_mask << ")" << dendl;
  return r;
}
int Client::lstat(const char *relpath, struct stat *stbuf,
		  const UserPerm& perms, frag_info_t *dirstat, int mask)
{
  ldout(cct, 3) << "lstat enter (relpath " << relpath << " mask " << mask << ")" << dendl;
  Mutex::Locker lock(client_lock);
  tout(cct) << "lstat" << std::endl;
  tout(cct) << relpath << std::endl;

  filepath path(relpath);
  InodeRef in;
  // don't follow symlinks
  int r = path_walk(path, &in, perms, false, mask);
  if (r < 0)
    return r;
  r = _getattr(in, mask, perms);
  if (r < 0) {
    ldout(cct, 3) << "lstat exit on error!" << dendl;
    return r;
  }
  fill_stat(in, stbuf, dirstat);
  ldout(cct, 3) << "lstat exit (relpath " << relpath << " mask " << mask << ")" << dendl;
  return r;
}
int Client::fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat, nest_info_t *rstat)
{
  ldout(cct, 10) << "fill_stat on " << in->ino << " snap/dev" << in->snapid
		 << " mode 0" << oct << in->mode << dec
		 << " mtime " << in->mtime << " ctime " << in->ctime << dendl;
  memset(st, 0, sizeof(struct stat));
  if (use_faked_inos())
    st->st_ino = in->faked_ino;
  else
    st->st_ino = in->ino;
  st->st_dev = in->snapid;
  st->st_mode = in->mode;
  st->st_rdev = in->rdev;
  if (in->is_dir()) {
    switch (in->nlink) {
      case 0:
	st->st_nlink = 0; /* dir is unlinked */
	break;
      case 1:
	st->st_nlink = 1 /* parent dentry */
		       + 1 /* <dir>/. */
		       + in->dirstat.nsubdirs; /* include <dir>/. self-reference */
	break;
      default:
	ceph_abort();
    }
  } else {
    st->st_nlink = in->nlink;
  }
  st->st_uid = in->uid;
  st->st_gid = in->gid;
  if (in->ctime > in->mtime) {
    stat_set_ctime_sec(st, in->ctime.sec());
    stat_set_ctime_nsec(st, in->ctime.nsec());
  } else {
    stat_set_ctime_sec(st, in->mtime.sec());
    stat_set_ctime_nsec(st, in->mtime.nsec());
  }
  stat_set_atime_sec(st, in->atime.sec());
  stat_set_atime_nsec(st, in->atime.nsec());
  stat_set_mtime_sec(st, in->mtime.sec());
  stat_set_mtime_nsec(st, in->mtime.nsec());
  if (in->is_dir()) {
    if (cct->_conf->client_dirsize_rbytes)
      st->st_size = in->rstat.rbytes;
    else
      st->st_size = in->dirstat.size();
    st->st_blocks = 1;
  } else {
    st->st_size = in->size;
    st->st_blocks = (in->size + 511) >> 9;
  }
  st->st_blksize = MAX(in->layout.stripe_unit, 4096);

  if (dirstat)
    *dirstat = in->dirstat;
  if (rstat)
    *rstat = in->rstat;

  return in->caps_issued();
}
void Client::fill_statx(Inode *in, unsigned int mask, struct ceph_statx *stx)
{
  ldout(cct, 10) << "fill_statx on " << in->ino << " snap/dev" << in->snapid
		 << " mode 0" << oct << in->mode << dec
		 << " mtime " << in->mtime << " ctime " << in->ctime << dendl;
  memset(stx, 0, sizeof(struct ceph_statx));

  /*
   * If mask is 0, then the caller set AT_NO_ATTR_SYNC. Reset the mask
   * so that all bits are set.
   */
  if (!mask)
    mask = ~0;

  /* These are always considered to be available */
  stx->stx_dev = in->snapid;
  stx->stx_blksize = MAX(in->layout.stripe_unit, 4096);

  /* Type bits are always set, even when CEPH_STATX_MODE is not */
  stx->stx_mode = S_IFMT & in->mode;
  stx->stx_ino = use_faked_inos() ? in->faked_ino : (ino_t)in->ino;
  stx->stx_rdev = in->rdev;
  stx->stx_mask |= (CEPH_STATX_INO|CEPH_STATX_RDEV);

  if (mask & CEPH_CAP_AUTH_SHARED) {
    stx->stx_uid = in->uid;
    stx->stx_gid = in->gid;
    stx->stx_mode = in->mode;
    in->btime.to_timespec(&stx->stx_btime);
    stx->stx_mask |= (CEPH_STATX_MODE|CEPH_STATX_UID|CEPH_STATX_GID|CEPH_STATX_BTIME);
  }

  if (mask & CEPH_CAP_LINK_SHARED) {
    if (in->is_dir()) {
      switch (in->nlink) {
	case 0:
	  stx->stx_nlink = 0; /* dir is unlinked */
	  break;
	case 1:
	  stx->stx_nlink = 1 /* parent dentry */
			   + 1 /* <dir>/. */
			   + in->dirstat.nsubdirs; /* include <dir>/. self-reference */
	  break;
	default:
	  ceph_abort();
      }
    } else {
      stx->stx_nlink = in->nlink;
    }
    stx->stx_mask |= CEPH_STATX_NLINK;
  }

  if (mask & CEPH_CAP_FILE_SHARED) {
    in->atime.to_timespec(&stx->stx_atime);
    in->mtime.to_timespec(&stx->stx_mtime);

    if (in->is_dir()) {
      if (cct->_conf->client_dirsize_rbytes)
	stx->stx_size = in->rstat.rbytes;
      else
	stx->stx_size = in->dirstat.size();
      stx->stx_blocks = 1;
    } else {
      stx->stx_size = in->size;
      stx->stx_blocks = (in->size + 511) >> 9;
    }
    stx->stx_mask |= (CEPH_STATX_ATIME|CEPH_STATX_MTIME|
		      CEPH_STATX_SIZE|CEPH_STATX_BLOCKS);
  }

  /* Change time and change_attr both require all shared caps to view */
  if ((mask & CEPH_STAT_CAP_INODE_ALL) == CEPH_STAT_CAP_INODE_ALL) {
    stx->stx_version = in->change_attr;
    if (in->ctime > in->mtime)
      in->ctime.to_timespec(&stx->stx_ctime);
    else
      in->mtime.to_timespec(&stx->stx_ctime);
    stx->stx_mask |= (CEPH_STATX_CTIME|CEPH_STATX_VERSION);
  }
}
void Client::touch_dn(Dentry *dn)
{
  lru.lru_touch(dn);
}
int Client::chmod(const char *relpath, mode_t mode, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "chmod" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << mode << std::endl;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms);
  if (r < 0)
    return r;
  struct stat attr;
  attr.st_mode = mode;
  return _setattr(in, &attr, CEPH_SETATTR_MODE, perms);
}

int Client::fchmod(int fd, mode_t mode, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "fchmod" << std::endl;
  tout(cct) << fd << std::endl;
  tout(cct) << mode << std::endl;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (f->flags & O_PATH)
    return -EBADF;
#endif
  struct stat attr;
  attr.st_mode = mode;
  return _setattr(f->inode, &attr, CEPH_SETATTR_MODE, perms);
}

int Client::lchmod(const char *relpath, mode_t mode, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "lchmod" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << mode << std::endl;

  filepath path(relpath);
  InodeRef in;
  // don't follow symlinks
  int r = path_walk(path, &in, perms, false);
  if (r < 0)
    return r;
  struct stat attr;
  attr.st_mode = mode;
  return _setattr(in, &attr, CEPH_SETATTR_MODE, perms);
}
int Client::chown(const char *relpath, uid_t new_uid, gid_t new_gid,
		  const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "chown" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << new_uid << std::endl;
  tout(cct) << new_gid << std::endl;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms);
  if (r < 0)
    return r;
  struct stat attr;
  attr.st_uid = new_uid;
  attr.st_gid = new_gid;
  return _setattr(in, &attr, CEPH_SETATTR_UID|CEPH_SETATTR_GID, perms);
}

int Client::fchown(int fd, uid_t new_uid, gid_t new_gid, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "fchown" << std::endl;
  tout(cct) << fd << std::endl;
  tout(cct) << new_uid << std::endl;
  tout(cct) << new_gid << std::endl;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (f->flags & O_PATH)
    return -EBADF;
#endif
  struct stat attr;
  attr.st_uid = new_uid;
  attr.st_gid = new_gid;
  int mask = 0;
  if (new_uid != static_cast<uid_t>(-1)) mask |= CEPH_SETATTR_UID;
  if (new_gid != static_cast<gid_t>(-1)) mask |= CEPH_SETATTR_GID;
  return _setattr(f->inode, &attr, mask, perms);
}

int Client::lchown(const char *relpath, uid_t new_uid, gid_t new_gid,
		   const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "lchown" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << new_uid << std::endl;
  tout(cct) << new_gid << std::endl;

  filepath path(relpath);
  InodeRef in;
  // don't follow symlinks
  int r = path_walk(path, &in, perms, false);
  if (r < 0)
    return r;
  struct stat attr;
  attr.st_uid = new_uid;
  attr.st_gid = new_gid;
  int mask = 0;
  if (new_uid != static_cast<uid_t>(-1)) mask |= CEPH_SETATTR_UID;
  if (new_gid != static_cast<gid_t>(-1)) mask |= CEPH_SETATTR_GID;
  return _setattr(in, &attr, mask, perms);
}
int Client::utime(const char *relpath, struct utimbuf *buf,
		  const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "utime" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << buf->modtime << std::endl;
  tout(cct) << buf->actime << std::endl;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms);
  if (r < 0)
    return r;
  struct stat attr;
  stat_set_mtime_sec(&attr, buf->modtime);
  stat_set_mtime_nsec(&attr, 0);
  stat_set_atime_sec(&attr, buf->actime);
  stat_set_atime_nsec(&attr, 0);
  return _setattr(in, &attr, CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME, perms);
}

int Client::lutime(const char *relpath, struct utimbuf *buf,
		   const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "lutime" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << buf->modtime << std::endl;
  tout(cct) << buf->actime << std::endl;

  filepath path(relpath);
  InodeRef in;
  // don't follow symlinks
  int r = path_walk(path, &in, perms, false);
  if (r < 0)
    return r;
  struct stat attr;
  stat_set_mtime_sec(&attr, buf->modtime);
  stat_set_mtime_nsec(&attr, 0);
  stat_set_atime_sec(&attr, buf->actime);
  stat_set_atime_nsec(&attr, 0);
  return _setattr(in, &attr, CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME, perms);
}
int Client::flock(int fd, int operation, uint64_t owner)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "flock" << std::endl;
  tout(cct) << fd << std::endl;
  tout(cct) << operation << std::endl;
  tout(cct) << owner << std::endl;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;

  return _flock(f, operation, owner);
}
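/*
 * Illustrative usage sketch (not part of the build): `owner` disambiguates
 * lock holders that share one Client instance.  A hypothetical caller taking
 * and dropping an exclusive lock:
 *
 *   int fd = client->open("lockfile", O_RDWR, perms);
 *   uint64_t me = (uint64_t)pthread_self();
 *   if (client->flock(fd, LOCK_EX, me) == 0) {
 *     // ... critical section ...
 *     client->flock(fd, LOCK_UN, me);
 *   }
 */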
int Client::opendir(const char *relpath, dir_result_t **dirpp, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "opendir" << std::endl;
  tout(cct) << relpath << std::endl;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms, true);
  if (r < 0)
    return r;
  if (cct->_conf->client_permissions) {
    int r = may_open(in.get(), O_RDONLY, perms);
    if (r < 0)
      return r;
  }
  r = _opendir(in.get(), dirpp, perms);
  /* if ENOTDIR, dirpp will be an uninitialized pointer and it's very dangerous to access its value */
  if (r != -ENOTDIR)
    tout(cct) << (unsigned long)*dirpp << std::endl;
  return r;
}

int Client::_opendir(Inode *in, dir_result_t **dirpp, const UserPerm& perms)
{
  if (!in->is_dir())
    return -ENOTDIR;
  *dirpp = new dir_result_t(in, perms);
  opened_dirs.insert(*dirpp);
  ldout(cct, 8) << "_opendir(" << in->ino << ") = " << 0 << " (" << *dirpp << ")" << dendl;
  return 0;
}
int Client::closedir(dir_result_t *dir)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "closedir" << std::endl;
  tout(cct) << (unsigned long)dir << std::endl;

  ldout(cct, 3) << "closedir(" << dir << ") = 0" << dendl;
  _closedir(dir);
  return 0;
}

void Client::_closedir(dir_result_t *dirp)
{
  ldout(cct, 10) << "_closedir(" << dirp << ")" << dendl;
  if (dirp->inode) {
    ldout(cct, 10) << "_closedir detaching inode " << dirp->inode << dendl;
    dirp->inode.reset();
  }
  _readdir_drop_dirp_buffer(dirp);
  opened_dirs.erase(dirp);
  delete dirp;
}
void Client::rewinddir(dir_result_t *dirp)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "rewinddir(" << dirp << ")" << dendl;

  dir_result_t *d = static_cast<dir_result_t*>(dirp);
  _readdir_drop_dirp_buffer(d);
  d->reset();
}

loff_t Client::telldir(dir_result_t *dirp)
{
  dir_result_t *d = static_cast<dir_result_t*>(dirp);
  ldout(cct, 3) << "telldir(" << dirp << ") = " << d->offset << dendl;
  return d->offset;
}
void Client::seekdir(dir_result_t *dirp, loff_t offset)
{
  Mutex::Locker lock(client_lock);

  ldout(cct, 3) << "seekdir(" << dirp << ", " << offset << ")" << dendl;

  if (offset == dirp->offset)
    return;

  if (offset > dirp->offset)
    dirp->release_count = 0;   // bump if we do a forward seek
  else
    dirp->ordered_count = 0;   // disable filling readdir cache

  if (dirp->hash_order()) {
    if (dirp->offset > offset) {
      _readdir_drop_dirp_buffer(dirp);
      dirp->reset();
    }
  } else {
    if (offset == 0 ||
	dirp->buffer_frag != frag_t(dir_result_t::fpos_high(offset)) ||
	dirp->offset_low() > dir_result_t::fpos_low(offset)) {
      _readdir_drop_dirp_buffer(dirp);
      dirp->reset();
    }
  }

  dirp->offset = offset;
}
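/*
 * Illustrative sketch (not part of the build): offsets handed to seekdir()
 * should come from telldir() on the same dir_result_t; they encode a dirfrag
 * plus an intra-frag position (see dir_result_t::make_fpos), so arbitrary
 * integers are not meaningful.
 *
 *   loff_t pos = client->telldir(dirp);
 *   // ... consume some entries ...
 *   client->seekdir(dirp, pos);   // rewind to the remembered point
 */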
//struct dirent {
//  ino_t          d_ino;       /* inode number */
//  off_t          d_off;       /* offset to the next dirent */
//  unsigned short d_reclen;    /* length of this record */
//  unsigned char  d_type;      /* type of file */
//  char           d_name[256]; /* filename */
//};
void Client::fill_dirent(struct dirent *de, const char *name, int type, uint64_t ino, loff_t next_off)
{
  strncpy(de->d_name, name, 255);
  de->d_name[255] = '\0';
  de->d_ino = ino;
#if !defined(DARWIN) && !defined(__FreeBSD__)
  de->d_off = next_off;
#endif
  de->d_type = IFTODT(type);
  ldout(cct, 10) << "fill_dirent '" << de->d_name << "' -> " << inodeno_t(de->d_ino)
		 << " type " << (int)de->d_type << " w/ next_off " << hex << next_off << dec << dendl;
}
void Client::_readdir_next_frag(dir_result_t *dirp)
{
  frag_t fg = dirp->buffer_frag;

  if (fg.is_rightmost()) {
    ldout(cct, 10) << "_readdir_next_frag advance from " << fg << " to END" << dendl;
    dirp->set_end();
    return;
  }

  // advance
  fg = fg.next();
  ldout(cct, 10) << "_readdir_next_frag advance from " << dirp->buffer_frag << " to " << fg << dendl;

  if (dirp->hash_order()) {
    // keep last_name
    int64_t new_offset = dir_result_t::make_fpos(fg.value(), 2, true);
    if (dirp->offset < new_offset) // don't decrease offset
      dirp->offset = new_offset;
  } else {
    dirp->last_name.clear();
    dirp->offset = dir_result_t::make_fpos(fg, 2, false);
    _readdir_rechoose_frag(dirp);
  }
}

void Client::_readdir_rechoose_frag(dir_result_t *dirp)
{
  assert(dirp->inode);

  if (dirp->hash_order())
    return;

  frag_t cur = frag_t(dirp->offset_high());
  frag_t fg = dirp->inode->dirfragtree[cur.value()];
  if (fg != cur) {
    ldout(cct, 10) << "_readdir_rechoose_frag frag " << cur << " maps to " << fg << dendl;
    dirp->offset = dir_result_t::make_fpos(fg, 2, false);
    dirp->last_name.clear();
    dirp->next_offset = 2;
  }
}

void Client::_readdir_drop_dirp_buffer(dir_result_t *dirp)
{
  ldout(cct, 10) << "_readdir_drop_dirp_buffer " << dirp << dendl;
  dirp->buffer.clear();
}
int Client::_readdir_get_frag(dir_result_t *dirp)
{
  assert(dirp);
  assert(dirp->inode);

  // get the current frag.
  frag_t fg;
  if (dirp->hash_order())
    fg = dirp->inode->dirfragtree[dirp->offset_high()];
  else
    fg = frag_t(dirp->offset_high());

  ldout(cct, 10) << "_readdir_get_frag " << dirp << " on " << dirp->inode->ino << " fg " << fg
		 << " offset " << hex << dirp->offset << dec << dendl;

  int op = CEPH_MDS_OP_READDIR;
  if (dirp->inode && dirp->inode->snapid == CEPH_SNAPDIR)
    op = CEPH_MDS_OP_LSSNAP;

  InodeRef& diri = dirp->inode;

  MetaRequest *req = new MetaRequest(op);
  filepath path;
  diri->make_nosnap_relative_path(path);
  req->set_filepath(path);
  req->set_inode(diri.get());
  req->head.args.readdir.frag = fg;
  req->head.args.readdir.flags = CEPH_READDIR_REPLY_BITFLAGS;
  if (dirp->last_name.length()) {
    req->path2.set_path(dirp->last_name);
  } else if (dirp->hash_order()) {
    req->head.args.readdir.offset_hash = dirp->offset_high();
  }
  req->dirp = dirp;

  bufferlist dirbl;
  int res = make_request(req, dirp->perms, NULL, NULL, -1, &dirbl);

  if (res == -EAGAIN) {
    ldout(cct, 10) << "_readdir_get_frag got EAGAIN, retrying" << dendl;
    _readdir_rechoose_frag(dirp);
    return _readdir_get_frag(dirp);
  }

  if (res == 0) {
    ldout(cct, 10) << "_readdir_get_frag " << dirp << " got frag " << dirp->buffer_frag
		   << " size " << dirp->buffer.size() << dendl;
  } else {
    ldout(cct, 10) << "_readdir_get_frag got error " << res << ", setting end flag" << dendl;
    dirp->set_end();
  }

  return res;
}
struct dentry_off_lt {
  bool operator()(const Dentry* dn, int64_t off) const {
    return dir_result_t::fpos_cmp(dn->offset, off) < 0;
  }
};

int Client::_readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
			      int caps, bool getref)
{
  assert(client_lock.is_locked());
  ldout(cct, 10) << "_readdir_cache_cb " << dirp << " on " << dirp->inode->ino
		 << " last_name " << dirp->last_name << " offset " << hex << dirp->offset << dec
		 << dendl;
  Dir *dir = dirp->inode->dir;

  if (!dir) {
    ldout(cct, 10) << " dir is empty" << dendl;
    dirp->set_end();
    return 0;
  }

  vector<Dentry*>::iterator pd = std::lower_bound(dir->readdir_cache.begin(),
						  dir->readdir_cache.end(),
						  dirp->offset, dentry_off_lt());

  string dn_name;
  while (true) {
    if (!dirp->inode->is_complete_and_ordered())
      return -EAGAIN;
    if (pd == dir->readdir_cache.end())
      break;
    Dentry *dn = *pd;
    if (dn->inode == NULL) {
      ldout(cct, 15) << " skipping null '" << dn->name << "'" << dendl;
      ++pd;
      continue;
    }
    if (dn->cap_shared_gen != dir->parent_inode->shared_gen) {
      ldout(cct, 15) << " skipping mismatch shared gen '" << dn->name << "'" << dendl;
      ++pd;
      continue;
    }

    int r = _getattr(dn->inode, caps, dirp->perms);
    if (r < 0)
      return r;

    struct ceph_statx stx;
    struct dirent de;
    fill_statx(dn->inode, caps, &stx);

    uint64_t next_off = dn->offset + 1;
    ++pd;
    if (pd == dir->readdir_cache.end())
      next_off = dir_result_t::END;

    Inode *in = NULL;
    fill_dirent(&de, dn->name.c_str(), stx.stx_mode, stx.stx_ino, next_off);
    if (getref) {
      in = dn->inode.get();
      _ll_get(in);
    }

    dn_name = dn->name; // fill in name while we have lock

    client_lock.Unlock();
    r = cb(p, &de, &stx, next_off, in);  // _next_ offset
    client_lock.Lock();
    ldout(cct, 15) << " de " << de.d_name << " off " << hex << dn->offset << dec
		   << " = " << r << dendl;
    if (r < 0)
      return r;

    dirp->offset = next_off;
    if (dirp->at_end())
      dirp->next_offset = 2;
    else
      dirp->next_offset = dirp->offset_low();
    dirp->last_name = dn_name; // we successfully returned this one; update!
    dirp->release_count = 0; // last_name no longer match cache index
    if (r > 0)
      return r;
  }

  ldout(cct, 10) << "_readdir_cache_cb " << dirp << " on " << dirp->inode->ino << " at end" << dendl;
  dirp->set_end();
  return 0;
}
int Client::readdir_r_cb(dir_result_t *d, add_dirent_cb_t cb, void *p,
			 unsigned want, unsigned flags, bool getref)
{
  int caps = statx_to_mask(flags, want);

  Mutex::Locker lock(client_lock);

  dir_result_t *dirp = static_cast<dir_result_t*>(d);

  ldout(cct, 10) << "readdir_r_cb " << *dirp->inode << " offset " << hex << dirp->offset
		 << dec << " at_end=" << dirp->at_end()
		 << " hash_order=" << dirp->hash_order() << dendl;

  struct dirent de;
  struct ceph_statx stx;
  memset(&de, 0, sizeof(de));
  memset(&stx, 0, sizeof(stx));

  InodeRef& diri = dirp->inode;

  if (dirp->at_end())
    return 0;

  if (dirp->offset == 0) {
    ldout(cct, 15) << " including ." << dendl;
    assert(diri->dn_set.size() < 2); // can't have multiple hard-links to a dir
    uint64_t next_off = 1;

    int r;
    r = _getattr(diri, caps, dirp->perms);
    if (r < 0)
      return r;

    fill_statx(diri, caps, &stx);
    fill_dirent(&de, ".", S_IFDIR, stx.stx_ino, next_off);

    Inode *inode = NULL;
    if (getref) {
      inode = diri.get();
      _ll_get(inode);
    }

    client_lock.Unlock();
    r = cb(p, &de, &stx, next_off, inode);
    client_lock.Lock();
    if (r < 0)
      return r;

    dirp->offset = next_off;
    if (r > 0)
      return r;
  }
  if (dirp->offset == 1) {
    ldout(cct, 15) << " including .." << dendl;
    uint64_t next_off = 2;
    InodeRef in;
    if (diri->dn_set.empty())
      in = diri;
    else
      in = diri->get_first_parent()->dir->parent_inode;

    int r;
    r = _getattr(in, caps, dirp->perms);
    if (r < 0)
      return r;

    fill_statx(in, caps, &stx);
    fill_dirent(&de, "..", S_IFDIR, stx.stx_ino, next_off);

    Inode *inode = NULL;
    if (getref) {
      inode = in.get();
      _ll_get(inode);
    }

    client_lock.Unlock();
    r = cb(p, &de, &stx, next_off, inode);
    client_lock.Lock();
    if (r < 0)
      return r;

    dirp->offset = next_off;
    if (r > 0)
      return r;
  }

  // can we read from our cache?
  ldout(cct, 10) << "offset " << hex << dirp->offset << dec
		 << " snapid " << dirp->inode->snapid << " (complete && ordered) "
		 << dirp->inode->is_complete_and_ordered()
		 << " issued " << ccap_string(dirp->inode->caps_issued())
		 << dendl;
  if (dirp->inode->snapid != CEPH_SNAPDIR &&
      dirp->inode->is_complete_and_ordered() &&
      dirp->inode->caps_issued_mask(CEPH_CAP_FILE_SHARED, true)) {
    int err = _readdir_cache_cb(dirp, cb, p, caps, getref);
    if (err != -EAGAIN)
      return err;
  }

  while (1) {
    if (dirp->at_end())
      return 0;

    bool check_caps = true;
    if (!dirp->is_cached()) {
      int r = _readdir_get_frag(dirp);
      if (r)
	return r;
      // _readdir_get_frag() may update dirp->offset if the replied dirfrag is
      // different than the requested one. (our dirfragtree was outdated)
      check_caps = false;
    }
    frag_t fg = dirp->buffer_frag;

    ldout(cct, 10) << "frag " << fg << " buffer size " << dirp->buffer.size()
		   << " offset " << hex << dirp->offset << dendl;

    for (auto it = std::lower_bound(dirp->buffer.begin(), dirp->buffer.end(),
				    dirp->offset, dir_result_t::dentry_off_lt());
	 it != dirp->buffer.end();
	 ++it) {
      dir_result_t::dentry &entry = *it;

      uint64_t next_off = entry.offset + 1;

      int r;
      if (check_caps) {
	r = _getattr(entry.inode, caps, dirp->perms);
	if (r < 0)
	  return r;
      }

      fill_statx(entry.inode, caps, &stx);
      fill_dirent(&de, entry.name.c_str(), stx.stx_mode, stx.stx_ino, next_off);

      Inode *inode = NULL;
      if (getref) {
	inode = entry.inode.get();
	_ll_get(inode);
      }

      client_lock.Unlock();
      r = cb(p, &de, &stx, next_off, inode);  // _next_ offset
      client_lock.Lock();

      ldout(cct, 15) << " de " << de.d_name << " off " << hex << next_off - 1 << dec
		     << " = " << r << dendl;
      if (r < 0)
	return r;

      dirp->offset = next_off;
      if (r > 0)
	return r;
    }

    if (dirp->next_offset > 2) {
      ldout(cct, 10) << " fetching next chunk of this frag" << dendl;
      _readdir_drop_dirp_buffer(dirp);
      continue;  // more!
    }

    if (!fg.is_rightmost()) {
      // next frag!
      _readdir_next_frag(dirp);
      continue;
    }

    if (diri->shared_gen == dirp->start_shared_gen &&
	diri->dir_release_count == dirp->release_count) {
      if (diri->dir_ordered_count == dirp->ordered_count) {
	ldout(cct, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on " << *diri << dendl;
	if (diri->dir) {
	  assert(diri->dir->readdir_cache.size() >= dirp->cache_index);
	  diri->dir->readdir_cache.resize(dirp->cache_index);
	}
	diri->flags |= I_COMPLETE | I_DIR_ORDERED;
      } else {
	ldout(cct, 10) << " marking I_COMPLETE on " << *diri << dendl;
	diri->flags |= I_COMPLETE;
      }
    }

    dirp->set_end();
    return 0;
  }
}
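/*
 * Illustrative usage sketch (not part of the build): readdir_r_cb() drives
 * an arbitrary callback.  Returning 0 from the callback asks for more
 * entries, a positive value stops iteration successfully, and a negative
 * value is propagated as the error.  A hypothetical counting callback:
 *
 *   static int count_cb(void *p, struct dirent *de, struct ceph_statx *stx,
 *                       off_t off, Inode *in) {
 *     ++*static_cast<unsigned*>(p);   // count the entry, keep going
 *     return 0;
 *   }
 *   unsigned n = 0;
 *   client->readdir_r_cb(dirp, count_cb, &n, 0, 0, false);
 */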
int Client::readdir_r(dir_result_t *d, struct dirent *de)
{
  return readdirplus_r(d, de, 0, 0, 0, NULL);
}

/*
 * readdirplus_r
 *
 * returns
 *  1 if we got a dirent
 *  0 for end of directory
 * <0 on error
 */
struct single_readdir {
  struct dirent *de;
  struct ceph_statx *stx;
  Inode *inode;
  bool full;
};

static int _readdir_single_dirent_cb(void *p, struct dirent *de,
				     struct ceph_statx *stx, off_t off,
				     Inode *in)
{
  single_readdir *c = static_cast<single_readdir *>(p);

  if (c->full)
    return -1;  // already filled this dirent

  *c->de = *de;
  if (c->stx)
    *c->stx = *stx;
  c->inode = in;
  c->full = true;
  return 1;
}
struct dirent *Client::readdir(dir_result_t *d)
{
  int ret;
  static struct dirent de;
  single_readdir sr;
  sr.de = &de;
  sr.stx = NULL;
  sr.inode = NULL;
  sr.full = false;

  // our callback fills the dirent and sets sr.full=true on first
  // call, and returns -1 the second time around.
  ret = readdir_r_cb(d, _readdir_single_dirent_cb, (void *)&sr);
  if (ret < -1) {
    errno = -ret;  // this sucks.
    return (dirent *) NULL;
  }
  if (sr.full)
    return &de;
  return (dirent *) NULL;
}

int Client::readdirplus_r(dir_result_t *d, struct dirent *de,
			  struct ceph_statx *stx, unsigned want,
			  unsigned flags, Inode **out)
{
  single_readdir sr;
  sr.de = de;
  sr.stx = stx;
  sr.inode = NULL;
  sr.full = false;

  // our callback fills the dirent and sets sr.full=true on first
  // call, and returns -1 the second time around.
  int r = readdir_r_cb(d, _readdir_single_dirent_cb, (void *)&sr, want, flags, out);
  if (r < -1)
    return r;
  if (out)
    *out = sr.inode;
  if (sr.full)
    return 1;
  return 0;
}
/* getdents */
struct getdents_result {
  char *buf;
  int buflen;
  int pos;
  bool fullent;
};

static int _readdir_getdent_cb(void *p, struct dirent *de,
			       struct ceph_statx *stx, off_t off, Inode *in)
{
  struct getdents_result *c = static_cast<getdents_result *>(p);

  int dlen;
  if (c->fullent)
    dlen = sizeof(*de);
  else
    dlen = strlen(de->d_name) + 1;

  if (c->pos + dlen > c->buflen)
    return -1;  // doesn't fit

  if (c->fullent) {
    memcpy(c->buf + c->pos, de, sizeof(*de));
  } else {
    memcpy(c->buf + c->pos, de->d_name, dlen);
  }
  c->pos += dlen;
  return 0;
}

int Client::_getdents(dir_result_t *dir, char *buf, int buflen, bool fullent)
{
  getdents_result gr;
  gr.buf = buf;
  gr.buflen = buflen;
  gr.fullent = fullent;
  gr.pos = 0;

  int r = readdir_r_cb(dir, _readdir_getdent_cb, (void *)&gr);

  if (r < 0) { // some error
    if (r == -1) { // buffer ran out of space
      if (gr.pos) { // but we got some entries already!
	return gr.pos;
      } // or we need a larger buffer
      return -ERANGE;
    } else { // actual error, return it
      return r;
    }
  }
  return gr.pos;
}
/* getdir */
struct getdir_result {
  list<string> *contents;
  int num;
};

static int _getdir_cb(void *p, struct dirent *de, struct ceph_statx *stx, off_t off, Inode *in)
{
  getdir_result *r = static_cast<getdir_result *>(p);

  r->contents->push_back(de->d_name);
  r->num++;
  return 0;
}

int Client::getdir(const char *relpath, list<string>& contents,
		   const UserPerm& perms)
{
  ldout(cct, 3) << "getdir(" << relpath << ")" << dendl;
  {
    Mutex::Locker lock(client_lock);
    tout(cct) << "getdir" << std::endl;
    tout(cct) << relpath << std::endl;
  }

  dir_result_t *d;
  int r = opendir(relpath, &d, perms);
  if (r < 0)
    return r;

  getdir_result gr;
  gr.contents = &contents;
  gr.num = 0;
  r = readdir_r_cb(d, _getdir_cb, (void *)&gr);

  closedir(d);

  if (r < 0)
    return r;
  return gr.num;
}


/****** file i/o **********/
int Client::open(const char *relpath, int flags, const UserPerm& perms,
		 mode_t mode, int stripe_unit, int stripe_count,
		 int object_size, const char *data_pool)
{
  ldout(cct, 3) << "open enter(" << relpath << ", " << ceph_flags_sys2wire(flags) << "," << mode << ")" << dendl;
  Mutex::Locker lock(client_lock);
  tout(cct) << "open" << std::endl;
  tout(cct) << relpath << std::endl;
  tout(cct) << ceph_flags_sys2wire(flags) << std::endl;

  Fh *fh = NULL;

#if defined(__linux__) && defined(O_PATH)
  /* When O_PATH is specified, flags other than O_DIRECTORY
   * and O_NOFOLLOW are ignored. Please refer to the do_entry_open() function
   * in the kernel (fs/open.c). */
  if (flags & O_PATH)
    flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
#endif

  filepath path(relpath);
  InodeRef in;
  bool created = false;
  /* O_CREAT with O_EXCL enforces O_NOFOLLOW. */
  bool followsym = !((flags & O_NOFOLLOW) || ((flags & O_CREAT) && (flags & O_EXCL)));
  int r = path_walk(path, &in, perms, followsym, ceph_caps_for_mode(mode));

  if (r == 0 && (flags & O_CREAT) && (flags & O_EXCL))
    return -EEXIST;

#if defined(__linux__) && defined(O_PATH)
  if (r == 0 && in->is_symlink() && (flags & O_NOFOLLOW) && !(flags & O_PATH))
#else
  if (r == 0 && in->is_symlink() && (flags & O_NOFOLLOW))
#endif
    return -ELOOP;

  if (r == -ENOENT && (flags & O_CREAT)) {
    filepath dirpath = path;
    string dname = dirpath.last_dentry();
    dirpath.pop_dentry();
    InodeRef dir;
    r = path_walk(dirpath, &dir, perms, true,
		  cct->_conf->client_permissions ? CEPH_CAP_AUTH_SHARED : 0);
    if (r < 0)
      goto out;
    if (cct->_conf->client_permissions) {
      r = may_create(dir.get(), perms);
      if (r < 0)
	goto out;
    }
    r = _create(dir.get(), dname.c_str(), flags, mode, &in, &fh, stripe_unit,
		stripe_count, object_size, data_pool, &created, perms);
  }
  if (r < 0)
    goto out;

  if (!created) {
    // posix says we can only check permissions of existing files
    if (cct->_conf->client_permissions) {
      r = may_open(in.get(), flags, perms);
      if (r < 0)
	goto out;
    }
  }

  if (!fh)
    r = _open(in.get(), flags, mode, &fh, perms);
  if (r >= 0) {
    // allocate an integer file descriptor
    assert(fh);
    r = get_fd();
    assert(fd_map.count(r) == 0);
    fd_map[r] = fh;
  }

 out:
  tout(cct) << r << std::endl;
  ldout(cct, 3) << "open exit(" << path << ", " << ceph_flags_sys2wire(flags) << ") = " << r << dendl;
  return r;
}
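/*
 * Illustrative usage sketch (not part of the build): the long-form open()
 * lets a creating caller pick the file layout; zeros mean "use the
 * defaults", as the short overload below does.  For example, a hypothetical
 * 4 MB object size with 1 MB stripe units across 4 objects:
 *
 *   int fd = client->open("newfile", O_CREAT|O_WRONLY|O_EXCL, perms, 0644,
 *                         1 << 20, 4, 4 << 20, NULL);
 */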
int Client::open(const char *relpath, int flags, const UserPerm& perms, mode_t mode)
{
  /* Use default file striping parameters */
  return open(relpath, flags, perms, mode, 0, 0, 0, NULL);
}
int Client::lookup_hash(inodeno_t ino, inodeno_t dirino, const char *name,
			const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "lookup_hash enter(" << ino << ", #" << dirino << "/" << name << ")" << dendl;

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPHASH);
  filepath path(ino);
  req->set_filepath(path);

  uint32_t h = ceph_str_hash(CEPH_STR_HASH_RJENKINS, name, strlen(name));
  char f[30];
  sprintf(f, "%u", h);
  filepath path2(dirino);
  path2.push_dentry(string(f));
  req->set_filepath2(path2);

  int r = make_request(req, perms, NULL, NULL,
		       rand() % mdsmap->get_num_in_mds());
  ldout(cct, 3) << "lookup_hash exit(" << ino << ", #" << dirino << "/" << name << ") = " << r << dendl;
  return r;
}
/**
 * Load inode into local cache.
 *
 * If the inode pointer is non-NULL, we also take a reference on
 * the resulting Inode object in one operation, so that the caller
 * can safely assume the inode will still be there after return.
 */
int Client::_lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode)
{
  ldout(cct, 8) << "lookup_ino enter(" << ino << ")" << dendl;

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPINO);
  filepath path(ino);
  req->set_filepath(path);

  int r = make_request(req, perms, NULL, NULL, rand() % mdsmap->get_num_in_mds());
  if (r == 0 && inode != NULL) {
    vinodeno_t vino(ino, CEPH_NOSNAP);
    unordered_map<vinodeno_t,Inode*>::iterator p = inode_map.find(vino);
    assert(p != inode_map.end());
    *inode = p->second;
    _ll_get(*inode);
  }
  ldout(cct, 8) << "lookup_ino exit(" << ino << ") = " << r << dendl;
  return r;
}

int Client::lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode)
{
  Mutex::Locker lock(client_lock);
  return _lookup_ino(ino, perms, inode);
}
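/*
 * Illustrative usage sketch (not part of the build): lookup_ino() is what
 * NFS-style re-export paths use to turn a bare inode number back into a
 * cached Inode.  The reference taken above is expected to be dropped again
 * by the caller once it is done with the pointer.  The inode number below is
 * hypothetical:
 *
 *   Inode *in = NULL;
 *   if (client->lookup_ino(inodeno_t(0x10000000000), perms, &in) == 0) {
 *     // ... use in, then release the reference ...
 *   }
 */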
/**
 * Find the parent inode of `ino` and insert it into
 * our cache. Conditionally also set `parent` to a referenced
 * Inode* if caller provides a non-NULL value.
 */
int Client::_lookup_parent(Inode *ino, const UserPerm& perms, Inode **parent)
{
  ldout(cct, 8) << "lookup_parent enter(" << ino->ino << ")" << dendl;

  if (!ino->dn_set.empty()) {
    // if we exposed the parent here, we'd need to check permissions,
    // but right now we just rely on the MDS doing so in make_request
    ldout(cct, 8) << "lookup_parent dentry already present" << dendl;
    return 0;
  }

  if (ino->is_root()) {
    *parent = NULL;
    ldout(cct, 8) << "ino is root, no parent" << dendl;
    return -EINVAL;
  }

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPPARENT);
  filepath path(ino->ino);
  req->set_filepath(path);

  InodeRef target;
  int r = make_request(req, perms, &target, NULL, rand() % mdsmap->get_num_in_mds());
  // Give caller a reference to the parent ino if they provided a pointer.
  if (parent != NULL) {
    if (r == 0) {
      *parent = target.get();
      _ll_get(*parent);
      ldout(cct, 8) << "lookup_parent found parent " << (*parent)->ino << dendl;
    } else {
      *parent = NULL;
    }
  }
  ldout(cct, 8) << "lookup_parent exit(" << ino->ino << ") = " << r << dendl;
  return r;
}

int Client::lookup_parent(Inode *ino, const UserPerm& perms, Inode **parent)
{
  Mutex::Locker lock(client_lock);
  return _lookup_parent(ino, perms, parent);
}
/**
 * Populate the parent dentry for `ino`, provided it is
 * a child of `parent`.
 */
int Client::_lookup_name(Inode *ino, Inode *parent, const UserPerm& perms)
{
  assert(parent->is_dir());
  ldout(cct, 3) << "lookup_name enter(" << ino->ino << ")" << dendl;

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPNAME);
  req->set_filepath2(filepath(parent->ino));
  req->set_filepath(filepath(ino->ino));
  req->set_inode(ino);

  int r = make_request(req, perms, NULL, NULL, rand() % mdsmap->get_num_in_mds());
  ldout(cct, 3) << "lookup_name exit(" << ino->ino << ") = " << r << dendl;
  return r;
}

int Client::lookup_name(Inode *ino, Inode *parent, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  return _lookup_name(ino, parent, perms);
}
Fh *Client::_create_fh(Inode *in, int flags, int cmode, const UserPerm& perms)
{
  assert(in);
  Fh *f = new Fh(in);
  f->mode = cmode;
  f->flags = flags;
  f->actor_perms = perms;

  ldout(cct, 10) << "_create_fh " << in->ino << " mode " << cmode << dendl;

  if (in->snapid != CEPH_NOSNAP) {
    in->snap_cap_refs++;
    ldout(cct, 5) << "open success, fh is " << f << " combined IMMUTABLE SNAP caps "
		  << ccap_string(in->caps_issued()) << dendl;
  }

  const md_config_t *conf = cct->_conf;
  f->readahead.set_trigger_requests(1);
  f->readahead.set_min_readahead_size(conf->client_readahead_min);
  uint64_t max_readahead = Readahead::NO_LIMIT;
  if (conf->client_readahead_max_bytes) {
    max_readahead = MIN(max_readahead, (uint64_t)conf->client_readahead_max_bytes);
  }
  if (conf->client_readahead_max_periods) {
    max_readahead = MIN(max_readahead, in->layout.get_period()*(uint64_t)conf->client_readahead_max_periods);
  }
  f->readahead.set_max_readahead_size(max_readahead);
  vector<uint64_t> alignments;
  alignments.push_back(in->layout.get_period());
  alignments.push_back(in->layout.stripe_unit);
  f->readahead.set_alignments(alignments);

  return f;
}
int Client::_release_fh(Fh *f)
{
  //ldout(cct, 3) << "op: client->close(open_files[ " << fh << " ]);" << dendl;
  //ldout(cct, 3) << "op: open_files.erase( " << fh << " );" << dendl;
  Inode *in = f->inode.get();
  ldout(cct, 8) << "_release_fh " << f << " mode " << f->mode << " on " << *in << dendl;

  if (in->snapid == CEPH_NOSNAP) {
    if (in->put_open_ref(f->mode)) {
      _flush(in, new C_Client_FlushComplete(this, in));
    }
  } else {
    assert(in->snap_cap_refs > 0);
    in->snap_cap_refs--;
  }

  _release_filelocks(f);

  // Finally, read any async err (i.e. from flushes)
  int err = f->take_async_err();
  if (err != 0) {
    ldout(cct, 1) << "_release_fh " << f << " on inode " << *in << " caught async_err = "
		  << cpp_strerror(err) << dendl;
  } else {
    ldout(cct, 10) << "_release_fh " << f << " on inode " << *in << " no async_err state" << dendl;
  }

  _put_fh(f);

  return err;
}

void Client::_put_fh(Fh *f)
{
  int left = f->put();
  if (!left) {
    delete f;
  }
}
int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp,
		  const UserPerm& perms)
{
  if (in->snapid != CEPH_NOSNAP &&
      (flags & (O_WRONLY | O_RDWR | O_CREAT | O_TRUNC | O_APPEND))) {
    return -EROFS;
  }

  // use normalized flags to generate cmode
  int cmode = ceph_flags_to_mode(ceph_flags_sys2wire(flags));
  if (cmode < 0)
    return -EINVAL;
  int want = ceph_caps_for_mode(cmode);
  int result = 0;

  in->get_open_ref(cmode);  // make note of pending open, since it effects _wanted_ caps.

  if ((flags & O_TRUNC) == 0 && in->caps_issued_mask(want)) {
    // update wanted?
    check_caps(in, CHECK_CAPS_NODELAY);
  } else {
    MetaRequest *req = new MetaRequest(CEPH_MDS_OP_OPEN);
    filepath path;
    in->make_nosnap_relative_path(path);
    req->set_filepath(path);
    req->head.args.open.flags = ceph_flags_sys2wire(flags & ~O_CREAT);
    req->head.args.open.mode = mode;
    req->head.args.open.pool = -1;
    if (cct->_conf->client_debug_getattr_caps)
      req->head.args.open.mask = DEBUG_GETATTR_CAPS;
    else
      req->head.args.open.mask = 0;
    req->head.args.open.old_size = in->size;   // for O_TRUNC
    req->set_inode(in);
    result = make_request(req, perms);

    /*
     * NFS expects that delegations will be broken on a conflicting open,
     * not just when there is actual conflicting access to the file. SMB leases
     * and oplocks also have similar semantics.
     *
     * Ensure that clients that have delegations enabled will wait on minimal
     * caps during open, just to ensure that other clients holding delegations
     * return theirs first.
     */
    if (deleg_timeout && result == 0) {
      int need = 0, have;

      if (cmode & CEPH_FILE_MODE_WR)
	need |= CEPH_CAP_FILE_WR;
      if (cmode & CEPH_FILE_MODE_RD)
	need |= CEPH_CAP_FILE_RD;

      result = get_caps(in, need, want, &have, -1);
      if (result < 0) {
	ldout(cct, 8) << "Unable to get caps after open of inode " << *in <<
			 " . Denying open: " <<
			 cpp_strerror(result) << dendl;
	in->put_open_ref(cmode);
      } else {
	put_cap_ref(in, need);
      }
    }
  }

  // success?
  if (result >= 0) {
    if (fhp)
      *fhp = _create_fh(in, flags, cmode, perms);
  } else {
    in->put_open_ref(cmode);
  }

  trim_cache();

  return result;
}
int Client::_renew_caps(Inode *in)
{
  int wanted = in->caps_file_wanted();
  if (in->is_any_caps() &&
      ((wanted & CEPH_CAP_ANY_WR) == 0 || in->auth_cap)) {
    check_caps(in, CHECK_CAPS_NODELAY);
    return 0;
  }

  int flags = 0;
  if ((wanted & CEPH_CAP_FILE_RD) && (wanted & CEPH_CAP_FILE_WR))
    flags = O_RDWR;
  else if (wanted & CEPH_CAP_FILE_RD)
    flags = O_RDONLY;
  else if (wanted & CEPH_CAP_FILE_WR)
    flags = O_WRONLY;

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_OPEN);
  filepath path;
  in->make_nosnap_relative_path(path);
  req->set_filepath(path);
  req->head.args.open.flags = flags;
  req->head.args.open.pool = -1;
  if (cct->_conf->client_debug_getattr_caps)
    req->head.args.open.mask = DEBUG_GETATTR_CAPS;
  else
    req->head.args.open.mask = 0;
  req->set_inode(in);

  // duplicate in case Cap goes away; not sure if that race is a concern?
  const UserPerm *pperm = in->get_best_perms();
  UserPerm perms;
  if (pperm != NULL)
    perms = *pperm;
  int ret = make_request(req, perms);
  return ret;
}
int Client::close(int fd)
{
  ldout(cct, 3) << "close enter(" << fd << ")" << dendl;
  Mutex::Locker lock(client_lock);
  tout(cct) << "close" << std::endl;
  tout(cct) << fd << std::endl;

  Fh *fh = get_filehandle(fd);
  if (!fh)
    return -EBADF;
  int err = _release_fh(fh);
  fd_map.erase(fd);
  put_fd(fd);
  ldout(cct, 3) << "close exit(" << fd << ")" << dendl;
  return err;
}
// ------------
// read, write

loff_t Client::lseek(int fd, loff_t offset, int whence)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "lseek" << std::endl;
  tout(cct) << fd << std::endl;
  tout(cct) << offset << std::endl;
  tout(cct) << whence << std::endl;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (f->flags & O_PATH)
    return -EBADF;
#endif
  return _lseek(f, offset, whence);
}

loff_t Client::_lseek(Fh *f, loff_t offset, int whence)
{
  Inode *in = f->inode.get();
  int r;

  switch (whence) {
  case SEEK_SET:
    f->pos = offset;
    break;

  case SEEK_CUR:
    f->pos += offset;
    break;

  case SEEK_END:
    r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms);
    if (r < 0)
      return r;
    f->pos = in->size + offset;
    break;

  default:
    ceph_abort();
  }

  ldout(cct, 8) << "_lseek(" << f << ", " << offset << ", " << whence << ") = " << f->pos << dendl;
  return f->pos;
}
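/*
 * Illustrative sketch (not part of the build): only SEEK_END consults the
 * cluster (via _getattr) because the authoritative size may be newer than
 * the cached one; SEEK_SET/SEEK_CUR are pure local arithmetic on f->pos.
 *
 *   loff_t end = client->lseek(fd, 0, SEEK_END);   // may issue a getattr
 *   loff_t cur = client->lseek(fd, 0, SEEK_CUR);   // never leaves the client
 */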
void Client::lock_fh_pos(Fh *f)
{
  ldout(cct, 10) << "lock_fh_pos " << f << dendl;

  if (f->pos_locked || !f->pos_waiters.empty()) {
    Cond cond;
    f->pos_waiters.push_back(&cond);
    ldout(cct, 10) << "lock_fh_pos BLOCKING on " << f << dendl;
    while (f->pos_locked || f->pos_waiters.front() != &cond)
      cond.Wait(client_lock);
    ldout(cct, 10) << "lock_fh_pos UNBLOCKING on " << f << dendl;
    assert(f->pos_waiters.front() == &cond);
    f->pos_waiters.pop_front();
  }

  f->pos_locked = true;
}

void Client::unlock_fh_pos(Fh *f)
{
  ldout(cct, 10) << "unlock_fh_pos " << f << dendl;
  f->pos_locked = false;
}
int Client::uninline_data(Inode *in, Context *onfinish)
{
  if (!in->inline_data.length()) {
    onfinish->complete(0);
    return 0;
  }

  char oid_buf[32];
  snprintf(oid_buf, sizeof(oid_buf), "%llx.00000000", (long long unsigned)in->ino);
  object_t oid = oid_buf;

  ObjectOperation create_ops;
  create_ops.create(false);

  objecter->mutate(oid,
		   OSDMap::file_to_object_locator(in->layout),
		   create_ops,
		   in->snaprealm->get_snap_context(),
		   ceph::real_clock::now(),
		   0,
		   NULL);

  bufferlist inline_version_bl;
  ::encode(in->inline_version, inline_version_bl);

  ObjectOperation uninline_ops;
  uninline_ops.cmpxattr("inline_version",
			CEPH_OSD_CMPXATTR_OP_GT,
			CEPH_OSD_CMPXATTR_MODE_U64,
			inline_version_bl);
  bufferlist inline_data = in->inline_data;
  uninline_ops.write(0, inline_data, in->truncate_size, in->truncate_seq);
  uninline_ops.setxattr("inline_version", stringify(in->inline_version));

  objecter->mutate(oid,
		   OSDMap::file_to_object_locator(in->layout),
		   uninline_ops,
		   in->snaprealm->get_snap_context(),
		   ceph::real_clock::now(),
		   0,
		   onfinish);

  return 0;
}
8770 int Client::read(int fd
, char *buf
, loff_t size
, loff_t offset
)
8772 Mutex::Locker
lock(client_lock
);
8773 tout(cct
) << "read" << std::endl
;
8774 tout(cct
) << fd
<< std::endl
;
8775 tout(cct
) << size
<< std::endl
;
8776 tout(cct
) << offset
<< std::endl
;
8781 Fh
*f
= get_filehandle(fd
);
8784 #if defined(__linux__) && defined(O_PATH)
8785 if (f
->flags
& O_PATH
)
8789 int r
= _read(f
, offset
, size
, &bl
);
8790 ldout(cct
, 3) << "read(" << fd
<< ", " << (void*)buf
<< ", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
8792 bl
.copy(0, bl
.length(), buf
);
8798 int Client::preadv(int fd
, const struct iovec
*iov
, int iovcnt
, loff_t offset
)
8802 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, false);
int Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl)
{
  const md_config_t *conf = cct->_conf;
  Inode *in = f->inode.get();

  if ((f->mode & CEPH_FILE_MODE_RD) == 0)
    return -EBADF;
  //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;

  bool movepos = false;
  if (offset < 0) {
    lock_fh_pos(f);
    offset = f->pos;
    movepos = true;
  }
  loff_t start_pos = offset;

  if (in->inline_version == 0) {
    int r = _getattr(in, CEPH_STAT_CAP_INLINE_DATA, f->actor_perms, true);
    if (r < 0) {
      if (movepos)
	unlock_fh_pos(f);
      return r;
    }
    assert(in->inline_version > 0);
  }

retry:
  int have;
  int r = get_caps(in, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE, &have, -1);
  if (r < 0) {
    if (movepos)
      unlock_fh_pos(f);
    return r;
  }
  if (f->flags & O_DIRECT)
    have &= ~CEPH_CAP_FILE_CACHE;

  Mutex uninline_flock("Client::_read_uninline_data flock");
  Cond uninline_cond;
  bool uninline_done = false;
  int uninline_ret = 0;
  Context *onuninline = NULL;

  if (in->inline_version < CEPH_INLINE_NONE) {
    if (!(have & CEPH_CAP_FILE_CACHE)) {
      onuninline = new C_SafeCond(&uninline_flock,
				  &uninline_cond,
				  &uninline_done,
				  &uninline_ret);
      uninline_data(in, onuninline);
    } else {
      uint32_t len = in->inline_data.length();

      uint64_t endoff = offset + size;
      if (endoff > in->size)
	endoff = in->size;

      if ((uint64_t)offset < len) {
	if (endoff <= len) {
	  bl->substr_of(in->inline_data, offset, endoff - offset);
	} else {
	  bl->substr_of(in->inline_data, offset, len - offset);
	  bl->append_zero(endoff - len);
	}
      } else if ((uint64_t)offset < endoff) {
	bl->append_zero(endoff - offset);
      }

      goto success;
    }
  }

  if (!conf->client_debug_force_sync_read &&
      (conf->client_oc && (have & CEPH_CAP_FILE_CACHE))) {
    if (f->flags & O_RSYNC) {
      _flush_range(in, offset, size);
    }
    r = _read_async(f, offset, size, bl);
    if (r < 0)
      goto done;
  } else {
    if (f->flags & O_DIRECT)
      _flush_range(in, offset, size);

    bool checkeof = false;
    r = _read_sync(f, offset, size, bl, &checkeof);
    if (r < 0)
      goto done;
    if (checkeof) {
      offset += r;
      size -= r;

      put_cap_ref(in, CEPH_CAP_FILE_RD);
      have = 0;
      // reverify size
      r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms);
      if (r < 0)
	goto done;

      // eof?  short read.
      if ((uint64_t)offset < in->size)
	goto retry;
    }
  }

success:
  if (movepos) {
    // adjust fd pos
    f->pos = start_pos + bl->length();
    unlock_fh_pos(f);
  }

done:
  // done!

  if (onuninline) {
    client_lock.Unlock();
    uninline_flock.Lock();
    while (!uninline_done)
      uninline_cond.Wait(uninline_flock);
    uninline_flock.Unlock();
    client_lock.Lock();

    if (uninline_ret >= 0 || uninline_ret == -ECANCELED) {
      in->inline_data.clear();
      in->inline_version = CEPH_INLINE_NONE;
      in->mark_caps_dirty(CEPH_CAP_FILE_WR);
      check_caps(in, 0);
    } else
      r = uninline_ret;
  }

  if (have)
    put_cap_ref(in, CEPH_CAP_FILE_RD);
  if (r < 0)
    return r;
  return bl->length();
}
Client::C_Readahead::C_Readahead(Client *c, Fh *f) :
    client(c), f(f) {
  f->get();
  f->readahead.inc_pending();
}

Client::C_Readahead::~C_Readahead() {
  f->readahead.dec_pending();
  client->_put_fh(f);
}

void Client::C_Readahead::finish(int r) {
  lgeneric_subdout(client->cct, client, 20) << "client." << client->get_nodeid() << " " << "C_Readahead on " << f->inode << dendl;
  client->put_cap_ref(f->inode.get(), CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE);
}
int Client::_read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl)
{
  const md_config_t *conf = cct->_conf;
  Inode *in = f->inode.get();

  ldout(cct, 10) << "_read_async " << *in << " " << off << "~" << len << dendl;

  // trim read based on file size?
  if (off >= in->size)
    return 0;
  if (len == 0)
    return 0;
  if (off + len > in->size) {
    len = in->size - off;
  }

  ldout(cct, 10) << " min_bytes=" << f->readahead.get_min_readahead_size()
		 << " max_bytes=" << f->readahead.get_max_readahead_size()
		 << " max_periods=" << conf->client_readahead_max_periods << dendl;

  // read (and possibly block)
  int r, rvalue = 0;
  Mutex flock("Client::_read_async flock");
  Cond cond;
  bool done = false;
  Context *onfinish = new C_SafeCond(&flock, &cond, &done, &rvalue);
  r = objectcacher->file_read(&in->oset, &in->layout, in->snapid,
			      off, len, bl, 0, onfinish);
  if (r == 0) {
    get_cap_ref(in, CEPH_CAP_FILE_CACHE);
    client_lock.Unlock();
    flock.Lock();
    while (!done)
      cond.Wait(flock);
    flock.Unlock();
    client_lock.Lock();
    put_cap_ref(in, CEPH_CAP_FILE_CACHE);
    r = rvalue;
  } else {
    // it was cached.
    delete onfinish;
  }

  if(f->readahead.get_min_readahead_size() > 0) {
    pair<uint64_t, uint64_t> readahead_extent = f->readahead.update(off, len, in->size);
    if (readahead_extent.second > 0) {
      ldout(cct, 20) << "readahead " << readahead_extent.first << "~" << readahead_extent.second
		     << " (caller wants " << off << "~" << len << ")" << dendl;
      Context *onfinish2 = new C_Readahead(this, f);
      int r2 = objectcacher->file_read(&in->oset, &in->layout, in->snapid,
				       readahead_extent.first, readahead_extent.second,
				       NULL, 0, onfinish2);
      if (r2 == 0) {
	ldout(cct, 20) << "readahead initiated, c " << onfinish2 << dendl;
	get_cap_ref(in, CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE);
      } else {
	ldout(cct, 20) << "readahead was no-op, already cached" << dendl;
	delete onfinish2;
      }
    }
  }

  return r;
}
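
// _read_sync() bypasses the object cache and reads through the Filer.
// It loops because a striped read can come back short even when more
// data exists; a short read inside the known file size is zero-filled
// (a hole), while a short read at EOF sets *checkeof so the caller can
// re-verify the size and retry.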
int Client::_read_sync(Fh *f, uint64_t off, uint64_t len, bufferlist *bl,
		       bool *checkeof)
{
  Inode *in = f->inode.get();
  uint64_t pos = off;
  int left = len;
  int read = 0;

  ldout(cct, 10) << "_read_sync " << *in << " " << off << "~" << len << dendl;

  Mutex flock("Client::_read_sync flock");
  Cond cond;
  while (left > 0) {
    int r = 0;
    bool done = false;
    Context *onfinish = new C_SafeCond(&flock, &cond, &done, &r);
    bufferlist tbl;

    int wanted = left;
    filer->read_trunc(in->ino, &in->layout, in->snapid,
		      pos, left, &tbl, 0,
		      in->truncate_size, in->truncate_seq,
		      onfinish);
    client_lock.Unlock();
    flock.Lock();
    while (!done)
      cond.Wait(flock);
    flock.Unlock();
    client_lock.Lock();

    // if we get ENOENT from OSD, assume 0 bytes returned
    if (r == -ENOENT)
      r = 0;
    if (r < 0)
      return r;
    if (tbl.length()) {
      r = tbl.length();

      read += r;
      pos += r;
      left -= r;
      bl->claim_append(tbl);
    }
    // short read?
    if (r >= 0 && r < wanted) {
      if (pos < in->size) {
	// zero up to known EOF
	int64_t some = in->size - pos;
	if (some > left)
	  some = left;
	bufferptr z(some);
	z.zero();
	bl->push_back(z);
	read += some;
	pos += some;
	left -= some;
	if (left == 0)
	  return read;
      }

      *checkeof = true;
      return read;
    }
  }
  return read;
}
/*
 * we keep count of uncommitted sync writes on the inode, so that
 * fsync and the unmount path can wait for them to drain.
 */
void Client::_sync_write_commit(Inode *in)
{
  assert(unsafe_sync_write > 0);
  unsafe_sync_write--;

  put_cap_ref(in, CEPH_CAP_FILE_BUFFER);

  ldout(cct, 15) << "sync_write_commit unsafe_sync_write = " << unsafe_sync_write << dendl;
  if (unsafe_sync_write == 0 && unmounting) {
    ldout(cct, 10) << "sync_write_commit -- no more unsafe writes, unmount can proceed" << dendl;
    mount_cond.Signal();
  }
}
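
// The fd-based entry points that follow share one pattern: take
// client_lock, record a trace line via tout(), resolve the fd with
// get_filehandle(), reject O_PATH handles on Linux, then delegate to
// the _-prefixed internal helper. A sketch of a hypothetical caller
// (fd assumed to come from this class's open()):
//
//   int n = client->write(fd, buf, len, off);  // dispatches to _write()
//   if (n < 0)
//     ; // negative errno-style result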
int Client::write(int fd, const char *buf, loff_t size, loff_t offset)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "write" << std::endl;
  tout(cct) << fd << std::endl;
  tout(cct) << size << std::endl;
  tout(cct) << offset << std::endl;

  if (unmounting)
    return -ENOTCONN;

  Fh *fh = get_filehandle(fd);
  if (!fh)
    return -EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (fh->flags & O_PATH)
    return -EBADF;
#endif
  int r = _write(fh, offset, size, buf, NULL, 0);
  ldout(cct, 3) << "write(" << fd << ", \"...\", " << size << ", " << offset << ") = " << r << dendl;
  return r;
}
int Client::pwritev(int fd, const struct iovec *iov, int iovcnt, int64_t offset)
{
  return _preadv_pwritev(fd, iov, iovcnt, offset, true);
}
int Client::_preadv_pwritev(int fd, const struct iovec *iov, unsigned iovcnt, int64_t offset, bool write)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << fd << std::endl;
  tout(cct) << offset << std::endl;

  if (unmounting)
    return -ENOTCONN;

  Fh *fh = get_filehandle(fd);
  if (!fh)
    return -EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (fh->flags & O_PATH)
    return -EBADF;
#endif
  loff_t totallen = 0;
  for (unsigned i = 0; i < iovcnt; i++) {
    totallen += iov[i].iov_len;
  }
  if (write) {
    int w = _write(fh, offset, totallen, NULL, iov, iovcnt);
    ldout(cct, 3) << "pwritev(" << fd << ", \"...\", " << totallen << ", " << offset << ") = " << w << dendl;
    return w;
  } else {
    bufferlist bl;
    int r = _read(fh, offset, totallen, &bl);
    ldout(cct, 3) << "preadv(" << fd << ", " << offset << ") = " << r << dendl;
    if (r <= 0)
      return r;

    int bufoff = 0;
    for (unsigned j = 0, resid = r; j < iovcnt && resid > 0; j++) {
      /*
       * This piece of code aims to handle the case that bufferlist
       * does not have enough data to fill in the iov.
       */
      if (resid < iov[j].iov_len) {
	bl.copy(bufoff, resid, (char *)iov[j].iov_base);
	break;
      } else {
	bl.copy(bufoff, iov[j].iov_len, (char *)iov[j].iov_base);
      }
      resid -= iov[j].iov_len;
      bufoff += iov[j].iov_len;
    }
    return r;
  }
}
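
// _write() picks one of three data paths:
//  1. inline: while in->inline_version is below CEPH_INLINE_NONE, small
//     writes are folded into in->inline_data (or the inline data is
//     first migrated out via uninline_data());
//  2. buffered: with client_oc enabled and the Fb cap held, the write
//     goes into the ObjectCacher asynchronously;
//  3. sync: otherwise it is written through the Filer with write_trunc()
//     and the caller blocks until the OSDs acknowledge it.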
int Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf,
		   const struct iovec *iov, int iovcnt)
{
  if ((uint64_t)(offset+size) > mdsmap->get_max_filesize()) //too large!
    return -EFBIG;

  //ldout(cct, 7) << "write fh " << fh << " size " << size << " offset " << offset << dendl;
  Inode *in = f->inode.get();

  if (objecter->osdmap_pool_full(in->layout.pool_id)) {
    return -ENOSPC;
  }

  assert(in->snapid == CEPH_NOSNAP);

  // was Fh opened as writeable?
  if ((f->mode & CEPH_FILE_MODE_WR) == 0)
    return -EBADF;

  // check quota
  uint64_t endoff = offset + size;
  std::list<InodeRef> quota_roots;
  if (endoff > in->size &&
      is_quota_bytes_exceeded(in, endoff - in->size, f->actor_perms, &quota_roots)) {
    return -EDQUOT;
  }

  // use/adjust fd pos?
  if (offset < 0) {
    lock_fh_pos(f);
    /*
     * FIXME: this is racy in that we may block _after_ this point waiting for caps, and size may
     * change out from under us.
     */
    if (f->flags & O_APPEND) {
      int r = _lseek(f, 0, SEEK_END);
      if (r < 0) {
	unlock_fh_pos(f);
	return r;
      }
    }
    offset = f->pos;
    f->pos = offset+size;
    unlock_fh_pos(f);
  }

  //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;

  ldout(cct, 10) << "cur file size is " << in->size << dendl;

  // time it.
  utime_t start = ceph_clock_now();

  if (in->inline_version == 0) {
    int r = _getattr(in, CEPH_STAT_CAP_INLINE_DATA, f->actor_perms, true);
    if (r < 0)
      return r;
    assert(in->inline_version > 0);
  }

  // copy into fresh buffer (since our write may be resub, async)
  bufferlist bl;
  if (buf) {
    if (size > 0)
      bl.append(buf, size);
  } else if (iov){
    for (int i = 0; i < iovcnt; i++) {
      if (iov[i].iov_len > 0) {
	bl.append((const char *)iov[i].iov_base, iov[i].iov_len);
      }
    }
  }

  utime_t lat;
  uint64_t totalwritten;
  int have;
  int r = get_caps(in, CEPH_CAP_FILE_WR|CEPH_CAP_AUTH_SHARED,
		   CEPH_CAP_FILE_BUFFER, &have, endoff);
  if (r < 0)
    return r;

  /* clear the setuid/setgid bits, if any */
  if (unlikely(in->mode & (S_ISUID|S_ISGID)) && size > 0) {
    struct ceph_statx stx = { 0 };

    put_cap_ref(in, CEPH_CAP_AUTH_SHARED);
    r = __setattrx(in, &stx, CEPH_SETATTR_KILL_SGUID, f->actor_perms);
    if (r < 0)
      return r;
  } else {
    put_cap_ref(in, CEPH_CAP_AUTH_SHARED);
  }

  if (f->flags & O_DIRECT)
    have &= ~CEPH_CAP_FILE_BUFFER;

  ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl;

  Mutex uninline_flock("Client::_write_uninline_data flock");
  Cond uninline_cond;
  bool uninline_done = false;
  int uninline_ret = 0;
  Context *onuninline = NULL;

  if (in->inline_version < CEPH_INLINE_NONE) {
    if (endoff > cct->_conf->client_max_inline_size ||
	endoff > CEPH_INLINE_MAX_SIZE ||
	!(have & CEPH_CAP_FILE_BUFFER)) {
      onuninline = new C_SafeCond(&uninline_flock, &uninline_cond,
				  &uninline_done, &uninline_ret);
      uninline_data(in, onuninline);
    } else {
      get_cap_ref(in, CEPH_CAP_FILE_BUFFER);

      uint32_t len = in->inline_data.length();

      if (endoff < len)
	in->inline_data.copy(endoff, len - endoff, bl);

      if (offset < len)
	in->inline_data.splice(offset, len - offset);
      else if (offset > len)
	in->inline_data.append_zero(offset - len);

      in->inline_data.append(bl);
      in->inline_version++;

      put_cap_ref(in, CEPH_CAP_FILE_BUFFER);

      goto success;
    }
  }

  if (cct->_conf->client_oc && (have & CEPH_CAP_FILE_BUFFER)) {
    // do buffered write
    if (!in->oset.dirty_or_tx)
      get_cap_ref(in, CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER);

    get_cap_ref(in, CEPH_CAP_FILE_BUFFER);

    // async, caching, non-blocking.
    r = objectcacher->file_write(&in->oset, &in->layout,
				 in->snaprealm->get_snap_context(),
				 offset, size, bl, ceph::real_clock::now(),
				 0);
    put_cap_ref(in, CEPH_CAP_FILE_BUFFER);

    if (r < 0)
      goto done;

    // flush cached write if O_SYNC is set on file fh
    // O_DSYNC == O_SYNC on linux < 2.6.33
    // O_SYNC = __O_SYNC | O_DSYNC on linux >= 2.6.33
    if ((f->flags & O_SYNC) || (f->flags & O_DSYNC)) {
      _flush_range(in, offset, size);
    }
  } else {
    if (f->flags & O_DIRECT)
      _flush_range(in, offset, size);

    // simple, non-atomic sync write
    Mutex flock("Client::_write flock");
    Cond cond;
    bool done = false;
    Context *onfinish = new C_SafeCond(&flock, &cond, &done);

    unsafe_sync_write++;
    get_cap_ref(in, CEPH_CAP_FILE_BUFFER);  // released by onsafe callback

    filer->write_trunc(in->ino, &in->layout, in->snaprealm->get_snap_context(),
		       offset, size, bl, ceph::real_clock::now(), 0,
		       in->truncate_size, in->truncate_seq,
		       onfinish);
    client_lock.Unlock();
    flock.Lock();

    while (!done)
      cond.Wait(flock);
    flock.Unlock();
    client_lock.Lock();
    _sync_write_commit(in);
  }

  // if we get here, write was successful, update client metadata
success:
  // time
  lat = ceph_clock_now();
  lat -= start;
  logger->tinc(l_c_wrlat, lat);

  totalwritten = size;
  r = (int)totalwritten;

  // extend file?
  if (totalwritten + offset > in->size) {
    in->size = totalwritten + offset;
    in->mark_caps_dirty(CEPH_CAP_FILE_WR);

    if (is_quota_bytes_approaching(in, quota_roots)) {
      check_caps(in, CHECK_CAPS_NODELAY);
    } else if (is_max_size_approaching(in)) {
      check_caps(in, 0);
    }

    ldout(cct, 7) << "wrote to " << totalwritten+offset << ", extending file size" << dendl;
  } else {
    ldout(cct, 7) << "wrote to " << totalwritten+offset << ", leaving file size at " << in->size << dendl;
  }

  // mtime
  in->mtime = in->ctime = ceph_clock_now();
  in->change_attr++;
  in->mark_caps_dirty(CEPH_CAP_FILE_WR);

done:

  if (onuninline) {
    client_lock.Unlock();
    uninline_flock.Lock();
    while (!uninline_done)
      uninline_cond.Wait(uninline_flock);
    uninline_flock.Unlock();
    client_lock.Lock();

    if (uninline_ret >= 0 || uninline_ret == -ECANCELED) {
      in->inline_data.clear();
      in->inline_version = CEPH_INLINE_NONE;
      in->mark_caps_dirty(CEPH_CAP_FILE_WR);
      check_caps(in, 0);
    } else
      r = uninline_ret;
  }

  put_cap_ref(in, CEPH_CAP_FILE_WR);
  return r;
}
int Client::_flush(Fh *f)
{
  Inode *in = f->inode.get();
  int err = f->take_async_err();
  if (err != 0) {
    ldout(cct, 1) << __func__ << ": " << f << " on inode " << *in << " caught async_err = "
		  << cpp_strerror(err) << dendl;
  } else {
    ldout(cct, 10) << __func__ << ": " << f << " on inode " << *in << " no async_err state" << dendl;
  }
  return err;
}
int Client::truncate(const char *relpath, loff_t length, const UserPerm& perms)
{
  struct ceph_statx stx;
  stx.stx_size = length;
  return setattrx(relpath, &stx, CEPH_SETATTR_SIZE, perms);
}
int Client::ftruncate(int fd, loff_t length, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "ftruncate" << std::endl;
  tout(cct) << fd << std::endl;
  tout(cct) << length << std::endl;

  if (unmounting)
    return -ENOTCONN;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (f->flags & O_PATH)
    return -EBADF;
#endif
  struct stat attr;
  attr.st_size = length;
  return _setattr(f->inode, &attr, CEPH_SETATTR_SIZE, perms);
}
int Client::fsync(int fd, bool syncdataonly)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "fsync" << std::endl;
  tout(cct) << fd << std::endl;
  tout(cct) << syncdataonly << std::endl;

  if (unmounting)
    return -ENOTCONN;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (f->flags & O_PATH)
    return -EBADF;
#endif
  int r = _fsync(f, syncdataonly);
  if (r == 0) {
    // The IOs in this fsync were okay, but maybe something happened
    // in the background that we should be reporting?
    r = f->take_async_err();
    ldout(cct, 5) << "fsync(" << fd << ", " << syncdataonly
		  << ") = 0, async_err = " << r << dendl;
  } else {
    // Assume that an error we encountered during fsync, even reported
    // synchronously, would also have applied the error to the Fh, and we
    // should clear it here to avoid returning the same error again on next
    // call.
    ldout(cct, 5) << "fsync(" << fd << ", " << syncdataonly << ") = "
		  << r << dendl;
    f->take_async_err();
  }
  return r;
}
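
// Commit protocol implemented by the _fsync() overloads below: flush
// dirty data from the object cache, flush dirty caps (metadata) to the
// MDS, then wait for (a) the writeback completion, (b) any unsafe MDS
// requests on the inode, and (c) the cap-flush ack identified by
// flush_tid. Success is reported only once all three are in.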
int Client::_fsync(Inode *in, bool syncdataonly)
{
  int r = 0;
  Mutex lock("Client::_fsync::lock");
  Cond cond;
  bool done = false;
  C_SafeCond *object_cacher_completion = NULL;
  ceph_tid_t flush_tid = 0;
  InodeRef tmp_ref;

  ldout(cct, 8) << "_fsync on " << *in << " " << (syncdataonly ? "(dataonly)":"(data+metadata)") << dendl;

  if (cct->_conf->client_oc) {
    object_cacher_completion = new C_SafeCond(&lock, &cond, &done, &r);
    tmp_ref = in; // take a reference; C_SafeCond doesn't and _flush won't either
    _flush(in, object_cacher_completion);
    ldout(cct, 15) << "using return-valued form of _fsync" << dendl;
  }

  if (!syncdataonly && in->dirty_caps) {
    check_caps(in, CHECK_CAPS_NODELAY|CHECK_CAPS_SYNCHRONOUS);
    if (in->flushing_caps)
      flush_tid = last_flush_tid;
  } else ldout(cct, 10) << "no metadata needs to commit" << dendl;

  if (!syncdataonly && !in->unsafe_ops.empty()) {
    MetaRequest *req = in->unsafe_ops.back();
    ldout(cct, 15) << "waiting on unsafe requests, last tid " << req->get_tid() << dendl;

    req->get();
    wait_on_list(req->waitfor_safe);
    put_request(req);
  }

  if (object_cacher_completion) { // wait on a real reply instead of guessing
    client_lock.Unlock();
    lock.Lock();
    ldout(cct, 15) << "waiting on data to flush" << dendl;
    while (!done)
      cond.Wait(lock);
    lock.Unlock();
    client_lock.Lock();
    ldout(cct, 15) << "got " << r << " from flush writeback" << dendl;
  } else {
    // FIXME: this can starve
    while (in->cap_refs[CEPH_CAP_FILE_BUFFER] > 0) {
      ldout(cct, 10) << "ino " << in->ino << " has " << in->cap_refs[CEPH_CAP_FILE_BUFFER]
		     << " uncommitted, waiting" << dendl;
      wait_on_list(in->waitfor_commit);
    }
  }

  if (!r) {
    if (flush_tid > 0)
      wait_sync_caps(in, flush_tid);

    ldout(cct, 10) << "ino " << in->ino << " has no uncommitted writes" << dendl;
  } else {
    ldout(cct, 8) << "ino " << in->ino << " failed to commit to disk! "
		  << cpp_strerror(-r) << dendl;
  }

  return r;
}
int Client::_fsync(Fh *f, bool syncdataonly)
{
  ldout(cct, 8) << "_fsync(" << f << ", " << (syncdataonly ? "dataonly)":"data+metadata)") << dendl;
  return _fsync(f->inode.get(), syncdataonly);
}
int Client::fstat(int fd, struct stat *stbuf, const UserPerm& perms, int mask)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "fstat mask " << hex << mask << dec << std::endl;
  tout(cct) << fd << std::endl;

  if (unmounting)
    return -ENOTCONN;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
  int r = _getattr(f->inode, mask, perms);
  if (r < 0)
    return r;
  fill_stat(f->inode, stbuf, NULL);
  ldout(cct, 5) << "fstat(" << fd << ", " << stbuf << ") = " << r << dendl;
  return r;
}
int Client::fstatx(int fd, struct ceph_statx *stx, const UserPerm& perms,
		   unsigned int want, unsigned int flags)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "fstatx flags " << hex << flags << " want " << want << dec << std::endl;
  tout(cct) << fd << std::endl;

  if (unmounting)
    return -ENOTCONN;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;

  unsigned mask = statx_to_mask(flags, want);

  int r = 0;
  if (mask && !f->inode->caps_issued_mask(mask, true)) {
    r = _getattr(f->inode, mask, perms);
    if (r < 0) {
      ldout(cct, 3) << "fstatx exit on error!" << dendl;
      return r;
    }
  }

  fill_statx(f->inode, mask, stx);
  ldout(cct, 3) << "fstatx(" << fd << ", " << stx << ") = " << r << dendl;
  return r;
}
// not written yet, but i want to link!

int Client::chdir(const char *relpath, std::string &new_cwd,
		  const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "chdir" << std::endl;
  tout(cct) << relpath << std::endl;

  if (unmounting)
    return -ENOTCONN;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms);
  if (r < 0)
    return r;
  if (cwd != in)
    cwd.swap(in);
  ldout(cct, 3) << "chdir(" << relpath << ") cwd now " << cwd->ino << dendl;

  _getcwd(new_cwd, perms);
  return 0;
}
void Client::_getcwd(string& dir, const UserPerm& perms)
{
  filepath path;
  ldout(cct, 10) << "getcwd " << *cwd << dendl;

  Inode *in = cwd.get();
  while (in != root) {
    assert(in->dn_set.size() < 2); // dirs can't be hard-linked

    // A cwd or ancester is unlinked
    if (in->dn_set.empty()) {
      return;
    }

    Dentry *dn = in->get_first_parent();

    if (!dn) {
      // look it up
      ldout(cct, 10) << "getcwd looking up parent for " << *in << dendl;
      MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPNAME);
      filepath path(in->ino);
      req->set_filepath(path);
      req->set_inode(in);
      int res = make_request(req, perms);
      if (res < 0)
	break;

      // start over
      path = filepath();
      in = cwd.get();
      continue;
    }
    path.push_front_dentry(dn->name);
    in = dn->dir->parent_inode;
  }
  dir = "/";
  dir += path.get_path();
}

void Client::getcwd(string& dir, const UserPerm& perms)
{
  Mutex::Locker l(client_lock);
  if (!unmounting)
    _getcwd(dir, perms);
}
int Client::statfs(const char *path, struct statvfs *stbuf,
		   const UserPerm& perms)
{
  Mutex::Locker l(client_lock);
  tout(cct) << "statfs" << std::endl;
  unsigned long int total_files_on_fs;

  if (unmounting)
    return -ENOTCONN;

  ceph_statfs stats;
  C_SaferCond cond;

  const vector<int64_t> &data_pools = mdsmap->get_data_pools();
  if (data_pools.size() == 1) {
    objecter->get_fs_stats(stats, data_pools[0], &cond);
  } else {
    objecter->get_fs_stats(stats, boost::optional<int64_t>(), &cond);
  }

  client_lock.Unlock();
  int rval = cond.wait();
  client_lock.Lock();

  total_files_on_fs = root->rstat.rfiles + root->rstat.rsubdirs;

  if (rval < 0) {
    ldout(cct, 1) << "underlying call to statfs returned error: "
		  << cpp_strerror(rval)
		  << dendl;
    return rval;
  }

  memset(stbuf, 0, sizeof(*stbuf));

  /*
   * we're going to set a block size of 4MB so we can represent larger
   * FSes without overflowing. Additionally convert the space
   * measurements from KB to bytes while making them in terms of
   * blocks. We use 4MB only because it is big enough, and because it
   * actually *is* the (ceph) default block size.
   */
  const int CEPH_BLOCK_SHIFT = 22;
  stbuf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
  stbuf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
  stbuf->f_files = total_files_on_fs;
  stbuf->f_ffree = 0;
  stbuf->f_favail = -1;
  stbuf->f_fsid = -1;       // ??
  stbuf->f_flag = 0;        // ??
  stbuf->f_namemax = NAME_MAX;

  // Usually quota_root will == root_ancestor, but if the mount root has no
  // quota but we can see a parent of it that does have a quota, we'll
  // respect that one instead.
  assert(root != nullptr);
  Inode *quota_root = root->quota.is_enable() ? root : get_quota_root(root, perms);

  // get_quota_root should always give us something
  // because client quotas are always enabled
  assert(quota_root != nullptr);

  if (quota_root && cct->_conf->client_quota_df && quota_root->quota.max_bytes) {

    // Skip the getattr if any sessions are stale, as we don't want to
    // block `df` if this client has e.g. been evicted, or if the MDS cluster
    // is unhealthy.
    if (!_any_stale_sessions()) {
      int r = _getattr(quota_root, 0, perms, true);
      if (r != 0) {
	// Ignore return value: error getting latest inode metadata is not a good
	// reason to break "df".
	lderr(cct) << "Error in getattr on quota root 0x"
		   << std::hex << quota_root->ino << std::dec
		   << " statfs result may be outdated" << dendl;
      }
    }

    // Special case: if there is a size quota set on the Inode acting
    // as the root for this client mount, then report the quota status
    // as the filesystem statistics.
    const fsblkcnt_t total = quota_root->quota.max_bytes >> CEPH_BLOCK_SHIFT;
    const fsblkcnt_t used = quota_root->rstat.rbytes >> CEPH_BLOCK_SHIFT;
    // It is possible for a quota to be exceeded: arithmetic here must
    // handle case where used > total.
    const fsblkcnt_t free = total > used ? total - used : 0;

    stbuf->f_blocks = total;
    stbuf->f_bfree = free;
    stbuf->f_bavail = free;
  } else {
    // General case: report the cluster statistics returned from RADOS. Because
    // multiple pools may be used without one filesystem namespace via
    // layouts, this is the most correct thing we can do.
    stbuf->f_blocks = stats.kb >> (CEPH_BLOCK_SHIFT - 10);
    stbuf->f_bfree = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
    stbuf->f_bavail = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
  }

  return 0;
}
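
// File locking. POSIX lock types map onto CephFS lock commands in the
// if/else chains below:
//   F_RDLCK -> CEPH_LOCK_SHARED
//   F_WRLCK -> CEPH_LOCK_EXCL
//   F_UNLCK -> CEPH_LOCK_UNLOCK
// The MDS identifies lock owners by the 'owner' value alone, so the
// top bit of owner is set to tell it not to fall back to (owner, pid)
// matching.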
int Client::_do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep,
			 struct flock *fl, uint64_t owner, bool removing)
{
  ldout(cct, 10) << "_do_filelock ino " << in->ino
		 << (lock_type == CEPH_LOCK_FCNTL ? " fcntl" : " flock")
		 << " type " << fl->l_type << " owner " << owner
		 << " " << fl->l_start << "~" << fl->l_len << dendl;

  int lock_cmd;
  if (F_RDLCK == fl->l_type)
    lock_cmd = CEPH_LOCK_SHARED;
  else if (F_WRLCK == fl->l_type)
    lock_cmd = CEPH_LOCK_EXCL;
  else if (F_UNLCK == fl->l_type)
    lock_cmd = CEPH_LOCK_UNLOCK;
  else
    return -EIO;

  if (op != CEPH_MDS_OP_SETFILELOCK || lock_cmd == CEPH_LOCK_UNLOCK)
    sleep = 0;

  /*
   * Set the most significant bit, so that MDS knows the 'owner'
   * is sufficient to identify the owner of lock. (old code uses
   * both 'owner' and 'pid')
   */
  owner |= (1ULL << 63);

  MetaRequest *req = new MetaRequest(op);
  filepath path;
  in->make_nosnap_relative_path(path);
  req->set_filepath(path);
  req->set_inode(in);

  req->head.args.filelock_change.rule = lock_type;
  req->head.args.filelock_change.type = lock_cmd;
  req->head.args.filelock_change.owner = owner;
  req->head.args.filelock_change.pid = fl->l_pid;
  req->head.args.filelock_change.start = fl->l_start;
  req->head.args.filelock_change.length = fl->l_len;
  req->head.args.filelock_change.wait = sleep;

  int ret;
  bufferlist bl;

  if (sleep && switch_interrupt_cb) {
    // enable interrupt
    switch_interrupt_cb(callback_handle, req->get());
    ret = make_request(req, fh->actor_perms, NULL, NULL, -1, &bl);
    // disable interrupt
    switch_interrupt_cb(callback_handle, NULL);
    if (ret == 0 && req->aborted()) {
      // effect of this lock request has been revoked by the 'lock intr' request
      ret = req->get_abort_code();
    }
    put_request(req);
  } else {
    ret = make_request(req, fh->actor_perms, NULL, NULL, -1, &bl);
  }

  if (ret == 0) {
    if (op == CEPH_MDS_OP_GETFILELOCK) {
      ceph_filelock filelock;
      bufferlist::iterator p = bl.begin();
      ::decode(filelock, p);

      if (CEPH_LOCK_SHARED == filelock.type)
	fl->l_type = F_RDLCK;
      else if (CEPH_LOCK_EXCL == filelock.type)
	fl->l_type = F_WRLCK;
      else
	fl->l_type = F_UNLCK;

      fl->l_whence = SEEK_SET;
      fl->l_start = filelock.start;
      fl->l_len = filelock.length;
      fl->l_pid = filelock.pid;
    } else if (op == CEPH_MDS_OP_SETFILELOCK) {
      ceph_lock_state_t *lock_state;
      if (lock_type == CEPH_LOCK_FCNTL) {
	if (!in->fcntl_locks)
	  in->fcntl_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FCNTL);
	lock_state = in->fcntl_locks;
      } else if (lock_type == CEPH_LOCK_FLOCK) {
	if (!in->flock_locks)
	  in->flock_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FLOCK);
	lock_state = in->flock_locks;
      } else {
	return -EINVAL;
      }
      _update_lock_state(fl, owner, lock_state);

      if (!removing) {
	if (lock_type == CEPH_LOCK_FCNTL) {
	  if (!fh->fcntl_locks)
	    fh->fcntl_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FCNTL);
	  lock_state = fh->fcntl_locks;
	} else {
	  if (!fh->flock_locks)
	    fh->flock_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FLOCK);
	  lock_state = fh->flock_locks;
	}
	_update_lock_state(fl, owner, lock_state);
      }
    }
  }
  return ret;
}
int Client::_interrupt_filelock(MetaRequest *req)
{
  // Set abort code, but do not kick. The abort code prevents the request
  // from being re-sent.
  req->abort(-EINTR);
  if (req->mds < 0)
    return 0; // haven't sent the request

  Inode *in = req->inode();

  int lock_type;
  if (req->head.args.filelock_change.rule == CEPH_LOCK_FLOCK)
    lock_type = CEPH_LOCK_FLOCK_INTR;
  else if (req->head.args.filelock_change.rule == CEPH_LOCK_FCNTL)
    lock_type = CEPH_LOCK_FCNTL_INTR;
  else
    return -EINVAL;

  MetaRequest *intr_req = new MetaRequest(CEPH_MDS_OP_SETFILELOCK);
  filepath path;
  in->make_nosnap_relative_path(path);
  intr_req->set_filepath(path);
  intr_req->set_inode(in);
  intr_req->head.args.filelock_change = req->head.args.filelock_change;
  intr_req->head.args.filelock_change.rule = lock_type;
  intr_req->head.args.filelock_change.type = CEPH_LOCK_UNLOCK;

  UserPerm perms(req->get_uid(), req->get_gid());
  return make_request(intr_req, perms, NULL, NULL, -1);
}
void Client::_encode_filelocks(Inode *in, bufferlist& bl)
{
  if (!in->fcntl_locks && !in->flock_locks)
    return;

  unsigned nr_fcntl_locks = in->fcntl_locks ? in->fcntl_locks->held_locks.size() : 0;
  ::encode(nr_fcntl_locks, bl);
  if (nr_fcntl_locks) {
    ceph_lock_state_t* lock_state = in->fcntl_locks;
    for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
	p != lock_state->held_locks.end();
	++p)
      ::encode(p->second, bl);
  }

  unsigned nr_flock_locks = in->flock_locks ? in->flock_locks->held_locks.size() : 0;
  ::encode(nr_flock_locks, bl);
  if (nr_flock_locks) {
    ceph_lock_state_t* lock_state = in->flock_locks;
    for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
	p != lock_state->held_locks.end();
	++p)
      ::encode(p->second, bl);
  }

  ldout(cct, 10) << "_encode_filelocks ino " << in->ino << ", " << nr_fcntl_locks
		 << " fcntl locks, " << nr_flock_locks << " flock locks" << dendl;
}
void Client::_release_filelocks(Fh *fh)
{
  if (!fh->fcntl_locks && !fh->flock_locks)
    return;

  Inode *in = fh->inode.get();
  ldout(cct, 10) << "_release_filelocks " << fh << " ino " << in->ino << dendl;

  list<pair<int, ceph_filelock> > to_release;

  if (fh->fcntl_locks) {
    ceph_lock_state_t* lock_state = fh->fcntl_locks;
    for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
	p != lock_state->held_locks.end();
	++p)
      to_release.push_back(pair<int, ceph_filelock>(CEPH_LOCK_FCNTL, p->second));
    delete fh->fcntl_locks;
  }
  if (fh->flock_locks) {
    ceph_lock_state_t* lock_state = fh->flock_locks;
    for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
	p != lock_state->held_locks.end();
	++p)
      to_release.push_back(pair<int, ceph_filelock>(CEPH_LOCK_FLOCK, p->second));
    delete fh->flock_locks;
  }

  if (to_release.empty())
    return;

  struct flock fl;
  memset(&fl, 0, sizeof(fl));
  fl.l_whence = SEEK_SET;
  fl.l_type = F_UNLCK;

  for (list<pair<int, ceph_filelock> >::iterator p = to_release.begin();
       p != to_release.end();
       ++p) {
    fl.l_start = p->second.start;
    fl.l_len = p->second.length;
    fl.l_pid = p->second.pid;
    _do_filelock(in, fh, p->first, CEPH_MDS_OP_SETFILELOCK, 0, &fl,
		 p->second.owner, true);
  }
}
void Client::_update_lock_state(struct flock *fl, uint64_t owner,
				ceph_lock_state_t *lock_state)
{
  int lock_cmd;
  if (F_RDLCK == fl->l_type)
    lock_cmd = CEPH_LOCK_SHARED;
  else if (F_WRLCK == fl->l_type)
    lock_cmd = CEPH_LOCK_EXCL;
  else
    lock_cmd = CEPH_LOCK_UNLOCK;

  ceph_filelock filelock;
  filelock.start = fl->l_start;
  filelock.length = fl->l_len;
  filelock.client = 0;
  // see comment in _do_filelock()
  filelock.owner = owner | (1ULL << 63);
  filelock.pid = fl->l_pid;
  filelock.type = lock_cmd;

  if (filelock.type == CEPH_LOCK_UNLOCK) {
    list<ceph_filelock> activated_locks;
    lock_state->remove_lock(filelock, activated_locks);
  } else {
    bool r = lock_state->add_lock(filelock, false, false, NULL);
    assert(r);
  }
}
int Client::_getlk(Fh *fh, struct flock *fl, uint64_t owner)
{
  Inode *in = fh->inode.get();
  ldout(cct, 10) << "_getlk " << fh << " ino " << in->ino << dendl;
  int ret = _do_filelock(in, fh, CEPH_LOCK_FCNTL, CEPH_MDS_OP_GETFILELOCK, 0, fl, owner);
  return ret;
}

int Client::_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep)
{
  Inode *in = fh->inode.get();
  ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << dendl;
  int ret = _do_filelock(in, fh, CEPH_LOCK_FCNTL, CEPH_MDS_OP_SETFILELOCK, sleep, fl, owner);
  ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << " result=" << ret << dendl;
  return ret;
}

int Client::_flock(Fh *fh, int cmd, uint64_t owner)
{
  Inode *in = fh->inode.get();
  ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << dendl;

  int sleep = !(cmd & LOCK_NB);
  cmd &= ~LOCK_NB;

  int type;
  switch (cmd) {
  case LOCK_SH:
    type = F_RDLCK;
    break;
  case LOCK_EX:
    type = F_WRLCK;
    break;
  case LOCK_UN:
    type = F_UNLCK;
    break;
  default:
    return -EINVAL;
  }

  struct flock fl;
  memset(&fl, 0, sizeof(fl));
  fl.l_type = type;
  fl.l_whence = SEEK_SET;

  int ret = _do_filelock(in, fh, CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, sleep, &fl, owner);
  ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << " result=" << ret << dendl;
  return ret;
}
int Client::ll_statfs(Inode *in, struct statvfs *stbuf, const UserPerm& perms)
{
  /* Since the only thing this does is wrap a call to statfs, and
     statfs takes a lock, it doesn't seem we have a need to split it
     out. */
  return statfs(0, stbuf, perms);
}
void Client::ll_register_callbacks(struct client_callback_args *args)
{
  if (!args)
    return;
  Mutex::Locker l(client_lock);
  ldout(cct, 10) << "ll_register_callbacks cb " << args->handle
		 << " invalidate_ino_cb " << args->ino_cb
		 << " invalidate_dentry_cb " << args->dentry_cb
		 << " switch_interrupt_cb " << args->switch_intr_cb
		 << " remount_cb " << args->remount_cb
		 << dendl;
  callback_handle = args->handle;
  if (args->ino_cb) {
    ino_invalidate_cb = args->ino_cb;
    async_ino_invalidator.start();
  }
  if (args->dentry_cb) {
    dentry_invalidate_cb = args->dentry_cb;
    async_dentry_invalidator.start();
  }
  if (args->switch_intr_cb) {
    switch_interrupt_cb = args->switch_intr_cb;
    interrupt_finisher.start();
  }
  if (args->remount_cb) {
    remount_cb = args->remount_cb;
    remount_finisher.start();
  }
  umask_cb = args->umask_cb;
}
int Client::test_dentry_handling(bool can_invalidate)
{
  int r = 0;

  can_invalidate_dentries = can_invalidate;

  if (can_invalidate_dentries) {
    assert(dentry_invalidate_cb);
    ldout(cct, 1) << "using dentry_invalidate_cb" << dendl;
  } else if (remount_cb) {
    ldout(cct, 1) << "using remount_cb" << dendl;
    r = _do_remount(false);
  }
  if (r) {
    bool should_abort = cct->_conf->get_val<bool>("client_die_on_failed_dentry_invalidate");
    if (should_abort) {
      lderr(cct) << "no method to invalidate kernel dentry cache; quitting!" << dendl;
      ceph_abort();
    } else {
      lderr(cct) << "no method to invalidate kernel dentry cache; expect issues!" << dendl;
    }
  }
  return r;
}
int Client::_sync_fs()
{
  ldout(cct, 10) << "_sync_fs" << dendl;

  // flush file data
  Mutex lock("Client::_fsync::lock");
  Cond cond;
  bool flush_done = false;
  if (cct->_conf->client_oc)
    objectcacher->flush_all(new C_SafeCond(&lock, &cond, &flush_done));
  else
    flush_done = true;

  // flush caps
  flush_caps_sync();
  ceph_tid_t flush_tid = last_flush_tid;

  // wait for unsafe mds requests
  wait_unsafe_requests();

  wait_sync_caps(flush_tid);

  if (!flush_done) {
    client_lock.Unlock();
    lock.Lock();
    ldout(cct, 15) << "waiting on data to flush" << dendl;
    while (!flush_done)
      cond.Wait(lock);
    lock.Unlock();
    client_lock.Lock();
  }

  return 0;
}

int Client::sync_fs()
{
  Mutex::Locker l(client_lock);

  if (unmounting)
    return -ENOTCONN;

  return _sync_fs();
}

int64_t Client::drop_caches()
{
  Mutex::Locker l(client_lock);
  return objectcacher->release_all();
}
int Client::lazyio_propogate(int fd, loff_t offset, size_t count)
{
  Mutex::Locker l(client_lock);
  ldout(cct, 3) << "op: client->lazyio_propogate(" << fd
		<< ", " << offset << ", " << count << ")" << dendl;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;

  // for now, treat it like a caching mode flush
  _flush_range(f->inode.get(), offset, count);

  return 0;
}

int Client::lazyio_synchronize(int fd, loff_t offset, size_t count)
{
  Mutex::Locker l(client_lock);
  ldout(cct, 3) << "op: client->lazyio_synchronize(" << fd
		<< ", " << offset << ", " << count << ")" << dendl;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
  Inode *in = f->inode.get();

  _fsync(f, true);
  _release(in);

  return 0;
}
// =============================
// snaps

int Client::mksnap(const char *relpath, const char *name, const UserPerm& perm)
{
  Mutex::Locker l(client_lock);

  if (unmounting)
    return -ENOTCONN;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perm);
  if (r < 0)
    return r;
  if (cct->_conf->client_permissions) {
    r = may_create(in.get(), perm);
    if (r < 0)
      return r;
  }
  Inode *snapdir = open_snapdir(in.get());
  return _mkdir(snapdir, name, 0, perm);
}

int Client::rmsnap(const char *relpath, const char *name, const UserPerm& perms)
{
  Mutex::Locker l(client_lock);

  if (unmounting)
    return -ENOTCONN;

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms);
  if (r < 0)
    return r;
  if (cct->_conf->client_permissions) {
    r = may_delete(in.get(), NULL, perms);
    if (r < 0)
      return r;
  }
  Inode *snapdir = open_snapdir(in.get());
  return _rmdir(snapdir, name, perms);
}
// =============================
// expose caps

int Client::get_caps_issued(int fd) {
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;

  return f->inode->caps_issued();
}

int Client::get_caps_issued(const char *path, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  filepath p(path);
  InodeRef in;
  int r = path_walk(p, &in, perms, true);
  if (r < 0)
    return r;
  return in->caps_issued();
}
// =========================================
// low level

Inode *Client::open_snapdir(Inode *diri)
{
  Inode *in;
  vinodeno_t vino(diri->ino, CEPH_SNAPDIR);
  if (!inode_map.count(vino)) {
    in = new Inode(this, vino, &diri->layout);

    in->ino = diri->ino;
    in->snapid = CEPH_SNAPDIR;
    in->mode = diri->mode;
    in->uid = diri->uid;
    in->gid = diri->gid;
    in->mtime = diri->mtime;
    in->ctime = diri->ctime;
    in->btime = diri->btime;
    in->size = diri->size;
    in->change_attr = diri->change_attr;

    in->dirfragtree.clear();
    in->snapdir_parent = diri;
    diri->flags |= I_SNAPDIR_OPEN;
    inode_map[vino] = in;
    if (use_faked_inos())
      _assign_faked_ino(in);
    ldout(cct, 10) << "open_snapdir created snapshot inode " << *in << dendl;
  } else {
    in = inode_map[vino];
    ldout(cct, 10) << "open_snapdir had snapshot inode " << *in << dendl;
  }
  return in;
}
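
// The ll_* methods below are the low-level (FUSE-style) interface: they
// take Inode pointers rather than paths, and every Inode* handed back
// to the caller carries an ll_ref reference (see _ll_get/_ll_put below)
// that the caller returns via ll_forget()/ll_put().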
int Client::ll_lookup(Inode *parent, const char *name, struct stat *attr,
		      Inode **out, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  vinodeno_t vparent = _get_vino(parent);
  ldout(cct, 3) << "ll_lookup " << vparent << " " << name << dendl;
  tout(cct) << "ll_lookup" << std::endl;
  tout(cct) << name << std::endl;

  if (unmounting)
    return -ENOTCONN;

  int r = 0;
  if (!cct->_conf->fuse_default_permissions) {
    r = may_lookup(parent, perms);
    if (r < 0)
      return r;
  }

  string dname(name);
  InodeRef in;

  r = _lookup(parent, dname, CEPH_STAT_CAP_INODE_ALL, &in, perms);
  if (r < 0) {
    attr->st_ino = 0;
    goto out;
  }

  assert(in);
  fill_stat(in, attr);
  _ll_get(in.get());

 out:
  ldout(cct, 3) << "ll_lookup " << vparent << " " << name
		<< " -> " << r << " (" << hex << attr->st_ino << dec << ")" << dendl;
  tout(cct) << attr->st_ino << std::endl;
  *out = in.get();
  return r;
}
int Client::ll_lookup_inode(
    struct inodeno_t ino,
    const UserPerm& perms,
    Inode **inode)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "ll_lookup_inode " << ino << dendl;

  // Num1: get inode and *inode
  int r = _lookup_ino(ino, perms, inode);
  if (r)
    return r;
  assert(inode != NULL);
  assert(*inode != NULL);

  // Num2: Request the parent inode, so that we can look up the name
  Inode *parent;
  r = _lookup_parent(*inode, perms, &parent);
  if (r && r != -EINVAL) {
    // Unexpected error
    _ll_forget(*inode, 1);
    return r;
  } else if (r == -EINVAL) {
    // EINVAL indicates node without parents (root), drop out now
    // and don't try to look up the non-existent dentry.
    return 0;
  }
  // FIXME: I don't think this works; lookup_parent() returns 0 if the parent
  // is already in cache
  assert(parent != NULL);

  // Num3: Finally, get the name (dentry) of the requested inode
  r = _lookup_name(*inode, parent, perms);
  if (r) {
    // Unexpected error
    _ll_forget(parent, 1);
    _ll_forget(*inode, 1);
    return r;
  }

  _ll_forget(parent, 1);
  return 0;
}
int Client::ll_lookupx(Inode *parent, const char *name, Inode **out,
		       struct ceph_statx *stx, unsigned want, unsigned flags,
		       const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  vinodeno_t vparent = _get_vino(parent);
  ldout(cct, 3) << "ll_lookupx " << vparent << " " << name << dendl;
  tout(cct) << "ll_lookupx" << std::endl;
  tout(cct) << name << std::endl;

  if (unmounting)
    return -ENOTCONN;

  int r = 0;
  if (!cct->_conf->fuse_default_permissions) {
    r = may_lookup(parent, perms);
    if (r < 0)
      return r;
  }

  string dname(name);
  InodeRef in;

  unsigned mask = statx_to_mask(flags, want);
  r = _lookup(parent, dname, mask, &in, perms);
  if (r < 0) {
    stx->stx_ino = 0;
    stx->stx_mask = 0;
  } else {
    assert(in);
    fill_statx(in, mask, stx);
    _ll_get(in.get());
  }

  ldout(cct, 3) << "ll_lookupx " << vparent << " " << name
		<< " -> " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl;
  tout(cct) << stx->stx_ino << std::endl;
  *out = in.get();
  return r;
}
int Client::ll_walk(const char* name, Inode **out, struct ceph_statx *stx,
		    unsigned int want, unsigned int flags, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  filepath fp(name, 0);
  InodeRef in;
  int rc;
  unsigned mask = statx_to_mask(flags, want);

  ldout(cct, 3) << "ll_walk" << name << dendl;
  tout(cct) << "ll_walk" << std::endl;
  tout(cct) << name << std::endl;

  rc = path_walk(fp, &in, perms, !(flags & AT_SYMLINK_NOFOLLOW), mask);
  if (rc < 0) {
    /* zero out mask, just in case... */
    stx->stx_mask = 0;
    stx->stx_ino = 0;
    *out = NULL;
    return rc;
  } else {
    assert(in);
    fill_statx(in, mask, stx);
    _ll_get(in.get());
    *out = in.get();
    return 0;
  }
}
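
// ll_ref bookkeeping: the first low-level reference to a directory also
// pins its parent dentry so the path to a live handle cannot be trimmed
// from the cache; the last put unpins it again.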
void Client::_ll_get(Inode *in)
{
  if (in->ll_ref == 0) {
    in->get();
    if (in->is_dir() && !in->dn_set.empty()) {
      assert(in->dn_set.size() == 1); // dirs can't be hard-linked
      in->get_first_parent()->get(); // pin dentry
    }
  }
  in->ll_get();
  ldout(cct, 20) << "_ll_get " << in << " " << in->ino << " -> " << in->ll_ref << dendl;
}

int Client::_ll_put(Inode *in, int num)
{
  in->ll_put(num);
  ldout(cct, 20) << "_ll_put " << in << " " << in->ino << " " << num << " -> " << in->ll_ref << dendl;
  if (in->ll_ref == 0) {
    if (in->is_dir() && !in->dn_set.empty()) {
      assert(in->dn_set.size() == 1); // dirs can't be hard-linked
      in->get_first_parent()->put(); // unpin dentry
    }
    put_inode(in);
    return 0;
  } else {
    return in->ll_ref;
  }
}
void Client::_ll_drop_pins()
{
  ldout(cct, 10) << "_ll_drop_pins" << dendl;
  std::set<InodeRef> to_be_put; //this set will be deconstructed item by item when exit
  ceph::unordered_map<vinodeno_t, Inode*>::iterator next;
  for (ceph::unordered_map<vinodeno_t, Inode*>::iterator it = inode_map.begin();
       it != inode_map.end();
       it = next) {
    Inode *in = it->second;
    next = it;
    ++next;
    if (in->ll_ref){
      to_be_put.insert(in);
      _ll_put(in, in->ll_ref);
    }
  }
}
bool Client::_ll_forget(Inode *in, int count)
{
  inodeno_t ino = _get_inodeno(in);

  ldout(cct, 8) << "ll_forget " << ino << " " << count << dendl;
  tout(cct) << "ll_forget" << std::endl;
  tout(cct) << ino.val << std::endl;
  tout(cct) << count << std::endl;

  // Ignore forget if we're no longer mounted
  if (unmounting)
    return true;

  if (ino == 1) return true;  // ignore forget on root.

  bool last = false;
  if (in->ll_ref < count) {
    ldout(cct, 1) << "WARNING: ll_forget on " << ino << " " << count
		  << ", which only has ll_ref=" << in->ll_ref << dendl;
    _ll_put(in, in->ll_ref);
    last = true;
  } else {
    if (_ll_put(in, count) == 0)
      last = true;
  }

  return last;
}

bool Client::ll_forget(Inode *in, int count)
{
  Mutex::Locker lock(client_lock);
  return _ll_forget(in, count);
}

bool Client::ll_put(Inode *in)
{
  /* ll_forget already takes the lock */
  return ll_forget(in, 1);
}

snapid_t Client::ll_get_snapid(Inode *in)
{
  Mutex::Locker lock(client_lock);
  return in->snapid;
}
Inode *Client::ll_get_inode(ino_t ino)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return NULL;

  vinodeno_t vino = _map_faked_ino(ino);
  unordered_map<vinodeno_t,Inode*>::iterator p = inode_map.find(vino);
  if (p == inode_map.end())
    return NULL;
  Inode *in = p->second;
  _ll_get(in);
  return in;
}

Inode *Client::ll_get_inode(vinodeno_t vino)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return NULL;

  unordered_map<vinodeno_t,Inode*>::iterator p = inode_map.find(vino);
  if (p == inode_map.end())
    return NULL;
  Inode *in = p->second;
  _ll_get(in);
  return in;
}
int Client::_ll_getattr(Inode *in, int caps, const UserPerm& perms)
{
  vinodeno_t vino = _get_vino(in);

  ldout(cct, 8) << "ll_getattr " << vino << dendl;
  tout(cct) << "ll_getattr" << std::endl;
  tout(cct) << vino.ino.val << std::endl;

  if (vino.snapid < CEPH_NOSNAP)
    return 0;
  else
    return _getattr(in, caps, perms);
}

int Client::ll_getattr(Inode *in, struct stat *attr, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  int res = _ll_getattr(in, CEPH_STAT_CAP_INODE_ALL, perms);

  if (res == 0)
    fill_stat(in, attr);
  ldout(cct, 3) << "ll_getattr " << _get_vino(in) << " = " << res << dendl;
  return res;
}

int Client::ll_getattrx(Inode *in, struct ceph_statx *stx, unsigned int want,
			unsigned int flags, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  int res = 0;
  unsigned mask = statx_to_mask(flags, want);

  if (mask && !in->caps_issued_mask(mask, true))
    res = _ll_getattr(in, mask, perms);

  if (res == 0)
    fill_statx(in, mask, stx);
  ldout(cct, 3) << "ll_getattrx " << _get_vino(in) << " = " << res << dendl;
  return res;
}
int Client::_ll_setattrx(Inode *in, struct ceph_statx *stx, int mask,
			 const UserPerm& perms, InodeRef *inp)
{
  vinodeno_t vino = _get_vino(in);

  ldout(cct, 8) << "ll_setattrx " << vino << " mask " << hex << mask << dec
		<< dendl;
  tout(cct) << "ll_setattrx" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << stx->stx_mode << std::endl;
  tout(cct) << stx->stx_uid << std::endl;
  tout(cct) << stx->stx_gid << std::endl;
  tout(cct) << stx->stx_size << std::endl;
  tout(cct) << stx->stx_mtime << std::endl;
  tout(cct) << stx->stx_atime << std::endl;
  tout(cct) << stx->stx_btime << std::endl;
  tout(cct) << mask << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int res = may_setattr(in, stx, mask, perms);
    if (res < 0)
      return res;
  }

  mask &= ~(CEPH_SETATTR_MTIME_NOW | CEPH_SETATTR_ATIME_NOW);

  return __setattrx(in, stx, mask, perms, inp);
}

int Client::ll_setattrx(Inode *in, struct ceph_statx *stx, int mask,
			const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  InodeRef target(in);
  int res = _ll_setattrx(in, stx, mask, perms, &target);
  if (res == 0) {
    assert(in == target.get());
    fill_statx(in, in->caps_issued(), stx);
  }

  ldout(cct, 3) << "ll_setattrx " << _get_vino(in) << " = " << res << dendl;
  return res;
}

int Client::ll_setattr(Inode *in, struct stat *attr, int mask,
		       const UserPerm& perms)
{
  struct ceph_statx stx;
  stat_to_statx(attr, &stx);

  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  InodeRef target(in);
  int res = _ll_setattrx(in, &stx, mask, perms, &target);
  if (res == 0) {
    assert(in == target.get());
    fill_stat(in, attr);
  }

  ldout(cct, 3) << "ll_setattr " << _get_vino(in) << " = " << res << dendl;
  return res;
}
int Client::getxattr(const char *path, const char *name, void *value, size_t size,
		     const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  InodeRef in;
  int r = Client::path_walk(path, &in, perms, true, CEPH_STAT_CAP_XATTR);
  if (r < 0)
    return r;
  return _getxattr(in, name, value, size, perms);
}

int Client::lgetxattr(const char *path, const char *name, void *value, size_t size,
		      const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  InodeRef in;
  int r = Client::path_walk(path, &in, perms, false, CEPH_STAT_CAP_XATTR);
  if (r < 0)
    return r;
  return _getxattr(in, name, value, size, perms);
}

int Client::fgetxattr(int fd, const char *name, void *value, size_t size,
		      const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
  return _getxattr(f->inode, name, value, size, perms);
}

int Client::listxattr(const char *path, char *list, size_t size,
		      const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  InodeRef in;
  int r = Client::path_walk(path, &in, perms, true, CEPH_STAT_CAP_XATTR);
  if (r < 0)
    return r;
  return Client::_listxattr(in.get(), list, size, perms);
}

int Client::llistxattr(const char *path, char *list, size_t size,
		       const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  InodeRef in;
  int r = Client::path_walk(path, &in, perms, false, CEPH_STAT_CAP_XATTR);
  if (r < 0)
    return r;
  return Client::_listxattr(in.get(), list, size, perms);
}

int Client::flistxattr(int fd, char *list, size_t size, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
  return Client::_listxattr(f->inode.get(), list, size, perms);
}

int Client::removexattr(const char *path, const char *name,
			const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  InodeRef in;
  int r = Client::path_walk(path, &in, perms, true);
  if (r < 0)
    return r;
  return _removexattr(in, name, perms);
}

int Client::lremovexattr(const char *path, const char *name,
			 const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  InodeRef in;
  int r = Client::path_walk(path, &in, perms, false);
  if (r < 0)
    return r;
  return _removexattr(in, name, perms);
}

int Client::fremovexattr(int fd, const char *name, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
  return _removexattr(f->inode, name, perms);
}

int Client::setxattr(const char *path, const char *name, const void *value,
		     size_t size, int flags, const UserPerm& perms)
{
  _setxattr_maybe_wait_for_osdmap(name, value, size);

  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  InodeRef in;
  int r = Client::path_walk(path, &in, perms, true);
  if (r < 0)
    return r;
  return _setxattr(in, name, value, size, flags, perms);
}

int Client::lsetxattr(const char *path, const char *name, const void *value,
		      size_t size, int flags, const UserPerm& perms)
{
  _setxattr_maybe_wait_for_osdmap(name, value, size);

  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  InodeRef in;
  int r = Client::path_walk(path, &in, perms, false);
  if (r < 0)
    return r;
  return _setxattr(in, name, value, size, flags, perms);
}

int Client::fsetxattr(int fd, const char *name, const void *value, size_t size,
		      int flags, const UserPerm& perms)
{
  _setxattr_maybe_wait_for_osdmap(name, value, size);

  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
  return _setxattr(f->inode, name, value, size, flags, perms);
}
int Client::_getxattr(Inode *in, const char *name, void *value, size_t size,
		      const UserPerm& perms)
{
  int r;

  const VXattr *vxattr = _match_vxattr(in, name);
  if (vxattr) {
    r = -ENODATA;

    // Do a force getattr to get the latest quota before returning
    // a value to userspace.
    int flags = 0;
    if (vxattr->flags & VXATTR_RSTAT) {
      flags |= CEPH_STAT_RSTAT;
    }
    r = _getattr(in, flags, perms, true);
    if (r != 0) {
      // Error from getattr!
      return r;
    }

    // call pointer-to-member function
    char buf[256];
    if (!(vxattr->exists_cb && !(this->*(vxattr->exists_cb))(in))) {
      r = (this->*(vxattr->getxattr_cb))(in, buf, sizeof(buf));
    } else {
      r = -ENODATA;
    }

    if (size != 0) {
      if (r > (int)size) {
	r = -ERANGE;
      } else if (r > 0) {
	memcpy(value, buf, r);
      }
    }
    goto out;
  }

  if (acl_type == NO_ACL && !strncmp(name, "system.", 7)) {
    r = -EOPNOTSUPP;
    goto out;
  }

  r = _getattr(in, CEPH_STAT_CAP_XATTR, perms, in->xattr_version == 0);
  if (r == 0) {
    string n(name);
    r = -ENODATA;
    if (in->xattrs.count(n)) {
      r = in->xattrs[n].length();
      if (r > 0 && size != 0) {
	if (size >= (unsigned)r)
	  memcpy(value, in->xattrs[n].c_str(), r);
	else
	  r = -ERANGE;
      }
    }
  }
 out:
  ldout(cct, 8) << "_getxattr(" << in->ino << ", \"" << name << "\", " << size << ") = " << r << dendl;
  return r;
}

int Client::_getxattr(InodeRef &in, const char *name, void *value, size_t size,
		      const UserPerm& perms)
{
  if (cct->_conf->client_permissions) {
    int r = xattr_permission(in.get(), name, MAY_READ, perms);
    if (r < 0)
      return r;
  }
  return _getxattr(in.get(), name, value, size, perms);
}
int Client::ll_getxattr(Inode *in, const char *name, void *value,
			size_t size, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_getxattr " << vino << " " << name << " size " << size << dendl;
  tout(cct) << "ll_getxattr" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << name << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = xattr_permission(in, name, MAY_READ, perms);
    if (r < 0)
      return r;
  }

  return _getxattr(in, name, value, size, perms);
}
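
// _listxattr() produces the kernel-style list format: attribute names
// packed back to back, each terminated by '\0'. Calling with size == 0
// is the conventional probe for the required buffer length.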
int Client::_listxattr(Inode *in, char *name, size_t size,
		       const UserPerm& perms)
{
  int r = _getattr(in, CEPH_STAT_CAP_XATTR, perms, in->xattr_version == 0);
  if (r == 0) {
    for (map<string,bufferptr>::iterator p = in->xattrs.begin();
	 p != in->xattrs.end();
	 ++p)
      r += p->first.length() + 1;

    const VXattr *vxattrs = _get_vxattrs(in);
    r += _vxattrs_name_size(vxattrs);

    if (size != 0) {
      if (size >= (unsigned)r) {
	for (map<string,bufferptr>::iterator p = in->xattrs.begin();
	     p != in->xattrs.end();
	     ++p) {
	  memcpy(name, p->first.c_str(), p->first.length());
	  name += p->first.length();
	  *name = '\0';
	  name++;
	}
	if (vxattrs) {
	  for (int i = 0; !vxattrs[i].name.empty(); i++) {
	    const VXattr& vxattr = vxattrs[i];
	    if (vxattr.hidden)
	      continue;
	    // call pointer-to-member function
	    if(vxattr.exists_cb && !(this->*(vxattr.exists_cb))(in))
	      continue;
	    memcpy(name, vxattr.name.c_str(), vxattr.name.length());
	    name += vxattr.name.length();
	    *name = '\0';
	    name++;
	  }
	}
      } else
	r = -ERANGE;
    }
  }
  ldout(cct, 8) << "_listxattr(" << in->ino << ", " << size << ") = " << r << dendl;
  return r;
}

int Client::ll_listxattr(Inode *in, char *names, size_t size,
			 const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_listxattr " << vino << " size " << size << dendl;
  tout(cct) << "ll_listxattr" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << size << std::endl;

  return _listxattr(in, names, size, perms);
}
int Client::_do_setxattr(Inode *in, const char *name, const void *value,
			 size_t size, int flags, const UserPerm& perms)
{
  int xattr_flags = 0;
  if (!value)
    xattr_flags |= CEPH_XATTR_REMOVE;
  if (flags & XATTR_CREATE)
    xattr_flags |= CEPH_XATTR_CREATE;
  if (flags & XATTR_REPLACE)
    xattr_flags |= CEPH_XATTR_REPLACE;

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_SETXATTR);
  filepath path;
  in->make_nosnap_relative_path(path);
  req->set_filepath(path);
  req->set_string2(name);
  req->set_inode(in);
  req->head.args.setxattr.flags = xattr_flags;

  bufferlist bl;
  bl.append((const char*)value, size);
  req->set_data(bl);

  int res = make_request(req, perms);

  trim_cache();
  ldout(cct, 3) << "_setxattr(" << in->ino << ", \"" << name << "\") = " <<
    res << dendl;
  return res;
}

int Client::_setxattr(Inode *in, const char *name, const void *value,
		      size_t size, int flags, const UserPerm& perms)
{
  if (in->snapid != CEPH_NOSNAP) {
    return -EROFS;
  }

  bool posix_acl_xattr = false;
  if (acl_type == POSIX_ACL)
    posix_acl_xattr = !strncmp(name, "system.", 7);

  if (strncmp(name, "user.", 5) &&
      strncmp(name, "security.", 9) &&
      strncmp(name, "trusted.", 8) &&
      strncmp(name, "ceph.", 5) &&
      !posix_acl_xattr)
    return -EOPNOTSUPP;

  if (posix_acl_xattr) {
    if (!strcmp(name, ACL_EA_ACCESS)) {
      mode_t new_mode = in->mode;
      if (value) {
	int ret = posix_acl_equiv_mode(value, size, &new_mode);
	if (ret < 0)
	  return ret;
	if (ret == 0) {
	  value = NULL;
	  size = 0;
	}
	if (new_mode != in->mode) {
	  struct ceph_statx stx;
	  stx.stx_mode = new_mode;
	  ret = _do_setattr(in, &stx, CEPH_SETATTR_MODE, perms, NULL);
	  if (ret < 0)
	    return ret;
	}
      }
    } else if (!strcmp(name, ACL_EA_DEFAULT)) {
      if (value) {
	if (!S_ISDIR(in->mode))
	  return -EACCES;
	int ret = posix_acl_check(value, size);
	if (ret < 0)
	  return -EINVAL;
	if (ret == 0) {
	  value = NULL;
	  size = 0;
	}
      }
    } else {
      return -EOPNOTSUPP;
    }
  } else {
    const VXattr *vxattr = _match_vxattr(in, name);
    if (vxattr && vxattr->readonly)
      return -EOPNOTSUPP;
  }

  return _do_setxattr(in, name, value, size, flags, perms);
}

int Client::_setxattr(InodeRef &in, const char *name, const void *value,
		      size_t size, int flags, const UserPerm& perms)
{
  if (cct->_conf->client_permissions) {
    int r = xattr_permission(in.get(), name, MAY_WRITE, perms);
    if (r < 0)
      return r;
  }
  return _setxattr(in.get(), name, value, size, flags, perms);
}
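
// Layout xattrs may name a data pool by id or by name. The check below
// first tries to parse the value as a numeric pool id via
// boost::lexical_cast and falls back to lookup_pg_pool_name() in the
// OSDMap; a -ENOENT result makes the caller wait for a newer osdmap.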
int Client::_setxattr_check_data_pool(string& name, string& value, const OSDMap *osdmap)
{
  string tmp;
  if (name == "layout") {
    string::iterator begin = value.begin();
    string::iterator end = value.end();
    keys_and_values<string::iterator> p;    // create instance of parser
    std::map<string, string> m;             // map to receive results
    if (!qi::parse(begin, end, p, m)) {     // returns true if successful
      return -EINVAL;
    }
    for (map<string,string>::iterator q = m.begin(); q != m.end(); ++q) {
      if (q->first == "pool") {
	tmp = q->second;
	break;
      }
    }
  } else if (name == "layout.pool") {
    tmp = value;
  }

  if (tmp.length()) {
    int64_t pool;
    try {
      pool = boost::lexical_cast<unsigned>(tmp);
      if (!osdmap->have_pg_pool(pool))
	return -ENOENT;
    } catch (boost::bad_lexical_cast const&) {
      pool = osdmap->lookup_pg_pool_name(tmp);
      if (pool < 0) {
	return -ENOENT;
      }
    }
  }

  return 0;
}

void Client::_setxattr_maybe_wait_for_osdmap(const char *name, const void *value, size_t size)
{
  // Setting the pool in a layout requires the MetaRequest to carry an
  // osdmap epoch. There is a race where a newly created data pool is not
  // yet known to either the client or the MDS, so fetch the latest
  // osdmap first; that lets the MDS quickly judge whether it needs a
  // newer osdmap itself.
  if (strcmp(name, "ceph.file.layout.pool") == 0 || strcmp(name, "ceph.dir.layout.pool") == 0 ||
      strcmp(name, "ceph.file.layout") == 0 || strcmp(name, "ceph.dir.layout") == 0) {
    string rest(strstr(name, "layout"));
    string v((const char*)value, size);
    int r = objecter->with_osdmap([&](const OSDMap& o) {
      return _setxattr_check_data_pool(rest, v, &o);
    });

    if (r == -ENOENT) {
      C_SaferCond ctx;
      objecter->wait_for_latest_osdmap(&ctx);
      ctx.wait();
    }
  }
}
int Client::ll_setxattr(Inode *in, const char *name, const void *value,
			size_t size, int flags, const UserPerm& perms)
{
  _setxattr_maybe_wait_for_osdmap(name, value, size);

  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_setxattr " << vino << " " << name << " size " << size << dendl;
  tout(cct) << "ll_setxattr" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << name << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = xattr_permission(in, name, MAY_WRITE, perms);
    if (r < 0)
      return r;
  }
  return _setxattr(in, name, value, size, flags, perms);
}
int Client::_removexattr(Inode *in, const char *name, const UserPerm& perms)
{
  if (in->snapid != CEPH_NOSNAP) {
    return -EROFS;
  }

  // same xattrs supported by kernel client
  if (strncmp(name, "user.", 5) &&
      strncmp(name, "system.", 7) &&
      strncmp(name, "security.", 9) &&
      strncmp(name, "trusted.", 8) &&
      strncmp(name, "ceph.", 5))
    return -EOPNOTSUPP;

  const VXattr *vxattr = _match_vxattr(in, name);
  if (vxattr && vxattr->readonly)
    return -EOPNOTSUPP;

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_RMXATTR);
  filepath path;
  in->make_nosnap_relative_path(path);
  req->set_filepath(path);
  req->set_filepath2(name);
  req->set_inode(in);

  int res = make_request(req, perms);

  trim_cache();
  ldout(cct, 8) << "_removexattr(" << in->ino << ", \"" << name << "\") = " << res << dendl;
  return res;
}

int Client::_removexattr(InodeRef &in, const char *name, const UserPerm& perms)
{
  if (cct->_conf->client_permissions) {
    int r = xattr_permission(in.get(), name, MAY_WRITE, perms);
    if (r < 0)
      return r;
  }
  return _removexattr(in.get(), name, perms);
}

int Client::ll_removexattr(Inode *in, const char *name, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  if (unmounting)
    return -ENOTCONN;

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_removexattr " << vino << " " << name << dendl;
  tout(cct) << "ll_removexattr" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << name << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = xattr_permission(in, name, MAY_WRITE, perms);
    if (r < 0)
      return r;
  }

  return _removexattr(in, name, perms);
}
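
// Virtual xattrs ("vxattrs"): the _vxattrcb_* callbacks below render
// synthetic ceph.* attributes (layout, quota, directory stats) from
// in-memory inode state into a caller-supplied buffer, snprintf-style:
// the return value is the would-be length, so a size-0 probe works.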
bool Client::_vxattrcb_quota_exists(Inode *in)
{
  return in->quota.is_enable();
}
size_t Client::_vxattrcb_quota(Inode *in, char *val, size_t size)
{
  return snprintf(val, size,
		  "max_bytes=%lld max_files=%lld",
		  (long long int)in->quota.max_bytes,
		  (long long int)in->quota.max_files);
}
size_t Client::_vxattrcb_quota_max_bytes(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (long long int)in->quota.max_bytes);
}
size_t Client::_vxattrcb_quota_max_files(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (long long int)in->quota.max_files);
}

bool Client::_vxattrcb_layout_exists(Inode *in)
{
  return in->layout != file_layout_t();
}
size_t Client::_vxattrcb_layout(Inode *in, char *val, size_t size)
{
  int r = snprintf(val, size,
      "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
      (unsigned long long)in->layout.stripe_unit,
      (unsigned long long)in->layout.stripe_count,
      (unsigned long long)in->layout.object_size);
  objecter->with_osdmap([&](const OSDMap& o) {
      if (o.have_pg_pool(in->layout.pool_id))
	r += snprintf(val + r, size - r, "%s",
		      o.get_pool_name(in->layout.pool_id).c_str());
      else
	r += snprintf(val + r, size - r, "%" PRIu64,
		      (uint64_t)in->layout.pool_id);
    });
  if (in->layout.pool_ns.length())
    r += snprintf(val + r, size - r, " pool_namespace=%s",
		  in->layout.pool_ns.c_str());
  return r;
}
size_t Client::_vxattrcb_layout_stripe_unit(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (unsigned long long)in->layout.stripe_unit);
}
size_t Client::_vxattrcb_layout_stripe_count(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (unsigned long long)in->layout.stripe_count);
}
size_t Client::_vxattrcb_layout_object_size(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (unsigned long long)in->layout.object_size);
}
size_t Client::_vxattrcb_layout_pool(Inode *in, char *val, size_t size)
{
  size_t r;
  objecter->with_osdmap([&](const OSDMap& o) {
      if (o.have_pg_pool(in->layout.pool_id))
	r = snprintf(val, size, "%s", o.get_pool_name(
		       in->layout.pool_id).c_str());
      else
	r = snprintf(val, size, "%" PRIu64, (uint64_t)in->layout.pool_id);
    });
  return r;
}
size_t Client::_vxattrcb_layout_pool_namespace(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%s", in->layout.pool_ns.c_str());
}
size_t Client::_vxattrcb_dir_entries(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (unsigned long long)(in->dirstat.nfiles + in->dirstat.nsubdirs));
}
size_t Client::_vxattrcb_dir_files(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (unsigned long long)in->dirstat.nfiles);
}
size_t Client::_vxattrcb_dir_subdirs(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (unsigned long long)in->dirstat.nsubdirs);
}
size_t Client::_vxattrcb_dir_rentries(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (unsigned long long)(in->rstat.rfiles + in->rstat.rsubdirs));
}
size_t Client::_vxattrcb_dir_rfiles(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (unsigned long long)in->rstat.rfiles);
}
size_t Client::_vxattrcb_dir_rsubdirs(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (unsigned long long)in->rstat.rsubdirs);
}
size_t Client::_vxattrcb_dir_rbytes(Inode *in, char *val, size_t size)
{
  return snprintf(val, size, "%lld", (unsigned long long)in->rstat.rbytes);
}
11475 size_t Client::_vxattrcb_dir_rctime(Inode
*in
, char *val
, size_t size
)
11477 return snprintf(val
, size
, "%ld.09%ld", (long)in
->rstat
.rctime
.sec(),
11478 (long)in
->rstat
.rctime
.nsec());
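
/*
 * The macros below build entries of the VXattr tables.  Each entry carries
 * the xattr name, a getxattr callback, and (for conditional attributes such
 * as layout and quota) an exists callback so the attribute is only reported
 * when meaningful.  A sketch of how a table entry is consumed (hypothetical
 * caller, for illustration only):
 *
 *   const VXattr *vx = _match_vxattr(in, "ceph.dir.rbytes");
 *   if (vx && (!vx->exists_cb || (this->*(vx->exists_cb))(in)))
 *     size_t len = (this->*(vx->getxattr_cb))(in, buf, size);
 */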
#define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." #_name2

#define XATTR_NAME_CEPH(_type, _name)				\
{								\
  name: CEPH_XATTR_NAME(_type, _name),				\
  getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name,	\
  readonly: true,						\
  hidden: false,						\
  exists_cb: NULL,						\
  flags: 0,							\
}
#define XATTR_NAME_CEPH2(_type, _name, _flags)			\
{								\
  name: CEPH_XATTR_NAME(_type, _name),				\
  getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name,	\
  readonly: true,						\
  hidden: false,						\
  exists_cb: NULL,						\
  flags: _flags,						\
}
#define XATTR_LAYOUT_FIELD(_type, _name, _field)		\
{								\
  name: CEPH_XATTR_NAME2(_type, _name, _field),			\
  getxattr_cb: &Client::_vxattrcb_ ## _name ## _ ## _field,	\
  readonly: false,						\
  hidden: true,							\
  exists_cb: &Client::_vxattrcb_layout_exists,			\
  flags: 0,							\
}
#define XATTR_QUOTA_FIELD(_type, _name)				\
{								\
  name: CEPH_XATTR_NAME(_type, _name),				\
  getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name,	\
  readonly: false,						\
  hidden: true,							\
  exists_cb: &Client::_vxattrcb_quota_exists,			\
  flags: 0,							\
}

const Client::VXattr Client::_dir_vxattrs[] = {
  {
    name: "ceph.dir.layout",
    getxattr_cb: &Client::_vxattrcb_layout,
    readonly: false,
    hidden: true,
    exists_cb: &Client::_vxattrcb_layout_exists,
    flags: 0,
  },
  XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
  XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
  XATTR_LAYOUT_FIELD(dir, layout, object_size),
  XATTR_LAYOUT_FIELD(dir, layout, pool),
  XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
  XATTR_NAME_CEPH(dir, entries),
  XATTR_NAME_CEPH(dir, files),
  XATTR_NAME_CEPH(dir, subdirs),
  XATTR_NAME_CEPH2(dir, rentries, VXATTR_RSTAT),
  XATTR_NAME_CEPH2(dir, rfiles, VXATTR_RSTAT),
  XATTR_NAME_CEPH2(dir, rsubdirs, VXATTR_RSTAT),
  XATTR_NAME_CEPH2(dir, rbytes, VXATTR_RSTAT),
  XATTR_NAME_CEPH2(dir, rctime, VXATTR_RSTAT),
  {
    name: "ceph.quota",
    getxattr_cb: &Client::_vxattrcb_quota,
    readonly: false,
    hidden: true,
    exists_cb: &Client::_vxattrcb_quota_exists,
    flags: 0,
  },
  XATTR_QUOTA_FIELD(quota, max_bytes),
  XATTR_QUOTA_FIELD(quota, max_files),
  { name: "" }     /* Required table terminator */
};

const Client::VXattr Client::_file_vxattrs[] = {
  {
    name: "ceph.file.layout",
    getxattr_cb: &Client::_vxattrcb_layout,
    readonly: false,
    hidden: true,
    exists_cb: &Client::_vxattrcb_layout_exists,
    flags: 0,
  },
  XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
  XATTR_LAYOUT_FIELD(file, layout, stripe_count),
  XATTR_LAYOUT_FIELD(file, layout, object_size),
  XATTR_LAYOUT_FIELD(file, layout, pool),
  XATTR_LAYOUT_FIELD(file, layout, pool_namespace),
  { name: "" }     /* Required table terminator */
};

const Client::VXattr *Client::_get_vxattrs(Inode *in)
{
  if (in->is_dir())
    return _dir_vxattrs;
  else if (in->is_file())
    return _file_vxattrs;
  return NULL;
}

const Client::VXattr *Client::_match_vxattr(Inode *in, const char *name)
{
  if (strncmp(name, "ceph.", 5) == 0) {
    const VXattr *vxattr = _get_vxattrs(in);
    if (vxattr) {
      while (!vxattr->name.empty()) {
        if (vxattr->name == name)
          return vxattr;
        vxattr++;
      }
    }
  }
  return NULL;
}

size_t Client::_vxattrs_calcu_name_size(const VXattr *vxattr)
{
  size_t len = 0;
  while (!vxattr->name.empty()) {
    if (!vxattr->hidden)
      len += vxattr->name.length() + 1;
    vxattr++;
  }
  return len;
}
int Client::ll_readlink(Inode *in, char *buf, size_t buflen, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_readlink " << vino << dendl;
  tout(cct) << "ll_readlink" << std::endl;
  tout(cct) << vino.ino.val << std::endl;

  set<Dentry*>::iterator dn = in->dn_set.begin();
  while (dn != in->dn_set.end()) {
    touch_dn(*dn);
    ++dn;
  }

  int r = _readlink(in, buf, buflen); // FIXME: no permission checking!
  ldout(cct, 3) << "ll_readlink " << vino << " = " << r << dendl;
  return r;
}
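
/*
 * The directory-mutation helpers below (_mknod, _create, _mkdir, _symlink,
 * _unlink, _rmdir, _rename, _link) all follow the same pattern: build a
 * MetaRequest for the MDS op, attach a filepath relative to the parent
 * directory's non-snapped inode, tell the MDS which dentry caps we are
 * willing to drop (dentry_drop/dentry_unless), then block in make_request()
 * until the MDS replies.  Quota and snapshot checks happen up front so we
 * can fail with -EDQUOT or -EROFS without a round trip.
 */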
int Client::_mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev,
                   const UserPerm& perms, InodeRef *inp)
{
  ldout(cct, 8) << "_mknod(" << dir->ino << " " << name << ", 0" << oct
                << mode << dec << ", " << rdev << ", uid " << perms.uid()
                << ", gid " << perms.gid() << ")" << dendl;

  if (strlen(name) > NAME_MAX)
    return -ENAMETOOLONG;

  if (dir->snapid != CEPH_NOSNAP) {
    return -EROFS;
  }
  if (is_quota_files_exceeded(dir, perms)) {
    return -EDQUOT;
  }

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_MKNOD);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_inode(dir);
  req->head.args.mknod.rdev = rdev;
  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;

  bufferlist xattrs_bl;
  int res = _posix_acl_create(dir, &mode, xattrs_bl, perms);
  if (res < 0)
    goto fail;
  req->head.args.mknod.mode = mode;
  if (xattrs_bl.length() > 0)
    req->set_data(xattrs_bl);

  Dentry *de;
  res = get_or_create(dir, name, &de);
  if (res < 0)
    goto fail;
  req->set_dentry(de);

  res = make_request(req, perms, inp);

  trim_cache();

  ldout(cct, 8) << "mknod(" << path << ", 0" << oct << mode << dec << ") = " << res << dendl;
  return res;

 fail:
  put_request(req);
  return res;
}

int Client::ll_mknod(Inode *parent, const char *name, mode_t mode,
                     dev_t rdev, struct stat *attr, Inode **out,
                     const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_mknod " << vparent << " " << name << dendl;
  tout(cct) << "ll_mknod" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << mode << std::endl;
  tout(cct) << rdev << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = may_create(parent, perms);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _mknod(parent, name, mode, rdev, perms, &in);
  if (r == 0) {
    fill_stat(in, attr);
    _ll_get(in.get());
  }
  tout(cct) << attr->st_ino << std::endl;
  ldout(cct, 3) << "ll_mknod " << vparent << " " << name
                << " = " << r << " (" << hex << attr->st_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}

int Client::ll_mknodx(Inode *parent, const char *name, mode_t mode,
                      dev_t rdev, Inode **out,
                      struct ceph_statx *stx, unsigned want, unsigned flags,
                      const UserPerm& perms)
{
  unsigned caps = statx_to_mask(flags, want);
  Mutex::Locker lock(client_lock);

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_mknodx " << vparent << " " << name << dendl;
  tout(cct) << "ll_mknodx" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << mode << std::endl;
  tout(cct) << rdev << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = may_create(parent, perms);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _mknod(parent, name, mode, rdev, perms, &in);
  if (r == 0) {
    fill_statx(in, caps, stx);
    _ll_get(in.get());
  }
  tout(cct) << stx->stx_ino << std::endl;
  ldout(cct, 3) << "ll_mknodx " << vparent << " " << name
                << " = " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}
int Client::_create(Inode *dir, const char *name, int flags, mode_t mode,
                    InodeRef *inp, Fh **fhp, int stripe_unit, int stripe_count,
                    int object_size, const char *data_pool, bool *created,
                    const UserPerm& perms)
{
  ldout(cct, 8) << "_create(" << dir->ino << " " << name << ", 0" << oct <<
    mode << dec << ")" << dendl;

  if (strlen(name) > NAME_MAX)
    return -ENAMETOOLONG;
  if (dir->snapid != CEPH_NOSNAP) {
    return -EROFS;
  }
  if (is_quota_files_exceeded(dir, perms)) {
    return -EDQUOT;
  }

  // use normalized flags to generate cmode
  int cmode = ceph_flags_to_mode(ceph_flags_sys2wire(flags));
  if (cmode < 0)
    return -EINVAL;

  int64_t pool_id = -1;
  if (data_pool && *data_pool) {
    pool_id = objecter->with_osdmap(
      std::mem_fn(&OSDMap::lookup_pg_pool_name), data_pool);
    if (pool_id < 0)
      return -EINVAL;
    if (pool_id > 0xffffffffll)
      return -ERANGE;  // bummer!
  }

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_CREATE);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_inode(dir);
  req->head.args.open.flags = ceph_flags_sys2wire(flags | O_CREAT);

  req->head.args.open.stripe_unit = stripe_unit;
  req->head.args.open.stripe_count = stripe_count;
  req->head.args.open.object_size = object_size;
  if (cct->_conf->client_debug_getattr_caps)
    req->head.args.open.mask = DEBUG_GETATTR_CAPS;
  else
    req->head.args.open.mask = 0;
  req->head.args.open.pool = pool_id;
  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;

  bufferlist xattrs_bl;
  int res = _posix_acl_create(dir, &mode, xattrs_bl, perms);
  if (res < 0)
    goto fail;
  req->head.args.open.mode = mode;
  if (xattrs_bl.length() > 0)
    req->set_data(xattrs_bl);

  Dentry *de;
  res = get_or_create(dir, name, &de);
  if (res < 0)
    goto fail;
  req->set_dentry(de);

  res = make_request(req, perms, inp, created);
  if (res < 0)
    goto reply_error;

  /* If the caller passed a value in fhp, do the open */
  if (fhp) {
    (*inp)->get_open_ref(cmode);
    *fhp = _create_fh(inp->get(), flags, cmode, perms);
  }

 reply_error:
  trim_cache();

  ldout(cct, 8) << "create(" << path << ", 0" << oct << mode << dec
                << " layout " << stripe_unit
                << ' ' << stripe_count
                << ' ' << object_size
                <<") = " << res << dendl;
  return res;

 fail:
  put_request(req);
  return res;
}
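
/*
 * Note on _create(): stripe_unit/stripe_count/object_size/data_pool let a
 * caller override the file layout at create time; zero/NULL means "inherit
 * the directory layout".  The pool name is resolved to an id against the
 * current OSDMap before the request is sent, so a bogus pool fails locally
 * with -EINVAL rather than round-tripping to the MDS.
 */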
int Client::_mkdir(Inode *dir, const char *name, mode_t mode, const UserPerm& perm,
                   InodeRef *inp)
{
  ldout(cct, 8) << "_mkdir(" << dir->ino << " " << name << ", 0" << oct
                << mode << dec << ", uid " << perm.uid()
                << ", gid " << perm.gid() << ")" << dendl;

  if (strlen(name) > NAME_MAX)
    return -ENAMETOOLONG;

  if (dir->snapid != CEPH_NOSNAP && dir->snapid != CEPH_SNAPDIR) {
    return -EROFS;
  }
  if (is_quota_files_exceeded(dir, perm)) {
    return -EDQUOT;
  }
  MetaRequest *req = new MetaRequest(dir->snapid == CEPH_SNAPDIR ?
                                     CEPH_MDS_OP_MKSNAP : CEPH_MDS_OP_MKDIR);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_inode(dir);
  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;

  bufferlist xattrs_bl;
  int res = _posix_acl_create(dir, &mode, xattrs_bl, perm);
  if (res < 0)
    goto fail;
  req->head.args.mkdir.mode = mode;
  if (xattrs_bl.length() > 0)
    req->set_data(xattrs_bl);

  Dentry *de;
  res = get_or_create(dir, name, &de);
  if (res < 0)
    goto fail;
  req->set_dentry(de);

  ldout(cct, 10) << "_mkdir: making request" << dendl;
  res = make_request(req, perm, inp);
  ldout(cct, 10) << "_mkdir result is " << res << dendl;

  trim_cache();

  ldout(cct, 8) << "_mkdir(" << path << ", 0" << oct << mode << dec << ") = " << res << dendl;
  return res;

 fail:
  put_request(req);
  return res;
}

int Client::ll_mkdir(Inode *parent, const char *name, mode_t mode,
                     struct stat *attr, Inode **out, const UserPerm& perm)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_mkdir " << vparent << " " << name << dendl;
  tout(cct) << "ll_mkdir" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << mode << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = may_create(parent, perm);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _mkdir(parent, name, mode, perm, &in);
  if (r == 0) {
    fill_stat(in, attr);
    _ll_get(in.get());
  }
  tout(cct) << attr->st_ino << std::endl;
  ldout(cct, 3) << "ll_mkdir " << vparent << " " << name
                << " = " << r << " (" << hex << attr->st_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}

int Client::ll_mkdirx(Inode *parent, const char *name, mode_t mode, Inode **out,
                      struct ceph_statx *stx, unsigned want, unsigned flags,
                      const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_mkdirx " << vparent << " " << name << dendl;
  tout(cct) << "ll_mkdirx" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << mode << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = may_create(parent, perms);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _mkdir(parent, name, mode, perms, &in);
  if (r == 0) {
    fill_statx(in, statx_to_mask(flags, want), stx);
    _ll_get(in.get());
  } else {
    stx->stx_ino = 0;
    stx->stx_mask = 0;
  }
  tout(cct) << stx->stx_ino << std::endl;
  ldout(cct, 3) << "ll_mkdirx " << vparent << " " << name
                << " = " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}
int Client::_symlink(Inode *dir, const char *name, const char *target,
                     const UserPerm& perms, InodeRef *inp)
{
  ldout(cct, 8) << "_symlink(" << dir->ino << " " << name << ", " << target
                << ", uid " << perms.uid() << ", gid " << perms.gid() << ")"
                << dendl;

  if (strlen(name) > NAME_MAX)
    return -ENAMETOOLONG;

  if (dir->snapid != CEPH_NOSNAP) {
    return -EROFS;
  }
  if (is_quota_files_exceeded(dir, perms)) {
    return -EDQUOT;
  }

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_SYMLINK);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_inode(dir);
  req->set_string2(target);
  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;

  Dentry *de;
  int res = get_or_create(dir, name, &de);
  if (res < 0)
    goto fail;
  req->set_dentry(de);

  res = make_request(req, perms, inp);

  trim_cache();
  ldout(cct, 8) << "_symlink(\"" << path << "\", \"" << target << "\") = " <<
    res << dendl;
  return res;

 fail:
  put_request(req);
  return res;
}

int Client::ll_symlink(Inode *parent, const char *name, const char *value,
                       struct stat *attr, Inode **out, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_symlink " << vparent << " " << name << " -> " << value
                << dendl;
  tout(cct) << "ll_symlink" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << value << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = may_create(parent, perms);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _symlink(parent, name, value, perms, &in);
  if (r == 0) {
    fill_stat(in, attr);
    _ll_get(in.get());
  }
  tout(cct) << attr->st_ino << std::endl;
  ldout(cct, 3) << "ll_symlink " << vparent << " " << name
                << " = " << r << " (" << hex << attr->st_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}

int Client::ll_symlinkx(Inode *parent, const char *name, const char *value,
                        Inode **out, struct ceph_statx *stx, unsigned want,
                        unsigned flags, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_symlinkx " << vparent << " " << name << " -> " << value
                << dendl;
  tout(cct) << "ll_symlinkx" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << value << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = may_create(parent, perms);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _symlink(parent, name, value, perms, &in);
  if (r == 0) {
    fill_statx(in, statx_to_mask(flags, want), stx);
    _ll_get(in.get());
  }
  tout(cct) << stx->stx_ino << std::endl;
  ldout(cct, 3) << "ll_symlinkx " << vparent << " " << name
                << " = " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}
int Client::_unlink(Inode *dir, const char *name, const UserPerm& perm)
{
  ldout(cct, 8) << "_unlink(" << dir->ino << " " << name
                << " uid " << perm.uid() << " gid " << perm.gid()
                << ")" << dendl;

  if (dir->snapid != CEPH_NOSNAP) {
    return -EROFS;
  }

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_UNLINK);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);

  InodeRef otherin;
  Inode *in;
  Dentry *de;

  int res = get_or_create(dir, name, &de);
  if (res < 0)
    goto fail;
  req->set_dentry(de);
  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;

  res = _lookup(dir, name, 0, &otherin, perm);
  if (res < 0)
    goto fail;

  in = otherin.get();
  req->set_other_inode(in);
  in->break_all_delegs();
  req->other_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;

  req->set_inode(dir);

  res = make_request(req, perm);

  trim_cache();
  ldout(cct, 8) << "unlink(" << path << ") = " << res << dendl;
  return res;

 fail:
  put_request(req);
  return res;
}

int Client::ll_unlink(Inode *in, const char *name, const UserPerm& perm)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_unlink " << vino << " " << name << dendl;
  tout(cct) << "ll_unlink" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << name << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = may_delete(in, name, perm);
    if (r < 0)
      return r;
  }
  return _unlink(in, name, perm);
}
int Client::_rmdir(Inode *dir, const char *name, const UserPerm& perms)
{
  ldout(cct, 8) << "_rmdir(" << dir->ino << " " << name << " uid "
                << perms.uid() << " gid " << perms.gid() << ")" << dendl;

  if (dir->snapid != CEPH_NOSNAP && dir->snapid != CEPH_SNAPDIR) {
    return -EROFS;
  }

  int op = dir->snapid == CEPH_SNAPDIR ? CEPH_MDS_OP_RMSNAP : CEPH_MDS_OP_RMDIR;
  MetaRequest *req = new MetaRequest(op);
  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);

  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;
  req->other_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;

  InodeRef in;

  Dentry *de;
  int res = get_or_create(dir, name, &de);
  if (res < 0)
    goto fail;
  if (op == CEPH_MDS_OP_RMDIR)
    req->set_dentry(de);

  res = _lookup(dir, name, 0, &in, perms);
  if (res < 0)
    goto fail;
  if (op == CEPH_MDS_OP_RMDIR) {
    req->set_inode(dir);
    req->set_other_inode(in.get());
  } else {
    unlink(de, true, true);
    req->set_other_inode(in.get());
  }

  res = make_request(req, perms);

  trim_cache();
  ldout(cct, 8) << "rmdir(" << path << ") = " << res << dendl;
  return res;

 fail:
  put_request(req);
  return res;
}

int Client::ll_rmdir(Inode *in, const char *name, const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_rmdir " << vino << " " << name << dendl;
  tout(cct) << "ll_rmdir" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << name << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = may_delete(in, name, perms);
    if (r < 0)
      return r;
  }

  return _rmdir(in, name, perms);
}
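
/*
 * Rename constraints, as enforced below: both directories must share the
 * same snapshot context, and a rename across directories must not cross a
 * quota root (otherwise usage would silently move between quota trees), so
 * such attempts return -EXDEV and userspace callers (e.g. mv) typically
 * fall back to copy-and-unlink.
 */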
int Client::_rename(Inode *fromdir, const char *fromname, Inode *todir, const char *toname, const UserPerm& perm)
{
  ldout(cct, 8) << "_rename(" << fromdir->ino << " " << fromname << " to "
                << todir->ino << " " << toname
                << " uid " << perm.uid() << " gid " << perm.gid() << ")"
                << dendl;

  if (fromdir->snapid != todir->snapid)
    return -EXDEV;

  int op = CEPH_MDS_OP_RENAME;
  if (fromdir->snapid != CEPH_NOSNAP) {
    if (fromdir == todir && fromdir->snapid == CEPH_SNAPDIR)
      op = CEPH_MDS_OP_RENAMESNAP;
    else
      return -EROFS;
  }
  if (fromdir != todir) {
    Inode *fromdir_root =
      fromdir->quota.is_enable() ? fromdir : get_quota_root(fromdir, perm);
    Inode *todir_root =
      todir->quota.is_enable() ? todir : get_quota_root(todir, perm);
    if (fromdir_root != todir_root) {
      return -EXDEV;
    }
  }

  InodeRef target;
  MetaRequest *req = new MetaRequest(op);

  filepath from;
  fromdir->make_nosnap_relative_path(from);
  from.push_dentry(fromname);
  filepath to;
  todir->make_nosnap_relative_path(to);
  to.push_dentry(toname);
  req->set_filepath(to);
  req->set_filepath2(from);

  Dentry *oldde;
  int res = get_or_create(fromdir, fromname, &oldde);
  if (res < 0)
    goto fail;
  Dentry *de;
  res = get_or_create(todir, toname, &de);
  if (res < 0)
    goto fail;

  if (op == CEPH_MDS_OP_RENAME) {
    req->set_old_dentry(oldde);
    req->old_dentry_drop = CEPH_CAP_FILE_SHARED;
    req->old_dentry_unless = CEPH_CAP_FILE_EXCL;

    req->set_dentry(de);
    req->dentry_drop = CEPH_CAP_FILE_SHARED;
    req->dentry_unless = CEPH_CAP_FILE_EXCL;

    InodeRef oldin, otherin;
    res = _lookup(fromdir, fromname, 0, &oldin, perm);
    if (res < 0)
      goto fail;

    Inode *oldinode = oldin.get();
    oldinode->break_all_delegs();
    req->set_old_inode(oldinode);
    req->old_inode_drop = CEPH_CAP_LINK_SHARED;

    res = _lookup(todir, toname, 0, &otherin, perm);
    if (res != 0 && res != -ENOENT) {
      goto fail;
    } else if (res == 0) {
      Inode *in = otherin.get();
      req->set_other_inode(in);
      in->break_all_delegs();
      req->other_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
    }

    req->set_inode(todir);
  } else {
    // renamesnap reply contains no tracedn, so we need to invalidate
    // dentry manually
    unlink(oldde, true, true);
    unlink(de, true, true);
  }

  res = make_request(req, perm, &target);
  ldout(cct, 10) << "rename result is " << res << dendl;

  // renamed item from our cache

  trim_cache();
  ldout(cct, 8) << "_rename(" << from << ", " << to << ") = " << res << dendl;
  return res;

 fail:
  put_request(req);
  return res;
}

int Client::ll_rename(Inode *parent, const char *name, Inode *newparent,
                      const char *newname, const UserPerm& perm)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vparent = _get_vino(parent);
  vinodeno_t vnewparent = _get_vino(newparent);

  ldout(cct, 3) << "ll_rename " << vparent << " " << name << " to "
                << vnewparent << " " << newname << dendl;
  tout(cct) << "ll_rename" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << vnewparent.ino.val << std::endl;
  tout(cct) << newname << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = may_delete(parent, name, perm);
    if (r < 0)
      return r;
    r = may_delete(newparent, newname, perm);
    if (r < 0 && r != -ENOENT)
      return r;
  }

  return _rename(parent, name, newparent, newname, perm);
}
int Client::_link(Inode *in, Inode *dir, const char *newname, const UserPerm& perm, InodeRef *inp)
{
  ldout(cct, 8) << "_link(" << in->ino << " to " << dir->ino << " " << newname
                << " uid " << perm.uid() << " gid " << perm.gid() << ")" << dendl;

  if (strlen(newname) > NAME_MAX)
    return -ENAMETOOLONG;

  if (in->snapid != CEPH_NOSNAP || dir->snapid != CEPH_NOSNAP) {
    return -EROFS;
  }
  if (is_quota_files_exceeded(dir, perm)) {
    return -EDQUOT;
  }

  in->break_all_delegs();
  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LINK);

  filepath path(newname, dir->ino);
  req->set_filepath(path);
  filepath existing(in->ino);
  req->set_filepath2(existing);

  req->set_inode(dir);
  req->inode_drop = CEPH_CAP_FILE_SHARED;
  req->inode_unless = CEPH_CAP_FILE_EXCL;

  Dentry *de;
  int res = get_or_create(dir, newname, &de);
  if (res < 0)
    goto fail;
  req->set_dentry(de);

  res = make_request(req, perm, inp);
  ldout(cct, 10) << "link result is " << res << dendl;

  trim_cache();
  ldout(cct, 8) << "link(" << existing << ", " << path << ") = " << res << dendl;
  return res;

 fail:
  put_request(req);
  return res;
}

int Client::ll_link(Inode *in, Inode *newparent, const char *newname,
                    const UserPerm& perm)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vino = _get_vino(in);
  vinodeno_t vnewparent = _get_vino(newparent);

  ldout(cct, 3) << "ll_link " << vino << " to " << vnewparent << " " <<
    newname << dendl;
  tout(cct) << "ll_link" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << vnewparent << std::endl;
  tout(cct) << newname << std::endl;

  int r = 0;
  InodeRef target;

  if (!cct->_conf->fuse_default_permissions) {
    if (S_ISDIR(in->mode))
      return -EPERM;

    r = may_hardlink(in, perm);
    if (r < 0)
      return r;

    r = may_create(newparent, perm);
    if (r < 0)
      return r;
  }

  return _link(in, newparent, newname, perm, &target);
}
int Client::ll_num_osds(void)
{
  Mutex::Locker lock(client_lock);
  return objecter->with_osdmap(std::mem_fn(&OSDMap::get_num_osds));
}

int Client::ll_osdaddr(int osd, uint32_t *addr)
{
  Mutex::Locker lock(client_lock);

  entity_addr_t g;
  bool exists = objecter->with_osdmap([&](const OSDMap& o) {
      if (!o.exists(osd))
        return false;
      g = o.get_addr(osd);
      return true;
    });
  if (!exists)
    return -1;
  uint32_t nb_addr = (g.in4_addr()).sin_addr.s_addr;
  *addr = ntohl(nb_addr);
  return 0;
}

uint32_t Client::ll_stripe_unit(Inode *in)
{
  Mutex::Locker lock(client_lock);
  return in->layout.stripe_unit;
}

uint64_t Client::ll_snap_seq(Inode *in)
{
  Mutex::Locker lock(client_lock);
  return in->snaprealm->seq;
}

int Client::ll_file_layout(Inode *in, file_layout_t *layout)
{
  Mutex::Locker lock(client_lock);
  *layout = in->layout;
  return 0;
}

int Client::ll_file_layout(Fh *fh, file_layout_t *layout)
{
  return ll_file_layout(fh->inode.get(), layout);
}

/* Currently we cannot take advantage of redundancy in reads, since we
   would have to go through all possible placement groups (a
   potentially quite large number determined by a hash), and use CRUSH
   to calculate the appropriate set of OSDs for each placement group,
   then index into that.  An array with one entry per OSD is much more
   tractable and works for demonstration purposes. */
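
/*
 * Worked example of the striping arithmetic below (illustrative numbers):
 * with stripe_unit = 1 MiB, stripe_count = 4 and object_size = 4 MiB,
 * stripes_per_object = 4.  For blockno = 13:
 *
 *   stripeno    = 13 / 4 = 3    // fourth horizontal stripe
 *   stripepos   = 13 % 4 = 1    // second object in the object set
 *   objectsetno = 3 / 4  = 0    // first object set
 *   objectno    = 0*4 + 1 = 1   // second object of the file
 *
 * and ll_get_internal_offset() computes (blockno % stripes_per_object) * su,
 * i.e. (13 % 4) * 1 MiB = 1 MiB into that object.
 */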
int Client::ll_get_stripe_osd(Inode *in, uint64_t blockno,
                              file_layout_t* layout)
{
  Mutex::Locker lock(client_lock);

  inodeno_t ino = in->ino;
  uint32_t object_size = layout->object_size;
  uint32_t su = layout->stripe_unit;
  uint32_t stripe_count = layout->stripe_count;
  uint64_t stripes_per_object = object_size / su;

  uint64_t stripeno = blockno / stripe_count;    // which horizontal stripe        (Y)
  uint64_t stripepos = blockno % stripe_count;   // which object in the object set (X)
  uint64_t objectsetno = stripeno / stripes_per_object;        // which object set
  uint64_t objectno = objectsetno * stripe_count + stripepos;  // object id

  object_t oid = file_object_t(ino, objectno);
  return objecter->with_osdmap([&](const OSDMap& o) {
      ceph_object_layout olayout =
        o.file_to_object_layout(oid, *layout);
      pg_t pg = (pg_t)olayout.ol_pgid;
      vector<int> osds;
      int primary;
      o.pg_to_acting_osds(pg, &osds, &primary);
      return primary;
    });
}

/* Return the offset of the block, internal to the object */

uint64_t Client::ll_get_internal_offset(Inode *in, uint64_t blockno)
{
  Mutex::Locker lock(client_lock);
  file_layout_t *layout = &(in->layout);
  uint32_t object_size = layout->object_size;
  uint32_t su = layout->stripe_unit;
  uint64_t stripes_per_object = object_size / su;

  return (blockno % stripes_per_object) * su;
}
int Client::ll_opendir(Inode *in, int flags, dir_result_t** dirpp,
                       const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_opendir " << vino << dendl;
  tout(cct) << "ll_opendir" << std::endl;
  tout(cct) << vino.ino.val << std::endl;

  if (!cct->_conf->fuse_default_permissions) {
    int r = may_open(in, flags, perms);
    if (r < 0)
      return r;
  }

  int r = _opendir(in, dirpp, perms);
  tout(cct) << (unsigned long)*dirpp << std::endl;

  ldout(cct, 3) << "ll_opendir " << vino << " = " << r << " (" << *dirpp << ")"
                << dendl;
  return r;
}

int Client::ll_releasedir(dir_result_t *dirp)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "ll_releasedir " << dirp << dendl;
  tout(cct) << "ll_releasedir" << std::endl;
  tout(cct) << (unsigned long)dirp << std::endl;
  _closedir(dirp);
  return 0;
}

int Client::ll_fsyncdir(dir_result_t *dirp)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "ll_fsyncdir " << dirp << dendl;
  tout(cct) << "ll_fsyncdir" << std::endl;
  tout(cct) << (unsigned long)dirp << std::endl;

  return _fsync(dirp->inode.get(), false);
}
int Client::ll_open(Inode *in, int flags, Fh **fhp, const UserPerm& perms)
{
  assert(!(flags & O_CREAT));

  Mutex::Locker lock(client_lock);

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_open " << vino << " " << ceph_flags_sys2wire(flags) << dendl;
  tout(cct) << "ll_open" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << ceph_flags_sys2wire(flags) << std::endl;

  int r;
  if (!cct->_conf->fuse_default_permissions) {
    r = may_open(in, flags, perms);
    if (r < 0)
      goto out;
  }

  r = _open(in, flags, 0, fhp /* may be NULL */, perms);

 out:
  Fh *fhptr = fhp ? *fhp : NULL;
  if (fhptr) {
    ll_unclosed_fh_set.insert(fhptr);
  }
  tout(cct) << (unsigned long)fhptr << std::endl;
  ldout(cct, 3) << "ll_open " << vino << " " << ceph_flags_sys2wire(flags) <<
      " = " << r << " (" << fhptr << ")" << dendl;
  return r;
}
int Client::_ll_create(Inode *parent, const char *name, mode_t mode,
                       int flags, InodeRef *in, int caps, Fh **fhp,
                       const UserPerm& perms)
{
  *fhp = NULL;

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 8) << "_ll_create " << vparent << " " << name << " 0" << oct <<
    mode << dec << " " << ceph_flags_sys2wire(flags) << ", uid " << perms.uid()
                << ", gid " << perms.gid() << dendl;
  tout(cct) << "ll_create" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << mode << std::endl;
  tout(cct) << ceph_flags_sys2wire(flags) << std::endl;

  bool created = false;
  int r = _lookup(parent, name, caps, in, perms);

  if (r == 0 && (flags & O_CREAT) && (flags & O_EXCL))
    return -EEXIST;

  if (r == -ENOENT && (flags & O_CREAT)) {
    if (!cct->_conf->fuse_default_permissions) {
      r = may_create(parent, perms);
      if (r < 0)
        goto out;
    }
    r = _create(parent, name, flags, mode, in, fhp, 0, 0, 0, NULL, &created,
                perms);
    if (r < 0)
      goto out;
  }

  if (r < 0)
    goto out;

  assert(*in);

  ldout(cct, 20) << "_ll_create created = " << created << dendl;
  if (!created) {
    if (!cct->_conf->fuse_default_permissions) {
      r = may_open(in->get(), flags, perms);
      if (r < 0) {
        if (*fhp) {
          int release_r = _release_fh(*fhp);
          assert(release_r == 0);  // during create, no async data ops should have happened
        }
        goto out;
      }
    }
    if (*fhp == NULL) {
      r = _open(in->get(), flags, mode, fhp, perms);
      if (r < 0)
        goto out;
    }
  }

out:
  if (*fhp) {
    ll_unclosed_fh_set.insert(*fhp);
  }

  ino_t ino = 0;
  if (r >= 0) {
    Inode *inode = in->get();
    if (use_faked_inos())
      ino = inode->faked_ino;
    else
      ino = inode->ino;
  }

  tout(cct) << (unsigned long)*fhp << std::endl;
  tout(cct) << ino << std::endl;
  ldout(cct, 8) << "_ll_create " << vparent << " " << name << " 0" << oct <<
    mode << dec << " " << ceph_flags_sys2wire(flags) << " = " << r << " (" <<
    *fhp << " " << hex << ino << dec << ")" << dendl;

  return r;
}

int Client::ll_create(Inode *parent, const char *name, mode_t mode,
                      int flags, struct stat *attr, Inode **outp, Fh **fhp,
                      const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);
  InodeRef in;

  int r = _ll_create(parent, name, mode, flags, &in, CEPH_STAT_CAP_INODE_ALL,
                     fhp, perms);
  if (r >= 0) {
    assert(in);

    // passing an Inode in outp requires an additional ref
    if (outp) {
      _ll_get(in.get());
      *outp = in.get();
    }
    fill_stat(in, attr);
  } else {
    attr->st_ino = 0;
  }

  return r;
}

int Client::ll_createx(Inode *parent, const char *name, mode_t mode,
                       int oflags, Inode **outp, Fh **fhp,
                       struct ceph_statx *stx, unsigned want, unsigned lflags,
                       const UserPerm& perms)
{
  unsigned caps = statx_to_mask(lflags, want);
  Mutex::Locker lock(client_lock);
  InodeRef in;

  int r = _ll_create(parent, name, mode, oflags, &in, caps, fhp, perms);
  if (r >= 0) {
    assert(in);

    // passing an Inode in outp requires an additional ref
    if (outp) {
      _ll_get(in.get());
      *outp = in.get();
    }
    fill_statx(in, caps, stx);
  } else {
    stx->stx_ino = 0;
    stx->stx_mask = 0;
  }

  return r;
}
loff_t Client::ll_lseek(Fh *fh, loff_t offset, int whence)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "ll_lseek" << std::endl;
  tout(cct) << offset << std::endl;
  tout(cct) << whence << std::endl;

  return _lseek(fh, offset, whence);
}

int Client::ll_read(Fh *fh, loff_t off, loff_t len, bufferlist *bl)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "ll_read " << fh << " " << fh->inode->ino << " " << off << "~" << len << dendl;
  tout(cct) << "ll_read" << std::endl;
  tout(cct) << (unsigned long)fh << std::endl;
  tout(cct) << off << std::endl;
  tout(cct) << len << std::endl;

  return _read(fh, off, len, bl);
}
int Client::ll_read_block(Inode *in, uint64_t blockid,
                          char *buf,
                          uint64_t offset,
                          uint64_t length,
                          file_layout_t* layout)
{
  Mutex::Locker lock(client_lock);

  vinodeno_t vino = _get_vino(in);
  object_t oid = file_object_t(vino.ino, blockid);
  C_SaferCond onfinish;
  bufferlist bl;

  objecter->read(oid,
                 object_locator_t(layout->pool_id),
                 offset,
                 length,
                 vino.snapid,
                 &bl,
                 CEPH_OSD_FLAG_READ,
                 &onfinish);

  client_lock.Unlock();
  int r = onfinish.wait();
  client_lock.Lock();

  if (r >= 0) {
    bl.copy(0, bl.length(), buf);
    r = bl.length();
  }

  return r;
}

/* It appears that the OSD doesn't return success unless the entire
   buffer was written, return the write length on success. */

int Client::ll_write_block(Inode *in, uint64_t blockid,
                           char* buf, uint64_t offset,
                           uint64_t length, file_layout_t* layout,
                           uint64_t snapseq, uint32_t sync)
{
  Mutex flock("Client::ll_write_block flock");
  vinodeno_t vino = ll_get_vino(in);
  Cond cond;
  bool done;
  int r = 0;
  Context *onsafe = nullptr;

  if (length == 0) {
    return -EINVAL;
  }
  if (true || sync) {
    /* if write is stable, the epilogue is waiting on
     * flock */
    onsafe = new C_SafeCond(&flock, &cond, &done, &r);
    done = false;
  } else {
    /* if write is unstable, we just place a barrier for
     * future commits to wait on */
    /*onsafe = new C_Block_Sync(this, vino.ino,
                              barrier_interval(offset, offset + length), &r);
    */
    done = true;
  }
  object_t oid = file_object_t(vino.ino, blockid);
  SnapContext fakesnap;
  bufferptr bp;
  if (length > 0) bp = buffer::copy(buf, length);
  bufferlist bl;
  bl.push_back(bp);

  ldout(cct, 1) << "ll_block_write for " << vino.ino << "." << blockid
                << dendl;

  fakesnap.seq = snapseq;

  /* lock just in time */
  client_lock.Lock();

  objecter->write(oid,
                  object_locator_t(layout->pool_id),
                  offset,
                  length,
                  fakesnap,
                  bl,
                  ceph::real_clock::now(),
                  0,
                  onsafe);

  client_lock.Unlock();
  if (!done /* also !sync */) {
    flock.Lock();
    while (!done)
      cond.Wait(flock);
    flock.Unlock();
  }

  if (r < 0) {
    return r;
  } else {
    return length;
  }
}

int Client::ll_commit_blocks(Inode *in,
                             uint64_t offset,
                             uint64_t length)
{
  Mutex::Locker lock(client_lock);
  /*
    BarrierContext *bctx;
    vinodeno_t vino = _get_vino(in);
    uint64_t ino = vino.ino;

    ldout(cct, 1) << "ll_commit_blocks for " << vino.ino << " from "
                  << offset << " to " << length << dendl;

    map<uint64_t, BarrierContext*>::iterator p = barriers.find(ino);
    if (p != barriers.end()) {
      barrier_interval civ(offset, offset + length);
      p->second->commit_barrier(civ);
    }
  */
  return 0;
}
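
/*
 * Note: the unstable-write path in ll_write_block() and the body of
 * ll_commit_blocks() above are currently disabled; the "if (true || sync)"
 * branch forces every block write to be waited on synchronously.  The
 * commented-out barrier code sketches the intended design: an unstable
 * write would register an interval barrier keyed by inode, and a later
 * commit would wait on all barriers overlapping the committed range.
 */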
int Client::ll_write(Fh *fh, loff_t off, loff_t len, const char *data)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "ll_write " << fh << " " << fh->inode->ino << " " << off <<
    "~" << len << dendl;
  tout(cct) << "ll_write" << std::endl;
  tout(cct) << (unsigned long)fh << std::endl;
  tout(cct) << off << std::endl;
  tout(cct) << len << std::endl;

  int r = _write(fh, off, len, data, NULL, 0);
  ldout(cct, 3) << "ll_write " << fh << " " << off << "~" << len << " = " << r
                << dendl;
  return r;
}

int Client::ll_flush(Fh *fh)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "ll_flush " << fh << " " << fh->inode->ino << dendl;
  tout(cct) << "ll_flush" << std::endl;
  tout(cct) << (unsigned long)fh << std::endl;

  return _flush(fh);
}

int Client::ll_fsync(Fh *fh, bool syncdataonly)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "ll_fsync " << fh << " " << fh->inode->ino << dendl;
  tout(cct) << "ll_fsync" << std::endl;
  tout(cct) << (unsigned long)fh << std::endl;

  int r = _fsync(fh, syncdataonly);
  if (r) {
    // If we're returning an error, clear it from the FH
    fh->take_async_err();
  }
  return r;
}

int Client::ll_sync_inode(Inode *in, bool syncdataonly)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "ll_sync_inode " << *in << dendl;
  tout(cct) << "ll_sync_inode" << std::endl;
  tout(cct) << (unsigned long)in << std::endl;

  return _fsync(in, syncdataonly);
}
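
/*
 * fallocate(2) support.  Only two mode bits are honored below:
 * FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE, and punching a hole
 * requires KEEP_SIZE (matching the Linux fallocate contract).  For
 * example (illustrative calls):
 *
 *   fallocate(fd, 0, 0, len);                                          // allocate/extend
 *   fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len); // punch hole
 *
 * are accepted, while FALLOC_FL_PUNCH_HOLE alone returns -EOPNOTSUPP.
 */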
#ifdef FALLOC_FL_PUNCH_HOLE

int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length)
{
  if (offset < 0 || length <= 0)
    return -EINVAL;

  if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
    return -EOPNOTSUPP;

  if ((mode & FALLOC_FL_PUNCH_HOLE) && !(mode & FALLOC_FL_KEEP_SIZE))
    return -EOPNOTSUPP;

  Inode *in = fh->inode.get();

  if (objecter->osdmap_pool_full(in->layout.pool_id) &&
      !(mode & FALLOC_FL_PUNCH_HOLE)) {
    return -ENOSPC;
  }

  if (in->snapid != CEPH_NOSNAP)
    return -EROFS;

  if ((fh->mode & CEPH_FILE_MODE_WR) == 0)
    return -EBADF;

  uint64_t size = offset + length;
  std::list<InodeRef> quota_roots;
  if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) &&
      size > in->size &&
      is_quota_bytes_exceeded(in, size - in->size, fh->actor_perms, &quota_roots)) {
    return -EDQUOT;
  }

  int have;
  int r = get_caps(in, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER, &have, -1);
  if (r < 0)
    return r;

  Mutex uninline_flock("Client::_fallocate_uninline_data flock");
  Cond uninline_cond;
  bool uninline_done = false;
  int uninline_ret = 0;
  Context *onuninline = NULL;

  if (mode & FALLOC_FL_PUNCH_HOLE) {
    if (in->inline_version < CEPH_INLINE_NONE &&
        (have & CEPH_CAP_FILE_BUFFER)) {
      bufferlist bl;
      int len = in->inline_data.length();
      if (offset < len) {
        if (offset > 0)
          in->inline_data.copy(0, offset, bl);
        int size = length;
        if (offset + size > len)
          size = len - offset;
        if (size > 0)
          bl.append_zero(size);
        if (offset + size < len)
          in->inline_data.copy(offset + size, len - offset - size, bl);
        in->inline_data = bl;
        in->inline_version++;
      }
      in->mtime = in->ctime = ceph_clock_now();
      in->change_attr++;
      in->mark_caps_dirty(CEPH_CAP_FILE_WR);
    } else {
      if (in->inline_version < CEPH_INLINE_NONE) {
        onuninline = new C_SafeCond(&uninline_flock,
                                    &uninline_cond,
                                    &uninline_done,
                                    &uninline_ret);
        uninline_data(in, onuninline);
      }

      Mutex flock("Client::_punch_hole flock");
      Cond cond;
      bool done = false;
      Context *onfinish = new C_SafeCond(&flock, &cond, &done);

      unsafe_sync_write++;
      get_cap_ref(in, CEPH_CAP_FILE_BUFFER);

      _invalidate_inode_cache(in, offset, length);
      filer->zero(in->ino, &in->layout,
                  in->snaprealm->get_snap_context(),
                  offset, length,
                  ceph::real_clock::now(),
                  0, true, onfinish);
      in->mtime = in->ctime = ceph_clock_now();
      in->change_attr++;
      in->mark_caps_dirty(CEPH_CAP_FILE_WR);

      client_lock.Unlock();
      flock.Lock();
      while (!done)
        cond.Wait(flock);
      flock.Unlock();
      client_lock.Lock();
      _sync_write_commit(in);
    }
  } else if (!(mode & FALLOC_FL_KEEP_SIZE)) {
    uint64_t size = offset + length;
    if (size > in->size) {
      in->size = size;
      in->mtime = in->ctime = ceph_clock_now();
      in->change_attr++;
      in->mark_caps_dirty(CEPH_CAP_FILE_WR);

      if (is_quota_bytes_approaching(in, quota_roots)) {
        check_caps(in, CHECK_CAPS_NODELAY);
      } else if (is_max_size_approaching(in)) {
        check_caps(in, 0);
      }
    }
  }

  if (onuninline) {
    client_lock.Unlock();
    uninline_flock.Lock();
    while (!uninline_done)
      uninline_cond.Wait(uninline_flock);
    uninline_flock.Unlock();
    client_lock.Lock();

    if (uninline_ret >= 0 || uninline_ret == -ECANCELED) {
      in->inline_data.clear();
      in->inline_version = CEPH_INLINE_NONE;
      in->mark_caps_dirty(CEPH_CAP_FILE_WR);
      check_caps(in, 0);
    } else
      r = uninline_ret;
  }

  put_cap_ref(in, CEPH_CAP_FILE_WR);
  return r;
}
#else

int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length)
{
  return -EOPNOTSUPP;
}

#endif


int Client::ll_fallocate(Fh *fh, int mode, loff_t offset, loff_t length)
{
  Mutex::Locker lock(client_lock);
  ldout(cct, 3) << "ll_fallocate " << fh << " " << fh->inode->ino << dendl;
  tout(cct) << "ll_fallocate " << mode << " " << offset << " " << length << std::endl;
  tout(cct) << (unsigned long)fh << std::endl;

  return _fallocate(fh, mode, offset, length);
}

int Client::fallocate(int fd, int mode, loff_t offset, loff_t length)
{
  Mutex::Locker lock(client_lock);
  tout(cct) << "fallocate " << fd << " " << mode << " " << offset << " " << length << std::endl;

  Fh *fh = get_filehandle(fd);
  if (!fh)
    return -EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (fh->flags & O_PATH)
    return -EBADF;
#endif
  return _fallocate(fh, mode, offset, length);
}
int Client::ll_release(Fh *fh)
{
  Mutex::Locker lock(client_lock);

  ldout(cct, 3) << "ll_release (fh)" << fh << " " << fh->inode->ino << dendl;
  tout(cct) << "ll_release (fh)" << std::endl;
  tout(cct) << (unsigned long)fh << std::endl;

  if (ll_unclosed_fh_set.count(fh))
    ll_unclosed_fh_set.erase(fh);
  return _release_fh(fh);
}

int Client::ll_getlk(Fh *fh, struct flock *fl, uint64_t owner)
{
  Mutex::Locker lock(client_lock);

  ldout(cct, 3) << "ll_getlk (fh)" << fh << " " << fh->inode->ino << dendl;
  tout(cct) << "ll_getlk (fh)" << (unsigned long)fh << std::endl;

  return _getlk(fh, fl, owner);
}

int Client::ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep)
{
  Mutex::Locker lock(client_lock);

  ldout(cct, 3) << "ll_setlk (fh) " << fh << " " << fh->inode->ino << dendl;
  tout(cct) << "ll_setlk (fh)" << (unsigned long)fh << std::endl;

  return _setlk(fh, fl, owner, sleep);
}

int Client::ll_flock(Fh *fh, int cmd, uint64_t owner)
{
  Mutex::Locker lock(client_lock);

  ldout(cct, 3) << "ll_flock (fh) " << fh << " " << fh->inode->ino << dendl;
  tout(cct) << "ll_flock (fh)" << (unsigned long)fh << std::endl;

  return _flock(fh, cmd, owner);
}
int Client::set_deleg_timeout(uint32_t timeout)
{
  Mutex::Locker lock(client_lock);

  /*
   * The whole point is to prevent blacklisting so we must time out the
   * delegation before the session autoclose timeout kicks in.
   */
  if (timeout >= mdsmap->get_session_autoclose())
    return -EINVAL;

  deleg_timeout = timeout;
  return 0;
}

int Client::ll_delegation(Fh *fh, unsigned cmd, ceph_deleg_cb_t cb, void *priv)
{
  int ret = -EINVAL;

  Mutex::Locker lock(client_lock);

  Inode *inode = fh->inode.get();

  switch(cmd) {
  case CEPH_DELEGATION_NONE:
    inode->unset_deleg(fh);
    ret = 0;
    break;
  default:
    try {
      ret = inode->set_deleg(fh, cmd, cb, priv);
    } catch (std::bad_alloc) {
      ret = -ENOMEM;
    }
    break;
  }
  return ret;
}

class C_Client_RequestInterrupt : public Context  {
private:
  Client *client;
  MetaRequest *req;
public:
  C_Client_RequestInterrupt(Client *c, MetaRequest *r) : client(c), req(r) {
    req->get();
  }
  void finish(int r) override {
    Mutex::Locker l(client->client_lock);
    assert(req->head.op == CEPH_MDS_OP_SETFILELOCK);
    client->_interrupt_filelock(req);
    client->put_request(req);
  }
};

void Client::ll_interrupt(void *d)
{
  MetaRequest *req = static_cast<MetaRequest*>(d);
  ldout(cct, 3) << "ll_interrupt tid " << req->get_tid() << dendl;
  tout(cct) << "ll_interrupt tid " << req->get_tid() << std::endl;
  interrupt_finisher.queue(new C_Client_RequestInterrupt(this, req));
}
// =========================================
// layout

// expose file layouts

int Client::describe_layout(const char *relpath, file_layout_t *lp,
                            const UserPerm& perms)
{
  Mutex::Locker lock(client_lock);

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms);
  if (r < 0)
    return r;

  *lp = in->layout;

  ldout(cct, 3) << "describe_layout(" << relpath << ") = 0" << dendl;
  return 0;
}

int Client::fdescribe_layout(int fd, file_layout_t *lp)
{
  Mutex::Locker lock(client_lock);

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
  Inode *in = f->inode.get();

  *lp = in->layout;

  ldout(cct, 3) << "fdescribe_layout(" << fd << ") = 0" << dendl;
  return 0;
}

int64_t Client::get_default_pool_id()
{
  Mutex::Locker lock(client_lock);
  /* first data pool is the default */
  return mdsmap->get_first_data_pool();
}

// expose osdmap

int64_t Client::get_pool_id(const char *pool_name)
{
  Mutex::Locker lock(client_lock);
  return objecter->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name),
                               pool_name);
}

string Client::get_pool_name(int64_t pool)
{
  Mutex::Locker lock(client_lock);
  return objecter->with_osdmap([pool](const OSDMap& o) {
      return o.have_pg_pool(pool) ? o.get_pool_name(pool) : string();
    });
}

int Client::get_pool_replication(int64_t pool)
{
  Mutex::Locker lock(client_lock);
  return objecter->with_osdmap([pool](const OSDMap& o) {
      return o.have_pg_pool(pool) ? o.get_pg_pool(pool)->get_size() : -ENOENT;
    });
}

int Client::get_file_extent_osds(int fd, loff_t off, loff_t *len, vector<int>& osds)
{
  Mutex::Locker lock(client_lock);

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
  Inode *in = f->inode.get();

  vector<ObjectExtent> extents;
  Striper::file_to_extents(cct, in->ino, &in->layout, off, 1, in->truncate_size, extents);
  assert(extents.size() == 1);

  objecter->with_osdmap([&](const OSDMap& o) {
      pg_t pg = o.object_locator_to_pg(extents[0].oid, extents[0].oloc);
      o.pg_to_acting_osds(pg, osds);
    });

  if (osds.empty())
    return -EINVAL;

  /*
   * Return the remainder of the extent (stripe unit)
   *
   * If length = 1 is passed to Striper::file_to_extents we get a single
   * extent back, but its length is one so we still need to compute the length
   * to the end of the stripe unit.
   *
   * If length = su then we may get 1 or 2 objects back in the extents vector
   * which would have to be examined. Even then, the offsets are local to the
   * object, so matching up to the file offset is extra work.
   *
   * It seems simpler to stick with length = 1 and manually compute the
   * remainder.
   */
  if (len) {
    uint64_t su = in->layout.stripe_unit;
    *len = su - (off % su);
  }

  return 0;
}
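
/*
 * Example of the remainder computation above (illustrative numbers): with a
 * 4 MiB stripe_unit, a query at file offset 5 MiB lands 1 MiB into its
 * stripe unit, so *len = 4 MiB - (5 MiB % 4 MiB) = 3 MiB of contiguous data
 * remain on the same set of OSDs.
 */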
int Client::get_osd_crush_location(int id, vector<pair<string, string> >& path)
{
  Mutex::Locker lock(client_lock);

  if (id < 0)
    return -EINVAL;
  return objecter->with_osdmap([&](const OSDMap& o) {
      return o.crush->get_full_location_ordered(id, path);
    });
}

int Client::get_file_stripe_address(int fd, loff_t offset,
                                    vector<entity_addr_t>& address)
{
  Mutex::Locker lock(client_lock);

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
  Inode *in = f->inode.get();

  // which object?
  vector<ObjectExtent> extents;
  Striper::file_to_extents(cct, in->ino, &in->layout, offset, 1,
                           in->truncate_size, extents);
  assert(extents.size() == 1);

  // now we have the object and its 'layout'
  return objecter->with_osdmap([&](const OSDMap& o) {
      pg_t pg = o.object_locator_to_pg(extents[0].oid, extents[0].oloc);
      vector<int> osds;
      o.pg_to_acting_osds(pg, osds);
      if (osds.empty())
        return -EINVAL;
      for (unsigned i = 0; i < osds.size(); i++) {
        entity_addr_t addr = o.get_addr(osds[i]);
        address.push_back(addr);
      }
      return 0;
    });
}

int Client::get_osd_addr(int osd, entity_addr_t& addr)
{
  Mutex::Locker lock(client_lock);

  return objecter->with_osdmap([&](const OSDMap& o) {
      if (!o.exists(osd))
        return -ENOENT;

      addr = o.get_addr(osd);
      return 0;
    });
}

int Client::enumerate_layout(int fd, vector<ObjectExtent>& result,
                             loff_t length, loff_t offset)
{
  Mutex::Locker lock(client_lock);

  Fh *f = get_filehandle(fd);
  if (!f)
    return -EBADF;
  Inode *in = f->inode.get();

  // map to a list of extents
  Striper::file_to_extents(cct, in->ino, &in->layout, offset, length, in->truncate_size, result);

  ldout(cct, 3) << "enumerate_layout(" << fd << ", " << length << ", " << offset << ") = 0" << dendl;
  return 0;
}


/* find an osd with the same ip.  -ENXIO if none. */
int Client::get_local_osd()
{
  Mutex::Locker lock(client_lock);

  objecter->with_osdmap([this](const OSDMap& o) {
      if (o.get_epoch() != local_osd_epoch) {
        local_osd = o.find_osd_on_ip(messenger->get_myaddr());
        local_osd_epoch = o.get_epoch();
      }
    });
  return local_osd;
}
// ===============================

void Client::ms_handle_connect(Connection *con)
{
  ldout(cct, 10) << "ms_handle_connect on " << con->get_peer_addr() << dendl;
}

bool Client::ms_handle_reset(Connection *con)
{
  ldout(cct, 0) << "ms_handle_reset on " << con->get_peer_addr() << dendl;
  return false;
}
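
/*
 * Remote reset handling depends on the MDS session state: a reset while
 * CLOSING completes the close; while OPENING, the session is re-opened and
 * its waiters transferred; while OPEN, the session is either closed for
 * reconnect (client_reconnect_stale) or marked STALE and left for the
 * normal stale-session recovery path.
 */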
void Client::ms_handle_remote_reset(Connection *con)
{
  ldout(cct, 0) << "ms_handle_remote_reset on " << con->get_peer_addr() << dendl;
  Mutex::Locker l(client_lock);
  switch (con->get_peer_type()) {
  case CEPH_ENTITY_TYPE_MDS:
    {
      // kludge to figure out which mds this is; fixme with a Connection* state
      mds_rank_t mds = MDS_RANK_NONE;
      MetaSession *s = NULL;
      for (map<mds_rank_t,MetaSession*>::iterator p = mds_sessions.begin();
           p != mds_sessions.end();
           ++p) {
        if (mdsmap->get_addr(p->first) == con->get_peer_addr()) {
          mds = p->first;
          s = p->second;
        }
      }
      if (mds >= 0) {
        assert (s != NULL);
        switch (s->state) {
        case MetaSession::STATE_CLOSING:
          ldout(cct, 1) << "reset from mds we were closing; we'll call that closed" << dendl;
          _closed_mds_session(s);
          break;

        case MetaSession::STATE_OPENING:
          {
            ldout(cct, 1) << "reset from mds we were opening; retrying" << dendl;
            list<Context*> waiters;
            waiters.swap(s->waiting_for_open);
            _closed_mds_session(s);
            MetaSession *news = _get_or_open_mds_session(mds);
            news->waiting_for_open.swap(waiters);
          }
          break;

        case MetaSession::STATE_OPEN:
          {
            objecter->maybe_request_map(); /* to check if we are blacklisted */
            const md_config_t *conf = cct->_conf;
            if (conf->client_reconnect_stale) {
              ldout(cct, 1) << "reset from mds we were open; close mds session for reconnect" << dendl;
              _closed_mds_session(s);
            } else {
              ldout(cct, 1) << "reset from mds we were open; mark session as stale" << dendl;
              s->state = MetaSession::STATE_STALE;
            }
          }
          break;

        case MetaSession::STATE_NEW:
        case MetaSession::STATE_CLOSED:
        default:
          break;
        }
      }
    }
    break;
  }
}

bool Client::ms_handle_refused(Connection *con)
{
  ldout(cct, 1) << "ms_handle_refused on " << con->get_peer_addr() << dendl;
  return false;
}

bool Client::ms_get_authorizer(int dest_type, AuthAuthorizer **authorizer, bool force_new)
{
  if (dest_type == CEPH_ENTITY_TYPE_MON)
    return true;
  *authorizer = monclient->build_authorizer(dest_type);
  return true;
}
Inode *Client::get_quota_root(Inode *in, const UserPerm& perms)
{
  Inode *cur = in;
  utime_t now = ceph_clock_now();

  while (cur) {
    if (cur != in && cur->quota.is_enable())
      break;

    Inode *parent_in = NULL;
    if (!cur->dn_set.empty()) {
      for (auto p = cur->dn_set.begin(); p != cur->dn_set.end(); ++p) {
        Dentry *dn = *p;
        if (dn->lease_mds >= 0 &&
            dn->lease_ttl > now &&
            mds_sessions.count(dn->lease_mds)) {
          parent_in = dn->dir->parent_inode;
        } else {
          Inode *diri = dn->dir->parent_inode;
          if (diri->caps_issued_mask(CEPH_CAP_FILE_SHARED) &&
              diri->shared_gen == dn->cap_shared_gen) {
            parent_in = dn->dir->parent_inode;
          }
        }
        if (parent_in)
          break;
      }
    } else if (root_parents.count(cur)) {
      parent_in = root_parents[cur].get();
    }

    if (parent_in) {
      cur = parent_in;
      continue;
    }

    if (cur == root_ancestor)
      break;

    // deleted inode
    if (cur->nlink == 0) {
      cur = root_ancestor;
      break;
    }

    MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LOOKUPNAME);
    filepath path(cur->ino);
    req->set_filepath(path);
    req->set_inode(cur);

    InodeRef parent_ref;
    int ret = make_request(req, perms, &parent_ref);
    if (ret < 0) {
      ldout(cct, 1) << __func__ << " " << in->vino()
                    << " failed to find parent of " << cur->vino()
                    << " err " << ret <<  dendl;
      // FIXME: what to do?
      cur = root_ancestor;
      break;
    }

    now = ceph_clock_now();
    if (cur == in)
      cur = parent_ref.get();
    else
      cur = in; // start over
  }

  ldout(cct, 10) << __func__ << " " << in->vino() << " -> " << cur->vino() << dendl;
  return cur;
}

/**
 * Traverse quota ancestors of the Inode, return true
 * if any of them passes the passed function
 */
bool Client::check_quota_condition(Inode *in, const UserPerm& perms,
                                   std::function<bool (const Inode &in)> test)
{
  while (true) {
    assert(in != NULL);
    if (test(*in)) {
      return true;
    }

    if (in == root_ancestor) {
      // We're done traversing, drop out
      return false;
    } else {
      // Continue up the tree
      in = get_quota_root(in, perms);
    }
  }

  return false;
}

bool Client::is_quota_files_exceeded(Inode *in, const UserPerm& perms)
{
  return check_quota_condition(in, perms,
      [](const Inode &in) {
        return in.quota.max_files && in.rstat.rsize() >= in.quota.max_files;
      });
}

bool Client::is_quota_bytes_exceeded(Inode *in, int64_t new_bytes,
                                     const UserPerm& perms,
                                     std::list<InodeRef>* quota_roots)
{
  return check_quota_condition(in, perms,
      [&new_bytes, quota_roots](const Inode &in) {
        if (quota_roots)
          quota_roots->emplace_back(const_cast<Inode*>(&in));
        return in.quota.max_bytes && (in.rstat.rbytes + new_bytes)
               > in.quota.max_bytes;
      });
}

bool Client::is_quota_bytes_approaching(Inode *in, std::list<InodeRef>& quota_roots)
{
  assert(in->size >= in->reported_size);
  const uint64_t size = in->size - in->reported_size;

  for (auto& diri : quota_roots) {
    if (diri->quota.max_bytes) {
      if (diri->rstat.rbytes >= diri->quota.max_bytes)
        return true;

      uint64_t space = diri->quota.max_bytes - diri->rstat.rbytes;
      if ((space >> 4) < size)
        return true;
    }
  }
  return false;
}
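
/*
 * Pool permission probing.  OSD caps are not directly visible to the
 * client, so check_pool_perm() discovers read/write permission
 * empirically: it issues a stat as the read probe and an exclusive create
 * as the write probe against the file's first object, then caches the
 * result per (pool, namespace) so each pool is only probed once.  -EPERM
 * on a probe means "no permission"; any other error leaves the result
 * indeterminate and the check is retried on a later call.
 */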
enum {
  POOL_CHECKED = 1,
  POOL_CHECKING = 2,
  POOL_READ = 4,
  POOL_WRITE = 8,
};

int Client::check_pool_perm(Inode *in, int need)
{
  if (!cct->_conf->client_check_pool_perm)
    return 0;

  int64_t pool_id = in->layout.pool_id;
  std::string pool_ns = in->layout.pool_ns;
  std::pair<int64_t, std::string> perm_key(pool_id, pool_ns);
  int have = 0;
  while (true) {
    auto it = pool_perms.find(perm_key);
    if (it == pool_perms.end())
      break;
    if (it->second == POOL_CHECKING) {
      // avoid concurrent checks
      wait_on_list(waiting_for_pool_perm);
    } else {
      have = it->second;
      assert(have & POOL_CHECKED);
      break;
    }
  }

  if (!have) {
    if (in->snapid != CEPH_NOSNAP) {
      // The pool permission check needs to write to the first object. For a
      // snapshot, the head of the first object may already have been
      // deleted. To avoid creating an orphan object, skip the check for now.
      return 0;
    }

    pool_perms[perm_key] = POOL_CHECKING;

    char oid_buf[32];
    snprintf(oid_buf, sizeof(oid_buf), "%llx.00000000", (unsigned long long)in->ino);
    object_t oid = oid_buf;

    SnapContext nullsnapc;

    C_SaferCond rd_cond;
    ObjectOperation rd_op;
    rd_op.stat(NULL, (ceph::real_time*)nullptr, NULL);

    objecter->mutate(oid, OSDMap::file_to_object_locator(in->layout), rd_op,
                     nullsnapc, ceph::real_clock::now(), 0, &rd_cond);

    C_SaferCond wr_cond;
    ObjectOperation wr_op;
    wr_op.create(true);

    objecter->mutate(oid, OSDMap::file_to_object_locator(in->layout), wr_op,
                     nullsnapc, ceph::real_clock::now(), 0, &wr_cond);

    client_lock.Unlock();
    int rd_ret = rd_cond.wait();
    int wr_ret = wr_cond.wait();
    client_lock.Lock();

    bool errored = false;

    if (rd_ret == 0 || rd_ret == -ENOENT)
      have |= POOL_READ;
    else if (rd_ret != -EPERM) {
      ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns
                     << " rd_err = " << rd_ret << " wr_err = " << wr_ret << dendl;
      errored = true;
    }

    if (wr_ret == 0 || wr_ret == -EEXIST)
      have |= POOL_WRITE;
    else if (wr_ret != -EPERM) {
      ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns
                     << " rd_err = " << rd_ret << " wr_err = " << wr_ret << dendl;
      errored = true;
    }

    if (errored) {
      // Indeterminate: erase the CHECKING state so that subsequent calls
      // re-check. Raise EIO because the actual error code might be
      // misleading for userspace filesystem users.
      pool_perms.erase(perm_key);
      signal_cond_list(waiting_for_pool_perm);
      return -EIO;
    }

    pool_perms[perm_key] = have | POOL_CHECKED;
    signal_cond_list(waiting_for_pool_perm);
  }

  if ((need & CEPH_CAP_FILE_RD) && !(have & POOL_READ)) {
    ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns
                   << " need " << ccap_string(need) << ", but no read perm" << dendl;
    return -EPERM;
  }
  if ((need & CEPH_CAP_FILE_WR) && !(have & POOL_WRITE)) {
    ldout(cct, 10) << "check_pool_perm on pool " << pool_id << " ns " << pool_ns
                   << " need " << ccap_string(need) << ", but no write perm" << dendl;
    return -EPERM;
  }

  return 0;
}
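// Summary of the cached states used above (stored in pool_perms, keyed by
// pool id and namespace):
//   POOL_CHECKING  a probe for this pool is in flight; later callers block
//                  on waiting_for_pool_perm until it resolves.
//   POOL_CHECKED   probe finished; POOL_READ / POOL_WRITE record the result.
// The probe issues a stat (read) and an exclusive create (write) against the
// file's first object: -ENOENT still proves read access and -EEXIST still
// proves write access, since the OSD accepted the op either way.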
int Client::_posix_acl_permission(Inode *in, const UserPerm& perms, unsigned want)
{
  if (acl_type == POSIX_ACL) {
    if (in->xattrs.count(ACL_EA_ACCESS)) {
      const bufferptr& access_acl = in->xattrs[ACL_EA_ACCESS];

      return posix_acl_permits(access_acl, in->uid, in->gid, perms, want);
    }
  }
  return -EAGAIN;
}
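// A return of -EAGAIN signals that no access ACL is present (or ACLs are
// disabled), telling the caller to fall back to the ordinary UNIX mode-bit
// permission check.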
int Client::_posix_acl_chmod(Inode *in, mode_t mode, const UserPerm& perms)
{
  if (acl_type == NO_ACL)
    return 0;

  int r = _getattr(in, CEPH_STAT_CAP_XATTR, perms, in->xattr_version == 0);
  if (r < 0)
    goto out;

  if (acl_type == POSIX_ACL) {
    if (in->xattrs.count(ACL_EA_ACCESS)) {
      const bufferptr& access_acl = in->xattrs[ACL_EA_ACCESS];
      bufferptr acl(access_acl.c_str(), access_acl.length());
      r = posix_acl_access_chmod(acl, mode);
      if (r < 0)
        goto out;
      r = _do_setxattr(in, ACL_EA_ACCESS, acl.c_str(), acl.length(), 0, perms);
    } else {
      r = 0;
    }
  }
out:
  ldout(cct, 10) << __func__ << " ino " << in->ino << " result=" << r << dendl;
  return r;
}
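// The access ACL duplicates the owner/group/other permission bits, so a
// chmod must rewrite the cached xattr to keep both representations
// consistent; posix_acl_access_chmod() performs that in-place edit before
// the ACL is written back via _do_setxattr().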
int Client::_posix_acl_create(Inode *dir, mode_t *mode, bufferlist& xattrs_bl,
                              const UserPerm& perms)
{
  if (acl_type == NO_ACL)
    return 0;

  if (S_ISLNK(*mode))
    return 0;

  int r = _getattr(dir, CEPH_STAT_CAP_XATTR, perms, dir->xattr_version == 0);
  if (r < 0)
    goto out;

  if (acl_type == POSIX_ACL) {
    if (dir->xattrs.count(ACL_EA_DEFAULT)) {
      map<string, bufferptr> xattrs;

      const bufferptr& default_acl = dir->xattrs[ACL_EA_DEFAULT];
      bufferptr acl(default_acl.c_str(), default_acl.length());
      r = posix_acl_inherit_mode(acl, mode);
      if (r > 0) {
        if (r > 1) {
          r = posix_acl_equiv_mode(acl.c_str(), acl.length(), mode);
          if (r < 0)
            goto out;
          if (r > 0)
            xattrs[ACL_EA_ACCESS] = acl;
        }
        if (S_ISDIR(*mode))
          xattrs[ACL_EA_DEFAULT] = dir->xattrs[ACL_EA_DEFAULT];
      }
      r = xattrs.size();
      if (r > 0)
        ::encode(xattrs, xattrs_bl);
    } else {
      if (umask_cb)
        *mode &= ~umask_cb(callback_handle);
      r = 0;
    }
  }
out:
  ldout(cct, 10) << __func__ << " dir ino " << dir->ino << " result=" << r << dendl;
  return r;
}
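// On success r is the number of ACL xattrs encoded into xattrs_bl (0, 1 or
// 2): ACL_EA_ACCESS when the inherited ACL cannot be represented by mode
// bits alone, plus ACL_EA_DEFAULT when the new inode is a directory.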
void Client::set_filer_flags(int flags)
{
  Mutex::Locker l(client_lock);
  assert(flags == 0 ||
         flags == CEPH_OSD_FLAG_LOCALIZE_READS);
  objecter->add_global_op_flags(flags);
}
void Client::clear_filer_flags(int flags)
{
  Mutex::Locker l(client_lock);
  assert(flags == CEPH_OSD_FLAG_LOCALIZE_READS);
  objecter->clear_global_op_flag(flags);
}
/**
 * This is included in cap release messages, to cause
 * the MDS to wait until this OSD map epoch. It is necessary
 * in corner cases where we cancel RADOS ops, so that
 * nobody else tries to do IO to the same objects in
 * the same epoch as the cancelled ops.
 */
void Client::set_cap_epoch_barrier(epoch_t e)
{
  ldout(cct, 5) << __func__ << " epoch = " << e << dendl;
  cap_epoch_barrier = e;
}
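// Assumed consumer (for illustration; the stamping happens elsewhere in this
// file): cap messages carry the barrier so the MDS defers processing until
// its own OSDMap has reached that epoch, e.g.
//
//   m->osd_epoch_barrier = cap_epoch_barrier;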
const char** Client::get_tracked_conf_keys() const
{
  static const char* keys[] = {
    "client_cache_size",
    "client_cache_mid",
    "client_acl_type",
    "client_deleg_timeout",
    "client_deleg_break_on_open",
    NULL
  };
  return keys;
}
void Client::handle_conf_change(const struct md_config_t *conf,
                                const std::set<std::string> &changed)
{
  Mutex::Locker lock(client_lock);

  if (changed.count("client_cache_mid")) {
    lru.lru_set_midpoint(cct->_conf->client_cache_mid);
  }
  if (changed.count("client_acl_type")) {
    acl_type = NO_ACL;
    if (cct->_conf->client_acl_type == "posix_acl")
      acl_type = POSIX_ACL;
  }
}
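// These observed keys can be changed at runtime through the admin socket,
// e.g. (assuming a ceph-fuse client with an admin socket configured):
//
//   ceph daemon /var/run/ceph/ceph-client.admin.asok config set client_cache_mid 0.8
//
// which reaches this handler via md_config_t's observer mechanism.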
void intrusive_ptr_add_ref(Inode *in)
{
  in->get();
}

void intrusive_ptr_release(Inode *in)
{
  in->client->put_inode(in);
}
mds_rank_t Client::_get_random_up_mds() const
{
  assert(client_lock.is_locked_by_me());

  std::set<mds_rank_t> up;
  mdsmap->get_up_mds_set(up);

  if (up.empty())
    return MDS_RANK_NONE;
  std::set<mds_rank_t>::const_iterator p = up.begin();
  for (int n = rand() % up.size(); n; n--)
    ++p;
  return *p;
}
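// Note: rand() % up.size() carries a slight modulo bias, but with the small
// number of up MDS ranks this is harmless for picking a session target.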
StandaloneClient::StandaloneClient(Messenger *m, MonClient *mc)
    : Client(m, mc, new Objecter(m->cct, m, mc, NULL, 0, 0))
{
  monclient->set_messenger(m);
  objecter->set_client_incarnation(0);
}
StandaloneClient::~StandaloneClient()
{
  delete objecter;
  objecter = nullptr;
}
int StandaloneClient::init()
{
  timer.init();
  objectcacher->start();
  objecter->init();

  client_lock.Lock();
  assert(!initialized);

  messenger->add_dispatcher_tail(objecter);
  messenger->add_dispatcher_tail(this);

  monclient->set_want_keys(CEPH_ENTITY_TYPE_MDS | CEPH_ENTITY_TYPE_OSD);
  int r = monclient->init();
  if (r < 0) {
    // need to do cleanup because we're in an intermediate init state
    timer.shutdown();
    client_lock.Unlock();
    objecter->shutdown();
    objectcacher->stop();
    monclient->shutdown();
    return r;
  }
  objecter->start();

  client_lock.Unlock();
  _finish_init();

  return 0;
}
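// Illustrative lifecycle sketch (not part of the original source): a
// standalone client is typically driven like
//
//   StandaloneClient client(messenger, monclient);
//   int r = client.init();                 // bring-up sequence shown above
//   if (r == 0) {
//     r = client.mount("/", perms, true);  // mount() assumed from Client.h
//     // ... do filesystem I/O ...
//     client.unmount();
//   }
//   client.shutdown();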
void StandaloneClient::shutdown()
{
  Client::shutdown();
  objecter->shutdown();
  monclient->shutdown();
}