// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/utsname.h>

#include <boost/lexical_cast.hpp>
#include <boost/fusion/include/std_pair.hpp>

#include "common/async/waiter.h"

#if defined(__FreeBSD__) || defined(_WIN32)
#define XATTR_CREATE  0x1
#define XATTR_REPLACE 0x2
#else
#include <sys/xattr.h>
#endif

#if defined(__linux__)
#include <linux/falloc.h>
#endif

#include <sys/statvfs.h>
49 #include "common/config.h"
50 #include "common/version.h"
51 #include "common/async/blocked_completion.h"
53 #include "mon/MonClient.h"
55 #include "messages/MClientCaps.h"
56 #include "messages/MClientLease.h"
57 #include "messages/MClientQuota.h"
58 #include "messages/MClientReclaim.h"
59 #include "messages/MClientReclaimReply.h"
60 #include "messages/MClientReconnect.h"
61 #include "messages/MClientReply.h"
62 #include "messages/MClientRequest.h"
63 #include "messages/MClientRequestForward.h"
64 #include "messages/MClientSession.h"
65 #include "messages/MClientSnap.h"
66 #include "messages/MClientMetrics.h"
67 #include "messages/MCommandReply.h"
68 #include "messages/MFSMap.h"
69 #include "messages/MFSMapUser.h"
70 #include "messages/MMDSMap.h"
71 #include "messages/MOSDMap.h"
73 #include "mds/flock.h"
74 #include "mds/cephfs_features.h"
75 #include "osd/OSDMap.h"
76 #include "osdc/Filer.h"
78 #include "common/Cond.h"
79 #include "common/perf_counters.h"
80 #include "common/admin_socket.h"
81 #include "common/errno.h"
82 #include "include/str_list.h"
84 #define dout_subsys ceph_subsys_client
86 #include "include/lru.h"
87 #include "include/compat.h"
88 #include "include/stringify.h"
89 #include "include/random.h"
94 #include "Delegation.h"
96 #include "ClientSnapRealm.h"
98 #include "MetaSession.h"
99 #include "MetaRequest.h"
100 #include "ObjecterWriteback.h"
101 #include "posix_acl.h"
103 #include "include/ceph_assert.h"
104 #include "include/stat.h"
106 #include "include/cephfs/ceph_ll_client.h"
#if HAVE_GETGROUPLIST
#include <grp.h>
#include <pwd.h>
#include <unistd.h>
#endif
#define dout_prefix *_dout << "client." << whoami << " "

#define tout(cct) if (!cct->_conf->client_trace.empty()) traceout

// FreeBSD fails to define this
// Darwin fails to define this

// Windows doesn't define those values. While the POSIX compatibility layer
// doesn't support those values, the Windows native functions do provide
// similar flags. Special care should be taken if we're going to use those
// flags in ceph-dokan. The current values are no-ops, while propagating
// them to the rest of the code might cause the Windows functions to reject
// them.
#define O_NOFOLLOW 0x0

#define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED)

#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
using namespace TOPNSPC::common;

namespace bs = boost::system;
namespace ca = ceph::async;
void client_flush_set_callback(void *p, ObjectCacher::ObjectSet *oset)
{
  Client *client = static_cast<Client*>(p);
  client->flush_set_callback(oset);
}
bool Client::is_reserved_vino(vinodeno_t &vino) {
  if (MDS_IS_PRIVATE_INO(vino.ino)) {
    ldout(cct, -1) << __func__ << " attempt to access reserved inode number " << vino << dendl;
    return true;
  }
  return false;
}
Client::CommandHook::CommandHook(Client *client) :
  m_client(client)
{
}
int Client::CommandHook::call(
  std::string_view command,
  const cmdmap_t& cmdmap,
  Formatter *f,
  std::ostream& errss,
  bufferlist& out)
{
  f->open_object_section("result");
  {
    std::scoped_lock l{m_client->client_lock};
    if (command == "mds_requests")
      m_client->dump_mds_requests(f);
    else if (command == "mds_sessions") {
      bool cap_dump = false;
      cmd_getval(cmdmap, "cap_dump", cap_dump);
      m_client->dump_mds_sessions(f, cap_dump);
    } else if (command == "dump_cache")
      m_client->dump_cache(f);
    else if (command == "kick_stale_sessions")
      m_client->_kick_stale_sessions();
    else if (command == "status")
      m_client->dump_status(f);
    else
      ceph_abort_msg("bad command registered");
  }
  f->close_section();
  return 0;
}
int Client::get_fd_inode(int fd, InodeRef *in) {
  int r = 0;
  if (fd == CEPHFS_AT_FDCWD) {
    *in = cwd;
  } else {
    Fh *f = get_filehandle(fd);
    if (!f)
      r = -CEPHFS_EBADF;
    else
      *in = f->inode;
  }
  return r;
}
dir_result_t::dir_result_t(Inode *in, const UserPerm& perms)
  : inode(in), offset(0), next_offset(2),
    release_count(0), ordered_count(0), cache_index(0), start_shared_gen(0),
    perms(perms)
{ }
void Client::_reset_faked_inos()
{
  ino_t start = 1024;
  free_faked_inos.clear();
  free_faked_inos.insert(start, (uint32_t)-1 - start + 1);
  last_used_faked_ino = 0;
  last_used_faked_root = 0;
#ifdef _WIN32
  // On Windows, sizeof(ino_t) is just 2. Despite that, most "native"
  // Windows structures, including Dokan ones, are using 64-bit identifiers.
  _use_faked_inos = false;
#else
  _use_faked_inos = sizeof(ino_t) < 8 || cct->_conf->client_use_faked_inos;
#endif
}
void Client::_assign_faked_ino(Inode *in)
{
  if (0 == last_used_faked_ino)
    last_used_faked_ino = last_used_faked_ino + 2048; // start(1024)~2048 reserved for _assign_faked_root
  interval_set<ino_t>::const_iterator it = free_faked_inos.lower_bound(last_used_faked_ino + 1);
  if (it == free_faked_inos.end() && last_used_faked_ino > 0) {
    last_used_faked_ino = 2048;
    it = free_faked_inos.lower_bound(last_used_faked_ino + 1);
  }
  ceph_assert(it != free_faked_inos.end());
  if (last_used_faked_ino < it.get_start()) {
    ceph_assert(it.get_len() > 0);
    last_used_faked_ino = it.get_start();
  } else {
    ++last_used_faked_ino;
    ceph_assert(it.get_start() + it.get_len() > last_used_faked_ino);
  }
  in->faked_ino = last_used_faked_ino;
  free_faked_inos.erase(in->faked_ino);
  faked_ino_map[in->faked_ino] = in->vino();
}
/*
 * In the faked mode, if you export multiple subdirectories,
 * you will see that the inode numbers of the exported subdirectories
 * are the same. So we distinguish the mount point by reserving
 * the "fake ids" between "1024~2048" and combining the last
 * 10 bits (0x3ff) of the "root inodes".
 */
void Client::_assign_faked_root(Inode *in)
{
  interval_set<ino_t>::const_iterator it = free_faked_inos.lower_bound(last_used_faked_root + 1);
  if (it == free_faked_inos.end() && last_used_faked_root > 0) {
    last_used_faked_root = 0;
    it = free_faked_inos.lower_bound(last_used_faked_root + 1);
  }
  assert(it != free_faked_inos.end());
  vinodeno_t inode_info = in->vino();
  uint64_t inode_num = (uint64_t)inode_info.ino;
  ldout(cct, 10) << "inode_num " << inode_num << " inode_num & 0x3ff=" << (inode_num & 0x3ff) << dendl;
  last_used_faked_root = it.get_start() + (inode_num & 0x3ff); // 0x3ff mask and get_start will not exceed 2048

  assert(it.get_start() + it.get_len() > last_used_faked_root);

  in->faked_ino = last_used_faked_root;
  free_faked_inos.erase(in->faked_ino);
  faked_ino_map[in->faked_ino] = in->vino();
}
void Client::_release_faked_ino(Inode *in)
{
  free_faked_inos.insert(in->faked_ino);
  faked_ino_map.erase(in->faked_ino);
}
vinodeno_t Client::_map_faked_ino(ino_t ino)
{
  vinodeno_t vino;
  if (ino == root->faked_ino)
    vino = root->vino();
  else if (faked_ino_map.count(ino))
    vino = faked_ino_map[ino];
  else
    vino = vinodeno_t(0, CEPH_NOSNAP);
  ldout(cct, 10) << __func__ << " " << ino << " -> " << vino << dendl;
  return vino;
}
vinodeno_t Client::map_faked_ino(ino_t ino)
{
  std::scoped_lock lock(client_lock);
  return _map_faked_ino(ino);
}
Client::Client(Messenger *m, MonClient *mc, Objecter *objecter_)
  : Dispatcher(m->cct->get()),
    timer(m->cct, timer_lock, false),
    messenger(m),
    monclient(mc),
    objecter(objecter_),
    whoami(mc->get_global_id()),
    mount_state(CLIENT_UNMOUNTED, "Client::mountstate_lock"),
    initialize_state(CLIENT_NEW, "Client::initstate_lock"),
    cct_deleter{m->cct, [](CephContext *p) {p->put();}},
    async_ino_invalidator(m->cct),
    async_dentry_invalidator(m->cct),
    interrupt_finisher(m->cct),
    remount_finisher(m->cct),
    async_ino_releasor(m->cct),
    objecter_finisher(m->cct),
    m_command_hook(this)
{
  user_id = cct->_conf->client_mount_uid;
  group_id = cct->_conf->client_mount_gid;
  fuse_default_permissions = cct->_conf.get_val<bool>(
    "fuse_default_permissions");

  if (cct->_conf->client_acl_type == "posix_acl")
    acl_type = POSIX_ACL;

  lru.lru_set_midpoint(cct->_conf->client_cache_mid);

  // file handles
  free_fd_set.insert(10, 1<<30);

  mdsmap.reset(new MDSMap);

  // osd interfaces
  writeback_handler.reset(new ObjecterWriteback(objecter, &objecter_finisher,
                                                client_lock));
  objectcacher.reset(new ObjectCacher(cct, "libcephfs", *writeback_handler, client_lock,
                                      client_flush_set_callback,    // all commit callback
                                      (void*)this,
                                      cct->_conf->client_oc_size,
                                      cct->_conf->client_oc_max_objects,
                                      cct->_conf->client_oc_max_dirty,
                                      cct->_conf->client_oc_target_dirty,
                                      cct->_conf->client_oc_max_dirty_age,
                                      true));
}
Client::~Client()
{
  ceph_assert(ceph_mutex_is_not_locked(client_lock));

  // If the task crashed or was aborted and didn't
  // get any chance to run umount and shutdown.
  {
    std::scoped_lock l{client_lock};
    tick_thread_stopped = true;
    upkeep_cond.notify_one();
  }

  if (upkeeper.joinable())
    upkeeper.join();

  // It is necessary to hold client_lock, because any inode destruction
  // may call into ObjectCacher, which asserts that its lock (which is
  // client_lock) is held.
  std::scoped_lock l{client_lock};
  tear_down_cache();
}
void Client::tear_down_cache()
{
  // fd's
  for (auto &[fd, fh] : fd_map) {
    ldout(cct, 1) << __func__ << " forcing close of fh " << fd << " ino " << fh->inode->ino << dendl;
    _release_fh(fh);
  }
  fd_map.clear();

  while (!opened_dirs.empty()) {
    dir_result_t *dirp = *opened_dirs.begin();
    ldout(cct, 1) << __func__ << " forcing close of dir " << dirp << " ino " << dirp->inode->ino << dendl;
    _closedir(dirp);
  }

  // empty lru cache
  trim_cache();
  ceph_assert(lru.lru_get_size() == 0);

  // close root ino
  ceph_assert(inode_map.size() <= 1 + root_parents.size());
  if (root && inode_map.size() == 1 + root_parents.size()) {
    root.reset();
  }

  ceph_assert(inode_map.empty());
}
inodeno_t Client::get_root_ino()
{
  std::scoped_lock l(client_lock);
  if (use_faked_inos())
    return root->faked_ino;
  else
    return root->ino;
}
Inode *Client::get_root()
{
  std::scoped_lock l(client_lock);
  root->ll_get();
  return root.get();
}
void Client::dump_inode(Formatter *f, Inode *in, set<Inode*>& did, bool disconnected)
{
  filepath path;
  in->make_long_path(path);
  ldout(cct, 1) << "dump_inode: "
                << (disconnected ? "DISCONNECTED ":"")
                << "inode " << in->ino
                << " " << path
                << " ref " << in->get_nref()
                << " " << *in << dendl;

  if (f) {
    f->open_object_section("inode");
    f->dump_stream("path") << path;
    if (disconnected)
      f->dump_int("disconnected", 1);
    in->dump(f);
    f->close_section();
  }

  did.insert(in);
  if (in->dir) {
    ldout(cct, 1) << " dir " << in->dir << " size " << in->dir->dentries.size() << dendl;
    for (ceph::unordered_map<string, Dentry*>::iterator it = in->dir->dentries.begin();
         it != in->dir->dentries.end();
         ++it) {
      ldout(cct, 1) << " " << in->ino << " dn " << it->first << " " << it->second << " ref " << it->second->ref << dendl;
      if (f) {
        f->open_object_section("dentry");
        it->second->dump(f);
        f->close_section();
      }
      if (it->second->inode)
        dump_inode(f, it->second->inode.get(), did, false);
    }
  }
}
void Client::dump_cache(Formatter *f)
{
  set<Inode*> did;

  ldout(cct, 1) << __func__ << dendl;

  if (f)
    f->open_array_section("cache");

  if (root)
    dump_inode(f, root.get(), did, true);

  // make a second pass to catch anything disconnected
  for (ceph::unordered_map<vinodeno_t, Inode*>::iterator it = inode_map.begin();
       it != inode_map.end();
       ++it) {
    if (did.count(it->second))
      continue;
    dump_inode(f, it->second, did, true);
  }

  if (f)
    f->close_section();
}
void Client::dump_status(Formatter *f)
{
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  ldout(cct, 1) << __func__ << dendl;

  const epoch_t osd_epoch
    = objecter->with_osdmap(std::mem_fn(&OSDMap::get_epoch));

  if (f) {
    f->open_object_section("metadata");
    for (const auto& kv : metadata)
      f->dump_string(kv.first.c_str(), kv.second);
    f->close_section();

    f->dump_int("dentry_count", lru.lru_get_size());
    f->dump_int("dentry_pinned_count", lru.lru_get_num_pinned());
    f->dump_int("id", get_nodeid().v);
    entity_inst_t inst(messenger->get_myname(), messenger->get_myaddr_legacy());
    f->dump_object("inst", inst);
    f->dump_object("addr", inst.addr);
    f->dump_stream("inst_str") << inst.name << " " << inst.addr.get_legacy_str();
    f->dump_string("addr_str", inst.addr.get_legacy_str());
    f->dump_int("inode_count", inode_map.size());
    f->dump_int("mds_epoch", mdsmap->get_epoch());
    f->dump_int("osd_epoch", osd_epoch);
    f->dump_int("osd_epoch_barrier", cap_epoch_barrier);
    f->dump_bool("blocklisted", blocklisted);
    f->dump_string("fs_name", mdsmap->get_fs_name());
  }
}
void Client::_pre_init()
{
  objecter_finisher.start();
  filer.reset(new Filer(objecter, &objecter_finisher));
  objecter->enable_blocklist_events();

  objectcacher->start();
}
int Client::init()
{
  RWRef_t iref_writer(initialize_state, CLIENT_INITIALIZING, false);
  ceph_assert(iref_writer.is_first_writer());

  _pre_init();
  {
    std::scoped_lock l{client_lock};
    messenger->add_dispatcher_tail(this);
  }
  _finish_init();
  iref_writer.update_state(CLIENT_INITIALIZED);
  return 0;
}
void Client::_finish_init()
{
  {
    std::scoped_lock l{client_lock};
    // logger
    PerfCountersBuilder plb(cct, "client", l_c_first, l_c_last);
    plb.add_time_avg(l_c_reply, "reply", "Latency of receiving a reply on metadata request");
    plb.add_time_avg(l_c_lat, "lat", "Latency of processing a metadata request");
    plb.add_time_avg(l_c_wrlat, "wrlat", "Latency of a file data write operation");
    plb.add_time_avg(l_c_read, "rdlat", "Latency of a file data read operation");
    plb.add_time_avg(l_c_fsync, "fsync", "Latency of a file sync operation");
    logger.reset(plb.create_perf_counters());
    cct->get_perfcounters_collection()->add(logger.get());
  }

  cct->_conf.add_observer(this);

  AdminSocket* admin_socket = cct->get_admin_socket();
  int ret = admin_socket->register_command("mds_requests",
                                           &m_command_hook,
                                           "show in-progress mds requests");
  if (ret < 0) {
    lderr(cct) << "error registering admin socket command: "
               << cpp_strerror(-ret) << dendl;
  }
  ret = admin_socket->register_command("mds_sessions "
                                       "name=cap_dump,type=CephBool,req=false",
                                       &m_command_hook,
                                       "show mds session state");
  if (ret < 0) {
    lderr(cct) << "error registering admin socket command: "
               << cpp_strerror(-ret) << dendl;
  }
  ret = admin_socket->register_command("dump_cache",
                                       &m_command_hook,
                                       "show in-memory metadata cache contents");
  if (ret < 0) {
    lderr(cct) << "error registering admin socket command: "
               << cpp_strerror(-ret) << dendl;
  }
  ret = admin_socket->register_command("kick_stale_sessions",
                                       &m_command_hook,
                                       "kick sessions that were remote reset");
  if (ret < 0) {
    lderr(cct) << "error registering admin socket command: "
               << cpp_strerror(-ret) << dendl;
  }
  ret = admin_socket->register_command("status",
                                       &m_command_hook,
                                       "show overall client status");
  if (ret < 0) {
    lderr(cct) << "error registering admin socket command: "
               << cpp_strerror(-ret) << dendl;
  }
}
void Client::shutdown()
{
  ldout(cct, 1) << __func__ << dendl;

  // If we were not mounted, but were being used for sending
  // MDS commands, we may have sessions that need closing.
  {
    std::scoped_lock l{client_lock};

    // To make sure the tick thread will be stopped before
    // destructing the Client, just in case the _mount()
    // failed but didn't get a chance to stop the tick thread
    tick_thread_stopped = true;
    upkeep_cond.notify_one();
  }
  cct->_conf.remove_observer(this);

  cct->get_admin_socket()->unregister_commands(&m_command_hook);

  if (ino_invalidate_cb) {
    ldout(cct, 10) << "shutdown stopping cache invalidator finisher" << dendl;
    async_ino_invalidator.wait_for_empty();
    async_ino_invalidator.stop();
  }

  if (dentry_invalidate_cb) {
    ldout(cct, 10) << "shutdown stopping dentry invalidator finisher" << dendl;
    async_dentry_invalidator.wait_for_empty();
    async_dentry_invalidator.stop();
  }

  if (switch_interrupt_cb) {
    ldout(cct, 10) << "shutdown stopping interrupt finisher" << dendl;
    interrupt_finisher.wait_for_empty();
    interrupt_finisher.stop();
  }

  if (remount_cb) {
    ldout(cct, 10) << "shutdown stopping remount finisher" << dendl;
    remount_finisher.wait_for_empty();
    remount_finisher.stop();
  }

  if (ino_release_cb) {
    ldout(cct, 10) << "shutdown stopping inode release finisher" << dendl;
    async_ino_releasor.wait_for_empty();
    async_ino_releasor.stop();
  }

  objectcacher->stop();  // outside of client_lock! this does a join.

  /*
   * We are shutting down the client.
   *
   * Just declare the state to CLIENT_NEW to block and fail any
   * newly arriving "reader" and then try to wait for all the in-flight
   * "readers" to finish.
   */
  RWRef_t iref_writer(initialize_state, CLIENT_NEW, false);
  if (!iref_writer.is_first_writer())
    return;
  iref_writer.wait_readers_done();

  {
    std::scoped_lock l(timer_lock);
    timer.shutdown();
  }

  objecter_finisher.wait_for_empty();
  objecter_finisher.stop();

  cct->get_perfcounters_collection()->remove(logger.get());
}

// ===================
// metadata cache stuff
void Client::trim_cache(bool trim_kernel_dcache)
{
  uint64_t max = cct->_conf->client_cache_size;
  ldout(cct, 20) << "trim_cache size " << lru.lru_get_size() << " max " << max << dendl;
  unsigned last = 0;
  while (lru.lru_get_size() != last) {
    last = lru.lru_get_size();

    if (!is_unmounting() && lru.lru_get_size() <= max)  break;

    // trim!
    Dentry *dn = static_cast<Dentry*>(lru.lru_get_next_expire());
    if (!dn)
      break;  // done

    trim_dentry(dn);
  }

  if (trim_kernel_dcache && lru.lru_get_size() > max)
    _invalidate_kernel_dcache();

  // hose root?
  if (lru.lru_get_size() == 0 && root && root->get_nref() == 1 && inode_map.size() == 1 + root_parents.size()) {
    ldout(cct, 15) << "trim_cache trimmed root " << root << dendl;
    root.reset();
  }
}
void Client::trim_cache_for_reconnect(MetaSession *s)
{
  mds_rank_t mds = s->mds_num;
  ldout(cct, 20) << __func__ << " mds." << mds << dendl;

  int trimmed = 0;
  list<Dentry*> skipped;
  while (lru.lru_get_size() > 0) {
    Dentry *dn = static_cast<Dentry*>(lru.lru_expire());
    if (!dn)
      break;

    if ((dn->inode && dn->inode->caps.count(mds)) ||
        dn->dir->parent_inode->caps.count(mds)) {
      trim_dentry(dn);
      trimmed++;
    } else
      skipped.push_back(dn);
  }

  for(list<Dentry*>::iterator p = skipped.begin(); p != skipped.end(); ++p)
    lru.lru_insert_mid(*p);

  ldout(cct, 20) << __func__ << " mds." << mds
                 << " trimmed " << trimmed << " dentries" << dendl;

  if (s->caps.size() > 0)
    _invalidate_kernel_dcache();
}
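
// Dentries whose inode (or parent directory inode) still holds caps from
// the reconnecting MDS are trimmed above, shrinking the state the MDS has
// to rebuild during reconnect; everything else is put back into the LRU
// unchanged.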
void Client::trim_dentry(Dentry *dn)
{
  ldout(cct, 15) << "trim_dentry unlinking dn " << dn->name
                 << " in dir "
                 << std::hex << dn->dir->parent_inode->ino << std::dec
                 << dendl;
  if (dn->inode) {
    Inode *diri = dn->dir->parent_inode;
    clear_dir_complete_and_ordered(diri, true);
  }
  unlink(dn, false, false);  // drop dir, drop dentry
}
void Client::update_inode_file_size(Inode *in, int issued, uint64_t size,
                                    uint64_t truncate_seq, uint64_t truncate_size)
{
  uint64_t prior_size = in->size;

  if (truncate_seq > in->truncate_seq ||
      (truncate_seq == in->truncate_seq && size > in->size)) {
    ldout(cct, 10) << "size " << in->size << " -> " << size << dendl;
    in->size = size;
    in->reported_size = size;
    if (truncate_seq != in->truncate_seq) {
      ldout(cct, 10) << "truncate_seq " << in->truncate_seq << " -> "
                     << truncate_seq << dendl;
      in->truncate_seq = truncate_seq;
      in->oset.truncate_seq = truncate_seq;

      // truncate cached file data
      if (prior_size > size) {
        _invalidate_inode_cache(in, truncate_size, prior_size - truncate_size);
      }
    }

    // truncate inline data
    if (in->inline_version < CEPH_INLINE_NONE) {
      uint32_t len = in->inline_data.length();
      if (size < len)
        in->inline_data.splice(size, len - size);
    }
  }
  if (truncate_seq >= in->truncate_seq &&
      in->truncate_size != truncate_size) {
    if (in->is_file()) {
      ldout(cct, 10) << "truncate_size " << in->truncate_size << " -> "
                     << truncate_size << dendl;
      in->truncate_size = truncate_size;
      in->oset.truncate_size = truncate_size;
    } else {
      ldout(cct, 0) << "Hmmm, truncate_seq && truncate_size changed on non-file inode!" << dendl;
    }
  }
}
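
// truncate_seq is the tie-breaker above: a strictly newer truncate_seq
// from the MDS always replaces the local size, while an equal seq only
// ever grows it, so a racing local truncate cannot be undone by a stale
// size in a reply.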
void Client::update_inode_file_time(Inode *in, int issued, uint64_t time_warp_seq,
                                    utime_t ctime, utime_t mtime, utime_t atime)
{
  ldout(cct, 10) << __func__ << " " << *in << " " << ccap_string(issued)
                 << " ctime " << ctime << " mtime " << mtime << dendl;

  if (time_warp_seq > in->time_warp_seq)
    ldout(cct, 10) << " mds time_warp_seq " << time_warp_seq
                   << " is higher than local time_warp_seq "
                   << in->time_warp_seq << dendl;

  // be careful with size, mtime, atime
  if (issued & (CEPH_CAP_FILE_EXCL|
                CEPH_CAP_FILE_BUFFER|
                CEPH_CAP_XATTR_EXCL)) {
    ldout(cct, 30) << "Yay have enough caps to look at our times" << dendl;
    if (ctime > in->ctime)
      in->ctime = ctime;
    if (time_warp_seq > in->time_warp_seq) {
      //the mds updated times, so take those!
      in->mtime = mtime;
      in->atime = atime;
      in->time_warp_seq = time_warp_seq;
    } else if (time_warp_seq == in->time_warp_seq) {
      //take the max times
      if (mtime > in->mtime)
        in->mtime = mtime;
      if (atime > in->atime)
        in->atime = atime;
    } else if (issued & CEPH_CAP_FILE_EXCL) {
      //ignore mds values as we have a higher seq
    }
  } else {
    ldout(cct, 30) << "Don't have enough caps, just taking mds' time values" << dendl;
    if (time_warp_seq >= in->time_warp_seq) {
      in->ctime = ctime;
      in->mtime = mtime;
      in->atime = atime;
      in->time_warp_seq = time_warp_seq;
    } else {
      ldout(cct, 0) << "WARNING: " << *in << " mds time_warp_seq "
                    << time_warp_seq << " is lower than local time_warp_seq "
                    << in->time_warp_seq << dendl;
    }
  }
}
void Client::_fragmap_remove_non_leaves(Inode *in)
{
  for (map<frag_t,int>::iterator p = in->fragmap.begin(); p != in->fragmap.end(); )
    if (!in->dirfragtree.is_leaf(p->first))
      in->fragmap.erase(p++);
    else
      ++p;
}
void Client::_fragmap_remove_stopped_mds(Inode *in, mds_rank_t mds)
{
  for (auto p = in->fragmap.begin(); p != in->fragmap.end(); )
    if (p->second == mds)
      in->fragmap.erase(p++);
    else
      ++p;
}
Inode * Client::add_update_inode(InodeStat *st, utime_t from,
                                 MetaSession *session,
                                 const UserPerm& request_perms)
{
  Inode *in;
  bool was_new = false;
  if (inode_map.count(st->vino)) {
    in = inode_map[st->vino];
    ldout(cct, 12) << __func__ << " had " << *in << " caps " << ccap_string(st->cap.caps) << dendl;
  } else {
    in = new Inode(this, st->vino, &st->layout);
    inode_map[st->vino] = in;

    if (use_faked_inos())
      _assign_faked_ino(in);

    if (!root) {
      root = in;
      if (use_faked_inos())
        _assign_faked_root(root.get());
      root_ancestor = in;
      cwd = root;
    } else if (is_mounting()) {
      root_parents[root_ancestor] = in;
      root_ancestor = in;
    }

    // immutable bits
    in->ino = st->vino.ino;
    in->snapid = st->vino.snapid;
    in->mode = st->mode & S_IFMT;
    was_new = true;
  }

  if (in->is_symlink())
    in->symlink = st->symlink;

  // only update inode if mds info is strictly newer, or it is the same and projected (odd).
  bool new_version = false;
  if (in->version == 0 ||
      ((st->cap.flags & CEPH_CAP_FLAG_AUTH) &&
       (in->version & ~1) < st->version))
    new_version = true;

  int issued;
  in->caps_issued(&issued);
  issued |= in->caps_dirty();
  int new_issued = ~issued & (int)st->cap.caps;

  if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
      !(issued & CEPH_CAP_AUTH_EXCL)) {
    in->btime = st->btime;
    in->snap_btime = st->snap_btime;
    in->snap_metadata = st->snap_metadata;
  }

  if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
      !(issued & CEPH_CAP_LINK_EXCL)) {
    in->nlink = st->nlink;
  }

  if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
    update_inode_file_time(in, issued, st->time_warp_seq,
                           st->ctime, st->mtime, st->atime);
  }

  if (new_version ||
      (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
    in->layout = st->layout;
    update_inode_file_size(in, issued, st->size, st->truncate_seq, st->truncate_size);
  }

  if (in->is_dir()) {
    if (new_version || (new_issued & CEPH_CAP_FILE_SHARED)) {
      in->dirstat = st->dirstat;
    }
    // dir_layout/rstat/quota are not tracked by capability, update them only if
    // the inode stat is from auth mds
    if (new_version || (st->cap.flags & CEPH_CAP_FLAG_AUTH)) {
      in->dir_layout = st->dir_layout;
      ldout(cct, 20) << " dir hash is " << (int)in->dir_layout.dl_dir_hash << dendl;
      in->rstat = st->rstat;
      in->quota = st->quota;
      in->dir_pin = st->dir_pin;
    }
    // move me if/when version reflects fragtree changes.
    if (in->dirfragtree != st->dirfragtree) {
      in->dirfragtree = st->dirfragtree;
      _fragmap_remove_non_leaves(in);
    }
  }

  if ((in->xattr_version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) &&
      st->xattrbl.length() &&
      st->xattr_version > in->xattr_version) {
    auto p = st->xattrbl.cbegin();
    decode(in->xattrs, p);
    in->xattr_version = st->xattr_version;
  }

  if (st->inline_version > in->inline_version) {
    in->inline_data = st->inline_data;
    in->inline_version = st->inline_version;
  }

  /* always take a newer change attr */
  if (st->change_attr > in->change_attr)
    in->change_attr = st->change_attr;

  if (st->version > in->version)
    in->version = st->version;

  if (was_new)
    ldout(cct, 12) << __func__ << " adding " << *in << " caps " << ccap_string(st->cap.caps) << dendl;

  if (!st->cap.caps)
    return in;   // as with readdir returning inodes in different snaprealms (no caps!)

  if (in->snapid == CEPH_NOSNAP) {
    add_update_cap(in, session, st->cap.cap_id, st->cap.caps, st->cap.wanted,
                   st->cap.seq, st->cap.mseq, inodeno_t(st->cap.realm),
                   st->cap.flags, request_perms);
    if (in->auth_cap && in->auth_cap->session == session) {
      in->max_size = st->max_size;
      in->rstat = st->rstat;
    }

    // setting I_COMPLETE needs to happen after adding the cap
    if (in->is_dir() &&
        (st->cap.caps & CEPH_CAP_FILE_SHARED) &&
        (issued & CEPH_CAP_FILE_EXCL) == 0 &&
        in->dirstat.nfiles == 0 &&
        in->dirstat.nsubdirs == 0) {
      ldout(cct, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on empty dir " << *in << dendl;
      in->flags |= I_COMPLETE | I_DIR_ORDERED;
      if (in->dir) {
        ldout(cct, 10) << " dir is open on empty dir " << in->ino << " with "
                       << in->dir->dentries.size() << " entries, marking all dentries null" << dendl;
        in->dir->readdir_cache.clear();
        for (const auto& p : in->dir->dentries) {
          unlink(p.second, true, true);  // keep dir, keep dentry
        }
        if (in->dir->dentries.empty())
          close_dir(in->dir);
      }
    }
  } else {
    in->snap_caps |= st->cap.caps;
  }

  in->fscrypt = st->fscrypt;
  return in;
}
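
// Field updates above are gated on the caps that arrived with the stat:
// AUTH_SHARED guards the auth-attribute block, LINK_SHARED guards nlink,
// and the FILE/ANY_RD caps guard times, layout and size. Nothing is
// overwritten while this client itself holds the corresponding _EXCL cap
// and may have dirty local state.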
/*
 * insert_dentry_inode - insert + link a single dentry + inode into the metadata cache.
 */
Dentry *Client::insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dlease,
                                    Inode *in, utime_t from, MetaSession *session,
                                    Dentry *old_dentry)
{
  Dentry *dn = NULL;
  if (dir->dentries.count(dname))
    dn = dir->dentries[dname];

  ldout(cct, 12) << __func__ << " '" << dname << "' vino " << in->vino()
                 << " in dir " << dir->parent_inode->vino() << " dn " << dn
                 << dendl;

  if (dn && dn->inode) {
    if (dn->inode->vino() == in->vino()) {
      ldout(cct, 12) << " had dentry " << dname
                     << " with correct vino " << dn->inode->vino()
                     << dendl;
    } else {
      ldout(cct, 12) << " had dentry " << dname
                     << " with WRONG vino " << dn->inode->vino()
                     << dendl;
      unlink(dn, true, true);  // keep dir, keep dentry
    }
  }

  if (!dn || !dn->inode) {
    InodeRef tmp_ref(in);
    if (old_dentry) {
      if (old_dentry->dir != dir) {
        Inode *old_diri = old_dentry->dir->parent_inode;
        clear_dir_complete_and_ordered(old_diri, false);
      }
      unlink(old_dentry, dir == old_dentry->dir, false); // drop dentry, keep dir open if it's the same dir
    }
    Inode *diri = dir->parent_inode;
    clear_dir_complete_and_ordered(diri, false);
    dn = link(dir, dname, in, dn);
  }

  update_dentry_lease(dn, dlease, from, session);
  return dn;
}
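
// A cached dentry that points at a different vino is unlinked (keeping
// the Dir and the Dentry shell) before the new inode is linked in, and
// the parent's I_COMPLETE/I_DIR_ORDERED hints are cleared because its
// contents just changed underneath any cached readdir result.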
void Client::update_dentry_lease(Dentry *dn, LeaseStat *dlease, utime_t from, MetaSession *session)
{
  utime_t dttl = from;
  dttl += (float)dlease->duration_ms / 1000.0;

  ldout(cct, 15) << __func__ << " " << *dn << " " << *dlease << " from " << from << dendl;

  if (dlease->mask & CEPH_LEASE_VALID) {
    if (dttl > dn->lease_ttl) {
      ldout(cct, 10) << "got dentry lease on " << dn->name
                     << " dur " << dlease->duration_ms << "ms ttl " << dttl << dendl;
      dn->lease_ttl = dttl;
      dn->lease_mds = session->mds_num;
      dn->lease_seq = dlease->seq;
      dn->lease_gen = session->cap_gen;
    }
  }
  dn->cap_shared_gen = dn->dir->parent_inode->shared_gen;
  if (dlease->mask & CEPH_LEASE_PRIMARY_LINK)
    dn->mark_primary();
  dn->alternate_name = std::move(dlease->alternate_name);
}
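
// Even when no usable lease is granted, cap_shared_gen is refreshed from
// the parent directory above, so the dentry remains trustworthy for as
// long as the client holds the directory's shared cap of that generation.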
/*
 * update MDS location cache for a single inode
 */
void Client::update_dir_dist(Inode *in, DirStat *dst)
{
  // auth
  ldout(cct, 20) << "got dirfrag map for " << in->ino << " frag " << dst->frag << " to mds " << dst->auth << dendl;
  if (dst->auth >= 0) {
    in->fragmap[dst->frag] = dst->auth;
  } else {
    in->fragmap.erase(dst->frag);
  }
  if (!in->dirfragtree.is_leaf(dst->frag)) {
    in->dirfragtree.force_to_leaf(cct, dst->frag);
    _fragmap_remove_non_leaves(in);
  }

  // replicated
  in->dir_replicated = !dst->dist.empty();
  if (!dst->dist.empty())
    in->frag_repmap[dst->frag].assign(dst->dist.begin(), dst->dist.end());
  else
    in->frag_repmap.erase(dst->frag);
}
void Client::clear_dir_complete_and_ordered(Inode *diri, bool complete)
{
  if (complete)
    diri->dir_release_count++;
  else
    diri->dir_ordered_count++;

  if (complete) {
    if (diri->flags & I_COMPLETE) {
      ldout(cct, 10) << " clearing (I_COMPLETE|I_DIR_ORDERED) on " << *diri << dendl;
      diri->flags &= ~(I_COMPLETE | I_DIR_ORDERED);
    }
  } else {
    if (diri->flags & I_DIR_ORDERED) {
      ldout(cct, 10) << " clearing I_DIR_ORDERED on " << *diri << dendl;
      diri->flags &= ~I_DIR_ORDERED;
    }
  }

  if (diri->dir)
    diri->dir->readdir_cache.clear();
}
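
// Bumping dir_release_count / dir_ordered_count invalidates any readdir
// result sets that were tagged with the old counters (compare the checks
// in insert_readdir_results below), and the shared readdir_cache is
// dropped outright.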
/*
 * insert results from readdir or lssnap into the metadata cache.
 */
void Client::insert_readdir_results(MetaRequest *request, MetaSession *session, Inode *diri) {

  auto& reply = request->reply;
  ConnectionRef con = request->reply->get_connection();
  uint64_t features;
  if (session->mds_features.test(CEPHFS_FEATURE_REPLY_ENCODING)) {
    features = (uint64_t)-1;
  } else {
    features = con->get_features();
  }

  dir_result_t *dirp = request->dirp;
  ceph_assert(dirp);

  // the extra buffer list is only set for readdir and lssnap replies
  auto p = reply->get_extra_bl().cbegin();
  if (!p.end()) {
    // snapdir?
    if (request->head.op == CEPH_MDS_OP_LSSNAP) {
      ceph_assert(diri);
      diri = open_snapdir(diri);
    }

    // only open dir if we're actually adding stuff to it!
    Dir *dir = diri->open_dir();
    ceph_assert(dir);

    // dirstat
    DirStat dst(p, features);
    __u32 numdn;
    __u16 flags;
    decode(numdn, p);
    decode(flags, p);

    bool end = ((unsigned)flags & CEPH_READDIR_FRAG_END);
    bool hash_order = ((unsigned)flags & CEPH_READDIR_HASH_ORDER);

    frag_t fg = (unsigned)request->head.args.readdir.frag;
    unsigned readdir_offset = dirp->next_offset;
    string readdir_start = dirp->last_name;
    ceph_assert(!readdir_start.empty() || readdir_offset == 2);

    unsigned last_hash = 0;
    if (hash_order) {
      if (!readdir_start.empty()) {
        last_hash = ceph_frag_value(diri->hash_dentry_name(readdir_start));
      } else if (flags & CEPH_READDIR_OFFSET_HASH) {
        /* mds understands offset_hash */
        last_hash = (unsigned)request->head.args.readdir.offset_hash;
      }
    }

    if (fg != dst.frag) {
      ldout(cct, 10) << "insert_trace got new frag " << fg << " -> " << dst.frag << dendl;
      fg = dst.frag;
      if (!hash_order) {
        readdir_offset = 2;
        readdir_start.clear();
        dirp->offset = dir_result_t::make_fpos(fg, readdir_offset, false);
      }
    }

    ldout(cct, 10) << __func__ << " " << numdn << " readdir items, end=" << end
                   << ", hash_order=" << hash_order
                   << ", readdir_start " << readdir_start
                   << ", last_hash " << last_hash
                   << ", next_offset " << readdir_offset << dendl;

    if (diri->snapid != CEPH_SNAPDIR &&
        fg.is_leftmost() && readdir_offset == 2 &&
        !(hash_order && last_hash)) {
      dirp->release_count = diri->dir_release_count;
      dirp->ordered_count = diri->dir_ordered_count;
      dirp->start_shared_gen = diri->shared_gen;
      dirp->cache_index = 0;
    }

    dirp->buffer_frag = fg;

    _readdir_drop_dirp_buffer(dirp);
    dirp->buffer.reserve(numdn);

    string dname;
    LeaseStat dlease;
    for (unsigned i=0; i<numdn; i++) {
      decode(dname, p);
      dlease.decode(p, features);
      InodeStat ist(p, features);

      ldout(cct, 15) << "" << i << ": '" << dname << "'" << dendl;

      Inode *in = add_update_inode(&ist, request->sent_stamp, session,
                                   request->perms);
      Dentry *dn;
      if (diri->dir->dentries.count(dname)) {
        Dentry *olddn = diri->dir->dentries[dname];
        if (olddn->inode != in) {
          // replace incorrect dentry
          unlink(olddn, true, true);  // keep dir, dentry
          dn = link(dir, dname, in, olddn);
          ceph_assert(dn == olddn);
        } else {
          // keep existing dentry
          dn = olddn;
        }
      } else {
        // new dentry
        dn = link(dir, dname, in, NULL);
      }
      dn->alternate_name = std::move(dlease.alternate_name);

      update_dentry_lease(dn, &dlease, request->sent_stamp, session);
      if (hash_order) {
        unsigned hash = ceph_frag_value(diri->hash_dentry_name(dname));
        if (hash != last_hash)
          readdir_offset = 2;
        last_hash = hash;
        dn->offset = dir_result_t::make_fpos(hash, readdir_offset++, true);
      } else {
        dn->offset = dir_result_t::make_fpos(fg, readdir_offset++, false);
      }
      // add to readdir cache
      if (dirp->release_count == diri->dir_release_count &&
          dirp->ordered_count == diri->dir_ordered_count &&
          dirp->start_shared_gen == diri->shared_gen) {
        if (dirp->cache_index == dir->readdir_cache.size()) {
          if (i == 0) {
            ceph_assert(!dirp->inode->is_complete_and_ordered());
            dir->readdir_cache.reserve(dirp->cache_index + numdn);
          }
          dir->readdir_cache.push_back(dn);
        } else if (dirp->cache_index < dir->readdir_cache.size()) {
          if (dirp->inode->is_complete_and_ordered())
            ceph_assert(dir->readdir_cache[dirp->cache_index] == dn);
          else
            dir->readdir_cache[dirp->cache_index] = dn;
        } else {
          ceph_abort_msg("unexpected readdir buffer idx");
        }
        dirp->cache_index++;
      }
      // add to cached result list
      dirp->buffer.push_back(dir_result_t::dentry(dn->offset, dname, dn->alternate_name, in));
      ldout(cct, 15) << __func__ << " " << hex << dn->offset << dec << ": '" << dname << "' -> " << in->ino << dendl;
    }

    if (numdn > 0)
      dirp->last_name = dname;
    if (end)
      dirp->next_offset = 2;
    else
      dirp->next_offset = readdir_offset;

    if (dir->is_empty())
      close_dir(dir);
  }
}
1336 Inode
* Client::insert_trace(MetaRequest
*request
, MetaSession
*session
)
1338 auto& reply
= request
->reply
;
1339 int op
= request
->get_op();
1341 ldout(cct
, 10) << "insert_trace from " << request
->sent_stamp
<< " mds." << session
->mds_num
1342 << " is_target=" << (int)reply
->head
.is_target
1343 << " is_dentry=" << (int)reply
->head
.is_dentry
1346 auto p
= reply
->get_trace_bl().cbegin();
1347 if (request
->got_unsafe
) {
1348 ldout(cct
, 10) << "insert_trace -- already got unsafe; ignoring" << dendl
;
1349 ceph_assert(p
.end());
1354 ldout(cct
, 10) << "insert_trace -- no trace" << dendl
;
1356 Dentry
*d
= request
->dentry();
1358 Inode
*diri
= d
->dir
->parent_inode
;
1359 clear_dir_complete_and_ordered(diri
, true);
1362 if (d
&& reply
->get_result() == 0) {
1363 if (op
== CEPH_MDS_OP_RENAME
) {
1365 Dentry
*od
= request
->old_dentry();
1366 ldout(cct
, 10) << " unlinking rename src dn " << od
<< " for traceless reply" << dendl
;
1368 unlink(od
, true, true); // keep dir, dentry
1369 } else if (op
== CEPH_MDS_OP_RMDIR
||
1370 op
== CEPH_MDS_OP_UNLINK
) {
1372 ldout(cct
, 10) << " unlinking unlink/rmdir dn " << d
<< " for traceless reply" << dendl
;
1373 unlink(d
, true, true); // keep dir, dentry
1379 ConnectionRef con
= request
->reply
->get_connection();
1381 if (session
->mds_features
.test(CEPHFS_FEATURE_REPLY_ENCODING
)) {
1382 features
= (uint64_t)-1;
1385 features
= con
->get_features();
1387 ldout(cct
, 10) << " features 0x" << hex
<< features
<< dec
<< dendl
;
1390 SnapRealm
*realm
= NULL
;
1391 if (reply
->snapbl
.length())
1392 update_snap_trace(reply
->snapbl
, &realm
);
1394 ldout(cct
, 10) << " hrm "
1395 << " is_target=" << (int)reply
->head
.is_target
1396 << " is_dentry=" << (int)reply
->head
.is_dentry
1405 if (reply
->head
.is_dentry
) {
1406 dirst
.decode(p
, features
);
1407 dst
.decode(p
, features
);
1409 dlease
.decode(p
, features
);
1413 if (reply
->head
.is_target
) {
1414 ist
.decode(p
, features
);
1415 if (cct
->_conf
->client_debug_getattr_caps
) {
1416 unsigned wanted
= 0;
1417 if (op
== CEPH_MDS_OP_GETATTR
|| op
== CEPH_MDS_OP_LOOKUP
)
1418 wanted
= request
->head
.args
.getattr
.mask
;
1419 else if (op
== CEPH_MDS_OP_OPEN
|| op
== CEPH_MDS_OP_CREATE
)
1420 wanted
= request
->head
.args
.open
.mask
;
1422 if ((wanted
& CEPH_CAP_XATTR_SHARED
) &&
1423 !(ist
.xattr_version
> 0 && ist
.xattrbl
.length() > 0))
1424 ceph_abort_msg("MDS reply does not contain xattrs");
1427 in
= add_update_inode(&ist
, request
->sent_stamp
, session
,
1432 if (reply
->head
.is_dentry
) {
1433 diri
= add_update_inode(&dirst
, request
->sent_stamp
, session
,
1435 update_dir_dist(diri
, &dst
); // dir stat info is attached to ..
1438 Dir
*dir
= diri
->open_dir();
1439 insert_dentry_inode(dir
, dname
, &dlease
, in
, request
->sent_stamp
, session
,
1440 (op
== CEPH_MDS_OP_RENAME
) ? request
->old_dentry() : NULL
);
1443 if (diri
->dir
&& diri
->dir
->dentries
.count(dname
)) {
1444 dn
= diri
->dir
->dentries
[dname
];
1446 clear_dir_complete_and_ordered(diri
, false);
1447 unlink(dn
, true, true); // keep dir, dentry
1450 if (dlease
.duration_ms
> 0) {
1452 Dir
*dir
= diri
->open_dir();
1453 dn
= link(dir
, dname
, NULL
, NULL
);
1455 update_dentry_lease(dn
, &dlease
, request
->sent_stamp
, session
);
1458 } else if (op
== CEPH_MDS_OP_LOOKUPSNAP
||
1459 op
== CEPH_MDS_OP_MKSNAP
) {
1460 ldout(cct
, 10) << " faking snap lookup weirdness" << dendl
;
1461 // fake it for snap lookup
1462 vinodeno_t vino
= ist
.vino
;
1463 vino
.snapid
= CEPH_SNAPDIR
;
1464 ceph_assert(inode_map
.count(vino
));
1465 diri
= inode_map
[vino
];
1467 string dname
= request
->path
.last_dentry();
1470 dlease
.duration_ms
= 0;
1473 Dir
*dir
= diri
->open_dir();
1474 insert_dentry_inode(dir
, dname
, &dlease
, in
, request
->sent_stamp
, session
);
1476 if (diri
->dir
&& diri
->dir
->dentries
.count(dname
)) {
1477 Dentry
*dn
= diri
->dir
->dentries
[dname
];
1479 unlink(dn
, true, true); // keep dir, dentry
1485 if (op
== CEPH_MDS_OP_READDIR
||
1486 op
== CEPH_MDS_OP_LSSNAP
) {
1487 insert_readdir_results(request
, session
, in
);
1488 } else if (op
== CEPH_MDS_OP_LOOKUPNAME
) {
1489 // hack: return parent inode instead
1493 if (request
->dentry() == NULL
&& in
!= request
->inode()) {
1494 // pin the target inode if its parent dentry is not pinned
1495 request
->set_other_inode(in
);
1500 put_snap_realm(realm
);
1502 request
->target
= in
;
1508 mds_rank_t
Client::choose_target_mds(MetaRequest
*req
, Inode
** phash_diri
)
1510 mds_rank_t mds
= MDS_RANK_NONE
;
1512 bool is_hash
= false;
1517 if (req
->resend_mds
>= 0) {
1518 mds
= req
->resend_mds
;
1519 req
->resend_mds
= -1;
1520 ldout(cct
, 10) << __func__
<< " resend_mds specified as mds." << mds
<< dendl
;
1524 if (cct
->_conf
->client_use_random_mds
)
1530 ldout(cct
, 20) << __func__
<< " starting with req->inode " << *in
<< dendl
;
1531 if (req
->path
.depth()) {
1532 hash
= in
->hash_dentry_name(req
->path
[0]);
1533 ldout(cct
, 20) << __func__
<< " inode dir hash is " << (int)in
->dir_layout
.dl_dir_hash
1534 << " on " << req
->path
[0]
1535 << " => " << hash
<< dendl
;
1540 in
= de
->inode
.get();
1541 ldout(cct
, 20) << __func__
<< " starting with req->dentry inode " << *in
<< dendl
;
1543 in
= de
->dir
->parent_inode
;
1544 hash
= in
->hash_dentry_name(de
->name
);
1545 ldout(cct
, 20) << __func__
<< " dentry dir hash is " << (int)in
->dir_layout
.dl_dir_hash
1546 << " on " << de
->name
1547 << " => " << hash
<< dendl
;
1552 if (in
->snapid
!= CEPH_NOSNAP
) {
1553 ldout(cct
, 10) << __func__
<< " " << *in
<< " is snapped, using nonsnap parent" << dendl
;
1554 while (in
->snapid
!= CEPH_NOSNAP
) {
1555 if (in
->snapid
== CEPH_SNAPDIR
)
1556 in
= in
->snapdir_parent
.get();
1557 else if (!in
->dentries
.empty())
1558 /* In most cases there will only be one dentry, so getting it
1559 * will be the correct action. If there are multiple hard links,
1560 * I think the MDS should be able to redirect as needed*/
1561 in
= in
->get_first_parent()->dir
->parent_inode
;
1563 ldout(cct
, 10) << "got unlinked inode, can't look at parent" << dendl
;
1570 ldout(cct
, 20) << __func__
<< " " << *in
<< " is_hash=" << is_hash
1571 << " hash=" << hash
<< dendl
;
1573 if (is_hash
&& S_ISDIR(in
->mode
) && (!in
->fragmap
.empty() || !in
->frag_repmap
.empty())) {
1574 frag_t fg
= in
->dirfragtree
[hash
];
1575 if (!req
->auth_is_best()) {
1576 auto repmapit
= in
->frag_repmap
.find(fg
);
1577 if (repmapit
!= in
->frag_repmap
.end()) {
1578 auto& repmap
= repmapit
->second
;
1579 auto r
= ceph::util::generate_random_number
<uint64_t>(0, repmap
.size()-1);
1582 } else if (in
->fragmap
.count(fg
)) {
1583 mds
= in
->fragmap
[fg
];
1586 } else if (in
->auth_cap
) {
1587 req
->send_to_auth
= true;
1588 mds
= in
->auth_cap
->session
->mds_num
;
1591 ldout(cct
, 10) << __func__
<< " from dirfragtree hash" << dendl
;
1596 if (in
->auth_cap
&& req
->auth_is_best()) {
1597 mds
= in
->auth_cap
->session
->mds_num
;
1598 } else if (!in
->caps
.empty()) {
1599 mds
= in
->caps
.begin()->second
.session
->mds_num
;
1603 ldout(cct
, 10) << __func__
<< " from caps on inode " << *in
<< dendl
;
1610 mds
= _get_random_up_mds();
1611 ldout(cct
, 10) << "did not get mds through better means, so chose random mds " << mds
<< dendl
;
1615 ldout(cct
, 20) << "mds is " << mds
<< dendl
;
1619 void Client::connect_mds_targets(mds_rank_t mds
)
1621 ldout(cct
, 10) << __func__
<< " for mds." << mds
<< dendl
;
1622 ceph_assert(mds_sessions
.count(mds
));
1623 const MDSMap::mds_info_t
& info
= mdsmap
->get_mds_info(mds
);
1624 for (const auto &rank
: info
.export_targets
) {
1625 if (mds_sessions
.count(rank
) == 0 &&
1626 mdsmap
->is_clientreplay_or_active_or_stopping(rank
)) {
1627 ldout(cct
, 10) << "check_mds_sessions opening mds." << mds
1628 << " export target mds." << rank
<< dendl
;
1629 _open_mds_session(rank
);
1634 void Client::dump_mds_sessions(Formatter
*f
, bool cap_dump
)
1636 f
->dump_int("id", get_nodeid().v
);
1637 entity_inst_t
inst(messenger
->get_myname(), messenger
->get_myaddr_legacy());
1638 f
->dump_object("inst", inst
);
1639 f
->dump_stream("inst_str") << inst
;
1640 f
->dump_stream("addr_str") << inst
.addr
;
1641 f
->open_array_section("sessions");
1642 for (const auto &p
: mds_sessions
) {
1643 f
->open_object_section("session");
1644 p
.second
.dump(f
, cap_dump
);
1648 f
->dump_int("mdsmap_epoch", mdsmap
->get_epoch());
1651 void Client::dump_mds_requests(Formatter
*f
)
1653 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
1654 p
!= mds_requests
.end();
1656 f
->open_object_section("request");
1662 int Client::verify_reply_trace(int r
, MetaSession
*session
,
1663 MetaRequest
*request
, const MConstRef
<MClientReply
>& reply
,
1664 InodeRef
*ptarget
, bool *pcreated
,
1665 const UserPerm
& perms
)
1667 // check whether this request actually did the create, and set created flag
1668 bufferlist extra_bl
;
1669 inodeno_t created_ino
;
1670 bool got_created_ino
= false;
1671 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
;
1673 extra_bl
= reply
->get_extra_bl();
1674 if (extra_bl
.length() >= 8) {
1675 if (session
->mds_features
.test(CEPHFS_FEATURE_DELEG_INO
)) {
1676 struct openc_response_t ocres
;
1678 decode(ocres
, extra_bl
);
1679 created_ino
= ocres
.created_ino
;
1681 * The userland cephfs client doesn't have a way to do an async create
1682 * (yet), so just discard delegated_inos for now. Eventually we should
1683 * store them and use them in create calls, even if they are synchronous,
1684 * if only for testing purposes.
1686 ldout(cct
, 10) << "delegated_inos: " << ocres
.delegated_inos
<< dendl
;
1688 // u64 containing number of created ino
1689 decode(created_ino
, extra_bl
);
1691 ldout(cct
, 10) << "make_request created ino " << created_ino
<< dendl
;
1692 got_created_ino
= true;
1696 *pcreated
= got_created_ino
;
1698 if (request
->target
) {
1699 *ptarget
= request
->target
;
1700 ldout(cct
, 20) << "make_request target is " << *ptarget
->get() << dendl
;
1702 if (got_created_ino
&& (p
= inode_map
.find(vinodeno_t(created_ino
, CEPH_NOSNAP
))) != inode_map
.end()) {
1703 (*ptarget
) = p
->second
;
1704 ldout(cct
, 20) << "make_request created, target is " << *ptarget
->get() << dendl
;
1706 // we got a traceless reply, and need to look up what we just
1707 // created. for now, do this by name. someday, do this by the
1708 // ino... which we know! FIXME.
1710 Dentry
*d
= request
->dentry();
1713 ldout(cct
, 10) << "make_request got traceless reply, looking up #"
1714 << d
->dir
->parent_inode
->ino
<< "/" << d
->name
1715 << " got_ino " << got_created_ino
1716 << " ino " << created_ino
1718 r
= _do_lookup(d
->dir
->parent_inode
, d
->name
, request
->regetattr_mask
,
1721 // if the dentry is not linked, just do our best. see #5021.
1722 ceph_abort_msg("how did this happen? i want logs!");
1725 Inode
*in
= request
->inode();
1726 ldout(cct
, 10) << "make_request got traceless reply, forcing getattr on #"
1727 << in
->ino
<< dendl
;
1728 r
= _getattr(in
, request
->regetattr_mask
, perms
, true);
1732 // verify ino returned in reply and trace_dist are the same
1733 if (got_created_ino
&&
1734 created_ino
.val
!= target
->ino
.val
) {
1735 ldout(cct
, 5) << "create got ino " << created_ino
<< " but then failed on lookup; EINTR?" << dendl
;
1739 ptarget
->swap(target
);
1751 * Blocking helper to make an MDS request.
1753 * If the ptarget flag is set, behavior changes slightly: the caller
1754 * expects to get a pointer to the inode we are creating or operating
1755 * on. As a result, we will follow up any traceless mutation reply
1756 * with a getattr or lookup to transparently handle a traceless reply
1757 * from the MDS (as when the MDS restarts and the client has to replay
1760 * @param request the MetaRequest to execute
1761 * @param perms The user uid/gid to execute as (eventually, full group lists?)
1762 * @param ptarget [optional] address to store a pointer to the target inode we want to create or operate on
1763 * @param pcreated [optional; required if ptarget] where to store a bool of whether our create atomically created a file
1764 * @param use_mds [optional] prefer a specific mds (-1 for default)
1765 * @param pdirbl [optional; disallowed if ptarget] where to pass extra reply payload to the caller
1767 int Client::make_request(MetaRequest
*request
,
1768 const UserPerm
& perms
,
1769 InodeRef
*ptarget
, bool *pcreated
,
1775 // assign a unique tid
1776 ceph_tid_t tid
= ++last_tid
;
1777 request
->set_tid(tid
);
1780 request
->op_stamp
= ceph_clock_now();
1783 mds_requests
[tid
] = request
->get();
1784 if (oldest_tid
== 0 && request
->get_op() != CEPH_MDS_OP_SETFILELOCK
)
1787 request
->set_caller_perms(perms
);
1789 if (cct
->_conf
->client_inject_fixed_oldest_tid
) {
1790 ldout(cct
, 20) << __func__
<< " injecting fixed oldest_client_tid(1)" << dendl
;
1791 request
->set_oldest_client_tid(1);
1793 request
->set_oldest_client_tid(oldest_tid
);
1798 request
->resend_mds
= use_mds
;
1800 MetaSession
*session
= NULL
;
1802 if (request
->aborted())
1806 request
->abort(-CEPHFS_EBLOCKLISTED
);
1811 ceph::condition_variable caller_cond
;
1812 request
->caller_cond
= &caller_cond
;
1815 Inode
*hash_diri
= NULL
;
1816 mds_rank_t mds
= choose_target_mds(request
, &hash_diri
);
1817 int mds_state
= (mds
== MDS_RANK_NONE
) ? MDSMap::STATE_NULL
: mdsmap
->get_state(mds
);
1818 if (mds_state
!= MDSMap::STATE_ACTIVE
&& mds_state
!= MDSMap::STATE_STOPPING
) {
1819 if (mds_state
== MDSMap::STATE_NULL
&& mds
>= mdsmap
->get_max_mds()) {
1821 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, remove it from fragmap" << dendl
;
1822 _fragmap_remove_stopped_mds(hash_diri
, mds
);
1824 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, trying a random mds" << dendl
;
1825 request
->resend_mds
= _get_random_up_mds();
1828 ldout(cct
, 10) << " target mds." << mds
<< " not active, waiting for new mdsmap" << dendl
;
1829 wait_on_list(waiting_for_mdsmap
);
1835 if (!have_open_session(mds
)) {
1836 session
= _get_or_open_mds_session(mds
);
1837 if (session
->state
== MetaSession::STATE_REJECTED
) {
1838 request
->abort(-CEPHFS_EPERM
);
1842 if (session
->state
== MetaSession::STATE_OPENING
) {
1843 ldout(cct
, 10) << "waiting for session to mds." << mds
<< " to open" << dendl
;
1844 wait_on_context_list(session
->waiting_for_open
);
1848 if (!have_open_session(mds
))
1851 session
= &mds_sessions
.at(mds
);
1855 send_request(request
, session
);
1858 ldout(cct
, 20) << "awaiting reply|forward|kick on " << &caller_cond
<< dendl
;
1859 request
->kick
= false;
1860 std::unique_lock l
{client_lock
, std::adopt_lock
};
1861 caller_cond
.wait(l
, [request
] {
1862 return (request
->reply
|| // reply
1863 request
->resend_mds
>= 0 || // forward
1867 request
->caller_cond
= nullptr;
1869 // did we get a reply?
1874 if (!request
->reply
) {
1875 ceph_assert(request
->aborted());
1876 ceph_assert(!request
->got_unsafe
);
1877 r
= request
->get_abort_code();
1878 request
->item
.remove_myself();
1879 unregister_request(request
);
1880 put_request(request
);
1885 auto reply
= std::move(request
->reply
);
1886 r
= reply
->get_result();
1888 request
->success
= true;
1890 // kick dispatcher (we've got it!)
1891 ceph_assert(request
->dispatch_cond
);
1892 request
->dispatch_cond
->notify_all();
1893 ldout(cct
, 20) << "sendrecv kickback on tid " << tid
<< " " << request
->dispatch_cond
<< dendl
;
1894 request
->dispatch_cond
= 0;
1896 if (r
>= 0 && ptarget
)
1897 r
= verify_reply_trace(r
, session
, request
, reply
, ptarget
, pcreated
, perms
);
1900 *pdirbl
= reply
->get_extra_bl();
1903 utime_t lat
= ceph_clock_now();
1904 lat
-= request
->sent_stamp
;
1905 ldout(cct
, 20) << "lat " << lat
<< dendl
;
1906 logger
->tinc(l_c_lat
, lat
);
1907 logger
->tinc(l_c_reply
, lat
);
1909 put_request(request
);
1913 void Client::unregister_request(MetaRequest
*req
)
1915 mds_requests
.erase(req
->tid
);
1916 if (req
->tid
== oldest_tid
) {
1917 map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.upper_bound(oldest_tid
);
1919 if (p
== mds_requests
.end()) {
1923 if (p
->second
->get_op() != CEPH_MDS_OP_SETFILELOCK
) {
1924 oldest_tid
= p
->first
;
1933 void Client::put_request(MetaRequest
*request
)
1935 if (request
->_put()) {
1937 if (request
->success
)
1938 op
= request
->get_op();
1940 request
->take_other_inode(&other_in
);
1944 (op
== CEPH_MDS_OP_RMDIR
||
1945 op
== CEPH_MDS_OP_RENAME
||
1946 op
== CEPH_MDS_OP_RMSNAP
)) {
1947 _try_to_trim_inode(other_in
.get(), false);
1952 int Client::encode_inode_release(Inode
*in
, MetaRequest
*req
,
1953 mds_rank_t mds
, int drop
,
1954 int unless
, int force
)
1956 ldout(cct
, 20) << __func__
<< " enter(in:" << *in
<< ", req:" << req
1957 << " mds:" << mds
<< ", drop:" << ccap_string(drop
) << ", unless:" << ccap_string(unless
)
1958 << ", force:" << force
<< ")" << dendl
;
1960 auto it
= in
->caps
.find(mds
);
1961 if (it
!= in
->caps
.end()) {
1962 Cap
&cap
= it
->second
;
1963 drop
&= ~(in
->dirty_caps
| get_caps_used(in
));
1964 if ((drop
& cap
.issued
) &&
1965 !(unless
& cap
.issued
)) {
1966 ldout(cct
, 25) << "dropping caps " << ccap_string(drop
) << dendl
;
1967 cap
.issued
&= ~drop
;
1968 cap
.implemented
&= ~drop
;
1974 cap
.wanted
= in
->caps_wanted();
1975 if (&cap
== in
->auth_cap
&&
1976 !(cap
.wanted
& CEPH_CAP_ANY_FILE_WR
)) {
1977 in
->requested_max_size
= 0;
1978 ldout(cct
, 25) << "reset requested_max_size due to not wanting any file write cap" << dendl
;
1980 ceph_mds_request_release rel
;
1982 rel
.cap_id
= cap
.cap_id
;
1984 rel
.issue_seq
= cap
.issue_seq
;
1985 rel
.mseq
= cap
.mseq
;
1986 rel
.caps
= cap
.implemented
;
1987 rel
.wanted
= cap
.wanted
;
1990 req
->cap_releases
.push_back(MClientRequest::Release(rel
,""));
1993 ldout(cct
, 25) << __func__
<< " exit(in:" << *in
<< ") released:"
1994 << released
<< dendl
;
1998 void Client::encode_dentry_release(Dentry
*dn
, MetaRequest
*req
,
1999 mds_rank_t mds
, int drop
, int unless
)
2001 ldout(cct
, 20) << __func__
<< " enter(dn:"
2002 << dn
<< ")" << dendl
;
2005 released
= encode_inode_release(dn
->dir
->parent_inode
, req
,
2006 mds
, drop
, unless
, 1);
2007 if (released
&& dn
->lease_mds
== mds
) {
2008 ldout(cct
, 25) << "preemptively releasing dn to mds" << dendl
;
2009 auto& rel
= req
->cap_releases
.back();
2010 rel
.item
.dname_len
= dn
->name
.length();
2011 rel
.item
.dname_seq
= dn
->lease_seq
;
2012 rel
.dname
= dn
->name
;
2015 ldout(cct
, 25) << __func__
<< " exit(dn:"
2016 << dn
<< ")" << dendl
;
2021 * This requires the MClientRequest *request member to be set.
2022 * It will error out horribly without one.
2023 * Additionally, if you set any *drop member, you'd better have
2024 * set the corresponding dentry!
2026 void Client::encode_cap_releases(MetaRequest
*req
, mds_rank_t mds
)
2028 ldout(cct
, 20) << __func__
<< " enter (req: "
2029 << req
<< ", mds: " << mds
<< ")" << dendl
;
2030 if (req
->inode_drop
&& req
->inode())
2031 encode_inode_release(req
->inode(), req
,
2032 mds
, req
->inode_drop
,
2035 if (req
->old_inode_drop
&& req
->old_inode())
2036 encode_inode_release(req
->old_inode(), req
,
2037 mds
, req
->old_inode_drop
,
2038 req
->old_inode_unless
);
2039 if (req
->other_inode_drop
&& req
->other_inode())
2040 encode_inode_release(req
->other_inode(), req
,
2041 mds
, req
->other_inode_drop
,
2042 req
->other_inode_unless
);
2044 if (req
->dentry_drop
&& req
->dentry())
2045 encode_dentry_release(req
->dentry(), req
,
2046 mds
, req
->dentry_drop
,
2047 req
->dentry_unless
);
2049 if (req
->old_dentry_drop
&& req
->old_dentry())
2050 encode_dentry_release(req
->old_dentry(), req
,
2051 mds
, req
->old_dentry_drop
,
2052 req
->old_dentry_unless
);
2053 ldout(cct
, 25) << __func__
<< " exit (req: "
2054 << req
<< ", mds " << mds
<<dendl
;
2057 bool Client::have_open_session(mds_rank_t mds
)
2059 const auto &it
= mds_sessions
.find(mds
);
2060 return it
!= mds_sessions
.end() &&
2061 (it
->second
.state
== MetaSession::STATE_OPEN
||
2062 it
->second
.state
== MetaSession::STATE_STALE
);
2065 MetaSession
*Client::_get_mds_session(mds_rank_t mds
, Connection
*con
)
2067 const auto &it
= mds_sessions
.find(mds
);
2068 if (it
== mds_sessions
.end() || it
->second
.con
!= con
) {
2075 MetaSession
*Client::_get_or_open_mds_session(mds_rank_t mds
)
2077 auto it
= mds_sessions
.find(mds
);
2078 return it
== mds_sessions
.end() ? _open_mds_session(mds
) : &it
->second
;
2082 * Populate a map of strings with client-identifying metadata,
2083 * such as the hostname. Call this once at initialization.
2085 void Client::populate_metadata(const std::string
&mount_root
)
2089 // TODO: move this to compat.h
2091 DWORD hostname_sz
= 64;
2092 GetComputerNameA(hostname
, &hostname_sz
);
2093 metadata
["hostname"] = hostname
;
2098 metadata
["hostname"] = u
.nodename
;
2099 ldout(cct
, 20) << __func__
<< " read hostname '" << u
.nodename
<< "'" << dendl
;
2101 ldout(cct
, 1) << __func__
<< " failed to read hostname (" << cpp_strerror(r
) << ")" << dendl
;
2105 metadata
["pid"] = stringify(getpid());
2107 // Ceph entity id (the '0' in "client.0")
2108 metadata
["entity_id"] = cct
->_conf
->name
.get_id();
2110 // Our mount position
2111 if (!mount_root
.empty()) {
2112 metadata
["root"] = mount_root
;
2116 metadata
["ceph_version"] = pretty_version_to_str();
2117 metadata
["ceph_sha1"] = git_version_to_str();
2119 // Apply any metadata from the user's configured overrides
2120 std::vector
<std::string
> tokens
;
2121 get_str_vec(cct
->_conf
->client_metadata
, ",", tokens
);
2122 for (const auto &i
: tokens
) {
2123 auto eqpos
= i
.find("=");
2124 // Throw out anything that isn't of the form "<str>=<str>"
2125 if (eqpos
== 0 || eqpos
== std::string::npos
|| eqpos
== i
.size()) {
2126 lderr(cct
) << "Invalid metadata keyval pair: '" << i
<< "'" << dendl
;
2129 metadata
[i
.substr(0, eqpos
)] = i
.substr(eqpos
+ 1);
/**
 * Optionally add or override client metadata fields.
 */
void Client::update_metadata(std::string const &k, std::string const &v)
{
  RWRef_t iref_reader(initialize_state, CLIENT_INITIALIZED);
  ceph_assert(iref_reader.is_state_satisfied());

  std::scoped_lock l(client_lock);

  auto it = metadata.find(k);
  if (it != metadata.end()) {
    ldout(cct, 1) << __func__ << " warning, overriding metadata field '" << k
		  << "' from '" << it->second << "' to '" << v << "'" << dendl;
  }

  metadata[k] = v;
}
MetaSession *Client::_open_mds_session(mds_rank_t mds)
{
  ldout(cct, 10) << __func__ << " mds." << mds << dendl;
  auto addrs = mdsmap->get_addrs(mds);
  auto em = mds_sessions.emplace(std::piecewise_construct,
				 std::forward_as_tuple(mds),
				 std::forward_as_tuple(mds, messenger->connect_to_mds(addrs), addrs));
  ceph_assert(em.second); /* not already present */
  MetaSession *session = &em.first->second;

  auto m = make_message<MClientSession>(CEPH_SESSION_REQUEST_OPEN);
  m->metadata = metadata;
  m->supported_features = feature_bitset_t(CEPHFS_FEATURES_CLIENT_SUPPORTED);
  m->metric_spec = feature_bitset_t(CEPHFS_METRIC_FEATURES_ALL);
  session->con->send_message2(std::move(m));
  return session;
}
void Client::_close_mds_session(MetaSession *s)
{
  ldout(cct, 2) << __func__ << " mds." << s->mds_num << " seq " << s->seq << dendl;
  s->state = MetaSession::STATE_CLOSING;
  s->con->send_message2(make_message<MClientSession>(CEPH_SESSION_REQUEST_CLOSE, s->seq));
}

void Client::_closed_mds_session(MetaSession *s, int err, bool rejected)
{
  ldout(cct, 5) << __func__ << " mds." << s->mds_num << " seq " << s->seq << dendl;
  if (rejected && s->state != MetaSession::STATE_CLOSING)
    s->state = MetaSession::STATE_REJECTED;
  else
    s->state = MetaSession::STATE_CLOSED;
  s->con->mark_down();
  signal_context_list(s->waiting_for_open);
  mount_cond.notify_all();
  remove_session_caps(s, err);
  kick_requests_closed(s);
  mds_ranks_closing.erase(s->mds_num);
  if (s->state == MetaSession::STATE_CLOSED)
    mds_sessions.erase(s->mds_num);
}
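
/*
 * Note (added summary, not from the original sources): callers use the
 * plain form _closed_mds_session(session) for an ordinary teardown, and
 * pass an error plus rejected=true when the MDS refused us, e.g.
 * _closed_mds_session(session, -CEPHFS_EPERM, true); the latter parks the
 * session in STATE_REJECTED instead of erasing it, so _kick_stale_sessions()
 * can clean it up later.
 */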
void Client::handle_client_session(const MConstRef<MClientSession>& m)
{
  mds_rank_t from = mds_rank_t(m->get_source().num());
  ldout(cct, 10) << __func__ << " " << *m << " from mds." << from << dendl;

  std::scoped_lock cl(client_lock);
  MetaSession *session = _get_mds_session(from, m->get_connection().get());
  if (!session) {
    ldout(cct, 10) << " discarding session message from sessionless mds " << m->get_source_inst() << dendl;
    return;
  }

  switch (m->get_op()) {
  case CEPH_SESSION_OPEN:
    {
      feature_bitset_t missing_features(CEPHFS_FEATURES_CLIENT_REQUIRED);
      missing_features -= m->supported_features;
      if (!missing_features.empty()) {
	lderr(cct) << "mds." << from << " lacks required features '"
		   << missing_features << "', closing session " << dendl;
	_close_mds_session(session);
	_closed_mds_session(session, -CEPHFS_EPERM, true);
	break;
      }
      session->mds_features = std::move(m->supported_features);

      renew_caps(session);
      session->state = MetaSession::STATE_OPEN;
      if (is_unmounting())
	mount_cond.notify_all();
      else
	connect_mds_targets(from);
      signal_context_list(session->waiting_for_open);
      break;
    }

  case CEPH_SESSION_CLOSE:
    _closed_mds_session(session);
    break;

  case CEPH_SESSION_RENEWCAPS:
    if (session->cap_renew_seq == m->get_seq()) {
      bool was_stale = ceph_clock_now() >= session->cap_ttl;
      session->cap_ttl =
	session->last_cap_renew_request + mdsmap->get_session_timeout();
      if (was_stale)
	wake_up_session_caps(session, false);
    }
    break;

  case CEPH_SESSION_STALE:
    // invalidate session caps/leases
    session->cap_gen++;
    session->cap_ttl = ceph_clock_now();
    session->cap_ttl -= 1;
    renew_caps(session);
    break;

  case CEPH_SESSION_RECALL_STATE:
    /*
     * Call the renew caps and flush cap releases just before
     * trimming the caps in case the tick() won't get a chance
     * to run them, which could cause the client to be blocklisted
     * and MDS daemons trying to recall the caps again and
     * again.
     *
     * In most cases it will do nothing, and the new cap releases
     * added by trim_caps() followed will be deferred flushing
     * by tick().
     */
    renew_and_flush_cap_releases();
    trim_caps(session, m->get_max_caps());
    break;

  case CEPH_SESSION_FLUSHMSG:
    /* flush cap release */
    if (auto& m = session->release; m) {
      session->con->send_message2(std::move(m));
    }
    session->con->send_message2(make_message<MClientSession>(CEPH_SESSION_FLUSHMSG_ACK, m->get_seq()));
    break;

  case CEPH_SESSION_FORCE_RO:
    force_session_readonly(session);
    break;

  case CEPH_SESSION_REJECT:
    {
      std::string_view error_str;
      auto it = m->metadata.find("error_string");
      if (it != m->metadata.end())
	error_str = it->second;
      else
	error_str = "unknown error";
      lderr(cct) << "mds." << from << " rejected us (" << error_str << ")" << dendl;

      _closed_mds_session(session, -CEPHFS_EPERM, true);
    }
    break;

  default:
    ceph_abort();
  }
}
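
/*
 * Quick reference (added summary, not in the original file): the session
 * state transitions driven by this handler are roughly
 *
 *   CEPH_SESSION_OPEN         -> STATE_OPEN (or immediate close/reject when
 *                                required features are missing)
 *   CEPH_SESSION_CLOSE        -> _closed_mds_session()
 *   CEPH_SESSION_STALE        -> cap_ttl forced into the past, then renew_caps()
 *   CEPH_SESSION_RECALL_STATE -> trim_caps() down to the MDS-requested limit
 *   CEPH_SESSION_REJECT       -> STATE_REJECTED via _closed_mds_session(..., true)
 */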
bool Client::_any_stale_sessions() const
{
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  for (const auto &p : mds_sessions) {
    if (p.second.state == MetaSession::STATE_STALE) {
      return true;
    }
  }

  return false;
}

void Client::_kick_stale_sessions()
{
  ldout(cct, 1) << __func__ << dendl;

  for (auto it = mds_sessions.begin(); it != mds_sessions.end(); ) {
    MetaSession &s = it->second;
    if (s.state == MetaSession::STATE_REJECTED) {
      mds_sessions.erase(it++);
      continue;
    }
    ++it;
    if (s.state == MetaSession::STATE_STALE)
      _closed_mds_session(&s);
  }
}
void Client::send_request(MetaRequest *request, MetaSession *session,
			  bool drop_cap_releases)
{
  mds_rank_t mds = session->mds_num;
  ldout(cct, 10) << __func__ << " rebuilding request " << request->get_tid()
		 << " for mds." << mds << dendl;
  auto r = build_client_request(request);
  if (request->dentry()) {
    r->set_dentry_wanted();
  }
  if (request->got_unsafe) {
    r->set_replayed_op();
    if (request->target)
      r->head.ino = request->target->ino;
  } else {
    encode_cap_releases(request, mds);
    if (drop_cap_releases) // we haven't send cap reconnect yet, drop cap releases
      request->cap_releases.clear();
    else
      r->releases.swap(request->cap_releases);
  }
  r->set_mdsmap_epoch(mdsmap->get_epoch());
  if (r->head.op == CEPH_MDS_OP_SETXATTR) {
    objecter->with_osdmap([r](const OSDMap& o) {
	r->set_osdmap_epoch(o.get_epoch());
      });
  }

  if (request->mds == -1) {
    request->sent_stamp = ceph_clock_now();
    ldout(cct, 20) << __func__ << " set sent_stamp to " << request->sent_stamp << dendl;
  }
  request->mds = mds;

  Inode *in = request->inode();
  if (in) {
    auto it = in->caps.find(mds);
    if (it != in->caps.end()) {
      request->sent_on_mseq = it->second.mseq;
    }
  }

  session->requests.push_back(&request->item);

  ldout(cct, 10) << __func__ << " " << *r << " to mds." << mds << dendl;
  session->con->send_message2(std::move(r));
}
ref_t<MClientRequest> Client::build_client_request(MetaRequest *request)
{
  auto req = make_message<MClientRequest>(request->get_op());
  req->set_tid(request->tid);
  req->set_stamp(request->op_stamp);
  memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head));

  // if the filepath's haven't been set, set them!
  if (request->path.empty()) {
    Inode *in = request->inode();
    Dentry *de = request->dentry();
    if (in)
      in->make_nosnap_relative_path(request->path);
    else if (de) {
      if (de->inode)
	de->inode->make_nosnap_relative_path(request->path);
      else if (de->dir) {
	de->dir->parent_inode->make_nosnap_relative_path(request->path);
	request->path.push_dentry(de->name);
      }
      else ldout(cct, 1) << "Warning -- unable to construct a filepath!"
		   << " No path, inode, or appropriately-endowed dentry given!"
		   << dendl;
    } else ldout(cct, 1) << "Warning -- unable to construct a filepath!"
		   << " No path, inode, or dentry given!"
		   << dendl;
  }
  req->set_filepath(request->get_filepath());
  req->set_filepath2(request->get_filepath2());
  req->set_alternate_name(request->alternate_name);
  req->set_data(request->data);
  req->set_retry_attempt(request->retry_attempt++);
  req->head.num_fwd = request->num_fwd;
  const gid_t *_gids;
  int gid_count = request->perms.get_gids(&_gids);
  req->set_gid_list(gid_count, _gids);
  return req;
}
void Client::handle_client_request_forward(const MConstRef<MClientRequestForward>& fwd)
{
  mds_rank_t mds = mds_rank_t(fwd->get_source().num());

  std::scoped_lock cl(client_lock);
  MetaSession *session = _get_mds_session(mds, fwd->get_connection().get());
  if (!session) {
    return;
  }
  ceph_tid_t tid = fwd->get_tid();

  if (mds_requests.count(tid) == 0) {
    ldout(cct, 10) << __func__ << " no pending request on tid " << tid << dendl;
    return;
  }

  MetaRequest *request = mds_requests[tid];
  ceph_assert(request);

  // reset retry counter
  request->retry_attempt = 0;

  // request not forwarded, or dest mds has no session.
  // resend.
  ldout(cct, 10) << __func__ << " tid " << tid
		 << " fwd " << fwd->get_num_fwd()
		 << " to mds." << fwd->get_dest_mds()
		 << ", resending to " << fwd->get_dest_mds()
		 << dendl;

  request->mds = -1;
  request->item.remove_myself();
  request->num_fwd = fwd->get_num_fwd();
  request->resend_mds = fwd->get_dest_mds();
  request->caller_cond->notify_all();
}

bool Client::is_dir_operation(MetaRequest *req)
{
  int op = req->get_op();
  if (op == CEPH_MDS_OP_MKNOD || op == CEPH_MDS_OP_LINK ||
      op == CEPH_MDS_OP_UNLINK || op == CEPH_MDS_OP_RENAME ||
      op == CEPH_MDS_OP_MKDIR || op == CEPH_MDS_OP_RMDIR ||
      op == CEPH_MDS_OP_SYMLINK || op == CEPH_MDS_OP_CREATE)
    return true;
  return false;
}
void Client::handle_client_reply(const MConstRef<MClientReply>& reply)
{
  mds_rank_t mds_num = mds_rank_t(reply->get_source().num());

  std::scoped_lock cl(client_lock);
  MetaSession *session = _get_mds_session(mds_num, reply->get_connection().get());
  if (!session) {
    return;
  }

  ceph_tid_t tid = reply->get_tid();
  bool is_safe = reply->is_safe();

  if (mds_requests.count(tid) == 0) {
    lderr(cct) << __func__ << " no pending request on tid " << tid
	       << " safe is:" << is_safe << dendl;
    return;
  }
  MetaRequest *request = mds_requests.at(tid);

  ldout(cct, 20) << __func__ << " got a reply. Safe:" << is_safe
		 << " tid " << tid << dendl;

  if (request->got_unsafe && !is_safe) {
    //duplicate response
    ldout(cct, 0) << "got a duplicate reply on tid " << tid << " from mds "
		  << mds_num << " safe:" << is_safe << dendl;
    return;
  }

  if (-CEPHFS_ESTALE == reply->get_result()) { // see if we can get to proper MDS
    ldout(cct, 20) << "got ESTALE on tid " << request->tid
		   << " from mds." << request->mds << dendl;
    request->send_to_auth = true;
    request->resend_mds = choose_target_mds(request);
    Inode *in = request->inode();
    std::map<mds_rank_t, Cap>::const_iterator it;
    if (request->resend_mds >= 0 &&
	request->resend_mds == request->mds &&
	(in == nullptr ||
	 (it = in->caps.find(request->resend_mds)) != in->caps.end() ||
	 request->sent_on_mseq == it->second.mseq)) {
      ldout(cct, 20) << "have to return ESTALE" << dendl;
    } else {
      request->caller_cond->notify_all();
      return;
    }
  }

  ceph_assert(!request->reply);
  request->reply = reply;
  insert_trace(request, session);

  // Handle unsafe reply
  if (!is_safe) {
    request->got_unsafe = true;
    session->unsafe_requests.push_back(&request->unsafe_item);
    if (is_dir_operation(request)) {
      Inode *dir = request->inode();
      ceph_assert(dir);
      dir->unsafe_ops.push_back(&request->unsafe_dir_item);
    }
    if (request->target) {
      InodeRef &in = request->target;
      in->unsafe_ops.push_back(&request->unsafe_target_item);
    }
  }

  // Only signal the caller once (on the first reply):
  // Either it's an unsafe reply, or it's a safe reply and no unsafe reply was sent.
  if (!is_safe || !request->got_unsafe) {
    ceph::condition_variable cond;
    request->dispatch_cond = &cond;

    // wake up waiter
    ldout(cct, 20) << __func__ << " signalling caller " << (void*)request->caller_cond << dendl;
    request->caller_cond->notify_all();

    // wake for kick back
    std::unique_lock l{client_lock, std::adopt_lock};
    cond.wait(l, [tid, request, &cond, this] {
      if (request->dispatch_cond) {
	ldout(cct, 20) << "handle_client_reply awaiting kickback on tid "
		       << tid << " " << &cond << dendl;
      }
      return !request->dispatch_cond;
    });
    l.release();
  }

  if (is_safe) {
    // the filesystem change is committed to disk
    // we're done, clean up
    if (request->got_unsafe) {
      request->unsafe_item.remove_myself();
      request->unsafe_dir_item.remove_myself();
      request->unsafe_target_item.remove_myself();
      signal_cond_list(request->waitfor_safe);
    }
    request->item.remove_myself();
    unregister_request(request);
  }
  if (is_unmounting())
    mount_cond.notify_all();
}
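
/*
 * Added summary of the reply lifecycle above (not part of the original
 * file): a request may see up to two replies.  The first, "unsafe" reply
 * means the MDS has applied the change in memory; the request is then
 * parked on the session's unsafe_requests list (and on the inode's
 * unsafe_ops) until the "safe" reply arrives, at which point it is removed
 * from those lists, waiters on waitfor_safe are signalled, and the request
 * is unregistered.  The caller itself is only woken once, on whichever
 * reply arrives first.
 */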
void Client::_handle_full_flag(int64_t pool)
{
  ldout(cct, 1) << __func__ << ": FULL: cancelling outstanding operations "
		<< "on " << pool << dendl;
  // Cancel all outstanding ops in this pool with -CEPHFS_ENOSPC: it is necessary
  // to do this rather than blocking, because otherwise when we fill up we
  // potentially lock caps forever on files with dirty pages, and we need
  // to be able to release those caps to the MDS so that it can delete files
  // and free up space.
  epoch_t cancelled_epoch = objecter->op_cancel_writes(-CEPHFS_ENOSPC, pool);

  // For all inodes with layouts in this pool and a pending flush write op
  // (i.e. one of the ones we will cancel), we've got to purge_set their data
  // from ObjectCacher so that it doesn't re-issue the write in response to
  // the ENOSPC error.
  // Fortunately since we're cancelling everything in a given pool, we don't
  // need to know which ops belong to which ObjectSet, we can just blow all
  // the un-flushed cached data away and mark any dirty inodes' async_err
  // field with -CEPHFS_ENOSPC as long as we're sure all the ops we cancelled were
  // affecting this pool, and all the objectsets we're purging were also
  // in this pool.
  for (unordered_map<vinodeno_t,Inode*>::iterator i = inode_map.begin();
       i != inode_map.end(); ++i)
  {
    Inode *inode = i->second;
    if (inode->oset.dirty_or_tx
	&& (pool == -1 || inode->layout.pool_id == pool)) {
      ldout(cct, 4) << __func__ << ": FULL: inode 0x" << std::hex << i->first << std::dec
		    << " has dirty objects, purging and setting ENOSPC" << dendl;
      objectcacher->purge_set(&inode->oset);
      inode->set_async_err(-CEPHFS_ENOSPC);
    }
  }

  if (cancelled_epoch != (epoch_t)-1) {
    set_cap_epoch_barrier(cancelled_epoch);
  }
}
void Client::handle_osd_map(const MConstRef<MOSDMap>& m)
{
  std::set<entity_addr_t> new_blocklists;

  std::scoped_lock cl(client_lock);
  objecter->consume_blocklist_events(&new_blocklists);

  const auto myaddrs = messenger->get_myaddrs();
  bool new_blocklist = false;
  bool prenautilus = objecter->with_osdmap(
    [&](const OSDMap& o) {
      return o.require_osd_release < ceph_release_t::nautilus;
    });
  if (!blocklisted) {
    for (auto a : myaddrs.v) {
      // blocklist entries are always TYPE_ANY for nautilus+
      a.set_type(entity_addr_t::TYPE_ANY);
      if (new_blocklists.count(a)) {
	new_blocklist = true;
	break;
      }
      if (prenautilus) {
	// ...except pre-nautilus, they were TYPE_LEGACY
	a.set_type(entity_addr_t::TYPE_LEGACY);
	if (new_blocklists.count(a)) {
	  new_blocklist = true;
	  break;
	}
      }
    }
  }
  if (new_blocklist) {
    auto epoch = objecter->with_osdmap([](const OSDMap &o){
	return o.get_epoch();
      });
    lderr(cct) << "I was blocklisted at osd epoch " << epoch << dendl;
    blocklisted = true;

    _abort_mds_sessions(-CEPHFS_EBLOCKLISTED);

    // Since we know all our OSD ops will fail, cancel them all preemptively,
    // so that on an unhealthy cluster we can umount promptly even if e.g.
    // some PGs were inaccessible.
    objecter->op_cancel_writes(-CEPHFS_EBLOCKLISTED);

  } else if (blocklisted) {
    // Handle case where we were blocklisted but no longer are
    blocklisted = objecter->with_osdmap([myaddrs](const OSDMap &o){
	return o.is_blocklisted(myaddrs);});
  }

  // Always subscribe to next osdmap for blocklisted client
  // until this client is not blocklisted.
  if (blocklisted) {
    objecter->maybe_request_map();
  }

  if (objecter->osdmap_full_flag()) {
    _handle_full_flag(-1);
  } else {
    // Accumulate local list of full pools so that I can drop
    // the objecter lock before re-entering objecter in
    // _handle_full_flag
    std::vector<int64_t> full_pools;

    objecter->with_osdmap([&full_pools](const OSDMap &o) {
	for (const auto& kv : o.get_pools()) {
	  if (kv.second.has_flag(pg_pool_t::FLAG_FULL)) {
	    full_pools.push_back(kv.first);
	  }
	}
      });

    for (auto p : full_pools)
      _handle_full_flag(p);

    // Subscribe to subsequent maps to watch for the full flag going
    // away.  For the global full flag objecter does this for us, but
    // it pays no attention to the per-pool full flag so in this branch
    // we do it ourselves.
    if (!full_pools.empty()) {
      objecter->maybe_request_map();
    }
  }
}
2700 // ------------------------
2701 // incoming messages
2704 bool Client::ms_dispatch2(const MessageRef
&m
)
2706 RWRef_t
iref_reader(initialize_state
, CLIENT_INITIALIZED
);
2707 if (!iref_reader
.is_state_satisfied()) {
2708 ldout(cct
, 10) << "inactive, discarding " << *m
<< dendl
;
2712 switch (m
->get_type()) {
2713 // mounting and mds sessions
2714 case CEPH_MSG_MDS_MAP
:
2715 handle_mds_map(ref_cast
<MMDSMap
>(m
));
2717 case CEPH_MSG_FS_MAP
:
2718 handle_fs_map(ref_cast
<MFSMap
>(m
));
2720 case CEPH_MSG_FS_MAP_USER
:
2721 handle_fs_map_user(ref_cast
<MFSMapUser
>(m
));
2723 case CEPH_MSG_CLIENT_SESSION
:
2724 handle_client_session(ref_cast
<MClientSession
>(m
));
2727 case CEPH_MSG_OSD_MAP
:
2728 handle_osd_map(ref_cast
<MOSDMap
>(m
));
2732 case CEPH_MSG_CLIENT_REQUEST_FORWARD
:
2733 handle_client_request_forward(ref_cast
<MClientRequestForward
>(m
));
2735 case CEPH_MSG_CLIENT_REPLY
:
2736 handle_client_reply(ref_cast
<MClientReply
>(m
));
2740 case CEPH_MSG_CLIENT_RECLAIM_REPLY
:
2741 handle_client_reclaim_reply(ref_cast
<MClientReclaimReply
>(m
));
2744 case CEPH_MSG_CLIENT_SNAP
:
2745 handle_snap(ref_cast
<MClientSnap
>(m
));
2747 case CEPH_MSG_CLIENT_CAPS
:
2748 handle_caps(ref_cast
<MClientCaps
>(m
));
2750 case CEPH_MSG_CLIENT_LEASE
:
2751 handle_lease(ref_cast
<MClientLease
>(m
));
2753 case MSG_COMMAND_REPLY
:
2754 if (m
->get_source().type() == CEPH_ENTITY_TYPE_MDS
) {
2755 handle_command_reply(ref_cast
<MCommandReply
>(m
));
2760 case CEPH_MSG_CLIENT_QUOTA
:
2761 handle_quota(ref_cast
<MClientQuota
>(m
));
2769 std::scoped_lock
cl(client_lock
);
2770 if (is_unmounting()) {
2771 ldout(cct
, 10) << "unmounting: trim pass, size was " << lru
.lru_get_size()
2772 << "+" << inode_map
.size() << dendl
;
2773 uint64_t size
= lru
.lru_get_size() + inode_map
.size();
2775 if (size
> lru
.lru_get_size() + inode_map
.size()) {
2776 ldout(cct
, 10) << "unmounting: trim pass, cache shrank, poking unmount()" << dendl
;
2777 mount_cond
.notify_all();
2779 ldout(cct
, 10) << "unmounting: trim pass, size still " << lru
.lru_get_size()
2780 << "+" << inode_map
.size() << dendl
;
2787 void Client::handle_fs_map(const MConstRef
<MFSMap
>& m
)
2789 std::scoped_lock
cl(client_lock
);
2790 fsmap
.reset(new FSMap(m
->get_fsmap()));
2792 signal_cond_list(waiting_for_fsmap
);
2794 monclient
->sub_got("fsmap", fsmap
->get_epoch());
2797 void Client::handle_fs_map_user(const MConstRef
<MFSMapUser
>& m
)
2799 std::scoped_lock
cl(client_lock
);
2800 fsmap_user
.reset(new FSMapUser
);
2801 *fsmap_user
= m
->get_fsmap();
2803 monclient
->sub_got("fsmap.user", fsmap_user
->get_epoch());
2804 signal_cond_list(waiting_for_fsmap
);
2807 // Cancel all the commands for missing or laggy GIDs
2808 void Client::cancel_commands(const MDSMap
& newmap
)
2810 std::vector
<ceph_tid_t
> cancel_ops
;
2812 std::scoped_lock
cmd_lock(command_lock
);
2813 auto &commands
= command_table
.get_commands();
2814 for (const auto &[tid
, op
] : commands
) {
2815 const mds_gid_t op_mds_gid
= op
.mds_gid
;
2816 if (newmap
.is_dne_gid(op_mds_gid
) || newmap
.is_laggy_gid(op_mds_gid
)) {
2817 ldout(cct
, 1) << __func__
<< ": cancelling command op " << tid
<< dendl
;
2818 cancel_ops
.push_back(tid
);
2820 std::ostringstream ss
;
2821 ss
<< "MDS " << op_mds_gid
<< " went away";
2822 *(op
.outs
) = ss
.str();
2825 * No need to make the con->mark_down under
2826 * client_lock here, because the con will
2829 op
.con
->mark_down();
2831 op
.on_finish
->complete(-CEPHFS_ETIMEDOUT
);
2835 for (const auto &tid
: cancel_ops
)
2836 command_table
.erase(tid
);
2839 void Client::handle_mds_map(const MConstRef
<MMDSMap
>& m
)
2841 std::unique_lock
cl(client_lock
);
2842 if (m
->get_epoch() <= mdsmap
->get_epoch()) {
2843 ldout(cct
, 1) << __func__
<< " epoch " << m
->get_epoch()
2844 << " is identical to or older than our "
2845 << mdsmap
->get_epoch() << dendl
;
2850 ldout(cct
, 1) << __func__
<< " epoch " << m
->get_epoch() << dendl
;
2851 std::unique_ptr
<MDSMap
> _mdsmap(new MDSMap
);
2852 _mdsmap
->decode(m
->get_encoded());
2853 cancel_commands(*_mdsmap
.get());
2856 _mdsmap
.swap(mdsmap
);
2859 for (auto p
= mds_sessions
.begin(); p
!= mds_sessions
.end(); ) {
2860 mds_rank_t mds
= p
->first
;
2861 MetaSession
*session
= &p
->second
;
2864 int oldstate
= _mdsmap
->get_state(mds
);
2865 int newstate
= mdsmap
->get_state(mds
);
2866 if (!mdsmap
->is_up(mds
)) {
2867 session
->con
->mark_down();
2868 } else if (mdsmap
->get_addrs(mds
) != session
->addrs
) {
2869 auto old_inc
= _mdsmap
->get_incarnation(mds
);
2870 auto new_inc
= mdsmap
->get_incarnation(mds
);
2871 if (old_inc
!= new_inc
) {
2872 ldout(cct
, 1) << "mds incarnation changed from "
2873 << old_inc
<< " to " << new_inc
<< dendl
;
2874 oldstate
= MDSMap::STATE_NULL
;
2876 session
->con
->mark_down();
2877 session
->addrs
= mdsmap
->get_addrs(mds
);
2878 // When new MDS starts to take over, notify kernel to trim unused entries
2879 // in its dcache/icache. Hopefully, the kernel will release some unused
2880 // inodes before the new MDS enters reconnect state.
2881 trim_cache_for_reconnect(session
);
2882 } else if (oldstate
== newstate
)
2883 continue; // no change
2885 session
->mds_state
= newstate
;
2886 if (newstate
== MDSMap::STATE_RECONNECT
) {
2887 session
->con
= messenger
->connect_to_mds(session
->addrs
);
2888 send_reconnect(session
);
2889 } else if (newstate
> MDSMap::STATE_RECONNECT
) {
2890 if (oldstate
< MDSMap::STATE_RECONNECT
) {
2891 ldout(cct
, 1) << "we may miss the MDSMap::RECONNECT, close mds session ... " << dendl
;
2892 _closed_mds_session(session
);
2895 if (newstate
>= MDSMap::STATE_ACTIVE
) {
2896 if (oldstate
< MDSMap::STATE_ACTIVE
) {
2897 // kick new requests
2898 kick_requests(session
);
2899 kick_flushing_caps(session
);
2900 signal_context_list(session
->waiting_for_open
);
2901 wake_up_session_caps(session
, true);
2903 connect_mds_targets(mds
);
2905 } else if (newstate
== MDSMap::STATE_NULL
&&
2906 mds
>= mdsmap
->get_max_mds()) {
2907 _closed_mds_session(session
);
2911 // kick any waiting threads
2912 signal_cond_list(waiting_for_mdsmap
);
2914 monclient
->sub_got("mdsmap", mdsmap
->get_epoch());
2917 void Client::send_reconnect(MetaSession
*session
)
2919 mds_rank_t mds
= session
->mds_num
;
2920 ldout(cct
, 10) << __func__
<< " to mds." << mds
<< dendl
;
2922 // trim unused caps to reduce MDS's cache rejoin time
2923 trim_cache_for_reconnect(session
);
2925 session
->readonly
= false;
2927 session
->release
.reset();
2929 // reset my cap seq number
2931 //connect to the mds' offload targets
2932 connect_mds_targets(mds
);
2933 //make sure unsafe requests get saved
2934 resend_unsafe_requests(session
);
2936 early_kick_flushing_caps(session
);
2938 auto m
= make_message
<MClientReconnect
>();
2939 bool allow_multi
= session
->mds_features
.test(CEPHFS_FEATURE_MULTI_RECONNECT
);
2941 // i have an open session.
2942 ceph::unordered_set
<inodeno_t
> did_snaprealm
;
2943 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
= inode_map
.begin();
2944 p
!= inode_map
.end();
2946 Inode
*in
= p
->second
;
2947 auto it
= in
->caps
.find(mds
);
2948 if (it
!= in
->caps
.end()) {
2950 m
->get_approx_size() >=
2951 static_cast<size_t>((std::numeric_limits
<int>::max() >> 1))) {
2953 session
->con
->send_message2(std::move(m
));
2955 m
= make_message
<MClientReconnect
>();
2958 Cap
&cap
= it
->second
;
2959 ldout(cct
, 10) << " caps on " << p
->first
2960 << " " << ccap_string(cap
.issued
)
2961 << " wants " << ccap_string(in
->caps_wanted())
2964 in
->make_short_path(path
);
2965 ldout(cct
, 10) << " path " << path
<< dendl
;
2968 _encode_filelocks(in
, flockbl
);
2970 cap
.seq
= 0; // reset seq.
2971 cap
.issue_seq
= 0; // reset seq.
2972 cap
.mseq
= 0; // reset seq.
2973 // cap gen should catch up with session cap_gen
2974 if (cap
.gen
< session
->cap_gen
) {
2975 cap
.gen
= session
->cap_gen
;
2976 cap
.issued
= cap
.implemented
= CEPH_CAP_PIN
;
2978 cap
.issued
= cap
.implemented
;
2980 snapid_t snap_follows
= 0;
2981 if (!in
->cap_snaps
.empty())
2982 snap_follows
= in
->cap_snaps
.begin()->first
;
2984 m
->add_cap(p
->first
.ino
,
2986 path
.get_ino(), path
.get_path(), // ino
2987 in
->caps_wanted(), // wanted
2988 cap
.issued
, // issued
2993 if (did_snaprealm
.count(in
->snaprealm
->ino
) == 0) {
2994 ldout(cct
, 10) << " snaprealm " << *in
->snaprealm
<< dendl
;
2995 m
->add_snaprealm(in
->snaprealm
->ino
, in
->snaprealm
->seq
, in
->snaprealm
->parent
);
2996 did_snaprealm
.insert(in
->snaprealm
->ino
);
3002 m
->set_encoding_version(0); // use connection features to choose encoding
3003 session
->con
->send_message2(std::move(m
));
3005 mount_cond
.notify_all();
3007 if (session
->reclaim_state
== MetaSession::RECLAIMING
)
3008 signal_cond_list(waiting_for_reclaim
);
3012 void Client::kick_requests(MetaSession
*session
)
3014 ldout(cct
, 10) << __func__
<< " for mds." << session
->mds_num
<< dendl
;
3015 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
3016 p
!= mds_requests
.end();
3018 MetaRequest
*req
= p
->second
;
3019 if (req
->got_unsafe
)
3021 if (req
->aborted()) {
3022 if (req
->caller_cond
) {
3024 req
->caller_cond
->notify_all();
3028 if (req
->retry_attempt
> 0)
3029 continue; // new requests only
3030 if (req
->mds
== session
->mds_num
) {
3031 send_request(p
->second
, session
);
3036 void Client::resend_unsafe_requests(MetaSession
*session
)
3038 for (xlist
<MetaRequest
*>::iterator iter
= session
->unsafe_requests
.begin();
3041 send_request(*iter
, session
);
3043 // also re-send old requests when MDS enters reconnect stage. So that MDS can
3044 // process completed requests in clientreplay stage.
3045 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
3046 p
!= mds_requests
.end();
3048 MetaRequest
*req
= p
->second
;
3049 if (req
->got_unsafe
)
3053 if (req
->retry_attempt
== 0)
3054 continue; // old requests only
3055 if (req
->mds
== session
->mds_num
)
3056 send_request(req
, session
, true);
3060 void Client::wait_unsafe_requests()
3062 list
<MetaRequest
*> last_unsafe_reqs
;
3063 for (const auto &p
: mds_sessions
) {
3064 const MetaSession
&s
= p
.second
;
3065 if (!s
.unsafe_requests
.empty()) {
3066 MetaRequest
*req
= s
.unsafe_requests
.back();
3068 last_unsafe_reqs
.push_back(req
);
3072 for (list
<MetaRequest
*>::iterator p
= last_unsafe_reqs
.begin();
3073 p
!= last_unsafe_reqs
.end();
3075 MetaRequest
*req
= *p
;
3076 if (req
->unsafe_item
.is_on_list())
3077 wait_on_list(req
->waitfor_safe
);
3082 void Client::kick_requests_closed(MetaSession
*session
)
3084 ldout(cct
, 10) << __func__
<< " for mds." << session
->mds_num
<< dendl
;
3085 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
3086 p
!= mds_requests
.end(); ) {
3087 MetaRequest
*req
= p
->second
;
3089 if (req
->mds
== session
->mds_num
) {
3090 if (req
->caller_cond
) {
3092 req
->caller_cond
->notify_all();
3094 req
->item
.remove_myself();
3095 if (req
->got_unsafe
) {
3096 lderr(cct
) << __func__
<< " removing unsafe request " << req
->get_tid() << dendl
;
3097 req
->unsafe_item
.remove_myself();
3098 if (is_dir_operation(req
)) {
3099 Inode
*dir
= req
->inode();
3101 dir
->set_async_err(-CEPHFS_EIO
);
3102 lderr(cct
) << "kick_requests_closed drop req of inode(dir) : "
3103 << dir
->ino
<< " " << req
->get_tid() << dendl
;
3104 req
->unsafe_dir_item
.remove_myself();
3107 InodeRef
&in
= req
->target
;
3108 in
->set_async_err(-CEPHFS_EIO
);
3109 lderr(cct
) << "kick_requests_closed drop req of inode : "
3110 << in
->ino
<< " " << req
->get_tid() << dendl
;
3111 req
->unsafe_target_item
.remove_myself();
3113 signal_cond_list(req
->waitfor_safe
);
3114 unregister_request(req
);
3118 ceph_assert(session
->requests
.empty());
3119 ceph_assert(session
->unsafe_requests
.empty());
3129 void Client::got_mds_push(MetaSession
*s
)
3132 ldout(cct
, 10) << " mds." << s
->mds_num
<< " seq now " << s
->seq
<< dendl
;
3133 if (s
->state
== MetaSession::STATE_CLOSING
) {
3134 s
->con
->send_message2(make_message
<MClientSession
>(CEPH_SESSION_REQUEST_CLOSE
, s
->seq
));
3138 void Client::handle_lease(const MConstRef
<MClientLease
>& m
)
3140 ldout(cct
, 10) << __func__
<< " " << *m
<< dendl
;
3142 ceph_assert(m
->get_action() == CEPH_MDS_LEASE_REVOKE
);
3143 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
3145 std::scoped_lock
cl(client_lock
);
3146 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
3151 got_mds_push(session
);
3153 ceph_seq_t seq
= m
->get_seq();
3156 vinodeno_t
vino(m
->get_ino(), CEPH_NOSNAP
);
3157 if (inode_map
.count(vino
) == 0) {
3158 ldout(cct
, 10) << " don't have vino " << vino
<< dendl
;
3161 in
= inode_map
[vino
];
3163 if (m
->get_mask() & CEPH_LEASE_VALID
) {
3164 if (!in
->dir
|| in
->dir
->dentries
.count(m
->dname
) == 0) {
3165 ldout(cct
, 10) << " don't have dir|dentry " << m
->get_ino() << "/" << m
->dname
<<dendl
;
3168 Dentry
*dn
= in
->dir
->dentries
[m
->dname
];
3169 ldout(cct
, 10) << " revoked DN lease on " << dn
<< dendl
;
3175 auto reply
= make_message
<MClientLease
>(CEPH_MDS_LEASE_RELEASE
, seq
,
3176 m
->get_mask(), m
->get_ino(),
3177 m
->get_first(), m
->get_last(), m
->dname
);
3178 m
->get_connection()->send_message2(std::move(reply
));
3182 void Client::_put_inode(Inode
*in
, int n
)
3184 ldout(cct
, 10) << __func__
<< " on " << *in
<< " n = " << n
<< dendl
;
3186 int left
= in
->get_nref();
3187 ceph_assert(left
>= n
+ 1);
3190 if (left
== 1) { // the last one will be held by the inode_map
3192 remove_all_caps(in
);
3194 ldout(cct
, 10) << __func__
<< " deleting " << *in
<< dendl
;
3195 bool unclean
= objectcacher
->release_set(&in
->oset
);
3196 ceph_assert(!unclean
);
3197 inode_map
.erase(in
->vino());
3198 if (use_faked_inos())
3199 _release_faked_ino(in
);
3201 if (root
== nullptr) {
3203 while (!root_parents
.empty())
3204 root_parents
.erase(root_parents
.begin());
3211 void Client::delay_put_inodes(bool wakeup
)
3213 ceph_assert(ceph_mutex_is_locked_by_me(client_lock
));
3215 std::map
<Inode
*,int> release
;
3217 std::scoped_lock
dl(delay_i_lock
);
3218 release
.swap(delay_i_release
);
3221 if (release
.empty())
3224 for (auto &[in
, cnt
] : release
)
3225 _put_inode(in
, cnt
);
3228 mount_cond
.notify_all();
3231 void Client::put_inode(Inode
*in
, int n
)
3233 ldout(cct
, 20) << __func__
<< " on " << *in
<< " n = " << n
<< dendl
;
3235 std::scoped_lock
dl(delay_i_lock
);
3236 delay_i_release
[in
] += n
;
3239 void Client::close_dir(Dir
*dir
)
3241 Inode
*in
= dir
->parent_inode
;
3242 ldout(cct
, 15) << __func__
<< " dir " << dir
<< " on " << in
<< dendl
;
3243 ceph_assert(dir
->is_empty());
3244 ceph_assert(in
->dir
== dir
);
3245 ceph_assert(in
->dentries
.size() < 2); // dirs can't be hard-linked
3246 if (!in
->dentries
.empty())
3247 in
->get_first_parent()->put(); // unpin dentry
3251 put_inode(in
); // unpin inode
3255 * Don't call this with in==NULL, use get_or_create for that
3256 * leave dn set to default NULL unless you're trying to add
3257 * a new inode to a pre-created Dentry
3259 Dentry
* Client::link(Dir
*dir
, const string
& name
, Inode
*in
, Dentry
*dn
)
3262 // create a new Dentry
3263 dn
= new Dentry(dir
, name
);
3265 lru
.lru_insert_mid(dn
); // mid or top?
3267 ldout(cct
, 15) << "link dir " << dir
->parent_inode
<< " '" << name
<< "' to inode " << in
3268 << " dn " << dn
<< " (new dn)" << dendl
;
3270 ceph_assert(!dn
->inode
);
3271 ldout(cct
, 15) << "link dir " << dir
->parent_inode
<< " '" << name
<< "' to inode " << in
3272 << " dn " << dn
<< " (old dn)" << dendl
;
3275 if (in
) { // link to inode
3277 // only one parent for directories!
3278 if (in
->is_dir() && !in
->dentries
.empty()) {
3279 tmp_ref
= in
; // prevent unlink below from freeing the inode.
3280 Dentry
*olddn
= in
->get_first_parent();
3281 ceph_assert(olddn
->dir
!= dir
|| olddn
->name
!= name
);
3282 Inode
*old_diri
= olddn
->dir
->parent_inode
;
3283 clear_dir_complete_and_ordered(old_diri
, true);
3284 unlink(olddn
, true, true); // keep dir, dentry
3289 ldout(cct
, 20) << "link inode " << in
<< " parents now " << in
->dentries
<< dendl
;
3295 void Client::unlink(Dentry
*dn
, bool keepdir
, bool keepdentry
)
3297 InodeRef
in(dn
->inode
);
3298 ldout(cct
, 15) << "unlink dir " << dn
->dir
->parent_inode
<< " '" << dn
->name
<< "' dn " << dn
3299 << " inode " << dn
->inode
<< dendl
;
3301 // unlink from inode
3305 ldout(cct
, 20) << "unlink inode " << in
<< " parents now " << in
->dentries
<< dendl
;
3311 ldout(cct
, 15) << "unlink removing '" << dn
->name
<< "' dn " << dn
<< dendl
;
3321 if (dir
->is_empty() && !keepdir
)
3327 * For asynchronous flushes, check for errors from the IO and
3328 * update the inode if necessary
3330 class C_Client_FlushComplete
: public Context
{
3335 C_Client_FlushComplete(Client
*c
, Inode
*in
) : client(c
), inode(in
) { }
3336 void finish(int r
) override
{
3337 ceph_assert(ceph_mutex_is_locked_by_me(client
->client_lock
));
3339 client_t
const whoami
= client
->whoami
; // For the benefit of ldout prefix
3340 ldout(client
->cct
, 1) << "I/O error from flush on inode " << inode
3341 << " 0x" << std::hex
<< inode
->ino
<< std::dec
3342 << ": " << r
<< "(" << cpp_strerror(r
) << ")" << dendl
;
3343 inode
->set_async_err(r
);
3353 void Client::get_cap_ref(Inode
*in
, int cap
)
3355 if ((cap
& CEPH_CAP_FILE_BUFFER
) &&
3356 in
->cap_refs
[CEPH_CAP_FILE_BUFFER
] == 0) {
3357 ldout(cct
, 5) << __func__
<< " got first FILE_BUFFER ref on " << *in
<< dendl
;
3360 if ((cap
& CEPH_CAP_FILE_CACHE
) &&
3361 in
->cap_refs
[CEPH_CAP_FILE_CACHE
] == 0) {
3362 ldout(cct
, 5) << __func__
<< " got first FILE_CACHE ref on " << *in
<< dendl
;
3365 in
->get_cap_ref(cap
);
3368 void Client::put_cap_ref(Inode
*in
, int cap
)
3370 int last
= in
->put_cap_ref(cap
);
3373 int drop
= last
& ~in
->caps_issued();
3374 if (in
->snapid
== CEPH_NOSNAP
) {
3375 if ((last
& (CEPH_CAP_FILE_WR
| CEPH_CAP_FILE_BUFFER
)) &&
3376 !in
->cap_snaps
.empty() &&
3377 in
->cap_snaps
.rbegin()->second
.writing
) {
3378 ldout(cct
, 10) << __func__
<< " finishing pending cap_snap on " << *in
<< dendl
;
3379 in
->cap_snaps
.rbegin()->second
.writing
= 0;
3380 finish_cap_snap(in
, in
->cap_snaps
.rbegin()->second
, get_caps_used(in
));
3381 signal_cond_list(in
->waitfor_caps
); // wake up blocked sync writers
3383 if (last
& CEPH_CAP_FILE_BUFFER
) {
3384 for (auto &p
: in
->cap_snaps
)
3385 p
.second
.dirty_data
= 0;
3386 signal_cond_list(in
->waitfor_commit
);
3387 ldout(cct
, 5) << __func__
<< " dropped last FILE_BUFFER ref on " << *in
<< dendl
;
3391 if (last
& CEPH_CAP_FILE_CACHE
) {
3392 ldout(cct
, 5) << __func__
<< " dropped last FILE_CACHE ref on " << *in
<< dendl
;
3398 put_inode(in
, put_nref
);
3402 // get caps for a given file handle -- the inode should have @need caps
3403 // issued by the mds and @want caps not revoked (or not under revocation).
3404 // this routine blocks till the cap requirement is satisfied. also account
3405 // (track) for capability hit when required (when cap requirement succeedes).
3406 int Client::get_caps(Fh
*fh
, int need
, int want
, int *phave
, loff_t endoff
)
3408 Inode
*in
= fh
->inode
.get();
3410 int r
= check_pool_perm(in
, need
);
3415 int file_wanted
= in
->caps_file_wanted();
3416 if ((file_wanted
& need
) != need
) {
3417 ldout(cct
, 10) << "get_caps " << *in
<< " need " << ccap_string(need
)
3418 << " file_wanted " << ccap_string(file_wanted
) << ", EBADF "
3420 return -CEPHFS_EBADF
;
3423 if ((fh
->mode
& CEPH_FILE_MODE_WR
) && fh
->gen
!= fd_gen
)
3424 return -CEPHFS_EBADF
;
3426 if ((in
->flags
& I_ERROR_FILELOCK
) && fh
->has_any_filelocks())
3430 int have
= in
->caps_issued(&implemented
);
3432 bool waitfor_caps
= false;
3433 bool waitfor_commit
= false;
3435 if (have
& need
& CEPH_CAP_FILE_WR
) {
3437 if ((endoff
>= (loff_t
)in
->max_size
||
3438 endoff
> (loff_t
)(in
->size
<< 1)) &&
3439 endoff
> (loff_t
)in
->wanted_max_size
) {
3440 ldout(cct
, 10) << "wanted_max_size " << in
->wanted_max_size
<< " -> " << endoff
<< dendl
;
3441 in
->wanted_max_size
= endoff
;
3443 if (in
->wanted_max_size
> in
->max_size
&&
3444 in
->wanted_max_size
> in
->requested_max_size
)
3448 if (endoff
>= 0 && endoff
> (loff_t
)in
->max_size
) {
3449 ldout(cct
, 10) << "waiting on max_size, endoff " << endoff
<< " max_size " << in
->max_size
<< " on " << *in
<< dendl
;
3450 waitfor_caps
= true;
3452 if (!in
->cap_snaps
.empty()) {
3453 if (in
->cap_snaps
.rbegin()->second
.writing
) {
3454 ldout(cct
, 10) << "waiting on cap_snap write to complete" << dendl
;
3455 waitfor_caps
= true;
3457 for (auto &p
: in
->cap_snaps
) {
3458 if (p
.second
.dirty_data
) {
3459 waitfor_commit
= true;
3463 if (waitfor_commit
) {
3464 _flush(in
, new C_Client_FlushComplete(this, in
));
3465 ldout(cct
, 10) << "waiting for WRBUFFER to get dropped" << dendl
;
3470 if (!waitfor_caps
&& !waitfor_commit
) {
3471 if ((have
& need
) == need
) {
3472 int revoking
= implemented
& ~have
;
3473 ldout(cct
, 10) << "get_caps " << *in
<< " have " << ccap_string(have
)
3474 << " need " << ccap_string(need
) << " want " << ccap_string(want
)
3475 << " revoking " << ccap_string(revoking
)
3477 if ((revoking
& want
) == 0) {
3478 *phave
= need
| (have
& want
);
3479 in
->get_cap_ref(need
);
3484 ldout(cct
, 10) << "waiting for caps " << *in
<< " need " << ccap_string(need
) << " want " << ccap_string(want
) << dendl
;
3485 waitfor_caps
= true;
3488 if ((need
& CEPH_CAP_FILE_WR
) && in
->auth_cap
&&
3489 in
->auth_cap
->session
->readonly
)
3490 return -CEPHFS_EROFS
;
3492 if (in
->flags
& I_CAP_DROPPED
) {
3493 int mds_wanted
= in
->caps_mds_wanted();
3494 if ((mds_wanted
& need
) != need
) {
3495 int ret
= _renew_caps(in
);
3500 if (!(file_wanted
& ~mds_wanted
))
3501 in
->flags
&= ~I_CAP_DROPPED
;
3505 wait_on_list(in
->waitfor_caps
);
3506 else if (waitfor_commit
)
3507 wait_on_list(in
->waitfor_commit
);
3511 int Client::get_caps_used(Inode
*in
)
3513 unsigned used
= in
->caps_used();
3514 if (!(used
& CEPH_CAP_FILE_CACHE
) &&
3515 !objectcacher
->set_is_empty(&in
->oset
))
3516 used
|= CEPH_CAP_FILE_CACHE
;
3520 void Client::cap_delay_requeue(Inode
*in
)
3522 ldout(cct
, 10) << __func__
<< " on " << *in
<< dendl
;
3523 in
->hold_caps_until
= ceph_clock_now();
3524 in
->hold_caps_until
+= cct
->_conf
->client_caps_release_delay
;
3525 delayed_list
.push_back(&in
->delay_cap_item
);
3528 void Client::send_cap(Inode
*in
, MetaSession
*session
, Cap
*cap
,
3529 int flags
, int used
, int want
, int retain
,
3530 int flush
, ceph_tid_t flush_tid
)
3532 int held
= cap
->issued
| cap
->implemented
;
3533 int revoking
= cap
->implemented
& ~cap
->issued
;
3534 retain
&= ~revoking
;
3535 int dropping
= cap
->issued
& ~retain
;
3536 int op
= CEPH_CAP_OP_UPDATE
;
3538 ldout(cct
, 10) << __func__
<< " " << *in
3539 << " mds." << session
->mds_num
<< " seq " << cap
->seq
3540 << " used " << ccap_string(used
)
3541 << " want " << ccap_string(want
)
3542 << " flush " << ccap_string(flush
)
3543 << " retain " << ccap_string(retain
)
3544 << " held "<< ccap_string(held
)
3545 << " revoking " << ccap_string(revoking
)
3546 << " dropping " << ccap_string(dropping
)
3549 if (cct
->_conf
->client_inject_release_failure
&& revoking
) {
3550 const int would_have_issued
= cap
->issued
& retain
;
3551 const int would_have_implemented
= cap
->implemented
& (cap
->issued
| used
);
3553 // - tell the server we think issued is whatever they issued plus whatever we implemented
3554 // - leave what we have implemented in place
3555 ldout(cct
, 20) << __func__
<< " injecting failure to release caps" << dendl
;
3556 cap
->issued
= cap
->issued
| cap
->implemented
;
3558 // Make an exception for revoking xattr caps: we are injecting
3559 // failure to release other caps, but allow xattr because client
3560 // will block on xattr ops if it can't release these to MDS (#9800)
3561 const int xattr_mask
= CEPH_CAP_XATTR_SHARED
| CEPH_CAP_XATTR_EXCL
;
3562 cap
->issued
^= xattr_mask
& revoking
;
3563 cap
->implemented
^= xattr_mask
& revoking
;
3565 ldout(cct
, 20) << __func__
<< " issued " << ccap_string(cap
->issued
) << " vs " << ccap_string(would_have_issued
) << dendl
;
3566 ldout(cct
, 20) << __func__
<< " implemented " << ccap_string(cap
->implemented
) << " vs " << ccap_string(would_have_implemented
) << dendl
;
3569 cap
->issued
&= retain
;
3570 cap
->implemented
&= cap
->issued
| used
;
3573 snapid_t follows
= 0;
3576 follows
= in
->snaprealm
->get_snap_context().seq
;
3578 auto m
= make_message
<MClientCaps
>(op
,
3581 cap
->cap_id
, cap
->seq
,
3587 m
->caller_uid
= in
->cap_dirtier_uid
;
3588 m
->caller_gid
= in
->cap_dirtier_gid
;
3590 m
->head
.issue_seq
= cap
->issue_seq
;
3591 m
->set_tid(flush_tid
);
3593 m
->head
.uid
= in
->uid
;
3594 m
->head
.gid
= in
->gid
;
3595 m
->head
.mode
= in
->mode
;
3597 m
->head
.nlink
= in
->nlink
;
3599 if (flush
& CEPH_CAP_XATTR_EXCL
) {
3600 encode(in
->xattrs
, m
->xattrbl
);
3601 m
->head
.xattr_version
= in
->xattr_version
;
3605 m
->max_size
= in
->max_size
;
3606 m
->truncate_seq
= in
->truncate_seq
;
3607 m
->truncate_size
= in
->truncate_size
;
3608 m
->mtime
= in
->mtime
;
3609 m
->atime
= in
->atime
;
3610 m
->ctime
= in
->ctime
;
3611 m
->btime
= in
->btime
;
3612 m
->time_warp_seq
= in
->time_warp_seq
;
3613 m
->change_attr
= in
->change_attr
;
3615 if (!(flags
& MClientCaps::FLAG_PENDING_CAPSNAP
) &&
3616 !in
->cap_snaps
.empty() &&
3617 in
->cap_snaps
.rbegin()->second
.flush_tid
== 0)
3618 flags
|= MClientCaps::FLAG_PENDING_CAPSNAP
;
3621 if (flush
& CEPH_CAP_FILE_WR
) {
3622 m
->inline_version
= in
->inline_version
;
3623 m
->inline_data
= in
->inline_data
;
3626 in
->reported_size
= in
->size
;
3627 m
->set_snap_follows(follows
);
3629 if (cap
== in
->auth_cap
) {
3630 if (want
& CEPH_CAP_ANY_FILE_WR
) {
3631 m
->set_max_size(in
->wanted_max_size
);
3632 in
->requested_max_size
= in
->wanted_max_size
;
3633 ldout(cct
, 15) << "auth cap, requesting max_size " << in
->requested_max_size
<< dendl
;
3635 in
->requested_max_size
= 0;
3636 ldout(cct
, 15) << "auth cap, reset requested_max_size due to not wanting any file write cap" << dendl
;
3640 if (!session
->flushing_caps_tids
.empty())
3641 m
->set_oldest_flush_tid(*session
->flushing_caps_tids
.begin());
3643 session
->con
->send_message2(std::move(m
));
}

static bool is_max_size_approaching(Inode *in)
{
  /* mds will adjust max size according to the reported size */
  if (in->flushing_caps & CEPH_CAP_FILE_WR)
    return false;
  if (in->size >= in->max_size)
    return true;
  /* half of previous max_size increment has been used */
  if (in->max_size > in->reported_size &&
      (in->size << 1) >= in->max_size + in->reported_size)
    return true;
  return false;
}

static int adjust_caps_used_for_lazyio(int used, int issued, int implemented)
{
  if (!(used & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER)))
    return used;
  if (!(implemented & CEPH_CAP_FILE_LAZYIO))
    return used;

  if (issued & CEPH_CAP_FILE_LAZYIO) {
    if (!(issued & CEPH_CAP_FILE_CACHE)) {
      used &= ~CEPH_CAP_FILE_CACHE;
      used |= CEPH_CAP_FILE_LAZYIO;
    }
    if (!(issued & CEPH_CAP_FILE_BUFFER)) {
      used &= ~CEPH_CAP_FILE_BUFFER;
      used |= CEPH_CAP_FILE_LAZYIO;
    }
  } else {
    if (!(implemented & CEPH_CAP_FILE_CACHE)) {
      used &= ~CEPH_CAP_FILE_CACHE;
      used |= CEPH_CAP_FILE_LAZYIO;
    }
    if (!(implemented & CEPH_CAP_FILE_BUFFER)) {
      used &= ~CEPH_CAP_FILE_BUFFER;
      used |= CEPH_CAP_FILE_LAZYIO;
    }
  }
  return used;
}
3692 * Examine currently used and wanted versus held caps. Release, flush or ack
3693 * revoked caps to the MDS as appropriate.
3695 * @param in the inode to check
3696 * @param flags flags to apply to cap check
3698 void Client::check_caps(Inode
*in
, unsigned flags
)
3700 unsigned wanted
= in
->caps_wanted();
3701 unsigned used
= get_caps_used(in
);
3705 int issued
= in
->caps_issued(&implemented
);
3706 int revoking
= implemented
& ~issued
;
3708 int orig_used
= used
;
3709 used
= adjust_caps_used_for_lazyio(used
, issued
, implemented
);
3711 int retain
= wanted
| used
| CEPH_CAP_PIN
;
3712 if (!is_unmounting() && in
->nlink
> 0) {
3714 retain
|= CEPH_CAP_ANY
;
3715 } else if (in
->is_dir() &&
3716 (issued
& CEPH_CAP_FILE_SHARED
) &&
3717 (in
->flags
& I_COMPLETE
)) {
3718 // we do this here because we don't want to drop to Fs (and then
3719 // drop the Fs if we do a create!) if that alone makes us send lookups
3720 // to the MDS. Doing it in in->caps_wanted() has knock-on effects elsewhere
3721 wanted
= CEPH_CAP_ANY_SHARED
| CEPH_CAP_FILE_EXCL
;
3724 retain
|= CEPH_CAP_ANY_SHARED
;
3725 // keep RD only if we didn't have the file open RW,
3726 // because then the mds would revoke it anyway to
3727 // journal max_size=0.
3728 if (in
->max_size
== 0)
3729 retain
|= CEPH_CAP_ANY_RD
;
3733 ldout(cct
, 10) << __func__
<< " on " << *in
3734 << " wanted " << ccap_string(wanted
)
3735 << " used " << ccap_string(used
)
3736 << " issued " << ccap_string(issued
)
3737 << " revoking " << ccap_string(revoking
)
3738 << " flags=" << flags
3741 if (in
->snapid
!= CEPH_NOSNAP
)
3742 return; //snap caps last forever, can't write
3744 if (in
->caps
.empty())
3745 return; // guard if at end of func
3747 if (!(orig_used
& CEPH_CAP_FILE_BUFFER
) &&
3748 (revoking
& used
& (CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_LAZYIO
))) {
3750 used
&= ~(CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_LAZYIO
);
3754 for (auto &p
: in
->caps
) {
3755 mds_rank_t mds
= p
.first
;
3756 Cap
&cap
= p
.second
;
3758 MetaSession
*session
= &mds_sessions
.at(mds
);
3761 if (in
->auth_cap
&& &cap
!= in
->auth_cap
)
3762 cap_used
&= ~in
->auth_cap
->issued
;
3764 revoking
= cap
.implemented
& ~cap
.issued
;
3766 ldout(cct
, 10) << " cap mds." << mds
3767 << " issued " << ccap_string(cap
.issued
)
3768 << " implemented " << ccap_string(cap
.implemented
)
3769 << " revoking " << ccap_string(revoking
) << dendl
;
3771 if (in
->wanted_max_size
> in
->max_size
&&
3772 in
->wanted_max_size
> in
->requested_max_size
&&
3773 &cap
== in
->auth_cap
)
3776 /* approaching file_max? */
3777 if ((cap
.issued
& CEPH_CAP_FILE_WR
) &&
3778 &cap
== in
->auth_cap
&&
3779 is_max_size_approaching(in
)) {
3780 ldout(cct
, 10) << "size " << in
->size
<< " approaching max_size " << in
->max_size
3781 << ", reported " << in
->reported_size
<< dendl
;
3785 /* completed revocation? */
3786 if (revoking
&& (revoking
& cap_used
) == 0) {
3787 ldout(cct
, 10) << "completed revocation of " << ccap_string(cap
.implemented
& ~cap
.issued
) << dendl
;
3791 /* want more caps from mds? */
3792 if (wanted
& ~(cap
.wanted
| cap
.issued
))
3795 if (!revoking
&& is_unmounting() && (cap_used
== 0))
3798 if ((cap
.issued
& ~retain
) == 0 && // and we don't have anything we wouldn't like
3799 !in
->dirty_caps
) // and we have no dirty caps
3802 if (!(flags
& CHECK_CAPS_NODELAY
)) {
3803 ldout(cct
, 10) << "delaying cap release" << dendl
;
3804 cap_delay_requeue(in
);
3809 if (&cap
== in
->auth_cap
) {
3810 if (in
->flags
& I_KICK_FLUSH
) {
3811 ldout(cct
, 20) << " reflushing caps (check_caps) on " << *in
3812 << " to mds." << mds
<< dendl
;
3813 kick_flushing_caps(in
, session
);
3815 if (!in
->cap_snaps
.empty() &&
3816 in
->cap_snaps
.rbegin()->second
.flush_tid
== 0)
3822 ceph_tid_t flush_tid
;
3823 if (in
->auth_cap
== &cap
&& in
->dirty_caps
) {
3824 flushing
= mark_caps_flushing(in
, &flush_tid
);
3825 if (flags
& CHECK_CAPS_SYNCHRONOUS
)
3826 msg_flags
|= MClientCaps::FLAG_SYNC
;
3832 send_cap(in
, session
, &cap
, msg_flags
, cap_used
, wanted
, retain
,
3833 flushing
, flush_tid
);
3838 void Client::queue_cap_snap(Inode
*in
, SnapContext
& old_snapc
)
3840 int used
= get_caps_used(in
);
3841 int dirty
= in
->caps_dirty();
3842 ldout(cct
, 10) << __func__
<< " " << *in
<< " snapc " << old_snapc
<< " used " << ccap_string(used
) << dendl
;
3844 if (in
->cap_snaps
.size() &&
3845 in
->cap_snaps
.rbegin()->second
.writing
) {
3846 ldout(cct
, 10) << __func__
<< " already have pending cap_snap on " << *in
<< dendl
;
3848 } else if (in
->caps_dirty() ||
3849 (used
& CEPH_CAP_FILE_WR
) ||
3850 (dirty
& CEPH_CAP_ANY_WR
)) {
3851 const auto &capsnapem
= in
->cap_snaps
.emplace(std::piecewise_construct
, std::make_tuple(old_snapc
.seq
), std::make_tuple(in
));
3852 ceph_assert(capsnapem
.second
); /* element inserted */
3853 CapSnap
&capsnap
= capsnapem
.first
->second
;
3854 capsnap
.context
= old_snapc
;
3855 capsnap
.issued
= in
->caps_issued();
3856 capsnap
.dirty
= in
->caps_dirty();
3858 capsnap
.dirty_data
= (used
& CEPH_CAP_FILE_BUFFER
);
3860 capsnap
.uid
= in
->uid
;
3861 capsnap
.gid
= in
->gid
;
3862 capsnap
.mode
= in
->mode
;
3863 capsnap
.btime
= in
->btime
;
3864 capsnap
.xattrs
= in
->xattrs
;
3865 capsnap
.xattr_version
= in
->xattr_version
;
3866 capsnap
.cap_dirtier_uid
= in
->cap_dirtier_uid
;
3867 capsnap
.cap_dirtier_gid
= in
->cap_dirtier_gid
;
3869 if (used
& CEPH_CAP_FILE_WR
) {
3870 ldout(cct
, 10) << __func__
<< " WR used on " << *in
<< dendl
;
3871 capsnap
.writing
= 1;
3873 finish_cap_snap(in
, capsnap
, used
);
3876 ldout(cct
, 10) << __func__
<< " not dirty|writing on " << *in
<< dendl
;
3880 void Client::finish_cap_snap(Inode
*in
, CapSnap
&capsnap
, int used
)
3882 ldout(cct
, 10) << __func__
<< " " << *in
<< " capsnap " << (void *)&capsnap
<< " used " << ccap_string(used
) << dendl
;
3883 capsnap
.size
= in
->size
;
3884 capsnap
.mtime
= in
->mtime
;
3885 capsnap
.atime
= in
->atime
;
3886 capsnap
.ctime
= in
->ctime
;
3887 capsnap
.time_warp_seq
= in
->time_warp_seq
;
3888 capsnap
.change_attr
= in
->change_attr
;
3889 capsnap
.dirty
|= in
->caps_dirty();
3891 /* Only reset it if it wasn't set before */
3892 if (capsnap
.cap_dirtier_uid
== -1) {
3893 capsnap
.cap_dirtier_uid
= in
->cap_dirtier_uid
;
3894 capsnap
.cap_dirtier_gid
= in
->cap_dirtier_gid
;
3897 if (capsnap
.dirty
& CEPH_CAP_FILE_WR
) {
3898 capsnap
.inline_data
= in
->inline_data
;
3899 capsnap
.inline_version
= in
->inline_version
;
3902 if (used
& CEPH_CAP_FILE_BUFFER
) {
3903 capsnap
.writing
= 1;
3904 ldout(cct
, 10) << __func__
<< " " << *in
<< " cap_snap " << &capsnap
<< " used " << used
3905 << " WRBUFFER, delaying" << dendl
;
3907 capsnap
.dirty_data
= 0;
3912 void Client::send_flush_snap(Inode
*in
, MetaSession
*session
,
3913 snapid_t follows
, CapSnap
& capsnap
)
3915 auto m
= make_message
<MClientCaps
>(CEPH_CAP_OP_FLUSHSNAP
,
3916 in
->ino
, in
->snaprealm
->ino
, 0,
3917 in
->auth_cap
->mseq
, cap_epoch_barrier
);
3918 m
->caller_uid
= capsnap
.cap_dirtier_uid
;
3919 m
->caller_gid
= capsnap
.cap_dirtier_gid
;
3921 m
->set_client_tid(capsnap
.flush_tid
);
3922 m
->head
.snap_follows
= follows
;
3924 m
->head
.caps
= capsnap
.issued
;
3925 m
->head
.dirty
= capsnap
.dirty
;
3927 m
->head
.uid
= capsnap
.uid
;
3928 m
->head
.gid
= capsnap
.gid
;
3929 m
->head
.mode
= capsnap
.mode
;
3930 m
->btime
= capsnap
.btime
;
3932 m
->size
= capsnap
.size
;
3934 m
->head
.xattr_version
= capsnap
.xattr_version
;
3935 encode(capsnap
.xattrs
, m
->xattrbl
);
3937 m
->ctime
= capsnap
.ctime
;
3938 m
->btime
= capsnap
.btime
;
3939 m
->mtime
= capsnap
.mtime
;
3940 m
->atime
= capsnap
.atime
;
3941 m
->time_warp_seq
= capsnap
.time_warp_seq
;
3942 m
->change_attr
= capsnap
.change_attr
;
3944 if (capsnap
.dirty
& CEPH_CAP_FILE_WR
) {
3945 m
->inline_version
= in
->inline_version
;
3946 m
->inline_data
= in
->inline_data
;
3949 ceph_assert(!session
->flushing_caps_tids
.empty());
3950 m
->set_oldest_flush_tid(*session
->flushing_caps_tids
.begin());
3952 session
->con
->send_message2(std::move(m
));
3955 void Client::flush_snaps(Inode
*in
)
3957 ldout(cct
, 10) << "flush_snaps on " << *in
<< dendl
;
3958 ceph_assert(in
->cap_snaps
.size());
3961 ceph_assert(in
->auth_cap
);
3962 MetaSession
*session
= in
->auth_cap
->session
;
3964 for (auto &p
: in
->cap_snaps
) {
3965 CapSnap
&capsnap
= p
.second
;
3966 // only do new flush
3967 if (capsnap
.flush_tid
> 0)
3970 ldout(cct
, 10) << "flush_snaps mds." << session
->mds_num
3971 << " follows " << p
.first
3972 << " size " << capsnap
.size
3973 << " mtime " << capsnap
.mtime
3974 << " dirty_data=" << capsnap
.dirty_data
3975 << " writing=" << capsnap
.writing
3976 << " on " << *in
<< dendl
;
3977 if (capsnap
.dirty_data
|| capsnap
.writing
)
3980 capsnap
.flush_tid
= ++last_flush_tid
;
3981 session
->flushing_caps_tids
.insert(capsnap
.flush_tid
);
3982 in
->flushing_cap_tids
[capsnap
.flush_tid
] = 0;
3983 if (!in
->flushing_cap_item
.is_on_list())
3984 session
->flushing_caps
.push_back(&in
->flushing_cap_item
);
3986 send_flush_snap(in
, session
, p
.first
, capsnap
);
void Client::wait_on_list(list<ceph::condition_variable*>& ls)
{
  ceph::condition_variable cond;
  ls.push_back(&cond);
  std::unique_lock l{client_lock, std::adopt_lock};
  cond.wait(l);
  l.release();
  ls.remove(&cond);
}

void Client::signal_cond_list(list<ceph::condition_variable*>& ls)
{
  for (auto cond : ls) {
    cond->notify_all();
  }
}

void Client::wait_on_context_list(list<Context*>& ls)
{
  ceph::condition_variable cond;
  bool done = false;
  int r;
  ls.push_back(new C_Cond(cond, &done, &r));
  std::unique_lock l{client_lock, std::adopt_lock};
  cond.wait(l, [&done] { return done;});
  l.release();
}

void Client::signal_context_list(list<Context*>& ls)
{
  while (!ls.empty()) {
    ls.front()->complete(0);
    ls.pop_front();
  }
}
4026 void Client::wake_up_session_caps(MetaSession
*s
, bool reconnect
)
4028 for (const auto &cap
: s
->caps
) {
4029 auto &in
= cap
->inode
;
4031 in
.requested_max_size
= 0;
4032 in
.wanted_max_size
= 0;
4034 if (cap
->gen
< s
->cap_gen
) {
4035 // mds did not re-issue stale cap.
4036 cap
->issued
= cap
->implemented
= CEPH_CAP_PIN
;
4037 // make sure mds knows what we want.
4038 if (in
.caps_file_wanted() & ~cap
->wanted
)
4039 in
.flags
|= I_CAP_DROPPED
;
4042 signal_cond_list(in
.waitfor_caps
);
4047 // flush dirty data (from objectcache)
4049 class C_Client_CacheInvalidate
: public Context
{
4053 int64_t offset
, length
;
4055 C_Client_CacheInvalidate(Client
*c
, Inode
*in
, int64_t off
, int64_t len
) :
4056 client(c
), offset(off
), length(len
) {
4057 if (client
->use_faked_inos())
4058 ino
= vinodeno_t(in
->faked_ino
, CEPH_NOSNAP
);
4062 void finish(int r
) override
{
4063 // _async_invalidate takes the lock when it needs to, call this back from outside of lock.
4064 ceph_assert(ceph_mutex_is_not_locked_by_me(client
->client_lock
));
4065 client
->_async_invalidate(ino
, offset
, length
);
4069 void Client::_async_invalidate(vinodeno_t ino
, int64_t off
, int64_t len
)
4071 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
4072 if (!mref_reader
.is_state_satisfied())
4075 ldout(cct
, 10) << __func__
<< " " << ino
<< " " << off
<< "~" << len
<< dendl
;
4076 ino_invalidate_cb(callback_handle
, ino
, off
, len
);
4079 void Client::_schedule_invalidate_callback(Inode
*in
, int64_t off
, int64_t len
) {
4081 if (ino_invalidate_cb
)
4082 // we queue the invalidate, which calls the callback and decrements the ref
4083 async_ino_invalidator
.queue(new C_Client_CacheInvalidate(this, in
, off
, len
));
4086 void Client::_invalidate_inode_cache(Inode
*in
)
4088 ldout(cct
, 10) << __func__
<< " " << *in
<< dendl
;
4090 // invalidate our userspace inode cache
4091 if (cct
->_conf
->client_oc
) {
4092 objectcacher
->release_set(&in
->oset
);
4093 if (!objectcacher
->set_is_empty(&in
->oset
))
4094 lderr(cct
) << "failed to invalidate cache for " << *in
<< dendl
;
4097 _schedule_invalidate_callback(in
, 0, 0);
void Client::_invalidate_inode_cache(Inode *in, int64_t off, int64_t len)
  ldout(cct, 10) << __func__ << " " << *in << " " << off << "~" << len << dendl;

  // invalidate our userspace inode cache
  if (cct->_conf->client_oc) {
    vector<ObjectExtent> ls;
    Striper::file_to_extents(cct, in->ino, &in->layout, off, len, in->truncate_size, ls);
    objectcacher->discard_writeback(&in->oset, ls, nullptr);

  _schedule_invalidate_callback(in, off, len);

bool Client::_release(Inode *in)
  ldout(cct, 20) << "_release " << *in << dendl;
  if (in->cap_refs[CEPH_CAP_FILE_CACHE] == 0) {
    _invalidate_inode_cache(in);

bool Client::_flush(Inode *in, Context *onfinish)
  ldout(cct, 10) << "_flush " << *in << dendl;

  if (!in->oset.dirty_or_tx) {
    ldout(cct, 10) << " nothing to flush" << dendl;
    onfinish->complete(0);

  if (objecter->osdmap_pool_full(in->layout.pool_id)) {
    ldout(cct, 8) << __func__ << ": FULL, purging for ENOSPC" << dendl;
    objectcacher->purge_set(&in->oset);
    onfinish->complete(-CEPHFS_ENOSPC);

  return objectcacher->flush_set(&in->oset, onfinish);

void Client::_flush_range(Inode *in, int64_t offset, uint64_t size)
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));
  if (!in->oset.dirty_or_tx) {
    ldout(cct, 10) << " nothing to flush" << dendl;

  C_SaferCond onflush("Client::_flush_range flock");
  bool ret = objectcacher->file_flush(&in->oset, &in->layout, in->snaprealm->get_snap_context(),
                                      offset, size, &onflush);

  client_lock.unlock();

void Client::flush_set_callback(ObjectCacher::ObjectSet *oset)
  // std::scoped_lock l(client_lock);
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock)); // will be called via dispatch() -> objecter -> ...
  Inode *in = static_cast<Inode *>(oset->parent);

void Client::_flushed(Inode *in)
  ldout(cct, 10) << "_flushed " << *in << dendl;

  put_cap_ref(in, CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER);
// checks common to add_update_cap, handle_cap_grant
void Client::check_cap_issue(Inode *in, unsigned issued)
  unsigned had = in->caps_issued();

  if ((issued & CEPH_CAP_FILE_CACHE) &&
      !(had & CEPH_CAP_FILE_CACHE))

  if ((issued & CEPH_CAP_FILE_SHARED) !=
      (had & CEPH_CAP_FILE_SHARED)) {
    if (issued & CEPH_CAP_FILE_SHARED)

    clear_dir_complete_and_ordered(in, true);
void Client::add_update_cap(Inode *in, MetaSession *mds_session, uint64_t cap_id,
                            unsigned issued, unsigned wanted, unsigned seq, unsigned mseq,
                            inodeno_t realm, int flags, const UserPerm& cap_perms)
  if (!in->is_any_caps()) {
    ceph_assert(in->snaprealm == 0);
    in->snaprealm = get_snap_realm(realm);
    in->snaprealm->inodes_with_caps.push_back(&in->snaprealm_item);
    ldout(cct, 15) << __func__ << " first one, opened snaprealm " << in->snaprealm << dendl;
    ceph_assert(in->snaprealm);
    if ((flags & CEPH_CAP_FLAG_AUTH) &&
        realm != inodeno_t(-1) && in->snaprealm->ino != realm) {
      in->snaprealm_item.remove_myself();
      auto oldrealm = in->snaprealm;
      in->snaprealm = get_snap_realm(realm);
      in->snaprealm->inodes_with_caps.push_back(&in->snaprealm_item);
      put_snap_realm(oldrealm);

  mds_rank_t mds = mds_session->mds_num;
  const auto &capem = in->caps.emplace(std::piecewise_construct, std::forward_as_tuple(mds), std::forward_as_tuple(*in, mds_session));
  Cap &cap = capem.first->second;
  if (!capem.second) {
    if (cap.gen < mds_session->cap_gen)
      cap.issued = cap.implemented = CEPH_CAP_PIN;

     * auth mds of the inode changed. we received the cap export
     * message, but still haven't received the cap import message.
     * handle_cap_export() updated the new auth MDS' cap.
     * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
     * a message that was sent before the cap import message. So
     * don't remove caps.
    if (ceph_seq_cmp(seq, cap.seq) <= 0) {
      if (&cap != in->auth_cap)
        ldout(cct, 0) << "WARNING: " << "inode " << *in << " caps on mds." << mds << " != auth_cap." << dendl;
      ceph_assert(cap.cap_id == cap_id);

    issued |= cap.issued;
    flags |= CEPH_CAP_FLAG_AUTH;

  check_cap_issue(in, issued);

  if (flags & CEPH_CAP_FLAG_AUTH) {
    if (in->auth_cap != &cap &&
        (!in->auth_cap || ceph_seq_cmp(in->auth_cap->mseq, mseq) < 0)) {
      if (in->auth_cap && in->flushing_cap_item.is_on_list()) {
        ldout(cct, 10) << __func__ << " changing auth cap: "
                       << "add myself to new auth MDS' flushing caps list" << dendl;
        adjust_session_flushing_caps(in, in->auth_cap->session, mds_session);
      in->auth_cap = &cap;

  unsigned old_caps = cap.issued;
  cap.cap_id = cap_id;
  cap.issued = issued;
  cap.implemented |= issued;
  if (ceph_seq_cmp(mseq, cap.mseq) > 0)
    cap.wanted = wanted;
    cap.wanted |= wanted;
  cap.issue_seq = seq;
  cap.gen = mds_session->cap_gen;
  cap.latest_perms = cap_perms;
  ldout(cct, 10) << __func__ << " issued " << ccap_string(old_caps) << " -> " << ccap_string(cap.issued)
                 << " from mds." << mds

  if ((issued & ~old_caps) && in->auth_cap == &cap) {
    // non-auth MDS is revoking the newly granted caps ?
    for (auto &p : in->caps) {
      if (&p.second == &cap)
      if (p.second.implemented & ~p.second.issued & issued) {
        check_caps(in, CHECK_CAPS_NODELAY);

  if (issued & ~old_caps)
    signal_cond_list(in->waitfor_caps);
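// In short: add_update_cap() either creates a Cap for this MDS or merges the
// grant/import into the existing one. Along the way it may attach the inode to
// a (new) snaprealm, may switch the auth cap (migrating any in-flight flushing
// caps to the new session), and wakes waitfor_caps whenever bits beyond
// old_caps become issued.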
void Client::remove_cap(Cap *cap, bool queue_release)
  auto &in = cap->inode;
  MetaSession *session = cap->session;
  mds_rank_t mds = cap->session->mds_num;

  ldout(cct, 10) << __func__ << " mds." << mds << " on " << in << dendl;

  if (queue_release) {
    session->enqueue_cap_release(

  if (in.auth_cap == cap) {
    if (in.flushing_cap_item.is_on_list()) {
      ldout(cct, 10) << " removing myself from flushing_cap list" << dendl;
      in.flushing_cap_item.remove_myself();

  size_t n = in.caps.erase(mds);
  ceph_assert(n == 1);

  if (!in.is_any_caps()) {
    ldout(cct, 15) << __func__ << " last one, closing snaprealm " << in.snaprealm << dendl;
    in.snaprealm_item.remove_myself();
    put_snap_realm(in.snaprealm);

void Client::remove_all_caps(Inode *in)
  while (!in->caps.empty())
    remove_cap(&in->caps.begin()->second, true);

void Client::remove_session_caps(MetaSession *s, int err)
  ldout(cct, 10) << __func__ << " mds." << s->mds_num << dendl;

  while (s->caps.size()) {
    Cap *cap = *s->caps.begin();
    InodeRef in(&cap->inode);
    bool dirty_caps = false;
    if (in->auth_cap == cap) {
      dirty_caps = in->dirty_caps | in->flushing_caps;
      in->wanted_max_size = 0;
      in->requested_max_size = 0;
      if (in->has_any_filelocks())
        in->flags |= I_ERROR_FILELOCK;
    auto caps = cap->implemented;
    if (cap->wanted | cap->issued)
      in->flags |= I_CAP_DROPPED;
    remove_cap(cap, false);
    in->cap_snaps.clear();
      lderr(cct) << __func__ << " still has dirty|flushing caps on " << *in << dendl;
      if (in->flushing_caps) {
        num_flushing_caps--;
        in->flushing_cap_tids.clear();
      in->flushing_caps = 0;
      in->mark_caps_clean();
      put_inode(in.get());
    caps &= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER;
    if (caps && !in->caps_issued_mask(caps, true)) {
      if (err == -CEPHFS_EBLOCKLISTED) {
        if (in->oset.dirty_or_tx) {
          lderr(cct) << __func__ << " still has dirty data on " << *in << dendl;
          in->set_async_err(err);
        objectcacher->purge_set(&in->oset);
        objectcacher->release_set(&in->oset);
      _schedule_invalidate_callback(in.get(), 0, 0);
    signal_cond_list(in->waitfor_caps);

  s->flushing_caps_tids.clear();
  sync_cond.notify_all();
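// When the session is being torn down because the client was blocklisted
// (err == -CEPHFS_EBLOCKLISTED), buffered dirty data is purged rather than
// written back and the error is latched on the inode via set_async_err(), so
// the failure can be surfaced to the application later.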
int Client::_do_remount(bool retry_on_error)
  uint64_t max_retries = cct->_conf.get_val<uint64_t>("mds_max_retries_on_remount_failure");

  int r = remount_cb(callback_handle);
    retries_on_invalidate = 0;
    client_t whoami = get_nodeid();
               "failed to remount (to trim kernel dentries): "
               "errno = " << e << " (" << strerror(e) << ")" << dendl;
               "failed to remount (to trim kernel dentries): "
               "return code = " << r << dendl;
      (cct->_conf.get_val<bool>("client_die_on_failed_remount") ||
       cct->_conf.get_val<bool>("client_die_on_failed_dentry_invalidate")) &&
      !(retry_on_error && (++retries_on_invalidate < max_retries));
    if (should_abort && !is_unmounting()) {
      lderr(cct) << "failed to remount for kernel dentry trimming; quitting!" << dendl;

class C_Client_Remount : public Context  {
  explicit C_Client_Remount(Client *c) : client(c) {}
  void finish(int r) override {
    ceph_assert(r == 0);
    client->_do_remount(true);

void Client::_invalidate_kernel_dcache()
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())

  if (can_invalidate_dentries) {
    if (dentry_invalidate_cb && root->dir) {
      for (ceph::unordered_map<string, Dentry*>::iterator p = root->dir->dentries.begin();
           p != root->dir->dentries.end();
        if (p->second->inode)
          _schedule_invalidate_dentry_callback(p->second, false);
  } else if (remount_cb) {
    // when remounting a file system, linux kernel trims all unused dentries in the fs
    remount_finisher.queue(new C_Client_Remount(this));

void Client::_trim_negative_child_dentries(InodeRef& in)
  if (dir && dir->dentries.size() == dir->num_null_dentries) {
    for (auto p = dir->dentries.begin(); p != dir->dentries.end(); ) {
      Dentry *dn = p->second;
      ceph_assert(!dn->inode);
      if (dn->lru_is_expireable())
        unlink(dn, true, false);  // keep dir, drop dentry
    if (dir->dentries.empty()) {

  if (in->flags & I_SNAPDIR_OPEN) {
    InodeRef snapdir = open_snapdir(in.get());
    _trim_negative_child_dentries(snapdir);

class C_Client_CacheRelease : public Context  {
  C_Client_CacheRelease(Client *c, Inode *in) :
    if (client->use_faked_inos())
      ino = vinodeno_t(in->faked_ino, CEPH_NOSNAP);
  void finish(int r) override {
    ceph_assert(ceph_mutex_is_not_locked_by_me(client->client_lock));
    client->_async_inode_release(ino);

void Client::_async_inode_release(vinodeno_t ino)
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())

  ldout(cct, 10) << __func__ << " " << ino << dendl;
  ino_release_cb(callback_handle, ino);

void Client::_schedule_ino_release_callback(Inode *in) {
    // we queue the invalidate, which calls the callback and decrements the ref
    async_ino_releasor.queue(new C_Client_CacheRelease(this, in));
void Client::trim_caps(MetaSession *s, uint64_t max)
  mds_rank_t mds = s->mds_num;
  size_t caps_size = s->caps.size();
  ldout(cct, 10) << __func__ << " mds." << mds << " max " << max
                 << " caps " << caps_size << dendl;

  uint64_t trimmed = 0;
  auto p = s->caps.begin();
  std::set<Dentry *> to_trim; /* this avoids caps other than the one we're
                               * looking at from getting deleted during traversal. */
  while ((caps_size - trimmed) > max && !p.end()) {
    InodeRef in(&cap->inode);

    // Increment p early because it will be invalidated if cap
    // is deleted inside remove_cap
    if (in->caps.size() > 1 && cap != in->auth_cap) {
      int mine = cap->issued | cap->implemented;
      int oissued = in->auth_cap ? in->auth_cap->issued : 0;
      // disposable non-auth cap
      if (!(get_caps_used(in.get()) & ~oissued & mine)) {
        ldout(cct, 20) << " removing unused, unneeded non-auth cap on " << *in << dendl;
        cap = (remove_cap(cap, true), nullptr);
      ldout(cct, 20) << " trying to trim dentries for " << *in << dendl;
      _trim_negative_child_dentries(in);
      auto q = in->dentries.begin();
      while (q != in->dentries.end()) {
        if (dn->lru_is_expireable()) {
          if (can_invalidate_dentries &&
              dn->dir->parent_inode->ino == CEPH_INO_ROOT) {
            // Only issue one of these per DN for inodes in root: handle
            // others more efficiently by calling for root-child DNs at
            // the end of this function.
            _schedule_invalidate_dentry_callback(dn, true);
          ldout(cct, 20) << " queueing dentry for trimming: " << dn->name << dendl;
          ldout(cct, 20) << " not expirable: " << dn->name << dendl;
      if (in->ll_ref == 1 && in->ino != CEPH_INO_ROOT) {
        _schedule_ino_release_callback(in.get());
      if (all && in->ino != CEPH_INO_ROOT) {
        ldout(cct, 20) << __func__ << " counting as trimmed: " << *in << dendl;

  ldout(cct, 20) << " trimming queued dentries: " << dendl;
  for (const auto &dn : to_trim) {

  caps_size = s->caps.size();
  if (caps_size > (size_t)max)
    _invalidate_kernel_dcache();
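// trim_caps() strategy: disposable non-auth caps are dropped outright, unused
// dentries are expired (batched in to_trim so the cap iterator stays valid),
// low-level inode refs are released where possible, and if the session is
// still above its cap target a kernel dcache invalidation/remount is queued.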
void Client::force_session_readonly(MetaSession *s)
  for (xlist<Cap*>::iterator p = s->caps.begin(); !p.end(); ++p) {
    auto &in = (*p)->inode;
    if (in.caps_wanted() & CEPH_CAP_FILE_WR)
      signal_cond_list(in.waitfor_caps);

int Client::mark_caps_flushing(Inode *in, ceph_tid_t* ptid)
  MetaSession *session = in->auth_cap->session;

  int flushing = in->dirty_caps;
  ceph_assert(flushing);

  ceph_tid_t flush_tid = ++last_flush_tid;
  in->flushing_cap_tids[flush_tid] = flushing;

  if (!in->flushing_caps) {
    ldout(cct, 10) << __func__ << " " << ccap_string(flushing) << " " << *in << dendl;
    num_flushing_caps++;
    ldout(cct, 10) << __func__ << " (more) " << ccap_string(flushing) << " " << *in << dendl;

  in->flushing_caps |= flushing;
  in->mark_caps_clean();

  if (!in->flushing_cap_item.is_on_list())
    session->flushing_caps.push_back(&in->flushing_cap_item);
  session->flushing_caps_tids.insert(flush_tid);
void Client::adjust_session_flushing_caps(Inode *in, MetaSession *old_s, MetaSession *new_s)
  for (auto &p : in->cap_snaps) {
    CapSnap &capsnap = p.second;
    if (capsnap.flush_tid > 0) {
      old_s->flushing_caps_tids.erase(capsnap.flush_tid);
      new_s->flushing_caps_tids.insert(capsnap.flush_tid);
  for (map<ceph_tid_t, int>::iterator it = in->flushing_cap_tids.begin();
       it != in->flushing_cap_tids.end();
    old_s->flushing_caps_tids.erase(it->first);
    new_s->flushing_caps_tids.insert(it->first);
  new_s->flushing_caps.push_back(&in->flushing_cap_item);

 * Flush all caps back to the MDS. Because the callers generally wait on the
 * result of this function (syncfs and umount cases), we set
 * CHECK_CAPS_SYNCHRONOUS on the last check_caps call.
void Client::flush_caps_sync()
  ldout(cct, 10) << __func__ << dendl;
  xlist<Inode*>::iterator p = delayed_list.begin();
    unsigned flags = CHECK_CAPS_NODELAY;
    delayed_list.pop_front();
    if (p.end() && dirty_list.empty())
      flags |= CHECK_CAPS_SYNCHRONOUS;
    check_caps(in, flags);

  p = dirty_list.begin();
    unsigned flags = CHECK_CAPS_NODELAY;
      flags |= CHECK_CAPS_SYNCHRONOUS;
    check_caps(in, flags);
void Client::wait_sync_caps(Inode *in, ceph_tid_t want)
  while (in->flushing_caps) {
    map<ceph_tid_t, int>::iterator it = in->flushing_cap_tids.begin();
    ceph_assert(it != in->flushing_cap_tids.end());
    if (it->first > want)
    ldout(cct, 10) << __func__ << " on " << *in << " flushing "
                   << ccap_string(it->second) << " want " << want
                   << " last " << it->first << dendl;
    wait_on_list(in->waitfor_caps);

void Client::wait_sync_caps(ceph_tid_t want)
  ldout(cct, 10) << __func__ << " want " << want << " (last is " << last_flush_tid << ", "
                 << num_flushing_caps << " total flushing)" << dendl;
  for (auto &p : mds_sessions) {
    MetaSession *s = &p.second;
    if (s->flushing_caps_tids.empty())
    ceph_tid_t oldest_tid = *s->flushing_caps_tids.begin();
    if (oldest_tid <= want) {
      ldout(cct, 10) << " waiting on mds." << p.first << " tid " << oldest_tid
                     << " (want " << want << ")" << dendl;
      std::unique_lock l{client_lock, std::adopt_lock};
void Client::kick_flushing_caps(Inode *in, MetaSession *session)
  in->flags &= ~I_KICK_FLUSH;

  Cap *cap = in->auth_cap;
  ceph_assert(cap->session == session);

  ceph_tid_t last_snap_flush = 0;
  for (auto p = in->flushing_cap_tids.rbegin();
       p != in->flushing_cap_tids.rend();
      last_snap_flush = p->first;

  int wanted = in->caps_wanted();
  int used = get_caps_used(in) | in->caps_dirty();
  auto it = in->cap_snaps.begin();
  for (auto& p : in->flushing_cap_tids) {
      int msg_flags = p.first < last_snap_flush ? MClientCaps::FLAG_PENDING_CAPSNAP : 0;
      send_cap(in, session, cap, msg_flags, used, wanted, (cap->issued | cap->implemented),
      ceph_assert(it != in->cap_snaps.end());
      ceph_assert(it->second.flush_tid == p.first);
      send_flush_snap(in, session, it->first, it->second);

void Client::kick_flushing_caps(MetaSession *session)
  mds_rank_t mds = session->mds_num;
  ldout(cct, 10) << __func__ << " mds." << mds << dendl;

  for (xlist<Inode*>::iterator p = session->flushing_caps.begin(); !p.end(); ++p) {
    if (in->flags & I_KICK_FLUSH) {
      ldout(cct, 20) << " reflushing caps on " << *in << " to mds." << mds << dendl;
      kick_flushing_caps(in, session);

void Client::early_kick_flushing_caps(MetaSession *session)
  for (xlist<Inode*>::iterator p = session->flushing_caps.begin(); !p.end(); ++p) {
    Cap *cap = in->auth_cap;

    // if flushing caps were revoked, we re-send the cap flush in the client
    // reconnect stage. This guarantees that the MDS processes the cap flush
    // message before issuing the flushing caps to other clients.
    if ((in->flushing_caps & in->auth_cap->issued) == in->flushing_caps) {
      in->flags |= I_KICK_FLUSH;

    ldout(cct, 20) << " reflushing caps (early_kick) on " << *in
                   << " to mds." << session->mds_num << dendl;
    // send_reconnect() also will reset these sequence numbers. make sure
    // sequence numbers in cap flush message match later reconnect message.
    cap->issued = cap->implemented;

    kick_flushing_caps(in, session);
void SnapRealm::build_snap_context()
  set<snapid_t> snaps;
  snapid_t max_seq = seq;

  // start with prior_parents?
  for (unsigned i=0; i<prior_parent_snaps.size(); i++)
    snaps.insert(prior_parent_snaps[i]);

  // current parent's snaps
    const SnapContext& psnapc = pparent->get_snap_context();
    for (unsigned i=0; i<psnapc.snaps.size(); i++)
      if (psnapc.snaps[i] >= parent_since)
        snaps.insert(psnapc.snaps[i]);
    if (psnapc.seq > max_seq)
      max_seq = psnapc.seq;

  for (unsigned i=0; i<my_snaps.size(); i++)
    snaps.insert(my_snaps[i]);

  cached_snap_context.seq = max_seq;
  cached_snap_context.snaps.resize(0);
  cached_snap_context.snaps.reserve(snaps.size());
  for (set<snapid_t>::reverse_iterator p = snaps.rbegin(); p != snaps.rend(); ++p)
    cached_snap_context.snaps.push_back(*p);
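// The cached snap context is the union of this realm's own snaps, the snaps
// inherited from prior parents, and the current parent's snaps at or after
// parent_since, stored newest-first with seq set to the largest seq observed.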
void Client::invalidate_snaprealm_and_children(SnapRealm *realm)
  while (!q.empty()) {
    ldout(cct, 10) << __func__ << " " << *realm << dendl;
    realm->invalidate_cache();
    for (set<SnapRealm*>::iterator p = realm->pchildren.begin();
         p != realm->pchildren.end();

SnapRealm *Client::get_snap_realm(inodeno_t r)
  SnapRealm *realm = snap_realms[r];
    snap_realms[r] = realm = new SnapRealm(r);
  ldout(cct, 20) << __func__ << " " << r << " " << realm << " " << realm->nref << " -> " << (realm->nref + 1) << dendl;

SnapRealm *Client::get_snap_realm_maybe(inodeno_t r)
  if (snap_realms.count(r) == 0) {
    ldout(cct, 20) << __func__ << " " << r << " fail" << dendl;
  SnapRealm *realm = snap_realms[r];
  ldout(cct, 20) << __func__ << " " << r << " " << realm << " " << realm->nref << " -> " << (realm->nref + 1) << dendl;

void Client::put_snap_realm(SnapRealm *realm)
  ldout(cct, 20) << __func__ << " " << realm->ino << " " << realm
                 << " " << realm->nref << " -> " << (realm->nref - 1) << dendl;
  if (--realm->nref == 0) {
    snap_realms.erase(realm->ino);
    if (realm->pparent) {
      realm->pparent->pchildren.erase(realm);
      put_snap_realm(realm->pparent);

bool Client::adjust_realm_parent(SnapRealm *realm, inodeno_t parent)
  if (realm->parent != parent) {
    ldout(cct, 10) << __func__ << " " << *realm
                   << " " << realm->parent << " -> " << parent << dendl;
    realm->parent = parent;
    if (realm->pparent) {
      realm->pparent->pchildren.erase(realm);
      put_snap_realm(realm->pparent);
    realm->pparent = get_snap_realm(parent);
    realm->pparent->pchildren.insert(realm);

static bool has_new_snaps(const SnapContext& old_snapc,
                          const SnapContext& new_snapc)
  return !new_snapc.snaps.empty() && new_snapc.snaps[0] > old_snapc.seq;
void Client::update_snap_trace(const bufferlist& bl, SnapRealm **realm_ret, bool flush)
  SnapRealm *first_realm = NULL;
  ldout(cct, 10) << __func__ << " len " << bl.length() << dendl;

  map<SnapRealm*, SnapContext> dirty_realms;

  auto p = bl.cbegin();
    SnapRealm *realm = get_snap_realm(info.ino());

    bool invalidate = false;

    if (info.seq() > realm->seq) {
      ldout(cct, 10) << __func__ << " " << *realm << " seq " << info.seq() << " > " << realm->seq
        // writeback any dirty caps _before_ updating snap list (i.e. with old snap info)
        // flush me + children
        while (!q.empty()) {
          SnapRealm *realm = q.front();
          for (set<SnapRealm*>::iterator p = realm->pchildren.begin();
               p != realm->pchildren.end();
          if (dirty_realms.count(realm) == 0) {
            dirty_realms[realm] = realm->get_snap_context();

      realm->seq = info.seq();
      realm->created = info.created();
      realm->parent_since = info.parent_since();
      realm->prior_parent_snaps = info.prior_parent_snaps;
      realm->my_snaps = info.my_snaps;

    // _always_ verify parent
    if (adjust_realm_parent(realm, info.parent()))

      invalidate_snaprealm_and_children(realm);
      ldout(cct, 15) << __func__ << " " << *realm << " self|parent updated" << dendl;
      ldout(cct, 15) << "  snapc " << realm->get_snap_context() << dendl;
      ldout(cct, 10) << __func__ << " " << *realm << " seq " << info.seq()
                     << " <= " << realm->seq << " and same parent, SKIPPING" << dendl;

      first_realm = realm;
      put_snap_realm(realm);

  for (auto &[realm, snapc] : dirty_realms) {
    // if there are new snaps ?
    if (has_new_snaps(snapc, realm->get_snap_context())) {
      ldout(cct, 10) << " flushing caps on " << *realm << dendl;
      for (auto&& in : realm->inodes_with_caps) {
        queue_cap_snap(in, snapc);
      ldout(cct, 10) << " no new snap on " << *realm << dendl;
    put_snap_realm(realm);

    *realm_ret = first_realm;
    put_snap_realm(first_realm);
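// update_snap_trace() walks the realms encoded in the snap trace; when a
// realm's seq advances it records the *old* snap context for that realm and
// all of its children in dirty_realms, so dirty caps can be flushed
// (queue_cap_snap) against the pre-update context before the new snaps apply.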
void Client::handle_snap(const MConstRef<MClientSnap>& m)
  ldout(cct, 10) << __func__ << " " << *m << dendl;
  mds_rank_t mds = mds_rank_t(m->get_source().num());

  std::scoped_lock cl(client_lock);
  MetaSession *session = _get_mds_session(mds, m->get_connection().get());

  got_mds_push(session);

  map<Inode*, SnapContext> to_move;
  SnapRealm *realm = 0;

  if (m->head.op == CEPH_SNAP_OP_SPLIT) {
    ceph_assert(m->head.split);
    auto p = m->bl.cbegin();
    ceph_assert(info.ino() == m->head.split);

    // flush, then move, ino's.
    realm = get_snap_realm(info.ino());
    ldout(cct, 10) << " splitting off " << *realm << dendl;
    for (auto& ino : m->split_inos) {
      vinodeno_t vino(ino, CEPH_NOSNAP);
      if (inode_map.count(vino)) {
        Inode *in = inode_map[vino];
        if (!in->snaprealm || in->snaprealm == realm)
        if (in->snaprealm->created > info.created()) {
          ldout(cct, 10) << " NOT moving " << *in << " from _newer_ realm "
                         << *in->snaprealm << dendl;
        ldout(cct, 10) << " moving " << *in << " from " << *in->snaprealm << dendl;
        in->snaprealm_item.remove_myself();
        to_move[in] = in->snaprealm->get_snap_context();
        put_snap_realm(in->snaprealm);

    // move child snaprealms, too
    for (auto& child_realm : m->split_realms) {
      ldout(cct, 10) << "adjusting snaprealm " << child_realm << " parent" << dendl;
      SnapRealm *child = get_snap_realm_maybe(child_realm);
      adjust_realm_parent(child, realm->ino);
      put_snap_realm(child);

  update_snap_trace(m->bl, NULL, m->head.op != CEPH_SNAP_OP_DESTROY);

    for (auto p = to_move.begin(); p != to_move.end(); ++p) {
      Inode *in = p->first;
      in->snaprealm = realm;
      realm->inodes_with_caps.push_back(&in->snaprealm_item);
      // queue for snap writeback
      if (has_new_snaps(p->second, realm->get_snap_context()))
        queue_cap_snap(in, p->second);
    put_snap_realm(realm);

void Client::handle_quota(const MConstRef<MClientQuota>& m)
  mds_rank_t mds = mds_rank_t(m->get_source().num());

  std::scoped_lock cl(client_lock);
  MetaSession *session = _get_mds_session(mds, m->get_connection().get());

  got_mds_push(session);

  ldout(cct, 10) << __func__ << " " << *m << " from mds." << mds << dendl;

  vinodeno_t vino(m->ino, CEPH_NOSNAP);
  if (inode_map.count(vino)) {
    in = inode_map[vino];
      in->quota = m->quota;
      in->rstat = m->rstat;
void Client::handle_caps(const MConstRef<MClientCaps>& m)
  mds_rank_t mds = mds_rank_t(m->get_source().num());

  std::scoped_lock cl(client_lock);
  MetaSession *session = _get_mds_session(mds, m->get_connection().get());

  if (m->osd_epoch_barrier && !objecter->have_map(m->osd_epoch_barrier)) {
    // Pause RADOS operations until we see the required epoch
    objecter->set_epoch_barrier(m->osd_epoch_barrier);

  if (m->osd_epoch_barrier > cap_epoch_barrier) {
    // Record the barrier so that we will transmit it to MDS when releasing
    set_cap_epoch_barrier(m->osd_epoch_barrier);

  got_mds_push(session);

  vinodeno_t vino(m->get_ino(), CEPH_NOSNAP);
  if (auto it = inode_map.find(vino); it != inode_map.end()) {
    if (m->get_op() == CEPH_CAP_OP_IMPORT) {
      ldout(cct, 5) << __func__ << " don't have vino " << vino << " on IMPORT, immediately releasing" << dendl;
      session->enqueue_cap_release(
      ldout(cct, 5) << __func__ << " don't have vino " << vino << ", dropping" << dendl;

    // in case the mds is waiting on e.g. a revocation
    flush_cap_releases();

  switch (m->get_op()) {
    case CEPH_CAP_OP_EXPORT: return handle_cap_export(session, in, m);
    case CEPH_CAP_OP_FLUSHSNAP_ACK: return handle_cap_flushsnap_ack(session, in, m);
    case CEPH_CAP_OP_IMPORT: /* no return */ handle_cap_import(session, in, m);

  if (auto it = in->caps.find(mds); it != in->caps.end()) {
    Cap &cap = in->caps.at(mds);

    switch (m->get_op()) {
      case CEPH_CAP_OP_TRUNC: return handle_cap_trunc(session, in, m);
      case CEPH_CAP_OP_IMPORT:
      case CEPH_CAP_OP_REVOKE:
      case CEPH_CAP_OP_GRANT: return handle_cap_grant(session, in, &cap, m);
      case CEPH_CAP_OP_FLUSH_ACK: return handle_cap_flush_ack(session, in, &cap, m);
    ldout(cct, 5) << __func__ << " don't have " << *in << " cap on mds." << mds << dendl;
void Client::handle_cap_import(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m)
  mds_rank_t mds = session->mds_num;

  ldout(cct, 5) << __func__ << " ino " << m->get_ino() << " mseq " << m->get_mseq()
                << " IMPORT from mds." << mds << dendl;

  const mds_rank_t peer_mds = mds_rank_t(m->peer.mds);
  if (auto it = in->caps.find(peer_mds); m->peer.cap_id && it != in->caps.end()) {
    cap_perms = cap->latest_perms;

  SnapRealm *realm = NULL;
  update_snap_trace(m->snapbl, &realm);

  int issued = m->get_caps();
  int wanted = m->get_wanted();
  add_update_cap(in, session, m->get_cap_id(),
                 issued, wanted, m->get_seq(), m->get_mseq(),
                 m->get_realm(), CEPH_CAP_FLAG_AUTH, cap_perms);

  if (cap && cap->cap_id == m->peer.cap_id) {
      remove_cap(cap, (m->peer.flags & CEPH_CAP_FLAG_RELEASE));

    put_snap_realm(realm);

  if (in->auth_cap && in->auth_cap->session == session) {
    if (!(wanted & CEPH_CAP_ANY_FILE_WR) ||
        in->requested_max_size > m->get_max_size()) {
      in->requested_max_size = 0;
      ldout(cct, 15) << "reset requested_max_size after cap import" << dendl;
    // reflush any/all caps (if we are now the auth_cap)
    kick_flushing_caps(in, session);

void Client::handle_cap_export(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m)
  mds_rank_t mds = session->mds_num;

  ldout(cct, 5) << __func__ << " ino " << m->get_ino() << " mseq " << m->get_mseq()
                << " EXPORT from mds." << mds << dendl;

  auto it = in->caps.find(mds);
  if (it != in->caps.end()) {
    Cap &cap = it->second;
    if (cap.cap_id == m->get_cap_id()) {
      if (m->peer.cap_id) {
        const auto peer_mds = mds_rank_t(m->peer.mds);
        MetaSession *tsession = _get_or_open_mds_session(peer_mds);
        auto it = in->caps.find(peer_mds);
        if (it != in->caps.end()) {
          Cap &tcap = it->second;
          if (tcap.cap_id == m->peer.cap_id &&
              ceph_seq_cmp(tcap.seq, m->peer.seq) < 0) {
            tcap.cap_id = m->peer.cap_id;
            tcap.seq = m->peer.seq - 1;
            tcap.issue_seq = tcap.seq;
            tcap.issued |= cap.issued;
            tcap.implemented |= cap.issued;
            if (&cap == in->auth_cap)
              in->auth_cap = &tcap;
            if (in->auth_cap == &tcap && in->flushing_cap_item.is_on_list())
              adjust_session_flushing_caps(in, session, tsession);
          add_update_cap(in, tsession, m->peer.cap_id, cap.issued, 0,
                         m->peer.seq - 1, m->peer.mseq, (uint64_t)-1,
                         &cap == in->auth_cap ? CEPH_CAP_FLAG_AUTH : 0,
        if (cap.wanted | cap.issued)
          in->flags |= I_CAP_DROPPED;

      remove_cap(&cap, false);
void Client::handle_cap_trunc(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m)
  mds_rank_t mds = session->mds_num;
  ceph_assert(in->caps.count(mds));

  ldout(cct, 10) << __func__ << " on ino " << *in
                 << " size " << in->size << " -> " << m->get_size()

  in->caps_issued(&issued);
  issued |= in->caps_dirty();
  update_inode_file_size(in, issued, m->get_size(),
                         m->get_truncate_seq(), m->get_truncate_size());

void Client::handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, const MConstRef<MClientCaps>& m)
  ceph_tid_t flush_ack_tid = m->get_client_tid();
  int dirty = m->get_dirty();

  auto it = in->flushing_cap_tids.begin();
  if (it->first < flush_ack_tid) {
    ldout(cct, 0) << __func__ << " mds." << session->mds_num
                  << " got unexpected flush ack tid " << flush_ack_tid
                  << " expected is " << it->first << dendl;
  for (; it != in->flushing_cap_tids.end(); ) {
    if (it->first == flush_ack_tid)
      cleaned = it->second;
    if (it->first <= flush_ack_tid) {
      session->flushing_caps_tids.erase(it->first);
      in->flushing_cap_tids.erase(it++);
    cleaned &= ~it->second;

  ldout(cct, 5) << __func__ << " mds." << session->mds_num
                << " cleaned " << ccap_string(cleaned) << " on " << *in
                << " with " << ccap_string(dirty) << dendl;

    signal_cond_list(in->waitfor_caps);
    if (session->flushing_caps_tids.empty() ||
        *session->flushing_caps_tids.begin() > flush_ack_tid)
      sync_cond.notify_all();

      in->cap_dirtier_uid = -1;
      in->cap_dirtier_gid = -1;
      ldout(cct, 10) << " tid " << m->get_client_tid() << " != any cap bit tids" << dendl;

    if (in->flushing_caps) {
      ldout(cct, 5) << "  flushing_caps " << ccap_string(in->flushing_caps)
                    << " -> " << ccap_string(in->flushing_caps & ~cleaned) << dendl;
      in->flushing_caps &= ~cleaned;
      if (in->flushing_caps == 0) {
        ldout(cct, 10) << " " << *in << " !flushing" << dendl;
        num_flushing_caps--;
        if (in->flushing_cap_tids.empty())
          in->flushing_cap_item.remove_myself();
      if (!in->caps_dirty())
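// A flush ack names the newest tid it covers: every entry with a tid at or
// below it is dropped as acknowledged, while the bits actually cleared from
// in->flushing_caps are those recorded for the exact tid minus any bits still
// pending in later (unacked) flush entries.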
void Client::handle_cap_flushsnap_ack(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m)
  ceph_tid_t flush_ack_tid = m->get_client_tid();
  mds_rank_t mds = session->mds_num;
  ceph_assert(in->caps.count(mds));
  snapid_t follows = m->get_snap_follows();

  if (auto it = in->cap_snaps.find(follows); it != in->cap_snaps.end()) {
    auto& capsnap = it->second;
    if (flush_ack_tid != capsnap.flush_tid) {
      ldout(cct, 10) << " tid " << flush_ack_tid << " != " << capsnap.flush_tid << dendl;
      InodeRef tmp_ref(in);
      ldout(cct, 5) << __func__ << " mds." << mds << " flushed snap follows " << follows
                    << " on " << *in << dendl;
      session->flushing_caps_tids.erase(capsnap.flush_tid);
      in->flushing_cap_tids.erase(capsnap.flush_tid);
      if (in->flushing_caps == 0 && in->flushing_cap_tids.empty())
        in->flushing_cap_item.remove_myself();
      in->cap_snaps.erase(it);

      signal_cond_list(in->waitfor_caps);
      if (session->flushing_caps_tids.empty() ||
          *session->flushing_caps_tids.begin() > flush_ack_tid)
        sync_cond.notify_all();
    ldout(cct, 5) << __func__ << " DUP(?) mds." << mds << " flushed snap follows " << follows
                  << " on " << *in << dendl;
    // we may not have it if we send multiple FLUSHSNAP requests and (got multiple FLUSHEDSNAPs back)
class C_Client_DentryInvalidate : public Context  {
  C_Client_DentryInvalidate(Client *c, Dentry *dn, bool del) :
    client(c), name(dn->name) {
      if (client->use_faked_inos()) {
        dirino.ino = dn->dir->parent_inode->faked_ino;
          ino.ino = dn->inode->faked_ino;
        dirino = dn->dir->parent_inode->vino();
          ino = dn->inode->vino();
        ino.ino = inodeno_t();
  void finish(int r) override {
    // _async_dentry_invalidate is responsible for its own locking
    ceph_assert(ceph_mutex_is_not_locked_by_me(client->client_lock));
    client->_async_dentry_invalidate(dirino, ino, name);

void Client::_async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name)
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())

  ldout(cct, 10) << __func__ << " '" << name << "' ino " << ino
                 << " in dir " << dirino << dendl;
  dentry_invalidate_cb(callback_handle, dirino, ino, name.c_str(), name.length());

void Client::_schedule_invalidate_dentry_callback(Dentry *dn, bool del)
  if (dentry_invalidate_cb && dn->inode->ll_ref > 0)
    async_dentry_invalidator.queue(new C_Client_DentryInvalidate(this, dn, del));

void Client::_try_to_trim_inode(Inode *in, bool sched_inval)
  int ref = in->get_nref();
  ldout(cct, 5) << __func__ << " in " << *in << dendl;

  if (in->dir && !in->dir->dentries.empty()) {
    for (auto p = in->dir->dentries.begin();
         p != in->dir->dentries.end(); ) {
      Dentry *dn = p->second;
      /* rmsnap removes whole subtree, need trim inodes recursively.
       * we don't need to invalidate dentries recursively. because
       * invalidating a directory dentry effectively invalidate
      if (in->snapid != CEPH_NOSNAP && dn->inode && dn->inode->is_dir())
        _try_to_trim_inode(dn->inode.get(), false);
      if (dn->lru_is_expireable())
        unlink(dn, true, false);  // keep dir, drop dentry
    if (in->dir->dentries.empty()) {

  if (ref > 1 && (in->flags & I_SNAPDIR_OPEN)) {
    InodeRef snapdir = open_snapdir(in);
    _try_to_trim_inode(snapdir.get(), false);

    auto q = in->dentries.begin();
    while (q != in->dentries.end()) {
      if (in->ll_ref > 0 && sched_inval) {
        // FIXME: we play lots of unlink/link tricks when handling MDS replies,
        // so in->dentries doesn't always reflect the state of kernel's dcache.
        _schedule_invalidate_dentry_callback(dn, true);
      unlink(dn, true, true);
void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, const MConstRef<MClientCaps>& m)
  mds_rank_t mds = session->mds_num;
  int used = get_caps_used(in);
  int wanted = in->caps_wanted();

  const unsigned new_caps = m->get_caps();
  const bool was_stale = session->cap_gen > cap->gen;
  ldout(cct, 5) << __func__ << " on in " << m->get_ino()
                << " mds." << mds << " seq " << m->get_seq()
                << " caps now " << ccap_string(new_caps)
                << " was " << ccap_string(cap->issued)
                << (was_stale ? " (stale)" : "") << dendl;

    cap->issued = cap->implemented = CEPH_CAP_PIN;
  cap->seq = m->get_seq();
  cap->gen = session->cap_gen;

  check_cap_issue(in, new_caps);

  in->caps_issued(&issued);
  issued |= in->caps_dirty();

  if ((new_caps & CEPH_CAP_AUTH_SHARED) &&
      !(issued & CEPH_CAP_AUTH_EXCL)) {
    in->mode = m->head.mode;
    in->uid = m->head.uid;
    in->gid = m->head.gid;
    in->btime = m->btime;

  bool deleted_inode = false;
  if ((new_caps & CEPH_CAP_LINK_SHARED) &&
      !(issued & CEPH_CAP_LINK_EXCL)) {
    in->nlink = m->head.nlink;
    if (in->nlink == 0 &&
        (new_caps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
      deleted_inode = true;

  if (!(issued & CEPH_CAP_XATTR_EXCL) &&
      m->xattrbl.length() &&
      m->head.xattr_version > in->xattr_version) {
    auto p = m->xattrbl.cbegin();
    decode(in->xattrs, p);
    in->xattr_version = m->head.xattr_version;

  if ((new_caps & CEPH_CAP_FILE_SHARED) && m->dirstat_is_valid()) {
    in->dirstat.nfiles = m->get_nfiles();
    in->dirstat.nsubdirs = m->get_nsubdirs();

  if (new_caps & CEPH_CAP_ANY_RD) {
    update_inode_file_time(in, issued, m->get_time_warp_seq(),
                           m->get_ctime(), m->get_mtime(), m->get_atime());

  if (new_caps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
    in->layout = m->get_layout();
    update_inode_file_size(in, issued, m->get_size(),
                           m->get_truncate_seq(), m->get_truncate_size());

  if (m->inline_version > in->inline_version) {
    in->inline_data = m->inline_data;
    in->inline_version = m->inline_version;

  /* always take a newer change attr */
  if (m->get_change_attr() > in->change_attr)
    in->change_attr = m->get_change_attr();

  if (cap == in->auth_cap &&
      (new_caps & CEPH_CAP_ANY_FILE_WR) &&
      (m->get_max_size() != in->max_size)) {
    ldout(cct, 10) << "max_size " << in->max_size << " -> " << m->get_max_size() << dendl;
    in->max_size = m->get_max_size();
    if (in->max_size > in->wanted_max_size) {
      in->wanted_max_size = 0;
      in->requested_max_size = 0;

  if ((was_stale || m->get_op() == CEPH_CAP_OP_IMPORT) &&
      (wanted & ~(cap->wanted | new_caps))) {
    // If mds is importing cap, prior cap messages that update 'wanted'
    // may get dropped by mds (migrate seq mismatch).
    // We don't send a cap message to update 'wanted' if what we want is
    // already issued. If the mds revokes caps, the cap message that releases
    // caps also tells the mds what we want. But if caps got revoked by the mds
    // forcibly (session stale), we may not have told the mds what we want.

  auto revoked = cap->issued & ~new_caps;
    ldout(cct, 10) << "  revocation of " << ccap_string(revoked) << dendl;
    cap->issued = new_caps;
    cap->implemented |= new_caps;

    // recall delegations if we're losing caps necessary for them
    if (revoked & ceph_deleg_caps_for_type(CEPH_DELEGATION_RD))
      in->recall_deleg(false);
    else if (revoked & ceph_deleg_caps_for_type(CEPH_DELEGATION_WR))
      in->recall_deleg(true);

    used = adjust_caps_used_for_lazyio(used, cap->issued, cap->implemented);
    if ((used & revoked & (CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO)) &&
        !_flush(in, new C_Client_FlushComplete(this, in))) {
      // waitin' for flush
    } else if (used & revoked & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) {
      cap->wanted = 0; // don't let check_caps skip sending a response to MDS
  } else if (cap->issued == new_caps) {
    ldout(cct, 10) << "  caps unchanged at " << ccap_string(cap->issued) << dendl;
    ldout(cct, 10) << "  grant, new caps are " << ccap_string(new_caps & ~cap->issued) << dendl;
    cap->issued = new_caps;
    cap->implemented |= new_caps;

    if (cap == in->auth_cap) {
      // non-auth MDS is revoking the newly granted caps ?
      for (const auto &p : in->caps) {
        if (&p.second == cap)
        if (p.second.implemented & ~p.second.issued & new_caps) {

    signal_cond_list(in->waitfor_caps);

  // may drop inode's last ref
    _try_to_trim_inode(in, true);
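// On revocation, if the bits being pulled include Fb/Fl while buffered data is
// in use, the ObjectCacher flush is kicked off first (C_Client_FlushComplete)
// and the release is deferred until it completes; a plain grant just widens
// cap->issued/implemented and wakes anything blocked in waitfor_caps.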
int Client::inode_permission(Inode *in, const UserPerm& perms, unsigned want)
  if (perms.uid() == 0) {
    // Executables are overridable when there is at least one exec bit set
    if ((want & MAY_EXEC) && !(in->mode & S_IXUGO))
      return -CEPHFS_EACCES;

  if (perms.uid() != in->uid && (in->mode & S_IRWXG)) {
    int ret = _posix_acl_permission(in, perms, want);
    if (ret != -CEPHFS_EAGAIN)

  // check permissions before doing anything else
  if (!in->check_mode(perms, want))
    return -CEPHFS_EACCES;
int Client::xattr_permission(Inode *in, const char *name, unsigned want,
                             const UserPerm& perms)
  int r = _getattr_for_perm(in, perms);

  if (strncmp(name, "system.", 7) == 0) {
    if ((want & MAY_WRITE) && (perms.uid() != 0 && perms.uid() != in->uid))
    r = inode_permission(in, perms, want);

  ldout(cct, 5) << __func__ << " " << in << " = " << r << dendl;

ostream& operator<<(ostream &out, const UserPerm& perm) {
  out << "UserPerm(uid: " << perm.uid() << ", gid: " << perm.gid() << ")";
int Client::may_setattr(Inode *in, struct ceph_statx *stx, int mask,
                        const UserPerm& perms)
  ldout(cct, 20) << __func__ << " " << *in << "; " << perms << dendl;
  int r = _getattr_for_perm(in, perms);

  if (mask & CEPH_SETATTR_SIZE) {
    r = inode_permission(in, perms, MAY_WRITE);

  if (mask & CEPH_SETATTR_UID) {
    if (perms.uid() != 0 && (perms.uid() != in->uid || stx->stx_uid != in->uid))

  if (mask & CEPH_SETATTR_GID) {
    if (perms.uid() != 0 && (perms.uid() != in->uid ||
        (!perms.gid_in_groups(stx->stx_gid) && stx->stx_gid != in->gid)))

  if (mask & CEPH_SETATTR_MODE) {
    if (perms.uid() != 0 && perms.uid() != in->uid)
    gid_t i_gid = (mask & CEPH_SETATTR_GID) ? stx->stx_gid : in->gid;
    if (perms.uid() != 0 && !perms.gid_in_groups(i_gid))
      stx->stx_mode &= ~S_ISGID;

  if (mask & (CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME |
              CEPH_SETATTR_MTIME | CEPH_SETATTR_ATIME)) {
    if (perms.uid() != 0 && perms.uid() != in->uid) {
      int check_mask = CEPH_SETATTR_CTIME | CEPH_SETATTR_BTIME;
      if (!(mask & CEPH_SETATTR_MTIME_NOW))
        check_mask |= CEPH_SETATTR_MTIME;
      if (!(mask & CEPH_SETATTR_ATIME_NOW))
        check_mask |= CEPH_SETATTR_ATIME;
      if (check_mask & mask) {
      r = inode_permission(in, perms, MAY_WRITE);

  ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl;
int Client::may_open(Inode *in, int flags, const UserPerm& perms)
  ldout(cct, 20) << __func__ << " " << *in << "; " << perms << dendl;
  if ((flags & O_ACCMODE) == O_WRONLY)
  else if ((flags & O_ACCMODE) == O_RDWR)
    want = MAY_READ | MAY_WRITE;
  else if ((flags & O_ACCMODE) == O_RDONLY)
  if (flags & O_TRUNC)

  switch (in->mode & S_IFMT) {
      if (want & MAY_WRITE) {

  r = _getattr_for_perm(in, perms);
  r = inode_permission(in, perms, want);
  ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl;

int Client::may_lookup(Inode *dir, const UserPerm& perms)
  ldout(cct, 20) << __func__ << " " << *dir << "; " << perms << dendl;
  int r = _getattr_for_perm(dir, perms);

  r = inode_permission(dir, perms, MAY_EXEC);
  ldout(cct, 3) << __func__ << " " << dir << " = " << r << dendl;

int Client::may_create(Inode *dir, const UserPerm& perms)
  ldout(cct, 20) << __func__ << " " << *dir << "; " << perms << dendl;
  int r = _getattr_for_perm(dir, perms);

  r = inode_permission(dir, perms, MAY_EXEC | MAY_WRITE);
  ldout(cct, 3) << __func__ << " " << dir << " = " << r << dendl;

int Client::may_delete(Inode *dir, const char *name, const UserPerm& perms)
  ldout(cct, 20) << __func__ << " " << *dir << "; " << "; name " << name << "; " << perms << dendl;
  int r = _getattr_for_perm(dir, perms);

  r = inode_permission(dir, perms, MAY_EXEC | MAY_WRITE);

  /* 'name == NULL' means rmsnap w/o permission checks */
  if (perms.uid() != 0 && name && (dir->mode & S_ISVTX)) {
    r = _lookup(dir, name, CEPH_CAP_AUTH_SHARED, &otherin, perms);
    if (dir->uid != perms.uid() && otherin->uid != perms.uid())

  ldout(cct, 3) << __func__ << " " << dir << " = " << r << dendl;

int Client::may_delete(const char *relpath, const UserPerm& perms) {
  ldout(cct, 20) << __func__ << " " << relpath << "; " << perms << dendl;

  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())

  filepath path(relpath);
  string name = path.last_dentry();

  std::scoped_lock lock(client_lock);
  int r = path_walk(path, &dir, perms);
  if (cct->_conf->client_permissions) {
    int r = may_delete(dir.get(), name.c_str(), perms);

int Client::may_hardlink(Inode *in, const UserPerm& perms)
  ldout(cct, 20) << __func__ << " " << *in << "; " << perms << dendl;
  int r = _getattr_for_perm(in, perms);

  if (perms.uid() == 0 || perms.uid() == in->uid) {

  if (!S_ISREG(in->mode))
  if (in->mode & S_ISUID)
  if ((in->mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))

  r = inode_permission(in, perms, MAY_READ | MAY_WRITE);
  ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl;
int Client::_getattr_for_perm(Inode *in, const UserPerm& perms)
  int mask = CEPH_STAT_CAP_MODE;
  if (acl_type != NO_ACL) {
    mask |= CEPH_STAT_CAP_XATTR;
    force = in->xattr_version == 0;
  return _getattr(in, mask, perms, force);

vinodeno_t Client::_get_vino(Inode *in)
  /* The caller must hold the client lock */
  return vinodeno_t(in->ino, in->snapid);
 * Resolve an MDS spec to a list of MDS daemon GIDs.
 * The spec is a string representing a GID, rank, filesystem:rank, or name/id.
 * It may be '*' in which case it matches all GIDs.
 * If no error is returned, the `targets` vector will be populated with at least
int Client::resolve_mds(
    const std::string &mds_spec,
    std::vector<mds_gid_t> *targets)
  ceph_assert(targets != nullptr);

  CachedStackStringStream css;
  int role_r = fsmap->parse_role(mds_spec, &role, *css);
    // We got a role, resolve it to a GID
    auto& info = fsmap->get_filesystem(role.fscid)->mds_map.get_info(role.rank);
    ldout(cct, 10) << __func__ << ": resolved " << mds_spec << " to role '"
                   << role << "' aka " << info.human_name() << dendl;
    targets->push_back(info.global_id);

  std::string strtol_err;
  long long rank_or_gid = strict_strtoll(mds_spec.c_str(), 10, &strtol_err);
  if (strtol_err.empty()) {
    // It is a possible GID
    const mds_gid_t mds_gid = mds_gid_t(rank_or_gid);
    if (fsmap->gid_exists(mds_gid)) {
      auto& info = fsmap->get_info_gid(mds_gid);
      ldout(cct, 10) << __func__ << ": validated gid " << mds_gid << " aka "
                     << info.human_name() << dendl;
      targets->push_back(mds_gid);
      lderr(cct) << __func__ << ": gid " << mds_gid << " not in MDS map"
      lderr(cct) << "FSMap: " << *fsmap << dendl;
      return -CEPHFS_ENOENT;
  } else if (mds_spec == "*") {
    // It is a wildcard: use all MDSs
    const auto& mds_info = fsmap->get_mds_info();

    ldout(cct, 10) << __func__ << ": resolving `*' to all MDS daemons" << dendl;
    if (mds_info.empty()) {
      lderr(cct) << __func__ << ": no MDS daemons found" << dendl;
      lderr(cct) << "FSMap: " << *fsmap << dendl;
      return -CEPHFS_ENOENT;

    for (const auto& [gid, info] : mds_info) {
      ldout(cct, 10) << __func__ << ": appending " << info.human_name() << " to targets" << dendl;
      targets->push_back(gid);
    // It did not parse as an integer, it is not a wildcard, it must be a name
    const mds_gid_t mds_gid = fsmap->find_mds_gid_by_name(mds_spec);
      lderr(cct) << __func__ << ": no MDS daemons found by name `" << mds_spec << "'" << dendl;
      lderr(cct) << "FSMap: " << *fsmap << dendl;
      return -CEPHFS_ENOENT;
    auto& info = fsmap->get_info_gid(mds_gid);
    ldout(cct, 10) << __func__ << ": resolved name '" << mds_spec
                   << "' to " << info.human_name() << dendl;
    targets->push_back(mds_gid);
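// Accepted spellings of mds_spec handled above: a role ("<fs>:<rank>" or a bare
// rank), a numeric MDS GID, the wildcard "*" (every daemon), or a daemon name.
// For illustration only, "myfs:0", "4421", "*" and "mds.a" are all plausible
// inputs; anything unresolvable returns -CEPHFS_ENOENT.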
 * Authenticate with mon and establish global ID
int Client::authenticate()
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  if (monclient->is_authenticated()) {

  client_lock.unlock();
  int r = monclient->authenticate(cct->_conf->client_mount_timeout);

  whoami = monclient->get_global_id();
  messenger->set_myname(entity_name_t::CLIENT(whoami.v));

int Client::fetch_fsmap(bool user)
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  // Retrieve FSMap to enable looking up daemon addresses. We need FSMap
  // rather than MDSMap because no one MDSMap contains all the daemons, and
  // a `tell` can address any daemon.
  version_t fsmap_latest;
    client_lock.unlock();
    std::tie(fsmap_latest, std::ignore) =
      monclient->get_version("fsmap", ca::use_blocked[ec]);
  } while (ec == bs::errc::resource_unavailable_try_again);

    lderr(cct) << "Failed to learn FSMap version: " << ec << dendl;
    return ceph::from_error_code(ec);

  ldout(cct, 10) << __func__ << " learned FSMap version " << fsmap_latest << dendl;

  if (!fsmap_user || fsmap_user->get_epoch() < fsmap_latest) {
    monclient->sub_want("fsmap.user", fsmap_latest, CEPH_SUBSCRIBE_ONETIME);
    monclient->renew_subs();
    wait_on_list(waiting_for_fsmap);
  ceph_assert(fsmap_user);
  ceph_assert(fsmap_user->get_epoch() >= fsmap_latest);

  if (!fsmap || fsmap->get_epoch() < fsmap_latest) {
    monclient->sub_want("fsmap", fsmap_latest, CEPH_SUBSCRIBE_ONETIME);
    monclient->renew_subs();
    wait_on_list(waiting_for_fsmap);
  ceph_assert(fsmap->get_epoch() >= fsmap_latest);

  ldout(cct, 10) << __func__ << " finished waiting for FSMap version "
                 << fsmap_latest << dendl;
 * @mds_spec one of ID, rank, GID, "*"
int Client::mds_command(
    const std::string &mds_spec,
    const vector<string>& cmd,
    const bufferlist& inbl,
  RWRef_t iref_reader(initialize_state, CLIENT_INITIALIZED);
  if (!iref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::unique_lock cl(client_lock);

  r = fetch_fsmap(false);

  // Look up MDS target(s) of the command
  std::vector<mds_gid_t> targets;
  r = resolve_mds(mds_spec, &targets);

  // If daemons are laggy, we won't send them commands. If all
  // are laggy then we fail.
  std::vector<mds_gid_t> non_laggy;
  for (const auto& gid : targets) {
    const auto info = fsmap->get_info_gid(gid);
    if (!info.laggy()) {
      non_laggy.push_back(gid);
  if (non_laggy.size() == 0) {
    *outs = "All targeted MDS daemons are laggy";
    return -CEPHFS_ENOENT;

  if (metadata.empty()) {
    // We are called on an unmounted client, so metadata
    // won't be initialized yet.
    populate_metadata("");

  // Send commands to targets
  C_GatherBuilder gather(cct, onfinish);
  for (const auto& target_gid : non_laggy) {
    const auto info = fsmap->get_info_gid(target_gid);

    // Open a connection to the target MDS
    ConnectionRef conn = messenger->connect_to_mds(info.get_addrs());

    std::scoped_lock cmd_lock(command_lock);
    // Generate MDSCommandOp state
    auto &op = command_table.start_command();

    op.on_finish = gather.new_sub();
    op.mds_gid = target_gid;

    ldout(cct, 4) << __func__ << ": new command op to " << target_gid
                  << " tid=" << op.tid << cmd << dendl;

    // Construct and send MCommand
    MessageRef m = op.get_message(monclient->get_fsid());
    conn->send_message2(std::move(m));

void Client::handle_command_reply(const MConstRef<MCommandReply>& m)
  ceph_tid_t const tid = m->get_tid();

  ldout(cct, 10) << __func__ << ": tid=" << m->get_tid() << dendl;

  std::scoped_lock cmd_lock(command_lock);
  if (!command_table.exists(tid)) {
    ldout(cct, 1) << __func__ << ": unknown tid " << tid << ", dropping" << dendl;

  auto &op = command_table.get_command(tid);
    *op.outbl = m->get_data();
    op.on_finish->complete(m->r);

  command_table.erase(tid);
// -------------------

int Client::subscribe_mdsmap(const std::string &fs_name)
  int r = authenticate();
    lderr(cct) << "authentication failed: " << cpp_strerror(r) << dendl;

  std::string resolved_fs_name;
  if (fs_name.empty()) {
    resolved_fs_name = cct->_conf.get_val<std::string>("client_fs");
    if (resolved_fs_name.empty())
      // Try the backwards compatibility fs name option
      resolved_fs_name = cct->_conf.get_val<std::string>("client_mds_namespace");
    resolved_fs_name = fs_name;

  std::string want = "mdsmap";
  if (!resolved_fs_name.empty()) {
    r = fetch_fsmap(true);
    fscid = fsmap_user->get_fs_cid(resolved_fs_name);
    if (fscid == FS_CLUSTER_ID_NONE) {
      return -CEPHFS_ENOENT;

    std::ostringstream oss;
    oss << want << "." << fscid;

  ldout(cct, 10) << "Subscribing to map '" << want << "'" << dendl;

  monclient->sub_want(want, 0, 0);
  monclient->renew_subs();
int Client::mount(const std::string &mount_root, const UserPerm& perms,
                  bool require_mds, const std::string &fs_name)
{
  ceph_assert(is_initialized());

  /*
   * To make sure that the _unmount() must wait until the mount()
   * is done.
   */
  RWRef_t mref_writer(mount_state, CLIENT_MOUNTING, false);
  if (!mref_writer.is_first_writer()) // already mounting or mounted
    return 0;

  std::unique_lock cl(client_lock);

  int r = subscribe_mdsmap(fs_name);
  if (r < 0) {
    lderr(cct) << "mdsmap subscription failed: " << cpp_strerror(r) << dendl;
    return r;
  }

  start_tick_thread(); // start tick thread

  if (require_mds) {
    while (1) {
      auto availability = mdsmap->is_cluster_available();
      if (availability == MDSMap::STUCK_UNAVAILABLE) {
        // Error out
        ldout(cct, 10) << "mds cluster unavailable: epoch=" << mdsmap->get_epoch() << dendl;
        return CEPH_FUSE_NO_MDS_UP;
      } else if (availability == MDSMap::AVAILABLE) {
        // Continue to mount
        break;
      } else if (availability == MDSMap::TRANSIENT_UNAVAILABLE) {
        // Else, wait.  MDSMonitor will update the map to bring
        // us to a conclusion eventually.
        wait_on_list(waiting_for_mdsmap);
      } else {
        // Unexpected value!
        ceph_abort();
      }
    }
  }

  populate_metadata(mount_root.empty() ? "/" : mount_root);

  filepath fp(CEPH_INO_ROOT);
  if (!mount_root.empty()) {
    fp = filepath(mount_root.c_str());
  }

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_GETATTR);
  req->set_filepath(fp);
  req->head.args.getattr.mask = CEPH_STAT_CAP_INODE_ALL;
  int res = make_request(req, perms);
  if (res < 0) {
    if (res == -CEPHFS_EACCES && root) {
      ldout(cct, 1) << __func__ << " EACCES on parent of mount point; quotas may not work" << dendl;
    } else {
      return res;
    }
  }

  ceph_assert(root);
  _ll_get(root.get());

  if (!cct->_conf->client_trace.empty()) {
    traceout.open(cct->_conf->client_trace.c_str());
    if (traceout.is_open()) {
      ldout(cct, 1) << "opened trace file '" << cct->_conf->client_trace << "'" << dendl;
    } else {
      ldout(cct, 1) << "FAILED to open trace file '" << cct->_conf->client_trace << "'" << dendl;
    }
  }

  /*
  ldout(cct, 3) << "op: // client trace data structs" << dendl;
  ldout(cct, 3) << "op: struct stat st;" << dendl;
  ldout(cct, 3) << "op: struct utimbuf utim;" << dendl;
  ldout(cct, 3) << "op: int readlinkbuf_len = 1000;" << dendl;
  ldout(cct, 3) << "op: char readlinkbuf[readlinkbuf_len];" << dendl;
  ldout(cct, 3) << "op: map<string, inode_t*> dir_contents;" << dendl;
  ldout(cct, 3) << "op: map<int, int> open_files;" << dendl;
  ldout(cct, 3) << "op: int fd;" << dendl;
  */

  mref_writer.update_state(CLIENT_MOUNTED);

  return 0;
}
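
// NOTE: mount() is serialized through the mount_state writer above: only the
// first caller does the work (later callers see is_first_writer() == false),
// and _unmount() cannot proceed until this writer publishes CLIENT_MOUNTED.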
void Client::_close_sessions()
{
  for (auto it = mds_sessions.begin(); it != mds_sessions.end(); ) {
    if (it->second.state == MetaSession::STATE_REJECTED)
      mds_sessions.erase(it++);
    else
      ++it;
  }

  while (!mds_sessions.empty()) {
    // send session closes!
    for (auto &p : mds_sessions) {
      if (p.second.state != MetaSession::STATE_CLOSING) {
        _close_mds_session(&p.second);
        mds_ranks_closing.insert(p.first);
      }
    }

    // wait for sessions to close
    double timo = cct->_conf.get_val<std::chrono::seconds>("client_shutdown_timeout").count();
    ldout(cct, 2) << "waiting for " << mds_ranks_closing.size() << " mds session(s) to close (timeout: "
                  << timo << "s)" << dendl;
    std::unique_lock l{client_lock, std::adopt_lock};
    if (!timo) {
      mount_cond.wait(l);
    } else if (!mount_cond.wait_for(l, ceph::make_timespan(timo), [this] { return mds_ranks_closing.empty(); })) {
      ldout(cct, 1) << mds_ranks_closing.size() << " mds(s) did not respond to session close -- timing out." << dendl;
      while (!mds_ranks_closing.empty()) {
        auto session = mds_sessions.at(*mds_ranks_closing.begin());
        // this prunes entry from mds_sessions and mds_ranks_closing
        _closed_mds_session(&session, -CEPHFS_ETIMEDOUT);
      }
    }

    mds_ranks_closing.clear();
    l.release();
  }
}
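
// NOTE: session shutdown is bounded by "client_shutdown_timeout"; sessions
// that do not answer the close within that window are force-closed with
// -CEPHFS_ETIMEDOUT rather than blocking unmount forever.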
void Client::flush_mdlog_sync()
{
  if (mds_requests.empty())
    return;

  for (auto &p : mds_sessions) {
    flush_mdlog(&p.second);
  }
}

void Client::flush_mdlog(MetaSession *session)
{
  // Only send this to Luminous or newer MDS daemons, older daemons
  // will crash if they see an unknown CEPH_SESSION_* value in this msg.
  const uint64_t features = session->con->get_features();
  if (HAVE_FEATURE(features, SERVER_LUMINOUS)) {
    auto m = make_message<MClientSession>(CEPH_SESSION_REQUEST_FLUSH_MDLOG);
    session->con->send_message2(std::move(m));
  }
}
void Client::_abort_mds_sessions(int err)
{
  for (auto p = mds_requests.begin(); p != mds_requests.end(); ) {
    auto req = p->second;
    ++p;
    // unsafe requests will be removed during close session below.
    if (req->got_unsafe)
      continue;

    req->abort(err);
    if (req->caller_cond) {
      req->caller_cond->notify_all();
    }
  }

  // Process aborts on any requests that were on this waitlist.
  // Any requests that were on a waiting_for_open session waitlist
  // will get kicked during close session below.
  signal_cond_list(waiting_for_mdsmap);

  // Force-close all sessions
  while (!mds_sessions.empty()) {
    auto& session = mds_sessions.begin()->second;
    _closed_mds_session(&session, err);
  }
}
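
// NOTE: requests that already got an unsafe reply are deliberately skipped
// above; they are torn down together with their session in
// _closed_mds_session() rather than being aborted individually.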
void Client::_unmount(bool abort)
{
  /*
   * We are unmounting the client.
   *
   * Just declare the state to STATE_UNMOUNTING to block and fail
   * any new coming "reader" and then try to wait all the in-flight
   * "readers" to finish.
   */
  RWRef_t mref_writer(mount_state, CLIENT_UNMOUNTING, false);
  if (!mref_writer.is_first_writer())
    return;
  mref_writer.wait_readers_done();

  std::unique_lock lock{client_lock};

  if (abort || blocklisted) {
    ldout(cct, 2) << "unmounting (" << (abort ? "abort)" : "blocklisted)") << dendl;
  } else {
    ldout(cct, 2) << "unmounting" << dendl;
  }

  if (abort) {
    mount_aborted = true;
    // Abort all mds sessions
    _abort_mds_sessions(-CEPHFS_ENOTCONN);

    objecter->op_cancel_writes(-CEPHFS_ENOTCONN);
  } else {
    // flush the mdlog for pending requests, if any
    flush_mdlog_sync();
  }

  mount_cond.wait(lock, [this] {
    if (!mds_requests.empty()) {
      ldout(cct, 10) << "waiting on " << mds_requests.size() << " requests"
                     << dendl;
    }
    return mds_requests.empty();
  });

  // clean up any unclosed files
  while (!fd_map.empty()) {
    Fh *fh = fd_map.begin()->second;
    fd_map.erase(fd_map.begin());
    ldout(cct, 0) << " destroyed lost open file " << fh << " on " << *fh->inode << dendl;
    _release_fh(fh);
  }

  while (!ll_unclosed_fh_set.empty()) {
    set<Fh*>::iterator it = ll_unclosed_fh_set.begin();
    Fh *fh = *it;
    ll_unclosed_fh_set.erase(fh);
    ldout(cct, 0) << " destroyed lost open file " << fh << " on " << *(fh->inode) << dendl;
    _release_fh(fh);
  }

  while (!opened_dirs.empty()) {
    dir_result_t *dirp = *opened_dirs.begin();
    ldout(cct, 0) << " destroyed lost open dir " << dirp << " on " << *dirp->inode << dendl;
    _closedir(dirp);
  }

  if (cct->_conf->client_oc) {
    // flush/release all buffered data
    std::list<InodeRef> anchor;
    for (auto& p : inode_map) {
      Inode *in = p.second;
      if (!in) {
        ldout(cct, 0) << "null inode_map entry ino " << p.first << dendl;
        ceph_assert(in);
      }

      // prevent inode from getting freed
      anchor.emplace_back(in);

      if (abort || blocklisted) {
        objectcacher->purge_set(&in->oset);
      } else if (!in->caps.empty()) {
        _flush(in, new C_Client_FlushComplete(this, in));
      }
    }
  }

  if (abort || blocklisted) {
    for (auto p = dirty_list.begin(); !p.end(); ) {
      Inode *in = *p;
      ++p;
      if (in->dirty_caps) {
        ldout(cct, 0) << " drop dirty caps on " << *in << dendl;
        in->mark_caps_clean();
      }
    }
  } else {
    wait_sync_caps(last_flush_tid);
  }

  while (lru.lru_get_size() > 0 ||
         !inode_map.empty()) {
    ldout(cct, 2) << "cache still has " << lru.lru_get_size()
                  << "+" << inode_map.size() << " items"
                  << ", waiting (for caps to release?)"
                  << dendl;

    if (auto r = mount_cond.wait_for(lock, ceph::make_timespan(5));
        r == std::cv_status::timeout) {
      dump_cache(NULL);
    }
  }
  ceph_assert(lru.lru_get_size() == 0);
  ceph_assert(inode_map.empty());

  if (!cct->_conf->client_trace.empty()) {
    ldout(cct, 1) << "closing trace file '" << cct->_conf->client_trace << "'" << dendl;
    traceout.close();
  }

  // stop the tick thread
  tick_thread_stopped = true;
  upkeep_cond.notify_one();

  _close_sessions();

  mref_writer.update_state(CLIENT_UNMOUNTED);

  ldout(cct, 2) << "unmounted." << dendl;
}
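
// NOTE: the abort/blocklisted path above is destructive by design: cached
// writes are purged and dirty caps are dropped instead of being written
// back, so an aborted unmount can discard data that was never flushed.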
void Client::unmount()
{
  _unmount(false);
}

void Client::abort_conn()
{
  _unmount(true);
}
void Client::flush_cap_releases()
{
  uint64_t nr_caps = 0;

  // send any cap releases
  for (auto &p : mds_sessions) {
    auto &session = p.second;
    if (session.release && mdsmap->is_clientreplay_or_active_or_stopping(
          p.first)) {
      nr_caps += session.release->caps.size();
      if (cct->_conf->client_inject_release_failure) {
        ldout(cct, 20) << __func__ << " injecting failure to send cap release message" << dendl;
      } else {
        session.con->send_message2(std::move(session.release));
      }
      session.release.reset();
    }
  }

  if (nr_caps > 0)
    dec_pinned_icaps(nr_caps);
}
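
// NOTE: "client_inject_release_failure" is a test hook; the releases are
// counted (and later unpinned) but the message is intentionally never sent,
// simulating a client that fails to deliver its cap releases.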
void Client::renew_and_flush_cap_releases()
{
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  if (!mount_aborted && mdsmap->get_epoch()) {
    // renew caps?
    utime_t el = ceph_clock_now() - last_cap_renew;
    if (unlikely(el > mdsmap->get_session_timeout() / 3.0))
      renew_caps();

    flush_cap_releases();
  }
}
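
// NOTE: renewal is tied to the MDS session timeout: once more than a third
// of mdsmap->get_session_timeout() has elapsed since last_cap_renew, caps
// are renewed before any pending cap releases are flushed.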
void Client::tick()
{
  ldout(cct, 20) << "tick" << dendl;

  utime_t now = ceph_clock_now();

  /*
   * If the mount() is not finished
   */
  if (is_mounting() && !mds_requests.empty()) {
    MetaRequest *req = mds_requests.begin()->second;

    if (req->op_stamp + cct->_conf->client_mount_timeout < now) {
      req->abort(-CEPHFS_ETIMEDOUT);
      if (req->caller_cond) {
        req->caller_cond->notify_all();
      }
      signal_cond_list(waiting_for_mdsmap);
      for (auto &p : mds_sessions) {
        signal_context_list(p.second.waiting_for_open);
      }
    }
  }

  renew_and_flush_cap_releases();

  xlist<Inode*>::iterator p = delayed_list.begin();
  while (!p.end()) {
    Inode *in = *p;
    ++p;
    if (!mount_aborted && in->hold_caps_until > now)
      break;
    delayed_list.pop_front();
    check_caps(in, CHECK_CAPS_NODELAY);
  }

  collect_and_send_metrics();

  delay_put_inodes(is_unmounting());

  if (blocklisted && (is_mounted() || is_unmounting()) &&
      last_auto_reconnect + 30 * 60 < now &&
      cct->_conf.get_val<bool>("client_reconnect_stale")) {
    messenger->client_reset();
    fd_gen++; // invalidate open files
    blocklisted = false;
    _kick_stale_sessions();
    last_auto_reconnect = now;
  }
}
void Client::start_tick_thread()
{
  upkeeper = std::thread([this]() {
    using time = ceph::coarse_mono_time;
    using sec = std::chrono::seconds;

    auto last_tick = time::min();

    std::unique_lock cl(client_lock);
    while (!tick_thread_stopped) {
      auto now = clock::now();
      auto since = now - last_tick;

      auto t_interval = clock::duration(cct->_conf.get_val<sec>("client_tick_interval"));
      auto d_interval = clock::duration(cct->_conf.get_val<sec>("client_debug_inject_tick_delay"));

      auto interval = std::max(t_interval, d_interval);
      if (likely(since >= interval*.90)) {
        tick();
        last_tick = clock::now();
      } else {
        interval -= since;
      }

      ldout(cct, 20) << "upkeep thread waiting interval " << interval << dendl;
      if (!tick_thread_stopped)
        upkeep_cond.wait_for(cl, interval);
    }
  });
}
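
// NOTE: the upkeep thread recomputes its wait each pass as
//   interval = max(client_tick_interval, client_debug_inject_tick_delay)
// and treats anything past ~90% of that interval as time for the next
// tick(), presumably to tolerate condition-variable wakeup jitter.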
void Client::collect_and_send_metrics() {
  ldout(cct, 20) << __func__ << dendl;

  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  // right now, we only track and send global metrics. it's sufficient
  // to send these metrics to MDS rank0.
  collect_and_send_global_metrics();
}

void Client::collect_and_send_global_metrics() {
  ldout(cct, 20) << __func__ << dendl;
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  if (!have_open_session((mds_rank_t)0)) {
    ldout(cct, 5) << __func__ << ": no session with rank=0 -- not sending metric"
                  << dendl;
    return;
  }
  auto session = _get_or_open_mds_session((mds_rank_t)0);
  if (!session->mds_features.test(CEPHFS_FEATURE_METRIC_COLLECT)) {
    ldout(cct, 5) << __func__ << ": rank=0 does not support metrics" << dendl;
    return;
  }

  ClientMetricMessage metric;
  std::vector<ClientMetricMessage> message;

  // read latency
  {
    metric = ClientMetricMessage(ReadLatencyPayload(logger->tget(l_c_read)));
    message.push_back(metric);
  }

  // write latency
  {
    metric = ClientMetricMessage(WriteLatencyPayload(logger->tget(l_c_wrlat)));
    message.push_back(metric);
  }

  // metadata latency
  {
    metric = ClientMetricMessage(MetadataLatencyPayload(logger->tget(l_c_lat)));
    message.push_back(metric);
  }

  // cap hit ratio -- nr_caps is unused right now
  {
    auto [cap_hits, cap_misses] = get_cap_hit_rates();
    metric = ClientMetricMessage(CapInfoPayload(cap_hits, cap_misses, 0));
    message.push_back(metric);
  }

  // dentry lease hit ratio
  {
    auto [dlease_hits, dlease_misses, nr] = get_dlease_hit_rates();
    metric = ClientMetricMessage(DentryLeasePayload(dlease_hits, dlease_misses, nr));
    message.push_back(metric);
  }

  // opened files
  {
    auto [opened_files, total_inodes] = get_opened_files_rates();
    metric = ClientMetricMessage(OpenedFilesPayload(opened_files, total_inodes));
    message.push_back(metric);
  }

  // pinned caps
  {
    auto [pinned_icaps, total_inodes] = get_pinned_icaps_rates();
    metric = ClientMetricMessage(PinnedIcapsPayload(pinned_icaps, total_inodes));
    message.push_back(metric);
  }

  // opened inodes
  {
    auto [opened_inodes, total_inodes] = get_opened_inodes_rates();
    metric = ClientMetricMessage(OpenedInodesPayload(opened_inodes, total_inodes));
    message.push_back(metric);
  }

  session->con->send_message2(make_message<MClientMetrics>(std::move(message)));
}
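
// NOTE: metrics only go out when rank 0 advertises
// CEPHFS_FEATURE_METRIC_COLLECT; all payloads (latencies, cap and dentry
// lease hit rates, opened files/inodes, pinned caps) are batched into a
// single MClientMetrics message per tick.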
void Client::renew_caps()
{
  ldout(cct, 10) << "renew_caps()" << dendl;
  last_cap_renew = ceph_clock_now();

  for (auto &p : mds_sessions) {
    ldout(cct, 15) << "renew_caps requesting from mds." << p.first << dendl;
    if (mdsmap->get_state(p.first) >= MDSMap::STATE_REJOIN)
      renew_caps(&p.second);
  }
}

void Client::renew_caps(MetaSession *session)
{
  ldout(cct, 10) << "renew_caps mds." << session->mds_num << dendl;
  session->last_cap_renew_request = ceph_clock_now();
  uint64_t seq = ++session->cap_renew_seq;
  session->con->send_message2(make_message<MClientSession>(CEPH_SESSION_REQUEST_RENEWCAPS, seq));
}
// ===============================================================
// high level (POSIXy) interface

int Client::_do_lookup(Inode *dir, const string& name, int mask,
                       InodeRef *target, const UserPerm& perms)
{
  int op = dir->snapid == CEPH_SNAPDIR ? CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
  MetaRequest *req = new MetaRequest(op);
  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_inode(dir);
  if (cct->_conf->client_debug_getattr_caps && op == CEPH_MDS_OP_LOOKUP)
    mask |= DEBUG_GETATTR_CAPS;
  req->head.args.getattr.mask = mask;

  ldout(cct, 10) << __func__ << " on " << path << dendl;

  int r = make_request(req, perms, target);
  ldout(cct, 10) << __func__ << " res is " << r << dendl;
  return r;
}
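
// NOTE: _do_lookup() always issues an MDS request; the cached fast path
// lives in _lookup() below, which only falls back to this when the dentry
// lease / cap checks cannot answer the lookup locally.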
bool Client::_dentry_valid(const Dentry *dn)
{
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  // is dn lease valid?
  utime_t now = ceph_clock_now();
  if (dn->lease_mds >= 0 && dn->lease_ttl > now &&
      mds_sessions.count(dn->lease_mds)) {
    MetaSession &s = mds_sessions.at(dn->lease_mds);
    if (s.cap_ttl > now && s.cap_gen == dn->lease_gen) {
      dlease_hit();
      return true;
    }

    ldout(cct, 20) << " bad lease, cap_ttl " << s.cap_ttl << ", cap_gen " << s.cap_gen
                   << " vs lease_gen " << dn->lease_gen << dendl;
  }

  dlease_miss();
  return false;
}
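
// NOTE: a dentry lease is trusted only while its ttl has not passed, the
// issuing MDS session still exists, and that session's cap_gen matches the
// lease_gen recorded on the dentry.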
6785 int Client::_lookup(Inode
*dir
, const string
& dname
, int mask
, InodeRef
*target
,
6786 const UserPerm
& perms
, std::string
* alternate_name
)
6790 bool did_lookup_request
= false;
6791 // can only request shared caps
6792 mask
&= CEPH_CAP_ANY_SHARED
| CEPH_STAT_RSTAT
;
6794 if (dname
== "..") {
6795 if (dir
->dentries
.empty()) {
6796 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPPARENT
);
6797 filepath
path(dir
->ino
);
6798 req
->set_filepath(path
);
6801 int r
= make_request(req
, perms
, &tmptarget
, NULL
, rand() % mdsmap
->get_num_in_mds());
6804 *target
= std::move(tmptarget
);
6805 ldout(cct
, 8) << __func__
<< " found target " << (*target
)->ino
<< dendl
;
6811 *target
= dir
->get_first_parent()->dir
->parent_inode
; //dirs can't be hard-linked
6820 if (!dir
->is_dir()) {
6821 r
= -CEPHFS_ENOTDIR
;
6825 if (dname
.length() > NAME_MAX
) {
6826 r
= -CEPHFS_ENAMETOOLONG
;
6830 if (dname
== cct
->_conf
->client_snapdir
&&
6831 dir
->snapid
== CEPH_NOSNAP
) {
6832 *target
= open_snapdir(dir
);
6838 dir
->dir
->dentries
.count(dname
)) {
6839 dn
= dir
->dir
->dentries
[dname
];
6841 ldout(cct
, 20) << __func__
<< " have " << *dn
<< " from mds." << dn
->lease_mds
6842 << " ttl " << dn
->lease_ttl
<< " seq " << dn
->lease_seq
<< dendl
;
6844 if (!dn
->inode
|| dn
->inode
->caps_issued_mask(mask
, true)) {
6845 if (_dentry_valid(dn
)) {
6846 // touch this mds's dir cap too, even though we don't _explicitly_ use it here, to
6847 // make trim_caps() behave.
6848 dir
->try_touch_cap(dn
->lease_mds
);
6852 if (dir
->caps_issued_mask(CEPH_CAP_FILE_SHARED
, true)) {
6853 if (dn
->cap_shared_gen
== dir
->shared_gen
&&
6854 (!dn
->inode
|| dn
->inode
->caps_issued_mask(mask
, true)))
6856 if (!dn
->inode
&& (dir
->flags
& I_COMPLETE
)) {
6857 ldout(cct
, 10) << __func__
<< " concluded ENOENT locally for "
6858 << *dir
<< " dn '" << dname
<< "'" << dendl
;
6859 return -CEPHFS_ENOENT
;
6863 ldout(cct
, 20) << " no cap on " << dn
->inode
->vino() << dendl
;
6866 // can we conclude ENOENT locally?
6867 if (dir
->caps_issued_mask(CEPH_CAP_FILE_SHARED
, true) &&
6868 (dir
->flags
& I_COMPLETE
)) {
6869 ldout(cct
, 10) << __func__
<< " concluded ENOENT locally for " << *dir
<< " dn '" << dname
<< "'" << dendl
;
6870 return -CEPHFS_ENOENT
;
6874 if (did_lookup_request
) {
6878 r
= _do_lookup(dir
, dname
, mask
, target
, perms
);
6879 did_lookup_request
= true;
6881 /* complete lookup to get dentry for alternate_name */
6889 *target
= dn
->inode
;
6891 *alternate_name
= dn
->alternate_name
;
6900 ldout(cct
, 10) << __func__
<< " " << *dir
<< " " << dname
<< " = " << r
<< dendl
;
6902 ldout(cct
, 10) << __func__
<< " " << *dir
<< " " << dname
<< " = " << **target
<< dendl
;
6906 int Client::get_or_create(Inode
*dir
, const char* name
,
6907 Dentry
**pdn
, bool expect_null
)
6910 ldout(cct
, 20) << __func__
<< " " << *dir
<< " name " << name
<< dendl
;
6912 if (dir
->dir
->dentries
.count(name
)) {
6913 Dentry
*dn
= dir
->dir
->dentries
[name
];
6914 if (_dentry_valid(dn
)) {
6916 return -CEPHFS_EEXIST
;
6920 // otherwise link up a new one
6921 *pdn
= link(dir
->dir
, name
, NULL
, NULL
);
6928 int Client::walk(std::string_view path
, walk_dentry_result
* wdr
, const UserPerm
& perms
, bool followsym
)
6930 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
6931 if (!mref_reader
.is_state_satisfied())
6932 return -CEPHFS_ENOTCONN
;
6934 ldout(cct
, 10) << __func__
<< ": " << path
<< dendl
;
6936 std::scoped_lock
lock(client_lock
);
6938 return path_walk(path
, wdr
, perms
, followsym
);
6941 int Client::path_walk(const filepath
& origpath
, InodeRef
*end
,
6942 const UserPerm
& perms
, bool followsym
, int mask
, InodeRef dirinode
)
6944 walk_dentry_result wdr
;
6945 int rc
= path_walk(origpath
, &wdr
, perms
, followsym
, mask
, dirinode
);
6946 *end
= std::move(wdr
.in
);
6950 int Client::path_walk(const filepath
& origpath
, walk_dentry_result
* result
, const UserPerm
& perms
,
6951 bool followsym
, int mask
, InodeRef dirinode
)
6953 filepath path
= origpath
;
6955 std::string alternate_name
;
6956 if (origpath
.absolute())
6965 ldout(cct
, 20) << __func__
<< " cur=" << *cur
<< dendl
;
6966 ldout(cct
, 10) << __func__
<< " " << path
<< dendl
;
6971 while (i
< path
.depth() && cur
) {
6973 const string
&dname
= path
[i
];
6974 ldout(cct
, 10) << " " << i
<< " " << *cur
<< " " << dname
<< dendl
;
6975 ldout(cct
, 20) << " (path is " << path
<< ")" << dendl
;
6977 if (cct
->_conf
->client_permissions
) {
6978 int r
= may_lookup(cur
.get(), perms
);
6981 caps
= CEPH_CAP_AUTH_SHARED
;
6984 /* Get extra requested caps on the last component */
6985 if (i
== (path
.depth() - 1))
6987 int r
= _lookup(cur
.get(), dname
, caps
, &next
, perms
, &alternate_name
);
6990 // only follow trailing symlink if followsym. always follow
6991 // 'directory' symlinks.
6992 if (next
&& next
->is_symlink()) {
6994 ldout(cct
, 20) << " symlink count " << symlinks
<< ", value is '" << next
->symlink
<< "'" << dendl
;
6995 if (symlinks
> MAXSYMLINKS
) {
6996 return -CEPHFS_ELOOP
;
6999 if (i
< path
.depth() - 1) {
7001 // replace consumed components of path with symlink dir target
7002 filepath
resolved(next
->symlink
.c_str());
7003 resolved
.append(path
.postfixpath(i
+ 1));
7006 if (next
->symlink
[0] == '/') {
7010 } else if (followsym
) {
7011 if (next
->symlink
[0] == '/') {
7012 path
= next
->symlink
.c_str();
7017 filepath
more(next
->symlink
.c_str());
7018 // we need to remove the symlink component from off of the path
7019 // before adding the target that the symlink points to. remain
7020 // at the same position in the path.
7031 return -CEPHFS_ENOENT
;
7033 result
->in
= std::move(cur
);
7034 result
->alternate_name
= std::move(alternate_name
);
7042 int Client::link(const char *relexisting
, const char *relpath
, const UserPerm
& perm
, std::string alternate_name
)
7044 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7045 if (!mref_reader
.is_state_satisfied())
7046 return -CEPHFS_ENOTCONN
;
7048 tout(cct
) << "link" << std::endl
;
7049 tout(cct
) << relexisting
<< std::endl
;
7050 tout(cct
) << relpath
<< std::endl
;
7052 filepath
existing(relexisting
);
7056 std::scoped_lock
lock(client_lock
);
7057 int r
= path_walk(existing
, &in
, perm
, true);
7060 if (std::string(relpath
) == "/") {
7064 filepath
path(relpath
);
7065 string name
= path
.last_dentry();
7068 r
= path_walk(path
, &dir
, perm
, true);
7071 if (cct
->_conf
->client_permissions
) {
7072 if (S_ISDIR(in
->mode
)) {
7076 r
= may_hardlink(in
.get(), perm
);
7079 r
= may_create(dir
.get(), perm
);
7083 r
= _link(in
.get(), dir
.get(), name
.c_str(), perm
, std::move(alternate_name
));
7087 int Client::unlink(const char *relpath
, const UserPerm
& perm
)
7089 return unlinkat(CEPHFS_AT_FDCWD
, relpath
, 0, perm
);
7092 int Client::unlinkat(int dirfd
, const char *relpath
, int flags
, const UserPerm
& perm
)
7094 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7095 if (!mref_reader
.is_state_satisfied()) {
7096 return -CEPHFS_ENOTCONN
;
7099 tout(cct
) << __func__
<< std::endl
;
7100 tout(cct
) << dirfd
<< std::endl
;
7101 tout(cct
) << relpath
<< std::endl
;
7102 tout(cct
) << flags
<< std::endl
;
7104 if (std::string(relpath
) == "/") {
7105 return flags
& AT_REMOVEDIR
? -CEPHFS_EBUSY
: -CEPHFS_EISDIR
;
7108 filepath
path(relpath
);
7109 string name
= path
.last_dentry();
7113 std::scoped_lock
lock(client_lock
);
7116 int r
= get_fd_inode(dirfd
, &dirinode
);
7121 r
= path_walk(path
, &dir
, perm
, true, 0, dirinode
);
7125 if (cct
->_conf
->client_permissions
) {
7126 r
= may_delete(dir
.get(), name
.c_str(), perm
);
7131 if (flags
& AT_REMOVEDIR
) {
7132 r
= _rmdir(dir
.get(), name
.c_str(), perm
);
7134 r
= _unlink(dir
.get(), name
.c_str(), perm
);
7139 int Client::rename(const char *relfrom
, const char *relto
, const UserPerm
& perm
, std::string alternate_name
)
7141 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7142 if (!mref_reader
.is_state_satisfied())
7143 return -CEPHFS_ENOTCONN
;
7145 tout(cct
) << __func__
<< std::endl
;
7146 tout(cct
) << relfrom
<< std::endl
;
7147 tout(cct
) << relto
<< std::endl
;
7149 if (std::string(relfrom
) == "/" || std::string(relto
) == "/")
7150 return -CEPHFS_EBUSY
;
7152 filepath
from(relfrom
);
7154 string fromname
= from
.last_dentry();
7156 string toname
= to
.last_dentry();
7159 InodeRef fromdir
, todir
;
7161 std::scoped_lock
lock(client_lock
);
7162 int r
= path_walk(from
, &fromdir
, perm
);
7165 r
= path_walk(to
, &todir
, perm
);
7169 if (cct
->_conf
->client_permissions
) {
7170 int r
= may_delete(fromdir
.get(), fromname
.c_str(), perm
);
7173 r
= may_delete(todir
.get(), toname
.c_str(), perm
);
7174 if (r
< 0 && r
!= -CEPHFS_ENOENT
)
7177 r
= _rename(fromdir
.get(), fromname
.c_str(), todir
.get(), toname
.c_str(), perm
, std::move(alternate_name
));
7184 int Client::mkdir(const char *relpath
, mode_t mode
, const UserPerm
& perm
, std::string alternate_name
)
7186 return mkdirat(CEPHFS_AT_FDCWD
, relpath
, mode
, perm
, alternate_name
);
7189 int Client::mkdirat(int dirfd
, const char *relpath
, mode_t mode
, const UserPerm
& perm
,
7190 std::string alternate_name
)
7192 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7193 if (!mref_reader
.is_state_satisfied())
7194 return -CEPHFS_ENOTCONN
;
7196 tout(cct
) << __func__
<< std::endl
;
7197 tout(cct
) << dirfd
<< std::endl
;
7198 tout(cct
) << relpath
<< std::endl
;
7199 tout(cct
) << mode
<< std::endl
;
7200 ldout(cct
, 10) << __func__
<< ": " << relpath
<< dendl
;
7202 if (std::string(relpath
) == "/") {
7203 return -CEPHFS_EEXIST
;
7206 filepath
path(relpath
);
7207 string name
= path
.last_dentry();
7211 std::scoped_lock
lock(client_lock
);
7214 int r
= get_fd_inode(dirfd
, &dirinode
);
7219 r
= path_walk(path
, &dir
, perm
, true, 0, dirinode
);
7223 if (cct
->_conf
->client_permissions
) {
7224 r
= may_create(dir
.get(), perm
);
7229 return _mkdir(dir
.get(), name
.c_str(), mode
, perm
, 0, {}, std::move(alternate_name
));
7232 int Client::mkdirs(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
7234 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7235 if (!mref_reader
.is_state_satisfied())
7236 return -CEPHFS_ENOTCONN
;
7238 ldout(cct
, 10) << "Client::mkdirs " << relpath
<< dendl
;
7239 tout(cct
) << __func__
<< std::endl
;
7240 tout(cct
) << relpath
<< std::endl
;
7241 tout(cct
) << mode
<< std::endl
;
7243 //get through existing parts of path
7244 filepath
path(relpath
);
7246 int r
= 0, caps
= 0;
7249 std::scoped_lock
lock(client_lock
);
7251 for (i
=0; i
<path
.depth(); ++i
) {
7252 if (cct
->_conf
->client_permissions
) {
7253 r
= may_lookup(cur
.get(), perms
);
7256 caps
= CEPH_CAP_AUTH_SHARED
;
7258 r
= _lookup(cur
.get(), path
[i
].c_str(), caps
, &next
, perms
);
7263 if (r
!=-CEPHFS_ENOENT
) return r
;
7264 ldout(cct
, 20) << __func__
<< " got through " << i
<< " directories on path " << relpath
<< dendl
;
7265 //make new directory at each level
7266 for (; i
<path
.depth(); ++i
) {
7267 if (cct
->_conf
->client_permissions
) {
7268 r
= may_create(cur
.get(), perms
);
7273 r
= _mkdir(cur
.get(), path
[i
].c_str(), mode
, perms
, &next
);
7275 //check proper creation/existence
7276 if(-CEPHFS_EEXIST
== r
&& i
< path
.depth() - 1) {
7277 r
= _lookup(cur
.get(), path
[i
].c_str(), CEPH_CAP_AUTH_SHARED
, &next
, perms
);
7281 //move to new dir and continue
7283 ldout(cct
, 20) << __func__
<< ": successfully created directory "
7284 << filepath(cur
->ino
).get_path() << dendl
;
7289 int Client::rmdir(const char *relpath
, const UserPerm
& perms
)
7291 return unlinkat(CEPHFS_AT_FDCWD
, relpath
, AT_REMOVEDIR
, perms
);
7294 int Client::mknod(const char *relpath
, mode_t mode
, const UserPerm
& perms
, dev_t rdev
)
7296 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7297 if (!mref_reader
.is_state_satisfied())
7298 return -CEPHFS_ENOTCONN
;
7300 tout(cct
) << __func__
<< std::endl
;
7301 tout(cct
) << relpath
<< std::endl
;
7302 tout(cct
) << mode
<< std::endl
;
7303 tout(cct
) << rdev
<< std::endl
;
7305 if (std::string(relpath
) == "/")
7306 return -CEPHFS_EEXIST
;
7308 filepath
path(relpath
);
7309 string name
= path
.last_dentry();
7313 std::scoped_lock
lock(client_lock
);
7314 int r
= path_walk(path
, &dir
, perms
);
7317 if (cct
->_conf
->client_permissions
) {
7318 int r
= may_create(dir
.get(), perms
);
7322 return _mknod(dir
.get(), name
.c_str(), mode
, rdev
, perms
);
7327 int Client::symlink(const char *target
, const char *relpath
, const UserPerm
& perms
, std::string alternate_name
)
7329 return symlinkat(target
, CEPHFS_AT_FDCWD
, relpath
, perms
, alternate_name
);
7332 int Client::symlinkat(const char *target
, int dirfd
, const char *relpath
, const UserPerm
& perms
,
7333 std::string alternate_name
)
7335 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7336 if (!mref_reader
.is_state_satisfied()) {
7337 return -CEPHFS_ENOTCONN
;
7340 tout(cct
) << __func__
<< std::endl
;
7341 tout(cct
) << target
<< std::endl
;
7342 tout(cct
) << dirfd
<< std::endl
;
7343 tout(cct
) << relpath
<< std::endl
;
7345 if (std::string(relpath
) == "/") {
7346 return -CEPHFS_EEXIST
;
7349 filepath
path(relpath
);
7350 string name
= path
.last_dentry();
7354 std::scoped_lock
lock(client_lock
);
7357 int r
= get_fd_inode(dirfd
, &dirinode
);
7361 r
= path_walk(path
, &dir
, perms
, true, 0, dirinode
);
7365 if (cct
->_conf
->client_permissions
) {
7366 int r
= may_create(dir
.get(), perms
);
7371 return _symlink(dir
.get(), name
.c_str(), target
, perms
, std::move(alternate_name
));
7374 int Client::readlink(const char *relpath
, char *buf
, loff_t size
, const UserPerm
& perms
)
7376 return readlinkat(CEPHFS_AT_FDCWD
, relpath
, buf
, size
, perms
);
7379 int Client::readlinkat(int dirfd
, const char *relpath
, char *buf
, loff_t size
, const UserPerm
& perms
) {
7380 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7381 if (!mref_reader
.is_state_satisfied()) {
7382 return -CEPHFS_ENOTCONN
;
7385 tout(cct
) << __func__
<< std::endl
;
7386 tout(cct
) << dirfd
<< std::endl
;
7387 tout(cct
) << relpath
<< std::endl
;
7390 std::scoped_lock
lock(client_lock
);
7391 int r
= get_fd_inode(dirfd
, &dirinode
);
7397 filepath
path(relpath
);
7398 r
= path_walk(path
, &in
, perms
, false, 0, dirinode
);
7403 return _readlink(in
.get(), buf
, size
);
7406 int Client::_readlink(Inode
*in
, char *buf
, size_t size
)
7408 if (!in
->is_symlink())
7409 return -CEPHFS_EINVAL
;
7411 // copy into buf (at most size bytes)
7412 int r
= in
->symlink
.length();
7415 memcpy(buf
, in
->symlink
.c_str(), r
);
7422 int Client::_getattr(Inode
*in
, int mask
, const UserPerm
& perms
, bool force
)
7424 bool yes
= in
->caps_issued_mask(mask
, true);
7426 ldout(cct
, 10) << __func__
<< " mask " << ccap_string(mask
) << " issued=" << yes
<< dendl
;
7430 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_GETATTR
);
7432 in
->make_nosnap_relative_path(path
);
7433 req
->set_filepath(path
);
7435 req
->head
.args
.getattr
.mask
= mask
;
7437 int res
= make_request(req
, perms
);
7438 ldout(cct
, 10) << __func__
<< " result=" << res
<< dendl
;
7442 int Client::_do_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
7443 const UserPerm
& perms
, InodeRef
*inp
)
7445 int issued
= in
->caps_issued();
7447 ldout(cct
, 10) << __func__
<< " mask " << mask
<< " issued " <<
7448 ccap_string(issued
) << dendl
;
7450 if (in
->snapid
!= CEPH_NOSNAP
) {
7451 return -CEPHFS_EROFS
;
7453 if ((mask
& CEPH_SETATTR_SIZE
) &&
7454 (uint64_t)stx
->stx_size
> in
->size
&&
7455 is_quota_bytes_exceeded(in
, (uint64_t)stx
->stx_size
- in
->size
,
7457 return -CEPHFS_EDQUOT
;
7460 // make the change locally?
7461 if ((in
->cap_dirtier_uid
>= 0 && perms
.uid() != in
->cap_dirtier_uid
) ||
7462 (in
->cap_dirtier_gid
>= 0 && perms
.gid() != in
->cap_dirtier_gid
)) {
7463 ldout(cct
, 10) << __func__
<< " caller " << perms
.uid() << ":" << perms
.gid()
7464 << " != cap dirtier " << in
->cap_dirtier_uid
<< ":"
7465 << in
->cap_dirtier_gid
<< ", forcing sync setattr"
7468 * This works because we implicitly flush the caps as part of the
7469 * request, so the cap update check will happen with the writeback
7470 * cap context, and then the setattr check will happen with the
7473 * In reality this pattern is likely pretty rare (different users
7474 * setattr'ing the same file). If that turns out not to be the
7475 * case later, we can build a more complex pipelined cap writeback
7479 mask
|= CEPH_SETATTR_CTIME
;
7484 // caller just needs us to bump the ctime
7485 in
->ctime
= ceph_clock_now();
7486 in
->cap_dirtier_uid
= perms
.uid();
7487 in
->cap_dirtier_gid
= perms
.gid();
7488 if (issued
& CEPH_CAP_AUTH_EXCL
)
7489 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
7490 else if (issued
& CEPH_CAP_FILE_EXCL
)
7491 in
->mark_caps_dirty(CEPH_CAP_FILE_EXCL
);
7492 else if (issued
& CEPH_CAP_XATTR_EXCL
)
7493 in
->mark_caps_dirty(CEPH_CAP_XATTR_EXCL
);
7495 mask
|= CEPH_SETATTR_CTIME
;
7498 if (in
->caps_issued_mask(CEPH_CAP_AUTH_EXCL
)) {
7499 bool kill_sguid
= mask
& (CEPH_SETATTR_SIZE
|CEPH_SETATTR_KILL_SGUID
);
7501 mask
&= ~CEPH_SETATTR_KILL_SGUID
;
7503 if (mask
& CEPH_SETATTR_UID
) {
7504 in
->ctime
= ceph_clock_now();
7505 in
->cap_dirtier_uid
= perms
.uid();
7506 in
->cap_dirtier_gid
= perms
.gid();
7507 in
->uid
= stx
->stx_uid
;
7508 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
7509 mask
&= ~CEPH_SETATTR_UID
;
7511 ldout(cct
,10) << "changing uid to " << stx
->stx_uid
<< dendl
;
7513 if (mask
& CEPH_SETATTR_GID
) {
7514 in
->ctime
= ceph_clock_now();
7515 in
->cap_dirtier_uid
= perms
.uid();
7516 in
->cap_dirtier_gid
= perms
.gid();
7517 in
->gid
= stx
->stx_gid
;
7518 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
7519 mask
&= ~CEPH_SETATTR_GID
;
7521 ldout(cct
,10) << "changing gid to " << stx
->stx_gid
<< dendl
;
7524 if (mask
& CEPH_SETATTR_MODE
) {
7525 in
->ctime
= ceph_clock_now();
7526 in
->cap_dirtier_uid
= perms
.uid();
7527 in
->cap_dirtier_gid
= perms
.gid();
7528 in
->mode
= (in
->mode
& ~07777) | (stx
->stx_mode
& 07777);
7529 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
7530 mask
&= ~CEPH_SETATTR_MODE
;
7531 ldout(cct
,10) << "changing mode to " << stx
->stx_mode
<< dendl
;
7532 } else if (kill_sguid
&& S_ISREG(in
->mode
) && (in
->mode
& (S_IXUSR
|S_IXGRP
|S_IXOTH
))) {
7533 /* Must squash the any setuid/setgid bits with an ownership change */
7534 in
->mode
&= ~(S_ISUID
|S_ISGID
);
7535 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
7538 if (mask
& CEPH_SETATTR_BTIME
) {
7539 in
->ctime
= ceph_clock_now();
7540 in
->cap_dirtier_uid
= perms
.uid();
7541 in
->cap_dirtier_gid
= perms
.gid();
7542 in
->btime
= utime_t(stx
->stx_btime
);
7543 in
->mark_caps_dirty(CEPH_CAP_AUTH_EXCL
);
7544 mask
&= ~CEPH_SETATTR_BTIME
;
7545 ldout(cct
,10) << "changing btime to " << in
->btime
<< dendl
;
7547 } else if (mask
& CEPH_SETATTR_SIZE
) {
7548 /* If we don't have Ax, then we must ask the server to clear them on truncate */
7549 mask
|= CEPH_SETATTR_KILL_SGUID
;
7552 if (in
->caps_issued_mask(CEPH_CAP_FILE_EXCL
)) {
7553 if (mask
& (CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
)) {
7554 if (mask
& CEPH_SETATTR_MTIME
)
7555 in
->mtime
= utime_t(stx
->stx_mtime
);
7556 if (mask
& CEPH_SETATTR_ATIME
)
7557 in
->atime
= utime_t(stx
->stx_atime
);
7558 in
->ctime
= ceph_clock_now();
7559 in
->cap_dirtier_uid
= perms
.uid();
7560 in
->cap_dirtier_gid
= perms
.gid();
7561 in
->time_warp_seq
++;
7562 in
->mark_caps_dirty(CEPH_CAP_FILE_EXCL
);
7563 mask
&= ~(CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
);
7572 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SETATTR
);
7576 in
->make_nosnap_relative_path(path
);
7577 req
->set_filepath(path
);
7580 if (mask
& CEPH_SETATTR_KILL_SGUID
) {
7581 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
7583 if (mask
& CEPH_SETATTR_MODE
) {
7584 req
->head
.args
.setattr
.mode
= stx
->stx_mode
;
7585 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
7586 ldout(cct
,10) << "changing mode to " << stx
->stx_mode
<< dendl
;
7588 if (mask
& CEPH_SETATTR_UID
) {
7589 req
->head
.args
.setattr
.uid
= stx
->stx_uid
;
7590 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
7591 ldout(cct
,10) << "changing uid to " << stx
->stx_uid
<< dendl
;
7593 if (mask
& CEPH_SETATTR_GID
) {
7594 req
->head
.args
.setattr
.gid
= stx
->stx_gid
;
7595 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
7596 ldout(cct
,10) << "changing gid to " << stx
->stx_gid
<< dendl
;
7598 if (mask
& CEPH_SETATTR_BTIME
) {
7599 req
->head
.args
.setattr
.btime
= utime_t(stx
->stx_btime
);
7600 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
7602 if (mask
& CEPH_SETATTR_MTIME
) {
7603 req
->head
.args
.setattr
.mtime
= utime_t(stx
->stx_mtime
);
7604 req
->inode_drop
|= CEPH_CAP_FILE_SHARED
| CEPH_CAP_FILE_RD
|
7607 if (mask
& CEPH_SETATTR_ATIME
) {
7608 req
->head
.args
.setattr
.atime
= utime_t(stx
->stx_atime
);
7609 req
->inode_drop
|= CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_RD
|
7612 if (mask
& CEPH_SETATTR_SIZE
) {
7613 if ((uint64_t)stx
->stx_size
< mdsmap
->get_max_filesize()) {
7614 req
->head
.args
.setattr
.size
= stx
->stx_size
;
7615 ldout(cct
,10) << "changing size to " << stx
->stx_size
<< dendl
;
7618 ldout(cct
,10) << "unable to set size to " << stx
->stx_size
<< ". Too large!" << dendl
;
7619 return -CEPHFS_EFBIG
;
7621 req
->inode_drop
|= CEPH_CAP_FILE_SHARED
| CEPH_CAP_FILE_RD
|
7624 req
->head
.args
.setattr
.mask
= mask
;
7626 req
->regetattr_mask
= mask
;
7628 int res
= make_request(req
, perms
, inp
);
7629 ldout(cct
, 10) << "_setattr result=" << res
<< dendl
;
7633 /* Note that we only care about attrs that setattr cares about */
7634 void Client::stat_to_statx(struct stat
*st
, struct ceph_statx
*stx
)
7636 stx
->stx_size
= st
->st_size
;
7637 stx
->stx_mode
= st
->st_mode
;
7638 stx
->stx_uid
= st
->st_uid
;
7639 stx
->stx_gid
= st
->st_gid
;
7641 stx
->stx_mtime
= st
->st_mtimespec
;
7642 stx
->stx_atime
= st
->st_atimespec
;
7644 stx
->stx_mtime
.tv_sec
= st
->st_mtime
;
7645 stx
->stx_atime
.tv_sec
= st
->st_atime
;
7647 stx
->stx_mtime
= st
->st_mtim
;
7648 stx
->stx_atime
= st
->st_atim
;
7652 int Client::__setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
7653 const UserPerm
& perms
, InodeRef
*inp
)
7655 int ret
= _do_setattr(in
, stx
, mask
, perms
, inp
);
7658 if (mask
& CEPH_SETATTR_MODE
)
7659 ret
= _posix_acl_chmod(in
, stx
->stx_mode
, perms
);
7663 int Client::_setattrx(InodeRef
&in
, struct ceph_statx
*stx
, int mask
,
7664 const UserPerm
& perms
)
7666 mask
&= (CEPH_SETATTR_MODE
| CEPH_SETATTR_UID
|
7667 CEPH_SETATTR_GID
| CEPH_SETATTR_MTIME
|
7668 CEPH_SETATTR_ATIME
| CEPH_SETATTR_SIZE
|
7669 CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
);
7670 if (cct
->_conf
->client_permissions
) {
7671 int r
= may_setattr(in
.get(), stx
, mask
, perms
);
7675 return __setattrx(in
.get(), stx
, mask
, perms
);
7678 int Client::_setattr(InodeRef
&in
, struct stat
*attr
, int mask
,
7679 const UserPerm
& perms
)
7681 struct ceph_statx stx
;
7683 stat_to_statx(attr
, &stx
);
7684 mask
&= ~CEPH_SETATTR_BTIME
;
7686 if ((mask
& CEPH_SETATTR_UID
) && attr
->st_uid
== static_cast<uid_t
>(-1)) {
7687 mask
&= ~CEPH_SETATTR_UID
;
7689 if ((mask
& CEPH_SETATTR_GID
) && attr
->st_gid
== static_cast<uid_t
>(-1)) {
7690 mask
&= ~CEPH_SETATTR_GID
;
7693 return _setattrx(in
, &stx
, mask
, perms
);
7696 int Client::setattr(const char *relpath
, struct stat
*attr
, int mask
,
7697 const UserPerm
& perms
)
7699 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7700 if (!mref_reader
.is_state_satisfied())
7701 return -CEPHFS_ENOTCONN
;
7703 tout(cct
) << __func__
<< std::endl
;
7704 tout(cct
) << relpath
<< std::endl
;
7705 tout(cct
) << mask
<< std::endl
;
7707 filepath
path(relpath
);
7710 std::scoped_lock
lock(client_lock
);
7711 int r
= path_walk(path
, &in
, perms
);
7714 return _setattr(in
, attr
, mask
, perms
);
7717 int Client::setattrx(const char *relpath
, struct ceph_statx
*stx
, int mask
,
7718 const UserPerm
& perms
, int flags
)
7720 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7721 if (!mref_reader
.is_state_satisfied())
7722 return -CEPHFS_ENOTCONN
;
7724 tout(cct
) << __func__
<< std::endl
;
7725 tout(cct
) << relpath
<< std::endl
;
7726 tout(cct
) << mask
<< std::endl
;
7728 filepath
path(relpath
);
7731 std::scoped_lock
lock(client_lock
);
7732 int r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
));
7735 return _setattrx(in
, stx
, mask
, perms
);
7738 int Client::fsetattr(int fd
, struct stat
*attr
, int mask
, const UserPerm
& perms
)
7740 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7741 if (!mref_reader
.is_state_satisfied())
7742 return -CEPHFS_ENOTCONN
;
7744 tout(cct
) << __func__
<< std::endl
;
7745 tout(cct
) << fd
<< std::endl
;
7746 tout(cct
) << mask
<< std::endl
;
7748 std::scoped_lock
lock(client_lock
);
7749 Fh
*f
= get_filehandle(fd
);
7751 return -CEPHFS_EBADF
;
7752 #if defined(__linux__) && defined(O_PATH)
7753 if (f
->flags
& O_PATH
)
7754 return -CEPHFS_EBADF
;
7756 return _setattr(f
->inode
, attr
, mask
, perms
);
7759 int Client::fsetattrx(int fd
, struct ceph_statx
*stx
, int mask
, const UserPerm
& perms
)
7761 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7762 if (!mref_reader
.is_state_satisfied())
7763 return -CEPHFS_ENOTCONN
;
7765 tout(cct
) << __func__
<< std::endl
;
7766 tout(cct
) << fd
<< std::endl
;
7767 tout(cct
) << mask
<< std::endl
;
7769 std::scoped_lock
lock(client_lock
);
7770 Fh
*f
= get_filehandle(fd
);
7772 return -CEPHFS_EBADF
;
7773 #if defined(__linux__) && defined(O_PATH)
7774 if (f
->flags
& O_PATH
)
7775 return -CEPHFS_EBADF
;
7777 return _setattrx(f
->inode
, stx
, mask
, perms
);
7780 int Client::stat(const char *relpath
, struct stat
*stbuf
, const UserPerm
& perms
,
7781 frag_info_t
*dirstat
, int mask
)
7783 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7784 if (!mref_reader
.is_state_satisfied())
7785 return -CEPHFS_ENOTCONN
;
7787 ldout(cct
, 3) << __func__
<< " enter (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7788 tout(cct
) << "stat" << std::endl
;
7789 tout(cct
) << relpath
<< std::endl
;
7791 filepath
path(relpath
);
7794 std::scoped_lock
lock(client_lock
);
7795 int r
= path_walk(path
, &in
, perms
, true, mask
);
7798 r
= _getattr(in
, mask
, perms
);
7800 ldout(cct
, 3) << __func__
<< " exit on error!" << dendl
;
7803 fill_stat(in
, stbuf
, dirstat
);
7804 ldout(cct
, 3) << __func__
<< " exit (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7808 unsigned Client::statx_to_mask(unsigned int flags
, unsigned int want
)
7812 /* if NO_ATTR_SYNC is set, then we don't need any -- just use what's in cache */
7813 if (flags
& AT_NO_ATTR_SYNC
)
7816 /* Always set PIN to distinguish from AT_NO_ATTR_SYNC case */
7817 mask
|= CEPH_CAP_PIN
;
7818 if (want
& (CEPH_STATX_MODE
|CEPH_STATX_UID
|CEPH_STATX_GID
|CEPH_STATX_BTIME
|CEPH_STATX_CTIME
|CEPH_STATX_VERSION
))
7819 mask
|= CEPH_CAP_AUTH_SHARED
;
7820 if (want
& (CEPH_STATX_NLINK
|CEPH_STATX_CTIME
|CEPH_STATX_VERSION
))
7821 mask
|= CEPH_CAP_LINK_SHARED
;
7822 if (want
& (CEPH_STATX_NLINK
|CEPH_STATX_ATIME
|CEPH_STATX_MTIME
|CEPH_STATX_CTIME
|CEPH_STATX_SIZE
|CEPH_STATX_BLOCKS
|CEPH_STATX_VERSION
))
7823 mask
|= CEPH_CAP_FILE_SHARED
;
7824 if (want
& (CEPH_STATX_VERSION
|CEPH_STATX_CTIME
))
7825 mask
|= CEPH_CAP_XATTR_SHARED
;
7830 int Client::statx(const char *relpath
, struct ceph_statx
*stx
,
7831 const UserPerm
& perms
,
7832 unsigned int want
, unsigned int flags
)
7834 return statxat(CEPHFS_AT_FDCWD
, relpath
, stx
, perms
, want
, flags
);
7837 int Client::lstat(const char *relpath
, struct stat
*stbuf
,
7838 const UserPerm
& perms
, frag_info_t
*dirstat
, int mask
)
7840 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
7841 if (!mref_reader
.is_state_satisfied())
7842 return -CEPHFS_ENOTCONN
;
7844 ldout(cct
, 3) << __func__
<< " enter (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7845 tout(cct
) << __func__
<< std::endl
;
7846 tout(cct
) << relpath
<< std::endl
;
7848 filepath
path(relpath
);
7851 std::scoped_lock
lock(client_lock
);
7852 // don't follow symlinks
7853 int r
= path_walk(path
, &in
, perms
, false, mask
);
7856 r
= _getattr(in
, mask
, perms
);
7858 ldout(cct
, 3) << __func__
<< " exit on error!" << dendl
;
7861 fill_stat(in
, stbuf
, dirstat
);
7862 ldout(cct
, 3) << __func__
<< " exit (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7866 int Client::fill_stat(Inode
*in
, struct stat
*st
, frag_info_t
*dirstat
, nest_info_t
*rstat
)
7868 ldout(cct
, 10) << __func__
<< " on " << in
->ino
<< " snap/dev" << in
->snapid
7869 << " mode 0" << oct
<< in
->mode
<< dec
7870 << " mtime " << in
->mtime
<< " ctime " << in
->ctime
<< dendl
;
7871 memset(st
, 0, sizeof(struct stat
));
7872 if (use_faked_inos())
7873 st
->st_ino
= in
->faked_ino
;
7875 st
->st_ino
= in
->ino
;
7876 st
->st_dev
= in
->snapid
;
7877 st
->st_mode
= in
->mode
;
7878 st
->st_rdev
= in
->rdev
;
7880 switch (in
->nlink
) {
7882 st
->st_nlink
= 0; /* dir is unlinked */
7885 st
->st_nlink
= 1 /* parent dentry */
7887 + in
->dirstat
.nsubdirs
; /* include <dir>/. self-reference */
7893 st
->st_nlink
= in
->nlink
;
7895 st
->st_uid
= in
->uid
;
7896 st
->st_gid
= in
->gid
;
7897 if (in
->ctime
> in
->mtime
) {
7898 stat_set_ctime_sec(st
, in
->ctime
.sec());
7899 stat_set_ctime_nsec(st
, in
->ctime
.nsec());
7901 stat_set_ctime_sec(st
, in
->mtime
.sec());
7902 stat_set_ctime_nsec(st
, in
->mtime
.nsec());
7904 stat_set_atime_sec(st
, in
->atime
.sec());
7905 stat_set_atime_nsec(st
, in
->atime
.nsec());
7906 stat_set_mtime_sec(st
, in
->mtime
.sec());
7907 stat_set_mtime_nsec(st
, in
->mtime
.nsec());
7909 if (cct
->_conf
->client_dirsize_rbytes
)
7910 st
->st_size
= in
->rstat
.rbytes
;
7912 st
->st_size
= in
->dirstat
.size();
7913 // The Windows "stat" structure provides just a subset of the fields that are
7914 // available on Linux.
7919 st
->st_size
= in
->size
;
7921 st
->st_blocks
= (in
->size
+ 511) >> 9;
7925 st
->st_blksize
= std::max
<uint32_t>(in
->layout
.stripe_unit
, 4096);
7929 *dirstat
= in
->dirstat
;
7933 return in
->caps_issued();
7936 void Client::fill_statx(Inode
*in
, unsigned int mask
, struct ceph_statx
*stx
)
7938 ldout(cct
, 10) << __func__
<< " on " << in
->ino
<< " snap/dev" << in
->snapid
7939 << " mode 0" << oct
<< in
->mode
<< dec
7940 << " mtime " << in
->mtime
<< " ctime " << in
->ctime
<< dendl
;
7941 memset(stx
, 0, sizeof(struct ceph_statx
));
7944 * If mask is 0, then the caller set AT_NO_ATTR_SYNC. Reset the mask
7945 * so that all bits are set.
7950 /* These are always considered to be available */
7951 stx
->stx_dev
= in
->snapid
;
7952 stx
->stx_blksize
= std::max
<uint32_t>(in
->layout
.stripe_unit
, 4096);
7954 /* Type bits are always set, even when CEPH_STATX_MODE is not */
7955 stx
->stx_mode
= S_IFMT
& in
->mode
;
7956 stx
->stx_ino
= use_faked_inos() ? in
->faked_ino
: (ino_t
)in
->ino
;
7957 stx
->stx_rdev
= in
->rdev
;
7958 stx
->stx_mask
|= (CEPH_STATX_INO
|CEPH_STATX_RDEV
);
7960 if (mask
& CEPH_CAP_AUTH_SHARED
) {
7961 stx
->stx_uid
= in
->uid
;
7962 stx
->stx_gid
= in
->gid
;
7963 stx
->stx_mode
= in
->mode
;
7964 in
->btime
.to_timespec(&stx
->stx_btime
);
7965 stx
->stx_mask
|= (CEPH_STATX_MODE
|CEPH_STATX_UID
|CEPH_STATX_GID
|CEPH_STATX_BTIME
);
7968 if (mask
& CEPH_CAP_LINK_SHARED
) {
7970 switch (in
->nlink
) {
7972 stx
->stx_nlink
= 0; /* dir is unlinked */
7975 stx
->stx_nlink
= 1 /* parent dentry */
7977 + in
->dirstat
.nsubdirs
; /* include <dir>/. self-reference */
7983 stx
->stx_nlink
= in
->nlink
;
7985 stx
->stx_mask
|= CEPH_STATX_NLINK
;
7988 if (mask
& CEPH_CAP_FILE_SHARED
) {
7990 in
->atime
.to_timespec(&stx
->stx_atime
);
7991 in
->mtime
.to_timespec(&stx
->stx_mtime
);
7994 if (cct
->_conf
->client_dirsize_rbytes
)
7995 stx
->stx_size
= in
->rstat
.rbytes
;
7997 stx
->stx_size
= in
->dirstat
.size();
7998 stx
->stx_blocks
= 1;
8000 stx
->stx_size
= in
->size
;
8001 stx
->stx_blocks
= (in
->size
+ 511) >> 9;
8003 stx
->stx_mask
|= (CEPH_STATX_ATIME
|CEPH_STATX_MTIME
|
8004 CEPH_STATX_SIZE
|CEPH_STATX_BLOCKS
);
8007 /* Change time and change_attr both require all shared caps to view */
8008 if ((mask
& CEPH_STAT_CAP_INODE_ALL
) == CEPH_STAT_CAP_INODE_ALL
) {
8009 stx
->stx_version
= in
->change_attr
;
8010 if (in
->ctime
> in
->mtime
)
8011 in
->ctime
.to_timespec(&stx
->stx_ctime
);
8013 in
->mtime
.to_timespec(&stx
->stx_ctime
);
8014 stx
->stx_mask
|= (CEPH_STATX_CTIME
|CEPH_STATX_VERSION
);
8019 void Client::touch_dn(Dentry
*dn
)
8024 int Client::chmod(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
8026 return chmodat(CEPHFS_AT_FDCWD
, relpath
, mode
, 0, perms
);
8029 int Client::fchmod(int fd
, mode_t mode
, const UserPerm
& perms
)
8031 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8032 if (!mref_reader
.is_state_satisfied())
8033 return -CEPHFS_ENOTCONN
;
8035 tout(cct
) << __func__
<< std::endl
;
8036 tout(cct
) << fd
<< std::endl
;
8037 tout(cct
) << mode
<< std::endl
;
8039 std::scoped_lock
lock(client_lock
);
8040 Fh
*f
= get_filehandle(fd
);
8042 return -CEPHFS_EBADF
;
8043 #if defined(__linux__) && defined(O_PATH)
8044 if (f
->flags
& O_PATH
)
8045 return -CEPHFS_EBADF
;
8048 attr
.st_mode
= mode
;
8049 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_MODE
, perms
);
8052 int Client::chmodat(int dirfd
, const char *relpath
, mode_t mode
, int flags
,
8053 const UserPerm
& perms
) {
8054 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8055 if (!mref_reader
.is_state_satisfied()) {
8056 return -CEPHFS_ENOTCONN
;
8059 tout(cct
) << __func__
<< std::endl
;
8060 tout(cct
) << dirfd
<< std::endl
;
8061 tout(cct
) << relpath
<< std::endl
;
8062 tout(cct
) << mode
<< std::endl
;
8063 tout(cct
) << flags
<< std::endl
;
8065 filepath
path(relpath
);
8069 std::scoped_lock
lock(client_lock
);
8070 int r
= get_fd_inode(dirfd
, &dirinode
);
8075 r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), 0, dirinode
);
8080 attr
.st_mode
= mode
;
8081 return _setattr(in
, &attr
, CEPH_SETATTR_MODE
, perms
);
8084 int Client::lchmod(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
8086 return chmodat(CEPHFS_AT_FDCWD
, relpath
, mode
, AT_SYMLINK_NOFOLLOW
, perms
);
8089 int Client::chown(const char *relpath
, uid_t new_uid
, gid_t new_gid
,
8090 const UserPerm
& perms
)
8092 return chownat(CEPHFS_AT_FDCWD
, relpath
, new_uid
, new_gid
, 0, perms
);
8095 int Client::fchown(int fd
, uid_t new_uid
, gid_t new_gid
, const UserPerm
& perms
)
8097 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8098 if (!mref_reader
.is_state_satisfied())
8099 return -CEPHFS_ENOTCONN
;
8101 tout(cct
) << __func__
<< std::endl
;
8102 tout(cct
) << fd
<< std::endl
;
8103 tout(cct
) << new_uid
<< std::endl
;
8104 tout(cct
) << new_gid
<< std::endl
;
8106 std::scoped_lock
lock(client_lock
);
8107 Fh
*f
= get_filehandle(fd
);
8109 return -CEPHFS_EBADF
;
8110 #if defined(__linux__) && defined(O_PATH)
8111 if (f
->flags
& O_PATH
)
8112 return -CEPHFS_EBADF
;
8115 attr
.st_uid
= new_uid
;
8116 attr
.st_gid
= new_gid
;
8118 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
8119 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
8120 return _setattr(f
->inode
, &attr
, mask
, perms
);
8123 int Client::lchown(const char *relpath
, uid_t new_uid
, gid_t new_gid
,
8124 const UserPerm
& perms
)
8126 return chownat(CEPHFS_AT_FDCWD
, relpath
, new_uid
, new_gid
, AT_SYMLINK_NOFOLLOW
, perms
);
8129 int Client::chownat(int dirfd
, const char *relpath
, uid_t new_uid
, gid_t new_gid
,
8130 int flags
, const UserPerm
& perms
) {
8131 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8132 if (!mref_reader
.is_state_satisfied()) {
8133 return -CEPHFS_ENOTCONN
;
8136 tout(cct
) << __func__
<< std::endl
;
8137 tout(cct
) << dirfd
<< std::endl
;
8138 tout(cct
) << relpath
<< std::endl
;
8139 tout(cct
) << new_uid
<< std::endl
;
8140 tout(cct
) << new_gid
<< std::endl
;
8141 tout(cct
) << flags
<< std::endl
;
8143 filepath
path(relpath
);
8147 std::scoped_lock
lock(client_lock
);
8148 int r
= get_fd_inode(dirfd
, &dirinode
);
8153 r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), 0, dirinode
);
8158 attr
.st_uid
= new_uid
;
8159 attr
.st_gid
= new_gid
;
8160 return _setattr(in
, &attr
, CEPH_SETATTR_UID
|CEPH_SETATTR_GID
, perms
);
8163 static void attr_set_atime_and_mtime(struct stat
*attr
,
8164 const utime_t
&atime
,
8165 const utime_t
&mtime
)
8167 stat_set_atime_sec(attr
, atime
.tv
.tv_sec
);
8168 stat_set_atime_nsec(attr
, atime
.tv
.tv_nsec
);
8169 stat_set_mtime_sec(attr
, mtime
.tv
.tv_sec
);
8170 stat_set_mtime_nsec(attr
, mtime
.tv
.tv_nsec
);
8173 // for [l]utime() invoke the timeval variant as the timespec
8174 // variant are not yet implemented. for futime[s](), invoke
8175 // the timespec variant.
8176 int Client::utime(const char *relpath
, struct utimbuf
*buf
,
8177 const UserPerm
& perms
)
8179 struct timeval tv
[2];
8180 tv
[0].tv_sec
= buf
->actime
;
8182 tv
[1].tv_sec
= buf
->modtime
;
8185 return utimes(relpath
, tv
, perms
);
8188 int Client::lutime(const char *relpath
, struct utimbuf
*buf
,
8189 const UserPerm
& perms
)
8191 struct timeval tv
[2];
8192 tv
[0].tv_sec
= buf
->actime
;
8194 tv
[1].tv_sec
= buf
->modtime
;
8197 return lutimes(relpath
, tv
, perms
);
8200 int Client::futime(int fd
, struct utimbuf
*buf
, const UserPerm
& perms
)
8202 struct timespec ts
[2];
8203 ts
[0].tv_sec
= buf
->actime
;
8205 ts
[1].tv_sec
= buf
->modtime
;
8208 return futimens(fd
, ts
, perms
);
8211 int Client::utimes(const char *relpath
, struct timeval times
[2],
8212 const UserPerm
& perms
)
8214 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8215 if (!mref_reader
.is_state_satisfied())
8216 return -CEPHFS_ENOTCONN
;
8218 tout(cct
) << __func__
<< std::endl
;
8219 tout(cct
) << relpath
<< std::endl
;
8220 tout(cct
) << "atime: " << times
[0].tv_sec
<< "." << times
[0].tv_usec
8222 tout(cct
) << "mtime: " << times
[1].tv_sec
<< "." << times
[1].tv_usec
8225 filepath
path(relpath
);
8228 std::scoped_lock
lock(client_lock
);
8229 int r
= path_walk(path
, &in
, perms
);
8233 utime_t
atime(times
[0]);
8234 utime_t
mtime(times
[1]);
8236 attr_set_atime_and_mtime(&attr
, atime
, mtime
);
8237 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
8240 int Client::lutimes(const char *relpath
, struct timeval times
[2],
8241 const UserPerm
& perms
)
8243 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8244 if (!mref_reader
.is_state_satisfied())
8245 return -CEPHFS_ENOTCONN
;
8247 tout(cct
) << __func__
<< std::endl
;
8248 tout(cct
) << relpath
<< std::endl
;
8249 tout(cct
) << "atime: " << times
[0].tv_sec
<< "." << times
[0].tv_usec
8251 tout(cct
) << "mtime: " << times
[1].tv_sec
<< "." << times
[1].tv_usec
8254 filepath
path(relpath
);
8257 std::scoped_lock
lock(client_lock
);
8258 int r
= path_walk(path
, &in
, perms
, false);
8262 utime_t
atime(times
[0]);
8263 utime_t
mtime(times
[1]);
8265 attr_set_atime_and_mtime(&attr
, atime
, mtime
);
8266 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
8269 int Client::futimes(int fd
, struct timeval times
[2], const UserPerm
& perms
)
8271 struct timespec ts
[2];
8272 ts
[0].tv_sec
= times
[0].tv_sec
;
8273 ts
[0].tv_nsec
= times
[0].tv_usec
* 1000;
8274 ts
[1].tv_sec
= times
[1].tv_sec
;
8275 ts
[1].tv_nsec
= times
[1].tv_usec
* 1000;
8277 return futimens(fd
, ts
, perms
);
8280 int Client::futimens(int fd
, struct timespec times
[2], const UserPerm
& perms
)
8282 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8283 if (!mref_reader
.is_state_satisfied())
8284 return -CEPHFS_ENOTCONN
;
8286 tout(cct
) << __func__
<< std::endl
;
8287 tout(cct
) << fd
<< std::endl
;
8288 tout(cct
) << "atime: " << times
[0].tv_sec
<< "." << times
[0].tv_nsec
8290 tout(cct
) << "mtime: " << times
[1].tv_sec
<< "." << times
[1].tv_nsec
8293 std::scoped_lock
lock(client_lock
);
8294 Fh
*f
= get_filehandle(fd
);
8296 return -CEPHFS_EBADF
;
8297 #if defined(__linux__) && defined(O_PATH)
8298 if (f
->flags
& O_PATH
)
8299 return -CEPHFS_EBADF
;
8302 utime_t
atime(times
[0]);
8303 utime_t
mtime(times
[1]);
8305 attr_set_atime_and_mtime(&attr
, atime
, mtime
);
8306 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
8309 int Client::utimensat(int dirfd
, const char *relpath
, struct timespec times
[2], int flags
,
8310 const UserPerm
& perms
) {
8311 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8312 if (!mref_reader
.is_state_satisfied()) {
8313 return -CEPHFS_ENOTCONN
;
8316 tout(cct
) << __func__
<< std::endl
;
8317 tout(cct
) << dirfd
<< std::endl
;
8318 tout(cct
) << relpath
<< std::endl
;
8319 tout(cct
) << "atime: " << times
[0].tv_sec
<< "." << times
[0].tv_nsec
8321 tout(cct
) << "mtime: " << times
[1].tv_sec
<< "." << times
[1].tv_nsec
8323 tout(cct
) << flags
<< std::endl
;
8325 filepath
path(relpath
);
8329 std::scoped_lock
lock(client_lock
);
8330 int r
= get_fd_inode(dirfd
, &dirinode
);
8335 #if defined(__linux__) && defined(O_PATH)
8336 if (flags
& O_PATH
) {
8337 return -CEPHFS_EBADF
;
8341 r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), 0, dirinode
);
8346 utime_t
atime(times
[0]);
8347 utime_t
mtime(times
[1]);
8349 attr_set_atime_and_mtime(&attr
, atime
, mtime
);
8350 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
8353 int Client::flock(int fd
, int operation
, uint64_t owner
)
8355 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8356 if (!mref_reader
.is_state_satisfied())
8357 return -CEPHFS_ENOTCONN
;
8359 tout(cct
) << __func__
<< std::endl
;
8360 tout(cct
) << fd
<< std::endl
;
8361 tout(cct
) << operation
<< std::endl
;
8362 tout(cct
) << owner
<< std::endl
;
8364 std::scoped_lock
lock(client_lock
);
8365 Fh
*f
= get_filehandle(fd
);
8367 return -CEPHFS_EBADF
;
8369 return _flock(f
, operation
, owner
);
8372 int Client::opendir(const char *relpath
, dir_result_t
**dirpp
, const UserPerm
& perms
)
8374 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8375 if (!mref_reader
.is_state_satisfied())
8376 return -CEPHFS_ENOTCONN
;
8378 tout(cct
) << __func__
<< std::endl
;
8379 tout(cct
) << relpath
<< std::endl
;
8381 filepath
path(relpath
);
8384 std::scoped_lock
lock(client_lock
);
8385 int r
= path_walk(path
, &in
, perms
, true);
8388 if (cct
->_conf
->client_permissions
) {
8389 int r
= may_open(in
.get(), O_RDONLY
, perms
);
8393 r
= _opendir(in
.get(), dirpp
, perms
);
8394 /* if ENOTDIR, dirpp will be an uninitialized point and it's very dangerous to access its value */
8395 if (r
!= -CEPHFS_ENOTDIR
)
8396 tout(cct
) << (uintptr_t)*dirpp
<< std::endl
;
int Client::fdopendir(int dirfd, dir_result_t **dirpp, const UserPerm &perms) {
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied()) {
    return -CEPHFS_ENOTCONN;
  }

  tout(cct) << __func__ << std::endl;
  tout(cct) << dirfd << std::endl;

  InodeRef dirinode;
  std::scoped_lock locker(client_lock);
  int r = get_fd_inode(dirfd, &dirinode);
  if (r < 0) {
    return r;
  }
  if (cct->_conf->client_permissions) {
    r = may_open(dirinode.get(), O_RDONLY, perms);
    if (r < 0) {
      return r;
    }
  }
  r = _opendir(dirinode.get(), dirpp, perms);
  /* if ENOTDIR, dirpp will be an uninitialized pointer and it's very dangerous to access its value */
  if (r != -CEPHFS_ENOTDIR) {
    tout(cct) << (uintptr_t)*dirpp << std::endl;
  }
  return r;
}
int Client::_opendir(Inode *in, dir_result_t **dirpp, const UserPerm& perms)
{
  if (!in->is_dir())
    return -CEPHFS_ENOTDIR;
  *dirpp = new dir_result_t(in, perms);
  opened_dirs.insert(*dirpp);
  ldout(cct, 8) << __func__ << "(" << in->ino << ") = " << 0 << " (" << *dirpp << ")" << dendl;
  return 0;
}
int Client::closedir(dir_result_t *dir)
{
  tout(cct) << __func__ << std::endl;
  tout(cct) << (uintptr_t)dir << std::endl;

  ldout(cct, 3) << __func__ << "(" << dir << ") = 0" << dendl;
  std::scoped_lock lock(client_lock);
  _closedir(dir);
  return 0;
}
void Client::_closedir(dir_result_t *dirp)
{
  ldout(cct, 10) << __func__ << "(" << dirp << ")" << dendl;

  if (dirp->inode) {
    ldout(cct, 10) << __func__ << " detaching inode " << dirp->inode << dendl;
    dirp->inode.reset();
  }
  _readdir_drop_dirp_buffer(dirp);
  opened_dirs.erase(dirp);
  delete dirp;
}
void Client::rewinddir(dir_result_t *dirp)
{
  ldout(cct, 3) << __func__ << "(" << dirp << ")" << dendl;

  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return;

  std::scoped_lock lock(client_lock);
  dir_result_t *d = static_cast<dir_result_t*>(dirp);
  _readdir_drop_dirp_buffer(d);
  d->reset();
}
loff_t Client::telldir(dir_result_t *dirp)
{
  dir_result_t *d = static_cast<dir_result_t*>(dirp);
  ldout(cct, 3) << __func__ << "(" << dirp << ") = " << d->offset << dendl;
  return d->offset;
}
void Client::seekdir(dir_result_t *dirp, loff_t offset)
{
  ldout(cct, 3) << __func__ << "(" << dirp << ", " << offset << ")" << dendl;

  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return;

  std::scoped_lock lock(client_lock);

  if (offset == dirp->offset)
    return;

  if (offset > dirp->offset)
    dirp->release_count = 0;   // bump if we do a forward seek
  dirp->ordered_count = 0;   // disable filling readdir cache

  if (dirp->hash_order()) {
    if (dirp->offset > offset) {
      _readdir_drop_dirp_buffer(dirp);
      dirp->reset();
    }
  } else {
    if (dirp->buffer_frag != frag_t(dir_result_t::fpos_high(offset)) ||
	dirp->offset_low() > dir_result_t::fpos_low(offset)) {
      _readdir_drop_dirp_buffer(dirp);
      dirp->reset();
    }
  }

  dirp->offset = offset;
}
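/*
 * Illustrative sketch (an assumption, not part of this file): how telldir()
 * and seekdir() above are meant to round-trip. The only safe seek targets
 * are 0 (equivalent to rewinddir()) and values previously returned by
 * telldir() on the same handle, since the offset encodes the dirfrag in its
 * high bits and the position (or name hash) in its low bits. `client` and
 * `perms` are hypothetical caller state.
 *
 *   dir_result_t *dirp = nullptr;
 *   client->opendir("somedir", &dirp, perms);
 *   loff_t mark = client->telldir(dirp);   // remember current position
 *   // ... read some entries ...
 *   client->seekdir(dirp, mark);           // resume from the saved position
 *   client->closedir(dirp);
 */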
//   ino_t          d_ino;       /* inode number */
//   off_t          d_off;       /* offset to the next dirent */
//   unsigned short d_reclen;    /* length of this record */
//   unsigned char  d_type;      /* type of file */
//   char           d_name[256]; /* filename */

void Client::fill_dirent(struct dirent *de, const char *name, int type, uint64_t ino, loff_t next_off)
{
  strncpy(de->d_name, name, 255);
  de->d_name[255] = '\0';
#if !defined(__CYGWIN__) && !(defined(_WIN32))
  de->d_ino = ino;
#if !defined(__APPLE__) && !defined(__FreeBSD__)
  de->d_off = next_off;
#endif
  de->d_type = IFTODT(type);
  ldout(cct, 10) << __func__ << " '" << de->d_name << "' -> " << inodeno_t(de->d_ino)
		 << " type " << (int)de->d_type << " w/ next_off " << hex << next_off << dec << dendl;
#endif
}
8545 void Client::_readdir_next_frag(dir_result_t
*dirp
)
8547 frag_t fg
= dirp
->buffer_frag
;
8549 if (fg
.is_rightmost()) {
8550 ldout(cct
, 10) << __func__
<< " advance from " << fg
<< " to END" << dendl
;
8557 ldout(cct
, 10) << __func__
<< " advance from " << dirp
->buffer_frag
<< " to " << fg
<< dendl
;
8559 if (dirp
->hash_order()) {
8561 int64_t new_offset
= dir_result_t::make_fpos(fg
.value(), 2, true);
8562 if (dirp
->offset
< new_offset
) // don't decrease offset
8563 dirp
->offset
= new_offset
;
8565 dirp
->last_name
.clear();
8566 dirp
->offset
= dir_result_t::make_fpos(fg
, 2, false);
8567 _readdir_rechoose_frag(dirp
);
8571 void Client::_readdir_rechoose_frag(dir_result_t
*dirp
)
8573 ceph_assert(dirp
->inode
);
8575 if (dirp
->hash_order())
8578 frag_t cur
= frag_t(dirp
->offset_high());
8579 frag_t fg
= dirp
->inode
->dirfragtree
[cur
.value()];
8581 ldout(cct
, 10) << __func__
<< " frag " << cur
<< " maps to " << fg
<< dendl
;
8582 dirp
->offset
= dir_result_t::make_fpos(fg
, 2, false);
8583 dirp
->last_name
.clear();
8584 dirp
->next_offset
= 2;
8588 void Client::_readdir_drop_dirp_buffer(dir_result_t
*dirp
)
8590 ldout(cct
, 10) << __func__
<< " " << dirp
<< dendl
;
8591 dirp
->buffer
.clear();
8594 int Client::_readdir_get_frag(dir_result_t
*dirp
)
8597 ceph_assert(dirp
->inode
);
8599 // get the current frag.
8601 if (dirp
->hash_order())
8602 fg
= dirp
->inode
->dirfragtree
[dirp
->offset_high()];
8604 fg
= frag_t(dirp
->offset_high());
8606 ldout(cct
, 10) << __func__
<< " " << dirp
<< " on " << dirp
->inode
->ino
<< " fg " << fg
8607 << " offset " << hex
<< dirp
->offset
<< dec
<< dendl
;
8609 int op
= CEPH_MDS_OP_READDIR
;
8610 if (dirp
->inode
&& dirp
->inode
->snapid
== CEPH_SNAPDIR
)
8611 op
= CEPH_MDS_OP_LSSNAP
;
8613 InodeRef
& diri
= dirp
->inode
;
8615 MetaRequest
*req
= new MetaRequest(op
);
8617 diri
->make_nosnap_relative_path(path
);
8618 req
->set_filepath(path
);
8619 req
->set_inode(diri
.get());
8620 req
->head
.args
.readdir
.frag
= fg
;
8621 req
->head
.args
.readdir
.flags
= CEPH_READDIR_REPLY_BITFLAGS
;
8622 if (dirp
->last_name
.length()) {
8623 req
->path2
.set_path(dirp
->last_name
);
8624 } else if (dirp
->hash_order()) {
8625 req
->head
.args
.readdir
.offset_hash
= dirp
->offset_high();
8630 int res
= make_request(req
, dirp
->perms
, NULL
, NULL
, -1, &dirbl
);
8632 if (res
== -CEPHFS_EAGAIN
) {
8633 ldout(cct
, 10) << __func__
<< " got EAGAIN, retrying" << dendl
;
8634 _readdir_rechoose_frag(dirp
);
8635 return _readdir_get_frag(dirp
);
8639 ldout(cct
, 10) << __func__
<< " " << dirp
<< " got frag " << dirp
->buffer_frag
8640 << " size " << dirp
->buffer
.size() << dendl
;
8642 ldout(cct
, 10) << __func__
<< " got error " << res
<< ", setting end flag" << dendl
;
8649 struct dentry_off_lt
{
8650 bool operator()(const Dentry
* dn
, int64_t off
) const {
8651 return dir_result_t::fpos_cmp(dn
->offset
, off
) < 0;
8655 int Client::_readdir_cache_cb(dir_result_t
*dirp
, add_dirent_cb_t cb
, void *p
,
8656 int caps
, bool getref
)
8658 ceph_assert(ceph_mutex_is_locked_by_me(client_lock
));
8659 ldout(cct
, 10) << __func__
<< " " << dirp
<< " on " << dirp
->inode
->ino
8660 << " last_name " << dirp
->last_name
<< " offset " << hex
<< dirp
->offset
<< dec
8662 Dir
*dir
= dirp
->inode
->dir
;
8665 ldout(cct
, 10) << " dir is empty" << dendl
;
8670 vector
<Dentry
*>::iterator pd
= std::lower_bound(dir
->readdir_cache
.begin(),
8671 dir
->readdir_cache
.end(),
8672 dirp
->offset
, dentry_off_lt());
8677 if (!dirp
->inode
->is_complete_and_ordered())
8678 return -CEPHFS_EAGAIN
;
8679 if (pd
== dir
->readdir_cache
.end())
8682 if (dn
->inode
== NULL
) {
8683 ldout(cct
, 15) << " skipping null '" << dn
->name
<< "'" << dendl
;
8687 if (dn
->cap_shared_gen
!= dir
->parent_inode
->shared_gen
) {
8688 ldout(cct
, 15) << " skipping mismatch shared gen '" << dn
->name
<< "'" << dendl
;
8693 int idx
= pd
- dir
->readdir_cache
.begin();
8694 if (dn
->inode
->is_dir()) {
8695 mask
|= CEPH_STAT_RSTAT
;
8697 int r
= _getattr(dn
->inode
, mask
, dirp
->perms
);
8701 // the content of readdir_cache may change after _getattr(), so pd may be an invalid iterator
8702 pd
= dir
->readdir_cache
.begin() + idx
;
8703 if (pd
>= dir
->readdir_cache
.end() || *pd
!= dn
)
8704 return -CEPHFS_EAGAIN
;
8706 struct ceph_statx stx
;
8708 fill_statx(dn
->inode
, caps
, &stx
);
8710 uint64_t next_off
= dn
->offset
+ 1;
8711 fill_dirent(&de
, dn
->name
.c_str(), stx
.stx_mode
, stx
.stx_ino
, next_off
);
8713 if (pd
== dir
->readdir_cache
.end())
8714 next_off
= dir_result_t::END
;
8718 in
= dn
->inode
.get();
8722 dn_name
= dn
->name
; // fill in name while we have lock
8724 client_lock
.unlock();
8725 r
= cb(p
, &de
, &stx
, next_off
, in
); // _next_ offset
8727 ldout(cct
, 15) << " de " << de
.d_name
<< " off " << hex
<< dn
->offset
<< dec
8728 << " = " << r
<< dendl
;
8733 dirp
->offset
= next_off
;
8735 dirp
->next_offset
= 2;
8737 dirp
->next_offset
= dirp
->offset_low();
8738 dirp
->last_name
= dn_name
; // we successfully returned this one; update!
8739 dirp
->release_count
= 0; // last_name no longer match cache index
8744 ldout(cct
, 10) << __func__
<< " " << dirp
<< " on " << dirp
->inode
->ino
<< " at end" << dendl
;
8749 int Client::readdir_r_cb(dir_result_t
*d
, add_dirent_cb_t cb
, void *p
,
8750 unsigned want
, unsigned flags
, bool getref
)
8752 int caps
= statx_to_mask(flags
, want
);
8754 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
8755 if (!mref_reader
.is_state_satisfied())
8756 return -CEPHFS_ENOTCONN
;
8758 std::unique_lock
cl(client_lock
);
8760 dir_result_t
*dirp
= static_cast<dir_result_t
*>(d
);
8762 ldout(cct
, 10) << __func__
<< " " << *dirp
->inode
<< " offset " << hex
<< dirp
->offset
8763 << dec
<< " at_end=" << dirp
->at_end()
8764 << " hash_order=" << dirp
->hash_order() << dendl
;
8767 struct ceph_statx stx
;
8768 memset(&de
, 0, sizeof(de
));
8769 memset(&stx
, 0, sizeof(stx
));
8771 InodeRef
& diri
= dirp
->inode
;
8776 if (dirp
->offset
== 0) {
8777 ldout(cct
, 15) << " including ." << dendl
;
8778 ceph_assert(diri
->dentries
.size() < 2); // can't have multiple hard-links to a dir
8779 uint64_t next_off
= 1;
8782 r
= _getattr(diri
, caps
| CEPH_STAT_RSTAT
, dirp
->perms
);
8786 fill_statx(diri
, caps
, &stx
);
8787 fill_dirent(&de
, ".", S_IFDIR
, stx
.stx_ino
, next_off
);
8789 Inode
*inode
= NULL
;
8796 r
= cb(p
, &de
, &stx
, next_off
, inode
);
8801 dirp
->offset
= next_off
;
8805 if (dirp
->offset
== 1) {
8806 ldout(cct
, 15) << " including .." << dendl
;
8807 uint64_t next_off
= 2;
8809 if (diri
->dentries
.empty())
8812 in
= diri
->get_first_parent()->dir
->parent_inode
;
8815 r
= _getattr(in
, caps
| CEPH_STAT_RSTAT
, dirp
->perms
);
8819 fill_statx(in
, caps
, &stx
);
8820 fill_dirent(&de
, "..", S_IFDIR
, stx
.stx_ino
, next_off
);
8822 Inode
*inode
= NULL
;
8829 r
= cb(p
, &de
, &stx
, next_off
, inode
);
8834 dirp
->offset
= next_off
;
8839 // can we read from our cache?
8840 ldout(cct
, 10) << "offset " << hex
<< dirp
->offset
<< dec
8841 << " snapid " << dirp
->inode
->snapid
<< " (complete && ordered) "
8842 << dirp
->inode
->is_complete_and_ordered()
8843 << " issued " << ccap_string(dirp
->inode
->caps_issued())
8845 if (dirp
->inode
->snapid
!= CEPH_SNAPDIR
&&
8846 dirp
->inode
->is_complete_and_ordered() &&
8847 dirp
->inode
->caps_issued_mask(CEPH_CAP_FILE_SHARED
, true)) {
8848 int err
= _readdir_cache_cb(dirp
, cb
, p
, caps
, getref
);
8849 if (err
!= -CEPHFS_EAGAIN
)
8857 bool check_caps
= true;
8858 if (!dirp
->is_cached()) {
8859 int r
= _readdir_get_frag(dirp
);
8862 // _readdir_get_frag() may update dirp->offset if the replied dirfrag is
8863 // different from the requested one (our dirfragtree was outdated).
8866 frag_t fg
= dirp
->buffer_frag
;
8868 ldout(cct
, 10) << "frag " << fg
<< " buffer size " << dirp
->buffer
.size()
8869 << " offset " << hex
<< dirp
->offset
<< dendl
;
8871 for (auto it
= std::lower_bound(dirp
->buffer
.begin(), dirp
->buffer
.end(),
8872 dirp
->offset
, dir_result_t::dentry_off_lt());
8873 it
!= dirp
->buffer
.end();
8875 dir_result_t::dentry
&entry
= *it
;
8877 uint64_t next_off
= entry
.offset
+ 1;
8882 if(entry
.inode
->is_dir()){
8883 mask
|= CEPH_STAT_RSTAT
;
8885 r
= _getattr(entry
.inode
, mask
, dirp
->perms
);
8890 fill_statx(entry
.inode
, caps
, &stx
);
8891 fill_dirent(&de
, entry
.name
.c_str(), stx
.stx_mode
, stx
.stx_ino
, next_off
);
8893 Inode
*inode
= NULL
;
8895 inode
= entry
.inode
.get();
8900 r
= cb(p
, &de
, &stx
, next_off
, inode
); // _next_ offset
8903 ldout(cct
, 15) << " de " << de
.d_name
<< " off " << hex
<< next_off
- 1 << dec
8904 << " = " << r
<< dendl
;
8908 dirp
->offset
= next_off
;
8913 if (dirp
->next_offset
> 2) {
8914 ldout(cct
, 10) << " fetching next chunk of this frag" << dendl
;
8915 _readdir_drop_dirp_buffer(dirp
);
8919 if (!fg
.is_rightmost()) {
8921 _readdir_next_frag(dirp
);
8925 if (diri
->shared_gen
== dirp
->start_shared_gen
&&
8926 diri
->dir_release_count
== dirp
->release_count
) {
8927 if (diri
->dir_ordered_count
== dirp
->ordered_count
) {
8928 ldout(cct
, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on " << *diri
<< dendl
;
8930 ceph_assert(diri
->dir
->readdir_cache
.size() >= dirp
->cache_index
);
8931 diri
->dir
->readdir_cache
.resize(dirp
->cache_index
);
8933 diri
->flags
|= I_COMPLETE
| I_DIR_ORDERED
;
8935 ldout(cct
, 10) << " marking I_COMPLETE on " << *diri
<< dendl
;
8936 diri
->flags
|= I_COMPLETE
;
8948 int Client::readdir_r(dir_result_t
*d
, struct dirent
*de
)
8950 return readdirplus_r(d
, de
, 0, 0, 0, NULL
);
8957 * 1 if we got a dirent
8958 * 0 for end of directory
8962 struct single_readdir
{
8964 struct ceph_statx
*stx
;
8969 static int _readdir_single_dirent_cb(void *p
, struct dirent
*de
,
8970 struct ceph_statx
*stx
, off_t off
,
8973 single_readdir
*c
= static_cast<single_readdir
*>(p
);
8976 return -1; // already filled this dirent
8986 struct dirent
*Client::readdir(dir_result_t
*d
)
8996 // our callback fills the dirent and sets sr.full=true on first
8997 // call, and returns -1 the second time around.
8998 ret
= readdir_r_cb(d
, _readdir_single_dirent_cb
, (void *)&sr
);
9000 errno
= -ret
; // this sucks.
9001 return (dirent
*) NULL
;
9006 return (dirent
*) NULL
;
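/*
 * Illustrative sketch (an assumption, not part of this file): a plain
 * directory-listing loop over the readdir()/readdir_r() wrappers above.
 * `client` and `perms` are hypothetical caller state; errors other than
 * end-of-directory are ignored for brevity.
 *
 *   dir_result_t *dirp = nullptr;
 *   if (client->opendir("/some/dir", &dirp, perms) == 0) {
 *     struct dirent de;
 *     while (client->readdir_r(dirp, &de) > 0)   // 1 = got an entry, 0 = end
 *       std::cout << de.d_name << "\n";
 *     client->closedir(dirp);
 *   }
 */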
9009 int Client::readdirplus_r(dir_result_t
*d
, struct dirent
*de
,
9010 struct ceph_statx
*stx
, unsigned want
,
9011 unsigned flags
, Inode
**out
)
9019 // our callback fills the dirent and sets sr.full=true on first
9020 // call, and returns -1 the second time around.
9021 int r
= readdir_r_cb(d
, _readdir_single_dirent_cb
, (void *)&sr
, want
, flags
, out
);
9033 struct getdents_result
{
9040 static int _readdir_getdent_cb(void *p
, struct dirent
*de
,
9041 struct ceph_statx
*stx
, off_t off
, Inode
*in
)
9043 struct getdents_result
*c
= static_cast<getdents_result
*>(p
);
9049 dlen
= strlen(de
->d_name
) + 1;
9051 if (c
->pos
+ dlen
> c
->buflen
)
9052 return -1; // doesn't fit
9055 memcpy(c
->buf
+ c
->pos
, de
, sizeof(*de
));
9057 memcpy(c
->buf
+ c
->pos
, de
->d_name
, dlen
);
9063 int Client::_getdents(dir_result_t
*dir
, char *buf
, int buflen
, bool fullent
)
9068 gr
.fullent
= fullent
;
9071 int r
= readdir_r_cb(dir
, _readdir_getdent_cb
, (void *)&gr
);
9073 if (r
< 0) { // some error
9074 if (r
== -1) { // buffer ran out of space
9075 if (gr
.pos
) { // but we got some entries already!
9077 } // or we need a larger buffer
9078 return -CEPHFS_ERANGE
;
9079 } else { // actual error, return it
9088 struct getdir_result
{
9089 list
<string
> *contents
;
9093 static int _getdir_cb(void *p
, struct dirent
*de
, struct ceph_statx
*stx
, off_t off
, Inode
*in
)
9095 getdir_result
*r
= static_cast<getdir_result
*>(p
);
9097 r
->contents
->push_back(de
->d_name
);
9102 int Client::getdir(const char *relpath
, list
<string
>& contents
,
9103 const UserPerm
& perms
)
9105 ldout(cct
, 3) << "getdir(" << relpath
<< ")" << dendl
;
9106 tout(cct
) << "getdir" << std::endl
;
9107 tout(cct
) << relpath
<< std::endl
;
9110 int r
= opendir(relpath
, &d
, perms
);
9115 gr
.contents
= &contents
;
9117 r
= readdir_r_cb(d
, _getdir_cb
, (void *)&gr
);
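/*
 * Illustrative sketch (an assumption, not part of this file): getdir()
 * above is the convenience form of the same loop, returning every entry
 * name in one std::list. `client` and `perms` are hypothetical.
 *
 *   std::list<std::string> names;
 *   int r = client->getdir("/some/dir", names, perms);
 *   if (r == 0)
 *     for (const auto &n : names)
 *       std::cout << n << "\n";
 */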
9127 /****** file i/o **********/
9129 // common parts for open and openat. call with client_lock locked.
9130 int Client::create_and_open(std::optional
<int> dirfd
, const char *relpath
, int flags
,
9131 const UserPerm
& perms
, mode_t mode
, int stripe_unit
,
9132 int stripe_count
, int object_size
, const char *data_pool
,
9133 std::string alternate_name
) {
9134 ceph_assert(ceph_mutex_is_locked(client_lock
));
9135 int cflags
= ceph_flags_sys2wire(flags
);
9136 tout(cct
) << cflags
<< std::endl
;
9140 #if defined(__linux__) && defined(O_PATH)
9141 /* When O_PATH is specified, flags other than O_DIRECTORY
9142 * and O_NOFOLLOW are ignored. Please refer to the do_entry_open() function
9143 * in the kernel (fs/open.c). */
9145 flags
&= O_DIRECTORY
| O_NOFOLLOW
| O_PATH
;
9148 filepath
path(relpath
);
9150 bool created
= false;
9151 /* O_CREAT with O_EXCL enforces O_NOFOLLOW. */
9152 bool followsym
= !((flags
& O_NOFOLLOW
) || ((flags
& O_CREAT
) && (flags
& O_EXCL
)));
9153 int mask
= ceph_caps_for_mode(ceph_flags_to_mode(cflags
));
9155 InodeRef dirinode
= nullptr;
9157 int r
= get_fd_inode(*dirfd
, &dirinode
);
9163 int r
= path_walk(path
, &in
, perms
, followsym
, mask
, dirinode
);
9164 if (r
== 0 && (flags
& O_CREAT
) && (flags
& O_EXCL
))
9165 return -CEPHFS_EEXIST
;
9167 #if defined(__linux__) && defined(O_PATH)
9168 if (r
== 0 && in
->is_symlink() && (flags
& O_NOFOLLOW
) && !(flags
& O_PATH
))
9170 if (r
== 0 && in
->is_symlink() && (flags
& O_NOFOLLOW
))
9172 return -CEPHFS_ELOOP
;
9174 if (r
== -CEPHFS_ENOENT
&& (flags
& O_CREAT
)) {
9175 filepath dirpath
= path
;
9176 string dname
= dirpath
.last_dentry();
9177 dirpath
.pop_dentry();
9179 r
= path_walk(dirpath
, &dir
, perms
, true,
9180 cct
->_conf
->client_permissions
? CEPH_CAP_AUTH_SHARED
: 0, dirinode
);
9184 if (cct
->_conf
->client_permissions
) {
9185 r
= may_create(dir
.get(), perms
);
9189 r
= _create(dir
.get(), dname
.c_str(), flags
, mode
, &in
, &fh
, stripe_unit
,
9190 stripe_count
, object_size
, data_pool
, &created
, perms
,
9191 std::move(alternate_name
));
9197 // posix says we can only check permissions of existing files
9198 if (cct
->_conf
->client_permissions
) {
9199 r
= may_open(in
.get(), flags
, perms
);
9206 r
= _open(in
.get(), flags
, mode
, &fh
, perms
);
9208 // allocate an integer file descriptor
9211 ceph_assert(fd_map
.count(r
) == 0);
9219 int Client::open(const char *relpath
, int flags
, const UserPerm
& perms
,
9220 mode_t mode
, int stripe_unit
, int stripe_count
,
9221 int object_size
, const char *data_pool
, std::string alternate_name
)
9223 return openat(CEPHFS_AT_FDCWD
, relpath
, flags
, perms
, mode
, stripe_unit
,
9224 stripe_count
, object_size
, data_pool
, alternate_name
);
9227 int Client::_openat(int dirfd
, const char *relpath
, int flags
, const UserPerm
& perms
,
9228 mode_t mode
, std::string alternate_name
) {
9229 return create_and_open(dirfd
, relpath
, flags
, perms
, mode
, 0, 0, 0, NULL
, alternate_name
);
9232 int Client::openat(int dirfd
, const char *relpath
, int flags
, const UserPerm
& perms
,
9233 mode_t mode
, int stripe_unit
, int stripe_count
, int object_size
,
9234 const char *data_pool
, std::string alternate_name
) {
9235 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
9236 if (!mref_reader
.is_state_satisfied()) {
9237 return -CEPHFS_ENOTCONN
;
9240 ldout(cct
, 3) << "openat enter(" << relpath
<< ")" << dendl
;
9241 tout(cct
) << dirfd
<< std::endl
;
9242 tout(cct
) << relpath
<< std::endl
;
9243 tout(cct
) << flags
<< std::endl
;
9244 tout(cct
) << mode
<< std::endl
;
9246 std::scoped_lock
locker(client_lock
);
9247 int r
= create_and_open(dirfd
, relpath
, flags
, perms
, mode
, stripe_unit
, stripe_count
,
9248 object_size
, data_pool
, alternate_name
);
9250 tout(cct
) << r
<< std::endl
;
9251 ldout(cct
, 3) << "openat exit(" << relpath
<< ")" << dendl
;
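/*
 * Illustrative sketch (an assumption, not part of this file): open() and
 * openat() above both funnel into create_and_open(); openat() resolves
 * relpath relative to dirfd (or the cwd when dirfd is CEPHFS_AT_FDCWD), and
 * the striping arguments only matter when the file is actually created.
 * `client` and `perms` are hypothetical; 0/NULL striping values mean "use
 * the layout defaults" and "" is an empty alternate_name.
 *
 *   int fd = client->openat(CEPHFS_AT_FDCWD, "logs/today.log",
 *                           O_WRONLY | O_CREAT | O_APPEND, perms, 0644,
 *                           0, 0, 0, NULL, "");
 *   if (fd >= 0) {
 *     // ... I/O ...
 *     client->close(fd);
 *   }
 */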
9255 int Client::lookup_hash(inodeno_t ino
, inodeno_t dirino
, const char *name
,
9256 const UserPerm
& perms
)
9258 ldout(cct
, 3) << __func__
<< " enter(" << ino
<< ", #" << dirino
<< "/" << name
<< ")" << dendl
;
9260 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
9261 if (!mref_reader
.is_state_satisfied())
9262 return -CEPHFS_ENOTCONN
;
9264 std::scoped_lock
lock(client_lock
);
9265 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPHASH
);
9267 req
->set_filepath(path
);
9269 uint32_t h
= ceph_str_hash(CEPH_STR_HASH_RJENKINS
, name
, strlen(name
));
9271 sprintf(f
, "%u", h
);
9272 filepath
path2(dirino
);
9273 path2
.push_dentry(string(f
));
9274 req
->set_filepath2(path2
);
9276 int r
= make_request(req
, perms
, NULL
, NULL
,
9277 rand() % mdsmap
->get_num_in_mds());
9278 ldout(cct
, 3) << __func__
<< " exit(" << ino
<< ", #" << dirino
<< "/" << name
<< ") = " << r
<< dendl
;
9284 * Load inode into local cache.
9286 * If the inode pointer is non-NULL, also take a reference on
9287 * the resulting Inode object in one operation, so that the caller
9288 * can safely assume the inode will still be there after return.
9290 int Client::_lookup_vino(vinodeno_t vino
, const UserPerm
& perms
, Inode
**inode
)
9292 ldout(cct
, 8) << __func__
<< " enter(" << vino
<< ")" << dendl
;
9294 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
9295 if (!mref_reader
.is_state_satisfied())
9296 return -CEPHFS_ENOTCONN
;
9298 if (is_reserved_vino(vino
))
9299 return -CEPHFS_ESTALE
;
9301 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPINO
);
9302 filepath
path(vino
.ino
);
9303 req
->set_filepath(path
);
9306 * The MDS expects either a "real" snapid here or 0. The special value
9307 * carveouts for the snapid are all at the end of the range so we can
9308 * just look for any snapid below this value.
9310 if (vino
.snapid
< CEPH_NOSNAP
)
9311 req
->head
.args
.lookupino
.snapid
= vino
.snapid
;
9313 int r
= make_request(req
, perms
, NULL
, NULL
, rand() % mdsmap
->get_num_in_mds());
9314 if (r
== 0 && inode
!= NULL
) {
9315 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
9316 ceph_assert(p
!= inode_map
.end());
9320 ldout(cct
, 8) << __func__
<< " exit(" << vino
<< ") = " << r
<< dendl
;
9324 int Client::lookup_ino(inodeno_t ino
, const UserPerm
& perms
, Inode
**inode
)
9326 vinodeno_t
vino(ino
, CEPH_NOSNAP
);
9327 std::scoped_lock
lock(client_lock
);
9328 return _lookup_vino(vino
, perms
, inode
);
9332 * Find the parent inode of `ino` and insert it into
9333 * our cache. Conditionally also set `parent` to a referenced
9334 * Inode* if caller provides non-NULL value.
9336 int Client::_lookup_parent(Inode
*ino
, const UserPerm
& perms
, Inode
**parent
)
9338 ldout(cct
, 8) << __func__
<< " enter(" << ino
->ino
<< ")" << dendl
;
9340 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPPARENT
);
9341 filepath
path(ino
->ino
);
9342 req
->set_filepath(path
);
9345 int r
= make_request(req
, perms
, &target
, NULL
, rand() % mdsmap
->get_num_in_mds());
9346 // Give caller a reference to the parent ino if they provided a pointer.
9347 if (parent
!= NULL
) {
9349 *parent
= target
.get();
9351 ldout(cct
, 8) << __func__
<< " found parent " << (*parent
)->ino
<< dendl
;
9356 ldout(cct
, 8) << __func__
<< " exit(" << ino
->ino
<< ") = " << r
<< dendl
;
9361 * Populate the parent dentry for `ino`, provided it is
9362 * a child of `parent`.
9364 int Client::_lookup_name(Inode
*ino
, Inode
*parent
, const UserPerm
& perms
)
9366 ceph_assert(parent
->is_dir());
9367 ldout(cct
, 3) << __func__
<< " enter(" << ino
->ino
<< ")" << dendl
;
9369 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
9370 if (!mref_reader
.is_state_satisfied())
9371 return -CEPHFS_ENOTCONN
;
9373 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
9374 req
->set_filepath2(filepath(parent
->ino
));
9375 req
->set_filepath(filepath(ino
->ino
));
9376 req
->set_inode(ino
);
9378 int r
= make_request(req
, perms
, NULL
, NULL
, rand() % mdsmap
->get_num_in_mds());
9379 ldout(cct
, 3) << __func__
<< " exit(" << ino
->ino
<< ") = " << r
<< dendl
;
9383 int Client::lookup_name(Inode
*ino
, Inode
*parent
, const UserPerm
& perms
)
9385 std::scoped_lock
lock(client_lock
);
9386 return _lookup_name(ino
, parent
, perms
);
9389 Fh
*Client::_create_fh(Inode
*in
, int flags
, int cmode
, const UserPerm
& perms
)
9392 Fh
*f
= new Fh(in
, flags
, cmode
, fd_gen
, perms
);
9394 ldout(cct
, 10) << __func__
<< " " << in
->ino
<< " mode " << cmode
<< dendl
;
9396 if (in
->snapid
!= CEPH_NOSNAP
) {
9397 in
->snap_cap_refs
++;
9398 ldout(cct
, 5) << "open success, fh is " << f
<< " combined IMMUTABLE SNAP caps "
9399 << ccap_string(in
->caps_issued()) << dendl
;
9402 const auto& conf
= cct
->_conf
;
9403 f
->readahead
.set_trigger_requests(1);
9404 f
->readahead
.set_min_readahead_size(conf
->client_readahead_min
);
9405 uint64_t max_readahead
= Readahead::NO_LIMIT
;
9406 if (conf
->client_readahead_max_bytes
) {
9407 max_readahead
= std::min(max_readahead
, (uint64_t)conf
->client_readahead_max_bytes
);
9409 if (conf
->client_readahead_max_periods
) {
9410 max_readahead
= std::min(max_readahead
, in
->layout
.get_period()*(uint64_t)conf
->client_readahead_max_periods
);
9412 f
->readahead
.set_max_readahead_size(max_readahead
);
9413 vector
<uint64_t> alignments
;
9414 alignments
.push_back(in
->layout
.get_period());
9415 alignments
.push_back(in
->layout
.stripe_unit
);
9416 f
->readahead
.set_alignments(alignments
);
9421 int Client::_release_fh(Fh
*f
)
9423 //ldout(cct, 3) << "op: client->close(open_files[ " << fh << " ]);" << dendl;
9424 //ldout(cct, 3) << "op: open_files.erase( " << fh << " );" << dendl;
9425 Inode
*in
= f
->inode
.get();
9426 ldout(cct
, 8) << __func__
<< " " << f
<< " mode " << f
->mode
<< " on " << *in
<< dendl
;
9430 if (in
->snapid
== CEPH_NOSNAP
) {
9431 if (in
->put_open_ref(f
->mode
)) {
9432 _flush(in
, new C_Client_FlushComplete(this, in
));
9436 ceph_assert(in
->snap_cap_refs
> 0);
9437 in
->snap_cap_refs
--;
9440 _release_filelocks(f
);
9442 // Finally, read any async err (i.e. from flushes)
9443 int err
= f
->take_async_err();
9445 ldout(cct
, 1) << __func__
<< " " << f
<< " on inode " << *in
<< " caught async_err = "
9446 << cpp_strerror(err
) << dendl
;
9448 ldout(cct
, 10) << __func__
<< " " << f
<< " on inode " << *in
<< " no async_err state" << dendl
;
9456 void Client::_put_fh(Fh
*f
)
9458 int left
= f
->put();
9464 int Client::_open(Inode
*in
, int flags
, mode_t mode
, Fh
**fhp
,
9465 const UserPerm
& perms
)
9467 if (in
->snapid
!= CEPH_NOSNAP
&&
9468 (flags
& (O_WRONLY
| O_RDWR
| O_CREAT
| O_TRUNC
| O_APPEND
))) {
9469 return -CEPHFS_EROFS
;
9472 // use normalized flags to generate cmode
9473 int cflags
= ceph_flags_sys2wire(flags
);
9474 if (cct
->_conf
.get_val
<bool>("client_force_lazyio"))
9475 cflags
|= CEPH_O_LAZY
;
9477 int cmode
= ceph_flags_to_mode(cflags
);
9478 int want
= ceph_caps_for_mode(cmode
);
9481 in->get_open_ref(cmode); // make note of pending open, since it affects _wanted_ caps.
9483 if ((flags
& O_TRUNC
) == 0 && in
->caps_issued_mask(want
)) {
9485 check_caps(in
, CHECK_CAPS_NODELAY
);
9488 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_OPEN
);
9490 in
->make_nosnap_relative_path(path
);
9491 req
->set_filepath(path
);
9492 req
->head
.args
.open
.flags
= cflags
& ~CEPH_O_CREAT
;
9493 req
->head
.args
.open
.mode
= mode
;
9494 req
->head
.args
.open
.pool
= -1;
9495 if (cct
->_conf
->client_debug_getattr_caps
)
9496 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
9498 req
->head
.args
.open
.mask
= 0;
9499 req
->head
.args
.open
.old_size
= in
->size
; // for O_TRUNC
9501 result
= make_request(req
, perms
);
9504 * NFS expects that delegations will be broken on a conflicting open,
9505 * not just when there is actual conflicting access to the file. SMB leases
9506 * and oplocks also have similar semantics.
9508 * Ensure that clients that have delegations enabled will wait on minimal
9509 * caps during open, just to ensure that other clients holding delegations
9510 * return theirs first.
9512 if (deleg_timeout
&& result
== 0) {
9515 if (cmode
& CEPH_FILE_MODE_WR
)
9516 need
|= CEPH_CAP_FILE_WR
;
9517 if (cmode
& CEPH_FILE_MODE_RD
)
9518 need
|= CEPH_CAP_FILE_RD
;
9520 Fh
fh(in
, flags
, cmode
, fd_gen
, perms
);
9521 result
= get_caps(&fh
, need
, want
, &have
, -1);
9523 ldout(cct
, 8) << "Unable to get caps after open of inode " << *in
<<
9524 " . Denying open: " <<
9525 cpp_strerror(result
) << dendl
;
9527 put_cap_ref(in
, need
);
9535 *fhp
= _create_fh(in
, flags
, cmode
, perms
);
9537 in
->put_open_ref(cmode
);
9545 int Client::_renew_caps(Inode
*in
)
9547 int wanted
= in
->caps_file_wanted();
9548 if (in
->is_any_caps() &&
9549 ((wanted
& CEPH_CAP_ANY_WR
) == 0 || in
->auth_cap
)) {
9550 check_caps(in
, CHECK_CAPS_NODELAY
);
9555 if ((wanted
& CEPH_CAP_FILE_RD
) && (wanted
& CEPH_CAP_FILE_WR
))
9557 else if (wanted
& CEPH_CAP_FILE_RD
)
9559 else if (wanted
& CEPH_CAP_FILE_WR
)
9562 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_OPEN
);
9564 in
->make_nosnap_relative_path(path
);
9565 req
->set_filepath(path
);
9566 req
->head
.args
.open
.flags
= flags
;
9567 req
->head
.args
.open
.pool
= -1;
9568 if (cct
->_conf
->client_debug_getattr_caps
)
9569 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
9571 req
->head
.args
.open
.mask
= 0;
9574 // duplicate in case Cap goes away; not sure if that race is a concern?
9575 const UserPerm
*pperm
= in
->get_best_perms();
9579 int ret
= make_request(req
, perms
);
9583 int Client::_close(int fd
)
9585 ldout(cct
, 3) << "close enter(" << fd
<< ")" << dendl
;
9586 tout(cct
) << "close" << std::endl
;
9587 tout(cct
) << fd
<< std::endl
;
9589 Fh
*fh
= get_filehandle(fd
);
9591 return -CEPHFS_EBADF
;
9592 int err
= _release_fh(fh
);
9595 ldout(cct
, 3) << "close exit(" << fd
<< ")" << dendl
;
9599 int Client::close(int fd
) {
9600 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
9601 if (!mref_reader
.is_state_satisfied())
9602 return -CEPHFS_ENOTCONN
;
9604 std::scoped_lock
lock(client_lock
);
9611 loff_t
Client::lseek(int fd
, loff_t offset
, int whence
)
9613 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
9614 if (!mref_reader
.is_state_satisfied())
9615 return -CEPHFS_ENOTCONN
;
9617 tout(cct
) << "lseek" << std::endl
;
9618 tout(cct
) << fd
<< std::endl
;
9619 tout(cct
) << offset
<< std::endl
;
9620 tout(cct
) << whence
<< std::endl
;
9622 std::scoped_lock
lock(client_lock
);
9623 Fh
*f
= get_filehandle(fd
);
9625 return -CEPHFS_EBADF
;
9626 #if defined(__linux__) && defined(O_PATH)
9627 if (f
->flags
& O_PATH
)
9628 return -CEPHFS_EBADF
;
9630 return _lseek(f
, offset
, whence
);
9633 loff_t
Client::_lseek(Fh
*f
, loff_t offset
, int whence
)
9635 Inode
*in
= f
->inode
.get();
9636 bool whence_check
= false;
9641 whence_check
= true;
9646 whence_check
= true;
9652 whence_check
= true;
9658 int r
= _getattr(in
, CEPH_STAT_CAP_SIZE
, f
->actor_perms
);
9669 pos
= f
->pos
+ offset
;
9673 pos
= in
->size
+ offset
;
9678 if (offset
< 0 || static_cast<uint64_t>(offset
) >= in
->size
)
9679 return -CEPHFS_ENXIO
;
9686 if (offset
< 0 || static_cast<uint64_t>(offset
) >= in
->size
)
9687 return -CEPHFS_ENXIO
;
9693 ldout(cct
, 1) << __func__
<< ": invalid whence value " << whence
<< dendl
;
9694 return -CEPHFS_EINVAL
;
9698 return -CEPHFS_EINVAL
;
9703 ldout(cct
, 8) << "_lseek(" << f
<< ", " << offset
<< ", " << whence
<< ") = " << f
->pos
<< dendl
;
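/*
 * Illustrative sketch (an assumption, not part of this file): the whence
 * handling in _lseek() above. SEEK_SET/SEEK_CUR/SEEK_END behave as in
 * POSIX; for the hole/data probes a fresh getattr refreshes in->size first,
 * and an offset at or beyond the file size yields -CEPHFS_ENXIO. `client`
 * and the fd are hypothetical caller state.
 *
 *   loff_t end = client->lseek(fd, 0, SEEK_END);   // file size
 *   client->lseek(fd, 0, SEEK_SET);                // rewind
 *   loff_t r = client->lseek(fd, end, SEEK_DATA);  // expect -CEPHFS_ENXIO at/after EOF
 */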
9708 void Client::lock_fh_pos(Fh
*f
)
9710 ldout(cct
, 10) << __func__
<< " " << f
<< dendl
;
9712 if (f
->pos_locked
|| !f
->pos_waiters
.empty()) {
9713 ceph::condition_variable cond
;
9714 f
->pos_waiters
.push_back(&cond
);
9715 ldout(cct
, 10) << __func__
<< " BLOCKING on " << f
<< dendl
;
9716 std::unique_lock l
{client_lock
, std::adopt_lock
};
9717 cond
.wait(l
, [f
, me
=&cond
] {
9718 return !f
->pos_locked
&& f
->pos_waiters
.front() == me
;
9721 ldout(cct
, 10) << __func__
<< " UNBLOCKING on " << f
<< dendl
;
9722 ceph_assert(f
->pos_waiters
.front() == &cond
);
9723 f
->pos_waiters
.pop_front();
9726 f
->pos_locked
= true;
9729 void Client::unlock_fh_pos(Fh
*f
)
9731 ceph_assert(ceph_mutex_is_locked_by_me(client_lock
));
9733 ldout(cct
, 10) << __func__
<< " " << f
<< dendl
;
9734 f
->pos_locked
= false;
9735 if (!f
->pos_waiters
.empty()) {
9736 // only wake up the oldest waiter
9737 auto cond
= f
->pos_waiters
.front();
9742 int Client::uninline_data(Inode
*in
, Context
*onfinish
)
9744 if (!in
->inline_data
.length()) {
9745 onfinish
->complete(0);
9750 snprintf(oid_buf
, sizeof(oid_buf
), "%llx.00000000", (long long unsigned)in
->ino
);
9751 object_t oid
= oid_buf
;
9753 ObjectOperation create_ops
;
9754 create_ops
.create(false);
9756 objecter
->mutate(oid
,
9757 OSDMap::file_to_object_locator(in
->layout
),
9759 in
->snaprealm
->get_snap_context(),
9760 ceph::real_clock::now(),
9764 bufferlist inline_version_bl
;
9765 encode(in
->inline_version
, inline_version_bl
);
9767 ObjectOperation uninline_ops
;
9768 uninline_ops
.cmpxattr("inline_version",
9769 CEPH_OSD_CMPXATTR_OP_GT
,
9770 CEPH_OSD_CMPXATTR_MODE_U64
,
9772 bufferlist inline_data
= in
->inline_data
;
9773 uninline_ops
.write(0, inline_data
, in
->truncate_size
, in
->truncate_seq
);
9774 uninline_ops
.setxattr("inline_version", stringify(in
->inline_version
));
9776 objecter
->mutate(oid
,
9777 OSDMap::file_to_object_locator(in
->layout
),
9779 in
->snaprealm
->get_snap_context(),
9780 ceph::real_clock::now(),
9789 // blocking osd interface
9791 int Client::read(int fd
, char *buf
, loff_t size
, loff_t offset
)
9793 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
9794 if (!mref_reader
.is_state_satisfied())
9795 return -CEPHFS_ENOTCONN
;
9797 tout(cct
) << "read" << std::endl
;
9798 tout(cct
) << fd
<< std::endl
;
9799 tout(cct
) << size
<< std::endl
;
9800 tout(cct
) << offset
<< std::endl
;
9802 std::unique_lock
lock(client_lock
);
9803 Fh
*f
= get_filehandle(fd
);
9805 return -CEPHFS_EBADF
;
9806 #if defined(__linux__) && defined(O_PATH)
9807 if (f
->flags
& O_PATH
)
9808 return -CEPHFS_EBADF
;
9811 /* We can't return more bytes read than INT_MAX, so clamp size to that */
9812 size
= std::min(size
, (loff_t
)INT_MAX
);
9813 int r
= _read(f
, offset
, size
, &bl
);
9814 ldout(cct
, 3) << "read(" << fd
<< ", " << (void*)buf
<< ", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
9817 bl
.begin().copy(bl
.length(), buf
);
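/*
 * Illustrative sketch (an assumption, not part of this file): the blocking
 * read() wrapper above clamps the request to INT_MAX, reads into an
 * internal bufferlist via _read(), then copies out to the caller's buffer.
 * `client` and the fd are hypothetical.
 *
 *   char buf[4096];
 *   int n = client->read(fd, buf, sizeof(buf), 0);   // read from offset 0
 *   if (n > 0) {
 *     // first n bytes of buf are valid
 *   } else if (n < 0) {
 *     // negative CEPHFS error code, e.g. -CEPHFS_EBADF
 *   }
 */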
9823 int Client::preadv(int fd
, const struct iovec
*iov
, int iovcnt
, loff_t offset
)
9826 return -CEPHFS_EINVAL
;
9827 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, false);
9830 int64_t Client::_read(Fh
*f
, int64_t offset
, uint64_t size
, bufferlist
*bl
)
9832 ceph_assert(ceph_mutex_is_locked_by_me(client_lock
));
9835 bool movepos
= false;
9836 std::unique_ptr
<C_SaferCond
> onuninline
;
9838 const auto& conf
= cct
->_conf
;
9839 Inode
*in
= f
->inode
.get();
9841 utime_t start
= ceph_clock_now();
9843 if ((f
->mode
& CEPH_FILE_MODE_RD
) == 0)
9844 return -CEPHFS_EBADF
;
9845 //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
9852 loff_t start_pos
= offset
;
9854 if (in
->inline_version
== 0) {
9855 auto r
= _getattr(in
, CEPH_STAT_CAP_INLINE_DATA
, f
->actor_perms
, true);
9860 ceph_assert(in
->inline_version
> 0);
9864 if (f
->mode
& CEPH_FILE_MODE_LAZY
)
9865 want
= CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_LAZYIO
;
9867 want
= CEPH_CAP_FILE_CACHE
;
9869 auto r
= get_caps(f
, CEPH_CAP_FILE_RD
, want
, &have
, -1);
9875 if (f
->flags
& O_DIRECT
)
9876 have
&= ~(CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_LAZYIO
);
9878 if (in
->inline_version
< CEPH_INLINE_NONE
) {
9879 if (!(have
& CEPH_CAP_FILE_CACHE
)) {
9880 onuninline
.reset(new C_SaferCond("Client::_read_uninline_data flock"));
9881 uninline_data(in
, onuninline
.get());
9883 uint32_t len
= in
->inline_data
.length();
9884 uint64_t endoff
= offset
+ size
;
9885 if (endoff
> in
->size
)
9889 if (endoff
<= len
) {
9890 bl
->substr_of(in
->inline_data
, offset
, endoff
- offset
);
9892 bl
->substr_of(in
->inline_data
, offset
, len
- offset
);
9893 bl
->append_zero(endoff
- len
);
9895 rc
= endoff
- offset
;
9896 } else if ((uint64_t)offset
< endoff
) {
9897 bl
->append_zero(endoff
- offset
);
9898 rc
= endoff
- offset
;
9906 if (!conf
->client_debug_force_sync_read
&&
9908 (have
& (CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_LAZYIO
))) {
9910 if (f
->flags
& O_RSYNC
) {
9911 _flush_range(in
, offset
, size
);
9913 rc
= _read_async(f
, offset
, size
, bl
);
9917 if (f
->flags
& O_DIRECT
)
9918 _flush_range(in
, offset
, size
);
9920 bool checkeof
= false;
9921 rc
= _read_sync(f
, offset
, size
, bl
, &checkeof
);
9928 put_cap_ref(in
, CEPH_CAP_FILE_RD
);
9932 auto r
= _getattr(in
, CEPH_STAT_CAP_SIZE
, f
->actor_perms
);
9940 if ((uint64_t)offset
< in
->size
)
9946 ceph_assert(rc
>= 0);
9949 f
->pos
= start_pos
+ rc
;
9952 lat
= ceph_clock_now();
9954 logger
->tinc(l_c_read
, lat
);
9960 client_lock
.unlock();
9961 int ret
= onuninline
->wait();
9963 if (ret
>= 0 || ret
== -CEPHFS_ECANCELED
) {
9964 in
->inline_data
.clear();
9965 in
->inline_version
= CEPH_INLINE_NONE
;
9966 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
9972 put_cap_ref(in
, CEPH_CAP_FILE_RD
);
9980 Client::C_Readahead::C_Readahead(Client
*c
, Fh
*f
) :
9983 f
->readahead
.inc_pending();
9986 Client::C_Readahead::~C_Readahead() {
9987 f
->readahead
.dec_pending();
9991 void Client::C_Readahead::finish(int r
) {
9992 lgeneric_subdout(client
->cct
, client
, 20) << "client." << client
->get_nodeid() << " " << "C_Readahead on " << f
->inode
<< dendl
;
9993 client
->put_cap_ref(f
->inode
.get(), CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_CACHE
);
9996 int Client::_read_async(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
)
9998 ceph_assert(ceph_mutex_is_locked_by_me(client_lock
));
10000 const auto& conf
= cct
->_conf
;
10001 Inode
*in
= f
->inode
.get();
10003 ldout(cct
, 10) << __func__
<< " " << *in
<< " " << off
<< "~" << len
<< dendl
;
10005 // trim read based on file size?
10006 if (off
>= in
->size
)
10010 if (off
+ len
> in
->size
) {
10011 len
= in
->size
- off
;
10014 ldout(cct
, 10) << " min_bytes=" << f
->readahead
.get_min_readahead_size()
10015 << " max_bytes=" << f
->readahead
.get_max_readahead_size()
10016 << " max_periods=" << conf
->client_readahead_max_periods
<< dendl
;
10018 // read (and possibly block)
10020 C_SaferCond
onfinish("Client::_read_async flock");
10021 r
= objectcacher
->file_read(&in
->oset
, &in
->layout
, in
->snapid
,
10022 off
, len
, bl
, 0, &onfinish
);
10024 get_cap_ref(in
, CEPH_CAP_FILE_CACHE
);
10025 client_lock
.unlock();
10026 r
= onfinish
.wait();
10027 client_lock
.lock();
10028 put_cap_ref(in
, CEPH_CAP_FILE_CACHE
);
10031 if(f
->readahead
.get_min_readahead_size() > 0) {
10032 pair
<uint64_t, uint64_t> readahead_extent
= f
->readahead
.update(off
, len
, in
->size
);
10033 if (readahead_extent
.second
> 0) {
10034 ldout(cct
, 20) << "readahead " << readahead_extent
.first
<< "~" << readahead_extent
.second
10035 << " (caller wants " << off
<< "~" << len
<< ")" << dendl
;
10036 Context
*onfinish2
= new C_Readahead(this, f
);
10037 int r2
= objectcacher
->file_read(&in
->oset
, &in
->layout
, in
->snapid
,
10038 readahead_extent
.first
, readahead_extent
.second
,
10039 NULL
, 0, onfinish2
);
10041 ldout(cct
, 20) << "readahead initiated, c " << onfinish2
<< dendl
;
10042 get_cap_ref(in
, CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_CACHE
);
10044 ldout(cct
, 20) << "readahead was no-op, already cached" << dendl
;
10053 int Client::_read_sync(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
,
10056 ceph_assert(ceph_mutex_is_locked_by_me(client_lock
));
10058 Inode
*in
= f
->inode
.get();
10059 uint64_t pos
= off
;
10063 ldout(cct
, 10) << __func__
<< " " << *in
<< " " << off
<< "~" << len
<< dendl
;
10065 // 0 on success, 1 to continue, and < 0 when an error occurs.
10066 auto wait_and_copy
= [&](C_SaferCond
&onfinish
, bufferlist
&tbl
, int wanted
) {
10067 int r
= onfinish
.wait();
10069 // if we get ENOENT from OSD, assume 0 bytes returned
10070 if (r
== -CEPHFS_ENOENT
)
10075 if (tbl
.length()) {
10081 bl
->claim_append(tbl
);
10084 if (r
>= 0 && r
< wanted
) {
10085 if (pos
< in
->size
) {
10086 // zero up to known EOF
10087 int64_t some
= in
->size
- pos
;
10090 auto z
= buffer::ptr_node::create(some
);
10092 bl
->push_back(std::move(z
));
10107 C_SaferCond
onfinish("Client::_read_sync flock");
10111 filer
->read_trunc(in
->ino
, &in
->layout
, in
->snapid
,
10112 pos
, left
, &tbl
, 0,
10113 in
->truncate_size
, in
->truncate_seq
,
10115 client_lock
.unlock();
10116 int r
= wait_and_copy(onfinish
, tbl
, wanted
);
10117 client_lock
.lock();
10126 int Client::write(int fd
, const char *buf
, loff_t size
, loff_t offset
)
10128 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
10129 if (!mref_reader
.is_state_satisfied())
10130 return -CEPHFS_ENOTCONN
;
10132 tout(cct
) << "write" << std::endl
;
10133 tout(cct
) << fd
<< std::endl
;
10134 tout(cct
) << size
<< std::endl
;
10135 tout(cct
) << offset
<< std::endl
;
10137 std::scoped_lock
lock(client_lock
);
10138 Fh
*fh
= get_filehandle(fd
);
10140 return -CEPHFS_EBADF
;
10141 #if defined(__linux__) && defined(O_PATH)
10142 if (fh
->flags
& O_PATH
)
10143 return -CEPHFS_EBADF
;
10145 /* We can't return bytes written larger than INT_MAX, clamp size to that */
10146 size
= std::min(size
, (loff_t
)INT_MAX
);
10147 int r
= _write(fh
, offset
, size
, buf
, NULL
, false);
10148 ldout(cct
, 3) << "write(" << fd
<< ", \"...\", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
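/*
 * Illustrative sketch (an assumption, not part of this file): the write()
 * wrapper above; like read() it clamps the size to INT_MAX and returns the
 * number of bytes written or a negative CEPHFS error (e.g. -CEPHFS_EFBIG
 * past the MDS max file size, -CEPHFS_EDQUOT when a quota would be
 * exceeded). `client` and the fd are hypothetical.
 *
 *   const char msg[] = "hello\n";
 *   int n = client->write(fd, msg, sizeof(msg) - 1, 0);   // write at offset 0
 *   if (n != (int)(sizeof(msg) - 1)) {
 *     // short or failed write; n < 0 carries the error code
 *   }
 */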
10152 int Client::pwritev(int fd
, const struct iovec
*iov
, int iovcnt
, int64_t offset
)
10155 return -CEPHFS_EINVAL
;
10156 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, true);
10159 int64_t Client::_preadv_pwritev_locked(Fh
*fh
, const struct iovec
*iov
,
10160 unsigned iovcnt
, int64_t offset
, bool write
,
10161 bool clamp_to_int
, std::unique_lock
<ceph::mutex
> &cl
)
10163 #if defined(__linux__) && defined(O_PATH)
10164 if (fh
->flags
& O_PATH
)
10165 return -CEPHFS_EBADF
;
10167 loff_t totallen
= 0;
10168 for (unsigned i
= 0; i
< iovcnt
; i
++) {
10169 totallen
+= iov
[i
].iov_len
;
10173 * Some of the API functions take 64-bit size values, but only return
10174 * 32-bit signed integers. Clamp the I/O sizes in those functions so that
10175 * we don't do I/Os larger than the values we can return.
10177 if (clamp_to_int
) {
10178 totallen
= std::min(totallen
, (loff_t
)INT_MAX
);
10181 int64_t w
= _write(fh
, offset
, totallen
, NULL
, iov
, iovcnt
);
10182 ldout(cct
, 3) << "pwritev(" << fh
<< ", \"...\", " << totallen
<< ", " << offset
<< ") = " << w
<< dendl
;
10186 int64_t r
= _read(fh
, offset
, totallen
, &bl
);
10187 ldout(cct
, 3) << "preadv(" << fh
<< ", " << offset
<< ") = " << r
<< dendl
;
10192 auto iter
= bl
.cbegin();
10193 for (unsigned j
= 0, resid
= r
; j
< iovcnt
&& resid
> 0; j
++) {
10195 * This piece of code aims to handle the case that bufferlist
10196 * does not have enough data to fill in the iov
10198 const auto round_size
= std::min
<unsigned>(resid
, iov
[j
].iov_len
);
10199 iter
.copy(round_size
, reinterpret_cast<char*>(iov
[j
].iov_base
));
10200 resid
-= round_size
;
10201 /* iter is self-updating */
10208 int Client::_preadv_pwritev(int fd
, const struct iovec
*iov
, unsigned iovcnt
, int64_t offset
, bool write
)
10210 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
10211 if (!mref_reader
.is_state_satisfied())
10212 return -CEPHFS_ENOTCONN
;
10214 tout(cct
) << fd
<< std::endl
;
10215 tout(cct
) << offset
<< std::endl
;
10217 std::unique_lock
cl(client_lock
);
10218 Fh
*fh
= get_filehandle(fd
);
10220 return -CEPHFS_EBADF
;
10221 return _preadv_pwritev_locked(fh
, iov
, iovcnt
, offset
, write
, true, cl
);
10224 int64_t Client::_write(Fh
*f
, int64_t offset
, uint64_t size
, const char *buf
,
10225 const struct iovec
*iov
, int iovcnt
)
10227 ceph_assert(ceph_mutex_is_locked_by_me(client_lock
));
10231 if ((uint64_t)(offset
+size
) > mdsmap
->get_max_filesize()) //too large!
10232 return -CEPHFS_EFBIG
;
10234 //ldout(cct, 7) << "write fh " << fh << " size " << size << " offset " << offset << dendl;
10235 Inode
*in
= f
->inode
.get();
10237 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
)) {
10238 return -CEPHFS_ENOSPC
;
10241 ceph_assert(in
->snapid
== CEPH_NOSNAP
);
10243 // was Fh opened as writeable?
10244 if ((f
->mode
& CEPH_FILE_MODE_WR
) == 0)
10245 return -CEPHFS_EBADF
;
10247 // use/adjust fd pos?
10251 * FIXME: this is racy in that we may block _after_ this point waiting for caps, and size may
10252 * change out from under us.
10254 if (f
->flags
& O_APPEND
) {
10255 auto r
= _lseek(f
, 0, SEEK_END
);
10262 fpos
= offset
+size
;
10267 uint64_t endoff
= offset
+ size
;
10268 if (endoff
> in
->size
&& is_quota_bytes_exceeded(in
, endoff
- in
->size
,
10270 return -CEPHFS_EDQUOT
;
10273 //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
10275 ldout(cct
, 10) << "cur file size is " << in
->size
<< dendl
;
10278 utime_t start
= ceph_clock_now();
10280 if (in
->inline_version
== 0) {
10281 int r
= _getattr(in
, CEPH_STAT_CAP_INLINE_DATA
, f
->actor_perms
, true);
10284 ceph_assert(in
->inline_version
> 0);
10287 // copy into fresh buffer (since our write may be resub, async)
10291 bl
.append(buf
, size
);
10293 for (int i
= 0; i
< iovcnt
; i
++) {
10294 if (iov
[i
].iov_len
> 0) {
10295 bl
.append((const char *)iov
[i
].iov_base
, iov
[i
].iov_len
);
10301 uint64_t totalwritten
;
10303 if (f
->mode
& CEPH_FILE_MODE_LAZY
)
10304 want
= CEPH_CAP_FILE_BUFFER
| CEPH_CAP_FILE_LAZYIO
;
10306 want
= CEPH_CAP_FILE_BUFFER
;
10307 int r
= get_caps(f
, CEPH_CAP_FILE_WR
|CEPH_CAP_AUTH_SHARED
, want
, &have
, endoff
);
10311 /* clear the setuid/setgid bits, if any */
10312 if (unlikely(in
->mode
& (S_ISUID
|S_ISGID
)) && size
> 0) {
10313 struct ceph_statx stx
= { 0 };
10315 put_cap_ref(in
, CEPH_CAP_AUTH_SHARED
);
10316 r
= __setattrx(in
, &stx
, CEPH_SETATTR_KILL_SGUID
, f
->actor_perms
);
10320 put_cap_ref(in
, CEPH_CAP_AUTH_SHARED
);
10323 if (f
->flags
& O_DIRECT
)
10324 have
&= ~(CEPH_CAP_FILE_BUFFER
| CEPH_CAP_FILE_LAZYIO
);
10326 ldout(cct
, 10) << " snaprealm " << *in
->snaprealm
<< dendl
;
10328 std::unique_ptr
<C_SaferCond
> onuninline
= nullptr;
10330 if (in
->inline_version
< CEPH_INLINE_NONE
) {
10331 if (endoff
> cct
->_conf
->client_max_inline_size
||
10332 endoff
> CEPH_INLINE_MAX_SIZE
||
10333 !(have
& CEPH_CAP_FILE_BUFFER
)) {
10334 onuninline
.reset(new C_SaferCond("Client::_write_uninline_data flock"));
10335 uninline_data(in
, onuninline
.get());
10337 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
10339 uint32_t len
= in
->inline_data
.length();
10342 in
->inline_data
.begin(endoff
).copy(len
- endoff
, bl
); // XXX
10345 in
->inline_data
.splice(offset
, len
- offset
);
10346 else if (offset
> len
)
10347 in
->inline_data
.append_zero(offset
- len
);
10349 in
->inline_data
.append(bl
);
10350 in
->inline_version
++;
10352 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
10358 if (cct
->_conf
->client_oc
&&
10359 (have
& (CEPH_CAP_FILE_BUFFER
| CEPH_CAP_FILE_LAZYIO
))) {
10360 // do buffered write
10361 if (!in
->oset
.dirty_or_tx
)
10362 get_cap_ref(in
, CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_BUFFER
);
10364 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
10366 // async, caching, non-blocking.
10367 r
= objectcacher
->file_write(&in
->oset
, &in
->layout
,
10368 in
->snaprealm
->get_snap_context(),
10369 offset
, size
, bl
, ceph::real_clock::now(),
10371 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
10376 // flush cached write if O_SYNC is set on file fh
10377 // O_DSYNC == O_SYNC on linux < 2.6.33
10378 // O_SYNC = __O_SYNC | O_DSYNC on linux >= 2.6.33
10379 if ((f
->flags
& O_SYNC
) || (f
->flags
& O_DSYNC
)) {
10380 _flush_range(in
, offset
, size
);
10383 if (f
->flags
& O_DIRECT
)
10384 _flush_range(in
, offset
, size
);
10386 // simple, non-atomic sync write
10387 C_SaferCond
onfinish("Client::_write flock");
10388 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
10390 filer
->write_trunc(in
->ino
, &in
->layout
, in
->snaprealm
->get_snap_context(),
10391 offset
, size
, bl
, ceph::real_clock::now(), 0,
10392 in
->truncate_size
, in
->truncate_seq
,
10394 client_lock
.unlock();
10395 r
= onfinish
.wait();
10396 client_lock
.lock();
10397 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
10402 // if we get here, write was successful, update client metadata
10405 lat
= ceph_clock_now();
10407 logger
->tinc(l_c_wrlat
, lat
);
10414 totalwritten
= size
;
10415 r
= (int64_t)totalwritten
;
10418 if (totalwritten
+ offset
> in
->size
) {
10419 in
->size
= totalwritten
+ offset
;
10420 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
10422 if (is_quota_bytes_approaching(in
, f
->actor_perms
)) {
10423 check_caps(in
, CHECK_CAPS_NODELAY
);
10424 } else if (is_max_size_approaching(in
)) {
10428 ldout(cct
, 7) << "wrote to " << totalwritten
+offset
<< ", extending file size" << dendl
;
10430 ldout(cct
, 7) << "wrote to " << totalwritten
+offset
<< ", leaving file size at " << in
->size
<< dendl
;
10434 in
->mtime
= in
->ctime
= ceph_clock_now();
10436 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
10440 if (nullptr != onuninline
) {
10441 client_lock
.unlock();
10442 int uninline_ret
= onuninline
->wait();
10443 client_lock
.lock();
10445 if (uninline_ret
>= 0 || uninline_ret
== -CEPHFS_ECANCELED
) {
10446 in
->inline_data
.clear();
10447 in
->inline_version
= CEPH_INLINE_NONE
;
10448 in
->mark_caps_dirty(CEPH_CAP_FILE_WR
);
10454 put_cap_ref(in
, CEPH_CAP_FILE_WR
);
10458 int Client::_flush(Fh
*f
)
10460 Inode
*in
= f
->inode
.get();
10461 int err
= f
->take_async_err();
10463 ldout(cct
, 1) << __func__
<< ": " << f
<< " on inode " << *in
<< " caught async_err = "
10464 << cpp_strerror(err
) << dendl
;
10466 ldout(cct
, 10) << __func__
<< ": " << f
<< " on inode " << *in
<< " no async_err state" << dendl
;
10472 int Client::truncate(const char *relpath
, loff_t length
, const UserPerm
& perms
)
10474 struct ceph_statx stx
;
10475 stx
.stx_size
= length
;
10476 return setattrx(relpath
, &stx
, CEPH_SETATTR_SIZE
, perms
);
10479 int Client::ftruncate(int fd
, loff_t length
, const UserPerm
& perms
)
10481 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
10482 if (!mref_reader
.is_state_satisfied())
10483 return -CEPHFS_ENOTCONN
;
10485 tout(cct
) << __func__
<< std::endl
;
10486 tout(cct
) << fd
<< std::endl
;
10487 tout(cct
) << length
<< std::endl
;
10489 std::scoped_lock
lock(client_lock
);
10490 Fh
*f
= get_filehandle(fd
);
10492 return -CEPHFS_EBADF
;
10493 #if defined(__linux__) && defined(O_PATH)
10494 if (f
->flags
& O_PATH
)
10495 return -CEPHFS_EBADF
;
10497 if ((f
->mode
& CEPH_FILE_MODE_WR
) == 0)
10498 return -CEPHFS_EBADF
;
10500 attr
.st_size
= length
;
10501 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_SIZE
, perms
);
10504 int Client::fsync(int fd
, bool syncdataonly
)
10506 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
10507 if (!mref_reader
.is_state_satisfied())
10508 return -CEPHFS_ENOTCONN
;
10510 tout(cct
) << "fsync" << std::endl
;
10511 tout(cct
) << fd
<< std::endl
;
10512 tout(cct
) << syncdataonly
<< std::endl
;
10514 std::scoped_lock
lock(client_lock
);
10515 Fh
*f
= get_filehandle(fd
);
10517 return -CEPHFS_EBADF
;
10518 #if defined(__linux__) && defined(O_PATH)
10519 if (f
->flags
& O_PATH
)
10520 return -CEPHFS_EBADF
;
10522 int r
= _fsync(f
, syncdataonly
);
10524 // The IOs in this fsync were okay, but maybe something happened
10525 // in the background that we should be reporting?
10526 r
= f
->take_async_err();
10527 ldout(cct
, 5) << "fsync(" << fd
<< ", " << syncdataonly
10528 << ") = 0, async_err = " << r
<< dendl
;
10530 // Assume that an error we encountered during fsync, even reported
10531 // synchronously, would also have applied the error to the Fh, and we
10532 // should clear it here to avoid returning the same error again on next
10534 ldout(cct
, 5) << "fsync(" << fd
<< ", " << syncdataonly
<< ") = "
10536 f
->take_async_err();
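/*
 * Illustrative sketch (an assumption, not part of this file): fsync() above
 * flushes buffered data (and, unless syncdataonly is true, dirty caps /
 * metadata and unsafe MDS requests) and also surfaces any asynchronous
 * write error recorded on the Fh. `client` and the fd are hypothetical.
 *
 *   int r = client->fsync(fd, false);   // data + metadata, like ::fsync()
 *   int d = client->fsync(fd, true);    // data only, like ::fdatasync()
 *   if (r < 0 || d < 0) {
 *     // a previous buffered write may have failed; the error is reported here
 *   }
 */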
10541 int Client::_fsync(Inode
*in
, bool syncdataonly
)
10543 ceph_assert(ceph_mutex_is_locked_by_me(client_lock
));
10546 std::unique_ptr
<C_SaferCond
> object_cacher_completion
= nullptr;
10547 ceph_tid_t flush_tid
= 0;
10550 utime_t start
= ceph_clock_now();
10552 ldout(cct
, 8) << "_fsync on " << *in
<< " " << (syncdataonly
? "(dataonly)":"(data+metadata)") << dendl
;
10554 if (cct
->_conf
->client_oc
) {
10555 object_cacher_completion
.reset(new C_SaferCond("Client::_fsync::lock"));
10556 tmp_ref
= in
; // take a reference; C_SaferCond doesn't and _flush won't either
10557 _flush(in
, object_cacher_completion
.get());
10558 ldout(cct
, 15) << "using return-valued form of _fsync" << dendl
;
10561 if (!syncdataonly
&& in
->dirty_caps
) {
10562 check_caps(in
, CHECK_CAPS_NODELAY
|CHECK_CAPS_SYNCHRONOUS
);
10563 if (in
->flushing_caps
)
10564 flush_tid
= last_flush_tid
;
10565 } else ldout(cct
, 10) << "no metadata needs to commit" << dendl
;
10567 if (!syncdataonly
&& !in
->unsafe_ops
.empty()) {
10568 flush_mdlog_sync();
10570 MetaRequest
*req
= in
->unsafe_ops
.back();
10571 ldout(cct
, 15) << "waiting on unsafe requests, last tid " << req
->get_tid() << dendl
;
10574 wait_on_list(req
->waitfor_safe
);
10578 if (nullptr != object_cacher_completion
) { // wait on a real reply instead of guessing
10579 client_lock
.unlock();
10580 ldout(cct
, 15) << "waiting on data to flush" << dendl
;
10581 r
= object_cacher_completion
->wait();
10582 client_lock
.lock();
10583 ldout(cct
, 15) << "got " << r
<< " from flush writeback" << dendl
;
10585 // FIXME: this can starve
10586 while (in
->cap_refs
[CEPH_CAP_FILE_BUFFER
] > 0) {
10587 ldout(cct
, 10) << "ino " << in
->ino
<< " has " << in
->cap_refs
[CEPH_CAP_FILE_BUFFER
]
10588 << " uncommitted, waiting" << dendl
;
10589 wait_on_list(in
->waitfor_commit
);
10595 wait_sync_caps(in
, flush_tid
);
10597 ldout(cct
, 10) << "ino " << in
->ino
<< " has no uncommitted writes" << dendl
;
10599 ldout(cct
, 8) << "ino " << in
->ino
<< " failed to commit to disk! "
10600 << cpp_strerror(-r
) << dendl
;
10603 lat
= ceph_clock_now();
10605 logger
->tinc(l_c_fsync
, lat
);
10610 int Client::_fsync(Fh
*f
, bool syncdataonly
)
10612 ldout(cct
, 8) << "_fsync(" << f
<< ", " << (syncdataonly
? "dataonly)":"data+metadata)") << dendl
;
10613 return _fsync(f
->inode
.get(), syncdataonly
);
10616 int Client::fstat(int fd
, struct stat
*stbuf
, const UserPerm
& perms
, int mask
)
10618 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
10619 if (!mref_reader
.is_state_satisfied())
10620 return -CEPHFS_ENOTCONN
;
10622 tout(cct
) << "fstat mask " << hex
<< mask
<< dec
<< std::endl
;
10623 tout(cct
) << fd
<< std::endl
;
10625 std::scoped_lock
lock(client_lock
);
10626 Fh
*f
= get_filehandle(fd
);
10628 return -CEPHFS_EBADF
;
10629 int r
= _getattr(f
->inode
, mask
, perms
);
10632 fill_stat(f
->inode
, stbuf
, NULL
);
10633 ldout(cct
, 5) << "fstat(" << fd
<< ", " << stbuf
<< ") = " << r
<< dendl
;
10637 int Client::fstatx(int fd
, struct ceph_statx
*stx
, const UserPerm
& perms
,
10638 unsigned int want
, unsigned int flags
)
10640 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
10641 if (!mref_reader
.is_state_satisfied())
10642 return -CEPHFS_ENOTCONN
;
10644 tout(cct
) << "fstatx flags " << hex
<< flags
<< " want " << want
<< dec
<< std::endl
;
10645 tout(cct
) << fd
<< std::endl
;
10647 std::scoped_lock
lock(client_lock
);
10648 Fh
*f
= get_filehandle(fd
);
10650 return -CEPHFS_EBADF
;
10652 unsigned mask
= statx_to_mask(flags
, want
);
10656 r
= _getattr(f
->inode
, mask
, perms
);
10658 ldout(cct
, 3) << "fstatx exit on error!" << dendl
;
10663 fill_statx(f
->inode
, mask
, stx
);
10664 ldout(cct
, 3) << "fstatx(" << fd
<< ", " << stx
<< ") = " << r
<< dendl
;
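/*
 * Illustrative sketch (an assumption, not part of this file): fstatx()
 * above converts (flags, want) into a caps mask via statx_to_mask(), so
 * requesting fewer fields can avoid an MDS round trip. `client`, `perms`
 * and the fd are hypothetical; CEPH_STATX_BASIC_STATS comes from
 * ceph_ll_client.h.
 *
 *   struct ceph_statx stx;
 *   int r = client->fstatx(fd, &stx, perms, CEPH_STATX_BASIC_STATS, 0);
 *   if (r == 0) {
 *     // fields reported valid in stx.stx_mask are filled in
 *   }
 */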
10668 int Client::statxat(int dirfd
, const char *relpath
,
10669 struct ceph_statx
*stx
, const UserPerm
& perms
,
10670 unsigned int want
, unsigned int flags
) {
10671 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
10672 if (!mref_reader
.is_state_satisfied()) {
10673 return -CEPHFS_ENOTCONN
;
10676 tout(cct
) << __func__
<< " flags " << hex
<< flags
<< " want " << want
<< dec
<< std::endl
;
10677 tout(cct
) << dirfd
<< std::endl
;
10678 tout(cct
) << relpath
<< std::endl
;
10680 unsigned mask
= statx_to_mask(flags
, want
);
10683 std::scoped_lock
lock(client_lock
);
10684 int r
= get_fd_inode(dirfd
, &dirinode
);
10690 filepath
path(relpath
);
10691 r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), mask
, dirinode
);
10695 r
= _getattr(in
, mask
, perms
);
10697 ldout(cct
, 3) << __func__
<< " exit on error!" << dendl
;
10701 fill_statx(in
, mask
, stx
);
10702 ldout(cct
, 3) << __func__
<< " dirfd" << dirfd
<< ", r= " << r
<< dendl
;
10706 // not written yet, but i want to link!
10708 int Client::chdir(const char *relpath
, std::string
&new_cwd
,
10709 const UserPerm
& perms
)
10711 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
10712 if (!mref_reader
.is_state_satisfied())
10713 return -CEPHFS_ENOTCONN
;
10715 tout(cct
) << "chdir" << std::endl
;
10716 tout(cct
) << relpath
<< std::endl
;
10718 filepath
path(relpath
);
10721 std::scoped_lock
lock(client_lock
);
10722 int r
= path_walk(path
, &in
, perms
);
10726 if (!(in
.get()->is_dir()))
10727 return -CEPHFS_ENOTDIR
;
10731 ldout(cct
, 3) << "chdir(" << relpath
<< ") cwd now " << cwd
->ino
<< dendl
;
10733 _getcwd(new_cwd
, perms
);
10737 void Client::_getcwd(string
& dir
, const UserPerm
& perms
)
10740 ldout(cct
, 10) << __func__
<< " " << *cwd
<< dendl
;
10742 Inode
*in
= cwd
.get();
10743 while (in
!= root
.get()) {
10744 ceph_assert(in
->dentries
.size() < 2); // dirs can't be hard-linked
10746 // A cwd or ancestor is unlinked
10747 if (in
->dentries
.empty()) {
10751 Dentry
*dn
= in
->get_first_parent();
10756 ldout(cct
, 10) << __func__
<< " looking up parent for " << *in
<< dendl
;
10757 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
10758 filepath
path(in
->ino
);
10759 req
->set_filepath(path
);
10760 req
->set_inode(in
);
10761 int res
= make_request(req
, perms
);
10770 path
.push_front_dentry(dn
->name
);
10771 in
= dn
->dir
->parent_inode
;
10774 dir
+= path
.get_path();

void Client::getcwd(string& dir, const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return;

  std::scoped_lock l(client_lock);

  _getcwd(dir, perms);
}
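
/*
 * Illustrative note on what _getcwd() produces: starting from the cwd
 * inode it walks parent dentries up toward the mount root, prefixing each
 * dentry name, so a cwd three levels deep ends up roughly as
 *
 *   dir = "/" + "a/b/c"   ->   "/a/b/c"
 *
 * If an ancestor has been unlinked there is no dentry left to follow and
 * the walk stops early, which is why the loop checks dentries.empty().
 */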

int Client::statfs(const char *path, struct statvfs *stbuf,
                   const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  tout(cct) << __func__ << std::endl;
  unsigned long int total_files_on_fs;
  ceph_statfs stats;
  C_SaferCond cond;

  std::unique_lock lock(client_lock);
  const vector<int64_t> &data_pools = mdsmap->get_data_pools();
  if (data_pools.size() == 1) {
    objecter->get_fs_stats(stats, data_pools[0], &cond);
  } else {
    objecter->get_fs_stats(stats, boost::optional<int64_t>(), &cond);
  }

  lock.unlock();
  int rval = cond.wait();
  lock.lock();

  total_files_on_fs = root->rstat.rfiles + root->rstat.rsubdirs;

  if (rval < 0) {
    ldout(cct, 1) << "underlying call to statfs returned error: "
                  << cpp_strerror(rval)
                  << dendl;
    return rval;
  }

  memset(stbuf, 0, sizeof(*stbuf));

  /*
   * we're going to set a block size of 4MB so we can represent larger
   * FSes without overflowing. Additionally convert the space
   * measurements from KB to bytes while making them in terms of
   * blocks. We use 4MB only because it is big enough, and because it
   * actually *is* the (ceph) default block size.
   */
  const int CEPH_BLOCK_SHIFT = 22;
  stbuf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
  stbuf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
  stbuf->f_files = total_files_on_fs;
  stbuf->f_ffree = -1;
  stbuf->f_favail = -1;
  stbuf->f_fsid = -1;       // ??
  stbuf->f_flag = 0;        // ??
  stbuf->f_namemax = NAME_MAX;

  // Usually quota_root will == root_ancestor, but if the mount root has no
  // quota but we can see a parent of it that does have a quota, we'll
  // respect that one instead.
  ceph_assert(root != nullptr);
  InodeRef quota_root = root->quota.is_enable() ? root : get_quota_root(root.get(), perms);

  // get_quota_root should always give us something
  // because client quotas are always enabled
  ceph_assert(quota_root != nullptr);

  if (quota_root && cct->_conf->client_quota_df && quota_root->quota.max_bytes) {

    // Skip the getattr if any sessions are stale, as we don't want to
    // block `df` if this client has e.g. been evicted, or if the MDS cluster
    // is unresponsive.
    if (!_any_stale_sessions()) {
      int r = _getattr(quota_root, 0, perms, true);
      if (r != 0) {
        // Ignore return value: error getting latest inode metadata is not a good
        // reason to break "df".
        lderr(cct) << "Error in getattr on quota root 0x"
                   << std::hex << quota_root->ino << std::dec
                   << " statfs result may be outdated" << dendl;
      }
    }

    // Special case: if there is a size quota set on the Inode acting
    // as the root for this client mount, then report the quota status
    // as the filesystem statistics.
    const fsblkcnt_t total = quota_root->quota.max_bytes >> CEPH_BLOCK_SHIFT;
    const fsblkcnt_t used = quota_root->rstat.rbytes >> CEPH_BLOCK_SHIFT;
    // It is possible for a quota to be exceeded: arithmetic here must
    // handle case where used > total.
    const fsblkcnt_t free = total > used ? total - used : 0;

    stbuf->f_blocks = total;
    stbuf->f_bfree = free;
    stbuf->f_bavail = free;
  } else {
    // General case: report the cluster statistics returned from RADOS. Because
    // multiple pools may be used without one filesystem namespace via
    // layouts, this is the most correct thing we can do.
    stbuf->f_blocks = stats.kb >> (CEPH_BLOCK_SHIFT - 10);
    stbuf->f_bfree = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
    stbuf->f_bavail = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
  }

  return rval;
}
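
/*
 * Example of the unit conversion above (illustrative numbers only):
 * RADOS reports space in KB while statvfs is filled in 4 MiB blocks
 * (CEPH_BLOCK_SHIFT == 22), so shifting by (22 - 10) converts KB to
 * 4 MiB blocks directly, e.g.
 *
 *   stats.kb = 8388608            // 8 GiB expressed in KB
 *   f_blocks = 8388608 >> 12      // = 2048 blocks of 4 MiB = 8 GiB
 *
 * The quota branch does the same starting from bytes, shifting by the
 * full CEPH_BLOCK_SHIFT instead.
 */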

int Client::_do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep,
                         struct flock *fl, uint64_t owner, bool removing)
{
  ldout(cct, 10) << __func__ << " ino " << in->ino
                 << (lock_type == CEPH_LOCK_FCNTL ? " fcntl" : " flock")
                 << " type " << fl->l_type << " owner " << owner
                 << " " << fl->l_start << "~" << fl->l_len << dendl;

  if (in->flags & I_ERROR_FILELOCK)
    return -CEPHFS_EIO;

  int lock_cmd;
  if (F_RDLCK == fl->l_type)
    lock_cmd = CEPH_LOCK_SHARED;
  else if (F_WRLCK == fl->l_type)
    lock_cmd = CEPH_LOCK_EXCL;
  else if (F_UNLCK == fl->l_type)
    lock_cmd = CEPH_LOCK_UNLOCK;
  else
    return -CEPHFS_EIO;

  if (op != CEPH_MDS_OP_SETFILELOCK || lock_cmd == CEPH_LOCK_UNLOCK)
    sleep = 0;

  /*
   * Set the most significant bit, so that MDS knows the 'owner'
   * is sufficient to identify the owner of lock. (old code uses
   * both 'owner' and 'pid')
   */
  owner |= (1ULL << 63);

  MetaRequest *req = new MetaRequest(op);
  filepath path;
  in->make_nosnap_relative_path(path);
  req->set_filepath(path);
  req->set_inode(in);

  req->head.args.filelock_change.rule = lock_type;
  req->head.args.filelock_change.type = lock_cmd;
  req->head.args.filelock_change.owner = owner;
  req->head.args.filelock_change.pid = fl->l_pid;
  req->head.args.filelock_change.start = fl->l_start;
  req->head.args.filelock_change.length = fl->l_len;
  req->head.args.filelock_change.wait = sleep;

  int ret;
  bufferlist bl;

  if (sleep && switch_interrupt_cb) {
    // enable interrupt
    switch_interrupt_cb(callback_handle, req->get());
    ret = make_request(req, fh->actor_perms, NULL, NULL, -1, &bl);
    // disable interrupt
    switch_interrupt_cb(callback_handle, NULL);
    if (ret == 0 && req->aborted()) {
      // effect of this lock request has been revoked by the 'lock intr' request
      ret = req->get_abort_code();
    }
    put_request(req);
  } else {
    ret = make_request(req, fh->actor_perms, NULL, NULL, -1, &bl);
  }

  if (ret == 0) {
    if (op == CEPH_MDS_OP_GETFILELOCK) {
      ceph_filelock filelock;
      auto p = bl.cbegin();
      decode(filelock, p);

      if (CEPH_LOCK_SHARED == filelock.type)
        fl->l_type = F_RDLCK;
      else if (CEPH_LOCK_EXCL == filelock.type)
        fl->l_type = F_WRLCK;
      else
        fl->l_type = F_UNLCK;

      fl->l_whence = SEEK_SET;
      fl->l_start = filelock.start;
      fl->l_len = filelock.length;
      fl->l_pid = filelock.pid;
    } else if (op == CEPH_MDS_OP_SETFILELOCK) {
      ceph_lock_state_t *lock_state;
      if (lock_type == CEPH_LOCK_FCNTL) {
        if (!in->fcntl_locks)
          in->fcntl_locks.reset(new ceph_lock_state_t(cct, CEPH_LOCK_FCNTL));
        lock_state = in->fcntl_locks.get();
      } else if (lock_type == CEPH_LOCK_FLOCK) {
        if (!in->flock_locks)
          in->flock_locks.reset(new ceph_lock_state_t(cct, CEPH_LOCK_FLOCK));
        lock_state = in->flock_locks.get();
      } else {
        return -CEPHFS_EINVAL;
      }
      _update_lock_state(fl, owner, lock_state);

      if (!removing) {
        if (lock_type == CEPH_LOCK_FCNTL) {
          if (!fh->fcntl_locks)
            fh->fcntl_locks.reset(new ceph_lock_state_t(cct, CEPH_LOCK_FCNTL));
          lock_state = fh->fcntl_locks.get();
        } else {
          if (!fh->flock_locks)
            fh->flock_locks.reset(new ceph_lock_state_t(cct, CEPH_LOCK_FLOCK));
          lock_state = fh->flock_locks.get();
        }
        _update_lock_state(fl, owner, lock_state);
      }
    }
  }
  return ret;
}
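
/*
 * Note on the 'owner' encoding used above: setting the top bit marks the
 * value as a self-contained owner token, so the MDS does not also need the
 * pid to tell lock owners apart. As a purely illustrative example:
 *
 *   owner           = 0x1234
 *   owner | 1<<63   = 0x8000000000001234
 *
 * The same transformation is applied in _update_lock_state() so the locally
 * tracked lock state matches what was sent to the MDS.
 */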

int Client::_interrupt_filelock(MetaRequest *req)
{
  // Set abort code, but do not kick. The abort code prevents the request
  // from being re-sent.
  req->abort(-CEPHFS_EINTR);
  if (req->mds < 0)
    return 0; // haven't sent the request

  Inode *in = req->inode();

  int lock_type;
  if (req->head.args.filelock_change.rule == CEPH_LOCK_FLOCK)
    lock_type = CEPH_LOCK_FLOCK_INTR;
  else if (req->head.args.filelock_change.rule == CEPH_LOCK_FCNTL)
    lock_type = CEPH_LOCK_FCNTL_INTR;
  else
    return -CEPHFS_EINVAL;

  MetaRequest *intr_req = new MetaRequest(CEPH_MDS_OP_SETFILELOCK);
  filepath path;
  in->make_nosnap_relative_path(path);
  intr_req->set_filepath(path);
  intr_req->set_inode(in);
  intr_req->head.args.filelock_change = req->head.args.filelock_change;
  intr_req->head.args.filelock_change.rule = lock_type;
  intr_req->head.args.filelock_change.type = CEPH_LOCK_UNLOCK;

  UserPerm perms(req->get_uid(), req->get_gid());
  return make_request(intr_req, perms, NULL, NULL, -1);
}
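
/*
 * Rough sketch of the interrupt path: when a blocking SETFILELOCK is
 * cancelled (e.g. the caller catches a signal), _interrupt_filelock() sends
 * a second SETFILELOCK whose rule is the *_INTR variant and whose type is
 * CEPH_LOCK_UNLOCK. The MDS uses that to wake the original, still-queued
 * request, and _do_filelock() then reports the abort code instead of a
 * granted lock.
 */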

void Client::_encode_filelocks(Inode *in, bufferlist& bl)
{
  if (!in->fcntl_locks && !in->flock_locks)
    return;

  unsigned nr_fcntl_locks = in->fcntl_locks ? in->fcntl_locks->held_locks.size() : 0;
  encode(nr_fcntl_locks, bl);
  if (nr_fcntl_locks) {
    auto &lock_state = in->fcntl_locks;
    for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
        p != lock_state->held_locks.end();
        ++p)
      encode(p->second, bl);
  }

  unsigned nr_flock_locks = in->flock_locks ? in->flock_locks->held_locks.size() : 0;
  encode(nr_flock_locks, bl);
  if (nr_flock_locks) {
    auto &lock_state = in->flock_locks;
    for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
        p != lock_state->held_locks.end();
        ++p)
      encode(p->second, bl);
  }

  ldout(cct, 10) << __func__ << " ino " << in->ino << ", " << nr_fcntl_locks
                 << " fcntl locks, " << nr_flock_locks << " flock locks" << dendl;
}

void Client::_release_filelocks(Fh *fh)
{
  if (!fh->fcntl_locks && !fh->flock_locks)
    return;

  Inode *in = fh->inode.get();
  ldout(cct, 10) << __func__ << " " << fh << " ino " << in->ino << dendl;

  list<ceph_filelock> activated_locks;

  list<pair<int, ceph_filelock> > to_release;

  if (fh->fcntl_locks) {
    auto &lock_state = fh->fcntl_locks;
    for(auto p = lock_state->held_locks.begin(); p != lock_state->held_locks.end(); ) {
      auto q = p++;
      if (in->flags & I_ERROR_FILELOCK) {
        lock_state->remove_lock(q->second, activated_locks);
      } else {
        to_release.push_back(pair<int, ceph_filelock>(CEPH_LOCK_FCNTL, q->second));
      }
    }
    lock_state.reset();
  }
  if (fh->flock_locks) {
    auto &lock_state = fh->flock_locks;
    for(auto p = lock_state->held_locks.begin(); p != lock_state->held_locks.end(); ) {
      auto q = p++;
      if (in->flags & I_ERROR_FILELOCK) {
        lock_state->remove_lock(q->second, activated_locks);
      } else {
        to_release.push_back(pair<int, ceph_filelock>(CEPH_LOCK_FLOCK, q->second));
      }
    }
    lock_state.reset();
  }

  if ((in->flags & I_ERROR_FILELOCK) && !in->has_any_filelocks())
    in->flags &= ~I_ERROR_FILELOCK;

  if (to_release.empty())
    return;

  struct flock fl;
  memset(&fl, 0, sizeof(fl));
  fl.l_whence = SEEK_SET;
  fl.l_type = F_UNLCK;

  for (list<pair<int, ceph_filelock> >::iterator p = to_release.begin();
       p != to_release.end();
       ++p) {
    fl.l_start = p->second.start;
    fl.l_len = p->second.length;
    fl.l_pid = p->second.pid;
    _do_filelock(in, fh, p->first, CEPH_MDS_OP_SETFILELOCK, 0, &fl,
                 p->second.owner, true);
  }
}

void Client::_update_lock_state(struct flock *fl, uint64_t owner,
                                ceph_lock_state_t *lock_state)
{
  int lock_cmd;
  if (F_RDLCK == fl->l_type)
    lock_cmd = CEPH_LOCK_SHARED;
  else if (F_WRLCK == fl->l_type)
    lock_cmd = CEPH_LOCK_EXCL;
  else
    lock_cmd = CEPH_LOCK_UNLOCK;

  ceph_filelock filelock;
  filelock.start = fl->l_start;
  filelock.length = fl->l_len;
  filelock.client = 0;
  // see comment in _do_filelock()
  filelock.owner = owner | (1ULL << 63);
  filelock.pid = fl->l_pid;
  filelock.type = lock_cmd;

  if (filelock.type == CEPH_LOCK_UNLOCK) {
    list<ceph_filelock> activated_locks;
    lock_state->remove_lock(filelock, activated_locks);
  } else {
    bool r = lock_state->add_lock(filelock, false, false, NULL);
    ceph_assert(r);
  }
}
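
/*
 * For reference, the fcntl-to-ceph lock type mapping used by both
 * _do_filelock() and _update_lock_state() (illustrative summary):
 *
 *   F_RDLCK -> CEPH_LOCK_SHARED
 *   F_WRLCK -> CEPH_LOCK_EXCL
 *   F_UNLCK -> CEPH_LOCK_UNLOCK
 *
 * An unlock removes the matching range from the local ceph_lock_state_t;
 * anything else is recorded with add_lock().
 */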

int Client::_getlk(Fh *fh, struct flock *fl, uint64_t owner)
{
  Inode *in = fh->inode.get();
  ldout(cct, 10) << "_getlk " << fh << " ino " << in->ino << dendl;
  int ret = _do_filelock(in, fh, CEPH_LOCK_FCNTL, CEPH_MDS_OP_GETFILELOCK, 0, fl, owner);
  return ret;
}

int Client::_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep)
{
  Inode *in = fh->inode.get();
  ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << dendl;
  int ret = _do_filelock(in, fh, CEPH_LOCK_FCNTL, CEPH_MDS_OP_SETFILELOCK, sleep, fl, owner);
  ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << " result=" << ret << dendl;
  return ret;
}

int Client::_flock(Fh *fh, int cmd, uint64_t owner)
{
  Inode *in = fh->inode.get();
  ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << dendl;

  int sleep = !(cmd & LOCK_NB);

  // map the BSD flock operation onto an fcntl-style lock type
  int type;
  switch (cmd & ~LOCK_NB) {
  case LOCK_SH:
    type = F_RDLCK;
    break;
  case LOCK_EX:
    type = F_WRLCK;
    break;
  case LOCK_UN:
    type = F_UNLCK;
    break;
  default:
    return -CEPHFS_EINVAL;
  }

  struct flock fl;
  memset(&fl, 0, sizeof(fl));
  fl.l_type = type;
  fl.l_whence = SEEK_SET;

  int ret = _do_filelock(in, fh, CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, sleep, &fl, owner);
  ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << " result=" << ret << dendl;
  return ret;
}
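
/*
 * Example of how a BSD-style flock(2) request is translated here
 * (illustrative): flock(fd, LOCK_EX | LOCK_NB) arrives with LOCK_NB set,
 * so sleep == 0 and the lock type becomes F_WRLCK; the request is then
 * sent as a CEPH_LOCK_FLOCK SETFILELOCK, sharing the same MDS path as the
 * fcntl locks above.
 */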

int Client::get_snap_info(const char *path, const UserPerm &perms, SnapInfo *snap_info) {
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied()) {
    return -CEPHFS_ENOTCONN;
  }

  std::unique_lock locker(client_lock);
  InodeRef in;
  int r = Client::path_walk(path, &in, perms, true);
  if (r < 0) {
    return r;
  }

  if (in->snapid == CEPH_NOSNAP) {
    return -CEPHFS_EINVAL;
  }

  snap_info->id = in->snapid;
  snap_info->metadata = in->snap_metadata;
  return 0;
}

int Client::ll_statfs(Inode *in, struct statvfs *stbuf, const UserPerm& perms)
{
  /* Since the only thing this does is wrap a call to statfs, and
     statfs takes a lock, it doesn't seem we have a need to split it
     out. */
  return statfs(0, stbuf, perms);
}

void Client::ll_register_callbacks(struct ceph_client_callback_args *args)
{
  std::scoped_lock l(client_lock);
  ldout(cct, 10) << __func__ << " cb " << args->handle
                 << " invalidate_ino_cb " << args->ino_cb
                 << " invalidate_dentry_cb " << args->dentry_cb
                 << " switch_interrupt_cb " << args->switch_intr_cb
                 << " remount_cb " << args->remount_cb
                 << dendl;
  callback_handle = args->handle;
  if (args->ino_cb) {
    ino_invalidate_cb = args->ino_cb;
    async_ino_invalidator.start();
  }
  if (args->dentry_cb) {
    dentry_invalidate_cb = args->dentry_cb;
    async_dentry_invalidator.start();
  }
  if (args->switch_intr_cb) {
    switch_interrupt_cb = args->switch_intr_cb;
    interrupt_finisher.start();
  }
  if (args->remount_cb) {
    remount_cb = args->remount_cb;
    remount_finisher.start();
  }
  if (args->ino_release_cb) {
    ino_release_cb = args->ino_release_cb;
    async_ino_releasor.start();
  }
  if (args->umask_cb)
    umask_cb = args->umask_cb;
}

int Client::test_dentry_handling(bool can_invalidate)
{
  int r = 0;

  RWRef_t iref_reader(initialize_state, CLIENT_INITIALIZED);
  if (!iref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  can_invalidate_dentries = can_invalidate;

  if (can_invalidate_dentries) {
    ceph_assert(dentry_invalidate_cb);
    ldout(cct, 1) << "using dentry_invalidate_cb" << dendl;
  } else {
    ceph_assert(remount_cb);
    ldout(cct, 1) << "using remount_cb" << dendl;
    r = _do_remount(false);
  }

  return r;
}

int Client::_sync_fs()
{
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  ldout(cct, 10) << __func__ << dendl;

  // flush file data
  std::unique_ptr<C_SaferCond> cond = nullptr;
  if (cct->_conf->client_oc) {
    cond.reset(new C_SaferCond("Client::_sync_fs:lock"));
    objectcacher->flush_all(cond.get());
  }

  // flush caps
  flush_caps_sync();
  ceph_tid_t flush_tid = last_flush_tid;

  // wait for unsafe mds requests
  wait_unsafe_requests();

  wait_sync_caps(flush_tid);

  if (nullptr != cond) {
    client_lock.unlock();
    ldout(cct, 15) << __func__ << " waiting on data to flush" << dendl;
    cond->wait();
    ldout(cct, 15) << __func__ << " flush finished" << dendl;
    client_lock.lock();
  }

  return 0;
}
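
/*
 * A brief note on ordering in _sync_fs() (as sketched above): dirty data is
 * handed to the ObjectCacher first, then dirty caps are flushed and the
 * client waits for its unsafe MDS requests and cap flush acks. Only after
 * that does it block on the data flush, dropping client_lock while waiting
 * so ObjectCacher callbacks can make progress.
 */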

int Client::sync_fs()
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock l(client_lock);

  return _sync_fs();
}

int64_t Client::drop_caches()
{
  std::scoped_lock l(client_lock);
  return objectcacher->release_all();
}

int Client::_lazyio(Fh *fh, int enable)
{
  Inode *in = fh->inode.get();
  ldout(cct, 20) << __func__ << " " << *in << " " << !!enable << dendl;

  if (!!(fh->mode & CEPH_FILE_MODE_LAZY) == !!enable)
    return 0;

  int orig_mode = fh->mode;
  if (enable) {
    fh->mode |= CEPH_FILE_MODE_LAZY;
    in->get_open_ref(fh->mode);
    in->put_open_ref(orig_mode);
    check_caps(in, CHECK_CAPS_NODELAY);
  } else {
    fh->mode &= ~CEPH_FILE_MODE_LAZY;
    in->get_open_ref(fh->mode);
    in->put_open_ref(orig_mode);
    check_caps(in, 0);
  }

  return 0;
}

int Client::lazyio(int fd, int enable)
{
  std::scoped_lock l(client_lock);
  Fh *f = get_filehandle(fd);
  if (!f)
    return -CEPHFS_EBADF;

  return _lazyio(f, enable);
}

int Client::ll_lazyio(Fh *fh, int enable)
{
  ldout(cct, 3) << __func__ << " " << fh << " " << fh->inode->ino << " " << !!enable << dendl;
  tout(cct) << __func__ << std::endl;

  std::scoped_lock lock(client_lock);
  return _lazyio(fh, enable);
}

int Client::lazyio_propagate(int fd, loff_t offset, size_t count)
{
  std::scoped_lock l(client_lock);
  ldout(cct, 3) << "op: client->lazyio_propagate(" << fd
                << ", " << offset << ", " << count << ")" << dendl;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -CEPHFS_EBADF;

  // flush this client's buffered writes out to the OSDs
  _fsync(f, true);
  return 0;
}

int Client::lazyio_synchronize(int fd, loff_t offset, size_t count)
{
  std::scoped_lock l(client_lock);
  ldout(cct, 3) << "op: client->lazyio_synchronize(" << fd
                << ", " << offset << ", " << count << ")" << dendl;

  Fh *f = get_filehandle(fd);
  if (!f)
    return -CEPHFS_EBADF;
  Inode *in = f->inode.get();

  _fsync(f, true);
  if (_release(in)) {
    int r = _getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms);
    if (r < 0)
      return r;
  }
  return 0;
}
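
/*
 * Typical lazyio usage from an application, assuming the libcephfs
 * wrappers are available in this build (names shown for illustration):
 *
 *   ceph_lazyio(cmount, fd, 1);                  // switch fd to LAZY_IO
 *   ... unsynchronized reads/writes ...
 *   ceph_lazyio_propagate(cmount, fd, 0, len);   // push local writes out
 *   ceph_lazyio_synchronize(cmount, fd, 0, len); // drop cache, refresh size
 */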

// =============================

int Client::mksnap(const char *relpath, const char *name, const UserPerm& perm,
                   mode_t mode, const std::map<std::string, std::string> &metadata)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock l(client_lock);

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perm);
  if (r < 0)
    return r;
  if (cct->_conf->client_permissions) {
    r = may_create(in.get(), perm);
    if (r < 0)
      return r;
  }
  Inode *snapdir = open_snapdir(in.get());
  return _mkdir(snapdir, name, mode, perm, nullptr, metadata);
}
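
/*
 * mksnap() is effectively "mkdir inside the .snap directory": the snap
 * directory inode is obtained via open_snapdir() and the snapshot itself is
 * created with _mkdir(). From a mounted client the equivalent is roughly:
 *
 *   mkdir /mnt/cephfs/some/dir/.snap/mysnap
 */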

int Client::rmsnap(const char *relpath, const char *name, const UserPerm& perms, bool check_perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock l(client_lock);

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms);
  if (r < 0)
    return r;
  Inode *snapdir = open_snapdir(in.get());
  if (cct->_conf->client_permissions) {
    r = may_delete(snapdir, check_perms ? name : NULL, perms);
    if (r < 0)
      return r;
  }
  return _rmdir(snapdir, name, perms);
}

// =============================

int Client::get_caps_issued(int fd)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  Fh *f = get_filehandle(fd);
  if (!f)
    return -CEPHFS_EBADF;

  return f->inode->caps_issued();
}

int Client::get_caps_issued(const char *path, const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  filepath p(path);
  InodeRef in;
  int r = path_walk(p, &in, perms, true);
  if (r < 0)
    return r;
  return in->caps_issued();
}

// =========================================

Inode *Client::open_snapdir(Inode *diri)
{
  Inode *in;
  vinodeno_t vino(diri->ino, CEPH_SNAPDIR);
  if (!inode_map.count(vino)) {
    in = new Inode(this, vino, &diri->layout);

    in->ino = diri->ino;
    in->snapid = CEPH_SNAPDIR;
    in->mode = diri->mode;
    in->uid = diri->uid;
    in->gid = diri->gid;
    in->nlink = 1;
    in->mtime = diri->mtime;
    in->ctime = diri->ctime;
    in->btime = diri->btime;
    in->atime = diri->atime;
    in->size = diri->size;
    in->change_attr = diri->change_attr;

    in->dirfragtree.clear();
    in->snapdir_parent = diri;
    diri->flags |= I_SNAPDIR_OPEN;
    inode_map[vino] = in;
    if (use_faked_inos())
      _assign_faked_ino(in);
    ldout(cct, 10) << "open_snapdir created snapshot inode " << *in << dendl;
  } else {
    in = inode_map[vino];
    ldout(cct, 10) << "open_snapdir had snapshot inode " << *in << dendl;
  }
  return in;
}
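
/*
 * The snapdir built above is essentially a client-side pseudo inode: it
 * reuses the parent directory's inode number but with snapid set to
 * CEPH_SNAPDIR, so (ino, CEPH_SNAPDIR) and (ino, CEPH_NOSNAP) name two
 * different entries in inode_map. Attributes are copied from the parent so
 * a stat() of ".snap" looks sensible.
 */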
11544 int Client::ll_lookup(Inode
*parent
, const char *name
, struct stat
*attr
,
11545 Inode
**out
, const UserPerm
& perms
)
11547 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11548 if (!mref_reader
.is_state_satisfied())
11549 return -CEPHFS_ENOTCONN
;
11551 vinodeno_t vparent
= _get_vino(parent
);
11552 ldout(cct
, 3) << __func__
<< " " << vparent
<< " " << name
<< dendl
;
11553 tout(cct
) << __func__
<< std::endl
;
11554 tout(cct
) << name
<< std::endl
;
11556 std::scoped_lock
lock(client_lock
);
11559 if (!fuse_default_permissions
) {
11560 if (strcmp(name
, ".") && strcmp(name
, "..")) {
11561 r
= may_lookup(parent
, perms
);
11567 string
dname(name
);
11570 r
= _lookup(parent
, dname
, CEPH_STAT_CAP_INODE_ALL
, &in
, perms
);
11577 fill_stat(in
, attr
);
11581 ldout(cct
, 3) << __func__
<< " " << vparent
<< " " << name
11582 << " -> " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11583 tout(cct
) << attr
->st_ino
<< std::endl
;
11588 int Client::ll_lookup_vino(
11590 const UserPerm
& perms
,
11593 ceph_assert(inode
!= NULL
);
11594 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11595 if (!mref_reader
.is_state_satisfied())
11596 return -CEPHFS_ENOTCONN
;
11598 if (is_reserved_vino(vino
))
11599 return -CEPHFS_ESTALE
;
11601 std::scoped_lock
lock(client_lock
);
11602 ldout(cct
, 3) << __func__
<< " " << vino
<< dendl
;
11604 // Check the cache first
11605 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
11606 if (p
!= inode_map
.end()) {
11607 *inode
= p
->second
;
11612 uint64_t snapid
= vino
.snapid
;
11614 // for snapdir, find the non-snapped dir inode
11615 if (snapid
== CEPH_SNAPDIR
)
11616 vino
.snapid
= CEPH_NOSNAP
;
11618 int r
= _lookup_vino(vino
, perms
, inode
);
11621 ceph_assert(*inode
!= NULL
);
11623 if (snapid
== CEPH_SNAPDIR
) {
11624 Inode
*tmp
= *inode
;
11626 // open the snapdir and put the inode ref
11627 *inode
= open_snapdir(tmp
);
11628 _ll_forget(tmp
, 1);
11634 int Client::ll_lookup_inode(
11635 struct inodeno_t ino
,
11636 const UserPerm
& perms
,
11639 vinodeno_t
vino(ino
, CEPH_NOSNAP
);
11640 return ll_lookup_vino(vino
, perms
, inode
);
11643 int Client::ll_lookupx(Inode
*parent
, const char *name
, Inode
**out
,
11644 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
11645 const UserPerm
& perms
)
11647 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11648 if (!mref_reader
.is_state_satisfied())
11649 return -CEPHFS_ENOTCONN
;
11651 vinodeno_t vparent
= _get_vino(parent
);
11652 ldout(cct
, 3) << __func__
<< " " << vparent
<< " " << name
<< dendl
;
11653 tout(cct
) << "ll_lookupx" << std::endl
;
11654 tout(cct
) << name
<< std::endl
;
11656 std::scoped_lock
lock(client_lock
);
11659 if (!fuse_default_permissions
) {
11660 r
= may_lookup(parent
, perms
);
11665 string
dname(name
);
11668 unsigned mask
= statx_to_mask(flags
, want
);
11669 r
= _lookup(parent
, dname
, mask
, &in
, perms
);
11675 fill_statx(in
, mask
, stx
);
11679 ldout(cct
, 3) << __func__
<< " " << vparent
<< " " << name
11680 << " -> " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11681 tout(cct
) << stx
->stx_ino
<< std::endl
;
11686 int Client::ll_walk(const char* name
, Inode
**out
, struct ceph_statx
*stx
,
11687 unsigned int want
, unsigned int flags
, const UserPerm
& perms
)
11689 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11690 if (!mref_reader
.is_state_satisfied())
11691 return -CEPHFS_ENOTCONN
;
11693 filepath
fp(name
, 0);
11696 unsigned mask
= statx_to_mask(flags
, want
);
11698 ldout(cct
, 3) << __func__
<< " " << name
<< dendl
;
11699 tout(cct
) << __func__
<< std::endl
;
11700 tout(cct
) << name
<< std::endl
;
11702 std::scoped_lock
lock(client_lock
);
11703 rc
= path_walk(fp
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), mask
);
11705 /* zero out mask, just in case... */
11712 fill_statx(in
, mask
, stx
);
11719 void Client::_ll_get(Inode
*in
)
11721 if (in
->ll_ref
== 0) {
11723 if (in
->is_dir() && !in
->dentries
.empty()) {
11724 ceph_assert(in
->dentries
.size() == 1); // dirs can't be hard-linked
11725 in
->get_first_parent()->get(); // pin dentry
11727 if (in
->snapid
!= CEPH_NOSNAP
)
11728 ll_snap_ref
[in
->snapid
]++;
11731 ldout(cct
, 20) << __func__
<< " " << in
<< " " << in
->ino
<< " -> " << in
->ll_ref
<< dendl
;
11734 int Client::_ll_put(Inode
*in
, uint64_t num
)
11737 ldout(cct
, 20) << __func__
<< " " << in
<< " " << in
->ino
<< " " << num
<< " -> " << in
->ll_ref
<< dendl
;
11738 if (in
->ll_ref
== 0) {
11739 if (in
->is_dir() && !in
->dentries
.empty()) {
11740 ceph_assert(in
->dentries
.size() == 1); // dirs can't be hard-linked
11741 in
->get_first_parent()->put(); // unpin dentry
11743 if (in
->snapid
!= CEPH_NOSNAP
) {
11744 auto p
= ll_snap_ref
.find(in
->snapid
);
11745 ceph_assert(p
!= ll_snap_ref
.end());
11746 ceph_assert(p
->second
> 0);
11747 if (--p
->second
== 0)
11748 ll_snap_ref
.erase(p
);
11757 void Client::_ll_drop_pins()
11759 ldout(cct
, 10) << __func__
<< dendl
;
11760 std::set
<InodeRef
> to_be_put
; //this set will be deconstructed item by item when exit
11761 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator next
;
11762 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator it
= inode_map
.begin();
11763 it
!= inode_map
.end();
11765 Inode
*in
= it
->second
;
11769 to_be_put
.insert(in
);
11770 _ll_put(in
, in
->ll_ref
);
11775 bool Client::_ll_forget(Inode
*in
, uint64_t count
)
11777 inodeno_t ino
= in
->ino
;
11779 ldout(cct
, 8) << __func__
<< " " << ino
<< " " << count
<< dendl
;
11780 tout(cct
) << __func__
<< std::endl
;
11781 tout(cct
) << ino
.val
<< std::endl
;
11782 tout(cct
) << count
<< std::endl
;
11784 // Ignore forget if we're no longer mounted
11785 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11786 if (!mref_reader
.is_state_satisfied())
11789 if (ino
== 1) return true; // ignore forget on root.
11792 if (in
->ll_ref
< count
) {
11793 ldout(cct
, 1) << "WARNING: ll_forget on " << ino
<< " " << count
11794 << ", which only has ll_ref=" << in
->ll_ref
<< dendl
;
11795 _ll_put(in
, in
->ll_ref
);
11798 if (_ll_put(in
, count
) == 0)
11805 bool Client::ll_forget(Inode
*in
, uint64_t count
)
11807 std::scoped_lock
lock(client_lock
);
11808 return _ll_forget(in
, count
);
11811 bool Client::ll_put(Inode
*in
)
11813 /* ll_forget already takes the lock */
11814 return ll_forget(in
, 1);
11817 int Client::ll_get_snap_ref(snapid_t snap
)
11819 std::scoped_lock
lock(client_lock
);
11820 auto p
= ll_snap_ref
.find(snap
);
11821 if (p
!= ll_snap_ref
.end())
11826 snapid_t
Client::ll_get_snapid(Inode
*in
)
11828 std::scoped_lock
lock(client_lock
);
11832 Inode
*Client::ll_get_inode(ino_t ino
)
11834 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11835 if (!mref_reader
.is_state_satisfied())
11838 std::scoped_lock
lock(client_lock
);
11840 vinodeno_t vino
= _map_faked_ino(ino
);
11841 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
11842 if (p
== inode_map
.end())
11844 Inode
*in
= p
->second
;
11849 Inode
*Client::ll_get_inode(vinodeno_t vino
)
11851 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11852 if (!mref_reader
.is_state_satisfied())
11855 if (is_reserved_vino(vino
))
11858 std::scoped_lock
lock(client_lock
);
11860 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
11861 if (p
== inode_map
.end())
11863 Inode
*in
= p
->second
;
11868 int Client::_ll_getattr(Inode
*in
, int caps
, const UserPerm
& perms
)
11870 vinodeno_t vino
= _get_vino(in
);
11872 ldout(cct
, 8) << __func__
<< " " << vino
<< dendl
;
11873 tout(cct
) << __func__
<< std::endl
;
11874 tout(cct
) << vino
.ino
.val
<< std::endl
;
11876 if (vino
.snapid
< CEPH_NOSNAP
)
11879 return _getattr(in
, caps
, perms
);
11882 int Client::ll_getattr(Inode
*in
, struct stat
*attr
, const UserPerm
& perms
)
11884 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11885 if (!mref_reader
.is_state_satisfied())
11886 return -CEPHFS_ENOTCONN
;
11888 std::scoped_lock
lock(client_lock
);
11890 int res
= _ll_getattr(in
, CEPH_STAT_CAP_INODE_ALL
, perms
);
11893 fill_stat(in
, attr
);
11894 ldout(cct
, 3) << __func__
<< " " << _get_vino(in
) << " = " << res
<< dendl
;
11898 int Client::ll_getattrx(Inode
*in
, struct ceph_statx
*stx
, unsigned int want
,
11899 unsigned int flags
, const UserPerm
& perms
)
11901 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11902 if (!mref_reader
.is_state_satisfied())
11903 return -CEPHFS_ENOTCONN
;
11905 std::scoped_lock
lock(client_lock
);
11908 unsigned mask
= statx_to_mask(flags
, want
);
11910 if (mask
&& !in
->caps_issued_mask(mask
, true))
11911 res
= _ll_getattr(in
, mask
, perms
);
11914 fill_statx(in
, mask
, stx
);
11915 ldout(cct
, 3) << __func__
<< " " << _get_vino(in
) << " = " << res
<< dendl
;
11919 int Client::_ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
11920 const UserPerm
& perms
, InodeRef
*inp
)
11922 vinodeno_t vino
= _get_vino(in
);
11924 ldout(cct
, 8) << __func__
<< " " << vino
<< " mask " << hex
<< mask
<< dec
11926 tout(cct
) << __func__
<< std::endl
;
11927 tout(cct
) << vino
.ino
.val
<< std::endl
;
11928 tout(cct
) << stx
->stx_mode
<< std::endl
;
11929 tout(cct
) << stx
->stx_uid
<< std::endl
;
11930 tout(cct
) << stx
->stx_gid
<< std::endl
;
11931 tout(cct
) << stx
->stx_size
<< std::endl
;
11932 tout(cct
) << stx
->stx_mtime
<< std::endl
;
11933 tout(cct
) << stx
->stx_atime
<< std::endl
;
11934 tout(cct
) << stx
->stx_btime
<< std::endl
;
11935 tout(cct
) << mask
<< std::endl
;
11937 if (!fuse_default_permissions
) {
11938 int res
= may_setattr(in
, stx
, mask
, perms
);
11943 mask
&= ~(CEPH_SETATTR_MTIME_NOW
| CEPH_SETATTR_ATIME_NOW
);
11945 return __setattrx(in
, stx
, mask
, perms
, inp
);
11948 int Client::ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
11949 const UserPerm
& perms
)
11951 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11952 if (!mref_reader
.is_state_satisfied())
11953 return -CEPHFS_ENOTCONN
;
11955 std::scoped_lock
lock(client_lock
);
11957 InodeRef
target(in
);
11958 int res
= _ll_setattrx(in
, stx
, mask
, perms
, &target
);
11960 ceph_assert(in
== target
.get());
11961 fill_statx(in
, in
->caps_issued(), stx
);
11964 ldout(cct
, 3) << __func__
<< " " << _get_vino(in
) << " = " << res
<< dendl
;
11968 int Client::ll_setattr(Inode
*in
, struct stat
*attr
, int mask
,
11969 const UserPerm
& perms
)
11971 struct ceph_statx stx
;
11972 stat_to_statx(attr
, &stx
);
11974 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11975 if (!mref_reader
.is_state_satisfied())
11976 return -CEPHFS_ENOTCONN
;
11978 std::scoped_lock
lock(client_lock
);
11980 InodeRef
target(in
);
11981 int res
= _ll_setattrx(in
, &stx
, mask
, perms
, &target
);
11983 ceph_assert(in
== target
.get());
11984 fill_stat(in
, attr
);
11987 ldout(cct
, 3) << __func__
<< " " << _get_vino(in
) << " = " << res
<< dendl
;
11995 int Client::getxattr(const char *path
, const char *name
, void *value
, size_t size
,
11996 const UserPerm
& perms
)
11998 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
11999 if (!mref_reader
.is_state_satisfied())
12000 return -CEPHFS_ENOTCONN
;
12002 std::scoped_lock
lock(client_lock
);
12005 int r
= Client::path_walk(path
, &in
, perms
, true, CEPH_STAT_CAP_XATTR
);
12008 return _getxattr(in
, name
, value
, size
, perms
);
12011 int Client::lgetxattr(const char *path
, const char *name
, void *value
, size_t size
,
12012 const UserPerm
& perms
)
12014 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12015 if (!mref_reader
.is_state_satisfied())
12016 return -CEPHFS_ENOTCONN
;
12018 std::scoped_lock
lock(client_lock
);
12021 int r
= Client::path_walk(path
, &in
, perms
, false, CEPH_STAT_CAP_XATTR
);
12024 return _getxattr(in
, name
, value
, size
, perms
);
12027 int Client::fgetxattr(int fd
, const char *name
, void *value
, size_t size
,
12028 const UserPerm
& perms
)
12030 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12031 if (!mref_reader
.is_state_satisfied())
12032 return -CEPHFS_ENOTCONN
;
12034 std::scoped_lock
lock(client_lock
);
12036 Fh
*f
= get_filehandle(fd
);
12038 return -CEPHFS_EBADF
;
12039 return _getxattr(f
->inode
, name
, value
, size
, perms
);
12042 int Client::listxattr(const char *path
, char *list
, size_t size
,
12043 const UserPerm
& perms
)
12045 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12046 if (!mref_reader
.is_state_satisfied())
12047 return -CEPHFS_ENOTCONN
;
12049 std::scoped_lock
lock(client_lock
);
12052 int r
= Client::path_walk(path
, &in
, perms
, true, CEPH_STAT_CAP_XATTR
);
12055 return Client::_listxattr(in
.get(), list
, size
, perms
);
12058 int Client::llistxattr(const char *path
, char *list
, size_t size
,
12059 const UserPerm
& perms
)
12061 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12062 if (!mref_reader
.is_state_satisfied())
12063 return -CEPHFS_ENOTCONN
;
12065 std::scoped_lock
lock(client_lock
);
12068 int r
= Client::path_walk(path
, &in
, perms
, false, CEPH_STAT_CAP_XATTR
);
12071 return Client::_listxattr(in
.get(), list
, size
, perms
);
12074 int Client::flistxattr(int fd
, char *list
, size_t size
, const UserPerm
& perms
)
12076 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12077 if (!mref_reader
.is_state_satisfied())
12078 return -CEPHFS_ENOTCONN
;
12080 std::scoped_lock
lock(client_lock
);
12082 Fh
*f
= get_filehandle(fd
);
12084 return -CEPHFS_EBADF
;
12085 return Client::_listxattr(f
->inode
.get(), list
, size
, perms
);
12088 int Client::removexattr(const char *path
, const char *name
,
12089 const UserPerm
& perms
)
12091 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12092 if (!mref_reader
.is_state_satisfied())
12093 return -CEPHFS_ENOTCONN
;
12095 std::scoped_lock
lock(client_lock
);
12098 int r
= Client::path_walk(path
, &in
, perms
, true);
12101 return _removexattr(in
, name
, perms
);
12104 int Client::lremovexattr(const char *path
, const char *name
,
12105 const UserPerm
& perms
)
12107 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12108 if (!mref_reader
.is_state_satisfied())
12109 return -CEPHFS_ENOTCONN
;
12111 std::scoped_lock
lock(client_lock
);
12114 int r
= Client::path_walk(path
, &in
, perms
, false);
12117 return _removexattr(in
, name
, perms
);
12120 int Client::fremovexattr(int fd
, const char *name
, const UserPerm
& perms
)
12122 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12123 if (!mref_reader
.is_state_satisfied())
12124 return -CEPHFS_ENOTCONN
;
12126 std::scoped_lock
lock(client_lock
);
12128 Fh
*f
= get_filehandle(fd
);
12130 return -CEPHFS_EBADF
;
12131 return _removexattr(f
->inode
, name
, perms
);
12134 int Client::setxattr(const char *path
, const char *name
, const void *value
,
12135 size_t size
, int flags
, const UserPerm
& perms
)
12137 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12138 if (!mref_reader
.is_state_satisfied())
12139 return -CEPHFS_ENOTCONN
;
12141 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
12143 std::scoped_lock
lock(client_lock
);
12146 int r
= Client::path_walk(path
, &in
, perms
, true);
12149 return _setxattr(in
, name
, value
, size
, flags
, perms
);
12152 int Client::lsetxattr(const char *path
, const char *name
, const void *value
,
12153 size_t size
, int flags
, const UserPerm
& perms
)
12155 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12156 if (!mref_reader
.is_state_satisfied())
12157 return -CEPHFS_ENOTCONN
;
12159 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
12161 std::scoped_lock
lock(client_lock
);
12164 int r
= Client::path_walk(path
, &in
, perms
, false);
12167 return _setxattr(in
, name
, value
, size
, flags
, perms
);
12170 int Client::fsetxattr(int fd
, const char *name
, const void *value
, size_t size
,
12171 int flags
, const UserPerm
& perms
)
12173 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12174 if (!mref_reader
.is_state_satisfied())
12175 return -CEPHFS_ENOTCONN
;
12177 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
12179 std::scoped_lock
lock(client_lock
);
12181 Fh
*f
= get_filehandle(fd
);
12183 return -CEPHFS_EBADF
;
12184 return _setxattr(f
->inode
, name
, value
, size
, flags
, perms
);
12187 int Client::_getxattr(Inode
*in
, const char *name
, void *value
, size_t size
,
12188 const UserPerm
& perms
)
12192 const VXattr
*vxattr
= _match_vxattr(in
, name
);
12194 r
= -CEPHFS_ENODATA
;
12196 // Do a force getattr to get the latest quota before returning
12197 // a value to userspace.
12199 if (vxattr
->flags
& VXATTR_RSTAT
) {
12200 flags
|= CEPH_STAT_RSTAT
;
12202 if (vxattr
->flags
& VXATTR_DIRSTAT
) {
12203 flags
|= CEPH_CAP_FILE_SHARED
;
12205 r
= _getattr(in
, flags
| CEPH_STAT_CAP_XATTR
, perms
, true);
12207 // Error from getattr!
12211 // call pointer-to-member function
12213 if (!(vxattr
->exists_cb
&& !(this->*(vxattr
->exists_cb
))(in
))) {
12214 r
= (this->*(vxattr
->getxattr_cb
))(in
, buf
, sizeof(buf
));
12216 r
= -CEPHFS_ENODATA
;
12220 if (r
> (int)size
) {
12221 r
= -CEPHFS_ERANGE
;
12222 } else if (r
> 0) {
12223 memcpy(value
, buf
, r
);
12229 if (acl_type
== NO_ACL
&& !strncmp(name
, "system.", 7)) {
12230 r
= -CEPHFS_EOPNOTSUPP
;
12234 r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
12237 r
= -CEPHFS_ENODATA
;
12238 if (in
->xattrs
.count(n
)) {
12239 r
= in
->xattrs
[n
].length();
12240 if (r
> 0 && size
!= 0) {
12241 if (size
>= (unsigned)r
)
12242 memcpy(value
, in
->xattrs
[n
].c_str(), r
);
12244 r
= -CEPHFS_ERANGE
;
12249 ldout(cct
, 8) << "_getxattr(" << in
->ino
<< ", \"" << name
<< "\", " << size
<< ") = " << r
<< dendl
;
12253 int Client::_getxattr(InodeRef
&in
, const char *name
, void *value
, size_t size
,
12254 const UserPerm
& perms
)
12256 if (cct
->_conf
->client_permissions
) {
12257 int r
= xattr_permission(in
.get(), name
, MAY_READ
, perms
);
12261 return _getxattr(in
.get(), name
, value
, size
, perms
);
12264 int Client::ll_getxattr(Inode
*in
, const char *name
, void *value
,
12265 size_t size
, const UserPerm
& perms
)
12267 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12268 if (!mref_reader
.is_state_satisfied())
12269 return -CEPHFS_ENOTCONN
;
12271 vinodeno_t vino
= _get_vino(in
);
12273 ldout(cct
, 3) << __func__
<< " " << vino
<< " " << name
<< " size " << size
<< dendl
;
12274 tout(cct
) << __func__
<< std::endl
;
12275 tout(cct
) << vino
.ino
.val
<< std::endl
;
12276 tout(cct
) << name
<< std::endl
;
12278 std::scoped_lock
lock(client_lock
);
12279 if (!fuse_default_permissions
) {
12280 int r
= xattr_permission(in
, name
, MAY_READ
, perms
);
12285 return _getxattr(in
, name
, value
, size
, perms
);
12288 int Client::_listxattr(Inode
*in
, char *name
, size_t size
,
12289 const UserPerm
& perms
)
12291 bool len_only
= (size
== 0);
12292 int r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
12298 for ([[maybe_unused
]] const auto &[xattr_name
, xattr_value_bl
] : in
->xattrs
) {
12299 if (xattr_name
.rfind("ceph.", 0) == 0) {
12303 size_t this_len
= xattr_name
.length() + 1;
12308 if (this_len
> size
) {
12309 r
= -CEPHFS_ERANGE
;
12313 memcpy(name
, xattr_name
.c_str(), this_len
);
12318 ldout(cct
, 8) << __func__
<< "(" << in
->ino
<< ", " << size
<< ") = " << r
<< dendl
;
12322 int Client::ll_listxattr(Inode
*in
, char *names
, size_t size
,
12323 const UserPerm
& perms
)
12325 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12326 if (!mref_reader
.is_state_satisfied())
12327 return -CEPHFS_ENOTCONN
;
12329 vinodeno_t vino
= _get_vino(in
);
12331 ldout(cct
, 3) << __func__
<< " " << vino
<< " size " << size
<< dendl
;
12332 tout(cct
) << __func__
<< std::endl
;
12333 tout(cct
) << vino
.ino
.val
<< std::endl
;
12334 tout(cct
) << size
<< std::endl
;
12336 std::scoped_lock
lock(client_lock
);
12337 return _listxattr(in
, names
, size
, perms
);
12340 int Client::_do_setxattr(Inode
*in
, const char *name
, const void *value
,
12341 size_t size
, int flags
, const UserPerm
& perms
)
12344 int xattr_flags
= 0;
12346 xattr_flags
|= CEPH_XATTR_REMOVE
;
12347 if (flags
& XATTR_CREATE
)
12348 xattr_flags
|= CEPH_XATTR_CREATE
;
12349 if (flags
& XATTR_REPLACE
)
12350 xattr_flags
|= CEPH_XATTR_REPLACE
;
12352 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SETXATTR
);
12354 in
->make_nosnap_relative_path(path
);
12355 req
->set_filepath(path
);
12356 req
->set_string2(name
);
12357 req
->set_inode(in
);
12358 req
->head
.args
.setxattr
.flags
= xattr_flags
;
12361 assert (value
|| size
== 0);
12362 bl
.append((const char*)value
, size
);
12365 int res
= make_request(req
, perms
);
12368 ldout(cct
, 3) << __func__
<< "(" << in
->ino
<< ", \"" << name
<< "\") = " <<
12373 int Client::_setxattr(Inode
*in
, const char *name
, const void *value
,
12374 size_t size
, int flags
, const UserPerm
& perms
)
12376 if (in
->snapid
!= CEPH_NOSNAP
) {
12377 return -CEPHFS_EROFS
;
12382 } else if (value
== NULL
) {
12383 return -CEPHFS_EINVAL
;
12386 bool posix_acl_xattr
= false;
12387 if (acl_type
== POSIX_ACL
)
12388 posix_acl_xattr
= !strncmp(name
, "system.", 7);
12390 if (strncmp(name
, "user.", 5) &&
12391 strncmp(name
, "security.", 9) &&
12392 strncmp(name
, "trusted.", 8) &&
12393 strncmp(name
, "ceph.", 5) &&
12395 return -CEPHFS_EOPNOTSUPP
;
12397 bool check_realm
= false;
12399 if (posix_acl_xattr
) {
12400 if (!strcmp(name
, ACL_EA_ACCESS
)) {
12401 mode_t new_mode
= in
->mode
;
12403 int ret
= posix_acl_equiv_mode(value
, size
, &new_mode
);
12410 if (new_mode
!= in
->mode
) {
12411 struct ceph_statx stx
;
12412 stx
.stx_mode
= new_mode
;
12413 ret
= _do_setattr(in
, &stx
, CEPH_SETATTR_MODE
, perms
, NULL
);
12418 } else if (!strcmp(name
, ACL_EA_DEFAULT
)) {
12420 if (!S_ISDIR(in
->mode
))
12421 return -CEPHFS_EACCES
;
12422 int ret
= posix_acl_check(value
, size
);
12424 return -CEPHFS_EINVAL
;
12431 return -CEPHFS_EOPNOTSUPP
;
12434 const VXattr
*vxattr
= _match_vxattr(in
, name
);
12436 if (vxattr
->readonly
)
12437 return -CEPHFS_EOPNOTSUPP
;
12438 if (vxattr
->name
.compare(0, 10, "ceph.quota") == 0 && value
)
12439 check_realm
= true;
12443 int ret
= _do_setxattr(in
, name
, value
, size
, flags
, perms
);
12444 if (ret
>= 0 && check_realm
) {
12445 // check if snaprealm was created for quota inode
12446 if (in
->quota
.is_enable() &&
12447 !(in
->snaprealm
&& in
->snaprealm
->ino
== in
->ino
))
12448 ret
= -CEPHFS_EOPNOTSUPP
;
12454 int Client::_setxattr(InodeRef
&in
, const char *name
, const void *value
,
12455 size_t size
, int flags
, const UserPerm
& perms
)
12457 if (cct
->_conf
->client_permissions
) {
12458 int r
= xattr_permission(in
.get(), name
, MAY_WRITE
, perms
);
12462 return _setxattr(in
.get(), name
, value
, size
, flags
, perms
);
12465 int Client::_setxattr_check_data_pool(string
& name
, string
& value
, const OSDMap
*osdmap
)
12468 if (name
== "layout") {
12469 string::iterator begin
= value
.begin();
12470 string::iterator end
= value
.end();
12471 keys_and_values
<string::iterator
> p
; // create instance of parser
12472 std::map
<string
, string
> m
; // map to receive results
12473 if (!qi::parse(begin
, end
, p
, m
)) { // returns true if successful
12474 return -CEPHFS_EINVAL
;
12477 return -CEPHFS_EINVAL
;
12478 for (map
<string
,string
>::iterator q
= m
.begin(); q
!= m
.end(); ++q
) {
12479 if (q
->first
== "pool") {
12484 } else if (name
== "layout.pool") {
12488 if (tmp
.length()) {
12491 pool
= boost::lexical_cast
<unsigned>(tmp
);
12492 if (!osdmap
->have_pg_pool(pool
))
12493 return -CEPHFS_ENOENT
;
12494 } catch (boost::bad_lexical_cast
const&) {
12495 pool
= osdmap
->lookup_pg_pool_name(tmp
);
12497 return -CEPHFS_ENOENT
;
12505 void Client::_setxattr_maybe_wait_for_osdmap(const char *name
, const void *value
, size_t size
)
12507 // For setting pool of layout, MetaRequest need osdmap epoch.
12508 // There is a race which create a new data pool but client and mds both don't have.
12509 // Make client got the latest osdmap which make mds quickly judge whether get newer osdmap.
12510 ldout(cct
, 15) << __func__
<< ": name = " << name
<< dendl
;
12511 if (strcmp(name
, "ceph.file.layout.pool") == 0 || strcmp(name
, "ceph.dir.layout.pool") == 0 ||
12512 strcmp(name
, "ceph.file.layout") == 0 || strcmp(name
, "ceph.dir.layout") == 0) {
12513 string
rest(strstr(name
, "layout"));
12514 string
v((const char*)value
, size
);
12515 int r
= objecter
->with_osdmap([&](const OSDMap
& o
) {
12516 return _setxattr_check_data_pool(rest
, v
, &o
);
12519 if (r
== -CEPHFS_ENOENT
) {
12521 ldout(cct
, 20) << __func__
<< ": waiting for latest osdmap" << dendl
;
12522 objecter
->wait_for_latest_osdmap(ca::use_blocked
[ec
]);
12523 ldout(cct
, 20) << __func__
<< ": got latest osdmap: " << ec
<< dendl
;
12528 int Client::ll_setxattr(Inode
*in
, const char *name
, const void *value
,
12529 size_t size
, int flags
, const UserPerm
& perms
)
12531 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12532 if (!mref_reader
.is_state_satisfied())
12533 return -CEPHFS_ENOTCONN
;
12535 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
12537 vinodeno_t vino
= _get_vino(in
);
12539 ldout(cct
, 3) << __func__
<< " " << vino
<< " " << name
<< " size " << size
<< dendl
;
12540 tout(cct
) << __func__
<< std::endl
;
12541 tout(cct
) << vino
.ino
.val
<< std::endl
;
12542 tout(cct
) << name
<< std::endl
;
12544 std::scoped_lock
lock(client_lock
);
12545 if (!fuse_default_permissions
) {
12546 int r
= xattr_permission(in
, name
, MAY_WRITE
, perms
);
12550 return _setxattr(in
, name
, value
, size
, flags
, perms
);
12553 int Client::_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
)
12555 if (in
->snapid
!= CEPH_NOSNAP
) {
12556 return -CEPHFS_EROFS
;
12559 // same xattrs supported by kernel client
12560 if (strncmp(name
, "user.", 5) &&
12561 strncmp(name
, "system.", 7) &&
12562 strncmp(name
, "security.", 9) &&
12563 strncmp(name
, "trusted.", 8) &&
12564 strncmp(name
, "ceph.", 5))
12565 return -CEPHFS_EOPNOTSUPP
;
12567 const VXattr
*vxattr
= _match_vxattr(in
, name
);
12568 if (vxattr
&& vxattr
->readonly
)
12569 return -CEPHFS_EOPNOTSUPP
;
12571 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_RMXATTR
);
12573 in
->make_nosnap_relative_path(path
);
12574 req
->set_filepath(path
);
12575 req
->set_filepath2(name
);
12576 req
->set_inode(in
);
12578 int res
= make_request(req
, perms
);
12581 ldout(cct
, 8) << "_removexattr(" << in
->ino
<< ", \"" << name
<< "\") = " << res
<< dendl
;
12585 int Client::_removexattr(InodeRef
&in
, const char *name
, const UserPerm
& perms
)
12587 if (cct
->_conf
->client_permissions
) {
12588 int r
= xattr_permission(in
.get(), name
, MAY_WRITE
, perms
);
12592 return _removexattr(in
.get(), name
, perms
);
12595 int Client::ll_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
)
12597 RWRef_t
mref_reader(mount_state
, CLIENT_MOUNTING
);
12598 if (!mref_reader
.is_state_satisfied())
12599 return -CEPHFS_ENOTCONN
;
12601 vinodeno_t vino
= _get_vino(in
);
12603 ldout(cct
, 3) << "ll_removexattr " << vino
<< " " << name
<< dendl
;
12604 tout(cct
) << "ll_removexattr" << std::endl
;
12605 tout(cct
) << vino
.ino
.val
<< std::endl
;
12606 tout(cct
) << name
<< std::endl
;
12608 std::scoped_lock
lock(client_lock
);
12609 if (!fuse_default_permissions
) {
12610 int r
= xattr_permission(in
, name
, MAY_WRITE
, perms
);
12615 return _removexattr(in
, name
, perms
);
12618 bool Client::_vxattrcb_quota_exists(Inode
*in
)
12620 return in
->quota
.is_enable() &&
12621 (in
->snapid
!= CEPH_NOSNAP
||
12622 (in
->snaprealm
&& in
->snaprealm
->ino
== in
->ino
));
12624 size_t Client::_vxattrcb_quota(Inode
*in
, char *val
, size_t size
)
12626 return snprintf(val
, size
,
12627 "max_bytes=%lld max_files=%lld",
12628 (long long int)in
->quota
.max_bytes
,
12629 (long long int)in
->quota
.max_files
);
12631 size_t Client::_vxattrcb_quota_max_bytes(Inode
*in
, char *val
, size_t size
)
12633 return snprintf(val
, size
, "%lld", (long long int)in
->quota
.max_bytes
);
12635 size_t Client::_vxattrcb_quota_max_files(Inode
*in
, char *val
, size_t size
)
12637 return snprintf(val
, size
, "%lld", (long long int)in
->quota
.max_files
);
12640 bool Client::_vxattrcb_layout_exists(Inode
*in
)
12642 return in
->layout
!= file_layout_t();
12644 size_t Client::_vxattrcb_layout(Inode
*in
, char *val
, size_t size
)
12646 int r
= snprintf(val
, size
,
12647 "stripe_unit=%llu stripe_count=%llu object_size=%llu pool=",
12648 (unsigned long long)in
->layout
.stripe_unit
,
12649 (unsigned long long)in
->layout
.stripe_count
,
12650 (unsigned long long)in
->layout
.object_size
);
12651 objecter
->with_osdmap([&](const OSDMap
& o
) {
12652 if (o
.have_pg_pool(in
->layout
.pool_id
))
12653 r
+= snprintf(val
+ r
, size
- r
, "%s",
12654 o
.get_pool_name(in
->layout
.pool_id
).c_str());
12656 r
+= snprintf(val
+ r
, size
- r
, "%" PRIu64
,
12657 (uint64_t)in
->layout
.pool_id
);
12659 if (in
->layout
.pool_ns
.length())
12660 r
+= snprintf(val
+ r
, size
- r
, " pool_namespace=%s",
12661 in
->layout
.pool_ns
.c_str());
12664 size_t Client::_vxattrcb_layout_stripe_unit(Inode
*in
, char *val
, size_t size
)
12666 return snprintf(val
, size
, "%llu", (unsigned long long)in
->layout
.stripe_unit
);
12668 size_t Client::_vxattrcb_layout_stripe_count(Inode
*in
, char *val
, size_t size
)
12670 return snprintf(val
, size
, "%llu", (unsigned long long)in
->layout
.stripe_count
);
12672 size_t Client::_vxattrcb_layout_object_size(Inode
*in
, char *val
, size_t size
)
12674 return snprintf(val
, size
, "%llu", (unsigned long long)in
->layout
.object_size
);
12676 size_t Client::_vxattrcb_layout_pool(Inode
*in
, char *val
, size_t size
)
12679 objecter
->with_osdmap([&](const OSDMap
& o
) {
12680 if (o
.have_pg_pool(in
->layout
.pool_id
))
12681 r
= snprintf(val
, size
, "%s", o
.get_pool_name(
12682 in
->layout
.pool_id
).c_str());
12684 r
= snprintf(val
, size
, "%" PRIu64
, (uint64_t)in
->layout
.pool_id
);
12688 size_t Client::_vxattrcb_layout_pool_namespace(Inode
*in
, char *val
, size_t size
)
12690 return snprintf(val
, size
, "%s", in
->layout
.pool_ns
.c_str());
12692 size_t Client::_vxattrcb_dir_entries(Inode
*in
, char *val
, size_t size
)
12694 return snprintf(val
, size
, "%llu", (unsigned long long)(in
->dirstat
.nfiles
+ in
->dirstat
.nsubdirs
));
12696 size_t Client::_vxattrcb_dir_files(Inode
*in
, char *val
, size_t size
)
12698 return snprintf(val
, size
, "%llu", (unsigned long long)in
->dirstat
.nfiles
);
12700 size_t Client::_vxattrcb_dir_subdirs(Inode
*in
, char *val
, size_t size
)
12702 return snprintf(val
, size
, "%llu", (unsigned long long)in
->dirstat
.nsubdirs
);
12704 size_t Client::_vxattrcb_dir_rentries(Inode
*in
, char *val
, size_t size
)
12706 return snprintf(val
, size
, "%llu", (unsigned long long)(in
->rstat
.rfiles
+ in
->rstat
.rsubdirs
));
12708 size_t Client::_vxattrcb_dir_rfiles(Inode
*in
, char *val
, size_t size
)
12710 return snprintf(val
, size
, "%llu", (unsigned long long)in
->rstat
.rfiles
);
12712 size_t Client::_vxattrcb_dir_rsubdirs(Inode
*in
, char *val
, size_t size
)
12714 return snprintf(val
, size
, "%llu", (unsigned long long)in
->rstat
.rsubdirs
);
12716 size_t Client::_vxattrcb_dir_rsnaps(Inode
*in
, char *val
, size_t size
)
12718 return snprintf(val
, size
, "%llu", (unsigned long long)in
->rstat
.rsnaps
);
12720 size_t Client::_vxattrcb_dir_rbytes(Inode
*in
, char *val
, size_t size
)
12722 return snprintf(val
, size
, "%llu", (unsigned long long)in
->rstat
.rbytes
);
12724 size_t Client::_vxattrcb_dir_rctime(Inode
*in
, char *val
, size_t size
)
12726 return snprintf(val
, size
, "%ld.%09ld", (long)in
->rstat
.rctime
.sec(),
12727 (long)in
->rstat
.rctime
.nsec());
12729 bool Client::_vxattrcb_dir_pin_exists(Inode
*in
)
12731 return in
->dir_pin
!= -CEPHFS_ENODATA
;
12733 size_t Client::_vxattrcb_dir_pin(Inode
*in
, char *val
, size_t size
)
12735 return snprintf(val
, size
, "%ld", (long)in
->dir_pin
);
12738 bool Client::_vxattrcb_snap_btime_exists(Inode
*in
)
12740 return !in
->snap_btime
.is_zero();
12743 size_t Client::_vxattrcb_snap_btime(Inode
*in
, char *val
, size_t size
)
12745 return snprintf(val
, size
, "%llu.%09lu",
12746 (long long unsigned)in
->snap_btime
.sec(),
12747 (long unsigned)in
->snap_btime
.nsec());
12750 bool Client::_vxattrcb_mirror_info_exists(Inode
*in
)
12752 // checking one of the xattrs would suffice
12753 return in
->xattrs
.count("ceph.mirror.info.cluster_id") != 0;
12756 size_t Client::_vxattrcb_mirror_info(Inode
*in
, char *val
, size_t size
)
12758 return snprintf(val
, size
, "cluster_id=%.*s fs_id=%.*s",
12759 in
->xattrs
["ceph.mirror.info.cluster_id"].length(),
12760 in
->xattrs
["ceph.mirror.info.cluster_id"].c_str(),
12761 in
->xattrs
["ceph.mirror.info.fs_id"].length(),
12762 in
->xattrs
["ceph.mirror.info.fs_id"].c_str());
12765 size_t Client::_vxattrcb_cluster_fsid(Inode
*in
, char *val
, size_t size
)
12767 return snprintf(val
, size
, "%s", monclient
->get_fsid().to_string().c_str());
12770 size_t Client::_vxattrcb_client_id(Inode
*in
, char *val
, size_t size
)
12772 auto name
= messenger
->get_myname();
12773 return snprintf(val
, size
, "%s%ld", name
.type_str(), name
.num());
12776 #define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name
12777 #define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." #_name2
12779 #define XATTR_NAME_CEPH(_type, _name, _flags) \
12781 name: CEPH_XATTR_NAME(_type, _name), \
12782 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
12787 #define XATTR_LAYOUT_FIELD(_type, _name, _field) \
12789 name: CEPH_XATTR_NAME2(_type, _name, _field), \
12790 getxattr_cb: &Client::_vxattrcb_ ## _name ## _ ## _field, \
12792 exists_cb: &Client::_vxattrcb_layout_exists, \
12795 #define XATTR_QUOTA_FIELD(_type, _name) \
12797 name: CEPH_XATTR_NAME(_type, _name), \
12798 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
12800 exists_cb: &Client::_vxattrcb_quota_exists, \
const Client::VXattr Client::_dir_vxattrs[] = {
  {
    name: "ceph.dir.layout",
    getxattr_cb: &Client::_vxattrcb_layout,
    exists_cb: &Client::_vxattrcb_layout_exists,
  },
  XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
  XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
  XATTR_LAYOUT_FIELD(dir, layout, object_size),
  XATTR_LAYOUT_FIELD(dir, layout, pool),
  XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
  XATTR_NAME_CEPH(dir, entries, VXATTR_DIRSTAT),
  XATTR_NAME_CEPH(dir, files, VXATTR_DIRSTAT),
  XATTR_NAME_CEPH(dir, subdirs, VXATTR_DIRSTAT),
  XATTR_NAME_CEPH(dir, rentries, VXATTR_RSTAT),
  XATTR_NAME_CEPH(dir, rfiles, VXATTR_RSTAT),
  XATTR_NAME_CEPH(dir, rsubdirs, VXATTR_RSTAT),
  XATTR_NAME_CEPH(dir, rsnaps, VXATTR_RSTAT),
  XATTR_NAME_CEPH(dir, rbytes, VXATTR_RSTAT),
  XATTR_NAME_CEPH(dir, rctime, VXATTR_RSTAT),
  {
    name: "ceph.quota",
    getxattr_cb: &Client::_vxattrcb_quota,
    exists_cb: &Client::_vxattrcb_quota_exists,
  },
  XATTR_QUOTA_FIELD(quota, max_bytes),
  XATTR_QUOTA_FIELD(quota, max_files),
  {
    name: "ceph.dir.pin",
    getxattr_cb: &Client::_vxattrcb_dir_pin,
    exists_cb: &Client::_vxattrcb_dir_pin_exists,
  },
  {
    name: "ceph.snap.btime",
    getxattr_cb: &Client::_vxattrcb_snap_btime,
    exists_cb: &Client::_vxattrcb_snap_btime_exists,
  },
  {
    name: "ceph.mirror.info",
    getxattr_cb: &Client::_vxattrcb_mirror_info,
    exists_cb: &Client::_vxattrcb_mirror_info_exists,
  },
  { name: "" } /* Required table terminator */
};

const Client::VXattr Client::_file_vxattrs[] = {
  {
    name: "ceph.file.layout",
    getxattr_cb: &Client::_vxattrcb_layout,
    exists_cb: &Client::_vxattrcb_layout_exists,
  },
  XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
  XATTR_LAYOUT_FIELD(file, layout, stripe_count),
  XATTR_LAYOUT_FIELD(file, layout, object_size),
  XATTR_LAYOUT_FIELD(file, layout, pool),
  XATTR_LAYOUT_FIELD(file, layout, pool_namespace),
  {
    name: "ceph.snap.btime",
    getxattr_cb: &Client::_vxattrcb_snap_btime,
    exists_cb: &Client::_vxattrcb_snap_btime_exists,
  },
  { name: "" } /* Required table terminator */
};

const Client::VXattr Client::_common_vxattrs[] = {
  {
    name: "ceph.cluster_fsid",
    getxattr_cb: &Client::_vxattrcb_cluster_fsid,
    exists_cb: nullptr,
  },
  {
    name: "ceph.client_id",
    getxattr_cb: &Client::_vxattrcb_client_id,
    exists_cb: nullptr,
  },
  { name: "" } /* Required table terminator */
};
const Client::VXattr *Client::_get_vxattrs(Inode *in)
{
  if (in->is_dir())
    return _dir_vxattrs;
  else if (in->is_file())
    return _file_vxattrs;
  return NULL;
}

const Client::VXattr *Client::_match_vxattr(Inode *in, const char *name)
{
  if (strncmp(name, "ceph.", 5) == 0) {
    const VXattr *vxattr = _get_vxattrs(in);
    if (vxattr) {
      while (!vxattr->name.empty()) {
        if (vxattr->name == name)
          return vxattr;
        vxattr++;
      }
    }

    // for common vxattrs
    vxattr = _common_vxattrs;
    while (!vxattr->name.empty()) {
      if (vxattr->name == name)
        return vxattr;
      vxattr++;
    }
  }

  return NULL;
}
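/*
 * Illustrative sketch (not part of the original code): the tables above are
 * what expose directory/file metadata as virtual xattrs, so a libcephfs
 * consumer reads them with an ordinary getxattr-style call. The buffer size
 * and error handling below are assumptions for the example:
 *
 *   char buf[256];
 *   int n = ceph_getxattr(cmount, "/some/dir", "ceph.dir.rbytes",
 *                         buf, sizeof(buf));
 *   if (n >= 0)
 *     printf("recursive bytes: %.*s\n", n, buf);
 *
 * _match_vxattr() is the lookup that decides whether such a "ceph.*" name is
 * served from these tables instead of the MDS xattr map.
 */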
int Client::ll_readlink(Inode *in, char *buf, size_t buflen, const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_readlink " << vino << dendl;
  tout(cct) << "ll_readlink" << std::endl;
  tout(cct) << vino.ino.val << std::endl;

  std::scoped_lock lock(client_lock);
  for (auto dn : in->dentries) {
    touch_dn(dn);
  }

  int r = _readlink(in, buf, buflen); // FIXME: no permission checking!
  ldout(cct, 3) << "ll_readlink " << vino << " = " << r << dendl;
  return r;
}
int Client::_mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev,
                   const UserPerm& perms, InodeRef *inp)
{
  ldout(cct, 8) << "_mknod(" << dir->ino << " " << name << ", 0" << oct
                << mode << dec << ", " << rdev << ", uid " << perms.uid()
                << ", gid " << perms.gid() << ")" << dendl;

  if (strlen(name) > NAME_MAX)
    return -CEPHFS_ENAMETOOLONG;

  if (dir->snapid != CEPH_NOSNAP) {
    return -CEPHFS_EROFS;
  }
  if (is_quota_files_exceeded(dir, perms)) {
    return -CEPHFS_EDQUOT;
  }

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_MKNOD);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_inode(dir);
  req->head.args.mknod.rdev = rdev;
  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;

  bufferlist xattrs_bl;
  int res = _posix_acl_create(dir, &mode, xattrs_bl, perms);
  req->head.args.mknod.mode = mode;
  if (xattrs_bl.length() > 0)
    req->set_data(xattrs_bl);

  Dentry *de;
  res = get_or_create(dir, name, &de);
  req->set_dentry(de);

  res = make_request(req, perms, inp);

  ldout(cct, 8) << "mknod(" << path << ", 0" << oct << mode << dec << ") = " << res << dendl;
  return res;
}
int Client::ll_mknod(Inode *parent, const char *name, mode_t mode,
                     dev_t rdev, struct stat *attr, Inode **out,
                     const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_mknod " << vparent << " " << name << dendl;
  tout(cct) << "ll_mknod" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << mode << std::endl;
  tout(cct) << rdev << std::endl;

  std::scoped_lock lock(client_lock);
  if (!fuse_default_permissions) {
    int r = may_create(parent, perms);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _mknod(parent, name, mode, rdev, perms, &in);
  if (r == 0) {
    fill_stat(in, attr);
  }
  tout(cct) << attr->st_ino << std::endl;
  ldout(cct, 3) << "ll_mknod " << vparent << " " << name
                << " = " << r << " (" << hex << attr->st_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}

int Client::ll_mknodx(Inode *parent, const char *name, mode_t mode,
                      dev_t rdev, Inode **out,
                      struct ceph_statx *stx, unsigned want, unsigned flags,
                      const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  unsigned caps = statx_to_mask(flags, want);

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_mknodx " << vparent << " " << name << dendl;
  tout(cct) << "ll_mknodx" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << mode << std::endl;
  tout(cct) << rdev << std::endl;

  std::scoped_lock lock(client_lock);

  if (!fuse_default_permissions) {
    int r = may_create(parent, perms);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _mknod(parent, name, mode, rdev, perms, &in);
  if (r == 0) {
    fill_statx(in, caps, stx);
  }
  tout(cct) << stx->stx_ino << std::endl;
  ldout(cct, 3) << "ll_mknodx " << vparent << " " << name
                << " = " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}
int Client::_create(Inode *dir, const char *name, int flags, mode_t mode,
                    InodeRef *inp, Fh **fhp, int stripe_unit, int stripe_count,
                    int object_size, const char *data_pool, bool *created,
                    const UserPerm& perms, std::string alternate_name)
{
  ldout(cct, 8) << "_create(" << dir->ino << " " << name << ", 0" << oct <<
    mode << dec << ")" << dendl;

  if (strlen(name) > NAME_MAX)
    return -CEPHFS_ENAMETOOLONG;
  if (dir->snapid != CEPH_NOSNAP) {
    return -CEPHFS_EROFS;
  }
  if (is_quota_files_exceeded(dir, perms)) {
    return -CEPHFS_EDQUOT;
  }

  // use normalized flags to generate cmode
  int cflags = ceph_flags_sys2wire(flags);
  if (cct->_conf.get_val<bool>("client_force_lazyio"))
    cflags |= CEPH_O_LAZY;

  int cmode = ceph_flags_to_mode(cflags);

  int64_t pool_id = -1;
  if (data_pool && *data_pool) {
    pool_id = objecter->with_osdmap(
      std::mem_fn(&OSDMap::lookup_pg_pool_name), data_pool);
    if (pool_id < 0)
      return -CEPHFS_EINVAL;
    if (pool_id > 0xffffffffll)
      return -CEPHFS_ERANGE;  // bummer!
  }

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_CREATE);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_alternate_name(std::move(alternate_name));
  req->set_inode(dir);
  req->head.args.open.flags = cflags | CEPH_O_CREAT;

  req->head.args.open.stripe_unit = stripe_unit;
  req->head.args.open.stripe_count = stripe_count;
  req->head.args.open.object_size = object_size;
  if (cct->_conf->client_debug_getattr_caps)
    req->head.args.open.mask = DEBUG_GETATTR_CAPS;
  else
    req->head.args.open.mask = 0;
  req->head.args.open.pool = pool_id;
  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;

  bufferlist xattrs_bl;
  int res = _posix_acl_create(dir, &mode, xattrs_bl, perms);
  req->head.args.open.mode = mode;
  if (xattrs_bl.length() > 0)
    req->set_data(xattrs_bl);

  Dentry *de;
  res = get_or_create(dir, name, &de);
  req->set_dentry(de);

  res = make_request(req, perms, inp, created);

  /* If the caller passed a value in fhp, do the open */
  if (fhp) {
    (*inp)->get_open_ref(cmode);
    *fhp = _create_fh(inp->get(), flags, cmode, perms);
  }

  ldout(cct, 8) << "create(" << path << ", 0" << oct << mode << dec
                << " layout " << stripe_unit
                << ' ' << stripe_count
                << ' ' << object_size
                <<") = " << res << dendl;
  return res;
}
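/*
 * Illustrative sketch (not part of the original code): the stripe_unit,
 * stripe_count, object_size and data_pool arguments above map directly onto
 * the new file's striping layout. A caller wanting 1 MiB stripe units spread
 * over 4 objects of 4 MiB each in a specific data pool would pass something
 * like the following (the pool name and values are assumptions for the
 * example, not defaults):
 *
 *   InodeRef in;
 *   Fh *fh = nullptr;
 *   bool created = false;
 *   int r = _create(dir, "datafile", O_RDWR, 0644, &in, &fh,
 *                   1 << 20,            // stripe_unit: 1 MiB
 *                   4,                  // stripe_count
 *                   4 << 20,            // object_size: 4 MiB
 *                   "cephfs_data_ssd",  // data pool (hypothetical name)
 *                   &created, perms, "");
 *
 * Passing 0 for the three sizes and NULL for data_pool keeps the MDS defaults.
 */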
int Client::_mkdir(Inode *dir, const char *name, mode_t mode, const UserPerm& perm,
                   InodeRef *inp, const std::map<std::string, std::string> &metadata,
                   std::string alternate_name)
{
  ldout(cct, 8) << "_mkdir(" << dir->ino << " " << name << ", 0" << oct
                << mode << dec << ", uid " << perm.uid()
                << ", gid " << perm.gid() << ")" << dendl;

  if (strlen(name) > NAME_MAX)
    return -CEPHFS_ENAMETOOLONG;

  if (dir->snapid != CEPH_NOSNAP && dir->snapid != CEPH_SNAPDIR) {
    return -CEPHFS_EROFS;
  }
  if (is_quota_files_exceeded(dir, perm)) {
    return -CEPHFS_EDQUOT;
  }

  bool is_snap_op = dir->snapid == CEPH_SNAPDIR;
  MetaRequest *req = new MetaRequest(is_snap_op ?
                                     CEPH_MDS_OP_MKSNAP : CEPH_MDS_OP_MKDIR);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_inode(dir);
  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;
  req->set_alternate_name(std::move(alternate_name));

  bufferlist bl;
  int res = _posix_acl_create(dir, &mode, bl, perm);
  req->head.args.mkdir.mode = mode;

  if (is_snap_op) {
    SnapPayload payload;
    // clear the bufferlist that may have been populated by the call
    // to _posix_acl_create(). MDS mksnap does not make use of it.
    // So, reuse it to pass metadata payload.
    bl.clear();
    payload.metadata = metadata;
    encode(payload, bl);
  }
  if (bl.length() > 0) {
    req->set_data(bl);
  }

  Dentry *de;
  res = get_or_create(dir, name, &de);
  req->set_dentry(de);

  ldout(cct, 10) << "_mkdir: making request" << dendl;
  res = make_request(req, perm, inp);
  ldout(cct, 10) << "_mkdir result is " << res << dendl;

  ldout(cct, 8) << "_mkdir(" << path << ", 0" << oct << mode << dec << ") = " << res << dendl;
  return res;
}
int Client::ll_mkdir(Inode *parent, const char *name, mode_t mode,
                     struct stat *attr, Inode **out, const UserPerm& perm)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_mkdir " << vparent << " " << name << dendl;
  tout(cct) << "ll_mkdir" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << mode << std::endl;

  std::scoped_lock lock(client_lock);

  if (!fuse_default_permissions) {
    int r = may_create(parent, perm);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _mkdir(parent, name, mode, perm, &in);
  if (r == 0) {
    fill_stat(in, attr);
  }
  tout(cct) << attr->st_ino << std::endl;
  ldout(cct, 3) << "ll_mkdir " << vparent << " " << name
                << " = " << r << " (" << hex << attr->st_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}

int Client::ll_mkdirx(Inode *parent, const char *name, mode_t mode, Inode **out,
                      struct ceph_statx *stx, unsigned want, unsigned flags,
                      const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_mkdirx " << vparent << " " << name << dendl;
  tout(cct) << "ll_mkdirx" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << mode << std::endl;

  std::scoped_lock lock(client_lock);

  if (!fuse_default_permissions) {
    int r = may_create(parent, perms);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _mkdir(parent, name, mode, perms, &in);
  if (r == 0) {
    fill_statx(in, statx_to_mask(flags, want), stx);
  }
  tout(cct) << stx->stx_ino << std::endl;
  ldout(cct, 3) << "ll_mkdirx " << vparent << " " << name
                << " = " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}
int Client::_symlink(Inode *dir, const char *name, const char *target,
                     const UserPerm& perms, std::string alternate_name, InodeRef *inp)
{
  ldout(cct, 8) << "_symlink(" << dir->ino << " " << name << ", " << target
                << ", uid " << perms.uid() << ", gid " << perms.gid() << ")"
                << dendl;

  if (strlen(name) > NAME_MAX)
    return -CEPHFS_ENAMETOOLONG;

  if (dir->snapid != CEPH_NOSNAP) {
    return -CEPHFS_EROFS;
  }
  if (is_quota_files_exceeded(dir, perms)) {
    return -CEPHFS_EDQUOT;
  }

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_SYMLINK);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_alternate_name(std::move(alternate_name));
  req->set_inode(dir);
  req->set_string2(target);
  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;

  Dentry *de;
  int res = get_or_create(dir, name, &de);
  req->set_dentry(de);

  res = make_request(req, perms, inp);

  ldout(cct, 8) << "_symlink(\"" << path << "\", \"" << target << "\") = " <<
    res << dendl;
  return res;
}
int Client::ll_symlink(Inode *parent, const char *name, const char *value,
                       struct stat *attr, Inode **out, const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_symlink " << vparent << " " << name << " -> " << value
                << dendl;
  tout(cct) << "ll_symlink" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << value << std::endl;

  std::scoped_lock lock(client_lock);

  if (!fuse_default_permissions) {
    int r = may_create(parent, perms);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _symlink(parent, name, value, perms, "", &in);
  if (r == 0) {
    fill_stat(in, attr);
  }
  tout(cct) << attr->st_ino << std::endl;
  ldout(cct, 3) << "ll_symlink " << vparent << " " << name
                << " = " << r << " (" << hex << attr->st_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}

int Client::ll_symlinkx(Inode *parent, const char *name, const char *value,
                        Inode **out, struct ceph_statx *stx, unsigned want,
                        unsigned flags, const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 3) << "ll_symlinkx " << vparent << " " << name << " -> " << value
                << dendl;
  tout(cct) << "ll_symlinkx" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << value << std::endl;

  std::scoped_lock lock(client_lock);

  if (!fuse_default_permissions) {
    int r = may_create(parent, perms);
    if (r < 0)
      return r;
  }

  InodeRef in;
  int r = _symlink(parent, name, value, perms, "", &in);
  if (r == 0) {
    fill_statx(in, statx_to_mask(flags, want), stx);
  }
  tout(cct) << stx->stx_ino << std::endl;
  ldout(cct, 3) << "ll_symlinkx " << vparent << " " << name
                << " = " << r << " (" << hex << stx->stx_ino << dec << ")" << dendl;
  *out = in.get();
  return r;
}
int Client::_unlink(Inode *dir, const char *name, const UserPerm& perm)
{
  ldout(cct, 8) << "_unlink(" << dir->ino << " " << name
                << " uid " << perm.uid() << " gid " << perm.gid()
                << ")" << dendl;

  if (dir->snapid != CEPH_NOSNAP) {
    return -CEPHFS_EROFS;
  }

  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_UNLINK);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);

  InodeRef otherin;
  Inode *in;
  Dentry *de;

  int res = get_or_create(dir, name, &de);
  req->set_dentry(de);
  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;

  res = _lookup(dir, name, 0, &otherin, perm);
  in = otherin.get();
  req->set_other_inode(in);
  in->break_all_delegs();
  req->other_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;

  req->set_inode(dir);

  res = make_request(req, perm);

  ldout(cct, 8) << "unlink(" << path << ") = " << res << dendl;
  return res;
}

int Client::ll_unlink(Inode *in, const char *name, const UserPerm& perm)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_unlink " << vino << " " << name << dendl;
  tout(cct) << "ll_unlink" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << name << std::endl;

  std::scoped_lock lock(client_lock);

  if (!fuse_default_permissions) {
    int r = may_delete(in, name, perm);
    if (r < 0)
      return r;
  }
  return _unlink(in, name, perm);
}
int Client::_rmdir(Inode *dir, const char *name, const UserPerm& perms)
{
  ldout(cct, 8) << "_rmdir(" << dir->ino << " " << name << " uid "
                << perms.uid() << " gid " << perms.gid() << ")" << dendl;

  if (dir->snapid != CEPH_NOSNAP && dir->snapid != CEPH_SNAPDIR) {
    return -CEPHFS_EROFS;
  }

  int op = dir->snapid == CEPH_SNAPDIR ? CEPH_MDS_OP_RMSNAP : CEPH_MDS_OP_RMDIR;
  MetaRequest *req = new MetaRequest(op);

  filepath path;
  dir->make_nosnap_relative_path(path);
  path.push_dentry(name);
  req->set_filepath(path);
  req->set_inode(dir);

  req->dentry_drop = CEPH_CAP_FILE_SHARED;
  req->dentry_unless = CEPH_CAP_FILE_EXCL;
  req->other_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;

  InodeRef in;
  Dentry *de;

  int res = get_or_create(dir, name, &de);
  if (op == CEPH_MDS_OP_RMDIR)
    req->set_dentry(de);

  res = _lookup(dir, name, 0, &in, perms);
  if (op == CEPH_MDS_OP_RMSNAP) {
    unlink(de, true, true);
  }
  req->set_other_inode(in.get());

  res = make_request(req, perms);

  ldout(cct, 8) << "rmdir(" << path << ") = " << res << dendl;
  return res;
}

int Client::ll_rmdir(Inode *in, const char *name, const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_rmdir " << vino << " " << name << dendl;
  tout(cct) << "ll_rmdir" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << name << std::endl;

  std::scoped_lock lock(client_lock);

  if (!fuse_default_permissions) {
    int r = may_delete(in, name, perms);
    if (r < 0)
      return r;
  }
  return _rmdir(in, name, perms);
}
int Client::_rename(Inode *fromdir, const char *fromname, Inode *todir, const char *toname, const UserPerm& perm, std::string alternate_name)
{
  ldout(cct, 8) << "_rename(" << fromdir->ino << " " << fromname << " to "
                << todir->ino << " " << toname
                << " uid " << perm.uid() << " gid " << perm.gid() << ")"
                << dendl;

  if (fromdir->snapid != todir->snapid)
    return -CEPHFS_EXDEV;

  int op = CEPH_MDS_OP_RENAME;
  if (fromdir->snapid != CEPH_NOSNAP) {
    if (fromdir == todir && fromdir->snapid == CEPH_SNAPDIR)
      op = CEPH_MDS_OP_RENAMESNAP;
    else
      return -CEPHFS_EROFS;
  }
  if (fromdir != todir) {
    Inode *fromdir_root =
      fromdir->quota.is_enable() ? fromdir : get_quota_root(fromdir, perm);
    Inode *todir_root =
      todir->quota.is_enable() ? todir : get_quota_root(todir, perm);
    if (fromdir_root != todir_root) {
      return -CEPHFS_EXDEV;
    }
  }

  MetaRequest *req = new MetaRequest(op);

  filepath from;
  fromdir->make_nosnap_relative_path(from);
  from.push_dentry(fromname);
  filepath to;
  todir->make_nosnap_relative_path(to);
  to.push_dentry(toname);
  req->set_filepath(to);
  req->set_filepath2(from);
  req->set_alternate_name(std::move(alternate_name));

  Dentry *oldde;
  int res = get_or_create(fromdir, fromname, &oldde);
  Dentry *de;
  res = get_or_create(todir, toname, &de);

  if (op == CEPH_MDS_OP_RENAME) {
    req->set_old_dentry(oldde);
    req->old_dentry_drop = CEPH_CAP_FILE_SHARED;
    req->old_dentry_unless = CEPH_CAP_FILE_EXCL;

    req->set_dentry(de);
    req->dentry_drop = CEPH_CAP_FILE_SHARED;
    req->dentry_unless = CEPH_CAP_FILE_EXCL;

    InodeRef oldin, otherin;
    res = _lookup(fromdir, fromname, 0, &oldin, perm);

    Inode *oldinode = oldin.get();
    oldinode->break_all_delegs();
    req->set_old_inode(oldinode);
    req->old_inode_drop = CEPH_CAP_LINK_SHARED;

    res = _lookup(todir, toname, 0, &otherin, perm);
    switch (res) {
    case 0:
      {
        Inode *in = otherin.get();
        req->set_other_inode(in);
        in->break_all_delegs();
      }
      req->other_inode_drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
      break;
    case -CEPHFS_ENOENT:
      break;
    }

    req->set_inode(todir);
  } else {
    // renamesnap reply contains no tracedn, so we need to invalidate
    // the dentries ourselves
    unlink(oldde, true, true);
    unlink(de, true, true);

    req->set_inode(todir);
  }

  InodeRef target;
  res = make_request(req, perm, &target);
  ldout(cct, 10) << "rename result is " << res << dendl;

  // renamed item from our cache

  ldout(cct, 8) << "_rename(" << from << ", " << to << ") = " << res << dendl;
  return res;
}
int Client::ll_rename(Inode *parent, const char *name, Inode *newparent,
                      const char *newname, const UserPerm& perm)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vparent = _get_vino(parent);
  vinodeno_t vnewparent = _get_vino(newparent);

  ldout(cct, 3) << "ll_rename " << vparent << " " << name << " to "
                << vnewparent << " " << newname << dendl;
  tout(cct) << "ll_rename" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << vnewparent.ino.val << std::endl;
  tout(cct) << newname << std::endl;

  std::scoped_lock lock(client_lock);

  if (!fuse_default_permissions) {
    int r = may_delete(parent, name, perm);
    if (r < 0)
      return r;
    r = may_delete(newparent, newname, perm);
    if (r < 0 && r != -CEPHFS_ENOENT)
      return r;
  }

  return _rename(parent, name, newparent, newname, perm, "");
}
int Client::_link(Inode *in, Inode *dir, const char *newname, const UserPerm& perm, std::string alternate_name, InodeRef *inp)
{
  ldout(cct, 8) << "_link(" << in->ino << " to " << dir->ino << " " << newname
                << " uid " << perm.uid() << " gid " << perm.gid() << ")" << dendl;

  if (strlen(newname) > NAME_MAX)
    return -CEPHFS_ENAMETOOLONG;

  if (in->snapid != CEPH_NOSNAP || dir->snapid != CEPH_NOSNAP) {
    return -CEPHFS_EROFS;
  }
  if (is_quota_files_exceeded(dir, perm)) {
    return -CEPHFS_EDQUOT;
  }

  in->break_all_delegs();
  MetaRequest *req = new MetaRequest(CEPH_MDS_OP_LINK);

  filepath path(newname, dir->ino);
  req->set_filepath(path);
  req->set_alternate_name(std::move(alternate_name));
  filepath existing(in->ino);
  req->set_filepath2(existing);

  req->set_inode(dir);
  req->inode_drop = CEPH_CAP_FILE_SHARED;
  req->inode_unless = CEPH_CAP_FILE_EXCL;

  Dentry *de;
  int res = get_or_create(dir, newname, &de);
  req->set_dentry(de);

  res = make_request(req, perm, inp);
  ldout(cct, 10) << "link result is " << res << dendl;

  ldout(cct, 8) << "link(" << existing << ", " << path << ") = " << res << dendl;
  return res;
}

int Client::ll_link(Inode *in, Inode *newparent, const char *newname,
                    const UserPerm& perm)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vino = _get_vino(in);
  vinodeno_t vnewparent = _get_vino(newparent);

  ldout(cct, 3) << "ll_link " << vino << " to " << vnewparent << " " <<
    newname << dendl;
  tout(cct) << "ll_link" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << vnewparent << std::endl;
  tout(cct) << newname << std::endl;

  InodeRef target;

  std::scoped_lock lock(client_lock);

  if (!fuse_default_permissions) {
    if (S_ISDIR(in->mode))
      return -CEPHFS_EPERM;

    int r = may_hardlink(in, perm);
    if (r < 0)
      return r;

    r = may_create(newparent, perm);
    if (r < 0)
      return r;
  }

  return _link(in, newparent, newname, perm, "", &target);
}
int Client::ll_num_osds(void)
{
  std::scoped_lock lock(client_lock);
  return objecter->with_osdmap(std::mem_fn(&OSDMap::get_num_osds));
}

int Client::ll_osdaddr(int osd, uint32_t *addr)
{
  std::scoped_lock lock(client_lock);

  entity_addr_t g;
  bool exists = objecter->with_osdmap([&](const OSDMap& o) {
      if (!o.exists(osd))
        return false;
      g = o.get_addrs(osd).front();
      return true;
    });
  if (!exists)
    return -1;
  uint32_t nb_addr = (g.in4_addr()).sin_addr.s_addr;
  *addr = ntohl(nb_addr);
  return 0;
}
uint32_t Client::ll_stripe_unit(Inode *in)
{
  std::scoped_lock lock(client_lock);
  return in->layout.stripe_unit;
}

uint64_t Client::ll_snap_seq(Inode *in)
{
  std::scoped_lock lock(client_lock);
  return in->snaprealm->seq;
}

int Client::ll_file_layout(Inode *in, file_layout_t *layout)
{
  std::scoped_lock lock(client_lock);
  *layout = in->layout;
  return 0;
}

int Client::ll_file_layout(Fh *fh, file_layout_t *layout)
{
  return ll_file_layout(fh->inode.get(), layout);
}
/* Currently we cannot take advantage of redundancy in reads, since we
   would have to go through all possible placement groups (a
   potentially quite large number determined by a hash), and use CRUSH
   to calculate the appropriate set of OSDs for each placement group,
   then index into that.  An array with one entry per OSD is much more
   tractable and works for demonstration purposes. */

int Client::ll_get_stripe_osd(Inode *in, uint64_t blockno,
                              file_layout_t* layout)
{
  std::scoped_lock lock(client_lock);

  inodeno_t ino = in->ino;
  uint32_t object_size = layout->object_size;
  uint32_t su = layout->stripe_unit;
  uint32_t stripe_count = layout->stripe_count;
  uint64_t stripes_per_object = object_size / su;
  uint64_t stripeno = 0, stripepos = 0;

  if (stripe_count) {
    stripeno = blockno / stripe_count;    // which horizontal stripe        (Y)
    stripepos = blockno % stripe_count;   // which object in the object set (X)
  }
  uint64_t objectsetno = stripeno / stripes_per_object;       // which object set
  uint64_t objectno = objectsetno * stripe_count + stripepos;  // object id

  object_t oid = file_object_t(ino, objectno);
  return objecter->with_osdmap([&](const OSDMap& o) {
      ceph_object_layout olayout =
        o.file_to_object_layout(oid, *layout);
      pg_t pg = (pg_t)olayout.ol_pgid;

      vector<int> osds;
      int primary;
      o.pg_to_acting_osds(pg, &osds, &primary);
      return osds[0];
    });
}

/* Return the offset of the block, internal to the object */

uint64_t Client::ll_get_internal_offset(Inode *in, uint64_t blockno)
{
  std::scoped_lock lock(client_lock);
  file_layout_t *layout = &(in->layout);
  uint32_t object_size = layout->object_size;
  uint32_t su = layout->stripe_unit;
  uint64_t stripes_per_object = object_size / su;

  return (blockno % stripes_per_object) * su;
}
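/*
 * Illustrative worked example (not part of the original code), using an
 * assumed layout of stripe_unit = 1 MiB, stripe_count = 4 and
 * object_size = 4 MiB, so stripes_per_object = 4:
 *
 *   blockno = 9  ->  stripeno    = 9 / 4 = 2   (third horizontal stripe)
 *                    stripepos   = 9 % 4 = 1   (second object in the set)
 *                    objectsetno = 2 / 4 = 0
 *                    objectno    = 0 * 4 + 1 = 1
 *
 *   ll_get_internal_offset(in, 9) = (9 % 4) * 1 MiB = 1 MiB into object 1.
 *
 * This is the same arithmetic the Striper uses to map file offsets onto RADOS
 * objects; these helpers just expose it one stripe-unit-sized block at a time.
 */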
int Client::ll_opendir(Inode *in, int flags, dir_result_t** dirpp,
                       const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_opendir " << vino << dendl;
  tout(cct) << "ll_opendir" << std::endl;
  tout(cct) << vino.ino.val << std::endl;

  std::scoped_lock lock(client_lock);

  if (!fuse_default_permissions) {
    int r = may_open(in, flags, perms);
    if (r < 0)
      return r;
  }

  int r = _opendir(in, dirpp, perms);
  tout(cct) << (uintptr_t)*dirpp << std::endl;

  ldout(cct, 3) << "ll_opendir " << vino << " = " << r << " (" << *dirpp << ")"
                << dendl;
  return r;
}

int Client::ll_releasedir(dir_result_t *dirp)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << "ll_releasedir " << dirp << dendl;
  tout(cct) << "ll_releasedir" << std::endl;
  tout(cct) << (uintptr_t)dirp << std::endl;

  std::scoped_lock lock(client_lock);
  _closedir(dirp);
  return 0;
}

int Client::ll_fsyncdir(dir_result_t *dirp)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << "ll_fsyncdir " << dirp << dendl;
  tout(cct) << "ll_fsyncdir" << std::endl;
  tout(cct) << (uintptr_t)dirp << std::endl;

  std::scoped_lock lock(client_lock);
  return _fsync(dirp->inode.get(), false);
}
int Client::ll_open(Inode *in, int flags, Fh **fhp, const UserPerm& perms)
{
  ceph_assert(!(flags & O_CREAT));

  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vino = _get_vino(in);

  ldout(cct, 3) << "ll_open " << vino << " " << ceph_flags_sys2wire(flags) << dendl;
  tout(cct) << "ll_open" << std::endl;
  tout(cct) << vino.ino.val << std::endl;
  tout(cct) << ceph_flags_sys2wire(flags) << std::endl;

  std::scoped_lock lock(client_lock);

  int r;
  if (!fuse_default_permissions) {
    r = may_open(in, flags, perms);
    if (r < 0)
      return r;
  }

  r = _open(in, flags, 0, fhp /* may be NULL */, perms);

  Fh *fhptr = fhp ? *fhp : NULL;
  if (fhptr) {
    ll_unclosed_fh_set.insert(fhptr);
  }
  tout(cct) << (uintptr_t)fhptr << std::endl;
  ldout(cct, 3) << "ll_open " << vino << " " << ceph_flags_sys2wire(flags) <<
    " = " << r << " (" << fhptr << ")" << dendl;
  return r;
}
int Client::_ll_create(Inode *parent, const char *name, mode_t mode,
                       int flags, InodeRef *in, int caps, Fh **fhp,
                       const UserPerm& perms)
{
  vinodeno_t vparent = _get_vino(parent);

  ldout(cct, 8) << "_ll_create " << vparent << " " << name << " 0" << oct <<
    mode << dec << " " << ceph_flags_sys2wire(flags) << ", uid " << perms.uid()
    << ", gid " << perms.gid() << dendl;
  tout(cct) << "ll_create" << std::endl;
  tout(cct) << vparent.ino.val << std::endl;
  tout(cct) << name << std::endl;
  tout(cct) << mode << std::endl;
  tout(cct) << ceph_flags_sys2wire(flags) << std::endl;

  bool created = false;
  int r = _lookup(parent, name, caps, in, perms);

  if (r == 0 && (flags & O_CREAT) && (flags & O_EXCL))
    return -CEPHFS_EEXIST;

  if (r == -CEPHFS_ENOENT && (flags & O_CREAT)) {
    if (!fuse_default_permissions) {
      r = may_create(parent, perms);
      if (r < 0)
        return r;
    }
    r = _create(parent, name, flags, mode, in, fhp, 0, 0, 0, NULL, &created,
                perms);
  }
  if (r < 0)
    return r;

  ldout(cct, 20) << "_ll_create created = " << created << dendl;

  if (!created) {
    if (!fuse_default_permissions) {
      r = may_open(in->get(), flags, perms);
      if (r < 0) {
        if (*fhp) {
          int release_r = _release_fh(*fhp);
          ceph_assert(release_r == 0);  // during create, no async data ops should have happened
        }
        return r;
      }
    }
    if (*fhp == NULL) {
      r = _open(in->get(), flags, mode, fhp, perms);
      if (r < 0)
        return r;
    }
  }

  if (*fhp) {
    ll_unclosed_fh_set.insert(*fhp);
  }

  ino_t ino = 0;
  if (r >= 0) {
    Inode *inode = in->get();
    if (use_faked_inos())
      ino = inode->faked_ino;
    else
      ino = inode->ino;
  }

  tout(cct) << (uintptr_t)*fhp << std::endl;
  tout(cct) << ino << std::endl;
  ldout(cct, 8) << "_ll_create " << vparent << " " << name << " 0" << oct <<
    mode << dec << " " << ceph_flags_sys2wire(flags) << " = " << r << " (" <<
    *fhp << " " << hex << ino << dec << ")" << dendl;
  return r;
}
int Client::ll_create(Inode *parent, const char *name, mode_t mode,
                      int flags, struct stat *attr, Inode **outp, Fh **fhp,
                      const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  InodeRef in;
  int r = _ll_create(parent, name, mode, flags, &in, CEPH_STAT_CAP_INODE_ALL,
                     fhp, perms);
  if (r >= 0) {
    // passing an Inode in outp requires an additional ref
    fill_stat(in, attr);
    *outp = in.get();
  }
  return r;
}

int Client::ll_createx(Inode *parent, const char *name, mode_t mode,
                       int oflags, Inode **outp, Fh **fhp,
                       struct ceph_statx *stx, unsigned want, unsigned lflags,
                       const UserPerm& perms)
{
  unsigned caps = statx_to_mask(lflags, want);
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  InodeRef in;
  int r = _ll_create(parent, name, mode, oflags, &in, caps, fhp, perms);
  if (r >= 0) {
    // passing an Inode in outp requires an additional ref
    fill_statx(in, caps, stx);
    *outp = in.get();
  }
  return r;
}
loff_t Client::ll_lseek(Fh *fh, loff_t offset, int whence)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  tout(cct) << "ll_lseek" << std::endl;
  tout(cct) << offset << std::endl;
  tout(cct) << whence << std::endl;

  std::scoped_lock lock(client_lock);
  return _lseek(fh, offset, whence);
}

int Client::ll_read(Fh *fh, loff_t off, loff_t len, bufferlist *bl)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << "ll_read " << fh << " " << fh->inode->ino << " " << " " << off << "~" << len << dendl;
  tout(cct) << "ll_read" << std::endl;
  tout(cct) << (uintptr_t)fh << std::endl;
  tout(cct) << off << std::endl;
  tout(cct) << len << std::endl;

  /* We can't return bytes written larger than INT_MAX, clamp len to that */
  len = std::min(len, (loff_t)INT_MAX);
  std::scoped_lock lock(client_lock);

  int r = _read(fh, off, len, bl);
  ldout(cct, 3) << "ll_read " << fh << " " << off << "~" << len << " = " << r
                << dendl;
  return r;
}
int Client::ll_read_block(Inode *in, uint64_t blockid,
                          char *buf,
                          uint64_t offset,
                          uint64_t length,
                          file_layout_t* layout)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  vinodeno_t vino = _get_vino(in);
  object_t oid = file_object_t(vino.ino, blockid);
  C_SaferCond onfinish;
  bufferlist bl;

  objecter->read(oid,
                 object_locator_t(layout->pool_id),
                 offset,
                 length,
                 vino.snapid,
                 &bl,
                 CEPH_OSD_FLAG_READ,
                 &onfinish);

  int r = onfinish.wait();
  if (r >= 0) {
    bl.begin().copy(bl.length(), buf);
    r = bl.length();
  }
  return r;
}

/* It appears that the OSD doesn't return success unless the entire
   buffer was written, return the write length on success. */

int Client::ll_write_block(Inode *in, uint64_t blockid,
                           char* buf, uint64_t offset,
                           uint64_t length, file_layout_t* layout,
                           uint64_t snapseq, uint32_t sync)
{
  vinodeno_t vino = ll_get_vino(in);
  int r = 0;
  std::unique_ptr<C_SaferCond> onsafe = nullptr;

  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  if (length == 0) {
    return -CEPHFS_EINVAL;
  }
  if (true || sync) {
    /* if write is stable, the epilogue is waiting on
     * flock */
    onsafe.reset(new C_SaferCond("Client::ll_write_block flock"));
  }
  object_t oid = file_object_t(vino.ino, blockid);
  SnapContext fakesnap;
  ceph::bufferlist bl;
  if (length > 0) {
    bl.push_back(buffer::copy(buf, length));
  }

  ldout(cct, 1) << "ll_block_write for " << vino.ino << "." << blockid
                << dendl;

  fakesnap.seq = snapseq;

  /* lock just in time */
  objecter->write(oid,
                  object_locator_t(layout->pool_id),
                  offset,
                  length,
                  fakesnap,
                  bl,
                  ceph::real_clock::now(),
                  0,
                  onsafe.get());

  if (nullptr != onsafe) {
    r = onsafe->wait();
  }

  if (r < 0) {
    return r;
  } else {
    return length;
  }
}
int Client::ll_commit_blocks(Inode *in,
                             uint64_t offset,
                             uint64_t length)
{
  BarrierContext *bctx;
  vinodeno_t vino = _get_vino(in);
  uint64_t ino = vino.ino;

  ldout(cct, 1) << "ll_commit_blocks for " << vino.ino << " from "
                << offset << " to " << length << dendl;

  if (length == 0) {
    return -CEPHFS_EINVAL;
  }

  std::scoped_lock lock(client_lock);
  map<uint64_t, BarrierContext*>::iterator p = barriers.find(ino);
  if (p != barriers.end()) {
    barrier_interval civ(offset, offset + length);
    p->second->commit_barrier(civ);
  }
  return 0;
}
int Client::ll_write(Fh *fh, loff_t off, loff_t len, const char *data)
{
  ldout(cct, 3) << "ll_write " << fh << " " << fh->inode->ino << " " << off <<
    "~" << len << dendl;
  tout(cct) << "ll_write" << std::endl;
  tout(cct) << (uintptr_t)fh << std::endl;
  tout(cct) << off << std::endl;
  tout(cct) << len << std::endl;

  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  /* We can't return bytes written larger than INT_MAX, clamp len to that */
  len = std::min(len, (loff_t)INT_MAX);
  std::scoped_lock lock(client_lock);

  int r = _write(fh, off, len, data, NULL, 0);
  ldout(cct, 3) << "ll_write " << fh << " " << off << "~" << len << " = " << r
                << dendl;
  return r;
}

int64_t Client::ll_writev(struct Fh *fh, const struct iovec *iov, int iovcnt, int64_t off)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::unique_lock cl(client_lock);
  return _preadv_pwritev_locked(fh, iov, iovcnt, off, true, false, cl);
}

int64_t Client::ll_readv(struct Fh *fh, const struct iovec *iov, int iovcnt, int64_t off)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::unique_lock cl(client_lock);
  return _preadv_pwritev_locked(fh, iov, iovcnt, off, false, false, cl);
}
int Client::ll_flush(Fh *fh)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << "ll_flush " << fh << " " << fh->inode->ino << " " << dendl;
  tout(cct) << "ll_flush" << std::endl;
  tout(cct) << (uintptr_t)fh << std::endl;

  std::scoped_lock lock(client_lock);
  return _flush(fh);
}

int Client::ll_fsync(Fh *fh, bool syncdataonly)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << "ll_fsync " << fh << " " << fh->inode->ino << " " << dendl;
  tout(cct) << "ll_fsync" << std::endl;
  tout(cct) << (uintptr_t)fh << std::endl;

  std::scoped_lock lock(client_lock);
  int r = _fsync(fh, syncdataonly);
  if (r) {
    // If we're returning an error, clear it from the FH
    fh->take_async_err();
  }
  return r;
}

int Client::ll_sync_inode(Inode *in, bool syncdataonly)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << "ll_sync_inode " << *in << " " << dendl;
  tout(cct) << "ll_sync_inode" << std::endl;
  tout(cct) << (uintptr_t)in << std::endl;

  std::scoped_lock lock(client_lock);
  return _fsync(in, syncdataonly);
}
#ifdef FALLOC_FL_PUNCH_HOLE

int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length)
{
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  if (offset < 0 || length <= 0)
    return -CEPHFS_EINVAL;

  if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
    return -CEPHFS_EOPNOTSUPP;

  if ((mode & FALLOC_FL_PUNCH_HOLE) && !(mode & FALLOC_FL_KEEP_SIZE))
    return -CEPHFS_EOPNOTSUPP;

  Inode *in = fh->inode.get();

  if (objecter->osdmap_pool_full(in->layout.pool_id) &&
      !(mode & FALLOC_FL_PUNCH_HOLE)) {
    return -CEPHFS_ENOSPC;
  }

  if (in->snapid != CEPH_NOSNAP)
    return -CEPHFS_EROFS;

  if ((fh->mode & CEPH_FILE_MODE_WR) == 0)
    return -CEPHFS_EBADF;

  uint64_t size = offset + length;
  if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) &&
      is_quota_bytes_exceeded(in, size - in->size, fh->actor_perms)) {
    return -CEPHFS_EDQUOT;
  }

  int have;
  int r = get_caps(fh, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER, &have, -1);
  if (r < 0)
    return r;

  std::unique_ptr<C_SaferCond> onuninline = nullptr;
  if (mode & FALLOC_FL_PUNCH_HOLE) {
    if (in->inline_version < CEPH_INLINE_NONE &&
        (have & CEPH_CAP_FILE_BUFFER)) {
      bufferlist bl;
      auto inline_iter = in->inline_data.cbegin();
      int len = in->inline_data.length();
      if (offset < len) {
        if (offset > 0)
          inline_iter.copy(offset, bl);
        if (offset + size > len)
          size = len - offset;
        bl.append_zero(size);
        if (offset + size < len) {
          inline_iter += size;
          inline_iter.copy(len - offset - size, bl);
        }
        in->inline_data = bl;
        in->inline_version++;
      }
      in->mtime = in->ctime = ceph_clock_now();
      in->mark_caps_dirty(CEPH_CAP_FILE_WR);
    } else {
      if (in->inline_version < CEPH_INLINE_NONE) {
        onuninline.reset(new C_SaferCond("Client::_fallocate_uninline_data flock"));
        uninline_data(in, onuninline.get());
      }

      C_SaferCond onfinish("Client::_punch_hole flock");

      get_cap_ref(in, CEPH_CAP_FILE_BUFFER);

      _invalidate_inode_cache(in, offset, length);
      filer->zero(in->ino, &in->layout,
                  in->snaprealm->get_snap_context(),
                  offset, length,
                  ceph::real_clock::now(),
                  0, true, &onfinish);
      in->mtime = in->ctime = ceph_clock_now();
      in->mark_caps_dirty(CEPH_CAP_FILE_WR);

      client_lock.unlock();
      onfinish.wait();
      client_lock.lock();
      put_cap_ref(in, CEPH_CAP_FILE_BUFFER);
    }
  } else if (!(mode & FALLOC_FL_KEEP_SIZE)) {
    uint64_t size = offset + length;
    if (size > in->size) {
      in->size = size;
      in->mtime = in->ctime = ceph_clock_now();
      in->mark_caps_dirty(CEPH_CAP_FILE_WR);

      if (is_quota_bytes_approaching(in, fh->actor_perms)) {
        check_caps(in, CHECK_CAPS_NODELAY);
      } else if (is_max_size_approaching(in)) {
        check_caps(in, 0);
      }
    }
  }

  if (nullptr != onuninline) {
    client_lock.unlock();
    int ret = onuninline->wait();
    client_lock.lock();

    if (ret >= 0 || ret == -CEPHFS_ECANCELED) {
      in->inline_data.clear();
      in->inline_version = CEPH_INLINE_NONE;
      in->mark_caps_dirty(CEPH_CAP_FILE_WR);
    } else {
      r = ret;
    }
  }

  put_cap_ref(in, CEPH_CAP_FILE_WR);
  return r;
}
#else

int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length)
{
  return -CEPHFS_EOPNOTSUPP;
}

#endif
int Client::ll_fallocate(Fh *fh, int mode, int64_t offset, int64_t length)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << __func__ << " " << fh << " " << fh->inode->ino << " " << dendl;
  tout(cct) << __func__ << " " << mode << " " << offset << " " << length << std::endl;
  tout(cct) << (uintptr_t)fh << std::endl;

  std::scoped_lock lock(client_lock);
  return _fallocate(fh, mode, offset, length);
}

int Client::fallocate(int fd, int mode, loff_t offset, loff_t length)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  tout(cct) << __func__ << " " << " " << fd << mode << " " << offset << " " << length << std::endl;

  std::scoped_lock lock(client_lock);
  Fh *fh = get_filehandle(fd);
  if (!fh)
    return -CEPHFS_EBADF;
#if defined(__linux__) && defined(O_PATH)
  if (fh->flags & O_PATH)
    return -CEPHFS_EBADF;
#endif
  return _fallocate(fh, mode, offset, length);
}
int Client::ll_release(Fh *fh)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << __func__ << " (fh)" << fh << " " << fh->inode->ino << " " <<
    dendl;
  tout(cct) << __func__ << " (fh)" << std::endl;
  tout(cct) << (uintptr_t)fh << std::endl;

  std::scoped_lock lock(client_lock);

  if (ll_unclosed_fh_set.count(fh))
    ll_unclosed_fh_set.erase(fh);
  return _release_fh(fh);
}

int Client::ll_getlk(Fh *fh, struct flock *fl, uint64_t owner)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << "ll_getlk (fh)" << fh << " " << fh->inode->ino << dendl;
  tout(cct) << "ll_getk (fh)" << (uintptr_t)fh << std::endl;

  std::scoped_lock lock(client_lock);
  return _getlk(fh, fl, owner);
}

int Client::ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << __func__ << " (fh) " << fh << " " << fh->inode->ino << dendl;
  tout(cct) << __func__ << " (fh)" << (uintptr_t)fh << std::endl;

  std::scoped_lock lock(client_lock);
  return _setlk(fh, fl, owner, sleep);
}

int Client::ll_flock(Fh *fh, int cmd, uint64_t owner)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  ldout(cct, 3) << __func__ << " (fh) " << fh << " " << fh->inode->ino << dendl;
  tout(cct) << __func__ << " (fh)" << (uintptr_t)fh << std::endl;

  std::scoped_lock lock(client_lock);
  return _flock(fh, cmd, owner);
}
int Client::set_deleg_timeout(uint32_t timeout)
{
  std::scoped_lock lock(client_lock);

  /*
   * The whole point is to prevent blocklisting so we must time out the
   * delegation before the session autoclose timeout kicks in.
   */
  if (timeout >= mdsmap->get_session_autoclose())
    return -CEPHFS_EINVAL;

  deleg_timeout = timeout;
  return 0;
}
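/*
 * Illustrative sketch (not part of the original code): a libcephfs consumer
 * typically sets the delegation timeout once, right after mounting and before
 * requesting any delegations. The 15-second value below is an assumption for
 * the example; it only has to be smaller than the MDS session autoclose
 * interval (mds_session_autoclose, 300s by default) for the call to succeed.
 *
 *   if (ceph_set_deleg_timeout(cmount, 15) < 0)
 *     fprintf(stderr, "delegation timeout rejected\n");
 */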
int Client::ll_delegation(Fh *fh, unsigned cmd, ceph_deleg_cb_t cb, void *priv)
{
  int ret = -CEPHFS_EINVAL;

  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  Inode *inode = fh->inode.get();

  switch (cmd) {
  case CEPH_DELEGATION_NONE:
    inode->unset_deleg(fh);
    ret = 0;
    break;
  default:
    try {
      ret = inode->set_deleg(fh, cmd, cb, priv);
    } catch (std::bad_alloc&) {
      ret = -CEPHFS_ENOMEM;
    }
    break;
  }
  return ret;
}
class C_Client_RequestInterrupt : public Context {
private:
  Client *client;
  MetaRequest *req;
public:
  C_Client_RequestInterrupt(Client *c, MetaRequest *r) : client(c), req(r) {
    req->get();
  }
  void finish(int r) override {
    std::scoped_lock l(client->client_lock);
    ceph_assert(req->head.op == CEPH_MDS_OP_SETFILELOCK);
    client->_interrupt_filelock(req);
    client->put_request(req);
  }
};

void Client::ll_interrupt(void *d)
{
  MetaRequest *req = static_cast<MetaRequest*>(d);
  ldout(cct, 3) << __func__ << " tid " << req->get_tid() << dendl;
  tout(cct) << __func__ << " tid " << req->get_tid() << std::endl;
  interrupt_finisher.queue(new C_Client_RequestInterrupt(this, req));
}
// =========================================
// expose file layouts

int Client::describe_layout(const char *relpath, file_layout_t *lp,
                            const UserPerm& perms)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  filepath path(relpath);
  InodeRef in;
  int r = path_walk(path, &in, perms);
  if (r < 0)
    return r;

  *lp = in->layout;

  ldout(cct, 3) << __func__ << "(" << relpath << ") = 0" << dendl;
  return 0;
}

int Client::fdescribe_layout(int fd, file_layout_t *lp)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  Fh *f = get_filehandle(fd);
  if (!f)
    return -CEPHFS_EBADF;
  Inode *in = f->inode.get();

  *lp = in->layout;

  ldout(cct, 3) << __func__ << "(" << fd << ") = 0" << dendl;
  return 0;
}
int64_t Client::get_default_pool_id()
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  /* first data pool is the default */
  return mdsmap->get_first_data_pool();
}

int64_t Client::get_pool_id(const char *pool_name)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  return objecter->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name),
                               pool_name);
}

string Client::get_pool_name(int64_t pool)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return string();

  std::scoped_lock lock(client_lock);

  return objecter->with_osdmap([pool](const OSDMap& o) {
      return o.have_pg_pool(pool) ? o.get_pool_name(pool) : string();
    });
}

int Client::get_pool_replication(int64_t pool)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  return objecter->with_osdmap([pool](const OSDMap& o) {
      return o.have_pg_pool(pool) ? o.get_pg_pool(pool)->get_size() : -CEPHFS_ENOENT;
    });
}
int Client::get_file_extent_osds(int fd, loff_t off, loff_t *len, vector<int>& osds)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  Fh *f = get_filehandle(fd);
  if (!f)
    return -CEPHFS_EBADF;
  Inode *in = f->inode.get();

  vector<ObjectExtent> extents;
  Striper::file_to_extents(cct, in->ino, &in->layout, off, 1, in->truncate_size, extents);
  ceph_assert(extents.size() == 1);

  objecter->with_osdmap([&](const OSDMap& o) {
      pg_t pg = o.object_locator_to_pg(extents[0].oid, extents[0].oloc);
      o.pg_to_acting_osds(pg, osds);
    });

  if (osds.empty())
    return -CEPHFS_EINVAL;

  /*
   * Return the remainder of the extent (stripe unit)
   *
   * If length = 1 is passed to Striper::file_to_extents we get a single
   * extent back, but its length is one so we still need to compute the length
   * to the end of the stripe unit.
   *
   * If length = su then we may get 1 or 2 objects back in the extents vector
   * which would have to be examined. Even then, the offsets are local to the
   * object, so matching up to the file offset is extra work.
   *
   * It seems simpler to stick with length = 1 and manually compute the
   * remainder.
   */
  uint64_t su = in->layout.stripe_unit;
  *len = su - (off % su);

  return 0;
}
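/*
 * Illustrative worked example (not part of the original code): with an
 * assumed stripe unit of 4 MiB, a query at file offset 6 MiB lands 2 MiB
 * into its stripe unit, so
 *
 *   *len = su - (off % su) = 4 MiB - (6 MiB % 4 MiB) = 2 MiB
 *
 * i.e. 2 MiB remain on the same set of OSDs before the next stripe unit
 * (and possibly a different object/OSD set) begins.
 */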
int Client::get_osd_crush_location(int id, vector<pair<string, string> >& path)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  if (id < 0)
    return -CEPHFS_EINVAL;
  return objecter->with_osdmap([&](const OSDMap& o) {
      return o.crush->get_full_location_ordered(id, path);
    });
}

int Client::get_file_stripe_address(int fd, loff_t offset,
                                    vector<entity_addr_t>& address)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  Fh *f = get_filehandle(fd);
  if (!f)
    return -CEPHFS_EBADF;
  Inode *in = f->inode.get();

  // map the offset to a single extent, i.e. one object
  vector<ObjectExtent> extents;
  Striper::file_to_extents(cct, in->ino, &in->layout, offset, 1,
                           in->truncate_size, extents);
  ceph_assert(extents.size() == 1);

  // now we have the object and its 'layout'
  return objecter->with_osdmap([&](const OSDMap& o) {
      pg_t pg = o.object_locator_to_pg(extents[0].oid, extents[0].oloc);
      vector<int> osds;
      o.pg_to_acting_osds(pg, osds);
      if (osds.empty())
        return -CEPHFS_EINVAL;
      for (unsigned i = 0; i < osds.size(); i++) {
        entity_addr_t addr = o.get_addrs(osds[i]).front();
        address.push_back(addr);
      }
      return 0;
    });
}

int Client::get_osd_addr(int osd, entity_addr_t& addr)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  return objecter->with_osdmap([&](const OSDMap& o) {
      if (!o.exists(osd))
        return -CEPHFS_ENOENT;

      addr = o.get_addrs(osd).front();
      return 0;
    });
}

int Client::enumerate_layout(int fd, vector<ObjectExtent>& result,
                             loff_t length, loff_t offset)
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  Fh *f = get_filehandle(fd);
  if (!f)
    return -CEPHFS_EBADF;
  Inode *in = f->inode.get();

  // map to a list of extents
  Striper::file_to_extents(cct, in->ino, &in->layout, offset, length, in->truncate_size, result);

  ldout(cct, 3) << __func__ << "(" << fd << ", " << length << ", " << offset << ") = 0" << dendl;
  return 0;
}

/* find an osd with the same ip.  -CEPHFS_ENXIO if none. */
int Client::get_local_osd()
{
  RWRef_t mref_reader(mount_state, CLIENT_MOUNTING);
  if (!mref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  std::scoped_lock lock(client_lock);

  objecter->with_osdmap([this](const OSDMap& o) {
      if (o.get_epoch() != local_osd_epoch) {
        local_osd = o.find_osd_on_ip(messenger->get_myaddrs().front());
        local_osd_epoch = o.get_epoch();
      }
    });
  return local_osd;
}
// ===============================

void Client::ms_handle_connect(Connection *con)
{
  ldout(cct, 10) << __func__ << " on " << con->get_peer_addr() << dendl;
}

bool Client::ms_handle_reset(Connection *con)
{
  ldout(cct, 0) << __func__ << " on " << con->get_peer_addr() << dendl;
  return false;
}

void Client::ms_handle_remote_reset(Connection *con)
{
  std::scoped_lock lock(client_lock);
  ldout(cct, 0) << __func__ << " on " << con->get_peer_addr() << dendl;
  switch (con->get_peer_type()) {
  case CEPH_ENTITY_TYPE_MDS:
    {
      // kludge to figure out which mds this is; fixme with a Connection* state
      mds_rank_t mds = MDS_RANK_NONE;
      MetaSession *s = NULL;
      for (auto &p : mds_sessions) {
        if (mdsmap->have_inst(p.first) && mdsmap->get_addrs(p.first) == con->get_peer_addrs()) {
          mds = p.first;
          s = &p.second;
        }
      }
      if (mds >= 0) {
        assert (s != NULL);
        switch (s->state) {
        case MetaSession::STATE_CLOSING:
          ldout(cct, 1) << "reset from mds we were closing; we'll call that closed" << dendl;
          _closed_mds_session(s);
          break;

        case MetaSession::STATE_OPENING:
          {
            ldout(cct, 1) << "reset from mds we were opening; retrying" << dendl;
            list<Context*> waiters;
            waiters.swap(s->waiting_for_open);
            _closed_mds_session(s);
            MetaSession *news = _get_or_open_mds_session(mds);
            news->waiting_for_open.swap(waiters);
          }
          break;

        case MetaSession::STATE_OPEN:
          {
            objecter->maybe_request_map(); /* to check if we are blocklisted */
            if (cct->_conf.get_val<bool>("client_reconnect_stale")) {
              ldout(cct, 1) << "reset from mds we were open; close mds session for reconnect" << dendl;
              _closed_mds_session(s);
            } else {
              ldout(cct, 1) << "reset from mds we were open; mark session as stale" << dendl;
              s->state = MetaSession::STATE_STALE;
            }
          }
          break;

        case MetaSession::STATE_NEW:
        case MetaSession::STATE_CLOSED:
        default:
          break;
        }
      }
    }
    break;
  }
}

bool Client::ms_handle_refused(Connection *con)
{
  ldout(cct, 1) << __func__ << " on " << con->get_peer_addr() << dendl;
  return false;
}
Inode *Client::get_quota_root(Inode *in, const UserPerm& perms)
{
  Inode *quota_in = root_ancestor;
  SnapRealm *realm = in->snaprealm;

  while (realm) {
    ldout(cct, 10) << __func__ << " realm " << realm->ino << dendl;
    if (realm->ino != in->ino) {
      auto p = inode_map.find(vinodeno_t(realm->ino, CEPH_NOSNAP));
      if (p == inode_map.end())
        break;

      if (p->second->quota.is_enable()) {
        quota_in = p->second;
        break;
      }
    }
    realm = realm->pparent;
  }
  ldout(cct, 10) << __func__ << " " << in->vino() << " -> " << quota_in->vino() << dendl;
  return quota_in;
}
/**
 * Traverse quota ancestors of the Inode, return true
 * if any of them passes the passed function
 */
bool Client::check_quota_condition(Inode *in, const UserPerm& perms,
                                   std::function<bool (const Inode &in)> test)
{
  while (true) {
    ceph_assert(in != NULL);
    if (test(*in)) {
      return true;
    }

    if (in == root_ancestor) {
      // We're done traversing, drop out
      return false;
    } else {
      // Continue up the tree
      in = get_quota_root(in, perms);
    }
  }
}
bool Client::is_quota_files_exceeded(Inode *in, const UserPerm& perms)
{
  return check_quota_condition(in, perms,
      [](const Inode &in) {
        return in.quota.max_files && in.rstat.rsize() >= in.quota.max_files;
      });
}
bool Client::is_quota_bytes_exceeded(Inode *in, int64_t new_bytes,
                                     const UserPerm& perms)
{
  return check_quota_condition(in, perms,
      [&new_bytes](const Inode &in) {
        return in.quota.max_bytes && (in.rstat.rbytes + new_bytes)
               > in.quota.max_bytes;
      });
}
bool Client::is_quota_bytes_approaching(Inode *in, const UserPerm& perms)
{
  ceph_assert(in->size >= in->reported_size);
  const uint64_t size = in->size - in->reported_size;
  return check_quota_condition(in, perms,
      [&size](const Inode &in) {
        if (in.quota.max_bytes) {
          if (in.rstat.rbytes >= in.quota.max_bytes) {
            return true;
          }

          const uint64_t space = in.quota.max_bytes - in.rstat.rbytes;
          return (space >> 4) < size;
        } else {
          return false;
        }
      });
}
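
// Worked example of the (space >> 4) heuristic above, with illustrative
// numbers (not taken from the source): if quota.max_bytes is 1 GiB and
// rstat.rbytes is 1008 MiB, the remaining headroom is 16 MiB, so
// space >> 4 == 1 MiB.  Any unreported growth (in->size - in->reported_size)
// larger than 1 MiB then makes the lambda return true, i.e. the client starts
// reporting sizes eagerly once growth exceeds 1/16 of the remaining quota.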
int Client::check_pool_perm(Inode *in, int need)
{
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  if (!cct->_conf->client_check_pool_perm)
    return 0;

  /* Only need to do this for regular files */
  if (!in->is_file())
    return 0;

  int64_t pool_id = in->layout.pool_id;
  std::string pool_ns = in->layout.pool_ns;
  std::pair<int64_t, std::string> perm_key(pool_id, pool_ns);
  int have = 0;
  while (true) {
    auto it = pool_perms.find(perm_key);
    if (it == pool_perms.end())
      break;
    if (it->second == POOL_CHECKING) {
      // avoid concurrent checkings
      wait_on_list(waiting_for_pool_perm);
    } else {
      have = it->second;
      ceph_assert(have & POOL_CHECKED);
      break;
    }
  }

  if (!have) {
    if (in->snapid != CEPH_NOSNAP) {
      // pool permission check needs to write to the first object. But for snapshot,
      // head of the first object may have already been deleted. To avoid creating
      // orphan object, skip the check for now.
      return 0;
    }

    pool_perms[perm_key] = POOL_CHECKING;

    char oid_buf[32];
    snprintf(oid_buf, sizeof(oid_buf), "%llx.00000000", (unsigned long long)in->ino);
    object_t oid = oid_buf;

    SnapContext nullsnapc;

    C_SaferCond rd_cond;
    ObjectOperation rd_op;
    rd_op.stat(nullptr, nullptr, nullptr);

    objecter->mutate(oid, OSDMap::file_to_object_locator(in->layout), rd_op,
                     nullsnapc, ceph::real_clock::now(), 0, &rd_cond);

    C_SaferCond wr_cond;
    ObjectOperation wr_op;
    wr_op.create(true);

    objecter->mutate(oid, OSDMap::file_to_object_locator(in->layout), wr_op,
                     nullsnapc, ceph::real_clock::now(), 0, &wr_cond);

    client_lock.unlock();
    int rd_ret = rd_cond.wait();
    int wr_ret = wr_cond.wait();
    client_lock.lock();

    bool errored = false;

    if (rd_ret == 0 || rd_ret == -CEPHFS_ENOENT)
      have |= POOL_READ;
    else if (rd_ret != -CEPHFS_EPERM) {
      ldout(cct, 10) << __func__ << " on pool " << pool_id << " ns " << pool_ns
                     << " rd_err = " << rd_ret << " wr_err = " << wr_ret << dendl;
      errored = true;
    }

    if (wr_ret == 0 || wr_ret == -CEPHFS_EEXIST)
      have |= POOL_WRITE;
    else if (wr_ret != -CEPHFS_EPERM) {
      ldout(cct, 10) << __func__ << " on pool " << pool_id << " ns " << pool_ns
                     << " rd_err = " << rd_ret << " wr_err = " << wr_ret << dendl;
      errored = true;
    }

    if (errored) {
      // Indeterminate: erase CHECKING state so that subsequent calls re-check.
      // Raise EIO because actual error code might be misleading for
      // userspace filesystem user.
      pool_perms.erase(perm_key);
      signal_cond_list(waiting_for_pool_perm);
      return -CEPHFS_EIO;
    }

    pool_perms[perm_key] = have | POOL_CHECKED;
    signal_cond_list(waiting_for_pool_perm);
  }

  if ((need & CEPH_CAP_FILE_RD) && !(have & POOL_READ)) {
    ldout(cct, 10) << __func__ << " on pool " << pool_id << " ns " << pool_ns
                   << " need " << ccap_string(need) << ", but no read perm" << dendl;
    return -CEPHFS_EPERM;
  }
  if ((need & CEPH_CAP_FILE_WR) && !(have & POOL_WRITE)) {
    ldout(cct, 10) << __func__ << " on pool " << pool_id << " ns " << pool_ns
                   << " need " << ccap_string(need) << ", but no write perm" << dendl;
    return -CEPHFS_EPERM;
  }

  return 0;
}
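
// Summary of the probe interpretation above (restating the code, for
// reference): the stat probe succeeding or returning -ENOENT grants
// POOL_READ, the exclusive-create probe succeeding or returning -EEXIST
// grants POOL_WRITE, -EPERM on either probe simply withholds that bit, and
// any other error leaves the result indeterminate (the cached CHECKING entry
// is erased and -CEPHFS_EIO is returned so a later call re-probes).
// A hedged sketch of how a caller typically uses it:
//
//   int r = check_pool_perm(in, CEPH_CAP_FILE_WR);  // or CEPH_CAP_FILE_RD
//   if (r < 0)
//     return r;  // -CEPHFS_EPERM if the layout pool is not writable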
int Client::_posix_acl_permission(Inode *in, const UserPerm& perms, unsigned want)
{
  if (acl_type == POSIX_ACL) {
    if (in->xattrs.count(ACL_EA_ACCESS)) {
      const bufferptr& access_acl = in->xattrs[ACL_EA_ACCESS];
      return posix_acl_permits(access_acl, in->uid, in->gid, perms, want);
    }
  }
  return -CEPHFS_EAGAIN;
}
int Client::_posix_acl_chmod(Inode *in, mode_t mode, const UserPerm& perms)
{
  if (acl_type == NO_ACL)
    return 0;

  int r = _getattr(in, CEPH_STAT_CAP_XATTR, perms, in->xattr_version == 0);
  if (r < 0)
    goto out;

  if (acl_type == POSIX_ACL) {
    if (in->xattrs.count(ACL_EA_ACCESS)) {
      const bufferptr& access_acl = in->xattrs[ACL_EA_ACCESS];
      bufferptr acl(access_acl.c_str(), access_acl.length());
      r = posix_acl_access_chmod(acl, mode);
      if (r < 0)
        goto out;
      r = _do_setxattr(in, ACL_EA_ACCESS, acl.c_str(), acl.length(), 0, perms);
    } else {
      r = 0;
    }
  }
out:
  ldout(cct, 10) << __func__ << " ino " << in->ino << " result=" << r << dendl;
  return r;
}
int Client::_posix_acl_create(Inode *dir, mode_t *mode, bufferlist& xattrs_bl,
                              const UserPerm& perms)
{
  if (acl_type == NO_ACL)
    return 0;

  if (S_ISLNK(*mode))
    return 0;

  int r = _getattr(dir, CEPH_STAT_CAP_XATTR, perms, dir->xattr_version == 0);
  if (r < 0)
    goto out;

  if (acl_type == POSIX_ACL) {
    if (dir->xattrs.count(ACL_EA_DEFAULT)) {
      map<string, bufferptr> xattrs;

      const bufferptr& default_acl = dir->xattrs[ACL_EA_DEFAULT];
      bufferptr acl(default_acl.c_str(), default_acl.length());
      r = posix_acl_inherit_mode(acl, mode);
      if (r > 0) {
        r = posix_acl_equiv_mode(acl.c_str(), acl.length(), mode);
        if (r < 0)
          goto out;
        if (r > 0)
          xattrs[ACL_EA_ACCESS] = acl;
      }

      if (S_ISDIR(*mode))
        xattrs[ACL_EA_DEFAULT] = dir->xattrs[ACL_EA_DEFAULT];

      r = xattrs.size();
      if (r > 0)
        encode(xattrs, xattrs_bl);
    } else {
      if (umask_cb)
        *mode &= ~umask_cb(callback_handle);
      r = 0;
    }
  }
out:
  ldout(cct, 10) << __func__ << " dir ino " << dir->ino << " result=" << r << dendl;
  return r;
}
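
// Note on how the result is consumed (a hedged summary, not code from this
// function): when r > 0 the encoded xattrs_bl carries the inherited
// ACL_EA_ACCESS/ACL_EA_DEFAULT xattrs, and the create/mkdir/mknod paths are
// expected to attach it to the outgoing MDS request so the new inode is born
// with the inherited ACLs; *mode may also have been adjusted by
// posix_acl_inherit_mode() or the umask callback.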
void Client::set_filer_flags(int flags)
{
  std::scoped_lock l(client_lock);
  ceph_assert(flags == 0 ||
              flags == CEPH_OSD_FLAG_LOCALIZE_READS);
  objecter->add_global_op_flags(flags);
}
void Client::clear_filer_flags(int flags)
{
  std::scoped_lock l(client_lock);
  ceph_assert(flags == CEPH_OSD_FLAG_LOCALIZE_READS);
  objecter->clear_global_op_flag(flags);
}
// called before mount
void Client::set_uuid(const std::string& uuid)
{
  RWRef_t iref_reader(initialize_state, CLIENT_INITIALIZED);
  ceph_assert(iref_reader.is_state_satisfied());

  std::scoped_lock l(client_lock);
  assert(!uuid.empty());

  metadata["uuid"] = uuid;
}
// called before mount. 0 means infinite
void Client::set_session_timeout(unsigned timeout)
{
  RWRef_t iref_reader(initialize_state, CLIENT_INITIALIZED);
  ceph_assert(iref_reader.is_state_satisfied());

  std::scoped_lock l(client_lock);

  metadata["timeout"] = stringify(timeout);
}
// called before mount
int Client::start_reclaim(const std::string& uuid, unsigned flags,
                          const std::string& fs_name)
{
  RWRef_t iref_reader(initialize_state, CLIENT_INITIALIZED);
  if (!iref_reader.is_state_satisfied())
    return -CEPHFS_ENOTCONN;

  if (uuid.empty())
    return -CEPHFS_EINVAL;

  std::unique_lock l(client_lock);

  auto it = metadata.find("uuid");
  if (it != metadata.end() && it->second == uuid)
    return -CEPHFS_EINVAL;

  int r = subscribe_mdsmap(fs_name);
  if (r < 0) {
    lderr(cct) << "mdsmap subscription failed: " << cpp_strerror(r) << dendl;
    return r;
  }

  if (metadata.empty())
    populate_metadata("");

  while (mdsmap->get_epoch() == 0)
    wait_on_list(waiting_for_mdsmap);

  for (unsigned mds = 0; mds < mdsmap->get_num_in_mds(); ) {
    if (!mdsmap->is_up(mds)) {
      ldout(cct, 10) << "mds." << mds << " not active, waiting for new mdsmap" << dendl;
      wait_on_list(waiting_for_mdsmap);
      continue;
    }

    MetaSession *session;
    if (!have_open_session(mds)) {
      session = _get_or_open_mds_session(mds);
      if (session->state == MetaSession::STATE_REJECTED)
        return -CEPHFS_EPERM;
      if (session->state != MetaSession::STATE_OPENING) {
        return -CEPHFS_EINVAL;
      }
      ldout(cct, 10) << "waiting for session to mds." << mds << " to open" << dendl;
      wait_on_context_list(session->waiting_for_open);
      continue;
    }

    session = &mds_sessions.at(mds);
    if (!session->mds_features.test(CEPHFS_FEATURE_RECLAIM_CLIENT))
      return -CEPHFS_EOPNOTSUPP;

    if (session->reclaim_state == MetaSession::RECLAIM_NULL ||
        session->reclaim_state == MetaSession::RECLAIMING) {
      session->reclaim_state = MetaSession::RECLAIMING;
      auto m = make_message<MClientReclaim>(uuid, flags);
      session->con->send_message2(std::move(m));
      wait_on_list(waiting_for_reclaim);
    } else if (session->reclaim_state == MetaSession::RECLAIM_FAIL) {
      return reclaim_errno ? : -CEPHFS_ENOTRECOVERABLE;
    } else {
      mds++;
    }
  }

  // didn't find target session in any mds
  if (reclaim_target_addrs.empty()) {
    if (flags & CEPH_RECLAIM_RESET)
      return -CEPHFS_ENOENT;
    return -CEPHFS_ENOTRECOVERABLE;
  }

  if (flags & CEPH_RECLAIM_RESET)
    return 0;

  // use blocklist to check if target session was killed
  // (config option mds_session_blocklist_on_evict needs to be true)
  ldout(cct, 10) << __func__ << ": waiting for OSD epoch " << reclaim_osd_epoch << dendl;
  bs::error_code ec;
  l.unlock();
  objecter->wait_for_map(reclaim_osd_epoch, ca::use_blocked[ec]);
  l.lock();

  if (ec)
    return ceph::from_error_code(ec);

  bool blocklisted = objecter->with_osdmap(
      [this](const OSDMap &osd_map) -> bool {
        return osd_map.is_blocklisted(reclaim_target_addrs);
      });
  if (blocklisted)
    return -CEPHFS_ENOTRECOVERABLE;

  metadata["reclaiming_uuid"] = uuid;
  return 0;
}
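
// Reclaim handshake at a glance (a summary of the flow above, not additional
// behaviour): for each up MDS the client opens a session, sends
// MClientReclaim(uuid, flags) and waits; the replies populate
// reclaim_target_addrs and reclaim_osd_epoch.  Unless CEPH_RECLAIM_RESET was
// requested, it then waits for the reclaim OSD epoch and verifies the old
// session's addrs are blocklisted before recording "reclaiming_uuid";
// finish_reclaim() below later sends FLAG_FINISH and promotes that value to
// the permanent "uuid" metadata entry.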
void Client::finish_reclaim()
{
  auto it = metadata.find("reclaiming_uuid");
  if (it == metadata.end()) {
    for (auto &p : mds_sessions)
      p.second.reclaim_state = MetaSession::RECLAIM_NULL;
    return;
  }

  for (auto &p : mds_sessions) {
    p.second.reclaim_state = MetaSession::RECLAIM_NULL;
    auto m = make_message<MClientReclaim>("", MClientReclaim::FLAG_FINISH);
    p.second.con->send_message2(std::move(m));
  }

  metadata["uuid"] = it->second;
  metadata.erase(it);
}
void Client::handle_client_reclaim_reply(const MConstRef<MClientReclaimReply>& reply)
{
  mds_rank_t from = mds_rank_t(reply->get_source().num());
  ldout(cct, 10) << __func__ << " " << *reply << " from mds." << from << dendl;

  std::scoped_lock cl(client_lock);
  MetaSession *session = _get_mds_session(from, reply->get_connection().get());
  if (!session) {
    ldout(cct, 10) << " discarding reclaim reply from sessionless mds." << from << dendl;
    return;
  }

  if (reply->get_result() >= 0) {
    session->reclaim_state = MetaSession::RECLAIM_OK;
    if (reply->get_epoch() > reclaim_osd_epoch)
      reclaim_osd_epoch = reply->get_epoch();
    if (!reply->get_addrs().empty())
      reclaim_target_addrs = reply->get_addrs();
  } else {
    session->reclaim_state = MetaSession::RECLAIM_FAIL;
    reclaim_errno = reply->get_result();
  }

  signal_cond_list(waiting_for_reclaim);
}
/**
 * This is included in cap release messages, to cause
 * the MDS to wait until this OSD map epoch. It is necessary
 * in corner cases where we cancel RADOS ops, so that
 * nobody else tries to do IO to the same objects in
 * the same epoch as the cancelled ops.
 */
void Client::set_cap_epoch_barrier(epoch_t e)
{
  ldout(cct, 5) << __func__ << " epoch = " << e << dendl;
  cap_epoch_barrier = e;
}
const char** Client::get_tracked_conf_keys() const
{
  static const char* keys[] = {
    "client_cache_size",
    "client_cache_mid",
    "client_acl_type",
    "client_deleg_timeout",
    "client_deleg_break_on_open",
    "client_oc_size",
    "client_oc_max_objects",
    "client_oc_max_dirty",
    "client_oc_target_dirty",
    "client_oc_max_dirty_age",
    NULL
  };
  return keys;
}
void Client::handle_conf_change(const ConfigProxy& conf,
                                const std::set<std::string> &changed)
{
  std::scoped_lock lock(client_lock);

  if (changed.count("client_cache_mid")) {
    lru.lru_set_midpoint(cct->_conf->client_cache_mid);
  }
  if (changed.count("client_acl_type")) {
    acl_type = NO_ACL;
    if (cct->_conf->client_acl_type == "posix_acl")
      acl_type = POSIX_ACL;
  }
  if (changed.count("client_oc_size")) {
    objectcacher->set_max_size(cct->_conf->client_oc_size);
  }
  if (changed.count("client_oc_max_objects")) {
    objectcacher->set_max_objects(cct->_conf->client_oc_max_objects);
  }
  if (changed.count("client_oc_max_dirty")) {
    objectcacher->set_max_dirty(cct->_conf->client_oc_max_dirty);
  }
  if (changed.count("client_oc_target_dirty")) {
    objectcacher->set_target_dirty(cct->_conf->client_oc_target_dirty);
  }
  if (changed.count("client_oc_max_dirty_age")) {
    objectcacher->set_max_dirty_age(cct->_conf->client_oc_max_dirty_age);
  }
}
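
// Example of how this observer is exercised at runtime (an assumed admin
// workflow, not part of this file): changing a tracked option, e.g.
//
//   ceph config set client client_oc_size 209715200
//
// is delivered here with "client_oc_size" in `changed`, and the ObjectCacher
// limit is updated without remounting.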
void intrusive_ptr_add_ref(Inode *in)
{
  // grab a reference for the intrusive_ptr
  in->iget();
}

void intrusive_ptr_release(Inode *in)
{
  in->client->put_inode(in);
}
mds_rank_t Client::_get_random_up_mds() const
{
  ceph_assert(ceph_mutex_is_locked_by_me(client_lock));

  std::set<mds_rank_t> up;
  mdsmap->get_up_mds_set(up);

  if (up.empty())
    return MDS_RANK_NONE;
  std::set<mds_rank_t>::const_iterator p = up.begin();
  for (int n = rand() % up.size(); n; n--)
    ++p;
  return *p;
}
StandaloneClient::StandaloneClient(Messenger *m, MonClient *mc,
                                   boost::asio::io_context& ictx)
  : Client(m, mc, new Objecter(m->cct, m, mc, ictx))
{
  monclient->set_messenger(m);
  objecter->set_client_incarnation(0);
}
StandaloneClient::~StandaloneClient()
{
  delete objecter;
  objecter = nullptr;
}
int StandaloneClient::init()
{
  RWRef_t iref_writer(initialize_state, CLIENT_INITIALIZING, false);
  ceph_assert(iref_writer.is_first_writer());

  _pre_init();
  objecter->init();

  client_lock.lock();

  messenger->add_dispatcher_tail(objecter);
  messenger->add_dispatcher_tail(this);

  monclient->set_want_keys(CEPH_ENTITY_TYPE_MDS | CEPH_ENTITY_TYPE_OSD);
  int r = monclient->init();
  if (r < 0) {
    // need to do cleanup because we're in an intermediate init state
    {
      std::scoped_lock l(timer_lock);
      timer.shutdown();
    }

    client_lock.unlock();
    objecter->shutdown();
    objectcacher->stop();
    monclient->shutdown();
    return r;
  }
  objecter->start();

  client_lock.unlock();
  _finish_init();
  iref_writer.update_state(CLIENT_INITIALIZED);

  return 0;
}
void StandaloneClient::shutdown()
{
  Client::shutdown();
  objecter->shutdown();
  monclient->shutdown();
}