1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2017 Red Hat Ltd
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #include "OSDMonitor.h"
18 #include "FSCommands.h"
19 #include "MDSMonitor.h"
20 #include "MgrStatMonitor.h"
21 #include "mds/cephfs_features.h"
23 using TOPNSPC::common::cmd_getval
;
36 using ceph::bufferlist
;
39 using ceph::ErasureCodeInterfaceRef
;
40 using ceph::ErasureCodeProfile
;
41 using ceph::Formatter
;
42 using ceph::JSONFormatter
;
43 using ceph::make_message
;
44 using ceph::mono_clock
;
45 using ceph::mono_time
;
47 class FlagSetHandler
: public FileSystemCommandHandler
51 : FileSystemCommandHandler("fs flag set")
59 const cmdmap_t
& cmdmap
,
60 std::ostream
&ss
) override
63 cmd_getval(cmdmap
, "flag_name", flag_name
);
66 cmd_getval(cmdmap
, "val", flag_val
);
69 cmd_getval(cmdmap
, "yes_i_really_mean_it", sure
);
71 if (flag_name
== "enable_multiple") {
72 bool flag_bool
= false;
73 int r
= parse_bool(flag_val
, &flag_bool
, ss
);
75 ss
<< "Invalid boolean value '" << flag_val
<< "'";
79 fsmap
.set_enable_multiple(flag_bool
);
82 ss
<< "Unknown flag '" << flag_name
<< "'";
88 class FailHandler
: public FileSystemCommandHandler
92 : FileSystemCommandHandler("fs fail")
100 const cmdmap_t
& cmdmap
,
101 std::ostream
& ss
) override
103 if (!mon
->osdmon()->is_writeable()) {
104 // not allowed to write yet, so retry when we can
105 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
110 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
111 ss
<< "Missing filesystem name";
115 auto fs
= fsmap
.get_filesystem(fs_name
);
117 auto f
= [](auto fs
) {
118 fs
->mds_map
.set_flag(CEPH_MDSMAP_NOT_JOINABLE
);
120 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
122 std::vector
<mds_gid_t
> to_fail
;
123 for (const auto& p
: fs
->mds_map
.get_mds_info()) {
124 to_fail
.push_back(p
.first
);
127 for (const auto& gid
: to_fail
) {
128 mon
->mdsmon()->fail_mds_gid(fsmap
, gid
);
130 if (!to_fail
.empty()) {
131 mon
->osdmon()->propose_pending();
135 ss
<< " marked not joinable; MDS cannot join the cluster. All MDS ranks marked failed.";
141 class FsNewHandler
: public FileSystemCommandHandler
144 explicit FsNewHandler(Paxos
*paxos
)
145 : FileSystemCommandHandler("fs new"), m_paxos(paxos
)
149 bool batched_propose() override
{
157 const cmdmap_t
& cmdmap
,
158 std::ostream
&ss
) override
160 ceph_assert(m_paxos
->is_plugged());
162 string metadata_name
;
163 cmd_getval(cmdmap
, "metadata", metadata_name
);
164 int64_t metadata
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(metadata_name
);
166 ss
<< "pool '" << metadata_name
<< "' does not exist";
171 cmd_getval(cmdmap
, "data", data_name
);
172 int64_t data
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(data_name
);
174 ss
<< "pool '" << data_name
<< "' does not exist";
178 ss
<< "pool '" << data_name
<< "' has id 0, which CephFS does not allow. Use another pool or recreate it to get a non-zero pool id.";
183 cmd_getval(cmdmap
, "fs_name", fs_name
);
184 if (fs_name
.empty()) {
185 // Ensure fs name is not empty so that we can implement
186 // commands that refer to FS by name in future.
187 ss
<< "Filesystem name may not be empty";
191 if (fsmap
.get_filesystem(fs_name
)) {
192 auto fs
= fsmap
.get_filesystem(fs_name
);
193 if (*(fs
->mds_map
.get_data_pools().begin()) == data
194 && fs
->mds_map
.get_metadata_pool() == metadata
) {
195 // Identical FS created already, this is a no-op
196 ss
<< "filesystem '" << fs_name
<< "' already exists";
199 ss
<< "filesystem already exists with name '" << fs_name
<< "'";
205 cmd_getval(cmdmap
, "force", force
);
207 const pool_stat_t
*stat
= mon
->mgrstatmon()->get_pool_stat(metadata
);
209 int64_t metadata_num_objects
= stat
->stats
.sum
.num_objects
;
210 if (!force
&& metadata_num_objects
> 0) {
211 ss
<< "pool '" << metadata_name
212 << "' already contains some objects. Use an empty pool instead.";
217 if (fsmap
.filesystem_count() > 0
218 && !fsmap
.get_enable_multiple()) {
219 ss
<< "Creation of multiple filesystems is disabled. To enable "
220 "this experimental feature, use 'ceph fs flag set enable_multiple "
225 for (auto& fs
: fsmap
.get_filesystems()) {
226 const std::vector
<int64_t> &data_pools
= fs
->mds_map
.get_data_pools();
230 "allow_dangerous_metadata_overlay", sure
);
232 if ((std::find(data_pools
.begin(), data_pools
.end(), data
) != data_pools
.end()
233 || fs
->mds_map
.get_metadata_pool() == metadata
)
235 ss
<< "Filesystem '" << fs_name
236 << "' is already using one of the specified RADOS pools. This should ONLY be done in emergencies and after careful reading of the documentation. Pass --allow-dangerous-metadata-overlay to permit this.";
241 int64_t fscid
= FS_CLUSTER_ID_NONE
;
242 if (cmd_getval(cmdmap
, "fscid", fscid
)) {
244 ss
<< "Pass --force to create a file system with a specific ID";
247 if (fsmap
.filesystem_exists(fscid
)) {
248 ss
<< "filesystem already exists with id '" << fscid
<< "'";
253 pg_pool_t
const *data_pool
= mon
->osdmon()->osdmap
.get_pg_pool(data
);
254 ceph_assert(data_pool
!= NULL
); // Checked it existed above
255 pg_pool_t
const *metadata_pool
= mon
->osdmon()->osdmap
.get_pg_pool(metadata
);
256 ceph_assert(metadata_pool
!= NULL
); // Checked it existed above
258 int r
= _check_pool(mon
->osdmon()->osdmap
, data
, POOL_DATA_DEFAULT
, force
, &ss
);
263 r
= _check_pool(mon
->osdmon()->osdmap
, metadata
, POOL_METADATA
, force
, &ss
);
268 if (!mon
->osdmon()->is_writeable()) {
269 // not allowed to write yet, so retry when we can
270 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
273 mon
->osdmon()->do_application_enable(data
,
274 pg_pool_t::APPLICATION_NAME_CEPHFS
,
275 "data", fs_name
, true);
276 mon
->osdmon()->do_application_enable(metadata
,
277 pg_pool_t::APPLICATION_NAME_CEPHFS
,
278 "metadata", fs_name
, true);
279 mon
->osdmon()->do_set_pool_opt(metadata
,
280 pool_opts_t::RECOVERY_PRIORITY
,
281 static_cast<int64_t>(5));
282 mon
->osdmon()->do_set_pool_opt(metadata
,
283 pool_opts_t::PG_NUM_MIN
,
284 static_cast<int64_t>(16));
285 mon
->osdmon()->do_set_pool_opt(metadata
,
286 pool_opts_t::PG_AUTOSCALE_BIAS
,
287 static_cast<double>(4.0));
288 mon
->osdmon()->propose_pending();
290 // All checks passed, go ahead and create.
291 auto&& fs
= fsmap
.create_filesystem(fs_name
, metadata
, data
,
292 mon
->get_quorum_con_features(), fscid
);
294 ss
<< "new fs with metadata pool " << metadata
<< " and data pool " << data
;
296 // assign a standby to rank 0 to avoid health warnings
297 auto info
= fsmap
.find_replacement_for({fs
->fscid
, 0});
300 mon
->clog
->info() << info
->human_name() << " assigned to filesystem "
301 << fs_name
<< " as rank 0";
302 fsmap
.promote(info
->global_id
, *fs
, 0);
312 class SetHandler
: public FileSystemCommandHandler
316 : FileSystemCommandHandler("fs set")
323 const cmdmap_t
& cmdmap
,
324 std::ostream
&ss
) override
327 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
328 ss
<< "Missing filesystem name";
332 auto fs
= fsmap
.get_filesystem(fs_name
);
334 if (!cmd_getval(cmdmap
, "var", var
) || var
.empty()) {
335 ss
<< "Invalid variable";
341 if (!cmd_getval(cmdmap
, "val", val
)) {
344 // we got a string. see if it contains an int.
345 n
= strict_strtoll(val
.c_str(), 10, &interr
);
346 if (var
== "max_mds") {
347 // NOTE: see also "mds set_max_mds", which can modify the same field.
348 if (interr
.length()) {
354 ss
<< "You must specify at least one MDS";
358 if (n
> 1 && n
> fs
->mds_map
.get_max_mds()) {
359 if (fs
->mds_map
.was_snaps_ever_allowed() &&
360 !fs
->mds_map
.allows_multimds_snaps()) {
361 ss
<< "multi-active MDS is not allowed while there are snapshots possibly created by pre-mimic MDS";
366 ss
<< "may not have more than " << MAX_MDS
<< " MDS ranks";
370 fsmap
.modify_filesystem(
372 [n
](std::shared_ptr
<Filesystem
> fs
)
374 fs
->mds_map
.clear_flag(CEPH_MDSMAP_NOT_JOINABLE
);
375 fs
->mds_map
.set_max_mds(n
);
377 } else if (var
== "inline_data") {
378 bool enable_inline
= false;
379 int r
= parse_bool(val
, &enable_inline
, ss
);
385 bool confirm
= false;
386 cmd_getval(cmdmap
, "yes_i_really_really_mean_it", confirm
);
388 ss
<< "Inline data support is deprecated and will be removed in a future release. "
389 << "Add --yes-i-really-really-mean-it if you are certain you want this enabled.";
392 ss
<< "inline data enabled";
394 fsmap
.modify_filesystem(
396 [](std::shared_ptr
<Filesystem
> fs
)
398 fs
->mds_map
.set_inline_data_enabled(true);
401 ss
<< "inline data disabled";
402 fsmap
.modify_filesystem(
404 [](std::shared_ptr
<Filesystem
> fs
)
406 fs
->mds_map
.set_inline_data_enabled(false);
409 } else if (var
== "balancer") {
411 ss
<< "unsetting the metadata load balancer";
413 ss
<< "setting the metadata load balancer to " << val
;
415 fsmap
.modify_filesystem(
417 [val
](std::shared_ptr
<Filesystem
> fs
)
419 fs
->mds_map
.set_balancer(val
);
422 } else if (var
== "max_file_size") {
423 if (interr
.length()) {
424 ss
<< var
<< " requires an integer value";
427 if (n
< CEPH_MIN_STRIPE_UNIT
) {
428 ss
<< var
<< " must at least " << CEPH_MIN_STRIPE_UNIT
;
431 fsmap
.modify_filesystem(
433 [n
](std::shared_ptr
<Filesystem
> fs
)
435 fs
->mds_map
.set_max_filesize(n
);
437 } else if (var
== "allow_new_snaps") {
438 bool enable_snaps
= false;
439 int r
= parse_bool(val
, &enable_snaps
, ss
);
445 fsmap
.modify_filesystem(
447 [](std::shared_ptr
<Filesystem
> fs
)
449 fs
->mds_map
.clear_snaps_allowed();
451 ss
<< "disabled new snapshots";
453 fsmap
.modify_filesystem(
455 [](std::shared_ptr
<Filesystem
> fs
)
457 fs
->mds_map
.set_snaps_allowed();
459 ss
<< "enabled new snapshots";
461 } else if (var
== "allow_multimds") {
462 ss
<< "Multiple MDS is always enabled. Use the max_mds"
463 << " parameter to control the number of active MDSs"
464 << " allowed. This command is DEPRECATED and will be"
465 << " REMOVED from future releases.";
466 } else if (var
== "allow_multimds_snaps") {
468 int r
= parse_bool(val
, &enable
, ss
);
474 if (!cmd_getval(cmdmap
, "confirm", confirm
) ||
475 confirm
!= "--yes-i-am-really-a-mds") {
476 ss
<< "Warning! This command is for MDS only. Do not run it manually";
481 ss
<< "enabled multimds with snapshot";
482 fsmap
.modify_filesystem(
484 [](std::shared_ptr
<Filesystem
> fs
)
486 fs
->mds_map
.set_multimds_snaps_allowed();
489 ss
<< "disabled multimds with snapshot";
490 fsmap
.modify_filesystem(
492 [](std::shared_ptr
<Filesystem
> fs
)
494 fs
->mds_map
.clear_multimds_snaps_allowed();
497 } else if (var
== "allow_dirfrags") {
498 ss
<< "Directory fragmentation is now permanently enabled."
499 << " This command is DEPRECATED and will be REMOVED from future releases.";
500 } else if (var
== "down") {
501 bool is_down
= false;
502 int r
= parse_bool(val
, &is_down
, ss
);
507 ss
<< fs
->mds_map
.get_fs_name();
509 fsmap
.modify_filesystem(
511 [is_down
](std::shared_ptr
<Filesystem
> fs
)
514 if (fs
->mds_map
.get_max_mds() > 0) {
515 fs
->mds_map
.set_old_max_mds();
516 fs
->mds_map
.set_max_mds(0);
517 } /* else already down! */
519 mds_rank_t oldmax
= fs
->mds_map
.get_old_max_mds();
520 fs
->mds_map
.set_max_mds(oldmax
? oldmax
: 1);
525 ss
<< " marked down. ";
527 ss
<< " marked up, max_mds = " << fs
->mds_map
.get_max_mds();
529 } else if (var
== "cluster_down" || var
== "joinable") {
530 bool joinable
= true;
531 int r
= parse_bool(val
, &joinable
, ss
);
535 if (var
== "cluster_down") {
536 joinable
= !joinable
;
539 ss
<< fs
->mds_map
.get_fs_name();
541 fsmap
.modify_filesystem(
543 [joinable
](std::shared_ptr
<Filesystem
> fs
)
546 fs
->mds_map
.clear_flag(CEPH_MDSMAP_NOT_JOINABLE
);
548 fs
->mds_map
.set_flag(CEPH_MDSMAP_NOT_JOINABLE
);
553 ss
<< " marked joinable; MDS may join as newly active.";
555 ss
<< " marked not joinable; MDS cannot join as newly active.";
558 if (var
== "cluster_down") {
559 ss
<< " WARNING: cluster_down flag is deprecated and will be"
560 << " removed in a future version. Please use \"joinable\".";
562 } else if (var
== "standby_count_wanted") {
563 if (interr
.length()) {
564 ss
<< var
<< " requires an integer value";
568 ss
<< var
<< " must be non-negative";
571 fsmap
.modify_filesystem(
573 [n
](std::shared_ptr
<Filesystem
> fs
)
575 fs
->mds_map
.set_standby_count_wanted(n
);
577 } else if (var
== "session_timeout") {
578 if (interr
.length()) {
579 ss
<< var
<< " requires an integer value";
583 ss
<< var
<< " must be at least 30s";
586 fsmap
.modify_filesystem(
588 [n
](std::shared_ptr
<Filesystem
> fs
)
590 fs
->mds_map
.set_session_timeout((uint32_t)n
);
592 } else if (var
== "session_autoclose") {
593 if (interr
.length()) {
594 ss
<< var
<< " requires an integer value";
598 ss
<< var
<< " must be at least 30s";
601 fsmap
.modify_filesystem(
603 [n
](std::shared_ptr
<Filesystem
> fs
)
605 fs
->mds_map
.set_session_autoclose((uint32_t)n
);
607 } else if (var
== "allow_standby_replay") {
609 int r
= parse_bool(val
, &allow
, ss
);
615 if (!mon
->osdmon()->is_writeable()) {
616 // not allowed to write yet, so retry when we can
617 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
620 std::vector
<mds_gid_t
> to_fail
;
621 for (const auto& [gid
, info
]: fs
->mds_map
.get_mds_info()) {
622 if (info
.state
== MDSMap::STATE_STANDBY_REPLAY
) {
623 to_fail
.push_back(gid
);
627 for (const auto& gid
: to_fail
) {
628 mon
->mdsmon()->fail_mds_gid(fsmap
, gid
);
630 if (!to_fail
.empty()) {
631 mon
->osdmon()->propose_pending();
635 auto f
= [allow
](auto& fs
) {
637 fs
->mds_map
.set_standby_replay_allowed();
639 fs
->mds_map
.clear_standby_replay_allowed();
642 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
643 } else if (var
== "min_compat_client") {
644 auto vno
= ceph_release_from_name(val
.c_str());
646 ss
<< "version " << val
<< " is not recognized";
649 ss
<< "WARNING: setting min_compat_client is deprecated"
650 " and may not do what you want.\n"
651 "The oldest release to set is octopus.\n"
652 "Please migrate to `ceph fs required_client_features ...`.";
653 auto f
= [vno
](auto&& fs
) {
654 fs
->mds_map
.set_min_compat_client(vno
);
656 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
658 ss
<< "unknown variable " << var
;
666 class CompatSetHandler
: public FileSystemCommandHandler
670 : FileSystemCommandHandler("fs compat")
678 const cmdmap_t
& cmdmap
,
679 std::ostream
&ss
) override
681 static const std::set
<std::string
> subops
= {"rm_incompat", "rm_compat", "add_incompat", "add_compat"};
684 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
685 ss
<< "Missing filesystem name";
688 auto fs
= fsmap
.get_filesystem(fs_name
);
690 ss
<< "Not found: '" << fs_name
<< "'";
695 if (!cmd_getval(cmdmap
, "subop", subop
) || subops
.count(subop
) == 0) {
696 ss
<< "subop `" << subop
<< "' not recognized. Must be one of: " << subops
;
701 if (!cmd_getval(cmdmap
, "feature", feature
) || feature
<= 0) {
702 ss
<< "Invalid feature";
706 if (fs
->mds_map
.get_num_up_mds() > 0) {
707 ss
<< "file system must be failed or down; use `ceph fs fail` to bring down";
711 CompatSet cs
= fs
->mds_map
.compat
;
712 if (subop
== "rm_compat") {
713 if (cs
.compat
.contains(feature
)) {
714 ss
<< "removed compat feature " << feature
;
715 cs
.compat
.remove(feature
);
717 ss
<< "already removed compat feature " << feature
;
719 } else if (subop
== "rm_incompat") {
720 if (cs
.incompat
.contains(feature
)) {
721 ss
<< "removed incompat feature " << feature
;
722 cs
.incompat
.remove(feature
);
724 ss
<< "already removed incompat feature " << feature
;
726 } else if (subop
== "add_compat" || subop
== "add_incompat") {
728 if (!cmd_getval(cmdmap
, "feature_str", feature_str
) || feature_str
.empty()) {
729 ss
<< "adding a feature requires a feature string";
732 auto f
= CompatSet::Feature(feature
, feature_str
);
733 if (subop
== "add_compat") {
734 if (cs
.compat
.contains(feature
)) {
735 auto name
= cs
.compat
.get_name(feature
);
736 if (name
== feature_str
) {
737 ss
<< "feature already exists";
739 ss
<< "feature with differing name `" << name
<< "' exists";
744 ss
<< "added compat feature " << f
;
746 } else if (subop
== "add_incompat") {
747 if (cs
.incompat
.contains(feature
)) {
748 auto name
= cs
.incompat
.get_name(feature
);
749 if (name
== feature_str
) {
750 ss
<< "feature already exists";
752 ss
<< "feature with differing name `" << name
<< "' exists";
756 cs
.incompat
.insert(f
);
757 ss
<< "added incompat feature " << f
;
759 } else ceph_assert(0);
760 } else ceph_assert(0);
762 auto modifyf
= [cs
= std::move(cs
)](auto&& fs
) {
763 fs
->mds_map
.compat
= cs
;
766 fsmap
.modify_filesystem(fs
->fscid
, std::move(modifyf
));
771 class RequiredClientFeaturesHandler
: public FileSystemCommandHandler
774 RequiredClientFeaturesHandler()
775 : FileSystemCommandHandler("fs required_client_features")
783 const cmdmap_t
& cmdmap
,
784 std::ostream
&ss
) override
787 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
788 ss
<< "Missing filesystem name";
791 auto fs
= fsmap
.get_filesystem(fs_name
);
793 ss
<< "Not found: '" << fs_name
<< "'";
797 if (!cmd_getval(cmdmap
, "subop", subop
) ||
798 (subop
!= "add" && subop
!= "rm")) {
799 ss
<< "Must either add or rm a feature; " << subop
<< " is not recognized";
803 if (!cmd_getval(cmdmap
, "val", val
) || val
.empty()) {
804 ss
<< "Missing feature id/name";
808 int feature
= cephfs_feature_from_name(val
);
811 feature
= strict_strtol(val
.c_str(), 10, &err
);
813 ss
<< "Invalid feature name: " << val
;
816 if (feature
< 0 || feature
> CEPHFS_FEATURE_MAX
) {
817 ss
<< "Invalid feature id: " << feature
;
822 if (subop
== "add") {
824 fsmap
.modify_filesystem(
826 [feature
, &ret
](auto&& fs
)
828 if (fs
->mds_map
.get_required_client_features().test(feature
))
830 fs
->mds_map
.add_required_client_feature(feature
);
834 ss
<< "added feature '" << cephfs_feature_name(feature
) << "' to required_client_features";
836 ss
<< "feature '" << cephfs_feature_name(feature
) << "' is already set";
840 fsmap
.modify_filesystem(
842 [feature
, &ret
](auto&& fs
)
844 if (!fs
->mds_map
.get_required_client_features().test(feature
))
846 fs
->mds_map
.remove_required_client_feature(feature
);
850 ss
<< "removed feature '" << cephfs_feature_name(feature
) << "' from required_client_features";
852 ss
<< "feature '" << cephfs_feature_name(feature
) << "' is already unset";
860 class AddDataPoolHandler
: public FileSystemCommandHandler
863 explicit AddDataPoolHandler(Paxos
*paxos
)
864 : FileSystemCommandHandler("fs add_data_pool"), m_paxos(paxos
)
867 bool batched_propose() override
{
875 const cmdmap_t
& cmdmap
,
876 std::ostream
&ss
) override
878 ceph_assert(m_paxos
->is_plugged());
881 cmd_getval(cmdmap
, "pool", poolname
);
884 if (!cmd_getval(cmdmap
, "fs_name", fs_name
)
885 || fs_name
.empty()) {
886 ss
<< "Missing filesystem name";
890 int64_t poolid
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(poolname
);
893 poolid
= strict_strtol(poolname
.c_str(), 10, &err
);
895 ss
<< "pool '" << poolname
<< "' does not exist";
900 int r
= _check_pool(mon
->osdmon()->osdmap
, poolid
, POOL_DATA_EXTRA
, false, &ss
);
905 auto fs
= fsmap
.get_filesystem(fs_name
);
906 // no-op when the data_pool already on fs
907 if (fs
->mds_map
.is_data_pool(poolid
)) {
908 ss
<< "data pool " << poolid
<< " is already on fs " << fs_name
;
912 if (!mon
->osdmon()->is_writeable()) {
913 // not allowed to write yet, so retry when we can
914 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
917 mon
->osdmon()->do_application_enable(poolid
,
918 pg_pool_t::APPLICATION_NAME_CEPHFS
,
919 "data", fs_name
, true);
920 mon
->osdmon()->propose_pending();
922 fsmap
.modify_filesystem(
924 [poolid
](std::shared_ptr
<Filesystem
> fs
)
926 fs
->mds_map
.add_data_pool(poolid
);
929 ss
<< "added data pool " << poolid
<< " to fsmap";
938 class SetDefaultHandler
: public FileSystemCommandHandler
942 : FileSystemCommandHandler("fs set-default")
949 const cmdmap_t
& cmdmap
,
950 std::ostream
&ss
) override
953 cmd_getval(cmdmap
, "fs_name", fs_name
);
954 auto fs
= fsmap
.get_filesystem(fs_name
);
956 ss
<< "filesystem '" << fs_name
<< "' does not exist";
960 fsmap
.set_legacy_client_fscid(fs
->fscid
);
965 class RemoveFilesystemHandler
: public FileSystemCommandHandler
968 RemoveFilesystemHandler()
969 : FileSystemCommandHandler("fs rm")
976 const cmdmap_t
& cmdmap
,
977 std::ostream
&ss
) override
979 /* We may need to blocklist ranks. */
980 if (!mon
->osdmon()->is_writeable()) {
981 // not allowed to write yet, so retry when we can
982 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
986 // Check caller has correctly named the FS to delete
987 // (redundant while there is only one FS, but command
988 // syntax should apply to multi-FS future)
990 cmd_getval(cmdmap
, "fs_name", fs_name
);
991 auto fs
= fsmap
.get_filesystem(fs_name
);
993 // Consider absence success to make deletes idempotent
994 ss
<< "filesystem '" << fs_name
<< "' does not exist";
998 // Check that no MDS daemons are active
999 if (fs
->mds_map
.get_num_up_mds() > 0) {
1000 ss
<< "all MDS daemons must be inactive/failed before removing filesystem. See `ceph fs fail`.";
1004 // Check for confirmation flag
1006 cmd_getval(cmdmap
, "yes_i_really_mean_it", sure
);
1008 ss
<< "this is a DESTRUCTIVE operation and will make data in your filesystem permanently" \
1009 " inaccessible. Add --yes-i-really-mean-it if you are sure you wish to continue.";
1013 if (fsmap
.get_legacy_client_fscid() == fs
->fscid
) {
1014 fsmap
.set_legacy_client_fscid(FS_CLUSTER_ID_NONE
);
1017 std::vector
<mds_gid_t
> to_fail
;
1018 // There may be standby_replay daemons left here
1019 for (const auto &i
: fs
->mds_map
.get_mds_info()) {
1020 ceph_assert(i
.second
.state
== MDSMap::STATE_STANDBY_REPLAY
);
1021 to_fail
.push_back(i
.first
);
1024 for (const auto &gid
: to_fail
) {
1025 // Standby replays don't write, so it isn't important to
1026 // wait for an osdmap propose here: ignore return value.
1027 mon
->mdsmon()->fail_mds_gid(fsmap
, gid
);
1029 if (!to_fail
.empty()) {
1030 mon
->osdmon()->propose_pending(); /* maybe new blocklists */
1033 fsmap
.erase_filesystem(fs
->fscid
);
1039 class ResetFilesystemHandler
: public FileSystemCommandHandler
1042 ResetFilesystemHandler()
1043 : FileSystemCommandHandler("fs reset")
1050 const cmdmap_t
& cmdmap
,
1051 std::ostream
&ss
) override
1054 cmd_getval(cmdmap
, "fs_name", fs_name
);
1055 auto fs
= fsmap
.get_filesystem(fs_name
);
1056 if (fs
== nullptr) {
1057 ss
<< "filesystem '" << fs_name
<< "' does not exist";
1058 // Unlike fs rm, we consider this case an error
1062 // Check that no MDS daemons are active
1063 if (fs
->mds_map
.get_num_up_mds() > 0) {
1064 ss
<< "all MDS daemons must be inactive before resetting filesystem: set the cluster_down flag"
1065 " and use `ceph mds fail` to make this so";
1069 // Check for confirmation flag
1071 cmd_getval(cmdmap
, "yes_i_really_mean_it", sure
);
1073 ss
<< "this is a potentially destructive operation, only for use by experts in disaster recovery. "
1074 "Add --yes-i-really-mean-it if you are sure you wish to continue.";
1078 fsmap
.reset_filesystem(fs
->fscid
);
1084 class RemoveDataPoolHandler
: public FileSystemCommandHandler
1087 RemoveDataPoolHandler()
1088 : FileSystemCommandHandler("fs rm_data_pool")
1095 const cmdmap_t
& cmdmap
,
1096 std::ostream
&ss
) override
1099 cmd_getval(cmdmap
, "pool", poolname
);
1101 std::string fs_name
;
1102 if (!cmd_getval(cmdmap
, "fs_name", fs_name
)
1103 || fs_name
.empty()) {
1104 ss
<< "Missing filesystem name";
1108 int64_t poolid
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(poolname
);
1111 poolid
= strict_strtol(poolname
.c_str(), 10, &err
);
1113 ss
<< "pool '" << poolname
<< "' does not exist";
1115 } else if (poolid
< 0) {
1116 ss
<< "invalid pool id '" << poolid
<< "'";
1121 ceph_assert(poolid
>= 0); // Checked by parsing code above
1123 auto fs
= fsmap
.get_filesystem(fs_name
);
1124 if (fs
->mds_map
.get_first_data_pool() == poolid
) {
1125 ss
<< "cannot remove default data pool";
1130 fsmap
.modify_filesystem(fs
->fscid
,
1131 [&r
, poolid
](std::shared_ptr
<Filesystem
> fs
)
1133 r
= fs
->mds_map
.remove_data_pool(poolid
);
1136 // It was already removed, succeed in silence
1138 } else if (r
== 0) {
1139 // We removed it, succeed
1140 ss
<< "removed data pool " << poolid
<< " from fsmap";
1143 // Unexpected error, bubble up
1150 * For commands with an alternative prefix
1152 template<typename T
>
1153 class AliasHandler
: public T
1155 std::string alias_prefix
;
1158 explicit AliasHandler(const std::string
&new_prefix
)
1161 alias_prefix
= new_prefix
;
1164 std::string
const &get_prefix() const override
{return alias_prefix
;}
1170 const cmdmap_t
& cmdmap
,
1171 std::ostream
&ss
) override
1173 return T::handle(mon
, fsmap
, op
, cmdmap
, ss
);
1177 class MirrorHandlerEnable
: public FileSystemCommandHandler
1180 MirrorHandlerEnable()
1181 : FileSystemCommandHandler("fs mirror enable")
1184 int handle(Monitor
*mon
,
1185 FSMap
&fsmap
, MonOpRequestRef op
,
1186 const cmdmap_t
& cmdmap
, std::ostream
&ss
) override
{
1187 std::string fs_name
;
1188 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
1189 ss
<< "Missing filesystem name";
1193 auto fs
= fsmap
.get_filesystem(fs_name
);
1194 if (fs
== nullptr) {
1195 ss
<< "Filesystem '" << fs_name
<< "' not found";
1199 if (fs
->mirror_info
.is_mirrored()) {
1203 auto f
= [](auto &&fs
) {
1204 fs
->mirror_info
.enable_mirroring();
1206 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
1212 class MirrorHandlerDisable
: public FileSystemCommandHandler
1215 MirrorHandlerDisable()
1216 : FileSystemCommandHandler("fs mirror disable")
1219 int handle(Monitor
*mon
,
1220 FSMap
&fsmap
, MonOpRequestRef op
,
1221 const cmdmap_t
& cmdmap
, std::ostream
&ss
) override
{
1222 std::string fs_name
;
1223 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
1224 ss
<< "Missing filesystem name";
1228 auto fs
= fsmap
.get_filesystem(fs_name
);
1229 if (fs
== nullptr) {
1230 ss
<< "Filesystem '" << fs_name
<< "' not found";
1234 if (!fs
->mirror_info
.is_mirrored()) {
1238 auto f
= [](auto &&fs
) {
1239 fs
->mirror_info
.disable_mirroring();
1241 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
1247 class MirrorHandlerAddPeer
: public FileSystemCommandHandler
1250 MirrorHandlerAddPeer()
1251 : FileSystemCommandHandler("fs mirror peer_add")
1254 boost::optional
<std::pair
<string
, string
>>
1255 extract_remote_cluster_conf(const std::string
&spec
) {
1256 auto pos
= spec
.find("@");
1257 if (pos
== std::string_view::npos
) {
1258 return boost::optional
<std::pair
<string
, string
>>();
1261 auto client
= spec
.substr(0, pos
);
1262 auto cluster
= spec
.substr(pos
+1);
1264 return std::make_pair(client
, cluster
);
1267 bool peer_add(FSMap
&fsmap
, Filesystem::const_ref
&&fs
,
1268 const cmdmap_t
&cmdmap
, std::ostream
&ss
) {
1271 string remote_fs_name
;
1272 cmd_getval(cmdmap
, "uuid", peer_uuid
);
1273 cmd_getval(cmdmap
, "remote_cluster_spec", remote_spec
);
1274 cmd_getval(cmdmap
, "remote_fs_name", remote_fs_name
);
1276 // verify (and extract) remote cluster specification
1277 auto remote_conf
= extract_remote_cluster_conf(remote_spec
);
1279 ss
<< "invalid remote cluster spec -- should be <client>@<cluster>";
1283 if (fs
->mirror_info
.has_peer(peer_uuid
)) {
1284 ss
<< "peer already exists";
1287 if (fs
->mirror_info
.has_peer((*remote_conf
).first
, (*remote_conf
).second
,
1289 ss
<< "peer already exists";
1293 auto f
= [peer_uuid
, remote_conf
, remote_fs_name
](auto &&fs
) {
1294 fs
->mirror_info
.peer_add(peer_uuid
, (*remote_conf
).first
,
1295 (*remote_conf
).second
, remote_fs_name
);
1297 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
1301 int handle(Monitor
*mon
,
1302 FSMap
&fsmap
, MonOpRequestRef op
,
1303 const cmdmap_t
& cmdmap
, std::ostream
&ss
) override
{
1304 std::string fs_name
;
1305 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
1306 ss
<< "Missing filesystem name";
1310 auto fs
= fsmap
.get_filesystem(fs_name
);
1311 if (fs
== nullptr) {
1312 ss
<< "Filesystem '" << fs_name
<< "' not found";
1316 if (!fs
->mirror_info
.is_mirrored()) {
1317 ss
<< "Mirroring not enabled for filesystem '" << fs_name
<< "'";
1321 auto res
= peer_add(fsmap
, std::move(fs
), cmdmap
, ss
);
1330 class MirrorHandlerRemovePeer
: public FileSystemCommandHandler
1333 MirrorHandlerRemovePeer()
1334 : FileSystemCommandHandler("fs mirror peer_remove")
1337 bool peer_remove(FSMap
&fsmap
, Filesystem::const_ref
&&fs
,
1338 const cmdmap_t
&cmdmap
, std::ostream
&ss
) {
1340 cmd_getval(cmdmap
, "uuid", peer_uuid
);
1342 if (!fs
->mirror_info
.has_peer(peer_uuid
)) {
1343 ss
<< "cannot find peer with uuid: " << peer_uuid
;
1347 auto f
= [peer_uuid
](auto &&fs
) {
1348 fs
->mirror_info
.peer_remove(peer_uuid
);
1350 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
1354 int handle(Monitor
*mon
,
1355 FSMap
&fsmap
, MonOpRequestRef op
,
1356 const cmdmap_t
& cmdmap
, std::ostream
&ss
) override
{
1357 std::string fs_name
;
1358 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
1359 ss
<< "Missing filesystem name";
1363 auto fs
= fsmap
.get_filesystem(fs_name
);
1364 if (fs
== nullptr) {
1365 ss
<< "Filesystem '" << fs_name
<< "' not found";
1369 if (!fs
->mirror_info
.is_mirrored()) {
1370 ss
<< "Mirroring not enabled for filesystem '" << fs_name
<< "'";
1374 auto res
= peer_remove(fsmap
, std::move(fs
), cmdmap
, ss
);
1383 std::list
<std::shared_ptr
<FileSystemCommandHandler
> >
1384 FileSystemCommandHandler::load(Paxos
*paxos
)
1386 std::list
<std::shared_ptr
<FileSystemCommandHandler
> > handlers
;
1388 handlers
.push_back(std::make_shared
<SetHandler
>());
1389 handlers
.push_back(std::make_shared
<FailHandler
>());
1390 handlers
.push_back(std::make_shared
<FlagSetHandler
>());
1391 handlers
.push_back(std::make_shared
<CompatSetHandler
>());
1392 handlers
.push_back(std::make_shared
<RequiredClientFeaturesHandler
>());
1393 handlers
.push_back(std::make_shared
<AddDataPoolHandler
>(paxos
));
1394 handlers
.push_back(std::make_shared
<RemoveDataPoolHandler
>());
1395 handlers
.push_back(std::make_shared
<FsNewHandler
>(paxos
));
1396 handlers
.push_back(std::make_shared
<RemoveFilesystemHandler
>());
1397 handlers
.push_back(std::make_shared
<ResetFilesystemHandler
>());
1399 handlers
.push_back(std::make_shared
<SetDefaultHandler
>());
1400 handlers
.push_back(std::make_shared
<AliasHandler
<SetDefaultHandler
> >(
1402 handlers
.push_back(std::make_shared
<MirrorHandlerEnable
>());
1403 handlers
.push_back(std::make_shared
<MirrorHandlerDisable
>());
1404 handlers
.push_back(std::make_shared
<MirrorHandlerAddPeer
>());
1405 handlers
.push_back(std::make_shared
<MirrorHandlerRemovePeer
>());
1410 int FileSystemCommandHandler::_check_pool(
1412 const int64_t pool_id
,
1415 std::ostream
*ss
) const
1417 ceph_assert(ss
!= NULL
);
1419 const pg_pool_t
*pool
= osd_map
.get_pg_pool(pool_id
);
1421 *ss
<< "pool id '" << pool_id
<< "' does not exist";
1425 const string
& pool_name
= osd_map
.get_pool_name(pool_id
);
1427 if (pool
->is_erasure()) {
1428 if (type
== POOL_METADATA
) {
1429 *ss
<< "pool '" << pool_name
<< "' (id '" << pool_id
<< "')"
1430 << " is an erasure-coded pool. Use of erasure-coded pools"
1431 << " for CephFS metadata is not permitted";
1433 } else if (type
== POOL_DATA_DEFAULT
&& !force
) {
1434 *ss
<< "pool '" << pool_name
<< "' (id '" << pool_id
<< "')"
1435 " is an erasure-coded pool."
1436 " Use of an EC pool for the default data pool is discouraged;"
1437 " see the online CephFS documentation for more information."
1438 " Use --force to override.";
1440 } else if (!pool
->allows_ecoverwrites()) {
1441 // non-overwriteable EC pools are only acceptable with a cache tier overlay
1442 if (!pool
->has_tiers() || !pool
->has_read_tier() || !pool
->has_write_tier()) {
1443 *ss
<< "pool '" << pool_name
<< "' (id '" << pool_id
<< "')"
1444 << " is an erasure-coded pool, with no overwrite support";
1448 // That cache tier overlay must be writeback, not readonly (it's the
1449 // write operations like modify+truncate we care about support for)
1450 const pg_pool_t
*write_tier
= osd_map
.get_pg_pool(
1452 ceph_assert(write_tier
!= NULL
); // OSDMonitor shouldn't allow DNE tier
1453 if (write_tier
->cache_mode
== pg_pool_t::CACHEMODE_FORWARD
1454 || write_tier
->cache_mode
== pg_pool_t::CACHEMODE_READONLY
) {
1455 *ss
<< "EC pool '" << pool_name
<< "' has a write tier ("
1456 << osd_map
.get_pool_name(pool
->write_tier
)
1457 << ") that is configured "
1458 "to forward writes. Use a cache mode such as 'writeback' for "
1465 if (pool
->is_tier()) {
1466 *ss
<< " pool '" << pool_name
<< "' (id '" << pool_id
1467 << "') is already in use as a cache tier.";
1471 if (!force
&& !pool
->application_metadata
.empty() &&
1472 pool
->application_metadata
.count(
1473 pg_pool_t::APPLICATION_NAME_CEPHFS
) == 0) {
1474 *ss
<< " pool '" << pool_name
<< "' (id '" << pool_id
1475 << "') has a non-CephFS application enabled.";
1479 // Nothing special about this pool, so it is permissible
1483 int FileSystemCommandHandler::is_op_allowed(
1484 const MonOpRequestRef
& op
, const FSMap
& fsmap
, const cmdmap_t
& cmdmap
,
1485 std::ostream
&ss
) const
1488 cmd_getval(cmdmap
, "fs_name", fs_name
);
1490 // so that fsmap can be filtered and the original copy is untouched.
1491 FSMap fsmap_copy
= fsmap
;
1492 fsmap_copy
.filter(op
->get_session()->get_allowed_fs_names());
1494 auto fs
= fsmap_copy
.get_filesystem(fs_name
);
1495 if (fs
== nullptr) {
1496 /* let "fs rm" handle idempotent case where file system does not exist */
1497 if (!(get_prefix() == "fs rm" && fsmap
.get_filesystem(fs_name
) == nullptr)) {
1498 ss
<< "Filesystem not found: '" << fs_name
<< "'";
1503 if (!op
->get_session()->fs_name_capable(fs_name
, MON_CAP_W
)) {
1504 ss
<< "Permission denied: '" << fs_name
<< "'";