1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2017 Red Hat Ltd
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #include "OSDMonitor.h"
18 #include "FSCommands.h"
19 #include "MDSMonitor.h"
20 #include "MgrStatMonitor.h"
21 #include "mds/cephfs_features.h"
23 using TOPNSPC::common::cmd_getval
;
36 using ceph::bufferlist
;
39 using ceph::ErasureCodeInterfaceRef
;
40 using ceph::ErasureCodeProfile
;
41 using ceph::Formatter
;
42 using ceph::JSONFormatter
;
43 using ceph::make_message
;
44 using ceph::mono_clock
;
45 using ceph::mono_time
;
// Command handler registered under the prefix "fs flag set".
// Reads "flag_name", "val" and "yes_i_really_mean_it" from the command map.
// Only the "enable_multiple" flag is recognised here: its value is parsed as
// a boolean and stored with fsmap.set_enable_multiple(). Any other flag name
// writes an "Unknown flag" message to ss.
47 class FlagSetHandler
: public FileSystemCommandHandler
51 : FileSystemCommandHandler("fs flag set")
59 const cmdmap_t
& cmdmap
,
60 std::ostream
&ss
) override
63 cmd_getval(cmdmap
, "flag_name", flag_name
);
66 cmd_getval(cmdmap
, "val", flag_val
);
// NOTE(review): how "sure" is consumed is not visible in this excerpt.
69 cmd_getval(cmdmap
, "yes_i_really_mean_it", sure
);
71 if (flag_name
== "enable_multiple") {
72 bool flag_bool
= false;
// parse_bool() receives ss as well as the output flag.
73 int r
= parse_bool(flag_val
, &flag_bool
, ss
);
75 ss
<< "Invalid boolean value '" << flag_val
<< "'";
79 fsmap
.set_enable_multiple(flag_bool
);
82 ss
<< "Unknown flag '" << flag_name
<< "'";
// Command handler registered under the prefix "fs fail".
// Sets CEPH_MDSMAP_NOT_JOINABLE on the named filesystem's MDS map, then
// fails every gid listed in that map's mds_info. If any gid was failed, the
// OSD monitor's pending state is proposed.
88 class FailHandler
: public FileSystemCommandHandler
92 : FileSystemCommandHandler("fs fail")
100 const cmdmap_t
& cmdmap
,
101 std::ostream
& ss
) override
// The OSD monitor must be writeable before proceeding; otherwise the op is
// queued for retry through the MDS monitor.
103 if (!mon
->osdmon()->is_writeable()) {
104 // not allowed to write yet, so retry when we can
105 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
110 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
111 ss
<< "Missing filesystem name";
// NOTE(review): no null check on the lookup result is visible before
// fs->fscid / fs->mds_map are used below, while the compat and mirror
// handlers in this file report a not-found message. Confirm that a missing
// filesystem cannot reach this point.
115 auto fs
= fsmap
.get_filesystem(fs_name
);
// Mutation applied through modify_filesystem(): mark the map not joinable.
117 auto f
= [](auto fs
) {
118 fs
->mds_map
.set_flag(CEPH_MDSMAP_NOT_JOINABLE
);
120 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
// Collect the gids first, then fail them in a separate loop, so that
// mds_info is not being iterated while fail_mds_gid() runs.
122 std::vector
<mds_gid_t
> to_fail
;
123 for (const auto& p
: fs
->mds_map
.get_mds_info()) {
124 to_fail
.push_back(p
.first
);
127 for (const auto& gid
: to_fail
) {
128 mon
->mdsmon()->fail_mds_gid(fsmap
, gid
);
130 if (!to_fail
.empty()) {
131 mon
->osdmon()->propose_pending();
135 ss
<< " marked not joinable; MDS cannot join the cluster. All MDS ranks marked failed.";
// Command handler registered under the prefix "fs new".
// Resolves the "metadata" and "data" pool names against the OSD map,
// validates the request, tags both pools with the cephfs application, sets
// pool options on the metadata pool, creates the filesystem in fsmap and
// promotes a replacement MDS (if one is found) to rank 0.
// The handler keeps the Paxos pointer passed to its constructor and asserts
// that it is plugged when handling a command.
141 class FsNewHandler
: public FileSystemCommandHandler
144 explicit FsNewHandler(Paxos
*paxos
)
145 : FileSystemCommandHandler("fs new"), m_paxos(paxos
)
149 bool batched_propose() override
{
157 const cmdmap_t
& cmdmap
,
158 std::ostream
&ss
) override
160 ceph_assert(m_paxos
->is_plugged());
// Resolve the metadata pool name to a pool id.
162 string metadata_name
;
163 cmd_getval(cmdmap
, "metadata", metadata_name
);
164 int64_t metadata
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(metadata_name
);
166 ss
<< "pool '" << metadata_name
<< "' does not exist";
// Resolve the data pool name to a pool id.
171 cmd_getval(cmdmap
, "data", data_name
);
172 int64_t data
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(data_name
);
174 ss
<< "pool '" << data_name
<< "' does not exist";
// A data pool with id 0 is rejected (see message text).
178 ss
<< "pool '" << data_name
<< "' has id 0, which CephFS does not allow. Use another pool or recreate it to get a non-zero pool id.";
183 cmd_getval(cmdmap
, "fs_name", fs_name
);
184 if (fs_name
.empty()) {
185 // Ensure fs name is not empty so that we can implement
186 // commands that refer to FS by name in future.
187 ss
<< "Filesystem name may not be empty";
// A filesystem with this name already exists: compare its first data pool
// and its metadata pool with the requested ones.
191 if (fsmap
.get_filesystem(fs_name
)) {
192 auto fs
= fsmap
.get_filesystem(fs_name
);
193 if (*(fs
->mds_map
.get_data_pools().begin()) == data
194 && fs
->mds_map
.get_metadata_pool() == metadata
) {
195 // Identical FS created already, this is a no-op
196 ss
<< "filesystem '" << fs_name
<< "' already exists";
199 ss
<< "filesystem already exists with name '" << fs_name
<< "'";
205 cmd_getval(cmdmap
, "force", force
);
// NOTE(review): stat is dereferenced just below; a guard for a null result
// from get_pool_stat() is not visible in this excerpt - confirm.
207 const pool_stat_t
*stat
= mon
->mgrstatmon()->get_pool_stat(metadata
);
209 int64_t metadata_num_objects
= stat
->stats
.sum
.num_objects
;
// Unless "force" is given, the metadata pool must not contain objects.
210 if (!force
&& metadata_num_objects
> 0) {
211 ss
<< "pool '" << metadata_name
212 << "' already contains some objects. Use an empty pool instead.";
// More than one filesystem is only permitted when enable_multiple is set.
217 if (fsmap
.filesystem_count() > 0
218 && !fsmap
.get_enable_multiple()) {
219 ss
<< "Creation of multiple filesystems is disabled. To enable "
220 "this experimental feature, use 'ceph fs flag set enable_multiple "
// Check each existing filesystem for use of either requested pool.
225 for (auto& fs
: fsmap
.get_filesystems()) {
226 const std::vector
<int64_t> &data_pools
= fs
->mds_map
.get_data_pools();
230 "allow_dangerous_metadata_overlay", sure
);
232 if ((std::find(data_pools
.begin(), data_pools
.end(), data
) != data_pools
.end()
233 || fs
->mds_map
.get_metadata_pool() == metadata
)
// NOTE(review): this prints fs_name (the name requested for the new
// filesystem) rather than the name of the filesystem being iterated -
// confirm which one is intended.
235 ss
<< "Filesystem '" << fs_name
236 << "' is already using one of the specified RADOS pools. This should ONLY be done in emergencies and after careful reading of the documentation. Pass --allow-dangerous-metadata-overlay to permit this.";
// Optional explicit filesystem id; defaults to FS_CLUSTER_ID_NONE.
241 int64_t fscid
= FS_CLUSTER_ID_NONE
;
242 if (cmd_getval(cmdmap
, "fscid", fscid
)) {
244 ss
<< "Pass --force to create a file system with a specific ID";
247 if (fsmap
.filesystem_exists(fscid
)) {
248 ss
<< "filesystem already exists with id '" << fscid
<< "'";
253 pg_pool_t
const *data_pool
= mon
->osdmon()->osdmap
.get_pg_pool(data
);
254 ceph_assert(data_pool
!= NULL
); // Checked it existed above
255 pg_pool_t
const *metadata_pool
= mon
->osdmon()->osdmap
.get_pg_pool(metadata
);
256 ceph_assert(metadata_pool
!= NULL
); // Checked it existed above
// Run _check_pool() for each pool in its intended role.
258 int r
= _check_pool(mon
->osdmon()->osdmap
, data
, POOL_DATA_DEFAULT
, force
, &ss
);
263 r
= _check_pool(mon
->osdmon()->osdmap
, metadata
, POOL_METADATA
, force
, &ss
);
268 if (!mon
->osdmon()->is_writeable()) {
269 // not allowed to write yet, so retry when we can
270 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
// Tag both pools with the cephfs application, keyed by role and fs name.
273 mon
->osdmon()->do_application_enable(data
,
274 pg_pool_t::APPLICATION_NAME_CEPHFS
,
275 "data", fs_name
, true);
276 mon
->osdmon()->do_application_enable(metadata
,
277 pg_pool_t::APPLICATION_NAME_CEPHFS
,
278 "metadata", fs_name
, true);
// Metadata pool options: RECOVERY_PRIORITY=5, PG_NUM_MIN=16,
// PG_AUTOSCALE_BIAS=4.0.
279 mon
->osdmon()->do_set_pool_opt(metadata
,
280 pool_opts_t::RECOVERY_PRIORITY
,
281 static_cast<int64_t>(5));
282 mon
->osdmon()->do_set_pool_opt(metadata
,
283 pool_opts_t::PG_NUM_MIN
,
284 static_cast<int64_t>(16));
285 mon
->osdmon()->do_set_pool_opt(metadata
,
286 pool_opts_t::PG_AUTOSCALE_BIAS
,
287 static_cast<double>(4.0));
288 mon
->osdmon()->propose_pending();
290 bool recover
= false;
291 cmd_getval(cmdmap
, "recover", recover
);
293 // All checks passed, go ahead and create.
294 auto&& fs
= fsmap
.create_filesystem(fs_name
, metadata
, data
,
295 mon
->get_quorum_con_features(), fscid
, recover
);
297 ss
<< "new fs with metadata pool " << metadata
<< " and data pool " << data
;
303 // assign a standby to rank 0 to avoid health warnings
304 auto info
= fsmap
.find_replacement_for({fs
->fscid
, 0});
// NOTE(review): info is dereferenced below; presumably guarded by a check
// that is not visible in this excerpt - confirm.
307 mon
->clog
->info() << info
->human_name() << " assigned to filesystem "
308 << fs_name
<< " as rank 0";
309 fsmap
.promote(info
->global_id
, *fs
, 0);
// Command handler registered under the prefix "fs set".
// Reads "fs_name", "var" and "val" from the command map and dispatches on
// the variable name. Each recognised variable either updates the filesystem
// through fsmap.modify_filesystem() or only writes an informational message
// to ss. An unrecognised name produces "unknown variable".
319 class SetHandler
: public FileSystemCommandHandler
323 : FileSystemCommandHandler("fs set")
330 const cmdmap_t
& cmdmap
,
331 std::ostream
&ss
) override
334 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
335 ss
<< "Missing filesystem name";
// NOTE(review): no null check on the lookup result is visible before fs is
// dereferenced in the branches below - confirm a missing filesystem cannot
// reach this point.
339 auto fs
= fsmap
.get_filesystem(fs_name
);
341 if (!cmd_getval(cmdmap
, "var", var
) || var
.empty()) {
342 ss
<< "Invalid variable";
348 if (!cmd_getval(cmdmap
, "val", val
)) {
351 // we got a string. see if it contains an int.
// The integer parse is done once up front; branches that need an integer
// test interr themselves.
352 n
= strict_strtoll(val
.c_str(), 10, &interr
);
// --- max_mds: number of MDS ranks ---
353 if (var
== "max_mds") {
354 // NOTE: see also "mds set_max_mds", which can modify the same field.
355 if (interr
.length()) {
361 ss
<< "You must specify at least one MDS";
// Raising max_mds above 1 is refused while snapshots may exist that do not
// allow multi-MDS (see message text).
365 if (n
> 1 && n
> fs
->mds_map
.get_max_mds()) {
366 if (fs
->mds_map
.was_snaps_ever_allowed() &&
367 !fs
->mds_map
.allows_multimds_snaps()) {
368 ss
<< "multi-active MDS is not allowed while there are snapshots possibly created by pre-mimic MDS";
373 ss
<< "may not have more than " << MAX_MDS
<< " MDS ranks";
// Setting max_mds also clears the NOT_JOINABLE flag.
377 fsmap
.modify_filesystem(
379 [n
](std::shared_ptr
<Filesystem
> fs
)
381 fs
->mds_map
.clear_flag(CEPH_MDSMAP_NOT_JOINABLE
);
382 fs
->mds_map
.set_max_mds(n
);
// --- inline_data: boolean; enabling asks for extra confirmation ---
384 } else if (var
== "inline_data") {
385 bool enable_inline
= false;
386 int r
= parse_bool(val
, &enable_inline
, ss
);
392 bool confirm
= false;
393 cmd_getval(cmdmap
, "yes_i_really_really_mean_it", confirm
);
395 ss
<< "Inline data support is deprecated and will be removed in a future release. "
396 << "Add --yes-i-really-really-mean-it if you are certain you want this enabled.";
399 ss
<< "inline data enabled";
401 fsmap
.modify_filesystem(
403 [](std::shared_ptr
<Filesystem
> fs
)
405 fs
->mds_map
.set_inline_data_enabled(true);
408 ss
<< "inline data disabled";
409 fsmap
.modify_filesystem(
411 [](std::shared_ptr
<Filesystem
> fs
)
413 fs
->mds_map
.set_inline_data_enabled(false);
// --- balancer: val is stored as the metadata load balancer ---
416 } else if (var
== "balancer") {
418 ss
<< "unsetting the metadata load balancer";
420 ss
<< "setting the metadata load balancer to " << val
;
422 fsmap
.modify_filesystem(
424 [val
](std::shared_ptr
<Filesystem
> fs
)
426 fs
->mds_map
.set_balancer(val
);
// --- max_file_size: integer, not below CEPH_MIN_STRIPE_UNIT ---
429 } else if (var
== "max_file_size") {
430 if (interr
.length()) {
431 ss
<< var
<< " requires an integer value";
434 if (n
< CEPH_MIN_STRIPE_UNIT
) {
// NOTE(review): the emitted text reads "<var> must at least <N>"; it looks
// like the word "be" is missing from this user-facing message.
435 ss
<< var
<< " must at least " << CEPH_MIN_STRIPE_UNIT
;
438 fsmap
.modify_filesystem(
440 [n
](std::shared_ptr
<Filesystem
> fs
)
442 fs
->mds_map
.set_max_filesize(n
);
// --- allow_new_snaps: boolean toggle for new snapshots ---
444 } else if (var
== "allow_new_snaps") {
445 bool enable_snaps
= false;
446 int r
= parse_bool(val
, &enable_snaps
, ss
);
452 fsmap
.modify_filesystem(
454 [](std::shared_ptr
<Filesystem
> fs
)
456 fs
->mds_map
.clear_snaps_allowed();
458 ss
<< "disabled new snapshots";
460 fsmap
.modify_filesystem(
462 [](std::shared_ptr
<Filesystem
> fs
)
464 fs
->mds_map
.set_snaps_allowed();
466 ss
<< "enabled new snapshots";
// --- allow_multimds: deprecated, message only in the visible code ---
468 } else if (var
== "allow_multimds") {
469 ss
<< "Multiple MDS is always enabled. Use the max_mds"
470 << " parameter to control the number of active MDSs"
471 << " allowed. This command is DEPRECATED and will be"
472 << " REMOVED from future releases.";
// --- allow_multimds_snaps: boolean; needs a "confirm" argument ---
473 } else if (var
== "allow_multimds_snaps") {
475 int r
= parse_bool(val
, &enable
, ss
);
// The "confirm" argument must equal the literal string compared below.
481 if (!cmd_getval(cmdmap
, "confirm", confirm
) ||
482 confirm
!= "--yes-i-am-really-a-mds") {
483 ss
<< "Warning! This command is for MDS only. Do not run it manually";
488 ss
<< "enabled multimds with snapshot";
489 fsmap
.modify_filesystem(
491 [](std::shared_ptr
<Filesystem
> fs
)
493 fs
->mds_map
.set_multimds_snaps_allowed();
496 ss
<< "disabled multimds with snapshot";
497 fsmap
.modify_filesystem(
499 [](std::shared_ptr
<Filesystem
> fs
)
501 fs
->mds_map
.clear_multimds_snaps_allowed();
// --- allow_dirfrags: deprecated, message only in the visible code ---
504 } else if (var
== "allow_dirfrags") {
505 ss
<< "Directory fragmentation is now permanently enabled."
506 << " This command is DEPRECATED and will be REMOVED from future releases.";
// --- down: boolean; adjusts max_mds ---
507 } else if (var
== "down") {
508 bool is_down
= false;
509 int r
= parse_bool(val
, &is_down
, ss
);
514 ss
<< fs
->mds_map
.get_fs_name();
516 fsmap
.modify_filesystem(
518 [is_down
](std::shared_ptr
<Filesystem
> fs
)
// Presumably the "mark down" path: the current max_mds is saved with
// set_old_max_mds() and max_mds becomes 0 (the selecting condition is not
// visible here).
521 if (fs
->mds_map
.get_max_mds() > 0) {
522 fs
->mds_map
.set_old_max_mds();
523 fs
->mds_map
.set_max_mds(0);
524 } /* else already down! */
// Presumably the "mark up" path: restore the saved max_mds, using 1 when
// the saved value is 0.
526 mds_rank_t oldmax
= fs
->mds_map
.get_old_max_mds();
527 fs
->mds_map
.set_max_mds(oldmax
? oldmax
: 1);
532 ss
<< " marked down. ";
534 ss
<< " marked up, max_mds = " << fs
->mds_map
.get_max_mds();
// --- cluster_down / joinable: both control CEPH_MDSMAP_NOT_JOINABLE ---
536 } else if (var
== "cluster_down" || var
== "joinable") {
537 bool joinable
= true;
538 int r
= parse_bool(val
, &joinable
, ss
);
// "cluster_down" carries the opposite sense of "joinable", so invert it.
542 if (var
== "cluster_down") {
543 joinable
= !joinable
;
546 ss
<< fs
->mds_map
.get_fs_name();
548 fsmap
.modify_filesystem(
550 [joinable
](std::shared_ptr
<Filesystem
> fs
)
553 fs
->mds_map
.clear_flag(CEPH_MDSMAP_NOT_JOINABLE
);
555 fs
->mds_map
.set_flag(CEPH_MDSMAP_NOT_JOINABLE
);
560 ss
<< " marked joinable; MDS may join as newly active.";
562 ss
<< " marked not joinable; MDS cannot join as newly active.";
565 if (var
== "cluster_down") {
566 ss
<< " WARNING: cluster_down flag is deprecated and will be"
567 << " removed in a future version. Please use \"joinable\".";
// --- standby_count_wanted: non-negative integer ---
569 } else if (var
== "standby_count_wanted") {
570 if (interr
.length()) {
571 ss
<< var
<< " requires an integer value";
575 ss
<< var
<< " must be non-negative";
578 fsmap
.modify_filesystem(
580 [n
](std::shared_ptr
<Filesystem
> fs
)
582 fs
->mds_map
.set_standby_count_wanted(n
);
// --- session_timeout: integer, stored as uint32_t ---
584 } else if (var
== "session_timeout") {
585 if (interr
.length()) {
586 ss
<< var
<< " requires an integer value";
590 ss
<< var
<< " must be at least 30s";
593 fsmap
.modify_filesystem(
595 [n
](std::shared_ptr
<Filesystem
> fs
)
597 fs
->mds_map
.set_session_timeout((uint32_t)n
);
// --- session_autoclose: integer, stored as uint32_t ---
599 } else if (var
== "session_autoclose") {
600 if (interr
.length()) {
601 ss
<< var
<< " requires an integer value";
605 ss
<< var
<< " must be at least 30s";
608 fsmap
.modify_filesystem(
610 [n
](std::shared_ptr
<Filesystem
> fs
)
612 fs
->mds_map
.set_session_autoclose((uint32_t)n
);
// --- allow_standby_replay: boolean ---
614 } else if (var
== "allow_standby_replay") {
616 int r
= parse_bool(val
, &allow
, ss
);
622 if (!mon
->osdmon()->is_writeable()) {
623 // not allowed to write yet, so retry when we can
624 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
// Collect every daemon currently in STATE_STANDBY_REPLAY, then fail them.
// NOTE(review): the condition selecting this path (presumably "allow is
// false") is not visible in this excerpt - confirm.
627 std::vector
<mds_gid_t
> to_fail
;
628 for (const auto& [gid
, info
]: fs
->mds_map
.get_mds_info()) {
629 if (info
.state
== MDSMap::STATE_STANDBY_REPLAY
) {
630 to_fail
.push_back(gid
);
634 for (const auto& gid
: to_fail
) {
635 mon
->mdsmon()->fail_mds_gid(fsmap
, gid
);
637 if (!to_fail
.empty()) {
638 mon
->osdmon()->propose_pending();
642 auto f
= [allow
](auto& fs
) {
644 fs
->mds_map
.set_standby_replay_allowed();
646 fs
->mds_map
.clear_standby_replay_allowed();
649 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
// --- min_compat_client: deprecated; val is mapped to a release number ---
650 } else if (var
== "min_compat_client") {
651 auto vno
= ceph_release_from_name(val
.c_str());
653 ss
<< "version " << val
<< " is not recognized";
656 ss
<< "WARNING: setting min_compat_client is deprecated"
657 " and may not do what you want.\n"
658 "The oldest release to set is octopus.\n"
659 "Please migrate to `ceph fs required_client_features ...`.";
660 auto f
= [vno
](auto&& fs
) {
661 fs
->mds_map
.set_min_compat_client(vno
);
663 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
// Fallback for any variable name not matched above.
665 ss
<< "unknown variable " << var
;
// Command handler registered under the prefix "fs compat".
// Edits a copy of the filesystem's CompatSet according to "subop"
// (rm_compat, rm_incompat, add_compat, add_incompat) and "feature", then
// writes the copy back through fsmap.modify_filesystem().
// The command is rejected while the filesystem has any up MDS.
673 class CompatSetHandler
: public FileSystemCommandHandler
677 : FileSystemCommandHandler("fs compat")
685 const cmdmap_t
& cmdmap
,
686 std::ostream
&ss
) override
// The full set of accepted sub-operations.
688 static const std::set
<std::string
> subops
= {"rm_incompat", "rm_compat", "add_incompat", "add_compat"};
691 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
692 ss
<< "Missing filesystem name";
695 auto fs
= fsmap
.get_filesystem(fs_name
);
697 ss
<< "Not found: '" << fs_name
<< "'";
702 if (!cmd_getval(cmdmap
, "subop", subop
) || subops
.count(subop
) == 0) {
703 ss
<< "subop `" << subop
<< "' not recognized. Must be one of: " << subops
;
// Feature ids must be strictly positive.
708 if (!cmd_getval(cmdmap
, "feature", feature
) || feature
<= 0) {
709 ss
<< "Invalid feature";
713 if (fs
->mds_map
.get_num_up_mds() > 0) {
714 ss
<< "file system must be failed or down; use `ceph fs fail` to bring down";
// Work on a local copy; it is assigned back to the map at the end.
718 CompatSet cs
= fs
->mds_map
.compat
;
719 if (subop
== "rm_compat") {
720 if (cs
.compat
.contains(feature
)) {
721 ss
<< "removed compat feature " << feature
;
722 cs
.compat
.remove(feature
);
724 ss
<< "already removed compat feature " << feature
;
726 } else if (subop
== "rm_incompat") {
727 if (cs
.incompat
.contains(feature
)) {
728 ss
<< "removed incompat feature " << feature
;
729 cs
.incompat
.remove(feature
);
731 ss
<< "already removed incompat feature " << feature
;
// Adding a feature additionally requires a non-empty "feature_str" name.
733 } else if (subop
== "add_compat" || subop
== "add_incompat") {
735 if (!cmd_getval(cmdmap
, "feature_str", feature_str
) || feature_str
.empty()) {
736 ss
<< "adding a feature requires a feature string";
739 auto f
= CompatSet::Feature(feature
, feature_str
);
740 if (subop
== "add_compat") {
// If the id is already present, distinguish "same name" from "different
// name" in the message.
741 if (cs
.compat
.contains(feature
)) {
742 auto name
= cs
.compat
.get_name(feature
);
743 if (name
== feature_str
) {
744 ss
<< "feature already exists";
746 ss
<< "feature with differing name `" << name
<< "' exists";
751 ss
<< "added compat feature " << f
;
753 } else if (subop
== "add_incompat") {
754 if (cs
.incompat
.contains(feature
)) {
755 auto name
= cs
.incompat
.get_name(feature
);
756 if (name
== feature_str
) {
757 ss
<< "feature already exists";
759 ss
<< "feature with differing name `" << name
<< "' exists";
763 cs
.incompat
.insert(f
);
764 ss
<< "added incompat feature " << f
;
// Both else-asserts are unreachable given the subops validation above.
766 } else ceph_assert(0);
767 } else ceph_assert(0);
// Move the edited CompatSet into the lambda that assigns it to the map.
769 auto modifyf
= [cs
= std::move(cs
)](auto&& fs
) {
770 fs
->mds_map
.compat
= cs
;
773 fsmap
.modify_filesystem(fs
->fscid
, std::move(modifyf
));
// Command handler registered under the prefix "fs required_client_features".
// "subop" must be "add" or "rm"; "val" names the feature, either by name
// (cephfs_feature_from_name) or as a decimal id (strict_strtol). The feature
// is then added to or removed from the filesystem's required client
// features through fsmap.modify_filesystem().
778 class RequiredClientFeaturesHandler
: public FileSystemCommandHandler
781 RequiredClientFeaturesHandler()
782 : FileSystemCommandHandler("fs required_client_features")
790 const cmdmap_t
& cmdmap
,
791 std::ostream
&ss
) override
794 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
795 ss
<< "Missing filesystem name";
798 auto fs
= fsmap
.get_filesystem(fs_name
);
800 ss
<< "Not found: '" << fs_name
<< "'";
804 if (!cmd_getval(cmdmap
, "subop", subop
) ||
805 (subop
!= "add" && subop
!= "rm")) {
806 ss
<< "Must either add or rm a feature; " << subop
<< " is not recognized";
810 if (!cmd_getval(cmdmap
, "val", val
) || val
.empty()) {
811 ss
<< "Missing feature id/name";
// Try the symbolic feature name first.
815 int feature
= cephfs_feature_from_name(val
);
// Numeric parse of val; presumably only used when the name lookup fails
// (the guarding condition is not visible in this excerpt).
818 feature
= strict_strtol(val
.c_str(), 10, &err
);
820 ss
<< "Invalid feature name: " << val
;
// Accepted ids are 0..CEPHFS_FEATURE_MAX inclusive.
823 if (feature
< 0 || feature
> CEPHFS_FEATURE_MAX
) {
824 ss
<< "Invalid feature id: " << feature
;
829 if (subop
== "add") {
// ret is captured by reference so the lambda can report its outcome.
831 fsmap
.modify_filesystem(
833 [feature
, &ret
](auto&& fs
)
835 if (fs
->mds_map
.get_required_client_features().test(feature
))
837 fs
->mds_map
.add_required_client_feature(feature
);
841 ss
<< "added feature '" << cephfs_feature_name(feature
) << "' to required_client_features";
843 ss
<< "feature '" << cephfs_feature_name(feature
) << "' is already set";
// Removal path, mirroring the add path above.
847 fsmap
.modify_filesystem(
849 [feature
, &ret
](auto&& fs
)
851 if (!fs
->mds_map
.get_required_client_features().test(feature
))
853 fs
->mds_map
.remove_required_client_feature(feature
);
857 ss
<< "removed feature '" << cephfs_feature_name(feature
) << "' from required_client_features";
859 ss
<< "feature '" << cephfs_feature_name(feature
) << "' is already unset";
// Command handler registered under the prefix "fs add_data_pool".
// Resolves "pool" to a pool id, checks it with _check_pool() as an extra
// data pool, tags it with the cephfs application and adds it to the
// filesystem's data pools. A pool already on the filesystem is reported
// and treated as a no-op.
// The handler keeps the Paxos pointer passed to its constructor and asserts
// that it is plugged when handling a command.
867 class AddDataPoolHandler
: public FileSystemCommandHandler
870 explicit AddDataPoolHandler(Paxos
*paxos
)
871 : FileSystemCommandHandler("fs add_data_pool"), m_paxos(paxos
)
874 bool batched_propose() override
{
882 const cmdmap_t
& cmdmap
,
883 std::ostream
&ss
) override
885 ceph_assert(m_paxos
->is_plugged());
888 cmd_getval(cmdmap
, "pool", poolname
);
891 if (!cmd_getval(cmdmap
, "fs_name", fs_name
)
892 || fs_name
.empty()) {
893 ss
<< "Missing filesystem name";
// Look the pool up by name first.
897 int64_t poolid
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(poolname
);
// Numeric parse of the pool argument; presumably the fallback when the name
// lookup fails (the guarding condition is not visible in this excerpt).
900 poolid
= strict_strtol(poolname
.c_str(), 10, &err
);
902 ss
<< "pool '" << poolname
<< "' does not exist";
907 int r
= _check_pool(mon
->osdmon()->osdmap
, poolid
, POOL_DATA_EXTRA
, false, &ss
);
// NOTE(review): no null check on the lookup result is visible before
// fs->mds_map is used - confirm a missing filesystem cannot reach here.
912 auto fs
= fsmap
.get_filesystem(fs_name
);
913 // no-op when the data_pool already on fs
914 if (fs
->mds_map
.is_data_pool(poolid
)) {
915 ss
<< "data pool " << poolid
<< " is already on fs " << fs_name
;
919 if (!mon
->osdmon()->is_writeable()) {
920 // not allowed to write yet, so retry when we can
921 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
// Tag the pool with the cephfs application before adding it to the map.
924 mon
->osdmon()->do_application_enable(poolid
,
925 pg_pool_t::APPLICATION_NAME_CEPHFS
,
926 "data", fs_name
, true);
927 mon
->osdmon()->propose_pending();
929 fsmap
.modify_filesystem(
931 [poolid
](std::shared_ptr
<Filesystem
> fs
)
933 fs
->mds_map
.add_data_pool(poolid
);
936 ss
<< "added data pool " << poolid
<< " to fsmap";
// Command handler registered under the prefix "fs set-default".
// Looks up the filesystem named by "fs_name" and records its fscid as the
// legacy client fscid in fsmap. A "does not exist" message is written for
// an unknown name.
945 class SetDefaultHandler
: public FileSystemCommandHandler
949 : FileSystemCommandHandler("fs set-default")
956 const cmdmap_t
& cmdmap
,
957 std::ostream
&ss
) override
960 cmd_getval(cmdmap
, "fs_name", fs_name
);
961 auto fs
= fsmap
.get_filesystem(fs_name
);
963 ss
<< "filesystem '" << fs_name
<< "' does not exist";
967 fsmap
.set_legacy_client_fscid(fs
->fscid
);
// Command handler registered under the prefix "fs rm".
// Requires that the filesystem has no up MDS and that the caller passed
// "yes_i_really_mean_it". It clears the legacy client fscid if it pointed
// at this filesystem, fails the remaining daemons in its mds_info (asserted
// to be standby-replay) and erases the filesystem from fsmap.
972 class RemoveFilesystemHandler
: public FileSystemCommandHandler
975 RemoveFilesystemHandler()
976 : FileSystemCommandHandler("fs rm")
983 const cmdmap_t
& cmdmap
,
984 std::ostream
&ss
) override
986 /* We may need to blocklist ranks. */
987 if (!mon
->osdmon()->is_writeable()) {
988 // not allowed to write yet, so retry when we can
989 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
993 // Check caller has correctly named the FS to delete
994 // (redundant while there is only one FS, but command
995 // syntax should apply to multi-FS future)
997 cmd_getval(cmdmap
, "fs_name", fs_name
);
998 auto fs
= fsmap
.get_filesystem(fs_name
);
1000 // Consider absence success to make deletes idempotent
1001 ss
<< "filesystem '" << fs_name
<< "' does not exist";
1005 // Check that no MDS daemons are active
1006 if (fs
->mds_map
.get_num_up_mds() > 0) {
1007 ss
<< "all MDS daemons must be inactive/failed before removing filesystem. See `ceph fs fail`.";
1011 // Check for confirmation flag
1013 cmd_getval(cmdmap
, "yes_i_really_mean_it", sure
);
1015 ss
<< "this is a DESTRUCTIVE operation and will make data in your filesystem permanently" \
1016 " inaccessible. Add --yes-i-really-mean-it if you are sure you wish to continue.";
// Drop the legacy-client default if it referred to this filesystem.
1020 if (fsmap
.get_legacy_client_fscid() == fs
->fscid
) {
1021 fsmap
.set_legacy_client_fscid(FS_CLUSTER_ID_NONE
);
// Gather gids first, then fail them in a separate loop.
1024 std::vector
<mds_gid_t
> to_fail
;
1025 // There may be standby_replay daemons left here
1026 for (const auto &i
: fs
->mds_map
.get_mds_info()) {
1027 ceph_assert(i
.second
.state
== MDSMap::STATE_STANDBY_REPLAY
);
1028 to_fail
.push_back(i
.first
);
1031 for (const auto &gid
: to_fail
) {
1032 // Standby replays don't write, so it isn't important to
1033 // wait for an osdmap propose here: ignore return value.
1034 mon
->mdsmon()->fail_mds_gid(fsmap
, gid
);
1036 if (!to_fail
.empty()) {
1037 mon
->osdmon()->propose_pending(); /* maybe new blocklists */
1040 fsmap
.erase_filesystem(fs
->fscid
);
// Command handler registered under the prefix "fs reset".
// Requires that the named filesystem exists, has no up MDS and that the
// caller passed "yes_i_really_mean_it"; it then calls
// fsmap.reset_filesystem() for that filesystem's fscid.
1046 class ResetFilesystemHandler
: public FileSystemCommandHandler
1049 ResetFilesystemHandler()
1050 : FileSystemCommandHandler("fs reset")
1057 const cmdmap_t
& cmdmap
,
1058 std::ostream
&ss
) override
1061 cmd_getval(cmdmap
, "fs_name", fs_name
);
1062 auto fs
= fsmap
.get_filesystem(fs_name
);
1063 if (fs
== nullptr) {
1064 ss
<< "filesystem '" << fs_name
<< "' does not exist";
1065 // Unlike fs rm, we consider this case an error
1069 // Check that no MDS daemons are active
1070 if (fs
->mds_map
.get_num_up_mds() > 0) {
// NOTE(review): this message still directs users to the cluster_down flag,
// which the "fs set" handler in this file reports as deprecated in favour
// of "joinable"; the "fs rm" handler points at `ceph fs fail` instead.
// Consider aligning the wording.
1071 ss
<< "all MDS daemons must be inactive before resetting filesystem: set the cluster_down flag"
1072 " and use `ceph mds fail` to make this so";
1076 // Check for confirmation flag
1078 cmd_getval(cmdmap
, "yes_i_really_mean_it", sure
);
1080 ss
<< "this is a potentially destructive operation, only for use by experts in disaster recovery. "
1081 "Add --yes-i-really-mean-it if you are sure you wish to continue.";
1085 fsmap
.reset_filesystem(fs
->fscid
);
// Command handler registered under the prefix "fs rename".
// Renames the filesystem "fs_name" to "new_fs_name": re-tags every data
// pool and the metadata pool with the cephfs application under the new
// name, then sets the new name on the MDS map. Renaming is refused when
// mirroring is enabled on the filesystem, and a message is written when
// the new name is already in use.
// The handler keeps the Paxos pointer passed to its constructor and asserts
// that it is plugged when handling a command.
1091 class RenameFilesystemHandler
: public FileSystemCommandHandler
1094 explicit RenameFilesystemHandler(Paxos
*paxos
)
1095 : FileSystemCommandHandler("fs rename"), m_paxos(paxos
)
1099 bool batched_propose() override
{
1107 const cmdmap_t
& cmdmap
,
1108 std::ostream
&ss
) override
1110 ceph_assert(m_paxos
->is_plugged());
// Look up both the current and the desired name.
1113 cmd_getval(cmdmap
, "fs_name", fs_name
);
1114 auto fs
= fsmap
.get_filesystem(fs_name
);
1117 cmd_getval(cmdmap
, "new_fs_name", new_fs_name
);
1118 auto new_fs
= fsmap
.get_filesystem(new_fs_name
);
// Source filesystem missing: either it was already renamed (target exists)
// or the name is simply unknown.
1120 if (fs
== nullptr) {
1122 // make 'fs rename' idempotent
1123 ss
<< "File system may already have been renamed. Desired file system '"
1124 << new_fs_name
<< "' exists.";
1127 ss
<< "File system '" << fs_name
<< "' does not exist";
1133 ss
<< "Desired file system name '" << new_fs_name
<< "' already in use";
1137 if (fs
->mirror_info
.mirrored
) {
1138 ss
<< "Mirroring is enabled on file system '"<< fs_name
<< "'. Disable mirroring on the "
1139 "file system after ensuring it's OK to do so, and then retry to rename.";
1143 // Check for confirmation flag
1145 cmd_getval(cmdmap
, "yes_i_really_mean_it", sure
);
1147 ss
<< "this is a potentially disruptive operation, clients' cephx credentials need reauthorized "
1148 "to access the file system and its pools with the new name. "
1149 "Add --yes-i-really-mean-it if you are sure you wish to continue.";
1153 if (!mon
->osdmon()->is_writeable()) {
1154 // not allowed to write yet, so retry when we can
1155 mon
->osdmon()->wait_for_writeable(op
, new PaxosService::C_RetryMessage(mon
->mdsmon(), op
));
// Re-tag each data pool, then the metadata pool, under the new name.
1158 for (const auto p
: fs
->mds_map
.get_data_pools()) {
1159 mon
->osdmon()->do_application_enable(p
,
1160 pg_pool_t::APPLICATION_NAME_CEPHFS
,
1161 "data", new_fs_name
, true);
1164 mon
->osdmon()->do_application_enable(fs
->mds_map
.get_metadata_pool(),
1165 pg_pool_t::APPLICATION_NAME_CEPHFS
,
1166 "metadata", new_fs_name
, true);
1167 mon
->osdmon()->propose_pending();
// The new name is captured by value for the modify_filesystem() callback.
1169 auto f
= [new_fs_name
](auto fs
) {
1170 fs
->mds_map
.set_fs_name(new_fs_name
);
1172 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
1174 ss
<< "File system is renamed. cephx credentials authorized to "
1175 "old file system name need to be reauthorized to new file "
// Command handler registered under the prefix "fs rm_data_pool".
// Resolves "pool" to a pool id and removes it from the filesystem's data
// pools through fsmap.modify_filesystem(). The first (default) data pool
// cannot be removed.
1185 class RemoveDataPoolHandler
: public FileSystemCommandHandler
1188 RemoveDataPoolHandler()
1189 : FileSystemCommandHandler("fs rm_data_pool")
1196 const cmdmap_t
& cmdmap
,
1197 std::ostream
&ss
) override
1200 cmd_getval(cmdmap
, "pool", poolname
);
1202 std::string fs_name
;
1203 if (!cmd_getval(cmdmap
, "fs_name", fs_name
)
1204 || fs_name
.empty()) {
1205 ss
<< "Missing filesystem name";
// Look the pool up by name first.
1209 int64_t poolid
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(poolname
);
// Numeric parse of the pool argument; presumably the fallback when the name
// lookup fails (the guarding condition is not visible in this excerpt).
1212 poolid
= strict_strtol(poolname
.c_str(), 10, &err
);
1214 ss
<< "pool '" << poolname
<< "' does not exist";
1216 } else if (poolid
< 0) {
1217 ss
<< "invalid pool id '" << poolid
<< "'";
1222 ceph_assert(poolid
>= 0); // Checked by parsing code above
// NOTE(review): no null check on the lookup result is visible before
// fs->mds_map is used - confirm a missing filesystem cannot reach here.
1224 auto fs
= fsmap
.get_filesystem(fs_name
);
1225 if (fs
->mds_map
.get_first_data_pool() == poolid
) {
1226 ss
<< "cannot remove default data pool";
// r is captured by reference so the result of remove_data_pool() can be
// inspected after modify_filesystem() returns.
1231 fsmap
.modify_filesystem(fs
->fscid
,
1232 [&r
, poolid
](std::shared_ptr
<Filesystem
> fs
)
1234 r
= fs
->mds_map
.remove_data_pool(poolid
);
1237 // It was already removed, succeed in silence
1239 } else if (r
== 0) {
1240 // We removed it, succeed
1241 ss
<< "removed data pool " << poolid
<< " from fsmap";
1244 // Unexpected error, bubble up
1251 * For commands with an alternative prefix
// Wrapper that exposes an existing handler type T under a different command
// prefix. It derives from T, stores the alternative prefix in alias_prefix,
// returns it from get_prefix(), and forwards handle() unchanged to
// T::handle().
1253 template<typename T
>
1254 class AliasHandler
: public T
1256 std::string alias_prefix
;
1259 explicit AliasHandler(const std::string
&new_prefix
)
// The prefix is assigned in the constructor body.
1262 alias_prefix
= new_prefix
;
1265 std::string
const &get_prefix() const override
{return alias_prefix
;}
1271 const cmdmap_t
& cmdmap
,
1272 std::ostream
&ss
) override
// Pure delegation: the aliased command behaves exactly like T's.
1274 return T::handle(mon
, fsmap
, op
, cmdmap
, ss
);
1278 class MirrorHandlerEnable
: public FileSystemCommandHandler
1281 MirrorHandlerEnable()
1282 : FileSystemCommandHandler("fs mirror enable")
1285 int handle(Monitor
*mon
,
1286 FSMap
&fsmap
, MonOpRequestRef op
,
1287 const cmdmap_t
& cmdmap
, std::ostream
&ss
) override
{
1288 std::string fs_name
;
1289 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
1290 ss
<< "Missing filesystem name";
1294 auto fs
= fsmap
.get_filesystem(fs_name
);
1295 if (fs
== nullptr) {
1296 ss
<< "Filesystem '" << fs_name
<< "' not found";
1300 if (fs
->mirror_info
.is_mirrored()) {
1304 auto f
= [](auto &&fs
) {
1305 fs
->mirror_info
.enable_mirroring();
1307 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
1313 class MirrorHandlerDisable
: public FileSystemCommandHandler
1316 MirrorHandlerDisable()
1317 : FileSystemCommandHandler("fs mirror disable")
1320 int handle(Monitor
*mon
,
1321 FSMap
&fsmap
, MonOpRequestRef op
,
1322 const cmdmap_t
& cmdmap
, std::ostream
&ss
) override
{
1323 std::string fs_name
;
1324 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
1325 ss
<< "Missing filesystem name";
1329 auto fs
= fsmap
.get_filesystem(fs_name
);
1330 if (fs
== nullptr) {
1331 ss
<< "Filesystem '" << fs_name
<< "' not found";
1335 if (!fs
->mirror_info
.is_mirrored()) {
1339 auto f
= [](auto &&fs
) {
1340 fs
->mirror_info
.disable_mirroring();
1342 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
1348 class MirrorHandlerAddPeer
: public FileSystemCommandHandler
1351 MirrorHandlerAddPeer()
1352 : FileSystemCommandHandler("fs mirror peer_add")
1355 boost::optional
<std::pair
<string
, string
>>
1356 extract_remote_cluster_conf(const std::string
&spec
) {
1357 auto pos
= spec
.find("@");
1358 if (pos
== std::string_view::npos
) {
1359 return boost::optional
<std::pair
<string
, string
>>();
1362 auto client
= spec
.substr(0, pos
);
1363 auto cluster
= spec
.substr(pos
+1);
1365 return std::make_pair(client
, cluster
);
1368 bool peer_add(FSMap
&fsmap
, Filesystem::const_ref
&&fs
,
1369 const cmdmap_t
&cmdmap
, std::ostream
&ss
) {
1372 string remote_fs_name
;
1373 cmd_getval(cmdmap
, "uuid", peer_uuid
);
1374 cmd_getval(cmdmap
, "remote_cluster_spec", remote_spec
);
1375 cmd_getval(cmdmap
, "remote_fs_name", remote_fs_name
);
1377 // verify (and extract) remote cluster specification
1378 auto remote_conf
= extract_remote_cluster_conf(remote_spec
);
1380 ss
<< "invalid remote cluster spec -- should be <client>@<cluster>";
1384 if (fs
->mirror_info
.has_peer(peer_uuid
)) {
1385 ss
<< "peer already exists";
1388 if (fs
->mirror_info
.has_peer((*remote_conf
).first
, (*remote_conf
).second
,
1390 ss
<< "peer already exists";
1394 auto f
= [peer_uuid
, remote_conf
, remote_fs_name
](auto &&fs
) {
1395 fs
->mirror_info
.peer_add(peer_uuid
, (*remote_conf
).first
,
1396 (*remote_conf
).second
, remote_fs_name
);
1398 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
1402 int handle(Monitor
*mon
,
1403 FSMap
&fsmap
, MonOpRequestRef op
,
1404 const cmdmap_t
& cmdmap
, std::ostream
&ss
) override
{
1405 std::string fs_name
;
1406 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
1407 ss
<< "Missing filesystem name";
1411 auto fs
= fsmap
.get_filesystem(fs_name
);
1412 if (fs
== nullptr) {
1413 ss
<< "Filesystem '" << fs_name
<< "' not found";
1417 if (!fs
->mirror_info
.is_mirrored()) {
1418 ss
<< "Mirroring not enabled for filesystem '" << fs_name
<< "'";
1422 auto res
= peer_add(fsmap
, std::move(fs
), cmdmap
, ss
);
1431 class MirrorHandlerRemovePeer
: public FileSystemCommandHandler
1434 MirrorHandlerRemovePeer()
1435 : FileSystemCommandHandler("fs mirror peer_remove")
1438 bool peer_remove(FSMap
&fsmap
, Filesystem::const_ref
&&fs
,
1439 const cmdmap_t
&cmdmap
, std::ostream
&ss
) {
1441 cmd_getval(cmdmap
, "uuid", peer_uuid
);
1443 if (!fs
->mirror_info
.has_peer(peer_uuid
)) {
1444 ss
<< "cannot find peer with uuid: " << peer_uuid
;
1448 auto f
= [peer_uuid
](auto &&fs
) {
1449 fs
->mirror_info
.peer_remove(peer_uuid
);
1451 fsmap
.modify_filesystem(fs
->fscid
, std::move(f
));
1455 int handle(Monitor
*mon
,
1456 FSMap
&fsmap
, MonOpRequestRef op
,
1457 const cmdmap_t
& cmdmap
, std::ostream
&ss
) override
{
1458 std::string fs_name
;
1459 if (!cmd_getval(cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
1460 ss
<< "Missing filesystem name";
1464 auto fs
= fsmap
.get_filesystem(fs_name
);
1465 if (fs
== nullptr) {
1466 ss
<< "Filesystem '" << fs_name
<< "' not found";
1470 if (!fs
->mirror_info
.is_mirrored()) {
1471 ss
<< "Mirroring not enabled for filesystem '" << fs_name
<< "'";
1475 auto res
= peer_remove(fsmap
, std::move(fs
), cmdmap
, ss
);
1484 std::list
<std::shared_ptr
<FileSystemCommandHandler
> >
1485 FileSystemCommandHandler::load(Paxos
*paxos
)
1487 std::list
<std::shared_ptr
<FileSystemCommandHandler
> > handlers
;
1489 handlers
.push_back(std::make_shared
<SetHandler
>());
1490 handlers
.push_back(std::make_shared
<FailHandler
>());
1491 handlers
.push_back(std::make_shared
<FlagSetHandler
>());
1492 handlers
.push_back(std::make_shared
<CompatSetHandler
>());
1493 handlers
.push_back(std::make_shared
<RequiredClientFeaturesHandler
>());
1494 handlers
.push_back(std::make_shared
<AddDataPoolHandler
>(paxos
));
1495 handlers
.push_back(std::make_shared
<RemoveDataPoolHandler
>());
1496 handlers
.push_back(std::make_shared
<FsNewHandler
>(paxos
));
1497 handlers
.push_back(std::make_shared
<RemoveFilesystemHandler
>());
1498 handlers
.push_back(std::make_shared
<ResetFilesystemHandler
>());
1499 handlers
.push_back(std::make_shared
<RenameFilesystemHandler
>(paxos
));
1501 handlers
.push_back(std::make_shared
<SetDefaultHandler
>());
1502 handlers
.push_back(std::make_shared
<AliasHandler
<SetDefaultHandler
> >(
1504 handlers
.push_back(std::make_shared
<MirrorHandlerEnable
>());
1505 handlers
.push_back(std::make_shared
<MirrorHandlerDisable
>());
1506 handlers
.push_back(std::make_shared
<MirrorHandlerAddPeer
>());
1507 handlers
.push_back(std::make_shared
<MirrorHandlerRemovePeer
>());
1512 int FileSystemCommandHandler::_check_pool(
1514 const int64_t pool_id
,
1517 std::ostream
*ss
) const
1519 ceph_assert(ss
!= NULL
);
1521 const pg_pool_t
*pool
= osd_map
.get_pg_pool(pool_id
);
1523 *ss
<< "pool id '" << pool_id
<< "' does not exist";
1527 const string
& pool_name
= osd_map
.get_pool_name(pool_id
);
1529 if (pool
->is_erasure()) {
1530 if (type
== POOL_METADATA
) {
1531 *ss
<< "pool '" << pool_name
<< "' (id '" << pool_id
<< "')"
1532 << " is an erasure-coded pool. Use of erasure-coded pools"
1533 << " for CephFS metadata is not permitted";
1535 } else if (type
== POOL_DATA_DEFAULT
&& !force
) {
1536 *ss
<< "pool '" << pool_name
<< "' (id '" << pool_id
<< "')"
1537 " is an erasure-coded pool."
1538 " Use of an EC pool for the default data pool is discouraged;"
1539 " see the online CephFS documentation for more information."
1540 " Use --force to override.";
1542 } else if (!pool
->allows_ecoverwrites()) {
1543 // non-overwriteable EC pools are only acceptable with a cache tier overlay
1544 if (!pool
->has_tiers() || !pool
->has_read_tier() || !pool
->has_write_tier()) {
1545 *ss
<< "pool '" << pool_name
<< "' (id '" << pool_id
<< "')"
1546 << " is an erasure-coded pool, with no overwrite support";
1550 // That cache tier overlay must be writeback, not readonly (it's the
1551 // write operations like modify+truncate we care about support for)
1552 const pg_pool_t
*write_tier
= osd_map
.get_pg_pool(
1554 ceph_assert(write_tier
!= NULL
); // OSDMonitor shouldn't allow DNE tier
1555 if (write_tier
->cache_mode
== pg_pool_t::CACHEMODE_FORWARD
1556 || write_tier
->cache_mode
== pg_pool_t::CACHEMODE_READONLY
) {
1557 *ss
<< "EC pool '" << pool_name
<< "' has a write tier ("
1558 << osd_map
.get_pool_name(pool
->write_tier
)
1559 << ") that is configured "
1560 "to forward writes. Use a cache mode such as 'writeback' for "
1567 if (pool
->is_tier()) {
1568 *ss
<< " pool '" << pool_name
<< "' (id '" << pool_id
1569 << "') is already in use as a cache tier.";
1573 if (!force
&& !pool
->application_metadata
.empty() &&
1574 pool
->application_metadata
.count(
1575 pg_pool_t::APPLICATION_NAME_CEPHFS
) == 0) {
1576 *ss
<< " pool '" << pool_name
<< "' (id '" << pool_id
1577 << "') has a non-CephFS application enabled.";
1581 // Nothing special about this pool, so it is permissible
1585 int FileSystemCommandHandler::is_op_allowed(
1586 const MonOpRequestRef
& op
, const FSMap
& fsmap
, const cmdmap_t
& cmdmap
,
1587 std::ostream
&ss
) const
1590 cmd_getval(cmdmap
, "fs_name", fs_name
);
1592 // so that fsmap can filtered and the original copy is untouched.
1593 FSMap fsmap_copy
= fsmap
;
1594 fsmap_copy
.filter(op
->get_session()->get_allowed_fs_names());
1596 auto fs
= fsmap_copy
.get_filesystem(fs_name
);
1597 if (fs
== nullptr) {
1598 auto prefix
= get_prefix();
1599 /* let "fs rm" and "fs rename" handle idempotent cases where file systems do not exist */
1600 if (!(prefix
== "fs rm" || prefix
== "fs rename") && fsmap
.get_filesystem(fs_name
) == nullptr) {
1601 ss
<< "Filesystem not found: '" << fs_name
<< "'";
1606 if (!op
->get_session()->fs_name_capable(fs_name
, MON_CAP_W
)) {
1607 ss
<< "Permission denied: '" << fs_name
<< "'";