1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2017 Red Hat Ltd
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 #include "OSDMonitor.h"
17 #include "PGMonitor.h"
19 #include "FSCommands.h"
20 #include "MDSMonitor.h"
// Warning text streamed back to the caller when a command that enables an
// experimental feature is issued without "--yes-i-really-mean-it".
// Adjacent string literals are concatenated verbatim by the compiler, so each
// fragment carries its own trailing space to keep the sentences separated.
static const std::string EXPERIMENTAL_WARNING(
  "Warning! This feature is experimental. "
  "It may cause problems up to and including data loss. "
  "Consult the documentation at ceph.com, and if unsure, do not proceed. "
  "Add --yes-i-really-mean-it if you are certain.");
// Handler registered under the command prefix "fs flag set".
// Reads "flag_name", "val" and "confirm" from cmdmap. The only flag handled
// in the visible code is "enable_multiple"; any other name produces an
// "Unknown flag" message on ss.
// NOTE(review): some lines of this definition (braces, return statements)
// are not visible in this listing; comments describe visible statements only.
31 class FlagSetHandler
: public FileSystemCommandHandler
35 : FileSystemCommandHandler("fs flag set")
43 map
<string
, cmd_vartype
> &cmdmap
,
44 std::stringstream
&ss
) override
47 cmd_getval(g_ceph_context
, cmdmap
, "flag_name", flag_name
);
50 cmd_getval(g_ceph_context
, cmdmap
, "val", flag_val
);
53 cmd_getval(g_ceph_context
, cmdmap
, "confirm", confirm
);
55 if (flag_name
== "enable_multiple") {
// The value is interpreted with parse_bool(); its result code is kept in r.
56 bool flag_bool
= false;
57 int r
= parse_bool(flag_val
, &flag_bool
, ss
);
59 ss
<< "Invalid boolean value '" << flag_val
<< "'";
// Turning the flag on is tied to the quorum advertising the JEWEL
// server feature bit.
63 bool jewel
= mon
->get_quorum_con_features() & CEPH_FEATURE_SERVER_JEWEL
;
64 if (flag_bool
&& !jewel
) {
65 ss
<< "Multiple-filesystems are forbidden until all mons are updated";
// Without the explicit confirmation string the experimental warning is
// written to ss.
68 if (confirm
!= "--yes-i-really-mean-it") {
69 ss
<< EXPERIMENTAL_WARNING
;
71 fsmap
.set_enable_multiple(flag_bool
);
74 ss
<< "Unknown flag '" << flag_name
<< "'";
// Handler registered under the command prefix "fs new".
// Reads "metadata", "force", "data", "fs_name" and "sure" from cmdmap,
// resolves both pool names to ids through the OSD map, runs a series of
// checks (each writes an explanatory message to ss), and finally calls
// fsmap.create_filesystem() with the name, the two pool ids and the quorum
// feature bits.
// NOTE(review): some lines of this definition (braces, conditions, return
// statements) are not visible in this listing; comments describe visible
// statements only.
80 class FsNewHandler
: public FileSystemCommandHandler
84 : FileSystemCommandHandler("fs new")
92 map
<string
, cmd_vartype
> &cmdmap
,
93 std::stringstream
&ss
) override
// Resolve the metadata pool name to a pool id.
96 cmd_getval(g_ceph_context
, cmdmap
, "metadata", metadata_name
);
97 int64_t metadata
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(metadata_name
);
99 ss
<< "pool '" << metadata_name
<< "' does not exist";
// A metadata pool that already holds objects is reported unless the
// "force" argument equals "--force".
104 cmd_getval(g_ceph_context
,cmdmap
, "force", force
);
105 const pool_stat_t
*stat
= mon
->pgservice
->get_pool_stat(metadata
);
107 int64_t metadata_num_objects
= stat
->stats
.sum
.num_objects
;
108 if (force
!= "--force" && metadata_num_objects
> 0) {
109 ss
<< "pool '" << metadata_name
110 << "' already contains some objects. Use an empty pool instead.";
// Resolve the data pool name to a pool id.
116 cmd_getval(g_ceph_context
, cmdmap
, "data", data_name
);
117 int64_t data
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(data_name
);
119 ss
<< "pool '" << data_name
<< "' does not exist";
123 ss
<< "pool '" << data_name
<< "' has id 0, which CephFS does not allow. Use another pool or recreate it to get a non-zero pool id.";
128 cmd_getval(g_ceph_context
, cmdmap
, "fs_name", fs_name
);
129 if (fs_name
.empty()) {
130 // Ensure fs name is not empty so that we can implement
131 // commands that refer to FS by name in future.
132 ss
<< "Filesystem name may not be empty";
// A filesystem of this name already exists: distinguish the case where its
// first data pool and metadata pool match the request from the case where
// they differ.
136 if (fsmap
.get_filesystem(fs_name
)) {
137 auto fs
= fsmap
.get_filesystem(fs_name
);
138 if (*(fs
->mds_map
.get_data_pools().begin()) == data
139 && fs
->mds_map
.get_metadata_pool() == metadata
) {
140 // Identical FS created already, this is a no-op
141 ss
<< "filesystem '" << fs_name
<< "' already exists";
144 ss
<< "filesystem already exists with name '" << fs_name
<< "'";
149 if (fsmap
.filesystem_count() > 0
150 && !fsmap
.get_enable_multiple()) {
151 ss
<< "Creation of multiple filesystems is disabled. To enable "
152 "this experimental feature, use 'ceph fs flag set enable_multiple "
// Look for an existing filesystem that already uses either requested pool;
// this is reported unless "sure" equals
// "--allow-dangerous-metadata-overlay".
157 for (auto fs
: fsmap
.get_filesystems()) {
158 const std::vector
<int64_t> &data_pools
= fs
->mds_map
.get_data_pools();
160 if ((std::find(data_pools
.begin(), data_pools
.end(), data
) != data_pools
.end()
161 || fs
->mds_map
.get_metadata_pool() == metadata
)
162 && ((!cmd_getval(g_ceph_context
, cmdmap
, "sure", sure
)
163 || sure
!= "--allow-dangerous-metadata-overlay"))) {
164 ss
<< "Filesystem '" << fs_name
165 << "' is already using one of the specified RADOS pools. This should ONLY be done in emergencies and after careful reading of the documentation. Pass --allow-dangerous-metadata-overlay to permit this.";
170 pg_pool_t
const *data_pool
= mon
->osdmon()->osdmap
.get_pg_pool(data
);
171 assert(data_pool
!= NULL
); // Checked it existed above
172 pg_pool_t
const *metadata_pool
= mon
->osdmon()->osdmap
.get_pg_pool(metadata
);
173 assert(metadata_pool
!= NULL
); // Checked it existed above
175 // we must make these checks before we even allow ourselves to *think*
176 // about requesting a proposal to the osdmonitor and bail out now if
177 // we believe we must. bailing out *after* we request the proposal is
178 // bad business as we could have changed the osdmon's state and ending up
179 // returning an error to the user.
// _check_pool() is called with metadata=false for the data pool and
// metadata=true for the metadata pool.
180 int r
= _check_pool(mon
->osdmon()->osdmap
, data
, false, &ss
);
185 r
= _check_pool(mon
->osdmon()->osdmap
, metadata
, true, &ss
);
190 // All checks passed, go ahead and create.
191 fsmap
.create_filesystem(fs_name
, metadata
, data
,
192 mon
->get_quorum_con_features());
193 ss
<< "new fs with metadata pool " << metadata
<< " and data pool " << data
;
// Handler registered under the command prefix "fs set".
// Looks up the filesystem named by "fs_name", reads "var" and "val" from
// cmdmap, and applies the change through fsmap.modify_filesystem() with a
// lambda that updates the filesystem's mds_map.
// Variables handled in the visible code: max_mds, inline_data, balancer,
// max_file_size, allow_new_snaps, allow_multimds, allow_dirfrags,
// cluster_down and standby_count_wanted; anything else produces
// "unknown variable" on ss.
// NOTE(review): some lines of this definition (braces, conditions, return
// statements) are not visible in this listing; comments describe visible
// statements only.
198 class SetHandler
: public FileSystemCommandHandler
202 : FileSystemCommandHandler("fs set")
209 map
<string
, cmd_vartype
> &cmdmap
,
210 std::stringstream
&ss
) override
213 if (!cmd_getval(g_ceph_context
, cmdmap
, "fs_name", fs_name
) || fs_name
.empty()) {
214 ss
<< "Missing filesystem name";
218 auto fs
= fsmap
.get_filesystem(fs_name
);
220 ss
<< "Not found: '" << fs_name
<< "'";
225 if (!cmd_getval(g_ceph_context
, cmdmap
, "var", var
) || var
.empty()) {
226 ss
<< "Invalid variable";
232 if (!cmd_getval(g_ceph_context
, cmdmap
, "val", val
)) {
// The value is parsed once as a base-10 integer; interr is non-empty when
// that parse failed, and the integer-valued variables below test it.
235 // we got a string. see if it contains an int.
236 n
= strict_strtoll(val
.c_str(), 10, &interr
);
237 if (var
== "max_mds") {
238 // NOTE: see also "mds set_max_mds", which can modify the same field.
239 if (interr
.length()) {
245 ss
<< "You must specify at least one MDS";
249 if (!fs
->mds_map
.allows_multimds() && n
> fs
->mds_map
.get_max_mds() &&
251 ss
<< "multi-MDS clusters are not enabled; set 'allow_multimds' to enable";
255 ss
<< "may not have more than " << MAX_MDS
<< " MDS ranks";
258 fsmap
.modify_filesystem(
260 [n
](std::shared_ptr
<Filesystem
> fs
)
262 fs
->mds_map
.set_max_mds(n
);
264 } else if (var
== "inline_data") {
265 bool enable_inline
= false;
266 int r
= parse_bool(val
, &enable_inline
, ss
);
// The experimental warning is written to ss when "confirm" is absent or
// is not the confirmation string.
273 if (!cmd_getval(g_ceph_context
, cmdmap
, "confirm", confirm
) ||
274 confirm
!= "--yes-i-really-mean-it") {
275 ss
<< EXPERIMENTAL_WARNING
;
278 ss
<< "inline data enabled";
280 fsmap
.modify_filesystem(
282 [](std::shared_ptr
<Filesystem
> fs
)
284 fs
->mds_map
.set_inline_data_enabled(true);
// The inline-data incompat feature is also added to the FSMap-wide
// compat set.
288 CompatSet c
= fsmap
.get_compat();
289 c
.incompat
.insert(MDS_FEATURE_INCOMPAT_INLINE
);
290 fsmap
.update_compat(c
);
292 ss
<< "inline data disabled";
293 fsmap
.modify_filesystem(
295 [](std::shared_ptr
<Filesystem
> fs
)
297 fs
->mds_map
.set_inline_data_enabled(false);
300 } else if (var
== "balancer") {
302 ss
<< "unsetting the metadata load balancer";
304 ss
<< "setting the metadata load balancer to " << val
;
306 fsmap
.modify_filesystem(
308 [val
](std::shared_ptr
<Filesystem
> fs
)
310 fs
->mds_map
.set_balancer(val
);
313 } else if (var
== "max_file_size") {
314 if (interr
.length()) {
315 ss
<< var
<< " requires an integer value";
// NOTE(review): the message below reads "must at least"; a "be" appears to
// be missing. Left unchanged here because it is runtime text.
318 if (n
< CEPH_MIN_STRIPE_UNIT
) {
319 ss
<< var
<< " must at least " << CEPH_MIN_STRIPE_UNIT
;
322 fsmap
.modify_filesystem(
324 [n
](std::shared_ptr
<Filesystem
> fs
)
326 fs
->mds_map
.set_max_filesize(n
);
328 } else if (var
== "allow_new_snaps") {
329 bool enable_snaps
= false;
330 int r
= parse_bool(val
, &enable_snaps
, ss
);
336 fsmap
.modify_filesystem(
338 [](std::shared_ptr
<Filesystem
> fs
)
340 fs
->mds_map
.clear_snaps_allowed();
342 ss
<< "disabled new snapshots";
// As with inline_data, the experimental warning is written to ss when the
// confirmation string is missing.
345 if (!cmd_getval(g_ceph_context
, cmdmap
, "confirm", confirm
) ||
346 confirm
!= "--yes-i-really-mean-it") {
347 ss
<< EXPERIMENTAL_WARNING
;
350 fsmap
.modify_filesystem(
352 [](std::shared_ptr
<Filesystem
> fs
)
354 fs
->mds_map
.set_snaps_allowed();
356 ss
<< "enabled new snapshots";
358 } else if (var
== "allow_multimds") {
359 bool enable_multimds
= false;
360 int r
= parse_bool(val
, &enable_multimds
, ss
);
365 if (!enable_multimds
) {
366 fsmap
.modify_filesystem(fs
->fscid
,
367 [](std::shared_ptr
<Filesystem
> fs
)
369 fs
->mds_map
.clear_multimds_allowed();
371 ss
<< "disallowed increasing the cluster size past 1";
373 fsmap
.modify_filesystem(
375 [](std::shared_ptr
<Filesystem
> fs
)
377 fs
->mds_map
.set_multimds_allowed();
379 ss
<< "enabled creation of more than 1 active MDS";
381 } else if (var
== "allow_dirfrags") {
382 bool enable_dirfrags
= false;
383 int r
= parse_bool(val
, &enable_dirfrags
, ss
);
388 if (!enable_dirfrags
) {
389 fsmap
.modify_filesystem(fs
->fscid
,
390 [](std::shared_ptr
<Filesystem
> fs
)
392 fs
->mds_map
.clear_dirfrags_allowed();
394 ss
<< "disallowed new directory fragmentation";
396 fsmap
.modify_filesystem(
398 [](std::shared_ptr
<Filesystem
> fs
)
400 fs
->mds_map
.set_dirfrags_allowed();
402 ss
<< "enabled directory fragmentation";
404 } else if (var
== "cluster_down") {
405 bool is_down
= false;
406 int r
= parse_bool(val
, &is_down
, ss
);
// The lambda captures is_down; both set_flag and clear_flag of
// CEPH_MDSMAP_DOWN are visible below (the selecting condition is not).
411 fsmap
.modify_filesystem(
413 [is_down
](std::shared_ptr
<Filesystem
> fs
)
416 fs
->mds_map
.set_flag(CEPH_MDSMAP_DOWN
);
418 fs
->mds_map
.clear_flag(CEPH_MDSMAP_DOWN
);
422 ss
<< "marked " << (is_down
? "down" : "up");
423 } else if (var
== "standby_count_wanted") {
424 if (interr
.length()) {
425 ss
<< var
<< " requires an integer value";
429 ss
<< var
<< " must be non-negative";
432 fsmap
.modify_filesystem(
434 [n
](std::shared_ptr
<Filesystem
> fs
)
436 fs
->mds_map
.set_standby_count_wanted(n
);
439 ss
<< "unknown variable " << var
;
// Handler registered under the command prefix "fs add_data_pool".
// Reads "pool" and "fs_name" from cmdmap, resolves the pool to an id,
// validates it with _check_pool() (metadata=false) and adds it to the
// filesystem's mds_map through fsmap.modify_filesystem().
// NOTE(review): some lines of this definition (braces, conditions, return
// statements) are not visible in this listing; comments describe visible
// statements only.
447 class AddDataPoolHandler
: public FileSystemCommandHandler
451 : FileSystemCommandHandler("fs add_data_pool")
458 map
<string
, cmd_vartype
> &cmdmap
,
459 std::stringstream
&ss
) override
462 cmd_getval(g_ceph_context
, cmdmap
, "pool", poolname
);
465 if (!cmd_getval(g_ceph_context
, cmdmap
, "fs_name", fs_name
)
466 || fs_name
.empty()) {
467 ss
<< "Missing filesystem name";
471 auto fs
= fsmap
.get_filesystem(fs_name
);
473 ss
<< "Not found: '" << fs_name
<< "'";
// The pool argument is looked up by name; the visible code also parses it
// as a base-10 pool id with strict_strtol().
477 int64_t poolid
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(poolname
);
480 poolid
= strict_strtol(poolname
.c_str(), 10, &err
);
482 ss
<< "pool '" << poolname
<< "' does not exist";
487 int r
= _check_pool(mon
->osdmon()->osdmap
, poolid
, false, &ss
);
492 // no-op when the data_pool already on fs
493 if (fs
->mds_map
.is_data_pool(poolid
)) {
494 ss
<< "data pool " << poolid
<< " is already on fs " << fs_name
;
498 fsmap
.modify_filesystem(
500 [poolid
](std::shared_ptr
<Filesystem
> fs
)
502 fs
->mds_map
.add_data_pool(poolid
);
505 ss
<< "added data pool " << poolid
<< " to fsmap";
// Handler registered under the command prefix "fs set-default".
// Looks up the filesystem named by "fs_name" and passes its fscid to
// fsmap.set_legacy_client_fscid(); a "does not exist" message is written to
// ss for the lookup-failure path.
// NOTE(review): some lines of this definition (braces, conditions, return
// statements) are not visible in this listing.
511 class SetDefaultHandler
: public FileSystemCommandHandler
515 : FileSystemCommandHandler("fs set-default")
522 map
<string
, cmd_vartype
> &cmdmap
,
523 std::stringstream
&ss
) override
526 cmd_getval(g_ceph_context
, cmdmap
, "fs_name", fs_name
);
527 auto fs
= fsmap
.get_filesystem(fs_name
);
529 ss
<< "filesystem '" << fs_name
<< "' does not exist";
533 fsmap
.set_legacy_client_fscid(fs
->fscid
);
// Handler registered under the command prefix "fs rm".
// Looks up the filesystem named by "fs_name", checks that it has no up MDS
// daemons and that "sure" equals "--yes-i-really-mean-it", clears the legacy
// client fscid if it pointed at this filesystem, fails the remaining MDS
// gids, and erases the filesystem from the FSMap.
// NOTE(review): some lines of this definition (braces, conditions, return
// statements) are not visible in this listing; comments describe visible
// statements only.
538 class RemoveFilesystemHandler
: public FileSystemCommandHandler
541 RemoveFilesystemHandler()
542 : FileSystemCommandHandler("fs rm")
549 map
<string
, cmd_vartype
> &cmdmap
,
550 std::stringstream
&ss
) override
552 // Check caller has correctly named the FS to delete
553 // (redundant while there is only one FS, but command
554 // syntax should apply to multi-FS future)
556 cmd_getval(g_ceph_context
, cmdmap
, "fs_name", fs_name
);
557 auto fs
= fsmap
.get_filesystem(fs_name
);
559 // Consider absence success to make deletes idempotent
560 ss
<< "filesystem '" << fs_name
<< "' does not exist";
564 // Check that no MDS daemons are active
565 if (fs
->mds_map
.get_num_up_mds() > 0) {
566 ss
<< "all MDS daemons must be inactive before removing filesystem";
570 // Check for confirmation flag
572 cmd_getval(g_ceph_context
, cmdmap
, "sure", sure
);
573 if (sure
!= "--yes-i-really-mean-it") {
574 ss
<< "this is a DESTRUCTIVE operation and will make data in your filesystem permanently" \
575 " inaccessible. Add --yes-i-really-mean-it if you are sure you wish to continue.";
// Drop the legacy-client default if it referred to the filesystem being
// removed.
579 if (fsmap
.get_legacy_client_fscid() == fs
->fscid
) {
580 fsmap
.set_legacy_client_fscid(FS_CLUSTER_ID_NONE
);
// Gids are collected into to_fail first and failed in a second loop.
583 std::vector
<mds_gid_t
> to_fail
;
584 // There may be standby_replay daemons left here
585 for (const auto &i
: fs
->mds_map
.get_mds_info()) {
586 assert(i
.second
.state
== MDSMap::STATE_STANDBY_REPLAY
);
587 to_fail
.push_back(i
.first
);
590 for (const auto &gid
: to_fail
) {
591 // Standby replays don't write, so it isn't important to
592 // wait for an osdmap propose here: ignore return value.
593 mon
->mdsmon()->fail_mds_gid(gid
);
596 fsmap
.erase_filesystem(fs
->fscid
);
// Handler registered under the command prefix "fs reset".
// Looks up the filesystem named by "fs_name", checks that it has no up MDS
// daemons and that "sure" equals "--yes-i-really-mean-it", then calls
// fsmap.reset_filesystem() with the filesystem's fscid.
// NOTE(review): some lines of this definition (braces, conditions, return
// statements) are not visible in this listing; comments describe visible
// statements only.
602 class ResetFilesystemHandler
: public FileSystemCommandHandler
605 ResetFilesystemHandler()
606 : FileSystemCommandHandler("fs reset")
613 map
<string
, cmd_vartype
> &cmdmap
,
614 std::stringstream
&ss
) override
617 cmd_getval(g_ceph_context
, cmdmap
, "fs_name", fs_name
);
618 auto fs
= fsmap
.get_filesystem(fs_name
);
620 ss
<< "filesystem '" << fs_name
<< "' does not exist";
621 // Unlike fs rm, we consider this case an error
625 // Check that no MDS daemons are active
626 if (fs
->mds_map
.get_num_up_mds() > 0) {
627 ss
<< "all MDS daemons must be inactive before resetting filesystem: set the cluster_down flag"
628 " and use `ceph mds fail` to make this so";
632 // Check for confirmation flag
634 cmd_getval(g_ceph_context
, cmdmap
, "sure", sure
);
635 if (sure
!= "--yes-i-really-mean-it") {
636 ss
<< "this is a potentially destructive operation, only for use by experts in disaster recovery. "
637 "Add --yes-i-really-mean-it if you are sure you wish to continue.";
641 fsmap
.reset_filesystem(fs
->fscid
);
// Handler registered under the command prefix "fs rm_data_pool".
// Reads "pool" and "fs_name" from cmdmap, resolves the pool to an id,
// reports an attempt to remove the filesystem's first data pool, and
// otherwise calls mds_map.remove_data_pool() inside
// fsmap.modify_filesystem(), capturing the result code in r.
// NOTE(review): some lines of this definition (braces, conditions, return
// statements) are not visible in this listing; comments describe visible
// statements only.
647 class RemoveDataPoolHandler
: public FileSystemCommandHandler
650 RemoveDataPoolHandler()
651 : FileSystemCommandHandler("fs rm_data_pool")
658 map
<string
, cmd_vartype
> &cmdmap
,
659 std::stringstream
&ss
) override
662 cmd_getval(g_ceph_context
, cmdmap
, "pool", poolname
);
665 if (!cmd_getval(g_ceph_context
, cmdmap
, "fs_name", fs_name
)
666 || fs_name
.empty()) {
667 ss
<< "Missing filesystem name";
671 auto fs
= fsmap
.get_filesystem(fs_name
);
673 ss
<< "Not found: '" << fs_name
<< "'";
// The pool argument is looked up by name; the visible code also parses it
// as a base-10 pool id with strict_strtol().
677 int64_t poolid
= mon
->osdmon()->osdmap
.lookup_pg_pool_name(poolname
);
680 poolid
= strict_strtol(poolname
.c_str(), 10, &err
);
682 ss
<< "pool '" << poolname
<< "' does not exist";
684 } else if (poolid
< 0) {
685 ss
<< "invalid pool id '" << poolid
<< "'";
690 assert(poolid
>= 0); // Checked by parsing code above
692 if (fs
->mds_map
.get_first_data_pool() == poolid
) {
693 ss
<< "cannot remove default data pool";
// r is captured by reference so the lambda can hand back the result of
// remove_data_pool().
699 fsmap
.modify_filesystem(fs
->fscid
,
700 [&r
, poolid
](std::shared_ptr
<Filesystem
> fs
)
702 r
= fs
->mds_map
.remove_data_pool(poolid
);
705 // It was already removed, succeed in silence
708 // We removed it, succeed
709 ss
<< "removed data pool " << poolid
<< " from fsmap";
712 // Unexpected error, bubble up
720 * For commands that refer to a particular filesystem,
721 * enable wrapping to implement the legacy version of
722 * the command (like "mds add_data_pool" vs "fs add_data_pool")
724 * The wrapped handler must expect a fs_name argument in
// Wrapper that derives from a handler type T and exposes it under a
// different command prefix (stored in legacy_prefix and returned by
// get_prefix()). handle() takes the filesystem returned by
// fsmap.get_legacy_filesystem(), copies cmdmap, sets the copy's "fs_name"
// entry to that filesystem's name, and delegates to T::handle().
// NOTE(review): some lines of this definition (template header, braces,
// return statements) are not visible in this listing.
728 class LegacyHandler
: public T
730 std::string legacy_prefix
;
733 LegacyHandler(const std::string
&new_prefix
)
736 legacy_prefix
= new_prefix
;
739 std::string
const &get_prefix() override
{return legacy_prefix
;}
745 map
<string
, cmd_vartype
> &cmdmap
,
746 std::stringstream
&ss
) override
748 auto fs
= fsmap
.get_legacy_filesystem();
750 ss
<< "No filesystem configured";
// The caller's map is copied, so the injected "fs_name" is only seen by
// the wrapped handler.
753 std::map
<string
, cmd_vartype
> modified
= cmdmap
;
754 modified
["fs_name"] = fs
->mds_map
.get_fs_name();
755 return T::handle(mon
, fsmap
, op
, modified
, ss
);
760 * For commands with an alternative prefix
// Wrapper that derives from a handler type T and exposes it under an
// alternative command prefix (stored in alias_prefix and returned by
// get_prefix()). handle() forwards its arguments unchanged to T::handle().
// NOTE(review): some lines of this definition (template header, braces)
// are not visible in this listing.
763 class AliasHandler
: public T
765 std::string alias_prefix
;
768 AliasHandler(const std::string
&new_prefix
)
771 alias_prefix
= new_prefix
;
774 std::string
const &get_prefix() override
{return alias_prefix
;}
780 map
<string
, cmd_vartype
> &cmdmap
,
781 std::stringstream
&ss
) override
783 return T::handle(mon
, fsmap
, op
, cmdmap
, ss
);
// Builds the list of filesystem command handlers.
// Each handler is created with std::make_shared and appended to `handlers`.
// SetHandler, AddDataPoolHandler and RemoveDataPoolHandler are additionally
// registered through LegacyHandler under "mds ..." prefixes, and
// SetDefaultHandler is additionally registered through AliasHandler.
// NOTE(review): the end of this definition (the alias prefix string and the
// return statement) is not visible in this listing.
788 std::list
<std::shared_ptr
<FileSystemCommandHandler
> > FileSystemCommandHandler::load()
790 std::list
<std::shared_ptr
<FileSystemCommandHandler
> > handlers
;
792 handlers
.push_back(std::make_shared
<SetHandler
>());
793 handlers
.push_back(std::make_shared
<LegacyHandler
<SetHandler
> >("mds set"));
794 handlers
.push_back(std::make_shared
<FlagSetHandler
>());
795 handlers
.push_back(std::make_shared
<AddDataPoolHandler
>());
796 handlers
.push_back(std::make_shared
<LegacyHandler
<AddDataPoolHandler
> >(
797 "mds add_data_pool"));
798 handlers
.push_back(std::make_shared
<RemoveDataPoolHandler
>());
// The legacy remove command is registered under two spellings.
799 handlers
.push_back(std::make_shared
<LegacyHandler
<RemoveDataPoolHandler
> >(
800 "mds remove_data_pool"));
801 handlers
.push_back(std::make_shared
<LegacyHandler
<RemoveDataPoolHandler
> >(
802 "mds rm_data_pool"));
803 handlers
.push_back(std::make_shared
<FsNewHandler
>());
804 handlers
.push_back(std::make_shared
<RemoveFilesystemHandler
>());
805 handlers
.push_back(std::make_shared
<ResetFilesystemHandler
>());
807 handlers
.push_back(std::make_shared
<SetDefaultHandler
>());
808 handlers
.push_back(std::make_shared
<AliasHandler
<SetDefaultHandler
> >(
// Interprets bool_str as a boolean.
// Recognised spellings, per the comparisons below: "false", "no" or an
// integer 0 on one branch; "true", "yes" or an integer 1 on the other.
// Anything else writes "value must be false|no|0 or true|yes|1" to ss.
// `result` must not be null (asserted).
// NOTE(review): the assignments to *result and the return statements are
// not visible in this listing.
814 int FileSystemCommandHandler::parse_bool(
815 const std::string
&bool_str
,
819 assert(result
!= nullptr);
// Numeric spellings are detected with a strict base-10 parse; interr is
// empty only when the whole string parsed as an integer.
822 int64_t n
= strict_strtoll(bool_str
.c_str(), 10, &interr
);
824 if (bool_str
== "false" || bool_str
== "no"
825 || (interr
.length() == 0 && n
== 0)) {
828 } else if (bool_str
== "true" || bool_str
== "yes"
829 || (interr
.length() == 0 && n
== 1)) {
833 ss
<< "value must be false|no|0 or true|yes|1";
// Checks whether the pool identified by pool_id may be used by CephFS and
// writes an explanation to *ss for each problem found.
// Conditions reported in the visible code:
//  - the pool id is not present in osd_map;
//  - the pool is erasure-coded and `metadata` is true;
//  - the pool is erasure-coded without overwrite support and lacks a
//    tier, read tier or write tier;
//  - its write tier's cache mode is FORWARD or READONLY;
//  - the pool is itself a cache tier.
// NOTE(review): some parameter lines, braces and return statements of this
// definition are not visible in this listing.
838 int FileSystemCommandHandler::_check_pool(
840 const int64_t pool_id
,
842 std::stringstream
*ss
) const
846 const pg_pool_t
*pool
= osd_map
.get_pg_pool(pool_id
);
848 *ss
<< "pool id '" << pool_id
<< "' does not exist";
852 const string
& pool_name
= osd_map
.get_pool_name(pool_id
);
854 if (pool
->is_erasure() && metadata
) {
855 *ss
<< "pool '" << pool_name
<< "' (id '" << pool_id
<< "')"
856 << " is an erasure-coded pool. Use of erasure-coded pools"
857 << " for CephFS metadata is not permitted";
859 } else if (pool
->is_erasure() && !pool
->allows_ecoverwrites()) {
860 // non-overwriteable EC pools are only acceptable with a cache tier overlay
861 if (!pool
->has_tiers() || !pool
->has_read_tier() || !pool
->has_write_tier()) {
862 *ss
<< "pool '" << pool_name
<< "' (id '" << pool_id
<< "')"
863 << " is an erasure-coded pool, with no overwrite support";
867 // That cache tier overlay must be writeback, not readonly (it's the
868 // write operations like modify+truncate we care about support for)
869 const pg_pool_t
*write_tier
= osd_map
.get_pg_pool(
871 assert(write_tier
!= NULL
); // OSDMonitor shouldn't allow DNE tier
872 if (write_tier
->cache_mode
== pg_pool_t::CACHEMODE_FORWARD
873 || write_tier
->cache_mode
== pg_pool_t::CACHEMODE_READONLY
) {
874 *ss
<< "EC pool '" << pool_name
<< "' has a write tier ("
875 << osd_map
.get_pool_name(pool
->write_tier
)
876 << ") that is configured "
877 "to forward writes. Use a cache mode such as 'writeback' for "
// This check is made for every pool, not only erasure-coded ones.
883 if (pool
->is_tier()) {
884 *ss
<< " pool '" << pool_name
<< "' (id '" << pool_id
885 << "') is already in use as a cache tier.";
889 // Nothing special about this pool, so it is permissible