]> git.proxmox.com Git - ceph.git/blob - ceph/src/mon/FSCommands.cc
import 15.2.2 octopus source
[ceph.git] / ceph / src / mon / FSCommands.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2017 Red Hat Ltd
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #include "OSDMonitor.h"
17
18 #include "FSCommands.h"
19 #include "MDSMonitor.h"
20 #include "MgrStatMonitor.h"
21
22 using TOPNSPC::common::cmd_getval;
23
// Advisory text emitted when a command is run without
// --yes-i-really-mean-it.  Adjacent string literals are joined by the
// compiler with nothing in between, so every sentence except the last
// carries its own trailing space.
static const std::string EXPERIMENTAL_WARNING(
  "Warning! This feature is experimental. "
  "It may cause problems up to and including data loss. "
  "Consult the documentation at ceph.com, and if unsure, do not proceed. "
  "Add --yes-i-really-mean-it if you are certain.");
28
29
30
31 class FlagSetHandler : public FileSystemCommandHandler
32 {
33 public:
34 FlagSetHandler()
35 : FileSystemCommandHandler("fs flag set")
36 {
37 }
38
39 int handle(
40 Monitor *mon,
41 FSMap &fsmap,
42 MonOpRequestRef op,
43 const cmdmap_t& cmdmap,
44 std::stringstream &ss) override
45 {
46 string flag_name;
47 cmd_getval(cmdmap, "flag_name", flag_name);
48
49 string flag_val;
50 cmd_getval(cmdmap, "val", flag_val);
51
52 bool sure = false;
53 cmd_getval(cmdmap, "yes_i_really_mean_it", sure);
54
55 if (flag_name == "enable_multiple") {
56 bool flag_bool = false;
57 int r = parse_bool(flag_val, &flag_bool, ss);
58 if (r != 0) {
59 ss << "Invalid boolean value '" << flag_val << "'";
60 return r;
61 }
62
63 if (!sure) {
64 ss << EXPERIMENTAL_WARNING;
65 }
66 fsmap.set_enable_multiple(flag_bool);
67 return 0;
68 } else {
69 ss << "Unknown flag '" << flag_name << "'";
70 return -EINVAL;
71 }
72 }
73 };
74
75 class FailHandler : public FileSystemCommandHandler
76 {
77 public:
78 FailHandler()
79 : FileSystemCommandHandler("fs fail")
80 {
81 }
82
83 int handle(
84 Monitor* mon,
85 FSMap& fsmap,
86 MonOpRequestRef op,
87 const cmdmap_t& cmdmap,
88 std::stringstream& ss) override
89 {
90 if (!mon->osdmon()->is_writeable()) {
91 // not allowed to write yet, so retry when we can
92 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
93 return -EAGAIN;
94 }
95
96 std::string fs_name;
97 if (!cmd_getval(cmdmap, "fs_name", fs_name) || fs_name.empty()) {
98 ss << "Missing filesystem name";
99 return -EINVAL;
100 }
101
102 auto fs = fsmap.get_filesystem(fs_name);
103 if (fs == nullptr) {
104 ss << "Not found: '" << fs_name << "'";
105 return -ENOENT;
106 }
107
108 auto f = [](auto fs) {
109 fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
110 };
111 fsmap.modify_filesystem(fs->fscid, std::move(f));
112
113 std::vector<mds_gid_t> to_fail;
114 for (const auto& p : fs->mds_map.get_mds_info()) {
115 to_fail.push_back(p.first);
116 }
117
118 for (const auto& gid : to_fail) {
119 mon->mdsmon()->fail_mds_gid(fsmap, gid);
120 }
121 if (!to_fail.empty()) {
122 mon->osdmon()->propose_pending();
123 }
124
125 ss << fs_name;
126 ss << " marked not joinable; MDS cannot join the cluster. All MDS ranks marked failed.";
127
128 return 0;
129 }
130 };
131
132 class FsNewHandler : public FileSystemCommandHandler
133 {
134 public:
135 explicit FsNewHandler(Paxos *paxos)
136 : FileSystemCommandHandler("fs new"), m_paxos(paxos)
137 {
138 }
139
140 bool batched_propose() override {
141 return true;
142 }
143
144 int handle(
145 Monitor *mon,
146 FSMap &fsmap,
147 MonOpRequestRef op,
148 const cmdmap_t& cmdmap,
149 std::stringstream &ss) override
150 {
151 ceph_assert(m_paxos->is_plugged());
152
153 string metadata_name;
154 cmd_getval(cmdmap, "metadata", metadata_name);
155 int64_t metadata = mon->osdmon()->osdmap.lookup_pg_pool_name(metadata_name);
156 if (metadata < 0) {
157 ss << "pool '" << metadata_name << "' does not exist";
158 return -ENOENT;
159 }
160
161 string data_name;
162 cmd_getval(cmdmap, "data", data_name);
163 int64_t data = mon->osdmon()->osdmap.lookup_pg_pool_name(data_name);
164 if (data < 0) {
165 ss << "pool '" << data_name << "' does not exist";
166 return -ENOENT;
167 }
168 if (data == 0) {
169 ss << "pool '" << data_name << "' has id 0, which CephFS does not allow. Use another pool or recreate it to get a non-zero pool id.";
170 return -EINVAL;
171 }
172
173 string fs_name;
174 cmd_getval(cmdmap, "fs_name", fs_name);
175 if (fs_name.empty()) {
176 // Ensure fs name is not empty so that we can implement
177 // commmands that refer to FS by name in future.
178 ss << "Filesystem name may not be empty";
179 return -EINVAL;
180 }
181
182 if (fsmap.get_filesystem(fs_name)) {
183 auto fs = fsmap.get_filesystem(fs_name);
184 if (*(fs->mds_map.get_data_pools().begin()) == data
185 && fs->mds_map.get_metadata_pool() == metadata) {
186 // Identical FS created already, this is a no-op
187 ss << "filesystem '" << fs_name << "' already exists";
188 return 0;
189 } else {
190 ss << "filesystem already exists with name '" << fs_name << "'";
191 return -EINVAL;
192 }
193 }
194
195 bool force = false;
196 cmd_getval(cmdmap, "force", force);
197
198 const pool_stat_t *stat = mon->mgrstatmon()->get_pool_stat(metadata);
199 if (stat) {
200 int64_t metadata_num_objects = stat->stats.sum.num_objects;
201 if (!force && metadata_num_objects > 0) {
202 ss << "pool '" << metadata_name
203 << "' already contains some objects. Use an empty pool instead.";
204 return -EINVAL;
205 }
206 }
207
208 if (fsmap.filesystem_count() > 0
209 && !fsmap.get_enable_multiple()) {
210 ss << "Creation of multiple filesystems is disabled. To enable "
211 "this experimental feature, use 'ceph fs flag set enable_multiple "
212 "true'";
213 return -EINVAL;
214 }
215
216 for (auto& fs : fsmap.get_filesystems()) {
217 const std::vector<int64_t> &data_pools = fs->mds_map.get_data_pools();
218
219 bool sure = false;
220 cmd_getval(cmdmap,
221 "allow_dangerous_metadata_overlay", sure);
222
223 if ((std::find(data_pools.begin(), data_pools.end(), data) != data_pools.end()
224 || fs->mds_map.get_metadata_pool() == metadata)
225 && !sure) {
226 ss << "Filesystem '" << fs_name
227 << "' is already using one of the specified RADOS pools. This should ONLY be done in emergencies and after careful reading of the documentation. Pass --allow-dangerous-metadata-overlay to permit this.";
228 return -EEXIST;
229 }
230 }
231
232 pg_pool_t const *data_pool = mon->osdmon()->osdmap.get_pg_pool(data);
233 ceph_assert(data_pool != NULL); // Checked it existed above
234 pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata);
235 ceph_assert(metadata_pool != NULL); // Checked it existed above
236
237 int r = _check_pool(mon->osdmon()->osdmap, data, POOL_DATA_DEFAULT, force, &ss);
238 if (r < 0) {
239 return r;
240 }
241
242 r = _check_pool(mon->osdmon()->osdmap, metadata, POOL_METADATA, force, &ss);
243 if (r < 0) {
244 return r;
245 }
246
247 if (!mon->osdmon()->is_writeable()) {
248 // not allowed to write yet, so retry when we can
249 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
250 return -EAGAIN;
251 }
252 mon->osdmon()->do_application_enable(data,
253 pg_pool_t::APPLICATION_NAME_CEPHFS,
254 "data", fs_name, true);
255 mon->osdmon()->do_application_enable(metadata,
256 pg_pool_t::APPLICATION_NAME_CEPHFS,
257 "metadata", fs_name, true);
258 mon->osdmon()->do_set_pool_opt(metadata,
259 pool_opts_t::RECOVERY_PRIORITY,
260 static_cast<int64_t>(5));
261 mon->osdmon()->do_set_pool_opt(metadata,
262 pool_opts_t::PG_NUM_MIN,
263 static_cast<int64_t>(16));
264 mon->osdmon()->do_set_pool_opt(metadata,
265 pool_opts_t::PG_AUTOSCALE_BIAS,
266 static_cast<double>(4.0));
267 mon->osdmon()->propose_pending();
268
269 // All checks passed, go ahead and create.
270 auto&& fs = fsmap.create_filesystem(fs_name, metadata, data,
271 mon->get_quorum_con_features());
272
273 ss << "new fs with metadata pool " << metadata << " and data pool " << data;
274
275 // assign a standby to rank 0 to avoid health warnings
276 auto info = fsmap.find_replacement_for({fs->fscid, 0});
277
278 if (info) {
279 mon->clog->info() << info->human_name() << " assigned to filesystem "
280 << fs_name << " as rank 0";
281 fsmap.promote(info->global_id, *fs, 0);
282 }
283
284 return 0;
285 }
286
287 private:
288 Paxos *m_paxos;
289 };
290
291 class SetHandler : public FileSystemCommandHandler
292 {
293 public:
294 SetHandler()
295 : FileSystemCommandHandler("fs set")
296 {}
297
298 int handle(
299 Monitor *mon,
300 FSMap &fsmap,
301 MonOpRequestRef op,
302 const cmdmap_t& cmdmap,
303 std::stringstream &ss) override
304 {
305 std::string fs_name;
306 if (!cmd_getval(cmdmap, "fs_name", fs_name) || fs_name.empty()) {
307 ss << "Missing filesystem name";
308 return -EINVAL;
309 }
310
311 auto fs = fsmap.get_filesystem(fs_name);
312 if (fs == nullptr) {
313 ss << "Not found: '" << fs_name << "'";
314 return -ENOENT;
315 }
316
317 string var;
318 if (!cmd_getval(cmdmap, "var", var) || var.empty()) {
319 ss << "Invalid variable";
320 return -EINVAL;
321 }
322 string val;
323 string interr;
324 int64_t n = 0;
325 if (!cmd_getval(cmdmap, "val", val)) {
326 return -EINVAL;
327 }
328 // we got a string. see if it contains an int.
329 n = strict_strtoll(val.c_str(), 10, &interr);
330 if (var == "max_mds") {
331 // NOTE: see also "mds set_max_mds", which can modify the same field.
332 if (interr.length()) {
333 ss << interr;
334 return -EINVAL;
335 }
336
337 if (n <= 0) {
338 ss << "You must specify at least one MDS";
339 return -EINVAL;
340 }
341 if (n > 1 && n > fs->mds_map.get_max_mds()) {
342 if (fs->mds_map.was_snaps_ever_allowed() &&
343 !fs->mds_map.allows_multimds_snaps()) {
344 ss << "multi-active MDS is not allowed while there are snapshots possibly created by pre-mimic MDS";
345 return -EINVAL;
346 }
347 }
348 if (n > MAX_MDS) {
349 ss << "may not have more than " << MAX_MDS << " MDS ranks";
350 return -EINVAL;
351 }
352
353 fsmap.modify_filesystem(
354 fs->fscid,
355 [n](std::shared_ptr<Filesystem> fs)
356 {
357 fs->mds_map.clear_flag(CEPH_MDSMAP_NOT_JOINABLE);
358 fs->mds_map.set_max_mds(n);
359 });
360 } else if (var == "inline_data") {
361 bool enable_inline = false;
362 int r = parse_bool(val, &enable_inline, ss);
363 if (r != 0) {
364 return r;
365 }
366
367 if (enable_inline) {
368 bool confirm = false;
369 cmd_getval(cmdmap, "yes_i_really_really_mean_it", confirm);
370 if (!confirm) {
371 ss << "Inline data support is deprecated and will be removed in a future release. "
372 << "Add --yes-i-really-really-mean-it if you are certain you want this enabled.";
373 return -EPERM;
374 }
375 ss << "inline data enabled";
376
377 fsmap.modify_filesystem(
378 fs->fscid,
379 [](std::shared_ptr<Filesystem> fs)
380 {
381 fs->mds_map.set_inline_data_enabled(true);
382 });
383
384 // Update `compat`
385 CompatSet c = fsmap.get_compat();
386 c.incompat.insert(MDS_FEATURE_INCOMPAT_INLINE);
387 fsmap.update_compat(c);
388 } else {
389 ss << "inline data disabled";
390 fsmap.modify_filesystem(
391 fs->fscid,
392 [](std::shared_ptr<Filesystem> fs)
393 {
394 fs->mds_map.set_inline_data_enabled(false);
395 });
396 }
397 } else if (var == "balancer") {
398 if (val.empty()) {
399 ss << "unsetting the metadata load balancer";
400 } else {
401 ss << "setting the metadata load balancer to " << val;
402 }
403 fsmap.modify_filesystem(
404 fs->fscid,
405 [val](std::shared_ptr<Filesystem> fs)
406 {
407 fs->mds_map.set_balancer(val);
408 });
409 return true;
410 } else if (var == "max_file_size") {
411 if (interr.length()) {
412 ss << var << " requires an integer value";
413 return -EINVAL;
414 }
415 if (n < CEPH_MIN_STRIPE_UNIT) {
416 ss << var << " must at least " << CEPH_MIN_STRIPE_UNIT;
417 return -ERANGE;
418 }
419 fsmap.modify_filesystem(
420 fs->fscid,
421 [n](std::shared_ptr<Filesystem> fs)
422 {
423 fs->mds_map.set_max_filesize(n);
424 });
425 } else if (var == "allow_new_snaps") {
426 bool enable_snaps = false;
427 int r = parse_bool(val, &enable_snaps, ss);
428 if (r != 0) {
429 return r;
430 }
431
432 if (!enable_snaps) {
433 fsmap.modify_filesystem(
434 fs->fscid,
435 [](std::shared_ptr<Filesystem> fs)
436 {
437 fs->mds_map.clear_snaps_allowed();
438 });
439 ss << "disabled new snapshots";
440 } else {
441 fsmap.modify_filesystem(
442 fs->fscid,
443 [](std::shared_ptr<Filesystem> fs)
444 {
445 fs->mds_map.set_snaps_allowed();
446 });
447 ss << "enabled new snapshots";
448 }
449 } else if (var == "allow_multimds") {
450 ss << "Multiple MDS is always enabled. Use the max_mds"
451 << " parameter to control the number of active MDSs"
452 << " allowed. This command is DEPRECATED and will be"
453 << " REMOVED from future releases.";
454 } else if (var == "allow_multimds_snaps") {
455 bool enable = false;
456 int r = parse_bool(val, &enable, ss);
457 if (r != 0) {
458 return r;
459 }
460
461 string confirm;
462 if (!cmd_getval(cmdmap, "confirm", confirm) ||
463 confirm != "--yes-i-am-really-a-mds") {
464 ss << "Warning! This command is for MDS only. Do not run it manually";
465 return -EPERM;
466 }
467
468 if (enable) {
469 ss << "enabled multimds with snapshot";
470 fsmap.modify_filesystem(
471 fs->fscid,
472 [](std::shared_ptr<Filesystem> fs)
473 {
474 fs->mds_map.set_multimds_snaps_allowed();
475 });
476 } else {
477 ss << "disabled multimds with snapshot";
478 fsmap.modify_filesystem(
479 fs->fscid,
480 [](std::shared_ptr<Filesystem> fs)
481 {
482 fs->mds_map.clear_multimds_snaps_allowed();
483 });
484 }
485 } else if (var == "allow_dirfrags") {
486 ss << "Directory fragmentation is now permanently enabled."
487 << " This command is DEPRECATED and will be REMOVED from future releases.";
488 } else if (var == "down") {
489 bool is_down = false;
490 int r = parse_bool(val, &is_down, ss);
491 if (r != 0) {
492 return r;
493 }
494
495 ss << fs->mds_map.get_fs_name();
496
497 fsmap.modify_filesystem(
498 fs->fscid,
499 [is_down](std::shared_ptr<Filesystem> fs)
500 {
501 if (is_down) {
502 if (fs->mds_map.get_max_mds() > 0) {
503 fs->mds_map.set_old_max_mds();
504 fs->mds_map.set_max_mds(0);
505 } /* else already down! */
506 } else {
507 mds_rank_t oldmax = fs->mds_map.get_old_max_mds();
508 fs->mds_map.set_max_mds(oldmax ? oldmax : 1);
509 }
510 });
511
512 if (is_down) {
513 ss << " marked down. ";
514 } else {
515 ss << " marked up, max_mds = " << fs->mds_map.get_max_mds();
516 }
517 } else if (var == "cluster_down" || var == "joinable") {
518 bool joinable = true;
519 int r = parse_bool(val, &joinable, ss);
520 if (r != 0) {
521 return r;
522 }
523 if (var == "cluster_down") {
524 joinable = !joinable;
525 }
526
527 ss << fs->mds_map.get_fs_name();
528
529 fsmap.modify_filesystem(
530 fs->fscid,
531 [joinable](std::shared_ptr<Filesystem> fs)
532 {
533 if (joinable) {
534 fs->mds_map.clear_flag(CEPH_MDSMAP_NOT_JOINABLE);
535 } else {
536 fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
537 }
538 });
539
540 if (joinable) {
541 ss << " marked joinable; MDS may join as newly active.";
542 } else {
543 ss << " marked not joinable; MDS cannot join as newly active.";
544 }
545
546 if (var == "cluster_down") {
547 ss << " WARNING: cluster_down flag is deprecated and will be"
548 << " removed in a future version. Please use \"joinable\".";
549 }
550 } else if (var == "standby_count_wanted") {
551 if (interr.length()) {
552 ss << var << " requires an integer value";
553 return -EINVAL;
554 }
555 if (n < 0) {
556 ss << var << " must be non-negative";
557 return -ERANGE;
558 }
559 fsmap.modify_filesystem(
560 fs->fscid,
561 [n](std::shared_ptr<Filesystem> fs)
562 {
563 fs->mds_map.set_standby_count_wanted(n);
564 });
565 } else if (var == "session_timeout") {
566 if (interr.length()) {
567 ss << var << " requires an integer value";
568 return -EINVAL;
569 }
570 if (n < 30) {
571 ss << var << " must be at least 30s";
572 return -ERANGE;
573 }
574 fsmap.modify_filesystem(
575 fs->fscid,
576 [n](std::shared_ptr<Filesystem> fs)
577 {
578 fs->mds_map.set_session_timeout((uint32_t)n);
579 });
580 } else if (var == "session_autoclose") {
581 if (interr.length()) {
582 ss << var << " requires an integer value";
583 return -EINVAL;
584 }
585 if (n < 30) {
586 ss << var << " must be at least 30s";
587 return -ERANGE;
588 }
589 fsmap.modify_filesystem(
590 fs->fscid,
591 [n](std::shared_ptr<Filesystem> fs)
592 {
593 fs->mds_map.set_session_autoclose((uint32_t)n);
594 });
595 } else if (var == "allow_standby_replay") {
596 bool allow = false;
597 int r = parse_bool(val, &allow, ss);
598 if (r != 0) {
599 return r;
600 }
601
602 auto f = [allow](auto& fs) {
603 if (allow) {
604 fs->mds_map.set_standby_replay_allowed();
605 } else {
606 fs->mds_map.clear_standby_replay_allowed();
607 }
608 };
609 fsmap.modify_filesystem(fs->fscid, std::move(f));
610 } else if (var == "min_compat_client") {
611 auto vno = ceph_release_from_name(val.c_str());
612 if (!vno) {
613 ss << "version " << val << " is not recognized";
614 return -EINVAL;
615 }
616 auto f = [vno](auto&& fs) {
617 fs->mds_map.set_min_compat_client(vno);
618 };
619 fsmap.modify_filesystem(fs->fscid, std::move(f));
620 } else {
621 ss << "unknown variable " << var;
622 return -EINVAL;
623 }
624
625 return 0;
626 }
627 };
628
629 class AddDataPoolHandler : public FileSystemCommandHandler
630 {
631 public:
632 explicit AddDataPoolHandler(Paxos *paxos)
633 : FileSystemCommandHandler("fs add_data_pool"), m_paxos(paxos)
634 {}
635
636 bool batched_propose() override {
637 return true;
638 }
639
640 int handle(
641 Monitor *mon,
642 FSMap &fsmap,
643 MonOpRequestRef op,
644 const cmdmap_t& cmdmap,
645 std::stringstream &ss) override
646 {
647 ceph_assert(m_paxos->is_plugged());
648
649 string poolname;
650 cmd_getval(cmdmap, "pool", poolname);
651
652 std::string fs_name;
653 if (!cmd_getval(cmdmap, "fs_name", fs_name)
654 || fs_name.empty()) {
655 ss << "Missing filesystem name";
656 return -EINVAL;
657 }
658
659 auto fs = fsmap.get_filesystem(fs_name);
660 if (fs == nullptr) {
661 ss << "Not found: '" << fs_name << "'";
662 return -ENOENT;
663 }
664
665 int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
666 if (poolid < 0) {
667 string err;
668 poolid = strict_strtol(poolname.c_str(), 10, &err);
669 if (err.length()) {
670 ss << "pool '" << poolname << "' does not exist";
671 return -ENOENT;
672 }
673 }
674
675 int r = _check_pool(mon->osdmon()->osdmap, poolid, POOL_DATA_EXTRA, false, &ss);
676 if (r != 0) {
677 return r;
678 }
679
680 // no-op when the data_pool already on fs
681 if (fs->mds_map.is_data_pool(poolid)) {
682 ss << "data pool " << poolid << " is already on fs " << fs_name;
683 return 0;
684 }
685
686 if (!mon->osdmon()->is_writeable()) {
687 // not allowed to write yet, so retry when we can
688 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
689 return -EAGAIN;
690 }
691 mon->osdmon()->do_application_enable(poolid,
692 pg_pool_t::APPLICATION_NAME_CEPHFS,
693 "data", fs_name, true);
694 mon->osdmon()->propose_pending();
695
696 fsmap.modify_filesystem(
697 fs->fscid,
698 [poolid](std::shared_ptr<Filesystem> fs)
699 {
700 fs->mds_map.add_data_pool(poolid);
701 });
702
703 ss << "added data pool " << poolid << " to fsmap";
704
705 return 0;
706 }
707
708 private:
709 Paxos *m_paxos;
710 };
711
712 class SetDefaultHandler : public FileSystemCommandHandler
713 {
714 public:
715 SetDefaultHandler()
716 : FileSystemCommandHandler("fs set-default")
717 {}
718
719 int handle(
720 Monitor *mon,
721 FSMap &fsmap,
722 MonOpRequestRef op,
723 const cmdmap_t& cmdmap,
724 std::stringstream &ss) override
725 {
726 std::string fs_name;
727 cmd_getval(cmdmap, "fs_name", fs_name);
728 auto fs = fsmap.get_filesystem(fs_name);
729 if (fs == nullptr) {
730 ss << "filesystem '" << fs_name << "' does not exist";
731 return -ENOENT;
732 }
733
734 fsmap.set_legacy_client_fscid(fs->fscid);
735 return 0;
736 }
737 };
738
739 class RemoveFilesystemHandler : public FileSystemCommandHandler
740 {
741 public:
742 RemoveFilesystemHandler()
743 : FileSystemCommandHandler("fs rm")
744 {}
745
746 int handle(
747 Monitor *mon,
748 FSMap &fsmap,
749 MonOpRequestRef op,
750 const cmdmap_t& cmdmap,
751 std::stringstream &ss) override
752 {
753 /* We may need to blacklist ranks. */
754 if (!mon->osdmon()->is_writeable()) {
755 // not allowed to write yet, so retry when we can
756 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
757 return -EAGAIN;
758 }
759
760 // Check caller has correctly named the FS to delete
761 // (redundant while there is only one FS, but command
762 // syntax should apply to multi-FS future)
763 string fs_name;
764 cmd_getval(cmdmap, "fs_name", fs_name);
765 auto fs = fsmap.get_filesystem(fs_name);
766 if (fs == nullptr) {
767 // Consider absence success to make deletes idempotent
768 ss << "filesystem '" << fs_name << "' does not exist";
769 return 0;
770 }
771
772 // Check that no MDS daemons are active
773 if (fs->mds_map.get_num_up_mds() > 0) {
774 ss << "all MDS daemons must be inactive/failed before removing filesystem. See `ceph fs fail`.";
775 return -EINVAL;
776 }
777
778 // Check for confirmation flag
779 bool sure = false;
780 cmd_getval(cmdmap, "yes_i_really_mean_it", sure);
781 if (!sure) {
782 ss << "this is a DESTRUCTIVE operation and will make data in your filesystem permanently" \
783 " inaccessible. Add --yes-i-really-mean-it if you are sure you wish to continue.";
784 return -EPERM;
785 }
786
787 if (fsmap.get_legacy_client_fscid() == fs->fscid) {
788 fsmap.set_legacy_client_fscid(FS_CLUSTER_ID_NONE);
789 }
790
791 std::vector<mds_gid_t> to_fail;
792 // There may be standby_replay daemons left here
793 for (const auto &i : fs->mds_map.get_mds_info()) {
794 ceph_assert(i.second.state == MDSMap::STATE_STANDBY_REPLAY);
795 to_fail.push_back(i.first);
796 }
797
798 for (const auto &gid : to_fail) {
799 // Standby replays don't write, so it isn't important to
800 // wait for an osdmap propose here: ignore return value.
801 mon->mdsmon()->fail_mds_gid(fsmap, gid);
802 }
803 if (!to_fail.empty()) {
804 mon->osdmon()->propose_pending(); /* maybe new blacklists */
805 }
806
807 fsmap.erase_filesystem(fs->fscid);
808
809 return 0;
810 }
811 };
812
813 class ResetFilesystemHandler : public FileSystemCommandHandler
814 {
815 public:
816 ResetFilesystemHandler()
817 : FileSystemCommandHandler("fs reset")
818 {}
819
820 int handle(
821 Monitor *mon,
822 FSMap &fsmap,
823 MonOpRequestRef op,
824 const cmdmap_t& cmdmap,
825 std::stringstream &ss) override
826 {
827 string fs_name;
828 cmd_getval(cmdmap, "fs_name", fs_name);
829 auto fs = fsmap.get_filesystem(fs_name);
830 if (fs == nullptr) {
831 ss << "filesystem '" << fs_name << "' does not exist";
832 // Unlike fs rm, we consider this case an error
833 return -ENOENT;
834 }
835
836 // Check that no MDS daemons are active
837 if (fs->mds_map.get_num_up_mds() > 0) {
838 ss << "all MDS daemons must be inactive before resetting filesystem: set the cluster_down flag"
839 " and use `ceph mds fail` to make this so";
840 return -EINVAL;
841 }
842
843 // Check for confirmation flag
844 bool sure = false;
845 cmd_getval(cmdmap, "yes_i_really_mean_it", sure);
846 if (!sure) {
847 ss << "this is a potentially destructive operation, only for use by experts in disaster recovery. "
848 "Add --yes-i-really-mean-it if you are sure you wish to continue.";
849 return -EPERM;
850 }
851
852 fsmap.reset_filesystem(fs->fscid);
853
854 return 0;
855 }
856 };
857
858 class RemoveDataPoolHandler : public FileSystemCommandHandler
859 {
860 public:
861 RemoveDataPoolHandler()
862 : FileSystemCommandHandler("fs rm_data_pool")
863 {}
864
865 int handle(
866 Monitor *mon,
867 FSMap &fsmap,
868 MonOpRequestRef op,
869 const cmdmap_t& cmdmap,
870 std::stringstream &ss) override
871 {
872 string poolname;
873 cmd_getval(cmdmap, "pool", poolname);
874
875 std::string fs_name;
876 if (!cmd_getval(cmdmap, "fs_name", fs_name)
877 || fs_name.empty()) {
878 ss << "Missing filesystem name";
879 return -EINVAL;
880 }
881
882 auto fs = fsmap.get_filesystem(fs_name);
883 if (fs == nullptr) {
884 ss << "Not found: '" << fs_name << "'";
885 return -ENOENT;
886 }
887
888 int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
889 if (poolid < 0) {
890 string err;
891 poolid = strict_strtol(poolname.c_str(), 10, &err);
892 if (err.length()) {
893 ss << "pool '" << poolname << "' does not exist";
894 return -ENOENT;
895 } else if (poolid < 0) {
896 ss << "invalid pool id '" << poolid << "'";
897 return -EINVAL;
898 }
899 }
900
901 ceph_assert(poolid >= 0); // Checked by parsing code above
902
903 if (fs->mds_map.get_first_data_pool() == poolid) {
904 ss << "cannot remove default data pool";
905 return -EINVAL;
906 }
907
908
909 int r = 0;
910 fsmap.modify_filesystem(fs->fscid,
911 [&r, poolid](std::shared_ptr<Filesystem> fs)
912 {
913 r = fs->mds_map.remove_data_pool(poolid);
914 });
915 if (r == -ENOENT) {
916 // It was already removed, succeed in silence
917 return 0;
918 } else if (r == 0) {
919 // We removed it, succeed
920 ss << "removed data pool " << poolid << " from fsmap";
921 return 0;
922 } else {
923 // Unexpected error, bubble up
924 return r;
925 }
926 }
927 };
928
929 /**
930 * For commands with an alternative prefix
931 */
932 template<typename T>
933 class AliasHandler : public T
934 {
935 std::string alias_prefix;
936
937 public:
938 explicit AliasHandler(const std::string &new_prefix)
939 : T()
940 {
941 alias_prefix = new_prefix;
942 }
943
944 std::string const &get_prefix() override {return alias_prefix;}
945
946 int handle(
947 Monitor *mon,
948 FSMap &fsmap,
949 MonOpRequestRef op,
950 const cmdmap_t& cmdmap,
951 std::stringstream &ss) override
952 {
953 return T::handle(mon, fsmap, op, cmdmap, ss);
954 }
955 };
956
957
958 std::list<std::shared_ptr<FileSystemCommandHandler> >
959 FileSystemCommandHandler::load(Paxos *paxos)
960 {
961 std::list<std::shared_ptr<FileSystemCommandHandler> > handlers;
962
963 handlers.push_back(std::make_shared<SetHandler>());
964 handlers.push_back(std::make_shared<FailHandler>());
965 handlers.push_back(std::make_shared<FlagSetHandler>());
966 handlers.push_back(std::make_shared<AddDataPoolHandler>(paxos));
967 handlers.push_back(std::make_shared<RemoveDataPoolHandler>());
968 handlers.push_back(std::make_shared<FsNewHandler>(paxos));
969 handlers.push_back(std::make_shared<RemoveFilesystemHandler>());
970 handlers.push_back(std::make_shared<ResetFilesystemHandler>());
971
972 handlers.push_back(std::make_shared<SetDefaultHandler>());
973 handlers.push_back(std::make_shared<AliasHandler<SetDefaultHandler> >(
974 "fs set_default"));
975
976 return handlers;
977 }
978
979 int FileSystemCommandHandler::_check_pool(
980 OSDMap &osd_map,
981 const int64_t pool_id,
982 int type,
983 bool force,
984 std::stringstream *ss) const
985 {
986 ceph_assert(ss != NULL);
987
988 const pg_pool_t *pool = osd_map.get_pg_pool(pool_id);
989 if (!pool) {
990 *ss << "pool id '" << pool_id << "' does not exist";
991 return -ENOENT;
992 }
993
994 const string& pool_name = osd_map.get_pool_name(pool_id);
995
996 if (pool->is_erasure()) {
997 if (type == POOL_METADATA) {
998 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
999 << " is an erasure-coded pool. Use of erasure-coded pools"
1000 << " for CephFS metadata is not permitted";
1001 return -EINVAL;
1002 } else if (type == POOL_DATA_DEFAULT && !force) {
1003 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
1004 " is an erasure-coded pool."
1005 " Use of an EC pool for the default data pool is discouraged;"
1006 " see the online CephFS documentation for more information."
1007 " Use --force to override.";
1008 return -EINVAL;
1009 } else if (!pool->allows_ecoverwrites()) {
1010 // non-overwriteable EC pools are only acceptable with a cache tier overlay
1011 if (!pool->has_tiers() || !pool->has_read_tier() || !pool->has_write_tier()) {
1012 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
1013 << " is an erasure-coded pool, with no overwrite support";
1014 return -EINVAL;
1015 }
1016
1017 // That cache tier overlay must be writeback, not readonly (it's the
1018 // write operations like modify+truncate we care about support for)
1019 const pg_pool_t *write_tier = osd_map.get_pg_pool(
1020 pool->write_tier);
1021 ceph_assert(write_tier != NULL); // OSDMonitor shouldn't allow DNE tier
1022 if (write_tier->cache_mode == pg_pool_t::CACHEMODE_FORWARD
1023 || write_tier->cache_mode == pg_pool_t::CACHEMODE_READONLY) {
1024 *ss << "EC pool '" << pool_name << "' has a write tier ("
1025 << osd_map.get_pool_name(pool->write_tier)
1026 << ") that is configured "
1027 "to forward writes. Use a cache mode such as 'writeback' for "
1028 "CephFS";
1029 return -EINVAL;
1030 }
1031 }
1032 }
1033
1034 if (pool->is_tier()) {
1035 *ss << " pool '" << pool_name << "' (id '" << pool_id
1036 << "') is already in use as a cache tier.";
1037 return -EINVAL;
1038 }
1039
1040 if (!force && !pool->application_metadata.empty() &&
1041 pool->application_metadata.count(
1042 pg_pool_t::APPLICATION_NAME_CEPHFS) == 0) {
1043 *ss << " pool '" << pool_name << "' (id '" << pool_id
1044 << "') has a non-CephFS application enabled.";
1045 return -EINVAL;
1046 }
1047
1048 // Nothing special about this pool, so it is permissible
1049 return 0;
1050 }
1051