git.proxmox.com Git - ceph.git/blob - ceph/src/mon/FSCommands.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / mon / FSCommands.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2017 Red Hat Ltd
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #include "OSDMonitor.h"
17
18 #include "FSCommands.h"
19 #include "MDSMonitor.h"
20 #include "MgrStatMonitor.h"
21 #include "mds/cephfs_features.h"
22
23 using TOPNSPC::common::cmd_getval;
24
25 using std::dec;
26 using std::hex;
27 using std::list;
28 using std::map;
29 using std::make_pair;
30 using std::ostream;
31 using std::ostringstream;
32 using std::pair;
33 using std::set;
34 using std::string;
35 using std::stringstream;
36 using std::to_string;
37 using std::vector;
38
39 using ceph::bufferlist;
40 using ceph::decode;
41 using ceph::encode;
42 using ceph::ErasureCodeInterfaceRef;
43 using ceph::ErasureCodeProfile;
44 using ceph::Formatter;
45 using ceph::JSONFormatter;
46 using ceph::make_message;
47 using ceph::mono_clock;
48 using ceph::mono_time;
49
50 class FlagSetHandler : public FileSystemCommandHandler
51 {
52 public:
53 FlagSetHandler()
54 : FileSystemCommandHandler("fs flag set")
55 {
56 }
57
58 int handle(
59 Monitor *mon,
60 FSMap& fsmap,
61 MonOpRequestRef op,
62 const cmdmap_t& cmdmap,
63 std::stringstream &ss) override
64 {
65 string flag_name;
66 cmd_getval(cmdmap, "flag_name", flag_name);
67
68 string flag_val;
69 cmd_getval(cmdmap, "val", flag_val);
70
71 bool sure = false;
72 cmd_getval(cmdmap, "yes_i_really_mean_it", sure);
73
74 if (flag_name == "enable_multiple") {
75 bool flag_bool = false;
76 int r = parse_bool(flag_val, &flag_bool, ss);
77 if (r != 0) {
78 ss << "Invalid boolean value '" << flag_val << "'";
79 return r;
80 }
81
82 fsmap.set_enable_multiple(flag_bool);
83 return 0;
84 } else {
85 ss << "Unknown flag '" << flag_name << "'";
86 return -EINVAL;
87 }
88 }
89 };
90
91 class FailHandler : public FileSystemCommandHandler
92 {
93 public:
94 FailHandler()
95 : FileSystemCommandHandler("fs fail")
96 {
97 }
98
99 int handle(
100 Monitor* mon,
101 FSMap& fsmap,
102 MonOpRequestRef op,
103 const cmdmap_t& cmdmap,
104 std::stringstream& ss) override
105 {
106 if (!mon->osdmon()->is_writeable()) {
107 // not allowed to write yet, so retry when we can
108 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
109 return -EAGAIN;
110 }
111
112 std::string fs_name;
113 if (!cmd_getval(cmdmap, "fs_name", fs_name) || fs_name.empty()) {
114 ss << "Missing filesystem name";
115 return -EINVAL;
116 }
117
118 auto fs = fsmap.get_filesystem(fs_name);
119
120 auto f = [](auto fs) {
121 fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
122 };
123 fsmap.modify_filesystem(fs->fscid, std::move(f));
124
125 std::vector<mds_gid_t> to_fail;
126 for (const auto& p : fs->mds_map.get_mds_info()) {
127 to_fail.push_back(p.first);
128 }
129
130 for (const auto& gid : to_fail) {
131 mon->mdsmon()->fail_mds_gid(fsmap, gid);
132 }
133 if (!to_fail.empty()) {
134 mon->osdmon()->propose_pending();
135 }
136
137 ss << fs_name;
138 ss << " marked not joinable; MDS cannot join the cluster. All MDS ranks marked failed.";
139
140 return 0;
141 }
142 };
143
144 class FsNewHandler : public FileSystemCommandHandler
145 {
146 public:
147 explicit FsNewHandler(Paxos *paxos)
148 : FileSystemCommandHandler("fs new"), m_paxos(paxos)
149 {
150 }
151
152 bool batched_propose() override {
153 return true;
154 }
155
156 int handle(
157 Monitor *mon,
158 FSMap& fsmap,
159 MonOpRequestRef op,
160 const cmdmap_t& cmdmap,
161 std::stringstream &ss) override
162 {
163 ceph_assert(m_paxos->is_plugged());
164
165 string metadata_name;
166 cmd_getval(cmdmap, "metadata", metadata_name);
167 int64_t metadata = mon->osdmon()->osdmap.lookup_pg_pool_name(metadata_name);
168 if (metadata < 0) {
169 ss << "pool '" << metadata_name << "' does not exist";
170 return -ENOENT;
171 }
172
173 string data_name;
174 cmd_getval(cmdmap, "data", data_name);
175 int64_t data = mon->osdmon()->osdmap.lookup_pg_pool_name(data_name);
176 if (data < 0) {
177 ss << "pool '" << data_name << "' does not exist";
178 return -ENOENT;
179 }
180 if (data == 0) {
181 ss << "pool '" << data_name << "' has id 0, which CephFS does not allow. Use another pool or recreate it to get a non-zero pool id.";
182 return -EINVAL;
183 }
184
185 string fs_name;
186 cmd_getval(cmdmap, "fs_name", fs_name);
187 if (fs_name.empty()) {
188 // Ensure fs name is not empty so that we can implement
189 // commmands that refer to FS by name in future.
190 ss << "Filesystem name may not be empty";
191 return -EINVAL;
192 }
193
194 if (fsmap.get_filesystem(fs_name)) {
195 auto fs = fsmap.get_filesystem(fs_name);
196 if (*(fs->mds_map.get_data_pools().begin()) == data
197 && fs->mds_map.get_metadata_pool() == metadata) {
198 // Identical FS created already, this is a no-op
199 ss << "filesystem '" << fs_name << "' already exists";
200 return 0;
201 } else {
202 ss << "filesystem already exists with name '" << fs_name << "'";
203 return -EINVAL;
204 }
205 }
206
207 bool force = false;
208 cmd_getval(cmdmap, "force", force);
209
210 const pool_stat_t *stat = mon->mgrstatmon()->get_pool_stat(metadata);
211 if (stat) {
212 int64_t metadata_num_objects = stat->stats.sum.num_objects;
213 if (!force && metadata_num_objects > 0) {
214 ss << "pool '" << metadata_name
215 << "' already contains some objects. Use an empty pool instead.";
216 return -EINVAL;
217 }
218 }
219
220 if (fsmap.filesystem_count() > 0
221 && !fsmap.get_enable_multiple()) {
222 ss << "Creation of multiple filesystems is disabled. To enable "
223 "this experimental feature, use 'ceph fs flag set enable_multiple "
224 "true'";
225 return -EINVAL;
226 }
227
228 for (auto& fs : fsmap.get_filesystems()) {
229 const std::vector<int64_t> &data_pools = fs->mds_map.get_data_pools();
230
231 bool sure = false;
232 cmd_getval(cmdmap,
233 "allow_dangerous_metadata_overlay", sure);
234
235 if ((std::find(data_pools.begin(), data_pools.end(), data) != data_pools.end()
236 || fs->mds_map.get_metadata_pool() == metadata)
237 && !sure) {
238 ss << "Filesystem '" << fs_name
239 << "' is already using one of the specified RADOS pools. This should ONLY be done in emergencies and after careful reading of the documentation. Pass --allow-dangerous-metadata-overlay to permit this.";
240 return -EEXIST;
241 }
242 }
243
244 pg_pool_t const *data_pool = mon->osdmon()->osdmap.get_pg_pool(data);
245 ceph_assert(data_pool != NULL); // Checked it existed above
246 pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata);
247 ceph_assert(metadata_pool != NULL); // Checked it existed above
248
249 int r = _check_pool(mon->osdmon()->osdmap, data, POOL_DATA_DEFAULT, force, &ss);
250 if (r < 0) {
251 return r;
252 }
253
254 r = _check_pool(mon->osdmon()->osdmap, metadata, POOL_METADATA, force, &ss);
255 if (r < 0) {
256 return r;
257 }
258
259 if (!mon->osdmon()->is_writeable()) {
260 // not allowed to write yet, so retry when we can
261 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
262 return -EAGAIN;
263 }
264 mon->osdmon()->do_application_enable(data,
265 pg_pool_t::APPLICATION_NAME_CEPHFS,
266 "data", fs_name, true);
267 mon->osdmon()->do_application_enable(metadata,
268 pg_pool_t::APPLICATION_NAME_CEPHFS,
269 "metadata", fs_name, true);
270 mon->osdmon()->do_set_pool_opt(metadata,
271 pool_opts_t::RECOVERY_PRIORITY,
272 static_cast<int64_t>(5));
273 mon->osdmon()->do_set_pool_opt(metadata,
274 pool_opts_t::PG_NUM_MIN,
275 static_cast<int64_t>(16));
276 mon->osdmon()->do_set_pool_opt(metadata,
277 pool_opts_t::PG_AUTOSCALE_BIAS,
278 static_cast<double>(4.0));
279 mon->osdmon()->propose_pending();
280
281 // All checks passed, go ahead and create.
282 auto&& fs = fsmap.create_filesystem(fs_name, metadata, data,
283 mon->get_quorum_con_features());
284
285 ss << "new fs with metadata pool " << metadata << " and data pool " << data;
286
287 // assign a standby to rank 0 to avoid health warnings
288 auto info = fsmap.find_replacement_for({fs->fscid, 0});
289
290 if (info) {
291 mon->clog->info() << info->human_name() << " assigned to filesystem "
292 << fs_name << " as rank 0";
293 fsmap.promote(info->global_id, *fs, 0);
294 }
295
296 return 0;
297 }
298
299 private:
300 Paxos *m_paxos;
301 };
302
303 class SetHandler : public FileSystemCommandHandler
304 {
305 public:
306 SetHandler()
307 : FileSystemCommandHandler("fs set")
308 {}
309
310 int handle(
311 Monitor *mon,
312 FSMap& fsmap,
313 MonOpRequestRef op,
314 const cmdmap_t& cmdmap,
315 std::stringstream &ss) override
316 {
317 std::string fs_name;
318 if (!cmd_getval(cmdmap, "fs_name", fs_name) || fs_name.empty()) {
319 ss << "Missing filesystem name";
320 return -EINVAL;
321 }
322
323 auto fs = fsmap.get_filesystem(fs_name);
324 string var;
325 if (!cmd_getval(cmdmap, "var", var) || var.empty()) {
326 ss << "Invalid variable";
327 return -EINVAL;
328 }
329 string val;
330 string interr;
331 int64_t n = 0;
332 if (!cmd_getval(cmdmap, "val", val)) {
333 return -EINVAL;
334 }
335 // we got a string. see if it contains an int.
336 n = strict_strtoll(val.c_str(), 10, &interr);
337 if (var == "max_mds") {
338 // NOTE: see also "mds set_max_mds", which can modify the same field.
339 if (interr.length()) {
340 ss << interr;
341 return -EINVAL;
342 }
343
344 if (n <= 0) {
345 ss << "You must specify at least one MDS";
346 return -EINVAL;
347 }
348
349 if (n > 1 && n > fs->mds_map.get_max_mds()) {
350 if (fs->mds_map.was_snaps_ever_allowed() &&
351 !fs->mds_map.allows_multimds_snaps()) {
352 ss << "multi-active MDS is not allowed while there are snapshots possibly created by pre-mimic MDS";
353 return -EINVAL;
354 }
355 }
356 if (n > MAX_MDS) {
357 ss << "may not have more than " << MAX_MDS << " MDS ranks";
358 return -EINVAL;
359 }
360
361 fsmap.modify_filesystem(
362 fs->fscid,
363 [n](std::shared_ptr<Filesystem> fs)
364 {
365 fs->mds_map.clear_flag(CEPH_MDSMAP_NOT_JOINABLE);
366 fs->mds_map.set_max_mds(n);
367 });
368 } else if (var == "inline_data") {
369 bool enable_inline = false;
370 int r = parse_bool(val, &enable_inline, ss);
371 if (r != 0) {
372 return r;
373 }
374
375 if (enable_inline) {
376 bool confirm = false;
377 cmd_getval(cmdmap, "yes_i_really_really_mean_it", confirm);
378 if (!confirm) {
379 ss << "Inline data support is deprecated and will be removed in a future release. "
380 << "Add --yes-i-really-really-mean-it if you are certain you want this enabled.";
381 return -EPERM;
382 }
383 ss << "inline data enabled";
384
385 fsmap.modify_filesystem(
386 fs->fscid,
387 [](std::shared_ptr<Filesystem> fs)
388 {
389 fs->mds_map.set_inline_data_enabled(true);
390 });
391
392 // Update `compat`
393 CompatSet c = fsmap.get_compat();
394 c.incompat.insert(MDS_FEATURE_INCOMPAT_INLINE);
395 fsmap.update_compat(c);
396 } else {
397 ss << "inline data disabled";
398 fsmap.modify_filesystem(
399 fs->fscid,
400 [](std::shared_ptr<Filesystem> fs)
401 {
402 fs->mds_map.set_inline_data_enabled(false);
403 });
404 }
405 } else if (var == "balancer") {
406 if (val.empty()) {
407 ss << "unsetting the metadata load balancer";
408 } else {
409 ss << "setting the metadata load balancer to " << val;
410 }
411 fsmap.modify_filesystem(
412 fs->fscid,
413 [val](std::shared_ptr<Filesystem> fs)
414 {
415 fs->mds_map.set_balancer(val);
416 });
417 return true;
418 } else if (var == "max_file_size") {
419 if (interr.length()) {
420 ss << var << " requires an integer value";
421 return -EINVAL;
422 }
423 if (n < CEPH_MIN_STRIPE_UNIT) {
424 ss << var << " must at least " << CEPH_MIN_STRIPE_UNIT;
425 return -ERANGE;
426 }
427 fsmap.modify_filesystem(
428 fs->fscid,
429 [n](std::shared_ptr<Filesystem> fs)
430 {
431 fs->mds_map.set_max_filesize(n);
432 });
433 } else if (var == "allow_new_snaps") {
434 bool enable_snaps = false;
435 int r = parse_bool(val, &enable_snaps, ss);
436 if (r != 0) {
437 return r;
438 }
439
440 if (!enable_snaps) {
441 fsmap.modify_filesystem(
442 fs->fscid,
443 [](std::shared_ptr<Filesystem> fs)
444 {
445 fs->mds_map.clear_snaps_allowed();
446 });
447 ss << "disabled new snapshots";
448 } else {
449 fsmap.modify_filesystem(
450 fs->fscid,
451 [](std::shared_ptr<Filesystem> fs)
452 {
453 fs->mds_map.set_snaps_allowed();
454 });
455 ss << "enabled new snapshots";
456 }
457 } else if (var == "allow_multimds") {
458 ss << "Multiple MDS is always enabled. Use the max_mds"
459 << " parameter to control the number of active MDSs"
460 << " allowed. This command is DEPRECATED and will be"
461 << " REMOVED from future releases.";
462 } else if (var == "allow_multimds_snaps") {
463 bool enable = false;
464 int r = parse_bool(val, &enable, ss);
465 if (r != 0) {
466 return r;
467 }
468
469 string confirm;
470 if (!cmd_getval(cmdmap, "confirm", confirm) ||
471 confirm != "--yes-i-am-really-a-mds") {
472 ss << "Warning! This command is for MDS only. Do not run it manually";
473 return -EPERM;
474 }
475
476 if (enable) {
477 ss << "enabled multimds with snapshot";
478 fsmap.modify_filesystem(
479 fs->fscid,
480 [](std::shared_ptr<Filesystem> fs)
481 {
482 fs->mds_map.set_multimds_snaps_allowed();
483 });
484 } else {
485 ss << "disabled multimds with snapshot";
486 fsmap.modify_filesystem(
487 fs->fscid,
488 [](std::shared_ptr<Filesystem> fs)
489 {
490 fs->mds_map.clear_multimds_snaps_allowed();
491 });
492 }
493 } else if (var == "allow_dirfrags") {
494 ss << "Directory fragmentation is now permanently enabled."
495 << " This command is DEPRECATED and will be REMOVED from future releases.";
496 } else if (var == "down") {
497 bool is_down = false;
498 int r = parse_bool(val, &is_down, ss);
499 if (r != 0) {
500 return r;
501 }
502
503 ss << fs->mds_map.get_fs_name();
504
505 fsmap.modify_filesystem(
506 fs->fscid,
507 [is_down](std::shared_ptr<Filesystem> fs)
508 {
509 if (is_down) {
510 if (fs->mds_map.get_max_mds() > 0) {
511 fs->mds_map.set_old_max_mds();
512 fs->mds_map.set_max_mds(0);
513 } /* else already down! */
514 } else {
515 mds_rank_t oldmax = fs->mds_map.get_old_max_mds();
516 fs->mds_map.set_max_mds(oldmax ? oldmax : 1);
517 }
518 });
519
520 if (is_down) {
521 ss << " marked down. ";
522 } else {
523 ss << " marked up, max_mds = " << fs->mds_map.get_max_mds();
524 }
525 } else if (var == "cluster_down" || var == "joinable") {
526 bool joinable = true;
527 int r = parse_bool(val, &joinable, ss);
528 if (r != 0) {
529 return r;
530 }
531 if (var == "cluster_down") {
532 joinable = !joinable;
533 }
534
535 ss << fs->mds_map.get_fs_name();
536
537 fsmap.modify_filesystem(
538 fs->fscid,
539 [joinable](std::shared_ptr<Filesystem> fs)
540 {
541 if (joinable) {
542 fs->mds_map.clear_flag(CEPH_MDSMAP_NOT_JOINABLE);
543 } else {
544 fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
545 }
546 });
547
548 if (joinable) {
549 ss << " marked joinable; MDS may join as newly active.";
550 } else {
551 ss << " marked not joinable; MDS cannot join as newly active.";
552 }
553
554 if (var == "cluster_down") {
555 ss << " WARNING: cluster_down flag is deprecated and will be"
556 << " removed in a future version. Please use \"joinable\".";
557 }
558 } else if (var == "standby_count_wanted") {
559 if (interr.length()) {
560 ss << var << " requires an integer value";
561 return -EINVAL;
562 }
563 if (n < 0) {
564 ss << var << " must be non-negative";
565 return -ERANGE;
566 }
567 fsmap.modify_filesystem(
568 fs->fscid,
569 [n](std::shared_ptr<Filesystem> fs)
570 {
571 fs->mds_map.set_standby_count_wanted(n);
572 });
573 } else if (var == "session_timeout") {
574 if (interr.length()) {
575 ss << var << " requires an integer value";
576 return -EINVAL;
577 }
578 if (n < 30) {
579 ss << var << " must be at least 30s";
580 return -ERANGE;
581 }
582 fsmap.modify_filesystem(
583 fs->fscid,
584 [n](std::shared_ptr<Filesystem> fs)
585 {
586 fs->mds_map.set_session_timeout((uint32_t)n);
587 });
588 } else if (var == "session_autoclose") {
589 if (interr.length()) {
590 ss << var << " requires an integer value";
591 return -EINVAL;
592 }
593 if (n < 30) {
594 ss << var << " must be at least 30s";
595 return -ERANGE;
596 }
597 fsmap.modify_filesystem(
598 fs->fscid,
599 [n](std::shared_ptr<Filesystem> fs)
600 {
601 fs->mds_map.set_session_autoclose((uint32_t)n);
602 });
603 } else if (var == "allow_standby_replay") {
604 bool allow = false;
605 int r = parse_bool(val, &allow, ss);
606 if (r != 0) {
607 return r;
608 }
609
610 if (!allow) {
611 if (!mon->osdmon()->is_writeable()) {
612 // not allowed to write yet, so retry when we can
613 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
614 return -EAGAIN;
615 }
616 std::vector<mds_gid_t> to_fail;
617 for (const auto& [gid, info]: fs->mds_map.get_mds_info()) {
618 if (info.state == MDSMap::STATE_STANDBY_REPLAY) {
619 to_fail.push_back(gid);
620 }
621 }
622
623 for (const auto& gid : to_fail) {
624 mon->mdsmon()->fail_mds_gid(fsmap, gid);
625 }
626 if (!to_fail.empty()) {
627 mon->osdmon()->propose_pending();
628 }
629 }
630
631 auto f = [allow](auto& fs) {
632 if (allow) {
633 fs->mds_map.set_standby_replay_allowed();
634 } else {
635 fs->mds_map.clear_standby_replay_allowed();
636 }
637 };
638 fsmap.modify_filesystem(fs->fscid, std::move(f));
639 } else if (var == "min_compat_client") {
640 auto vno = ceph_release_from_name(val.c_str());
641 if (!vno) {
642 ss << "version " << val << " is not recognized";
643 return -EINVAL;
644 }
645 auto f = [vno](auto&& fs) {
646 fs->mds_map.set_min_compat_client(vno);
647 };
648 fsmap.modify_filesystem(fs->fscid, std::move(f));
649 } else {
650 ss << "unknown variable " << var;
651 return -EINVAL;
652 }
653
654 return 0;
655 }
656 };
657
658 class RequiredClientFeaturesHandler : public FileSystemCommandHandler
659 {
660 public:
661 RequiredClientFeaturesHandler()
662 : FileSystemCommandHandler("fs required_client_features")
663 {
664 }
665
666 int handle(
667 Monitor *mon,
668 FSMap &fsmap,
669 MonOpRequestRef op,
670 const cmdmap_t& cmdmap,
671 std::stringstream &ss) override
672 {
673 std::string fs_name;
674 if (!cmd_getval(cmdmap, "fs_name", fs_name) || fs_name.empty()) {
675 ss << "Missing filesystem name";
676 return -EINVAL;
677 }
678 auto fs = fsmap.get_filesystem(fs_name);
679 if (fs == nullptr) {
680 ss << "Not found: '" << fs_name << "'";
681 return -ENOENT;
682 }
683 string subop;
684 if (!cmd_getval(cmdmap, "subop", subop) ||
685 (subop != "add" && subop != "rm")) {
686 ss << "Must either add or rm a feature; " << subop << " is not recognized";
687 return -EINVAL;
688 }
689 string val;
690 if (!cmd_getval(cmdmap, "val", val) || val.empty()) {
691 ss << "Missing feature id/name";
692 return -EINVAL;
693 }
694
695 int feature = cephfs_feature_from_name(val);
696 if (feature < 0) {
697 string err;
698 feature = strict_strtol(val.c_str(), 10, &err);
699 if (err.length()) {
700 ss << "Invalid feature name: " << val;
701 return -EINVAL;
702 }
703 if (feature < 0 || feature > CEPHFS_FEATURE_MAX) {
704 ss << "Invalid feature id: " << feature;
705 return -EINVAL;
706 }
707 }
708
709 if (subop == "add") {
710 bool ret = false;
711 fsmap.modify_filesystem(
712 fs->fscid,
713 [feature, &ret](auto&& fs)
714 {
715 if (fs->mds_map.get_required_client_features().test(feature))
716 return;
717 fs->mds_map.add_required_client_feature(feature);
718 ret = true;
719 });
720 if (ret) {
721 ss << "added feature '" << cephfs_feature_name(feature) << "' to required_client_features";
722 } else {
723 ss << "feature '" << cephfs_feature_name(feature) << "' is already set";
724 }
725 } else {
726 bool ret = false;
727 fsmap.modify_filesystem(
728 fs->fscid,
729 [feature, &ret](auto&& fs)
730 {
731 if (!fs->mds_map.get_required_client_features().test(feature))
732 return;
733 fs->mds_map.remove_required_client_feature(feature);
734 ret = true;
735 });
736 if (ret) {
737 ss << "removed feature '" << cephfs_feature_name(feature) << "' from required_client_features";
738 } else {
739 ss << "feature '" << cephfs_feature_name(feature) << "' is already unset";
740 }
741 }
742 return 0;
743 }
744 };
745
746
747 class AddDataPoolHandler : public FileSystemCommandHandler
748 {
749 public:
750 explicit AddDataPoolHandler(Paxos *paxos)
751 : FileSystemCommandHandler("fs add_data_pool"), m_paxos(paxos)
752 {}
753
754 bool batched_propose() override {
755 return true;
756 }
757
758 int handle(
759 Monitor *mon,
760 FSMap& fsmap,
761 MonOpRequestRef op,
762 const cmdmap_t& cmdmap,
763 std::stringstream &ss) override
764 {
765 ceph_assert(m_paxos->is_plugged());
766
767 string poolname;
768 cmd_getval(cmdmap, "pool", poolname);
769
770 std::string fs_name;
771 if (!cmd_getval(cmdmap, "fs_name", fs_name)
772 || fs_name.empty()) {
773 ss << "Missing filesystem name";
774 return -EINVAL;
775 }
776
777 int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
778 if (poolid < 0) {
779 string err;
780 poolid = strict_strtol(poolname.c_str(), 10, &err);
781 if (err.length()) {
782 ss << "pool '" << poolname << "' does not exist";
783 return -ENOENT;
784 }
785 }
786
787 int r = _check_pool(mon->osdmon()->osdmap, poolid, POOL_DATA_EXTRA, false, &ss);
788 if (r != 0) {
789 return r;
790 }
791
792 auto fs = fsmap.get_filesystem(fs_name);
793 // no-op when the data_pool already on fs
794 if (fs->mds_map.is_data_pool(poolid)) {
795 ss << "data pool " << poolid << " is already on fs " << fs_name;
796 return 0;
797 }
798
799 if (!mon->osdmon()->is_writeable()) {
800 // not allowed to write yet, so retry when we can
801 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
802 return -EAGAIN;
803 }
804 mon->osdmon()->do_application_enable(poolid,
805 pg_pool_t::APPLICATION_NAME_CEPHFS,
806 "data", fs_name, true);
807 mon->osdmon()->propose_pending();
808
809 fsmap.modify_filesystem(
810 fs->fscid,
811 [poolid](std::shared_ptr<Filesystem> fs)
812 {
813 fs->mds_map.add_data_pool(poolid);
814 });
815
816 ss << "added data pool " << poolid << " to fsmap";
817
818 return 0;
819 }
820
821 private:
822 Paxos *m_paxos;
823 };
824
825 class SetDefaultHandler : public FileSystemCommandHandler
826 {
827 public:
828 SetDefaultHandler()
829 : FileSystemCommandHandler("fs set-default")
830 {}
831
832 int handle(
833 Monitor *mon,
834 FSMap& fsmap,
835 MonOpRequestRef op,
836 const cmdmap_t& cmdmap,
837 std::stringstream &ss) override
838 {
839 std::string fs_name;
840 cmd_getval(cmdmap, "fs_name", fs_name);
841 auto fs = fsmap.get_filesystem(fs_name);
842 if (fs == nullptr) {
843 ss << "filesystem '" << fs_name << "' does not exist";
844 return -ENOENT;
845 }
846
847 fsmap.set_legacy_client_fscid(fs->fscid);
848 return 0;
849 }
850 };
851
852 class RemoveFilesystemHandler : public FileSystemCommandHandler
853 {
854 public:
855 RemoveFilesystemHandler()
856 : FileSystemCommandHandler("fs rm")
857 {}
858
859 int handle(
860 Monitor *mon,
861 FSMap& fsmap,
862 MonOpRequestRef op,
863 const cmdmap_t& cmdmap,
864 std::stringstream &ss) override
865 {
866 /* We may need to blocklist ranks. */
867 if (!mon->osdmon()->is_writeable()) {
868 // not allowed to write yet, so retry when we can
869 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
870 return -EAGAIN;
871 }
872
873 // Check caller has correctly named the FS to delete
874 // (redundant while there is only one FS, but command
875 // syntax should apply to multi-FS future)
876 string fs_name;
877 cmd_getval(cmdmap, "fs_name", fs_name);
878 auto fs = fsmap.get_filesystem(fs_name);
879 if (fs == nullptr) {
880 // Consider absence success to make deletes idempotent
881 ss << "filesystem '" << fs_name << "' does not exist";
882 return 0;
883 }
884
885 // Check that no MDS daemons are active
886 if (fs->mds_map.get_num_up_mds() > 0) {
887 ss << "all MDS daemons must be inactive/failed before removing filesystem. See `ceph fs fail`.";
888 return -EINVAL;
889 }
890
891 // Check for confirmation flag
892 bool sure = false;
893 cmd_getval(cmdmap, "yes_i_really_mean_it", sure);
894 if (!sure) {
895 ss << "this is a DESTRUCTIVE operation and will make data in your filesystem permanently" \
896 " inaccessible. Add --yes-i-really-mean-it if you are sure you wish to continue.";
897 return -EPERM;
898 }
899
900 if (fsmap.get_legacy_client_fscid() == fs->fscid) {
901 fsmap.set_legacy_client_fscid(FS_CLUSTER_ID_NONE);
902 }
903
904 std::vector<mds_gid_t> to_fail;
905 // There may be standby_replay daemons left here
906 for (const auto &i : fs->mds_map.get_mds_info()) {
907 ceph_assert(i.second.state == MDSMap::STATE_STANDBY_REPLAY);
908 to_fail.push_back(i.first);
909 }
910
911 for (const auto &gid : to_fail) {
912 // Standby replays don't write, so it isn't important to
913 // wait for an osdmap propose here: ignore return value.
914 mon->mdsmon()->fail_mds_gid(fsmap, gid);
915 }
916 if (!to_fail.empty()) {
917 mon->osdmon()->propose_pending(); /* maybe new blocklists */
918 }
919
920 fsmap.erase_filesystem(fs->fscid);
921
922 return 0;
923 }
924 };
925
926 class ResetFilesystemHandler : public FileSystemCommandHandler
927 {
928 public:
929 ResetFilesystemHandler()
930 : FileSystemCommandHandler("fs reset")
931 {}
932
933 int handle(
934 Monitor *mon,
935 FSMap& fsmap,
936 MonOpRequestRef op,
937 const cmdmap_t& cmdmap,
938 std::stringstream &ss) override
939 {
940 string fs_name;
941 cmd_getval(cmdmap, "fs_name", fs_name);
942 auto fs = fsmap.get_filesystem(fs_name);
943 if (fs == nullptr) {
944 ss << "filesystem '" << fs_name << "' does not exist";
945 // Unlike fs rm, we consider this case an error
946 return -ENOENT;
947 }
948
949 // Check that no MDS daemons are active
950 if (fs->mds_map.get_num_up_mds() > 0) {
951 ss << "all MDS daemons must be inactive before resetting filesystem: set the cluster_down flag"
952 " and use `ceph mds fail` to make this so";
953 return -EINVAL;
954 }
955
956 // Check for confirmation flag
957 bool sure = false;
958 cmd_getval(cmdmap, "yes_i_really_mean_it", sure);
959 if (!sure) {
960 ss << "this is a potentially destructive operation, only for use by experts in disaster recovery. "
961 "Add --yes-i-really-mean-it if you are sure you wish to continue.";
962 return -EPERM;
963 }
964
965 fsmap.reset_filesystem(fs->fscid);
966
967 return 0;
968 }
969 };
970
971 class RemoveDataPoolHandler : public FileSystemCommandHandler
972 {
973 public:
974 RemoveDataPoolHandler()
975 : FileSystemCommandHandler("fs rm_data_pool")
976 {}
977
978 int handle(
979 Monitor *mon,
980 FSMap& fsmap,
981 MonOpRequestRef op,
982 const cmdmap_t& cmdmap,
983 std::stringstream &ss) override
984 {
985 string poolname;
986 cmd_getval(cmdmap, "pool", poolname);
987
988 std::string fs_name;
989 if (!cmd_getval(cmdmap, "fs_name", fs_name)
990 || fs_name.empty()) {
991 ss << "Missing filesystem name";
992 return -EINVAL;
993 }
994
995 int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
996 if (poolid < 0) {
997 string err;
998 poolid = strict_strtol(poolname.c_str(), 10, &err);
999 if (err.length()) {
1000 ss << "pool '" << poolname << "' does not exist";
1001 return -ENOENT;
1002 } else if (poolid < 0) {
1003 ss << "invalid pool id '" << poolid << "'";
1004 return -EINVAL;
1005 }
1006 }
1007
1008 ceph_assert(poolid >= 0); // Checked by parsing code above
1009
1010 auto fs = fsmap.get_filesystem(fs_name);
1011 if (fs->mds_map.get_first_data_pool() == poolid) {
1012 ss << "cannot remove default data pool";
1013 return -EINVAL;
1014 }
1015
1016 int r = 0;
1017 fsmap.modify_filesystem(fs->fscid,
1018 [&r, poolid](std::shared_ptr<Filesystem> fs)
1019 {
1020 r = fs->mds_map.remove_data_pool(poolid);
1021 });
1022 if (r == -ENOENT) {
1023 // It was already removed, succeed in silence
1024 return 0;
1025 } else if (r == 0) {
1026 // We removed it, succeed
1027 ss << "removed data pool " << poolid << " from fsmap";
1028 return 0;
1029 } else {
1030 // Unexpected error, bubble up
1031 return r;
1032 }
1033 }
1034 };
1035
1036 /**
1037 * For commands with an alternative prefix
1038 */
1039 template<typename T>
1040 class AliasHandler : public T
1041 {
1042 std::string alias_prefix;
1043
1044 public:
1045 explicit AliasHandler(const std::string &new_prefix)
1046 : T()
1047 {
1048 alias_prefix = new_prefix;
1049 }
1050
1051 std::string const &get_prefix() const override {return alias_prefix;}
1052
1053 int handle(
1054 Monitor *mon,
1055 FSMap& fsmap,
1056 MonOpRequestRef op,
1057 const cmdmap_t& cmdmap,
1058 std::stringstream &ss) override
1059 {
1060 return T::handle(mon, fsmap, op, cmdmap, ss);
1061 }
1062 };
1063
1064 class MirrorHandlerEnable : public FileSystemCommandHandler
1065 {
1066 public:
1067 MirrorHandlerEnable()
1068 : FileSystemCommandHandler("fs mirror enable")
1069 {}
1070
1071 int handle(Monitor *mon,
1072 FSMap &fsmap, MonOpRequestRef op,
1073 const cmdmap_t& cmdmap, std::stringstream &ss) override {
1074 std::string fs_name;
1075 if (!cmd_getval(cmdmap, "fs_name", fs_name) || fs_name.empty()) {
1076 ss << "Missing filesystem name";
1077 return -EINVAL;
1078 }
1079
1080 auto fs = fsmap.get_filesystem(fs_name);
1081 if (fs == nullptr) {
1082 ss << "Filesystem '" << fs_name << "' not found";
1083 return -ENOENT;
1084 }
1085
1086 if (fs->mirror_info.is_mirrored()) {
1087 return 0;
1088 }
1089
1090 auto f = [](auto &&fs) {
1091 fs->mirror_info.enable_mirroring();
1092 };
1093 fsmap.modify_filesystem(fs->fscid, std::move(f));
1094
1095 return 0;
1096 }
1097 };
1098
1099 class MirrorHandlerDisable : public FileSystemCommandHandler
1100 {
1101 public:
1102 MirrorHandlerDisable()
1103 : FileSystemCommandHandler("fs mirror disable")
1104 {}
1105
1106 int handle(Monitor *mon,
1107 FSMap &fsmap, MonOpRequestRef op,
1108 const cmdmap_t& cmdmap, std::stringstream &ss) override {
1109 std::string fs_name;
1110 if (!cmd_getval(cmdmap, "fs_name", fs_name) || fs_name.empty()) {
1111 ss << "Missing filesystem name";
1112 return -EINVAL;
1113 }
1114
1115 auto fs = fsmap.get_filesystem(fs_name);
1116 if (fs == nullptr) {
1117 ss << "Filesystem '" << fs_name << "' not found";
1118 return -ENOENT;
1119 }
1120
1121 if (!fs->mirror_info.is_mirrored()) {
1122 return 0;
1123 }
1124
1125 auto f = [](auto &&fs) {
1126 fs->mirror_info.disable_mirroring();
1127 };
1128 fsmap.modify_filesystem(fs->fscid, std::move(f));
1129
1130 return 0;
1131 }
1132 };
1133
1134 class MirrorHandlerAddPeer : public FileSystemCommandHandler
1135 {
1136 public:
1137 MirrorHandlerAddPeer()
1138 : FileSystemCommandHandler("fs mirror peer_add")
1139 {}
1140
1141 boost::optional<std::pair<string, string>>
1142 extract_remote_cluster_conf(const std::string &spec) {
1143 auto pos = spec.find("@");
1144 if (pos == std::string_view::npos) {
1145 return boost::optional<std::pair<string, string>>();
1146 }
1147
1148 auto client = spec.substr(0, pos);
1149 auto cluster = spec.substr(pos+1);
1150
1151 return std::make_pair(client, cluster);
1152 }
1153
1154 bool peer_add(FSMap &fsmap, Filesystem::const_ref &&fs,
1155 const cmdmap_t &cmdmap, std::stringstream &ss) {
1156 string peer_uuid;
1157 string remote_spec;
1158 string remote_fs_name;
1159 cmd_getval(cmdmap, "uuid", peer_uuid);
1160 cmd_getval(cmdmap, "remote_cluster_spec", remote_spec);
1161 cmd_getval(cmdmap, "remote_fs_name", remote_fs_name);
1162
1163 // verify (and extract) remote cluster specification
1164 auto remote_conf = extract_remote_cluster_conf(remote_spec);
1165 if (!remote_conf) {
1166 ss << "invalid remote cluster spec -- should be <client>@<cluster>";
1167 return false;
1168 }
1169
1170 if (fs->mirror_info.has_peer(peer_uuid)) {
1171 ss << "peer already exists";
1172 return true;
1173 }
1174 if (fs->mirror_info.has_peer((*remote_conf).first, (*remote_conf).second,
1175 remote_fs_name)) {
1176 ss << "peer already exists";
1177 return true;
1178 }
1179
1180 auto f = [peer_uuid, remote_conf, remote_fs_name](auto &&fs) {
1181 fs->mirror_info.peer_add(peer_uuid, (*remote_conf).first,
1182 (*remote_conf).second, remote_fs_name);
1183 };
1184 fsmap.modify_filesystem(fs->fscid, std::move(f));
1185 return true;
1186 }
1187
1188 int handle(Monitor *mon,
1189 FSMap &fsmap, MonOpRequestRef op,
1190 const cmdmap_t& cmdmap, std::stringstream &ss) override {
1191 std::string fs_name;
1192 if (!cmd_getval(cmdmap, "fs_name", fs_name) || fs_name.empty()) {
1193 ss << "Missing filesystem name";
1194 return -EINVAL;
1195 }
1196
1197 auto fs = fsmap.get_filesystem(fs_name);
1198 if (fs == nullptr) {
1199 ss << "Filesystem '" << fs_name << "' not found";
1200 return -ENOENT;
1201 }
1202
1203 if (!fs->mirror_info.is_mirrored()) {
1204 ss << "Mirroring not enabled for filesystem '" << fs_name << "'";
1205 return -EINVAL;
1206 }
1207
1208 auto res = peer_add(fsmap, std::move(fs), cmdmap, ss);
1209 if (!res) {
1210 return -EINVAL;
1211 }
1212
1213 return 0;
1214 }
1215 };
1216
1217 class MirrorHandlerRemovePeer : public FileSystemCommandHandler
1218 {
1219 public:
1220 MirrorHandlerRemovePeer()
1221 : FileSystemCommandHandler("fs mirror peer_remove")
1222 {}
1223
1224 bool peer_remove(FSMap &fsmap, Filesystem::const_ref &&fs,
1225 const cmdmap_t &cmdmap, std::stringstream &ss) {
1226 string peer_uuid;
1227 cmd_getval(cmdmap, "uuid", peer_uuid);
1228
1229 if (!fs->mirror_info.has_peer(peer_uuid)) {
1230 ss << "cannot find peer with uuid: " << peer_uuid;
1231 return true;
1232 }
1233
1234 auto f = [peer_uuid](auto &&fs) {
1235 fs->mirror_info.peer_remove(peer_uuid);
1236 };
1237 fsmap.modify_filesystem(fs->fscid, std::move(f));
1238 return true;
1239 }
1240
1241 int handle(Monitor *mon,
1242 FSMap &fsmap, MonOpRequestRef op,
1243 const cmdmap_t& cmdmap, std::stringstream &ss) override {
1244 std::string fs_name;
1245 if (!cmd_getval(cmdmap, "fs_name", fs_name) || fs_name.empty()) {
1246 ss << "Missing filesystem name";
1247 return -EINVAL;
1248 }
1249
1250 auto fs = fsmap.get_filesystem(fs_name);
1251 if (fs == nullptr) {
1252 ss << "Filesystem '" << fs_name << "' not found";
1253 return -ENOENT;
1254 }
1255
1256 if (!fs->mirror_info.is_mirrored()) {
1257 ss << "Mirroring not enabled for filesystem '" << fs_name << "'";
1258 return -EINVAL;
1259 }
1260
1261 auto res = peer_remove(fsmap, std::move(fs), cmdmap, ss);
1262 if (!res) {
1263 return -EINVAL;
1264 }
1265
1266 return 0;
1267 }
1268 };
1269
1270 std::list<std::shared_ptr<FileSystemCommandHandler> >
1271 FileSystemCommandHandler::load(Paxos *paxos)
1272 {
1273 std::list<std::shared_ptr<FileSystemCommandHandler> > handlers;
1274
1275 handlers.push_back(std::make_shared<SetHandler>());
1276 handlers.push_back(std::make_shared<FailHandler>());
1277 handlers.push_back(std::make_shared<FlagSetHandler>());
1278 handlers.push_back(std::make_shared<RequiredClientFeaturesHandler>());
1279 handlers.push_back(std::make_shared<AddDataPoolHandler>(paxos));
1280 handlers.push_back(std::make_shared<RemoveDataPoolHandler>());
1281 handlers.push_back(std::make_shared<FsNewHandler>(paxos));
1282 handlers.push_back(std::make_shared<RemoveFilesystemHandler>());
1283 handlers.push_back(std::make_shared<ResetFilesystemHandler>());
1284
1285 handlers.push_back(std::make_shared<SetDefaultHandler>());
1286 handlers.push_back(std::make_shared<AliasHandler<SetDefaultHandler> >(
1287 "fs set_default"));
1288 handlers.push_back(std::make_shared<MirrorHandlerEnable>());
1289 handlers.push_back(std::make_shared<MirrorHandlerDisable>());
1290 handlers.push_back(std::make_shared<MirrorHandlerAddPeer>());
1291 handlers.push_back(std::make_shared<MirrorHandlerRemovePeer>());
1292
1293 return handlers;
1294 }
1295
1296 int FileSystemCommandHandler::_check_pool(
1297 OSDMap &osd_map,
1298 const int64_t pool_id,
1299 int type,
1300 bool force,
1301 std::stringstream *ss) const
1302 {
1303 ceph_assert(ss != NULL);
1304
1305 const pg_pool_t *pool = osd_map.get_pg_pool(pool_id);
1306 if (!pool) {
1307 *ss << "pool id '" << pool_id << "' does not exist";
1308 return -ENOENT;
1309 }
1310
1311 const string& pool_name = osd_map.get_pool_name(pool_id);
1312
1313 if (pool->is_erasure()) {
1314 if (type == POOL_METADATA) {
1315 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
1316 << " is an erasure-coded pool. Use of erasure-coded pools"
1317 << " for CephFS metadata is not permitted";
1318 return -EINVAL;
1319 } else if (type == POOL_DATA_DEFAULT && !force) {
1320 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
1321 " is an erasure-coded pool."
1322 " Use of an EC pool for the default data pool is discouraged;"
1323 " see the online CephFS documentation for more information."
1324 " Use --force to override.";
1325 return -EINVAL;
1326 } else if (!pool->allows_ecoverwrites()) {
1327 // non-overwriteable EC pools are only acceptable with a cache tier overlay
1328 if (!pool->has_tiers() || !pool->has_read_tier() || !pool->has_write_tier()) {
1329 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
1330 << " is an erasure-coded pool, with no overwrite support";
1331 return -EINVAL;
1332 }
1333
1334 // That cache tier overlay must be writeback, not readonly (it's the
1335 // write operations like modify+truncate we care about support for)
1336 const pg_pool_t *write_tier = osd_map.get_pg_pool(
1337 pool->write_tier);
1338 ceph_assert(write_tier != NULL); // OSDMonitor shouldn't allow DNE tier
1339 if (write_tier->cache_mode == pg_pool_t::CACHEMODE_FORWARD
1340 || write_tier->cache_mode == pg_pool_t::CACHEMODE_READONLY) {
1341 *ss << "EC pool '" << pool_name << "' has a write tier ("
1342 << osd_map.get_pool_name(pool->write_tier)
1343 << ") that is configured "
1344 "to forward writes. Use a cache mode such as 'writeback' for "
1345 "CephFS";
1346 return -EINVAL;
1347 }
1348 }
1349 }
1350
1351 if (pool->is_tier()) {
1352 *ss << " pool '" << pool_name << "' (id '" << pool_id
1353 << "') is already in use as a cache tier.";
1354 return -EINVAL;
1355 }
1356
1357 if (!force && !pool->application_metadata.empty() &&
1358 pool->application_metadata.count(
1359 pg_pool_t::APPLICATION_NAME_CEPHFS) == 0) {
1360 *ss << " pool '" << pool_name << "' (id '" << pool_id
1361 << "') has a non-CephFS application enabled.";
1362 return -EINVAL;
1363 }
1364
1365 // Nothing special about this pool, so it is permissible
1366 return 0;
1367 }
1368
1369 int FileSystemCommandHandler::is_op_allowed(
1370 const MonOpRequestRef& op, const FSMap& fsmap, const cmdmap_t& cmdmap,
1371 std::stringstream &ss) const
1372 {
1373 string fs_name;
1374 cmd_getval(cmdmap, "fs_name", fs_name);
1375
1376 // so that fsmap can filtered and the original copy is untouched.
1377 FSMap fsmap_copy = fsmap;
1378 fsmap_copy.filter(op->get_session()->get_allowed_fs_names());
1379
1380 auto fs = fsmap_copy.get_filesystem(fs_name);
1381 if (fs == nullptr) {
1382 /* let "fs rm" handle idempotent case where file system does not exist */
1383 if (!(get_prefix() == "fs rm" && fsmap.get_filesystem(fs_name) == nullptr)) {
1384 ss << "Filesystem not found: '" << fs_name << "'";
1385 return -ENOENT;
1386 }
1387 }
1388
1389 if (!op->get_session()->fs_name_capable(fs_name, MON_CAP_W)) {
1390 ss << "Permission denied: '" << fs_name << "'";
1391 return -EPERM;
1392 }
1393
1394 return 1;
1395 }