]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/FSCommands.cc
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / mon / FSCommands.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2017 Red Hat Ltd
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16#include "OSDMonitor.h"
7c673cae
FG
17
18#include "FSCommands.h"
19#include "MDSMonitor.h"
11fdf7f2 20#include "MgrStatMonitor.h"
7c673cae
FG
21
22
// Warning text appended to command output for experimental features.
// Every fragment except the last ends with a space: the compiler joins
// adjacent string literals verbatim, so without it the sentences would run
// together ("experimental.It may cause ...").
static const std::string EXPERIMENTAL_WARNING(
  "Warning! This feature is experimental. "
  "It may cause problems up to and including data loss. "
  "Consult the documentation at ceph.com, and if unsure, do not proceed. "
  "Add --yes-i-really-mean-it if you are certain.");
27
28
29
30class FlagSetHandler : public FileSystemCommandHandler
31{
32 public:
33 FlagSetHandler()
34 : FileSystemCommandHandler("fs flag set")
35 {
36 }
37
38 int handle(
39 Monitor *mon,
40 FSMap &fsmap,
41 MonOpRequestRef op,
11fdf7f2 42 const cmdmap_t& cmdmap,
7c673cae
FG
43 std::stringstream &ss) override
44 {
45 string flag_name;
11fdf7f2 46 cmd_getval(g_ceph_context, cmdmap, "flag_name", flag_name);
7c673cae
FG
47
48 string flag_val;
11fdf7f2 49 cmd_getval(g_ceph_context, cmdmap, "val", flag_val);
7c673cae 50
11fdf7f2
TL
51 bool sure = false;
52 cmd_getval(g_ceph_context, cmdmap, "yes_i_really_mean_it", sure);
7c673cae
FG
53
54 if (flag_name == "enable_multiple") {
55 bool flag_bool = false;
56 int r = parse_bool(flag_val, &flag_bool, ss);
57 if (r != 0) {
58 ss << "Invalid boolean value '" << flag_val << "'";
59 return r;
60 }
61
62 bool jewel = mon->get_quorum_con_features() & CEPH_FEATURE_SERVER_JEWEL;
63 if (flag_bool && !jewel) {
64 ss << "Multiple-filesystems are forbidden until all mons are updated";
65 return -EINVAL;
66 }
11fdf7f2 67 if (!sure) {
7c673cae
FG
68 ss << EXPERIMENTAL_WARNING;
69 }
70 fsmap.set_enable_multiple(flag_bool);
71 return 0;
72 } else {
73 ss << "Unknown flag '" << flag_name << "'";
74 return -EINVAL;
75 }
76 }
77};
78
11fdf7f2
TL
79class FailHandler : public FileSystemCommandHandler
80{
81 public:
82 FailHandler()
83 : FileSystemCommandHandler("fs fail")
84 {
85 }
86
87 int handle(
88 Monitor* mon,
89 FSMap& fsmap,
90 MonOpRequestRef op,
91 const cmdmap_t& cmdmap,
92 std::stringstream& ss) override
93 {
94 if (!mon->osdmon()->is_writeable()) {
95 // not allowed to write yet, so retry when we can
96 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
97 return -EAGAIN;
98 }
99
100 std::string fs_name;
101 if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name) || fs_name.empty()) {
102 ss << "Missing filesystem name";
103 return -EINVAL;
104 }
105
106 auto fs = fsmap.get_filesystem(fs_name);
107 if (fs == nullptr) {
108 ss << "Not found: '" << fs_name << "'";
109 return -ENOENT;
110 }
111
112 auto f = [](auto fs) {
113 fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
114 };
115 fsmap.modify_filesystem(fs->fscid, std::move(f));
116
117 std::vector<mds_gid_t> to_fail;
118 for (const auto& p : fs->mds_map.get_mds_info()) {
119 to_fail.push_back(p.first);
120 }
121
122 for (const auto& gid : to_fail) {
123 mon->mdsmon()->fail_mds_gid(fsmap, gid);
124 }
125 if (!to_fail.empty()) {
126 mon->osdmon()->propose_pending();
127 }
128
129 ss << fs_name;
130 ss << " marked not joinable; MDS cannot join the cluster. All MDS ranks marked failed.";
131
132 return 0;
133 }
134};
135
7c673cae
FG
136class FsNewHandler : public FileSystemCommandHandler
137{
138 public:
11fdf7f2 139 explicit FsNewHandler(Paxos *paxos)
c07f9fc5 140 : FileSystemCommandHandler("fs new"), m_paxos(paxos)
7c673cae
FG
141 {
142 }
143
c07f9fc5
FG
144 bool batched_propose() override {
145 return true;
146 }
147
7c673cae
FG
148 int handle(
149 Monitor *mon,
150 FSMap &fsmap,
151 MonOpRequestRef op,
11fdf7f2 152 const cmdmap_t& cmdmap,
31f18b77 153 std::stringstream &ss) override
7c673cae 154 {
11fdf7f2 155 ceph_assert(m_paxos->is_plugged());
c07f9fc5 156
7c673cae 157 string metadata_name;
11fdf7f2 158 cmd_getval(g_ceph_context, cmdmap, "metadata", metadata_name);
7c673cae
FG
159 int64_t metadata = mon->osdmon()->osdmap.lookup_pg_pool_name(metadata_name);
160 if (metadata < 0) {
161 ss << "pool '" << metadata_name << "' does not exist";
162 return -ENOENT;
163 }
164
7c673cae 165 string data_name;
11fdf7f2 166 cmd_getval(g_ceph_context, cmdmap, "data", data_name);
7c673cae
FG
167 int64_t data = mon->osdmon()->osdmap.lookup_pg_pool_name(data_name);
168 if (data < 0) {
169 ss << "pool '" << data_name << "' does not exist";
170 return -ENOENT;
171 }
172 if (data == 0) {
173 ss << "pool '" << data_name << "' has id 0, which CephFS does not allow. Use another pool or recreate it to get a non-zero pool id.";
174 return -EINVAL;
175 }
c07f9fc5 176
7c673cae 177 string fs_name;
11fdf7f2 178 cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
7c673cae
FG
179 if (fs_name.empty()) {
180 // Ensure fs name is not empty so that we can implement
181 // commmands that refer to FS by name in future.
182 ss << "Filesystem name may not be empty";
183 return -EINVAL;
184 }
185
186 if (fsmap.get_filesystem(fs_name)) {
187 auto fs = fsmap.get_filesystem(fs_name);
188 if (*(fs->mds_map.get_data_pools().begin()) == data
189 && fs->mds_map.get_metadata_pool() == metadata) {
190 // Identical FS created already, this is a no-op
191 ss << "filesystem '" << fs_name << "' already exists";
192 return 0;
193 } else {
194 ss << "filesystem already exists with name '" << fs_name << "'";
195 return -EINVAL;
196 }
197 }
198
11fdf7f2
TL
199 bool force = false;
200 cmd_getval(g_ceph_context,cmdmap, "force", force);
201
202 const pool_stat_t *stat = mon->mgrstatmon()->get_pool_stat(metadata);
203 if (stat) {
204 int64_t metadata_num_objects = stat->stats.sum.num_objects;
205 if (!force && metadata_num_objects > 0) {
206 ss << "pool '" << metadata_name
207 << "' already contains some objects. Use an empty pool instead.";
208 return -EINVAL;
209 }
210 }
211
7c673cae
FG
212 if (fsmap.filesystem_count() > 0
213 && !fsmap.get_enable_multiple()) {
214 ss << "Creation of multiple filesystems is disabled. To enable "
215 "this experimental feature, use 'ceph fs flag set enable_multiple "
216 "true'";
217 return -EINVAL;
218 }
219
11fdf7f2 220 for (auto& fs : fsmap.get_filesystems()) {
31f18b77 221 const std::vector<int64_t> &data_pools = fs->mds_map.get_data_pools();
11fdf7f2
TL
222
223 bool sure = false;
224 cmd_getval(g_ceph_context, cmdmap,
225 "allow_dangerous_metadata_overlay", sure);
226
31f18b77 227 if ((std::find(data_pools.begin(), data_pools.end(), data) != data_pools.end()
7c673cae 228 || fs->mds_map.get_metadata_pool() == metadata)
11fdf7f2 229 && !sure) {
7c673cae
FG
230 ss << "Filesystem '" << fs_name
231 << "' is already using one of the specified RADOS pools. This should ONLY be done in emergencies and after careful reading of the documentation. Pass --allow-dangerous-metadata-overlay to permit this.";
232 return -EEXIST;
233 }
234 }
235
236 pg_pool_t const *data_pool = mon->osdmon()->osdmap.get_pg_pool(data);
11fdf7f2 237 ceph_assert(data_pool != NULL); // Checked it existed above
7c673cae 238 pg_pool_t const *metadata_pool = mon->osdmon()->osdmap.get_pg_pool(metadata);
11fdf7f2 239 ceph_assert(metadata_pool != NULL); // Checked it existed above
7c673cae 240
92f5a8d4 241 int r = _check_pool(mon->osdmon()->osdmap, data, POOL_DATA_DEFAULT, force, &ss);
7c673cae
FG
242 if (r < 0) {
243 return r;
244 }
245
92f5a8d4 246 r = _check_pool(mon->osdmon()->osdmap, metadata, POOL_METADATA, force, &ss);
7c673cae
FG
247 if (r < 0) {
248 return r;
249 }
35e4c445 250
11fdf7f2
TL
251 if (!mon->osdmon()->is_writeable()) {
252 // not allowed to write yet, so retry when we can
253 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
254 return -EAGAIN;
35e4c445 255 }
11fdf7f2
TL
256 mon->osdmon()->do_application_enable(data,
257 pg_pool_t::APPLICATION_NAME_CEPHFS,
258 "data", fs_name);
259 mon->osdmon()->do_application_enable(metadata,
260 pg_pool_t::APPLICATION_NAME_CEPHFS,
261 "metadata", fs_name);
494da23a
TL
262 mon->osdmon()->do_set_pool_opt(metadata,
263 pool_opts_t::RECOVERY_PRIORITY,
264 static_cast<int64_t>(5));
265 mon->osdmon()->do_set_pool_opt(metadata,
266 pool_opts_t::PG_NUM_MIN,
267 static_cast<int64_t>(16));
268 mon->osdmon()->do_set_pool_opt(metadata,
269 pool_opts_t::PG_AUTOSCALE_BIAS,
270 static_cast<double>(4.0));
11fdf7f2 271 mon->osdmon()->propose_pending();
c07f9fc5 272
7c673cae 273 // All checks passed, go ahead and create.
11fdf7f2
TL
274 auto&& fs = fsmap.create_filesystem(fs_name, metadata, data,
275 mon->get_quorum_con_features());
276
7c673cae 277 ss << "new fs with metadata pool " << metadata << " and data pool " << data;
11fdf7f2
TL
278
279 // assign a standby to rank 0 to avoid health warnings
280 std::string _name;
281 mds_gid_t gid = fsmap.find_replacement_for({fs->fscid, 0}, _name);
282
283 if (gid != MDS_GID_NONE) {
284 const auto &info = fsmap.get_info_gid(gid);
285 mon->clog->info() << info.human_name() << " assigned to filesystem "
286 << fs_name << " as rank 0";
287 fsmap.promote(gid, *fs, 0);
288 }
289
7c673cae
FG
290 return 0;
291 }
c07f9fc5
FG
292
293private:
294 Paxos *m_paxos;
7c673cae
FG
295};
296
297class SetHandler : public FileSystemCommandHandler
298{
299public:
300 SetHandler()
301 : FileSystemCommandHandler("fs set")
302 {}
303
304 int handle(
305 Monitor *mon,
306 FSMap &fsmap,
307 MonOpRequestRef op,
11fdf7f2 308 const cmdmap_t& cmdmap,
7c673cae
FG
309 std::stringstream &ss) override
310 {
311 std::string fs_name;
11fdf7f2 312 if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name) || fs_name.empty()) {
7c673cae
FG
313 ss << "Missing filesystem name";
314 return -EINVAL;
315 }
316
317 auto fs = fsmap.get_filesystem(fs_name);
318 if (fs == nullptr) {
319 ss << "Not found: '" << fs_name << "'";
320 return -ENOENT;
321 }
322
323 string var;
11fdf7f2 324 if (!cmd_getval(g_ceph_context, cmdmap, "var", var) || var.empty()) {
7c673cae
FG
325 ss << "Invalid variable";
326 return -EINVAL;
327 }
328 string val;
329 string interr;
330 int64_t n = 0;
11fdf7f2 331 if (!cmd_getval(g_ceph_context, cmdmap, "val", val)) {
7c673cae
FG
332 return -EINVAL;
333 }
334 // we got a string. see if it contains an int.
335 n = strict_strtoll(val.c_str(), 10, &interr);
336 if (var == "max_mds") {
337 // NOTE: see also "mds set_max_mds", which can modify the same field.
338 if (interr.length()) {
339 ss << interr;
340 return -EINVAL;
341 }
342
343 if (n <= 0) {
344 ss << "You must specify at least one MDS";
345 return -EINVAL;
346 }
11fdf7f2
TL
347 if (n > 1 && n > fs->mds_map.get_max_mds()) {
348 if (fs->mds_map.was_snaps_ever_allowed() &&
349 !fs->mds_map.allows_multimds_snaps()) {
350 ss << "multi-active MDS is not allowed while there are snapshots possibly created by pre-mimic MDS";
351 return -EINVAL;
352 }
7c673cae
FG
353 }
354 if (n > MAX_MDS) {
355 ss << "may not have more than " << MAX_MDS << " MDS ranks";
356 return -EINVAL;
357 }
11fdf7f2 358
7c673cae
FG
359 fsmap.modify_filesystem(
360 fs->fscid,
361 [n](std::shared_ptr<Filesystem> fs)
362 {
11fdf7f2 363 fs->mds_map.clear_flag(CEPH_MDSMAP_NOT_JOINABLE);
7c673cae
FG
364 fs->mds_map.set_max_mds(n);
365 });
366 } else if (var == "inline_data") {
367 bool enable_inline = false;
368 int r = parse_bool(val, &enable_inline, ss);
369 if (r != 0) {
370 return r;
371 }
372
373 if (enable_inline) {
11fdf7f2
TL
374 bool confirm = false;
375 cmd_getval(g_ceph_context, cmdmap, "yes_i_really_mean_it", confirm);
376 if (!confirm) {
7c673cae
FG
377 ss << EXPERIMENTAL_WARNING;
378 return -EPERM;
379 }
380 ss << "inline data enabled";
381
382 fsmap.modify_filesystem(
383 fs->fscid,
384 [](std::shared_ptr<Filesystem> fs)
385 {
386 fs->mds_map.set_inline_data_enabled(true);
387 });
388
389 // Update `compat`
390 CompatSet c = fsmap.get_compat();
391 c.incompat.insert(MDS_FEATURE_INCOMPAT_INLINE);
392 fsmap.update_compat(c);
393 } else {
394 ss << "inline data disabled";
395 fsmap.modify_filesystem(
396 fs->fscid,
397 [](std::shared_ptr<Filesystem> fs)
398 {
399 fs->mds_map.set_inline_data_enabled(false);
400 });
401 }
402 } else if (var == "balancer") {
31f18b77
FG
403 if (val.empty()) {
404 ss << "unsetting the metadata load balancer";
405 } else {
406 ss << "setting the metadata load balancer to " << val;
407 }
408 fsmap.modify_filesystem(
409 fs->fscid,
410 [val](std::shared_ptr<Filesystem> fs)
7c673cae
FG
411 {
412 fs->mds_map.set_balancer(val);
413 });
414 return true;
415 } else if (var == "max_file_size") {
416 if (interr.length()) {
417 ss << var << " requires an integer value";
418 return -EINVAL;
419 }
420 if (n < CEPH_MIN_STRIPE_UNIT) {
421 ss << var << " must at least " << CEPH_MIN_STRIPE_UNIT;
422 return -ERANGE;
423 }
424 fsmap.modify_filesystem(
425 fs->fscid,
426 [n](std::shared_ptr<Filesystem> fs)
427 {
428 fs->mds_map.set_max_filesize(n);
429 });
430 } else if (var == "allow_new_snaps") {
431 bool enable_snaps = false;
432 int r = parse_bool(val, &enable_snaps, ss);
433 if (r != 0) {
434 return r;
435 }
436
437 if (!enable_snaps) {
438 fsmap.modify_filesystem(
439 fs->fscid,
440 [](std::shared_ptr<Filesystem> fs)
441 {
442 fs->mds_map.clear_snaps_allowed();
443 });
444 ss << "disabled new snapshots";
445 } else {
7c673cae
FG
446 fsmap.modify_filesystem(
447 fs->fscid,
448 [](std::shared_ptr<Filesystem> fs)
449 {
450 fs->mds_map.set_snaps_allowed();
451 });
452 ss << "enabled new snapshots";
453 }
454 } else if (var == "allow_multimds") {
11fdf7f2
TL
455 ss << "Multiple MDS is always enabled. Use the max_mds"
456 << " parameter to control the number of active MDSs"
457 << " allowed. This command is DEPRECATED and will be"
458 << " REMOVED from future releases.";
459 } else if (var == "allow_multimds_snaps") {
460 bool enable = false;
461 int r = parse_bool(val, &enable, ss);
7c673cae 462 if (r != 0) {
11fdf7f2 463 return r;
7c673cae
FG
464 }
465
11fdf7f2
TL
466 string confirm;
467 if (!cmd_getval(g_ceph_context, cmdmap, "confirm", confirm) ||
468 confirm != "--yes-i-am-really-a-mds") {
469 ss << "Warning! This command is for MDS only. Do not run it manually";
470 return -EPERM;
471 }
472
473 if (enable) {
474 ss << "enabled multimds with snapshot";
7c673cae
FG
475 fsmap.modify_filesystem(
476 fs->fscid,
477 [](std::shared_ptr<Filesystem> fs)
478 {
11fdf7f2 479 fs->mds_map.set_multimds_snaps_allowed();
7c673cae 480 });
7c673cae 481 } else {
11fdf7f2 482 ss << "disabled multimds with snapshot";
7c673cae
FG
483 fsmap.modify_filesystem(
484 fs->fscid,
485 [](std::shared_ptr<Filesystem> fs)
486 {
11fdf7f2 487 fs->mds_map.clear_multimds_snaps_allowed();
7c673cae 488 });
7c673cae 489 }
11fdf7f2
TL
490 } else if (var == "allow_dirfrags") {
491 ss << "Directory fragmentation is now permanently enabled."
492 << " This command is DEPRECATED and will be REMOVED from future releases.";
493 } else if (var == "down") {
7c673cae
FG
494 bool is_down = false;
495 int r = parse_bool(val, &is_down, ss);
496 if (r != 0) {
497 return r;
498 }
499
11fdf7f2
TL
500 ss << fs->mds_map.get_fs_name();
501
7c673cae
FG
502 fsmap.modify_filesystem(
503 fs->fscid,
504 [is_down](std::shared_ptr<Filesystem> fs)
505 {
11fdf7f2
TL
506 if (is_down) {
507 if (fs->mds_map.get_max_mds() > 0) {
508 fs->mds_map.set_old_max_mds();
509 fs->mds_map.set_max_mds(0);
510 } /* else already down! */
511 } else {
512 mds_rank_t oldmax = fs->mds_map.get_old_max_mds();
513 fs->mds_map.set_max_mds(oldmax ? oldmax : 1);
514 }
515 });
516
517 if (is_down) {
518 ss << " marked down. ";
519 } else {
520 ss << " marked up, max_mds = " << fs->mds_map.get_max_mds();
521 }
522 } else if (var == "cluster_down" || var == "joinable") {
523 bool joinable = true;
524 int r = parse_bool(val, &joinable, ss);
525 if (r != 0) {
526 return r;
527 }
528 if (var == "cluster_down") {
529 joinable = !joinable;
530 }
531
532 ss << fs->mds_map.get_fs_name();
533
534 fsmap.modify_filesystem(
535 fs->fscid,
536 [joinable](std::shared_ptr<Filesystem> fs)
537 {
538 if (joinable) {
539 fs->mds_map.clear_flag(CEPH_MDSMAP_NOT_JOINABLE);
540 } else {
541 fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
542 }
7c673cae
FG
543 });
544
11fdf7f2
TL
545 if (joinable) {
546 ss << " marked joinable; MDS may join as newly active.";
547 } else {
548 ss << " marked not joinable; MDS cannot join as newly active.";
549 }
550
551 if (var == "cluster_down") {
552 ss << " WARNING: cluster_down flag is deprecated and will be"
553 << " removed in a future version. Please use \"joinable\".";
554 }
7c673cae
FG
555 } else if (var == "standby_count_wanted") {
556 if (interr.length()) {
557 ss << var << " requires an integer value";
558 return -EINVAL;
559 }
560 if (n < 0) {
561 ss << var << " must be non-negative";
562 return -ERANGE;
563 }
564 fsmap.modify_filesystem(
565 fs->fscid,
566 [n](std::shared_ptr<Filesystem> fs)
567 {
568 fs->mds_map.set_standby_count_wanted(n);
569 });
f64942e4
AA
570 } else if (var == "session_timeout") {
571 if (interr.length()) {
572 ss << var << " requires an integer value";
573 return -EINVAL;
574 }
575 if (n < 30) {
576 ss << var << " must be at least 30s";
577 return -ERANGE;
578 }
579 fsmap.modify_filesystem(
580 fs->fscid,
581 [n](std::shared_ptr<Filesystem> fs)
582 {
583 fs->mds_map.set_session_timeout((uint32_t)n);
584 });
585 } else if (var == "session_autoclose") {
586 if (interr.length()) {
587 ss << var << " requires an integer value";
588 return -EINVAL;
589 }
590 if (n < 30) {
591 ss << var << " must be at least 30s";
592 return -ERANGE;
593 }
594 fsmap.modify_filesystem(
595 fs->fscid,
596 [n](std::shared_ptr<Filesystem> fs)
597 {
598 fs->mds_map.set_session_autoclose((uint32_t)n);
599 });
11fdf7f2
TL
600 } else if (var == "allow_standby_replay") {
601 bool allow = false;
602 int r = parse_bool(val, &allow, ss);
603 if (r != 0) {
604 return r;
605 }
606
607 auto f = [allow](auto& fs) {
608 if (allow) {
609 fs->mds_map.set_standby_replay_allowed();
610 } else {
611 fs->mds_map.clear_standby_replay_allowed();
612 }
613 };
614 fsmap.modify_filesystem(fs->fscid, std::move(f));
615 } else if (var == "min_compat_client") {
616 int vno = ceph_release_from_name(val.c_str());
617 if (vno <= 0) {
618 ss << "version " << val << " is not recognized";
619 return -EINVAL;
620 }
621 fsmap.modify_filesystem(
622 fs->fscid,
623 [vno](std::shared_ptr<Filesystem> fs)
624 {
625 fs->mds_map.set_min_compat_client((uint8_t)vno);
626 });
7c673cae
FG
627 } else {
628 ss << "unknown variable " << var;
629 return -EINVAL;
630 }
631
632 return 0;
633 }
634};
635
636class AddDataPoolHandler : public FileSystemCommandHandler
637{
638 public:
11fdf7f2 639 explicit AddDataPoolHandler(Paxos *paxos)
c07f9fc5 640 : FileSystemCommandHandler("fs add_data_pool"), m_paxos(paxos)
7c673cae
FG
641 {}
642
c07f9fc5
FG
643 bool batched_propose() override {
644 return true;
645 }
646
7c673cae
FG
647 int handle(
648 Monitor *mon,
649 FSMap &fsmap,
650 MonOpRequestRef op,
11fdf7f2 651 const cmdmap_t& cmdmap,
7c673cae
FG
652 std::stringstream &ss) override
653 {
11fdf7f2 654 ceph_assert(m_paxos->is_plugged());
c07f9fc5 655
7c673cae 656 string poolname;
11fdf7f2 657 cmd_getval(g_ceph_context, cmdmap, "pool", poolname);
7c673cae
FG
658
659 std::string fs_name;
11fdf7f2 660 if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name)
7c673cae
FG
661 || fs_name.empty()) {
662 ss << "Missing filesystem name";
663 return -EINVAL;
664 }
665
666 auto fs = fsmap.get_filesystem(fs_name);
667 if (fs == nullptr) {
668 ss << "Not found: '" << fs_name << "'";
669 return -ENOENT;
670 }
671
672 int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
673 if (poolid < 0) {
674 string err;
675 poolid = strict_strtol(poolname.c_str(), 10, &err);
676 if (err.length()) {
677 ss << "pool '" << poolname << "' does not exist";
678 return -ENOENT;
679 }
680 }
681
92f5a8d4 682 int r = _check_pool(mon->osdmon()->osdmap, poolid, POOL_DATA_EXTRA, false, &ss);
7c673cae
FG
683 if (r != 0) {
684 return r;
685 }
686
31f18b77
FG
687 // no-op when the data_pool already on fs
688 if (fs->mds_map.is_data_pool(poolid)) {
689 ss << "data pool " << poolid << " is already on fs " << fs_name;
690 return 0;
691 }
692
11fdf7f2
TL
693 if (!mon->osdmon()->is_writeable()) {
694 // not allowed to write yet, so retry when we can
695 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
696 return -EAGAIN;
b5b8bbf5 697 }
11fdf7f2
TL
698 mon->osdmon()->do_application_enable(poolid,
699 pg_pool_t::APPLICATION_NAME_CEPHFS,
700 "data", fs_name);
701 mon->osdmon()->propose_pending();
c07f9fc5 702
7c673cae
FG
703 fsmap.modify_filesystem(
704 fs->fscid,
705 [poolid](std::shared_ptr<Filesystem> fs)
706 {
707 fs->mds_map.add_data_pool(poolid);
708 });
709
710 ss << "added data pool " << poolid << " to fsmap";
711
712 return 0;
713 }
c07f9fc5
FG
714
715private:
716 Paxos *m_paxos;
7c673cae
FG
717};
718
719class SetDefaultHandler : public FileSystemCommandHandler
720{
721 public:
722 SetDefaultHandler()
723 : FileSystemCommandHandler("fs set-default")
724 {}
725
726 int handle(
727 Monitor *mon,
728 FSMap &fsmap,
729 MonOpRequestRef op,
11fdf7f2 730 const cmdmap_t& cmdmap,
7c673cae
FG
731 std::stringstream &ss) override
732 {
733 std::string fs_name;
11fdf7f2 734 cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
7c673cae
FG
735 auto fs = fsmap.get_filesystem(fs_name);
736 if (fs == nullptr) {
737 ss << "filesystem '" << fs_name << "' does not exist";
738 return -ENOENT;
739 }
740
741 fsmap.set_legacy_client_fscid(fs->fscid);
742 return 0;
743 }
744};
745
746class RemoveFilesystemHandler : public FileSystemCommandHandler
747{
748 public:
749 RemoveFilesystemHandler()
750 : FileSystemCommandHandler("fs rm")
751 {}
752
753 int handle(
754 Monitor *mon,
755 FSMap &fsmap,
756 MonOpRequestRef op,
11fdf7f2 757 const cmdmap_t& cmdmap,
7c673cae
FG
758 std::stringstream &ss) override
759 {
a8e16298
TL
760 /* We may need to blacklist ranks. */
761 if (!mon->osdmon()->is_writeable()) {
762 // not allowed to write yet, so retry when we can
763 mon->osdmon()->wait_for_writeable(op, new PaxosService::C_RetryMessage(mon->mdsmon(), op));
764 return -EAGAIN;
765 }
766
7c673cae
FG
767 // Check caller has correctly named the FS to delete
768 // (redundant while there is only one FS, but command
769 // syntax should apply to multi-FS future)
770 string fs_name;
11fdf7f2 771 cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
7c673cae
FG
772 auto fs = fsmap.get_filesystem(fs_name);
773 if (fs == nullptr) {
774 // Consider absence success to make deletes idempotent
775 ss << "filesystem '" << fs_name << "' does not exist";
776 return 0;
777 }
778
779 // Check that no MDS daemons are active
780 if (fs->mds_map.get_num_up_mds() > 0) {
11fdf7f2 781 ss << "all MDS daemons must be inactive/failed before removing filesystem. See `ceph fs fail`.";
7c673cae
FG
782 return -EINVAL;
783 }
784
785 // Check for confirmation flag
11fdf7f2
TL
786 bool sure = false;
787 cmd_getval(g_ceph_context, cmdmap, "yes_i_really_mean_it", sure);
788 if (!sure) {
7c673cae
FG
789 ss << "this is a DESTRUCTIVE operation and will make data in your filesystem permanently" \
790 " inaccessible. Add --yes-i-really-mean-it if you are sure you wish to continue.";
791 return -EPERM;
792 }
793
794 if (fsmap.get_legacy_client_fscid() == fs->fscid) {
795 fsmap.set_legacy_client_fscid(FS_CLUSTER_ID_NONE);
796 }
797
798 std::vector<mds_gid_t> to_fail;
799 // There may be standby_replay daemons left here
800 for (const auto &i : fs->mds_map.get_mds_info()) {
11fdf7f2 801 ceph_assert(i.second.state == MDSMap::STATE_STANDBY_REPLAY);
7c673cae
FG
802 to_fail.push_back(i.first);
803 }
804
805 for (const auto &gid : to_fail) {
806 // Standby replays don't write, so it isn't important to
807 // wait for an osdmap propose here: ignore return value.
1adf2230 808 mon->mdsmon()->fail_mds_gid(fsmap, gid);
7c673cae 809 }
a8e16298
TL
810 if (!to_fail.empty()) {
811 mon->osdmon()->propose_pending(); /* maybe new blacklists */
812 }
7c673cae
FG
813
814 fsmap.erase_filesystem(fs->fscid);
815
816 return 0;
817 }
818};
819
820class ResetFilesystemHandler : public FileSystemCommandHandler
821{
822 public:
823 ResetFilesystemHandler()
824 : FileSystemCommandHandler("fs reset")
825 {}
826
827 int handle(
828 Monitor *mon,
829 FSMap &fsmap,
830 MonOpRequestRef op,
11fdf7f2 831 const cmdmap_t& cmdmap,
7c673cae
FG
832 std::stringstream &ss) override
833 {
834 string fs_name;
11fdf7f2 835 cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name);
7c673cae
FG
836 auto fs = fsmap.get_filesystem(fs_name);
837 if (fs == nullptr) {
838 ss << "filesystem '" << fs_name << "' does not exist";
839 // Unlike fs rm, we consider this case an error
840 return -ENOENT;
841 }
842
843 // Check that no MDS daemons are active
844 if (fs->mds_map.get_num_up_mds() > 0) {
845 ss << "all MDS daemons must be inactive before resetting filesystem: set the cluster_down flag"
846 " and use `ceph mds fail` to make this so";
847 return -EINVAL;
848 }
849
850 // Check for confirmation flag
11fdf7f2
TL
851 bool sure = false;
852 cmd_getval(g_ceph_context, cmdmap, "yes_i_really_mean_it", sure);
853 if (!sure) {
7c673cae
FG
854 ss << "this is a potentially destructive operation, only for use by experts in disaster recovery. "
855 "Add --yes-i-really-mean-it if you are sure you wish to continue.";
856 return -EPERM;
857 }
858
859 fsmap.reset_filesystem(fs->fscid);
860
861 return 0;
862 }
863};
864
865class RemoveDataPoolHandler : public FileSystemCommandHandler
866{
867 public:
868 RemoveDataPoolHandler()
869 : FileSystemCommandHandler("fs rm_data_pool")
870 {}
871
872 int handle(
873 Monitor *mon,
874 FSMap &fsmap,
875 MonOpRequestRef op,
11fdf7f2 876 const cmdmap_t& cmdmap,
7c673cae
FG
877 std::stringstream &ss) override
878 {
879 string poolname;
11fdf7f2 880 cmd_getval(g_ceph_context, cmdmap, "pool", poolname);
7c673cae
FG
881
882 std::string fs_name;
11fdf7f2 883 if (!cmd_getval(g_ceph_context, cmdmap, "fs_name", fs_name)
7c673cae
FG
884 || fs_name.empty()) {
885 ss << "Missing filesystem name";
886 return -EINVAL;
887 }
888
889 auto fs = fsmap.get_filesystem(fs_name);
890 if (fs == nullptr) {
891 ss << "Not found: '" << fs_name << "'";
892 return -ENOENT;
893 }
894
895 int64_t poolid = mon->osdmon()->osdmap.lookup_pg_pool_name(poolname);
896 if (poolid < 0) {
897 string err;
898 poolid = strict_strtol(poolname.c_str(), 10, &err);
899 if (err.length()) {
900 ss << "pool '" << poolname << "' does not exist";
901 return -ENOENT;
902 } else if (poolid < 0) {
903 ss << "invalid pool id '" << poolid << "'";
904 return -EINVAL;
905 }
906 }
907
11fdf7f2 908 ceph_assert(poolid >= 0); // Checked by parsing code above
7c673cae
FG
909
910 if (fs->mds_map.get_first_data_pool() == poolid) {
911 ss << "cannot remove default data pool";
912 return -EINVAL;
913 }
914
915
916 int r = 0;
917 fsmap.modify_filesystem(fs->fscid,
918 [&r, poolid](std::shared_ptr<Filesystem> fs)
919 {
920 r = fs->mds_map.remove_data_pool(poolid);
921 });
922 if (r == -ENOENT) {
923 // It was already removed, succeed in silence
924 return 0;
925 } else if (r == 0) {
926 // We removed it, succeed
927 ss << "removed data pool " << poolid << " from fsmap";
928 return 0;
929 } else {
930 // Unexpected error, bubble up
931 return r;
932 }
933 }
934};
935
7c673cae
FG
936/**
937 * For commands with an alternative prefix
938 */
939template<typename T>
940class AliasHandler : public T
941{
942 std::string alias_prefix;
943
944 public:
11fdf7f2 945 explicit AliasHandler(const std::string &new_prefix)
7c673cae
FG
946 : T()
947 {
948 alias_prefix = new_prefix;
949 }
950
951 std::string const &get_prefix() override {return alias_prefix;}
952
953 int handle(
954 Monitor *mon,
955 FSMap &fsmap,
956 MonOpRequestRef op,
11fdf7f2 957 const cmdmap_t& cmdmap,
7c673cae
FG
958 std::stringstream &ss) override
959 {
960 return T::handle(mon, fsmap, op, cmdmap, ss);
961 }
962};
963
964
c07f9fc5
FG
965std::list<std::shared_ptr<FileSystemCommandHandler> >
966FileSystemCommandHandler::load(Paxos *paxos)
7c673cae
FG
967{
968 std::list<std::shared_ptr<FileSystemCommandHandler> > handlers;
969
970 handlers.push_back(std::make_shared<SetHandler>());
11fdf7f2 971 handlers.push_back(std::make_shared<FailHandler>());
7c673cae 972 handlers.push_back(std::make_shared<FlagSetHandler>());
c07f9fc5 973 handlers.push_back(std::make_shared<AddDataPoolHandler>(paxos));
7c673cae 974 handlers.push_back(std::make_shared<RemoveDataPoolHandler>());
c07f9fc5 975 handlers.push_back(std::make_shared<FsNewHandler>(paxos));
7c673cae
FG
976 handlers.push_back(std::make_shared<RemoveFilesystemHandler>());
977 handlers.push_back(std::make_shared<ResetFilesystemHandler>());
978
979 handlers.push_back(std::make_shared<SetDefaultHandler>());
980 handlers.push_back(std::make_shared<AliasHandler<SetDefaultHandler> >(
981 "fs set_default"));
982
983 return handlers;
984}
985
7c673cae
FG
986int FileSystemCommandHandler::_check_pool(
987 OSDMap &osd_map,
988 const int64_t pool_id,
92f5a8d4 989 int type,
c07f9fc5 990 bool force,
7c673cae
FG
991 std::stringstream *ss) const
992{
11fdf7f2 993 ceph_assert(ss != NULL);
7c673cae
FG
994
995 const pg_pool_t *pool = osd_map.get_pg_pool(pool_id);
996 if (!pool) {
997 *ss << "pool id '" << pool_id << "' does not exist";
998 return -ENOENT;
999 }
1000
1001 const string& pool_name = osd_map.get_pool_name(pool_id);
1002
92f5a8d4
TL
1003 if (pool->is_erasure()) {
1004 if (type == POOL_METADATA) {
7c673cae
FG
1005 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
1006 << " is an erasure-coded pool. Use of erasure-coded pools"
1007 << " for CephFS metadata is not permitted";
92f5a8d4
TL
1008 return -EINVAL;
1009 } else if (type == POOL_DATA_DEFAULT && !force) {
7c673cae 1010 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
92f5a8d4
TL
1011 " is an erasure-coded pool."
1012 " Use of an EC pool for the default data pool is discouraged;"
1013 " see the online CephFS documentation for more information."
1014 " Use --force to override.";
7c673cae 1015 return -EINVAL;
92f5a8d4
TL
1016 } else if (!pool->allows_ecoverwrites()) {
1017 // non-overwriteable EC pools are only acceptable with a cache tier overlay
1018 if (!pool->has_tiers() || !pool->has_read_tier() || !pool->has_write_tier()) {
1019 *ss << "pool '" << pool_name << "' (id '" << pool_id << "')"
1020 << " is an erasure-coded pool, with no overwrite support";
1021 return -EINVAL;
1022 }
7c673cae 1023
92f5a8d4
TL
1024 // That cache tier overlay must be writeback, not readonly (it's the
1025 // write operations like modify+truncate we care about support for)
1026 const pg_pool_t *write_tier = osd_map.get_pg_pool(
1027 pool->write_tier);
1028 ceph_assert(write_tier != NULL); // OSDMonitor shouldn't allow DNE tier
1029 if (write_tier->cache_mode == pg_pool_t::CACHEMODE_FORWARD
1030 || write_tier->cache_mode == pg_pool_t::CACHEMODE_READONLY) {
1031 *ss << "EC pool '" << pool_name << "' has a write tier ("
1032 << osd_map.get_pool_name(pool->write_tier)
1033 << ") that is configured "
1034 "to forward writes. Use a cache mode such as 'writeback' for "
1035 "CephFS";
1036 return -EINVAL;
1037 }
7c673cae
FG
1038 }
1039 }
1040
1041 if (pool->is_tier()) {
1042 *ss << " pool '" << pool_name << "' (id '" << pool_id
1043 << "') is already in use as a cache tier.";
1044 return -EINVAL;
1045 }
1046
c07f9fc5
FG
1047 if (!force && !pool->application_metadata.empty() &&
1048 pool->application_metadata.count(
1049 pg_pool_t::APPLICATION_NAME_CEPHFS) == 0) {
1050 *ss << " pool '" << pool_name << "' (id '" << pool_id
1051 << "') has a non-CephFS application enabled.";
1052 return -EINVAL;
1053 }
1054
7c673cae
FG
1055 // Nothing special about this pool, so it is permissible
1056 return 0;
1057}
1058