1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include <boost/program_options/variables_map.hpp>
5 #include <boost/program_options/parsers.hpp>
13 #include "global/global_init.h"
14 #include "common/ceph_argparse.h"
15 #include "include/stringify.h"
16 #include "common/errno.h"
17 #include "common/safe_io.h"
19 #include "os/bluestore/BlueFS.h"
20 #include "os/bluestore/BlueStore.h"
21 #include "common/admin_socket.h"
// Shorthand alias: lets the option-handling code below write po:: instead of
// boost::program_options::.
23 namespace po
= boost::program_options
;
// Print the given options description to stdout, terminated with std::endl.
//   desc: the Boost.Program_options description to stream out.
25 void usage(po::options_description
&desc
)
27 cout
<< desc
<< std::endl
;
// Check that the store at `path` looks usable by reading entries of its
// metadata through BlueStore::read_meta():
//   "type"       is compared against "bluestore",
//   "kv_backend" is compared against "rocksdb",
//   "bluefs"     is compared against "1".
// A failed read or an unexpected value is reported on cerr.
// NOTE(review): the conditions guarding the diagnostics, and the place where
// the `bluefs` argument is consulted, are not visible in this excerpt;
// presumably the kv_backend/bluefs checks only apply when `bluefs` is true --
// confirm against the full file.
30 void validate_path(CephContext
*cct
, const string
& path
, bool bluefs
)
32 BlueStore
bluestore(cct
, path
);
// The read_meta() status is kept in r so it can be rendered by cpp_strerror().
34 int r
= bluestore
.read_meta("type", &type
);
36 cerr
<< "failed to load os-type: " << cpp_strerror(r
) << std::endl
;
// Any object store type other than "bluestore" is reported.
39 if (type
!= "bluestore") {
40 cerr
<< "expected bluestore, but type is " << type
<< std::endl
;
// Second metadata key: which key/value backend the store uses.
48 r
= bluestore
.read_meta("kv_backend", &kv_backend
);
50 cerr
<< "failed to load kv_backend: " << cpp_strerror(r
) << std::endl
;
53 if (kv_backend
!= "rocksdb") {
54 cerr
<< "expect kv_backend to be rocksdb, but is " << kv_backend
// Third metadata key: the "bluefs" entry, expected to hold the string "1".
58 string bluefs_enabled
;
59 r
= bluestore
.read_meta("bluefs", &bluefs_enabled
);
61 cerr
<< "failed to load do_bluefs: " << cpp_strerror(r
) << std::endl
;
64 if (bluefs_enabled
!= "1") {
65 cerr
<< "bluefs not enabled for rocksdb" << std::endl
;
// Walk `devs`, read each device's bdev label and test whether its description
// matches the requested BlueFS device id:
//   BlueFS::BDEV_SLOW <-> "main"
//   BlueFS::BDEV_DB   <-> "bluefs db"
//   BlueFS::BDEV_WAL  <-> "bluefs wal"
// NOTE(review): the `id` and `cct` parameters and every return statement are
// outside this excerpt; presumably the matching entry's path is returned and
// a null pointer otherwise -- confirm against the full file.
70 const char* find_device_path(
73 const vector
<string
>& devs
)
75 for (auto& i
: devs
) {
76 bluestore_bdev_label_t label
;
77 int r
= BlueStore::_read_bdev_label(cct
, i
, &label
);
// Diagnostic for a label that could not be read (guard not visible here).
79 cerr
<< "unable to read label for " << i
<< ": "
80 << cpp_strerror(r
) << std::endl
;
// A device matches when the requested id and the label description agree.
83 if ((id
== BlueFS::BDEV_SLOW
&& label
.description
== "main") ||
84 (id
== BlueFS::BDEV_DB
&& label
.description
== "bluefs db") ||
85 (id
== BlueFS::BDEV_WAL
&& label
.description
== "bluefs wal")) {
// NOTE(review): the head of this definition is not visible in this excerpt.
// The parameters seen here (devs, got) and the later calls of the form
// parse_devices(cct, devs, &map, ptr, ptr) suggest this is parse_devices --
// confirm against the full file.
// Visible behaviour: every entry of `devs` has its bdev label read, the label
// description selects a BlueFS device id, and entries are recorded in *got.
94 const vector
<string
>& devs
,
95 map
<string
, int>* got
,
107 for (auto& d
: devs
) {
108 bluestore_bdev_label_t label
;
109 int r
= BlueStore::_read_bdev_label(cct
, d
, &label
);
111 cerr
<< "unable to read label for " << d
<< ": "
112 << cpp_strerror(r
) << std::endl
;
// Classify the device by its label description. The handling of "main" is
// not visible here; "bluefs db" and "bluefs wal" map to BDEV_DB / BDEV_WAL.
116 if (label
.description
== "main")
118 else if (label
.description
== "bluefs db") {
119 id
= BlueFS::BDEV_DB
;
125 else if (label
.description
== "bluefs wal") {
126 id
= BlueFS::BDEV_WAL
;
// The device held in `main` is registered as BDEV_SLOW when was_db is set and
// as BDEV_DB otherwise (how `main` and `was_db` are set is not visible here).
136 int id
= was_db
? BlueFS::BDEV_SLOW
: BlueFS::BDEV_DB
;
137 got
->emplace(main
, id
);
// NOTE(review): the head of this definition is not visible in this excerpt;
// the call add_devices(fs, cct, devs) elsewhere in the file suggests this is
// add_devices -- confirm against the full file.
// Visible behaviour: the devices are classified with parse_devices(), and each
// (path, slot) entry `e` is printed and handed to fs->add_block_device().
144 const vector
<string
>& devs
)
146 map
<string
, int> got
;
// The two trailing out-parameters are not needed here, hence nullptr.
147 parse_devices(cct
, devs
, &got
, nullptr, nullptr);
// target_path stays empty unless realpath() resolves a non-empty device path.
149 char target_path
[PATH_MAX
] = "";
150 if(!e
.first
.empty()) {
151 if (realpath(e
.first
.c_str(), target_path
) == nullptr) {
152 cerr
<< "failed to retrieve absolute path for " << e
.first
153 << ": " << cpp_strerror(errno
)
// Report the slot id and path, plus the resolved path when one was obtained.
158 cout
<< " slot " << e
.second
<< " " << e
.first
;
159 if (target_path
[0]) {
160 cout
<< " -> " << target_path
;
163 int r
= fs
->add_block_device(e
.second
, e
.first
, false);
165 cerr
<< "unable to open " << e
.first
<< ": " << cpp_strerror(r
) << std::endl
;
// NOTE(review): the head of this definition is not visible in this excerpt;
// the calls open_bluefs(cct, path, devs) elsewhere in the file suggest this is
// open_bluefs -- confirm against the full file.
// Visible behaviour: validates the store path with the bluefs checks
// requested, allocates a BlueFS instance with `new`, and registers the
// devices on it.
// NOTE(review): the BlueFS object is heap-allocated through a raw pointer; who
// releases it is not visible here -- confirm ownership with the callers.
174 const vector
<string
>& devs
)
176 validate_path(cct
, path
, true);
177 BlueFS
*fs
= new BlueFS(cct
);
179 add_devices(fs
, cct
, devs
);
// Diagnostic for a failed mount (the mount call itself is not visible here).
183 cerr
<< "unable to mount bluefs: " << cpp_strerror(r
)
// NOTE(review): the head of this definition is not visible in this excerpt;
// the call log_dump(cct.get(), path, devs) elsewhere in the file suggests this
// is the tool's log_dump helper -- confirm against the full file.
// Visible behaviour: opens BlueFS for the given path/devices and calls its
// log_dump(), reporting a failure on cerr.
193 const vector
<string
>& devs
)
195 BlueFS
* fs
= open_bluefs(cct
, path
, devs
);
196 int r
= fs
->log_dump();
198 cerr
<< "log_dump failed" << ": "
199 << cpp_strerror(r
) << std::endl
;
// Probe the store directory for the conventional device entries.
//   devs: device list owned by the caller (taken by non-const reference).
//   path: store directory that is searched.
// Prints a notice on stdout, then stat()s "<path>/block", "<path>/block.wal"
// and "<path>/block.db" in that order.
// NOTE(review): what happens when stat() succeeds is not visible in this
// excerpt; presumably the existing path is appended to `devs` -- confirm.
206 void inferring_bluefs_devices(vector
<string
>& devs
, std::string
& path
)
208 cout
<< "inferring bluefs devices from bluestore path" << std::endl
;
209 for (auto fn
: {"block", "block.wal", "block.db"}) {
210 string p
= path
+ "/" + fn
;
// stat() returning 0 means the entry exists.
212 if (::stat(p
.c_str(), &st
) == 0) {
// Program entry point. This first part declares command-line state (only some
// of the locals are visible in this excerpt), describes the named options and
// the positional `command`, and parses argv with Boost.Program_options.
218 int main(int argc
, char **argv
)
222 vector
<string
> devs_source
;
228 vector
<string
> allocs_name
;
230 bool fsck_deep
= false;
// Named options: each parsed value is stored directly into the local whose
// address is passed to po::value<>().
231 po::options_description
po_options("Options");
232 po_options
.add_options()
233 ("help,h", "produce help message")
234 ("path", po::value
<string
>(&path
), "bluestore path")
235 ("out-dir", po::value
<string
>(&out_dir
), "output directory")
236 ("log-file,l", po::value
<string
>(&log_file
), "log file")
237 ("log-level", po::value
<int>(&log_level
), "log level (30=most, 20=lots, 10=some, 1=little)")
238 ("dev", po::value
<vector
<string
>>(&devs
), "device(s)")
239 ("devs-source", po::value
<vector
<string
>>(&devs_source
), "bluefs-dev-migrate source device(s)")
240 ("dev-target", po::value
<string
>(&dev_target
), "target/resulting device")
241 ("deep", po::value
<bool>(&fsck_deep
), "deep fsck (read all data)")
242 ("key,k", po::value
<string
>(&key
), "label metadata key name")
243 ("value,v", po::value
<string
>(&value
), "label metadata value")
244 ("allocator", po::value
<vector
<string
>>(&allocs_name
), "allocator to inspect: 'block'/'bluefs-wal'/'bluefs-db'/'bluefs-slow'")
// The action to run arrives as a positional argument and is stored in
// `action`; its help text enumerates the supported command names.
246 po::options_description
po_positional("Positional options");
247 po_positional
.add_options()
248 ("command", po::value
<string
>(&action
),
253 "bluefs-bdev-sizes, "
254 "bluefs-bdev-expand, "
255 "bluefs-bdev-new-db, "
256 "bluefs-bdev-new-wal, "
257 "bluefs-bdev-migrate, "
266 po::options_description
po_all("All options");
267 po_all
.add(po_options
).add(po_positional
);
// Only the first positional argument is bound to "command".
268 po::positional_options_description pd
;
269 pd
.add("command", 1);
271 vector
<string
> ceph_option_strings
;
272 po::variables_map vm
;
// allow_unregistered() keeps options this tool does not declare instead of
// rejecting them.
274 po::parsed_options parsed
=
275 po::command_line_parser(argc
, argv
).options(po_all
).allow_unregistered().positional(pd
).run();
276 po::store( parsed
, vm
);
// Everything Boost did not recognize (including extra positionals) is kept in
// ceph_option_strings.
278 ceph_option_strings
= po::collect_unrecognized(parsed
.options
,
279 po::include_positional
);
// Parse errors are reported on stderr.
280 } catch(po::error
&e
) {
281 std::cerr
<< e
.what() << std::endl
;
// Per-action argument validation. Each group below prints a message on cerr
// when an argument that action needs is missing, and several groups call
// inferring_bluefs_devices() to derive the device list from `path`.
// NOTE(review): the conditions for the "must specify bluestore path" and
// "must specify osd dir to prime" messages, and what follows a printed
// message, are not visible in this excerpt.
284 // normalize path (remove ending '/' if any)
// The size() > 1 test leaves a path that is just "/" untouched.
285 if (path
.size() > 1 && *(path
.end() - 1) == '/') {
286 path
.resize(path
.size() - 1);
288 if (vm
.count("help")) {
292 if (action
.empty()) {
293 cerr
<< "must specify an action; --help for help" << std::endl
;
297 if (action
== "fsck" || action
== "repair" || action
== "quick-fix") {
299 cerr
<< "must specify bluestore path" << std::endl
;
// prime-osd-dir works on exactly one device.
303 if (action
== "prime-osd-dir") {
304 if (devs
.size() != 1) {
305 cerr
<< "must specify the main bluestore device" << std::endl
;
309 cerr
<< "must specify osd dir to prime" << std::endl
;
// Label editing needs exactly one device and a key; setting also needs a value.
313 if (action
== "set-label-key" ||
314 action
== "rm-label-key") {
315 if (devs
.size() != 1) {
316 cerr
<< "must specify the main bluestore device" << std::endl
;
319 if (key
.size() == 0) {
320 cerr
<< "must specify a key name with -k" << std::endl
;
323 if (action
== "set-label-key" && value
.size() == 0) {
324 cerr
<< "must specify a value with -v" << std::endl
;
// show-label accepts either a store path or explicit devices.
328 if (action
== "show-label") {
329 if (devs
.empty() && path
.empty()) {
330 cerr
<< "must specify bluestore path *or* raw device(s)" << std::endl
;
334 inferring_bluefs_devices(devs
, path
);
336 if (action
== "bluefs-export" || action
== "bluefs-log-dump") {
338 cerr
<< "must specify bluestore path" << std::endl
;
// Only the export action needs an output directory.
341 if ((action
== "bluefs-export") && out_dir
.empty()) {
342 cerr
<< "must specify out-dir to export bluefs" << std::endl
;
345 inferring_bluefs_devices(devs
, path
);
347 if (action
== "bluefs-bdev-sizes" || action
== "bluefs-bdev-expand") {
349 cerr
<< "must specify bluestore path" << std::endl
;
352 inferring_bluefs_devices(devs
, path
);
354 if (action
== "bluefs-bdev-new-db" || action
== "bluefs-bdev-new-wal") {
356 cerr
<< "must specify bluestore path" << std::endl
;
// A missing --dev-target only produces a notice on stdout here.
359 if (dev_target
.empty()) {
360 cout
<< "NOTICE: --dev-target option omitted, will allocate as a file" << std::endl
;
362 inferring_bluefs_devices(devs
, path
);
// Migration needs at least one source device and a target.
364 if (action
== "bluefs-bdev-migrate") {
366 cerr
<< "must specify bluestore path" << std::endl
;
369 inferring_bluefs_devices(devs
, path
);
370 if (devs_source
.size() == 0) {
371 cerr
<< "must specify source devices with --devs-source" << std::endl
;
374 if (dev_target
.empty()) {
375 cerr
<< "must specify target device with --dev-target" << std::endl
;
379 if (action
== "free-score" || action
== "free-dump") {
381 cerr
<< "must specify bluestore path" << std::endl
;
// Reject allocator names outside the known set (the first comparison of this
// condition is not visible in this excerpt).
384 for (auto name
: allocs_name
) {
387 name
!= "bluefs-db" &&
388 name
!= "bluefs-wal" &&
389 name
!= "bluefs-slow") {
390 cerr
<< "unknown allocator '" << name
<< "'" << std::endl
;
// With no --allocator given, inspect all four allocators.
394 if (allocs_name
.empty())
395 allocs_name
= vector
<string
>{"block", "bluefs-db", "bluefs-wal", "bluefs-slow"};
// Build the argument vector handed to global_init(): an optional --log-file,
// the bluestore/bluefs debug options, the stderr logging switches, and finally
// every option Boost left unrecognized.
// `args` stores raw pointers obtained from c_str(), so log_file and
// ceph_option_strings must stay alive and unmodified while args is in use.
397 vector
<const char*> args
;
398 if (log_file
.size()) {
399 args
.push_back("--log-file");
400 args
.push_back(log_file
.c_str());
// log_level is rendered as decimal text into the buffer `ll`.
// NOTE(review): the push_back calls that add `ll` after each --debug-* switch
// are not visible in this excerpt -- confirm.
402 snprintf(ll
, sizeof(ll
), "%d", log_level
);
403 args
.push_back("--debug-bluestore");
405 args
.push_back("--debug-bluefs");
408 args
.push_back("--no-log-to-stderr");
409 args
.push_back("--err-to-stderr");
// Forward the options this tool did not declare itself.
411 for (auto& i
: ceph_option_strings
) {
412 args
.push_back(i
.c_str());
// Initialize the Ceph context as a client-type utility that does not load the
// default config file (per the flag names passed below).
414 auto cct
= global_init(NULL
, args
, CEPH_ENTITY_TYPE_CLIENT
,
415 CODE_ENVIRONMENT_UTILITY
,
416 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE
);
418 common_init_finish(cct
.get());
// fsck / repair / quick-fix: validate the store path (without the bluefs
// checks), then run the matching BlueStore operation. fsck_deep is forwarded
// to fsck() and repair(); quick_fix() takes no argument.
// NOTE(review): the conditions choosing between the three reports below are
// not visible in this excerpt. The "error from fsck" text is shared by all
// three actions.
420 if (action
== "fsck" ||
421 action
== "repair" ||
422 action
== "quick-fix") {
423 validate_path(cct
.get(), path
, false);
424 BlueStore
bluestore(cct
.get(), path
);
426 if (action
== "fsck") {
427 r
= bluestore
.fsck(fsck_deep
);
428 } else if (action
== "repair") {
429 r
= bluestore
.repair(fsck_deep
);
431 r
= bluestore
.quick_fix();
// r rendered as an errno-style error.
434 cerr
<< "error from fsck: " << cpp_strerror(r
) << std::endl
;
// r rendered as a count of errors found.
437 cerr
<< action
<< " found " << r
<< " error(s)" << std::endl
;
440 cout
<< action
<< " success" << std::endl
;
// prime-osd-dir: read the label of the single given device, add a few derived
// entries to label.meta, then write values from that map as files under
// `path`.
// NOTE(review): the loop that supplies the key `k` is not visible in this
// excerpt.
443 else if (action
== "prime-osd-dir") {
444 bluestore_bdev_label_t label
;
445 int r
= BlueStore::_read_bdev_label(cct
.get(), devs
.front(), &label
);
447 cerr
<< "failed to read label for " << devs
.front() << ": "
448 << cpp_strerror(r
) << std::endl
;
452 // kludge some things into the map that we want to populate into
454 label
.meta
["path_block"] = devs
.front();
455 label
.meta
["type"] = "bluestore";
456 label
.meta
["fsid"] = stringify(label
.osd_uuid
);
// Look the key up in the label metadata; a missing key is detected by
// comparing against end().
466 auto i
= label
.meta
.find(k
);
467 if (i
== label
.meta
.end()) {
// By default the value is written verbatim to "<path>/<key>".
470 string p
= path
+ "/" + k
;
471 string v
= i
->second
;
// osd_key is special: it goes to "<path>/keyring" and the value is extended
// with the "whoami" entry and a "key = <value>" line.
472 if (k
== "osd_key") {
473 p
= path
+ "/keyring";
475 v
+= label
.meta
["whoami"];
476 v
+= "]\nkey = " + i
->second
;
// Create/truncate the file with mode 0600 (owner read/write only).
479 int fd
= ::open(p
.c_str(), O_CREAT
|O_TRUNC
|O_WRONLY
|O_CLOEXEC
, 0600);
481 cerr
<< "error writing " << p
<< ": " << cpp_strerror(errno
)
// NOTE(review): the safe_write() status is stored in r, but the message below
// renders errno -- confirm that errno is still meaningful at that point.
485 int r
= safe_write(fd
, v
.c_str(), v
.size());
487 cerr
<< "error writing to " << p
<< ": " << cpp_strerror(errno
)
// show-label: emit one JSON object section per device, nested inside a
// top-level "devices" section, using a JSONFormatter constructed with `true`.
// NOTE(review): the code that dumps each label and flushes the formatter is
// not visible in this excerpt.
494 else if (action
== "show-label") {
495 JSONFormatter
jf(true);
496 jf
.open_object_section("devices");
497 for (auto& i
: devs
) {
498 bluestore_bdev_label_t label
;
499 int r
= BlueStore::_read_bdev_label(cct
.get(), i
, &label
);
501 cerr
<< "unable to read label for " << i
<< ": "
502 << cpp_strerror(r
) << std::endl
;
// The device path is used as the name of its JSON section.
505 jf
.open_object_section(i
.c_str());
// set-label-key: read the device label, update one field, write it back.
// Keys visible here that map to dedicated label fields: osd_uuid, btime and
// description (plus a branch that assigns label.size, whose condition is not
// visible in this excerpt); any other key is stored in label.meta.
512 else if (action
== "set-label-key") {
513 bluestore_bdev_label_t label
;
514 int r
= BlueStore::_read_bdev_label(cct
.get(), devs
.front(), &label
);
516 cerr
<< "unable to read label for " << devs
.front() << ": "
517 << cpp_strerror(r
) << std::endl
;
// NOTE(review): strtoull() is called without an end pointer, so a
// non-numeric value cannot be detected here.
521 label
.size
= strtoull(value
.c_str(), nullptr, 10);
522 } else if (key
=="osd_uuid") {
// NOTE(review): the result of parse() is not used in the visible code.
523 label
.osd_uuid
.parse(value
.c_str());
524 } else if (key
=="btime") {
// The date string is split into epoch seconds and nanoseconds. This `r` is a
// second declaration with the same name as the one above.
527 int r
= utime_t::parse_date(value
.c_str(), &epoch
, &nsec
);
529 label
.btime
= utime_t(epoch
, nsec
);
531 } else if (key
=="description") {
532 label
.description
= value
;
// Fallback: free-form metadata entry.
534 label
.meta
[key
] = value
;
536 r
= BlueStore::_write_bdev_label(cct
.get(), devs
.front(), label
);
538 cerr
<< "unable to write label for " << devs
.front() << ": "
539 << cpp_strerror(r
) << std::endl
;
// rm-label-key: read the device label, remove `key` from label.meta and write
// the label back. A key that is not present is reported on cerr.
543 else if (action
== "rm-label-key") {
544 bluestore_bdev_label_t label
;
545 int r
= BlueStore::_read_bdev_label(cct
.get(), devs
.front(), &label
);
547 cerr
<< "unable to read label for " << devs
.front() << ": "
548 << cpp_strerror(r
) << std::endl
;
// Only entries of label.meta can be removed; this branch does not touch the
// dedicated label fields.
551 if (!label
.meta
.count(key
)) {
552 cerr
<< "key '" << key
<< "' not present" << std::endl
;
555 label
.meta
.erase(key
);
556 r
= BlueStore::_write_bdev_label(cct
.get(), devs
.front(), label
);
558 cerr
<< "unable to write label for " << devs
.front() << ": "
559 << cpp_strerror(r
) << std::endl
;
// bluefs-bdev-sizes: open BlueFS for the store and print its block extents to
// stdout.
// NOTE(review): the release of `fs` is not visible in this excerpt.
563 else if (action
== "bluefs-bdev-sizes") {
564 BlueFS
*fs
= open_bluefs(cct
.get(), path
, devs
);
565 fs
->dump_block_extents(cout
);
// bluefs-bdev-expand: ask BlueStore to expand its devices, passing stdout as
// the stream argument; a failure is reported on cerr via cpp_strerror().
568 else if (action
== "bluefs-bdev-expand") {
569 BlueStore
bluestore(cct
.get(), path
);
570 auto r
= bluestore
.expand_devices(cout
);
572 cerr
<< "failed to expand bluestore devices: "
573 << cpp_strerror(r
) << std::endl
;
// bluefs-export: copy every file stored in BlueFS into `out_dir`, mirroring
// the BlueFS directory layout.
// Steps visible here: list the root directory, create out_dir when access()
// reports it missing, then for every directory list its files, create the
// matching output directory, and for every file stat it, open the destination
// (mode 0644, truncating) and copy the data read through a BlueFS::FileReader.
// NOTE(review): the guards around the diagnostics and the loop that advances
// `pos`/`left` are not visible in this excerpt.
577 else if (action
== "bluefs-export") {
578 BlueFS
*fs
= open_bluefs(cct
.get(), path
, devs
);
// An empty name lists the BlueFS root.
581 int r
= fs
->readdir("", &dirs
);
583 cerr
<< "readdir in root failed: " << cpp_strerror(r
) << std::endl
;
// access() returns non-zero when out_dir does not exist yet.
587 if (::access(out_dir
.c_str(), F_OK
)) {
588 r
= ::mkdir(out_dir
.c_str(), 0755);
591 cerr
<< "mkdir " << out_dir
<< " failed: " << cpp_strerror(r
) << std::endl
;
596 for (auto& dir
: dirs
) {
599 cout
<< dir
<< "/" << std::endl
;
601 r
= fs
->readdir(dir
, &ls
);
603 cerr
<< "readdir " << dir
<< " failed: " << cpp_strerror(r
) << std::endl
;
// Mirror the BlueFS directory below out_dir.
606 string full
= out_dir
+ "/" + dir
;
607 if (::access(full
.c_str(), F_OK
)) {
608 r
= ::mkdir(full
.c_str(), 0755);
611 cerr
<< "mkdir " << full
<< " failed: " << cpp_strerror(r
) << std::endl
;
615 for (auto& file
: ls
) {
618 cout
<< dir
<< "/" << file
<< std::endl
;
621 r
= fs
->stat(dir
, file
, &size
, &mtime
);
623 cerr
<< "stat " << file
<< " failed: " << cpp_strerror(r
) << std::endl
;
// This local `path` is the destination file; it has the same name as the
// store path used in the open_bluefs() call above.
626 string path
= out_dir
+ "/" + dir
+ "/" + file
;
627 int fd
= ::open(path
.c_str(), O_CREAT
|O_WRONLY
|O_TRUNC
|O_CLOEXEC
, 0644);
630 cerr
<< "open " << path
<< " failed: " << cpp_strerror(r
) << std::endl
;
634 BlueFS::FileReader
*h
;
635 r
= fs
->open_for_read(dir
, file
, &h
, false);
637 cerr
<< "open_for_read " << dir
<< "/" << file
<< " failed: "
638 << cpp_strerror(r
) << std::endl
;
// Read `left` bytes starting at `pos` into the bufferlist bl.
645 r
= fs
->read(h
, &h
->buf
, pos
, left
, &bl
, NULL
);
647 cerr
<< "read " << dir
<< "/" << file
<< " from " << pos
648 << " failed: " << cpp_strerror(r
) << std::endl
;
// The write status lives in rc, so rc (not the earlier read result r) must be
// the value rendered in the failure message.
651 int rc
= bl
.write_fd(fd
);
653 cerr
<< "write to " << path
<< " failed: "
654 << cpp_strerror(rc
) << std::endl
;
// bluefs-log-dump: delegate to the log_dump() helper with the store path and
// device list.
667 } else if (action
== "bluefs-log-dump") {
668 log_dump(cct
.get(), path
, devs
);
// bluefs-bdev-new-db / bluefs-bdev-new-wal: attach a new DB or WAL device.
// The request is refused when both DB and WAL already exist, when the
// requested kind already exists, or when a non-empty --dev-target cannot be
// resolved with realpath(). The matching size option
// (bluestore_block_db_size / bluestore_block_wal_size) must be non-zero.
669 } else if (action
== "bluefs-bdev-new-db" || action
== "bluefs-bdev-new-wal") {
670 map
<string
, int> cur_devs_map
;
// need_db selects between the DB and the WAL variant for the rest of the
// branch.
671 bool need_db
= action
== "bluefs-bdev-new-db";
673 bool has_wal
= false;
675 char target_path
[PATH_MAX
] = "";
// Classify the current devices and learn which of DB/WAL are present.
677 parse_devices(cct
.get(), devs
, &cur_devs_map
, &has_db
, &has_wal
);
679 if (has_db
&& has_wal
) {
680 cerr
<< "can't allocate new device, both WAL and DB exist"
683 } else if (need_db
&& has_db
) {
684 cerr
<< "can't allocate new DB device, already exists"
687 } else if (!need_db
&& has_wal
) {
688 cerr
<< "can't allocate new WAL device, already exists"
// realpath() is only attempted when a target was given; on success
// target_path holds the resolved path.
691 } else if(!dev_target
.empty() &&
692 realpath(dev_target
.c_str(), target_path
) == nullptr) {
693 cerr
<< "failed to retrieve absolute path for " << dev_target
694 << ": " << cpp_strerror(errno
)
699 // Create either DB or WAL volume
// r starts out as a failure value and is overwritten by
// add_new_bluefs_device().
700 int r
= EXIT_FAILURE
;
701 if (need_db
&& cct
->_conf
->bluestore_block_db_size
== 0) {
702 cerr
<< "DB size isn't specified, "
703 "please set Ceph bluestore-block-db-size config parameter "
705 } else if (!need_db
&& cct
->_conf
->bluestore_block_wal_size
== 0) {
706 cerr
<< "WAL size isn't specified, "
707 "please set Ceph bluestore-block-wal-size config parameter "
710 BlueStore
bluestore(cct
.get(), path
);
711 r
= bluestore
.add_new_bluefs_device(
712 need_db
? BlueFS::BDEV_NEWDB
: BlueFS::BDEV_NEWWAL
,
// Success and failure reports (their guards are not visible in this excerpt).
715 cout
<< (need_db
? "DB" : "WAL") << " device added " << target_path
718 cerr
<< "failed to add " << (need_db
? "DB" : "WAL") << " device:"
// bluefs-bdev-migrate: move BlueFS data from the --devs-source devices to
// --dev-target.
// The current devices are classified with parse_devices(); each source that
// is a known BlueFS volume (and is not the target itself) is collected in
// src_devs / src_dev_ids. The target is then either
//   * an existing BlueFS volume -> migrate_to_existing_bluefs_device()
//     (a WAL target is rejected), or
//   * a new volume              -> migrate_to_new_bluefs_device(), created as
//     DB when a DB device is among the sources, as WAL when only a WAL device
//     is, and refused otherwise.
// NOTE(review): the declaration of dev_target_id for the new-volume path and
// the guards around the diagnostics are not visible in this excerpt.
724 } else if (action
== "bluefs-bdev-migrate") {
725 map
<string
, int> cur_devs_map
;
726 set
<int> src_dev_ids
;
727 map
<string
, int> src_devs
;
729 parse_devices(cct
.get(), devs
, &cur_devs_map
, nullptr, nullptr);
730 for (auto& s
: devs_source
) {
731 auto i
= cur_devs_map
.find(s
);
732 if (i
!= cur_devs_map
.end()) {
// A device listed as both source and target is reported and left out.
733 if (s
== dev_target
) {
734 cerr
<< "Device " << dev_target
735 << " is present in both source and target lists, omitted."
738 src_devs
.emplace(*i
);
739 src_dev_ids
.emplace(i
->second
);
742 cerr
<< "can't migrate " << s
<< ", not a valid bluefs volume "
748 auto i
= cur_devs_map
.find(dev_target
);
750 if (i
!= cur_devs_map
.end()) {
751 // Migrate to an existing BlueFS volume
753 auto dev_target_id
= i
->second
;
754 if (dev_target_id
== BlueFS::BDEV_WAL
) {
755 // currently we're unable to migrate to WAL device since there is no space
756 // reserved for superblock
757 cerr
<< "Migrate to WAL device isn't supported." << std::endl
;
761 BlueStore
bluestore(cct
.get(), path
);
762 int r
= bluestore
.migrate_to_existing_bluefs_device(
// Iterate by const reference: the entries are only read, so copying each
// (path, id) pair is unnecessary.
766 for(const auto& src
: src_devs
) {
767 if (src
.second
!= BlueFS::BDEV_SLOW
) {
768 cout
<< " device removed:" << src
.second
<< " " << src
.first
773 bool need_db
= dev_target_id
== BlueFS::BDEV_DB
;
774 cerr
<< "failed to migrate to existing BlueFS device: "
775 << (need_db
? BlueFS::BDEV_DB
: BlueFS::BDEV_WAL
)
782 // Migrate to a new BlueFS volume
783 // via creating either DB or WAL volume
784 char target_path
[PATH_MAX
] = "";
786 if (src_dev_ids
.count(BlueFS::BDEV_DB
)) {
787 // if we have DB device in the source list - we create DB device
788 // (and may be remove WAL).
789 dev_target_id
= BlueFS::BDEV_NEWDB
;
790 } else if (src_dev_ids
.count(BlueFS::BDEV_WAL
)) {
791 dev_target_id
= BlueFS::BDEV_NEWWAL
;
793 cerr
<< "Unable to migrate Slow volume to new location, "
794 "please allocate new DB or WAL with "
795 "--bluefs-bdev-new-db(wal) command"
799 if(!dev_target
.empty() &&
800 realpath(dev_target
.c_str(), target_path
) == nullptr) {
801 cerr
<< "failed to retrieve absolute path for " << dev_target
802 << ": " << cpp_strerror(errno
)
807 BlueStore
bluestore(cct
.get(), path
);
// need_db is true for a new DB volume and false for a new WAL volume.
809 bool need_db
= dev_target_id
== BlueFS::BDEV_NEWDB
;
810 int r
= bluestore
.migrate_to_new_bluefs_device(
815 for(const auto& src
: src_devs
) {
816 if (src
.second
!= BlueFS::BDEV_SLOW
) {
817 cout
<< " device removed:" << src
.second
<< " " << src
.first
// Report the id of the volume that was actually created: BDEV_DB for a new
// DB volume, BDEV_WAL for a new WAL volume.
821 cout
<< " device added: "
822 << (need_db
? BlueFS::BDEV_DB
: BlueFS::BDEV_WAL
)
823 << " " << target_path
// Same id selection for the failure report.
826 cerr
<< "failed to migrate to new BlueFS device: "
827 << (need_db
? BlueFS::BDEV_DB
: BlueFS::BDEV_WAL
)
828 << " " << target_path
// free-dump / free-score: open the store with cold_open(), then query each
// requested allocator through the admin socket and print the reply.
// The admin socket is taken from g_ceph_context and asserted to be non-null.
834 } else if (action
== "free-dump" || action
== "free-score") {
835 AdminSocket
*admin_socket
= g_ceph_context
->get_admin_socket();
836 ceph_assert(admin_socket
);
// Maps the CLI action onto the verb used in the admin-socket command.
837 std::string action_name
= action
== "free-dump" ? "dump" : "score";
838 validate_path(cct
.get(), path
, false);
839 BlueStore
bluestore(cct
.get(), path
);
840 int r
= bluestore
.cold_open();
842 cerr
<< "error from cold_open: " << cpp_strerror(r
) << std::endl
;
846 for (auto alloc_name
: allocs_name
) {
847 ceph::bufferlist out
;
// The command is a JSON object whose "prefix" is
// "bluestore allocator <dump|score> <allocator name>".
848 bool b
= admin_socket
->execute_command(
849 "{\"prefix\": \"bluestore allocator " + action_name
+ " " + alloc_name
+ "\"}", out
);
851 cerr
<< "failure querying '" << alloc_name
<< "'" << std::endl
;
// Print the allocator name followed by the raw reply bytes.
854 cout
<< alloc_name
<< ":" << std::endl
;
855 cout
<< std::string(out
.c_str(),out
.length()) << std::endl
;
// Counterpart of the cold_open() call above.
858 bluestore
.cold_close();
// Fallback diagnostic naming an action that matched none of the branches
// above (the enclosing else is not visible in this excerpt).
860 cerr
<< "unrecognized action " << action
<< std::endl
;