]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/bluestore/bluestore_tool.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / os / bluestore / bluestore_tool.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include <boost/program_options/variables_map.hpp>
5#include <boost/program_options/parsers.hpp>
6
7#include <stdio.h>
8#include <string.h>
20effc67 9#include <filesystem>
7c673cae 10#include <iostream>
20effc67 11#include <fstream>
7c673cae
FG
12#include <time.h>
13#include <fcntl.h>
14#include <unistd.h>
15#include "global/global_init.h"
16#include "common/ceph_argparse.h"
17#include "include/stringify.h"
18#include "common/errno.h"
3efd9988 19#include "common/safe_io.h"
7c673cae
FG
20
21#include "os/bluestore/BlueFS.h"
22#include "os/bluestore/BlueStore.h"
eafe8130 23#include "common/admin_socket.h"
f67539c2 24#include "kv/RocksDBStore.h"
7c673cae 25
20effc67
TL
26using namespace std;
27namespace fs = std::filesystem;
7c673cae
FG
28namespace po = boost::program_options;
29
30void usage(po::options_description &desc)
31{
32 cout << desc << std::endl;
33}
34
c07f9fc5
FG
35void validate_path(CephContext *cct, const string& path, bool bluefs)
36{
37 BlueStore bluestore(cct, path);
38 string type;
39 int r = bluestore.read_meta("type", &type);
40 if (r < 0) {
41 cerr << "failed to load os-type: " << cpp_strerror(r) << std::endl;
42 exit(EXIT_FAILURE);
43 }
44 if (type != "bluestore") {
45 cerr << "expected bluestore, but type is " << type << std::endl;
46 exit(EXIT_FAILURE);
47 }
48 if (!bluefs) {
49 return;
50 }
51
52 string kv_backend;
53 r = bluestore.read_meta("kv_backend", &kv_backend);
54 if (r < 0) {
55 cerr << "failed to load kv_backend: " << cpp_strerror(r) << std::endl;
56 exit(EXIT_FAILURE);
57 }
58 if (kv_backend != "rocksdb") {
59 cerr << "expect kv_backend to be rocksdb, but is " << kv_backend
60 << std::endl;
61 exit(EXIT_FAILURE);
62 }
63 string bluefs_enabled;
64 r = bluestore.read_meta("bluefs", &bluefs_enabled);
65 if (r < 0) {
66 cerr << "failed to load do_bluefs: " << cpp_strerror(r) << std::endl;
67 exit(EXIT_FAILURE);
68 }
69 if (bluefs_enabled != "1") {
70 cerr << "bluefs not enabled for rocksdb" << std::endl;
71 exit(EXIT_FAILURE);
72 }
73}
74
f64942e4
AA
75const char* find_device_path(
76 int id,
77 CephContext *cct,
78 const vector<string>& devs)
79{
80 for (auto& i : devs) {
81 bluestore_bdev_label_t label;
82 int r = BlueStore::_read_bdev_label(cct, i, &label);
83 if (r < 0) {
84 cerr << "unable to read label for " << i << ": "
85 << cpp_strerror(r) << std::endl;
86 exit(EXIT_FAILURE);
87 }
88 if ((id == BlueFS::BDEV_SLOW && label.description == "main") ||
89 (id == BlueFS::BDEV_DB && label.description == "bluefs db") ||
90 (id == BlueFS::BDEV_WAL && label.description == "bluefs wal")) {
91 return i.c_str();
92 }
93 }
94 return nullptr;
95}
96
11fdf7f2 97void parse_devices(
3efd9988 98 CephContext *cct,
11fdf7f2
TL
99 const vector<string>& devs,
100 map<string, int>* got,
101 bool* has_db,
102 bool* has_wal)
3efd9988 103{
3efd9988 104 string main;
11fdf7f2
TL
105 bool was_db = false;
106 if (has_wal) {
107 *has_wal = false;
108 }
109 if (has_db) {
110 *has_db = false;
111 }
112 for (auto& d : devs) {
3efd9988 113 bluestore_bdev_label_t label;
11fdf7f2 114 int r = BlueStore::_read_bdev_label(cct, d, &label);
3efd9988 115 if (r < 0) {
11fdf7f2 116 cerr << "unable to read label for " << d << ": "
3efd9988
FG
117 << cpp_strerror(r) << std::endl;
118 exit(EXIT_FAILURE);
119 }
120 int id = -1;
121 if (label.description == "main")
11fdf7f2
TL
122 main = d;
123 else if (label.description == "bluefs db") {
3efd9988 124 id = BlueFS::BDEV_DB;
11fdf7f2
TL
125 was_db = true;
126 if (has_db) {
127 *has_db = true;
128 }
129 }
130 else if (label.description == "bluefs wal") {
3efd9988 131 id = BlueFS::BDEV_WAL;
11fdf7f2
TL
132 if (has_wal) {
133 *has_wal = true;
3efd9988
FG
134 }
135 }
11fdf7f2
TL
136 if (id >= 0) {
137 got->emplace(d, id);
138 }
3efd9988
FG
139 }
140 if (main.length()) {
11fdf7f2
TL
141 int id = was_db ? BlueFS::BDEV_SLOW : BlueFS::BDEV_DB;
142 got->emplace(main, id);
143 }
144}
145
146void add_devices(
147 BlueFS *fs,
148 CephContext *cct,
149 const vector<string>& devs)
150{
151 map<string, int> got;
152 parse_devices(cct, devs, &got, nullptr, nullptr);
153 for(auto e : got) {
154 char target_path[PATH_MAX] = "";
155 if(!e.first.empty()) {
156 if (realpath(e.first.c_str(), target_path) == nullptr) {
157 cerr << "failed to retrieve absolute path for " << e.first
158 << ": " << cpp_strerror(errno)
159 << std::endl;
160 }
161 }
162
163 cout << " slot " << e.second << " " << e.first;
164 if (target_path[0]) {
165 cout << " -> " << target_path;
166 }
167 cout << std::endl;
f67539c2
TL
168
169 // We provide no shared allocator which prevents bluefs to operate in R/W mode.
170 // Read-only mode isn't strictly enforced though
171 int r = fs->add_block_device(e.second, e.first, false, 0); // 'reserved' is fake
3efd9988 172 if (r < 0) {
11fdf7f2 173 cerr << "unable to open " << e.first << ": " << cpp_strerror(r) << std::endl;
3efd9988
FG
174 exit(EXIT_FAILURE);
175 }
176 }
11fdf7f2
TL
177}
178
f67539c2 179BlueFS *open_bluefs_readonly(
11fdf7f2
TL
180 CephContext *cct,
181 const string& path,
182 const vector<string>& devs)
183{
184 validate_path(cct, path, true);
185 BlueFS *fs = new BlueFS(cct);
186
187 add_devices(fs, cct, devs);
3efd9988
FG
188
189 int r = fs->mount();
190 if (r < 0) {
191 cerr << "unable to mount bluefs: " << cpp_strerror(r)
192 << std::endl;
193 exit(EXIT_FAILURE);
194 }
195 return fs;
196}
197
11fdf7f2
TL
198void log_dump(
199 CephContext *cct,
200 const string& path,
201 const vector<string>& devs)
202{
f67539c2
TL
203 validate_path(cct, path, true);
204 BlueFS *fs = new BlueFS(cct);
205
206 add_devices(fs, cct, devs);
11fdf7f2
TL
207 int r = fs->log_dump();
208 if (r < 0) {
209 cerr << "log_dump failed" << ": "
210 << cpp_strerror(r) << std::endl;
211 exit(EXIT_FAILURE);
212 }
213
214 delete fs;
215}
216
// Append to `devs` every one of <path>/block, <path>/block.wal and
// <path>/block.db (checked in that order) for which stat() succeeds.
// A notice is printed to stdout first.
void inferring_bluefs_devices(std::vector<std::string>& devs, std::string& path)
{
  std::cout << "inferring bluefs devices from bluestore path" << std::endl;

  static const char *const candidates[] = {"block", "block.wal", "block.db"};
  for (const char *name : candidates) {
    const std::string candidate = path + "/" + name;
    struct stat info;
    if (::stat(candidate.c_str(), &info) != 0) {
      continue;  // not present (or not accessible): skip it
    }
    devs.push_back(candidate);
  }
}
228
20effc67
TL
229static void bluefs_import(
230 const string& input_file,
231 const string& dest_file,
232 CephContext *cct,
233 const string& path,
234 const vector<string>& devs)
235{
236 int r;
237 std::ifstream f(input_file.c_str(), std::ifstream::binary);
238 if (!f) {
239 r = -errno;
240 cerr << "open " << input_file.c_str() << " failed: " << cpp_strerror(r) << std::endl;
241 exit(EXIT_FAILURE);
242 }
243 BlueStore bluestore(cct, path);
244 KeyValueDB *db_ptr;
245 r = bluestore.open_db_environment(&db_ptr, false);
246 if (r < 0) {
247 cerr << "error preparing db environment: " << cpp_strerror(r) << std::endl;
248 exit(EXIT_FAILURE);
249 }
250 BlueFS* bs = bluestore.get_bluefs();
251
252 BlueFS::FileWriter *h;
253 fs::path file_path(dest_file);
254 const string dir = file_path.parent_path().native();
255 const string file_name = file_path.filename().native();
256 bs->open_for_write(dir, file_name, &h, false);
257 uint64_t max_block = 4096;
258 char buf[max_block];
259 uint64_t left = fs::file_size(input_file.c_str());
260 uint64_t size = 0;
261 while (left) {
262 size = std::min(max_block, left);
263 f.read(buf, size);
264 h->append(buf, size);
265 left -= size;
266 }
267 f.close();
268 bs->fsync(h);
269 bs->close_writer(h);
270 bluestore.close_db_environment();
271 return;
272}
273
7c673cae
FG
274int main(int argc, char **argv)
275{
276 string out_dir;
20effc67 277 string osd_instance;
7c673cae 278 vector<string> devs;
11fdf7f2
TL
279 vector<string> devs_source;
280 string dev_target;
7c673cae
FG
281 string path;
282 string action;
3efd9988 283 string log_file;
20effc67
TL
284 string input_file;
285 string dest_file;
3efd9988 286 string key, value;
eafe8130 287 vector<string> allocs_name;
f67539c2
TL
288 string empty_sharding(1, '\0');
289 string new_sharding = empty_sharding;
290 string resharding_ctrl;
3efd9988 291 int log_level = 30;
31f18b77 292 bool fsck_deep = false;
7c673cae
FG
293 po::options_description po_options("Options");
294 po_options.add_options()
295 ("help,h", "produce help message")
20effc67 296 (",i", po::value<string>(&osd_instance), "OSD instance. Requires access to monitor/ceph.conf")
7c673cae
FG
297 ("path", po::value<string>(&path), "bluestore path")
298 ("out-dir", po::value<string>(&out_dir), "output directory")
20effc67
TL
299 ("input-file", po::value<string>(&input_file), "import file")
300 ("dest-file", po::value<string>(&dest_file), "destination file")
3efd9988
FG
301 ("log-file,l", po::value<string>(&log_file), "log file")
302 ("log-level", po::value<int>(&log_level), "log level (30=most, 20=lots, 10=some, 1=little)")
7c673cae 303 ("dev", po::value<vector<string>>(&devs), "device(s)")
11fdf7f2
TL
304 ("devs-source", po::value<vector<string>>(&devs_source), "bluefs-dev-migrate source device(s)")
305 ("dev-target", po::value<string>(&dev_target), "target/resulting device")
7c673cae 306 ("deep", po::value<bool>(&fsck_deep), "deep fsck (read all data)")
3efd9988
FG
307 ("key,k", po::value<string>(&key), "label metadata key name")
308 ("value,v", po::value<string>(&value), "label metadata value")
20effc67 309 ("allocator", po::value<vector<string>>(&allocs_name), "allocator to inspect: 'block'/'bluefs-wal'/'bluefs-db'")
f67539c2
TL
310 ("sharding", po::value<string>(&new_sharding), "new sharding to apply")
311 ("resharding-ctrl", po::value<string>(&resharding_ctrl), "gives control over resharding procedure details")
7c673cae
FG
312 ;
313 po::options_description po_positional("Positional options");
314 po_positional.add_options()
eafe8130
TL
315 ("command", po::value<string>(&action),
316 "fsck, "
20effc67
TL
317 "qfsck, "
318 "allocmap, "
319 "restore_cfb, "
eafe8130
TL
320 "repair, "
321 "quick-fix, "
322 "bluefs-export, "
20effc67 323 "bluefs-import, "
eafe8130
TL
324 "bluefs-bdev-sizes, "
325 "bluefs-bdev-expand, "
326 "bluefs-bdev-new-db, "
327 "bluefs-bdev-new-wal, "
328 "bluefs-bdev-migrate, "
329 "show-label, "
330 "set-label-key, "
331 "rm-label-key, "
332 "prime-osd-dir, "
333 "bluefs-log-dump, "
334 "free-dump, "
f6b5b4d7 335 "free-score, "
20effc67 336 "free-fragmentation, "
f67539c2
TL
337 "bluefs-stats, "
338 "reshard, "
339 "show-sharding")
7c673cae
FG
340 ;
341 po::options_description po_all("All options");
342 po_all.add(po_options).add(po_positional);
7c673cae
FG
343
344 vector<string> ceph_option_strings;
345 po::variables_map vm;
346 try {
347 po::parsed_options parsed =
20effc67 348 po::command_line_parser(argc, argv).options(po_all).allow_unregistered().run();
7c673cae
FG
349 po::store( parsed, vm);
350 po::notify(vm);
351 ceph_option_strings = po::collect_unrecognized(parsed.options,
352 po::include_positional);
353 } catch(po::error &e) {
354 std::cerr << e.what() << std::endl;
c07f9fc5 355 exit(EXIT_FAILURE);
7c673cae 356 }
11fdf7f2
TL
357 // normalize path (remove ending '/' if any)
358 if (path.size() > 1 && *(path.end() - 1) == '/') {
359 path.resize(path.size() - 1);
360 }
7c673cae
FG
361 if (vm.count("help")) {
362 usage(po_all);
c07f9fc5 363 exit(EXIT_SUCCESS);
7c673cae 364 }
20effc67
TL
365
366 vector<const char*> args;
367 if (log_file.size()) {
368 args.push_back("--log-file");
369 args.push_back(log_file.c_str());
370 static char ll[10];
371 snprintf(ll, sizeof(ll), "%d", log_level);
372 args.push_back("--debug-bluestore");
373 args.push_back(ll);
374 args.push_back("--debug-bluefs");
375 args.push_back(ll);
376 args.push_back("--debug-rocksdb");
377 args.push_back(ll);
378 } else {
379 // do not write to default-named log "osd.x.log" if --log-file is not provided
380 if (!osd_instance.empty()) {
381 args.push_back("--no-log-to-file");
382 }
383 }
384
385 if (!osd_instance.empty()) {
386 args.push_back("-i");
387 args.push_back(osd_instance.c_str());
388 }
389 args.push_back("--no-log-to-stderr");
390 args.push_back("--err-to-stderr");
391
392 for (auto& i : ceph_option_strings) {
393 args.push_back(i.c_str());
394 }
395 auto cct = global_init(NULL, args, osd_instance.empty() ? CEPH_ENTITY_TYPE_CLIENT : CEPH_ENTITY_TYPE_OSD,
396 CODE_ENVIRONMENT_UTILITY,
397 osd_instance.empty() ? CINIT_FLAG_NO_DEFAULT_CONFIG_FILE : 0);
398
399 common_init_finish(cct.get());
400 if (action.empty()) {
401 // if action ("command") is not yet defined try to use first param as action
402 if (args.size() > 0) {
403 if (args.size() == 1) {
404 // treat first unparsed value as action
405 action = args[0];
406 } else {
407 std::cerr << "Unknown options: " << args << std::endl;
408 exit(EXIT_FAILURE);
409 }
410 }
411 } else {
412 if (args.size() != 0) {
413 std::cerr << "Unknown options: " << args << std::endl;
414 exit(EXIT_FAILURE);
415 }
416 }
417
7c673cae
FG
418 if (action.empty()) {
419 cerr << "must specify an action; --help for help" << std::endl;
c07f9fc5 420 exit(EXIT_FAILURE);
7c673cae
FG
421 }
422
20effc67
TL
423 if (!osd_instance.empty()) {
424 // when "-i" is provided "osd data" can be used as path
425 if (path.size() == 0) {
426 path = cct->_conf.get_val<std::string>("osd_data");
427 }
428 }
429
430 if (action == "fsck" || action == "repair" || action == "quick-fix" || action == "allocmap" || action == "qfsck" || action == "restore_cfb") {
7c673cae
FG
431 if (path.empty()) {
432 cerr << "must specify bluestore path" << std::endl;
c07f9fc5 433 exit(EXIT_FAILURE);
7c673cae
FG
434 }
435 }
3efd9988
FG
436 if (action == "prime-osd-dir") {
437 if (devs.size() != 1) {
438 cerr << "must specify the main bluestore device" << std::endl;
439 exit(EXIT_FAILURE);
440 }
441 if (path.empty()) {
442 cerr << "must specify osd dir to prime" << std::endl;
443 exit(EXIT_FAILURE);
444 }
445 }
446 if (action == "set-label-key" ||
447 action == "rm-label-key") {
448 if (devs.size() != 1) {
449 cerr << "must specify the main bluestore device" << std::endl;
450 exit(EXIT_FAILURE);
451 }
452 if (key.size() == 0) {
453 cerr << "must specify a key name with -k" << std::endl;
454 exit(EXIT_FAILURE);
455 }
456 if (action == "set-label-key" && value.size() == 0) {
457 cerr << "must specify a value with -v" << std::endl;
458 exit(EXIT_FAILURE);
459 }
460 }
c07f9fc5 461 if (action == "show-label") {
7c673cae
FG
462 if (devs.empty() && path.empty()) {
463 cerr << "must specify bluestore path *or* raw device(s)" << std::endl;
c07f9fc5 464 exit(EXIT_FAILURE);
7c673cae 465 }
11fdf7f2
TL
466 if (devs.empty())
467 inferring_bluefs_devices(devs, path);
7c673cae 468 }
20effc67
TL
469 if (action == "bluefs-export" ||
470 action == "bluefs-import" ||
471 action == "bluefs-log-dump") {
c07f9fc5
FG
472 if (path.empty()) {
473 cerr << "must specify bluestore path" << std::endl;
474 exit(EXIT_FAILURE);
475 }
11fdf7f2 476 if ((action == "bluefs-export") && out_dir.empty()) {
c07f9fc5
FG
477 cerr << "must specify out-dir to export bluefs" << std::endl;
478 exit(EXIT_FAILURE);
479 }
20effc67
TL
480 if (action == "bluefs-import" && input_file.empty()) {
481 cerr << "must specify input_file to import bluefs" << std::endl;
482 exit(EXIT_FAILURE);
483 }
484 if (action == "bluefs-import" && dest_file.empty()) {
485 cerr << "must specify dest_file to import bluefs" << std::endl;
486 exit(EXIT_FAILURE);
487 }
11fdf7f2 488 inferring_bluefs_devices(devs, path);
c07f9fc5 489 }
3efd9988
FG
490 if (action == "bluefs-bdev-sizes" || action == "bluefs-bdev-expand") {
491 if (path.empty()) {
492 cerr << "must specify bluestore path" << std::endl;
493 exit(EXIT_FAILURE);
494 }
11fdf7f2
TL
495 inferring_bluefs_devices(devs, path);
496 }
497 if (action == "bluefs-bdev-new-db" || action == "bluefs-bdev-new-wal") {
498 if (path.empty()) {
499 cerr << "must specify bluestore path" << std::endl;
500 exit(EXIT_FAILURE);
501 }
502 if (dev_target.empty()) {
503 cout << "NOTICE: --dev-target option omitted, will allocate as a file" << std::endl;
504 }
505 inferring_bluefs_devices(devs, path);
506 }
507 if (action == "bluefs-bdev-migrate") {
508 if (path.empty()) {
509 cerr << "must specify bluestore path" << std::endl;
510 exit(EXIT_FAILURE);
511 }
512 inferring_bluefs_devices(devs, path);
513 if (devs_source.size() == 0) {
514 cerr << "must specify source devices with --devs-source" << std::endl;
515 exit(EXIT_FAILURE);
516 }
517 if (dev_target.empty()) {
518 cerr << "must specify target device with --dev-target" << std::endl;
519 exit(EXIT_FAILURE);
3efd9988
FG
520 }
521 }
20effc67 522 if (action == "free-score" || action == "free-dump" || action == "free-fragmentation") {
eafe8130
TL
523 if (path.empty()) {
524 cerr << "must specify bluestore path" << std::endl;
525 exit(EXIT_FAILURE);
526 }
527 for (auto name : allocs_name) {
528 if (!name.empty() &&
529 name != "block" &&
530 name != "bluefs-db" &&
20effc67 531 name != "bluefs-wal") {
eafe8130
TL
532 cerr << "unknown allocator '" << name << "'" << std::endl;
533 exit(EXIT_FAILURE);
534 }
535 }
536 if (allocs_name.empty())
20effc67 537 allocs_name = vector<string>{"block", "bluefs-db", "bluefs-wal"};
eafe8130 538 }
f67539c2
TL
539 if (action == "reshard") {
540 if (path.empty()) {
541 cerr << "must specify bluestore path" << std::endl;
542 exit(EXIT_FAILURE);
543 }
544 if (new_sharding == empty_sharding) {
545 cerr << "must provide reshard specification" << std::endl;
546 exit(EXIT_FAILURE);
547 }
548 }
3efd9988 549
20effc67
TL
550 if (action == "restore_cfb") {
551#ifndef CEPH_BLUESTORE_TOOL_RESTORE_ALLOCATION
552 cerr << action << " bluestore.restore_cfb is not supported!!! " << std::endl;
553 exit(EXIT_FAILURE);
554#else
555 cout << action << " bluestore.restore_cfb" << std::endl;
556 validate_path(cct.get(), path, false);
557 BlueStore bluestore(cct.get(), path);
558 int r = bluestore.push_allocation_to_rocksdb();
559 if (r < 0) {
560 cerr << action << " failed: " << cpp_strerror(r) << std::endl;
561 exit(EXIT_FAILURE);
562 } else {
563 cout << action << " success" << std::endl;
564 }
565#endif
7c673cae 566 }
20effc67
TL
567 else if (action == "allocmap") {
568#ifdef CEPH_BLUESTORE_TOOL_DISABLE_ALLOCMAP
569 cerr << action << " bluestore.allocmap is not supported!!! " << std::endl;
570 exit(EXIT_FAILURE);
571#else
572 cout << action << " bluestore.allocmap" << std::endl;
573 validate_path(cct.get(), path, false);
574 BlueStore bluestore(cct.get(), path);
575 int r = bluestore.read_allocation_from_drive_for_bluestore_tool();
576 if (r < 0) {
577 cerr << action << " failed: " << cpp_strerror(r) << std::endl;
578 exit(EXIT_FAILURE);
579 } else {
580 cout << action << " success" << std::endl;
581 }
582#endif
583 }
584 else if( action == "qfsck" ) {
585#ifndef CEPH_BLUESTORE_TOOL_RESTORE_ALLOCATION
586 cerr << action << " bluestore.qfsck is not supported!!! " << std::endl;
587 exit(EXIT_FAILURE);
588#else
589 cout << action << " bluestore.quick-fsck" << std::endl;
590 validate_path(cct.get(), path, false);
591 BlueStore bluestore(cct.get(), path);
592 int r = bluestore.read_allocation_from_drive_for_bluestore_tool();
593 if (r < 0) {
594 cerr << action << " failed: " << cpp_strerror(r) << std::endl;
595 exit(EXIT_FAILURE);
596 } else {
597 cout << action << " success" << std::endl;
598 }
599#endif
600 }
601 else if (action == "fsck" ||
eafe8130
TL
602 action == "repair" ||
603 action == "quick-fix") {
c07f9fc5 604 validate_path(cct.get(), path, false);
7c673cae 605 BlueStore bluestore(cct.get(), path);
3efd9988
FG
606 int r;
607 if (action == "fsck") {
608 r = bluestore.fsck(fsck_deep);
eafe8130 609 } else if (action == "repair") {
3efd9988 610 r = bluestore.repair(fsck_deep);
eafe8130
TL
611 } else {
612 r = bluestore.quick_fix();
3efd9988 613 }
7c673cae 614 if (r < 0) {
9f95a23c 615 cerr << action << " failed: " << cpp_strerror(r) << std::endl;
c07f9fc5 616 exit(EXIT_FAILURE);
11fdf7f2 617 } else if (r > 0) {
9f95a23c 618 cerr << action << " status: remaining " << r << " error(s) and warning(s)" << std::endl;
11fdf7f2
TL
619 exit(EXIT_FAILURE);
620 } else {
621 cout << action << " success" << std::endl;
7c673cae 622 }
3efd9988
FG
623 }
624 else if (action == "prime-osd-dir") {
625 bluestore_bdev_label_t label;
626 int r = BlueStore::_read_bdev_label(cct.get(), devs.front(), &label);
627 if (r < 0) {
628 cerr << "failed to read label for " << devs.front() << ": "
629 << cpp_strerror(r) << std::endl;
630 exit(EXIT_FAILURE);
631 }
632
633 // kludge some things into the map that we want to populate into
634 // target dir
635 label.meta["path_block"] = devs.front();
636 label.meta["type"] = "bluestore";
637 label.meta["fsid"] = stringify(label.osd_uuid);
638
639 for (auto kk : {
640 "whoami",
641 "osd_key",
3efd9988
FG
642 "ceph_fsid",
643 "fsid",
644 "type",
645 "ready" }) {
646 string k = kk;
647 auto i = label.meta.find(k);
648 if (i == label.meta.end()) {
649 continue;
650 }
651 string p = path + "/" + k;
652 string v = i->second;
653 if (k == "osd_key") {
654 p = path + "/keyring";
655 v = "[osd.";
656 v += label.meta["whoami"];
657 v += "]\nkey = " + i->second;
658 }
11fdf7f2
TL
659 v += "\n";
660 int fd = ::open(p.c_str(), O_CREAT|O_TRUNC|O_WRONLY|O_CLOEXEC, 0600);
661 if (fd < 0) {
662 cerr << "error writing " << p << ": " << cpp_strerror(errno)
663 << std::endl;
664 exit(EXIT_FAILURE);
3efd9988 665 }
11fdf7f2
TL
666 int r = safe_write(fd, v.c_str(), v.size());
667 if (r < 0) {
668 cerr << "error writing to " << p << ": " << cpp_strerror(errno)
669 << std::endl;
670 exit(EXIT_FAILURE);
671 }
672 ::close(fd);
3efd9988 673 }
7c673cae
FG
674 }
675 else if (action == "show-label") {
676 JSONFormatter jf(true);
3efd9988 677 jf.open_object_section("devices");
7c673cae
FG
678 for (auto& i : devs) {
679 bluestore_bdev_label_t label;
680 int r = BlueStore::_read_bdev_label(cct.get(), i, &label);
681 if (r < 0) {
682 cerr << "unable to read label for " << i << ": "
683 << cpp_strerror(r) << std::endl;
c07f9fc5 684 exit(EXIT_FAILURE);
7c673cae
FG
685 }
686 jf.open_object_section(i.c_str());
687 label.dump(&jf);
688 jf.close_section();
689 }
690 jf.close_section();
691 jf.flush(cout);
692 }
3efd9988
FG
693 else if (action == "set-label-key") {
694 bluestore_bdev_label_t label;
695 int r = BlueStore::_read_bdev_label(cct.get(), devs.front(), &label);
696 if (r < 0) {
697 cerr << "unable to read label for " << devs.front() << ": "
698 << cpp_strerror(r) << std::endl;
699 exit(EXIT_FAILURE);
7c673cae 700 }
f64942e4
AA
701 if (key == "size") {
702 label.size = strtoull(value.c_str(), nullptr, 10);
703 } else if (key =="osd_uuid") {
704 label.osd_uuid.parse(value.c_str());
705 } else if (key =="btime") {
706 uint64_t epoch;
707 uint64_t nsec;
708 int r = utime_t::parse_date(value.c_str(), &epoch, &nsec);
709 if (r == 0) {
710 label.btime = utime_t(epoch, nsec);
711 }
712 } else if (key =="description") {
713 label.description = value;
714 } else {
715 label.meta[key] = value;
716 }
3efd9988
FG
717 r = BlueStore::_write_bdev_label(cct.get(), devs.front(), label);
718 if (r < 0) {
719 cerr << "unable to write label for " << devs.front() << ": "
720 << cpp_strerror(r) << std::endl;
721 exit(EXIT_FAILURE);
7c673cae 722 }
3efd9988
FG
723 }
724 else if (action == "rm-label-key") {
725 bluestore_bdev_label_t label;
726 int r = BlueStore::_read_bdev_label(cct.get(), devs.front(), &label);
7c673cae 727 if (r < 0) {
3efd9988
FG
728 cerr << "unable to read label for " << devs.front() << ": "
729 << cpp_strerror(r) << std::endl;
730 exit(EXIT_FAILURE);
731 }
732 if (!label.meta.count(key)) {
733 cerr << "key '" << key << "' not present" << std::endl;
734 exit(EXIT_FAILURE);
735 }
736 label.meta.erase(key);
737 r = BlueStore::_write_bdev_label(cct.get(), devs.front(), label);
738 if (r < 0) {
739 cerr << "unable to write label for " << devs.front() << ": "
740 << cpp_strerror(r) << std::endl;
c07f9fc5 741 exit(EXIT_FAILURE);
7c673cae 742 }
3efd9988
FG
743 }
744 else if (action == "bluefs-bdev-sizes") {
1911f103
TL
745 BlueStore bluestore(cct.get(), path);
746 bluestore.dump_bluefs_sizes(cout);
3efd9988
FG
747 }
748 else if (action == "bluefs-bdev-expand") {
11fdf7f2
TL
749 BlueStore bluestore(cct.get(), path);
750 auto r = bluestore.expand_devices(cout);
751 if (r <0) {
752 cerr << "failed to expand bluestore devices: "
753 << cpp_strerror(r) << std::endl;
754 exit(EXIT_FAILURE);
3efd9988 755 }
3efd9988 756 }
20effc67
TL
757 else if (action == "bluefs-import") {
758 bluefs_import(input_file, dest_file, cct.get(), path, devs);
759 }
3efd9988 760 else if (action == "bluefs-export") {
f67539c2 761 BlueFS *fs = open_bluefs_readonly(cct.get(), path, devs);
7c673cae
FG
762
763 vector<string> dirs;
3efd9988 764 int r = fs->readdir("", &dirs);
7c673cae
FG
765 if (r < 0) {
766 cerr << "readdir in root failed: " << cpp_strerror(r) << std::endl;
c07f9fc5 767 exit(EXIT_FAILURE);
7c673cae 768 }
11fdf7f2
TL
769
770 if (::access(out_dir.c_str(), F_OK)) {
771 r = ::mkdir(out_dir.c_str(), 0755);
772 if (r < 0) {
773 r = -errno;
774 cerr << "mkdir " << out_dir << " failed: " << cpp_strerror(r) << std::endl;
775 exit(EXIT_FAILURE);
776 }
777 }
778
7c673cae
FG
779 for (auto& dir : dirs) {
780 if (dir[0] == '.')
781 continue;
782 cout << dir << "/" << std::endl;
783 vector<string> ls;
3efd9988 784 r = fs->readdir(dir, &ls);
7c673cae
FG
785 if (r < 0) {
786 cerr << "readdir " << dir << " failed: " << cpp_strerror(r) << std::endl;
c07f9fc5 787 exit(EXIT_FAILURE);
7c673cae
FG
788 }
789 string full = out_dir + "/" + dir;
11fdf7f2
TL
790 if (::access(full.c_str(), F_OK)) {
791 r = ::mkdir(full.c_str(), 0755);
792 if (r < 0) {
793 r = -errno;
794 cerr << "mkdir " << full << " failed: " << cpp_strerror(r) << std::endl;
795 exit(EXIT_FAILURE);
796 }
7c673cae
FG
797 }
798 for (auto& file : ls) {
799 if (file[0] == '.')
800 continue;
801 cout << dir << "/" << file << std::endl;
802 uint64_t size;
803 utime_t mtime;
3efd9988 804 r = fs->stat(dir, file, &size, &mtime);
7c673cae
FG
805 if (r < 0) {
806 cerr << "stat " << file << " failed: " << cpp_strerror(r) << std::endl;
c07f9fc5 807 exit(EXIT_FAILURE);
7c673cae
FG
808 }
809 string path = out_dir + "/" + dir + "/" + file;
91327a77 810 int fd = ::open(path.c_str(), O_CREAT|O_WRONLY|O_TRUNC|O_CLOEXEC, 0644);
7c673cae
FG
811 if (fd < 0) {
812 r = -errno;
813 cerr << "open " << path << " failed: " << cpp_strerror(r) << std::endl;
c07f9fc5 814 exit(EXIT_FAILURE);
7c673cae 815 }
7c673cae
FG
816 if (size > 0) {
817 BlueFS::FileReader *h;
3efd9988 818 r = fs->open_for_read(dir, file, &h, false);
7c673cae
FG
819 if (r < 0) {
820 cerr << "open_for_read " << dir << "/" << file << " failed: "
821 << cpp_strerror(r) << std::endl;
c07f9fc5 822 exit(EXIT_FAILURE);
7c673cae
FG
823 }
824 int pos = 0;
825 int left = size;
826 while (left) {
827 bufferlist bl;
f67539c2 828 r = fs->read(h, pos, left, &bl, NULL);
7c673cae
FG
829 if (r <= 0) {
830 cerr << "read " << dir << "/" << file << " from " << pos
831 << " failed: " << cpp_strerror(r) << std::endl;
c07f9fc5 832 exit(EXIT_FAILURE);
7c673cae
FG
833 }
834 int rc = bl.write_fd(fd);
835 if (rc < 0) {
836 cerr << "write to " << path << " failed: "
837 << cpp_strerror(r) << std::endl;
c07f9fc5 838 exit(EXIT_FAILURE);
7c673cae
FG
839 }
840 pos += r;
841 left -= r;
842 }
843 delete h;
844 }
845 ::close(fd);
846 }
847 }
3efd9988
FG
848 fs->umount();
849 delete fs;
11fdf7f2
TL
850 } else if (action == "bluefs-log-dump") {
851 log_dump(cct.get(), path, devs);
852 } else if (action == "bluefs-bdev-new-db" || action == "bluefs-bdev-new-wal") {
853 map<string, int> cur_devs_map;
854 bool need_db = action == "bluefs-bdev-new-db";
855
856 bool has_wal = false;
857 bool has_db = false;
11fdf7f2
TL
858
859 parse_devices(cct.get(), devs, &cur_devs_map, &has_db, &has_wal);
860
861 if (has_db && has_wal) {
862 cerr << "can't allocate new device, both WAL and DB exist"
863 << std::endl;
864 exit(EXIT_FAILURE);
865 } else if (need_db && has_db) {
866 cerr << "can't allocate new DB device, already exists"
867 << std::endl;
868 exit(EXIT_FAILURE);
869 } else if (!need_db && has_wal) {
870 cerr << "can't allocate new WAL device, already exists"
871 << std::endl;
872 exit(EXIT_FAILURE);
11fdf7f2
TL
873 }
874
20effc67
TL
875 auto [target_path, has_size_spec] =
876 [&dev_target]() -> std::pair<string, bool> {
877 if (dev_target.empty()) {
878 return {"", false};
879 }
880 std::error_code ec;
881 fs::path target_path = fs::weakly_canonical(fs::path{dev_target}, ec);
882 if (ec) {
883 cerr << "failed to retrieve absolute path for " << dev_target
884 << ": " << ec.message()
885 << std::endl;
886 exit(EXIT_FAILURE);
887 }
888 return {target_path.native(),
889 (fs::exists(target_path) &&
890 fs::is_regular_file(target_path) &&
891 fs::file_size(target_path) > 0)};
892 }();
522d829b 893 // Attach either DB or WAL volume, create if needed
522d829b 894 // check if we need additional size specification
20effc67 895 if (!has_size_spec) {
522d829b
TL
896 if (need_db && cct->_conf->bluestore_block_db_size == 0) {
897 cerr << "Might need DB size specification, "
898 "please set Ceph bluestore-block-db-size config parameter "
899 << std::endl;
20effc67 900 return EXIT_FAILURE;
522d829b
TL
901 } else if (!need_db && cct->_conf->bluestore_block_wal_size == 0) {
902 cerr << "Might need WAL size specification, "
903 "please set Ceph bluestore-block-wal-size config parameter "
904 << std::endl;
20effc67 905 return EXIT_FAILURE;
522d829b
TL
906 }
907 }
20effc67
TL
908 BlueStore bluestore(cct.get(), path);
909 int r = bluestore.add_new_bluefs_device(
910 need_db ? BlueFS::BDEV_NEWDB : BlueFS::BDEV_NEWWAL,
911 target_path);
522d829b 912 if (r == 0) {
20effc67
TL
913 cout << (need_db ? "DB" : "WAL") << " device added " << target_path
914 << std::endl;
915 } else {
916 cerr << "failed to add " << (need_db ? "DB" : "WAL") << " device:"
917 << cpp_strerror(r)
918 << std::endl;
11fdf7f2 919 }
522d829b 920 return r;
11fdf7f2
TL
921 } else if (action == "bluefs-bdev-migrate") {
922 map<string, int> cur_devs_map;
923 set<int> src_dev_ids;
924 map<string, int> src_devs;
925
926 parse_devices(cct.get(), devs, &cur_devs_map, nullptr, nullptr);
927 for (auto& s : devs_source) {
928 auto i = cur_devs_map.find(s);
929 if (i != cur_devs_map.end()) {
494da23a
TL
930 if (s == dev_target) {
931 cerr << "Device " << dev_target
932 << " is present in both source and target lists, omitted."
933 << std::endl;
934 } else {
935 src_devs.emplace(*i);
936 src_dev_ids.emplace(i->second);
937 }
11fdf7f2
TL
938 } else {
939 cerr << "can't migrate " << s << ", not a valid bluefs volume "
940 << std::endl;
941 exit(EXIT_FAILURE);
942 }
943 }
944
945 auto i = cur_devs_map.find(dev_target);
946
947 if (i != cur_devs_map.end()) {
948 // Migrate to an existing BlueFS volume
949
950 auto dev_target_id = i->second;
951 if (dev_target_id == BlueFS::BDEV_WAL) {
952 // currently we're unable to migrate to WAL device since there is no space
953 // reserved for superblock
954 cerr << "Migrate to WAL device isn't supported." << std::endl;
955 exit(EXIT_FAILURE);
956 }
957
11fdf7f2
TL
958 BlueStore bluestore(cct.get(), path);
959 int r = bluestore.migrate_to_existing_bluefs_device(
960 src_dev_ids,
961 dev_target_id);
962 if (r == 0) {
963 for(auto src : src_devs) {
964 if (src.second != BlueFS::BDEV_SLOW) {
965 cout << " device removed:" << src.second << " " << src.first
966 << std::endl;
967 }
968 }
969 } else {
494da23a 970 bool need_db = dev_target_id == BlueFS::BDEV_DB;
11fdf7f2 971 cerr << "failed to migrate to existing BlueFS device: "
494da23a 972 << (need_db ? BlueFS::BDEV_DB : BlueFS::BDEV_WAL)
11fdf7f2
TL
973 << " " << dev_target
974 << cpp_strerror(r)
975 << std::endl;
976 }
494da23a 977 return r;
11fdf7f2
TL
978 } else {
979 // Migrate to a new BlueFS volume
980 // via creating either DB or WAL volume
981 char target_path[PATH_MAX] = "";
982 int dev_target_id;
983 if (src_dev_ids.count(BlueFS::BDEV_DB)) {
984 // if we have DB device in the source list - we create DB device
985 // (and may be remove WAL).
986 dev_target_id = BlueFS::BDEV_NEWDB;
987 } else if (src_dev_ids.count(BlueFS::BDEV_WAL)) {
988 dev_target_id = BlueFS::BDEV_NEWWAL;
989 } else {
990 cerr << "Unable to migrate Slow volume to new location, "
991 "please allocate new DB or WAL with "
992 "--bluefs-bdev-new-db(wal) command"
993 << std::endl;
994 exit(EXIT_FAILURE);
995 }
996 if(!dev_target.empty() &&
997 realpath(dev_target.c_str(), target_path) == nullptr) {
998 cerr << "failed to retrieve absolute path for " << dev_target
999 << ": " << cpp_strerror(errno)
1000 << std::endl;
1001 exit(EXIT_FAILURE);
1002 }
1003
1004 BlueStore bluestore(cct.get(), path);
1005
1006 bool need_db = dev_target_id == BlueFS::BDEV_NEWDB;
1007 int r = bluestore.migrate_to_new_bluefs_device(
1008 src_dev_ids,
1009 dev_target_id,
1010 target_path);
1011 if (r == 0) {
1012 for(auto src : src_devs) {
1013 if (src.second != BlueFS::BDEV_SLOW) {
1014 cout << " device removed:" << src.second << " " << src.first
1015 << std::endl;
1016 }
1017 }
1018 cout << " device added: "
1019 << (need_db ? BlueFS::BDEV_DB : BlueFS::BDEV_DB)
1020 << " " << target_path
1021 << std::endl;
1022 } else {
1023 cerr << "failed to migrate to new BlueFS device: "
1024 << (need_db ? BlueFS::BDEV_DB : BlueFS::BDEV_DB)
1025 << " " << target_path
1026 << cpp_strerror(r)
1027 << std::endl;
1028 }
1029 return r;
1030 }
20effc67 1031 } else if (action == "free-dump" || action == "free-score" || action == "fragmentation") {
eafe8130
TL
1032 AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
1033 ceph_assert(admin_socket);
20effc67
TL
1034 std::string action_name = action == "free-dump" ? "dump" :
1035 action == "free-score" ? "score" : "fragmentation";
eafe8130
TL
1036 validate_path(cct.get(), path, false);
1037 BlueStore bluestore(cct.get(), path);
1038 int r = bluestore.cold_open();
1039 if (r < 0) {
1040 cerr << "error from cold_open: " << cpp_strerror(r) << std::endl;
1041 exit(EXIT_FAILURE);
1042 }
1043
1044 for (auto alloc_name : allocs_name) {
9f95a23c
TL
1045 ceph::bufferlist in, out;
1046 ostringstream err;
f6b5b4d7 1047 int r = admin_socket->execute_command(
9f95a23c
TL
1048 {"{\"prefix\": \"bluestore allocator " + action_name + " " + alloc_name + "\"}"},
1049 in, err, &out);
f6b5b4d7 1050 if (r != 0) {
eafe8130 1051 cerr << "failure querying '" << alloc_name << "'" << std::endl;
20effc67
TL
1052 } else {
1053 cout << alloc_name << ":" << std::endl;
1054 cout << std::string(out.c_str(),out.length()) << std::endl;
eafe8130 1055 }
eafe8130
TL
1056 }
1057
1058 bluestore.cold_close();
f6b5b4d7
TL
1059 } else if (action == "bluefs-stats") {
1060 AdminSocket* admin_socket = g_ceph_context->get_admin_socket();
1061 ceph_assert(admin_socket);
1062 validate_path(cct.get(), path, false);
1063 BlueStore bluestore(cct.get(), path);
1064 int r = bluestore.cold_open();
1065 if (r < 0) {
1066 cerr << "error from cold_open: " << cpp_strerror(r) << std::endl;
1067 exit(EXIT_FAILURE);
1068 }
1069
1070 ceph::bufferlist in, out;
1071 ostringstream err;
1072 r = admin_socket->execute_command(
1073 { "{\"prefix\": \"bluefs stats\"}" },
1074 in, err, &out);
1075 if (r != 0) {
1076 cerr << "failure querying bluefs stats: " << cpp_strerror(r) << std::endl;
1077 exit(EXIT_FAILURE);
1078 }
1079 cout << std::string(out.c_str(), out.length()) << std::endl;
1080 bluestore.cold_close();
f67539c2
TL
1081 } else if (action == "reshard") {
1082 auto get_ctrl = [&](size_t& val) {
1083 if (!resharding_ctrl.empty()) {
1084 size_t pos;
1085 std::string token;
1086 pos = resharding_ctrl.find('/');
1087 token = resharding_ctrl.substr(0, pos);
1088 if (pos != std::string::npos)
1089 resharding_ctrl.erase(0, pos + 1);
1090 else
1091 resharding_ctrl.erase();
1092 char* endptr;
1093 val = strtoll(token.c_str(), &endptr, 0);
1094 if (*endptr != '\0') {
1095 cerr << "invalid --resharding-ctrl. '" << token << "' is not a number" << std::endl;
1096 exit(EXIT_FAILURE);
1097 }
1098 }
1099 };
1100 BlueStore bluestore(cct.get(), path);
1101 KeyValueDB *db_ptr;
1102 RocksDBStore::resharding_ctrl ctrl;
1103 if (!resharding_ctrl.empty()) {
1104 get_ctrl(ctrl.bytes_per_iterator);
1105 get_ctrl(ctrl.keys_per_iterator);
1106 get_ctrl(ctrl.bytes_per_batch);
1107 get_ctrl(ctrl.keys_per_batch);
1108 if (!resharding_ctrl.empty()) {
1109 cerr << "extra chars in --resharding-ctrl" << std::endl;
1110 exit(EXIT_FAILURE);
1111 }
1112 }
1113 int r = bluestore.open_db_environment(&db_ptr, true);
1114 if (r < 0) {
1115 cerr << "error preparing db environment: " << cpp_strerror(r) << std::endl;
1116 exit(EXIT_FAILURE);
1117 }
1118 ceph_assert(db_ptr);
1119 RocksDBStore* rocks_db = dynamic_cast<RocksDBStore*>(db_ptr);
1120 ceph_assert(rocks_db);
1121 r = rocks_db->reshard(new_sharding, &ctrl);
1122 if (r < 0) {
1123 cerr << "error resharding: " << cpp_strerror(r) << std::endl;
1124 } else {
1125 cout << "reshard success" << std::endl;
1126 }
1127 bluestore.close_db_environment();
1128 } else if (action == "show-sharding") {
1129 BlueStore bluestore(cct.get(), path);
1130 KeyValueDB *db_ptr;
1131 int r = bluestore.open_db_environment(&db_ptr, false);
1132 if (r < 0) {
1133 cerr << "error preparing db environment: " << cpp_strerror(r) << std::endl;
1134 exit(EXIT_FAILURE);
1135 }
1136 ceph_assert(db_ptr);
1137 RocksDBStore* rocks_db = dynamic_cast<RocksDBStore*>(db_ptr);
1138 ceph_assert(rocks_db);
1139 std::string sharding;
1140 bool res = rocks_db->get_sharding(sharding);
1141 bluestore.close_db_environment();
1142 if (!res) {
1143 cerr << "failed to retrieve sharding def" << std::endl;
1144 exit(EXIT_FAILURE);
1145 }
1146 cout << sharding << std::endl;
7c673cae
FG
1147 } else {
1148 cerr << "unrecognized action " << action << std::endl;
1149 return 1;
1150 }
1151
1152 return 0;
1153}