]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include <boost/program_options/variables_map.hpp> | |
5 | #include <boost/program_options/parsers.hpp> | |
6 | ||
7 | #include <stdio.h> | |
8 | #include <string.h> | |
9 | #include <iostream> | |
10 | #include <time.h> | |
11 | #include <fcntl.h> | |
12 | #include <unistd.h> | |
13 | #include "global/global_init.h" | |
14 | #include "common/ceph_argparse.h" | |
15 | #include "include/stringify.h" | |
16 | #include "common/errno.h" | |
3efd9988 | 17 | #include "common/safe_io.h" |
7c673cae FG |
18 | |
19 | #include "os/bluestore/BlueFS.h" | |
20 | #include "os/bluestore/BlueStore.h" | |
eafe8130 | 21 | #include "common/admin_socket.h" |
f67539c2 | 22 | #include "kv/RocksDBStore.h" |
7c673cae FG |
23 | |
24 | namespace po = boost::program_options; | |
25 | ||
26 | void usage(po::options_description &desc) | |
27 | { | |
28 | cout << desc << std::endl; | |
29 | } | |
30 | ||
c07f9fc5 FG |
31 | void validate_path(CephContext *cct, const string& path, bool bluefs) |
32 | { | |
33 | BlueStore bluestore(cct, path); | |
34 | string type; | |
35 | int r = bluestore.read_meta("type", &type); | |
36 | if (r < 0) { | |
37 | cerr << "failed to load os-type: " << cpp_strerror(r) << std::endl; | |
38 | exit(EXIT_FAILURE); | |
39 | } | |
40 | if (type != "bluestore") { | |
41 | cerr << "expected bluestore, but type is " << type << std::endl; | |
42 | exit(EXIT_FAILURE); | |
43 | } | |
44 | if (!bluefs) { | |
45 | return; | |
46 | } | |
47 | ||
48 | string kv_backend; | |
49 | r = bluestore.read_meta("kv_backend", &kv_backend); | |
50 | if (r < 0) { | |
51 | cerr << "failed to load kv_backend: " << cpp_strerror(r) << std::endl; | |
52 | exit(EXIT_FAILURE); | |
53 | } | |
54 | if (kv_backend != "rocksdb") { | |
55 | cerr << "expect kv_backend to be rocksdb, but is " << kv_backend | |
56 | << std::endl; | |
57 | exit(EXIT_FAILURE); | |
58 | } | |
59 | string bluefs_enabled; | |
60 | r = bluestore.read_meta("bluefs", &bluefs_enabled); | |
61 | if (r < 0) { | |
62 | cerr << "failed to load do_bluefs: " << cpp_strerror(r) << std::endl; | |
63 | exit(EXIT_FAILURE); | |
64 | } | |
65 | if (bluefs_enabled != "1") { | |
66 | cerr << "bluefs not enabled for rocksdb" << std::endl; | |
67 | exit(EXIT_FAILURE); | |
68 | } | |
69 | } | |
70 | ||
f64942e4 AA |
71 | const char* find_device_path( |
72 | int id, | |
73 | CephContext *cct, | |
74 | const vector<string>& devs) | |
75 | { | |
76 | for (auto& i : devs) { | |
77 | bluestore_bdev_label_t label; | |
78 | int r = BlueStore::_read_bdev_label(cct, i, &label); | |
79 | if (r < 0) { | |
80 | cerr << "unable to read label for " << i << ": " | |
81 | << cpp_strerror(r) << std::endl; | |
82 | exit(EXIT_FAILURE); | |
83 | } | |
84 | if ((id == BlueFS::BDEV_SLOW && label.description == "main") || | |
85 | (id == BlueFS::BDEV_DB && label.description == "bluefs db") || | |
86 | (id == BlueFS::BDEV_WAL && label.description == "bluefs wal")) { | |
87 | return i.c_str(); | |
88 | } | |
89 | } | |
90 | return nullptr; | |
91 | } | |
92 | ||
11fdf7f2 | 93 | void parse_devices( |
3efd9988 | 94 | CephContext *cct, |
11fdf7f2 TL |
95 | const vector<string>& devs, |
96 | map<string, int>* got, | |
97 | bool* has_db, | |
98 | bool* has_wal) | |
3efd9988 | 99 | { |
3efd9988 | 100 | string main; |
11fdf7f2 TL |
101 | bool was_db = false; |
102 | if (has_wal) { | |
103 | *has_wal = false; | |
104 | } | |
105 | if (has_db) { | |
106 | *has_db = false; | |
107 | } | |
108 | for (auto& d : devs) { | |
3efd9988 | 109 | bluestore_bdev_label_t label; |
11fdf7f2 | 110 | int r = BlueStore::_read_bdev_label(cct, d, &label); |
3efd9988 | 111 | if (r < 0) { |
11fdf7f2 | 112 | cerr << "unable to read label for " << d << ": " |
3efd9988 FG |
113 | << cpp_strerror(r) << std::endl; |
114 | exit(EXIT_FAILURE); | |
115 | } | |
116 | int id = -1; | |
117 | if (label.description == "main") | |
11fdf7f2 TL |
118 | main = d; |
119 | else if (label.description == "bluefs db") { | |
3efd9988 | 120 | id = BlueFS::BDEV_DB; |
11fdf7f2 TL |
121 | was_db = true; |
122 | if (has_db) { | |
123 | *has_db = true; | |
124 | } | |
125 | } | |
126 | else if (label.description == "bluefs wal") { | |
3efd9988 | 127 | id = BlueFS::BDEV_WAL; |
11fdf7f2 TL |
128 | if (has_wal) { |
129 | *has_wal = true; | |
3efd9988 FG |
130 | } |
131 | } | |
11fdf7f2 TL |
132 | if (id >= 0) { |
133 | got->emplace(d, id); | |
134 | } | |
3efd9988 FG |
135 | } |
136 | if (main.length()) { | |
11fdf7f2 TL |
137 | int id = was_db ? BlueFS::BDEV_SLOW : BlueFS::BDEV_DB; |
138 | got->emplace(main, id); | |
139 | } | |
140 | } | |
141 | ||
142 | void add_devices( | |
143 | BlueFS *fs, | |
144 | CephContext *cct, | |
145 | const vector<string>& devs) | |
146 | { | |
147 | map<string, int> got; | |
148 | parse_devices(cct, devs, &got, nullptr, nullptr); | |
149 | for(auto e : got) { | |
150 | char target_path[PATH_MAX] = ""; | |
151 | if(!e.first.empty()) { | |
152 | if (realpath(e.first.c_str(), target_path) == nullptr) { | |
153 | cerr << "failed to retrieve absolute path for " << e.first | |
154 | << ": " << cpp_strerror(errno) | |
155 | << std::endl; | |
156 | } | |
157 | } | |
158 | ||
159 | cout << " slot " << e.second << " " << e.first; | |
160 | if (target_path[0]) { | |
161 | cout << " -> " << target_path; | |
162 | } | |
163 | cout << std::endl; | |
f67539c2 TL |
164 | |
165 | // We provide no shared allocator which prevents bluefs to operate in R/W mode. | |
166 | // Read-only mode isn't strictly enforced though | |
167 | int r = fs->add_block_device(e.second, e.first, false, 0); // 'reserved' is fake | |
3efd9988 | 168 | if (r < 0) { |
11fdf7f2 | 169 | cerr << "unable to open " << e.first << ": " << cpp_strerror(r) << std::endl; |
3efd9988 FG |
170 | exit(EXIT_FAILURE); |
171 | } | |
172 | } | |
11fdf7f2 TL |
173 | } |
174 | ||
f67539c2 | 175 | BlueFS *open_bluefs_readonly( |
11fdf7f2 TL |
176 | CephContext *cct, |
177 | const string& path, | |
178 | const vector<string>& devs) | |
179 | { | |
180 | validate_path(cct, path, true); | |
181 | BlueFS *fs = new BlueFS(cct); | |
182 | ||
183 | add_devices(fs, cct, devs); | |
3efd9988 FG |
184 | |
185 | int r = fs->mount(); | |
186 | if (r < 0) { | |
187 | cerr << "unable to mount bluefs: " << cpp_strerror(r) | |
188 | << std::endl; | |
189 | exit(EXIT_FAILURE); | |
190 | } | |
191 | return fs; | |
192 | } | |
193 | ||
11fdf7f2 TL |
194 | void log_dump( |
195 | CephContext *cct, | |
196 | const string& path, | |
197 | const vector<string>& devs) | |
198 | { | |
f67539c2 TL |
199 | validate_path(cct, path, true); |
200 | BlueFS *fs = new BlueFS(cct); | |
201 | ||
202 | add_devices(fs, cct, devs); | |
11fdf7f2 TL |
203 | int r = fs->log_dump(); |
204 | if (r < 0) { | |
205 | cerr << "log_dump failed" << ": " | |
206 | << cpp_strerror(r) << std::endl; | |
207 | exit(EXIT_FAILURE); | |
208 | } | |
209 | ||
210 | delete fs; | |
211 | } | |
212 | ||
// Append to `devs` each of "<path>/block", "<path>/block.wal" and
// "<path>/block.db" (in that order) for which stat() succeeds.
// A notice is printed to stdout first; entries already in `devs` are kept.
void inferring_bluefs_devices(vector<string>& devs, std::string& path)
{
  const char* const candidates[] = {"block", "block.wal", "block.db"};

  cout << "inferring bluefs devices from bluestore path" << std::endl;
  for (const char* name : candidates) {
    string candidate(path);
    candidate += "/";
    candidate += name;

    struct stat sb;
    if (::stat(candidate.c_str(), &sb) != 0) {
      continue;  // not present (or not accessible): skip it
    }
    devs.push_back(candidate);
  }
}
224 | ||
7c673cae FG |
225 | int main(int argc, char **argv) |
226 | { | |
227 | string out_dir; | |
228 | vector<string> devs; | |
11fdf7f2 TL |
229 | vector<string> devs_source; |
230 | string dev_target; | |
7c673cae FG |
231 | string path; |
232 | string action; | |
3efd9988 FG |
233 | string log_file; |
234 | string key, value; | |
eafe8130 | 235 | vector<string> allocs_name; |
f67539c2 TL |
236 | string empty_sharding(1, '\0'); |
237 | string new_sharding = empty_sharding; | |
238 | string resharding_ctrl; | |
3efd9988 | 239 | int log_level = 30; |
31f18b77 | 240 | bool fsck_deep = false; |
7c673cae FG |
241 | po::options_description po_options("Options"); |
242 | po_options.add_options() | |
243 | ("help,h", "produce help message") | |
244 | ("path", po::value<string>(&path), "bluestore path") | |
245 | ("out-dir", po::value<string>(&out_dir), "output directory") | |
3efd9988 FG |
246 | ("log-file,l", po::value<string>(&log_file), "log file") |
247 | ("log-level", po::value<int>(&log_level), "log level (30=most, 20=lots, 10=some, 1=little)") | |
7c673cae | 248 | ("dev", po::value<vector<string>>(&devs), "device(s)") |
11fdf7f2 TL |
249 | ("devs-source", po::value<vector<string>>(&devs_source), "bluefs-dev-migrate source device(s)") |
250 | ("dev-target", po::value<string>(&dev_target), "target/resulting device") | |
7c673cae | 251 | ("deep", po::value<bool>(&fsck_deep), "deep fsck (read all data)") |
3efd9988 FG |
252 | ("key,k", po::value<string>(&key), "label metadata key name") |
253 | ("value,v", po::value<string>(&value), "label metadata value") | |
eafe8130 | 254 | ("allocator", po::value<vector<string>>(&allocs_name), "allocator to inspect: 'block'/'bluefs-wal'/'bluefs-db'/'bluefs-slow'") |
f67539c2 TL |
255 | ("sharding", po::value<string>(&new_sharding), "new sharding to apply") |
256 | ("resharding-ctrl", po::value<string>(&resharding_ctrl), "gives control over resharding procedure details") | |
7c673cae FG |
257 | ; |
258 | po::options_description po_positional("Positional options"); | |
259 | po_positional.add_options() | |
eafe8130 TL |
260 | ("command", po::value<string>(&action), |
261 | "fsck, " | |
262 | "repair, " | |
263 | "quick-fix, " | |
264 | "bluefs-export, " | |
265 | "bluefs-bdev-sizes, " | |
266 | "bluefs-bdev-expand, " | |
267 | "bluefs-bdev-new-db, " | |
268 | "bluefs-bdev-new-wal, " | |
269 | "bluefs-bdev-migrate, " | |
270 | "show-label, " | |
271 | "set-label-key, " | |
272 | "rm-label-key, " | |
273 | "prime-osd-dir, " | |
274 | "bluefs-log-dump, " | |
275 | "free-dump, " | |
f6b5b4d7 | 276 | "free-score, " |
f67539c2 TL |
277 | "bluefs-stats, " |
278 | "reshard, " | |
279 | "show-sharding") | |
7c673cae FG |
280 | ; |
281 | po::options_description po_all("All options"); | |
282 | po_all.add(po_options).add(po_positional); | |
283 | po::positional_options_description pd; | |
284 | pd.add("command", 1); | |
285 | ||
286 | vector<string> ceph_option_strings; | |
287 | po::variables_map vm; | |
288 | try { | |
289 | po::parsed_options parsed = | |
290 | po::command_line_parser(argc, argv).options(po_all).allow_unregistered().positional(pd).run(); | |
291 | po::store( parsed, vm); | |
292 | po::notify(vm); | |
293 | ceph_option_strings = po::collect_unrecognized(parsed.options, | |
294 | po::include_positional); | |
295 | } catch(po::error &e) { | |
296 | std::cerr << e.what() << std::endl; | |
c07f9fc5 | 297 | exit(EXIT_FAILURE); |
7c673cae | 298 | } |
11fdf7f2 TL |
299 | // normalize path (remove ending '/' if any) |
300 | if (path.size() > 1 && *(path.end() - 1) == '/') { | |
301 | path.resize(path.size() - 1); | |
302 | } | |
7c673cae FG |
303 | if (vm.count("help")) { |
304 | usage(po_all); | |
c07f9fc5 | 305 | exit(EXIT_SUCCESS); |
7c673cae FG |
306 | } |
307 | if (action.empty()) { | |
308 | cerr << "must specify an action; --help for help" << std::endl; | |
c07f9fc5 | 309 | exit(EXIT_FAILURE); |
7c673cae FG |
310 | } |
311 | ||
eafe8130 | 312 | if (action == "fsck" || action == "repair" || action == "quick-fix") { |
7c673cae FG |
313 | if (path.empty()) { |
314 | cerr << "must specify bluestore path" << std::endl; | |
c07f9fc5 | 315 | exit(EXIT_FAILURE); |
7c673cae FG |
316 | } |
317 | } | |
3efd9988 FG |
318 | if (action == "prime-osd-dir") { |
319 | if (devs.size() != 1) { | |
320 | cerr << "must specify the main bluestore device" << std::endl; | |
321 | exit(EXIT_FAILURE); | |
322 | } | |
323 | if (path.empty()) { | |
324 | cerr << "must specify osd dir to prime" << std::endl; | |
325 | exit(EXIT_FAILURE); | |
326 | } | |
327 | } | |
328 | if (action == "set-label-key" || | |
329 | action == "rm-label-key") { | |
330 | if (devs.size() != 1) { | |
331 | cerr << "must specify the main bluestore device" << std::endl; | |
332 | exit(EXIT_FAILURE); | |
333 | } | |
334 | if (key.size() == 0) { | |
335 | cerr << "must specify a key name with -k" << std::endl; | |
336 | exit(EXIT_FAILURE); | |
337 | } | |
338 | if (action == "set-label-key" && value.size() == 0) { | |
339 | cerr << "must specify a value with -v" << std::endl; | |
340 | exit(EXIT_FAILURE); | |
341 | } | |
342 | } | |
c07f9fc5 | 343 | if (action == "show-label") { |
7c673cae FG |
344 | if (devs.empty() && path.empty()) { |
345 | cerr << "must specify bluestore path *or* raw device(s)" << std::endl; | |
c07f9fc5 | 346 | exit(EXIT_FAILURE); |
7c673cae | 347 | } |
11fdf7f2 TL |
348 | if (devs.empty()) |
349 | inferring_bluefs_devices(devs, path); | |
7c673cae | 350 | } |
11fdf7f2 | 351 | if (action == "bluefs-export" || action == "bluefs-log-dump") { |
c07f9fc5 FG |
352 | if (path.empty()) { |
353 | cerr << "must specify bluestore path" << std::endl; | |
354 | exit(EXIT_FAILURE); | |
355 | } | |
11fdf7f2 | 356 | if ((action == "bluefs-export") && out_dir.empty()) { |
c07f9fc5 FG |
357 | cerr << "must specify out-dir to export bluefs" << std::endl; |
358 | exit(EXIT_FAILURE); | |
359 | } | |
11fdf7f2 | 360 | inferring_bluefs_devices(devs, path); |
c07f9fc5 | 361 | } |
3efd9988 FG |
362 | if (action == "bluefs-bdev-sizes" || action == "bluefs-bdev-expand") { |
363 | if (path.empty()) { | |
364 | cerr << "must specify bluestore path" << std::endl; | |
365 | exit(EXIT_FAILURE); | |
366 | } | |
11fdf7f2 TL |
367 | inferring_bluefs_devices(devs, path); |
368 | } | |
369 | if (action == "bluefs-bdev-new-db" || action == "bluefs-bdev-new-wal") { | |
370 | if (path.empty()) { | |
371 | cerr << "must specify bluestore path" << std::endl; | |
372 | exit(EXIT_FAILURE); | |
373 | } | |
374 | if (dev_target.empty()) { | |
375 | cout << "NOTICE: --dev-target option omitted, will allocate as a file" << std::endl; | |
376 | } | |
377 | inferring_bluefs_devices(devs, path); | |
378 | } | |
379 | if (action == "bluefs-bdev-migrate") { | |
380 | if (path.empty()) { | |
381 | cerr << "must specify bluestore path" << std::endl; | |
382 | exit(EXIT_FAILURE); | |
383 | } | |
384 | inferring_bluefs_devices(devs, path); | |
385 | if (devs_source.size() == 0) { | |
386 | cerr << "must specify source devices with --devs-source" << std::endl; | |
387 | exit(EXIT_FAILURE); | |
388 | } | |
389 | if (dev_target.empty()) { | |
390 | cerr << "must specify target device with --dev-target" << std::endl; | |
391 | exit(EXIT_FAILURE); | |
3efd9988 FG |
392 | } |
393 | } | |
eafe8130 TL |
394 | if (action == "free-score" || action == "free-dump") { |
395 | if (path.empty()) { | |
396 | cerr << "must specify bluestore path" << std::endl; | |
397 | exit(EXIT_FAILURE); | |
398 | } | |
399 | for (auto name : allocs_name) { | |
400 | if (!name.empty() && | |
401 | name != "block" && | |
402 | name != "bluefs-db" && | |
403 | name != "bluefs-wal" && | |
404 | name != "bluefs-slow") { | |
405 | cerr << "unknown allocator '" << name << "'" << std::endl; | |
406 | exit(EXIT_FAILURE); | |
407 | } | |
408 | } | |
409 | if (allocs_name.empty()) | |
410 | allocs_name = vector<string>{"block", "bluefs-db", "bluefs-wal", "bluefs-slow"}; | |
411 | } | |
f67539c2 TL |
412 | if (action == "reshard") { |
413 | if (path.empty()) { | |
414 | cerr << "must specify bluestore path" << std::endl; | |
415 | exit(EXIT_FAILURE); | |
416 | } | |
417 | if (new_sharding == empty_sharding) { | |
418 | cerr << "must provide reshard specification" << std::endl; | |
419 | exit(EXIT_FAILURE); | |
420 | } | |
421 | } | |
7c673cae | 422 | vector<const char*> args; |
3efd9988 FG |
423 | if (log_file.size()) { |
424 | args.push_back("--log-file"); | |
425 | args.push_back(log_file.c_str()); | |
426 | static char ll[10]; | |
427 | snprintf(ll, sizeof(ll), "%d", log_level); | |
428 | args.push_back("--debug-bluestore"); | |
429 | args.push_back(ll); | |
430 | args.push_back("--debug-bluefs"); | |
431 | args.push_back(ll); | |
f67539c2 TL |
432 | args.push_back("--debug-rocksdb"); |
433 | args.push_back(ll); | |
3efd9988 FG |
434 | } |
435 | args.push_back("--no-log-to-stderr"); | |
436 | args.push_back("--err-to-stderr"); | |
437 | ||
7c673cae FG |
438 | for (auto& i : ceph_option_strings) { |
439 | args.push_back(i.c_str()); | |
440 | } | |
7c673cae | 441 | auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, |
f64942e4 AA |
442 | CODE_ENVIRONMENT_UTILITY, |
443 | CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); | |
444 | ||
7c673cae FG |
445 | common_init_finish(cct.get()); |
446 | ||
7c673cae | 447 | if (action == "fsck" || |
eafe8130 TL |
448 | action == "repair" || |
449 | action == "quick-fix") { | |
c07f9fc5 | 450 | validate_path(cct.get(), path, false); |
7c673cae | 451 | BlueStore bluestore(cct.get(), path); |
3efd9988 FG |
452 | int r; |
453 | if (action == "fsck") { | |
454 | r = bluestore.fsck(fsck_deep); | |
eafe8130 | 455 | } else if (action == "repair") { |
3efd9988 | 456 | r = bluestore.repair(fsck_deep); |
eafe8130 TL |
457 | } else { |
458 | r = bluestore.quick_fix(); | |
3efd9988 | 459 | } |
7c673cae | 460 | if (r < 0) { |
9f95a23c | 461 | cerr << action << " failed: " << cpp_strerror(r) << std::endl; |
c07f9fc5 | 462 | exit(EXIT_FAILURE); |
11fdf7f2 | 463 | } else if (r > 0) { |
9f95a23c | 464 | cerr << action << " status: remaining " << r << " error(s) and warning(s)" << std::endl; |
11fdf7f2 TL |
465 | exit(EXIT_FAILURE); |
466 | } else { | |
467 | cout << action << " success" << std::endl; | |
7c673cae | 468 | } |
3efd9988 FG |
469 | } |
470 | else if (action == "prime-osd-dir") { | |
471 | bluestore_bdev_label_t label; | |
472 | int r = BlueStore::_read_bdev_label(cct.get(), devs.front(), &label); | |
473 | if (r < 0) { | |
474 | cerr << "failed to read label for " << devs.front() << ": " | |
475 | << cpp_strerror(r) << std::endl; | |
476 | exit(EXIT_FAILURE); | |
477 | } | |
478 | ||
479 | // kludge some things into the map that we want to populate into | |
480 | // target dir | |
481 | label.meta["path_block"] = devs.front(); | |
482 | label.meta["type"] = "bluestore"; | |
483 | label.meta["fsid"] = stringify(label.osd_uuid); | |
484 | ||
485 | for (auto kk : { | |
486 | "whoami", | |
487 | "osd_key", | |
3efd9988 FG |
488 | "ceph_fsid", |
489 | "fsid", | |
490 | "type", | |
491 | "ready" }) { | |
492 | string k = kk; | |
493 | auto i = label.meta.find(k); | |
494 | if (i == label.meta.end()) { | |
495 | continue; | |
496 | } | |
497 | string p = path + "/" + k; | |
498 | string v = i->second; | |
499 | if (k == "osd_key") { | |
500 | p = path + "/keyring"; | |
501 | v = "[osd."; | |
502 | v += label.meta["whoami"]; | |
503 | v += "]\nkey = " + i->second; | |
504 | } | |
11fdf7f2 TL |
505 | v += "\n"; |
506 | int fd = ::open(p.c_str(), O_CREAT|O_TRUNC|O_WRONLY|O_CLOEXEC, 0600); | |
507 | if (fd < 0) { | |
508 | cerr << "error writing " << p << ": " << cpp_strerror(errno) | |
509 | << std::endl; | |
510 | exit(EXIT_FAILURE); | |
3efd9988 | 511 | } |
11fdf7f2 TL |
512 | int r = safe_write(fd, v.c_str(), v.size()); |
513 | if (r < 0) { | |
514 | cerr << "error writing to " << p << ": " << cpp_strerror(errno) | |
515 | << std::endl; | |
516 | exit(EXIT_FAILURE); | |
517 | } | |
518 | ::close(fd); | |
3efd9988 | 519 | } |
7c673cae FG |
520 | } |
521 | else if (action == "show-label") { | |
522 | JSONFormatter jf(true); | |
3efd9988 | 523 | jf.open_object_section("devices"); |
7c673cae FG |
524 | for (auto& i : devs) { |
525 | bluestore_bdev_label_t label; | |
526 | int r = BlueStore::_read_bdev_label(cct.get(), i, &label); | |
527 | if (r < 0) { | |
528 | cerr << "unable to read label for " << i << ": " | |
529 | << cpp_strerror(r) << std::endl; | |
c07f9fc5 | 530 | exit(EXIT_FAILURE); |
7c673cae FG |
531 | } |
532 | jf.open_object_section(i.c_str()); | |
533 | label.dump(&jf); | |
534 | jf.close_section(); | |
535 | } | |
536 | jf.close_section(); | |
537 | jf.flush(cout); | |
538 | } | |
3efd9988 FG |
539 | else if (action == "set-label-key") { |
540 | bluestore_bdev_label_t label; | |
541 | int r = BlueStore::_read_bdev_label(cct.get(), devs.front(), &label); | |
542 | if (r < 0) { | |
543 | cerr << "unable to read label for " << devs.front() << ": " | |
544 | << cpp_strerror(r) << std::endl; | |
545 | exit(EXIT_FAILURE); | |
7c673cae | 546 | } |
f64942e4 AA |
547 | if (key == "size") { |
548 | label.size = strtoull(value.c_str(), nullptr, 10); | |
549 | } else if (key =="osd_uuid") { | |
550 | label.osd_uuid.parse(value.c_str()); | |
551 | } else if (key =="btime") { | |
552 | uint64_t epoch; | |
553 | uint64_t nsec; | |
554 | int r = utime_t::parse_date(value.c_str(), &epoch, &nsec); | |
555 | if (r == 0) { | |
556 | label.btime = utime_t(epoch, nsec); | |
557 | } | |
558 | } else if (key =="description") { | |
559 | label.description = value; | |
560 | } else { | |
561 | label.meta[key] = value; | |
562 | } | |
3efd9988 FG |
563 | r = BlueStore::_write_bdev_label(cct.get(), devs.front(), label); |
564 | if (r < 0) { | |
565 | cerr << "unable to write label for " << devs.front() << ": " | |
566 | << cpp_strerror(r) << std::endl; | |
567 | exit(EXIT_FAILURE); | |
7c673cae | 568 | } |
3efd9988 FG |
569 | } |
570 | else if (action == "rm-label-key") { | |
571 | bluestore_bdev_label_t label; | |
572 | int r = BlueStore::_read_bdev_label(cct.get(), devs.front(), &label); | |
7c673cae | 573 | if (r < 0) { |
3efd9988 FG |
574 | cerr << "unable to read label for " << devs.front() << ": " |
575 | << cpp_strerror(r) << std::endl; | |
576 | exit(EXIT_FAILURE); | |
577 | } | |
578 | if (!label.meta.count(key)) { | |
579 | cerr << "key '" << key << "' not present" << std::endl; | |
580 | exit(EXIT_FAILURE); | |
581 | } | |
582 | label.meta.erase(key); | |
583 | r = BlueStore::_write_bdev_label(cct.get(), devs.front(), label); | |
584 | if (r < 0) { | |
585 | cerr << "unable to write label for " << devs.front() << ": " | |
586 | << cpp_strerror(r) << std::endl; | |
c07f9fc5 | 587 | exit(EXIT_FAILURE); |
7c673cae | 588 | } |
3efd9988 FG |
589 | } |
590 | else if (action == "bluefs-bdev-sizes") { | |
1911f103 TL |
591 | BlueStore bluestore(cct.get(), path); |
592 | bluestore.dump_bluefs_sizes(cout); | |
3efd9988 FG |
593 | } |
594 | else if (action == "bluefs-bdev-expand") { | |
11fdf7f2 TL |
595 | BlueStore bluestore(cct.get(), path); |
596 | auto r = bluestore.expand_devices(cout); | |
597 | if (r <0) { | |
598 | cerr << "failed to expand bluestore devices: " | |
599 | << cpp_strerror(r) << std::endl; | |
600 | exit(EXIT_FAILURE); | |
3efd9988 | 601 | } |
3efd9988 FG |
602 | } |
603 | else if (action == "bluefs-export") { | |
f67539c2 | 604 | BlueFS *fs = open_bluefs_readonly(cct.get(), path, devs); |
7c673cae FG |
605 | |
606 | vector<string> dirs; | |
3efd9988 | 607 | int r = fs->readdir("", &dirs); |
7c673cae FG |
608 | if (r < 0) { |
609 | cerr << "readdir in root failed: " << cpp_strerror(r) << std::endl; | |
c07f9fc5 | 610 | exit(EXIT_FAILURE); |
7c673cae | 611 | } |
11fdf7f2 TL |
612 | |
613 | if (::access(out_dir.c_str(), F_OK)) { | |
614 | r = ::mkdir(out_dir.c_str(), 0755); | |
615 | if (r < 0) { | |
616 | r = -errno; | |
617 | cerr << "mkdir " << out_dir << " failed: " << cpp_strerror(r) << std::endl; | |
618 | exit(EXIT_FAILURE); | |
619 | } | |
620 | } | |
621 | ||
7c673cae FG |
622 | for (auto& dir : dirs) { |
623 | if (dir[0] == '.') | |
624 | continue; | |
625 | cout << dir << "/" << std::endl; | |
626 | vector<string> ls; | |
3efd9988 | 627 | r = fs->readdir(dir, &ls); |
7c673cae FG |
628 | if (r < 0) { |
629 | cerr << "readdir " << dir << " failed: " << cpp_strerror(r) << std::endl; | |
c07f9fc5 | 630 | exit(EXIT_FAILURE); |
7c673cae FG |
631 | } |
632 | string full = out_dir + "/" + dir; | |
11fdf7f2 TL |
633 | if (::access(full.c_str(), F_OK)) { |
634 | r = ::mkdir(full.c_str(), 0755); | |
635 | if (r < 0) { | |
636 | r = -errno; | |
637 | cerr << "mkdir " << full << " failed: " << cpp_strerror(r) << std::endl; | |
638 | exit(EXIT_FAILURE); | |
639 | } | |
7c673cae FG |
640 | } |
641 | for (auto& file : ls) { | |
642 | if (file[0] == '.') | |
643 | continue; | |
644 | cout << dir << "/" << file << std::endl; | |
645 | uint64_t size; | |
646 | utime_t mtime; | |
3efd9988 | 647 | r = fs->stat(dir, file, &size, &mtime); |
7c673cae FG |
648 | if (r < 0) { |
649 | cerr << "stat " << file << " failed: " << cpp_strerror(r) << std::endl; | |
c07f9fc5 | 650 | exit(EXIT_FAILURE); |
7c673cae FG |
651 | } |
652 | string path = out_dir + "/" + dir + "/" + file; | |
91327a77 | 653 | int fd = ::open(path.c_str(), O_CREAT|O_WRONLY|O_TRUNC|O_CLOEXEC, 0644); |
7c673cae FG |
654 | if (fd < 0) { |
655 | r = -errno; | |
656 | cerr << "open " << path << " failed: " << cpp_strerror(r) << std::endl; | |
c07f9fc5 | 657 | exit(EXIT_FAILURE); |
7c673cae | 658 | } |
7c673cae FG |
659 | if (size > 0) { |
660 | BlueFS::FileReader *h; | |
3efd9988 | 661 | r = fs->open_for_read(dir, file, &h, false); |
7c673cae FG |
662 | if (r < 0) { |
663 | cerr << "open_for_read " << dir << "/" << file << " failed: " | |
664 | << cpp_strerror(r) << std::endl; | |
c07f9fc5 | 665 | exit(EXIT_FAILURE); |
7c673cae FG |
666 | } |
667 | int pos = 0; | |
668 | int left = size; | |
669 | while (left) { | |
670 | bufferlist bl; | |
f67539c2 | 671 | r = fs->read(h, pos, left, &bl, NULL); |
7c673cae FG |
672 | if (r <= 0) { |
673 | cerr << "read " << dir << "/" << file << " from " << pos | |
674 | << " failed: " << cpp_strerror(r) << std::endl; | |
c07f9fc5 | 675 | exit(EXIT_FAILURE); |
7c673cae FG |
676 | } |
677 | int rc = bl.write_fd(fd); | |
678 | if (rc < 0) { | |
679 | cerr << "write to " << path << " failed: " | |
680 | << cpp_strerror(r) << std::endl; | |
c07f9fc5 | 681 | exit(EXIT_FAILURE); |
7c673cae FG |
682 | } |
683 | pos += r; | |
684 | left -= r; | |
685 | } | |
686 | delete h; | |
687 | } | |
688 | ::close(fd); | |
689 | } | |
690 | } | |
3efd9988 FG |
691 | fs->umount(); |
692 | delete fs; | |
11fdf7f2 TL |
693 | } else if (action == "bluefs-log-dump") { |
694 | log_dump(cct.get(), path, devs); | |
695 | } else if (action == "bluefs-bdev-new-db" || action == "bluefs-bdev-new-wal") { | |
696 | map<string, int> cur_devs_map; | |
697 | bool need_db = action == "bluefs-bdev-new-db"; | |
698 | ||
699 | bool has_wal = false; | |
700 | bool has_db = false; | |
701 | char target_path[PATH_MAX] = ""; | |
702 | ||
703 | parse_devices(cct.get(), devs, &cur_devs_map, &has_db, &has_wal); | |
704 | ||
705 | if (has_db && has_wal) { | |
706 | cerr << "can't allocate new device, both WAL and DB exist" | |
707 | << std::endl; | |
708 | exit(EXIT_FAILURE); | |
709 | } else if (need_db && has_db) { | |
710 | cerr << "can't allocate new DB device, already exists" | |
711 | << std::endl; | |
712 | exit(EXIT_FAILURE); | |
713 | } else if (!need_db && has_wal) { | |
714 | cerr << "can't allocate new WAL device, already exists" | |
715 | << std::endl; | |
716 | exit(EXIT_FAILURE); | |
717 | } else if(!dev_target.empty() && | |
718 | realpath(dev_target.c_str(), target_path) == nullptr) { | |
719 | cerr << "failed to retrieve absolute path for " << dev_target | |
720 | << ": " << cpp_strerror(errno) | |
721 | << std::endl; | |
722 | exit(EXIT_FAILURE); | |
723 | } | |
724 | ||
725 | // Create either DB or WAL volume | |
726 | int r = EXIT_FAILURE; | |
727 | if (need_db && cct->_conf->bluestore_block_db_size == 0) { | |
728 | cerr << "DB size isn't specified, " | |
729 | "please set Ceph bluestore-block-db-size config parameter " | |
730 | << std::endl; | |
731 | } else if (!need_db && cct->_conf->bluestore_block_wal_size == 0) { | |
732 | cerr << "WAL size isn't specified, " | |
733 | "please set Ceph bluestore-block-wal-size config parameter " | |
734 | << std::endl; | |
735 | } else { | |
736 | BlueStore bluestore(cct.get(), path); | |
737 | r = bluestore.add_new_bluefs_device( | |
738 | need_db ? BlueFS::BDEV_NEWDB : BlueFS::BDEV_NEWWAL, | |
739 | target_path); | |
740 | if (r == 0) { | |
741 | cout << (need_db ? "DB" : "WAL") << " device added " << target_path | |
742 | << std::endl; | |
743 | } else { | |
744 | cerr << "failed to add " << (need_db ? "DB" : "WAL") << " device:" | |
745 | << cpp_strerror(r) | |
746 | << std::endl; | |
747 | } | |
748 | return r; | |
749 | } | |
750 | } else if (action == "bluefs-bdev-migrate") { | |
751 | map<string, int> cur_devs_map; | |
752 | set<int> src_dev_ids; | |
753 | map<string, int> src_devs; | |
754 | ||
755 | parse_devices(cct.get(), devs, &cur_devs_map, nullptr, nullptr); | |
756 | for (auto& s : devs_source) { | |
757 | auto i = cur_devs_map.find(s); | |
758 | if (i != cur_devs_map.end()) { | |
494da23a TL |
759 | if (s == dev_target) { |
760 | cerr << "Device " << dev_target | |
761 | << " is present in both source and target lists, omitted." | |
762 | << std::endl; | |
763 | } else { | |
764 | src_devs.emplace(*i); | |
765 | src_dev_ids.emplace(i->second); | |
766 | } | |
11fdf7f2 TL |
767 | } else { |
768 | cerr << "can't migrate " << s << ", not a valid bluefs volume " | |
769 | << std::endl; | |
770 | exit(EXIT_FAILURE); | |
771 | } | |
772 | } | |
773 | ||
774 | auto i = cur_devs_map.find(dev_target); | |
775 | ||
776 | if (i != cur_devs_map.end()) { | |
777 | // Migrate to an existing BlueFS volume | |
778 | ||
779 | auto dev_target_id = i->second; | |
780 | if (dev_target_id == BlueFS::BDEV_WAL) { | |
781 | // currently we're unable to migrate to WAL device since there is no space | |
782 | // reserved for superblock | |
783 | cerr << "Migrate to WAL device isn't supported." << std::endl; | |
784 | exit(EXIT_FAILURE); | |
785 | } | |
786 | ||
11fdf7f2 TL |
787 | BlueStore bluestore(cct.get(), path); |
788 | int r = bluestore.migrate_to_existing_bluefs_device( | |
789 | src_dev_ids, | |
790 | dev_target_id); | |
791 | if (r == 0) { | |
792 | for(auto src : src_devs) { | |
793 | if (src.second != BlueFS::BDEV_SLOW) { | |
794 | cout << " device removed:" << src.second << " " << src.first | |
795 | << std::endl; | |
796 | } | |
797 | } | |
798 | } else { | |
494da23a | 799 | bool need_db = dev_target_id == BlueFS::BDEV_DB; |
11fdf7f2 | 800 | cerr << "failed to migrate to existing BlueFS device: " |
494da23a | 801 | << (need_db ? BlueFS::BDEV_DB : BlueFS::BDEV_WAL) |
11fdf7f2 TL |
802 | << " " << dev_target |
803 | << cpp_strerror(r) | |
804 | << std::endl; | |
805 | } | |
494da23a | 806 | return r; |
11fdf7f2 TL |
807 | } else { |
808 | // Migrate to a new BlueFS volume | |
809 | // via creating either DB or WAL volume | |
810 | char target_path[PATH_MAX] = ""; | |
811 | int dev_target_id; | |
812 | if (src_dev_ids.count(BlueFS::BDEV_DB)) { | |
813 | // if we have DB device in the source list - we create DB device | |
814 | // (and may be remove WAL). | |
815 | dev_target_id = BlueFS::BDEV_NEWDB; | |
816 | } else if (src_dev_ids.count(BlueFS::BDEV_WAL)) { | |
817 | dev_target_id = BlueFS::BDEV_NEWWAL; | |
818 | } else { | |
819 | cerr << "Unable to migrate Slow volume to new location, " | |
820 | "please allocate new DB or WAL with " | |
821 | "--bluefs-bdev-new-db(wal) command" | |
822 | << std::endl; | |
823 | exit(EXIT_FAILURE); | |
824 | } | |
825 | if(!dev_target.empty() && | |
826 | realpath(dev_target.c_str(), target_path) == nullptr) { | |
827 | cerr << "failed to retrieve absolute path for " << dev_target | |
828 | << ": " << cpp_strerror(errno) | |
829 | << std::endl; | |
830 | exit(EXIT_FAILURE); | |
831 | } | |
832 | ||
833 | BlueStore bluestore(cct.get(), path); | |
834 | ||
835 | bool need_db = dev_target_id == BlueFS::BDEV_NEWDB; | |
836 | int r = bluestore.migrate_to_new_bluefs_device( | |
837 | src_dev_ids, | |
838 | dev_target_id, | |
839 | target_path); | |
840 | if (r == 0) { | |
841 | for(auto src : src_devs) { | |
842 | if (src.second != BlueFS::BDEV_SLOW) { | |
843 | cout << " device removed:" << src.second << " " << src.first | |
844 | << std::endl; | |
845 | } | |
846 | } | |
847 | cout << " device added: " | |
848 | << (need_db ? BlueFS::BDEV_DB : BlueFS::BDEV_DB) | |
849 | << " " << target_path | |
850 | << std::endl; | |
851 | } else { | |
852 | cerr << "failed to migrate to new BlueFS device: " | |
853 | << (need_db ? BlueFS::BDEV_DB : BlueFS::BDEV_DB) | |
854 | << " " << target_path | |
855 | << cpp_strerror(r) | |
856 | << std::endl; | |
857 | } | |
858 | return r; | |
859 | } | |
eafe8130 TL |
860 | } else if (action == "free-dump" || action == "free-score") { |
861 | AdminSocket *admin_socket = g_ceph_context->get_admin_socket(); | |
862 | ceph_assert(admin_socket); | |
863 | std::string action_name = action == "free-dump" ? "dump" : "score"; | |
864 | validate_path(cct.get(), path, false); | |
865 | BlueStore bluestore(cct.get(), path); | |
866 | int r = bluestore.cold_open(); | |
867 | if (r < 0) { | |
868 | cerr << "error from cold_open: " << cpp_strerror(r) << std::endl; | |
869 | exit(EXIT_FAILURE); | |
870 | } | |
871 | ||
872 | for (auto alloc_name : allocs_name) { | |
9f95a23c TL |
873 | ceph::bufferlist in, out; |
874 | ostringstream err; | |
f6b5b4d7 | 875 | int r = admin_socket->execute_command( |
9f95a23c TL |
876 | {"{\"prefix\": \"bluestore allocator " + action_name + " " + alloc_name + "\"}"}, |
877 | in, err, &out); | |
f6b5b4d7 | 878 | if (r != 0) { |
eafe8130 TL |
879 | cerr << "failure querying '" << alloc_name << "'" << std::endl; |
880 | exit(EXIT_FAILURE); | |
881 | } | |
882 | cout << alloc_name << ":" << std::endl; | |
883 | cout << std::string(out.c_str(),out.length()) << std::endl; | |
884 | } | |
885 | ||
886 | bluestore.cold_close(); | |
f6b5b4d7 TL |
887 | } else if (action == "bluefs-stats") { |
888 | AdminSocket* admin_socket = g_ceph_context->get_admin_socket(); | |
889 | ceph_assert(admin_socket); | |
890 | validate_path(cct.get(), path, false); | |
891 | BlueStore bluestore(cct.get(), path); | |
892 | int r = bluestore.cold_open(); | |
893 | if (r < 0) { | |
894 | cerr << "error from cold_open: " << cpp_strerror(r) << std::endl; | |
895 | exit(EXIT_FAILURE); | |
896 | } | |
897 | ||
898 | ceph::bufferlist in, out; | |
899 | ostringstream err; | |
900 | r = admin_socket->execute_command( | |
901 | { "{\"prefix\": \"bluefs stats\"}" }, | |
902 | in, err, &out); | |
903 | if (r != 0) { | |
904 | cerr << "failure querying bluefs stats: " << cpp_strerror(r) << std::endl; | |
905 | exit(EXIT_FAILURE); | |
906 | } | |
907 | cout << std::string(out.c_str(), out.length()) << std::endl; | |
908 | bluestore.cold_close(); | |
f67539c2 TL |
909 | } else if (action == "reshard") { |
910 | auto get_ctrl = [&](size_t& val) { | |
911 | if (!resharding_ctrl.empty()) { | |
912 | size_t pos; | |
913 | std::string token; | |
914 | pos = resharding_ctrl.find('/'); | |
915 | token = resharding_ctrl.substr(0, pos); | |
916 | if (pos != std::string::npos) | |
917 | resharding_ctrl.erase(0, pos + 1); | |
918 | else | |
919 | resharding_ctrl.erase(); | |
920 | char* endptr; | |
921 | val = strtoll(token.c_str(), &endptr, 0); | |
922 | if (*endptr != '\0') { | |
923 | cerr << "invalid --resharding-ctrl. '" << token << "' is not a number" << std::endl; | |
924 | exit(EXIT_FAILURE); | |
925 | } | |
926 | } | |
927 | }; | |
928 | BlueStore bluestore(cct.get(), path); | |
929 | KeyValueDB *db_ptr; | |
930 | RocksDBStore::resharding_ctrl ctrl; | |
931 | if (!resharding_ctrl.empty()) { | |
932 | get_ctrl(ctrl.bytes_per_iterator); | |
933 | get_ctrl(ctrl.keys_per_iterator); | |
934 | get_ctrl(ctrl.bytes_per_batch); | |
935 | get_ctrl(ctrl.keys_per_batch); | |
936 | if (!resharding_ctrl.empty()) { | |
937 | cerr << "extra chars in --resharding-ctrl" << std::endl; | |
938 | exit(EXIT_FAILURE); | |
939 | } | |
940 | } | |
941 | int r = bluestore.open_db_environment(&db_ptr, true); | |
942 | if (r < 0) { | |
943 | cerr << "error preparing db environment: " << cpp_strerror(r) << std::endl; | |
944 | exit(EXIT_FAILURE); | |
945 | } | |
946 | ceph_assert(db_ptr); | |
947 | RocksDBStore* rocks_db = dynamic_cast<RocksDBStore*>(db_ptr); | |
948 | ceph_assert(rocks_db); | |
949 | r = rocks_db->reshard(new_sharding, &ctrl); | |
950 | if (r < 0) { | |
951 | cerr << "error resharding: " << cpp_strerror(r) << std::endl; | |
952 | } else { | |
953 | cout << "reshard success" << std::endl; | |
954 | } | |
955 | bluestore.close_db_environment(); | |
956 | } else if (action == "show-sharding") { | |
957 | BlueStore bluestore(cct.get(), path); | |
958 | KeyValueDB *db_ptr; | |
959 | int r = bluestore.open_db_environment(&db_ptr, false); | |
960 | if (r < 0) { | |
961 | cerr << "error preparing db environment: " << cpp_strerror(r) << std::endl; | |
962 | exit(EXIT_FAILURE); | |
963 | } | |
964 | ceph_assert(db_ptr); | |
965 | RocksDBStore* rocks_db = dynamic_cast<RocksDBStore*>(db_ptr); | |
966 | ceph_assert(rocks_db); | |
967 | std::string sharding; | |
968 | bool res = rocks_db->get_sharding(sharding); | |
969 | bluestore.close_db_environment(); | |
970 | if (!res) { | |
971 | cerr << "failed to retrieve sharding def" << std::endl; | |
972 | exit(EXIT_FAILURE); | |
973 | } | |
974 | cout << sharding << std::endl; | |
7c673cae FG |
975 | } else { |
976 | cerr << "unrecognized action " << action << std::endl; | |
977 | return 1; | |
978 | } | |
979 | ||
980 | return 0; | |
981 | } |