1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <boost/program_options/variables_map.hpp>
16 #include <boost/program_options/parsers.hpp>
17 #include <boost/scoped_ptr.hpp>
18 #include <boost/optional.hpp>
19
20 #include <stdlib.h>
21
22 #include "common/Formatter.h"
23 #include "common/errno.h"
24 #include "common/ceph_argparse.h"
25 #include "common/url_escape.h"
26
27 #include "global/global_init.h"
28
29 #include "os/ObjectStore.h"
30 #include "os/filestore/FileJournal.h"
31 #include "os/filestore/FileStore.h"
32 #ifdef HAVE_LIBFUSE
33 #include "os/FuseStore.h"
34 #endif
35
36 #include "osd/PGLog.h"
37 #include "osd/OSD.h"
38 #include "osd/PG.h"
39 #include "osd/ECUtil.h"
40
41 #include "json_spirit/json_spirit_value.h"
42 #include "json_spirit/json_spirit_reader.h"
43
44 #include "rebuild_mondb.h"
45 #include "ceph_objectstore_tool.h"
46 #include "include/compat.h"
47 #include "include/util.h"
48
49 using namespace std;
50 namespace po = boost::program_options;
51
52 #ifdef INTERNAL_TEST
53 CompatSet get_test_compat_set() {
54 CompatSet::FeatureSet ceph_osd_feature_compat;
55 CompatSet::FeatureSet ceph_osd_feature_ro_compat;
56 CompatSet::FeatureSet ceph_osd_feature_incompat;
57 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
58 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_PGINFO);
59 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_OLOC);
60 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEC);
61 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES);
62 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL);
63 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
64 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
65 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
66 #ifdef INTERNAL_TEST2
67 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER);
68 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
69 #endif
70 return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
71 ceph_osd_feature_incompat);
72 }
73 #endif
74
75 const ssize_t max_read = 1024 * 1024;
76 const int fd_none = INT_MIN;
77 bool outistty;
78 bool dry_run;
79
80 struct action_on_object_t {
81 virtual ~action_on_object_t() {}
82 virtual void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) = 0;
83 };
84
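// Iterate over every object in one collection in batches of LIST_AT_A_TIME;
// for collections other than the meta collection, decode each object's
// object_info_t from OI_ATTR, then invoke the supplied action on the object.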
85 int _action_on_all_objects_in_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
86 {
87 auto ch = store->open_collection(coll);
88
89 unsigned LIST_AT_A_TIME = 100;
90 ghobject_t next;
91 while (!next.is_max()) {
92 vector<ghobject_t> list;
93 int r = store->collection_list(ch,
94 next,
95 ghobject_t::get_max(),
96 LIST_AT_A_TIME,
97 &list,
98 &next);
99 if (r < 0) {
100 cerr << "Error listing collection: " << coll << ", "
101 << cpp_strerror(r) << std::endl;
102 return r;
103 }
104 for (vector<ghobject_t>::iterator obj = list.begin();
105 obj != list.end();
106 ++obj) {
107 object_info_t oi;
108 if (coll != coll_t::meta()) {
109 bufferlist attr;
110 r = store->getattr(ch, *obj, OI_ATTR, attr);
111 if (r < 0) {
112 cerr << "Error getting attr on : " << make_pair(coll, *obj) << ", "
113 << cpp_strerror(r) << std::endl;
114 } else {
115 auto bp = attr.cbegin();
116 try {
117 decode(oi, bp);
118 } catch (...) {
119 r = -EINVAL;
120 cerr << "Error decoding attr on : " << make_pair(coll, *obj) << ", "
121 << cpp_strerror(r) << std::endl;
122 }
123 }
124 }
125 action.call(store, coll, *obj, oi);
126 }
127 }
128 return 0;
129 }
130
131 int action_on_all_objects_in_pg(ObjectStore *store, string pgidstr, action_on_object_t &action, bool debug)
132 {
133 spg_t pgid;
134 // Scan collections in case this is an ec pool but no shard specified
135 unsigned scanned = 0;
136 int r = 0;
137 vector<coll_t> colls_to_check;
138 vector<coll_t> candidates;
139
140 r = store->list_collections(candidates);
141 if (r < 0) {
142 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
143 return r;
144 }
145 pgid.parse(pgidstr.c_str());
146 for (vector<coll_t>::iterator i = candidates.begin();
147 i != candidates.end();
148 ++i) {
149 spg_t cand_pgid;
150 if (i->is_meta() && pgidstr == "meta") {
151 colls_to_check.push_back(*i);
152 continue;
153 }
154 if (!i->is_pg(&cand_pgid))
155 continue;
156
157     // Match the pgid exactly, or treat a pgid with no shard as matching any shard
158 if (cand_pgid == pgid ||
159 (pgid.is_no_shard() && pgid.pgid == cand_pgid.pgid)) {
160 colls_to_check.push_back(*i);
161 }
162 }
163
164 if (debug)
165 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
166 for (vector<coll_t>::iterator i = colls_to_check.begin();
167 i != colls_to_check.end();
168 ++i, ++scanned) {
169 if (debug)
170 cerr << "Scanning " << *i << ", " << scanned << "/"
171 << colls_to_check.size() << " completed" << std::endl;
172 r = _action_on_all_objects_in_pg(store, *i, action, debug);
173 if (r < 0)
174 break;
175 }
176 return r;
177 }
178
179 int action_on_all_objects_in_exact_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
180 {
181 int r = _action_on_all_objects_in_pg(store, coll, action, debug);
182 return r;
183 }
184
185 int _action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
186 {
187 unsigned scanned = 0;
188 int r = 0;
189 vector<coll_t> colls_to_check;
190 vector<coll_t> candidates;
191 r = store->list_collections(candidates);
192 if (r < 0) {
193 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
194 return r;
195 }
196 for (vector<coll_t>::iterator i = candidates.begin();
197 i != candidates.end();
198 ++i) {
199 if (i->is_pg()) {
200 colls_to_check.push_back(*i);
201 }
202 }
203
204 if (debug)
205 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
206 for (vector<coll_t>::iterator i = colls_to_check.begin();
207 i != colls_to_check.end();
208 ++i, ++scanned) {
209 if (debug)
210 cerr << "Scanning " << *i << ", " << scanned << "/"
211 << colls_to_check.size() << " completed" << std::endl;
212 r = _action_on_all_objects_in_pg(store, *i, action, debug);
213 if (r < 0)
214 return r;
215 }
216 return 0;
217 }
218
219 int action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
220 {
221 int r = _action_on_all_objects(store, action, debug);
222 return r;
223 }
224
225 struct pgid_object_list {
226 list<pair<coll_t, ghobject_t> > _objects;
227
228 void insert(coll_t coll, ghobject_t &ghobj) {
229 _objects.push_back(make_pair(coll, ghobj));
230 }
231
232 void dump(Formatter *f, bool human_readable) const {
233 if (!human_readable)
234 f->open_array_section("pgid_objects");
235 for (list<pair<coll_t, ghobject_t> >::const_iterator i = _objects.begin();
236 i != _objects.end();
237 ++i) {
238 f->open_array_section("pgid_object");
239 spg_t pgid;
240 bool is_pg = i->first.is_pg(&pgid);
241 if (is_pg)
242 f->dump_string("pgid", stringify(pgid));
243 if (!is_pg || !human_readable)
244 f->dump_string("coll", i->first.to_str());
245 f->open_object_section("ghobject");
246 i->second.dump(f);
247 f->close_section();
248 f->close_section();
249 if (human_readable) {
250 f->flush(cout);
251 cout << std::endl;
252 }
253 }
254 if (!human_readable) {
255 f->close_section();
256 f->flush(cout);
257 cout << std::endl;
258 }
259 }
260 };
261
262 struct lookup_ghobject : public action_on_object_t {
263 pgid_object_list _objects;
264 const string _name;
265 const boost::optional<std::string> _namespace;
266 bool _need_snapset;
267
268 lookup_ghobject(const string& name, const boost::optional<std::string>& nspace, bool need_snapset = false) : _name(name),
269 _namespace(nspace), _need_snapset(need_snapset) { }
270
271 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
272 if (_need_snapset && !ghobj.hobj.has_snapset())
273 return;
274 if ((_name.length() == 0 || ghobj.hobj.oid.name == _name) &&
275 (!_namespace || ghobj.hobj.nspace == _namespace))
276 _objects.insert(coll, ghobj);
277 return;
278 }
279
280 int size() const {
281 return _objects._objects.size();
282 }
283
284 pair<coll_t, ghobject_t> pop() {
285 pair<coll_t, ghobject_t> front = _objects._objects.front();
286 _objects._objects.pop_front();
287 return front;
288 }
289
290 void dump(Formatter *f, bool human_readable) const {
291 _objects.dump(f, human_readable);
292 }
293 };
294
295 struct lookup_slow_ghobject : public action_on_object_t {
296 list<tuple<
297 coll_t,
298 ghobject_t,
299 ceph::signedspan,
300 ceph::signedspan,
301 ceph::signedspan,
302 string> > _objects;
303 const string _name;
304 double threshold;
305
306 coll_t last_coll;
307
308 lookup_slow_ghobject(const string& name, double _threshold) :
309 _name(name), threshold(_threshold) { }
310
311 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
312 ObjectMap::ObjectMapIterator iter;
313 auto start1 = mono_clock::now();
314 ceph::signedspan first_seek_time = start1 - start1;
315 ceph::signedspan last_seek_time = first_seek_time;
316 ceph::signedspan total_time = first_seek_time;
317 {
318 auto ch = store->open_collection(coll);
319 iter = store->get_omap_iterator(ch, ghobj);
320 if (!iter) {
321 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT)
322 << " obj:" << ghobj
323 << std::endl;
324 return;
325 }
326 auto start = mono_clock::now();
327 iter->seek_to_first();
328 first_seek_time = mono_clock::now() - start;
329
330 while(iter->valid()) {
331 start = mono_clock::now();
332 iter->next();
333 last_seek_time = mono_clock::now() - start;
334 }
335 }
336
337 if (coll != last_coll) {
338     cerr << ">>> inspecting coll " << coll << std::endl;
339 last_coll = coll;
340 }
341
342 total_time = mono_clock::now() - start1;
343 if ( total_time >= make_timespan(threshold)) {
344 _objects.emplace_back(coll, ghobj,
345 first_seek_time, last_seek_time, total_time,
346 url_escape(iter->tail_key()));
347 cerr << ">>>>> found obj " << ghobj
348 << " first_seek_time "
349 << std::chrono::duration_cast<std::chrono::seconds>(first_seek_time).count()
350 << " last_seek_time "
351 << std::chrono::duration_cast<std::chrono::seconds>(last_seek_time).count()
352 << " total_time "
353 << std::chrono::duration_cast<std::chrono::seconds>(total_time).count()
354 << " tail key: " << url_escape(iter->tail_key())
355 << std::endl;
356 }
357 return;
358 }
359
360 int size() const {
361 return _objects.size();
362 }
363
364 void dump(Formatter *f, bool human_readable) const {
365 if (!human_readable)
366 f->open_array_section("objects");
367 for (auto i = _objects.begin();
368 i != _objects.end();
369 ++i) {
370 f->open_array_section("object");
371 coll_t coll;
372 ghobject_t ghobj;
373 ceph::signedspan first_seek_time;
374 ceph::signedspan last_seek_time;
375 ceph::signedspan total_time;
376 string tail_key;
377 std::tie(coll, ghobj, first_seek_time, last_seek_time, total_time, tail_key) = *i;
378
379 spg_t pgid;
380 bool is_pg = coll.is_pg(&pgid);
381 if (is_pg)
382 f->dump_string("pgid", stringify(pgid));
383 if (!is_pg || !human_readable)
384 f->dump_string("coll", coll.to_str());
385 f->dump_object("ghobject", ghobj);
386 f->open_object_section("times");
387 f->dump_int("first_seek_time",
388 std::chrono::duration_cast<std::chrono::seconds>(first_seek_time).count());
389 f->dump_int("last_seek_time",
390 std::chrono::duration_cast<std::chrono::seconds>
391 (last_seek_time).count());
392 f->dump_int("total_time",
393 std::chrono::duration_cast<std::chrono::seconds>(total_time).count());
394 f->dump_string("tail_key", tail_key);
395 f->close_section();
396
397 f->close_section();
398 if (human_readable) {
399 f->flush(cout);
400 cout << std::endl;
401 }
402 }
403 if (!human_readable) {
404 f->close_section();
405 f->flush(cout);
406 cout << std::endl;
407 }
408 }
409 };
410
411 int file_fd = fd_none;
412 bool debug;
413 bool force = false;
414 bool no_superblock = false;
415
416 super_header sh;
417
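// Read the entire contents of fd into bl, in chunks of at most max_read bytes.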
418 static int get_fd_data(int fd, bufferlist &bl)
419 {
420 uint64_t total = 0;
421 do {
422 ssize_t bytes = bl.read_fd(fd, max_read);
423 if (bytes < 0) {
424 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
425 return bytes;
426 }
427
428 if (bytes == 0)
429 break;
430
431 total += bytes;
432 } while(true);
433
434 ceph_assert(bl.length() == total);
435 return 0;
436 }
437
438 int get_log(ObjectStore *fs, __u8 struct_ver,
439 spg_t pgid, const pg_info_t &info,
440 PGLog::IndexedLog &log, pg_missing_t &missing)
441 {
442 try {
443 auto ch = fs->open_collection(coll_t(pgid));
444 if (!ch) {
445 return -ENOENT;
446 }
447 ostringstream oss;
448 ceph_assert(struct_ver > 0);
449 PGLog::read_log_and_missing(
450 fs, ch,
451 pgid.make_pgmeta_oid(),
452 info, log, missing,
453 oss,
454 g_ceph_context->_conf->osd_ignore_stale_divergent_priors);
455 if (debug && oss.str().size())
456 cerr << oss.str() << std::endl;
457 }
458 catch (const buffer::error &e) {
459 cerr << "read_log_and_missing threw exception error " << e.what() << std::endl;
460 return -EFAULT;
461 }
462 return 0;
463 }
464
465 void dump_log(Formatter *formatter, ostream &out, pg_log_t &log,
466 pg_missing_t &missing)
467 {
468 formatter->open_object_section("op_log");
469 formatter->open_object_section("pg_log_t");
470 log.dump(formatter);
471 formatter->close_section();
472 formatter->flush(out);
473 formatter->open_object_section("pg_missing_t");
474 missing.dump(formatter);
475 formatter->close_section();
476 formatter->close_section();
477 formatter->flush(out);
478 }
479
480 //Based on part of OSD::load_pgs()
481 int finish_remove_pgs(ObjectStore *store)
482 {
483 vector<coll_t> ls;
484 int r = store->list_collections(ls);
485 if (r < 0) {
486 cerr << "finish_remove_pgs: failed to list pgs: " << cpp_strerror(r)
487 << std::endl;
488 return r;
489 }
490
491 for (vector<coll_t>::iterator it = ls.begin();
492 it != ls.end();
493 ++it) {
494 spg_t pgid;
495
496 if (it->is_temp(&pgid) ||
497 (it->is_pg(&pgid) && PG::_has_removal_flag(store, pgid))) {
498 cout << "finish_remove_pgs " << *it << " removing " << pgid << std::endl;
499 OSD::recursive_remove_collection(g_ceph_context, store, pgid, *it);
500 continue;
501 }
502
503 //cout << "finish_remove_pgs ignoring unrecognized " << *it << std::endl;
504 }
505 return 0;
506 }
507
508 #pragma GCC diagnostic ignored "-Wpragmas"
509 #pragma GCC diagnostic push
510 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
511
512 int mark_pg_for_removal(ObjectStore *fs, spg_t pgid, ObjectStore::Transaction *t)
513 {
514 pg_info_t info(pgid);
515 coll_t coll(pgid);
516 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
517
518 epoch_t map_epoch = 0;
519 int r = PG::peek_map_epoch(fs, pgid, &map_epoch);
520 if (r < 0)
521 cerr << __func__ << " warning: peek_map_epoch reported error" << std::endl;
522 PastIntervals past_intervals;
523 __u8 struct_v;
524 r = PG::read_info(fs, pgid, coll, info, past_intervals, struct_v);
525 if (r < 0) {
526 cerr << __func__ << " error on read_info " << cpp_strerror(r) << std::endl;
527 return r;
528 }
529 ceph_assert(struct_v >= 8);
530 // new omap key
531 cout << "setting '_remove' omap key" << std::endl;
532 map<string,bufferlist> values;
533 encode((char)1, values["_remove"]);
534 t->omap_setkeys(coll, pgmeta_oid, values);
535 return 0;
536 }
537
538 #pragma GCC diagnostic pop
539 #pragma GCC diagnostic warning "-Wpragmas"
540
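// Register a completion on txn, invoke func (which is expected to queue txn),
// and block the caller until the transaction's on_complete callback fires.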
541 template<typename Func>
542 void wait_until_done(ObjectStore::Transaction* txn, Func&& func)
543 {
544 bool finished = false;
545 std::condition_variable cond;
546 std::mutex m;
547 txn->register_on_complete(make_lambda_context([&](int) {
548 std::unique_lock lock{m};
549 finished = true;
550 cond.notify_one();
551 }));
552 std::move(func)();
553 std::unique_lock lock{m};
554 cond.wait(lock, [&] {return finished;});
555 }
556
557 int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid)
558 {
559 if (!dry_run)
560 finish_remove_pgs(store);
561 if (!store->collection_exists(coll_t(r_pgid)))
562 return -ENOENT;
563
564 cout << " marking collection for removal" << std::endl;
565 if (dry_run)
566 return 0;
567 ObjectStore::Transaction rmt;
568 int r = mark_pg_for_removal(store, r_pgid, &rmt);
569 if (r < 0) {
570 return r;
571 }
572 ObjectStore::CollectionHandle ch = store->open_collection(coll_t(r_pgid));
573 store->queue_transaction(ch, std::move(rmt));
574 finish_remove_pgs(store);
575 return r;
576 }
577
578 int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
579 PastIntervals &past_intervals)
580 {
581 //Empty for this
582 coll_t coll(info.pgid);
583 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
584 map<string,bufferlist> km;
585 string key_to_remove;
586 pg_info_t last_written_info;
587 int ret = prepare_info_keymap(
588 g_ceph_context,
589 &km, &key_to_remove,
590 epoch,
591 info,
592 last_written_info,
593 past_intervals,
594 true, true, false);
595 if (ret) cerr << "Failed to write info" << std::endl;
596 t.omap_setkeys(coll, pgmeta_oid, km);
597 if (!key_to_remove.empty()) {
598 t.omap_rmkey(coll, pgmeta_oid, key_to_remove);
599 }
600 return ret;
601 }
602
603 typedef map<eversion_t, hobject_t> divergent_priors_t;
604
605 int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
606 pg_log_t &log, PastIntervals &past_intervals,
607 divergent_priors_t &divergent,
608 pg_missing_t &missing)
609 {
610 cout << __func__ << " epoch " << epoch << " info " << info << std::endl;
611 int ret = write_info(t, epoch, info, past_intervals);
612 if (ret)
613 return ret;
614
615 coll_t coll(info.pgid);
616 map<string,bufferlist> km;
617 const bool require_rollback = !info.pgid.is_no_shard();
618 if (!divergent.empty()) {
619 ceph_assert(missing.get_items().empty());
620 PGLog::write_log_and_missing_wo_missing(
621 t, &km, log, coll, info.pgid.make_pgmeta_oid(), divergent,
622 require_rollback);
623 } else {
624 pg_missing_tracker_t tmissing(missing);
625 bool rebuilt_missing_set_with_deletes = missing.may_include_deletes;
626 PGLog::write_log_and_missing(
627 t, &km, log, coll, info.pgid.make_pgmeta_oid(), tmissing,
628 require_rollback,
629 &rebuilt_missing_set_with_deletes);
630 }
631 t.omap_setkeys(coll, info.pgid.make_pgmeta_oid(), km);
632 return 0;
633 }
634
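// Trim the pg log stored in the pgmeta object's omap down to
// osd_max_pg_log_entries entries: walk the log keys, remove entries older than
// the trim point in batches of osd_pg_log_trim_max, update info.log_tail via
// write_info(), then compact the store.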
635 int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
636 pg_info_t &info, const spg_t &pgid,
637 epoch_t map_epoch,
638 PastIntervals &past_intervals)
639 {
640 ghobject_t oid = pgid.make_pgmeta_oid();
641 struct stat st;
642 auto ch = store->open_collection(coll);
643 int r = store->stat(ch, oid, &st);
644 ceph_assert(r == 0);
645 ceph_assert(st.st_size == 0);
646
647 cerr << "Log bounds are: " << "(" << info.log_tail << ","
648 << info.last_update << "]" << std::endl;
649
650 uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
651 if (info.last_update.version - info.log_tail.version <= max_entries) {
652 cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
653 return 0;
654 }
655
656 ceph_assert(info.last_update.version > max_entries);
657 version_t trim_to = info.last_update.version - max_entries;
658 size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
659 eversion_t new_tail;
660 bool done = false;
661
662 while (!done) {
663 // gather keys so we can delete them in a batch without
664 // affecting the iterator
665 set<string> keys_to_trim;
666 {
667 ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
668 if (!p)
669 break;
670 for (p->seek_to_first(); p->valid(); p->next()) {
671 if (p->key()[0] == '_')
672 continue;
673 if (p->key() == "can_rollback_to")
674 continue;
675 if (p->key() == "divergent_priors")
676 continue;
677 if (p->key() == "rollback_info_trimmed_to")
678 continue;
679 if (p->key() == "may_include_deletes_in_missing")
680 continue;
681 if (p->key().substr(0, 7) == string("missing"))
682 continue;
683 if (p->key().substr(0, 4) == string("dup_"))
684 continue;
685
686 bufferlist bl = p->value();
687 auto bp = bl.cbegin();
688 pg_log_entry_t e;
689 try {
690 e.decode_with_checksum(bp);
691 } catch (const buffer::error &e) {
692 cerr << "Error reading pg log entry: " << e.what() << std::endl;
693 }
694 if (debug) {
695 cerr << "read entry " << e << std::endl;
696 }
697 if (e.version.version > trim_to) {
698 done = true;
699 break;
700 }
701 keys_to_trim.insert(p->key());
702 new_tail = e.version;
703 if (keys_to_trim.size() >= trim_at_once)
704 break;
705 }
706
707 if (!p->valid())
708 done = true;
709 } // deconstruct ObjectMapIterator
710
711 // delete the keys
712 if (!dry_run && !keys_to_trim.empty()) {
713 cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
714 ObjectStore::Transaction t;
715 t.omap_rmkeys(coll, oid, keys_to_trim);
716 store->queue_transaction(ch, std::move(t));
717 ch->flush();
718 }
719 }
720
721 // update pg info with new tail
722 if (!dry_run && new_tail != eversion_t()) {
723 info.log_tail = new_tail;
724 ObjectStore::Transaction t;
725 int ret = write_info(t, map_epoch, info, past_intervals);
726 if (ret)
727 return ret;
728 store->queue_transaction(ch, std::move(t));
729 ch->flush();
730 }
731
732 // compact the db since we just removed a bunch of data
733 cerr << "Finished trimming, now compacting..." << std::endl;
734 if (!dry_run)
735 store->compact();
736 return 0;
737 }
738
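// Trim the pg log dup entries ("dup_" omap keys) down to
// osd_pg_log_dups_tracked, removing the oldest keys in chunks of
// osd_pg_log_trim_max, then compact the store.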
739 int do_trim_pg_log_dups(ObjectStore *store, const coll_t &coll,
740 pg_info_t &info, const spg_t &pgid,
741 epoch_t map_epoch,
742 PastIntervals &past_intervals)
743 {
744 ghobject_t oid = pgid.make_pgmeta_oid();
745 struct stat st;
746 auto ch = store->open_collection(coll);
747 int r = store->stat(ch, oid, &st);
748 ceph_assert(r == 0);
749 ceph_assert(st.st_size == 0);
750
751 const size_t max_dup_entries = g_ceph_context->_conf->osd_pg_log_dups_tracked;
752 ceph_assert(max_dup_entries > 0);
753 const size_t max_chunk_size = g_ceph_context->_conf->osd_pg_log_trim_max;
754 ceph_assert(max_chunk_size > 0);
755
756 cout << "max_dup_entries=" << max_dup_entries
757 << " max_chunk_size=" << max_chunk_size << std::endl;
758 if (dry_run) {
759 cout << "Dry run enabled, so when many chunks are needed,"
760 << " the trimming will never stop!" << std::endl;
761 }
762
763 set<string> keys_to_keep;
764 size_t num_removed = 0;
765 do {
766 set<string> keys_to_trim;
767 {
768 ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
769 if (!p)
770 break;
771 for (p->seek_to_first(); p->valid(); p->next()) {
772 if (p->key()[0] == '_')
773 continue;
774 if (p->key() == "can_rollback_to")
775 continue;
776 if (p->key() == "divergent_priors")
777 continue;
778 if (p->key() == "rollback_info_trimmed_to")
779 continue;
780 if (p->key() == "may_include_deletes_in_missing")
781 continue;
782 if (p->key().substr(0, 7) == string("missing"))
783 continue;
784 if (p->key().substr(0, 4) != string("dup_"))
785 continue;
786 keys_to_keep.insert(p->key());
787 if (keys_to_keep.size() > max_dup_entries) {
788 auto oldest_to_keep = keys_to_keep.begin();
789 keys_to_trim.emplace(*oldest_to_keep);
790 keys_to_keep.erase(oldest_to_keep);
791 }
792 if (keys_to_trim.size() >= max_chunk_size) {
793 break;
794 }
795 }
796 } // deconstruct ObjectMapIterator
797 // delete the keys
798 num_removed = keys_to_trim.size();
799 if (!dry_run && !keys_to_trim.empty()) {
800 cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
801 ObjectStore::Transaction t;
802 t.omap_rmkeys(coll, oid, keys_to_trim);
803 store->queue_transaction(ch, std::move(t));
804 ch->flush();
805 }
806 } while (num_removed == max_chunk_size);
807
808 // compact the db since we just removed a bunch of data
809 cerr << "Finished trimming, now compacting..." << std::endl;
810 if (!dry_run)
811 store->compact();
812 return 0;
813 }
814
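// Fill oset with up to OMAP_BATCH_SIZE key/value pairs read from iter.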
815 const int OMAP_BATCH_SIZE = 25;
816 void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map<string, bufferlist> &oset)
817 {
818 oset.clear();
819 for (int count = OMAP_BATCH_SIZE; count && iter->valid(); --count, iter->next()) {
820 oset.insert(pair<string, bufferlist>(iter->key(), iter->value()));
821 }
822 }
823
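// Export a single object to file_fd: its object_info_t, its data (read in
// max_read chunks), its xattrs, and its omap header and omap entries, each
// framed as typed sections in the export stream.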
824 int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj)
825 {
826 struct stat st;
827 mysize_t total;
828 footer ft;
829
830 auto ch = store->open_collection(cid);
831 int ret = store->stat(ch, obj, &st);
832 if (ret < 0)
833 return ret;
834
835 cerr << "Read " << obj << std::endl;
836
837 total = st.st_size;
838 if (debug)
839 cerr << "size=" << total << std::endl;
840
841 object_begin objb(obj);
842
843 {
844 bufferptr bp;
845 bufferlist bl;
846 ret = store->getattr(ch, obj, OI_ATTR, bp);
847 if (ret < 0) {
848 cerr << "getattr failure object_info " << ret << std::endl;
849 return ret;
850 }
851 bl.push_back(bp);
852 decode(objb.oi, bl);
853 if (debug)
854 cerr << "object_info: " << objb.oi << std::endl;
855 }
856
857 // NOTE: we include whiteouts, lost, etc.
858
859 ret = write_section(TYPE_OBJECT_BEGIN, objb, file_fd);
860 if (ret < 0)
861 return ret;
862
863 uint64_t offset = 0;
864 bufferlist rawdatabl;
865 while(total > 0) {
866 rawdatabl.clear();
867 mysize_t len = max_read;
868 if (len > total)
869 len = total;
870
871 ret = store->read(ch, obj, offset, len, rawdatabl);
872 if (ret < 0)
873 return ret;
874 if (ret == 0)
875 return -EINVAL;
876
877 data_section dblock(offset, len, rawdatabl);
878 if (debug)
879 cerr << "data section offset=" << offset << " len=" << len << std::endl;
880
881 total -= ret;
882 offset += ret;
883
884 ret = write_section(TYPE_DATA, dblock, file_fd);
885 if (ret) return ret;
886 }
887
888 //Handle attrs for this object
889 map<string,bufferptr,less<>> aset;
890 ret = store->getattrs(ch, obj, aset);
891 if (ret) return ret;
892 attr_section as(aset);
893 ret = write_section(TYPE_ATTRS, as, file_fd);
894 if (ret)
895 return ret;
896
897 if (debug) {
898 cerr << "attrs size " << aset.size() << std::endl;
899 }
900
901 //Handle omap information
902 bufferlist hdrbuf;
903 ret = store->omap_get_header(ch, obj, &hdrbuf, true);
904 if (ret < 0) {
905 cerr << "omap_get_header: " << cpp_strerror(ret) << std::endl;
906 return ret;
907 }
908
909 omap_hdr_section ohs(hdrbuf);
910 ret = write_section(TYPE_OMAP_HDR, ohs, file_fd);
911 if (ret)
912 return ret;
913
914 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, obj);
915 if (!iter) {
916 ret = -ENOENT;
917 cerr << "omap_get_iterator: " << cpp_strerror(ret) << std::endl;
918 return ret;
919 }
920 iter->seek_to_first();
921 int mapcount = 0;
922 map<string, bufferlist> out;
923 while(iter->valid()) {
924 get_omap_batch(iter, out);
925
926 if (out.empty()) break;
927
928 mapcount += out.size();
929 omap_section oms(out);
930 ret = write_section(TYPE_OMAP, oms, file_fd);
931 if (ret)
932 return ret;
933 }
934 if (debug)
935 cerr << "omap map size " << mapcount << std::endl;
936
937 ret = write_simple(TYPE_OBJECT_END, file_fd);
938 if (ret)
939 return ret;
940
941 return 0;
942 }
943
944 int ObjectStoreTool::export_files(ObjectStore *store, coll_t coll)
945 {
946 ghobject_t next;
947 auto ch = store->open_collection(coll);
948 while (!next.is_max()) {
949 vector<ghobject_t> objects;
950 int r = store->collection_list(ch, next, ghobject_t::get_max(), 300,
951 &objects, &next);
952 if (r < 0)
953 return r;
954 for (vector<ghobject_t>::iterator i = objects.begin();
955 i != objects.end();
956 ++i) {
957 ceph_assert(!i->hobj.is_meta());
958 if (i->is_pgmeta() || i->hobj.is_temp() || !i->is_no_gen()) {
959 continue;
960 }
961 r = export_file(store, coll, *i);
962 if (r < 0)
963 return r;
964 }
965 }
966 return 0;
967 }
968
969 int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
970 OSDMap::Incremental inc;
971 auto it = bl.cbegin();
972 inc.decode(it);
973 if (e == 0) {
974 e = inc.epoch;
975 } else if (e != inc.epoch) {
976 cerr << "incremental.epoch mismatch: "
977 << inc.epoch << " != " << e << std::endl;
978 if (force) {
979 cerr << "But will continue anyway." << std::endl;
980 } else {
981 return -EINVAL;
982 }
983 }
984 auto ch = store->open_collection(coll_t::meta());
985 const ghobject_t inc_oid = OSD::get_inc_osdmap_pobject_name(e);
986 if (!store->exists(ch, inc_oid)) {
987 cerr << "inc-osdmap (" << inc_oid << ") does not exist." << std::endl;
988 if (!force) {
989 return -ENOENT;
990 }
991 cout << "Creating a new epoch." << std::endl;
992 }
993 if (dry_run)
994 return 0;
995 ObjectStore::Transaction t;
996 t.write(coll_t::meta(), inc_oid, 0, bl.length(), bl);
997 t.truncate(coll_t::meta(), inc_oid, bl.length());
998 store->queue_transaction(ch, std::move(t));
999 return 0;
1000 }
1001
1002 int get_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl)
1003 {
1004 auto ch = store->open_collection(coll_t::meta());
1005 if (store->read(ch,
1006 OSD::get_inc_osdmap_pobject_name(e),
1007 0, 0, bl) < 0) {
1008 return -ENOENT;
1009 }
1010 return 0;
1011 }
1012
1013 int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
1014 OSDMap osdmap;
1015 osdmap.decode(bl);
1016 if (e == 0) {
1017 e = osdmap.get_epoch();
1018 } else if (e != osdmap.get_epoch()) {
1019 cerr << "osdmap.epoch mismatch: "
1020 << e << " != " << osdmap.get_epoch() << std::endl;
1021 if (force) {
1022 cerr << "But will continue anyway." << std::endl;
1023 } else {
1024 return -EINVAL;
1025 }
1026 }
1027 auto ch = store->open_collection(coll_t::meta());
1028 const ghobject_t full_oid = OSD::get_osdmap_pobject_name(e);
1029 if (!store->exists(ch, full_oid)) {
1030 cerr << "osdmap (" << full_oid << ") does not exist." << std::endl;
1031 if (!force) {
1032 return -ENOENT;
1033 }
1034 cout << "Creating a new epoch." << std::endl;
1035 }
1036 if (dry_run)
1037 return 0;
1038 ObjectStore::Transaction t;
1039 t.write(coll_t::meta(), full_oid, 0, bl.length(), bl);
1040 t.truncate(coll_t::meta(), full_oid, bl.length());
1041 store->queue_transaction(ch, std::move(t));
1042 return 0;
1043 }
1044
1045 int get_osdmap(ObjectStore *store, epoch_t e, OSDMap &osdmap, bufferlist& bl)
1046 {
1047 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
1048 bool found = store->read(
1049 ch, OSD::get_osdmap_pobject_name(e), 0, 0, bl) >= 0;
1050 if (!found) {
1051 cerr << "Can't find OSDMap for pg epoch " << e << std::endl;
1052 return -ENOENT;
1053 }
1054 osdmap.decode(bl);
1055 if (debug)
1056 cerr << osdmap << std::endl;
1057 return 0;
1058 }
1059
1060 int get_pg_num_history(ObjectStore *store, pool_pg_num_history_t *h)
1061 {
1062 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
1063 bufferlist bl;
1064 auto pghist = OSD::make_pg_num_history_oid();
1065 int r = store->read(ch, pghist, 0, 0, bl, 0);
1066 if (r >= 0 && bl.length() > 0) {
1067 auto p = bl.cbegin();
1068 decode(*h, p);
1069 }
1070 cout << __func__ << " pg_num_history " << *h << std::endl;
1071 return 0;
1072 }
1073
1074 int add_osdmap(ObjectStore *store, metadata_section &ms)
1075 {
1076 return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl);
1077 }
1078
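// Export one PG to file_fd: the super header, a TYPE_PG_BEGIN section, a
// metadata_section (info, log, missing, past_intervals and the OSDMap at
// map_epoch), then every object in the collection, and a TYPE_PG_END marker.
// The metadata is written before the objects so import can fail early.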
1079 int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid,
1080 pg_info_t &info, epoch_t map_epoch, __u8 struct_ver,
1081 const OSDSuperblock& superblock,
1082 PastIntervals &past_intervals)
1083 {
1084 PGLog::IndexedLog log;
1085 pg_missing_t missing;
1086
1087 cerr << "Exporting " << pgid << " info " << info << std::endl;
1088
1089 int ret = get_log(fs, struct_ver, pgid, info, log, missing);
1090 if (ret > 0)
1091 return ret;
1092
1093 if (debug) {
1094 Formatter *formatter = Formatter::create("json-pretty");
1095 ceph_assert(formatter);
1096 dump_log(formatter, cerr, log, missing);
1097 delete formatter;
1098 }
1099 write_super();
1100
1101 pg_begin pgb(pgid, superblock);
1102   // Special case: if this is a replicated pg, don't require the importing OSD to have the SHARDS feature
1103 if (pgid.is_no_shard()) {
1104 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1105 }
1106 ret = write_section(TYPE_PG_BEGIN, pgb, file_fd);
1107 if (ret)
1108 return ret;
1109
1110 // The metadata_section is now before files, so import can detect
1111 // errors and abort without wasting time.
1112 metadata_section ms(
1113 struct_ver,
1114 map_epoch,
1115 info,
1116 log,
1117 past_intervals,
1118 missing);
1119 ret = add_osdmap(fs, ms);
1120 if (ret)
1121 return ret;
1122 ret = write_section(TYPE_PG_METADATA, ms, file_fd);
1123 if (ret)
1124 return ret;
1125
1126 ret = export_files(fs, coll);
1127 if (ret) {
1128 cerr << "export_files error " << ret << std::endl;
1129 return ret;
1130 }
1131
1132 ret = write_simple(TYPE_PG_END, file_fd);
1133 if (ret)
1134 return ret;
1135
1136 return 0;
1137 }
1138
1139 int dump_data(Formatter *formatter, bufferlist &bl)
1140 {
1141 auto ebliter = bl.cbegin();
1142 data_section ds;
1143 ds.decode(ebliter);
1144
1145 formatter->open_object_section("data_block");
1146 formatter->dump_unsigned("offset", ds.offset);
1147 formatter->dump_unsigned("len", ds.len);
1148 // XXX: Add option to dump data like od -cx ?
1149 formatter->close_section();
1150 formatter->flush(cout);
1151 return 0;
1152 }
1153
1154 int get_data(ObjectStore *store, coll_t coll, ghobject_t hoid,
1155 ObjectStore::Transaction *t, bufferlist &bl)
1156 {
1157 auto ebliter = bl.cbegin();
1158 data_section ds;
1159 ds.decode(ebliter);
1160
1161 if (debug)
1162 cerr << "\tdata: offset " << ds.offset << " len " << ds.len << std::endl;
1163 t->write(coll, hoid, ds.offset, ds.len, ds.databl);
1164 return 0;
1165 }
1166
1167 int dump_attrs(
1168 Formatter *formatter, ghobject_t hoid,
1169 bufferlist &bl)
1170 {
1171 auto ebliter = bl.cbegin();
1172 attr_section as;
1173 as.decode(ebliter);
1174
1175 // This could have been handled in the caller if we didn't need to
1176 // support exports that didn't include object_info_t in object_begin.
1177 if (hoid.generation == ghobject_t::NO_GEN &&
1178 hoid.hobj.is_head()) {
1179 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1180 if (mi != as.data.end()) {
1181 SnapSet snapset;
1182 auto p = mi->second.cbegin();
1183 snapset.decode(p);
1184 formatter->open_object_section("snapset");
1185 snapset.dump(formatter);
1186 formatter->close_section();
1187 } else {
1188 formatter->open_object_section("snapset");
1189 formatter->dump_string("error", "missing SS_ATTR");
1190 formatter->close_section();
1191 }
1192 }
1193
1194 formatter->open_object_section("attrs");
1195 formatter->open_array_section("user");
1196 for (auto kv : as.data) {
1197 // Skip system attributes
1198 if (('_' != kv.first.at(0)) || kv.first.size() == 1)
1199 continue;
1200 formatter->open_object_section("user_attr");
1201 formatter->dump_string("name", kv.first.substr(1));
1202 bool b64;
1203 formatter->dump_string("value", cleanbin(kv.second, b64));
1204 formatter->dump_bool("Base64", b64);
1205 formatter->close_section();
1206 }
1207 formatter->close_section();
1208 formatter->open_array_section("system");
1209 for (auto kv : as.data) {
1210 // Skip user attributes
1211 if (('_' == kv.first.at(0)) && kv.first.size() != 1)
1212 continue;
1213 formatter->open_object_section("sys_attr");
1214 formatter->dump_string("name", kv.first);
1215 formatter->close_section();
1216 }
1217 formatter->close_section();
1218 formatter->close_section();
1219 formatter->flush(cout);
1220
1221 return 0;
1222 }
1223
1224 int get_attrs(
1225 ObjectStore *store, coll_t coll, ghobject_t hoid,
1226 ObjectStore::Transaction *t, bufferlist &bl,
1227 OSDriver &driver, SnapMapper &snap_mapper)
1228 {
1229 auto ebliter = bl.cbegin();
1230 attr_section as;
1231 as.decode(ebliter);
1232
1233 auto ch = store->open_collection(coll);
1234 if (debug)
1235 cerr << "\tattrs: len " << as.data.size() << std::endl;
1236 t->setattrs(coll, hoid, as.data);
1237
1238 // This could have been handled in the caller if we didn't need to
1239 // support exports that didn't include object_info_t in object_begin.
1240 if (hoid.generation == ghobject_t::NO_GEN &&
1241 hoid.hobj.is_head()) {
1242 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1243 if (mi != as.data.end()) {
1244 SnapSet snapset;
1245 auto p = mi->second.cbegin();
1246 snapset.decode(p);
1247 cout << "snapset " << snapset << std::endl;
1248 for (auto& p : snapset.clone_snaps) {
1249 ghobject_t clone = hoid;
1250 clone.hobj.snap = p.first;
1251 set<snapid_t> snaps(p.second.begin(), p.second.end());
1252 if (!store->exists(ch, clone)) {
1253 // no clone, skip. this is probably a cache pool. this works
1254 // because we use a separate transaction per object and clones
1255 // come before head in the archive.
1256 if (debug)
1257 cerr << "\tskipping missing " << clone << " (snaps "
1258 << snaps << ")" << std::endl;
1259 continue;
1260 }
1261 if (debug)
1262 cerr << "\tsetting " << clone.hobj << " snaps " << snaps
1263 << std::endl;
1264 OSDriver::OSTransaction _t(driver.get_transaction(t));
1265 ceph_assert(!snaps.empty());
1266 snap_mapper.add_oid(clone.hobj, snaps, &_t);
1267 }
1268 } else {
1269 cerr << "missing SS_ATTR on " << hoid << std::endl;
1270 }
1271 }
1272 return 0;
1273 }
1274
1275 int dump_omap_hdr(Formatter *formatter, bufferlist &bl)
1276 {
1277 auto ebliter = bl.cbegin();
1278 omap_hdr_section oh;
1279 oh.decode(ebliter);
1280
1281 formatter->open_object_section("omap_header");
1282 formatter->dump_string("value", string(oh.hdr.c_str(), oh.hdr.length()));
1283 formatter->close_section();
1284 formatter->flush(cout);
1285 return 0;
1286 }
1287
1288 int get_omap_hdr(ObjectStore *store, coll_t coll, ghobject_t hoid,
1289 ObjectStore::Transaction *t, bufferlist &bl)
1290 {
1291 auto ebliter = bl.cbegin();
1292 omap_hdr_section oh;
1293 oh.decode(ebliter);
1294
1295 if (debug)
1296 cerr << "\tomap header: " << string(oh.hdr.c_str(), oh.hdr.length())
1297 << std::endl;
1298 t->omap_setheader(coll, hoid, oh.hdr);
1299 return 0;
1300 }
1301
1302 int dump_omap(Formatter *formatter, bufferlist &bl)
1303 {
1304 auto ebliter = bl.cbegin();
1305 omap_section os;
1306 os.decode(ebliter);
1307
1308 formatter->open_object_section("omaps");
1309 formatter->dump_unsigned("count", os.omap.size());
1310 formatter->open_array_section("data");
1311 for (auto o : os.omap) {
1312 formatter->open_object_section("omap");
1313 formatter->dump_string("name", o.first);
1314 bool b64;
1315 formatter->dump_string("value", cleanbin(o.second, b64));
1316 formatter->dump_bool("Base64", b64);
1317 formatter->close_section();
1318 }
1319 formatter->close_section();
1320 formatter->close_section();
1321 formatter->flush(cout);
1322 return 0;
1323 }
1324
1325 int get_omap(ObjectStore *store, coll_t coll, ghobject_t hoid,
1326 ObjectStore::Transaction *t, bufferlist &bl)
1327 {
1328 auto ebliter = bl.cbegin();
1329 omap_section os;
1330 os.decode(ebliter);
1331
1332 if (debug)
1333 cerr << "\tomap: size " << os.omap.size() << std::endl;
1334 t->omap_setkeys(coll, hoid, os.omap);
1335 return 0;
1336 }
1337
1338 int ObjectStoreTool::dump_object(Formatter *formatter,
1339 bufferlist &bl)
1340 {
1341 auto ebliter = bl.cbegin();
1342 object_begin ob;
1343 ob.decode(ebliter);
1344
1345 if (ob.hoid.hobj.is_temp()) {
1346 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1347 return -EFAULT;
1348 }
1349
1350 formatter->open_object_section("object");
1351 formatter->open_object_section("oid");
1352 ob.hoid.dump(formatter);
1353 formatter->close_section();
1354 formatter->open_object_section("object_info");
1355 ob.oi.dump(formatter);
1356 formatter->close_section();
1357
1358 bufferlist ebl;
1359 bool done = false;
1360 while(!done) {
1361 sectiontype_t type;
1362 int ret = read_section(&type, &ebl);
1363 if (ret)
1364 return ret;
1365
1366 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1367 //cout << "\t\tsection size " << ebl.length() << std::endl;
1368 if (type >= END_OF_TYPES) {
1369 cout << "Skipping unknown object section type" << std::endl;
1370 continue;
1371 }
1372 switch(type) {
1373 case TYPE_DATA:
1374 if (dry_run) break;
1375 ret = dump_data(formatter, ebl);
1376 if (ret) return ret;
1377 break;
1378 case TYPE_ATTRS:
1379 if (dry_run) break;
1380 ret = dump_attrs(formatter, ob.hoid, ebl);
1381 if (ret) return ret;
1382 break;
1383 case TYPE_OMAP_HDR:
1384 if (dry_run) break;
1385 ret = dump_omap_hdr(formatter, ebl);
1386 if (ret) return ret;
1387 break;
1388 case TYPE_OMAP:
1389 if (dry_run) break;
1390 ret = dump_omap(formatter, ebl);
1391 if (ret) return ret;
1392 break;
1393 case TYPE_OBJECT_END:
1394 done = true;
1395 break;
1396 default:
1397 cerr << "Unknown section type " << type << std::endl;
1398 return -EFAULT;
1399 }
1400 }
1401 formatter->close_section();
1402 return 0;
1403 }
1404
1405 int ObjectStoreTool::get_object(ObjectStore *store,
1406 OSDriver& driver,
1407 SnapMapper& mapper,
1408 coll_t coll,
1409 bufferlist &bl, OSDMap &origmap,
1410 bool *skipped_objects)
1411 {
1412 ObjectStore::Transaction tran;
1413 ObjectStore::Transaction *t = &tran;
1414 auto ebliter = bl.cbegin();
1415 object_begin ob;
1416 ob.decode(ebliter);
1417
1418 if (ob.hoid.hobj.is_temp()) {
1419 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1420 return -EFAULT;
1421 }
1422 ceph_assert(g_ceph_context);
1423
1424 auto ch = store->open_collection(coll);
1425 if (ob.hoid.hobj.nspace != g_ceph_context->_conf->osd_hit_set_namespace) {
1426 object_t oid = ob.hoid.hobj.oid;
1427 object_locator_t loc(ob.hoid.hobj);
1428 pg_t raw_pgid = origmap.object_locator_to_pg(oid, loc);
1429 pg_t pgid = origmap.raw_pg_to_pg(raw_pgid);
1430
1431 spg_t coll_pgid;
1432 if (coll.is_pg(&coll_pgid) == false) {
1433 cerr << "INTERNAL ERROR: Bad collection during import" << std::endl;
1434 return -EFAULT;
1435 }
1436 if (coll_pgid.shard != ob.hoid.shard_id) {
1437 cerr << "INTERNAL ERROR: Importing shard " << coll_pgid.shard
1438 << " but object shard is " << ob.hoid.shard_id << std::endl;
1439 return -EFAULT;
1440 }
1441
1442 if (coll_pgid.pgid != pgid) {
1443 cerr << "Skipping object '" << ob.hoid << "' which belongs in pg " << pgid << std::endl;
1444 *skipped_objects = true;
1445 skip_object(bl);
1446 return 0;
1447 }
1448 }
1449
1450 if (!dry_run)
1451 t->touch(coll, ob.hoid);
1452
1453 cout << "Write " << ob.hoid << std::endl;
1454
1455 bufferlist ebl;
1456 bool done = false;
1457 while(!done) {
1458 sectiontype_t type;
1459 int ret = read_section(&type, &ebl);
1460 if (ret)
1461 return ret;
1462
1463 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1464 //cout << "\t\tsection size " << ebl.length() << std::endl;
1465 if (type >= END_OF_TYPES) {
1466 cout << "Skipping unknown object section type" << std::endl;
1467 continue;
1468 }
1469 switch(type) {
1470 case TYPE_DATA:
1471 if (dry_run) break;
1472 ret = get_data(store, coll, ob.hoid, t, ebl);
1473 if (ret) return ret;
1474 break;
1475 case TYPE_ATTRS:
1476 if (dry_run) break;
1477 ret = get_attrs(store, coll, ob.hoid, t, ebl, driver, mapper);
1478 if (ret) return ret;
1479 break;
1480 case TYPE_OMAP_HDR:
1481 if (dry_run) break;
1482 ret = get_omap_hdr(store, coll, ob.hoid, t, ebl);
1483 if (ret) return ret;
1484 break;
1485 case TYPE_OMAP:
1486 if (dry_run) break;
1487 ret = get_omap(store, coll, ob.hoid, t, ebl);
1488 if (ret) return ret;
1489 break;
1490 case TYPE_OBJECT_END:
1491 done = true;
1492 break;
1493 default:
1494 cerr << "Unknown section type " << type << std::endl;
1495 return -EFAULT;
1496 }
1497 }
1498 if (!dry_run) {
1499 wait_until_done(t, [&] {
1500 store->queue_transaction(ch, std::move(*t));
1501 ch->flush();
1502 });
1503 }
1504 return 0;
1505 }
1506
1507 int dump_pg_metadata(Formatter *formatter, bufferlist &bl, metadata_section &ms)
1508 {
1509 auto ebliter = bl.cbegin();
1510 ms.decode(ebliter);
1511
1512 formatter->open_object_section("metadata_section");
1513
1514 formatter->dump_unsigned("pg_disk_version", (int)ms.struct_ver);
1515 formatter->dump_unsigned("map_epoch", ms.map_epoch);
1516
1517 formatter->open_object_section("OSDMap");
1518 ms.osdmap.dump(formatter);
1519 formatter->close_section();
1520 formatter->flush(cout);
1521 cout << std::endl;
1522
1523 formatter->open_object_section("info");
1524 ms.info.dump(formatter);
1525 formatter->close_section();
1526 formatter->flush(cout);
1527
1528 formatter->open_object_section("log");
1529 ms.log.dump(formatter);
1530 formatter->close_section();
1531 formatter->flush(cout);
1532
1533 formatter->open_object_section("pg_missing_t");
1534 ms.missing.dump(formatter);
1535 formatter->close_section();
1536
1537 // XXX: ms.past_intervals?
1538
1539 formatter->close_section();
1540 formatter->flush(cout);
1541
1542 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1543 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1544 return -EFAULT;
1545 }
1546
1547 return 0;
1548 }
1549
1550 int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms,
1551 const OSDSuperblock& sb, spg_t pgid)
1552 {
1553 auto ebliter = bl.cbegin();
1554 ms.decode(ebliter);
1555 spg_t old_pgid = ms.info.pgid;
1556 ms.info.pgid = pgid;
1557
1558 if (debug) {
1559 cout << "export pgid " << old_pgid << std::endl;
1560 cout << "struct_v " << (int)ms.struct_ver << std::endl;
1561 cout << "map epoch " << ms.map_epoch << std::endl;
1562
1563 #ifdef DIAGNOSTIC
1564 Formatter *formatter = new JSONFormatter(true);
1565 formatter->open_object_section("stuff");
1566
1567 formatter->open_object_section("importing OSDMap");
1568 ms.osdmap.dump(formatter);
1569 formatter->close_section();
1570 formatter->flush(cout);
1571 cout << std::endl;
1572
1573 cout << "osd current epoch " << sb.current_epoch << std::endl;
1574
1575 formatter->open_object_section("info");
1576 ms.info.dump(formatter);
1577 formatter->close_section();
1578 formatter->flush(cout);
1579 cout << std::endl;
1580
1581 formatter->open_object_section("log");
1582 ms.log.dump(formatter);
1583 formatter->close_section();
1584 formatter->flush(cout);
1585 cout << std::endl;
1586
1587 formatter->close_section();
1588 formatter->flush(cout);
1589 cout << std::endl;
1590 #endif
1591 }
1592
1593 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1594 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1595 return -EFAULT;
1596 }
1597
1598 if (ms.map_epoch > sb.current_epoch) {
1599 cerr << "ERROR: Export PG's map_epoch " << ms.map_epoch << " > OSD's epoch " << sb.current_epoch << std::endl;
1600 cerr << "The OSD you are using is older than the exported PG" << std::endl;
1601 cerr << "Either use another OSD or join selected OSD to cluster to update it first" << std::endl;
1602 return -EINVAL;
1603 }
1604
1605 // Old exports didn't include OSDMap
1606 if (ms.osdmap.get_epoch() == 0) {
1607 cerr << "WARNING: No OSDMap in old export, this is an ancient export."
1608 " Not supported." << std::endl;
1609 return -EINVAL;
1610 }
1611
1612 if (ms.osdmap.get_epoch() < sb.oldest_map) {
1613 cerr << "PG export's map " << ms.osdmap.get_epoch()
1614 << " is older than OSD's oldest_map " << sb.oldest_map << std::endl;
1615 if (!force) {
1616 cerr << " pass --force to proceed anyway (with incomplete PastIntervals)"
1617 << std::endl;
1618 return -EINVAL;
1619 }
1620 }
1621 if (debug) {
1622 cerr << "Import pgid " << ms.info.pgid << std::endl;
1623 cerr << "Previous past_intervals " << ms.past_intervals << std::endl;
1624 cerr << "history.same_interval_since "
1625 << ms.info.history.same_interval_since << std::endl;
1626 }
1627
1628 return 0;
1629 }
1630
1631 // out: pg_log_t that only has entries that apply to import_pgid using curmap
1632 // reject: Entries rejected from "in" are in the reject.log. Other fields not set.
1633 void filter_divergent_priors(spg_t import_pgid, const OSDMap &curmap,
1634 const string &hit_set_namespace, const divergent_priors_t &in,
1635 divergent_priors_t &out, divergent_priors_t &reject)
1636 {
1637 out.clear();
1638 reject.clear();
1639
1640 for (divergent_priors_t::const_iterator i = in.begin();
1641 i != in.end(); ++i) {
1642
1643 // Reject divergent priors for temporary objects
1644 if (i->second.is_temp()) {
1645 reject.insert(*i);
1646 continue;
1647 }
1648
1649 if (i->second.nspace != hit_set_namespace) {
1650 object_t oid = i->second.oid;
1651 object_locator_t loc(i->second);
1652 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
1653 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
1654
1655 if (import_pgid.pgid == pgid) {
1656 out.insert(*i);
1657 } else {
1658 reject.insert(*i);
1659 }
1660 } else {
1661 out.insert(*i);
1662 }
1663 }
1664 }
1665
1666 int ObjectStoreTool::dump_export(Formatter *formatter)
1667 {
1668 bufferlist ebl;
1669 pg_info_t info;
1670 PGLog::IndexedLog log;
1671 //bool skipped_objects = false;
1672
1673 int ret = read_super();
1674 if (ret)
1675 return ret;
1676
1677 if (sh.magic != super_header::super_magic) {
1678 cerr << "Invalid magic number" << std::endl;
1679 return -EFAULT;
1680 }
1681
1682 if (sh.version > super_header::super_ver) {
1683 cerr << "Can't handle export format version=" << sh.version << std::endl;
1684 return -EINVAL;
1685 }
1686
1687 formatter->open_object_section("Export");
1688
1689 //First section must be TYPE_PG_BEGIN
1690 sectiontype_t type;
1691 ret = read_section(&type, &ebl);
1692 if (ret)
1693 return ret;
1694 if (type == TYPE_POOL_BEGIN) {
1695 cerr << "Dump of pool exports not supported" << std::endl;
1696 return -EINVAL;
1697 } else if (type != TYPE_PG_BEGIN) {
1698 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
1699 return -EFAULT;
1700 }
1701
1702 auto ebliter = ebl.cbegin();
1703 pg_begin pgb;
1704 pgb.decode(ebliter);
1705 spg_t pgid = pgb.pgid;
1706
1707 formatter->dump_string("pgid", stringify(pgid));
1708 formatter->dump_string("cluster_fsid", stringify(pgb.superblock.cluster_fsid));
1709 formatter->dump_string("features", stringify(pgb.superblock.compat_features));
1710
1711 bool done = false;
1712 bool found_metadata = false;
1713 metadata_section ms;
1714 bool objects_started = false;
1715 while(!done) {
1716 ret = read_section(&type, &ebl);
1717 if (ret)
1718 return ret;
1719
1720 if (debug) {
1721 cerr << "dump_export: Section type " << std::to_string(type) << std::endl;
1722 }
1723 if (type >= END_OF_TYPES) {
1724 cerr << "Skipping unknown section type" << std::endl;
1725 continue;
1726 }
1727 switch(type) {
1728 case TYPE_OBJECT_BEGIN:
1729 if (!objects_started) {
1730 formatter->open_array_section("objects");
1731 objects_started = true;
1732 }
1733 ret = dump_object(formatter, ebl);
1734 if (ret) return ret;
1735 break;
1736 case TYPE_PG_METADATA:
1737 if (objects_started)
1738 cerr << "WARNING: metadata_section out of order" << std::endl;
1739 ret = dump_pg_metadata(formatter, ebl, ms);
1740 if (ret) return ret;
1741 found_metadata = true;
1742 break;
1743 case TYPE_PG_END:
1744 if (objects_started) {
1745 formatter->close_section();
1746 }
1747 done = true;
1748 break;
1749 default:
1750 cerr << "Unknown section type " << std::to_string(type) << std::endl;
1751 return -EFAULT;
1752 }
1753 }
1754
1755 if (!found_metadata) {
1756 cerr << "Missing metadata section" << std::endl;
1757 return -EFAULT;
1758 }
1759
1760 formatter->close_section();
1761 formatter->flush(cout);
1762
1763 return 0;
1764 }
1765
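// Import a PG from file_fd: validate the super header and the pg_begin
// section (pgid, cluster fsid, compat features), check the metadata section
// against the local OSDMap for pool existence and conflicting splits or
// merges, create and provisionally mark the collection, then replay the
// exported object sections.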
1766 int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb,
1767 bool force, std::string pgidstr)
1768 {
1769 bufferlist ebl;
1770 pg_info_t info;
1771 PGLog::IndexedLog log;
1772 bool skipped_objects = false;
1773
1774 if (!dry_run)
1775 finish_remove_pgs(store);
1776
1777 int ret = read_super();
1778 if (ret)
1779 return ret;
1780
1781 if (sh.magic != super_header::super_magic) {
1782 cerr << "Invalid magic number" << std::endl;
1783 return -EFAULT;
1784 }
1785
1786 if (sh.version > super_header::super_ver) {
1787 cerr << "Can't handle export format version=" << sh.version << std::endl;
1788 return -EINVAL;
1789 }
1790
1791 //First section must be TYPE_PG_BEGIN
1792 sectiontype_t type;
1793 ret = read_section(&type, &ebl);
1794 if (ret)
1795 return ret;
1796 if (type == TYPE_POOL_BEGIN) {
1797 cerr << "Pool exports cannot be imported into a PG" << std::endl;
1798 return -EINVAL;
1799 } else if (type != TYPE_PG_BEGIN) {
1800 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
1801 return -EFAULT;
1802 }
1803
1804 auto ebliter = ebl.cbegin();
1805 pg_begin pgb;
1806 pgb.decode(ebliter);
1807 spg_t pgid = pgb.pgid;
1808
1809 if (pgidstr.length()) {
1810 spg_t user_pgid;
1811
1812 bool ok = user_pgid.parse(pgidstr.c_str());
1813 // This succeeded in main() already
1814 ceph_assert(ok);
1815 if (pgid != user_pgid) {
1816 cerr << "specified pgid " << user_pgid
1817 << " does not match actual pgid " << pgid << std::endl;
1818 return -EINVAL;
1819 }
1820 }
1821
1822 if (!pgb.superblock.cluster_fsid.is_zero()
1823 && pgb.superblock.cluster_fsid != sb.cluster_fsid) {
1824 cerr << "Export came from different cluster with fsid "
1825 << pgb.superblock.cluster_fsid << std::endl;
1826 return -EINVAL;
1827 }
1828
1829 if (debug) {
1830 cerr << "Exported features: " << pgb.superblock.compat_features << std::endl;
1831 }
1832
1833   // Special case: Old export has the SHARDS incompat feature on a replicated pg; remove it
1834 if (pgid.is_no_shard())
1835 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1836
1837 if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) {
1838 CompatSet unsupported = sb.compat_features.unsupported(pgb.superblock.compat_features);
1839
1840 cerr << "Export has incompatible features set " << unsupported << std::endl;
1841
1842 // Let them import if they specify the --force option
1843 if (!force)
1844 return 11; // Positive return means exit status
1845 }
1846
1847 // we need the latest OSDMap to check for collisions
1848 OSDMap curmap;
1849 bufferlist bl;
1850 ret = get_osdmap(store, sb.current_epoch, curmap, bl);
1851 if (ret) {
1852 cerr << "Can't find latest local OSDMap " << sb.current_epoch << std::endl;
1853 return ret;
1854 }
1855 if (!curmap.have_pg_pool(pgid.pgid.m_pool)) {
1856 cerr << "Pool " << pgid.pgid.m_pool << " no longer exists" << std::endl;
1857 // Special exit code for this error, used by test code
1858 return 10; // Positive return means exit status
1859 }
1860
1861 pool_pg_num_history_t pg_num_history;
1862 get_pg_num_history(store, &pg_num_history);
1863
1864 ghobject_t pgmeta_oid = pgid.make_pgmeta_oid();
1865
1866 // Check for PG already present.
1867 coll_t coll(pgid);
1868 if (store->collection_exists(coll)) {
1869 cerr << "pgid " << pgid << " already exists" << std::endl;
1870 return -EEXIST;
1871 }
1872
1873 ObjectStore::CollectionHandle ch;
1874
1875 OSDriver driver(
1876 store,
1877 coll_t(),
1878 OSD::make_snapmapper_oid());
1879 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pgid.shard);
1880
1881 cout << "Importing pgid " << pgid;
1882 cout << std::endl;
1883
1884 bool done = false;
1885 bool found_metadata = false;
1886 metadata_section ms;
1887 while(!done) {
1888 ret = read_section(&type, &ebl);
1889 if (ret)
1890 return ret;
1891
1892 if (debug) {
1893 cout << __func__ << ": Section type " << std::to_string(type) << std::endl;
1894 }
1895 if (type >= END_OF_TYPES) {
1896 cout << "Skipping unknown section type" << std::endl;
1897 continue;
1898 }
1899 switch(type) {
1900 case TYPE_OBJECT_BEGIN:
1901 ceph_assert(found_metadata);
1902 ret = get_object(store, driver, mapper, coll, ebl, ms.osdmap,
1903 &skipped_objects);
1904 if (ret) return ret;
1905 break;
1906 case TYPE_PG_METADATA:
1907 ret = get_pg_metadata(store, ebl, ms, sb, pgid);
1908 if (ret) return ret;
1909 found_metadata = true;
1910
1911 if (pgid != ms.info.pgid) {
1912 cerr << "specified pgid " << pgid << " does not match import file pgid "
1913 << ms.info.pgid << std::endl;
1914 return -EINVAL;
1915 }
1916
1917 // make sure there are no conflicting splits or merges
1918 if (ms.osdmap.have_pg_pool(pgid.pgid.pool())) {
1919 auto p = pg_num_history.pg_nums.find(pgid.pgid.m_pool);
1920 if (p != pg_num_history.pg_nums.end() &&
1921 !p->second.empty()) {
1922 unsigned start_pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool());
1923 unsigned pg_num = start_pg_num;
1924 for (auto q = p->second.lower_bound(ms.map_epoch);
1925 q != p->second.end();
1926 ++q) {
1927 unsigned new_pg_num = q->second;
1928 cout << "pool " << pgid.pgid.pool() << " pg_num " << pg_num
1929 << " -> " << new_pg_num << std::endl;
1930
1931 // check for merge target
1932 spg_t target;
1933 if (pgid.is_merge_source(pg_num, new_pg_num, &target)) {
1934 // FIXME: this check assumes the OSD's PG is at the OSD's
1935 // map epoch; it could be, say, at *our* epoch, pre-merge.
1936 coll_t coll(target);
1937 if (store->collection_exists(coll)) {
1938 cerr << "pgid " << pgid << " merges to target " << target
1939 << " which already exists" << std::endl;
1940 return 12;
1941 }
1942 }
1943
1944 // check for split children
1945 set<spg_t> children;
1946 if (pgid.is_split(start_pg_num, new_pg_num, &children)) {
1947 cerr << " children are " << children << std::endl;
1948 for (auto child : children) {
1949 coll_t coll(child);
1950 if (store->collection_exists(coll)) {
1951 cerr << "pgid " << pgid << " splits to " << children
1952 << " and " << child << " exists" << std::endl;
1953 return 12;
1954 }
1955 }
1956 }
1957 pg_num = new_pg_num;
1958 }
1959 }
1960 } else {
1961 cout << "pool " << pgid.pgid.pool() << " doesn't exist, not checking"
1962 << " for splits or merges" << std::endl;
1963 }
1964
1965 if (!dry_run) {
1966 ObjectStore::Transaction t;
1967 ch = store->create_new_collection(coll);
1968 create_pg_collection(
1969 t, pgid,
1970 pgid.get_split_bits(ms.osdmap.get_pg_pool(pgid.pool())->get_pg_num()));
1971 init_pg_ondisk(t, pgid, NULL);
1972
1973 // mark this coll for removal until we're done
1974 map<string,bufferlist> values;
1975 encode((char)1, values["_remove"]);
1976 t.omap_setkeys(coll, pgid.make_pgmeta_oid(), values);
1977
1978 store->queue_transaction(ch, std::move(t));
1979 }
1980
1981 break;
1982 case TYPE_PG_END:
1983 ceph_assert(found_metadata);
1984 done = true;
1985 break;
1986 default:
1987 cerr << "Unknown section type " << std::to_string(type) << std::endl;
1988 return -EFAULT;
1989 }
1990 }
1991
1992 if (!found_metadata) {
1993 cerr << "Missing metadata section" << std::endl;
1994 return -EFAULT;
1995 }
1996
1997 ObjectStore::Transaction t;
1998 if (!dry_run) {
1999 pg_log_t newlog, reject;
2000 pg_log_t::filter_log(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
2001 ms.log, newlog, reject);
2002 if (debug) {
2003 for (list<pg_log_entry_t>::iterator i = newlog.log.begin();
2004 i != newlog.log.end(); ++i)
2005 cerr << "Keeping log entry " << *i << std::endl;
2006 for (list<pg_log_entry_t>::iterator i = reject.log.begin();
2007 i != reject.log.end(); ++i)
2008 cerr << "Skipping log entry " << *i << std::endl;
2009 }
2010
2011 divergent_priors_t newdp, rejectdp;
2012 filter_divergent_priors(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
2013 ms.divergent_priors, newdp, rejectdp);
2014 ms.divergent_priors = newdp;
2015 if (debug) {
2016 for (divergent_priors_t::iterator i = newdp.begin();
2017 i != newdp.end(); ++i)
2018 cerr << "Keeping divergent_prior " << *i << std::endl;
2019 for (divergent_priors_t::iterator i = rejectdp.begin();
2020 i != rejectdp.end(); ++i)
2021 cerr << "Skipping divergent_prior " << *i << std::endl;
2022 }
2023
2024 ms.missing.filter_objects([&](const hobject_t &obj) {
2025 if (obj.nspace == g_ceph_context->_conf->osd_hit_set_namespace)
2026 return false;
2027 ceph_assert(!obj.is_temp());
2028 object_t oid = obj.oid;
2029 object_locator_t loc(obj);
2030 pg_t raw_pgid = ms.osdmap.object_locator_to_pg(oid, loc);
2031 pg_t _pgid = ms.osdmap.raw_pg_to_pg(raw_pgid);
2032
2033 return pgid.pgid != _pgid;
2034 });
2035
2036
2037 if (debug) {
2038 pg_missing_t missing;
2039 Formatter *formatter = Formatter::create("json-pretty");
2040 dump_log(formatter, cerr, newlog, ms.missing);
2041 delete formatter;
2042 }
2043
2044 // Just like a split invalidate stats since the object count is changed
2045 if (skipped_objects)
2046 ms.info.stats.stats_invalid = true;
2047
2048 ret = write_pg(
2049 t,
2050 ms.map_epoch,
2051 ms.info,
2052 newlog,
2053 ms.past_intervals,
2054 ms.divergent_priors,
2055 ms.missing);
2056 if (ret) return ret;
2057 }
2058
2059 // done, clear removal flag
2060 if (debug)
2061 cerr << "done, clearing removal flag" << std::endl;
2062
2063 if (!dry_run) {
2064 t.omap_rmkey(coll, pgid.make_pgmeta_oid(), "_remove");
2065 wait_until_done(&t, [&] {
2066 store->queue_transaction(ch, std::move(t));
2067 // make sure we flush onreadable items before mapper/driver are destroyed.
2068 ch->flush();
2069 });
2070 }
2071 return 0;
2072 }
2073
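// List objects matching the optional name/namespace filter, either within one PG or across all PGs, and dump the matches via the formatter.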
2074 int do_list(ObjectStore *store, string pgidstr, string object, boost::optional<std::string> nspace,
2075 Formatter *formatter, bool debug, bool human_readable, bool head)
2076 {
2077 int r;
2078 lookup_ghobject lookup(object, nspace, head);
2079 if (pgidstr.length() > 0) {
2080 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
2081 } else {
2082 r = action_on_all_objects(store, lookup, debug);
2083 }
2084 if (r)
2085 return r;
2086 lookup.dump(formatter, human_readable);
2087 formatter->flush(cout);
2088 return 0;
2089 }
2090
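// Like do_list, but collects objects whose omap listing takes longer than 'threshold' seconds (backs --op list-slow-omap).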
2091 int do_list_slow(ObjectStore *store, string pgidstr, string object,
2092 double threshold, Formatter *formatter, bool debug, bool human_readable)
2093 {
2094 int r;
2095 lookup_slow_ghobject lookup(object, threshold);
2096 if (pgidstr.length() > 0) {
2097 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
2098 } else {
2099 r = action_on_all_objects(store, lookup, debug);
2100 }
2101 if (r)
2102 return r;
2103 lookup.dump(formatter, human_readable);
2104 formatter->flush(cout);
2105 return 0;
2106 }
2107
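// Run the lookup over the meta collection only, listing OSD metadata objects that match the optional name.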
2108 int do_meta(ObjectStore *store, string object, Formatter *formatter, bool debug, bool human_readable)
2109 {
2110 int r;
2111 boost::optional<std::string> nspace; // Not specified
2112 lookup_ghobject lookup(object, nspace);
2113 r = action_on_all_objects_in_exact_pg(store, coll_t::meta(), lookup, debug);
2114 if (r)
2115 return r;
2116 lookup.dump(formatter, human_readable);
2117 formatter->flush(cout);
2118 return 0;
2119 }
2120
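// Selects what remove_object() removes: the SnapMapper entry, the object itself, or both.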
2121 enum rmtype {
2122 BOTH,
2123 SNAPMAP,
2124 NOSNAPMAP
2125 };
2126
2127 int remove_object(coll_t coll, ghobject_t &ghobj,
2128 SnapMapper &mapper,
2129 MapCacher::Transaction<std::string, bufferlist> *_t,
2130 ObjectStore::Transaction *t,
2131 enum rmtype type)
2132 {
2133 if (type == BOTH || type == SNAPMAP) {
2134 int r = mapper.remove_oid(ghobj.hobj, _t);
2135 if (r < 0 && r != -ENOENT) {
2136 cerr << "remove_oid returned " << cpp_strerror(r) << std::endl;
2137 return r;
2138 }
2139 }
2140
2141 if (type == BOTH || type == NOSNAPMAP) {
2142 t->remove(coll, ghobj);
2143 }
2144 return 0;
2145 }
2146
2147 int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent);
2148
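// Remove an object and (with 'all') its clones; a head with clones is only removed on its own when --force is given, otherwise removeall must be used.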
2149 int do_remove_object(ObjectStore *store, coll_t coll,
2150 ghobject_t &ghobj, bool all, bool force, enum rmtype type)
2151 {
2152 auto ch = store->open_collection(coll);
2153 spg_t pg;
2154 coll.is_pg_prefix(&pg);
2155 OSDriver driver(
2156 store,
2157 coll_t(),
2158 OSD::make_snapmapper_oid());
2159 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pg.shard);
2160 struct stat st;
2161
2162 int r = store->stat(ch, ghobj, &st);
2163 if (r < 0) {
2164 cerr << "remove: " << cpp_strerror(r) << std::endl;
2165 return r;
2166 }
2167
2168 SnapSet ss;
2169 if (ghobj.hobj.has_snapset()) {
2170 r = get_snapset(store, coll, ghobj, ss, false);
2171 if (r < 0) {
2172 cerr << "Can't get snapset error " << cpp_strerror(r) << std::endl;
2173 // If --force and bad snapset let them remove the head
2174 if (!(force && !all))
2175 return r;
2176 }
2177 // cout << "snapset " << ss << std::endl;
2178 if (!ss.clone_snaps.empty() && !all) {
2179 if (force) {
2180 cout << "WARNING: only removing "
2181 << (ghobj.hobj.is_head() ? "head" : "snapdir")
2182 << " with clones present" << std::endl;
2183 ss.clone_snaps.clear();
2184 } else {
2185 cerr << "Clones are present, use removeall to delete everything"
2186 << std::endl;
2187 return -EINVAL;
2188 }
2189 }
2190 }
2191
2192 ObjectStore::Transaction t;
2193 OSDriver::OSTransaction _t(driver.get_transaction(&t));
2194
2195 ghobject_t snapobj = ghobj;
2196 for (auto& p : ss.clone_snaps) {
2197 snapobj.hobj.snap = p.first;
2198 cout << "remove clone " << snapobj << std::endl;
2199 if (!dry_run) {
2200 r = remove_object(coll, snapobj, mapper, &_t, &t, type);
2201 if (r < 0)
2202 return r;
2203 }
2204 }
2205
2206 cout << "remove " << ghobj << std::endl;
2207
2208 if (!dry_run) {
2209 r = remove_object(coll, ghobj, mapper, &_t, &t, type);
2210 if (r < 0)
2211 return r;
2212 }
2213
2214 if (!dry_run) {
2215 wait_until_done(&t, [&] {
2216 store->queue_transaction(ch, std::move(t));
2217 ch->flush();
2218 });
2219 }
2220 return 0;
2221 }
2222
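// Print the name of every xattr on the object, one per line (escaped when stdout is a tty).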
2223 int do_list_attrs(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2224 {
2225 auto ch = store->open_collection(coll);
2226 map<string,bufferptr,less<>> aset;
2227 int r = store->getattrs(ch, ghobj, aset);
2228 if (r < 0) {
2229 cerr << "getattrs: " << cpp_strerror(r) << std::endl;
2230 return r;
2231 }
2232
2233 for (map<string,bufferptr>::iterator i = aset.begin();i != aset.end(); ++i) {
2234 string key(i->first);
2235 if (outistty)
2236 key = cleanbin(key);
2237 cout << key << std::endl;
2238 }
2239 return 0;
2240 }
2241
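// Iterate the object's omap and print every key, one per line.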
2242 int do_list_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2243 {
2244 auto ch = store->open_collection(coll);
2245 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, ghobj);
2246 if (!iter) {
2247 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT) << std::endl;
2248 return -ENOENT;
2249 }
2250 iter->seek_to_first();
2251 map<string, bufferlist> oset;
2252 while(iter->valid()) {
2253 get_omap_batch(iter, oset);
2254
2255 for (map<string,bufferlist>::iterator i = oset.begin();i != oset.end(); ++i) {
2256 string key(i->first);
2257 if (outistty)
2258 key = cleanbin(key);
2259 cout << key << std::endl;
2260 }
2261 }
2262 return 0;
2263 }
2264
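// Stream the object's data to fd in chunks of at most max_read bytes.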
2265 int do_get_bytes(ObjectStore *store, coll_t coll, ghobject_t &ghobj, int fd)
2266 {
2267 auto ch = store->open_collection(coll);
2268 struct stat st;
2269 mysize_t total;
2270
2271 int ret = store->stat(ch, ghobj, &st);
2272 if (ret < 0) {
2273 cerr << "get-bytes: " << cpp_strerror(ret) << std::endl;
2274 return ret;
2275 }
2276
2277 total = st.st_size;
2278 if (debug)
2279 cerr << "size=" << total << std::endl;
2280
2281 uint64_t offset = 0;
2282 bufferlist rawdatabl;
2283 while(total > 0) {
2284 rawdatabl.clear();
2285 mysize_t len = max_read;
2286 if (len > total)
2287 len = total;
2288
2289 ret = store->read(ch, ghobj, offset, len, rawdatabl);
2290 if (ret < 0)
2291 return ret;
2292 if (ret == 0)
2293 return -EINVAL;
2294
2295 if (debug)
2296 cerr << "data section offset=" << offset << " len=" << len << std::endl;
2297
2298 total -= ret;
2299 offset += ret;
2300
2301 ret = write(fd, rawdatabl.c_str(), ret);
2302 if (ret == -1) {
2303 perror("write");
2304 return -errno;
2305 }
2306 }
2307
2308 return 0;
2309 }
2310
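// Replace the object's data with the contents of fd: truncate to zero, then write max_read-sized chunks as they are read.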
2311 int do_set_bytes(ObjectStore *store, coll_t coll,
2312 ghobject_t &ghobj, int fd)
2313 {
2314 ObjectStore::Transaction tran;
2315 ObjectStore::Transaction *t = &tran;
2316
2317 if (debug)
2318 cerr << "Write " << ghobj << std::endl;
2319
2320 if (!dry_run) {
2321 t->touch(coll, ghobj);
2322 t->truncate(coll, ghobj, 0);
2323 }
2324
2325 uint64_t offset = 0;
2326 bufferlist rawdatabl;
2327 do {
2328 rawdatabl.clear();
2329 ssize_t bytes = rawdatabl.read_fd(fd, max_read);
2330 if (bytes < 0) {
2331 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
2332 return bytes;
2333 }
2334
2335 if (bytes == 0)
2336 break;
2337
2338 if (debug)
2339 cerr << "\tdata: offset " << offset << " bytes " << bytes << std::endl;
2340 if (!dry_run)
2341 t->write(coll, ghobj, offset, bytes, rawdatabl);
2342
2343 offset += bytes;
2344 // XXX: Should we queue_transaction() every once in a while for very large files?
2345 } while(true);
2346
2347 auto ch = store->open_collection(coll);
2348 if (!dry_run)
2349 store->queue_transaction(ch, std::move(*t));
2350 return 0;
2351 }
2352
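// Print the value of a single xattr; binary data is escaped when stdout is a tty.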
2353 int do_get_attr(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2354 {
2355 auto ch = store->open_collection(coll);
2356 bufferptr bp;
2357
2358 int r = store->getattr(ch, ghobj, key.c_str(), bp);
2359 if (r < 0) {
2360 cerr << "getattr: " << cpp_strerror(r) << std::endl;
2361 return r;
2362 }
2363
2364 string value(bp.c_str(), bp.length());
2365 if (outistty) {
2366 value = cleanbin(value);
2367 value.push_back('\n');
2368 }
2369 cout << value;
2370
2371 return 0;
2372 }
2373
2374 int do_set_attr(ObjectStore *store, coll_t coll,
2375 ghobject_t &ghobj, string key, int fd)
2376 {
2377 ObjectStore::Transaction tran;
2378 ObjectStore::Transaction *t = &tran;
2379 bufferlist bl;
2380
2381 if (debug)
2382 cerr << "Setattr " << ghobj << std::endl;
2383
2384 int ret = get_fd_data(fd, bl);
2385 if (ret < 0)
2386 return ret;
2387
2388 if (dry_run)
2389 return 0;
2390
2391 t->touch(coll, ghobj);
2392
2393 t->setattr(coll, ghobj, key, bl);
2394
2395 auto ch = store->open_collection(coll);
2396 store->queue_transaction(ch, std::move(*t));
2397 return 0;
2398 }
2399
2400 int do_rm_attr(ObjectStore *store, coll_t coll,
2401 ghobject_t &ghobj, string key)
2402 {
2403 ObjectStore::Transaction tran;
2404 ObjectStore::Transaction *t = &tran;
2405
2406 if (debug)
2407 cerr << "Rmattr " << ghobj << std::endl;
2408
2409 if (dry_run)
2410 return 0;
2411
2412 t->rmattr(coll, ghobj, key);
2413
2414 auto ch = store->open_collection(coll);
2415 store->queue_transaction(ch, std::move(*t));
2416 return 0;
2417 }
2418
2419 int do_get_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2420 {
2421 auto ch = store->open_collection(coll);
2422 set<string> keys;
2423 map<string, bufferlist> out;
2424
2425 keys.insert(key);
2426
2427 int r = store->omap_get_values(ch, ghobj, keys, &out);
2428 if (r < 0) {
2429 cerr << "omap_get_values: " << cpp_strerror(r) << std::endl;
2430 return r;
2431 }
2432
2433 if (out.empty()) {
2434 cerr << "Key not found" << std::endl;
2435 return -ENOENT;
2436 }
2437
2438 ceph_assert(out.size() == 1);
2439
2440 bufferlist bl = out.begin()->second;
2441 string value(bl.c_str(), bl.length());
2442 if (outistty) {
2443 value = cleanbin(value);
2444 value.push_back('\n');
2445 }
2446 cout << value;
2447
2448 return 0;
2449 }
2450
2451 int do_set_omap(ObjectStore *store, coll_t coll,
2452 ghobject_t &ghobj, string key, int fd)
2453 {
2454 ObjectStore::Transaction tran;
2455 ObjectStore::Transaction *t = &tran;
2456 map<string, bufferlist> attrset;
2457 bufferlist valbl;
2458
2459 if (debug)
2460 cerr << "Set_omap " << ghobj << std::endl;
2461
2462 int ret = get_fd_data(fd, valbl);
2463 if (ret < 0)
2464 return ret;
2465
2466 attrset.insert(pair<string, bufferlist>(key, valbl));
2467
2468 if (dry_run)
2469 return 0;
2470
2471 t->touch(coll, ghobj);
2472
2473 t->omap_setkeys(coll, ghobj, attrset);
2474
2475 auto ch = store->open_collection(coll);
2476 store->queue_transaction(ch, std::move(*t));
2477 return 0;
2478 }
2479
2480 int do_rm_omap(ObjectStore *store, coll_t coll,
2481 ghobject_t &ghobj, string key)
2482 {
2483 ObjectStore::Transaction tran;
2484 ObjectStore::Transaction *t = &tran;
2485
2486 if (debug)
2487 cerr << "Rm_omap " << ghobj << std::endl;
2488
2489 if (dry_run)
2490 return 0;
2491
2492 t->omap_rmkey(coll, ghobj, key);
2493
2494 auto ch = store->open_collection(coll);
2495 store->queue_transaction(ch, std::move(*t));
2496 return 0;
2497 }
2498
2499 int do_get_omaphdr(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2500 {
2501 auto ch = store->open_collection(coll);
2502 bufferlist hdrbl;
2503
2504 int r = store->omap_get_header(ch, ghobj, &hdrbl, true);
2505 if (r < 0) {
2506 cerr << "omap_get_header: " << cpp_strerror(r) << std::endl;
2507 return r;
2508 }
2509
2510 string header(hdrbl.c_str(), hdrbl.length());
2511 if (outistty) {
2512 header = cleanbin(header);
2513 header.push_back('\n');
2514 }
2515 cout << header;
2516
2517 return 0;
2518 }
2519
2520 int do_set_omaphdr(ObjectStore *store, coll_t coll,
2521 ghobject_t &ghobj, int fd)
2522 {
2523 ObjectStore::Transaction tran;
2524 ObjectStore::Transaction *t = &tran;
2525 bufferlist hdrbl;
2526
2527 if (debug)
2528 cerr << "Omap_setheader " << ghobj << std::endl;
2529
2530 int ret = get_fd_data(fd, hdrbl);
2531 if (ret)
2532 return ret;
2533
2534 if (dry_run)
2535 return 0;
2536
2537 t->touch(coll, ghobj);
2538
2539 t->omap_setheader(coll, ghobj, hdrbl);
2540
2541 auto ch = store->open_collection(coll);
2542 store->queue_transaction(ch, std::move(*t));
2543 return 0;
2544 }
2545
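// Visitor for --op fix-lost: clears FLAG_LOST on any object whose object_info has it set and rewrites the OI_ATTR.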
2546 struct do_fix_lost : public action_on_object_t {
2547 void call(ObjectStore *store, coll_t coll,
2548 ghobject_t &ghobj, object_info_t &oi) override {
2549 if (oi.is_lost()) {
2550 cout << coll << "/" << ghobj << " is lost";
2551 if (!dry_run)
2552 cout << ", fixing";
2553 cout << std::endl;
2554 if (dry_run)
2555 return;
2556 oi.clear_flag(object_info_t::FLAG_LOST);
2557 bufferlist bl;
2558 encode(oi, bl, -1); /* fixme: using full features */
2559 ObjectStore::Transaction t;
2560 t.setattr(coll, ghobj, OI_ATTR, bl);
2561 auto ch = store->open_collection(coll);
2562 store->queue_transaction(ch, std::move(t));
2563 }
2564 return;
2565 }
2566 };
2567
2568 int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent = false)
2569 {
2570 auto ch = store->open_collection(coll);
2571 bufferlist attr;
2572 int r = store->getattr(ch, ghobj, SS_ATTR, attr);
2573 if (r < 0) {
2574 if (!silent)
2575 cerr << "Error getting snapset on : " << make_pair(coll, ghobj) << ", "
2576 << cpp_strerror(r) << std::endl;
2577 return r;
2578 }
2579 auto bp = attr.cbegin();
2580 try {
2581 decode(ss, bp);
2582 } catch (...) {
2583 r = -EINVAL;
2584 cerr << "Error decoding snapset on : " << make_pair(coll, ghobj) << ", "
2585 << cpp_strerror(r) << std::endl;
2586 return r;
2587 }
2588 return 0;
2589 }
2590
2591 int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
2592 {
2593 auto ch = store->open_collection(coll);
2594 int r = 0;
2595 formatter->open_object_section("obj");
2596 formatter->open_object_section("id");
2597 ghobj.dump(formatter);
2598 formatter->close_section();
2599
2600 bufferlist attr;
2601 int gr = store->getattr(ch, ghobj, OI_ATTR, attr);
2602 if (gr < 0) {
2603 r = gr;
2604 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2605 << cpp_strerror(r) << std::endl;
2606 } else {
2607 object_info_t oi;
2608 auto bp = attr.cbegin();
2609 try {
2610 decode(oi, bp);
2611 formatter->open_object_section("info");
2612 oi.dump(formatter);
2613 formatter->close_section();
2614 } catch (...) {
2615 r = -EINVAL;
2616 cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
2617 << cpp_strerror(r) << std::endl;
2618 }
2619 }
2620 struct stat st;
2621 int sr = store->stat(ch, ghobj, &st, true);
2622 if (sr < 0) {
2623 r = sr;
2624 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2625 << cpp_strerror(r) << std::endl;
2626 } else {
2627 formatter->open_object_section("stat");
2628 formatter->dump_int("size", st.st_size);
2629 formatter->dump_int("blksize", st.st_blksize);
2630 formatter->dump_int("blocks", st.st_blocks);
2631 formatter->dump_int("nlink", st.st_nlink);
2632 formatter->close_section();
2633 }
2634
2635 if (ghobj.hobj.has_snapset()) {
2636 SnapSet ss;
2637 int snr = get_snapset(store, coll, ghobj, ss);
2638 if (snr < 0) {
2639 r = snr;
2640 } else {
2641 formatter->open_object_section("SnapSet");
2642 ss.dump(formatter);
2643 formatter->close_section();
2644 }
2645 }
2646 bufferlist hattr;
2647 gr = store->getattr(ch, ghobj, ECUtil::get_hinfo_key(), hattr);
2648 if (gr == 0) {
2649 ECUtil::HashInfo hinfo;
2650 auto hp = hattr.cbegin();
2651 try {
2652 decode(hinfo, hp);
2653 formatter->open_object_section("hinfo");
2654 hinfo.dump(formatter);
2655 formatter->close_section();
2656 } catch (...) {
2657 r = -EINVAL;
2658 cerr << "Error decoding hinfo on : " << make_pair(coll, ghobj) << ", "
2659 << cpp_strerror(r) << std::endl;
2660 }
2661 }
2662 gr = store->dump_onode(ch, ghobj, "onode", formatter);
2663
2664 formatter->close_section();
2665 formatter->flush(cout);
2666 cout << std::endl;
2667 return r;
2668 }
2669
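// Testing helper: deliberately corrupts the stored object_info by perturbing alloc_hint_flags and rewriting OI_ATTR.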
2670 int corrupt_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
2671 {
2672 auto ch = store->open_collection(coll);
2673 bufferlist attr;
2674 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2675 if (r < 0) {
2676 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2677 << cpp_strerror(r) << std::endl;
2678 return r;
2679 }
2680 object_info_t oi;
2681 auto bp = attr.cbegin();
2682 try {
2683 decode(oi, bp);
2684 } catch (...) {
2685 r = -EINVAL;
2686 cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
2687 << cpp_strerror(r) << std::endl;
2688 return r;
2689 }
2690 if (!dry_run) {
2691 attr.clear();
2692 oi.alloc_hint_flags += 0xff;
2693 ObjectStore::Transaction t;
2694 encode(oi, attr, -1); /* fixme: using full features */
2695 t.setattr(coll, ghobj, OI_ATTR, attr);
2696 auto ch = store->open_collection(coll);
2697 r = store->queue_transaction(ch, std::move(t));
2698 if (r < 0) {
2699 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2700 << cpp_strerror(r) << std::endl;
2701 return r;
2702 }
2703 }
2704 return 0;
2705 }
2706
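// Set the size recorded in the object's object_info (and the SnapSet clone_size entry for clones); unless 'corrupt' is requested, the on-disk data is truncated to match and the data digest cleared.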
2707 int set_size(
2708 ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter,
2709 bool corrupt)
2710 {
2711 auto ch = store->open_collection(coll);
2712 if (ghobj.hobj.is_snapdir()) {
2713 cerr << "Can't set the size of a snapdir" << std::endl;
2714 return -EINVAL;
2715 }
2716 bufferlist attr;
2717 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2718 if (r < 0) {
2719 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2720 << cpp_strerror(r) << std::endl;
2721 return r;
2722 }
2723 object_info_t oi;
2724 auto bp = attr.cbegin();
2725 try {
2726 decode(oi, bp);
2727 } catch (...) {
2728 r = -EINVAL;
2729 cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
2730 << cpp_strerror(r) << std::endl;
2731 return r;
2732 }
2733 struct stat st;
2734 r = store->stat(ch, ghobj, &st, true);
2735 if (r < 0) {
2736 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2737 << cpp_strerror(r) << std::endl;
2738 }
2739 ghobject_t head(ghobj);
2740 SnapSet ss;
2741 bool found_head = true;
2742 map<snapid_t, uint64_t>::iterator csi;
2743 bool is_snap = ghobj.hobj.is_snap();
2744 if (is_snap) {
2745 head.hobj = head.hobj.get_head();
2746 r = get_snapset(store, coll, head, ss, true);
2747 if (r < 0 && r != -ENOENT) {
2748 // We asked get_snapset() to be silent, so report the error ourselves unless it's -ENOENT
2749 cerr << "Error getting snapset on : " << make_pair(coll, head) << ", "
2750 << cpp_strerror(r) << std::endl;
2751 return r;
2752 }
2753 if (r == -ENOENT) {
2754 head.hobj = head.hobj.get_snapdir();
2755 r = get_snapset(store, coll, head, ss);
2756 if (r < 0)
2757 return r;
2758 found_head = false;
2759 } else {
2760 found_head = true;
2761 }
2762 csi = ss.clone_size.find(ghobj.hobj.snap);
2763 if (csi == ss.clone_size.end()) {
2764 cerr << "SnapSet is missing clone_size for snap " << ghobj.hobj.snap << std::endl;
2765 return -EINVAL;
2766 }
2767 }
2768 if ((uint64_t)st.st_size == setsize && oi.size == setsize
2769 && (!is_snap || csi->second == setsize)) {
2770 cout << "Size of object is already " << setsize << std::endl;
2771 return 0;
2772 }
2773 cout << "Setting size to " << setsize << ", stat size " << st.st_size
2774 << ", obj info size " << oi.size;
2775 if (is_snap) {
2776 cout << ", " << (found_head ? "head" : "snapdir")
2777 << " clone_size " << csi->second;
2778 csi->second = setsize;
2779 }
2780 cout << std::endl;
2781 if (!dry_run) {
2782 attr.clear();
2783 oi.size = setsize;
2784 ObjectStore::Transaction t;
2785 // When corrupting, only modify the object info and leave the on-disk data alone
2786 if (!corrupt && (uint64_t)st.st_size != setsize) {
2787 t.truncate(coll, ghobj, setsize);
2788 // Changing objectstore size will invalidate data_digest, so clear it.
2789 oi.clear_data_digest();
2790 }
2791 encode(oi, attr, -1); /* fixme: using full features */
2792 t.setattr(coll, ghobj, OI_ATTR, attr);
2793 if (is_snap) {
2794 bufferlist snapattr;
2795 snapattr.clear();
2796 encode(ss, snapattr);
2797 t.setattr(coll, head, SS_ATTR, snapattr);
2798 }
2799 auto ch = store->open_collection(coll);
2800 r = store->queue_transaction(ch, std::move(t));
2801 if (r < 0) {
2802 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2803 << cpp_strerror(r) << std::endl;
2804 return r;
2805 }
2806 }
2807 return 0;
2808 }
2809
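// Clear the data_digest recorded in the object's object_info_t; the on-disk data is left untouched.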
2810 int clear_data_digest(ObjectStore *store, coll_t coll, ghobject_t &ghobj) {
2811 auto ch = store->open_collection(coll);
2812 bufferlist attr;
2813 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2814 if (r < 0) {
2815 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2816 << cpp_strerror(r) << std::endl;
2817 return r;
2818 }
2819 object_info_t oi;
2820 auto bp = attr.cbegin();
2821 try {
2822 decode(oi, bp);
2823 } catch (...) {
2824 r = -EINVAL;
2825 cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
2826 << cpp_strerror(r) << std::endl;
2827 return r;
2828 }
2829 if (!dry_run) {
2830 attr.clear();
2831 oi.clear_data_digest();
2832 encode(oi, attr, -1); /* fixme: using full features */
2833 ObjectStore::Transaction t;
2834 t.setattr(coll, ghobj, OI_ATTR, attr);
2835 auto ch = store->open_collection(coll);
2836 r = store->queue_transaction(ch, std::move(t));
2837 if (r < 0) {
2838 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2839 << cpp_strerror(r) << std::endl;
2840 return r;
2841 }
2842 }
2843 return 0;
2844 }
2845
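// Testing helper: clears or damages selected fields of the object's SnapSet; 'arg' picks the fields (corrupt, seq, snaps, clones, clone_overlap, clone_size or size).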
2846 int clear_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj,
2847 string arg)
2848 {
2849 SnapSet ss;
2850 int ret = get_snapset(store, coll, ghobj, ss);
2851 if (ret < 0)
2852 return ret;
2853
2854 // Use "corrupt" to clear entire SnapSet
2855 // Use "seq" to just corrupt SnapSet.seq
2856 if (arg == "corrupt" || arg == "seq")
2857 ss.seq = 0;
2858 // Use "snaps" to just clear SnapSet.clone_snaps
2859 if (arg == "corrupt" || arg == "snaps")
2860 ss.clone_snaps.clear();
2861 // By default just clear clones, clone_overlap and clone_size
2862 if (arg == "corrupt")
2863 arg = "";
2864 if (arg == "" || arg == "clones")
2865 ss.clones.clear();
2866 if (arg == "" || arg == "clone_overlap")
2867 ss.clone_overlap.clear();
2868 if (arg == "" || arg == "clone_size")
2869 ss.clone_size.clear();
2870 // Break all clone sizes by adding 1
2871 if (arg == "size") {
2872 for (map<snapid_t, uint64_t>::iterator i = ss.clone_size.begin();
2873 i != ss.clone_size.end(); ++i)
2874 ++(i->second);
2875 }
2876
2877 if (!dry_run) {
2878 bufferlist bl;
2879 encode(ss, bl);
2880 ObjectStore::Transaction t;
2881 t.setattr(coll, ghobj, SS_ATTR, bl);
2882 auto ch = store->open_collection(coll);
2883 int r = store->queue_transaction(ch, std::move(t));
2884 if (r < 0) {
2885 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2886 << cpp_strerror(r) << std::endl;
2887 return r;
2888 }
2889 }
2890 return 0;
2891 }
2892
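// find() overloads used by remove_from() to erase a clone id from the different SnapSet containers; with --force a missing entry is ignored.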
2893 vector<snapid_t>::iterator find(vector<snapid_t> &v, snapid_t clid)
2894 {
2895 return std::find(v.begin(), v.end(), clid);
2896 }
2897
2898 map<snapid_t, interval_set<uint64_t> >::iterator
2899 find(map<snapid_t, interval_set<uint64_t> > &m, snapid_t clid)
2900 {
2901 return m.find(clid);
2902 }
2903
2904 map<snapid_t, uint64_t>::iterator find(map<snapid_t, uint64_t> &m,
2905 snapid_t clid)
2906 {
2907 return m.find(clid);
2908 }
2909
2910 template<class T>
2911 int remove_from(T &mv, string name, snapid_t cloneid, bool force)
2912 {
2913 typename T::iterator i = find(mv, cloneid);
2914 if (i != mv.end()) {
2915 mv.erase(i);
2916 } else {
2917 cerr << "Clone " << cloneid << " doesn't exist in " << name;
2918 if (force) {
2919 cerr << " (ignored)" << std::endl;
2920 return 0;
2921 }
2922 cerr << std::endl;
2923 return -EINVAL;
2924 }
2925 return 0;
2926 }
2927
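// Remove one clone's metadata from the object's SnapSet, adjusting the overlap of the next older clone; the clone object itself is not removed here.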
2928 int remove_clone(
2929 ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cloneid, bool force)
2930 {
2931 // XXX: Don't allow this if in a cache tier or former cache tier
2932 // bool allow_incomplete_clones() const {
2933 // return cache_mode != CACHEMODE_NONE || has_flag(FLAG_INCOMPLETE_CLONES);
2934
2935 SnapSet snapset;
2936 int ret = get_snapset(store, coll, ghobj, snapset);
2937 if (ret < 0)
2938 return ret;
2939
2940 // Derived from trim_object()
2941 // ...from snapset
2942 vector<snapid_t>::iterator p;
2943 for (p = snapset.clones.begin(); p != snapset.clones.end(); ++p)
2944 if (*p == cloneid)
2945 break;
2946 if (p == snapset.clones.end()) {
2947 cerr << "Clone " << cloneid << " not present" << std::endl;
2948 return -ENOENT;
2949 }
2950 if (p != snapset.clones.begin()) {
2951 // not the oldest... merge overlap into next older clone
2952 vector<snapid_t>::iterator n = p - 1;
2953 hobject_t prev_coid = ghobj.hobj;
2954 prev_coid.snap = *n;
2955 //bool adjust_prev_bytes = is_present_clone(prev_coid);
2956
2957 //if (adjust_prev_bytes)
2958 // ctx->delta_stats.num_bytes -= snapset.get_clone_bytes(*n);
2959
2960 snapset.clone_overlap[*n].intersection_of(
2961 snapset.clone_overlap[*p]);
2962
2963 //if (adjust_prev_bytes)
2964 // ctx->delta_stats.num_bytes += snapset.get_clone_bytes(*n);
2965 }
2966
2967 ret = remove_from(snapset.clones, "clones", cloneid, force);
2968 if (ret) return ret;
2969 ret = remove_from(snapset.clone_overlap, "clone_overlap", cloneid, force);
2970 if (ret) return ret;
2971 ret = remove_from(snapset.clone_size, "clone_size", cloneid, force);
2972 if (ret) return ret;
2973
2974 if (dry_run)
2975 return 0;
2976
2977 bufferlist bl;
2978 encode(snapset, bl);
2979 ObjectStore::Transaction t;
2980 t.setattr(coll, ghobj, SS_ATTR, bl);
2981 auto ch = store->open_collection(coll);
2982 int r = store->queue_transaction(ch, std::move(t));
2983 if (r < 0) {
2984 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2985 << cpp_strerror(r) << std::endl;
2986 return r;
2987 }
2988 cout << "Removal of clone " << cloneid << " complete" << std::endl;
2989 cout << "Use pg repair after the OSD is restarted to correct stat information" << std::endl;
2990 return 0;
2991 }
2992
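// Implements --op dup: copies every collection, object, xattr and omap entry, plus the keyring and OSD metadata, from src to dst; dst must be empty and have the same fsid.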
2993 int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst)
2994 {
2995 cout << "dup from " << src->get_type() << ": " << srcpath << "\n"
2996 << " to " << dst->get_type() << ": " << dstpath
2997 << std::endl;
2998 int num, i;
2999 vector<coll_t> collections;
3000 int r;
3001
3002 r = src->mount();
3003 if (r < 0) {
3004 cerr << "failed to mount src: " << cpp_strerror(r) << std::endl;
3005 return r;
3006 }
3007 r = dst->mount();
3008 if (r < 0) {
3009 cerr << "failed to mount dst: " << cpp_strerror(r) << std::endl;
3010 goto out_src;
3011 }
3012
3013 if (src->get_fsid() != dst->get_fsid()) {
3014 cerr << "src fsid " << src->get_fsid() << " != dest " << dst->get_fsid()
3015 << std::endl;
3016 goto out;
3017 }
3018 cout << "fsid " << src->get_fsid() << std::endl;
3019
3020 // make sure dst is empty
3021 r = dst->list_collections(collections);
3022 if (r < 0) {
3023 cerr << "error listing collections on dst: " << cpp_strerror(r) << std::endl;
3024 goto out;
3025 }
3026 if (!collections.empty()) {
3027 cerr << "destination store is not empty" << std::endl;
3028 goto out;
3029 }
3030
3031 r = src->list_collections(collections);
3032 if (r < 0) {
3033 cerr << "error listing collections on src: " << cpp_strerror(r) << std::endl;
3034 goto out;
3035 }
3036
3037 num = collections.size();
3038 cout << num << " collections" << std::endl;
3039 i = 1;
3040 for (auto cid : collections) {
3041 cout << i++ << "/" << num << " " << cid << std::endl;
3042 auto ch = src->open_collection(cid);
3043 auto dch = dst->create_new_collection(cid);
3044 {
3045 ObjectStore::Transaction t;
3046 int bits = src->collection_bits(ch);
3047 if (bits < 0) {
3048 if (src->get_type() == "filestore" && cid.is_meta()) {
3049 bits = 0;
3050 } else {
3051 cerr << "cannot get bit count for collection " << cid << ": "
3052 << cpp_strerror(bits) << std::endl;
3053 goto out;
3054 }
3055 }
3056 t.create_collection(cid, bits);
3057 dst->queue_transaction(dch, std::move(t));
3058 }
3059
3060 ghobject_t pos;
3061 uint64_t n = 0;
3062 uint64_t bytes = 0, keys = 0;
3063 while (true) {
3064 vector<ghobject_t> ls;
3065 r = src->collection_list(ch, pos, ghobject_t::get_max(), 1000, &ls, &pos);
3066 if (r < 0) {
3067 cerr << "collection_list on " << cid << " from " << pos << " got: "
3068 << cpp_strerror(r) << std::endl;
3069 goto out;
3070 }
3071 if (ls.empty()) {
3072 break;
3073 }
3074
3075 for (auto& oid : ls) {
3076 //cout << " " << cid << " " << oid << std::endl;
3077 if (n % 100 == 0) {
3078 cout << " " << std::setw(16) << n << " objects, "
3079 << std::setw(16) << bytes << " bytes, "
3080 << std::setw(16) << keys << " keys"
3081 << std::setw(1) << "\r" << std::flush;
3082 }
3083 n++;
3084
3085 ObjectStore::Transaction t;
3086 t.touch(cid, oid);
3087
3088 map<string,bufferptr,less<>> attrs;
3089 src->getattrs(ch, oid, attrs);
3090 if (!attrs.empty()) {
3091 t.setattrs(cid, oid, attrs);
3092 }
3093
3094 bufferlist bl;
3095 src->read(ch, oid, 0, 0, bl);
3096 if (bl.length()) {
3097 t.write(cid, oid, 0, bl.length(), bl);
3098 bytes += bl.length();
3099 }
3100
3101 bufferlist header;
3102 map<string,bufferlist> omap;
3103 src->omap_get(ch, oid, &header, &omap);
3104 if (header.length()) {
3105 t.omap_setheader(cid, oid, header);
3106 ++keys;
3107 }
3108 if (!omap.empty()) {
3109 keys += omap.size();
3110 t.omap_setkeys(cid, oid, omap);
3111 }
3112
3113 dst->queue_transaction(dch, std::move(t));
3114 }
3115 }
3116 cout << " " << std::setw(16) << n << " objects, "
3117 << std::setw(16) << bytes << " bytes, "
3118 << std::setw(16) << keys << " keys"
3119 << std::setw(1) << std::endl;
3120 }
3121
3122 // keyring
3123 cout << "keyring" << std::endl;
3124 {
3125 bufferlist bl;
3126 string s = srcpath + "/keyring";
3127 string err;
3128 r = bl.read_file(s.c_str(), &err);
3129 if (r < 0) {
3130 cerr << "failed to copy " << s << ": " << err << std::endl;
3131 } else {
3132 string d = dstpath + "/keyring";
3133 bl.write_file(d.c_str(), 0600);
3134 }
3135 }
3136
3137 // osd metadata
3138 cout << "duping osd metadata" << std::endl;
3139 {
3140 for (auto k : {"magic", "whoami", "ceph_fsid", "fsid"}) {
3141 string val;
3142 src->read_meta(k, &val);
3143 dst->write_meta(k, val);
3144 }
3145 }
3146
3147 dst->write_meta("ready", "ready");
3148
3149 cout << "done." << std::endl;
3150 r = 0;
3151 out:
3152 dst->umount();
3153 out_src:
3154 src->umount();
3155 return r;
3156 }
3157
3158 void usage(po::options_description &desc)
3159 {
3160 cerr << std::endl;
3161 cerr << desc << std::endl;
3162 cerr << std::endl;
3163 cerr << "Positional syntax:" << std::endl;
3164 cerr << std::endl;
3165 cerr << "ceph-objectstore-tool ... <object> (get|set)-bytes [file]" << std::endl;
3166 cerr << "ceph-objectstore-tool ... <object> set-(attr|omap) <key> [file]" << std::endl;
3167 cerr << "ceph-objectstore-tool ... <object> (get|rm)-(attr|omap) <key>" << std::endl;
3168 cerr << "ceph-objectstore-tool ... <object> get-omaphdr" << std::endl;
3169 cerr << "ceph-objectstore-tool ... <object> set-omaphdr [file]" << std::endl;
3170 cerr << "ceph-objectstore-tool ... <object> list-attrs" << std::endl;
3171 cerr << "ceph-objectstore-tool ... <object> list-omap" << std::endl;
3172 cerr << "ceph-objectstore-tool ... <object> remove|removeall" << std::endl;
3173 cerr << "ceph-objectstore-tool ... <object> dump" << std::endl;
3174 cerr << "ceph-objectstore-tool ... <object> set-size" << std::endl;
3175 cerr << "ceph-objectstore-tool ... <object> clear-data-digest" << std::endl;
3176 cerr << "ceph-objectstore-tool ... <object> remove-clone-metadata <cloneid>" << std::endl;
3177 cerr << std::endl;
3178 cerr << "<object> can be a JSON object description as displayed" << std::endl;
3179 cerr << "by --op list." << std::endl;
3180 cerr << "<object> can be an object name which will be looked up in all" << std::endl;
3181 cerr << "the OSD's PGs." << std::endl;
3182 cerr << "<object> can be the empty string ('') which with a provided pgid " << std::endl;
3183 cerr << "specifies the pgmeta object" << std::endl;
3184 cerr << std::endl;
3185 cerr << "The optional [file] argument will read stdin or write stdout" << std::endl;
3186 cerr << "if not specified or if '-' specified." << std::endl;
3187 }
3188
3189 bool ends_with(const string& check, const string& ending)
3190 {
3191 return check.size() >= ending.size() && check.rfind(ending) == (check.size() - ending.size());
3192 }
3193
3194 // Based on FileStore::dump_journal(), set-up enough to only dump
3195 int mydump_journal(Formatter *f, string journalpath, bool m_journal_dio)
3196 {
3197 int r;
3198
3199 if (!journalpath.length())
3200 return -EINVAL;
3201
3202 FileJournal *journal = new FileJournal(g_ceph_context, uuid_d(), NULL, NULL,
3203 journalpath.c_str(), m_journal_dio);
3204 r = journal->_fdump(*f, false);
3205 delete journal;
3206 return r;
3207 }
3208
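// FileStore only: re-applies directory layout settings to every PG collection of the given pool (or the single given PG), passing target_level through to FileStore::apply_layout_settings().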
3209 int apply_layout_settings(ObjectStore *os, const OSDSuperblock &superblock,
3210 const string &pool_name, const spg_t &pgid, bool dry_run,
3211 int target_level)
3212 {
3213 int r = 0;
3214
3215 FileStore *fs = dynamic_cast<FileStore*>(os);
3216 if (!fs) {
3217 cerr << "Nothing to do for non-filestore backend" << std::endl;
3218 return 0; // making this return success makes testing easier
3219 }
3220
3221 OSDMap curmap;
3222 bufferlist bl;
3223 r = get_osdmap(os, superblock.current_epoch, curmap, bl);
3224 if (r) {
3225 cerr << "Can't find local OSDMap: " << cpp_strerror(r) << std::endl;
3226 return r;
3227 }
3228
3229 int64_t poolid = -1;
3230 if (pool_name.length()) {
3231 poolid = curmap.lookup_pg_pool_name(pool_name);
3232 if (poolid < 0) {
3233 cerr << "Couldn't find pool " << pool_name << ": " << cpp_strerror(poolid)
3234 << std::endl;
3235 return poolid;
3236 }
3237 }
3238
3239 vector<coll_t> collections, filtered_colls;
3240 r = os->list_collections(collections);
3241 if (r < 0) {
3242 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
3243 return r;
3244 }
3245
3246 for (auto const &coll : collections) {
3247 spg_t coll_pgid;
3248 if (coll.is_pg(&coll_pgid) &&
3249 ((poolid >= 0 && coll_pgid.pool() == (uint64_t)poolid) ||
3250 coll_pgid == pgid)) {
3251 filtered_colls.push_back(coll);
3252 }
3253 }
3254
3255 size_t done = 0, total = filtered_colls.size();
3256 for (auto const &coll : filtered_colls) {
3257 if (dry_run) {
3258 cerr << "Would apply layout settings to " << coll << std::endl;
3259 } else {
3260 cerr << "Finished " << done << "/" << total << " collections" << "\r";
3261 r = fs->apply_layout_settings(coll, target_level);
3262 if (r < 0) {
3263 cerr << "Error applying layout settings to " << coll << std::endl;
3264 return r;
3265 }
3266 }
3267 ++done;
3268 }
3269
3270 cerr << "Finished " << total << "/" << total << " collections" << "\r" << std::endl;
3271 return r;
3272 }
3273
3274 int main(int argc, char **argv)
3275 {
3276 string dpath, jpath, pgidstr, op, file, mountpoint, mon_store_path, object;
3277 string target_data_path, fsid;
3278 string objcmd, arg1, arg2, type, format, argnspace, pool, rmtypestr;
3279 boost::optional<std::string> nspace;
3280 spg_t pgid;
3281 unsigned epoch = 0;
3282 unsigned slow_threshold = 16;
3283 ghobject_t ghobj;
3284 bool human_readable;
3285 Formatter *formatter;
3286 bool head, tty;
3287
3288 po::options_description desc("Allowed options");
3289 desc.add_options()
3290 ("help", "produce help message")
3291 ("type", po::value<string>(&type),
3292 "Arg is one of [bluestore (default), filestore, memstore]")
3293 ("data-path", po::value<string>(&dpath),
3294 "path to object store, mandatory")
3295 ("journal-path", po::value<string>(&jpath),
3296 "path to journal, use if tool can't find it")
3297 ("pgid", po::value<string>(&pgidstr),
3298 "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, trim-pg-log-dups and mandatory for apply-layout-settings if --pool is not specified")
3299 ("pool", po::value<string>(&pool),
3300 "Pool name, mandatory for apply-layout-settings if --pgid is not specified")
3301 ("op", po::value<string>(&op),
3302 "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, list-slow-omap, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
3303 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log, trim-pg-log-dups statfs]")
3304 ("epoch", po::value<unsigned>(&epoch),
3305 "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
3306 ("file", po::value<string>(&file),
3307 "path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap")
3308 ("mon-store-path", po::value<string>(&mon_store_path),
3309 "path of monstore to update-mon-db")
3310 ("fsid", po::value<string>(&fsid),
3311 "fsid for new store created by mkfs")
3312 ("target-data-path", po::value<string>(&target_data_path),
3313 "path of target object store (for --op dup)")
3314 ("mountpoint", po::value<string>(&mountpoint),
3315 "fuse mountpoint")
3316 ("format", po::value<string>(&format)->default_value("json-pretty"),
3317 "Output format which may be json, json-pretty, xml, xml-pretty")
3318 ("debug", "Enable diagnostic output to stderr")
3319 ("no-mon-config", "Do not contact mons for config")
3320 ("no-superblock", "Do not read superblock")
3321 ("force", "Ignore some types of errors and proceed with operation - USE WITH CAUTION: CORRUPTION POSSIBLE NOW OR IN THE FUTURE")
3322 ("skip-journal-replay", "Disable journal replay")
3323 ("skip-mount-omap", "Disable mounting of omap")
3324 ("head", "Find head/snapdir when searching for objects by name")
3325 ("dry-run", "Don't modify the objectstore")
3326 ("tty", "Treat stdout as a tty (no binary data)")
3327 ("namespace", po::value<string>(&argnspace), "Specify namespace when searching for objects")
3328 ("rmtype", po::value<string>(&rmtypestr), "Specify corrupting object removal 'snapmap' or 'nosnapmap' - TESTING USE ONLY")
3329 ("slow-omap-threshold", po::value<unsigned>(&slow_threshold),
3330 "Threshold (in seconds) to consider omap listing slow (for op=list-slow-omap)")
3331 ;
3332
3333 po::options_description positional("Positional options");
3334 positional.add_options()
3335 ("object", po::value<string>(&object), "'' for pgmeta_oid, object name or ghobject in json")
3336 ("objcmd", po::value<string>(&objcmd), "command [(get|set)-bytes, (get|set|rm)-(attr|omap), (get|set)-omaphdr, list-attrs, list-omap, remove]")
3337 ("arg1", po::value<string>(&arg1), "arg1 based on cmd")
3338 ("arg2", po::value<string>(&arg2), "arg2 based on cmd")
3339 ;
3340
3341 po::options_description all;
3342 all.add(desc).add(positional);
3343
3344 po::positional_options_description pd;
3345 pd.add("object", 1).add("objcmd", 1).add("arg1", 1).add("arg2", 1);
3346
3347 vector<string> ceph_option_strings;
3348
3349 po::variables_map vm;
3350 try {
3351 po::parsed_options parsed =
3352 po::command_line_parser(argc, argv).options(all).allow_unregistered().positional(pd).run();
3353 po::store( parsed, vm);
3354 po::notify(vm);
3355 ceph_option_strings = po::collect_unrecognized(parsed.options,
3356 po::include_positional);
3357 } catch(po::error &e) {
3358 std::cerr << e.what() << std::endl;
3359 return 1;
3360 }
3361
3362 if (vm.count("help")) {
3363 usage(desc);
3364 return 1;
3365 }
3366
3367 // Compatibility with previous option name
3368 if (op == "dump-import")
3369 op = "dump-export";
3370
3371 debug = (vm.count("debug") > 0);
3372
3373 force = (vm.count("force") > 0);
3374
3375 no_superblock = (vm.count("no-superblock") > 0);
3376
3377 if (vm.count("namespace"))
3378 nspace = argnspace;
3379
3380 dry_run = (vm.count("dry-run") > 0);
3381 tty = (vm.count("tty") > 0);
3382
3383 osflagbits_t flags = 0;
3384 if (dry_run || vm.count("skip-journal-replay"))
3385 flags |= SKIP_JOURNAL_REPLAY;
3386 if (vm.count("skip-mount-omap"))
3387 flags |= SKIP_MOUNT_OMAP;
3388 if (op == "update-mon-db")
3389 flags |= SKIP_JOURNAL_REPLAY;
3390
3391 head = (vm.count("head") > 0);
3392
3393 // infer osd id so we can authenticate
3394 char fn[PATH_MAX];
3395 snprintf(fn, sizeof(fn), "%s/whoami", dpath.c_str());
3396 int fd = ::open(fn, O_RDONLY);
3397 if (fd >= 0) {
3398 bufferlist bl;
3399 bl.read_fd(fd, 64);
3400 string s(bl.c_str(), bl.length());
3401 int whoami = atoi(s.c_str());
3402 vector<string> tmp;
3403 // identify ourselves as this osd so we can auth and fetch our configs
3404 tmp.push_back("-n");
3405 tmp.push_back(string("osd.") + stringify(whoami));
3406 // populate osd_data so that the default keyring location works
3407 tmp.push_back("--osd-data");
3408 tmp.push_back(dpath);
3409 tmp.insert(tmp.end(), ceph_option_strings.begin(),
3410 ceph_option_strings.end());
3411 tmp.swap(ceph_option_strings);
3412 }
3413
3414 vector<const char *> ceph_options;
3415 ceph_options.reserve(ceph_options.size() + ceph_option_strings.size());
3416 for (vector<string>::iterator i = ceph_option_strings.begin();
3417 i != ceph_option_strings.end();
3418 ++i) {
3419 ceph_options.push_back(i->c_str());
3420 }
3421
3422 snprintf(fn, sizeof(fn), "%s/type", dpath.c_str());
3423 fd = ::open(fn, O_RDONLY);
3424 if (fd >= 0) {
3425 bufferlist bl;
3426 bl.read_fd(fd, 64);
3427 if (bl.length()) {
3428 string dp_type = string(bl.c_str(), bl.length() - 1); // drop \n
3429 if (vm.count("type") && dp_type != "" && type != dp_type)
3430 cerr << "WARNING: Ignoring type \"" << type << "\" - found data-path type \""
3431 << dp_type << "\"" << std::endl;
3432 type = dp_type;
3433 //cout << "object store type is " << type << std::endl;
3434 }
3435 ::close(fd);
3436 }
3437
3438 if (!vm.count("type") && type == "") {
3439 type = "bluestore";
3440 }
3441 if (!vm.count("data-path") &&
3442 op != "dump-export" &&
3443 !(op == "dump-journal" && type == "filestore")) {
3444 cerr << "Must provide --data-path" << std::endl;
3445 usage(desc);
3446 return 1;
3447 }
3448 if (type == "filestore" && !vm.count("journal-path")) {
3449 jpath = dpath + "/journal";
3450 }
3451 if (!vm.count("op") && !vm.count("object")) {
3452 cerr << "Must provide --op or object command..." << std::endl;
3453 usage(desc);
3454 return 1;
3455 }
3456 if (op != "list" && op != "apply-layout-settings" &&
3457 vm.count("op") && vm.count("object")) {
3458 cerr << "Can't specify both --op and object command syntax" << std::endl;
3459 usage(desc);
3460 return 1;
3461 }
3462 if (op == "apply-layout-settings" && !(vm.count("pool") ^ vm.count("pgid"))) {
3463 cerr << "apply-layout-settings requires either --pool or --pgid"
3464 << std::endl;
3465 usage(desc);
3466 return 1;
3467 }
3468 if (op != "list" && op != "apply-layout-settings" && vm.count("object") && !vm.count("objcmd")) {
3469 cerr << "Invalid syntax, missing command" << std::endl;
3470 usage(desc);
3471 return 1;
3472 }
3473 if (op == "fuse" && mountpoint.length() == 0) {
3474 cerr << "Missing fuse mountpoint" << std::endl;
3475 usage(desc);
3476 return 1;
3477 }
3478 outistty = isatty(STDOUT_FILENO) || tty;
3479
3480 file_fd = fd_none;
3481 if ((op == "export" || op == "export-remove" || op == "get-osdmap" || op == "get-inc-osdmap") && !dry_run) {
3482 if (!vm.count("file") || file == "-") {
3483 if (outistty) {
3484 cerr << "stdout is a tty and no --file filename specified" << std::endl;
3485 return 1;
3486 }
3487 file_fd = STDOUT_FILENO;
3488 } else {
3489 file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
3490 }
3491 } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap") {
3492 if (!vm.count("file") || file == "-") {
3493 if (isatty(STDIN_FILENO)) {
3494 cerr << "stdin is a tty and no --file filename specified" << std::endl;
3495 return 1;
3496 }
3497 file_fd = STDIN_FILENO;
3498 } else {
3499 file_fd = open(file.c_str(), O_RDONLY);
3500 }
3501 }
3502
3503 ObjectStoreTool tool = ObjectStoreTool(file_fd, dry_run);
3504
3505 if (vm.count("file") && file_fd == fd_none && !dry_run) {
3506 cerr << "--file option only applies to import, dump-export, export, export-remove, "
3507 << "get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap" << std::endl;
3508 return 1;
3509 }
3510
3511 if (file_fd != fd_none && file_fd < 0) {
3512 string err = string("file: ") + file;
3513 perror(err.c_str());
3514 return 1;
3515 }
3516 int init_flags = 0;
3517 if (vm.count("no-mon-config") > 0) {
3518 init_flags |= CINIT_FLAG_NO_MON_CONFIG;
3519 }
3520
3521 auto cct = global_init(
3522 NULL, ceph_options,
3523 CEPH_ENTITY_TYPE_OSD,
3524 CODE_ENVIRONMENT_UTILITY_NODOUT,
3525 init_flags);
3526 common_init_finish(g_ceph_context);
3527 if (debug) {
3528 g_conf().set_val_or_die("log_to_stderr", "true");
3529 g_conf().set_val_or_die("err_to_stderr", "true");
3530 }
3531 g_conf().apply_changes(nullptr);
3532
3533 // Special list handling. Treating pretty_format as human readable,
3534 // with one object per line and not an enclosing array.
3535 human_readable = ends_with(format, "-pretty");
3536 if ((op == "list" || op == "meta-list") && human_readable) {
3537 // Remove -pretty from end of format which we know is there
3538 format = format.substr(0, format.size() - strlen("-pretty"));
3539 }
3540
3541 formatter = Formatter::create(format);
3542 if (formatter == NULL) {
3543 cerr << "unrecognized format: " << format << std::endl;
3544 return 1;
3545 }
3546
3547 // Special handling for filestore journal, so we can dump it without mounting
3548 if (op == "dump-journal" && type == "filestore") {
3549 int ret = mydump_journal(formatter, jpath, g_conf()->journal_dio);
3550 if (ret < 0) {
3551 cerr << "journal-path: " << jpath << ": "
3552 << cpp_strerror(ret) << std::endl;
3553 return 1;
3554 }
3555 formatter->flush(cout);
3556 return 0;
3557 }
3558
3559 if (op == "dump-export") {
3560 int ret = tool.dump_export(formatter);
3561 if (ret < 0) {
3562 cerr << "dump-export: "
3563 << cpp_strerror(ret) << std::endl;
3564 return 1;
3565 }
3566 return 0;
3567 }
3568
3569 // Verify that the data-path really exists
3570 struct stat st;
3571 if (::stat(dpath.c_str(), &st) == -1) {
3572 string err = string("data-path: ") + dpath;
3573 perror(err.c_str());
3574 return 1;
3575 }
3576
3577 if (pgidstr.length() && pgidstr != "meta" && !pgid.parse(pgidstr.c_str())) {
3578 cerr << "Invalid pgid '" << pgidstr << "' specified" << std::endl;
3579 return 1;
3580 }
3581
3582 // Verify that the journal-path really exists
3583 if (type == "filestore") {
3584 if (::stat(jpath.c_str(), &st) == -1) {
3585 string err = string("journal-path: ") + jpath;
3586 perror(err.c_str());
3587 return 1;
3588 }
3589 if (S_ISDIR(st.st_mode)) {
3590 cerr << "journal-path: " << jpath << ": "
3591 << cpp_strerror(EISDIR) << std::endl;
3592 return 1;
3593 }
3594 }
3595
3596 std::unique_ptr<ObjectStore> fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags);
3597 if (!fs) {
3598 cerr << "Unable to create store of type " << type << std::endl;
3599 return 1;
3600 }
3601
3602 if (op == "fsck" || op == "fsck-deep") {
3603 int r = fs->fsck(op == "fsck-deep");
3604 if (r < 0) {
3605 cerr << "fsck failed: " << cpp_strerror(r) << std::endl;
3606 return 1;
3607 }
3608 if (r > 0) {
3609 cerr << "fsck status: " << r << " remaining error(s) and warning(s)" << std::endl;
3610 return 1;
3611 }
3612 cout << "fsck success" << std::endl;
3613 return 0;
3614 }
3615 if (op == "repair" || op == "repair-deep") {
3616 int r = fs->repair(op == "repair-deep");
3617 if (r < 0) {
3618 cerr << "repair failed: " << cpp_strerror(r) << std::endl;
3619 return 1;
3620 }
3621 if (r > 0) {
3622 cerr << "repair status: " << r << " remaining error(s) and warning(s)" << std::endl;
3623 return 1;
3624 }
3625 cout << "repair success" << std::endl;
3626 return 0;
3627 }
3628 if (op == "mkfs") {
3629 if (fsid.length()) {
3630 uuid_d f;
3631 bool r = f.parse(fsid.c_str());
3632 if (!r) {
3633 cerr << "failed to parse uuid '" << fsid << "'" << std::endl;
3634 return 1;
3635 }
3636 fs->set_fsid(f);
3637 }
3638 int r = fs->mkfs();
3639 if (r < 0) {
3640 cerr << "mkfs failed: " << cpp_strerror(r) << std::endl;
3641 return 1;
3642 }
3643 return 0;
3644 }
3645 if (op == "dup") {
3646 string target_type;
3647 char fn[PATH_MAX];
3648 snprintf(fn, sizeof(fn), "%s/type", target_data_path.c_str());
3649 int fd = ::open(fn, O_RDONLY);
3650 if (fd < 0) {
3651 cerr << "Unable to open " << target_data_path << "/type" << std::endl;
3652 exit(1);
3653 }
3654 bufferlist bl;
3655 bl.read_fd(fd, 64);
3656 if (bl.length()) {
3657 target_type = string(bl.c_str(), bl.length() - 1); // drop \n
3658 }
3659 ::close(fd);
3660 unique_ptr<ObjectStore> targetfs = ObjectStore::create(
3661 g_ceph_context, target_type,
3662 target_data_path, "", 0);
3663 if (!targetfs) {
3664 cerr << "Unable to open store of type " << target_type << std::endl;
3665 return 1;
3666 }
3667 int r = dup(dpath, fs.get(), target_data_path, targetfs.get());
3668 if (r < 0) {
3669 cerr << "dup failed: " << cpp_strerror(r) << std::endl;
3670 return 1;
3671 }
3672 return 0;
3673 }
3674
3675 int ret = fs->mount();
3676 if (ret < 0) {
3677 if (ret == -EBUSY) {
3678 cerr << "OSD has the store locked" << std::endl;
3679 } else {
3680 cerr << "Mount failed with '" << cpp_strerror(ret) << "'" << std::endl;
3681 }
3682 return 1;
3683 }
3684
3685 if (op == "fuse") {
3686 #ifdef HAVE_LIBFUSE
3687 FuseStore fuse(fs.get(), mountpoint);
3688 cout << "mounting fuse at " << mountpoint << " ..." << std::endl;
3689 int r = fuse.main();
3690 fs->umount();
3691 if (r < 0) {
3692 cerr << "failed to mount fuse: " << cpp_strerror(r) << std::endl;
3693 return 1;
3694 }
3695 #else
3696 cerr << "fuse support not enabled" << std::endl;
3697 #endif
3698 return 0;
3699 }
3700
3701 vector<coll_t> ls;
3702 vector<coll_t>::iterator it;
3703 CompatSet supported;
3704
3705 #ifdef INTERNAL_TEST
3706 supported = get_test_compat_set();
3707 #else
3708 supported = OSD::get_osd_compat_set();
3709 #endif
3710
3711 bufferlist bl;
3712 auto ch = fs->open_collection(coll_t::meta());
3713 std::unique_ptr<OSDSuperblock> superblock;
3714 if (!no_superblock) {
3715 superblock.reset(new OSDSuperblock);
3716 bufferlist::const_iterator p;
3717 ret = fs->read(ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
3718 if (ret < 0) {
3719 cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl;
3720 goto out;
3721 }
3722
3723 p = bl.cbegin();
3724 decode(*superblock, p);
3725
3726 if (debug) {
3727 cerr << "Cluster fsid=" << superblock->cluster_fsid << std::endl;
3728 }
3729
3730 if (debug) {
3731 cerr << "Supported features: " << supported << std::endl;
3732 cerr << "On-disk features: " << superblock->compat_features << std::endl;
3733 }
3734 if (supported.compare(superblock->compat_features) == -1) {
3735 CompatSet unsupported = supported.unsupported(superblock->compat_features);
3736 cerr << "On-disk OSD incompatible features set "
3737 << unsupported << std::endl;
3738 ret = -EINVAL;
3739 goto out;
3740 }
3741 }
3742
3743 if (op == "apply-layout-settings") {
3744 int target_level = 0;
3745 // Single positional argument with apply-layout-settings
3746 // for target_level.
3747 if (vm.count("object") && isdigit(object[0])) {
3748 target_level = atoi(object.c_str());
3749 // This requires --arg1 to be specified since
3750 // this is the third positional argument and normally
3751 // used with object operations.
3752 } else if (vm.count("arg1") && isdigit(arg1[0])) {
3753 target_level = atoi(arg1.c_str());
3754 }
3755 ceph_assert(superblock != nullptr);
3756 ret = apply_layout_settings(fs.get(), *superblock, pool, pgid, dry_run, target_level);
3757 goto out;
3758 }
3759
3760 if (op != "list" && vm.count("object")) {
3761 // Special case: Create pgmeta_oid if empty string specified
3762 // This can't conflict with any actual object names.
3763 if (object == "") {
3764 ghobj = pgid.make_pgmeta_oid();
3765 } else {
3766 json_spirit::Value v;
3767 try {
3768 if (!json_spirit::read(object, v) ||
3769 (v.type() != json_spirit::array_type && v.type() != json_spirit::obj_type)) {
3770 // Special: Need head/snapdir so set even if user didn't specify
3771 if (vm.count("objcmd") && (objcmd == "remove-clone-metadata"))
3772 head = true;
3773 lookup_ghobject lookup(object, nspace, head);
3774 if (pgidstr == "meta")
3775 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t::meta(), lookup, debug);
3776 else if (pgidstr.length())
3777 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t(pgid), lookup, debug);
3778 else
3779 ret = action_on_all_objects(fs.get(), lookup, debug);
3780 if (ret) {
3781 throw std::runtime_error("Internal error");
3782 } else {
3783 if (lookup.size() != 1) {
3784 stringstream ss;
3785 if (lookup.size() == 0)
3786 ss << "No object id '" << object << "' found or invalid JSON specified";
3787 else
3788 ss << "Found " << lookup.size() << " objects with id '" << object
3789 << "', please use a JSON spec from --op list instead";
3790 throw std::runtime_error(ss.str());
3791 }
3792 pair<coll_t, ghobject_t> found = lookup.pop();
3793 pgidstr = found.first.to_str();
3794 pgid.parse(pgidstr.c_str());
3795 ghobj = found.second;
3796 }
3797 } else {
3798 stringstream ss;
3799 if (pgidstr.length() == 0 && v.type() != json_spirit::array_type) {
3800 ss << "Without --pgid the object '" << object
3801 << "' must be a JSON array";
3802 throw std::runtime_error(ss.str());
3803 }
3804 if (v.type() == json_spirit::array_type) {
3805 json_spirit::Array array = v.get_array();
3806 if (array.size() != 2) {
3807 ss << "Object '" << object
3808 << "' must be a JSON array with 2 elements";
3809 throw std::runtime_error(ss.str());
3810 }
3811 vector<json_spirit::Value>::iterator i = array.begin();
3812 ceph_assert(i != array.end());
3813 if (i->type() != json_spirit::str_type) {
3814 ss << "Object '" << object
3815 << "' must be a JSON array with the first element a string";
3816 throw std::runtime_error(ss.str());
3817 }
3818 string object_pgidstr = i->get_str();
3819 if (object_pgidstr != "meta") {
3820 spg_t object_pgid;
3821 object_pgid.parse(object_pgidstr.c_str());
3822 if (pgidstr.length() > 0) {
3823 if (object_pgid != pgid) {
3824 ss << "object '" << object
3825 << "' has a pgid different from the --pgid="
3826 << pgidstr << " option";
3827 throw std::runtime_error(ss.str());
3828 }
3829 } else {
3830 pgidstr = object_pgidstr;
3831 pgid = object_pgid;
3832 }
3833 } else {
3834 pgidstr = object_pgidstr;
3835 }
3836 ++i;
3837 v = *i;
3838 }
3839 try {
3840 ghobj.decode(v);
3841 } catch (std::runtime_error& e) {
3842 ss << "Decode object JSON error: " << e.what();
3843 throw std::runtime_error(ss.str());
3844 }
3845 if (pgidstr != "meta" && (uint64_t)pgid.pgid.m_pool != (uint64_t)ghobj.hobj.pool) {
3846 cerr << "Object pool and pgid pool don't match" << std::endl;
3847 ret = 1;
3848 goto out;
3849 }
3850 if (pgidstr != "meta") {
3851 auto ch = fs->open_collection(coll_t(pgid));
3852 if (!ghobj.match(fs->collection_bits(ch), pgid.ps())) {
3853 stringstream ss;
3854 ss << "object " << ghobj << " not contained by pg " << pgid;
3855 throw std::runtime_error(ss.str());
3856 }
3857 }
3858 }
3859 } catch (std::runtime_error& e) {
3860 cerr << e.what() << std::endl;
3861 ret = 1;
3862 goto out;
3863 }
3864 }
3865 }
3866
3867 // The ops which require the --pgid option are checked here and are
3868 // mentioned in the usage text for --pgid.
3869 if ((op == "info" || op == "log" || op == "remove" || op == "export"
3870 || op == "export-remove" || op == "mark-complete"
3871 || op == "reset-last-complete"
3872 || op == "trim-pg-log"
3873 || op == "trim-pg-log-dups") &&
3874 pgidstr.length() == 0) {
3875 cerr << "Must provide pgid" << std::endl;
3876 usage(desc);
3877 ret = 1;
3878 goto out;
3879 }
3880
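// import restores a PG from a dump previously produced by '--op export',
// recreating the PG metadata and objects on this OSD. Illustrative
// invocation (option names per the usage text):
//
//   ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 \
//       --op import --file 2.0.export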
3881 if (op == "import") {
3882 ceph_assert(superblock != nullptr);
3883 try {
3884 ret = tool.do_import(fs.get(), *superblock, force, pgidstr);
3885 }
3886 catch (const buffer::error &e) {
3887 cerr << "do_import threw exception error " << e.what() << std::endl;
3888 ret = -EFAULT;
3889 }
3890 if (ret == -EFAULT) {
3891 cerr << "Corrupt input for import" << std::endl;
3892 }
3893 if (ret == 0)
3894 cout << "Import successful" << std::endl;
3895 goto out;
3896 } else if (op == "dump-journal-mount") {
3897 // Undocumented feature to dump journal with mounted fs
3898 // This doesn't support the format option; it uses
3899 // ObjectStore::dump_journal() on the mounted store so that journal replay has run.
3900 ret = fs->dump_journal(cout);
3901 if (ret) {
3902 if (ret == -EOPNOTSUPP) {
3903 cerr << "Object store type \"" << type << "\" doesn't support journal dump" << std::endl;
3904 } else {
3905 cerr << "Journal dump failed with error " << cpp_strerror(ret) << std::endl;
3906 }
3907 }
3908 goto out;
3909 } else if (op == "get-osdmap") {
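// get-osdmap writes the full OSDMap for --epoch (defaulting to the
// superblock's current epoch) to --file; set-osdmap below is the inverse
// and injects a map read from --file. Illustrative:
//
//   ceph-objectstore-tool --data-path ... --op get-osdmap --file osdmap.bin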
3910 bufferlist bl;
3911 OSDMap osdmap;
3912 if (epoch == 0) {
3913 ceph_assert(superblock != nullptr);
3914 epoch = superblock->current_epoch;
3915 }
3916 ret = get_osdmap(fs.get(), epoch, osdmap, bl);
3917 if (ret) {
3918 cerr << "Failed to get osdmap#" << epoch << ": "
3919 << cpp_strerror(ret) << std::endl;
3920 goto out;
3921 }
3922 ret = bl.write_fd(file_fd);
3923 if (ret) {
3924 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
3925 } else {
3926 cout << "osdmap#" << epoch << " exported." << std::endl;
3927 }
3928 goto out;
3929 } else if (op == "set-osdmap") {
3930 bufferlist bl;
3931 ret = get_fd_data(file_fd, bl);
3932 if (ret < 0) {
3933 cerr << "Failed to read osdmap " << cpp_strerror(ret) << std::endl;
3934 } else {
3935 ret = set_osdmap(fs.get(), epoch, bl, force);
3936 }
3937 goto out;
3938 } else if (op == "get-inc-osdmap") {
3939 bufferlist bl;
3940 if (epoch == 0) {
3941 ceph_assert(superblock != nullptr);
3942 epoch = superblock->current_epoch;
3943 }
3944 ret = get_inc_osdmap(fs.get(), epoch, bl);
3945 if (ret < 0) {
3946 cerr << "Failed to get incremental osdmap# " << epoch << ": "
3947 << cpp_strerror(ret) << std::endl;
3948 goto out;
3949 }
3950 ret = bl.write_fd(file_fd);
3951 if (ret) {
3952 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
3953 } else {
3954 cout << "inc-osdmap#" << epoch << " exported." << std::endl;
3955 }
3956 goto out;
3957 } else if (op == "set-inc-osdmap") {
3958 bufferlist bl;
3959 ret = get_fd_data(file_fd, bl);
3960 if (ret < 0) {
3961 cerr << "Failed to read incremental osdmap " << cpp_strerror(ret) << std::endl;
3962 goto out;
3963 } else {
3964 ret = set_inc_osdmap(fs.get(), epoch, bl, force);
3965 }
3966 goto out;
3967 } else if (op == "update-mon-db") {
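// update-mon-db rebuilds monitor store data (such as osdmaps) from this
// OSD into the store given by --mon-store-path; the OSD keyring under
// dpath is passed along as well. Illustrative:
//
//   ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 \
//       --op update-mon-db --mon-store-path /tmp/mon-store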
3968 if (!vm.count("mon-store-path")) {
3969 cerr << "Please specify the path to monitor db to update" << std::endl;
3970 ret = -EINVAL;
3971 } else {
3972 ceph_assert(superblock != nullptr);
3973 ret = update_mon_db(*fs, *superblock, dpath + "/keyring", mon_store_path);
3974 }
3975 goto out;
3976 }
3977
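// remove deletes the whole PG from this OSD. It refuses to run without
// --force (or --dry-run), since export-remove, which exports the PG to a
// file before removing it, is the safer path. Illustrative:
//
//   ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 \
//       --op remove --pgid 2.0 --force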
3978 if (op == "remove") {
3979 if (!force && !dry_run) {
3980 cerr << "Please use export-remove or you must use --force option" << std::endl;
3981 ret = -EINVAL;
3982 goto out;
3983 }
3984 ret = initiate_new_remove_pg(fs.get(), pgid);
3985 if (ret < 0) {
3986 cerr << "PG '" << pgid << "' not found" << std::endl;
3987 goto out;
3988 }
3989 cout << "Remove successful" << std::endl;
3990 goto out;
3991 }
3992
3993 if (op == "fix-lost") {
3994 boost::scoped_ptr<action_on_object_t> action;
3995 action.reset(new do_fix_lost());
3996 if (pgidstr.length())
3997 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t(pgid), *action, debug);
3998 else
3999 ret = action_on_all_objects(fs.get(), *action, debug);
4000 goto out;
4001 }
4002
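// list prints the objects of the given PG (or of all PGs when --pgid is
// omitted) as JSON specs that can be fed back as the <object> argument of
// the object commands. Illustrative:
//
//   ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 \
//       --op list --pgid 2.0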
4003 if (op == "list") {
4004 ret = do_list(fs.get(), pgidstr, object, nspace, formatter, debug,
4005 human_readable, head);
4006 if (ret < 0) {
4007 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
4008 }
4009 goto out;
4010 }
4011 if (op == "list-slow-omap") {
4012 ret = do_list_slow(fs.get(), pgidstr, object, slow_threshold, formatter, debug,
4013 human_readable);
4014 if (ret < 0) {
4015 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
4016 }
4017 goto out;
4018 }
4019
4020 if (op == "dump-super") {
4021 ceph_assert(superblock != nullptr);
4022 formatter->open_object_section("superblock");
4023 superblock->dump(formatter);
4024 formatter->close_section();
4025 formatter->flush(cout);
4026 cout << std::endl;
4027 goto out;
4028 }
4029
4030 if (op == "statfs") {
4031 store_statfs_t statsbuf;
4032 ret = fs->statfs(&statsbuf);
4033 if (ret < 0) {
4034 cerr << "error from statfs: " << cpp_strerror(ret) << std::endl;
4035 goto out;
4036 }
4037 formatter->open_object_section("statfs");
4038 statsbuf.dump(formatter);
4039 formatter->close_section();
4040 formatter->flush(cout);
4041 cout << std::endl;
4042 goto out;
4043 }
4044
4045 if (op == "meta-list") {
4046 ret = do_meta(fs.get(), object, formatter, debug, human_readable);
4047 if (ret < 0) {
4048 cerr << "do_meta failed: " << cpp_strerror(ret) << std::endl;
4049 }
4050 goto out;
4051 }
4052
4053 ret = fs->list_collections(ls);
4054 if (ret < 0) {
4055 cerr << "failed to list pgs: " << cpp_strerror(ret) << std::endl;
4056 goto out;
4057 }
4058
4059 if (debug && op == "list-pgs")
4060 cout << "Performing list-pgs operation" << std::endl;
4061
4062 // Find pg
4063 for (it = ls.begin(); it != ls.end(); ++it) {
4064 spg_t tmppgid;
4065
4066 if (pgidstr == "meta") {
4067 if (it->to_str() == "meta")
4068 break;
4069 else
4070 continue;
4071 }
4072
4073 if (!it->is_pg(&tmppgid)) {
4074 continue;
4075 }
4076
4077 if (it->is_temp(&tmppgid)) {
4078 continue;
4079 }
4080
4081 if (op != "list-pgs" && tmppgid != pgid) {
4082 continue;
4083 }
4084
4085 if (op != "list-pgs") {
4086 // Found!
4087 break;
4088 }
4089
4090 cout << tmppgid << std::endl;
4091 }
4092
4093 if (op == "list-pgs") {
4094 ret = 0;
4095 goto out;
4096 }
4097
4098 // If this is neither an object command nor one of the ops handled below, print
4099 // the usage before complaining about a bad pgid
4100 if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups") {
4101 cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
4102 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, trim-pg-log-dups statfs)"
4103 << std::endl;
4104 usage(desc);
4105 ret = 1;
4106 goto out;
4107 }
4108 epoch_t map_epoch;
4109 // The following code for export, info, log requires omap or !skip-mount-omap
4110 if (it != ls.end()) {
4111
4112 coll_t coll = *it;
4113
4114 if (vm.count("objcmd")) {
4115 ret = 0;
4116 if (objcmd == "remove" || objcmd == "removeall") {
4117 bool all = (objcmd == "removeall");
4118 enum rmtype type = BOTH;
4119 if (rmtypestr == "nosnapmap")
4120 type = NOSNAPMAP;
4121 else if (rmtypestr == "snapmap")
4122 type = SNAPMAP;
4123 ret = do_remove_object(fs.get(), coll, ghobj, all, force, type);
4124 goto out;
4125 } else if (objcmd == "list-attrs") {
4126 ret = do_list_attrs(fs.get(), coll, ghobj);
4127 goto out;
4128 } else if (objcmd == "list-omap") {
4129 ret = do_list_omap(fs.get(), coll, ghobj);
4130 goto out;
4131 } else if (objcmd == "get-bytes" || objcmd == "set-bytes") {
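// get-bytes streams the object's data to a file, or to stdout when the file
// argument is '-' or omitted; set-bytes replaces the data from a file or
// stdin. A sketch using the positional <object> <objectcmd> form:
//
//   ceph-objectstore-tool --data-path ... --pgid 2.0 \
//       myobject get-bytes /tmp/obj.bin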
4132 if (objcmd == "get-bytes") {
4133 int fd;
4134 if (vm.count("arg1") == 0 || arg1 == "-") {
4135 fd = STDOUT_FILENO;
4136 } else {
4137 fd = open(arg1.c_str(), O_WRONLY|O_TRUNC|O_CREAT|O_EXCL|O_LARGEFILE, 0666);
4138 if (fd == -1) {
4139 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4140 ret = 1;
4141 goto out;
4142 }
4143 }
4144 ret = do_get_bytes(fs.get(), coll, ghobj, fd);
4145 if (fd != STDOUT_FILENO)
4146 close(fd);
4147 } else {
4148 int fd;
4149 if (vm.count("arg1") == 0 || arg1 == "-") {
4150 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4151 if (isatty(STDIN_FILENO)) {
4152 cerr << "stdin is a tty and no file specified" << std::endl;
4153 ret = 1;
4154 goto out;
4155 }
4156 fd = STDIN_FILENO;
4157 } else {
4158 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4159 if (fd == -1) {
4160 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4161 ret = 1;
4162 goto out;
4163 }
4164 }
4165 ret = do_set_bytes(fs.get(), coll, ghobj, fd);
4166 if (fd != STDIN_FILENO)
4167 close(fd);
4168 }
4169 goto out;
4170 } else if (objcmd == "get-attr") {
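// The attr and omap object commands take the attribute or omap key as the
// first extra argument; the set-* variants additionally read the value from
// a file, or from stdin when that argument is '-' or omitted. Illustrative:
//
//   ceph-objectstore-tool --data-path ... --pgid 2.0 myobject get-attr snapset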
4171 if (vm.count("arg1") == 0) {
4172 usage(desc);
4173 ret = 1;
4174 goto out;
4175 }
4176 ret = do_get_attr(fs.get(), coll, ghobj, arg1);
4177 goto out;
4178 } else if (objcmd == "set-attr") {
4179 if (vm.count("arg1") == 0) {
4180 usage(desc);
4181 ret = 1;
goto out;
4182 }
4183
4184 int fd;
4185 if (vm.count("arg2") == 0 || arg2 == "-") {
4186 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4187 if (isatty(STDIN_FILENO)) {
4188 cerr << "stdin is a tty and no file specified" << std::endl;
4189 ret = 1;
4190 goto out;
4191 }
4192 fd = STDIN_FILENO;
4193 } else {
4194 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4195 if (fd == -1) {
4196 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
4197 ret = 1;
4198 goto out;
4199 }
4200 }
4201 ret = do_set_attr(fs.get(), coll, ghobj, arg1, fd);
4202 if (fd != STDIN_FILENO)
4203 close(fd);
4204 goto out;
4205 } else if (objcmd == "rm-attr") {
4206 if (vm.count("arg1") == 0) {
4207 usage(desc);
4208 ret = 1;
4209 goto out;
4210 }
4211 ret = do_rm_attr(fs.get(), coll, ghobj, arg1);
4212 goto out;
4213 } else if (objcmd == "get-omap") {
4214 if (vm.count("arg1") == 0) {
4215 usage(desc);
4216 ret = 1;
4217 goto out;
4218 }
4219 ret = do_get_omap(fs.get(), coll, ghobj, arg1);
4220 goto out;
4221 } else if (objcmd == "set-omap") {
4222 if (vm.count("arg1") == 0) {
4223 usage(desc);
4224 ret = 1;
4225 goto out;
4226 }
4227 int fd;
4228 if (vm.count("arg2") == 0 || arg2 == "-") {
4229 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4230 if (isatty(STDIN_FILENO)) {
4231 cerr << "stdin is a tty and no file specified" << std::endl;
4232 ret = 1;
4233 goto out;
4234 }
4235 fd = STDIN_FILENO;
4236 } else {
4237 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4238 if (fd == -1) {
4239 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
4240 ret = 1;
4241 goto out;
4242 }
4243 }
4244 ret = do_set_omap(fs.get(), coll, ghobj, arg1, fd);
4245 if (fd != STDIN_FILENO)
4246 close(fd);
4247 goto out;
4248 } else if (objcmd == "rm-omap") {
4249 if (vm.count("arg1") == 0) {
4250 usage(desc);
4251 ret = 1;
4252 goto out;
4253 }
4254 ret = do_rm_omap(fs.get(), coll, ghobj, arg1);
4255 goto out;
4256 } else if (objcmd == "get-omaphdr") {
4257 if (vm.count("arg1")) {
4258 usage(desc);
4259 ret = 1;
4260 goto out;
4261 }
4262 ret = do_get_omaphdr(fs.get(), coll, ghobj);
4263 goto out;
4264 } else if (objcmd == "set-omaphdr") {
4265 // Extra arg
4266 if (vm.count("arg2")) {
4267 usage(desc);
4268 ret = 1;
4269 goto out;
4270 }
4271 int fd;
4272 if (vm.count("arg1") == 0 || arg1 == "-") {
4273 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4274 if (isatty(STDIN_FILENO)) {
4275 cerr << "stdin is a tty and no file specified" << std::endl;
4276 ret = 1;
4277 goto out;
4278 }
4279 fd = STDIN_FILENO;
4280 } else {
4281 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4282 if (fd == -1) {
4283 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4284 ret = 1;
4285 goto out;
4286 }
4287 }
4288 ret = do_set_omaphdr(fs.get(), coll, ghobj, fd);
4289 if (fd != STDIN_FILENO)
4290 close(fd);
4291 goto out;
4292 } else if (objcmd == "dump") {
4293 // There should not be any other arguments
4294 if (vm.count("arg1") || vm.count("arg2")) {
4295 usage(desc);
4296 ret = 1;
4297 goto out;
4298 }
4299 ret = print_obj_info(fs.get(), coll, ghobj, formatter);
4300 goto out;
4301 } else if (objcmd == "corrupt-info") { // Undocumented testing feature
4302 // There should not be any other arguments
4303 if (vm.count("arg1") || vm.count("arg2")) {
4304 usage(desc);
4305 ret = 1;
4306 goto out;
4307 }
4308 ret = corrupt_info(fs.get(), coll, ghobj, formatter);
4309 goto out;
4310 } else if (objcmd == "set-size" || objcmd == "corrupt-size") {
4311 // Undocumented testing feature
4312 bool corrupt = (objcmd == "corrupt-size");
4313 // Extra arg
4314 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4315 usage(desc);
4316 ret = 1;
4317 goto out;
4318 }
4319 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4320 cerr << "Invalid size '" << arg1 << "' specified" << std::endl;
4321 ret = 1;
4322 goto out;
4323 }
4324 uint64_t size = atoll(arg1.c_str());
4325 ret = set_size(fs.get(), coll, ghobj, size, formatter, corrupt);
4326 goto out;
4327 } else if (objcmd == "clear-data-digest") {
4328 ret = clear_data_digest(fs.get(), coll, ghobj);
4329 goto out;
4330 } else if (objcmd == "clear-snapset") {
4331 // UNDOCUMENTED: For testing zap SnapSet
4332 // IGNORE extra args since not in usage anyway
4333 if (!ghobj.hobj.has_snapset()) {
4334 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4335 ret = 1;
4336 goto out;
4337 }
4338 ret = clear_snapset(fs.get(), coll, ghobj, arg1);
4339 goto out;
4340 } else if (objcmd == "remove-clone-metadata") {
4341 // Extra arg
4342 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4343 usage(desc);
4344 ret = 1;
4345 goto out;
4346 }
4347 if (!ghobj.hobj.has_snapset()) {
4348 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4349 ret = 1;
4350 goto out;
4351 }
4352 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4353 cerr << "Invalid cloneid '" << arg1 << "' specified" << std::endl;
4354 ret = 1;
4355 goto out;
4356 }
4357 snapid_t cloneid = atoi(arg1.c_str());
4358 ret = remove_clone(fs.get(), coll, ghobj, cloneid, force);
4359 goto out;
4360 }
4361 cerr << "Unknown object command '" << objcmd << "'" << std::endl;
4362 usage(desc);
4363 ret = 1;
4364 goto out;
4365 }
4366
4367 map_epoch = 0;
4368 ret = PG::peek_map_epoch(fs.get(), pgid, &map_epoch);
4369 if (ret < 0)
4370 cerr << "peek_map_epoch reports error" << std::endl;
4371 if (debug)
4372 cerr << "map_epoch " << map_epoch << std::endl;
4373
4374 pg_info_t info(pgid);
4375 PastIntervals past_intervals;
4376 __u8 struct_ver;
4377 ret = PG::read_info(fs.get(), pgid, coll, info, past_intervals, struct_ver);
4378 if (ret < 0) {
4379 cerr << "read_info error " << cpp_strerror(ret) << std::endl;
4380 goto out;
4381 }
4382 if (struct_ver < PG::get_compat_struct_v()) {
4383 cerr << "PG is too old to upgrade, use older Ceph version" << std::endl;
4384 ret = -EFAULT;
4385 goto out;
4386 }
4387 if (debug)
4388 cerr << "struct_v " << (int)struct_ver << std::endl;
4389
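// export serializes the PG (info, past intervals, log and objects) to
// --file so it can later be restored with '--op import'; export-remove
// additionally removes the PG once the export succeeded. Illustrative:
//
//   ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-0 \
//       --op export --pgid 2.0 --file 2.0.export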
4390 if (op == "export" || op == "export-remove") {
4391 ceph_assert(superblock != nullptr);
4392 ret = tool.do_export(fs.get(), coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals);
4393 if (ret == 0) {
4394 cerr << "Export successful" << std::endl;
4395 if (op == "export-remove") {
4396 ret = initiate_new_remove_pg(fs.get(), pgid);
4397 // Export succeeded, so pgid is there
4398 ceph_assert(ret == 0);
4399 cerr << "Remove successful" << std::endl;
4400 }
4401 }
4402 } else if (op == "info") {
4403 formatter->open_object_section("info");
4404 info.dump(formatter);
4405 formatter->close_section();
4406 formatter->flush(cout);
4407 cout << std::endl;
4408 } else if (op == "log") {
4409 PGLog::IndexedLog log;
4410 pg_missing_t missing;
4411 ret = get_log(fs.get(), struct_ver, pgid, info, log, missing);
4412 if (ret < 0)
4413 goto out;
4414
4415 dump_log(formatter, cout, log, missing);
4416 } else if (op == "mark-complete") {
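// mark-complete forges the PG info so the PG appears fully backfilled and
// clean as of the superblock's current epoch (see the fields rewritten
// below). It is a last-resort recovery step; the new info is queued as a
// transaction unless --dry-run is set. Illustrative:
//
//   ceph-objectstore-tool --data-path ... --op mark-complete --pgid 2.0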
4417 ObjectStore::Transaction tran;
4418 ObjectStore::Transaction *t = &tran;
4419
4420 if (struct_ver < PG::get_compat_struct_v()) {
4421 cerr << "Can't mark-complete, version mismatch " << (int)struct_ver
4422 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4423 << std::endl;
4424 ret = 1;
4425 goto out;
4426 }
4427
4428 cout << "Marking complete " << std::endl;
4429
4430 ceph_assert(superblock != nullptr);
4431 info.last_update = eversion_t(superblock->current_epoch, info.last_update.version + 1);
4432 info.last_backfill = hobject_t::get_max();
4433 info.last_epoch_started = superblock->current_epoch;
4434 info.history.last_epoch_started = superblock->current_epoch;
4435 info.history.last_epoch_clean = superblock->current_epoch;
4436 past_intervals.clear();
4437
4438 if (!dry_run) {
4439 ret = write_info(*t, map_epoch, info, past_intervals);
4440 if (ret != 0)
4441 goto out;
4442 auto ch = fs->open_collection(coll_t(pgid));
4443 fs->queue_transaction(ch, std::move(*t));
4444 }
4445 cout << "Marking complete succeeded" << std::endl;
4446 } else if (op == "trim-pg-log") {
4447 ret = do_trim_pg_log(fs.get(), coll, info, pgid,
4448 map_epoch, past_intervals);
4449 if (ret < 0) {
4450 cerr << "Error trimming pg log: " << cpp_strerror(ret) << std::endl;
4451 goto out;
4452 }
4453 cout << "Finished trimming pg log" << std::endl;
4454 goto out;
4455 } else if (op == "trim-pg-log-dups") {
4456 ret = do_trim_pg_log_dups(fs.get(), coll, info, pgid,
4457 map_epoch, past_intervals);
4458 if (ret < 0) {
4459 cerr << "Error trimming pg log dups: " << cpp_strerror(ret) << std::endl;
4460 goto out;
4461 }
4462 cout << "Finished trimming pg log dups" << std::endl;
4463 goto out;
4464 } else if (op == "reset-last-complete") {
4465 if (!force) {
4466 std::cerr << "WARNING: reset-last-complete is extremely dangerous and almost "
4467 << "certain to lead to permanent data loss unless you know exactly "
4468 << "what you are doing. Pass --force to proceed anyway."
4469 << std::endl;
4470 ret = -EINVAL;
4471 goto out;
4472 }
4473 ObjectStore::Transaction tran;
4474 ObjectStore::Transaction *t = &tran;
4475
4476 if (struct_ver < PG::get_compat_struct_v()) {
4477 cerr << "Can't reset-last-complete, version mismatch " << (int)struct_ver
4478 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4479 << std::endl;
4480 ret = 1;
4481 goto out;
4482 }
4483
4484 cout << "Reseting last_complete " << std::endl;
4485
4486 info.last_complete = info.last_update;
4487
4488 if (!dry_run) {
4489 ret = write_info(*t, map_epoch, info, past_intervals);
4490 if (ret != 0)
4491 goto out;
4492 fs->queue_transaction(ch, std::move(*t));
4493 }
4494 cout << "Reseting last_complete succeeded" << std::endl;
4495
4496 } else {
4497 ceph_assert(!"Should have already checked for valid --op");
4498 }
4499 } else {
4500 cerr << "PG '" << pgid << "' not found" << std::endl;
4501 ret = -ENOENT;
4502 }
4503
4504 out:
4505 if (debug) {
4506 ostringstream ostr;
4507 Formatter* f = Formatter::create("json-pretty", "json-pretty", "json-pretty");
4508 cct->get_perfcounters_collection()->dump_formatted(f, false);
4509 ostr << "ceph-objectstore-tool ";
4510 f->flush(ostr);
4511 delete f;
4512 cout << ostr.str() << std::endl;
4513 }
4514
4515 int r = fs->umount();
4516 if (r < 0) {
4517 cerr << "umount failed: " << cpp_strerror(r) << std::endl;
4518 // If no previous error, then use umount() error
4519 if (ret == 0)
4520 ret = r;
4521 }
4522
4523 if (dry_run) {
4524 // Export output can go to stdout, so put this message on stderr
4525 if (op == "export")
4526 cerr << "dry-run: Nothing changed" << std::endl;
4527 else
4528 cout << "dry-run: Nothing changed" << std::endl;
4529 }
4530
4531 if (ret < 0)
4532 ret = 1;
4533 return ret;
4534 }