]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/ceph_objectstore_tool.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / tools / ceph_objectstore_tool.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <boost/program_options/variables_map.hpp>
16#include <boost/program_options/parsers.hpp>
17#include <boost/scoped_ptr.hpp>
18#include <boost/optional.hpp>
19
20#include <stdlib.h>
21
22#include "common/Formatter.h"
23#include "common/errno.h"
24#include "common/ceph_argparse.h"
25
26#include "global/global_init.h"
27
28#include "os/ObjectStore.h"
29#include "os/filestore/FileJournal.h"
30#include "os/filestore/FileStore.h"
31#ifdef HAVE_LIBFUSE
32#include "os/FuseStore.h"
33#endif
34
35#include "osd/PGLog.h"
36#include "osd/OSD.h"
37#include "osd/PG.h"
a8e16298 38#include "osd/ECUtil.h"
7c673cae
FG
39
40#include "json_spirit/json_spirit_value.h"
41#include "json_spirit/json_spirit_reader.h"
42
43#include "rebuild_mondb.h"
44#include "ceph_objectstore_tool.h"
45#include "include/compat.h"
46#include "include/util.h"
47
48namespace po = boost::program_options;
7c673cae
FG
49
50#ifdef INTERNAL_TEST
51CompatSet get_test_compat_set() {
52 CompatSet::FeatureSet ceph_osd_feature_compat;
53 CompatSet::FeatureSet ceph_osd_feature_ro_compat;
54 CompatSet::FeatureSet ceph_osd_feature_incompat;
55 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
56 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_PGINFO);
57 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_OLOC);
58 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEC);
59 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES);
60 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL);
61 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
62 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
63 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
64#ifdef INTERNAL_TEST2
65 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER);
66 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
67#endif
68 return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
69 ceph_osd_feature_incompat);
70}
71#endif
72
// Upper bound, in bytes, on a single read request issued by this tool.
const ssize_t max_read = 1024 * 1024;
// Sentinel value meaning "no file descriptor".
const int fd_none = INT_MIN;
bool outistty = false;
// When true, functions in this file skip the steps that would modify the store.
bool dry_run = false;
7c673cae
FG
77
78struct action_on_object_t {
79 virtual ~action_on_object_t() {}
11fdf7f2 80 virtual void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) = 0;
7c673cae
FG
81};
82
83int _action_on_all_objects_in_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
84{
11fdf7f2 85 auto ch = store->open_collection(coll);
7c673cae
FG
86 unsigned LIST_AT_A_TIME = 100;
87 ghobject_t next;
88 while (!next.is_max()) {
89 vector<ghobject_t> list;
11fdf7f2 90 int r = store->collection_list(ch,
7c673cae
FG
91 next,
92 ghobject_t::get_max(),
93 LIST_AT_A_TIME,
94 &list,
95 &next);
96 if (r < 0) {
97 cerr << "Error listing collection: " << coll << ", "
98 << cpp_strerror(r) << std::endl;
99 return r;
100 }
101 for (vector<ghobject_t>::iterator obj = list.begin();
102 obj != list.end();
103 ++obj) {
104 if (obj->is_pgmeta())
105 continue;
106 object_info_t oi;
107 if (coll != coll_t::meta()) {
108 bufferlist attr;
11fdf7f2 109 r = store->getattr(ch, *obj, OI_ATTR, attr);
7c673cae
FG
110 if (r < 0) {
111 cerr << "Error getting attr on : " << make_pair(coll, *obj) << ", "
112 << cpp_strerror(r) << std::endl;
11fdf7f2
TL
113 } else {
114 auto bp = attr.cbegin();
115 try {
116 decode(oi, bp);
117 } catch (...) {
118 r = -EINVAL;
119 cerr << "Error decoding attr on : " << make_pair(coll, *obj) << ", "
120 << cpp_strerror(r) << std::endl;
121 }
122 }
7c673cae 123 }
11fdf7f2 124 action.call(store, coll, *obj, oi);
7c673cae
FG
125 }
126 }
127 return 0;
128}
129
130int action_on_all_objects_in_pg(ObjectStore *store, string pgidstr, action_on_object_t &action, bool debug)
131{
132 spg_t pgid;
133 // Scan collections in case this is an ec pool but no shard specified
134 unsigned scanned = 0;
135 int r = 0;
136 vector<coll_t> colls_to_check;
137 vector<coll_t> candidates;
138 r = store->list_collections(candidates);
139 if (r < 0) {
140 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
141 return r;
142 }
143 pgid.parse(pgidstr.c_str());
144 for (vector<coll_t>::iterator i = candidates.begin();
145 i != candidates.end();
146 ++i) {
147 spg_t cand_pgid;
148 if (!i->is_pg(&cand_pgid))
149 continue;
150
151 // If an exact match or treat no shard as any shard
152 if (cand_pgid == pgid ||
153 (pgid.is_no_shard() && pgid.pgid == cand_pgid.pgid)) {
154 colls_to_check.push_back(*i);
155 }
156 }
157
158 if (debug)
159 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
160 for (vector<coll_t>::iterator i = colls_to_check.begin();
161 i != colls_to_check.end();
162 ++i, ++scanned) {
163 if (debug)
164 cerr << "Scanning " << *i << ", " << scanned << "/"
165 << colls_to_check.size() << " completed" << std::endl;
166 r = _action_on_all_objects_in_pg(store, *i, action, debug);
167 if (r < 0)
168 break;
169 }
170 return r;
171}
172
173int action_on_all_objects_in_exact_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
174{
175 int r = _action_on_all_objects_in_pg(store, coll, action, debug);
176 return r;
177}
178
179int _action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
180{
181 unsigned scanned = 0;
182 int r = 0;
183 vector<coll_t> colls_to_check;
184 vector<coll_t> candidates;
185 r = store->list_collections(candidates);
186 if (r < 0) {
187 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
188 return r;
189 }
190 for (vector<coll_t>::iterator i = candidates.begin();
191 i != candidates.end();
192 ++i) {
193 if (i->is_pg()) {
194 colls_to_check.push_back(*i);
195 }
196 }
197
198 if (debug)
199 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
200 for (vector<coll_t>::iterator i = colls_to_check.begin();
201 i != colls_to_check.end();
202 ++i, ++scanned) {
203 if (debug)
204 cerr << "Scanning " << *i << ", " << scanned << "/"
205 << colls_to_check.size() << " completed" << std::endl;
206 r = _action_on_all_objects_in_pg(store, *i, action, debug);
207 if (r < 0)
208 return r;
209 }
210 return 0;
211}
212
213int action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
214{
215 int r = _action_on_all_objects(store, action, debug);
216 return r;
217}
218
219struct pgid_object_list {
220 list<pair<coll_t, ghobject_t> > _objects;
221
222 void insert(coll_t coll, ghobject_t &ghobj) {
223 _objects.push_back(make_pair(coll, ghobj));
224 }
225
226 void dump(Formatter *f, bool human_readable) const {
227 if (!human_readable)
228 f->open_array_section("pgid_objects");
229 for (list<pair<coll_t, ghobject_t> >::const_iterator i = _objects.begin();
230 i != _objects.end();
231 ++i) {
232 f->open_array_section("pgid_object");
233 spg_t pgid;
234 bool is_pg = i->first.is_pg(&pgid);
235 if (is_pg)
236 f->dump_string("pgid", stringify(pgid));
237 if (!is_pg || !human_readable)
238 f->dump_string("coll", i->first.to_str());
239 f->open_object_section("ghobject");
240 i->second.dump(f);
241 f->close_section();
242 f->close_section();
243 if (human_readable) {
244 f->flush(cout);
245 cout << std::endl;
246 }
247 }
248 if (!human_readable) {
249 f->close_section();
250 f->flush(cout);
251 cout << std::endl;
252 }
253 }
254};
255
256struct lookup_ghobject : public action_on_object_t {
257 pgid_object_list _objects;
258 const string _name;
259 const boost::optional<std::string> _namespace;
260 bool _need_snapset;
261
262 lookup_ghobject(const string& name, const boost::optional<std::string>& nspace, bool need_snapset = false) : _name(name),
263 _namespace(nspace), _need_snapset(need_snapset) { }
264
11fdf7f2 265 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
7c673cae 266 if (_need_snapset && !ghobj.hobj.has_snapset())
11fdf7f2 267 return;
7c673cae
FG
268 if ((_name.length() == 0 || ghobj.hobj.oid.name == _name) &&
269 (!_namespace || ghobj.hobj.nspace == _namespace))
270 _objects.insert(coll, ghobj);
11fdf7f2 271 return;
7c673cae
FG
272 }
273
274 int size() const {
275 return _objects._objects.size();
276 }
277
278 pair<coll_t, ghobject_t> pop() {
279 pair<coll_t, ghobject_t> front = _objects._objects.front();
280 _objects._objects.pop_front();
281 return front;
282 }
283
284 void dump(Formatter *f, bool human_readable) const {
285 _objects.dump(f, human_readable);
286 }
287};
288
7c673cae 289int file_fd = fd_none;
3efd9988 290bool debug;
11fdf7f2 291bool force = false;
7c673cae 292super_header sh;
7c673cae
FG
293
294static int get_fd_data(int fd, bufferlist &bl)
295{
296 uint64_t total = 0;
297 do {
298 ssize_t bytes = bl.read_fd(fd, max_read);
299 if (bytes < 0) {
300 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
301 return bytes;
302 }
303
304 if (bytes == 0)
305 break;
306
307 total += bytes;
308 } while(true);
309
11fdf7f2 310 ceph_assert(bl.length() == total);
7c673cae
FG
311 return 0;
312}
313
314int get_log(ObjectStore *fs, __u8 struct_ver,
11fdf7f2 315 spg_t pgid, const pg_info_t &info,
7c673cae
FG
316 PGLog::IndexedLog &log, pg_missing_t &missing)
317{
318 try {
11fdf7f2
TL
319 auto ch = fs->open_collection(coll_t(pgid));
320 if (!ch) {
321 return -ENOENT;
322 }
7c673cae 323 ostringstream oss;
11fdf7f2
TL
324 ceph_assert(struct_ver > 0);
325 PGLog::read_log_and_missing(
326 fs, ch,
327 pgid.make_pgmeta_oid(),
328 info, log, missing,
329 oss,
330 g_ceph_context->_conf->osd_ignore_stale_divergent_priors);
7c673cae
FG
331 if (debug && oss.str().size())
332 cerr << oss.str() << std::endl;
333 }
334 catch (const buffer::error &e) {
335 cerr << "read_log_and_missing threw exception error " << e.what() << std::endl;
336 return -EFAULT;
337 }
338 return 0;
339}
340
341void dump_log(Formatter *formatter, ostream &out, pg_log_t &log,
342 pg_missing_t &missing)
343{
344 formatter->open_object_section("op_log");
345 formatter->open_object_section("pg_log_t");
346 log.dump(formatter);
347 formatter->close_section();
348 formatter->flush(out);
349 formatter->open_object_section("pg_missing_t");
350 missing.dump(formatter);
351 formatter->close_section();
7c673cae
FG
352 formatter->close_section();
353 formatter->flush(out);
354}
355
356//Based on part of OSD::load_pgs()
357int finish_remove_pgs(ObjectStore *store)
358{
359 vector<coll_t> ls;
360 int r = store->list_collections(ls);
361 if (r < 0) {
362 cerr << "finish_remove_pgs: failed to list pgs: " << cpp_strerror(r)
363 << std::endl;
364 return r;
365 }
366
367 for (vector<coll_t>::iterator it = ls.begin();
368 it != ls.end();
369 ++it) {
370 spg_t pgid;
371
372 if (it->is_temp(&pgid) ||
373 (it->is_pg(&pgid) && PG::_has_removal_flag(store, pgid))) {
374 cout << "finish_remove_pgs " << *it << " removing " << pgid << std::endl;
375 OSD::recursive_remove_collection(g_ceph_context, store, pgid, *it);
376 continue;
377 }
378
379 //cout << "finish_remove_pgs ignoring unrecognized " << *it << std::endl;
380 }
381 return 0;
382}
383
384#pragma GCC diagnostic ignored "-Wpragmas"
385#pragma GCC diagnostic push
386#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
387
388int mark_pg_for_removal(ObjectStore *fs, spg_t pgid, ObjectStore::Transaction *t)
389{
390 pg_info_t info(pgid);
391 coll_t coll(pgid);
392 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
393
7c673cae 394 epoch_t map_epoch = 0;
11fdf7f2 395 int r = PG::peek_map_epoch(fs, pgid, &map_epoch);
7c673cae
FG
396 if (r < 0)
397 cerr << __func__ << " warning: peek_map_epoch reported error" << std::endl;
398 PastIntervals past_intervals;
399 __u8 struct_v;
11fdf7f2 400 r = PG::read_info(fs, pgid, coll, info, past_intervals, struct_v);
7c673cae
FG
401 if (r < 0) {
402 cerr << __func__ << " error on read_info " << cpp_strerror(r) << std::endl;
403 return r;
404 }
11fdf7f2 405 ceph_assert(struct_v >= 8);
7c673cae
FG
406 // new omap key
407 cout << "setting '_remove' omap key" << std::endl;
408 map<string,bufferlist> values;
11fdf7f2 409 encode((char)1, values["_remove"]);
7c673cae
FG
410 t->omap_setkeys(coll, pgmeta_oid, values);
411 return 0;
412}
413
414#pragma GCC diagnostic pop
415#pragma GCC diagnostic warning "-Wpragmas"
416
11fdf7f2
TL
417template<typename Func>
418void wait_until_done(ObjectStore::Transaction* txn, Func&& func)
419{
420 bool finished = false;
421 std::condition_variable cond;
422 std::mutex m;
423 txn->register_on_complete(make_lambda_context([&]() {
424 std::unique_lock lock{m};
425 finished = true;
426 cond.notify_one();
427 }));
428 std::move(func)();
429 std::unique_lock lock{m};
430 cond.wait(lock, [&] {return finished;});
431}
432
433int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid)
7c673cae
FG
434{
435 if (!dry_run)
436 finish_remove_pgs(store);
437 if (!store->collection_exists(coll_t(r_pgid)))
438 return -ENOENT;
439
440 cout << " marking collection for removal" << std::endl;
441 if (dry_run)
442 return 0;
443 ObjectStore::Transaction rmt;
444 int r = mark_pg_for_removal(store, r_pgid, &rmt);
445 if (r < 0) {
446 return r;
447 }
11fdf7f2
TL
448 ObjectStore::CollectionHandle ch = store->open_collection(coll_t(r_pgid));
449 store->queue_transaction(ch, std::move(rmt));
7c673cae
FG
450 finish_remove_pgs(store);
451 return r;
452}
453
454int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
455 PastIntervals &past_intervals)
456{
457 //Empty for this
458 coll_t coll(info.pgid);
459 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
460 map<string,bufferlist> km;
461 pg_info_t last_written_info;
462 int ret = PG::_prepare_write_info(
463 g_ceph_context,
464 &km, epoch,
465 info,
466 last_written_info,
467 past_intervals,
468 true, true, false);
469 if (ret) cerr << "Failed to write info" << std::endl;
470 t.omap_setkeys(coll, pgmeta_oid, km);
471 return ret;
472}
473
474typedef map<eversion_t, hobject_t> divergent_priors_t;
475
476int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
477 pg_log_t &log, PastIntervals &past_intervals,
478 divergent_priors_t &divergent,
479 pg_missing_t &missing)
480{
11fdf7f2 481 cout << __func__ << " epoch " << epoch << " info " << info << std::endl;
7c673cae
FG
482 int ret = write_info(t, epoch, info, past_intervals);
483 if (ret)
484 return ret;
485 coll_t coll(info.pgid);
486 map<string,bufferlist> km;
487
488 if (!divergent.empty()) {
11fdf7f2 489 ceph_assert(missing.get_items().empty());
7c673cae 490 PGLog::write_log_and_missing_wo_missing(
181888fb 491 t, &km, log, coll, info.pgid.make_pgmeta_oid(), divergent, true);
7c673cae
FG
492 } else {
493 pg_missing_tracker_t tmissing(missing);
c07f9fc5 494 bool rebuilt_missing_set_with_deletes = missing.may_include_deletes;
7c673cae 495 PGLog::write_log_and_missing(
181888fb 496 t, &km, log, coll, info.pgid.make_pgmeta_oid(), tmissing, true,
c07f9fc5 497 &rebuilt_missing_set_with_deletes);
7c673cae
FG
498 }
499 t.omap_setkeys(coll, info.pgid.make_pgmeta_oid(), km);
500 return 0;
501}
502
94b18763
FG
503int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
504 pg_info_t &info, const spg_t &pgid,
11fdf7f2 505 epoch_t map_epoch,
94b18763
FG
506 PastIntervals &past_intervals)
507{
508 ghobject_t oid = pgid.make_pgmeta_oid();
509 struct stat st;
11fdf7f2
TL
510 auto ch = store->open_collection(coll);
511 int r = store->stat(ch, oid, &st);
512 ceph_assert(r == 0);
513 ceph_assert(st.st_size == 0);
94b18763
FG
514
515 cerr << "Log bounds are: " << "(" << info.log_tail << ","
516 << info.last_update << "]" << std::endl;
517
518 uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
519 if (info.last_update.version - info.log_tail.version <= max_entries) {
520 cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
521 return 0;
522 }
523
11fdf7f2 524 ceph_assert(info.last_update.version > max_entries);
94b18763
FG
525 version_t trim_to = info.last_update.version - max_entries;
526 size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
527 eversion_t new_tail;
528 bool done = false;
529
530 while (!done) {
531 // gather keys so we can delete them in a batch without
532 // affecting the iterator
533 set<string> keys_to_trim;
534 {
11fdf7f2 535 ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
94b18763
FG
536 if (!p)
537 break;
11fdf7f2 538 for (p->seek_to_first(); p->valid(); p->next()) {
94b18763
FG
539 if (p->key()[0] == '_')
540 continue;
541 if (p->key() == "can_rollback_to")
542 continue;
543 if (p->key() == "divergent_priors")
544 continue;
545 if (p->key() == "rollback_info_trimmed_to")
546 continue;
547 if (p->key() == "may_include_deletes_in_missing")
548 continue;
549 if (p->key().substr(0, 7) == string("missing"))
550 continue;
551 if (p->key().substr(0, 4) == string("dup_"))
552 continue;
553
554 bufferlist bl = p->value();
11fdf7f2 555 auto bp = bl.cbegin();
94b18763
FG
556 pg_log_entry_t e;
557 try {
558 e.decode_with_checksum(bp);
559 } catch (const buffer::error &e) {
560 cerr << "Error reading pg log entry: " << e << std::endl;
561 }
562 if (debug) {
563 cerr << "read entry " << e << std::endl;
564 }
565 if (e.version.version > trim_to) {
566 done = true;
567 break;
568 }
569 keys_to_trim.insert(p->key());
570 new_tail = e.version;
571 if (keys_to_trim.size() >= trim_at_once)
572 break;
573 }
574
575 if (!p->valid())
576 done = true;
577 } // deconstruct ObjectMapIterator
578
579 // delete the keys
580 if (!dry_run && !keys_to_trim.empty()) {
581 cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
582 ObjectStore::Transaction t;
583 t.omap_rmkeys(coll, oid, keys_to_trim);
11fdf7f2
TL
584 store->queue_transaction(ch, std::move(t));
585 ch->flush();
94b18763
FG
586 }
587 }
588
589 // update pg info with new tail
590 if (!dry_run && new_tail != eversion_t()) {
591 info.log_tail = new_tail;
592 ObjectStore::Transaction t;
593 int ret = write_info(t, map_epoch, info, past_intervals);
594 if (ret)
595 return ret;
11fdf7f2
TL
596 store->queue_transaction(ch, std::move(t));
597 ch->flush();
94b18763
FG
598 }
599
600 // compact the db since we just removed a bunch of data
601 cerr << "Finished trimming, now compacting..." << std::endl;
602 if (!dry_run)
603 store->compact();
604 return 0;
605}
606
7c673cae
FG
607const int OMAP_BATCH_SIZE = 25;
608void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map<string, bufferlist> &oset)
609{
610 oset.clear();
611 for (int count = OMAP_BATCH_SIZE; count && iter->valid(); --count, iter->next()) {
612 oset.insert(pair<string, bufferlist>(iter->key(), iter->value()));
613 }
614}
615
616int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj)
617{
618 struct stat st;
619 mysize_t total;
620 footer ft;
621
11fdf7f2
TL
622 auto ch = store->open_collection(cid);
623 int ret = store->stat(ch, obj, &st);
7c673cae
FG
624 if (ret < 0)
625 return ret;
626
627 cerr << "Read " << obj << std::endl;
628
629 total = st.st_size;
630 if (debug)
631 cerr << "size=" << total << std::endl;
632
633 object_begin objb(obj);
634
635 {
636 bufferptr bp;
637 bufferlist bl;
11fdf7f2 638 ret = store->getattr(ch, obj, OI_ATTR, bp);
7c673cae
FG
639 if (ret < 0) {
640 cerr << "getattr failure object_info " << ret << std::endl;
641 return ret;
642 }
643 bl.push_back(bp);
644 decode(objb.oi, bl);
645 if (debug)
646 cerr << "object_info: " << objb.oi << std::endl;
647 }
648
649 // NOTE: we include whiteouts, lost, etc.
650
651 ret = write_section(TYPE_OBJECT_BEGIN, objb, file_fd);
652 if (ret < 0)
653 return ret;
654
655 uint64_t offset = 0;
656 bufferlist rawdatabl;
657 while(total > 0) {
658 rawdatabl.clear();
659 mysize_t len = max_read;
660 if (len > total)
661 len = total;
662
11fdf7f2 663 ret = store->read(ch, obj, offset, len, rawdatabl);
7c673cae
FG
664 if (ret < 0)
665 return ret;
666 if (ret == 0)
667 return -EINVAL;
668
669 data_section dblock(offset, len, rawdatabl);
670 if (debug)
671 cerr << "data section offset=" << offset << " len=" << len << std::endl;
672
673 total -= ret;
674 offset += ret;
675
676 ret = write_section(TYPE_DATA, dblock, file_fd);
677 if (ret) return ret;
678 }
679
680 //Handle attrs for this object
681 map<string,bufferptr> aset;
11fdf7f2 682 ret = store->getattrs(ch, obj, aset);
7c673cae
FG
683 if (ret) return ret;
684 attr_section as(aset);
685 ret = write_section(TYPE_ATTRS, as, file_fd);
686 if (ret)
687 return ret;
688
689 if (debug) {
690 cerr << "attrs size " << aset.size() << std::endl;
691 }
692
693 //Handle omap information
694 bufferlist hdrbuf;
11fdf7f2 695 ret = store->omap_get_header(ch, obj, &hdrbuf, true);
7c673cae
FG
696 if (ret < 0) {
697 cerr << "omap_get_header: " << cpp_strerror(ret) << std::endl;
698 return ret;
699 }
700
701 omap_hdr_section ohs(hdrbuf);
702 ret = write_section(TYPE_OMAP_HDR, ohs, file_fd);
703 if (ret)
704 return ret;
705
11fdf7f2 706 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, obj);
7c673cae
FG
707 if (!iter) {
708 ret = -ENOENT;
709 cerr << "omap_get_iterator: " << cpp_strerror(ret) << std::endl;
710 return ret;
711 }
712 iter->seek_to_first();
713 int mapcount = 0;
714 map<string, bufferlist> out;
715 while(iter->valid()) {
716 get_omap_batch(iter, out);
717
718 if (out.empty()) break;
719
720 mapcount += out.size();
721 omap_section oms(out);
722 ret = write_section(TYPE_OMAP, oms, file_fd);
723 if (ret)
724 return ret;
725 }
726 if (debug)
727 cerr << "omap map size " << mapcount << std::endl;
728
729 ret = write_simple(TYPE_OBJECT_END, file_fd);
730 if (ret)
731 return ret;
732
733 return 0;
734}
735
736int ObjectStoreTool::export_files(ObjectStore *store, coll_t coll)
737{
738 ghobject_t next;
11fdf7f2 739 auto ch = store->open_collection(coll);
7c673cae
FG
740 while (!next.is_max()) {
741 vector<ghobject_t> objects;
11fdf7f2 742 int r = store->collection_list(ch, next, ghobject_t::get_max(), 300,
7c673cae
FG
743 &objects, &next);
744 if (r < 0)
745 return r;
746 for (vector<ghobject_t>::iterator i = objects.begin();
747 i != objects.end();
748 ++i) {
11fdf7f2
TL
749 ceph_assert(!i->hobj.is_meta());
750 if (i->is_pgmeta() || i->hobj.is_temp() || !i->is_no_gen()) {
7c673cae
FG
751 continue;
752 }
753 r = export_file(store, coll, *i);
754 if (r < 0)
755 return r;
756 }
757 }
758 return 0;
759}
760
11fdf7f2 761int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
7c673cae 762 OSDMap::Incremental inc;
11fdf7f2 763 auto it = bl.cbegin();
7c673cae
FG
764 inc.decode(it);
765 if (e == 0) {
766 e = inc.epoch;
767 } else if (e != inc.epoch) {
768 cerr << "incremental.epoch mismatch: "
769 << inc.epoch << " != " << e << std::endl;
770 if (force) {
771 cerr << "But will continue anyway." << std::endl;
772 } else {
773 return -EINVAL;
774 }
775 }
11fdf7f2 776 auto ch = store->open_collection(coll_t::meta());
7c673cae 777 const ghobject_t inc_oid = OSD::get_inc_osdmap_pobject_name(e);
11fdf7f2 778 if (!store->exists(ch, inc_oid)) {
7c673cae
FG
779 cerr << "inc-osdmap (" << inc_oid << ") does not exist." << std::endl;
780 if (!force) {
781 return -ENOENT;
782 }
783 cout << "Creating a new epoch." << std::endl;
784 }
785 if (dry_run)
786 return 0;
787 ObjectStore::Transaction t;
788 t.write(coll_t::meta(), inc_oid, 0, bl.length(), bl);
789 t.truncate(coll_t::meta(), inc_oid, bl.length());
11fdf7f2
TL
790 store->queue_transaction(ch, std::move(t));
791 return 0;
7c673cae
FG
792}
793
794int get_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl)
795{
11fdf7f2
TL
796 auto ch = store->open_collection(coll_t::meta());
797 if (store->read(ch,
7c673cae
FG
798 OSD::get_inc_osdmap_pobject_name(e),
799 0, 0, bl) < 0) {
800 return -ENOENT;
801 }
802 return 0;
803}
804
11fdf7f2 805int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
7c673cae
FG
806 OSDMap osdmap;
807 osdmap.decode(bl);
808 if (e == 0) {
809 e = osdmap.get_epoch();
810 } else if (e != osdmap.get_epoch()) {
811 cerr << "osdmap.epoch mismatch: "
812 << e << " != " << osdmap.get_epoch() << std::endl;
813 if (force) {
814 cerr << "But will continue anyway." << std::endl;
815 } else {
816 return -EINVAL;
817 }
818 }
11fdf7f2 819 auto ch = store->open_collection(coll_t::meta());
7c673cae 820 const ghobject_t full_oid = OSD::get_osdmap_pobject_name(e);
11fdf7f2 821 if (!store->exists(ch, full_oid)) {
7c673cae
FG
822 cerr << "osdmap (" << full_oid << ") does not exist." << std::endl;
823 if (!force) {
824 return -ENOENT;
825 }
826 cout << "Creating a new epoch." << std::endl;
827 }
828 if (dry_run)
829 return 0;
830 ObjectStore::Transaction t;
831 t.write(coll_t::meta(), full_oid, 0, bl.length(), bl);
832 t.truncate(coll_t::meta(), full_oid, bl.length());
11fdf7f2
TL
833 store->queue_transaction(ch, std::move(t));
834 return 0;
7c673cae
FG
835}
836
837int get_osdmap(ObjectStore *store, epoch_t e, OSDMap &osdmap, bufferlist& bl)
838{
11fdf7f2 839 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
7c673cae 840 bool found = store->read(
11fdf7f2 841 ch, OSD::get_osdmap_pobject_name(e), 0, 0, bl) >= 0;
7c673cae
FG
842 if (!found) {
843 cerr << "Can't find OSDMap for pg epoch " << e << std::endl;
844 return -ENOENT;
845 }
846 osdmap.decode(bl);
847 if (debug)
848 cerr << osdmap << std::endl;
849 return 0;
850}
851
11fdf7f2
TL
852int get_pg_num_history(ObjectStore *store, pool_pg_num_history_t *h)
853{
854 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
855 bufferlist bl;
856 auto pghist = OSD::make_pg_num_history_oid();
857 int r = store->read(ch, pghist, 0, 0, bl, 0);
858 if (r >= 0 && bl.length() > 0) {
859 auto p = bl.cbegin();
860 decode(*h, p);
861 }
862 cout << __func__ << " pg_num_history " << *h << std::endl;
863 return 0;
864}
865
7c673cae
FG
866int add_osdmap(ObjectStore *store, metadata_section &ms)
867{
868 return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl);
869}
870
871int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid,
872 pg_info_t &info, epoch_t map_epoch, __u8 struct_ver,
873 const OSDSuperblock& superblock,
874 PastIntervals &past_intervals)
875{
876 PGLog::IndexedLog log;
877 pg_missing_t missing;
878
11fdf7f2 879 cerr << "Exporting " << pgid << " info " << info << std::endl;
7c673cae 880
11fdf7f2 881 int ret = get_log(fs, struct_ver, pgid, info, log, missing);
7c673cae
FG
882 if (ret > 0)
883 return ret;
884
885 if (debug) {
886 Formatter *formatter = Formatter::create("json-pretty");
11fdf7f2 887 ceph_assert(formatter);
7c673cae
FG
888 dump_log(formatter, cerr, log, missing);
889 delete formatter;
890 }
891 write_super();
892
893 pg_begin pgb(pgid, superblock);
894 // Special case: If replicated pg don't require the importing OSD to have shard feature
895 if (pgid.is_no_shard()) {
896 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
897 }
898 ret = write_section(TYPE_PG_BEGIN, pgb, file_fd);
899 if (ret)
900 return ret;
901
902 // The metadata_section is now before files, so import can detect
903 // errors and abort without wasting time.
904 metadata_section ms(
905 struct_ver,
906 map_epoch,
907 info,
908 log,
909 past_intervals,
910 missing);
911 ret = add_osdmap(fs, ms);
912 if (ret)
913 return ret;
914 ret = write_section(TYPE_PG_METADATA, ms, file_fd);
915 if (ret)
916 return ret;
917
918 ret = export_files(fs, coll);
919 if (ret) {
920 cerr << "export_files error " << ret << std::endl;
921 return ret;
922 }
923
924 ret = write_simple(TYPE_PG_END, file_fd);
925 if (ret)
926 return ret;
927
928 return 0;
929}
930
b32b8144
FG
931int dump_data(Formatter *formatter, bufferlist &bl)
932{
11fdf7f2 933 auto ebliter = bl.cbegin();
b32b8144
FG
934 data_section ds;
935 ds.decode(ebliter);
936
937 formatter->open_object_section("data_block");
938 formatter->dump_unsigned("offset", ds.offset);
939 formatter->dump_unsigned("len", ds.len);
940 // XXX: Add option to dump data like od -cx ?
941 formatter->close_section();
942 formatter->flush(cout);
943 return 0;
944}
945
7c673cae
FG
946int get_data(ObjectStore *store, coll_t coll, ghobject_t hoid,
947 ObjectStore::Transaction *t, bufferlist &bl)
948{
11fdf7f2 949 auto ebliter = bl.cbegin();
7c673cae
FG
950 data_section ds;
951 ds.decode(ebliter);
952
953 if (debug)
954 cerr << "\tdata: offset " << ds.offset << " len " << ds.len << std::endl;
955 t->write(coll, hoid, ds.offset, ds.len, ds.databl);
956 return 0;
957}
958
b32b8144
FG
959int dump_attrs(
960 Formatter *formatter, ghobject_t hoid,
961 bufferlist &bl)
962{
11fdf7f2 963 auto ebliter = bl.cbegin();
b32b8144
FG
964 attr_section as;
965 as.decode(ebliter);
966
967 // This could have been handled in the caller if we didn't need to
968 // support exports that didn't include object_info_t in object_begin.
969 if (hoid.generation == ghobject_t::NO_GEN &&
970 hoid.hobj.is_head()) {
971 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
972 if (mi != as.data.end()) {
973 SnapSet snapset;
11fdf7f2 974 auto p = mi->second.cbegin();
b32b8144
FG
975 snapset.decode(p);
976 formatter->open_object_section("snapset");
977 snapset.dump(formatter);
978 formatter->close_section();
979 } else {
980 formatter->open_object_section("snapset");
981 formatter->dump_string("error", "missing SS_ATTR");
982 formatter->close_section();
983 }
984 }
985
986 formatter->open_object_section("attrs");
987 formatter->open_array_section("user");
988 for (auto kv : as.data) {
989 // Skip system attributes
990 if (('_' != kv.first.at(0)) || kv.first.size() == 1)
991 continue;
992 formatter->open_object_section("user_attr");
993 formatter->dump_string("name", kv.first.substr(1));
994 bool b64;
995 formatter->dump_string("value", cleanbin(kv.second, b64));
996 formatter->dump_bool("Base64", b64);
997 formatter->close_section();
998 }
999 formatter->close_section();
1000 formatter->open_array_section("system");
1001 for (auto kv : as.data) {
1002 // Skip user attributes
1003 if (('_' == kv.first.at(0)) && kv.first.size() != 1)
1004 continue;
1005 formatter->open_object_section("sys_attr");
1006 formatter->dump_string("name", kv.first);
1007 formatter->close_section();
1008 }
1009 formatter->close_section();
1010 formatter->close_section();
1011 formatter->flush(cout);
1012
1013 return 0;
1014}
1015
7c673cae
FG
1016int get_attrs(
1017 ObjectStore *store, coll_t coll, ghobject_t hoid,
1018 ObjectStore::Transaction *t, bufferlist &bl,
224ce89b 1019 OSDriver &driver, SnapMapper &snap_mapper)
7c673cae 1020{
11fdf7f2 1021 auto ebliter = bl.cbegin();
7c673cae
FG
1022 attr_section as;
1023 as.decode(ebliter);
1024
11fdf7f2 1025 auto ch = store->open_collection(coll);
7c673cae
FG
1026 if (debug)
1027 cerr << "\tattrs: len " << as.data.size() << std::endl;
1028 t->setattrs(coll, hoid, as.data);
1029
1030 // This could have been handled in the caller if we didn't need to
1031 // support exports that didn't include object_info_t in object_begin.
11fdf7f2
TL
1032 if (hoid.generation == ghobject_t::NO_GEN &&
1033 hoid.hobj.is_head()) {
1034 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1035 if (mi != as.data.end()) {
1036 SnapSet snapset;
1037 auto p = mi->second.cbegin();
1038 snapset.decode(p);
1039 cout << "snapset " << snapset << std::endl;
1040 for (auto& p : snapset.clone_snaps) {
1041 ghobject_t clone = hoid;
1042 clone.hobj.snap = p.first;
1043 set<snapid_t> snaps(p.second.begin(), p.second.end());
1044 if (!store->exists(ch, clone)) {
1045 // no clone, skip. this is probably a cache pool. this works
1046 // because we use a separate transaction per object and clones
1047 // come before head in the archive.
7c673cae 1048 if (debug)
11fdf7f2
TL
1049 cerr << "\tskipping missing " << clone << " (snaps "
1050 << snaps << ")" << std::endl;
1051 continue;
7c673cae 1052 }
11fdf7f2
TL
1053 if (debug)
1054 cerr << "\tsetting " << clone.hobj << " snaps " << snaps
1055 << std::endl;
1056 OSDriver::OSTransaction _t(driver.get_transaction(t));
1057 ceph_assert(!snaps.empty());
1058 snap_mapper.add_oid(clone.hobj, snaps, &_t);
7c673cae
FG
1059 }
1060 } else {
11fdf7f2 1061 cerr << "missing SS_ATTR on " << hoid << std::endl;
7c673cae
FG
1062 }
1063 }
7c673cae
FG
1064 return 0;
1065}
1066
b32b8144
FG
1067int dump_omap_hdr(Formatter *formatter, bufferlist &bl)
1068{
11fdf7f2 1069 auto ebliter = bl.cbegin();
b32b8144
FG
1070 omap_hdr_section oh;
1071 oh.decode(ebliter);
1072
1073 formatter->open_object_section("omap_header");
1074 formatter->dump_string("value", string(oh.hdr.c_str(), oh.hdr.length()));
1075 formatter->close_section();
1076 formatter->flush(cout);
1077 return 0;
1078}
1079
7c673cae
FG
1080int get_omap_hdr(ObjectStore *store, coll_t coll, ghobject_t hoid,
1081 ObjectStore::Transaction *t, bufferlist &bl)
1082{
11fdf7f2 1083 auto ebliter = bl.cbegin();
7c673cae
FG
1084 omap_hdr_section oh;
1085 oh.decode(ebliter);
1086
1087 if (debug)
1088 cerr << "\tomap header: " << string(oh.hdr.c_str(), oh.hdr.length())
1089 << std::endl;
1090 t->omap_setheader(coll, hoid, oh.hdr);
1091 return 0;
1092}
1093
b32b8144
FG
1094int dump_omap(Formatter *formatter, bufferlist &bl)
1095{
11fdf7f2 1096 auto ebliter = bl.cbegin();
b32b8144
FG
1097 omap_section os;
1098 os.decode(ebliter);
1099
1100 formatter->open_object_section("omaps");
1101 formatter->dump_unsigned("count", os.omap.size());
1102 formatter->open_array_section("data");
1103 for (auto o : os.omap) {
1104 formatter->open_object_section("omap");
1105 formatter->dump_string("name", o.first);
1106 bool b64;
1107 formatter->dump_string("value", cleanbin(o.second, b64));
1108 formatter->dump_bool("Base64", b64);
1109 formatter->close_section();
1110 }
1111 formatter->close_section();
1112 formatter->close_section();
1113 formatter->flush(cout);
1114 return 0;
1115}
1116
7c673cae
FG
1117int get_omap(ObjectStore *store, coll_t coll, ghobject_t hoid,
1118 ObjectStore::Transaction *t, bufferlist &bl)
1119{
11fdf7f2 1120 auto ebliter = bl.cbegin();
7c673cae
FG
1121 omap_section os;
1122 os.decode(ebliter);
1123
1124 if (debug)
1125 cerr << "\tomap: size " << os.omap.size() << std::endl;
1126 t->omap_setkeys(coll, hoid, os.omap);
1127 return 0;
1128}
1129
b32b8144
FG
1130int ObjectStoreTool::dump_object(Formatter *formatter,
1131 bufferlist &bl)
1132{
11fdf7f2 1133 auto ebliter = bl.cbegin();
b32b8144
FG
1134 object_begin ob;
1135 ob.decode(ebliter);
1136
1137 if (ob.hoid.hobj.is_temp()) {
1138 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1139 return -EFAULT;
1140 }
1141
1142 formatter->open_object_section("object");
1143 formatter->open_object_section("oid");
1144 ob.hoid.dump(formatter);
1145 formatter->close_section();
1146 formatter->open_object_section("object_info");
1147 ob.oi.dump(formatter);
1148 formatter->close_section();
1149
1150 bufferlist ebl;
1151 bool done = false;
1152 while(!done) {
1153 sectiontype_t type;
1154 int ret = read_section(&type, &ebl);
1155 if (ret)
1156 return ret;
1157
1158 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1159 //cout << "\t\tsection size " << ebl.length() << std::endl;
1160 if (type >= END_OF_TYPES) {
1161 cout << "Skipping unknown object section type" << std::endl;
1162 continue;
1163 }
1164 switch(type) {
1165 case TYPE_DATA:
1166 if (dry_run) break;
1167 ret = dump_data(formatter, ebl);
1168 if (ret) return ret;
1169 break;
1170 case TYPE_ATTRS:
1171 if (dry_run) break;
1172 ret = dump_attrs(formatter, ob.hoid, ebl);
1173 if (ret) return ret;
1174 break;
1175 case TYPE_OMAP_HDR:
1176 if (dry_run) break;
1177 ret = dump_omap_hdr(formatter, ebl);
1178 if (ret) return ret;
1179 break;
1180 case TYPE_OMAP:
1181 if (dry_run) break;
1182 ret = dump_omap(formatter, ebl);
1183 if (ret) return ret;
1184 break;
1185 case TYPE_OBJECT_END:
1186 done = true;
1187 break;
1188 default:
1189 cerr << "Unknown section type " << type << std::endl;
1190 return -EFAULT;
1191 }
1192 }
1193 formatter->close_section();
1194 return 0;
1195}
1196
11fdf7f2
TL
1197int ObjectStoreTool::get_object(ObjectStore *store,
1198 OSDriver& driver,
1199 SnapMapper& mapper,
1200 coll_t coll,
1201 bufferlist &bl, OSDMap &origmap,
1202 bool *skipped_objects)
7c673cae
FG
1203{
1204 ObjectStore::Transaction tran;
1205 ObjectStore::Transaction *t = &tran;
11fdf7f2 1206 auto ebliter = bl.cbegin();
7c673cae
FG
1207 object_begin ob;
1208 ob.decode(ebliter);
7c673cae
FG
1209
1210 if (ob.hoid.hobj.is_temp()) {
1211 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1212 return -EFAULT;
1213 }
11fdf7f2
TL
1214 ceph_assert(g_ceph_context);
1215
1216 auto ch = store->open_collection(coll);
7c673cae
FG
1217 if (ob.hoid.hobj.nspace != g_ceph_context->_conf->osd_hit_set_namespace) {
1218 object_t oid = ob.hoid.hobj.oid;
1219 object_locator_t loc(ob.hoid.hobj);
11fdf7f2
TL
1220 pg_t raw_pgid = origmap.object_locator_to_pg(oid, loc);
1221 pg_t pgid = origmap.raw_pg_to_pg(raw_pgid);
7c673cae
FG
1222
1223 spg_t coll_pgid;
1224 if (coll.is_pg(&coll_pgid) == false) {
1225 cerr << "INTERNAL ERROR: Bad collection during import" << std::endl;
1226 return -EFAULT;
1227 }
1228 if (coll_pgid.shard != ob.hoid.shard_id) {
1229 cerr << "INTERNAL ERROR: Importing shard " << coll_pgid.shard
1230 << " but object shard is " << ob.hoid.shard_id << std::endl;
1231 return -EFAULT;
1232 }
1233
1234 if (coll_pgid.pgid != pgid) {
1235 cerr << "Skipping object '" << ob.hoid << "' which belongs in pg " << pgid << std::endl;
1236 *skipped_objects = true;
1237 skip_object(bl);
1238 return 0;
1239 }
1240 }
1241
1242 if (!dry_run)
1243 t->touch(coll, ob.hoid);
1244
1245 cout << "Write " << ob.hoid << std::endl;
1246
7c673cae
FG
1247 bufferlist ebl;
1248 bool done = false;
1249 while(!done) {
1250 sectiontype_t type;
1251 int ret = read_section(&type, &ebl);
1252 if (ret)
1253 return ret;
1254
1255 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1256 //cout << "\t\tsection size " << ebl.length() << std::endl;
1257 if (type >= END_OF_TYPES) {
1258 cout << "Skipping unknown object section type" << std::endl;
1259 continue;
1260 }
1261 switch(type) {
1262 case TYPE_DATA:
1263 if (dry_run) break;
1264 ret = get_data(store, coll, ob.hoid, t, ebl);
1265 if (ret) return ret;
1266 break;
1267 case TYPE_ATTRS:
1268 if (dry_run) break;
224ce89b 1269 ret = get_attrs(store, coll, ob.hoid, t, ebl, driver, mapper);
7c673cae
FG
1270 if (ret) return ret;
1271 break;
1272 case TYPE_OMAP_HDR:
1273 if (dry_run) break;
1274 ret = get_omap_hdr(store, coll, ob.hoid, t, ebl);
1275 if (ret) return ret;
1276 break;
1277 case TYPE_OMAP:
1278 if (dry_run) break;
1279 ret = get_omap(store, coll, ob.hoid, t, ebl);
1280 if (ret) return ret;
1281 break;
1282 case TYPE_OBJECT_END:
1283 done = true;
1284 break;
1285 default:
1286 cerr << "Unknown section type " << type << std::endl;
1287 return -EFAULT;
1288 }
1289 }
11fdf7f2
TL
1290 if (!dry_run) {
1291 wait_until_done(t, [&] {
1292 store->queue_transaction(ch, std::move(*t));
1293 ch->flush();
1294 });
1295 }
7c673cae
FG
1296 return 0;
1297}
1298
b32b8144
FG
1299int dump_pg_metadata(Formatter *formatter, bufferlist &bl, metadata_section &ms)
1300{
11fdf7f2 1301 auto ebliter = bl.cbegin();
b32b8144
FG
1302 ms.decode(ebliter);
1303
1304 formatter->open_object_section("metadata_section");
1305
1306 formatter->dump_unsigned("pg_disk_version", (int)ms.struct_ver);
1307 formatter->dump_unsigned("map_epoch", ms.map_epoch);
1308
1309 formatter->open_object_section("OSDMap");
1310 ms.osdmap.dump(formatter);
1311 formatter->close_section();
1312 formatter->flush(cout);
1313 cout << std::endl;
1314
1315 formatter->open_object_section("info");
1316 ms.info.dump(formatter);
1317 formatter->close_section();
1318 formatter->flush(cout);
1319
1320 formatter->open_object_section("log");
1321 ms.log.dump(formatter);
1322 formatter->close_section();
1323 formatter->flush(cout);
1324
1325 formatter->open_object_section("pg_missing_t");
1326 ms.missing.dump(formatter);
1327 formatter->close_section();
1328
1329 // XXX: ms.past_intervals?
1330
1331 formatter->close_section();
1332 formatter->flush(cout);
1333
1334 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1335 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1336 return -EFAULT;
1337 }
1338
1339 return 0;
1340}
1341
7c673cae 1342int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms,
11fdf7f2 1343 const OSDSuperblock& sb, spg_t pgid)
7c673cae 1344{
11fdf7f2 1345 auto ebliter = bl.cbegin();
7c673cae
FG
1346 ms.decode(ebliter);
1347 spg_t old_pgid = ms.info.pgid;
1348 ms.info.pgid = pgid;
1349
11fdf7f2
TL
1350 if (debug) {
1351 cout << "export pgid " << old_pgid << std::endl;
1352 cout << "struct_v " << (int)ms.struct_ver << std::endl;
1353 cout << "map epoch " << ms.map_epoch << std::endl;
7c673cae 1354
11fdf7f2
TL
1355#ifdef DIAGNOSTIC
1356 Formatter *formatter = new JSONFormatter(true);
1357 formatter->open_object_section("stuff");
7c673cae 1358
11fdf7f2
TL
1359 formatter->open_object_section("importing OSDMap");
1360 ms.osdmap.dump(formatter);
1361 formatter->close_section();
1362 formatter->flush(cout);
1363 cout << std::endl;
7c673cae 1364
11fdf7f2 1365 cout << "osd current epoch " << sb.current_epoch << std::endl;
7c673cae 1366
11fdf7f2
TL
1367 formatter->open_object_section("info");
1368 ms.info.dump(formatter);
1369 formatter->close_section();
1370 formatter->flush(cout);
1371 cout << std::endl;
7c673cae 1372
11fdf7f2
TL
1373 formatter->open_object_section("log");
1374 ms.log.dump(formatter);
1375 formatter->close_section();
1376 formatter->flush(cout);
1377 cout << std::endl;
1378
1379 formatter->close_section();
1380 formatter->flush(cout);
1381 cout << std::endl;
7c673cae 1382#endif
11fdf7f2 1383 }
7c673cae
FG
1384
1385 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1386 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1387 return -EFAULT;
1388 }
1389
1390 if (ms.map_epoch > sb.current_epoch) {
1391 cerr << "ERROR: Export PG's map_epoch " << ms.map_epoch << " > OSD's epoch " << sb.current_epoch << std::endl;
1392 cerr << "The OSD you are using is older than the exported PG" << std::endl;
1393 cerr << "Either use another OSD or join selected OSD to cluster to update it first" << std::endl;
1394 return -EINVAL;
1395 }
1396
11fdf7f2 1397 // Old exports didn't include OSDMap
7c673cae 1398 if (ms.osdmap.get_epoch() == 0) {
11fdf7f2
TL
1399 cerr << "WARNING: No OSDMap in old export, this is an ancient export."
1400 " Not supported." << std::endl;
1401 return -EINVAL;
7c673cae
FG
1402 }
1403
11fdf7f2
TL
1404 if (ms.osdmap.get_epoch() < sb.oldest_map) {
1405 cerr << "PG export's map " << ms.osdmap.get_epoch()
1406 << " is older than OSD's oldest_map " << sb.oldest_map << std::endl;
1407 if (!force) {
1408 cerr << " pass --force to proceed anyway (with incomplete PastIntervals)"
1409 << std::endl;
1410 return -EINVAL;
7c673cae
FG
1411 }
1412 }
7c673cae
FG
1413 if (debug) {
1414 cerr << "Import pgid " << ms.info.pgid << std::endl;
b32b8144 1415 cerr << "Previous past_intervals " << ms.past_intervals << std::endl;
11fdf7f2
TL
1416 cerr << "history.same_interval_since "
1417 << ms.info.history.same_interval_since << std::endl;
7c673cae
FG
1418 }
1419
7c673cae
FG
1420 return 0;
1421}
1422
1423// out: pg_log_t that only has entries that apply to import_pgid using curmap
1424// reject: Entries rejected from "in" are in the reject.log. Other fields not set.
1425void filter_divergent_priors(spg_t import_pgid, const OSDMap &curmap,
1426 const string &hit_set_namespace, const divergent_priors_t &in,
1427 divergent_priors_t &out, divergent_priors_t &reject)
1428{
1429 out.clear();
1430 reject.clear();
1431
1432 for (divergent_priors_t::const_iterator i = in.begin();
1433 i != in.end(); ++i) {
1434
1435 // Reject divergent priors for temporary objects
1436 if (i->second.is_temp()) {
1437 reject.insert(*i);
1438 continue;
1439 }
1440
1441 if (i->second.nspace != hit_set_namespace) {
1442 object_t oid = i->second.oid;
1443 object_locator_t loc(i->second);
1444 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
1445 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
1446
1447 if (import_pgid.pgid == pgid) {
1448 out.insert(*i);
1449 } else {
1450 reject.insert(*i);
1451 }
1452 } else {
1453 out.insert(*i);
1454 }
1455 }
1456}
1457
11fdf7f2 1458int ObjectStoreTool::dump_export(Formatter *formatter)
b32b8144
FG
1459{
1460 bufferlist ebl;
1461 pg_info_t info;
1462 PGLog::IndexedLog log;
1463 //bool skipped_objects = false;
1464
1465 int ret = read_super();
1466 if (ret)
1467 return ret;
1468
1469 if (sh.magic != super_header::super_magic) {
1470 cerr << "Invalid magic number" << std::endl;
1471 return -EFAULT;
1472 }
1473
1474 if (sh.version > super_header::super_ver) {
1475 cerr << "Can't handle export format version=" << sh.version << std::endl;
1476 return -EINVAL;
1477 }
1478
1479 formatter->open_object_section("Export");
1480
1481 //First section must be TYPE_PG_BEGIN
1482 sectiontype_t type;
1483 ret = read_section(&type, &ebl);
1484 if (ret)
1485 return ret;
1486 if (type == TYPE_POOL_BEGIN) {
1487 cerr << "Dump of pool exports not supported" << std::endl;
1488 return -EINVAL;
1489 } else if (type != TYPE_PG_BEGIN) {
1490 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
1491 return -EFAULT;
1492 }
1493
11fdf7f2 1494 auto ebliter = ebl.cbegin();
b32b8144
FG
1495 pg_begin pgb;
1496 pgb.decode(ebliter);
1497 spg_t pgid = pgb.pgid;
1498
1499 formatter->dump_string("pgid", stringify(pgid));
1500 formatter->dump_string("cluster_fsid", stringify(pgb.superblock.cluster_fsid));
1501 formatter->dump_string("features", stringify(pgb.superblock.compat_features));
1502
1503 bool done = false;
1504 bool found_metadata = false;
1505 metadata_section ms;
1506 bool objects_started = false;
1507 while(!done) {
1508 ret = read_section(&type, &ebl);
1509 if (ret)
1510 return ret;
1511
1512 if (debug) {
11fdf7f2 1513 cerr << "dump_export: Section type " << std::to_string(type) << std::endl;
b32b8144
FG
1514 }
1515 if (type >= END_OF_TYPES) {
1516 cerr << "Skipping unknown section type" << std::endl;
1517 continue;
1518 }
1519 switch(type) {
1520 case TYPE_OBJECT_BEGIN:
1521 if (!objects_started) {
1522 formatter->open_array_section("objects");
1523 objects_started = true;
1524 }
1525 ret = dump_object(formatter, ebl);
1526 if (ret) return ret;
1527 break;
1528 case TYPE_PG_METADATA:
1529 if (objects_started)
1530 cerr << "WARNING: metadata_section out of order" << std::endl;
1531 ret = dump_pg_metadata(formatter, ebl, ms);
1532 if (ret) return ret;
1533 found_metadata = true;
1534 break;
1535 case TYPE_PG_END:
1536 if (objects_started) {
1537 formatter->close_section();
1538 }
1539 done = true;
1540 break;
1541 default:
1542 cerr << "Unknown section type " << std::to_string(type) << std::endl;
1543 return -EFAULT;
1544 }
1545 }
1546
1547 if (!found_metadata) {
1548 cerr << "Missing metadata section" << std::endl;
1549 return -EFAULT;
1550 }
1551
1552 formatter->close_section();
1553 formatter->flush(cout);
1554
1555 return 0;
1556}
1557
7c673cae 1558int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb,
11fdf7f2 1559 bool force, std::string pgidstr)
7c673cae
FG
1560{
1561 bufferlist ebl;
1562 pg_info_t info;
1563 PGLog::IndexedLog log;
1564 bool skipped_objects = false;
1565
1566 if (!dry_run)
1567 finish_remove_pgs(store);
1568
1569 int ret = read_super();
1570 if (ret)
1571 return ret;
1572
1573 if (sh.magic != super_header::super_magic) {
1574 cerr << "Invalid magic number" << std::endl;
1575 return -EFAULT;
1576 }
1577
1578 if (sh.version > super_header::super_ver) {
1579 cerr << "Can't handle export format version=" << sh.version << std::endl;
1580 return -EINVAL;
1581 }
1582
1583 //First section must be TYPE_PG_BEGIN
1584 sectiontype_t type;
1585 ret = read_section(&type, &ebl);
1586 if (ret)
1587 return ret;
1588 if (type == TYPE_POOL_BEGIN) {
1589 cerr << "Pool exports cannot be imported into a PG" << std::endl;
1590 return -EINVAL;
1591 } else if (type != TYPE_PG_BEGIN) {
b32b8144 1592 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
7c673cae
FG
1593 return -EFAULT;
1594 }
1595
11fdf7f2 1596 auto ebliter = ebl.cbegin();
7c673cae
FG
1597 pg_begin pgb;
1598 pgb.decode(ebliter);
1599 spg_t pgid = pgb.pgid;
7c673cae
FG
1600
1601 if (pgidstr.length()) {
1602 spg_t user_pgid;
1603
1604 bool ok = user_pgid.parse(pgidstr.c_str());
1605 // This succeeded in main() already
11fdf7f2 1606 ceph_assert(ok);
7c673cae 1607 if (pgid != user_pgid) {
11fdf7f2
TL
1608 cerr << "specified pgid " << user_pgid
1609 << " does not match actual pgid " << pgid << std::endl;
1610 return -EINVAL;
7c673cae
FG
1611 }
1612 }
1613
1614 if (!pgb.superblock.cluster_fsid.is_zero()
1615 && pgb.superblock.cluster_fsid != sb.cluster_fsid) {
1616 cerr << "Export came from different cluster with fsid "
1617 << pgb.superblock.cluster_fsid << std::endl;
1618 return -EINVAL;
1619 }
1620
1621 if (debug) {
1622 cerr << "Exported features: " << pgb.superblock.compat_features << std::endl;
1623 }
1624
11fdf7f2 1625 // Special case: Old export has SHARDS incompat feature on replicated pg, removqqe it
7c673cae
FG
1626 if (pgid.is_no_shard())
1627 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1628
1629 if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) {
1630 CompatSet unsupported = sb.compat_features.unsupported(pgb.superblock.compat_features);
1631
1632 cerr << "Export has incompatible features set " << unsupported << std::endl;
1633
1634 // Let them import if they specify the --force option
1635 if (!force)
1636 return 11; // Positive return means exit status
1637 }
1638
11fdf7f2 1639 // we need the latest OSDMap to check for collisions
7c673cae
FG
1640 OSDMap curmap;
1641 bufferlist bl;
1642 ret = get_osdmap(store, sb.current_epoch, curmap, bl);
1643 if (ret) {
11fdf7f2 1644 cerr << "Can't find latest local OSDMap " << sb.current_epoch << std::endl;
7c673cae
FG
1645 return ret;
1646 }
1647 if (!curmap.have_pg_pool(pgid.pgid.m_pool)) {
1648 cerr << "Pool " << pgid.pgid.m_pool << " no longer exists" << std::endl;
1649 // Special exit code for this error, used by test code
1650 return 10; // Positive return means exit status
1651 }
1652
11fdf7f2
TL
1653 pool_pg_num_history_t pg_num_history;
1654 get_pg_num_history(store, &pg_num_history);
1655
7c673cae 1656 ghobject_t pgmeta_oid = pgid.make_pgmeta_oid();
7c673cae 1657
11fdf7f2 1658 // Check for PG already present.
7c673cae
FG
1659 coll_t coll(pgid);
1660 if (store->collection_exists(coll)) {
1661 cerr << "pgid " << pgid << " already exists" << std::endl;
1662 return -EEXIST;
1663 }
1664
11fdf7f2 1665 ObjectStore::CollectionHandle ch;
7c673cae 1666
11fdf7f2
TL
1667 OSDriver driver(
1668 store,
1669 coll_t(),
1670 OSD::make_snapmapper_oid());
1671 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pgid.shard);
7c673cae
FG
1672
1673 cout << "Importing pgid " << pgid;
7c673cae
FG
1674 cout << std::endl;
1675
1676 bool done = false;
1677 bool found_metadata = false;
1678 metadata_section ms;
7c673cae
FG
1679 while(!done) {
1680 ret = read_section(&type, &ebl);
1681 if (ret)
1682 return ret;
1683
b32b8144
FG
1684 if (debug) {
1685 cout << __func__ << ": Section type " << std::to_string(type) << std::endl;
1686 }
7c673cae
FG
1687 if (type >= END_OF_TYPES) {
1688 cout << "Skipping unknown section type" << std::endl;
1689 continue;
1690 }
1691 switch(type) {
1692 case TYPE_OBJECT_BEGIN:
11fdf7f2
TL
1693 ceph_assert(found_metadata);
1694 ret = get_object(store, driver, mapper, coll, ebl, ms.osdmap,
1695 &skipped_objects);
7c673cae
FG
1696 if (ret) return ret;
1697 break;
1698 case TYPE_PG_METADATA:
11fdf7f2 1699 ret = get_pg_metadata(store, ebl, ms, sb, pgid);
7c673cae
FG
1700 if (ret) return ret;
1701 found_metadata = true;
11fdf7f2
TL
1702
1703 if (pgid != ms.info.pgid) {
1704 cerr << "specified pgid " << pgid << " does not match import file pgid "
1705 << ms.info.pgid << std::endl;
1706 return -EINVAL;
1707 }
1708
1709 // make sure there are no conflicting splits or merges
1710 if (ms.osdmap.have_pg_pool(pgid.pgid.pool())) {
1711 auto p = pg_num_history.pg_nums.find(pgid.pgid.m_pool);
1712 if (p != pg_num_history.pg_nums.end() &&
1713 !p->second.empty()) {
1714 unsigned start_pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool());
1715 unsigned pg_num = start_pg_num;
1716 for (auto q = p->second.lower_bound(ms.map_epoch);
1717 q != p->second.end();
1718 ++q) {
1719 unsigned new_pg_num = q->second;
1720 cout << "pool " << pgid.pgid.pool() << " pg_num " << pg_num
1721 << " -> " << new_pg_num << std::endl;
1722
1723 // check for merge target
1724 spg_t target;
1725 if (pgid.is_merge_source(pg_num, new_pg_num, &target)) {
1726 // FIXME: this checks assumes the OSD's PG is at the OSD's
1727 // map epoch; it could be, say, at *our* epoch, pre-merge.
1728 coll_t coll(target);
1729 if (store->collection_exists(coll)) {
1730 cerr << "pgid " << pgid << " merges to target " << target
1731 << " which already exists" << std::endl;
1732 return 12;
1733 }
1734 }
1735
1736 // check for split children
1737 set<spg_t> children;
1738 if (pgid.is_split(start_pg_num, new_pg_num, &children)) {
1739 cerr << " children are " << children << std::endl;
1740 for (auto child : children) {
1741 coll_t coll(child);
1742 if (store->collection_exists(coll)) {
1743 cerr << "pgid " << pgid << " splits to " << children
1744 << " and " << child << " exists" << std::endl;
1745 return 12;
1746 }
1747 }
1748 }
1749 pg_num = new_pg_num;
1750 }
1751 }
1752 } else {
1753 cout << "pool " << pgid.pgid.pool() << " doesn't existing, not checking"
1754 << " for splits or mergers" << std::endl;
1755 }
1756
1757 if (!dry_run) {
1758 ObjectStore::Transaction t;
1759 ch = store->create_new_collection(coll);
1760 PG::_create(
1761 t, pgid,
1762 pgid.get_split_bits(ms.osdmap.get_pg_pool(pgid.pool())->get_pg_num()));
1763 PG::_init(t, pgid, NULL);
1764
1765 // mark this coll for removal until we're done
1766 map<string,bufferlist> values;
1767 encode((char)1, values["_remove"]);
1768 t.omap_setkeys(coll, pgid.make_pgmeta_oid(), values);
1769
1770 store->queue_transaction(ch, std::move(t));
1771 }
1772
7c673cae
FG
1773 break;
1774 case TYPE_PG_END:
11fdf7f2 1775 ceph_assert(found_metadata);
7c673cae
FG
1776 done = true;
1777 break;
1778 default:
b32b8144 1779 cerr << "Unknown section type " << std::to_string(type) << std::endl;
7c673cae
FG
1780 return -EFAULT;
1781 }
1782 }
1783
1784 if (!found_metadata) {
1785 cerr << "Missing metadata section" << std::endl;
1786 return -EFAULT;
1787 }
1788
1789 ObjectStore::Transaction t;
1790 if (!dry_run) {
1791 pg_log_t newlog, reject;
11fdf7f2 1792 pg_log_t::filter_log(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
7c673cae
FG
1793 ms.log, newlog, reject);
1794 if (debug) {
1795 for (list<pg_log_entry_t>::iterator i = newlog.log.begin();
1796 i != newlog.log.end(); ++i)
1797 cerr << "Keeping log entry " << *i << std::endl;
1798 for (list<pg_log_entry_t>::iterator i = reject.log.begin();
1799 i != reject.log.end(); ++i)
1800 cerr << "Skipping log entry " << *i << std::endl;
1801 }
1802
1803 divergent_priors_t newdp, rejectdp;
11fdf7f2 1804 filter_divergent_priors(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
7c673cae
FG
1805 ms.divergent_priors, newdp, rejectdp);
1806 ms.divergent_priors = newdp;
1807 if (debug) {
1808 for (divergent_priors_t::iterator i = newdp.begin();
1809 i != newdp.end(); ++i)
1810 cerr << "Keeping divergent_prior " << *i << std::endl;
1811 for (divergent_priors_t::iterator i = rejectdp.begin();
1812 i != rejectdp.end(); ++i)
1813 cerr << "Skipping divergent_prior " << *i << std::endl;
1814 }
1815
1816 ms.missing.filter_objects([&](const hobject_t &obj) {
1817 if (obj.nspace == g_ceph_context->_conf->osd_hit_set_namespace)
1818 return false;
11fdf7f2 1819 ceph_assert(!obj.is_temp());
7c673cae
FG
1820 object_t oid = obj.oid;
1821 object_locator_t loc(obj);
11fdf7f2
TL
1822 pg_t raw_pgid = ms.osdmap.object_locator_to_pg(oid, loc);
1823 pg_t _pgid = ms.osdmap.raw_pg_to_pg(raw_pgid);
7c673cae
FG
1824
1825 return pgid.pgid != _pgid;
1826 });
1827
1828
1829 if (debug) {
1830 pg_missing_t missing;
1831 Formatter *formatter = Formatter::create("json-pretty");
1832 dump_log(formatter, cerr, newlog, ms.missing);
1833 delete formatter;
1834 }
1835
1836 // Just like a split invalidate stats since the object count is changed
1837 if (skipped_objects)
1838 ms.info.stats.stats_invalid = true;
1839
1840 ret = write_pg(
1841 t,
1842 ms.map_epoch,
1843 ms.info,
1844 newlog,
1845 ms.past_intervals,
1846 ms.divergent_priors,
1847 ms.missing);
1848 if (ret) return ret;
1849 }
1850
1851 // done, clear removal flag
1852 if (debug)
1853 cerr << "done, clearing removal flag" << std::endl;
1854
1855 if (!dry_run) {
1856 set<string> remove;
1857 remove.insert("_remove");
1858 t.omap_rmkeys(coll, pgid.make_pgmeta_oid(), remove);
11fdf7f2
TL
1859 wait_until_done(&t, [&] {
1860 store->queue_transaction(ch, std::move(t));
1861 // make sure we flush onreadable items before mapper/driver are destroyed.
1862 ch->flush();
1863 });
7c673cae 1864 }
7c673cae
FG
1865 return 0;
1866}
1867
1868int do_list(ObjectStore *store, string pgidstr, string object, boost::optional<std::string> nspace,
1869 Formatter *formatter, bool debug, bool human_readable, bool head)
1870{
1871 int r;
1872 lookup_ghobject lookup(object, nspace, head);
1873 if (pgidstr.length() > 0) {
1874 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
1875 } else {
1876 r = action_on_all_objects(store, lookup, debug);
1877 }
1878 if (r)
1879 return r;
1880 lookup.dump(formatter, human_readable);
1881 formatter->flush(cout);
1882 return 0;
1883}
1884
1885int do_meta(ObjectStore *store, string object, Formatter *formatter, bool debug, bool human_readable)
1886{
1887 int r;
1888 boost::optional<std::string> nspace; // Not specified
1889 lookup_ghobject lookup(object, nspace);
1890 r = action_on_all_objects_in_exact_pg(store, coll_t::meta(), lookup, debug);
1891 if (r)
1892 return r;
1893 lookup.dump(formatter, human_readable);
1894 formatter->flush(cout);
1895 return 0;
1896}
1897
11fdf7f2
TL
// Selects what remove_object() deletes for an object.
enum rmtype {
  BOTH = 0,       // remove the snap mapper entry and the object itself
  SNAPMAP = 1,    // remove only the snap mapper entry
  NOSNAPMAP = 2   // remove only the object, leave the snap mapper alone
};
1903
7c673cae
FG
1904int remove_object(coll_t coll, ghobject_t &ghobj,
1905 SnapMapper &mapper,
1906 MapCacher::Transaction<std::string, bufferlist> *_t,
11fdf7f2
TL
1907 ObjectStore::Transaction *t,
1908 enum rmtype type)
7c673cae 1909{
11fdf7f2
TL
1910 if (type == BOTH || type == SNAPMAP) {
1911 int r = mapper.remove_oid(ghobj.hobj, _t);
1912 if (r < 0 && r != -ENOENT) {
1913 cerr << "remove_oid returned " << cpp_strerror(r) << std::endl;
1914 return r;
1915 }
7c673cae
FG
1916 }
1917
11fdf7f2
TL
1918 if (type == BOTH || type == NOSNAPMAP) {
1919 t->remove(coll, ghobj);
1920 }
7c673cae
FG
1921 return 0;
1922}
1923
1924int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent);
1925
1926int do_remove_object(ObjectStore *store, coll_t coll,
11fdf7f2 1927 ghobject_t &ghobj, bool all, bool force, enum rmtype type)
7c673cae 1928{
11fdf7f2 1929 auto ch = store->open_collection(coll);
7c673cae
FG
1930 spg_t pg;
1931 coll.is_pg_prefix(&pg);
1932 OSDriver driver(
1933 store,
1934 coll_t(),
1935 OSD::make_snapmapper_oid());
1936 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pg.shard);
1937 struct stat st;
1938
11fdf7f2 1939 int r = store->stat(ch, ghobj, &st);
7c673cae
FG
1940 if (r < 0) {
1941 cerr << "remove: " << cpp_strerror(r) << std::endl;
1942 return r;
1943 }
1944
1945 SnapSet ss;
1946 if (ghobj.hobj.has_snapset()) {
1947 r = get_snapset(store, coll, ghobj, ss, false);
1948 if (r < 0) {
1949 cerr << "Can't get snapset error " << cpp_strerror(r) << std::endl;
1950 return r;
1951 }
1952 if (!ss.snaps.empty() && !all) {
1953 if (force) {
1954 cout << "WARNING: only removing "
1955 << (ghobj.hobj.is_head() ? "head" : "snapdir")
1956 << " with snapshots present" << std::endl;
1957 ss.snaps.clear();
1958 } else {
1959 cerr << "Snapshots are present, use removeall to delete everything" << std::endl;
1960 return -EINVAL;
1961 }
1962 }
1963 }
1964
1965 ObjectStore::Transaction t;
1966 OSDriver::OSTransaction _t(driver.get_transaction(&t));
1967
7c673cae
FG
1968 ghobject_t snapobj = ghobj;
1969 for (vector<snapid_t>::iterator i = ss.snaps.begin() ;
1970 i != ss.snaps.end() ; ++i) {
1971 snapobj.hobj.snap = *i;
1972 cout << "remove " << snapobj << std::endl;
1973 if (!dry_run) {
11fdf7f2 1974 r = remove_object(coll, snapobj, mapper, &_t, &t, type);
7c673cae
FG
1975 if (r < 0)
1976 return r;
1977 }
1978 }
1979
11fdf7f2
TL
1980 cout << "remove " << ghobj << std::endl;
1981
1982 if (!dry_run) {
1983 r = remove_object(coll, ghobj, mapper, &_t, &t, type);
1984 if (r < 0)
1985 return r;
1986 }
7c673cae 1987
11fdf7f2
TL
1988 if (!dry_run) {
1989 wait_until_done(&t, [&] {
1990 store->queue_transaction(ch, std::move(t));
1991 ch->flush();
1992 });
1993 }
7c673cae
FG
1994 return 0;
1995}
1996
1997int do_list_attrs(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
1998{
11fdf7f2 1999 auto ch = store->open_collection(coll);
7c673cae 2000 map<string,bufferptr> aset;
11fdf7f2 2001 int r = store->getattrs(ch, ghobj, aset);
7c673cae
FG
2002 if (r < 0) {
2003 cerr << "getattrs: " << cpp_strerror(r) << std::endl;
2004 return r;
2005 }
2006
2007 for (map<string,bufferptr>::iterator i = aset.begin();i != aset.end(); ++i) {
2008 string key(i->first);
2009 if (outistty)
2010 key = cleanbin(key);
2011 cout << key << std::endl;
2012 }
2013 return 0;
2014}
2015
2016int do_list_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2017{
11fdf7f2
TL
2018 auto ch = store->open_collection(coll);
2019 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, ghobj);
7c673cae
FG
2020 if (!iter) {
2021 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT) << std::endl;
2022 return -ENOENT;
2023 }
2024 iter->seek_to_first();
2025 map<string, bufferlist> oset;
2026 while(iter->valid()) {
2027 get_omap_batch(iter, oset);
2028
2029 for (map<string,bufferlist>::iterator i = oset.begin();i != oset.end(); ++i) {
2030 string key(i->first);
2031 if (outistty)
2032 key = cleanbin(key);
2033 cout << key << std::endl;
2034 }
2035 }
2036 return 0;
2037}
2038
2039int do_get_bytes(ObjectStore *store, coll_t coll, ghobject_t &ghobj, int fd)
2040{
11fdf7f2 2041 auto ch = store->open_collection(coll);
7c673cae
FG
2042 struct stat st;
2043 mysize_t total;
2044
11fdf7f2 2045 int ret = store->stat(ch, ghobj, &st);
7c673cae
FG
2046 if (ret < 0) {
2047 cerr << "get-bytes: " << cpp_strerror(ret) << std::endl;
2048 return ret;
2049 }
2050
2051 total = st.st_size;
2052 if (debug)
2053 cerr << "size=" << total << std::endl;
2054
2055 uint64_t offset = 0;
2056 bufferlist rawdatabl;
2057 while(total > 0) {
2058 rawdatabl.clear();
2059 mysize_t len = max_read;
2060 if (len > total)
2061 len = total;
2062
11fdf7f2 2063 ret = store->read(ch, ghobj, offset, len, rawdatabl);
7c673cae
FG
2064 if (ret < 0)
2065 return ret;
2066 if (ret == 0)
2067 return -EINVAL;
2068
2069 if (debug)
2070 cerr << "data section offset=" << offset << " len=" << len << std::endl;
2071
2072 total -= ret;
2073 offset += ret;
2074
2075 ret = write(fd, rawdatabl.c_str(), ret);
2076 if (ret == -1) {
2077 perror("write");
2078 return -errno;
2079 }
2080 }
2081
2082 return 0;
2083}
2084
2085int do_set_bytes(ObjectStore *store, coll_t coll,
11fdf7f2 2086 ghobject_t &ghobj, int fd)
7c673cae
FG
2087{
2088 ObjectStore::Transaction tran;
2089 ObjectStore::Transaction *t = &tran;
2090
2091 if (debug)
2092 cerr << "Write " << ghobj << std::endl;
2093
2094 if (!dry_run) {
2095 t->touch(coll, ghobj);
2096 t->truncate(coll, ghobj, 0);
2097 }
2098
2099 uint64_t offset = 0;
2100 bufferlist rawdatabl;
2101 do {
2102 rawdatabl.clear();
2103 ssize_t bytes = rawdatabl.read_fd(fd, max_read);
2104 if (bytes < 0) {
2105 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
2106 return bytes;
2107 }
2108
2109 if (bytes == 0)
2110 break;
2111
2112 if (debug)
2113 cerr << "\tdata: offset " << offset << " bytes " << bytes << std::endl;
2114 if (!dry_run)
2115 t->write(coll, ghobj, offset, bytes, rawdatabl);
2116
2117 offset += bytes;
11fdf7f2 2118 // XXX: Should we queue_transaction() every once in a while for very large files
7c673cae
FG
2119 } while(true);
2120
11fdf7f2 2121 auto ch = store->open_collection(coll);
7c673cae 2122 if (!dry_run)
11fdf7f2 2123 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2124 return 0;
2125}
2126
2127int do_get_attr(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2128{
11fdf7f2 2129 auto ch = store->open_collection(coll);
7c673cae
FG
2130 bufferptr bp;
2131
11fdf7f2 2132 int r = store->getattr(ch, ghobj, key.c_str(), bp);
7c673cae
FG
2133 if (r < 0) {
2134 cerr << "getattr: " << cpp_strerror(r) << std::endl;
2135 return r;
2136 }
2137
2138 string value(bp.c_str(), bp.length());
2139 if (outistty) {
2140 value = cleanbin(value);
2141 value.push_back('\n');
2142 }
2143 cout << value;
2144
2145 return 0;
2146}
2147
2148int do_set_attr(ObjectStore *store, coll_t coll,
11fdf7f2 2149 ghobject_t &ghobj, string key, int fd)
7c673cae
FG
2150{
2151 ObjectStore::Transaction tran;
2152 ObjectStore::Transaction *t = &tran;
2153 bufferlist bl;
2154
2155 if (debug)
2156 cerr << "Setattr " << ghobj << std::endl;
2157
2158 int ret = get_fd_data(fd, bl);
2159 if (ret < 0)
2160 return ret;
2161
2162 if (dry_run)
2163 return 0;
2164
2165 t->touch(coll, ghobj);
2166
2167 t->setattr(coll, ghobj, key, bl);
2168
11fdf7f2
TL
2169 auto ch = store->open_collection(coll);
2170 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2171 return 0;
2172}
2173
2174int do_rm_attr(ObjectStore *store, coll_t coll,
11fdf7f2 2175 ghobject_t &ghobj, string key)
7c673cae
FG
2176{
2177 ObjectStore::Transaction tran;
2178 ObjectStore::Transaction *t = &tran;
2179
2180 if (debug)
2181 cerr << "Rmattr " << ghobj << std::endl;
2182
2183 if (dry_run)
2184 return 0;
2185
2186 t->rmattr(coll, ghobj, key);
2187
11fdf7f2
TL
2188 auto ch = store->open_collection(coll);
2189 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2190 return 0;
2191}
2192
2193int do_get_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2194{
11fdf7f2 2195 auto ch = store->open_collection(coll);
7c673cae
FG
2196 set<string> keys;
2197 map<string, bufferlist> out;
2198
2199 keys.insert(key);
2200
11fdf7f2 2201 int r = store->omap_get_values(ch, ghobj, keys, &out);
7c673cae
FG
2202 if (r < 0) {
2203 cerr << "omap_get_values: " << cpp_strerror(r) << std::endl;
2204 return r;
2205 }
2206
2207 if (out.empty()) {
2208 cerr << "Key not found" << std::endl;
2209 return -ENOENT;
2210 }
2211
11fdf7f2 2212 ceph_assert(out.size() == 1);
7c673cae
FG
2213
2214 bufferlist bl = out.begin()->second;
2215 string value(bl.c_str(), bl.length());
2216 if (outistty) {
2217 value = cleanbin(value);
2218 value.push_back('\n');
2219 }
2220 cout << value;
2221
2222 return 0;
2223}
2224
2225int do_set_omap(ObjectStore *store, coll_t coll,
11fdf7f2 2226 ghobject_t &ghobj, string key, int fd)
7c673cae
FG
2227{
2228 ObjectStore::Transaction tran;
2229 ObjectStore::Transaction *t = &tran;
2230 map<string, bufferlist> attrset;
2231 bufferlist valbl;
2232
2233 if (debug)
2234 cerr << "Set_omap " << ghobj << std::endl;
2235
2236 int ret = get_fd_data(fd, valbl);
2237 if (ret < 0)
2238 return ret;
2239
2240 attrset.insert(pair<string, bufferlist>(key, valbl));
2241
2242 if (dry_run)
2243 return 0;
2244
2245 t->touch(coll, ghobj);
2246
2247 t->omap_setkeys(coll, ghobj, attrset);
2248
11fdf7f2
TL
2249 auto ch = store->open_collection(coll);
2250 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2251 return 0;
2252}
2253
2254int do_rm_omap(ObjectStore *store, coll_t coll,
11fdf7f2 2255 ghobject_t &ghobj, string key)
7c673cae
FG
2256{
2257 ObjectStore::Transaction tran;
2258 ObjectStore::Transaction *t = &tran;
2259 set<string> keys;
2260
2261 keys.insert(key);
2262
2263 if (debug)
2264 cerr << "Rm_omap " << ghobj << std::endl;
2265
2266 if (dry_run)
2267 return 0;
2268
2269 t->omap_rmkeys(coll, ghobj, keys);
2270
11fdf7f2
TL
2271 auto ch = store->open_collection(coll);
2272 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2273 return 0;
2274}
2275
2276int do_get_omaphdr(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2277{
11fdf7f2 2278 auto ch = store->open_collection(coll);
7c673cae
FG
2279 bufferlist hdrbl;
2280
11fdf7f2 2281 int r = store->omap_get_header(ch, ghobj, &hdrbl, true);
7c673cae
FG
2282 if (r < 0) {
2283 cerr << "omap_get_header: " << cpp_strerror(r) << std::endl;
2284 return r;
2285 }
2286
2287 string header(hdrbl.c_str(), hdrbl.length());
2288 if (outistty) {
2289 header = cleanbin(header);
2290 header.push_back('\n');
2291 }
2292 cout << header;
2293
2294 return 0;
2295}
2296
2297int do_set_omaphdr(ObjectStore *store, coll_t coll,
11fdf7f2 2298 ghobject_t &ghobj, int fd)
7c673cae
FG
2299{
2300 ObjectStore::Transaction tran;
2301 ObjectStore::Transaction *t = &tran;
2302 bufferlist hdrbl;
2303
2304 if (debug)
2305 cerr << "Omap_setheader " << ghobj << std::endl;
2306
2307 int ret = get_fd_data(fd, hdrbl);
2308 if (ret)
2309 return ret;
2310
2311 if (dry_run)
2312 return 0;
2313
2314 t->touch(coll, ghobj);
2315
2316 t->omap_setheader(coll, ghobj, hdrbl);
2317
11fdf7f2
TL
2318 auto ch = store->open_collection(coll);
2319 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2320 return 0;
2321}
2322
2323struct do_fix_lost : public action_on_object_t {
11fdf7f2 2324 void call(ObjectStore *store, coll_t coll,
7c673cae
FG
2325 ghobject_t &ghobj, object_info_t &oi) override {
2326 if (oi.is_lost()) {
2327 cout << coll << "/" << ghobj << " is lost";
2328 if (!dry_run)
2329 cout << ", fixing";
2330 cout << std::endl;
2331 if (dry_run)
11fdf7f2 2332 return;
7c673cae
FG
2333 oi.clear_flag(object_info_t::FLAG_LOST);
2334 bufferlist bl;
11fdf7f2 2335 encode(oi, bl, -1); /* fixme: using full features */
7c673cae
FG
2336 ObjectStore::Transaction t;
2337 t.setattr(coll, ghobj, OI_ATTR, bl);
11fdf7f2
TL
2338 auto ch = store->open_collection(coll);
2339 store->queue_transaction(ch, std::move(t));
7c673cae 2340 }
11fdf7f2 2341 return;
7c673cae
FG
2342 }
2343};
2344
2345int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent = false)
2346{
11fdf7f2 2347 auto ch = store->open_collection(coll);
7c673cae 2348 bufferlist attr;
11fdf7f2 2349 int r = store->getattr(ch, ghobj, SS_ATTR, attr);
7c673cae
FG
2350 if (r < 0) {
2351 if (!silent)
2352 cerr << "Error getting snapset on : " << make_pair(coll, ghobj) << ", "
2353 << cpp_strerror(r) << std::endl;
2354 return r;
2355 }
11fdf7f2 2356 auto bp = attr.cbegin();
7c673cae 2357 try {
11fdf7f2 2358 decode(ss, bp);
7c673cae
FG
2359 } catch (...) {
2360 r = -EINVAL;
2361 cerr << "Error decoding snapset on : " << make_pair(coll, ghobj) << ", "
2362 << cpp_strerror(r) << std::endl;
2363 return r;
2364 }
2365 return 0;
2366}
2367
2368int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
2369{
11fdf7f2 2370 auto ch = store->open_collection(coll);
7c673cae
FG
2371 int r = 0;
2372 formatter->open_object_section("obj");
2373 formatter->open_object_section("id");
2374 ghobj.dump(formatter);
2375 formatter->close_section();
2376
2377 bufferlist attr;
11fdf7f2 2378 int gr = store->getattr(ch, ghobj, OI_ATTR, attr);
7c673cae
FG
2379 if (gr < 0) {
2380 r = gr;
2381 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2382 << cpp_strerror(r) << std::endl;
2383 } else {
2384 object_info_t oi;
11fdf7f2 2385 auto bp = attr.cbegin();
7c673cae 2386 try {
11fdf7f2 2387 decode(oi, bp);
7c673cae
FG
2388 formatter->open_object_section("info");
2389 oi.dump(formatter);
2390 formatter->close_section();
2391 } catch (...) {
2392 r = -EINVAL;
2393 cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
2394 << cpp_strerror(r) << std::endl;
2395 }
2396 }
2397 struct stat st;
11fdf7f2 2398 int sr = store->stat(ch, ghobj, &st, true);
7c673cae
FG
2399 if (sr < 0) {
2400 r = sr;
2401 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2402 << cpp_strerror(r) << std::endl;
2403 } else {
2404 formatter->open_object_section("stat");
2405 formatter->dump_int("size", st.st_size);
2406 formatter->dump_int("blksize", st.st_blksize);
2407 formatter->dump_int("blocks", st.st_blocks);
2408 formatter->dump_int("nlink", st.st_nlink);
2409 formatter->close_section();
2410 }
2411
2412 if (ghobj.hobj.has_snapset()) {
2413 SnapSet ss;
2414 int snr = get_snapset(store, coll, ghobj, ss);
2415 if (snr < 0) {
2416 r = snr;
2417 } else {
2418 formatter->open_object_section("SnapSet");
2419 ss.dump(formatter);
2420 formatter->close_section();
2421 }
2422 }
a8e16298 2423 bufferlist hattr;
11fdf7f2 2424 gr = store->getattr(ch, ghobj, ECUtil::get_hinfo_key(), hattr);
a8e16298
TL
2425 if (gr == 0) {
2426 ECUtil::HashInfo hinfo;
11fdf7f2 2427 auto hp = hattr.cbegin();
a8e16298
TL
2428 try {
2429 decode(hinfo, hp);
2430 formatter->open_object_section("hinfo");
2431 hinfo.dump(formatter);
2432 formatter->close_section();
2433 } catch (...) {
2434 r = -EINVAL;
2435 cerr << "Error decoding hinfo on : " << make_pair(coll, ghobj) << ", "
2436 << cpp_strerror(r) << std::endl;
2437 }
2438 }
7c673cae
FG
2439 formatter->close_section();
2440 formatter->flush(cout);
2441 cout << std::endl;
2442 return r;
2443}
2444
11fdf7f2 2445int corrupt_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
1adf2230 2446{
11fdf7f2 2447 auto ch = store->open_collection(coll);
1adf2230 2448 bufferlist attr;
11fdf7f2 2449 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
1adf2230
AA
2450 if (r < 0) {
2451 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2452 << cpp_strerror(r) << std::endl;
2453 return r;
2454 }
2455 object_info_t oi;
11fdf7f2 2456 auto bp = attr.cbegin();
1adf2230 2457 try {
11fdf7f2 2458 decode(oi, bp);
1adf2230
AA
2459 } catch (...) {
2460 r = -EINVAL;
2461 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2462 << cpp_strerror(r) << std::endl;
2463 return r;
2464 }
1adf2230
AA
2465 if (!dry_run) {
2466 attr.clear();
2467 oi.alloc_hint_flags += 0xff;
2468 ObjectStore::Transaction t;
11fdf7f2 2469 encode(oi, attr, -1); /* fixme: using full features */
1adf2230 2470 t.setattr(coll, ghobj, OI_ATTR, attr);
11fdf7f2
TL
2471 auto ch = store->open_collection(coll);
2472 r = store->queue_transaction(ch, std::move(t));
1adf2230
AA
2473 if (r < 0) {
2474 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2475 << cpp_strerror(r) << std::endl;
2476 return r;
2477 }
2478 }
2479 return 0;
2480}
2481
11fdf7f2
TL
2482int set_size(
2483 ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter,
2484 bool corrupt)
7c673cae 2485{
11fdf7f2 2486 auto ch = store->open_collection(coll);
7c673cae
FG
2487 if (ghobj.hobj.is_snapdir()) {
2488 cerr << "Can't set the size of a snapdir" << std::endl;
2489 return -EINVAL;
2490 }
2491 bufferlist attr;
11fdf7f2 2492 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
7c673cae
FG
2493 if (r < 0) {
2494 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2495 << cpp_strerror(r) << std::endl;
2496 return r;
2497 }
2498 object_info_t oi;
11fdf7f2 2499 auto bp = attr.cbegin();
7c673cae 2500 try {
11fdf7f2 2501 decode(oi, bp);
7c673cae
FG
2502 } catch (...) {
2503 r = -EINVAL;
2504 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2505 << cpp_strerror(r) << std::endl;
2506 return r;
2507 }
2508 struct stat st;
11fdf7f2 2509 r = store->stat(ch, ghobj, &st, true);
7c673cae
FG
2510 if (r < 0) {
2511 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2512 << cpp_strerror(r) << std::endl;
2513 }
2514 ghobject_t head(ghobj);
2515 SnapSet ss;
2516 bool found_head = true;
2517 map<snapid_t, uint64_t>::iterator csi;
2518 bool is_snap = ghobj.hobj.is_snap();
2519 if (is_snap) {
2520 head.hobj = head.hobj.get_head();
2521 r = get_snapset(store, coll, head, ss, true);
2522 if (r < 0 && r != -ENOENT) {
2523 // Requested get_snapset() silent, so if not -ENOENT show error
2524 cerr << "Error getting snapset on : " << make_pair(coll, head) << ", "
2525 << cpp_strerror(r) << std::endl;
2526 return r;
2527 }
2528 if (r == -ENOENT) {
2529 head.hobj = head.hobj.get_snapdir();
2530 r = get_snapset(store, coll, head, ss);
2531 if (r < 0)
2532 return r;
2533 found_head = false;
2534 } else {
2535 found_head = true;
2536 }
2537 csi = ss.clone_size.find(ghobj.hobj.snap);
2538 if (csi == ss.clone_size.end()) {
2539 cerr << "SnapSet is missing clone_size for snap " << ghobj.hobj.snap << std::endl;
2540 return -EINVAL;
2541 }
2542 }
2543 if ((uint64_t)st.st_size == setsize && oi.size == setsize
2544 && (!is_snap || csi->second == setsize)) {
2545 cout << "Size of object is already " << setsize << std::endl;
2546 return 0;
2547 }
2548 cout << "Setting size to " << setsize << ", stat size " << st.st_size
2549 << ", obj info size " << oi.size;
2550 if (is_snap) {
2551 cout << ", " << (found_head ? "head" : "snapdir")
2552 << " clone_size " << csi->second;
2553 csi->second = setsize;
2554 }
2555 cout << std::endl;
2556 if (!dry_run) {
2557 attr.clear();
2558 oi.size = setsize;
7c673cae 2559 ObjectStore::Transaction t;
b5b8bbf5 2560 // Only modify object info if we want to corrupt it
b32b8144 2561 if (!corrupt && (uint64_t)st.st_size != setsize) {
b5b8bbf5 2562 t.truncate(coll, ghobj, setsize);
b32b8144
FG
2563 // Changing objectstore size will invalidate data_digest, so clear it.
2564 oi.clear_data_digest();
2565 }
11fdf7f2 2566 encode(oi, attr, -1); /* fixme: using full features */
b32b8144 2567 t.setattr(coll, ghobj, OI_ATTR, attr);
7c673cae
FG
2568 if (is_snap) {
2569 bufferlist snapattr;
2570 snapattr.clear();
11fdf7f2 2571 encode(ss, snapattr);
7c673cae
FG
2572 t.setattr(coll, head, SS_ATTR, snapattr);
2573 }
11fdf7f2
TL
2574 auto ch = store->open_collection(coll);
2575 r = store->queue_transaction(ch, std::move(t));
2576 if (r < 0) {
2577 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2578 << cpp_strerror(r) << std::endl;
2579 return r;
2580 }
2581 }
2582 return 0;
2583}
2584
2585int clear_data_digest(ObjectStore *store, coll_t coll, ghobject_t &ghobj) {
2586 auto ch = store->open_collection(coll);
2587 bufferlist attr;
2588 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2589 if (r < 0) {
2590 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2591 << cpp_strerror(r) << std::endl;
2592 return r;
2593 }
2594 object_info_t oi;
2595 auto bp = attr.cbegin();
2596 try {
2597 decode(oi, bp);
2598 } catch (...) {
2599 r = -EINVAL;
2600 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2601 << cpp_strerror(r) << std::endl;
2602 return r;
2603 }
2604 if (!dry_run) {
2605 attr.clear();
2606 oi.clear_data_digest();
2607 encode(oi, attr, -1); /* fixme: using full features */
2608 ObjectStore::Transaction t;
2609 t.setattr(coll, ghobj, OI_ATTR, attr);
2610 auto ch = store->open_collection(coll);
2611 r = store->queue_transaction(ch, std::move(t));
7c673cae
FG
2612 if (r < 0) {
2613 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2614 << cpp_strerror(r) << std::endl;
2615 return r;
2616 }
2617 }
2618 return 0;
2619}
2620
2621int clear_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj,
11fdf7f2 2622 string arg)
7c673cae
FG
2623{
2624 SnapSet ss;
2625 int ret = get_snapset(store, coll, ghobj, ss);
2626 if (ret < 0)
2627 return ret;
2628
7c673cae
FG
2629 // Use "corrupt" to clear entire SnapSet
2630 // Use "seq" to just corrupt SnapSet.seq
2631 if (arg == "corrupt" || arg == "seq")
2632 ss.seq = 0;
2633 // Use "snaps" to just clear SnapSet.snaps
2634 if (arg == "corrupt" || arg == "snaps")
2635 ss.snaps.clear();
2636 // By default just clear clone, clone_overlap and clone_size
2637 if (arg == "corrupt")
2638 arg = "";
2639 if (arg == "" || arg == "clones")
2640 ss.clones.clear();
2641 if (arg == "" || arg == "clone_overlap")
2642 ss.clone_overlap.clear();
2643 if (arg == "" || arg == "clone_size")
2644 ss.clone_size.clear();
2645 // Break all clone sizes by adding 1
2646 if (arg == "size") {
2647 for (map<snapid_t, uint64_t>::iterator i = ss.clone_size.begin();
2648 i != ss.clone_size.end(); ++i)
2649 ++(i->second);
2650 }
2651
2652 if (!dry_run) {
2653 bufferlist bl;
11fdf7f2 2654 encode(ss, bl);
7c673cae
FG
2655 ObjectStore::Transaction t;
2656 t.setattr(coll, ghobj, SS_ATTR, bl);
11fdf7f2
TL
2657 auto ch = store->open_collection(coll);
2658 int r = store->queue_transaction(ch, std::move(t));
7c673cae
FG
2659 if (r < 0) {
2660 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2661 << cpp_strerror(r) << std::endl;
2662 return r;
2663 }
2664 }
2665 return 0;
2666}
2667
2668vector<snapid_t>::iterator find(vector<snapid_t> &v, snapid_t clid)
2669{
2670 return std::find(v.begin(), v.end(), clid);
2671}
2672
2673map<snapid_t, interval_set<uint64_t> >::iterator
2674find(map<snapid_t, interval_set<uint64_t> > &m, snapid_t clid)
2675{
2676 return m.find(clid);
2677}
2678
2679map<snapid_t, uint64_t>::iterator find(map<snapid_t, uint64_t> &m,
2680 snapid_t clid)
2681{
2682 return m.find(clid);
2683}
2684
2685template<class T>
2686int remove_from(T &mv, string name, snapid_t cloneid, bool force)
2687{
2688 typename T::iterator i = find(mv, cloneid);
2689 if (i != mv.end()) {
2690 mv.erase(i);
2691 } else {
2692 cerr << "Clone " << cloneid << " doesn't exist in " << name;
2693 if (force) {
2694 cerr << " (ignored)" << std::endl;
2695 return 0;
2696 }
2697 cerr << std::endl;
2698 return -EINVAL;
2699 }
2700 return 0;
2701}
2702
11fdf7f2
TL
2703int remove_clone(
2704 ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cloneid, bool force)
7c673cae
FG
2705{
2706 // XXX: Don't allow this if in a cache tier or former cache tier
2707 // bool allow_incomplete_clones() const {
2708 // return cache_mode != CACHEMODE_NONE || has_flag(FLAG_INCOMPLETE_CLONES);
2709
2710 SnapSet snapset;
2711 int ret = get_snapset(store, coll, ghobj, snapset);
2712 if (ret < 0)
2713 return ret;
2714
2715 // Derived from trim_object()
2716 // ...from snapset
2717 vector<snapid_t>::iterator p;
2718 for (p = snapset.clones.begin(); p != snapset.clones.end(); ++p)
2719 if (*p == cloneid)
2720 break;
2721 if (p == snapset.clones.end()) {
2722 cerr << "Clone " << cloneid << " not present";
2723 return -ENOENT;
2724 }
2725 if (p != snapset.clones.begin()) {
2726 // not the oldest... merge overlap into next older clone
2727 vector<snapid_t>::iterator n = p - 1;
2728 hobject_t prev_coid = ghobj.hobj;
2729 prev_coid.snap = *n;
2730 //bool adjust_prev_bytes = is_present_clone(prev_coid);
2731
2732 //if (adjust_prev_bytes)
2733 // ctx->delta_stats.num_bytes -= snapset.get_clone_bytes(*n);
2734
2735 snapset.clone_overlap[*n].intersection_of(
2736 snapset.clone_overlap[*p]);
2737
2738 //if (adjust_prev_bytes)
2739 // ctx->delta_stats.num_bytes += snapset.get_clone_bytes(*n);
2740 }
2741
2742 ret = remove_from(snapset.clones, "clones", cloneid, force);
2743 if (ret) return ret;
2744 ret = remove_from(snapset.clone_overlap, "clone_overlap", cloneid, force);
2745 if (ret) return ret;
2746 ret = remove_from(snapset.clone_size, "clone_size", cloneid, force);
2747 if (ret) return ret;
2748
2749 if (dry_run)
2750 return 0;
2751
2752 bufferlist bl;
11fdf7f2 2753 encode(snapset, bl);
7c673cae
FG
2754 ObjectStore::Transaction t;
2755 t.setattr(coll, ghobj, SS_ATTR, bl);
11fdf7f2
TL
2756 auto ch = store->open_collection(coll);
2757 int r = store->queue_transaction(ch, std::move(t));
7c673cae
FG
2758 if (r < 0) {
2759 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2760 << cpp_strerror(r) << std::endl;
2761 return r;
2762 }
2763 cout << "Removal of clone " << cloneid << " complete" << std::endl;
2764 cout << "Use pg repair after OSD restarted to correct stat information" << std::endl;
2765 return 0;
2766}
2767
2768int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst)
2769{
2770 cout << "dup from " << src->get_type() << ": " << srcpath << "\n"
2771 << " to " << dst->get_type() << ": " << dstpath
2772 << std::endl;
7c673cae
FG
2773 int num, i;
2774 vector<coll_t> collections;
2775 int r;
2776
2777 r = src->mount();
2778 if (r < 0) {
2779 cerr << "failed to mount src: " << cpp_strerror(r) << std::endl;
2780 return r;
2781 }
2782 r = dst->mount();
2783 if (r < 0) {
2784 cerr << "failed to mount dst: " << cpp_strerror(r) << std::endl;
2785 goto out_src;
2786 }
2787
2788 if (src->get_fsid() != dst->get_fsid()) {
2789 cerr << "src fsid " << src->get_fsid() << " != dest " << dst->get_fsid()
2790 << std::endl;
2791 goto out;
2792 }
2793 cout << "fsid " << src->get_fsid() << std::endl;
2794
2795 // make sure dst is empty
2796 r = dst->list_collections(collections);
2797 if (r < 0) {
2798 cerr << "error listing collections on dst: " << cpp_strerror(r) << std::endl;
2799 goto out;
2800 }
2801 if (!collections.empty()) {
2802 cerr << "destination store is not empty" << std::endl;
2803 goto out;
2804 }
2805
2806 r = src->list_collections(collections);
2807 if (r < 0) {
2808 cerr << "error listing collections on src: " << cpp_strerror(r) << std::endl;
2809 goto out;
2810 }
2811
2812 num = collections.size();
2813 cout << num << " collections" << std::endl;
2814 i = 1;
2815 for (auto cid : collections) {
2816 cout << i++ << "/" << num << " " << cid << std::endl;
11fdf7f2
TL
2817 auto ch = src->open_collection(cid);
2818 auto dch = dst->create_new_collection(cid);
7c673cae
FG
2819 {
2820 ObjectStore::Transaction t;
11fdf7f2 2821 int bits = src->collection_bits(ch);
7c673cae 2822 if (bits < 0) {
181888fb
FG
2823 if (src->get_type() == "filestore" && cid.is_meta()) {
2824 bits = 0;
2825 } else {
2826 cerr << "cannot get bit count for collection " << cid << ": "
2827 << cpp_strerror(bits) << std::endl;
2828 goto out;
2829 }
7c673cae
FG
2830 }
2831 t.create_collection(cid, bits);
11fdf7f2 2832 dst->queue_transaction(dch, std::move(t));
7c673cae
FG
2833 }
2834
2835 ghobject_t pos;
2836 uint64_t n = 0;
2837 uint64_t bytes = 0, keys = 0;
2838 while (true) {
2839 vector<ghobject_t> ls;
11fdf7f2 2840 r = src->collection_list(ch, pos, ghobject_t::get_max(), 1000, &ls, &pos);
7c673cae
FG
2841 if (r < 0) {
2842 cerr << "collection_list on " << cid << " from " << pos << " got: "
2843 << cpp_strerror(r) << std::endl;
2844 goto out;
2845 }
2846 if (ls.empty()) {
2847 break;
2848 }
2849
2850 for (auto& oid : ls) {
2851 //cout << " " << cid << " " << oid << std::endl;
2852 if (n % 100 == 0) {
2853 cout << " " << std::setw(16) << n << " objects, "
2854 << std::setw(16) << bytes << " bytes, "
2855 << std::setw(16) << keys << " keys"
2856 << std::setw(1) << "\r" << std::flush;
2857 }
2858 n++;
2859
2860 ObjectStore::Transaction t;
2861 t.touch(cid, oid);
2862
2863 map<string,bufferptr> attrs;
11fdf7f2 2864 src->getattrs(ch, oid, attrs);
7c673cae
FG
2865 if (!attrs.empty()) {
2866 t.setattrs(cid, oid, attrs);
2867 }
2868
2869 bufferlist bl;
11fdf7f2 2870 src->read(ch, oid, 0, 0, bl);
7c673cae
FG
2871 if (bl.length()) {
2872 t.write(cid, oid, 0, bl.length(), bl);
2873 bytes += bl.length();
2874 }
2875
2876 bufferlist header;
2877 map<string,bufferlist> omap;
11fdf7f2 2878 src->omap_get(ch, oid, &header, &omap);
7c673cae
FG
2879 if (header.length()) {
2880 t.omap_setheader(cid, oid, header);
2881 ++keys;
2882 }
2883 if (!omap.empty()) {
2884 keys += omap.size();
2885 t.omap_setkeys(cid, oid, omap);
2886 }
2887
11fdf7f2 2888 dst->queue_transaction(dch, std::move(t));
7c673cae
FG
2889 }
2890 }
2891 cout << " " << std::setw(16) << n << " objects, "
2892 << std::setw(16) << bytes << " bytes, "
2893 << std::setw(16) << keys << " keys"
2894 << std::setw(1) << std::endl;
2895 }
2896
2897 // keyring
2898 cout << "keyring" << std::endl;
2899 {
2900 bufferlist bl;
2901 string s = srcpath + "/keyring";
2902 string err;
2903 r = bl.read_file(s.c_str(), &err);
2904 if (r < 0) {
2905 cerr << "failed to copy " << s << ": " << err << std::endl;
2906 } else {
2907 string d = dstpath + "/keyring";
2908 bl.write_file(d.c_str(), 0600);
2909 }
2910 }
2911
2912 // osd metadata
2913 cout << "duping osd metadata" << std::endl;
2914 {
2915 for (auto k : {"magic", "whoami", "ceph_fsid", "fsid"}) {
2916 string val;
2917 src->read_meta(k, &val);
2918 dst->write_meta(k, val);
2919 }
2920 }
2921
2922 dst->write_meta("ready", "ready");
2923
2924 cout << "done." << std::endl;
2925 r = 0;
2926 out:
2927 dst->umount();
2928 out_src:
2929 src->umount();
2930 return r;
2931}
2932
2933void usage(po::options_description &desc)
2934{
2935 cerr << std::endl;
2936 cerr << desc << std::endl;
2937 cerr << std::endl;
2938 cerr << "Positional syntax:" << std::endl;
2939 cerr << std::endl;
2940 cerr << "ceph-objectstore-tool ... <object> (get|set)-bytes [file]" << std::endl;
2941 cerr << "ceph-objectstore-tool ... <object> set-(attr|omap) <key> [file]" << std::endl;
2942 cerr << "ceph-objectstore-tool ... <object> (get|rm)-(attr|omap) <key>" << std::endl;
2943 cerr << "ceph-objectstore-tool ... <object> get-omaphdr" << std::endl;
2944 cerr << "ceph-objectstore-tool ... <object> set-omaphdr [file]" << std::endl;
2945 cerr << "ceph-objectstore-tool ... <object> list-attrs" << std::endl;
2946 cerr << "ceph-objectstore-tool ... <object> list-omap" << std::endl;
2947 cerr << "ceph-objectstore-tool ... <object> remove|removeall" << std::endl;
2948 cerr << "ceph-objectstore-tool ... <object> dump" << std::endl;
2949 cerr << "ceph-objectstore-tool ... <object> set-size" << std::endl;
11fdf7f2 2950 cerr << "ceph-objectstore-tool ... <object> clear-data-digest" << std::endl;
7c673cae
FG
2951 cerr << "ceph-objectstore-tool ... <object> remove-clone-metadata <cloneid>" << std::endl;
2952 cerr << std::endl;
2953 cerr << "<object> can be a JSON object description as displayed" << std::endl;
2954 cerr << "by --op list." << std::endl;
2955 cerr << "<object> can be an object name which will be looked up in all" << std::endl;
2956 cerr << "the OSD's PGs." << std::endl;
2957 cerr << "<object> can be the empty string ('') which with a provided pgid " << std::endl;
2958 cerr << "specifies the pgmeta object" << std::endl;
2959 cerr << std::endl;
2960 cerr << "The optional [file] argument will read stdin or write stdout" << std::endl;
2961 cerr << "if not specified or if '-' specified." << std::endl;
2962}
2963
// Return true when `check` ends with the suffix `ending`.
// An empty `ending` matches every string.
bool ends_with(const std::string& check, const std::string& ending)
{
  if (ending.size() > check.size())
    return false;
  const std::string::size_type tail_start = check.size() - ending.size();
  return check.compare(tail_start, ending.size(), ending) == 0;
}
2968
2969// Based on FileStore::dump_journal(), set-up enough to only dump
2970int mydump_journal(Formatter *f, string journalpath, bool m_journal_dio)
2971{
2972 int r;
2973
2974 if (!journalpath.length())
2975 return -EINVAL;
2976
2977 FileJournal *journal = new FileJournal(g_ceph_context, uuid_d(), NULL, NULL,
2978 journalpath.c_str(), m_journal_dio);
2979 r = journal->_fdump(*f, false);
2980 delete journal;
2981 return r;
2982}
2983
2984int apply_layout_settings(ObjectStore *os, const OSDSuperblock &superblock,
1adf2230
AA
2985 const string &pool_name, const spg_t &pgid, bool dry_run,
2986 int target_level)
7c673cae
FG
2987{
2988 int r = 0;
2989
2990 FileStore *fs = dynamic_cast<FileStore*>(os);
2991 if (!fs) {
2992 cerr << "Nothing to do for non-filestore backend" << std::endl;
2993 return 0; // making this return success makes testing easier
2994 }
2995
2996 OSDMap curmap;
2997 bufferlist bl;
2998 r = get_osdmap(os, superblock.current_epoch, curmap, bl);
2999 if (r) {
3000 cerr << "Can't find local OSDMap: " << cpp_strerror(r) << std::endl;
3001 return r;
3002 }
3003
3004 int64_t poolid = -1;
3005 if (pool_name.length()) {
3006 poolid = curmap.lookup_pg_pool_name(pool_name);
3007 if (poolid < 0) {
3008 cerr << "Couldn't find pool " << pool_name << ": " << cpp_strerror(poolid)
3009 << std::endl;
3010 return poolid;
3011 }
3012 }
3013
3014 vector<coll_t> collections, filtered_colls;
3015 r = os->list_collections(collections);
3016 if (r < 0) {
3017 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
3018 return r;
3019 }
3020
3021 for (auto const &coll : collections) {
3022 spg_t coll_pgid;
3023 if (coll.is_pg(&coll_pgid) &&
3024 ((poolid >= 0 && coll_pgid.pool() == (uint64_t)poolid) ||
3025 coll_pgid == pgid)) {
3026 filtered_colls.push_back(coll);
3027 }
3028 }
3029
3030 size_t done = 0, total = filtered_colls.size();
3031 for (auto const &coll : filtered_colls) {
3032 if (dry_run) {
3033 cerr << "Would apply layout settings to " << coll << std::endl;
3034 } else {
3035 cerr << "Finished " << done << "/" << total << " collections" << "\r";
1adf2230 3036 r = fs->apply_layout_settings(coll, target_level);
7c673cae
FG
3037 if (r < 0) {
3038 cerr << "Error applying layout settings to " << coll << std::endl;
3039 return r;
3040 }
3041 }
3042 ++done;
3043 }
3044
3045 cerr << "Finished " << total << "/" << total << " collections" << "\r" << std::endl;
3046 return r;
3047}
3048
3049int main(int argc, char **argv)
3050{
3051 string dpath, jpath, pgidstr, op, file, mountpoint, mon_store_path, object;
3052 string target_data_path, fsid;
11fdf7f2 3053 string objcmd, arg1, arg2, type, format, argnspace, pool, rmtypestr;
7c673cae
FG
3054 boost::optional<std::string> nspace;
3055 spg_t pgid;
3056 unsigned epoch = 0;
3057 ghobject_t ghobj;
3058 bool human_readable;
7c673cae
FG
3059 Formatter *formatter;
3060 bool head;
3061
3062 po::options_description desc("Allowed options");
3063 desc.add_options()
3064 ("help", "produce help message")
3065 ("type", po::value<string>(&type),
11fdf7f2 3066 "Arg is one of [bluestore (default), filestore, memstore]")
7c673cae
FG
3067 ("data-path", po::value<string>(&dpath),
3068 "path to object store, mandatory")
3069 ("journal-path", po::value<string>(&jpath),
3070 "path to journal, use if tool can't find it")
3071 ("pgid", po::value<string>(&pgidstr),
11fdf7f2 3072 "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, and mandatory for apply-layout-settings if --pool is not specified")
7c673cae
FG
3073 ("pool", po::value<string>(&pool),
3074 "Pool name, mandatory for apply-layout-settings if --pgid is not specified")
3075 ("op", po::value<string>(&op),
11fdf7f2
TL
3076 "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
3077 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log]")
7c673cae
FG
3078 ("epoch", po::value<unsigned>(&epoch),
3079 "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
3080 ("file", po::value<string>(&file),
3efd9988 3081 "path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap")
7c673cae
FG
3082 ("mon-store-path", po::value<string>(&mon_store_path),
3083 "path of monstore to update-mon-db")
3084 ("fsid", po::value<string>(&fsid),
3085 "fsid for new store created by mkfs")
3086 ("target-data-path", po::value<string>(&target_data_path),
3087 "path of target object store (for --op dup)")
3088 ("mountpoint", po::value<string>(&mountpoint),
3089 "fuse mountpoint")
3090 ("format", po::value<string>(&format)->default_value("json-pretty"),
3091 "Output format which may be json, json-pretty, xml, xml-pretty")
3092 ("debug", "Enable diagnostic output to stderr")
3093 ("force", "Ignore some types of errors and proceed with operation - USE WITH CAUTION: CORRUPTION POSSIBLE NOW OR IN THE FUTURE")
3094 ("skip-journal-replay", "Disable journal replay")
3095 ("skip-mount-omap", "Disable mounting of omap")
3096 ("head", "Find head/snapdir when searching for objects by name")
3097 ("dry-run", "Don't modify the objectstore")
3098 ("namespace", po::value<string>(&argnspace), "Specify namespace when searching for objects")
11fdf7f2 3099 ("rmtype", po::value<string>(&rmtypestr), "Specify corrupting object removal 'snapmap' or 'nosnapmap' - TESTING USE ONLY")
7c673cae
FG
3100 ;
3101
3102 po::options_description positional("Positional options");
3103 positional.add_options()
3104 ("object", po::value<string>(&object), "'' for pgmeta_oid, object name or ghobject in json")
3105 ("objcmd", po::value<string>(&objcmd), "command [(get|set)-bytes, (get|set|rm)-(attr|omap), (get|set)-omaphdr, list-attrs, list-omap, remove]")
91327a77 3106 ("arg1", po::value<string>(&arg1), "arg1 based on cmd")
7c673cae 3107 ("arg2", po::value<string>(&arg2), "arg2 based on cmd")
7c673cae
FG
3108 ;
3109
b32b8144 3110 po::options_description all;
7c673cae
FG
3111 all.add(desc).add(positional);
3112
3113 po::positional_options_description pd;
3114 pd.add("object", 1).add("objcmd", 1).add("arg1", 1).add("arg2", 1);
3115
3116 vector<string> ceph_option_strings;
11fdf7f2 3117
7c673cae
FG
3118 po::variables_map vm;
3119 try {
3120 po::parsed_options parsed =
3121 po::command_line_parser(argc, argv).options(all).allow_unregistered().positional(pd).run();
3122 po::store( parsed, vm);
3123 po::notify(vm);
3124 ceph_option_strings = po::collect_unrecognized(parsed.options,
3125 po::include_positional);
3126 } catch(po::error &e) {
3127 std::cerr << e.what() << std::endl;
3128 return 1;
3129 }
3130
3131 if (vm.count("help")) {
b32b8144 3132 usage(desc);
7c673cae
FG
3133 return 1;
3134 }
3135
11fdf7f2
TL
3136 // Compatibility with previous option name
3137 if (op == "dump-import")
3138 op = "dump-export";
3139
3efd9988 3140 debug = (vm.count("debug") > 0);
7c673cae 3141
3efd9988 3142 force = (vm.count("force") > 0);
7c673cae
FG
3143
3144 if (vm.count("namespace"))
3145 nspace = argnspace;
3146
3efd9988
FG
3147 dry_run = (vm.count("dry-run") > 0);
3148
7c673cae
FG
3149 osflagbits_t flags = 0;
3150 if (dry_run || vm.count("skip-journal-replay"))
3151 flags |= SKIP_JOURNAL_REPLAY;
3152 if (vm.count("skip-mount-omap"))
3153 flags |= SKIP_MOUNT_OMAP;
3154 if (op == "update-mon-db")
3155 flags |= SKIP_JOURNAL_REPLAY;
3efd9988 3156
7c673cae
FG
3157 head = (vm.count("head") > 0);
3158
11fdf7f2
TL
3159 // infer osd id so we can authenticate
3160 char fn[PATH_MAX];
3161 snprintf(fn, sizeof(fn), "%s/whoami", dpath.c_str());
3162 int fd = ::open(fn, O_RDONLY);
3163 if (fd >= 0) {
3164 bufferlist bl;
3165 bl.read_fd(fd, 64);
3166 string s(bl.c_str(), bl.length());
3167 int whoami = atoi(s.c_str());
3168 vector<string> tmp;
3169 // identify ourselves as this osd so we can auth and fetch our configs
3170 tmp.push_back("-n");
3171 tmp.push_back(string("osd.") + stringify(whoami));
3172 // populate osd_data so that the default keyring location works
3173 tmp.push_back("--osd-data");
3174 tmp.push_back(dpath);
3175 tmp.insert(tmp.end(), ceph_option_strings.begin(),
3176 ceph_option_strings.end());
3177 tmp.swap(ceph_option_strings);
3178 }
3179
7c673cae 3180 vector<const char *> ceph_options;
7c673cae
FG
3181 ceph_options.reserve(ceph_options.size() + ceph_option_strings.size());
3182 for (vector<string>::iterator i = ceph_option_strings.begin();
3183 i != ceph_option_strings.end();
3184 ++i) {
3185 ceph_options.push_back(i->c_str());
3186 }
3187
7c673cae 3188 snprintf(fn, sizeof(fn), "%s/type", dpath.c_str());
11fdf7f2 3189 fd = ::open(fn, O_RDONLY);
7c673cae
FG
3190 if (fd >= 0) {
3191 bufferlist bl;
3192 bl.read_fd(fd, 64);
3193 if (bl.length()) {
3194 string dp_type = string(bl.c_str(), bl.length() - 1); // drop \n
3195 if (vm.count("type") && dp_type != "" && type != dp_type)
3196 cerr << "WARNING: Ignoring type \"" << type << "\" - found data-path type \""
3197 << dp_type << "\"" << std::endl;
3198 type = dp_type;
3199 //cout << "object store type is " << type << std::endl;
3200 }
3201 ::close(fd);
3202 }
11fdf7f2 3203
7c673cae 3204 if (!vm.count("type") && type == "") {
11fdf7f2 3205 type = "bluestore";
7c673cae
FG
3206 }
3207 if (!vm.count("data-path") &&
11fdf7f2 3208 op != "dump-export" &&
7c673cae
FG
3209 !(op == "dump-journal" && type == "filestore")) {
3210 cerr << "Must provide --data-path" << std::endl;
3211 usage(desc);
3212 return 1;
3213 }
3214 if (type == "filestore" && !vm.count("journal-path")) {
3215 jpath = dpath + "/journal";
3216 }
3217 if (!vm.count("op") && !vm.count("object")) {
3218 cerr << "Must provide --op or object command..." << std::endl;
3219 usage(desc);
3220 return 1;
3221 }
91327a77 3222 if (op != "list" && op != "apply-layout-settings" &&
7c673cae
FG
3223 vm.count("op") && vm.count("object")) {
3224 cerr << "Can't specify both --op and object command syntax" << std::endl;
3225 usage(desc);
3226 return 1;
3227 }
3228 if (op == "apply-layout-settings" && !(vm.count("pool") ^ vm.count("pgid"))) {
3229 cerr << "apply-layout-settings requires either --pool or --pgid"
3230 << std::endl;
3231 usage(desc);
3232 return 1;
3233 }
91327a77 3234 if (op != "list" && op != "apply-layout-settings" && vm.count("object") && !vm.count("objcmd")) {
7c673cae
FG
3235 cerr << "Invalid syntax, missing command" << std::endl;
3236 usage(desc);
3237 return 1;
3238 }
3239 if (op == "fuse" && mountpoint.length() == 0) {
3240 cerr << "Missing fuse mountpoint" << std::endl;
3241 usage(desc);
3242 return 1;
3243 }
3244 outistty = isatty(STDOUT_FILENO);
3245
3246 file_fd = fd_none;
3efd9988 3247 if ((op == "export" || op == "export-remove" || op == "get-osdmap" || op == "get-inc-osdmap") && !dry_run) {
7c673cae
FG
3248 if (!vm.count("file") || file == "-") {
3249 if (outistty) {
3250 cerr << "stdout is a tty and no --file filename specified" << std::endl;
3251 return 1;
3252 }
3253 file_fd = STDOUT_FILENO;
3254 } else {
3255 file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
3256 }
11fdf7f2 3257 } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap") {
7c673cae
FG
3258 if (!vm.count("file") || file == "-") {
3259 if (isatty(STDIN_FILENO)) {
3260 cerr << "stdin is a tty and no --file filename specified" << std::endl;
3261 return 1;
3262 }
3263 file_fd = STDIN_FILENO;
3264 } else {
3265 file_fd = open(file.c_str(), O_RDONLY);
3266 }
3267 }
3268
3269 ObjectStoreTool tool = ObjectStoreTool(file_fd, dry_run);
3270
3271 if (vm.count("file") && file_fd == fd_none && !dry_run) {
11fdf7f2 3272 cerr << "--file option only applies to import, dump-export, export, export-remove, "
7c673cae
FG
3273 << "get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap" << std::endl;
3274 return 1;
3275 }
3276
3277 if (file_fd != fd_none && file_fd < 0) {
3278 string err = string("file: ") + file;
3279 perror(err.c_str());
3280 return 1;
3281 }
3282
3283 auto cct = global_init(
11fdf7f2
TL
3284 NULL, ceph_options,
3285 CEPH_ENTITY_TYPE_OSD,
3286 CODE_ENVIRONMENT_UTILITY_NODOUT,
3287 0);
7c673cae 3288 common_init_finish(g_ceph_context);
7c673cae 3289 if (debug) {
11fdf7f2
TL
3290 g_conf().set_val_or_die("log_to_stderr", "true");
3291 g_conf().set_val_or_die("err_to_stderr", "true");
7c673cae 3292 }
11fdf7f2 3293 g_conf().apply_changes(nullptr);
7c673cae
FG
3294
3295 // Special list handling. Treating pretty_format as human readable,
3296 // with one object per line and not an enclosing array.
3297 human_readable = ends_with(format, "-pretty");
3298 if ((op == "list" || op == "meta-list") && human_readable) {
3299 // Remove -pretty from end of format which we know is there
3300 format = format.substr(0, format.size() - strlen("-pretty"));
3301 }
3302
3303 formatter = Formatter::create(format);
3304 if (formatter == NULL) {
3305 cerr << "unrecognized format: " << format << std::endl;
3306 return 1;
3307 }
3308
3309 // Special handling for filestore journal, so we can dump it without mounting
3310 if (op == "dump-journal" && type == "filestore") {
11fdf7f2 3311 int ret = mydump_journal(formatter, jpath, g_conf()->journal_dio);
7c673cae
FG
3312 if (ret < 0) {
3313 cerr << "journal-path: " << jpath << ": "
3314 << cpp_strerror(ret) << std::endl;
3315 return 1;
3316 }
3317 formatter->flush(cout);
3318 return 0;
3319 }
3320
11fdf7f2
TL
3321 if (op == "dump-export") {
3322 int ret = tool.dump_export(formatter);
b32b8144 3323 if (ret < 0) {
11fdf7f2 3324 cerr << "dump-export: "
b32b8144
FG
3325 << cpp_strerror(ret) << std::endl;
3326 return 1;
3327 }
3328 return 0;
3329 }
3330
7c673cae
FG
3331 //Verify that data-path really exists
3332 struct stat st;
3333 if (::stat(dpath.c_str(), &st) == -1) {
3334 string err = string("data-path: ") + dpath;
3335 perror(err.c_str());
3336 return 1;
3337 }
3338
3339 if (pgidstr.length() && !pgid.parse(pgidstr.c_str())) {
3340 cerr << "Invalid pgid '" << pgidstr << "' specified" << std::endl;
3341 return 1;
3342 }
3343
3efd9988
FG
3344 //Verify that the journal-path really exists
3345 if (type == "filestore") {
3346 if (::stat(jpath.c_str(), &st) == -1) {
3347 string err = string("journal-path: ") + jpath;
3348 perror(err.c_str());
3349 return 1;
3350 }
3351 if (S_ISDIR(st.st_mode)) {
3352 cerr << "journal-path: " << jpath << ": "
3353 << cpp_strerror(EISDIR) << std::endl;
3354 return 1;
3355 }
3356 }
3357
7c673cae
FG
3358 ObjectStore *fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags);
3359 if (fs == NULL) {
3360 cerr << "Unable to create store of type " << type << std::endl;
3361 return 1;
3362 }
3363
3364 if (op == "fsck" || op == "fsck-deep") {
3365 int r = fs->fsck(op == "fsck-deep");
3366 if (r < 0) {
3367 cerr << "fsck failed: " << cpp_strerror(r) << std::endl;
3368 return 1;
3369 }
3370 if (r > 0) {
3371 cerr << "fsck found " << r << " errors" << std::endl;
3372 return 1;
3373 }
3374 cout << "fsck found no errors" << std::endl;
3375 return 0;
3376 }
3efd9988
FG
3377 if (op == "repair" || op == "repair-deep") {
3378 int r = fs->repair(op == "repair-deep");
3379 if (r < 0) {
3380 cerr << "repair failed: " << cpp_strerror(r) << std::endl;
3381 return 1;
3382 }
3383 if (r > 0) {
3384 cerr << "repair found " << r << " errors" << std::endl;
3385 return 1;
3386 }
3387 cout << "repair found no errors" << std::endl;
3388 return 0;
3389 }
7c673cae
FG
3390 if (op == "mkfs") {
3391 if (fsid.length()) {
3392 uuid_d f;
3393 bool r = f.parse(fsid.c_str());
3394 if (!r) {
3395 cerr << "failed to parse uuid '" << fsid << "'" << std::endl;
3396 return 1;
3397 }
3398 fs->set_fsid(f);
3399 }
3400 int r = fs->mkfs();
3401 if (r < 0) {
3efd9988 3402 cerr << "mkfs failed: " << cpp_strerror(r) << std::endl;
7c673cae
FG
3403 return 1;
3404 }
3405 return 0;
3406 }
3407 if (op == "dup") {
3408 string target_type;
3409 char fn[PATH_MAX];
3410 snprintf(fn, sizeof(fn), "%s/type", target_data_path.c_str());
3411 int fd = ::open(fn, O_RDONLY);
3412 if (fd < 0) {
3413 cerr << "Unable to open " << target_data_path << "/type" << std::endl;
3414 exit(1);
3415 }
3416 bufferlist bl;
3417 bl.read_fd(fd, 64);
3418 if (bl.length()) {
3419 target_type = string(bl.c_str(), bl.length() - 1); // drop \n
3420 }
3421 ::close(fd);
3422 ObjectStore *targetfs = ObjectStore::create(
3423 g_ceph_context, target_type,
3424 target_data_path, "", 0);
3425 if (targetfs == NULL) {
3426 cerr << "Unable to open store of type " << target_type << std::endl;
3427 return 1;
3428 }
3429 int r = dup(dpath, fs, target_data_path, targetfs);
3430 if (r < 0) {
3431 cerr << "dup failed: " << cpp_strerror(r) << std::endl;
3432 return 1;
3433 }
3434 return 0;
3435 }
3436
7c673cae
FG
3437 int ret = fs->mount();
3438 if (ret < 0) {
3439 if (ret == -EBUSY) {
3440 cerr << "OSD has the store locked" << std::endl;
3441 } else {
3442 cerr << "Mount failed with '" << cpp_strerror(ret) << "'" << std::endl;
3443 }
3444 return 1;
3445 }
3446
3447 if (op == "fuse") {
3448#ifdef HAVE_LIBFUSE
3449 FuseStore fuse(fs, mountpoint);
3450 cout << "mounting fuse at " << mountpoint << " ..." << std::endl;
3451 int r = fuse.main();
3452 if (r < 0) {
3453 cerr << "failed to mount fuse: " << cpp_strerror(r) << std::endl;
3454 return 1;
3455 }
3456#else
3457 cerr << "fuse support not enabled" << std::endl;
3458#endif
3459 return 0;
3460 }
3461
3462 vector<coll_t> ls;
3463 vector<coll_t>::iterator it;
3464 CompatSet supported;
3465
3466#ifdef INTERNAL_TEST
3467 supported = get_test_compat_set();
3468#else
3469 supported = OSD::get_osd_compat_set();
3470#endif
3471
3472 bufferlist bl;
3473 OSDSuperblock superblock;
11fdf7f2
TL
3474 auto ch = fs->open_collection(coll_t::meta());
3475 bufferlist::const_iterator p;
3476 ret = fs->read(ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
7c673cae
FG
3477 if (ret < 0) {
3478 cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl;
3479 goto out;
3480 }
3481
11fdf7f2
TL
3482 p = bl.cbegin();
3483 decode(superblock, p);
7c673cae
FG
3484
3485 if (debug) {
3486 cerr << "Cluster fsid=" << superblock.cluster_fsid << std::endl;
3487 }
3488
3489 if (debug) {
3490 cerr << "Supported features: " << supported << std::endl;
3491 cerr << "On-disk features: " << superblock.compat_features << std::endl;
3492 }
3493 if (supported.compare(superblock.compat_features) == -1) {
3494 CompatSet unsupported = supported.unsupported(superblock.compat_features);
3495 cerr << "On-disk OSD incompatible features set "
3496 << unsupported << std::endl;
3497 ret = -EINVAL;
3498 goto out;
3499 }
3500
3501 if (op == "apply-layout-settings") {
1adf2230 3502 int target_level = 0;
91327a77
AA
3503 // Single positional argument with apply-layout-settings
3504 // for target_level.
3505 if (vm.count("object") && isdigit(object[0])) {
3506 target_level = atoi(object.c_str());
3507 // This requires --arg1 to be specified since
3508 // this is the third positional argument and normally
3509 // used with object operations.
3510 } else if (vm.count("arg1") && isdigit(arg1[0])) {
1adf2230
AA
3511 target_level = atoi(arg1.c_str());
3512 }
3513 ret = apply_layout_settings(fs, superblock, pool, pgid, dry_run, target_level);
7c673cae
FG
3514 goto out;
3515 }
3516
3517 if (op != "list" && vm.count("object")) {
3518 // Special case: Create pgmeta_oid if empty string specified
3519 // This can't conflict with any actual object names.
3520 if (object == "") {
3521 ghobj = pgid.make_pgmeta_oid();
3522 } else {
3523 json_spirit::Value v;
3524 try {
3525 if (!json_spirit::read(object, v) ||
3526 (v.type() != json_spirit::array_type && v.type() != json_spirit::obj_type)) {
3527 // Special: Need head/snapdir so set even if user didn't specify
3528 if (vm.count("objcmd") && (objcmd == "remove-clone-metadata"))
3529 head = true;
3530 lookup_ghobject lookup(object, nspace, head);
3a9019d9
FG
3531 if (pgidstr.length())
3532 ret = action_on_all_objects_in_exact_pg(fs, coll_t(pgid), lookup, debug);
3533 else
3534 ret = action_on_all_objects(fs, lookup, debug);
3535 if (ret) {
7c673cae
FG
3536 throw std::runtime_error("Internal error");
3537 } else {
3538 if (lookup.size() != 1) {
3539 stringstream ss;
3540 if (lookup.size() == 0)
3541 ss << "No object id '" << object << "' found or invalid JSON specified";
3542 else
3543 ss << "Found " << lookup.size() << " objects with id '" << object
3544 << "', please use a JSON spec from --op list instead";
3545 throw std::runtime_error(ss.str());
3546 }
3547 pair<coll_t, ghobject_t> found = lookup.pop();
3548 pgidstr = found.first.to_str();
3549 pgid.parse(pgidstr.c_str());
3550 ghobj = found.second;
3551 }
3552 } else {
3553 stringstream ss;
3554 if (pgidstr.length() == 0 && v.type() != json_spirit::array_type) {
3555 ss << "Without --pgid the object '" << object
3556 << "' must be a JSON array";
3557 throw std::runtime_error(ss.str());
3558 }
3559 if (v.type() == json_spirit::array_type) {
3560 json_spirit::Array array = v.get_array();
3561 if (array.size() != 2) {
3562 ss << "Object '" << object
3563 << "' must be a JSON array with 2 elements";
3564 throw std::runtime_error(ss.str());
3565 }
3566 vector<json_spirit::Value>::iterator i = array.begin();
11fdf7f2 3567 ceph_assert(i != array.end());
7c673cae
FG
3568 if (i->type() != json_spirit::str_type) {
3569 ss << "Object '" << object
3570 << "' must be a JSON array with the first element a string";
3571 throw std::runtime_error(ss.str());
3572 }
3573 string object_pgidstr = i->get_str();
3574 if (object_pgidstr != "meta") {
3575 spg_t object_pgid;
3576 object_pgid.parse(object_pgidstr.c_str());
3577 if (pgidstr.length() > 0) {
3578 if (object_pgid != pgid) {
3579 ss << "object '" << object
3580 << "' has a pgid different from the --pgid="
3581 << pgidstr << " option";
3582 throw std::runtime_error(ss.str());
3583 }
3584 } else {
3585 pgidstr = object_pgidstr;
3586 pgid = object_pgid;
3587 }
3588 } else {
3589 pgidstr = object_pgidstr;
3590 }
3591 ++i;
3592 v = *i;
3593 }
3594 try {
3595 ghobj.decode(v);
3596 } catch (std::runtime_error& e) {
3597 ss << "Decode object JSON error: " << e.what();
3598 throw std::runtime_error(ss.str());
3599 }
3600 if (pgidstr != "meta" && (uint64_t)pgid.pgid.m_pool != (uint64_t)ghobj.hobj.pool) {
3601 cerr << "Object pool and pgid pool don't match" << std::endl;
3602 ret = 1;
3603 goto out;
3604 }
3605 }
3606 } catch (std::runtime_error& e) {
3607 cerr << e.what() << std::endl;
3608 ret = 1;
3609 goto out;
3610 }
3611 }
3612 }
3613
3614 // The ops which require --pgid option are checked here and
3615 // mentioned in the usage for --pgid.
3616 if ((op == "info" || op == "log" || op == "remove" || op == "export"
11fdf7f2
TL
3617 || op == "export-remove" || op == "mark-complete"
3618 || op == "reset-last-complete"
3619 || op == "trim-pg-log") &&
7c673cae
FG
3620 pgidstr.length() == 0) {
3621 cerr << "Must provide pgid" << std::endl;
3622 usage(desc);
3623 ret = 1;
3624 goto out;
3625 }
3626
3627 if (op == "import") {
3628
3629 try {
11fdf7f2 3630 ret = tool.do_import(fs, superblock, force, pgidstr);
7c673cae
FG
3631 }
3632 catch (const buffer::error &e) {
3633 cerr << "do_import threw exception error " << e.what() << std::endl;
3634 ret = -EFAULT;
3635 }
3636 if (ret == -EFAULT) {
3637 cerr << "Corrupt input for import" << std::endl;
3638 }
3639 if (ret == 0)
3640 cout << "Import successful" << std::endl;
3641 goto out;
3642 } else if (op == "dump-journal-mount") {
3643 // Undocumented feature to dump journal with mounted fs
3644 // This doesn't support the format option, but it uses the
3645 // ObjectStore::dump_journal() and mounts to get replay to run.
3646 ret = fs->dump_journal(cout);
3647 if (ret) {
3648 if (ret == -EOPNOTSUPP) {
3649 cerr << "Object store type \"" << type << "\" doesn't support journal dump" << std::endl;
3650 } else {
3651 cerr << "Journal dump failed with error " << cpp_strerror(ret) << std::endl;
3652 }
3653 }
3654 goto out;
3655 } else if (op == "get-osdmap") {
3656 bufferlist bl;
3657 OSDMap osdmap;
3658 if (epoch == 0) {
3659 epoch = superblock.current_epoch;
3660 }
3661 ret = get_osdmap(fs, epoch, osdmap, bl);
3662 if (ret) {
3663 cerr << "Failed to get osdmap#" << epoch << ": "
3664 << cpp_strerror(ret) << std::endl;
3665 goto out;
3666 }
3667 ret = bl.write_fd(file_fd);
3668 if (ret) {
3669 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
3670 } else {
3671 cout << "osdmap#" << epoch << " exported." << std::endl;
3672 }
3673 goto out;
3674 } else if (op == "set-osdmap") {
3675 bufferlist bl;
3676 ret = get_fd_data(file_fd, bl);
3677 if (ret < 0) {
3678 cerr << "Failed to read osdmap " << cpp_strerror(ret) << std::endl;
3679 } else {
11fdf7f2 3680 ret = set_osdmap(fs, epoch, bl, force);
7c673cae
FG
3681 }
3682 goto out;
3683 } else if (op == "get-inc-osdmap") {
3684 bufferlist bl;
3685 if (epoch == 0) {
3686 epoch = superblock.current_epoch;
3687 }
3688 ret = get_inc_osdmap(fs, epoch, bl);
3689 if (ret < 0) {
3690 cerr << "Failed to get incremental osdmap# " << epoch << ": "
3691 << cpp_strerror(ret) << std::endl;
3692 goto out;
3693 }
3694 ret = bl.write_fd(file_fd);
3695 if (ret) {
3696 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
3697 } else {
3698 cout << "inc-osdmap#" << epoch << " exported." << std::endl;
3699 }
3700 goto out;
3701 } else if (op == "set-inc-osdmap") {
3702 bufferlist bl;
3703 ret = get_fd_data(file_fd, bl);
3704 if (ret < 0) {
3705 cerr << "Failed to read incremental osdmap " << cpp_strerror(ret) << std::endl;
3706 goto out;
3707 } else {
11fdf7f2 3708 ret = set_inc_osdmap(fs, epoch, bl, force);
7c673cae
FG
3709 }
3710 goto out;
3711 } else if (op == "update-mon-db") {
3712 if (!vm.count("mon-store-path")) {
3713 cerr << "Please specify the path to monitor db to update" << std::endl;
3714 ret = -EINVAL;
3715 } else {
3716 ret = update_mon_db(*fs, superblock, dpath + "/keyring", mon_store_path);
3717 }
3718 goto out;
3719 }
3720
7c673cae 3721 if (op == "remove") {
3efd9988
FG
3722 if (!force && !dry_run) {
3723 cerr << "Please use export-remove or you must use --force option" << std::endl;
3724 ret = -EINVAL;
3725 goto out;
3726 }
11fdf7f2 3727 ret = initiate_new_remove_pg(fs, pgid);
7c673cae
FG
3728 if (ret < 0) {
3729 cerr << "PG '" << pgid << "' not found" << std::endl;
3730 goto out;
3731 }
3732 cout << "Remove successful" << std::endl;
3733 goto out;
3734 }
3735
3736 if (op == "fix-lost") {
3737 boost::scoped_ptr<action_on_object_t> action;
11fdf7f2 3738 action.reset(new do_fix_lost());
7c673cae
FG
3739 if (pgidstr.length())
3740 ret = action_on_all_objects_in_exact_pg(fs, coll_t(pgid), *action, debug);
3741 else
3742 ret = action_on_all_objects(fs, *action, debug);
3743 goto out;
3744 }
3745
3746 if (op == "list") {
3747 ret = do_list(fs, pgidstr, object, nspace, formatter, debug,
3748 human_readable, head);
3749 if (ret < 0) {
3750 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
3751 }
3752 goto out;
3753 }
3754
3755 if (op == "dump-super") {
3756 formatter->open_object_section("superblock");
3757 superblock.dump(formatter);
3758 formatter->close_section();
3759 formatter->flush(cout);
3760 cout << std::endl;
3761 goto out;
3762 }
3763
3764 if (op == "meta-list") {
3765 ret = do_meta(fs, object, formatter, debug, human_readable);
3766 if (ret < 0) {
3767 cerr << "do_meta failed: " << cpp_strerror(ret) << std::endl;
3768 }
3769 goto out;
3770 }
3771
3772 ret = fs->list_collections(ls);
3773 if (ret < 0) {
3774 cerr << "failed to list pgs: " << cpp_strerror(ret) << std::endl;
3775 goto out;
3776 }
3777
3778 if (debug && op == "list-pgs")
3779 cout << "Performing list-pgs operation" << std::endl;
3780
3781 // Find pg
3782 for (it = ls.begin(); it != ls.end(); ++it) {
3783 spg_t tmppgid;
3784
3785 if (pgidstr == "meta") {
3786 if (it->to_str() == "meta")
3787 break;
3788 else
3789 continue;
3790 }
3791
3792 if (!it->is_pg(&tmppgid)) {
3793 continue;
3794 }
3795
3796 if (it->is_temp(&tmppgid)) {
3797 continue;
3798 }
3799
3800 if (op != "list-pgs" && tmppgid != pgid) {
3801 continue;
3802 }
3803
3804 if (op != "list-pgs") {
3805 //Found!
3806 break;
3807 }
3808
3809 cout << tmppgid << std::endl;
3810 }
3811
3812 if (op == "list-pgs") {
3813 ret = 0;
3814 goto out;
3815 }
3816
3817 // If not an object command nor any of the ops handled below, then output this usage
3818 // before complaining about a bad pgid
11fdf7f2
TL
3819 if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log") {
3820 cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
3821 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log)"
7c673cae
FG
3822 << std::endl;
3823 usage(desc);
3824 ret = 1;
3825 goto out;
3826 }
3827 epoch_t map_epoch;
3828// The following code for export, info, log require omap or !skip-mount-omap
3829 if (it != ls.end()) {
3830
3831 coll_t coll = *it;
3832
3833 if (vm.count("objcmd")) {
3834 ret = 0;
3835 if (objcmd == "remove" || objcmd == "removeall") {
3836 bool all = (objcmd == "removeall");
11fdf7f2
TL
3837 enum rmtype type = BOTH;
3838 if (rmtypestr == "nosnapmap")
3839 type = NOSNAPMAP;
3840 else if (rmtypestr == "snapmap")
3841 type = SNAPMAP;
3842 ret = do_remove_object(fs, coll, ghobj, all, force, type);
7c673cae
FG
3843 goto out;
3844 } else if (objcmd == "list-attrs") {
3845 ret = do_list_attrs(fs, coll, ghobj);
3846 goto out;
3847 } else if (objcmd == "list-omap") {
3848 ret = do_list_omap(fs, coll, ghobj);
3849 goto out;
3850 } else if (objcmd == "get-bytes" || objcmd == "set-bytes") {
3851 if (objcmd == "get-bytes") {
3852 int fd;
3853 if (vm.count("arg1") == 0 || arg1 == "-") {
3854 fd = STDOUT_FILENO;
3855 } else {
3856 fd = open(arg1.c_str(), O_WRONLY|O_TRUNC|O_CREAT|O_EXCL|O_LARGEFILE, 0666);
3857 if (fd == -1) {
3858 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
3859 ret = 1;
3860 goto out;
3861 }
3862 }
3863 ret = do_get_bytes(fs, coll, ghobj, fd);
3864 if (fd != STDOUT_FILENO)
3865 close(fd);
3866 } else {
3867 int fd;
3868 if (vm.count("arg1") == 0 || arg1 == "-") {
3869 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
3870 if (isatty(STDIN_FILENO)) {
3871 cerr << "stdin is a tty and no file specified" << std::endl;
3872 ret = 1;
3873 goto out;
3874 }
3875 fd = STDIN_FILENO;
3876 } else {
3877 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
3878 if (fd == -1) {
3879 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
3880 ret = 1;
3881 goto out;
3882 }
3883 }
11fdf7f2 3884 ret = do_set_bytes(fs, coll, ghobj, fd);
7c673cae
FG
3885 if (fd != STDIN_FILENO)
3886 close(fd);
3887 }
3888 goto out;
3889 } else if (objcmd == "get-attr") {
3890 if (vm.count("arg1") == 0) {
3891 usage(desc);
3892 ret = 1;
3893 goto out;
3894 }
3895 ret = do_get_attr(fs, coll, ghobj, arg1);
3896 goto out;
3897 } else if (objcmd == "set-attr") {
3898 if (vm.count("arg1") == 0) {
3899 usage(desc);
3900 ret = 1;
3901 }
3902
3903 int fd;
3904 if (vm.count("arg2") == 0 || arg2 == "-") {
3905 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
3906 if (isatty(STDIN_FILENO)) {
3907 cerr << "stdin is a tty and no file specified" << std::endl;
3908 ret = 1;
3909 goto out;
3910 }
3911 fd = STDIN_FILENO;
3912 } else {
3913 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
3914 if (fd == -1) {
3915 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
3916 ret = 1;
3917 goto out;
3918 }
3919 }
11fdf7f2 3920 ret = do_set_attr(fs, coll, ghobj, arg1, fd);
7c673cae
FG
3921 if (fd != STDIN_FILENO)
3922 close(fd);
3923 goto out;
3924 } else if (objcmd == "rm-attr") {
3925 if (vm.count("arg1") == 0) {
3926 usage(desc);
3927 ret = 1;
3928 goto out;
3929 }
11fdf7f2 3930 ret = do_rm_attr(fs, coll, ghobj, arg1);
7c673cae
FG
3931 goto out;
3932 } else if (objcmd == "get-omap") {
3933 if (vm.count("arg1") == 0) {
3934 usage(desc);
3935 ret = 1;
3936 goto out;
3937 }
3938 ret = do_get_omap(fs, coll, ghobj, arg1);
3939 goto out;
3940 } else if (objcmd == "set-omap") {
3941 if (vm.count("arg1") == 0) {
3942 usage(desc);
3943 ret = 1;
3944 goto out;
3945 }
3946 int fd;
3947 if (vm.count("arg2") == 0 || arg2 == "-") {
3948 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
3949 if (isatty(STDIN_FILENO)) {
3950 cerr << "stdin is a tty and no file specified" << std::endl;
3951 ret = 1;
3952 goto out;
3953 }
3954 fd = STDIN_FILENO;
3955 } else {
3956 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
3957 if (fd == -1) {
3958 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
3959 ret = 1;
3960 goto out;
3961 }
3962 }
11fdf7f2 3963 ret = do_set_omap(fs, coll, ghobj, arg1, fd);
7c673cae
FG
3964 if (fd != STDIN_FILENO)
3965 close(fd);
3966 goto out;
3967 } else if (objcmd == "rm-omap") {
3968 if (vm.count("arg1") == 0) {
3969 usage(desc);
3970 ret = 1;
3971 goto out;
3972 }
11fdf7f2 3973 ret = do_rm_omap(fs, coll, ghobj, arg1);
7c673cae
FG
3974 goto out;
3975 } else if (objcmd == "get-omaphdr") {
3976 if (vm.count("arg1")) {
3977 usage(desc);
3978 ret = 1;
3979 goto out;
3980 }
3981 ret = do_get_omaphdr(fs, coll, ghobj);
3982 goto out;
3983 } else if (objcmd == "set-omaphdr") {
3984 // Extra arg
3985 if (vm.count("arg2")) {
3986 usage(desc);
3987 ret = 1;
3988 goto out;
3989 }
3990 int fd;
3991 if (vm.count("arg1") == 0 || arg1 == "-") {
3992 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
3993 if (isatty(STDIN_FILENO)) {
3994 cerr << "stdin is a tty and no file specified" << std::endl;
3995 ret = 1;
3996 goto out;
3997 }
3998 fd = STDIN_FILENO;
3999 } else {
4000 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4001 if (fd == -1) {
4002 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4003 ret = 1;
4004 goto out;
4005 }
4006 }
11fdf7f2 4007 ret = do_set_omaphdr(fs, coll, ghobj, fd);
7c673cae
FG
4008 if (fd != STDIN_FILENO)
4009 close(fd);
4010 goto out;
4011 } else if (objcmd == "dump") {
4012 // There should not be any other arguments
4013 if (vm.count("arg1") || vm.count("arg2")) {
4014 usage(desc);
4015 ret = 1;
4016 goto out;
4017 }
4018 ret = print_obj_info(fs, coll, ghobj, formatter);
4019 goto out;
1adf2230
AA
4020 } else if (objcmd == "corrupt-info") { // Undocumented testing feature
4021 // There should not be any other arguments
4022 if (vm.count("arg1") || vm.count("arg2")) {
4023 usage(desc);
4024 ret = 1;
4025 goto out;
4026 }
11fdf7f2 4027 ret = corrupt_info(fs, coll, ghobj, formatter);
1adf2230 4028 goto out;
b5b8bbf5
FG
4029 } else if (objcmd == "set-size" || objcmd == "corrupt-size") {
4030 // Undocumented testing feature
4031 bool corrupt = (objcmd == "corrupt-size");
7c673cae
FG
4032 // Extra arg
4033 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4034 usage(desc);
4035 ret = 1;
4036 goto out;
4037 }
4038 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4039 cerr << "Invalid size '" << arg1 << "' specified" << std::endl;
4040 ret = 1;
4041 goto out;
4042 }
4043 uint64_t size = atoll(arg1.c_str());
11fdf7f2 4044 ret = set_size(fs, coll, ghobj, size, formatter, corrupt);
7c673cae 4045 goto out;
11fdf7f2
TL
4046 } else if (objcmd == "clear-data-digest") {
4047 ret = clear_data_digest(fs, coll, ghobj);
4048 goto out;
7c673cae
FG
4049 } else if (objcmd == "clear-snapset") {
4050 // UNDOCUMENTED: For testing zap SnapSet
4051 // IGNORE extra args since not in usage anyway
4052 if (!ghobj.hobj.has_snapset()) {
4053 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4054 ret = 1;
4055 goto out;
4056 }
11fdf7f2 4057 ret = clear_snapset(fs, coll, ghobj, arg1);
7c673cae
FG
4058 goto out;
4059 } else if (objcmd == "remove-clone-metadata") {
4060 // Extra arg
4061 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4062 usage(desc);
4063 ret = 1;
4064 goto out;
4065 }
4066 if (!ghobj.hobj.has_snapset()) {
4067 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4068 ret = 1;
4069 goto out;
4070 }
4071 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4072 cerr << "Invalid cloneid '" << arg1 << "' specified" << std::endl;
4073 ret = 1;
4074 goto out;
4075 }
4076 snapid_t cloneid = atoi(arg1.c_str());
11fdf7f2 4077 ret = remove_clone(fs, coll, ghobj, cloneid, force);
7c673cae
FG
4078 goto out;
4079 }
4080 cerr << "Unknown object command '" << objcmd << "'" << std::endl;
4081 usage(desc);
4082 ret = 1;
4083 goto out;
4084 }
4085
7c673cae 4086 map_epoch = 0;
11fdf7f2 4087 ret = PG::peek_map_epoch(fs, pgid, &map_epoch);
7c673cae
FG
4088 if (ret < 0)
4089 cerr << "peek_map_epoch reports error" << std::endl;
4090 if (debug)
4091 cerr << "map_epoch " << map_epoch << std::endl;
4092
4093 pg_info_t info(pgid);
4094 PastIntervals past_intervals;
4095 __u8 struct_ver;
11fdf7f2 4096 ret = PG::read_info(fs, pgid, coll, info, past_intervals, struct_ver);
7c673cae
FG
4097 if (ret < 0) {
4098 cerr << "read_info error " << cpp_strerror(ret) << std::endl;
4099 goto out;
4100 }
11fdf7f2 4101 if (struct_ver < PG::get_compat_struct_v()) {
7c673cae
FG
4102 cerr << "PG is too old to upgrade, use older Ceph version" << std::endl;
4103 ret = -EFAULT;
4104 goto out;
4105 }
4106 if (debug)
4107 cerr << "struct_v " << (int)struct_ver << std::endl;
4108
3efd9988 4109 if (op == "export" || op == "export-remove") {
7c673cae 4110 ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, superblock, past_intervals);
3efd9988 4111 if (ret == 0) {
7c673cae 4112 cerr << "Export successful" << std::endl;
3efd9988 4113 if (op == "export-remove") {
11fdf7f2 4114 ret = initiate_new_remove_pg(fs, pgid);
3efd9988 4115 // Export succeeded, so pgid is there
11fdf7f2 4116 ceph_assert(ret == 0);
3efd9988
FG
4117 cerr << "Remove successful" << std::endl;
4118 }
4119 }
7c673cae
FG
4120 } else if (op == "info") {
4121 formatter->open_object_section("info");
4122 info.dump(formatter);
4123 formatter->close_section();
4124 formatter->flush(cout);
4125 cout << std::endl;
4126 } else if (op == "log") {
4127 PGLog::IndexedLog log;
4128 pg_missing_t missing;
11fdf7f2 4129 ret = get_log(fs, struct_ver, pgid, info, log, missing);
7c673cae
FG
4130 if (ret < 0)
4131 goto out;
4132
4133 dump_log(formatter, cout, log, missing);
7c673cae
FG
4134 } else if (op == "mark-complete") {
4135 ObjectStore::Transaction tran;
4136 ObjectStore::Transaction *t = &tran;
4137
11fdf7f2 4138 if (struct_ver < PG::get_compat_struct_v()) {
7c673cae 4139 cerr << "Can't mark-complete, version mismatch " << (int)struct_ver
11fdf7f2 4140 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
7c673cae
FG
4141 << std::endl;
4142 ret = 1;
4143 goto out;
4144 }
4145
4146 cout << "Marking complete " << std::endl;
4147
4148 info.last_update = eversion_t(superblock.current_epoch, info.last_update.version + 1);
4149 info.last_backfill = hobject_t::get_max();
4150 info.last_epoch_started = superblock.current_epoch;
4151 info.history.last_epoch_started = superblock.current_epoch;
4152 info.history.last_epoch_clean = superblock.current_epoch;
4153 past_intervals.clear();
4154
4155 if (!dry_run) {
4156 ret = write_info(*t, map_epoch, info, past_intervals);
4157 if (ret != 0)
4158 goto out;
11fdf7f2
TL
4159 auto ch = fs->open_collection(coll_t(pgid));
4160 fs->queue_transaction(ch, std::move(*t));
7c673cae
FG
4161 }
4162 cout << "Marking complete succeeded" << std::endl;
94b18763 4163 } else if (op == "trim-pg-log") {
11fdf7f2 4164 ret = do_trim_pg_log(fs, coll, info, pgid,
94b18763
FG
4165 map_epoch, past_intervals);
4166 if (ret < 0) {
4167 cerr << "Error trimming pg log: " << cpp_strerror(ret) << std::endl;
4168 goto out;
4169 }
4170 cout << "Finished trimming pg log" << std::endl;
4171 goto out;
11fdf7f2
TL
4172 } else if (op == "reset-last-complete") {
4173 if (!force) {
4174 std::cerr << "WARNING: reset-last-complete is extremely dangerous and almost "
4175 << "certain to lead to permanent data loss unless you know exactly "
4176 << "what you are doing. Pass --force to proceed anyway."
4177 << std::endl;
4178 ret = -EINVAL;
4179 goto out;
4180 }
4181 ObjectStore::Transaction tran;
4182 ObjectStore::Transaction *t = &tran;
4183
4184 if (struct_ver < PG::get_compat_struct_v()) {
4185 cerr << "Can't reset-last-complete, version mismatch " << (int)struct_ver
4186 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4187 << std::endl;
4188 ret = 1;
4189 goto out;
4190 }
4191
4192 cout << "Reseting last_complete " << std::endl;
4193
4194 info.last_complete = info.last_update;
4195
4196 if (!dry_run) {
4197 ret = write_info(*t, map_epoch, info, past_intervals);
4198 if (ret != 0)
4199 goto out;
4200 fs->queue_transaction(ch, std::move(*t));
4201 }
4202 cout << "Reseting last_complete succeeded" << std::endl;
4203
7c673cae 4204 } else {
11fdf7f2 4205 ceph_assert(!"Should have already checked for valid --op");
7c673cae
FG
4206 }
4207 } else {
4208 cerr << "PG '" << pgid << "' not found" << std::endl;
4209 ret = -ENOENT;
4210 }
4211
4212out:
4213 int r = fs->umount();
7c673cae
FG
4214 if (r < 0) {
4215 cerr << "umount failed: " << cpp_strerror(r) << std::endl;
4216 // If no previous error, then use umount() error
4217 if (ret == 0)
4218 ret = r;
4219 }
4220
4221 if (dry_run) {
4222 // Export output can go to stdout, so put this message on stderr
4223 if (op == "export")
4224 cerr << "dry-run: Nothing changed" << std::endl;
4225 else
4226 cout << "dry-run: Nothing changed" << std::endl;
4227 }
4228
4229 if (ret < 0)
4230 ret = 1;
4231 return ret;
4232}