]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/ceph_objectstore_tool.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / tools / ceph_objectstore_tool.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <boost/program_options/variables_map.hpp>
16#include <boost/program_options/parsers.hpp>
2a845540 17#include <boost/algorithm/string.hpp>
7c673cae
FG
18#include <boost/scoped_ptr.hpp>
19#include <boost/optional.hpp>
2a845540 20#include <fstream>
7c673cae
FG
21
22#include <stdlib.h>
23
24#include "common/Formatter.h"
25#include "common/errno.h"
26#include "common/ceph_argparse.h"
9f95a23c 27#include "common/url_escape.h"
7c673cae
FG
28
29#include "global/global_init.h"
30
31#include "os/ObjectStore.h"
7c673cae
FG
32#ifdef HAVE_LIBFUSE
33#include "os/FuseStore.h"
34#endif
35
36#include "osd/PGLog.h"
37#include "osd/OSD.h"
38#include "osd/PG.h"
a8e16298 39#include "osd/ECUtil.h"
7c673cae
FG
40
41#include "json_spirit/json_spirit_value.h"
42#include "json_spirit/json_spirit_reader.h"
43
44#include "rebuild_mondb.h"
45#include "ceph_objectstore_tool.h"
46#include "include/compat.h"
47#include "include/util.h"
48
20effc67 49using namespace std;
7c673cae 50namespace po = boost::program_options;
7c673cae
FG
51
52#ifdef INTERNAL_TEST
53CompatSet get_test_compat_set() {
54 CompatSet::FeatureSet ceph_osd_feature_compat;
55 CompatSet::FeatureSet ceph_osd_feature_ro_compat;
56 CompatSet::FeatureSet ceph_osd_feature_incompat;
57 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
58 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_PGINFO);
59 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_OLOC);
60 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEC);
61 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES);
62 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL);
63 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
64 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
65 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
66#ifdef INTERNAL_TEST2
67 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER);
68 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
69#endif
70 return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
71 ceph_osd_feature_incompat);
72}
73#endif
74
// Largest number of bytes requested by a single chunked read in this file
// (passed to bufferlist::read_fd() and used to size object data reads).
75const ssize_t max_read = 1024 * 1024;
// Sentinel meaning "no file descriptor"; file_fd is initialised to it.
76const int fd_none = INT_MIN;
// NOTE(review): presumably records whether stdout is a terminal; it is
// neither set nor read in the visible part of the file -- confirm.
77bool outistty;
// When true, the functions below report what they would do but skip queuing
// transactions and compacting the store.
3efd9988 78bool dry_run;
7c673cae
FG
79
80struct action_on_object_t {
81 virtual ~action_on_object_t() {}
11fdf7f2 82 virtual void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) = 0;
7c673cae
FG
83};
84
85int _action_on_all_objects_in_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
86{
11fdf7f2 87 auto ch = store->open_collection(coll);
20effc67 88
7c673cae
FG
89 unsigned LIST_AT_A_TIME = 100;
90 ghobject_t next;
91 while (!next.is_max()) {
92 vector<ghobject_t> list;
11fdf7f2 93 int r = store->collection_list(ch,
7c673cae
FG
94 next,
95 ghobject_t::get_max(),
96 LIST_AT_A_TIME,
97 &list,
98 &next);
99 if (r < 0) {
100 cerr << "Error listing collection: " << coll << ", "
101 << cpp_strerror(r) << std::endl;
102 return r;
103 }
104 for (vector<ghobject_t>::iterator obj = list.begin();
105 obj != list.end();
106 ++obj) {
7c673cae
FG
107 object_info_t oi;
108 if (coll != coll_t::meta()) {
109 bufferlist attr;
11fdf7f2 110 r = store->getattr(ch, *obj, OI_ATTR, attr);
7c673cae
FG
111 if (r < 0) {
112 cerr << "Error getting attr on : " << make_pair(coll, *obj) << ", "
113 << cpp_strerror(r) << std::endl;
11fdf7f2
TL
114 } else {
115 auto bp = attr.cbegin();
116 try {
117 decode(oi, bp);
118 } catch (...) {
119 r = -EINVAL;
120 cerr << "Error decoding attr on : " << make_pair(coll, *obj) << ", "
121 << cpp_strerror(r) << std::endl;
122 }
123 }
7c673cae 124 }
11fdf7f2 125 action.call(store, coll, *obj, oi);
7c673cae
FG
126 }
127 }
128 return 0;
129}
130
131int action_on_all_objects_in_pg(ObjectStore *store, string pgidstr, action_on_object_t &action, bool debug)
132{
133 spg_t pgid;
134 // Scan collections in case this is an ec pool but no shard specified
135 unsigned scanned = 0;
136 int r = 0;
137 vector<coll_t> colls_to_check;
138 vector<coll_t> candidates;
20effc67 139
7c673cae
FG
140 r = store->list_collections(candidates);
141 if (r < 0) {
142 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
143 return r;
144 }
145 pgid.parse(pgidstr.c_str());
146 for (vector<coll_t>::iterator i = candidates.begin();
147 i != candidates.end();
148 ++i) {
149 spg_t cand_pgid;
20effc67
TL
150 if (i->is_meta() && pgidstr == "meta") {
151 colls_to_check.push_back(*i);
152 continue;
153 }
7c673cae
FG
154 if (!i->is_pg(&cand_pgid))
155 continue;
156
157 // If an exact match or treat no shard as any shard
158 if (cand_pgid == pgid ||
159 (pgid.is_no_shard() && pgid.pgid == cand_pgid.pgid)) {
160 colls_to_check.push_back(*i);
161 }
162 }
163
164 if (debug)
165 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
166 for (vector<coll_t>::iterator i = colls_to_check.begin();
167 i != colls_to_check.end();
168 ++i, ++scanned) {
169 if (debug)
170 cerr << "Scanning " << *i << ", " << scanned << "/"
171 << colls_to_check.size() << " completed" << std::endl;
172 r = _action_on_all_objects_in_pg(store, *i, action, debug);
173 if (r < 0)
174 break;
175 }
176 return r;
177}
178
179int action_on_all_objects_in_exact_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
180{
181 int r = _action_on_all_objects_in_pg(store, coll, action, debug);
182 return r;
183}
184
185int _action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
186{
187 unsigned scanned = 0;
188 int r = 0;
189 vector<coll_t> colls_to_check;
190 vector<coll_t> candidates;
191 r = store->list_collections(candidates);
192 if (r < 0) {
193 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
194 return r;
195 }
196 for (vector<coll_t>::iterator i = candidates.begin();
197 i != candidates.end();
198 ++i) {
199 if (i->is_pg()) {
200 colls_to_check.push_back(*i);
201 }
202 }
203
204 if (debug)
205 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
206 for (vector<coll_t>::iterator i = colls_to_check.begin();
207 i != colls_to_check.end();
208 ++i, ++scanned) {
209 if (debug)
210 cerr << "Scanning " << *i << ", " << scanned << "/"
211 << colls_to_check.size() << " completed" << std::endl;
212 r = _action_on_all_objects_in_pg(store, *i, action, debug);
213 if (r < 0)
214 return r;
215 }
216 return 0;
217}
218
219int action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
220{
221 int r = _action_on_all_objects(store, action, debug);
222 return r;
223}
224
225struct pgid_object_list {
226 list<pair<coll_t, ghobject_t> > _objects;
227
228 void insert(coll_t coll, ghobject_t &ghobj) {
229 _objects.push_back(make_pair(coll, ghobj));
230 }
231
232 void dump(Formatter *f, bool human_readable) const {
233 if (!human_readable)
234 f->open_array_section("pgid_objects");
235 for (list<pair<coll_t, ghobject_t> >::const_iterator i = _objects.begin();
236 i != _objects.end();
237 ++i) {
238 f->open_array_section("pgid_object");
239 spg_t pgid;
240 bool is_pg = i->first.is_pg(&pgid);
241 if (is_pg)
242 f->dump_string("pgid", stringify(pgid));
243 if (!is_pg || !human_readable)
244 f->dump_string("coll", i->first.to_str());
245 f->open_object_section("ghobject");
246 i->second.dump(f);
247 f->close_section();
248 f->close_section();
249 if (human_readable) {
250 f->flush(cout);
251 cout << std::endl;
252 }
253 }
254 if (!human_readable) {
255 f->close_section();
256 f->flush(cout);
257 cout << std::endl;
258 }
259 }
260};
261
262struct lookup_ghobject : public action_on_object_t {
263 pgid_object_list _objects;
264 const string _name;
265 const boost::optional<std::string> _namespace;
266 bool _need_snapset;
267
268 lookup_ghobject(const string& name, const boost::optional<std::string>& nspace, bool need_snapset = false) : _name(name),
269 _namespace(nspace), _need_snapset(need_snapset) { }
270
11fdf7f2 271 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
7c673cae 272 if (_need_snapset && !ghobj.hobj.has_snapset())
11fdf7f2 273 return;
7c673cae
FG
274 if ((_name.length() == 0 || ghobj.hobj.oid.name == _name) &&
275 (!_namespace || ghobj.hobj.nspace == _namespace))
276 _objects.insert(coll, ghobj);
11fdf7f2 277 return;
7c673cae
FG
278 }
279
280 int size() const {
281 return _objects._objects.size();
282 }
283
284 pair<coll_t, ghobject_t> pop() {
285 pair<coll_t, ghobject_t> front = _objects._objects.front();
286 _objects._objects.pop_front();
287 return front;
288 }
289
290 void dump(Formatter *f, bool human_readable) const {
291 _objects.dump(f, human_readable);
292 }
293};
294
9f95a23c
TL
295struct lookup_slow_ghobject : public action_on_object_t {
296 list<tuple<
297 coll_t,
298 ghobject_t,
299 ceph::signedspan,
300 ceph::signedspan,
301 ceph::signedspan,
302 string> > _objects;
303 const string _name;
304 double threshold;
305
306 coll_t last_coll;
307
308 lookup_slow_ghobject(const string& name, double _threshold) :
309 _name(name), threshold(_threshold) { }
310
311 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
312 ObjectMap::ObjectMapIterator iter;
313 auto start1 = mono_clock::now();
314 ceph::signedspan first_seek_time = start1 - start1;
315 ceph::signedspan last_seek_time = first_seek_time;
316 ceph::signedspan total_time = first_seek_time;
317 {
318 auto ch = store->open_collection(coll);
319 iter = store->get_omap_iterator(ch, ghobj);
320 if (!iter) {
321 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT)
322 << " obj:" << ghobj
323 << std::endl;
324 return;
325 }
326 auto start = mono_clock::now();
327 iter->seek_to_first();
328 first_seek_time = mono_clock::now() - start;
329
330 while(iter->valid()) {
331 start = mono_clock::now();
332 iter->next();
333 last_seek_time = mono_clock::now() - start;
334 }
335 }
336
337 if (coll != last_coll) {
338 cerr << ">>> inspecting coll" << coll << std::endl;
339 last_coll = coll;
340 }
341
342 total_time = mono_clock::now() - start1;
343 if ( total_time >= make_timespan(threshold)) {
344 _objects.emplace_back(coll, ghobj,
345 first_seek_time, last_seek_time, total_time,
346 url_escape(iter->tail_key()));
347 cerr << ">>>>> found obj " << ghobj
348 << " first_seek_time "
349 << std::chrono::duration_cast<std::chrono::seconds>(first_seek_time).count()
350 << " last_seek_time "
351 << std::chrono::duration_cast<std::chrono::seconds>(last_seek_time).count()
352 << " total_time "
353 << std::chrono::duration_cast<std::chrono::seconds>(total_time).count()
354 << " tail key: " << url_escape(iter->tail_key())
355 << std::endl;
356 }
357 return;
358 }
359
360 int size() const {
361 return _objects.size();
362 }
363
364 void dump(Formatter *f, bool human_readable) const {
365 if (!human_readable)
366 f->open_array_section("objects");
367 for (auto i = _objects.begin();
368 i != _objects.end();
369 ++i) {
370 f->open_array_section("object");
371 coll_t coll;
372 ghobject_t ghobj;
373 ceph::signedspan first_seek_time;
374 ceph::signedspan last_seek_time;
375 ceph::signedspan total_time;
376 string tail_key;
377 std::tie(coll, ghobj, first_seek_time, last_seek_time, total_time, tail_key) = *i;
378
379 spg_t pgid;
380 bool is_pg = coll.is_pg(&pgid);
381 if (is_pg)
382 f->dump_string("pgid", stringify(pgid));
383 if (!is_pg || !human_readable)
384 f->dump_string("coll", coll.to_str());
385 f->dump_object("ghobject", ghobj);
386 f->open_object_section("times");
387 f->dump_int("first_seek_time",
388 std::chrono::duration_cast<std::chrono::seconds>(first_seek_time).count());
389 f->dump_int("last_seek_time",
390 std::chrono::duration_cast<std::chrono::seconds>
391 (last_seek_time).count());
392 f->dump_int("total_time",
393 std::chrono::duration_cast<std::chrono::seconds>(total_time).count());
394 f->dump_string("tail_key", tail_key);
395 f->close_section();
396
397 f->close_section();
398 if (human_readable) {
399 f->flush(cout);
400 cout << std::endl;
401 }
402 }
403 if (!human_readable) {
404 f->close_section();
405 f->flush(cout);
406 cout << std::endl;
407 }
408 }
409};
410
// Descriptor that export sections are written to (passed to write_section()
// and write_simple()); fd_none until something assigns a real descriptor.
7c673cae 411int file_fd = fd_none;
// Enables additional diagnostic output on cerr throughout this file.
3efd9988 412bool debug;
// NOTE(review): presumably the global "--force" switch; the functions visible
// here take their own `force` parameter instead -- confirm against callers.
11fdf7f2 413bool force = false;
9f95a23c
TL
// NOTE(review): not referenced in the visible part of the file; presumably
// tells the tool to proceed without an OSD superblock -- confirm.
414bool no_superblock = false;
415
// NOTE(review): export "super header" state; its use is outside the visible
// part of the file -- confirm.
7c673cae 416super_header sh;
7c673cae
FG
417
418static int get_fd_data(int fd, bufferlist &bl)
419{
420 uint64_t total = 0;
421 do {
422 ssize_t bytes = bl.read_fd(fd, max_read);
423 if (bytes < 0) {
424 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
425 return bytes;
426 }
427
428 if (bytes == 0)
429 break;
430
431 total += bytes;
432 } while(true);
433
11fdf7f2 434 ceph_assert(bl.length() == total);
7c673cae
FG
435 return 0;
436}
437
2a845540 438int get_log(CephContext *cct, ObjectStore *fs, __u8 struct_ver,
11fdf7f2 439 spg_t pgid, const pg_info_t &info,
7c673cae
FG
440 PGLog::IndexedLog &log, pg_missing_t &missing)
441{
442 try {
11fdf7f2
TL
443 auto ch = fs->open_collection(coll_t(pgid));
444 if (!ch) {
445 return -ENOENT;
446 }
7c673cae 447 ostringstream oss;
11fdf7f2
TL
448 ceph_assert(struct_ver > 0);
449 PGLog::read_log_and_missing(
2a845540 450 cct, fs, ch,
11fdf7f2
TL
451 pgid.make_pgmeta_oid(),
452 info, log, missing,
453 oss,
454 g_ceph_context->_conf->osd_ignore_stale_divergent_priors);
7c673cae
FG
455 if (debug && oss.str().size())
456 cerr << oss.str() << std::endl;
457 }
458 catch (const buffer::error &e) {
459 cerr << "read_log_and_missing threw exception error " << e.what() << std::endl;
460 return -EFAULT;
461 }
462 return 0;
463}
464
465void dump_log(Formatter *formatter, ostream &out, pg_log_t &log,
466 pg_missing_t &missing)
467{
468 formatter->open_object_section("op_log");
469 formatter->open_object_section("pg_log_t");
470 log.dump(formatter);
471 formatter->close_section();
472 formatter->flush(out);
473 formatter->open_object_section("pg_missing_t");
474 missing.dump(formatter);
475 formatter->close_section();
7c673cae
FG
476 formatter->close_section();
477 formatter->flush(out);
478}
479
480//Based on part of OSD::load_pgs()
481int finish_remove_pgs(ObjectStore *store)
482{
483 vector<coll_t> ls;
484 int r = store->list_collections(ls);
485 if (r < 0) {
486 cerr << "finish_remove_pgs: failed to list pgs: " << cpp_strerror(r)
487 << std::endl;
488 return r;
489 }
490
491 for (vector<coll_t>::iterator it = ls.begin();
492 it != ls.end();
493 ++it) {
494 spg_t pgid;
495
496 if (it->is_temp(&pgid) ||
497 (it->is_pg(&pgid) && PG::_has_removal_flag(store, pgid))) {
498 cout << "finish_remove_pgs " << *it << " removing " << pgid << std::endl;
499 OSD::recursive_remove_collection(g_ceph_context, store, pgid, *it);
500 continue;
501 }
502
503 //cout << "finish_remove_pgs ignoring unrecognized " << *it << std::endl;
504 }
505 return 0;
506}
507
508#pragma GCC diagnostic ignored "-Wpragmas"
509#pragma GCC diagnostic push
510#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
511
512int mark_pg_for_removal(ObjectStore *fs, spg_t pgid, ObjectStore::Transaction *t)
513{
514 pg_info_t info(pgid);
515 coll_t coll(pgid);
516 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
517
7c673cae 518 epoch_t map_epoch = 0;
11fdf7f2 519 int r = PG::peek_map_epoch(fs, pgid, &map_epoch);
7c673cae
FG
520 if (r < 0)
521 cerr << __func__ << " warning: peek_map_epoch reported error" << std::endl;
522 PastIntervals past_intervals;
523 __u8 struct_v;
11fdf7f2 524 r = PG::read_info(fs, pgid, coll, info, past_intervals, struct_v);
7c673cae
FG
525 if (r < 0) {
526 cerr << __func__ << " error on read_info " << cpp_strerror(r) << std::endl;
527 return r;
528 }
11fdf7f2 529 ceph_assert(struct_v >= 8);
7c673cae
FG
530 // new omap key
531 cout << "setting '_remove' omap key" << std::endl;
532 map<string,bufferlist> values;
11fdf7f2 533 encode((char)1, values["_remove"]);
7c673cae
FG
534 t->omap_setkeys(coll, pgmeta_oid, values);
535 return 0;
536}
537
538#pragma GCC diagnostic pop
539#pragma GCC diagnostic warning "-Wpragmas"
540
11fdf7f2
TL
541template<typename Func>
542void wait_until_done(ObjectStore::Transaction* txn, Func&& func)
543{
544 bool finished = false;
545 std::condition_variable cond;
546 std::mutex m;
9f95a23c 547 txn->register_on_complete(make_lambda_context([&](int) {
11fdf7f2
TL
548 std::unique_lock lock{m};
549 finished = true;
550 cond.notify_one();
551 }));
552 std::move(func)();
553 std::unique_lock lock{m};
554 cond.wait(lock, [&] {return finished;});
555}
556
557int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid)
7c673cae
FG
558{
559 if (!dry_run)
560 finish_remove_pgs(store);
561 if (!store->collection_exists(coll_t(r_pgid)))
562 return -ENOENT;
563
564 cout << " marking collection for removal" << std::endl;
565 if (dry_run)
566 return 0;
567 ObjectStore::Transaction rmt;
568 int r = mark_pg_for_removal(store, r_pgid, &rmt);
569 if (r < 0) {
570 return r;
571 }
11fdf7f2
TL
572 ObjectStore::CollectionHandle ch = store->open_collection(coll_t(r_pgid));
573 store->queue_transaction(ch, std::move(rmt));
7c673cae
FG
574 finish_remove_pgs(store);
575 return r;
576}
577
578int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
579 PastIntervals &past_intervals)
580{
581 //Empty for this
582 coll_t coll(info.pgid);
583 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
584 map<string,bufferlist> km;
9f95a23c 585 string key_to_remove;
7c673cae 586 pg_info_t last_written_info;
9f95a23c 587 int ret = prepare_info_keymap(
7c673cae 588 g_ceph_context,
9f95a23c
TL
589 &km, &key_to_remove,
590 epoch,
7c673cae
FG
591 info,
592 last_written_info,
593 past_intervals,
594 true, true, false);
595 if (ret) cerr << "Failed to write info" << std::endl;
596 t.omap_setkeys(coll, pgmeta_oid, km);
9f95a23c
TL
597 if (!key_to_remove.empty()) {
598 t.omap_rmkey(coll, pgmeta_oid, key_to_remove);
599 }
7c673cae
FG
600 return ret;
601}
602
603typedef map<eversion_t, hobject_t> divergent_priors_t;
604
605int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
606 pg_log_t &log, PastIntervals &past_intervals,
607 divergent_priors_t &divergent,
608 pg_missing_t &missing)
609{
11fdf7f2 610 cout << __func__ << " epoch " << epoch << " info " << info << std::endl;
7c673cae
FG
611 int ret = write_info(t, epoch, info, past_intervals);
612 if (ret)
613 return ret;
20effc67 614
7c673cae
FG
615 coll_t coll(info.pgid);
616 map<string,bufferlist> km;
20effc67 617 const bool require_rollback = !info.pgid.is_no_shard();
7c673cae 618 if (!divergent.empty()) {
11fdf7f2 619 ceph_assert(missing.get_items().empty());
7c673cae 620 PGLog::write_log_and_missing_wo_missing(
20effc67
TL
621 t, &km, log, coll, info.pgid.make_pgmeta_oid(), divergent,
622 require_rollback);
7c673cae
FG
623 } else {
624 pg_missing_tracker_t tmissing(missing);
c07f9fc5 625 bool rebuilt_missing_set_with_deletes = missing.may_include_deletes;
7c673cae 626 PGLog::write_log_and_missing(
20effc67
TL
627 t, &km, log, coll, info.pgid.make_pgmeta_oid(), tmissing,
628 require_rollback,
c07f9fc5 629 &rebuilt_missing_set_with_deletes);
7c673cae
FG
630 }
631 t.omap_setkeys(coll, info.pgid.make_pgmeta_oid(), km);
632 return 0;
633}
634
94b18763
FG
635int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
636 pg_info_t &info, const spg_t &pgid,
11fdf7f2 637 epoch_t map_epoch,
94b18763
FG
638 PastIntervals &past_intervals)
639{
640 ghobject_t oid = pgid.make_pgmeta_oid();
641 struct stat st;
11fdf7f2
TL
642 auto ch = store->open_collection(coll);
643 int r = store->stat(ch, oid, &st);
644 ceph_assert(r == 0);
645 ceph_assert(st.st_size == 0);
94b18763
FG
646
647 cerr << "Log bounds are: " << "(" << info.log_tail << ","
648 << info.last_update << "]" << std::endl;
649
650 uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
651 if (info.last_update.version - info.log_tail.version <= max_entries) {
652 cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
653 return 0;
654 }
655
11fdf7f2 656 ceph_assert(info.last_update.version > max_entries);
94b18763
FG
657 version_t trim_to = info.last_update.version - max_entries;
658 size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
659 eversion_t new_tail;
660 bool done = false;
661
662 while (!done) {
663 // gather keys so we can delete them in a batch without
664 // affecting the iterator
665 set<string> keys_to_trim;
666 {
11fdf7f2 667 ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
94b18763
FG
668 if (!p)
669 break;
11fdf7f2 670 for (p->seek_to_first(); p->valid(); p->next()) {
94b18763
FG
671 if (p->key()[0] == '_')
672 continue;
673 if (p->key() == "can_rollback_to")
674 continue;
675 if (p->key() == "divergent_priors")
676 continue;
677 if (p->key() == "rollback_info_trimmed_to")
678 continue;
679 if (p->key() == "may_include_deletes_in_missing")
680 continue;
681 if (p->key().substr(0, 7) == string("missing"))
682 continue;
683 if (p->key().substr(0, 4) == string("dup_"))
684 continue;
685
686 bufferlist bl = p->value();
11fdf7f2 687 auto bp = bl.cbegin();
94b18763
FG
688 pg_log_entry_t e;
689 try {
690 e.decode_with_checksum(bp);
691 } catch (const buffer::error &e) {
f67539c2 692 cerr << "Error reading pg log entry: " << e.what() << std::endl;
94b18763
FG
693 }
694 if (debug) {
695 cerr << "read entry " << e << std::endl;
696 }
697 if (e.version.version > trim_to) {
698 done = true;
699 break;
700 }
701 keys_to_trim.insert(p->key());
702 new_tail = e.version;
703 if (keys_to_trim.size() >= trim_at_once)
704 break;
705 }
706
707 if (!p->valid())
708 done = true;
709 } // deconstruct ObjectMapIterator
710
711 // delete the keys
712 if (!dry_run && !keys_to_trim.empty()) {
713 cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
714 ObjectStore::Transaction t;
715 t.omap_rmkeys(coll, oid, keys_to_trim);
11fdf7f2
TL
716 store->queue_transaction(ch, std::move(t));
717 ch->flush();
94b18763
FG
718 }
719 }
720
721 // update pg info with new tail
722 if (!dry_run && new_tail != eversion_t()) {
723 info.log_tail = new_tail;
724 ObjectStore::Transaction t;
725 int ret = write_info(t, map_epoch, info, past_intervals);
726 if (ret)
727 return ret;
11fdf7f2
TL
728 store->queue_transaction(ch, std::move(t));
729 ch->flush();
94b18763
FG
730 }
731
732 // compact the db since we just removed a bunch of data
733 cerr << "Finished trimming, now compacting..." << std::endl;
734 if (!dry_run)
735 store->compact();
736 return 0;
737}
738
33c7a0ef
TL
739int do_trim_pg_log_dups(ObjectStore *store, const coll_t &coll,
740 pg_info_t &info, const spg_t &pgid,
741 epoch_t map_epoch,
742 PastIntervals &past_intervals)
743{
744 ghobject_t oid = pgid.make_pgmeta_oid();
745 struct stat st;
746 auto ch = store->open_collection(coll);
747 int r = store->stat(ch, oid, &st);
748 ceph_assert(r == 0);
749 ceph_assert(st.st_size == 0);
750
751 const size_t max_dup_entries = g_ceph_context->_conf->osd_pg_log_dups_tracked;
752 ceph_assert(max_dup_entries > 0);
753 const size_t max_chunk_size = g_ceph_context->_conf->osd_pg_log_trim_max;
754 ceph_assert(max_chunk_size > 0);
755
756 cout << "max_dup_entries=" << max_dup_entries
757 << " max_chunk_size=" << max_chunk_size << std::endl;
758 if (dry_run) {
759 cout << "Dry run enabled, so when many chunks are needed,"
760 << " the trimming will never stop!" << std::endl;
761 }
762
763 set<string> keys_to_keep;
764 size_t num_removed = 0;
765 do {
766 set<string> keys_to_trim;
767 {
768 ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
769 if (!p)
770 break;
771 for (p->seek_to_first(); p->valid(); p->next()) {
772 if (p->key()[0] == '_')
773 continue;
774 if (p->key() == "can_rollback_to")
775 continue;
776 if (p->key() == "divergent_priors")
777 continue;
778 if (p->key() == "rollback_info_trimmed_to")
779 continue;
780 if (p->key() == "may_include_deletes_in_missing")
781 continue;
782 if (p->key().substr(0, 7) == string("missing"))
783 continue;
784 if (p->key().substr(0, 4) != string("dup_"))
785 continue;
786 keys_to_keep.insert(p->key());
787 if (keys_to_keep.size() > max_dup_entries) {
788 auto oldest_to_keep = keys_to_keep.begin();
789 keys_to_trim.emplace(*oldest_to_keep);
790 keys_to_keep.erase(oldest_to_keep);
791 }
792 if (keys_to_trim.size() >= max_chunk_size) {
793 break;
794 }
795 }
796 } // deconstruct ObjectMapIterator
797 // delete the keys
798 num_removed = keys_to_trim.size();
799 if (!dry_run && !keys_to_trim.empty()) {
800 cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
801 ObjectStore::Transaction t;
802 t.omap_rmkeys(coll, oid, keys_to_trim);
803 store->queue_transaction(ch, std::move(t));
804 ch->flush();
805 }
806 } while (num_removed == max_chunk_size);
807
808 // compact the db since we just removed a bunch of data
809 cerr << "Finished trimming, now compacting..." << std::endl;
810 if (!dry_run)
811 store->compact();
812 return 0;
813}
814
7c673cae
FG
815const int OMAP_BATCH_SIZE = 25;
816void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map<string, bufferlist> &oset)
817{
818 oset.clear();
819 for (int count = OMAP_BATCH_SIZE; count && iter->valid(); --count, iter->next()) {
820 oset.insert(pair<string, bufferlist>(iter->key(), iter->value()));
821 }
822}
823
824int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj)
825{
826 struct stat st;
827 mysize_t total;
828 footer ft;
829
11fdf7f2
TL
830 auto ch = store->open_collection(cid);
831 int ret = store->stat(ch, obj, &st);
7c673cae
FG
832 if (ret < 0)
833 return ret;
834
835 cerr << "Read " << obj << std::endl;
836
837 total = st.st_size;
838 if (debug)
839 cerr << "size=" << total << std::endl;
840
841 object_begin objb(obj);
842
843 {
844 bufferptr bp;
845 bufferlist bl;
11fdf7f2 846 ret = store->getattr(ch, obj, OI_ATTR, bp);
7c673cae
FG
847 if (ret < 0) {
848 cerr << "getattr failure object_info " << ret << std::endl;
849 return ret;
850 }
851 bl.push_back(bp);
852 decode(objb.oi, bl);
853 if (debug)
854 cerr << "object_info: " << objb.oi << std::endl;
855 }
856
857 // NOTE: we include whiteouts, lost, etc.
858
859 ret = write_section(TYPE_OBJECT_BEGIN, objb, file_fd);
860 if (ret < 0)
861 return ret;
862
863 uint64_t offset = 0;
864 bufferlist rawdatabl;
865 while(total > 0) {
866 rawdatabl.clear();
867 mysize_t len = max_read;
868 if (len > total)
869 len = total;
870
11fdf7f2 871 ret = store->read(ch, obj, offset, len, rawdatabl);
7c673cae
FG
872 if (ret < 0)
873 return ret;
874 if (ret == 0)
875 return -EINVAL;
876
877 data_section dblock(offset, len, rawdatabl);
878 if (debug)
879 cerr << "data section offset=" << offset << " len=" << len << std::endl;
880
881 total -= ret;
882 offset += ret;
883
884 ret = write_section(TYPE_DATA, dblock, file_fd);
885 if (ret) return ret;
886 }
887
888 //Handle attrs for this object
20effc67 889 map<string,bufferptr,less<>> aset;
11fdf7f2 890 ret = store->getattrs(ch, obj, aset);
7c673cae
FG
891 if (ret) return ret;
892 attr_section as(aset);
893 ret = write_section(TYPE_ATTRS, as, file_fd);
894 if (ret)
895 return ret;
896
897 if (debug) {
898 cerr << "attrs size " << aset.size() << std::endl;
899 }
900
901 //Handle omap information
902 bufferlist hdrbuf;
11fdf7f2 903 ret = store->omap_get_header(ch, obj, &hdrbuf, true);
7c673cae
FG
904 if (ret < 0) {
905 cerr << "omap_get_header: " << cpp_strerror(ret) << std::endl;
906 return ret;
907 }
908
909 omap_hdr_section ohs(hdrbuf);
910 ret = write_section(TYPE_OMAP_HDR, ohs, file_fd);
911 if (ret)
912 return ret;
913
11fdf7f2 914 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, obj);
7c673cae
FG
915 if (!iter) {
916 ret = -ENOENT;
917 cerr << "omap_get_iterator: " << cpp_strerror(ret) << std::endl;
918 return ret;
919 }
920 iter->seek_to_first();
921 int mapcount = 0;
922 map<string, bufferlist> out;
923 while(iter->valid()) {
924 get_omap_batch(iter, out);
925
926 if (out.empty()) break;
927
928 mapcount += out.size();
929 omap_section oms(out);
930 ret = write_section(TYPE_OMAP, oms, file_fd);
931 if (ret)
932 return ret;
933 }
934 if (debug)
935 cerr << "omap map size " << mapcount << std::endl;
936
937 ret = write_simple(TYPE_OBJECT_END, file_fd);
938 if (ret)
939 return ret;
940
941 return 0;
942}
943
944int ObjectStoreTool::export_files(ObjectStore *store, coll_t coll)
945{
946 ghobject_t next;
11fdf7f2 947 auto ch = store->open_collection(coll);
7c673cae
FG
948 while (!next.is_max()) {
949 vector<ghobject_t> objects;
11fdf7f2 950 int r = store->collection_list(ch, next, ghobject_t::get_max(), 300,
7c673cae
FG
951 &objects, &next);
952 if (r < 0)
953 return r;
954 for (vector<ghobject_t>::iterator i = objects.begin();
955 i != objects.end();
956 ++i) {
11fdf7f2
TL
957 ceph_assert(!i->hobj.is_meta());
958 if (i->is_pgmeta() || i->hobj.is_temp() || !i->is_no_gen()) {
7c673cae
FG
959 continue;
960 }
961 r = export_file(store, coll, *i);
962 if (r < 0)
963 return r;
964 }
965 }
966 return 0;
967}
968
11fdf7f2 969int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
7c673cae 970 OSDMap::Incremental inc;
11fdf7f2 971 auto it = bl.cbegin();
7c673cae
FG
972 inc.decode(it);
973 if (e == 0) {
974 e = inc.epoch;
975 } else if (e != inc.epoch) {
976 cerr << "incremental.epoch mismatch: "
977 << inc.epoch << " != " << e << std::endl;
978 if (force) {
979 cerr << "But will continue anyway." << std::endl;
980 } else {
981 return -EINVAL;
982 }
983 }
11fdf7f2 984 auto ch = store->open_collection(coll_t::meta());
7c673cae 985 const ghobject_t inc_oid = OSD::get_inc_osdmap_pobject_name(e);
11fdf7f2 986 if (!store->exists(ch, inc_oid)) {
7c673cae
FG
987 cerr << "inc-osdmap (" << inc_oid << ") does not exist." << std::endl;
988 if (!force) {
989 return -ENOENT;
990 }
991 cout << "Creating a new epoch." << std::endl;
992 }
993 if (dry_run)
994 return 0;
995 ObjectStore::Transaction t;
996 t.write(coll_t::meta(), inc_oid, 0, bl.length(), bl);
997 t.truncate(coll_t::meta(), inc_oid, bl.length());
11fdf7f2
TL
998 store->queue_transaction(ch, std::move(t));
999 return 0;
7c673cae
FG
1000}
1001
1002int get_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl)
1003{
11fdf7f2
TL
1004 auto ch = store->open_collection(coll_t::meta());
1005 if (store->read(ch,
7c673cae
FG
1006 OSD::get_inc_osdmap_pobject_name(e),
1007 0, 0, bl) < 0) {
1008 return -ENOENT;
1009 }
1010 return 0;
1011}
1012
11fdf7f2 1013int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
7c673cae
FG
1014 OSDMap osdmap;
1015 osdmap.decode(bl);
1016 if (e == 0) {
1017 e = osdmap.get_epoch();
1018 } else if (e != osdmap.get_epoch()) {
1019 cerr << "osdmap.epoch mismatch: "
1020 << e << " != " << osdmap.get_epoch() << std::endl;
1021 if (force) {
1022 cerr << "But will continue anyway." << std::endl;
1023 } else {
1024 return -EINVAL;
1025 }
1026 }
11fdf7f2 1027 auto ch = store->open_collection(coll_t::meta());
7c673cae 1028 const ghobject_t full_oid = OSD::get_osdmap_pobject_name(e);
11fdf7f2 1029 if (!store->exists(ch, full_oid)) {
7c673cae
FG
1030 cerr << "osdmap (" << full_oid << ") does not exist." << std::endl;
1031 if (!force) {
1032 return -ENOENT;
1033 }
1034 cout << "Creating a new epoch." << std::endl;
1035 }
1036 if (dry_run)
1037 return 0;
1038 ObjectStore::Transaction t;
1039 t.write(coll_t::meta(), full_oid, 0, bl.length(), bl);
1040 t.truncate(coll_t::meta(), full_oid, bl.length());
11fdf7f2
TL
1041 store->queue_transaction(ch, std::move(t));
1042 return 0;
7c673cae
FG
1043}
1044
1045int get_osdmap(ObjectStore *store, epoch_t e, OSDMap &osdmap, bufferlist& bl)
1046{
11fdf7f2 1047 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
7c673cae 1048 bool found = store->read(
11fdf7f2 1049 ch, OSD::get_osdmap_pobject_name(e), 0, 0, bl) >= 0;
7c673cae
FG
1050 if (!found) {
1051 cerr << "Can't find OSDMap for pg epoch " << e << std::endl;
1052 return -ENOENT;
1053 }
1054 osdmap.decode(bl);
1055 if (debug)
1056 cerr << osdmap << std::endl;
1057 return 0;
1058}
1059
11fdf7f2
TL
1060int get_pg_num_history(ObjectStore *store, pool_pg_num_history_t *h)
1061{
1062 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
1063 bufferlist bl;
1064 auto pghist = OSD::make_pg_num_history_oid();
1065 int r = store->read(ch, pghist, 0, 0, bl, 0);
1066 if (r >= 0 && bl.length() > 0) {
1067 auto p = bl.cbegin();
1068 decode(*h, p);
1069 }
1070 cout << __func__ << " pg_num_history " << *h << std::endl;
1071 return 0;
1072}
1073
7c673cae
FG
1074int add_osdmap(ObjectStore *store, metadata_section &ms)
1075{
1076 return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl);
1077}
1078
2a845540
TL
1079int ObjectStoreTool::do_export(
1080 CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid,
7c673cae
FG
1081 pg_info_t &info, epoch_t map_epoch, __u8 struct_ver,
1082 const OSDSuperblock& superblock,
1083 PastIntervals &past_intervals)
1084{
1085 PGLog::IndexedLog log;
1086 pg_missing_t missing;
1087
11fdf7f2 1088 cerr << "Exporting " << pgid << " info " << info << std::endl;
7c673cae 1089
2a845540 1090 int ret = get_log(cct, fs, struct_ver, pgid, info, log, missing);
7c673cae
FG
1091 if (ret > 0)
1092 return ret;
1093
1094 if (debug) {
1095 Formatter *formatter = Formatter::create("json-pretty");
11fdf7f2 1096 ceph_assert(formatter);
7c673cae
FG
1097 dump_log(formatter, cerr, log, missing);
1098 delete formatter;
1099 }
1100 write_super();
1101
1102 pg_begin pgb(pgid, superblock);
1103 // Special case: If replicated pg don't require the importing OSD to have shard feature
1104 if (pgid.is_no_shard()) {
1105 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1106 }
1107 ret = write_section(TYPE_PG_BEGIN, pgb, file_fd);
1108 if (ret)
1109 return ret;
1110
1111 // The metadata_section is now before files, so import can detect
1112 // errors and abort without wasting time.
1113 metadata_section ms(
1114 struct_ver,
1115 map_epoch,
1116 info,
1117 log,
1118 past_intervals,
1119 missing);
1120 ret = add_osdmap(fs, ms);
1121 if (ret)
1122 return ret;
1123 ret = write_section(TYPE_PG_METADATA, ms, file_fd);
1124 if (ret)
1125 return ret;
1126
1127 ret = export_files(fs, coll);
1128 if (ret) {
1129 cerr << "export_files error " << ret << std::endl;
1130 return ret;
1131 }
1132
1133 ret = write_simple(TYPE_PG_END, file_fd);
1134 if (ret)
1135 return ret;
1136
1137 return 0;
1138}
1139
b32b8144
FG
1140int dump_data(Formatter *formatter, bufferlist &bl)
1141{
11fdf7f2 1142 auto ebliter = bl.cbegin();
b32b8144
FG
1143 data_section ds;
1144 ds.decode(ebliter);
1145
1146 formatter->open_object_section("data_block");
1147 formatter->dump_unsigned("offset", ds.offset);
1148 formatter->dump_unsigned("len", ds.len);
1149 // XXX: Add option to dump data like od -cx ?
1150 formatter->close_section();
1151 formatter->flush(cout);
1152 return 0;
1153}
1154
7c673cae
FG
1155int get_data(ObjectStore *store, coll_t coll, ghobject_t hoid,
1156 ObjectStore::Transaction *t, bufferlist &bl)
1157{
11fdf7f2 1158 auto ebliter = bl.cbegin();
7c673cae
FG
1159 data_section ds;
1160 ds.decode(ebliter);
1161
1162 if (debug)
1163 cerr << "\tdata: offset " << ds.offset << " len " << ds.len << std::endl;
1164 t->write(coll, hoid, ds.offset, ds.len, ds.databl);
1165 return 0;
1166}
1167
b32b8144
FG
1168int dump_attrs(
1169 Formatter *formatter, ghobject_t hoid,
1170 bufferlist &bl)
1171{
11fdf7f2 1172 auto ebliter = bl.cbegin();
b32b8144
FG
1173 attr_section as;
1174 as.decode(ebliter);
1175
1176 // This could have been handled in the caller if we didn't need to
1177 // support exports that didn't include object_info_t in object_begin.
1178 if (hoid.generation == ghobject_t::NO_GEN &&
1179 hoid.hobj.is_head()) {
1180 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1181 if (mi != as.data.end()) {
1182 SnapSet snapset;
11fdf7f2 1183 auto p = mi->second.cbegin();
b32b8144
FG
1184 snapset.decode(p);
1185 formatter->open_object_section("snapset");
1186 snapset.dump(formatter);
1187 formatter->close_section();
1188 } else {
1189 formatter->open_object_section("snapset");
1190 formatter->dump_string("error", "missing SS_ATTR");
1191 formatter->close_section();
1192 }
1193 }
1194
1195 formatter->open_object_section("attrs");
1196 formatter->open_array_section("user");
1197 for (auto kv : as.data) {
1198 // Skip system attributes
1199 if (('_' != kv.first.at(0)) || kv.first.size() == 1)
1200 continue;
1201 formatter->open_object_section("user_attr");
1202 formatter->dump_string("name", kv.first.substr(1));
1203 bool b64;
1204 formatter->dump_string("value", cleanbin(kv.second, b64));
1205 formatter->dump_bool("Base64", b64);
1206 formatter->close_section();
1207 }
1208 formatter->close_section();
1209 formatter->open_array_section("system");
1210 for (auto kv : as.data) {
1211 // Skip user attributes
1212 if (('_' == kv.first.at(0)) && kv.first.size() != 1)
1213 continue;
1214 formatter->open_object_section("sys_attr");
1215 formatter->dump_string("name", kv.first);
1216 formatter->close_section();
1217 }
1218 formatter->close_section();
1219 formatter->close_section();
1220 formatter->flush(cout);
1221
1222 return 0;
1223}
1224
7c673cae
FG
1225int get_attrs(
1226 ObjectStore *store, coll_t coll, ghobject_t hoid,
1227 ObjectStore::Transaction *t, bufferlist &bl,
224ce89b 1228 OSDriver &driver, SnapMapper &snap_mapper)
7c673cae 1229{
11fdf7f2 1230 auto ebliter = bl.cbegin();
7c673cae
FG
1231 attr_section as;
1232 as.decode(ebliter);
1233
11fdf7f2 1234 auto ch = store->open_collection(coll);
7c673cae
FG
1235 if (debug)
1236 cerr << "\tattrs: len " << as.data.size() << std::endl;
1237 t->setattrs(coll, hoid, as.data);
1238
1239 // This could have been handled in the caller if we didn't need to
1240 // support exports that didn't include object_info_t in object_begin.
11fdf7f2
TL
1241 if (hoid.generation == ghobject_t::NO_GEN &&
1242 hoid.hobj.is_head()) {
1243 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1244 if (mi != as.data.end()) {
1245 SnapSet snapset;
1246 auto p = mi->second.cbegin();
1247 snapset.decode(p);
1248 cout << "snapset " << snapset << std::endl;
1249 for (auto& p : snapset.clone_snaps) {
1250 ghobject_t clone = hoid;
1251 clone.hobj.snap = p.first;
1252 set<snapid_t> snaps(p.second.begin(), p.second.end());
1253 if (!store->exists(ch, clone)) {
1254 // no clone, skip. this is probably a cache pool. this works
1255 // because we use a separate transaction per object and clones
1256 // come before head in the archive.
7c673cae 1257 if (debug)
11fdf7f2
TL
1258 cerr << "\tskipping missing " << clone << " (snaps "
1259 << snaps << ")" << std::endl;
1260 continue;
7c673cae 1261 }
11fdf7f2
TL
1262 if (debug)
1263 cerr << "\tsetting " << clone.hobj << " snaps " << snaps
1264 << std::endl;
1265 OSDriver::OSTransaction _t(driver.get_transaction(t));
1266 ceph_assert(!snaps.empty());
1267 snap_mapper.add_oid(clone.hobj, snaps, &_t);
7c673cae
FG
1268 }
1269 } else {
11fdf7f2 1270 cerr << "missing SS_ATTR on " << hoid << std::endl;
7c673cae
FG
1271 }
1272 }
7c673cae
FG
1273 return 0;
1274}
1275
b32b8144
FG
1276int dump_omap_hdr(Formatter *formatter, bufferlist &bl)
1277{
11fdf7f2 1278 auto ebliter = bl.cbegin();
b32b8144
FG
1279 omap_hdr_section oh;
1280 oh.decode(ebliter);
1281
1282 formatter->open_object_section("omap_header");
1283 formatter->dump_string("value", string(oh.hdr.c_str(), oh.hdr.length()));
1284 formatter->close_section();
1285 formatter->flush(cout);
1286 return 0;
1287}
1288
7c673cae
FG
1289int get_omap_hdr(ObjectStore *store, coll_t coll, ghobject_t hoid,
1290 ObjectStore::Transaction *t, bufferlist &bl)
1291{
11fdf7f2 1292 auto ebliter = bl.cbegin();
7c673cae
FG
1293 omap_hdr_section oh;
1294 oh.decode(ebliter);
1295
1296 if (debug)
1297 cerr << "\tomap header: " << string(oh.hdr.c_str(), oh.hdr.length())
1298 << std::endl;
1299 t->omap_setheader(coll, hoid, oh.hdr);
1300 return 0;
1301}
1302
b32b8144
FG
1303int dump_omap(Formatter *formatter, bufferlist &bl)
1304{
11fdf7f2 1305 auto ebliter = bl.cbegin();
b32b8144
FG
1306 omap_section os;
1307 os.decode(ebliter);
1308
1309 formatter->open_object_section("omaps");
1310 formatter->dump_unsigned("count", os.omap.size());
1311 formatter->open_array_section("data");
1312 for (auto o : os.omap) {
1313 formatter->open_object_section("omap");
1314 formatter->dump_string("name", o.first);
1315 bool b64;
1316 formatter->dump_string("value", cleanbin(o.second, b64));
1317 formatter->dump_bool("Base64", b64);
1318 formatter->close_section();
1319 }
1320 formatter->close_section();
1321 formatter->close_section();
1322 formatter->flush(cout);
1323 return 0;
1324}
1325
7c673cae
FG
1326int get_omap(ObjectStore *store, coll_t coll, ghobject_t hoid,
1327 ObjectStore::Transaction *t, bufferlist &bl)
1328{
11fdf7f2 1329 auto ebliter = bl.cbegin();
7c673cae
FG
1330 omap_section os;
1331 os.decode(ebliter);
1332
1333 if (debug)
1334 cerr << "\tomap: size " << os.omap.size() << std::endl;
1335 t->omap_setkeys(coll, hoid, os.omap);
1336 return 0;
1337}
1338
b32b8144
FG
1339int ObjectStoreTool::dump_object(Formatter *formatter,
1340 bufferlist &bl)
1341{
11fdf7f2 1342 auto ebliter = bl.cbegin();
b32b8144
FG
1343 object_begin ob;
1344 ob.decode(ebliter);
1345
1346 if (ob.hoid.hobj.is_temp()) {
1347 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1348 return -EFAULT;
1349 }
1350
1351 formatter->open_object_section("object");
1352 formatter->open_object_section("oid");
1353 ob.hoid.dump(formatter);
1354 formatter->close_section();
1355 formatter->open_object_section("object_info");
1356 ob.oi.dump(formatter);
1357 formatter->close_section();
1358
1359 bufferlist ebl;
1360 bool done = false;
1361 while(!done) {
1362 sectiontype_t type;
1363 int ret = read_section(&type, &ebl);
1364 if (ret)
1365 return ret;
1366
1367 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1368 //cout << "\t\tsection size " << ebl.length() << std::endl;
1369 if (type >= END_OF_TYPES) {
1370 cout << "Skipping unknown object section type" << std::endl;
1371 continue;
1372 }
1373 switch(type) {
1374 case TYPE_DATA:
1375 if (dry_run) break;
1376 ret = dump_data(formatter, ebl);
1377 if (ret) return ret;
1378 break;
1379 case TYPE_ATTRS:
1380 if (dry_run) break;
1381 ret = dump_attrs(formatter, ob.hoid, ebl);
1382 if (ret) return ret;
1383 break;
1384 case TYPE_OMAP_HDR:
1385 if (dry_run) break;
1386 ret = dump_omap_hdr(formatter, ebl);
1387 if (ret) return ret;
1388 break;
1389 case TYPE_OMAP:
1390 if (dry_run) break;
1391 ret = dump_omap(formatter, ebl);
1392 if (ret) return ret;
1393 break;
1394 case TYPE_OBJECT_END:
1395 done = true;
1396 break;
1397 default:
1398 cerr << "Unknown section type " << type << std::endl;
1399 return -EFAULT;
1400 }
1401 }
1402 formatter->close_section();
1403 return 0;
1404}
1405
11fdf7f2
TL
1406int ObjectStoreTool::get_object(ObjectStore *store,
1407 OSDriver& driver,
1408 SnapMapper& mapper,
1409 coll_t coll,
1410 bufferlist &bl, OSDMap &origmap,
1411 bool *skipped_objects)
7c673cae
FG
1412{
1413 ObjectStore::Transaction tran;
1414 ObjectStore::Transaction *t = &tran;
11fdf7f2 1415 auto ebliter = bl.cbegin();
7c673cae
FG
1416 object_begin ob;
1417 ob.decode(ebliter);
7c673cae
FG
1418
1419 if (ob.hoid.hobj.is_temp()) {
1420 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1421 return -EFAULT;
1422 }
11fdf7f2
TL
1423 ceph_assert(g_ceph_context);
1424
1425 auto ch = store->open_collection(coll);
7c673cae
FG
1426 if (ob.hoid.hobj.nspace != g_ceph_context->_conf->osd_hit_set_namespace) {
1427 object_t oid = ob.hoid.hobj.oid;
1428 object_locator_t loc(ob.hoid.hobj);
11fdf7f2
TL
1429 pg_t raw_pgid = origmap.object_locator_to_pg(oid, loc);
1430 pg_t pgid = origmap.raw_pg_to_pg(raw_pgid);
7c673cae
FG
1431
1432 spg_t coll_pgid;
1433 if (coll.is_pg(&coll_pgid) == false) {
1434 cerr << "INTERNAL ERROR: Bad collection during import" << std::endl;
1435 return -EFAULT;
1436 }
1437 if (coll_pgid.shard != ob.hoid.shard_id) {
1438 cerr << "INTERNAL ERROR: Importing shard " << coll_pgid.shard
1439 << " but object shard is " << ob.hoid.shard_id << std::endl;
1440 return -EFAULT;
1441 }
1442
1443 if (coll_pgid.pgid != pgid) {
1444 cerr << "Skipping object '" << ob.hoid << "' which belongs in pg " << pgid << std::endl;
1445 *skipped_objects = true;
1446 skip_object(bl);
1447 return 0;
1448 }
1449 }
1450
1451 if (!dry_run)
1452 t->touch(coll, ob.hoid);
1453
1454 cout << "Write " << ob.hoid << std::endl;
1455
7c673cae
FG
1456 bufferlist ebl;
1457 bool done = false;
1458 while(!done) {
1459 sectiontype_t type;
1460 int ret = read_section(&type, &ebl);
1461 if (ret)
1462 return ret;
1463
1464 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1465 //cout << "\t\tsection size " << ebl.length() << std::endl;
1466 if (type >= END_OF_TYPES) {
1467 cout << "Skipping unknown object section type" << std::endl;
1468 continue;
1469 }
1470 switch(type) {
1471 case TYPE_DATA:
1472 if (dry_run) break;
1473 ret = get_data(store, coll, ob.hoid, t, ebl);
1474 if (ret) return ret;
1475 break;
1476 case TYPE_ATTRS:
1477 if (dry_run) break;
224ce89b 1478 ret = get_attrs(store, coll, ob.hoid, t, ebl, driver, mapper);
7c673cae
FG
1479 if (ret) return ret;
1480 break;
1481 case TYPE_OMAP_HDR:
1482 if (dry_run) break;
1483 ret = get_omap_hdr(store, coll, ob.hoid, t, ebl);
1484 if (ret) return ret;
1485 break;
1486 case TYPE_OMAP:
1487 if (dry_run) break;
1488 ret = get_omap(store, coll, ob.hoid, t, ebl);
1489 if (ret) return ret;
1490 break;
1491 case TYPE_OBJECT_END:
1492 done = true;
1493 break;
1494 default:
1495 cerr << "Unknown section type " << type << std::endl;
1496 return -EFAULT;
1497 }
1498 }
11fdf7f2
TL
1499 if (!dry_run) {
1500 wait_until_done(t, [&] {
1501 store->queue_transaction(ch, std::move(*t));
1502 ch->flush();
1503 });
1504 }
7c673cae
FG
1505 return 0;
1506}
1507
b32b8144
FG
1508int dump_pg_metadata(Formatter *formatter, bufferlist &bl, metadata_section &ms)
1509{
11fdf7f2 1510 auto ebliter = bl.cbegin();
b32b8144
FG
1511 ms.decode(ebliter);
1512
1513 formatter->open_object_section("metadata_section");
1514
1515 formatter->dump_unsigned("pg_disk_version", (int)ms.struct_ver);
1516 formatter->dump_unsigned("map_epoch", ms.map_epoch);
1517
1518 formatter->open_object_section("OSDMap");
1519 ms.osdmap.dump(formatter);
1520 formatter->close_section();
1521 formatter->flush(cout);
1522 cout << std::endl;
1523
1524 formatter->open_object_section("info");
1525 ms.info.dump(formatter);
1526 formatter->close_section();
1527 formatter->flush(cout);
1528
1529 formatter->open_object_section("log");
1530 ms.log.dump(formatter);
1531 formatter->close_section();
1532 formatter->flush(cout);
1533
1534 formatter->open_object_section("pg_missing_t");
1535 ms.missing.dump(formatter);
1536 formatter->close_section();
1537
1538 // XXX: ms.past_intervals?
1539
1540 formatter->close_section();
1541 formatter->flush(cout);
1542
1543 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1544 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1545 return -EFAULT;
1546 }
1547
1548 return 0;
1549}
1550
7c673cae 1551int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms,
11fdf7f2 1552 const OSDSuperblock& sb, spg_t pgid)
7c673cae 1553{
11fdf7f2 1554 auto ebliter = bl.cbegin();
7c673cae
FG
1555 ms.decode(ebliter);
1556 spg_t old_pgid = ms.info.pgid;
1557 ms.info.pgid = pgid;
1558
11fdf7f2
TL
1559 if (debug) {
1560 cout << "export pgid " << old_pgid << std::endl;
1561 cout << "struct_v " << (int)ms.struct_ver << std::endl;
1562 cout << "map epoch " << ms.map_epoch << std::endl;
7c673cae 1563
11fdf7f2
TL
1564#ifdef DIAGNOSTIC
1565 Formatter *formatter = new JSONFormatter(true);
1566 formatter->open_object_section("stuff");
7c673cae 1567
11fdf7f2
TL
1568 formatter->open_object_section("importing OSDMap");
1569 ms.osdmap.dump(formatter);
1570 formatter->close_section();
1571 formatter->flush(cout);
1572 cout << std::endl;
7c673cae 1573
11fdf7f2 1574 cout << "osd current epoch " << sb.current_epoch << std::endl;
7c673cae 1575
11fdf7f2
TL
1576 formatter->open_object_section("info");
1577 ms.info.dump(formatter);
1578 formatter->close_section();
1579 formatter->flush(cout);
1580 cout << std::endl;
7c673cae 1581
11fdf7f2
TL
1582 formatter->open_object_section("log");
1583 ms.log.dump(formatter);
1584 formatter->close_section();
1585 formatter->flush(cout);
1586 cout << std::endl;
1587
1588 formatter->close_section();
1589 formatter->flush(cout);
1590 cout << std::endl;
7c673cae 1591#endif
11fdf7f2 1592 }
7c673cae
FG
1593
1594 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1595 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1596 return -EFAULT;
1597 }
1598
1599 if (ms.map_epoch > sb.current_epoch) {
1600 cerr << "ERROR: Export PG's map_epoch " << ms.map_epoch << " > OSD's epoch " << sb.current_epoch << std::endl;
1601 cerr << "The OSD you are using is older than the exported PG" << std::endl;
1602 cerr << "Either use another OSD or join selected OSD to cluster to update it first" << std::endl;
1603 return -EINVAL;
1604 }
1605
11fdf7f2 1606 // Old exports didn't include OSDMap
7c673cae 1607 if (ms.osdmap.get_epoch() == 0) {
11fdf7f2
TL
1608 cerr << "WARNING: No OSDMap in old export, this is an ancient export."
1609 " Not supported." << std::endl;
1610 return -EINVAL;
7c673cae
FG
1611 }
1612
11fdf7f2
TL
1613 if (ms.osdmap.get_epoch() < sb.oldest_map) {
1614 cerr << "PG export's map " << ms.osdmap.get_epoch()
1615 << " is older than OSD's oldest_map " << sb.oldest_map << std::endl;
1616 if (!force) {
1617 cerr << " pass --force to proceed anyway (with incomplete PastIntervals)"
1618 << std::endl;
1619 return -EINVAL;
7c673cae
FG
1620 }
1621 }
7c673cae
FG
1622 if (debug) {
1623 cerr << "Import pgid " << ms.info.pgid << std::endl;
b32b8144 1624 cerr << "Previous past_intervals " << ms.past_intervals << std::endl;
11fdf7f2
TL
1625 cerr << "history.same_interval_since "
1626 << ms.info.history.same_interval_since << std::endl;
7c673cae
FG
1627 }
1628
7c673cae
FG
1629 return 0;
1630}
1631
1632// out: pg_log_t that only has entries that apply to import_pgid using curmap
1633// reject: Entries rejected from "in" are in the reject.log. Other fields not set.
1634void filter_divergent_priors(spg_t import_pgid, const OSDMap &curmap,
1635 const string &hit_set_namespace, const divergent_priors_t &in,
1636 divergent_priors_t &out, divergent_priors_t &reject)
1637{
1638 out.clear();
1639 reject.clear();
1640
1641 for (divergent_priors_t::const_iterator i = in.begin();
1642 i != in.end(); ++i) {
1643
1644 // Reject divergent priors for temporary objects
1645 if (i->second.is_temp()) {
1646 reject.insert(*i);
1647 continue;
1648 }
1649
1650 if (i->second.nspace != hit_set_namespace) {
1651 object_t oid = i->second.oid;
1652 object_locator_t loc(i->second);
1653 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
1654 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
1655
1656 if (import_pgid.pgid == pgid) {
1657 out.insert(*i);
1658 } else {
1659 reject.insert(*i);
1660 }
1661 } else {
1662 out.insert(*i);
1663 }
1664 }
1665}
1666
11fdf7f2 1667int ObjectStoreTool::dump_export(Formatter *formatter)
b32b8144
FG
1668{
1669 bufferlist ebl;
1670 pg_info_t info;
1671 PGLog::IndexedLog log;
1672 //bool skipped_objects = false;
1673
1674 int ret = read_super();
1675 if (ret)
1676 return ret;
1677
1678 if (sh.magic != super_header::super_magic) {
1679 cerr << "Invalid magic number" << std::endl;
1680 return -EFAULT;
1681 }
1682
1683 if (sh.version > super_header::super_ver) {
1684 cerr << "Can't handle export format version=" << sh.version << std::endl;
1685 return -EINVAL;
1686 }
1687
1688 formatter->open_object_section("Export");
1689
1690 //First section must be TYPE_PG_BEGIN
1691 sectiontype_t type;
1692 ret = read_section(&type, &ebl);
1693 if (ret)
1694 return ret;
1695 if (type == TYPE_POOL_BEGIN) {
1696 cerr << "Dump of pool exports not supported" << std::endl;
1697 return -EINVAL;
1698 } else if (type != TYPE_PG_BEGIN) {
1699 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
1700 return -EFAULT;
1701 }
1702
11fdf7f2 1703 auto ebliter = ebl.cbegin();
b32b8144
FG
1704 pg_begin pgb;
1705 pgb.decode(ebliter);
1706 spg_t pgid = pgb.pgid;
1707
1708 formatter->dump_string("pgid", stringify(pgid));
1709 formatter->dump_string("cluster_fsid", stringify(pgb.superblock.cluster_fsid));
1710 formatter->dump_string("features", stringify(pgb.superblock.compat_features));
1711
1712 bool done = false;
1713 bool found_metadata = false;
1714 metadata_section ms;
1715 bool objects_started = false;
1716 while(!done) {
1717 ret = read_section(&type, &ebl);
1718 if (ret)
1719 return ret;
1720
1721 if (debug) {
11fdf7f2 1722 cerr << "dump_export: Section type " << std::to_string(type) << std::endl;
b32b8144
FG
1723 }
1724 if (type >= END_OF_TYPES) {
1725 cerr << "Skipping unknown section type" << std::endl;
1726 continue;
1727 }
1728 switch(type) {
1729 case TYPE_OBJECT_BEGIN:
1730 if (!objects_started) {
1731 formatter->open_array_section("objects");
1732 objects_started = true;
1733 }
1734 ret = dump_object(formatter, ebl);
1735 if (ret) return ret;
1736 break;
1737 case TYPE_PG_METADATA:
1738 if (objects_started)
1739 cerr << "WARNING: metadata_section out of order" << std::endl;
1740 ret = dump_pg_metadata(formatter, ebl, ms);
1741 if (ret) return ret;
1742 found_metadata = true;
1743 break;
1744 case TYPE_PG_END:
1745 if (objects_started) {
1746 formatter->close_section();
1747 }
1748 done = true;
1749 break;
1750 default:
1751 cerr << "Unknown section type " << std::to_string(type) << std::endl;
1752 return -EFAULT;
1753 }
1754 }
1755
1756 if (!found_metadata) {
1757 cerr << "Missing metadata section" << std::endl;
1758 return -EFAULT;
1759 }
1760
1761 formatter->close_section();
1762 formatter->flush(cout);
1763
1764 return 0;
1765}
1766
7c673cae 1767int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb,
11fdf7f2 1768 bool force, std::string pgidstr)
7c673cae
FG
1769{
1770 bufferlist ebl;
1771 pg_info_t info;
1772 PGLog::IndexedLog log;
1773 bool skipped_objects = false;
1774
1775 if (!dry_run)
1776 finish_remove_pgs(store);
1777
1778 int ret = read_super();
1779 if (ret)
1780 return ret;
1781
1782 if (sh.magic != super_header::super_magic) {
1783 cerr << "Invalid magic number" << std::endl;
1784 return -EFAULT;
1785 }
1786
1787 if (sh.version > super_header::super_ver) {
1788 cerr << "Can't handle export format version=" << sh.version << std::endl;
1789 return -EINVAL;
1790 }
1791
1792 //First section must be TYPE_PG_BEGIN
1793 sectiontype_t type;
1794 ret = read_section(&type, &ebl);
1795 if (ret)
1796 return ret;
1797 if (type == TYPE_POOL_BEGIN) {
1798 cerr << "Pool exports cannot be imported into a PG" << std::endl;
1799 return -EINVAL;
1800 } else if (type != TYPE_PG_BEGIN) {
b32b8144 1801 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
7c673cae
FG
1802 return -EFAULT;
1803 }
1804
11fdf7f2 1805 auto ebliter = ebl.cbegin();
7c673cae
FG
1806 pg_begin pgb;
1807 pgb.decode(ebliter);
1808 spg_t pgid = pgb.pgid;
7c673cae
FG
1809
1810 if (pgidstr.length()) {
1811 spg_t user_pgid;
1812
1813 bool ok = user_pgid.parse(pgidstr.c_str());
1814 // This succeeded in main() already
11fdf7f2 1815 ceph_assert(ok);
7c673cae 1816 if (pgid != user_pgid) {
11fdf7f2
TL
1817 cerr << "specified pgid " << user_pgid
1818 << " does not match actual pgid " << pgid << std::endl;
1819 return -EINVAL;
7c673cae
FG
1820 }
1821 }
1822
1823 if (!pgb.superblock.cluster_fsid.is_zero()
1824 && pgb.superblock.cluster_fsid != sb.cluster_fsid) {
1825 cerr << "Export came from different cluster with fsid "
1826 << pgb.superblock.cluster_fsid << std::endl;
1e59de90
TL
1827 if (force) {
1828 cerr << "Ignoring this problem due to --force" << std::endl;
1829 } else {
1830 return -EINVAL;
1831 }
7c673cae
FG
1832 }
1833
1834 if (debug) {
1835 cerr << "Exported features: " << pgb.superblock.compat_features << std::endl;
1836 }
1837
11fdf7f2 1838 // Special case: Old export has SHARDS incompat feature on replicated pg, removqqe it
7c673cae
FG
1839 if (pgid.is_no_shard())
1840 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1841
1842 if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) {
1843 CompatSet unsupported = sb.compat_features.unsupported(pgb.superblock.compat_features);
1844
1845 cerr << "Export has incompatible features set " << unsupported << std::endl;
1846
1847 // Let them import if they specify the --force option
1848 if (!force)
1849 return 11; // Positive return means exit status
1850 }
1851
11fdf7f2 1852 // we need the latest OSDMap to check for collisions
7c673cae
FG
1853 OSDMap curmap;
1854 bufferlist bl;
1855 ret = get_osdmap(store, sb.current_epoch, curmap, bl);
1856 if (ret) {
11fdf7f2 1857 cerr << "Can't find latest local OSDMap " << sb.current_epoch << std::endl;
7c673cae
FG
1858 return ret;
1859 }
1860 if (!curmap.have_pg_pool(pgid.pgid.m_pool)) {
1861 cerr << "Pool " << pgid.pgid.m_pool << " no longer exists" << std::endl;
1862 // Special exit code for this error, used by test code
1863 return 10; // Positive return means exit status
1864 }
1865
11fdf7f2
TL
1866 pool_pg_num_history_t pg_num_history;
1867 get_pg_num_history(store, &pg_num_history);
1868
7c673cae 1869 ghobject_t pgmeta_oid = pgid.make_pgmeta_oid();
7c673cae 1870
11fdf7f2 1871 // Check for PG already present.
7c673cae
FG
1872 coll_t coll(pgid);
1873 if (store->collection_exists(coll)) {
1874 cerr << "pgid " << pgid << " already exists" << std::endl;
1875 return -EEXIST;
1876 }
1877
11fdf7f2 1878 ObjectStore::CollectionHandle ch;
7c673cae 1879
11fdf7f2
TL
1880 OSDriver driver(
1881 store,
1882 coll_t(),
1883 OSD::make_snapmapper_oid());
1884 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pgid.shard);
7c673cae
FG
1885
1886 cout << "Importing pgid " << pgid;
7c673cae
FG
1887 cout << std::endl;
1888
1889 bool done = false;
1890 bool found_metadata = false;
1891 metadata_section ms;
7c673cae
FG
1892 while(!done) {
1893 ret = read_section(&type, &ebl);
1894 if (ret)
1895 return ret;
1896
b32b8144
FG
1897 if (debug) {
1898 cout << __func__ << ": Section type " << std::to_string(type) << std::endl;
1899 }
7c673cae
FG
1900 if (type >= END_OF_TYPES) {
1901 cout << "Skipping unknown section type" << std::endl;
1902 continue;
1903 }
1904 switch(type) {
1905 case TYPE_OBJECT_BEGIN:
11fdf7f2
TL
1906 ceph_assert(found_metadata);
1907 ret = get_object(store, driver, mapper, coll, ebl, ms.osdmap,
1908 &skipped_objects);
7c673cae
FG
1909 if (ret) return ret;
1910 break;
1911 case TYPE_PG_METADATA:
11fdf7f2 1912 ret = get_pg_metadata(store, ebl, ms, sb, pgid);
7c673cae
FG
1913 if (ret) return ret;
1914 found_metadata = true;
11fdf7f2
TL
1915
1916 if (pgid != ms.info.pgid) {
1917 cerr << "specified pgid " << pgid << " does not match import file pgid "
1918 << ms.info.pgid << std::endl;
1919 return -EINVAL;
1920 }
1921
1922 // make sure there are no conflicting splits or merges
1923 if (ms.osdmap.have_pg_pool(pgid.pgid.pool())) {
1924 auto p = pg_num_history.pg_nums.find(pgid.pgid.m_pool);
1925 if (p != pg_num_history.pg_nums.end() &&
1926 !p->second.empty()) {
1927 unsigned start_pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool());
1928 unsigned pg_num = start_pg_num;
1929 for (auto q = p->second.lower_bound(ms.map_epoch);
1930 q != p->second.end();
1931 ++q) {
1932 unsigned new_pg_num = q->second;
1933 cout << "pool " << pgid.pgid.pool() << " pg_num " << pg_num
1934 << " -> " << new_pg_num << std::endl;
1935
1936 // check for merge target
1937 spg_t target;
1938 if (pgid.is_merge_source(pg_num, new_pg_num, &target)) {
1939 // FIXME: this checks assumes the OSD's PG is at the OSD's
1940 // map epoch; it could be, say, at *our* epoch, pre-merge.
1941 coll_t coll(target);
1942 if (store->collection_exists(coll)) {
1943 cerr << "pgid " << pgid << " merges to target " << target
1944 << " which already exists" << std::endl;
1945 return 12;
1946 }
1947 }
1948
1949 // check for split children
1950 set<spg_t> children;
1951 if (pgid.is_split(start_pg_num, new_pg_num, &children)) {
1952 cerr << " children are " << children << std::endl;
1953 for (auto child : children) {
1954 coll_t coll(child);
1955 if (store->collection_exists(coll)) {
1956 cerr << "pgid " << pgid << " splits to " << children
1957 << " and " << child << " exists" << std::endl;
1958 return 12;
1959 }
1960 }
1961 }
1962 pg_num = new_pg_num;
1963 }
1964 }
1965 } else {
1966 cout << "pool " << pgid.pgid.pool() << " doesn't existing, not checking"
1967 << " for splits or mergers" << std::endl;
1968 }
1969
1970 if (!dry_run) {
1971 ObjectStore::Transaction t;
1972 ch = store->create_new_collection(coll);
9f95a23c 1973 create_pg_collection(
11fdf7f2
TL
1974 t, pgid,
1975 pgid.get_split_bits(ms.osdmap.get_pg_pool(pgid.pool())->get_pg_num()));
9f95a23c 1976 init_pg_ondisk(t, pgid, NULL);
11fdf7f2
TL
1977
1978 // mark this coll for removal until we're done
1979 map<string,bufferlist> values;
1980 encode((char)1, values["_remove"]);
1981 t.omap_setkeys(coll, pgid.make_pgmeta_oid(), values);
1982
1983 store->queue_transaction(ch, std::move(t));
1984 }
1985
7c673cae
FG
1986 break;
1987 case TYPE_PG_END:
11fdf7f2 1988 ceph_assert(found_metadata);
7c673cae
FG
1989 done = true;
1990 break;
1991 default:
b32b8144 1992 cerr << "Unknown section type " << std::to_string(type) << std::endl;
7c673cae
FG
1993 return -EFAULT;
1994 }
1995 }
1996
1997 if (!found_metadata) {
1998 cerr << "Missing metadata section" << std::endl;
1999 return -EFAULT;
2000 }
2001
2002 ObjectStore::Transaction t;
2003 if (!dry_run) {
2004 pg_log_t newlog, reject;
11fdf7f2 2005 pg_log_t::filter_log(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
7c673cae
FG
2006 ms.log, newlog, reject);
2007 if (debug) {
2008 for (list<pg_log_entry_t>::iterator i = newlog.log.begin();
2009 i != newlog.log.end(); ++i)
2010 cerr << "Keeping log entry " << *i << std::endl;
2011 for (list<pg_log_entry_t>::iterator i = reject.log.begin();
2012 i != reject.log.end(); ++i)
2013 cerr << "Skipping log entry " << *i << std::endl;
2014 }
2015
2016 divergent_priors_t newdp, rejectdp;
11fdf7f2 2017 filter_divergent_priors(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
7c673cae
FG
2018 ms.divergent_priors, newdp, rejectdp);
2019 ms.divergent_priors = newdp;
2020 if (debug) {
2021 for (divergent_priors_t::iterator i = newdp.begin();
2022 i != newdp.end(); ++i)
2023 cerr << "Keeping divergent_prior " << *i << std::endl;
2024 for (divergent_priors_t::iterator i = rejectdp.begin();
2025 i != rejectdp.end(); ++i)
2026 cerr << "Skipping divergent_prior " << *i << std::endl;
2027 }
2028
2029 ms.missing.filter_objects([&](const hobject_t &obj) {
2030 if (obj.nspace == g_ceph_context->_conf->osd_hit_set_namespace)
2031 return false;
11fdf7f2 2032 ceph_assert(!obj.is_temp());
7c673cae
FG
2033 object_t oid = obj.oid;
2034 object_locator_t loc(obj);
11fdf7f2
TL
2035 pg_t raw_pgid = ms.osdmap.object_locator_to_pg(oid, loc);
2036 pg_t _pgid = ms.osdmap.raw_pg_to_pg(raw_pgid);
7c673cae
FG
2037
2038 return pgid.pgid != _pgid;
2039 });
2040
2041
2042 if (debug) {
2043 pg_missing_t missing;
2044 Formatter *formatter = Formatter::create("json-pretty");
2045 dump_log(formatter, cerr, newlog, ms.missing);
2046 delete formatter;
2047 }
2048
2049 // Just like a split invalidate stats since the object count is changed
2050 if (skipped_objects)
2051 ms.info.stats.stats_invalid = true;
2052
2053 ret = write_pg(
2054 t,
2055 ms.map_epoch,
2056 ms.info,
2057 newlog,
2058 ms.past_intervals,
2059 ms.divergent_priors,
2060 ms.missing);
2061 if (ret) return ret;
2062 }
2063
2064 // done, clear removal flag
2065 if (debug)
2066 cerr << "done, clearing removal flag" << std::endl;
2067
2068 if (!dry_run) {
9f95a23c 2069 t.omap_rmkey(coll, pgid.make_pgmeta_oid(), "_remove");
11fdf7f2
TL
2070 wait_until_done(&t, [&] {
2071 store->queue_transaction(ch, std::move(t));
2072 // make sure we flush onreadable items before mapper/driver are destroyed.
2073 ch->flush();
2074 });
7c673cae 2075 }
7c673cae
FG
2076 return 0;
2077}
2078
2079int do_list(ObjectStore *store, string pgidstr, string object, boost::optional<std::string> nspace,
2080 Formatter *formatter, bool debug, bool human_readable, bool head)
2081{
2082 int r;
2083 lookup_ghobject lookup(object, nspace, head);
2084 if (pgidstr.length() > 0) {
2085 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
2086 } else {
2087 r = action_on_all_objects(store, lookup, debug);
2088 }
2089 if (r)
2090 return r;
2091 lookup.dump(formatter, human_readable);
2092 formatter->flush(cout);
2093 return 0;
2094}
2095
9f95a23c
TL
2096int do_list_slow(ObjectStore *store, string pgidstr, string object,
2097 double threshold, Formatter *formatter, bool debug, bool human_readable)
2098{
2099 int r;
2100 lookup_slow_ghobject lookup(object, threshold);
2101 if (pgidstr.length() > 0) {
2102 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
2103 } else {
2104 r = action_on_all_objects(store, lookup, debug);
2105 }
2106 if (r)
2107 return r;
2108 lookup.dump(formatter, human_readable);
2109 formatter->flush(cout);
2110 return 0;
2111}
2112
7c673cae
FG
2113int do_meta(ObjectStore *store, string object, Formatter *formatter, bool debug, bool human_readable)
2114{
2115 int r;
2116 boost::optional<std::string> nspace; // Not specified
2117 lookup_ghobject lookup(object, nspace);
2118 r = action_on_all_objects_in_exact_pg(store, coll_t::meta(), lookup, debug);
2119 if (r)
2120 return r;
2121 lookup.dump(formatter, human_readable);
2122 formatter->flush(cout);
2123 return 0;
2124}
2125
11fdf7f2
TL
// Selects which parts remove_object() touches.
enum rmtype {
  BOTH = 0,       // snap mapper entry and the object itself
  SNAPMAP = 1,    // snap mapper entry only
  NOSNAPMAP = 2   // object only, snap mapper left alone
};
2131
7c673cae
FG
2132int remove_object(coll_t coll, ghobject_t &ghobj,
2133 SnapMapper &mapper,
2134 MapCacher::Transaction<std::string, bufferlist> *_t,
11fdf7f2
TL
2135 ObjectStore::Transaction *t,
2136 enum rmtype type)
7c673cae 2137{
11fdf7f2
TL
2138 if (type == BOTH || type == SNAPMAP) {
2139 int r = mapper.remove_oid(ghobj.hobj, _t);
2140 if (r < 0 && r != -ENOENT) {
2141 cerr << "remove_oid returned " << cpp_strerror(r) << std::endl;
2142 return r;
2143 }
7c673cae
FG
2144 }
2145
11fdf7f2
TL
2146 if (type == BOTH || type == NOSNAPMAP) {
2147 t->remove(coll, ghobj);
2148 }
7c673cae
FG
2149 return 0;
2150}
2151
2152int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent);
2153
2154int do_remove_object(ObjectStore *store, coll_t coll,
11fdf7f2 2155 ghobject_t &ghobj, bool all, bool force, enum rmtype type)
7c673cae 2156{
11fdf7f2 2157 auto ch = store->open_collection(coll);
7c673cae
FG
2158 spg_t pg;
2159 coll.is_pg_prefix(&pg);
2160 OSDriver driver(
2161 store,
2162 coll_t(),
2163 OSD::make_snapmapper_oid());
2164 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pg.shard);
2165 struct stat st;
2166
11fdf7f2 2167 int r = store->stat(ch, ghobj, &st);
7c673cae
FG
2168 if (r < 0) {
2169 cerr << "remove: " << cpp_strerror(r) << std::endl;
2170 return r;
2171 }
2172
2173 SnapSet ss;
2174 if (ghobj.hobj.has_snapset()) {
2175 r = get_snapset(store, coll, ghobj, ss, false);
2176 if (r < 0) {
2177 cerr << "Can't get snapset error " << cpp_strerror(r) << std::endl;
eafe8130
TL
2178 // If --force and bad snapset let them remove the head
2179 if (!(force && !all))
2180 return r;
7c673cae 2181 }
9f95a23c
TL
2182// cout << "snapset " << ss << std::endl;
2183 if (!ss.clone_snaps.empty() && !all) {
7c673cae
FG
2184 if (force) {
2185 cout << "WARNING: only removing "
2186 << (ghobj.hobj.is_head() ? "head" : "snapdir")
9f95a23c
TL
2187 << " with clones present" << std::endl;
2188 ss.clone_snaps.clear();
7c673cae 2189 } else {
9f95a23c
TL
2190 cerr << "Clones are present, use removeall to delete everything"
2191 << std::endl;
7c673cae
FG
2192 return -EINVAL;
2193 }
2194 }
2195 }
2196
2197 ObjectStore::Transaction t;
2198 OSDriver::OSTransaction _t(driver.get_transaction(&t));
2199
7c673cae 2200 ghobject_t snapobj = ghobj;
9f95a23c
TL
2201 for (auto& p : ss.clone_snaps) {
2202 snapobj.hobj.snap = p.first;
2203 cout << "remove clone " << snapobj << std::endl;
7c673cae 2204 if (!dry_run) {
11fdf7f2 2205 r = remove_object(coll, snapobj, mapper, &_t, &t, type);
7c673cae
FG
2206 if (r < 0)
2207 return r;
2208 }
2209 }
2210
11fdf7f2
TL
2211 cout << "remove " << ghobj << std::endl;
2212
2213 if (!dry_run) {
2214 r = remove_object(coll, ghobj, mapper, &_t, &t, type);
2215 if (r < 0)
2216 return r;
2217 }
7c673cae 2218
11fdf7f2
TL
2219 if (!dry_run) {
2220 wait_until_done(&t, [&] {
2221 store->queue_transaction(ch, std::move(t));
2222 ch->flush();
2223 });
2224 }
7c673cae
FG
2225 return 0;
2226}
2227
2228int do_list_attrs(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2229{
11fdf7f2 2230 auto ch = store->open_collection(coll);
20effc67 2231 map<string,bufferptr,less<>> aset;
11fdf7f2 2232 int r = store->getattrs(ch, ghobj, aset);
7c673cae
FG
2233 if (r < 0) {
2234 cerr << "getattrs: " << cpp_strerror(r) << std::endl;
2235 return r;
2236 }
2237
2238 for (map<string,bufferptr>::iterator i = aset.begin();i != aset.end(); ++i) {
2239 string key(i->first);
2240 if (outistty)
2241 key = cleanbin(key);
2242 cout << key << std::endl;
2243 }
2244 return 0;
2245}
2246
2247int do_list_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2248{
11fdf7f2
TL
2249 auto ch = store->open_collection(coll);
2250 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, ghobj);
7c673cae
FG
2251 if (!iter) {
2252 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT) << std::endl;
2253 return -ENOENT;
2254 }
2255 iter->seek_to_first();
2256 map<string, bufferlist> oset;
2257 while(iter->valid()) {
2258 get_omap_batch(iter, oset);
2259
2260 for (map<string,bufferlist>::iterator i = oset.begin();i != oset.end(); ++i) {
2261 string key(i->first);
2262 if (outistty)
2263 key = cleanbin(key);
2264 cout << key << std::endl;
2265 }
2266 }
2267 return 0;
2268}
2269
2270int do_get_bytes(ObjectStore *store, coll_t coll, ghobject_t &ghobj, int fd)
2271{
11fdf7f2 2272 auto ch = store->open_collection(coll);
7c673cae
FG
2273 struct stat st;
2274 mysize_t total;
2275
11fdf7f2 2276 int ret = store->stat(ch, ghobj, &st);
7c673cae
FG
2277 if (ret < 0) {
2278 cerr << "get-bytes: " << cpp_strerror(ret) << std::endl;
2279 return ret;
2280 }
2281
2282 total = st.st_size;
2283 if (debug)
2284 cerr << "size=" << total << std::endl;
2285
2286 uint64_t offset = 0;
2287 bufferlist rawdatabl;
2288 while(total > 0) {
2289 rawdatabl.clear();
2290 mysize_t len = max_read;
2291 if (len > total)
2292 len = total;
2293
11fdf7f2 2294 ret = store->read(ch, ghobj, offset, len, rawdatabl);
7c673cae
FG
2295 if (ret < 0)
2296 return ret;
2297 if (ret == 0)
2298 return -EINVAL;
2299
2300 if (debug)
2301 cerr << "data section offset=" << offset << " len=" << len << std::endl;
2302
2303 total -= ret;
2304 offset += ret;
2305
2306 ret = write(fd, rawdatabl.c_str(), ret);
2307 if (ret == -1) {
2308 perror("write");
2309 return -errno;
2310 }
2311 }
2312
2313 return 0;
2314}
2315
2316int do_set_bytes(ObjectStore *store, coll_t coll,
11fdf7f2 2317 ghobject_t &ghobj, int fd)
7c673cae
FG
2318{
2319 ObjectStore::Transaction tran;
2320 ObjectStore::Transaction *t = &tran;
2321
2322 if (debug)
2323 cerr << "Write " << ghobj << std::endl;
2324
2325 if (!dry_run) {
2326 t->touch(coll, ghobj);
2327 t->truncate(coll, ghobj, 0);
2328 }
2329
2330 uint64_t offset = 0;
2331 bufferlist rawdatabl;
2332 do {
2333 rawdatabl.clear();
2334 ssize_t bytes = rawdatabl.read_fd(fd, max_read);
2335 if (bytes < 0) {
2336 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
2337 return bytes;
2338 }
2339
2340 if (bytes == 0)
2341 break;
2342
2343 if (debug)
2344 cerr << "\tdata: offset " << offset << " bytes " << bytes << std::endl;
2345 if (!dry_run)
2346 t->write(coll, ghobj, offset, bytes, rawdatabl);
2347
2348 offset += bytes;
11fdf7f2 2349 // XXX: Should we queue_transaction() every once in a while for very large files
7c673cae
FG
2350 } while(true);
2351
11fdf7f2 2352 auto ch = store->open_collection(coll);
7c673cae 2353 if (!dry_run)
11fdf7f2 2354 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2355 return 0;
2356}
2357
2358int do_get_attr(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2359{
11fdf7f2 2360 auto ch = store->open_collection(coll);
7c673cae
FG
2361 bufferptr bp;
2362
11fdf7f2 2363 int r = store->getattr(ch, ghobj, key.c_str(), bp);
7c673cae
FG
2364 if (r < 0) {
2365 cerr << "getattr: " << cpp_strerror(r) << std::endl;
2366 return r;
2367 }
2368
2369 string value(bp.c_str(), bp.length());
2370 if (outistty) {
2371 value = cleanbin(value);
2372 value.push_back('\n');
2373 }
2374 cout << value;
2375
2376 return 0;
2377}
2378
2379int do_set_attr(ObjectStore *store, coll_t coll,
11fdf7f2 2380 ghobject_t &ghobj, string key, int fd)
7c673cae
FG
2381{
2382 ObjectStore::Transaction tran;
2383 ObjectStore::Transaction *t = &tran;
2384 bufferlist bl;
2385
2386 if (debug)
2387 cerr << "Setattr " << ghobj << std::endl;
2388
2389 int ret = get_fd_data(fd, bl);
2390 if (ret < 0)
2391 return ret;
2392
2393 if (dry_run)
2394 return 0;
2395
2396 t->touch(coll, ghobj);
2397
2398 t->setattr(coll, ghobj, key, bl);
2399
11fdf7f2
TL
2400 auto ch = store->open_collection(coll);
2401 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2402 return 0;
2403}
2404
2405int do_rm_attr(ObjectStore *store, coll_t coll,
11fdf7f2 2406 ghobject_t &ghobj, string key)
7c673cae
FG
2407{
2408 ObjectStore::Transaction tran;
2409 ObjectStore::Transaction *t = &tran;
2410
2411 if (debug)
2412 cerr << "Rmattr " << ghobj << std::endl;
2413
2414 if (dry_run)
2415 return 0;
2416
2417 t->rmattr(coll, ghobj, key);
2418
11fdf7f2
TL
2419 auto ch = store->open_collection(coll);
2420 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2421 return 0;
2422}
2423
2424int do_get_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2425{
11fdf7f2 2426 auto ch = store->open_collection(coll);
7c673cae
FG
2427 set<string> keys;
2428 map<string, bufferlist> out;
2429
2430 keys.insert(key);
2431
11fdf7f2 2432 int r = store->omap_get_values(ch, ghobj, keys, &out);
7c673cae
FG
2433 if (r < 0) {
2434 cerr << "omap_get_values: " << cpp_strerror(r) << std::endl;
2435 return r;
2436 }
2437
2438 if (out.empty()) {
2439 cerr << "Key not found" << std::endl;
2440 return -ENOENT;
2441 }
2442
11fdf7f2 2443 ceph_assert(out.size() == 1);
7c673cae
FG
2444
2445 bufferlist bl = out.begin()->second;
2446 string value(bl.c_str(), bl.length());
2447 if (outistty) {
2448 value = cleanbin(value);
2449 value.push_back('\n');
2450 }
2451 cout << value;
2452
2453 return 0;
2454}
2455
2456int do_set_omap(ObjectStore *store, coll_t coll,
11fdf7f2 2457 ghobject_t &ghobj, string key, int fd)
7c673cae
FG
2458{
2459 ObjectStore::Transaction tran;
2460 ObjectStore::Transaction *t = &tran;
2461 map<string, bufferlist> attrset;
2462 bufferlist valbl;
2463
2464 if (debug)
2465 cerr << "Set_omap " << ghobj << std::endl;
2466
2467 int ret = get_fd_data(fd, valbl);
2468 if (ret < 0)
2469 return ret;
2470
2471 attrset.insert(pair<string, bufferlist>(key, valbl));
2472
2473 if (dry_run)
2474 return 0;
2475
2476 t->touch(coll, ghobj);
2477
2478 t->omap_setkeys(coll, ghobj, attrset);
2479
11fdf7f2
TL
2480 auto ch = store->open_collection(coll);
2481 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2482 return 0;
2483}
2484
2485int do_rm_omap(ObjectStore *store, coll_t coll,
11fdf7f2 2486 ghobject_t &ghobj, string key)
7c673cae
FG
2487{
2488 ObjectStore::Transaction tran;
2489 ObjectStore::Transaction *t = &tran;
7c673cae
FG
2490
2491 if (debug)
2492 cerr << "Rm_omap " << ghobj << std::endl;
2493
2494 if (dry_run)
2495 return 0;
2496
9f95a23c 2497 t->omap_rmkey(coll, ghobj, key);
7c673cae 2498
11fdf7f2
TL
2499 auto ch = store->open_collection(coll);
2500 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2501 return 0;
2502}
2503
2504int do_get_omaphdr(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2505{
11fdf7f2 2506 auto ch = store->open_collection(coll);
7c673cae
FG
2507 bufferlist hdrbl;
2508
11fdf7f2 2509 int r = store->omap_get_header(ch, ghobj, &hdrbl, true);
7c673cae
FG
2510 if (r < 0) {
2511 cerr << "omap_get_header: " << cpp_strerror(r) << std::endl;
2512 return r;
2513 }
2514
2515 string header(hdrbl.c_str(), hdrbl.length());
2516 if (outistty) {
2517 header = cleanbin(header);
2518 header.push_back('\n');
2519 }
2520 cout << header;
2521
2522 return 0;
2523}
2524
2525int do_set_omaphdr(ObjectStore *store, coll_t coll,
11fdf7f2 2526 ghobject_t &ghobj, int fd)
7c673cae
FG
2527{
2528 ObjectStore::Transaction tran;
2529 ObjectStore::Transaction *t = &tran;
2530 bufferlist hdrbl;
2531
2532 if (debug)
2533 cerr << "Omap_setheader " << ghobj << std::endl;
2534
2535 int ret = get_fd_data(fd, hdrbl);
2536 if (ret)
2537 return ret;
2538
2539 if (dry_run)
2540 return 0;
2541
2542 t->touch(coll, ghobj);
2543
2544 t->omap_setheader(coll, ghobj, hdrbl);
2545
11fdf7f2
TL
2546 auto ch = store->open_collection(coll);
2547 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2548 return 0;
2549}
2550
2551struct do_fix_lost : public action_on_object_t {
11fdf7f2 2552 void call(ObjectStore *store, coll_t coll,
7c673cae
FG
2553 ghobject_t &ghobj, object_info_t &oi) override {
2554 if (oi.is_lost()) {
2555 cout << coll << "/" << ghobj << " is lost";
2556 if (!dry_run)
2557 cout << ", fixing";
2558 cout << std::endl;
2559 if (dry_run)
11fdf7f2 2560 return;
7c673cae
FG
2561 oi.clear_flag(object_info_t::FLAG_LOST);
2562 bufferlist bl;
11fdf7f2 2563 encode(oi, bl, -1); /* fixme: using full features */
7c673cae
FG
2564 ObjectStore::Transaction t;
2565 t.setattr(coll, ghobj, OI_ATTR, bl);
11fdf7f2
TL
2566 auto ch = store->open_collection(coll);
2567 store->queue_transaction(ch, std::move(t));
7c673cae 2568 }
11fdf7f2 2569 return;
7c673cae
FG
2570 }
2571};
2572
2573int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent = false)
2574{
11fdf7f2 2575 auto ch = store->open_collection(coll);
7c673cae 2576 bufferlist attr;
11fdf7f2 2577 int r = store->getattr(ch, ghobj, SS_ATTR, attr);
7c673cae
FG
2578 if (r < 0) {
2579 if (!silent)
2580 cerr << "Error getting snapset on : " << make_pair(coll, ghobj) << ", "
2581 << cpp_strerror(r) << std::endl;
2582 return r;
2583 }
11fdf7f2 2584 auto bp = attr.cbegin();
7c673cae 2585 try {
11fdf7f2 2586 decode(ss, bp);
7c673cae
FG
2587 } catch (...) {
2588 r = -EINVAL;
2589 cerr << "Error decoding snapset on : " << make_pair(coll, ghobj) << ", "
2590 << cpp_strerror(r) << std::endl;
2591 return r;
2592 }
2593 return 0;
2594}
2595
2596int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
2597{
11fdf7f2 2598 auto ch = store->open_collection(coll);
7c673cae
FG
2599 int r = 0;
2600 formatter->open_object_section("obj");
2601 formatter->open_object_section("id");
2602 ghobj.dump(formatter);
2603 formatter->close_section();
2604
2605 bufferlist attr;
11fdf7f2 2606 int gr = store->getattr(ch, ghobj, OI_ATTR, attr);
7c673cae
FG
2607 if (gr < 0) {
2608 r = gr;
2609 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2610 << cpp_strerror(r) << std::endl;
2611 } else {
2612 object_info_t oi;
11fdf7f2 2613 auto bp = attr.cbegin();
7c673cae 2614 try {
11fdf7f2 2615 decode(oi, bp);
7c673cae
FG
2616 formatter->open_object_section("info");
2617 oi.dump(formatter);
2618 formatter->close_section();
2619 } catch (...) {
2620 r = -EINVAL;
2621 cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
2622 << cpp_strerror(r) << std::endl;
2623 }
2624 }
2625 struct stat st;
11fdf7f2 2626 int sr = store->stat(ch, ghobj, &st, true);
7c673cae
FG
2627 if (sr < 0) {
2628 r = sr;
2629 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2630 << cpp_strerror(r) << std::endl;
2631 } else {
2632 formatter->open_object_section("stat");
2633 formatter->dump_int("size", st.st_size);
2634 formatter->dump_int("blksize", st.st_blksize);
2635 formatter->dump_int("blocks", st.st_blocks);
2636 formatter->dump_int("nlink", st.st_nlink);
2637 formatter->close_section();
2638 }
2639
2640 if (ghobj.hobj.has_snapset()) {
2641 SnapSet ss;
2642 int snr = get_snapset(store, coll, ghobj, ss);
2643 if (snr < 0) {
2644 r = snr;
2645 } else {
2646 formatter->open_object_section("SnapSet");
2647 ss.dump(formatter);
2648 formatter->close_section();
2649 }
2650 }
a8e16298 2651 bufferlist hattr;
11fdf7f2 2652 gr = store->getattr(ch, ghobj, ECUtil::get_hinfo_key(), hattr);
a8e16298
TL
2653 if (gr == 0) {
2654 ECUtil::HashInfo hinfo;
11fdf7f2 2655 auto hp = hattr.cbegin();
a8e16298
TL
2656 try {
2657 decode(hinfo, hp);
2658 formatter->open_object_section("hinfo");
2659 hinfo.dump(formatter);
2660 formatter->close_section();
2661 } catch (...) {
2662 r = -EINVAL;
2663 cerr << "Error decoding hinfo on : " << make_pair(coll, ghobj) << ", "
2664 << cpp_strerror(r) << std::endl;
2665 }
2666 }
9f95a23c
TL
2667 gr = store->dump_onode(ch, ghobj, "onode", formatter);
2668
7c673cae
FG
2669 formatter->close_section();
2670 formatter->flush(cout);
2671 cout << std::endl;
2672 return r;
2673}
2674
11fdf7f2 2675int corrupt_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
1adf2230 2676{
11fdf7f2 2677 auto ch = store->open_collection(coll);
1adf2230 2678 bufferlist attr;
11fdf7f2 2679 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
1adf2230
AA
2680 if (r < 0) {
2681 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2682 << cpp_strerror(r) << std::endl;
2683 return r;
2684 }
2685 object_info_t oi;
11fdf7f2 2686 auto bp = attr.cbegin();
1adf2230 2687 try {
11fdf7f2 2688 decode(oi, bp);
1adf2230
AA
2689 } catch (...) {
2690 r = -EINVAL;
2691 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2692 << cpp_strerror(r) << std::endl;
2693 return r;
2694 }
1adf2230
AA
2695 if (!dry_run) {
2696 attr.clear();
2697 oi.alloc_hint_flags += 0xff;
2698 ObjectStore::Transaction t;
11fdf7f2 2699 encode(oi, attr, -1); /* fixme: using full features */
1adf2230 2700 t.setattr(coll, ghobj, OI_ATTR, attr);
11fdf7f2
TL
2701 auto ch = store->open_collection(coll);
2702 r = store->queue_transaction(ch, std::move(t));
1adf2230
AA
2703 if (r < 0) {
2704 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2705 << cpp_strerror(r) << std::endl;
2706 return r;
2707 }
2708 }
2709 return 0;
2710}
2711
11fdf7f2
TL
2712int set_size(
2713 ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter,
2714 bool corrupt)
7c673cae 2715{
11fdf7f2 2716 auto ch = store->open_collection(coll);
7c673cae
FG
2717 if (ghobj.hobj.is_snapdir()) {
2718 cerr << "Can't set the size of a snapdir" << std::endl;
2719 return -EINVAL;
2720 }
2721 bufferlist attr;
11fdf7f2 2722 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
7c673cae
FG
2723 if (r < 0) {
2724 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2725 << cpp_strerror(r) << std::endl;
2726 return r;
2727 }
2728 object_info_t oi;
11fdf7f2 2729 auto bp = attr.cbegin();
7c673cae 2730 try {
11fdf7f2 2731 decode(oi, bp);
7c673cae
FG
2732 } catch (...) {
2733 r = -EINVAL;
2734 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2735 << cpp_strerror(r) << std::endl;
2736 return r;
2737 }
2738 struct stat st;
11fdf7f2 2739 r = store->stat(ch, ghobj, &st, true);
7c673cae
FG
2740 if (r < 0) {
2741 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2742 << cpp_strerror(r) << std::endl;
2743 }
2744 ghobject_t head(ghobj);
2745 SnapSet ss;
2746 bool found_head = true;
2747 map<snapid_t, uint64_t>::iterator csi;
2748 bool is_snap = ghobj.hobj.is_snap();
2749 if (is_snap) {
2750 head.hobj = head.hobj.get_head();
2751 r = get_snapset(store, coll, head, ss, true);
2752 if (r < 0 && r != -ENOENT) {
2753 // Requested get_snapset() silent, so if not -ENOENT show error
2754 cerr << "Error getting snapset on : " << make_pair(coll, head) << ", "
2755 << cpp_strerror(r) << std::endl;
2756 return r;
2757 }
2758 if (r == -ENOENT) {
2759 head.hobj = head.hobj.get_snapdir();
2760 r = get_snapset(store, coll, head, ss);
2761 if (r < 0)
2762 return r;
2763 found_head = false;
2764 } else {
2765 found_head = true;
2766 }
2767 csi = ss.clone_size.find(ghobj.hobj.snap);
2768 if (csi == ss.clone_size.end()) {
2769 cerr << "SnapSet is missing clone_size for snap " << ghobj.hobj.snap << std::endl;
2770 return -EINVAL;
2771 }
2772 }
2773 if ((uint64_t)st.st_size == setsize && oi.size == setsize
2774 && (!is_snap || csi->second == setsize)) {
2775 cout << "Size of object is already " << setsize << std::endl;
2776 return 0;
2777 }
2778 cout << "Setting size to " << setsize << ", stat size " << st.st_size
2779 << ", obj info size " << oi.size;
2780 if (is_snap) {
2781 cout << ", " << (found_head ? "head" : "snapdir")
2782 << " clone_size " << csi->second;
2783 csi->second = setsize;
2784 }
2785 cout << std::endl;
2786 if (!dry_run) {
2787 attr.clear();
2788 oi.size = setsize;
7c673cae 2789 ObjectStore::Transaction t;
b5b8bbf5 2790 // Only modify object info if we want to corrupt it
b32b8144 2791 if (!corrupt && (uint64_t)st.st_size != setsize) {
b5b8bbf5 2792 t.truncate(coll, ghobj, setsize);
b32b8144
FG
2793 // Changing objectstore size will invalidate data_digest, so clear it.
2794 oi.clear_data_digest();
2795 }
11fdf7f2 2796 encode(oi, attr, -1); /* fixme: using full features */
b32b8144 2797 t.setattr(coll, ghobj, OI_ATTR, attr);
7c673cae
FG
2798 if (is_snap) {
2799 bufferlist snapattr;
2800 snapattr.clear();
11fdf7f2 2801 encode(ss, snapattr);
7c673cae
FG
2802 t.setattr(coll, head, SS_ATTR, snapattr);
2803 }
11fdf7f2
TL
2804 auto ch = store->open_collection(coll);
2805 r = store->queue_transaction(ch, std::move(t));
2806 if (r < 0) {
2807 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2808 << cpp_strerror(r) << std::endl;
2809 return r;
2810 }
2811 }
2812 return 0;
2813}
2814
2815int clear_data_digest(ObjectStore *store, coll_t coll, ghobject_t &ghobj) {
2816 auto ch = store->open_collection(coll);
2817 bufferlist attr;
2818 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2819 if (r < 0) {
2820 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2821 << cpp_strerror(r) << std::endl;
2822 return r;
2823 }
2824 object_info_t oi;
2825 auto bp = attr.cbegin();
2826 try {
2827 decode(oi, bp);
2828 } catch (...) {
2829 r = -EINVAL;
2830 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2831 << cpp_strerror(r) << std::endl;
2832 return r;
2833 }
2834 if (!dry_run) {
2835 attr.clear();
2836 oi.clear_data_digest();
2837 encode(oi, attr, -1); /* fixme: using full features */
2838 ObjectStore::Transaction t;
2839 t.setattr(coll, ghobj, OI_ATTR, attr);
2840 auto ch = store->open_collection(coll);
2841 r = store->queue_transaction(ch, std::move(t));
7c673cae
FG
2842 if (r < 0) {
2843 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2844 << cpp_strerror(r) << std::endl;
2845 return r;
2846 }
2847 }
2848 return 0;
2849}
2850
2851int clear_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj,
11fdf7f2 2852 string arg)
7c673cae
FG
2853{
2854 SnapSet ss;
2855 int ret = get_snapset(store, coll, ghobj, ss);
2856 if (ret < 0)
2857 return ret;
2858
7c673cae
FG
2859 // Use "corrupt" to clear entire SnapSet
2860 // Use "seq" to just corrupt SnapSet.seq
2861 if (arg == "corrupt" || arg == "seq")
2862 ss.seq = 0;
9f95a23c 2863 // Use "snaps" to just clear SnapSet.clone_snaps
7c673cae 2864 if (arg == "corrupt" || arg == "snaps")
9f95a23c 2865 ss.clone_snaps.clear();
7c673cae
FG
2866 // By default just clear clone, clone_overlap and clone_size
2867 if (arg == "corrupt")
2868 arg = "";
2869 if (arg == "" || arg == "clones")
2870 ss.clones.clear();
2871 if (arg == "" || arg == "clone_overlap")
2872 ss.clone_overlap.clear();
2873 if (arg == "" || arg == "clone_size")
2874 ss.clone_size.clear();
2875 // Break all clone sizes by adding 1
2876 if (arg == "size") {
2877 for (map<snapid_t, uint64_t>::iterator i = ss.clone_size.begin();
2878 i != ss.clone_size.end(); ++i)
2879 ++(i->second);
2880 }
2881
2882 if (!dry_run) {
2883 bufferlist bl;
11fdf7f2 2884 encode(ss, bl);
7c673cae
FG
2885 ObjectStore::Transaction t;
2886 t.setattr(coll, ghobj, SS_ATTR, bl);
11fdf7f2
TL
2887 auto ch = store->open_collection(coll);
2888 int r = store->queue_transaction(ch, std::move(t));
7c673cae
FG
2889 if (r < 0) {
2890 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2891 << cpp_strerror(r) << std::endl;
2892 return r;
2893 }
2894 }
2895 return 0;
2896}
2897
2898vector<snapid_t>::iterator find(vector<snapid_t> &v, snapid_t clid)
2899{
2900 return std::find(v.begin(), v.end(), clid);
2901}
2902
2903map<snapid_t, interval_set<uint64_t> >::iterator
2904find(map<snapid_t, interval_set<uint64_t> > &m, snapid_t clid)
2905{
2906 return m.find(clid);
2907}
2908
2909map<snapid_t, uint64_t>::iterator find(map<snapid_t, uint64_t> &m,
2910 snapid_t clid)
2911{
2912 return m.find(clid);
2913}
2914
2915template<class T>
2916int remove_from(T &mv, string name, snapid_t cloneid, bool force)
2917{
2918 typename T::iterator i = find(mv, cloneid);
2919 if (i != mv.end()) {
2920 mv.erase(i);
2921 } else {
2922 cerr << "Clone " << cloneid << " doesn't exist in " << name;
2923 if (force) {
2924 cerr << " (ignored)" << std::endl;
2925 return 0;
2926 }
2927 cerr << std::endl;
2928 return -EINVAL;
2929 }
2930 return 0;
2931}
2932
11fdf7f2
TL
2933int remove_clone(
2934 ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cloneid, bool force)
7c673cae
FG
2935{
2936 // XXX: Don't allow this if in a cache tier or former cache tier
2937 // bool allow_incomplete_clones() const {
2938 // return cache_mode != CACHEMODE_NONE || has_flag(FLAG_INCOMPLETE_CLONES);
2939
2940 SnapSet snapset;
2941 int ret = get_snapset(store, coll, ghobj, snapset);
2942 if (ret < 0)
2943 return ret;
2944
2945 // Derived from trim_object()
2946 // ...from snapset
2947 vector<snapid_t>::iterator p;
2948 for (p = snapset.clones.begin(); p != snapset.clones.end(); ++p)
2949 if (*p == cloneid)
2950 break;
2951 if (p == snapset.clones.end()) {
2952 cerr << "Clone " << cloneid << " not present";
2953 return -ENOENT;
2954 }
2955 if (p != snapset.clones.begin()) {
2956 // not the oldest... merge overlap into next older clone
2957 vector<snapid_t>::iterator n = p - 1;
2958 hobject_t prev_coid = ghobj.hobj;
2959 prev_coid.snap = *n;
2960 //bool adjust_prev_bytes = is_present_clone(prev_coid);
2961
2962 //if (adjust_prev_bytes)
2963 // ctx->delta_stats.num_bytes -= snapset.get_clone_bytes(*n);
2964
2965 snapset.clone_overlap[*n].intersection_of(
2966 snapset.clone_overlap[*p]);
2967
2968 //if (adjust_prev_bytes)
2969 // ctx->delta_stats.num_bytes += snapset.get_clone_bytes(*n);
2970 }
2971
2972 ret = remove_from(snapset.clones, "clones", cloneid, force);
2973 if (ret) return ret;
2974 ret = remove_from(snapset.clone_overlap, "clone_overlap", cloneid, force);
2975 if (ret) return ret;
2976 ret = remove_from(snapset.clone_size, "clone_size", cloneid, force);
2977 if (ret) return ret;
2978
2979 if (dry_run)
2980 return 0;
2981
2982 bufferlist bl;
11fdf7f2 2983 encode(snapset, bl);
7c673cae
FG
2984 ObjectStore::Transaction t;
2985 t.setattr(coll, ghobj, SS_ATTR, bl);
11fdf7f2
TL
2986 auto ch = store->open_collection(coll);
2987 int r = store->queue_transaction(ch, std::move(t));
7c673cae
FG
2988 if (r < 0) {
2989 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2990 << cpp_strerror(r) << std::endl;
2991 return r;
2992 }
2993 cout << "Removal of clone " << cloneid << " complete" << std::endl;
2994 cout << "Use pg repair after OSD restarted to correct stat information" << std::endl;
2995 return 0;
2996}
2997
2998int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst)
2999{
3000 cout << "dup from " << src->get_type() << ": " << srcpath << "\n"
3001 << " to " << dst->get_type() << ": " << dstpath
3002 << std::endl;
7c673cae
FG
3003 int num, i;
3004 vector<coll_t> collections;
3005 int r;
3006
3007 r = src->mount();
3008 if (r < 0) {
3009 cerr << "failed to mount src: " << cpp_strerror(r) << std::endl;
3010 return r;
3011 }
3012 r = dst->mount();
3013 if (r < 0) {
3014 cerr << "failed to mount dst: " << cpp_strerror(r) << std::endl;
3015 goto out_src;
3016 }
3017
3018 if (src->get_fsid() != dst->get_fsid()) {
3019 cerr << "src fsid " << src->get_fsid() << " != dest " << dst->get_fsid()
3020 << std::endl;
3021 goto out;
3022 }
3023 cout << "fsid " << src->get_fsid() << std::endl;
3024
3025 // make sure dst is empty
3026 r = dst->list_collections(collections);
3027 if (r < 0) {
3028 cerr << "error listing collections on dst: " << cpp_strerror(r) << std::endl;
3029 goto out;
3030 }
3031 if (!collections.empty()) {
3032 cerr << "destination store is not empty" << std::endl;
3033 goto out;
3034 }
3035
3036 r = src->list_collections(collections);
3037 if (r < 0) {
3038 cerr << "error listing collections on src: " << cpp_strerror(r) << std::endl;
3039 goto out;
3040 }
3041
3042 num = collections.size();
3043 cout << num << " collections" << std::endl;
3044 i = 1;
3045 for (auto cid : collections) {
3046 cout << i++ << "/" << num << " " << cid << std::endl;
11fdf7f2
TL
3047 auto ch = src->open_collection(cid);
3048 auto dch = dst->create_new_collection(cid);
7c673cae
FG
3049 {
3050 ObjectStore::Transaction t;
11fdf7f2 3051 int bits = src->collection_bits(ch);
7c673cae 3052 if (bits < 0) {
181888fb
FG
3053 if (src->get_type() == "filestore" && cid.is_meta()) {
3054 bits = 0;
3055 } else {
3056 cerr << "cannot get bit count for collection " << cid << ": "
3057 << cpp_strerror(bits) << std::endl;
3058 goto out;
3059 }
7c673cae
FG
3060 }
3061 t.create_collection(cid, bits);
11fdf7f2 3062 dst->queue_transaction(dch, std::move(t));
7c673cae
FG
3063 }
3064
3065 ghobject_t pos;
3066 uint64_t n = 0;
3067 uint64_t bytes = 0, keys = 0;
3068 while (true) {
3069 vector<ghobject_t> ls;
11fdf7f2 3070 r = src->collection_list(ch, pos, ghobject_t::get_max(), 1000, &ls, &pos);
7c673cae
FG
3071 if (r < 0) {
3072 cerr << "collection_list on " << cid << " from " << pos << " got: "
3073 << cpp_strerror(r) << std::endl;
3074 goto out;
3075 }
3076 if (ls.empty()) {
3077 break;
3078 }
3079
3080 for (auto& oid : ls) {
3081 //cout << " " << cid << " " << oid << std::endl;
3082 if (n % 100 == 0) {
3083 cout << " " << std::setw(16) << n << " objects, "
3084 << std::setw(16) << bytes << " bytes, "
3085 << std::setw(16) << keys << " keys"
3086 << std::setw(1) << "\r" << std::flush;
3087 }
3088 n++;
3089
3090 ObjectStore::Transaction t;
3091 t.touch(cid, oid);
3092
20effc67 3093 map<string,bufferptr,less<>> attrs;
11fdf7f2 3094 src->getattrs(ch, oid, attrs);
7c673cae
FG
3095 if (!attrs.empty()) {
3096 t.setattrs(cid, oid, attrs);
3097 }
3098
3099 bufferlist bl;
11fdf7f2 3100 src->read(ch, oid, 0, 0, bl);
7c673cae
FG
3101 if (bl.length()) {
3102 t.write(cid, oid, 0, bl.length(), bl);
3103 bytes += bl.length();
3104 }
3105
3106 bufferlist header;
3107 map<string,bufferlist> omap;
11fdf7f2 3108 src->omap_get(ch, oid, &header, &omap);
7c673cae
FG
3109 if (header.length()) {
3110 t.omap_setheader(cid, oid, header);
3111 ++keys;
3112 }
3113 if (!omap.empty()) {
3114 keys += omap.size();
3115 t.omap_setkeys(cid, oid, omap);
3116 }
3117
11fdf7f2 3118 dst->queue_transaction(dch, std::move(t));
7c673cae
FG
3119 }
3120 }
3121 cout << " " << std::setw(16) << n << " objects, "
3122 << std::setw(16) << bytes << " bytes, "
3123 << std::setw(16) << keys << " keys"
3124 << std::setw(1) << std::endl;
3125 }
3126
3127 // keyring
3128 cout << "keyring" << std::endl;
3129 {
3130 bufferlist bl;
3131 string s = srcpath + "/keyring";
3132 string err;
3133 r = bl.read_file(s.c_str(), &err);
3134 if (r < 0) {
3135 cerr << "failed to copy " << s << ": " << err << std::endl;
3136 } else {
3137 string d = dstpath + "/keyring";
3138 bl.write_file(d.c_str(), 0600);
3139 }
3140 }
3141
3142 // osd metadata
3143 cout << "duping osd metadata" << std::endl;
3144 {
3145 for (auto k : {"magic", "whoami", "ceph_fsid", "fsid"}) {
3146 string val;
3147 src->read_meta(k, &val);
3148 dst->write_meta(k, val);
3149 }
3150 }
3151
3152 dst->write_meta("ready", "ready");
3153
3154 cout << "done." << std::endl;
3155 r = 0;
3156 out:
3157 dst->umount();
3158 out_src:
3159 src->umount();
3160 return r;
3161}
3162
2a845540
TL
3163
3164const int ceph_entity_name_type(const string name)
3165{
3166 if (name == "mds") return CEPH_ENTITY_TYPE_MDS;
3167 if (name == "osd") return CEPH_ENTITY_TYPE_OSD;
3168 if (name == "mon") return CEPH_ENTITY_TYPE_MON;
3169 if (name == "client") return CEPH_ENTITY_TYPE_CLIENT;
3170 if (name == "mgr") return CEPH_ENTITY_TYPE_MGR;
3171 if (name == "auth") return CEPH_ENTITY_TYPE_AUTH;
3172 return -1;
3173}
3174
3175eversion_t get_eversion_from_str(const string& s) {
3176 eversion_t e;
3177 vector<string> result;
3178 boost::split(result, s, boost::is_any_of("'"));
3179 if (result.size() != 2) {
3180 cerr << "eversion_t: invalid format: '" << s << "'" << std::endl;
3181 return e;
3182 }
3183 e.epoch = atoi(result[0].c_str());
3184 e.version = atoi(result[1].c_str());
3185 return e;
3186}
3187
3188osd_reqid_t get_reqid_from_str(const string& s) {
3189 osd_reqid_t reqid;
3190
3191 vector<string> result;
3192 boost::split(result, s, boost::is_any_of(".:"));
3193 if (result.size() != 4) {
3194 cerr << "reqid: invalid format " << s << std::endl;
3195 return osd_reqid_t();
3196 }
3197 reqid.name._type = ceph_entity_name_type(result[0]);
3198 reqid.name._num = atoi(result[1].c_str());
3199
3200 reqid.inc = atoi(result[2].c_str());
3201 reqid.tid = atoi(result[3].c_str());
3202 return reqid;
3203}
3204
3205void do_dups_inject_transction(ObjectStore *store, spg_t r_pgid, map<string,bufferlist> *new_dups)
3206{
3207 ObjectStore::Transaction t;
3208 coll_t coll(r_pgid);
3209 cerr << "injecting dups into pgid:" << r_pgid << " num of dups:" << new_dups->size() << std::endl;
3210 t.omap_setkeys(coll, r_pgid.make_pgmeta_oid(), (*new_dups));
3211 auto ch = store->open_collection(coll);
3212 store->queue_transaction(ch, std::move(t));
3213 new_dups->clear();
3214}
3215
3216int do_dups_inject_object(ObjectStore *store, spg_t r_pgid, json_spirit::mObject &in_json_obj,
3217 map<string,bufferlist> *new_dups, bool debug) {
3218 std::map<std::string, json_spirit::mValue>::const_iterator it = in_json_obj.find("generate");
3219 int32_t generate = 0;
3220 if (it != in_json_obj.end()) {
3221 generate = atoi(it->second.get_str().c_str());
3222 }
3223
3224 it = in_json_obj.find("reqid");
3225 if (it == in_json_obj.end()) {
3226 return 1;
3227 }
3228 osd_reqid_t reqid(get_reqid_from_str(it->second.get_str()));
3229 it = in_json_obj.find("version");
3230 if (it == in_json_obj.end()) {
3231 return 1;
3232 }
3233 eversion_t version(get_eversion_from_str(it->second.get_str()));
3234 it = in_json_obj.find("user_version");
3235 if (it == in_json_obj.end()) {
3236 return 1;
3237 }
3238 version_t user_version = atoi(it->second.get_str().c_str());
3239 it = in_json_obj.find("return_code");
3240 if (it == in_json_obj.end()) {
3241 return 1;
3242 }
3243 int32_t return_code = atoi(it->second.get_str().c_str());
3244 if (generate) {
3245 for(auto i = 0; i < generate; ++i) {
3246 version.version++;
3247 if (debug) {
3248 cout << "generate dups reqid " << reqid << " v=" << version << std::endl;
3249 }
3250 pg_log_dup_t tmp(version, user_version, reqid, return_code);
3251 bufferlist bl;
3252 encode(tmp, bl);
3253 (*new_dups)[tmp.get_key_name()] = std::move(bl);
3254 if ( new_dups->size() > 50000 ) {
3255 do_dups_inject_transction(store, r_pgid, new_dups);
3256 cout << "inject of " << i << " dups into pgid:" << r_pgid << " done..." << std::endl;
3257 }
3258 }
3259 return 0;
3260 } else {
3261 pg_log_dup_t tmp(version, user_version, reqid, return_code);
3262 if (debug) {
3263 cout << "adding dup: " << tmp << "into key:" << tmp.get_key_name() << std::endl;
3264 }
3265 bufferlist bl;
3266 encode(tmp, bl);
3267 (*new_dups)[tmp.get_key_name()] = std::move(bl);
3268 }
3269 return 0;
3270}
3271
3272void do_dups_inject_from_json(ObjectStore *store, spg_t r_pgid, json_spirit::mValue &inJson, bool debug)
3273{
3274 map<string,bufferlist> new_dups;
3275 const vector<json_spirit::mValue>& o = inJson.get_array();
3276 for (const auto& obj : o) {
3277 if (obj.type() == json_spirit::obj_type) {
3278 json_spirit::mObject Mobj = obj.get_obj();
3279 do_dups_inject_object(store, r_pgid, Mobj, &new_dups, debug);
3280 } else {
3281 throw std::runtime_error("JSON array/object not allowed type:" + std::to_string(obj.type()));
3282 return;
3283 }
3284 }
3285 if (new_dups.size() > 0) {
3286 do_dups_inject_transction(store, r_pgid, &new_dups);
3287 }
3288
3289
3290 return ;
3291}
3292
7c673cae
FG
3293void usage(po::options_description &desc)
3294{
3295 cerr << std::endl;
3296 cerr << desc << std::endl;
3297 cerr << std::endl;
3298 cerr << "Positional syntax:" << std::endl;
3299 cerr << std::endl;
3300 cerr << "ceph-objectstore-tool ... <object> (get|set)-bytes [file]" << std::endl;
3301 cerr << "ceph-objectstore-tool ... <object> set-(attr|omap) <key> [file]" << std::endl;
3302 cerr << "ceph-objectstore-tool ... <object> (get|rm)-(attr|omap) <key>" << std::endl;
3303 cerr << "ceph-objectstore-tool ... <object> get-omaphdr" << std::endl;
3304 cerr << "ceph-objectstore-tool ... <object> set-omaphdr [file]" << std::endl;
3305 cerr << "ceph-objectstore-tool ... <object> list-attrs" << std::endl;
3306 cerr << "ceph-objectstore-tool ... <object> list-omap" << std::endl;
3307 cerr << "ceph-objectstore-tool ... <object> remove|removeall" << std::endl;
3308 cerr << "ceph-objectstore-tool ... <object> dump" << std::endl;
3309 cerr << "ceph-objectstore-tool ... <object> set-size" << std::endl;
11fdf7f2 3310 cerr << "ceph-objectstore-tool ... <object> clear-data-digest" << std::endl;
7c673cae
FG
3311 cerr << "ceph-objectstore-tool ... <object> remove-clone-metadata <cloneid>" << std::endl;
3312 cerr << std::endl;
3313 cerr << "<object> can be a JSON object description as displayed" << std::endl;
3314 cerr << "by --op list." << std::endl;
3315 cerr << "<object> can be an object name which will be looked up in all" << std::endl;
3316 cerr << "the OSD's PGs." << std::endl;
3317 cerr << "<object> can be the empty string ('') which with a provided pgid " << std::endl;
3318 cerr << "specifies the pgmeta object" << std::endl;
3319 cerr << std::endl;
3320 cerr << "The optional [file] argument will read stdin or write stdout" << std::endl;
3321 cerr << "if not specified or if '-' specified." << std::endl;
3322}
3323
// Returns true when `check` finishes with the exact character sequence
// `ending`.  An empty `ending` matches every string.
bool ends_with(const string& check, const string& ending)
{
  // A suffix longer than the string itself can never match.
  if (ending.size() > check.size()) {
    return false;
  }
  const string::size_type tail_start = check.size() - ending.size();
  return check.compare(tail_start, ending.size(), ending) == 0;
}
3328
7c673cae
FG
3329int main(int argc, char **argv)
3330{
3331 string dpath, jpath, pgidstr, op, file, mountpoint, mon_store_path, object;
3332 string target_data_path, fsid;
11fdf7f2 3333 string objcmd, arg1, arg2, type, format, argnspace, pool, rmtypestr;
7c673cae
FG
3334 boost::optional<std::string> nspace;
3335 spg_t pgid;
3336 unsigned epoch = 0;
9f95a23c 3337 unsigned slow_threshold = 16;
7c673cae
FG
3338 ghobject_t ghobj;
3339 bool human_readable;
7c673cae 3340 Formatter *formatter;
9f95a23c 3341 bool head, tty;
7c673cae
FG
3342
3343 po::options_description desc("Allowed options");
3344 desc.add_options()
3345 ("help", "produce help message")
3346 ("type", po::value<string>(&type),
1e59de90 3347 "Arg is one of [bluestore (default), memstore]")
7c673cae
FG
3348 ("data-path", po::value<string>(&dpath),
3349 "path to object store, mandatory")
3350 ("journal-path", po::value<string>(&jpath),
3351 "path to journal, use if tool can't find it")
3352 ("pgid", po::value<string>(&pgidstr),
1e59de90 3353 "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, trim-pg-log-dups")
7c673cae 3354 ("pool", po::value<string>(&pool),
1e59de90 3355 "Pool name")
7c673cae 3356 ("op", po::value<string>(&op),
1e59de90
TL
3357 "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, list-slow-omap, fix-lost, list-pgs, dump-super, meta-list, "
3358 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, update-mon-db, dump-export, trim-pg-log, trim-pg-log-dups statfs]")
7c673cae
FG
3359 ("epoch", po::value<unsigned>(&epoch),
3360 "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
3361 ("file", po::value<string>(&file),
3efd9988 3362 "path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap")
7c673cae
FG
3363 ("mon-store-path", po::value<string>(&mon_store_path),
3364 "path of monstore to update-mon-db")
3365 ("fsid", po::value<string>(&fsid),
3366 "fsid for new store created by mkfs")
3367 ("target-data-path", po::value<string>(&target_data_path),
3368 "path of target object store (for --op dup)")
3369 ("mountpoint", po::value<string>(&mountpoint),
3370 "fuse mountpoint")
3371 ("format", po::value<string>(&format)->default_value("json-pretty"),
3372 "Output format which may be json, json-pretty, xml, xml-pretty")
3373 ("debug", "Enable diagnostic output to stderr")
9f95a23c
TL
3374 ("no-mon-config", "Do not contact mons for config")
3375 ("no-superblock", "Do not read superblock")
7c673cae
FG
3376 ("force", "Ignore some types of errors and proceed with operation - USE WITH CAUTION: CORRUPTION POSSIBLE NOW OR IN THE FUTURE")
3377 ("skip-journal-replay", "Disable journal replay")
3378 ("skip-mount-omap", "Disable mounting of omap")
3379 ("head", "Find head/snapdir when searching for objects by name")
3380 ("dry-run", "Don't modify the objectstore")
9f95a23c 3381 ("tty", "Treat stdout as a tty (no binary data)")
7c673cae 3382 ("namespace", po::value<string>(&argnspace), "Specify namespace when searching for objects")
11fdf7f2 3383 ("rmtype", po::value<string>(&rmtypestr), "Specify corrupting object removal 'snapmap' or 'nosnapmap' - TESTING USE ONLY")
9f95a23c
TL
3384 ("slow-omap-threshold", po::value<unsigned>(&slow_threshold),
3385 "Threshold (in seconds) to consider omap listing slow (for op=list-slow-omap)")
7c673cae
FG
3386 ;
3387
3388 po::options_description positional("Positional options");
3389 positional.add_options()
3390 ("object", po::value<string>(&object), "'' for pgmeta_oid, object name or ghobject in json")
3391 ("objcmd", po::value<string>(&objcmd), "command [(get|set)-bytes, (get|set|rm)-(attr|omap), (get|set)-omaphdr, list-attrs, list-omap, remove]")
91327a77 3392 ("arg1", po::value<string>(&arg1), "arg1 based on cmd")
7c673cae 3393 ("arg2", po::value<string>(&arg2), "arg2 based on cmd")
7c673cae
FG
3394 ;
3395
b32b8144 3396 po::options_description all;
7c673cae
FG
3397 all.add(desc).add(positional);
3398
3399 po::positional_options_description pd;
3400 pd.add("object", 1).add("objcmd", 1).add("arg1", 1).add("arg2", 1);
3401
3402 vector<string> ceph_option_strings;
11fdf7f2 3403
7c673cae
FG
3404 po::variables_map vm;
3405 try {
3406 po::parsed_options parsed =
3407 po::command_line_parser(argc, argv).options(all).allow_unregistered().positional(pd).run();
3408 po::store( parsed, vm);
3409 po::notify(vm);
3410 ceph_option_strings = po::collect_unrecognized(parsed.options,
3411 po::include_positional);
3412 } catch(po::error &e) {
3413 std::cerr << e.what() << std::endl;
3414 return 1;
3415 }
3416
3417 if (vm.count("help")) {
b32b8144 3418 usage(desc);
7c673cae
FG
3419 return 1;
3420 }
3421
11fdf7f2
TL
3422 // Compatibility with previous option name
3423 if (op == "dump-import")
3424 op = "dump-export";
3425
3efd9988 3426 debug = (vm.count("debug") > 0);
7c673cae 3427
3efd9988 3428 force = (vm.count("force") > 0);
7c673cae 3429
9f95a23c
TL
3430 no_superblock = (vm.count("no-superblock") > 0);
3431
7c673cae
FG
3432 if (vm.count("namespace"))
3433 nspace = argnspace;
3434
3efd9988 3435 dry_run = (vm.count("dry-run") > 0);
9f95a23c 3436 tty = (vm.count("tty") > 0);
3efd9988 3437
7c673cae
FG
3438 osflagbits_t flags = 0;
3439 if (dry_run || vm.count("skip-journal-replay"))
3440 flags |= SKIP_JOURNAL_REPLAY;
3441 if (vm.count("skip-mount-omap"))
3442 flags |= SKIP_MOUNT_OMAP;
3443 if (op == "update-mon-db")
3444 flags |= SKIP_JOURNAL_REPLAY;
3efd9988 3445
7c673cae
FG
3446 head = (vm.count("head") > 0);
3447
11fdf7f2
TL
3448 // infer osd id so we can authenticate
3449 char fn[PATH_MAX];
3450 snprintf(fn, sizeof(fn), "%s/whoami", dpath.c_str());
3451 int fd = ::open(fn, O_RDONLY);
3452 if (fd >= 0) {
3453 bufferlist bl;
3454 bl.read_fd(fd, 64);
3455 string s(bl.c_str(), bl.length());
3456 int whoami = atoi(s.c_str());
3457 vector<string> tmp;
3458 // identify ourselves as this osd so we can auth and fetch our configs
3459 tmp.push_back("-n");
3460 tmp.push_back(string("osd.") + stringify(whoami));
3461 // populate osd_data so that the default keyring location works
3462 tmp.push_back("--osd-data");
3463 tmp.push_back(dpath);
3464 tmp.insert(tmp.end(), ceph_option_strings.begin(),
3465 ceph_option_strings.end());
3466 tmp.swap(ceph_option_strings);
3467 }
3468
7c673cae 3469 vector<const char *> ceph_options;
7c673cae
FG
3470 ceph_options.reserve(ceph_options.size() + ceph_option_strings.size());
3471 for (vector<string>::iterator i = ceph_option_strings.begin();
3472 i != ceph_option_strings.end();
3473 ++i) {
3474 ceph_options.push_back(i->c_str());
3475 }
3476
7c673cae 3477 snprintf(fn, sizeof(fn), "%s/type", dpath.c_str());
11fdf7f2 3478 fd = ::open(fn, O_RDONLY);
7c673cae
FG
3479 if (fd >= 0) {
3480 bufferlist bl;
3481 bl.read_fd(fd, 64);
3482 if (bl.length()) {
3483 string dp_type = string(bl.c_str(), bl.length() - 1); // drop \n
3484 if (vm.count("type") && dp_type != "" && type != dp_type)
3485 cerr << "WARNING: Ignoring type \"" << type << "\" - found data-path type \""
3486 << dp_type << "\"" << std::endl;
3487 type = dp_type;
3488 //cout << "object store type is " << type << std::endl;
3489 }
3490 ::close(fd);
3491 }
3492 if (!vm.count("type") && type == "") {
11fdf7f2 3493 type = "bluestore";
7c673cae
FG
3494 }
3495 if (!vm.count("data-path") &&
1e59de90 3496 op != "dump-export") {
7c673cae
FG
3497 cerr << "Must provide --data-path" << std::endl;
3498 usage(desc);
3499 return 1;
3500 }
7c673cae
FG
3501 if (!vm.count("op") && !vm.count("object")) {
3502 cerr << "Must provide --op or object command..." << std::endl;
3503 usage(desc);
3504 return 1;
3505 }
7c673cae
FG
3506 if (op == "fuse" && mountpoint.length() == 0) {
3507 cerr << "Missing fuse mountpoint" << std::endl;
3508 usage(desc);
3509 return 1;
3510 }
9f95a23c 3511 outistty = isatty(STDOUT_FILENO) || tty;
7c673cae
FG
3512
3513 file_fd = fd_none;
3efd9988 3514 if ((op == "export" || op == "export-remove" || op == "get-osdmap" || op == "get-inc-osdmap") && !dry_run) {
7c673cae
FG
3515 if (!vm.count("file") || file == "-") {
3516 if (outistty) {
3517 cerr << "stdout is a tty and no --file filename specified" << std::endl;
3518 return 1;
3519 }
3520 file_fd = STDOUT_FILENO;
3521 } else {
3522 file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
3523 }
2a845540 3524 } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap" || op == "pg-log-inject-dups") {
7c673cae
FG
3525 if (!vm.count("file") || file == "-") {
3526 if (isatty(STDIN_FILENO)) {
3527 cerr << "stdin is a tty and no --file filename specified" << std::endl;
3528 return 1;
3529 }
3530 file_fd = STDIN_FILENO;
3531 } else {
3532 file_fd = open(file.c_str(), O_RDONLY);
3533 }
3534 }
3535
3536 ObjectStoreTool tool = ObjectStoreTool(file_fd, dry_run);
3537
3538 if (vm.count("file") && file_fd == fd_none && !dry_run) {
11fdf7f2 3539 cerr << "--file option only applies to import, dump-export, export, export-remove, "
7c673cae
FG
3540 << "get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap" << std::endl;
3541 return 1;
3542 }
3543
3544 if (file_fd != fd_none && file_fd < 0) {
3545 string err = string("file: ") + file;
3546 perror(err.c_str());
3547 return 1;
3548 }
9f95a23c
TL
3549 int init_flags = 0;
3550 if (vm.count("no-mon-config") > 0) {
3551 init_flags |= CINIT_FLAG_NO_MON_CONFIG;
3552 }
7c673cae
FG
3553
3554 auto cct = global_init(
11fdf7f2
TL
3555 NULL, ceph_options,
3556 CEPH_ENTITY_TYPE_OSD,
3557 CODE_ENVIRONMENT_UTILITY_NODOUT,
9f95a23c 3558 init_flags);
7c673cae 3559 common_init_finish(g_ceph_context);
7c673cae 3560 if (debug) {
11fdf7f2
TL
3561 g_conf().set_val_or_die("log_to_stderr", "true");
3562 g_conf().set_val_or_die("err_to_stderr", "true");
7c673cae 3563 }
11fdf7f2 3564 g_conf().apply_changes(nullptr);
7c673cae
FG
3565
3566 // Special list handling. Treating pretty_format as human readable,
3567 // with one object per line and not an enclosing array.
3568 human_readable = ends_with(format, "-pretty");
3569 if ((op == "list" || op == "meta-list") && human_readable) {
3570 // Remove -pretty from end of format which we know is there
3571 format = format.substr(0, format.size() - strlen("-pretty"));
3572 }
3573
3574 formatter = Formatter::create(format);
3575 if (formatter == NULL) {
3576 cerr << "unrecognized format: " << format << std::endl;
3577 return 1;
3578 }
3579
11fdf7f2
TL
3580 if (op == "dump-export") {
3581 int ret = tool.dump_export(formatter);
b32b8144 3582 if (ret < 0) {
11fdf7f2 3583 cerr << "dump-export: "
b32b8144
FG
3584 << cpp_strerror(ret) << std::endl;
3585 return 1;
3586 }
3587 return 0;
3588 }
3589
7c673cae
FG
3590 //Verify that data-path really exists
3591 struct stat st;
3592 if (::stat(dpath.c_str(), &st) == -1) {
3593 string err = string("data-path: ") + dpath;
3594 perror(err.c_str());
3595 return 1;
3596 }
3597
20effc67 3598 if (pgidstr.length() && pgidstr != "meta" && !pgid.parse(pgidstr.c_str())) {
7c673cae
FG
3599 cerr << "Invalid pgid '" << pgidstr << "' specified" << std::endl;
3600 return 1;
3601 }
3602
20effc67
TL
3603 std::unique_ptr<ObjectStore> fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags);
3604 if (!fs) {
7c673cae
FG
3605 cerr << "Unable to create store of type " << type << std::endl;
3606 return 1;
3607 }
3608
3609 if (op == "fsck" || op == "fsck-deep") {
3610 int r = fs->fsck(op == "fsck-deep");
3611 if (r < 0) {
3612 cerr << "fsck failed: " << cpp_strerror(r) << std::endl;
3613 return 1;
3614 }
3615 if (r > 0) {
9f95a23c 3616 cerr << "fsck status: " << r << " remaining error(s) and warning(s)" << std::endl;
7c673cae
FG
3617 return 1;
3618 }
9f95a23c 3619 cout << "fsck success" << std::endl;
7c673cae
FG
3620 return 0;
3621 }
3efd9988
FG
3622 if (op == "repair" || op == "repair-deep") {
3623 int r = fs->repair(op == "repair-deep");
3624 if (r < 0) {
3625 cerr << "repair failed: " << cpp_strerror(r) << std::endl;
3626 return 1;
3627 }
3628 if (r > 0) {
9f95a23c 3629 cerr << "repair status: " << r << " remaining error(s) and warning(s)" << std::endl;
3efd9988
FG
3630 return 1;
3631 }
9f95a23c 3632 cout << "repair success" << std::endl;
3efd9988
FG
3633 return 0;
3634 }
7c673cae
FG
3635 if (op == "mkfs") {
3636 if (fsid.length()) {
3637 uuid_d f;
3638 bool r = f.parse(fsid.c_str());
3639 if (!r) {
3640 cerr << "failed to parse uuid '" << fsid << "'" << std::endl;
3641 return 1;
3642 }
3643 fs->set_fsid(f);
3644 }
3645 int r = fs->mkfs();
3646 if (r < 0) {
3efd9988 3647 cerr << "mkfs failed: " << cpp_strerror(r) << std::endl;
7c673cae
FG
3648 return 1;
3649 }
3650 return 0;
3651 }
3652 if (op == "dup") {
3653 string target_type;
3654 char fn[PATH_MAX];
3655 snprintf(fn, sizeof(fn), "%s/type", target_data_path.c_str());
3656 int fd = ::open(fn, O_RDONLY);
3657 if (fd < 0) {
3658 cerr << "Unable to open " << target_data_path << "/type" << std::endl;
3659 exit(1);
3660 }
3661 bufferlist bl;
3662 bl.read_fd(fd, 64);
3663 if (bl.length()) {
3664 target_type = string(bl.c_str(), bl.length() - 1); // drop \n
3665 }
3666 ::close(fd);
20effc67 3667 unique_ptr<ObjectStore> targetfs = ObjectStore::create(
7c673cae
FG
3668 g_ceph_context, target_type,
3669 target_data_path, "", 0);
20effc67 3670 if (!targetfs) {
7c673cae
FG
3671 cerr << "Unable to open store of type " << target_type << std::endl;
3672 return 1;
3673 }
20effc67 3674 int r = dup(dpath, fs.get(), target_data_path, targetfs.get());
7c673cae
FG
3675 if (r < 0) {
3676 cerr << "dup failed: " << cpp_strerror(r) << std::endl;
3677 return 1;
3678 }
3679 return 0;
3680 }
3681
7c673cae
FG
3682 int ret = fs->mount();
3683 if (ret < 0) {
3684 if (ret == -EBUSY) {
3685 cerr << "OSD has the store locked" << std::endl;
3686 } else {
3687 cerr << "Mount failed with '" << cpp_strerror(ret) << "'" << std::endl;
3688 }
3689 return 1;
3690 }
3691
3692 if (op == "fuse") {
3693#ifdef HAVE_LIBFUSE
20effc67 3694 FuseStore fuse(fs.get(), mountpoint);
7c673cae
FG
3695 cout << "mounting fuse at " << mountpoint << " ..." << std::endl;
3696 int r = fuse.main();
20effc67 3697 fs->umount();
7c673cae
FG
3698 if (r < 0) {
3699 cerr << "failed to mount fuse: " << cpp_strerror(r) << std::endl;
3700 return 1;
3701 }
3702#else
3703 cerr << "fuse support not enabled" << std::endl;
3704#endif
3705 return 0;
3706 }
3707
3708 vector<coll_t> ls;
3709 vector<coll_t>::iterator it;
3710 CompatSet supported;
3711
3712#ifdef INTERNAL_TEST
3713 supported = get_test_compat_set();
3714#else
3715 supported = OSD::get_osd_compat_set();
3716#endif
3717
3718 bufferlist bl;
11fdf7f2 3719 auto ch = fs->open_collection(coll_t::meta());
9f95a23c
TL
3720 std::unique_ptr<OSDSuperblock> superblock;
3721 if (!no_superblock) {
3722 superblock.reset(new OSDSuperblock);
3723 bufferlist::const_iterator p;
3724 ret = fs->read(ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
3725 if (ret < 0) {
3726 cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl;
3727 goto out;
3728 }
7c673cae 3729
9f95a23c
TL
3730 p = bl.cbegin();
3731 decode(*superblock, p);
7c673cae 3732
9f95a23c
TL
3733 if (debug) {
3734 cerr << "Cluster fsid=" << superblock->cluster_fsid << std::endl;
3735 }
7c673cae 3736
9f95a23c
TL
3737 if (debug) {
3738 cerr << "Supported features: " << supported << std::endl;
3739 cerr << "On-disk features: " << superblock->compat_features << std::endl;
3740 }
3741 if (supported.compare(superblock->compat_features) == -1) {
3742 CompatSet unsupported = supported.unsupported(superblock->compat_features);
3743 cerr << "On-disk OSD incompatible features set "
3744 << unsupported << std::endl;
3745 ret = -EINVAL;
3746 goto out;
3747 }
7c673cae
FG
3748 }
3749
7c673cae
FG
3750 if (op != "list" && vm.count("object")) {
3751 // Special case: Create pgmeta_oid if empty string specified
3752 // This can't conflict with any actual object names.
3753 if (object == "") {
3754 ghobj = pgid.make_pgmeta_oid();
3755 } else {
3756 json_spirit::Value v;
3757 try {
3758 if (!json_spirit::read(object, v) ||
3759 (v.type() != json_spirit::array_type && v.type() != json_spirit::obj_type)) {
3760 // Special: Need head/snapdir so set even if user didn't specify
3761 if (vm.count("objcmd") && (objcmd == "remove-clone-metadata"))
3762 head = true;
3763 lookup_ghobject lookup(object, nspace, head);
20effc67
TL
3764 if (pgidstr == "meta")
3765 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t::meta(), lookup, debug);
3766 else if (pgidstr.length())
3767 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t(pgid), lookup, debug);
3a9019d9 3768 else
20effc67 3769 ret = action_on_all_objects(fs.get(), lookup, debug);
3a9019d9 3770 if (ret) {
7c673cae
FG
3771 throw std::runtime_error("Internal error");
3772 } else {
3773 if (lookup.size() != 1) {
3774 stringstream ss;
3775 if (lookup.size() == 0)
3776 ss << "No object id '" << object << "' found or invalid JSON specified";
3777 else
3778 ss << "Found " << lookup.size() << " objects with id '" << object
3779 << "', please use a JSON spec from --op list instead";
3780 throw std::runtime_error(ss.str());
3781 }
3782 pair<coll_t, ghobject_t> found = lookup.pop();
3783 pgidstr = found.first.to_str();
3784 pgid.parse(pgidstr.c_str());
3785 ghobj = found.second;
3786 }
3787 } else {
3788 stringstream ss;
3789 if (pgidstr.length() == 0 && v.type() != json_spirit::array_type) {
3790 ss << "Without --pgid the object '" << object
3791 << "' must be a JSON array";
3792 throw std::runtime_error(ss.str());
3793 }
3794 if (v.type() == json_spirit::array_type) {
3795 json_spirit::Array array = v.get_array();
3796 if (array.size() != 2) {
3797 ss << "Object '" << object
3798 << "' must be a JSON array with 2 elements";
3799 throw std::runtime_error(ss.str());
3800 }
3801 vector<json_spirit::Value>::iterator i = array.begin();
11fdf7f2 3802 ceph_assert(i != array.end());
7c673cae
FG
3803 if (i->type() != json_spirit::str_type) {
3804 ss << "Object '" << object
3805 << "' must be a JSON array with the first element a string";
3806 throw std::runtime_error(ss.str());
3807 }
3808 string object_pgidstr = i->get_str();
3809 if (object_pgidstr != "meta") {
3810 spg_t object_pgid;
3811 object_pgid.parse(object_pgidstr.c_str());
3812 if (pgidstr.length() > 0) {
3813 if (object_pgid != pgid) {
3814 ss << "object '" << object
3815 << "' has a pgid different from the --pgid="
3816 << pgidstr << " option";
3817 throw std::runtime_error(ss.str());
3818 }
3819 } else {
3820 pgidstr = object_pgidstr;
3821 pgid = object_pgid;
3822 }
3823 } else {
3824 pgidstr = object_pgidstr;
3825 }
3826 ++i;
3827 v = *i;
3828 }
3829 try {
3830 ghobj.decode(v);
3831 } catch (std::runtime_error& e) {
3832 ss << "Decode object JSON error: " << e.what();
3833 throw std::runtime_error(ss.str());
3834 }
3835 if (pgidstr != "meta" && (uint64_t)pgid.pgid.m_pool != (uint64_t)ghobj.hobj.pool) {
3836 cerr << "Object pool and pgid pool don't match" << std::endl;
3837 ret = 1;
3838 goto out;
3839 }
9f95a23c
TL
3840 if (pgidstr != "meta") {
3841 auto ch = fs->open_collection(coll_t(pgid));
3842 if (!ghobj.match(fs->collection_bits(ch), pgid.ps())) {
3843 stringstream ss;
3844 ss << "object " << ghobj << " not contained by pg " << pgid;
3845 throw std::runtime_error(ss.str());
3846 }
3847 }
7c673cae
FG
3848 }
3849 } catch (std::runtime_error& e) {
3850 cerr << e.what() << std::endl;
3851 ret = 1;
3852 goto out;
3853 }
3854 }
3855 }
3856
3857 // The ops which require --pgid option are checked here and
3858 // mentioned in the usage for --pgid.
3859 if ((op == "info" || op == "log" || op == "remove" || op == "export"
11fdf7f2
TL
3860 || op == "export-remove" || op == "mark-complete"
3861 || op == "reset-last-complete"
33c7a0ef 3862 || op == "trim-pg-log"
2a845540 3863 || op == "pg-log-inject-dups") &&
7c673cae
FG
3864 pgidstr.length() == 0) {
3865 cerr << "Must provide pgid" << std::endl;
3866 usage(desc);
3867 ret = 1;
3868 goto out;
3869 }
3870
3871 if (op == "import") {
9f95a23c 3872 ceph_assert(superblock != nullptr);
7c673cae 3873 try {
20effc67 3874 ret = tool.do_import(fs.get(), *superblock, force, pgidstr);
7c673cae
FG
3875 }
3876 catch (const buffer::error &e) {
3877 cerr << "do_import threw exception error " << e.what() << std::endl;
3878 ret = -EFAULT;
3879 }
3880 if (ret == -EFAULT) {
3881 cerr << "Corrupt input for import" << std::endl;
3882 }
3883 if (ret == 0)
3884 cout << "Import successful" << std::endl;
3885 goto out;
3886 } else if (op == "dump-journal-mount") {
3887 // Undocumented feature to dump journal with mounted fs
3888 // This doesn't support the format option, but it uses the
3889 // ObjectStore::dump_journal() and mounts to get replay to run.
3890 ret = fs->dump_journal(cout);
3891 if (ret) {
3892 if (ret == -EOPNOTSUPP) {
3893 cerr << "Object store type \"" << type << "\" doesn't support journal dump" << std::endl;
3894 } else {
3895 cerr << "Journal dump failed with error " << cpp_strerror(ret) << std::endl;
3896 }
3897 }
3898 goto out;
3899 } else if (op == "get-osdmap") {
3900 bufferlist bl;
3901 OSDMap osdmap;
3902 if (epoch == 0) {
9f95a23c
TL
3903 ceph_assert(superblock != nullptr);
3904 epoch = superblock->current_epoch;
7c673cae 3905 }
20effc67 3906 ret = get_osdmap(fs.get(), epoch, osdmap, bl);
7c673cae
FG
3907 if (ret) {
3908 cerr << "Failed to get osdmap#" << epoch << ": "
3909 << cpp_strerror(ret) << std::endl;
3910 goto out;
3911 }
3912 ret = bl.write_fd(file_fd);
3913 if (ret) {
3914 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
3915 } else {
3916 cout << "osdmap#" << epoch << " exported." << std::endl;
3917 }
3918 goto out;
3919 } else if (op == "set-osdmap") {
3920 bufferlist bl;
3921 ret = get_fd_data(file_fd, bl);
3922 if (ret < 0) {
3923 cerr << "Failed to read osdmap " << cpp_strerror(ret) << std::endl;
3924 } else {
20effc67 3925 ret = set_osdmap(fs.get(), epoch, bl, force);
7c673cae
FG
3926 }
3927 goto out;
3928 } else if (op == "get-inc-osdmap") {
3929 bufferlist bl;
3930 if (epoch == 0) {
9f95a23c
TL
3931 ceph_assert(superblock != nullptr);
3932 epoch = superblock->current_epoch;
7c673cae 3933 }
20effc67 3934 ret = get_inc_osdmap(fs.get(), epoch, bl);
7c673cae
FG
3935 if (ret < 0) {
3936 cerr << "Failed to get incremental osdmap# " << epoch << ": "
3937 << cpp_strerror(ret) << std::endl;
3938 goto out;
3939 }
3940 ret = bl.write_fd(file_fd);
3941 if (ret) {
3942 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
3943 } else {
3944 cout << "inc-osdmap#" << epoch << " exported." << std::endl;
3945 }
3946 goto out;
3947 } else if (op == "set-inc-osdmap") {
3948 bufferlist bl;
3949 ret = get_fd_data(file_fd, bl);
3950 if (ret < 0) {
3951 cerr << "Failed to read incremental osdmap " << cpp_strerror(ret) << std::endl;
3952 goto out;
3953 } else {
20effc67 3954 ret = set_inc_osdmap(fs.get(), epoch, bl, force);
7c673cae
FG
3955 }
3956 goto out;
3957 } else if (op == "update-mon-db") {
3958 if (!vm.count("mon-store-path")) {
3959 cerr << "Please specify the path to monitor db to update" << std::endl;
3960 ret = -EINVAL;
3961 } else {
9f95a23c
TL
3962 ceph_assert(superblock != nullptr);
3963 ret = update_mon_db(*fs, *superblock, dpath + "/keyring", mon_store_path);
7c673cae
FG
3964 }
3965 goto out;
3966 }
3967
7c673cae 3968 if (op == "remove") {
3efd9988
FG
3969 if (!force && !dry_run) {
3970 cerr << "Please use export-remove or you must use --force option" << std::endl;
3971 ret = -EINVAL;
3972 goto out;
3973 }
20effc67 3974 ret = initiate_new_remove_pg(fs.get(), pgid);
7c673cae
FG
3975 if (ret < 0) {
3976 cerr << "PG '" << pgid << "' not found" << std::endl;
3977 goto out;
3978 }
3979 cout << "Remove successful" << std::endl;
3980 goto out;
3981 }
3982
3983 if (op == "fix-lost") {
3984 boost::scoped_ptr<action_on_object_t> action;
11fdf7f2 3985 action.reset(new do_fix_lost());
7c673cae 3986 if (pgidstr.length())
20effc67 3987 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t(pgid), *action, debug);
7c673cae 3988 else
20effc67 3989 ret = action_on_all_objects(fs.get(), *action, debug);
7c673cae
FG
3990 goto out;
3991 }
3992
3993 if (op == "list") {
20effc67 3994 ret = do_list(fs.get(), pgidstr, object, nspace, formatter, debug,
7c673cae
FG
3995 human_readable, head);
3996 if (ret < 0) {
3997 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
3998 }
3999 goto out;
4000 }
9f95a23c 4001 if (op == "list-slow-omap") {
20effc67 4002 ret = do_list_slow(fs.get(), pgidstr, object, slow_threshold, formatter, debug,
9f95a23c
TL
4003 human_readable);
4004 if (ret < 0) {
4005 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
4006 }
4007 goto out;
4008 }
7c673cae
FG
4009
4010 if (op == "dump-super") {
9f95a23c 4011 ceph_assert(superblock != nullptr);
7c673cae 4012 formatter->open_object_section("superblock");
9f95a23c 4013 superblock->dump(formatter);
7c673cae
FG
4014 formatter->close_section();
4015 formatter->flush(cout);
4016 cout << std::endl;
4017 goto out;
4018 }
4019
f6b5b4d7
TL
4020 if (op == "statfs") {
4021 store_statfs_t statsbuf;
4022 ret = fs->statfs(&statsbuf);
4023 if (ret < 0) {
4024 cerr << "error from statfs: " << cpp_strerror(ret) << std::endl;
4025 goto out;
4026 }
4027 formatter->open_object_section("statfs");
4028 statsbuf.dump(formatter);
4029 formatter->close_section();
4030 formatter->flush(cout);
4031 cout << std::endl;
4032 goto out;
4033 }
4034
7c673cae 4035 if (op == "meta-list") {
20effc67 4036 ret = do_meta(fs.get(), object, formatter, debug, human_readable);
7c673cae
FG
4037 if (ret < 0) {
4038 cerr << "do_meta failed: " << cpp_strerror(ret) << std::endl;
4039 }
4040 goto out;
4041 }
4042
4043 ret = fs->list_collections(ls);
4044 if (ret < 0) {
4045 cerr << "failed to list pgs: " << cpp_strerror(ret) << std::endl;
4046 goto out;
4047 }
4048
4049 if (debug && op == "list-pgs")
4050 cout << "Performing list-pgs operation" << std::endl;
4051
4052 // Find pg
4053 for (it = ls.begin(); it != ls.end(); ++it) {
4054 spg_t tmppgid;
4055
4056 if (pgidstr == "meta") {
4057 if (it->to_str() == "meta")
4058 break;
4059 else
4060 continue;
4061 }
4062
4063 if (!it->is_pg(&tmppgid)) {
4064 continue;
4065 }
4066
4067 if (it->is_temp(&tmppgid)) {
4068 continue;
4069 }
4070
4071 if (op != "list-pgs" && tmppgid != pgid) {
4072 continue;
4073 }
4074
4075 if (op != "list-pgs") {
4076 //Found!
4077 break;
4078 }
4079
4080 cout << tmppgid << std::endl;
4081 }
4082
4083 if (op == "list-pgs") {
4084 ret = 0;
4085 goto out;
4086 }
4087
4088 // If not an object command nor any of the ops handled below, then output this usage
4089 // before complaining about a bad pgid
2a845540 4090 if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups" && op != "pg-log-inject-dups") {
1e59de90 4091 cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-super, meta-list, "
33c7a0ef 4092 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, trim-pg-log-dups statfs)"
7c673cae
FG
4093 << std::endl;
4094 usage(desc);
4095 ret = 1;
4096 goto out;
4097 }
4098 epoch_t map_epoch;
4099// The following code for export, info, log require omap or !skip-mount-omap
4100 if (it != ls.end()) {
4101
4102 coll_t coll = *it;
4103
4104 if (vm.count("objcmd")) {
4105 ret = 0;
4106 if (objcmd == "remove" || objcmd == "removeall") {
4107 bool all = (objcmd == "removeall");
11fdf7f2
TL
4108 enum rmtype type = BOTH;
4109 if (rmtypestr == "nosnapmap")
4110 type = NOSNAPMAP;
4111 else if (rmtypestr == "snapmap")
4112 type = SNAPMAP;
20effc67 4113 ret = do_remove_object(fs.get(), coll, ghobj, all, force, type);
7c673cae
FG
4114 goto out;
4115 } else if (objcmd == "list-attrs") {
20effc67 4116 ret = do_list_attrs(fs.get(), coll, ghobj);
7c673cae
FG
4117 goto out;
4118 } else if (objcmd == "list-omap") {
20effc67 4119 ret = do_list_omap(fs.get(), coll, ghobj);
7c673cae
FG
4120 goto out;
4121 } else if (objcmd == "get-bytes" || objcmd == "set-bytes") {
4122 if (objcmd == "get-bytes") {
4123 int fd;
4124 if (vm.count("arg1") == 0 || arg1 == "-") {
4125 fd = STDOUT_FILENO;
4126 } else {
4127 fd = open(arg1.c_str(), O_WRONLY|O_TRUNC|O_CREAT|O_EXCL|O_LARGEFILE, 0666);
4128 if (fd == -1) {
4129 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4130 ret = 1;
4131 goto out;
4132 }
4133 }
20effc67 4134 ret = do_get_bytes(fs.get(), coll, ghobj, fd);
7c673cae
FG
4135 if (fd != STDOUT_FILENO)
4136 close(fd);
4137 } else {
4138 int fd;
4139 if (vm.count("arg1") == 0 || arg1 == "-") {
4140 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4141 if (isatty(STDIN_FILENO)) {
4142 cerr << "stdin is a tty and no file specified" << std::endl;
4143 ret = 1;
4144 goto out;
4145 }
4146 fd = STDIN_FILENO;
4147 } else {
4148 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4149 if (fd == -1) {
4150 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4151 ret = 1;
4152 goto out;
4153 }
4154 }
20effc67 4155 ret = do_set_bytes(fs.get(), coll, ghobj, fd);
7c673cae
FG
4156 if (fd != STDIN_FILENO)
4157 close(fd);
4158 }
4159 goto out;
4160 } else if (objcmd == "get-attr") {
4161 if (vm.count("arg1") == 0) {
4162 usage(desc);
4163 ret = 1;
4164 goto out;
4165 }
20effc67 4166 ret = do_get_attr(fs.get(), coll, ghobj, arg1);
7c673cae
FG
4167 goto out;
4168 } else if (objcmd == "set-attr") {
4169 if (vm.count("arg1") == 0) {
4170 usage(desc);
4171 ret = 1;
4172 }
4173
4174 int fd;
4175 if (vm.count("arg2") == 0 || arg2 == "-") {
4176 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4177 if (isatty(STDIN_FILENO)) {
4178 cerr << "stdin is a tty and no file specified" << std::endl;
4179 ret = 1;
4180 goto out;
4181 }
4182 fd = STDIN_FILENO;
4183 } else {
4184 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4185 if (fd == -1) {
4186 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
4187 ret = 1;
4188 goto out;
4189 }
4190 }
20effc67 4191 ret = do_set_attr(fs.get(), coll, ghobj, arg1, fd);
7c673cae
FG
4192 if (fd != STDIN_FILENO)
4193 close(fd);
4194 goto out;
4195 } else if (objcmd == "rm-attr") {
4196 if (vm.count("arg1") == 0) {
4197 usage(desc);
4198 ret = 1;
4199 goto out;
4200 }
20effc67 4201 ret = do_rm_attr(fs.get(), coll, ghobj, arg1);
7c673cae
FG
4202 goto out;
4203 } else if (objcmd == "get-omap") {
4204 if (vm.count("arg1") == 0) {
4205 usage(desc);
4206 ret = 1;
4207 goto out;
4208 }
20effc67 4209 ret = do_get_omap(fs.get(), coll, ghobj, arg1);
7c673cae
FG
4210 goto out;
4211 } else if (objcmd == "set-omap") {
4212 if (vm.count("arg1") == 0) {
4213 usage(desc);
4214 ret = 1;
4215 goto out;
4216 }
4217 int fd;
4218 if (vm.count("arg2") == 0 || arg2 == "-") {
4219 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4220 if (isatty(STDIN_FILENO)) {
4221 cerr << "stdin is a tty and no file specified" << std::endl;
4222 ret = 1;
4223 goto out;
4224 }
4225 fd = STDIN_FILENO;
4226 } else {
4227 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4228 if (fd == -1) {
4229 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
4230 ret = 1;
4231 goto out;
4232 }
4233 }
20effc67 4234 ret = do_set_omap(fs.get(), coll, ghobj, arg1, fd);
7c673cae
FG
4235 if (fd != STDIN_FILENO)
4236 close(fd);
4237 goto out;
4238 } else if (objcmd == "rm-omap") {
4239 if (vm.count("arg1") == 0) {
4240 usage(desc);
4241 ret = 1;
4242 goto out;
4243 }
20effc67 4244 ret = do_rm_omap(fs.get(), coll, ghobj, arg1);
7c673cae
FG
4245 goto out;
4246 } else if (objcmd == "get-omaphdr") {
4247 if (vm.count("arg1")) {
4248 usage(desc);
4249 ret = 1;
4250 goto out;
4251 }
20effc67 4252 ret = do_get_omaphdr(fs.get(), coll, ghobj);
7c673cae
FG
4253 goto out;
4254 } else if (objcmd == "set-omaphdr") {
4255 // Extra arg
4256 if (vm.count("arg2")) {
4257 usage(desc);
4258 ret = 1;
4259 goto out;
4260 }
4261 int fd;
4262 if (vm.count("arg1") == 0 || arg1 == "-") {
4263 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4264 if (isatty(STDIN_FILENO)) {
4265 cerr << "stdin is a tty and no file specified" << std::endl;
4266 ret = 1;
4267 goto out;
4268 }
4269 fd = STDIN_FILENO;
4270 } else {
4271 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4272 if (fd == -1) {
4273 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4274 ret = 1;
4275 goto out;
4276 }
4277 }
20effc67 4278 ret = do_set_omaphdr(fs.get(), coll, ghobj, fd);
7c673cae
FG
4279 if (fd != STDIN_FILENO)
4280 close(fd);
4281 goto out;
4282 } else if (objcmd == "dump") {
4283 // There should not be any other arguments
4284 if (vm.count("arg1") || vm.count("arg2")) {
4285 usage(desc);
4286 ret = 1;
4287 goto out;
4288 }
20effc67 4289 ret = print_obj_info(fs.get(), coll, ghobj, formatter);
7c673cae 4290 goto out;
1adf2230
AA
4291 } else if (objcmd == "corrupt-info") { // Undocumented testing feature
4292 // There should not be any other arguments
4293 if (vm.count("arg1") || vm.count("arg2")) {
4294 usage(desc);
4295 ret = 1;
4296 goto out;
4297 }
20effc67 4298 ret = corrupt_info(fs.get(), coll, ghobj, formatter);
1adf2230 4299 goto out;
b5b8bbf5
FG
4300 } else if (objcmd == "set-size" || objcmd == "corrupt-size") {
4301 // Undocumented testing feature
4302 bool corrupt = (objcmd == "corrupt-size");
7c673cae
FG
4303 // Extra arg
4304 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4305 usage(desc);
4306 ret = 1;
4307 goto out;
4308 }
4309 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4310 cerr << "Invalid size '" << arg1 << "' specified" << std::endl;
4311 ret = 1;
4312 goto out;
4313 }
4314 uint64_t size = atoll(arg1.c_str());
20effc67 4315 ret = set_size(fs.get(), coll, ghobj, size, formatter, corrupt);
7c673cae 4316 goto out;
11fdf7f2 4317 } else if (objcmd == "clear-data-digest") {
20effc67 4318 ret = clear_data_digest(fs.get(), coll, ghobj);
11fdf7f2 4319 goto out;
7c673cae
FG
4320 } else if (objcmd == "clear-snapset") {
4321 // UNDOCUMENTED: For testing zap SnapSet
4322 // IGNORE extra args since not in usage anyway
4323 if (!ghobj.hobj.has_snapset()) {
4324 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4325 ret = 1;
4326 goto out;
4327 }
20effc67 4328 ret = clear_snapset(fs.get(), coll, ghobj, arg1);
7c673cae
FG
4329 goto out;
4330 } else if (objcmd == "remove-clone-metadata") {
4331 // Extra arg
4332 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4333 usage(desc);
4334 ret = 1;
4335 goto out;
4336 }
4337 if (!ghobj.hobj.has_snapset()) {
4338 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4339 ret = 1;
4340 goto out;
4341 }
4342 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4343 cerr << "Invalid cloneid '" << arg1 << "' specified" << std::endl;
4344 ret = 1;
4345 goto out;
4346 }
4347 snapid_t cloneid = atoi(arg1.c_str());
20effc67 4348 ret = remove_clone(fs.get(), coll, ghobj, cloneid, force);
7c673cae
FG
4349 goto out;
4350 }
4351 cerr << "Unknown object command '" << objcmd << "'" << std::endl;
4352 usage(desc);
4353 ret = 1;
4354 goto out;
4355 }
4356
7c673cae 4357 map_epoch = 0;
20effc67 4358 ret = PG::peek_map_epoch(fs.get(), pgid, &map_epoch);
7c673cae
FG
4359 if (ret < 0)
4360 cerr << "peek_map_epoch reports error" << std::endl;
4361 if (debug)
4362 cerr << "map_epoch " << map_epoch << std::endl;
4363
4364 pg_info_t info(pgid);
4365 PastIntervals past_intervals;
4366 __u8 struct_ver;
20effc67 4367 ret = PG::read_info(fs.get(), pgid, coll, info, past_intervals, struct_ver);
7c673cae
FG
4368 if (ret < 0) {
4369 cerr << "read_info error " << cpp_strerror(ret) << std::endl;
4370 goto out;
4371 }
11fdf7f2 4372 if (struct_ver < PG::get_compat_struct_v()) {
7c673cae
FG
4373 cerr << "PG is too old to upgrade, use older Ceph version" << std::endl;
4374 ret = -EFAULT;
4375 goto out;
4376 }
4377 if (debug)
4378 cerr << "struct_v " << (int)struct_ver << std::endl;
4379
3efd9988 4380 if (op == "export" || op == "export-remove") {
9f95a23c 4381 ceph_assert(superblock != nullptr);
2a845540 4382 ret = tool.do_export(cct.get(), fs.get(), coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals);
3efd9988 4383 if (ret == 0) {
7c673cae 4384 cerr << "Export successful" << std::endl;
3efd9988 4385 if (op == "export-remove") {
20effc67 4386 ret = initiate_new_remove_pg(fs.get(), pgid);
3efd9988 4387 // Export succeeded, so pgid is there
11fdf7f2 4388 ceph_assert(ret == 0);
3efd9988
FG
4389 cerr << "Remove successful" << std::endl;
4390 }
4391 }
7c673cae
FG
4392 } else if (op == "info") {
4393 formatter->open_object_section("info");
4394 info.dump(formatter);
4395 formatter->close_section();
4396 formatter->flush(cout);
4397 cout << std::endl;
4398 } else if (op == "log") {
4399 PGLog::IndexedLog log;
4400 pg_missing_t missing;
2a845540 4401 ret = get_log(cct.get(), fs.get(), struct_ver, pgid, info, log, missing);
7c673cae
FG
4402 if (ret < 0)
4403 goto out;
4404
4405 dump_log(formatter, cout, log, missing);
7c673cae
FG
4406 } else if (op == "mark-complete") {
4407 ObjectStore::Transaction tran;
4408 ObjectStore::Transaction *t = &tran;
4409
11fdf7f2 4410 if (struct_ver < PG::get_compat_struct_v()) {
7c673cae 4411 cerr << "Can't mark-complete, version mismatch " << (int)struct_ver
11fdf7f2 4412 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
7c673cae
FG
4413 << std::endl;
4414 ret = 1;
4415 goto out;
4416 }
4417
4418 cout << "Marking complete " << std::endl;
4419
9f95a23c
TL
4420 ceph_assert(superblock != nullptr);
4421 info.last_update = eversion_t(superblock->current_epoch, info.last_update.version + 1);
7c673cae 4422 info.last_backfill = hobject_t::get_max();
9f95a23c
TL
4423 info.last_epoch_started = superblock->current_epoch;
4424 info.history.last_epoch_started = superblock->current_epoch;
4425 info.history.last_epoch_clean = superblock->current_epoch;
7c673cae
FG
4426 past_intervals.clear();
4427
4428 if (!dry_run) {
4429 ret = write_info(*t, map_epoch, info, past_intervals);
4430 if (ret != 0)
4431 goto out;
11fdf7f2
TL
4432 auto ch = fs->open_collection(coll_t(pgid));
4433 fs->queue_transaction(ch, std::move(*t));
7c673cae
FG
4434 }
4435 cout << "Marking complete succeeded" << std::endl;
94b18763 4436 } else if (op == "trim-pg-log") {
20effc67 4437 ret = do_trim_pg_log(fs.get(), coll, info, pgid,
94b18763
FG
4438 map_epoch, past_intervals);
4439 if (ret < 0) {
4440 cerr << "Error trimming pg log: " << cpp_strerror(ret) << std::endl;
4441 goto out;
4442 }
4443 cout << "Finished trimming pg log" << std::endl;
33c7a0ef
TL
4444 goto out;
4445 } else if (op == "trim-pg-log-dups") {
4446 ret = do_trim_pg_log_dups(fs.get(), coll, info, pgid,
4447 map_epoch, past_intervals);
4448 if (ret < 0) {
4449 cerr << "Error trimming pg log dups: " << cpp_strerror(ret) << std::endl;
4450 goto out;
4451 }
4452 cout << "Finished trimming pg log dups" << std::endl;
94b18763 4453 goto out;
11fdf7f2
TL
4454 } else if (op == "reset-last-complete") {
4455 if (!force) {
4456 std::cerr << "WARNING: reset-last-complete is extremely dangerous and almost "
4457 << "certain to lead to permanent data loss unless you know exactly "
4458 << "what you are doing. Pass --force to proceed anyway."
4459 << std::endl;
4460 ret = -EINVAL;
4461 goto out;
4462 }
4463 ObjectStore::Transaction tran;
4464 ObjectStore::Transaction *t = &tran;
4465
4466 if (struct_ver < PG::get_compat_struct_v()) {
4467 cerr << "Can't reset-last-complete, version mismatch " << (int)struct_ver
4468 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4469 << std::endl;
4470 ret = 1;
4471 goto out;
4472 }
4473
4474 cout << "Reseting last_complete " << std::endl;
4475
4476 info.last_complete = info.last_update;
4477
4478 if (!dry_run) {
4479 ret = write_info(*t, map_epoch, info, past_intervals);
4480 if (ret != 0)
4481 goto out;
4482 fs->queue_transaction(ch, std::move(*t));
4483 }
4484 cout << "Reseting last_complete succeeded" << std::endl;
4485
2a845540
TL
4486 } else if (op == "pg-log-inject-dups") {
4487 if (!vm.count("file") || file == "-") {
4488 cerr << "Must provide file containing JSON dups entries" << std::endl;
4489 ret = 1;
4490 goto out;
4491 }
4492 if (debug)
4493 cerr << "opening file " << file << std::endl;
4494
4495 ifstream json_file_stream(file , std::ifstream::in);
4496 if (!json_file_stream.is_open()) {
4497 cerr << "unable to open file " << file << std::endl;
4498 ret = -1;
4499 goto out;
4500 }
4501 json_spirit::mValue result;
4502 try {
4503 if (!json_spirit::read(json_file_stream, result))
4504 throw std::runtime_error("unparseable JSON " + file);
4505 if (result.type() != json_spirit::array_type) {
4506 cerr << "result is not an array_type - type=" << result.type() << std::endl;
4507 throw std::runtime_error("not JSON array_type " + file);
4508 }
4509 do_dups_inject_from_json(fs.get(), pgid, result, debug);
4510 } catch (const std::runtime_error &e) {
4511 cerr << e.what() << std::endl;;
4512 return -1;
4513 }
7c673cae 4514 } else {
11fdf7f2 4515 ceph_assert(!"Should have already checked for valid --op");
7c673cae
FG
4516 }
4517 } else {
4518 cerr << "PG '" << pgid << "' not found" << std::endl;
4519 ret = -ENOENT;
4520 }
4521
4522out:
9f95a23c
TL
4523 if (debug) {
4524 ostringstream ostr;
4525 Formatter* f = Formatter::create("json-pretty", "json-pretty", "json-pretty");
1e59de90 4526 cct->get_perfcounters_collection()->dump_formatted(f, false, false);
9f95a23c
TL
4527 ostr << "ceph-objectstore-tool ";
4528 f->flush(ostr);
4529 delete f;
4530 cout << ostr.str() << std::endl;
4531 }
4532
7c673cae 4533 int r = fs->umount();
7c673cae
FG
4534 if (r < 0) {
4535 cerr << "umount failed: " << cpp_strerror(r) << std::endl;
4536 // If no previous error, then use umount() error
4537 if (ret == 0)
4538 ret = r;
4539 }
4540
4541 if (dry_run) {
4542 // Export output can go to stdout, so put this message on stderr
4543 if (op == "export")
4544 cerr << "dry-run: Nothing changed" << std::endl;
4545 else
4546 cout << "dry-run: Nothing changed" << std::endl;
4547 }
4548
4549 if (ret < 0)
4550 ret = 1;
4551 return ret;
4552}