]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/ceph_objectstore_tool.cc
import ceph quincy 17.2.4
[ceph.git] / ceph / src / tools / ceph_objectstore_tool.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <boost/program_options/variables_map.hpp>
16#include <boost/program_options/parsers.hpp>
2a845540 17#include <boost/algorithm/string.hpp>
7c673cae
FG
18#include <boost/scoped_ptr.hpp>
19#include <boost/optional.hpp>
2a845540 20#include <fstream>
7c673cae
FG
21
22#include <stdlib.h>
23
24#include "common/Formatter.h"
25#include "common/errno.h"
26#include "common/ceph_argparse.h"
9f95a23c 27#include "common/url_escape.h"
7c673cae
FG
28
29#include "global/global_init.h"
30
31#include "os/ObjectStore.h"
32#include "os/filestore/FileJournal.h"
33#include "os/filestore/FileStore.h"
34#ifdef HAVE_LIBFUSE
35#include "os/FuseStore.h"
36#endif
37
38#include "osd/PGLog.h"
39#include "osd/OSD.h"
40#include "osd/PG.h"
a8e16298 41#include "osd/ECUtil.h"
7c673cae
FG
42
43#include "json_spirit/json_spirit_value.h"
44#include "json_spirit/json_spirit_reader.h"
45
46#include "rebuild_mondb.h"
47#include "ceph_objectstore_tool.h"
48#include "include/compat.h"
49#include "include/util.h"
50
20effc67 51using namespace std;
7c673cae 52namespace po = boost::program_options;
7c673cae
FG
53
54#ifdef INTERNAL_TEST
55CompatSet get_test_compat_set() {
56 CompatSet::FeatureSet ceph_osd_feature_compat;
57 CompatSet::FeatureSet ceph_osd_feature_ro_compat;
58 CompatSet::FeatureSet ceph_osd_feature_incompat;
59 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
60 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_PGINFO);
61 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_OLOC);
62 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEC);
63 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES);
64 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL);
65 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
66 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
67 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
68#ifdef INTERNAL_TEST2
69 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER);
70 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
71#endif
72 return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
73 ceph_osd_feature_incompat);
74}
75#endif
76
// Upper bound, in bytes, on a single read request issued by the helpers in
// this file (get_fd_data() and export_file() both read in chunks this big).
const ssize_t max_read = 1024 * 1024;
// Sentinel stored in file_fd until a real descriptor is assigned.
const int fd_none = INT_MIN;
// NOTE(review): presumably records whether stdout is a terminal; it is
// neither set nor read in this part of the file -- confirm against main().
bool outistty;
// When true, the store-modifying helpers skip queueing their transactions.
bool dry_run;
7c673cae
FG
81
82struct action_on_object_t {
83 virtual ~action_on_object_t() {}
11fdf7f2 84 virtual void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) = 0;
7c673cae
FG
85};
86
87int _action_on_all_objects_in_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
88{
11fdf7f2 89 auto ch = store->open_collection(coll);
20effc67 90
7c673cae
FG
91 unsigned LIST_AT_A_TIME = 100;
92 ghobject_t next;
93 while (!next.is_max()) {
94 vector<ghobject_t> list;
11fdf7f2 95 int r = store->collection_list(ch,
7c673cae
FG
96 next,
97 ghobject_t::get_max(),
98 LIST_AT_A_TIME,
99 &list,
100 &next);
101 if (r < 0) {
102 cerr << "Error listing collection: " << coll << ", "
103 << cpp_strerror(r) << std::endl;
104 return r;
105 }
106 for (vector<ghobject_t>::iterator obj = list.begin();
107 obj != list.end();
108 ++obj) {
7c673cae
FG
109 object_info_t oi;
110 if (coll != coll_t::meta()) {
111 bufferlist attr;
11fdf7f2 112 r = store->getattr(ch, *obj, OI_ATTR, attr);
7c673cae
FG
113 if (r < 0) {
114 cerr << "Error getting attr on : " << make_pair(coll, *obj) << ", "
115 << cpp_strerror(r) << std::endl;
11fdf7f2
TL
116 } else {
117 auto bp = attr.cbegin();
118 try {
119 decode(oi, bp);
120 } catch (...) {
121 r = -EINVAL;
122 cerr << "Error decoding attr on : " << make_pair(coll, *obj) << ", "
123 << cpp_strerror(r) << std::endl;
124 }
125 }
7c673cae 126 }
11fdf7f2 127 action.call(store, coll, *obj, oi);
7c673cae
FG
128 }
129 }
130 return 0;
131}
132
133int action_on_all_objects_in_pg(ObjectStore *store, string pgidstr, action_on_object_t &action, bool debug)
134{
135 spg_t pgid;
136 // Scan collections in case this is an ec pool but no shard specified
137 unsigned scanned = 0;
138 int r = 0;
139 vector<coll_t> colls_to_check;
140 vector<coll_t> candidates;
20effc67 141
7c673cae
FG
142 r = store->list_collections(candidates);
143 if (r < 0) {
144 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
145 return r;
146 }
147 pgid.parse(pgidstr.c_str());
148 for (vector<coll_t>::iterator i = candidates.begin();
149 i != candidates.end();
150 ++i) {
151 spg_t cand_pgid;
20effc67
TL
152 if (i->is_meta() && pgidstr == "meta") {
153 colls_to_check.push_back(*i);
154 continue;
155 }
7c673cae
FG
156 if (!i->is_pg(&cand_pgid))
157 continue;
158
159 // If an exact match or treat no shard as any shard
160 if (cand_pgid == pgid ||
161 (pgid.is_no_shard() && pgid.pgid == cand_pgid.pgid)) {
162 colls_to_check.push_back(*i);
163 }
164 }
165
166 if (debug)
167 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
168 for (vector<coll_t>::iterator i = colls_to_check.begin();
169 i != colls_to_check.end();
170 ++i, ++scanned) {
171 if (debug)
172 cerr << "Scanning " << *i << ", " << scanned << "/"
173 << colls_to_check.size() << " completed" << std::endl;
174 r = _action_on_all_objects_in_pg(store, *i, action, debug);
175 if (r < 0)
176 break;
177 }
178 return r;
179}
180
181int action_on_all_objects_in_exact_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
182{
183 int r = _action_on_all_objects_in_pg(store, coll, action, debug);
184 return r;
185}
186
187int _action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
188{
189 unsigned scanned = 0;
190 int r = 0;
191 vector<coll_t> colls_to_check;
192 vector<coll_t> candidates;
193 r = store->list_collections(candidates);
194 if (r < 0) {
195 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
196 return r;
197 }
198 for (vector<coll_t>::iterator i = candidates.begin();
199 i != candidates.end();
200 ++i) {
201 if (i->is_pg()) {
202 colls_to_check.push_back(*i);
203 }
204 }
205
206 if (debug)
207 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
208 for (vector<coll_t>::iterator i = colls_to_check.begin();
209 i != colls_to_check.end();
210 ++i, ++scanned) {
211 if (debug)
212 cerr << "Scanning " << *i << ", " << scanned << "/"
213 << colls_to_check.size() << " completed" << std::endl;
214 r = _action_on_all_objects_in_pg(store, *i, action, debug);
215 if (r < 0)
216 return r;
217 }
218 return 0;
219}
220
221int action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
222{
223 int r = _action_on_all_objects(store, action, debug);
224 return r;
225}
226
227struct pgid_object_list {
228 list<pair<coll_t, ghobject_t> > _objects;
229
230 void insert(coll_t coll, ghobject_t &ghobj) {
231 _objects.push_back(make_pair(coll, ghobj));
232 }
233
234 void dump(Formatter *f, bool human_readable) const {
235 if (!human_readable)
236 f->open_array_section("pgid_objects");
237 for (list<pair<coll_t, ghobject_t> >::const_iterator i = _objects.begin();
238 i != _objects.end();
239 ++i) {
240 f->open_array_section("pgid_object");
241 spg_t pgid;
242 bool is_pg = i->first.is_pg(&pgid);
243 if (is_pg)
244 f->dump_string("pgid", stringify(pgid));
245 if (!is_pg || !human_readable)
246 f->dump_string("coll", i->first.to_str());
247 f->open_object_section("ghobject");
248 i->second.dump(f);
249 f->close_section();
250 f->close_section();
251 if (human_readable) {
252 f->flush(cout);
253 cout << std::endl;
254 }
255 }
256 if (!human_readable) {
257 f->close_section();
258 f->flush(cout);
259 cout << std::endl;
260 }
261 }
262};
263
264struct lookup_ghobject : public action_on_object_t {
265 pgid_object_list _objects;
266 const string _name;
267 const boost::optional<std::string> _namespace;
268 bool _need_snapset;
269
270 lookup_ghobject(const string& name, const boost::optional<std::string>& nspace, bool need_snapset = false) : _name(name),
271 _namespace(nspace), _need_snapset(need_snapset) { }
272
11fdf7f2 273 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
7c673cae 274 if (_need_snapset && !ghobj.hobj.has_snapset())
11fdf7f2 275 return;
7c673cae
FG
276 if ((_name.length() == 0 || ghobj.hobj.oid.name == _name) &&
277 (!_namespace || ghobj.hobj.nspace == _namespace))
278 _objects.insert(coll, ghobj);
11fdf7f2 279 return;
7c673cae
FG
280 }
281
282 int size() const {
283 return _objects._objects.size();
284 }
285
286 pair<coll_t, ghobject_t> pop() {
287 pair<coll_t, ghobject_t> front = _objects._objects.front();
288 _objects._objects.pop_front();
289 return front;
290 }
291
292 void dump(Formatter *f, bool human_readable) const {
293 _objects.dump(f, human_readable);
294 }
295};
296
9f95a23c
TL
297struct lookup_slow_ghobject : public action_on_object_t {
298 list<tuple<
299 coll_t,
300 ghobject_t,
301 ceph::signedspan,
302 ceph::signedspan,
303 ceph::signedspan,
304 string> > _objects;
305 const string _name;
306 double threshold;
307
308 coll_t last_coll;
309
310 lookup_slow_ghobject(const string& name, double _threshold) :
311 _name(name), threshold(_threshold) { }
312
313 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
314 ObjectMap::ObjectMapIterator iter;
315 auto start1 = mono_clock::now();
316 ceph::signedspan first_seek_time = start1 - start1;
317 ceph::signedspan last_seek_time = first_seek_time;
318 ceph::signedspan total_time = first_seek_time;
319 {
320 auto ch = store->open_collection(coll);
321 iter = store->get_omap_iterator(ch, ghobj);
322 if (!iter) {
323 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT)
324 << " obj:" << ghobj
325 << std::endl;
326 return;
327 }
328 auto start = mono_clock::now();
329 iter->seek_to_first();
330 first_seek_time = mono_clock::now() - start;
331
332 while(iter->valid()) {
333 start = mono_clock::now();
334 iter->next();
335 last_seek_time = mono_clock::now() - start;
336 }
337 }
338
339 if (coll != last_coll) {
340 cerr << ">>> inspecting coll" << coll << std::endl;
341 last_coll = coll;
342 }
343
344 total_time = mono_clock::now() - start1;
345 if ( total_time >= make_timespan(threshold)) {
346 _objects.emplace_back(coll, ghobj,
347 first_seek_time, last_seek_time, total_time,
348 url_escape(iter->tail_key()));
349 cerr << ">>>>> found obj " << ghobj
350 << " first_seek_time "
351 << std::chrono::duration_cast<std::chrono::seconds>(first_seek_time).count()
352 << " last_seek_time "
353 << std::chrono::duration_cast<std::chrono::seconds>(last_seek_time).count()
354 << " total_time "
355 << std::chrono::duration_cast<std::chrono::seconds>(total_time).count()
356 << " tail key: " << url_escape(iter->tail_key())
357 << std::endl;
358 }
359 return;
360 }
361
362 int size() const {
363 return _objects.size();
364 }
365
366 void dump(Formatter *f, bool human_readable) const {
367 if (!human_readable)
368 f->open_array_section("objects");
369 for (auto i = _objects.begin();
370 i != _objects.end();
371 ++i) {
372 f->open_array_section("object");
373 coll_t coll;
374 ghobject_t ghobj;
375 ceph::signedspan first_seek_time;
376 ceph::signedspan last_seek_time;
377 ceph::signedspan total_time;
378 string tail_key;
379 std::tie(coll, ghobj, first_seek_time, last_seek_time, total_time, tail_key) = *i;
380
381 spg_t pgid;
382 bool is_pg = coll.is_pg(&pgid);
383 if (is_pg)
384 f->dump_string("pgid", stringify(pgid));
385 if (!is_pg || !human_readable)
386 f->dump_string("coll", coll.to_str());
387 f->dump_object("ghobject", ghobj);
388 f->open_object_section("times");
389 f->dump_int("first_seek_time",
390 std::chrono::duration_cast<std::chrono::seconds>(first_seek_time).count());
391 f->dump_int("last_seek_time",
392 std::chrono::duration_cast<std::chrono::seconds>
393 (last_seek_time).count());
394 f->dump_int("total_time",
395 std::chrono::duration_cast<std::chrono::seconds>(total_time).count());
396 f->dump_string("tail_key", tail_key);
397 f->close_section();
398
399 f->close_section();
400 if (human_readable) {
401 f->flush(cout);
402 cout << std::endl;
403 }
404 }
405 if (!human_readable) {
406 f->close_section();
407 f->flush(cout);
408 cout << std::endl;
409 }
410 }
411};
412
7c673cae 413int file_fd = fd_none;
3efd9988 414bool debug;
11fdf7f2 415bool force = false;
9f95a23c
TL
416bool no_superblock = false;
417
7c673cae 418super_header sh;
7c673cae
FG
419
420static int get_fd_data(int fd, bufferlist &bl)
421{
422 uint64_t total = 0;
423 do {
424 ssize_t bytes = bl.read_fd(fd, max_read);
425 if (bytes < 0) {
426 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
427 return bytes;
428 }
429
430 if (bytes == 0)
431 break;
432
433 total += bytes;
434 } while(true);
435
11fdf7f2 436 ceph_assert(bl.length() == total);
7c673cae
FG
437 return 0;
438}
439
2a845540 440int get_log(CephContext *cct, ObjectStore *fs, __u8 struct_ver,
11fdf7f2 441 spg_t pgid, const pg_info_t &info,
7c673cae
FG
442 PGLog::IndexedLog &log, pg_missing_t &missing)
443{
444 try {
11fdf7f2
TL
445 auto ch = fs->open_collection(coll_t(pgid));
446 if (!ch) {
447 return -ENOENT;
448 }
7c673cae 449 ostringstream oss;
11fdf7f2
TL
450 ceph_assert(struct_ver > 0);
451 PGLog::read_log_and_missing(
2a845540 452 cct, fs, ch,
11fdf7f2
TL
453 pgid.make_pgmeta_oid(),
454 info, log, missing,
455 oss,
456 g_ceph_context->_conf->osd_ignore_stale_divergent_priors);
7c673cae
FG
457 if (debug && oss.str().size())
458 cerr << oss.str() << std::endl;
459 }
460 catch (const buffer::error &e) {
461 cerr << "read_log_and_missing threw exception error " << e.what() << std::endl;
462 return -EFAULT;
463 }
464 return 0;
465}
466
467void dump_log(Formatter *formatter, ostream &out, pg_log_t &log,
468 pg_missing_t &missing)
469{
470 formatter->open_object_section("op_log");
471 formatter->open_object_section("pg_log_t");
472 log.dump(formatter);
473 formatter->close_section();
474 formatter->flush(out);
475 formatter->open_object_section("pg_missing_t");
476 missing.dump(formatter);
477 formatter->close_section();
7c673cae
FG
478 formatter->close_section();
479 formatter->flush(out);
480}
481
482//Based on part of OSD::load_pgs()
483int finish_remove_pgs(ObjectStore *store)
484{
485 vector<coll_t> ls;
486 int r = store->list_collections(ls);
487 if (r < 0) {
488 cerr << "finish_remove_pgs: failed to list pgs: " << cpp_strerror(r)
489 << std::endl;
490 return r;
491 }
492
493 for (vector<coll_t>::iterator it = ls.begin();
494 it != ls.end();
495 ++it) {
496 spg_t pgid;
497
498 if (it->is_temp(&pgid) ||
499 (it->is_pg(&pgid) && PG::_has_removal_flag(store, pgid))) {
500 cout << "finish_remove_pgs " << *it << " removing " << pgid << std::endl;
501 OSD::recursive_remove_collection(g_ceph_context, store, pgid, *it);
502 continue;
503 }
504
505 //cout << "finish_remove_pgs ignoring unrecognized " << *it << std::endl;
506 }
507 return 0;
508}
509
510#pragma GCC diagnostic ignored "-Wpragmas"
511#pragma GCC diagnostic push
512#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
513
514int mark_pg_for_removal(ObjectStore *fs, spg_t pgid, ObjectStore::Transaction *t)
515{
516 pg_info_t info(pgid);
517 coll_t coll(pgid);
518 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
519
7c673cae 520 epoch_t map_epoch = 0;
11fdf7f2 521 int r = PG::peek_map_epoch(fs, pgid, &map_epoch);
7c673cae
FG
522 if (r < 0)
523 cerr << __func__ << " warning: peek_map_epoch reported error" << std::endl;
524 PastIntervals past_intervals;
525 __u8 struct_v;
11fdf7f2 526 r = PG::read_info(fs, pgid, coll, info, past_intervals, struct_v);
7c673cae
FG
527 if (r < 0) {
528 cerr << __func__ << " error on read_info " << cpp_strerror(r) << std::endl;
529 return r;
530 }
11fdf7f2 531 ceph_assert(struct_v >= 8);
7c673cae
FG
532 // new omap key
533 cout << "setting '_remove' omap key" << std::endl;
534 map<string,bufferlist> values;
11fdf7f2 535 encode((char)1, values["_remove"]);
7c673cae
FG
536 t->omap_setkeys(coll, pgmeta_oid, values);
537 return 0;
538}
539
540#pragma GCC diagnostic pop
541#pragma GCC diagnostic warning "-Wpragmas"
542
11fdf7f2
TL
543template<typename Func>
544void wait_until_done(ObjectStore::Transaction* txn, Func&& func)
545{
546 bool finished = false;
547 std::condition_variable cond;
548 std::mutex m;
9f95a23c 549 txn->register_on_complete(make_lambda_context([&](int) {
11fdf7f2
TL
550 std::unique_lock lock{m};
551 finished = true;
552 cond.notify_one();
553 }));
554 std::move(func)();
555 std::unique_lock lock{m};
556 cond.wait(lock, [&] {return finished;});
557}
558
559int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid)
7c673cae
FG
560{
561 if (!dry_run)
562 finish_remove_pgs(store);
563 if (!store->collection_exists(coll_t(r_pgid)))
564 return -ENOENT;
565
566 cout << " marking collection for removal" << std::endl;
567 if (dry_run)
568 return 0;
569 ObjectStore::Transaction rmt;
570 int r = mark_pg_for_removal(store, r_pgid, &rmt);
571 if (r < 0) {
572 return r;
573 }
11fdf7f2
TL
574 ObjectStore::CollectionHandle ch = store->open_collection(coll_t(r_pgid));
575 store->queue_transaction(ch, std::move(rmt));
7c673cae
FG
576 finish_remove_pgs(store);
577 return r;
578}
579
580int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
581 PastIntervals &past_intervals)
582{
583 //Empty for this
584 coll_t coll(info.pgid);
585 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
586 map<string,bufferlist> km;
9f95a23c 587 string key_to_remove;
7c673cae 588 pg_info_t last_written_info;
9f95a23c 589 int ret = prepare_info_keymap(
7c673cae 590 g_ceph_context,
9f95a23c
TL
591 &km, &key_to_remove,
592 epoch,
7c673cae
FG
593 info,
594 last_written_info,
595 past_intervals,
596 true, true, false);
597 if (ret) cerr << "Failed to write info" << std::endl;
598 t.omap_setkeys(coll, pgmeta_oid, km);
9f95a23c
TL
599 if (!key_to_remove.empty()) {
600 t.omap_rmkey(coll, pgmeta_oid, key_to_remove);
601 }
7c673cae
FG
602 return ret;
603}
604
605typedef map<eversion_t, hobject_t> divergent_priors_t;
606
607int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
608 pg_log_t &log, PastIntervals &past_intervals,
609 divergent_priors_t &divergent,
610 pg_missing_t &missing)
611{
11fdf7f2 612 cout << __func__ << " epoch " << epoch << " info " << info << std::endl;
7c673cae
FG
613 int ret = write_info(t, epoch, info, past_intervals);
614 if (ret)
615 return ret;
20effc67 616
7c673cae
FG
617 coll_t coll(info.pgid);
618 map<string,bufferlist> km;
20effc67 619 const bool require_rollback = !info.pgid.is_no_shard();
7c673cae 620 if (!divergent.empty()) {
11fdf7f2 621 ceph_assert(missing.get_items().empty());
7c673cae 622 PGLog::write_log_and_missing_wo_missing(
20effc67
TL
623 t, &km, log, coll, info.pgid.make_pgmeta_oid(), divergent,
624 require_rollback);
7c673cae
FG
625 } else {
626 pg_missing_tracker_t tmissing(missing);
c07f9fc5 627 bool rebuilt_missing_set_with_deletes = missing.may_include_deletes;
7c673cae 628 PGLog::write_log_and_missing(
20effc67
TL
629 t, &km, log, coll, info.pgid.make_pgmeta_oid(), tmissing,
630 require_rollback,
c07f9fc5 631 &rebuilt_missing_set_with_deletes);
7c673cae
FG
632 }
633 t.omap_setkeys(coll, info.pgid.make_pgmeta_oid(), km);
634 return 0;
635}
636
94b18763
FG
637int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
638 pg_info_t &info, const spg_t &pgid,
11fdf7f2 639 epoch_t map_epoch,
94b18763
FG
640 PastIntervals &past_intervals)
641{
642 ghobject_t oid = pgid.make_pgmeta_oid();
643 struct stat st;
11fdf7f2
TL
644 auto ch = store->open_collection(coll);
645 int r = store->stat(ch, oid, &st);
646 ceph_assert(r == 0);
647 ceph_assert(st.st_size == 0);
94b18763
FG
648
649 cerr << "Log bounds are: " << "(" << info.log_tail << ","
650 << info.last_update << "]" << std::endl;
651
652 uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
653 if (info.last_update.version - info.log_tail.version <= max_entries) {
654 cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
655 return 0;
656 }
657
11fdf7f2 658 ceph_assert(info.last_update.version > max_entries);
94b18763
FG
659 version_t trim_to = info.last_update.version - max_entries;
660 size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
661 eversion_t new_tail;
662 bool done = false;
663
664 while (!done) {
665 // gather keys so we can delete them in a batch without
666 // affecting the iterator
667 set<string> keys_to_trim;
668 {
11fdf7f2 669 ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
94b18763
FG
670 if (!p)
671 break;
11fdf7f2 672 for (p->seek_to_first(); p->valid(); p->next()) {
94b18763
FG
673 if (p->key()[0] == '_')
674 continue;
675 if (p->key() == "can_rollback_to")
676 continue;
677 if (p->key() == "divergent_priors")
678 continue;
679 if (p->key() == "rollback_info_trimmed_to")
680 continue;
681 if (p->key() == "may_include_deletes_in_missing")
682 continue;
683 if (p->key().substr(0, 7) == string("missing"))
684 continue;
685 if (p->key().substr(0, 4) == string("dup_"))
686 continue;
687
688 bufferlist bl = p->value();
11fdf7f2 689 auto bp = bl.cbegin();
94b18763
FG
690 pg_log_entry_t e;
691 try {
692 e.decode_with_checksum(bp);
693 } catch (const buffer::error &e) {
f67539c2 694 cerr << "Error reading pg log entry: " << e.what() << std::endl;
94b18763
FG
695 }
696 if (debug) {
697 cerr << "read entry " << e << std::endl;
698 }
699 if (e.version.version > trim_to) {
700 done = true;
701 break;
702 }
703 keys_to_trim.insert(p->key());
704 new_tail = e.version;
705 if (keys_to_trim.size() >= trim_at_once)
706 break;
707 }
708
709 if (!p->valid())
710 done = true;
711 } // deconstruct ObjectMapIterator
712
713 // delete the keys
714 if (!dry_run && !keys_to_trim.empty()) {
715 cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
716 ObjectStore::Transaction t;
717 t.omap_rmkeys(coll, oid, keys_to_trim);
11fdf7f2
TL
718 store->queue_transaction(ch, std::move(t));
719 ch->flush();
94b18763
FG
720 }
721 }
722
723 // update pg info with new tail
724 if (!dry_run && new_tail != eversion_t()) {
725 info.log_tail = new_tail;
726 ObjectStore::Transaction t;
727 int ret = write_info(t, map_epoch, info, past_intervals);
728 if (ret)
729 return ret;
11fdf7f2
TL
730 store->queue_transaction(ch, std::move(t));
731 ch->flush();
94b18763
FG
732 }
733
734 // compact the db since we just removed a bunch of data
735 cerr << "Finished trimming, now compacting..." << std::endl;
736 if (!dry_run)
737 store->compact();
738 return 0;
739}
740
33c7a0ef
TL
741int do_trim_pg_log_dups(ObjectStore *store, const coll_t &coll,
742 pg_info_t &info, const spg_t &pgid,
743 epoch_t map_epoch,
744 PastIntervals &past_intervals)
745{
746 ghobject_t oid = pgid.make_pgmeta_oid();
747 struct stat st;
748 auto ch = store->open_collection(coll);
749 int r = store->stat(ch, oid, &st);
750 ceph_assert(r == 0);
751 ceph_assert(st.st_size == 0);
752
753 const size_t max_dup_entries = g_ceph_context->_conf->osd_pg_log_dups_tracked;
754 ceph_assert(max_dup_entries > 0);
755 const size_t max_chunk_size = g_ceph_context->_conf->osd_pg_log_trim_max;
756 ceph_assert(max_chunk_size > 0);
757
758 cout << "max_dup_entries=" << max_dup_entries
759 << " max_chunk_size=" << max_chunk_size << std::endl;
760 if (dry_run) {
761 cout << "Dry run enabled, so when many chunks are needed,"
762 << " the trimming will never stop!" << std::endl;
763 }
764
765 set<string> keys_to_keep;
766 size_t num_removed = 0;
767 do {
768 set<string> keys_to_trim;
769 {
770 ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
771 if (!p)
772 break;
773 for (p->seek_to_first(); p->valid(); p->next()) {
774 if (p->key()[0] == '_')
775 continue;
776 if (p->key() == "can_rollback_to")
777 continue;
778 if (p->key() == "divergent_priors")
779 continue;
780 if (p->key() == "rollback_info_trimmed_to")
781 continue;
782 if (p->key() == "may_include_deletes_in_missing")
783 continue;
784 if (p->key().substr(0, 7) == string("missing"))
785 continue;
786 if (p->key().substr(0, 4) != string("dup_"))
787 continue;
788 keys_to_keep.insert(p->key());
789 if (keys_to_keep.size() > max_dup_entries) {
790 auto oldest_to_keep = keys_to_keep.begin();
791 keys_to_trim.emplace(*oldest_to_keep);
792 keys_to_keep.erase(oldest_to_keep);
793 }
794 if (keys_to_trim.size() >= max_chunk_size) {
795 break;
796 }
797 }
798 } // deconstruct ObjectMapIterator
799 // delete the keys
800 num_removed = keys_to_trim.size();
801 if (!dry_run && !keys_to_trim.empty()) {
802 cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
803 ObjectStore::Transaction t;
804 t.omap_rmkeys(coll, oid, keys_to_trim);
805 store->queue_transaction(ch, std::move(t));
806 ch->flush();
807 }
808 } while (num_removed == max_chunk_size);
809
810 // compact the db since we just removed a bunch of data
811 cerr << "Finished trimming, now compacting..." << std::endl;
812 if (!dry_run)
813 store->compact();
814 return 0;
815}
816
7c673cae
FG
817const int OMAP_BATCH_SIZE = 25;
818void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map<string, bufferlist> &oset)
819{
820 oset.clear();
821 for (int count = OMAP_BATCH_SIZE; count && iter->valid(); --count, iter->next()) {
822 oset.insert(pair<string, bufferlist>(iter->key(), iter->value()));
823 }
824}
825
826int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj)
827{
828 struct stat st;
829 mysize_t total;
830 footer ft;
831
11fdf7f2
TL
832 auto ch = store->open_collection(cid);
833 int ret = store->stat(ch, obj, &st);
7c673cae
FG
834 if (ret < 0)
835 return ret;
836
837 cerr << "Read " << obj << std::endl;
838
839 total = st.st_size;
840 if (debug)
841 cerr << "size=" << total << std::endl;
842
843 object_begin objb(obj);
844
845 {
846 bufferptr bp;
847 bufferlist bl;
11fdf7f2 848 ret = store->getattr(ch, obj, OI_ATTR, bp);
7c673cae
FG
849 if (ret < 0) {
850 cerr << "getattr failure object_info " << ret << std::endl;
851 return ret;
852 }
853 bl.push_back(bp);
854 decode(objb.oi, bl);
855 if (debug)
856 cerr << "object_info: " << objb.oi << std::endl;
857 }
858
859 // NOTE: we include whiteouts, lost, etc.
860
861 ret = write_section(TYPE_OBJECT_BEGIN, objb, file_fd);
862 if (ret < 0)
863 return ret;
864
865 uint64_t offset = 0;
866 bufferlist rawdatabl;
867 while(total > 0) {
868 rawdatabl.clear();
869 mysize_t len = max_read;
870 if (len > total)
871 len = total;
872
11fdf7f2 873 ret = store->read(ch, obj, offset, len, rawdatabl);
7c673cae
FG
874 if (ret < 0)
875 return ret;
876 if (ret == 0)
877 return -EINVAL;
878
879 data_section dblock(offset, len, rawdatabl);
880 if (debug)
881 cerr << "data section offset=" << offset << " len=" << len << std::endl;
882
883 total -= ret;
884 offset += ret;
885
886 ret = write_section(TYPE_DATA, dblock, file_fd);
887 if (ret) return ret;
888 }
889
890 //Handle attrs for this object
20effc67 891 map<string,bufferptr,less<>> aset;
11fdf7f2 892 ret = store->getattrs(ch, obj, aset);
7c673cae
FG
893 if (ret) return ret;
894 attr_section as(aset);
895 ret = write_section(TYPE_ATTRS, as, file_fd);
896 if (ret)
897 return ret;
898
899 if (debug) {
900 cerr << "attrs size " << aset.size() << std::endl;
901 }
902
903 //Handle omap information
904 bufferlist hdrbuf;
11fdf7f2 905 ret = store->omap_get_header(ch, obj, &hdrbuf, true);
7c673cae
FG
906 if (ret < 0) {
907 cerr << "omap_get_header: " << cpp_strerror(ret) << std::endl;
908 return ret;
909 }
910
911 omap_hdr_section ohs(hdrbuf);
912 ret = write_section(TYPE_OMAP_HDR, ohs, file_fd);
913 if (ret)
914 return ret;
915
11fdf7f2 916 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, obj);
7c673cae
FG
917 if (!iter) {
918 ret = -ENOENT;
919 cerr << "omap_get_iterator: " << cpp_strerror(ret) << std::endl;
920 return ret;
921 }
922 iter->seek_to_first();
923 int mapcount = 0;
924 map<string, bufferlist> out;
925 while(iter->valid()) {
926 get_omap_batch(iter, out);
927
928 if (out.empty()) break;
929
930 mapcount += out.size();
931 omap_section oms(out);
932 ret = write_section(TYPE_OMAP, oms, file_fd);
933 if (ret)
934 return ret;
935 }
936 if (debug)
937 cerr << "omap map size " << mapcount << std::endl;
938
939 ret = write_simple(TYPE_OBJECT_END, file_fd);
940 if (ret)
941 return ret;
942
943 return 0;
944}
945
946int ObjectStoreTool::export_files(ObjectStore *store, coll_t coll)
947{
948 ghobject_t next;
11fdf7f2 949 auto ch = store->open_collection(coll);
7c673cae
FG
950 while (!next.is_max()) {
951 vector<ghobject_t> objects;
11fdf7f2 952 int r = store->collection_list(ch, next, ghobject_t::get_max(), 300,
7c673cae
FG
953 &objects, &next);
954 if (r < 0)
955 return r;
956 for (vector<ghobject_t>::iterator i = objects.begin();
957 i != objects.end();
958 ++i) {
11fdf7f2
TL
959 ceph_assert(!i->hobj.is_meta());
960 if (i->is_pgmeta() || i->hobj.is_temp() || !i->is_no_gen()) {
7c673cae
FG
961 continue;
962 }
963 r = export_file(store, coll, *i);
964 if (r < 0)
965 return r;
966 }
967 }
968 return 0;
969}
970
11fdf7f2 971int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
7c673cae 972 OSDMap::Incremental inc;
11fdf7f2 973 auto it = bl.cbegin();
7c673cae
FG
974 inc.decode(it);
975 if (e == 0) {
976 e = inc.epoch;
977 } else if (e != inc.epoch) {
978 cerr << "incremental.epoch mismatch: "
979 << inc.epoch << " != " << e << std::endl;
980 if (force) {
981 cerr << "But will continue anyway." << std::endl;
982 } else {
983 return -EINVAL;
984 }
985 }
11fdf7f2 986 auto ch = store->open_collection(coll_t::meta());
7c673cae 987 const ghobject_t inc_oid = OSD::get_inc_osdmap_pobject_name(e);
11fdf7f2 988 if (!store->exists(ch, inc_oid)) {
7c673cae
FG
989 cerr << "inc-osdmap (" << inc_oid << ") does not exist." << std::endl;
990 if (!force) {
991 return -ENOENT;
992 }
993 cout << "Creating a new epoch." << std::endl;
994 }
995 if (dry_run)
996 return 0;
997 ObjectStore::Transaction t;
998 t.write(coll_t::meta(), inc_oid, 0, bl.length(), bl);
999 t.truncate(coll_t::meta(), inc_oid, bl.length());
11fdf7f2
TL
1000 store->queue_transaction(ch, std::move(t));
1001 return 0;
7c673cae
FG
1002}
1003
1004int get_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl)
1005{
11fdf7f2
TL
1006 auto ch = store->open_collection(coll_t::meta());
1007 if (store->read(ch,
7c673cae
FG
1008 OSD::get_inc_osdmap_pobject_name(e),
1009 0, 0, bl) < 0) {
1010 return -ENOENT;
1011 }
1012 return 0;
1013}
1014
11fdf7f2 1015int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
7c673cae
FG
1016 OSDMap osdmap;
1017 osdmap.decode(bl);
1018 if (e == 0) {
1019 e = osdmap.get_epoch();
1020 } else if (e != osdmap.get_epoch()) {
1021 cerr << "osdmap.epoch mismatch: "
1022 << e << " != " << osdmap.get_epoch() << std::endl;
1023 if (force) {
1024 cerr << "But will continue anyway." << std::endl;
1025 } else {
1026 return -EINVAL;
1027 }
1028 }
11fdf7f2 1029 auto ch = store->open_collection(coll_t::meta());
7c673cae 1030 const ghobject_t full_oid = OSD::get_osdmap_pobject_name(e);
11fdf7f2 1031 if (!store->exists(ch, full_oid)) {
7c673cae
FG
1032 cerr << "osdmap (" << full_oid << ") does not exist." << std::endl;
1033 if (!force) {
1034 return -ENOENT;
1035 }
1036 cout << "Creating a new epoch." << std::endl;
1037 }
1038 if (dry_run)
1039 return 0;
1040 ObjectStore::Transaction t;
1041 t.write(coll_t::meta(), full_oid, 0, bl.length(), bl);
1042 t.truncate(coll_t::meta(), full_oid, bl.length());
11fdf7f2
TL
1043 store->queue_transaction(ch, std::move(t));
1044 return 0;
7c673cae
FG
1045}
1046
1047int get_osdmap(ObjectStore *store, epoch_t e, OSDMap &osdmap, bufferlist& bl)
1048{
11fdf7f2 1049 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
7c673cae 1050 bool found = store->read(
11fdf7f2 1051 ch, OSD::get_osdmap_pobject_name(e), 0, 0, bl) >= 0;
7c673cae
FG
1052 if (!found) {
1053 cerr << "Can't find OSDMap for pg epoch " << e << std::endl;
1054 return -ENOENT;
1055 }
1056 osdmap.decode(bl);
1057 if (debug)
1058 cerr << osdmap << std::endl;
1059 return 0;
1060}
1061
11fdf7f2
TL
1062int get_pg_num_history(ObjectStore *store, pool_pg_num_history_t *h)
1063{
1064 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
1065 bufferlist bl;
1066 auto pghist = OSD::make_pg_num_history_oid();
1067 int r = store->read(ch, pghist, 0, 0, bl, 0);
1068 if (r >= 0 && bl.length() > 0) {
1069 auto p = bl.cbegin();
1070 decode(*h, p);
1071 }
1072 cout << __func__ << " pg_num_history " << *h << std::endl;
1073 return 0;
1074}
1075
7c673cae
FG
1076int add_osdmap(ObjectStore *store, metadata_section &ms)
1077{
1078 return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl);
1079}
1080
2a845540
TL
1081int ObjectStoreTool::do_export(
1082 CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid,
7c673cae
FG
1083 pg_info_t &info, epoch_t map_epoch, __u8 struct_ver,
1084 const OSDSuperblock& superblock,
1085 PastIntervals &past_intervals)
1086{
1087 PGLog::IndexedLog log;
1088 pg_missing_t missing;
1089
11fdf7f2 1090 cerr << "Exporting " << pgid << " info " << info << std::endl;
7c673cae 1091
2a845540 1092 int ret = get_log(cct, fs, struct_ver, pgid, info, log, missing);
7c673cae
FG
1093 if (ret > 0)
1094 return ret;
1095
1096 if (debug) {
1097 Formatter *formatter = Formatter::create("json-pretty");
11fdf7f2 1098 ceph_assert(formatter);
7c673cae
FG
1099 dump_log(formatter, cerr, log, missing);
1100 delete formatter;
1101 }
1102 write_super();
1103
1104 pg_begin pgb(pgid, superblock);
1105 // Special case: If replicated pg don't require the importing OSD to have shard feature
1106 if (pgid.is_no_shard()) {
1107 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1108 }
1109 ret = write_section(TYPE_PG_BEGIN, pgb, file_fd);
1110 if (ret)
1111 return ret;
1112
1113 // The metadata_section is now before files, so import can detect
1114 // errors and abort without wasting time.
1115 metadata_section ms(
1116 struct_ver,
1117 map_epoch,
1118 info,
1119 log,
1120 past_intervals,
1121 missing);
1122 ret = add_osdmap(fs, ms);
1123 if (ret)
1124 return ret;
1125 ret = write_section(TYPE_PG_METADATA, ms, file_fd);
1126 if (ret)
1127 return ret;
1128
1129 ret = export_files(fs, coll);
1130 if (ret) {
1131 cerr << "export_files error " << ret << std::endl;
1132 return ret;
1133 }
1134
1135 ret = write_simple(TYPE_PG_END, file_fd);
1136 if (ret)
1137 return ret;
1138
1139 return 0;
1140}
1141
b32b8144
FG
1142int dump_data(Formatter *formatter, bufferlist &bl)
1143{
11fdf7f2 1144 auto ebliter = bl.cbegin();
b32b8144
FG
1145 data_section ds;
1146 ds.decode(ebliter);
1147
1148 formatter->open_object_section("data_block");
1149 formatter->dump_unsigned("offset", ds.offset);
1150 formatter->dump_unsigned("len", ds.len);
1151 // XXX: Add option to dump data like od -cx ?
1152 formatter->close_section();
1153 formatter->flush(cout);
1154 return 0;
1155}
1156
7c673cae
FG
1157int get_data(ObjectStore *store, coll_t coll, ghobject_t hoid,
1158 ObjectStore::Transaction *t, bufferlist &bl)
1159{
11fdf7f2 1160 auto ebliter = bl.cbegin();
7c673cae
FG
1161 data_section ds;
1162 ds.decode(ebliter);
1163
1164 if (debug)
1165 cerr << "\tdata: offset " << ds.offset << " len " << ds.len << std::endl;
1166 t->write(coll, hoid, ds.offset, ds.len, ds.databl);
1167 return 0;
1168}
1169
b32b8144
FG
1170int dump_attrs(
1171 Formatter *formatter, ghobject_t hoid,
1172 bufferlist &bl)
1173{
11fdf7f2 1174 auto ebliter = bl.cbegin();
b32b8144
FG
1175 attr_section as;
1176 as.decode(ebliter);
1177
1178 // This could have been handled in the caller if we didn't need to
1179 // support exports that didn't include object_info_t in object_begin.
1180 if (hoid.generation == ghobject_t::NO_GEN &&
1181 hoid.hobj.is_head()) {
1182 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1183 if (mi != as.data.end()) {
1184 SnapSet snapset;
11fdf7f2 1185 auto p = mi->second.cbegin();
b32b8144
FG
1186 snapset.decode(p);
1187 formatter->open_object_section("snapset");
1188 snapset.dump(formatter);
1189 formatter->close_section();
1190 } else {
1191 formatter->open_object_section("snapset");
1192 formatter->dump_string("error", "missing SS_ATTR");
1193 formatter->close_section();
1194 }
1195 }
1196
1197 formatter->open_object_section("attrs");
1198 formatter->open_array_section("user");
1199 for (auto kv : as.data) {
1200 // Skip system attributes
1201 if (('_' != kv.first.at(0)) || kv.first.size() == 1)
1202 continue;
1203 formatter->open_object_section("user_attr");
1204 formatter->dump_string("name", kv.first.substr(1));
1205 bool b64;
1206 formatter->dump_string("value", cleanbin(kv.second, b64));
1207 formatter->dump_bool("Base64", b64);
1208 formatter->close_section();
1209 }
1210 formatter->close_section();
1211 formatter->open_array_section("system");
1212 for (auto kv : as.data) {
1213 // Skip user attributes
1214 if (('_' == kv.first.at(0)) && kv.first.size() != 1)
1215 continue;
1216 formatter->open_object_section("sys_attr");
1217 formatter->dump_string("name", kv.first);
1218 formatter->close_section();
1219 }
1220 formatter->close_section();
1221 formatter->close_section();
1222 formatter->flush(cout);
1223
1224 return 0;
1225}
1226
7c673cae
FG
1227int get_attrs(
1228 ObjectStore *store, coll_t coll, ghobject_t hoid,
1229 ObjectStore::Transaction *t, bufferlist &bl,
224ce89b 1230 OSDriver &driver, SnapMapper &snap_mapper)
7c673cae 1231{
11fdf7f2 1232 auto ebliter = bl.cbegin();
7c673cae
FG
1233 attr_section as;
1234 as.decode(ebliter);
1235
11fdf7f2 1236 auto ch = store->open_collection(coll);
7c673cae
FG
1237 if (debug)
1238 cerr << "\tattrs: len " << as.data.size() << std::endl;
1239 t->setattrs(coll, hoid, as.data);
1240
1241 // This could have been handled in the caller if we didn't need to
1242 // support exports that didn't include object_info_t in object_begin.
11fdf7f2
TL
1243 if (hoid.generation == ghobject_t::NO_GEN &&
1244 hoid.hobj.is_head()) {
1245 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1246 if (mi != as.data.end()) {
1247 SnapSet snapset;
1248 auto p = mi->second.cbegin();
1249 snapset.decode(p);
1250 cout << "snapset " << snapset << std::endl;
1251 for (auto& p : snapset.clone_snaps) {
1252 ghobject_t clone = hoid;
1253 clone.hobj.snap = p.first;
1254 set<snapid_t> snaps(p.second.begin(), p.second.end());
1255 if (!store->exists(ch, clone)) {
1256 // no clone, skip. this is probably a cache pool. this works
1257 // because we use a separate transaction per object and clones
1258 // come before head in the archive.
7c673cae 1259 if (debug)
11fdf7f2
TL
1260 cerr << "\tskipping missing " << clone << " (snaps "
1261 << snaps << ")" << std::endl;
1262 continue;
7c673cae 1263 }
11fdf7f2
TL
1264 if (debug)
1265 cerr << "\tsetting " << clone.hobj << " snaps " << snaps
1266 << std::endl;
1267 OSDriver::OSTransaction _t(driver.get_transaction(t));
1268 ceph_assert(!snaps.empty());
1269 snap_mapper.add_oid(clone.hobj, snaps, &_t);
7c673cae
FG
1270 }
1271 } else {
11fdf7f2 1272 cerr << "missing SS_ATTR on " << hoid << std::endl;
7c673cae
FG
1273 }
1274 }
7c673cae
FG
1275 return 0;
1276}
1277
b32b8144
FG
1278int dump_omap_hdr(Formatter *formatter, bufferlist &bl)
1279{
11fdf7f2 1280 auto ebliter = bl.cbegin();
b32b8144
FG
1281 omap_hdr_section oh;
1282 oh.decode(ebliter);
1283
1284 formatter->open_object_section("omap_header");
1285 formatter->dump_string("value", string(oh.hdr.c_str(), oh.hdr.length()));
1286 formatter->close_section();
1287 formatter->flush(cout);
1288 return 0;
1289}
1290
7c673cae
FG
1291int get_omap_hdr(ObjectStore *store, coll_t coll, ghobject_t hoid,
1292 ObjectStore::Transaction *t, bufferlist &bl)
1293{
11fdf7f2 1294 auto ebliter = bl.cbegin();
7c673cae
FG
1295 omap_hdr_section oh;
1296 oh.decode(ebliter);
1297
1298 if (debug)
1299 cerr << "\tomap header: " << string(oh.hdr.c_str(), oh.hdr.length())
1300 << std::endl;
1301 t->omap_setheader(coll, hoid, oh.hdr);
1302 return 0;
1303}
1304
b32b8144
FG
1305int dump_omap(Formatter *formatter, bufferlist &bl)
1306{
11fdf7f2 1307 auto ebliter = bl.cbegin();
b32b8144
FG
1308 omap_section os;
1309 os.decode(ebliter);
1310
1311 formatter->open_object_section("omaps");
1312 formatter->dump_unsigned("count", os.omap.size());
1313 formatter->open_array_section("data");
1314 for (auto o : os.omap) {
1315 formatter->open_object_section("omap");
1316 formatter->dump_string("name", o.first);
1317 bool b64;
1318 formatter->dump_string("value", cleanbin(o.second, b64));
1319 formatter->dump_bool("Base64", b64);
1320 formatter->close_section();
1321 }
1322 formatter->close_section();
1323 formatter->close_section();
1324 formatter->flush(cout);
1325 return 0;
1326}
1327
7c673cae
FG
1328int get_omap(ObjectStore *store, coll_t coll, ghobject_t hoid,
1329 ObjectStore::Transaction *t, bufferlist &bl)
1330{
11fdf7f2 1331 auto ebliter = bl.cbegin();
7c673cae
FG
1332 omap_section os;
1333 os.decode(ebliter);
1334
1335 if (debug)
1336 cerr << "\tomap: size " << os.omap.size() << std::endl;
1337 t->omap_setkeys(coll, hoid, os.omap);
1338 return 0;
1339}
1340
b32b8144
FG
1341int ObjectStoreTool::dump_object(Formatter *formatter,
1342 bufferlist &bl)
1343{
11fdf7f2 1344 auto ebliter = bl.cbegin();
b32b8144
FG
1345 object_begin ob;
1346 ob.decode(ebliter);
1347
1348 if (ob.hoid.hobj.is_temp()) {
1349 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1350 return -EFAULT;
1351 }
1352
1353 formatter->open_object_section("object");
1354 formatter->open_object_section("oid");
1355 ob.hoid.dump(formatter);
1356 formatter->close_section();
1357 formatter->open_object_section("object_info");
1358 ob.oi.dump(formatter);
1359 formatter->close_section();
1360
1361 bufferlist ebl;
1362 bool done = false;
1363 while(!done) {
1364 sectiontype_t type;
1365 int ret = read_section(&type, &ebl);
1366 if (ret)
1367 return ret;
1368
1369 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1370 //cout << "\t\tsection size " << ebl.length() << std::endl;
1371 if (type >= END_OF_TYPES) {
1372 cout << "Skipping unknown object section type" << std::endl;
1373 continue;
1374 }
1375 switch(type) {
1376 case TYPE_DATA:
1377 if (dry_run) break;
1378 ret = dump_data(formatter, ebl);
1379 if (ret) return ret;
1380 break;
1381 case TYPE_ATTRS:
1382 if (dry_run) break;
1383 ret = dump_attrs(formatter, ob.hoid, ebl);
1384 if (ret) return ret;
1385 break;
1386 case TYPE_OMAP_HDR:
1387 if (dry_run) break;
1388 ret = dump_omap_hdr(formatter, ebl);
1389 if (ret) return ret;
1390 break;
1391 case TYPE_OMAP:
1392 if (dry_run) break;
1393 ret = dump_omap(formatter, ebl);
1394 if (ret) return ret;
1395 break;
1396 case TYPE_OBJECT_END:
1397 done = true;
1398 break;
1399 default:
1400 cerr << "Unknown section type " << type << std::endl;
1401 return -EFAULT;
1402 }
1403 }
1404 formatter->close_section();
1405 return 0;
1406}
1407
11fdf7f2
TL
1408int ObjectStoreTool::get_object(ObjectStore *store,
1409 OSDriver& driver,
1410 SnapMapper& mapper,
1411 coll_t coll,
1412 bufferlist &bl, OSDMap &origmap,
1413 bool *skipped_objects)
7c673cae
FG
1414{
1415 ObjectStore::Transaction tran;
1416 ObjectStore::Transaction *t = &tran;
11fdf7f2 1417 auto ebliter = bl.cbegin();
7c673cae
FG
1418 object_begin ob;
1419 ob.decode(ebliter);
7c673cae
FG
1420
1421 if (ob.hoid.hobj.is_temp()) {
1422 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1423 return -EFAULT;
1424 }
11fdf7f2
TL
1425 ceph_assert(g_ceph_context);
1426
1427 auto ch = store->open_collection(coll);
7c673cae
FG
1428 if (ob.hoid.hobj.nspace != g_ceph_context->_conf->osd_hit_set_namespace) {
1429 object_t oid = ob.hoid.hobj.oid;
1430 object_locator_t loc(ob.hoid.hobj);
11fdf7f2
TL
1431 pg_t raw_pgid = origmap.object_locator_to_pg(oid, loc);
1432 pg_t pgid = origmap.raw_pg_to_pg(raw_pgid);
7c673cae
FG
1433
1434 spg_t coll_pgid;
1435 if (coll.is_pg(&coll_pgid) == false) {
1436 cerr << "INTERNAL ERROR: Bad collection during import" << std::endl;
1437 return -EFAULT;
1438 }
1439 if (coll_pgid.shard != ob.hoid.shard_id) {
1440 cerr << "INTERNAL ERROR: Importing shard " << coll_pgid.shard
1441 << " but object shard is " << ob.hoid.shard_id << std::endl;
1442 return -EFAULT;
1443 }
1444
1445 if (coll_pgid.pgid != pgid) {
1446 cerr << "Skipping object '" << ob.hoid << "' which belongs in pg " << pgid << std::endl;
1447 *skipped_objects = true;
1448 skip_object(bl);
1449 return 0;
1450 }
1451 }
1452
1453 if (!dry_run)
1454 t->touch(coll, ob.hoid);
1455
1456 cout << "Write " << ob.hoid << std::endl;
1457
7c673cae
FG
1458 bufferlist ebl;
1459 bool done = false;
1460 while(!done) {
1461 sectiontype_t type;
1462 int ret = read_section(&type, &ebl);
1463 if (ret)
1464 return ret;
1465
1466 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1467 //cout << "\t\tsection size " << ebl.length() << std::endl;
1468 if (type >= END_OF_TYPES) {
1469 cout << "Skipping unknown object section type" << std::endl;
1470 continue;
1471 }
1472 switch(type) {
1473 case TYPE_DATA:
1474 if (dry_run) break;
1475 ret = get_data(store, coll, ob.hoid, t, ebl);
1476 if (ret) return ret;
1477 break;
1478 case TYPE_ATTRS:
1479 if (dry_run) break;
224ce89b 1480 ret = get_attrs(store, coll, ob.hoid, t, ebl, driver, mapper);
7c673cae
FG
1481 if (ret) return ret;
1482 break;
1483 case TYPE_OMAP_HDR:
1484 if (dry_run) break;
1485 ret = get_omap_hdr(store, coll, ob.hoid, t, ebl);
1486 if (ret) return ret;
1487 break;
1488 case TYPE_OMAP:
1489 if (dry_run) break;
1490 ret = get_omap(store, coll, ob.hoid, t, ebl);
1491 if (ret) return ret;
1492 break;
1493 case TYPE_OBJECT_END:
1494 done = true;
1495 break;
1496 default:
1497 cerr << "Unknown section type " << type << std::endl;
1498 return -EFAULT;
1499 }
1500 }
11fdf7f2
TL
1501 if (!dry_run) {
1502 wait_until_done(t, [&] {
1503 store->queue_transaction(ch, std::move(*t));
1504 ch->flush();
1505 });
1506 }
7c673cae
FG
1507 return 0;
1508}
1509
b32b8144
FG
1510int dump_pg_metadata(Formatter *formatter, bufferlist &bl, metadata_section &ms)
1511{
11fdf7f2 1512 auto ebliter = bl.cbegin();
b32b8144
FG
1513 ms.decode(ebliter);
1514
1515 formatter->open_object_section("metadata_section");
1516
1517 formatter->dump_unsigned("pg_disk_version", (int)ms.struct_ver);
1518 formatter->dump_unsigned("map_epoch", ms.map_epoch);
1519
1520 formatter->open_object_section("OSDMap");
1521 ms.osdmap.dump(formatter);
1522 formatter->close_section();
1523 formatter->flush(cout);
1524 cout << std::endl;
1525
1526 formatter->open_object_section("info");
1527 ms.info.dump(formatter);
1528 formatter->close_section();
1529 formatter->flush(cout);
1530
1531 formatter->open_object_section("log");
1532 ms.log.dump(formatter);
1533 formatter->close_section();
1534 formatter->flush(cout);
1535
1536 formatter->open_object_section("pg_missing_t");
1537 ms.missing.dump(formatter);
1538 formatter->close_section();
1539
1540 // XXX: ms.past_intervals?
1541
1542 formatter->close_section();
1543 formatter->flush(cout);
1544
1545 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1546 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1547 return -EFAULT;
1548 }
1549
1550 return 0;
1551}
1552
7c673cae 1553int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms,
11fdf7f2 1554 const OSDSuperblock& sb, spg_t pgid)
7c673cae 1555{
11fdf7f2 1556 auto ebliter = bl.cbegin();
7c673cae
FG
1557 ms.decode(ebliter);
1558 spg_t old_pgid = ms.info.pgid;
1559 ms.info.pgid = pgid;
1560
11fdf7f2
TL
1561 if (debug) {
1562 cout << "export pgid " << old_pgid << std::endl;
1563 cout << "struct_v " << (int)ms.struct_ver << std::endl;
1564 cout << "map epoch " << ms.map_epoch << std::endl;
7c673cae 1565
11fdf7f2
TL
1566#ifdef DIAGNOSTIC
1567 Formatter *formatter = new JSONFormatter(true);
1568 formatter->open_object_section("stuff");
7c673cae 1569
11fdf7f2
TL
1570 formatter->open_object_section("importing OSDMap");
1571 ms.osdmap.dump(formatter);
1572 formatter->close_section();
1573 formatter->flush(cout);
1574 cout << std::endl;
7c673cae 1575
11fdf7f2 1576 cout << "osd current epoch " << sb.current_epoch << std::endl;
7c673cae 1577
11fdf7f2
TL
1578 formatter->open_object_section("info");
1579 ms.info.dump(formatter);
1580 formatter->close_section();
1581 formatter->flush(cout);
1582 cout << std::endl;
7c673cae 1583
11fdf7f2
TL
1584 formatter->open_object_section("log");
1585 ms.log.dump(formatter);
1586 formatter->close_section();
1587 formatter->flush(cout);
1588 cout << std::endl;
1589
1590 formatter->close_section();
1591 formatter->flush(cout);
1592 cout << std::endl;
7c673cae 1593#endif
11fdf7f2 1594 }
7c673cae
FG
1595
1596 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1597 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1598 return -EFAULT;
1599 }
1600
1601 if (ms.map_epoch > sb.current_epoch) {
1602 cerr << "ERROR: Export PG's map_epoch " << ms.map_epoch << " > OSD's epoch " << sb.current_epoch << std::endl;
1603 cerr << "The OSD you are using is older than the exported PG" << std::endl;
1604 cerr << "Either use another OSD or join selected OSD to cluster to update it first" << std::endl;
1605 return -EINVAL;
1606 }
1607
11fdf7f2 1608 // Old exports didn't include OSDMap
7c673cae 1609 if (ms.osdmap.get_epoch() == 0) {
11fdf7f2
TL
1610 cerr << "WARNING: No OSDMap in old export, this is an ancient export."
1611 " Not supported." << std::endl;
1612 return -EINVAL;
7c673cae
FG
1613 }
1614
11fdf7f2
TL
1615 if (ms.osdmap.get_epoch() < sb.oldest_map) {
1616 cerr << "PG export's map " << ms.osdmap.get_epoch()
1617 << " is older than OSD's oldest_map " << sb.oldest_map << std::endl;
1618 if (!force) {
1619 cerr << " pass --force to proceed anyway (with incomplete PastIntervals)"
1620 << std::endl;
1621 return -EINVAL;
7c673cae
FG
1622 }
1623 }
7c673cae
FG
1624 if (debug) {
1625 cerr << "Import pgid " << ms.info.pgid << std::endl;
b32b8144 1626 cerr << "Previous past_intervals " << ms.past_intervals << std::endl;
11fdf7f2
TL
1627 cerr << "history.same_interval_since "
1628 << ms.info.history.same_interval_since << std::endl;
7c673cae
FG
1629 }
1630
7c673cae
FG
1631 return 0;
1632}
1633
1634// out: pg_log_t that only has entries that apply to import_pgid using curmap
1635// reject: Entries rejected from "in" are in the reject.log. Other fields not set.
1636void filter_divergent_priors(spg_t import_pgid, const OSDMap &curmap,
1637 const string &hit_set_namespace, const divergent_priors_t &in,
1638 divergent_priors_t &out, divergent_priors_t &reject)
1639{
1640 out.clear();
1641 reject.clear();
1642
1643 for (divergent_priors_t::const_iterator i = in.begin();
1644 i != in.end(); ++i) {
1645
1646 // Reject divergent priors for temporary objects
1647 if (i->second.is_temp()) {
1648 reject.insert(*i);
1649 continue;
1650 }
1651
1652 if (i->second.nspace != hit_set_namespace) {
1653 object_t oid = i->second.oid;
1654 object_locator_t loc(i->second);
1655 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
1656 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
1657
1658 if (import_pgid.pgid == pgid) {
1659 out.insert(*i);
1660 } else {
1661 reject.insert(*i);
1662 }
1663 } else {
1664 out.insert(*i);
1665 }
1666 }
1667}
1668
11fdf7f2 1669int ObjectStoreTool::dump_export(Formatter *formatter)
b32b8144
FG
1670{
1671 bufferlist ebl;
1672 pg_info_t info;
1673 PGLog::IndexedLog log;
1674 //bool skipped_objects = false;
1675
1676 int ret = read_super();
1677 if (ret)
1678 return ret;
1679
1680 if (sh.magic != super_header::super_magic) {
1681 cerr << "Invalid magic number" << std::endl;
1682 return -EFAULT;
1683 }
1684
1685 if (sh.version > super_header::super_ver) {
1686 cerr << "Can't handle export format version=" << sh.version << std::endl;
1687 return -EINVAL;
1688 }
1689
1690 formatter->open_object_section("Export");
1691
1692 //First section must be TYPE_PG_BEGIN
1693 sectiontype_t type;
1694 ret = read_section(&type, &ebl);
1695 if (ret)
1696 return ret;
1697 if (type == TYPE_POOL_BEGIN) {
1698 cerr << "Dump of pool exports not supported" << std::endl;
1699 return -EINVAL;
1700 } else if (type != TYPE_PG_BEGIN) {
1701 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
1702 return -EFAULT;
1703 }
1704
11fdf7f2 1705 auto ebliter = ebl.cbegin();
b32b8144
FG
1706 pg_begin pgb;
1707 pgb.decode(ebliter);
1708 spg_t pgid = pgb.pgid;
1709
1710 formatter->dump_string("pgid", stringify(pgid));
1711 formatter->dump_string("cluster_fsid", stringify(pgb.superblock.cluster_fsid));
1712 formatter->dump_string("features", stringify(pgb.superblock.compat_features));
1713
1714 bool done = false;
1715 bool found_metadata = false;
1716 metadata_section ms;
1717 bool objects_started = false;
1718 while(!done) {
1719 ret = read_section(&type, &ebl);
1720 if (ret)
1721 return ret;
1722
1723 if (debug) {
11fdf7f2 1724 cerr << "dump_export: Section type " << std::to_string(type) << std::endl;
b32b8144
FG
1725 }
1726 if (type >= END_OF_TYPES) {
1727 cerr << "Skipping unknown section type" << std::endl;
1728 continue;
1729 }
1730 switch(type) {
1731 case TYPE_OBJECT_BEGIN:
1732 if (!objects_started) {
1733 formatter->open_array_section("objects");
1734 objects_started = true;
1735 }
1736 ret = dump_object(formatter, ebl);
1737 if (ret) return ret;
1738 break;
1739 case TYPE_PG_METADATA:
1740 if (objects_started)
1741 cerr << "WARNING: metadata_section out of order" << std::endl;
1742 ret = dump_pg_metadata(formatter, ebl, ms);
1743 if (ret) return ret;
1744 found_metadata = true;
1745 break;
1746 case TYPE_PG_END:
1747 if (objects_started) {
1748 formatter->close_section();
1749 }
1750 done = true;
1751 break;
1752 default:
1753 cerr << "Unknown section type " << std::to_string(type) << std::endl;
1754 return -EFAULT;
1755 }
1756 }
1757
1758 if (!found_metadata) {
1759 cerr << "Missing metadata section" << std::endl;
1760 return -EFAULT;
1761 }
1762
1763 formatter->close_section();
1764 formatter->flush(cout);
1765
1766 return 0;
1767}
1768
7c673cae 1769int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb,
11fdf7f2 1770 bool force, std::string pgidstr)
7c673cae
FG
1771{
1772 bufferlist ebl;
1773 pg_info_t info;
1774 PGLog::IndexedLog log;
1775 bool skipped_objects = false;
1776
1777 if (!dry_run)
1778 finish_remove_pgs(store);
1779
1780 int ret = read_super();
1781 if (ret)
1782 return ret;
1783
1784 if (sh.magic != super_header::super_magic) {
1785 cerr << "Invalid magic number" << std::endl;
1786 return -EFAULT;
1787 }
1788
1789 if (sh.version > super_header::super_ver) {
1790 cerr << "Can't handle export format version=" << sh.version << std::endl;
1791 return -EINVAL;
1792 }
1793
1794 //First section must be TYPE_PG_BEGIN
1795 sectiontype_t type;
1796 ret = read_section(&type, &ebl);
1797 if (ret)
1798 return ret;
1799 if (type == TYPE_POOL_BEGIN) {
1800 cerr << "Pool exports cannot be imported into a PG" << std::endl;
1801 return -EINVAL;
1802 } else if (type != TYPE_PG_BEGIN) {
b32b8144 1803 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
7c673cae
FG
1804 return -EFAULT;
1805 }
1806
11fdf7f2 1807 auto ebliter = ebl.cbegin();
7c673cae
FG
1808 pg_begin pgb;
1809 pgb.decode(ebliter);
1810 spg_t pgid = pgb.pgid;
7c673cae
FG
1811
1812 if (pgidstr.length()) {
1813 spg_t user_pgid;
1814
1815 bool ok = user_pgid.parse(pgidstr.c_str());
1816 // This succeeded in main() already
11fdf7f2 1817 ceph_assert(ok);
7c673cae 1818 if (pgid != user_pgid) {
11fdf7f2
TL
1819 cerr << "specified pgid " << user_pgid
1820 << " does not match actual pgid " << pgid << std::endl;
1821 return -EINVAL;
7c673cae
FG
1822 }
1823 }
1824
1825 if (!pgb.superblock.cluster_fsid.is_zero()
1826 && pgb.superblock.cluster_fsid != sb.cluster_fsid) {
1827 cerr << "Export came from different cluster with fsid "
1828 << pgb.superblock.cluster_fsid << std::endl;
1829 return -EINVAL;
1830 }
1831
1832 if (debug) {
1833 cerr << "Exported features: " << pgb.superblock.compat_features << std::endl;
1834 }
1835
11fdf7f2 1836 // Special case: Old export has SHARDS incompat feature on replicated pg, removqqe it
7c673cae
FG
1837 if (pgid.is_no_shard())
1838 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1839
1840 if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) {
1841 CompatSet unsupported = sb.compat_features.unsupported(pgb.superblock.compat_features);
1842
1843 cerr << "Export has incompatible features set " << unsupported << std::endl;
1844
1845 // Let them import if they specify the --force option
1846 if (!force)
1847 return 11; // Positive return means exit status
1848 }
1849
11fdf7f2 1850 // we need the latest OSDMap to check for collisions
7c673cae
FG
1851 OSDMap curmap;
1852 bufferlist bl;
1853 ret = get_osdmap(store, sb.current_epoch, curmap, bl);
1854 if (ret) {
11fdf7f2 1855 cerr << "Can't find latest local OSDMap " << sb.current_epoch << std::endl;
7c673cae
FG
1856 return ret;
1857 }
1858 if (!curmap.have_pg_pool(pgid.pgid.m_pool)) {
1859 cerr << "Pool " << pgid.pgid.m_pool << " no longer exists" << std::endl;
1860 // Special exit code for this error, used by test code
1861 return 10; // Positive return means exit status
1862 }
1863
11fdf7f2
TL
1864 pool_pg_num_history_t pg_num_history;
1865 get_pg_num_history(store, &pg_num_history);
1866
7c673cae 1867 ghobject_t pgmeta_oid = pgid.make_pgmeta_oid();
7c673cae 1868
11fdf7f2 1869 // Check for PG already present.
7c673cae
FG
1870 coll_t coll(pgid);
1871 if (store->collection_exists(coll)) {
1872 cerr << "pgid " << pgid << " already exists" << std::endl;
1873 return -EEXIST;
1874 }
1875
11fdf7f2 1876 ObjectStore::CollectionHandle ch;
7c673cae 1877
11fdf7f2
TL
1878 OSDriver driver(
1879 store,
1880 coll_t(),
1881 OSD::make_snapmapper_oid());
1882 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pgid.shard);
7c673cae
FG
1883
1884 cout << "Importing pgid " << pgid;
7c673cae
FG
1885 cout << std::endl;
1886
1887 bool done = false;
1888 bool found_metadata = false;
1889 metadata_section ms;
7c673cae
FG
1890 while(!done) {
1891 ret = read_section(&type, &ebl);
1892 if (ret)
1893 return ret;
1894
b32b8144
FG
1895 if (debug) {
1896 cout << __func__ << ": Section type " << std::to_string(type) << std::endl;
1897 }
7c673cae
FG
1898 if (type >= END_OF_TYPES) {
1899 cout << "Skipping unknown section type" << std::endl;
1900 continue;
1901 }
1902 switch(type) {
1903 case TYPE_OBJECT_BEGIN:
11fdf7f2
TL
1904 ceph_assert(found_metadata);
1905 ret = get_object(store, driver, mapper, coll, ebl, ms.osdmap,
1906 &skipped_objects);
7c673cae
FG
1907 if (ret) return ret;
1908 break;
1909 case TYPE_PG_METADATA:
11fdf7f2 1910 ret = get_pg_metadata(store, ebl, ms, sb, pgid);
7c673cae
FG
1911 if (ret) return ret;
1912 found_metadata = true;
11fdf7f2
TL
1913
1914 if (pgid != ms.info.pgid) {
1915 cerr << "specified pgid " << pgid << " does not match import file pgid "
1916 << ms.info.pgid << std::endl;
1917 return -EINVAL;
1918 }
1919
1920 // make sure there are no conflicting splits or merges
1921 if (ms.osdmap.have_pg_pool(pgid.pgid.pool())) {
1922 auto p = pg_num_history.pg_nums.find(pgid.pgid.m_pool);
1923 if (p != pg_num_history.pg_nums.end() &&
1924 !p->second.empty()) {
1925 unsigned start_pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool());
1926 unsigned pg_num = start_pg_num;
1927 for (auto q = p->second.lower_bound(ms.map_epoch);
1928 q != p->second.end();
1929 ++q) {
1930 unsigned new_pg_num = q->second;
1931 cout << "pool " << pgid.pgid.pool() << " pg_num " << pg_num
1932 << " -> " << new_pg_num << std::endl;
1933
1934 // check for merge target
1935 spg_t target;
1936 if (pgid.is_merge_source(pg_num, new_pg_num, &target)) {
1937 // FIXME: this checks assumes the OSD's PG is at the OSD's
1938 // map epoch; it could be, say, at *our* epoch, pre-merge.
1939 coll_t coll(target);
1940 if (store->collection_exists(coll)) {
1941 cerr << "pgid " << pgid << " merges to target " << target
1942 << " which already exists" << std::endl;
1943 return 12;
1944 }
1945 }
1946
1947 // check for split children
1948 set<spg_t> children;
1949 if (pgid.is_split(start_pg_num, new_pg_num, &children)) {
1950 cerr << " children are " << children << std::endl;
1951 for (auto child : children) {
1952 coll_t coll(child);
1953 if (store->collection_exists(coll)) {
1954 cerr << "pgid " << pgid << " splits to " << children
1955 << " and " << child << " exists" << std::endl;
1956 return 12;
1957 }
1958 }
1959 }
1960 pg_num = new_pg_num;
1961 }
1962 }
1963 } else {
1964 cout << "pool " << pgid.pgid.pool() << " doesn't existing, not checking"
1965 << " for splits or mergers" << std::endl;
1966 }
1967
1968 if (!dry_run) {
1969 ObjectStore::Transaction t;
1970 ch = store->create_new_collection(coll);
9f95a23c 1971 create_pg_collection(
11fdf7f2
TL
1972 t, pgid,
1973 pgid.get_split_bits(ms.osdmap.get_pg_pool(pgid.pool())->get_pg_num()));
9f95a23c 1974 init_pg_ondisk(t, pgid, NULL);
11fdf7f2
TL
1975
1976 // mark this coll for removal until we're done
1977 map<string,bufferlist> values;
1978 encode((char)1, values["_remove"]);
1979 t.omap_setkeys(coll, pgid.make_pgmeta_oid(), values);
1980
1981 store->queue_transaction(ch, std::move(t));
1982 }
1983
7c673cae
FG
1984 break;
1985 case TYPE_PG_END:
11fdf7f2 1986 ceph_assert(found_metadata);
7c673cae
FG
1987 done = true;
1988 break;
1989 default:
b32b8144 1990 cerr << "Unknown section type " << std::to_string(type) << std::endl;
7c673cae
FG
1991 return -EFAULT;
1992 }
1993 }
1994
1995 if (!found_metadata) {
1996 cerr << "Missing metadata section" << std::endl;
1997 return -EFAULT;
1998 }
1999
2000 ObjectStore::Transaction t;
2001 if (!dry_run) {
2002 pg_log_t newlog, reject;
11fdf7f2 2003 pg_log_t::filter_log(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
7c673cae
FG
2004 ms.log, newlog, reject);
2005 if (debug) {
2006 for (list<pg_log_entry_t>::iterator i = newlog.log.begin();
2007 i != newlog.log.end(); ++i)
2008 cerr << "Keeping log entry " << *i << std::endl;
2009 for (list<pg_log_entry_t>::iterator i = reject.log.begin();
2010 i != reject.log.end(); ++i)
2011 cerr << "Skipping log entry " << *i << std::endl;
2012 }
2013
2014 divergent_priors_t newdp, rejectdp;
11fdf7f2 2015 filter_divergent_priors(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
7c673cae
FG
2016 ms.divergent_priors, newdp, rejectdp);
2017 ms.divergent_priors = newdp;
2018 if (debug) {
2019 for (divergent_priors_t::iterator i = newdp.begin();
2020 i != newdp.end(); ++i)
2021 cerr << "Keeping divergent_prior " << *i << std::endl;
2022 for (divergent_priors_t::iterator i = rejectdp.begin();
2023 i != rejectdp.end(); ++i)
2024 cerr << "Skipping divergent_prior " << *i << std::endl;
2025 }
2026
2027 ms.missing.filter_objects([&](const hobject_t &obj) {
2028 if (obj.nspace == g_ceph_context->_conf->osd_hit_set_namespace)
2029 return false;
11fdf7f2 2030 ceph_assert(!obj.is_temp());
7c673cae
FG
2031 object_t oid = obj.oid;
2032 object_locator_t loc(obj);
11fdf7f2
TL
2033 pg_t raw_pgid = ms.osdmap.object_locator_to_pg(oid, loc);
2034 pg_t _pgid = ms.osdmap.raw_pg_to_pg(raw_pgid);
7c673cae
FG
2035
2036 return pgid.pgid != _pgid;
2037 });
2038
2039
2040 if (debug) {
2041 pg_missing_t missing;
2042 Formatter *formatter = Formatter::create("json-pretty");
2043 dump_log(formatter, cerr, newlog, ms.missing);
2044 delete formatter;
2045 }
2046
2047 // Just like a split invalidate stats since the object count is changed
2048 if (skipped_objects)
2049 ms.info.stats.stats_invalid = true;
2050
2051 ret = write_pg(
2052 t,
2053 ms.map_epoch,
2054 ms.info,
2055 newlog,
2056 ms.past_intervals,
2057 ms.divergent_priors,
2058 ms.missing);
2059 if (ret) return ret;
2060 }
2061
2062 // done, clear removal flag
2063 if (debug)
2064 cerr << "done, clearing removal flag" << std::endl;
2065
2066 if (!dry_run) {
9f95a23c 2067 t.omap_rmkey(coll, pgid.make_pgmeta_oid(), "_remove");
11fdf7f2
TL
2068 wait_until_done(&t, [&] {
2069 store->queue_transaction(ch, std::move(t));
2070 // make sure we flush onreadable items before mapper/driver are destroyed.
2071 ch->flush();
2072 });
7c673cae 2073 }
7c673cae
FG
2074 return 0;
2075}
2076
2077int do_list(ObjectStore *store, string pgidstr, string object, boost::optional<std::string> nspace,
2078 Formatter *formatter, bool debug, bool human_readable, bool head)
2079{
2080 int r;
2081 lookup_ghobject lookup(object, nspace, head);
2082 if (pgidstr.length() > 0) {
2083 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
2084 } else {
2085 r = action_on_all_objects(store, lookup, debug);
2086 }
2087 if (r)
2088 return r;
2089 lookup.dump(formatter, human_readable);
2090 formatter->flush(cout);
2091 return 0;
2092}
2093
9f95a23c
TL
2094int do_list_slow(ObjectStore *store, string pgidstr, string object,
2095 double threshold, Formatter *formatter, bool debug, bool human_readable)
2096{
2097 int r;
2098 lookup_slow_ghobject lookup(object, threshold);
2099 if (pgidstr.length() > 0) {
2100 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
2101 } else {
2102 r = action_on_all_objects(store, lookup, debug);
2103 }
2104 if (r)
2105 return r;
2106 lookup.dump(formatter, human_readable);
2107 formatter->flush(cout);
2108 return 0;
2109}
2110
7c673cae
FG
2111int do_meta(ObjectStore *store, string object, Formatter *formatter, bool debug, bool human_readable)
2112{
2113 int r;
2114 boost::optional<std::string> nspace; // Not specified
2115 lookup_ghobject lookup(object, nspace);
2116 r = action_on_all_objects_in_exact_pg(store, coll_t::meta(), lookup, debug);
2117 if (r)
2118 return r;
2119 lookup.dump(formatter, human_readable);
2120 formatter->flush(cout);
2121 return 0;
2122}
2123
11fdf7f2
TL
// Selects what remove_object() removes.
enum rmtype {
  BOTH = 0,       // snap mapper entry and the object itself
  SNAPMAP = 1,    // snap mapper entry only
  NOSNAPMAP = 2   // the object only, snap mapper untouched
};
2129
7c673cae
FG
2130int remove_object(coll_t coll, ghobject_t &ghobj,
2131 SnapMapper &mapper,
2132 MapCacher::Transaction<std::string, bufferlist> *_t,
11fdf7f2
TL
2133 ObjectStore::Transaction *t,
2134 enum rmtype type)
7c673cae 2135{
11fdf7f2
TL
2136 if (type == BOTH || type == SNAPMAP) {
2137 int r = mapper.remove_oid(ghobj.hobj, _t);
2138 if (r < 0 && r != -ENOENT) {
2139 cerr << "remove_oid returned " << cpp_strerror(r) << std::endl;
2140 return r;
2141 }
7c673cae
FG
2142 }
2143
11fdf7f2
TL
2144 if (type == BOTH || type == NOSNAPMAP) {
2145 t->remove(coll, ghobj);
2146 }
7c673cae
FG
2147 return 0;
2148}
2149
2150int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent);
2151
2152int do_remove_object(ObjectStore *store, coll_t coll,
11fdf7f2 2153 ghobject_t &ghobj, bool all, bool force, enum rmtype type)
7c673cae 2154{
11fdf7f2 2155 auto ch = store->open_collection(coll);
7c673cae
FG
2156 spg_t pg;
2157 coll.is_pg_prefix(&pg);
2158 OSDriver driver(
2159 store,
2160 coll_t(),
2161 OSD::make_snapmapper_oid());
2162 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pg.shard);
2163 struct stat st;
2164
11fdf7f2 2165 int r = store->stat(ch, ghobj, &st);
7c673cae
FG
2166 if (r < 0) {
2167 cerr << "remove: " << cpp_strerror(r) << std::endl;
2168 return r;
2169 }
2170
2171 SnapSet ss;
2172 if (ghobj.hobj.has_snapset()) {
2173 r = get_snapset(store, coll, ghobj, ss, false);
2174 if (r < 0) {
2175 cerr << "Can't get snapset error " << cpp_strerror(r) << std::endl;
eafe8130
TL
2176 // If --force and bad snapset let them remove the head
2177 if (!(force && !all))
2178 return r;
7c673cae 2179 }
9f95a23c
TL
2180// cout << "snapset " << ss << std::endl;
2181 if (!ss.clone_snaps.empty() && !all) {
7c673cae
FG
2182 if (force) {
2183 cout << "WARNING: only removing "
2184 << (ghobj.hobj.is_head() ? "head" : "snapdir")
9f95a23c
TL
2185 << " with clones present" << std::endl;
2186 ss.clone_snaps.clear();
7c673cae 2187 } else {
9f95a23c
TL
2188 cerr << "Clones are present, use removeall to delete everything"
2189 << std::endl;
7c673cae
FG
2190 return -EINVAL;
2191 }
2192 }
2193 }
2194
2195 ObjectStore::Transaction t;
2196 OSDriver::OSTransaction _t(driver.get_transaction(&t));
2197
7c673cae 2198 ghobject_t snapobj = ghobj;
9f95a23c
TL
2199 for (auto& p : ss.clone_snaps) {
2200 snapobj.hobj.snap = p.first;
2201 cout << "remove clone " << snapobj << std::endl;
7c673cae 2202 if (!dry_run) {
11fdf7f2 2203 r = remove_object(coll, snapobj, mapper, &_t, &t, type);
7c673cae
FG
2204 if (r < 0)
2205 return r;
2206 }
2207 }
2208
11fdf7f2
TL
2209 cout << "remove " << ghobj << std::endl;
2210
2211 if (!dry_run) {
2212 r = remove_object(coll, ghobj, mapper, &_t, &t, type);
2213 if (r < 0)
2214 return r;
2215 }
7c673cae 2216
11fdf7f2
TL
2217 if (!dry_run) {
2218 wait_until_done(&t, [&] {
2219 store->queue_transaction(ch, std::move(t));
2220 ch->flush();
2221 });
2222 }
7c673cae
FG
2223 return 0;
2224}
2225
2226int do_list_attrs(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2227{
11fdf7f2 2228 auto ch = store->open_collection(coll);
20effc67 2229 map<string,bufferptr,less<>> aset;
11fdf7f2 2230 int r = store->getattrs(ch, ghobj, aset);
7c673cae
FG
2231 if (r < 0) {
2232 cerr << "getattrs: " << cpp_strerror(r) << std::endl;
2233 return r;
2234 }
2235
2236 for (map<string,bufferptr>::iterator i = aset.begin();i != aset.end(); ++i) {
2237 string key(i->first);
2238 if (outistty)
2239 key = cleanbin(key);
2240 cout << key << std::endl;
2241 }
2242 return 0;
2243}
2244
2245int do_list_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2246{
11fdf7f2
TL
2247 auto ch = store->open_collection(coll);
2248 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, ghobj);
7c673cae
FG
2249 if (!iter) {
2250 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT) << std::endl;
2251 return -ENOENT;
2252 }
2253 iter->seek_to_first();
2254 map<string, bufferlist> oset;
2255 while(iter->valid()) {
2256 get_omap_batch(iter, oset);
2257
2258 for (map<string,bufferlist>::iterator i = oset.begin();i != oset.end(); ++i) {
2259 string key(i->first);
2260 if (outistty)
2261 key = cleanbin(key);
2262 cout << key << std::endl;
2263 }
2264 }
2265 return 0;
2266}
2267
2268int do_get_bytes(ObjectStore *store, coll_t coll, ghobject_t &ghobj, int fd)
2269{
11fdf7f2 2270 auto ch = store->open_collection(coll);
7c673cae
FG
2271 struct stat st;
2272 mysize_t total;
2273
11fdf7f2 2274 int ret = store->stat(ch, ghobj, &st);
7c673cae
FG
2275 if (ret < 0) {
2276 cerr << "get-bytes: " << cpp_strerror(ret) << std::endl;
2277 return ret;
2278 }
2279
2280 total = st.st_size;
2281 if (debug)
2282 cerr << "size=" << total << std::endl;
2283
2284 uint64_t offset = 0;
2285 bufferlist rawdatabl;
2286 while(total > 0) {
2287 rawdatabl.clear();
2288 mysize_t len = max_read;
2289 if (len > total)
2290 len = total;
2291
11fdf7f2 2292 ret = store->read(ch, ghobj, offset, len, rawdatabl);
7c673cae
FG
2293 if (ret < 0)
2294 return ret;
2295 if (ret == 0)
2296 return -EINVAL;
2297
2298 if (debug)
2299 cerr << "data section offset=" << offset << " len=" << len << std::endl;
2300
2301 total -= ret;
2302 offset += ret;
2303
2304 ret = write(fd, rawdatabl.c_str(), ret);
2305 if (ret == -1) {
2306 perror("write");
2307 return -errno;
2308 }
2309 }
2310
2311 return 0;
2312}
2313
2314int do_set_bytes(ObjectStore *store, coll_t coll,
11fdf7f2 2315 ghobject_t &ghobj, int fd)
7c673cae
FG
2316{
2317 ObjectStore::Transaction tran;
2318 ObjectStore::Transaction *t = &tran;
2319
2320 if (debug)
2321 cerr << "Write " << ghobj << std::endl;
2322
2323 if (!dry_run) {
2324 t->touch(coll, ghobj);
2325 t->truncate(coll, ghobj, 0);
2326 }
2327
2328 uint64_t offset = 0;
2329 bufferlist rawdatabl;
2330 do {
2331 rawdatabl.clear();
2332 ssize_t bytes = rawdatabl.read_fd(fd, max_read);
2333 if (bytes < 0) {
2334 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
2335 return bytes;
2336 }
2337
2338 if (bytes == 0)
2339 break;
2340
2341 if (debug)
2342 cerr << "\tdata: offset " << offset << " bytes " << bytes << std::endl;
2343 if (!dry_run)
2344 t->write(coll, ghobj, offset, bytes, rawdatabl);
2345
2346 offset += bytes;
11fdf7f2 2347 // XXX: Should we queue_transaction() every once in a while for very large files
7c673cae
FG
2348 } while(true);
2349
11fdf7f2 2350 auto ch = store->open_collection(coll);
7c673cae 2351 if (!dry_run)
11fdf7f2 2352 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2353 return 0;
2354}
2355
2356int do_get_attr(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2357{
11fdf7f2 2358 auto ch = store->open_collection(coll);
7c673cae
FG
2359 bufferptr bp;
2360
11fdf7f2 2361 int r = store->getattr(ch, ghobj, key.c_str(), bp);
7c673cae
FG
2362 if (r < 0) {
2363 cerr << "getattr: " << cpp_strerror(r) << std::endl;
2364 return r;
2365 }
2366
2367 string value(bp.c_str(), bp.length());
2368 if (outistty) {
2369 value = cleanbin(value);
2370 value.push_back('\n');
2371 }
2372 cout << value;
2373
2374 return 0;
2375}
2376
2377int do_set_attr(ObjectStore *store, coll_t coll,
11fdf7f2 2378 ghobject_t &ghobj, string key, int fd)
7c673cae
FG
2379{
2380 ObjectStore::Transaction tran;
2381 ObjectStore::Transaction *t = &tran;
2382 bufferlist bl;
2383
2384 if (debug)
2385 cerr << "Setattr " << ghobj << std::endl;
2386
2387 int ret = get_fd_data(fd, bl);
2388 if (ret < 0)
2389 return ret;
2390
2391 if (dry_run)
2392 return 0;
2393
2394 t->touch(coll, ghobj);
2395
2396 t->setattr(coll, ghobj, key, bl);
2397
11fdf7f2
TL
2398 auto ch = store->open_collection(coll);
2399 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2400 return 0;
2401}
2402
2403int do_rm_attr(ObjectStore *store, coll_t coll,
11fdf7f2 2404 ghobject_t &ghobj, string key)
7c673cae
FG
2405{
2406 ObjectStore::Transaction tran;
2407 ObjectStore::Transaction *t = &tran;
2408
2409 if (debug)
2410 cerr << "Rmattr " << ghobj << std::endl;
2411
2412 if (dry_run)
2413 return 0;
2414
2415 t->rmattr(coll, ghobj, key);
2416
11fdf7f2
TL
2417 auto ch = store->open_collection(coll);
2418 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2419 return 0;
2420}
2421
2422int do_get_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2423{
11fdf7f2 2424 auto ch = store->open_collection(coll);
7c673cae
FG
2425 set<string> keys;
2426 map<string, bufferlist> out;
2427
2428 keys.insert(key);
2429
11fdf7f2 2430 int r = store->omap_get_values(ch, ghobj, keys, &out);
7c673cae
FG
2431 if (r < 0) {
2432 cerr << "omap_get_values: " << cpp_strerror(r) << std::endl;
2433 return r;
2434 }
2435
2436 if (out.empty()) {
2437 cerr << "Key not found" << std::endl;
2438 return -ENOENT;
2439 }
2440
11fdf7f2 2441 ceph_assert(out.size() == 1);
7c673cae
FG
2442
2443 bufferlist bl = out.begin()->second;
2444 string value(bl.c_str(), bl.length());
2445 if (outistty) {
2446 value = cleanbin(value);
2447 value.push_back('\n');
2448 }
2449 cout << value;
2450
2451 return 0;
2452}
2453
2454int do_set_omap(ObjectStore *store, coll_t coll,
11fdf7f2 2455 ghobject_t &ghobj, string key, int fd)
7c673cae
FG
2456{
2457 ObjectStore::Transaction tran;
2458 ObjectStore::Transaction *t = &tran;
2459 map<string, bufferlist> attrset;
2460 bufferlist valbl;
2461
2462 if (debug)
2463 cerr << "Set_omap " << ghobj << std::endl;
2464
2465 int ret = get_fd_data(fd, valbl);
2466 if (ret < 0)
2467 return ret;
2468
2469 attrset.insert(pair<string, bufferlist>(key, valbl));
2470
2471 if (dry_run)
2472 return 0;
2473
2474 t->touch(coll, ghobj);
2475
2476 t->omap_setkeys(coll, ghobj, attrset);
2477
11fdf7f2
TL
2478 auto ch = store->open_collection(coll);
2479 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2480 return 0;
2481}
2482
2483int do_rm_omap(ObjectStore *store, coll_t coll,
11fdf7f2 2484 ghobject_t &ghobj, string key)
7c673cae
FG
2485{
2486 ObjectStore::Transaction tran;
2487 ObjectStore::Transaction *t = &tran;
7c673cae
FG
2488
2489 if (debug)
2490 cerr << "Rm_omap " << ghobj << std::endl;
2491
2492 if (dry_run)
2493 return 0;
2494
9f95a23c 2495 t->omap_rmkey(coll, ghobj, key);
7c673cae 2496
11fdf7f2
TL
2497 auto ch = store->open_collection(coll);
2498 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2499 return 0;
2500}
2501
2502int do_get_omaphdr(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2503{
11fdf7f2 2504 auto ch = store->open_collection(coll);
7c673cae
FG
2505 bufferlist hdrbl;
2506
11fdf7f2 2507 int r = store->omap_get_header(ch, ghobj, &hdrbl, true);
7c673cae
FG
2508 if (r < 0) {
2509 cerr << "omap_get_header: " << cpp_strerror(r) << std::endl;
2510 return r;
2511 }
2512
2513 string header(hdrbl.c_str(), hdrbl.length());
2514 if (outistty) {
2515 header = cleanbin(header);
2516 header.push_back('\n');
2517 }
2518 cout << header;
2519
2520 return 0;
2521}
2522
2523int do_set_omaphdr(ObjectStore *store, coll_t coll,
11fdf7f2 2524 ghobject_t &ghobj, int fd)
7c673cae
FG
2525{
2526 ObjectStore::Transaction tran;
2527 ObjectStore::Transaction *t = &tran;
2528 bufferlist hdrbl;
2529
2530 if (debug)
2531 cerr << "Omap_setheader " << ghobj << std::endl;
2532
2533 int ret = get_fd_data(fd, hdrbl);
2534 if (ret)
2535 return ret;
2536
2537 if (dry_run)
2538 return 0;
2539
2540 t->touch(coll, ghobj);
2541
2542 t->omap_setheader(coll, ghobj, hdrbl);
2543
11fdf7f2
TL
2544 auto ch = store->open_collection(coll);
2545 store->queue_transaction(ch, std::move(*t));
7c673cae
FG
2546 return 0;
2547}
2548
2549struct do_fix_lost : public action_on_object_t {
11fdf7f2 2550 void call(ObjectStore *store, coll_t coll,
7c673cae
FG
2551 ghobject_t &ghobj, object_info_t &oi) override {
2552 if (oi.is_lost()) {
2553 cout << coll << "/" << ghobj << " is lost";
2554 if (!dry_run)
2555 cout << ", fixing";
2556 cout << std::endl;
2557 if (dry_run)
11fdf7f2 2558 return;
7c673cae
FG
2559 oi.clear_flag(object_info_t::FLAG_LOST);
2560 bufferlist bl;
11fdf7f2 2561 encode(oi, bl, -1); /* fixme: using full features */
7c673cae
FG
2562 ObjectStore::Transaction t;
2563 t.setattr(coll, ghobj, OI_ATTR, bl);
11fdf7f2
TL
2564 auto ch = store->open_collection(coll);
2565 store->queue_transaction(ch, std::move(t));
7c673cae 2566 }
11fdf7f2 2567 return;
7c673cae
FG
2568 }
2569};
2570
2571int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent = false)
2572{
11fdf7f2 2573 auto ch = store->open_collection(coll);
7c673cae 2574 bufferlist attr;
11fdf7f2 2575 int r = store->getattr(ch, ghobj, SS_ATTR, attr);
7c673cae
FG
2576 if (r < 0) {
2577 if (!silent)
2578 cerr << "Error getting snapset on : " << make_pair(coll, ghobj) << ", "
2579 << cpp_strerror(r) << std::endl;
2580 return r;
2581 }
11fdf7f2 2582 auto bp = attr.cbegin();
7c673cae 2583 try {
11fdf7f2 2584 decode(ss, bp);
7c673cae
FG
2585 } catch (...) {
2586 r = -EINVAL;
2587 cerr << "Error decoding snapset on : " << make_pair(coll, ghobj) << ", "
2588 << cpp_strerror(r) << std::endl;
2589 return r;
2590 }
2591 return 0;
2592}
2593
2594int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
2595{
11fdf7f2 2596 auto ch = store->open_collection(coll);
7c673cae
FG
2597 int r = 0;
2598 formatter->open_object_section("obj");
2599 formatter->open_object_section("id");
2600 ghobj.dump(formatter);
2601 formatter->close_section();
2602
2603 bufferlist attr;
11fdf7f2 2604 int gr = store->getattr(ch, ghobj, OI_ATTR, attr);
7c673cae
FG
2605 if (gr < 0) {
2606 r = gr;
2607 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2608 << cpp_strerror(r) << std::endl;
2609 } else {
2610 object_info_t oi;
11fdf7f2 2611 auto bp = attr.cbegin();
7c673cae 2612 try {
11fdf7f2 2613 decode(oi, bp);
7c673cae
FG
2614 formatter->open_object_section("info");
2615 oi.dump(formatter);
2616 formatter->close_section();
2617 } catch (...) {
2618 r = -EINVAL;
2619 cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
2620 << cpp_strerror(r) << std::endl;
2621 }
2622 }
2623 struct stat st;
11fdf7f2 2624 int sr = store->stat(ch, ghobj, &st, true);
7c673cae
FG
2625 if (sr < 0) {
2626 r = sr;
2627 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2628 << cpp_strerror(r) << std::endl;
2629 } else {
2630 formatter->open_object_section("stat");
2631 formatter->dump_int("size", st.st_size);
2632 formatter->dump_int("blksize", st.st_blksize);
2633 formatter->dump_int("blocks", st.st_blocks);
2634 formatter->dump_int("nlink", st.st_nlink);
2635 formatter->close_section();
2636 }
2637
2638 if (ghobj.hobj.has_snapset()) {
2639 SnapSet ss;
2640 int snr = get_snapset(store, coll, ghobj, ss);
2641 if (snr < 0) {
2642 r = snr;
2643 } else {
2644 formatter->open_object_section("SnapSet");
2645 ss.dump(formatter);
2646 formatter->close_section();
2647 }
2648 }
a8e16298 2649 bufferlist hattr;
11fdf7f2 2650 gr = store->getattr(ch, ghobj, ECUtil::get_hinfo_key(), hattr);
a8e16298
TL
2651 if (gr == 0) {
2652 ECUtil::HashInfo hinfo;
11fdf7f2 2653 auto hp = hattr.cbegin();
a8e16298
TL
2654 try {
2655 decode(hinfo, hp);
2656 formatter->open_object_section("hinfo");
2657 hinfo.dump(formatter);
2658 formatter->close_section();
2659 } catch (...) {
2660 r = -EINVAL;
2661 cerr << "Error decoding hinfo on : " << make_pair(coll, ghobj) << ", "
2662 << cpp_strerror(r) << std::endl;
2663 }
2664 }
9f95a23c
TL
2665 gr = store->dump_onode(ch, ghobj, "onode", formatter);
2666
7c673cae
FG
2667 formatter->close_section();
2668 formatter->flush(cout);
2669 cout << std::endl;
2670 return r;
2671}
2672
11fdf7f2 2673int corrupt_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
1adf2230 2674{
11fdf7f2 2675 auto ch = store->open_collection(coll);
1adf2230 2676 bufferlist attr;
11fdf7f2 2677 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
1adf2230
AA
2678 if (r < 0) {
2679 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2680 << cpp_strerror(r) << std::endl;
2681 return r;
2682 }
2683 object_info_t oi;
11fdf7f2 2684 auto bp = attr.cbegin();
1adf2230 2685 try {
11fdf7f2 2686 decode(oi, bp);
1adf2230
AA
2687 } catch (...) {
2688 r = -EINVAL;
2689 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2690 << cpp_strerror(r) << std::endl;
2691 return r;
2692 }
1adf2230
AA
2693 if (!dry_run) {
2694 attr.clear();
2695 oi.alloc_hint_flags += 0xff;
2696 ObjectStore::Transaction t;
11fdf7f2 2697 encode(oi, attr, -1); /* fixme: using full features */
1adf2230 2698 t.setattr(coll, ghobj, OI_ATTR, attr);
11fdf7f2
TL
2699 auto ch = store->open_collection(coll);
2700 r = store->queue_transaction(ch, std::move(t));
1adf2230
AA
2701 if (r < 0) {
2702 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2703 << cpp_strerror(r) << std::endl;
2704 return r;
2705 }
2706 }
2707 return 0;
2708}
2709
11fdf7f2
TL
2710int set_size(
2711 ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter,
2712 bool corrupt)
7c673cae 2713{
11fdf7f2 2714 auto ch = store->open_collection(coll);
7c673cae
FG
2715 if (ghobj.hobj.is_snapdir()) {
2716 cerr << "Can't set the size of a snapdir" << std::endl;
2717 return -EINVAL;
2718 }
2719 bufferlist attr;
11fdf7f2 2720 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
7c673cae
FG
2721 if (r < 0) {
2722 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2723 << cpp_strerror(r) << std::endl;
2724 return r;
2725 }
2726 object_info_t oi;
11fdf7f2 2727 auto bp = attr.cbegin();
7c673cae 2728 try {
11fdf7f2 2729 decode(oi, bp);
7c673cae
FG
2730 } catch (...) {
2731 r = -EINVAL;
2732 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2733 << cpp_strerror(r) << std::endl;
2734 return r;
2735 }
2736 struct stat st;
11fdf7f2 2737 r = store->stat(ch, ghobj, &st, true);
7c673cae
FG
2738 if (r < 0) {
2739 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2740 << cpp_strerror(r) << std::endl;
2741 }
2742 ghobject_t head(ghobj);
2743 SnapSet ss;
2744 bool found_head = true;
2745 map<snapid_t, uint64_t>::iterator csi;
2746 bool is_snap = ghobj.hobj.is_snap();
2747 if (is_snap) {
2748 head.hobj = head.hobj.get_head();
2749 r = get_snapset(store, coll, head, ss, true);
2750 if (r < 0 && r != -ENOENT) {
2751 // Requested get_snapset() silent, so if not -ENOENT show error
2752 cerr << "Error getting snapset on : " << make_pair(coll, head) << ", "
2753 << cpp_strerror(r) << std::endl;
2754 return r;
2755 }
2756 if (r == -ENOENT) {
2757 head.hobj = head.hobj.get_snapdir();
2758 r = get_snapset(store, coll, head, ss);
2759 if (r < 0)
2760 return r;
2761 found_head = false;
2762 } else {
2763 found_head = true;
2764 }
2765 csi = ss.clone_size.find(ghobj.hobj.snap);
2766 if (csi == ss.clone_size.end()) {
2767 cerr << "SnapSet is missing clone_size for snap " << ghobj.hobj.snap << std::endl;
2768 return -EINVAL;
2769 }
2770 }
2771 if ((uint64_t)st.st_size == setsize && oi.size == setsize
2772 && (!is_snap || csi->second == setsize)) {
2773 cout << "Size of object is already " << setsize << std::endl;
2774 return 0;
2775 }
2776 cout << "Setting size to " << setsize << ", stat size " << st.st_size
2777 << ", obj info size " << oi.size;
2778 if (is_snap) {
2779 cout << ", " << (found_head ? "head" : "snapdir")
2780 << " clone_size " << csi->second;
2781 csi->second = setsize;
2782 }
2783 cout << std::endl;
2784 if (!dry_run) {
2785 attr.clear();
2786 oi.size = setsize;
7c673cae 2787 ObjectStore::Transaction t;
b5b8bbf5 2788 // Only modify object info if we want to corrupt it
b32b8144 2789 if (!corrupt && (uint64_t)st.st_size != setsize) {
b5b8bbf5 2790 t.truncate(coll, ghobj, setsize);
b32b8144
FG
2791 // Changing objectstore size will invalidate data_digest, so clear it.
2792 oi.clear_data_digest();
2793 }
11fdf7f2 2794 encode(oi, attr, -1); /* fixme: using full features */
b32b8144 2795 t.setattr(coll, ghobj, OI_ATTR, attr);
7c673cae
FG
2796 if (is_snap) {
2797 bufferlist snapattr;
2798 snapattr.clear();
11fdf7f2 2799 encode(ss, snapattr);
7c673cae
FG
2800 t.setattr(coll, head, SS_ATTR, snapattr);
2801 }
11fdf7f2
TL
2802 auto ch = store->open_collection(coll);
2803 r = store->queue_transaction(ch, std::move(t));
2804 if (r < 0) {
2805 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2806 << cpp_strerror(r) << std::endl;
2807 return r;
2808 }
2809 }
2810 return 0;
2811}
2812
2813int clear_data_digest(ObjectStore *store, coll_t coll, ghobject_t &ghobj) {
2814 auto ch = store->open_collection(coll);
2815 bufferlist attr;
2816 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2817 if (r < 0) {
2818 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2819 << cpp_strerror(r) << std::endl;
2820 return r;
2821 }
2822 object_info_t oi;
2823 auto bp = attr.cbegin();
2824 try {
2825 decode(oi, bp);
2826 } catch (...) {
2827 r = -EINVAL;
2828 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2829 << cpp_strerror(r) << std::endl;
2830 return r;
2831 }
2832 if (!dry_run) {
2833 attr.clear();
2834 oi.clear_data_digest();
2835 encode(oi, attr, -1); /* fixme: using full features */
2836 ObjectStore::Transaction t;
2837 t.setattr(coll, ghobj, OI_ATTR, attr);
2838 auto ch = store->open_collection(coll);
2839 r = store->queue_transaction(ch, std::move(t));
7c673cae
FG
2840 if (r < 0) {
2841 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2842 << cpp_strerror(r) << std::endl;
2843 return r;
2844 }
2845 }
2846 return 0;
2847}
2848
2849int clear_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj,
11fdf7f2 2850 string arg)
7c673cae
FG
2851{
2852 SnapSet ss;
2853 int ret = get_snapset(store, coll, ghobj, ss);
2854 if (ret < 0)
2855 return ret;
2856
7c673cae
FG
2857 // Use "corrupt" to clear entire SnapSet
2858 // Use "seq" to just corrupt SnapSet.seq
2859 if (arg == "corrupt" || arg == "seq")
2860 ss.seq = 0;
9f95a23c 2861 // Use "snaps" to just clear SnapSet.clone_snaps
7c673cae 2862 if (arg == "corrupt" || arg == "snaps")
9f95a23c 2863 ss.clone_snaps.clear();
7c673cae
FG
2864 // By default just clear clone, clone_overlap and clone_size
2865 if (arg == "corrupt")
2866 arg = "";
2867 if (arg == "" || arg == "clones")
2868 ss.clones.clear();
2869 if (arg == "" || arg == "clone_overlap")
2870 ss.clone_overlap.clear();
2871 if (arg == "" || arg == "clone_size")
2872 ss.clone_size.clear();
2873 // Break all clone sizes by adding 1
2874 if (arg == "size") {
2875 for (map<snapid_t, uint64_t>::iterator i = ss.clone_size.begin();
2876 i != ss.clone_size.end(); ++i)
2877 ++(i->second);
2878 }
2879
2880 if (!dry_run) {
2881 bufferlist bl;
11fdf7f2 2882 encode(ss, bl);
7c673cae
FG
2883 ObjectStore::Transaction t;
2884 t.setattr(coll, ghobj, SS_ATTR, bl);
11fdf7f2
TL
2885 auto ch = store->open_collection(coll);
2886 int r = store->queue_transaction(ch, std::move(t));
7c673cae
FG
2887 if (r < 0) {
2888 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2889 << cpp_strerror(r) << std::endl;
2890 return r;
2891 }
2892 }
2893 return 0;
2894}
2895
2896vector<snapid_t>::iterator find(vector<snapid_t> &v, snapid_t clid)
2897{
2898 return std::find(v.begin(), v.end(), clid);
2899}
2900
2901map<snapid_t, interval_set<uint64_t> >::iterator
2902find(map<snapid_t, interval_set<uint64_t> > &m, snapid_t clid)
2903{
2904 return m.find(clid);
2905}
2906
2907map<snapid_t, uint64_t>::iterator find(map<snapid_t, uint64_t> &m,
2908 snapid_t clid)
2909{
2910 return m.find(clid);
2911}
2912
2913template<class T>
2914int remove_from(T &mv, string name, snapid_t cloneid, bool force)
2915{
2916 typename T::iterator i = find(mv, cloneid);
2917 if (i != mv.end()) {
2918 mv.erase(i);
2919 } else {
2920 cerr << "Clone " << cloneid << " doesn't exist in " << name;
2921 if (force) {
2922 cerr << " (ignored)" << std::endl;
2923 return 0;
2924 }
2925 cerr << std::endl;
2926 return -EINVAL;
2927 }
2928 return 0;
2929}
2930
11fdf7f2
TL
2931int remove_clone(
2932 ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cloneid, bool force)
7c673cae
FG
2933{
2934 // XXX: Don't allow this if in a cache tier or former cache tier
2935 // bool allow_incomplete_clones() const {
2936 // return cache_mode != CACHEMODE_NONE || has_flag(FLAG_INCOMPLETE_CLONES);
2937
2938 SnapSet snapset;
2939 int ret = get_snapset(store, coll, ghobj, snapset);
2940 if (ret < 0)
2941 return ret;
2942
2943 // Derived from trim_object()
2944 // ...from snapset
2945 vector<snapid_t>::iterator p;
2946 for (p = snapset.clones.begin(); p != snapset.clones.end(); ++p)
2947 if (*p == cloneid)
2948 break;
2949 if (p == snapset.clones.end()) {
2950 cerr << "Clone " << cloneid << " not present";
2951 return -ENOENT;
2952 }
2953 if (p != snapset.clones.begin()) {
2954 // not the oldest... merge overlap into next older clone
2955 vector<snapid_t>::iterator n = p - 1;
2956 hobject_t prev_coid = ghobj.hobj;
2957 prev_coid.snap = *n;
2958 //bool adjust_prev_bytes = is_present_clone(prev_coid);
2959
2960 //if (adjust_prev_bytes)
2961 // ctx->delta_stats.num_bytes -= snapset.get_clone_bytes(*n);
2962
2963 snapset.clone_overlap[*n].intersection_of(
2964 snapset.clone_overlap[*p]);
2965
2966 //if (adjust_prev_bytes)
2967 // ctx->delta_stats.num_bytes += snapset.get_clone_bytes(*n);
2968 }
2969
2970 ret = remove_from(snapset.clones, "clones", cloneid, force);
2971 if (ret) return ret;
2972 ret = remove_from(snapset.clone_overlap, "clone_overlap", cloneid, force);
2973 if (ret) return ret;
2974 ret = remove_from(snapset.clone_size, "clone_size", cloneid, force);
2975 if (ret) return ret;
2976
2977 if (dry_run)
2978 return 0;
2979
2980 bufferlist bl;
11fdf7f2 2981 encode(snapset, bl);
7c673cae
FG
2982 ObjectStore::Transaction t;
2983 t.setattr(coll, ghobj, SS_ATTR, bl);
11fdf7f2
TL
2984 auto ch = store->open_collection(coll);
2985 int r = store->queue_transaction(ch, std::move(t));
7c673cae
FG
2986 if (r < 0) {
2987 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2988 << cpp_strerror(r) << std::endl;
2989 return r;
2990 }
2991 cout << "Removal of clone " << cloneid << " complete" << std::endl;
2992 cout << "Use pg repair after OSD restarted to correct stat information" << std::endl;
2993 return 0;
2994}
2995
2996int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst)
2997{
2998 cout << "dup from " << src->get_type() << ": " << srcpath << "\n"
2999 << " to " << dst->get_type() << ": " << dstpath
3000 << std::endl;
7c673cae
FG
3001 int num, i;
3002 vector<coll_t> collections;
3003 int r;
3004
3005 r = src->mount();
3006 if (r < 0) {
3007 cerr << "failed to mount src: " << cpp_strerror(r) << std::endl;
3008 return r;
3009 }
3010 r = dst->mount();
3011 if (r < 0) {
3012 cerr << "failed to mount dst: " << cpp_strerror(r) << std::endl;
3013 goto out_src;
3014 }
3015
3016 if (src->get_fsid() != dst->get_fsid()) {
3017 cerr << "src fsid " << src->get_fsid() << " != dest " << dst->get_fsid()
3018 << std::endl;
3019 goto out;
3020 }
3021 cout << "fsid " << src->get_fsid() << std::endl;
3022
3023 // make sure dst is empty
3024 r = dst->list_collections(collections);
3025 if (r < 0) {
3026 cerr << "error listing collections on dst: " << cpp_strerror(r) << std::endl;
3027 goto out;
3028 }
3029 if (!collections.empty()) {
3030 cerr << "destination store is not empty" << std::endl;
3031 goto out;
3032 }
3033
3034 r = src->list_collections(collections);
3035 if (r < 0) {
3036 cerr << "error listing collections on src: " << cpp_strerror(r) << std::endl;
3037 goto out;
3038 }
3039
3040 num = collections.size();
3041 cout << num << " collections" << std::endl;
3042 i = 1;
3043 for (auto cid : collections) {
3044 cout << i++ << "/" << num << " " << cid << std::endl;
11fdf7f2
TL
3045 auto ch = src->open_collection(cid);
3046 auto dch = dst->create_new_collection(cid);
7c673cae
FG
3047 {
3048 ObjectStore::Transaction t;
11fdf7f2 3049 int bits = src->collection_bits(ch);
7c673cae 3050 if (bits < 0) {
181888fb
FG
3051 if (src->get_type() == "filestore" && cid.is_meta()) {
3052 bits = 0;
3053 } else {
3054 cerr << "cannot get bit count for collection " << cid << ": "
3055 << cpp_strerror(bits) << std::endl;
3056 goto out;
3057 }
7c673cae
FG
3058 }
3059 t.create_collection(cid, bits);
11fdf7f2 3060 dst->queue_transaction(dch, std::move(t));
7c673cae
FG
3061 }
3062
3063 ghobject_t pos;
3064 uint64_t n = 0;
3065 uint64_t bytes = 0, keys = 0;
3066 while (true) {
3067 vector<ghobject_t> ls;
11fdf7f2 3068 r = src->collection_list(ch, pos, ghobject_t::get_max(), 1000, &ls, &pos);
7c673cae
FG
3069 if (r < 0) {
3070 cerr << "collection_list on " << cid << " from " << pos << " got: "
3071 << cpp_strerror(r) << std::endl;
3072 goto out;
3073 }
3074 if (ls.empty()) {
3075 break;
3076 }
3077
3078 for (auto& oid : ls) {
3079 //cout << " " << cid << " " << oid << std::endl;
3080 if (n % 100 == 0) {
3081 cout << " " << std::setw(16) << n << " objects, "
3082 << std::setw(16) << bytes << " bytes, "
3083 << std::setw(16) << keys << " keys"
3084 << std::setw(1) << "\r" << std::flush;
3085 }
3086 n++;
3087
3088 ObjectStore::Transaction t;
3089 t.touch(cid, oid);
3090
20effc67 3091 map<string,bufferptr,less<>> attrs;
11fdf7f2 3092 src->getattrs(ch, oid, attrs);
7c673cae
FG
3093 if (!attrs.empty()) {
3094 t.setattrs(cid, oid, attrs);
3095 }
3096
3097 bufferlist bl;
11fdf7f2 3098 src->read(ch, oid, 0, 0, bl);
7c673cae
FG
3099 if (bl.length()) {
3100 t.write(cid, oid, 0, bl.length(), bl);
3101 bytes += bl.length();
3102 }
3103
3104 bufferlist header;
3105 map<string,bufferlist> omap;
11fdf7f2 3106 src->omap_get(ch, oid, &header, &omap);
7c673cae
FG
3107 if (header.length()) {
3108 t.omap_setheader(cid, oid, header);
3109 ++keys;
3110 }
3111 if (!omap.empty()) {
3112 keys += omap.size();
3113 t.omap_setkeys(cid, oid, omap);
3114 }
3115
11fdf7f2 3116 dst->queue_transaction(dch, std::move(t));
7c673cae
FG
3117 }
3118 }
3119 cout << " " << std::setw(16) << n << " objects, "
3120 << std::setw(16) << bytes << " bytes, "
3121 << std::setw(16) << keys << " keys"
3122 << std::setw(1) << std::endl;
3123 }
3124
3125 // keyring
3126 cout << "keyring" << std::endl;
3127 {
3128 bufferlist bl;
3129 string s = srcpath + "/keyring";
3130 string err;
3131 r = bl.read_file(s.c_str(), &err);
3132 if (r < 0) {
3133 cerr << "failed to copy " << s << ": " << err << std::endl;
3134 } else {
3135 string d = dstpath + "/keyring";
3136 bl.write_file(d.c_str(), 0600);
3137 }
3138 }
3139
3140 // osd metadata
3141 cout << "duping osd metadata" << std::endl;
3142 {
3143 for (auto k : {"magic", "whoami", "ceph_fsid", "fsid"}) {
3144 string val;
3145 src->read_meta(k, &val);
3146 dst->write_meta(k, val);
3147 }
3148 }
3149
3150 dst->write_meta("ready", "ready");
3151
3152 cout << "done." << std::endl;
3153 r = 0;
3154 out:
3155 dst->umount();
3156 out_src:
3157 src->umount();
3158 return r;
3159}
3160
2a845540
TL
3161
3162const int ceph_entity_name_type(const string name)
3163{
3164 if (name == "mds") return CEPH_ENTITY_TYPE_MDS;
3165 if (name == "osd") return CEPH_ENTITY_TYPE_OSD;
3166 if (name == "mon") return CEPH_ENTITY_TYPE_MON;
3167 if (name == "client") return CEPH_ENTITY_TYPE_CLIENT;
3168 if (name == "mgr") return CEPH_ENTITY_TYPE_MGR;
3169 if (name == "auth") return CEPH_ENTITY_TYPE_AUTH;
3170 return -1;
3171}
3172
3173eversion_t get_eversion_from_str(const string& s) {
3174 eversion_t e;
3175 vector<string> result;
3176 boost::split(result, s, boost::is_any_of("'"));
3177 if (result.size() != 2) {
3178 cerr << "eversion_t: invalid format: '" << s << "'" << std::endl;
3179 return e;
3180 }
3181 e.epoch = atoi(result[0].c_str());
3182 e.version = atoi(result[1].c_str());
3183 return e;
3184}
3185
3186osd_reqid_t get_reqid_from_str(const string& s) {
3187 osd_reqid_t reqid;
3188
3189 vector<string> result;
3190 boost::split(result, s, boost::is_any_of(".:"));
3191 if (result.size() != 4) {
3192 cerr << "reqid: invalid format " << s << std::endl;
3193 return osd_reqid_t();
3194 }
3195 reqid.name._type = ceph_entity_name_type(result[0]);
3196 reqid.name._num = atoi(result[1].c_str());
3197
3198 reqid.inc = atoi(result[2].c_str());
3199 reqid.tid = atoi(result[3].c_str());
3200 return reqid;
3201}
3202
3203void do_dups_inject_transction(ObjectStore *store, spg_t r_pgid, map<string,bufferlist> *new_dups)
3204{
3205 ObjectStore::Transaction t;
3206 coll_t coll(r_pgid);
3207 cerr << "injecting dups into pgid:" << r_pgid << " num of dups:" << new_dups->size() << std::endl;
3208 t.omap_setkeys(coll, r_pgid.make_pgmeta_oid(), (*new_dups));
3209 auto ch = store->open_collection(coll);
3210 store->queue_transaction(ch, std::move(t));
3211 new_dups->clear();
3212}
3213
3214int do_dups_inject_object(ObjectStore *store, spg_t r_pgid, json_spirit::mObject &in_json_obj,
3215 map<string,bufferlist> *new_dups, bool debug) {
3216 std::map<std::string, json_spirit::mValue>::const_iterator it = in_json_obj.find("generate");
3217 int32_t generate = 0;
3218 if (it != in_json_obj.end()) {
3219 generate = atoi(it->second.get_str().c_str());
3220 }
3221
3222 it = in_json_obj.find("reqid");
3223 if (it == in_json_obj.end()) {
3224 return 1;
3225 }
3226 osd_reqid_t reqid(get_reqid_from_str(it->second.get_str()));
3227 it = in_json_obj.find("version");
3228 if (it == in_json_obj.end()) {
3229 return 1;
3230 }
3231 eversion_t version(get_eversion_from_str(it->second.get_str()));
3232 it = in_json_obj.find("user_version");
3233 if (it == in_json_obj.end()) {
3234 return 1;
3235 }
3236 version_t user_version = atoi(it->second.get_str().c_str());
3237 it = in_json_obj.find("return_code");
3238 if (it == in_json_obj.end()) {
3239 return 1;
3240 }
3241 int32_t return_code = atoi(it->second.get_str().c_str());
3242 if (generate) {
3243 for(auto i = 0; i < generate; ++i) {
3244 version.version++;
3245 if (debug) {
3246 cout << "generate dups reqid " << reqid << " v=" << version << std::endl;
3247 }
3248 pg_log_dup_t tmp(version, user_version, reqid, return_code);
3249 bufferlist bl;
3250 encode(tmp, bl);
3251 (*new_dups)[tmp.get_key_name()] = std::move(bl);
3252 if ( new_dups->size() > 50000 ) {
3253 do_dups_inject_transction(store, r_pgid, new_dups);
3254 cout << "inject of " << i << " dups into pgid:" << r_pgid << " done..." << std::endl;
3255 }
3256 }
3257 return 0;
3258 } else {
3259 pg_log_dup_t tmp(version, user_version, reqid, return_code);
3260 if (debug) {
3261 cout << "adding dup: " << tmp << "into key:" << tmp.get_key_name() << std::endl;
3262 }
3263 bufferlist bl;
3264 encode(tmp, bl);
3265 (*new_dups)[tmp.get_key_name()] = std::move(bl);
3266 }
3267 return 0;
3268}
3269
3270void do_dups_inject_from_json(ObjectStore *store, spg_t r_pgid, json_spirit::mValue &inJson, bool debug)
3271{
3272 map<string,bufferlist> new_dups;
3273 const vector<json_spirit::mValue>& o = inJson.get_array();
3274 for (const auto& obj : o) {
3275 if (obj.type() == json_spirit::obj_type) {
3276 json_spirit::mObject Mobj = obj.get_obj();
3277 do_dups_inject_object(store, r_pgid, Mobj, &new_dups, debug);
3278 } else {
3279 throw std::runtime_error("JSON array/object not allowed type:" + std::to_string(obj.type()));
3280 return;
3281 }
3282 }
3283 if (new_dups.size() > 0) {
3284 do_dups_inject_transction(store, r_pgid, &new_dups);
3285 }
3286
3287
3288 return ;
3289}
3290
7c673cae
FG
3291void usage(po::options_description &desc)
3292{
3293 cerr << std::endl;
3294 cerr << desc << std::endl;
3295 cerr << std::endl;
3296 cerr << "Positional syntax:" << std::endl;
3297 cerr << std::endl;
3298 cerr << "ceph-objectstore-tool ... <object> (get|set)-bytes [file]" << std::endl;
3299 cerr << "ceph-objectstore-tool ... <object> set-(attr|omap) <key> [file]" << std::endl;
3300 cerr << "ceph-objectstore-tool ... <object> (get|rm)-(attr|omap) <key>" << std::endl;
3301 cerr << "ceph-objectstore-tool ... <object> get-omaphdr" << std::endl;
3302 cerr << "ceph-objectstore-tool ... <object> set-omaphdr [file]" << std::endl;
3303 cerr << "ceph-objectstore-tool ... <object> list-attrs" << std::endl;
3304 cerr << "ceph-objectstore-tool ... <object> list-omap" << std::endl;
3305 cerr << "ceph-objectstore-tool ... <object> remove|removeall" << std::endl;
3306 cerr << "ceph-objectstore-tool ... <object> dump" << std::endl;
3307 cerr << "ceph-objectstore-tool ... <object> set-size" << std::endl;
11fdf7f2 3308 cerr << "ceph-objectstore-tool ... <object> clear-data-digest" << std::endl;
7c673cae
FG
3309 cerr << "ceph-objectstore-tool ... <object> remove-clone-metadata <cloneid>" << std::endl;
3310 cerr << std::endl;
3311 cerr << "<object> can be a JSON object description as displayed" << std::endl;
3312 cerr << "by --op list." << std::endl;
3313 cerr << "<object> can be an object name which will be looked up in all" << std::endl;
3314 cerr << "the OSD's PGs." << std::endl;
3315 cerr << "<object> can be the empty string ('') which with a provided pgid " << std::endl;
3316 cerr << "specifies the pgmeta object" << std::endl;
3317 cerr << std::endl;
3318 cerr << "The optional [file] argument will read stdin or write stdout" << std::endl;
3319 cerr << "if not specified or if '-' specified." << std::endl;
3320}
3321
// Return true when `check` finishes with the exact character sequence
// `ending`.  An empty `ending` matches every string.
bool ends_with(const std::string& check, const std::string& ending)
{
  if (ending.size() > check.size()) {
    return false;
  }
  const std::string::size_type tail_start = check.size() - ending.size();
  return check.compare(tail_start, ending.size(), ending) == 0;
}
3326
3327// Based on FileStore::dump_journal(), set-up enough to only dump
3328int mydump_journal(Formatter *f, string journalpath, bool m_journal_dio)
3329{
3330 int r;
3331
3332 if (!journalpath.length())
3333 return -EINVAL;
3334
3335 FileJournal *journal = new FileJournal(g_ceph_context, uuid_d(), NULL, NULL,
3336 journalpath.c_str(), m_journal_dio);
3337 r = journal->_fdump(*f, false);
3338 delete journal;
3339 return r;
3340}
3341
3342int apply_layout_settings(ObjectStore *os, const OSDSuperblock &superblock,
1adf2230
AA
3343 const string &pool_name, const spg_t &pgid, bool dry_run,
3344 int target_level)
7c673cae
FG
3345{
3346 int r = 0;
3347
3348 FileStore *fs = dynamic_cast<FileStore*>(os);
3349 if (!fs) {
3350 cerr << "Nothing to do for non-filestore backend" << std::endl;
3351 return 0; // making this return success makes testing easier
3352 }
3353
3354 OSDMap curmap;
3355 bufferlist bl;
3356 r = get_osdmap(os, superblock.current_epoch, curmap, bl);
3357 if (r) {
3358 cerr << "Can't find local OSDMap: " << cpp_strerror(r) << std::endl;
3359 return r;
3360 }
3361
3362 int64_t poolid = -1;
3363 if (pool_name.length()) {
3364 poolid = curmap.lookup_pg_pool_name(pool_name);
3365 if (poolid < 0) {
3366 cerr << "Couldn't find pool " << pool_name << ": " << cpp_strerror(poolid)
3367 << std::endl;
3368 return poolid;
3369 }
3370 }
3371
3372 vector<coll_t> collections, filtered_colls;
3373 r = os->list_collections(collections);
3374 if (r < 0) {
3375 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
3376 return r;
3377 }
3378
3379 for (auto const &coll : collections) {
3380 spg_t coll_pgid;
3381 if (coll.is_pg(&coll_pgid) &&
3382 ((poolid >= 0 && coll_pgid.pool() == (uint64_t)poolid) ||
3383 coll_pgid == pgid)) {
3384 filtered_colls.push_back(coll);
3385 }
3386 }
3387
3388 size_t done = 0, total = filtered_colls.size();
3389 for (auto const &coll : filtered_colls) {
3390 if (dry_run) {
3391 cerr << "Would apply layout settings to " << coll << std::endl;
3392 } else {
3393 cerr << "Finished " << done << "/" << total << " collections" << "\r";
1adf2230 3394 r = fs->apply_layout_settings(coll, target_level);
7c673cae
FG
3395 if (r < 0) {
3396 cerr << "Error applying layout settings to " << coll << std::endl;
3397 return r;
3398 }
3399 }
3400 ++done;
3401 }
3402
3403 cerr << "Finished " << total << "/" << total << " collections" << "\r" << std::endl;
3404 return r;
3405}
3406
3407int main(int argc, char **argv)
3408{
3409 string dpath, jpath, pgidstr, op, file, mountpoint, mon_store_path, object;
3410 string target_data_path, fsid;
11fdf7f2 3411 string objcmd, arg1, arg2, type, format, argnspace, pool, rmtypestr;
7c673cae
FG
3412 boost::optional<std::string> nspace;
3413 spg_t pgid;
3414 unsigned epoch = 0;
9f95a23c 3415 unsigned slow_threshold = 16;
7c673cae
FG
3416 ghobject_t ghobj;
3417 bool human_readable;
7c673cae 3418 Formatter *formatter;
9f95a23c 3419 bool head, tty;
7c673cae
FG
3420
3421 po::options_description desc("Allowed options");
3422 desc.add_options()
3423 ("help", "produce help message")
3424 ("type", po::value<string>(&type),
11fdf7f2 3425 "Arg is one of [bluestore (default), filestore, memstore]")
7c673cae
FG
3426 ("data-path", po::value<string>(&dpath),
3427 "path to object store, mandatory")
3428 ("journal-path", po::value<string>(&jpath),
3429 "path to journal, use if tool can't find it")
3430 ("pgid", po::value<string>(&pgidstr),
33c7a0ef 3431 "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, trim-pg-log-dups and mandatory for apply-layout-settings if --pool is not specified")
7c673cae
FG
3432 ("pool", po::value<string>(&pool),
3433 "Pool name, mandatory for apply-layout-settings if --pgid is not specified")
3434 ("op", po::value<string>(&op),
9f95a23c 3435 "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, list-slow-omap, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
33c7a0ef 3436 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log, trim-pg-log-dups statfs]")
7c673cae
FG
3437 ("epoch", po::value<unsigned>(&epoch),
3438 "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
3439 ("file", po::value<string>(&file),
3efd9988 3440 "path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap")
7c673cae
FG
3441 ("mon-store-path", po::value<string>(&mon_store_path),
3442 "path of monstore to update-mon-db")
3443 ("fsid", po::value<string>(&fsid),
3444 "fsid for new store created by mkfs")
3445 ("target-data-path", po::value<string>(&target_data_path),
3446 "path of target object store (for --op dup)")
3447 ("mountpoint", po::value<string>(&mountpoint),
3448 "fuse mountpoint")
3449 ("format", po::value<string>(&format)->default_value("json-pretty"),
3450 "Output format which may be json, json-pretty, xml, xml-pretty")
3451 ("debug", "Enable diagnostic output to stderr")
9f95a23c
TL
3452 ("no-mon-config", "Do not contact mons for config")
3453 ("no-superblock", "Do not read superblock")
7c673cae
FG
3454 ("force", "Ignore some types of errors and proceed with operation - USE WITH CAUTION: CORRUPTION POSSIBLE NOW OR IN THE FUTURE")
3455 ("skip-journal-replay", "Disable journal replay")
3456 ("skip-mount-omap", "Disable mounting of omap")
3457 ("head", "Find head/snapdir when searching for objects by name")
3458 ("dry-run", "Don't modify the objectstore")
9f95a23c 3459 ("tty", "Treat stdout as a tty (no binary data)")
7c673cae 3460 ("namespace", po::value<string>(&argnspace), "Specify namespace when searching for objects")
11fdf7f2 3461 ("rmtype", po::value<string>(&rmtypestr), "Specify corrupting object removal 'snapmap' or 'nosnapmap' - TESTING USE ONLY")
9f95a23c
TL
3462 ("slow-omap-threshold", po::value<unsigned>(&slow_threshold),
3463 "Threshold (in seconds) to consider omap listing slow (for op=list-slow-omap)")
7c673cae
FG
3464 ;
3465
3466 po::options_description positional("Positional options");
3467 positional.add_options()
3468 ("object", po::value<string>(&object), "'' for pgmeta_oid, object name or ghobject in json")
3469 ("objcmd", po::value<string>(&objcmd), "command [(get|set)-bytes, (get|set|rm)-(attr|omap), (get|set)-omaphdr, list-attrs, list-omap, remove]")
91327a77 3470 ("arg1", po::value<string>(&arg1), "arg1 based on cmd")
7c673cae 3471 ("arg2", po::value<string>(&arg2), "arg2 based on cmd")
7c673cae
FG
3472 ;
3473
b32b8144 3474 po::options_description all;
7c673cae
FG
3475 all.add(desc).add(positional);
3476
3477 po::positional_options_description pd;
3478 pd.add("object", 1).add("objcmd", 1).add("arg1", 1).add("arg2", 1);
3479
3480 vector<string> ceph_option_strings;
11fdf7f2 3481
7c673cae
FG
3482 po::variables_map vm;
3483 try {
3484 po::parsed_options parsed =
3485 po::command_line_parser(argc, argv).options(all).allow_unregistered().positional(pd).run();
3486 po::store( parsed, vm);
3487 po::notify(vm);
3488 ceph_option_strings = po::collect_unrecognized(parsed.options,
3489 po::include_positional);
3490 } catch(po::error &e) {
3491 std::cerr << e.what() << std::endl;
3492 return 1;
3493 }
3494
3495 if (vm.count("help")) {
b32b8144 3496 usage(desc);
7c673cae
FG
3497 return 1;
3498 }
3499
11fdf7f2
TL
3500 // Compatibility with previous option name
3501 if (op == "dump-import")
3502 op = "dump-export";
3503
3efd9988 3504 debug = (vm.count("debug") > 0);
7c673cae 3505
3efd9988 3506 force = (vm.count("force") > 0);
7c673cae 3507
9f95a23c
TL
3508 no_superblock = (vm.count("no-superblock") > 0);
3509
7c673cae
FG
3510 if (vm.count("namespace"))
3511 nspace = argnspace;
3512
3efd9988 3513 dry_run = (vm.count("dry-run") > 0);
9f95a23c 3514 tty = (vm.count("tty") > 0);
3efd9988 3515
7c673cae
FG
3516 osflagbits_t flags = 0;
3517 if (dry_run || vm.count("skip-journal-replay"))
3518 flags |= SKIP_JOURNAL_REPLAY;
3519 if (vm.count("skip-mount-omap"))
3520 flags |= SKIP_MOUNT_OMAP;
3521 if (op == "update-mon-db")
3522 flags |= SKIP_JOURNAL_REPLAY;
3efd9988 3523
7c673cae
FG
3524 head = (vm.count("head") > 0);
3525
11fdf7f2
TL
3526 // infer osd id so we can authenticate
3527 char fn[PATH_MAX];
3528 snprintf(fn, sizeof(fn), "%s/whoami", dpath.c_str());
3529 int fd = ::open(fn, O_RDONLY);
3530 if (fd >= 0) {
3531 bufferlist bl;
3532 bl.read_fd(fd, 64);
3533 string s(bl.c_str(), bl.length());
3534 int whoami = atoi(s.c_str());
3535 vector<string> tmp;
3536 // identify ourselves as this osd so we can auth and fetch our configs
3537 tmp.push_back("-n");
3538 tmp.push_back(string("osd.") + stringify(whoami));
3539 // populate osd_data so that the default keyring location works
3540 tmp.push_back("--osd-data");
3541 tmp.push_back(dpath);
3542 tmp.insert(tmp.end(), ceph_option_strings.begin(),
3543 ceph_option_strings.end());
3544 tmp.swap(ceph_option_strings);
3545 }
3546
7c673cae 3547 vector<const char *> ceph_options;
7c673cae
FG
3548 ceph_options.reserve(ceph_options.size() + ceph_option_strings.size());
3549 for (vector<string>::iterator i = ceph_option_strings.begin();
3550 i != ceph_option_strings.end();
3551 ++i) {
3552 ceph_options.push_back(i->c_str());
3553 }
3554
7c673cae 3555 snprintf(fn, sizeof(fn), "%s/type", dpath.c_str());
11fdf7f2 3556 fd = ::open(fn, O_RDONLY);
7c673cae
FG
3557 if (fd >= 0) {
3558 bufferlist bl;
3559 bl.read_fd(fd, 64);
3560 if (bl.length()) {
3561 string dp_type = string(bl.c_str(), bl.length() - 1); // drop \n
3562 if (vm.count("type") && dp_type != "" && type != dp_type)
3563 cerr << "WARNING: Ignoring type \"" << type << "\" - found data-path type \""
3564 << dp_type << "\"" << std::endl;
3565 type = dp_type;
3566 //cout << "object store type is " << type << std::endl;
3567 }
3568 ::close(fd);
3569 }
11fdf7f2 3570
7c673cae 3571 if (!vm.count("type") && type == "") {
11fdf7f2 3572 type = "bluestore";
7c673cae
FG
3573 }
3574 if (!vm.count("data-path") &&
11fdf7f2 3575 op != "dump-export" &&
7c673cae
FG
3576 !(op == "dump-journal" && type == "filestore")) {
3577 cerr << "Must provide --data-path" << std::endl;
3578 usage(desc);
3579 return 1;
3580 }
3581 if (type == "filestore" && !vm.count("journal-path")) {
3582 jpath = dpath + "/journal";
3583 }
3584 if (!vm.count("op") && !vm.count("object")) {
3585 cerr << "Must provide --op or object command..." << std::endl;
3586 usage(desc);
3587 return 1;
3588 }
91327a77 3589 if (op != "list" && op != "apply-layout-settings" &&
7c673cae
FG
3590 vm.count("op") && vm.count("object")) {
3591 cerr << "Can't specify both --op and object command syntax" << std::endl;
3592 usage(desc);
3593 return 1;
3594 }
3595 if (op == "apply-layout-settings" && !(vm.count("pool") ^ vm.count("pgid"))) {
3596 cerr << "apply-layout-settings requires either --pool or --pgid"
3597 << std::endl;
3598 usage(desc);
3599 return 1;
3600 }
91327a77 3601 if (op != "list" && op != "apply-layout-settings" && vm.count("object") && !vm.count("objcmd")) {
7c673cae
FG
3602 cerr << "Invalid syntax, missing command" << std::endl;
3603 usage(desc);
3604 return 1;
3605 }
3606 if (op == "fuse" && mountpoint.length() == 0) {
3607 cerr << "Missing fuse mountpoint" << std::endl;
3608 usage(desc);
3609 return 1;
3610 }
9f95a23c 3611 outistty = isatty(STDOUT_FILENO) || tty;
7c673cae
FG
3612
3613 file_fd = fd_none;
3efd9988 3614 if ((op == "export" || op == "export-remove" || op == "get-osdmap" || op == "get-inc-osdmap") && !dry_run) {
7c673cae
FG
3615 if (!vm.count("file") || file == "-") {
3616 if (outistty) {
3617 cerr << "stdout is a tty and no --file filename specified" << std::endl;
3618 return 1;
3619 }
3620 file_fd = STDOUT_FILENO;
3621 } else {
3622 file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
3623 }
2a845540 3624 } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap" || op == "pg-log-inject-dups") {
7c673cae
FG
3625 if (!vm.count("file") || file == "-") {
3626 if (isatty(STDIN_FILENO)) {
3627 cerr << "stdin is a tty and no --file filename specified" << std::endl;
3628 return 1;
3629 }
3630 file_fd = STDIN_FILENO;
3631 } else {
3632 file_fd = open(file.c_str(), O_RDONLY);
3633 }
3634 }
3635
3636 ObjectStoreTool tool = ObjectStoreTool(file_fd, dry_run);
3637
3638 if (vm.count("file") && file_fd == fd_none && !dry_run) {
11fdf7f2 3639 cerr << "--file option only applies to import, dump-export, export, export-remove, "
7c673cae
FG
3640 << "get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap" << std::endl;
3641 return 1;
3642 }
3643
3644 if (file_fd != fd_none && file_fd < 0) {
3645 string err = string("file: ") + file;
3646 perror(err.c_str());
3647 return 1;
3648 }
9f95a23c
TL
3649 int init_flags = 0;
3650 if (vm.count("no-mon-config") > 0) {
3651 init_flags |= CINIT_FLAG_NO_MON_CONFIG;
3652 }
7c673cae
FG
3653
3654 auto cct = global_init(
11fdf7f2
TL
3655 NULL, ceph_options,
3656 CEPH_ENTITY_TYPE_OSD,
3657 CODE_ENVIRONMENT_UTILITY_NODOUT,
9f95a23c 3658 init_flags);
7c673cae 3659 common_init_finish(g_ceph_context);
7c673cae 3660 if (debug) {
11fdf7f2
TL
3661 g_conf().set_val_or_die("log_to_stderr", "true");
3662 g_conf().set_val_or_die("err_to_stderr", "true");
7c673cae 3663 }
11fdf7f2 3664 g_conf().apply_changes(nullptr);
7c673cae
FG
3665
3666 // Special list handling. Treating pretty_format as human readable,
3667 // with one object per line and not an enclosing array.
3668 human_readable = ends_with(format, "-pretty");
3669 if ((op == "list" || op == "meta-list") && human_readable) {
3670 // Remove -pretty from end of format which we know is there
3671 format = format.substr(0, format.size() - strlen("-pretty"));
3672 }
3673
3674 formatter = Formatter::create(format);
3675 if (formatter == NULL) {
3676 cerr << "unrecognized format: " << format << std::endl;
3677 return 1;
3678 }
3679
3680 // Special handling for filestore journal, so we can dump it without mounting
3681 if (op == "dump-journal" && type == "filestore") {
11fdf7f2 3682 int ret = mydump_journal(formatter, jpath, g_conf()->journal_dio);
7c673cae
FG
3683 if (ret < 0) {
3684 cerr << "journal-path: " << jpath << ": "
3685 << cpp_strerror(ret) << std::endl;
3686 return 1;
3687 }
3688 formatter->flush(cout);
3689 return 0;
3690 }
3691
11fdf7f2
TL
3692 if (op == "dump-export") {
3693 int ret = tool.dump_export(formatter);
b32b8144 3694 if (ret < 0) {
11fdf7f2 3695 cerr << "dump-export: "
b32b8144
FG
3696 << cpp_strerror(ret) << std::endl;
3697 return 1;
3698 }
3699 return 0;
3700 }
3701
7c673cae
FG
3702 //Verify that data-path really exists
3703 struct stat st;
3704 if (::stat(dpath.c_str(), &st) == -1) {
3705 string err = string("data-path: ") + dpath;
3706 perror(err.c_str());
3707 return 1;
3708 }
3709
20effc67 3710 if (pgidstr.length() && pgidstr != "meta" && !pgid.parse(pgidstr.c_str())) {
7c673cae
FG
3711 cerr << "Invalid pgid '" << pgidstr << "' specified" << std::endl;
3712 return 1;
3713 }
3714
3efd9988
FG
3715 //Verify that the journal-path really exists
3716 if (type == "filestore") {
3717 if (::stat(jpath.c_str(), &st) == -1) {
3718 string err = string("journal-path: ") + jpath;
3719 perror(err.c_str());
3720 return 1;
3721 }
3722 if (S_ISDIR(st.st_mode)) {
3723 cerr << "journal-path: " << jpath << ": "
3724 << cpp_strerror(EISDIR) << std::endl;
3725 return 1;
3726 }
3727 }
3728
20effc67
TL
3729 std::unique_ptr<ObjectStore> fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags);
3730 if (!fs) {
7c673cae
FG
3731 cerr << "Unable to create store of type " << type << std::endl;
3732 return 1;
3733 }
3734
3735 if (op == "fsck" || op == "fsck-deep") {
3736 int r = fs->fsck(op == "fsck-deep");
3737 if (r < 0) {
3738 cerr << "fsck failed: " << cpp_strerror(r) << std::endl;
3739 return 1;
3740 }
3741 if (r > 0) {
9f95a23c 3742 cerr << "fsck status: " << r << " remaining error(s) and warning(s)" << std::endl;
7c673cae
FG
3743 return 1;
3744 }
9f95a23c 3745 cout << "fsck success" << std::endl;
7c673cae
FG
3746 return 0;
3747 }
3efd9988
FG
3748 if (op == "repair" || op == "repair-deep") {
3749 int r = fs->repair(op == "repair-deep");
3750 if (r < 0) {
3751 cerr << "repair failed: " << cpp_strerror(r) << std::endl;
3752 return 1;
3753 }
3754 if (r > 0) {
9f95a23c 3755 cerr << "repair status: " << r << " remaining error(s) and warning(s)" << std::endl;
3efd9988
FG
3756 return 1;
3757 }
9f95a23c 3758 cout << "repair success" << std::endl;
3efd9988
FG
3759 return 0;
3760 }
7c673cae
FG
3761 if (op == "mkfs") {
3762 if (fsid.length()) {
3763 uuid_d f;
3764 bool r = f.parse(fsid.c_str());
3765 if (!r) {
3766 cerr << "failed to parse uuid '" << fsid << "'" << std::endl;
3767 return 1;
3768 }
3769 fs->set_fsid(f);
3770 }
3771 int r = fs->mkfs();
3772 if (r < 0) {
3efd9988 3773 cerr << "mkfs failed: " << cpp_strerror(r) << std::endl;
7c673cae
FG
3774 return 1;
3775 }
3776 return 0;
3777 }
3778 if (op == "dup") {
3779 string target_type;
3780 char fn[PATH_MAX];
3781 snprintf(fn, sizeof(fn), "%s/type", target_data_path.c_str());
3782 int fd = ::open(fn, O_RDONLY);
3783 if (fd < 0) {
3784 cerr << "Unable to open " << target_data_path << "/type" << std::endl;
3785 exit(1);
3786 }
3787 bufferlist bl;
3788 bl.read_fd(fd, 64);
3789 if (bl.length()) {
3790 target_type = string(bl.c_str(), bl.length() - 1); // drop \n
3791 }
3792 ::close(fd);
20effc67 3793 unique_ptr<ObjectStore> targetfs = ObjectStore::create(
7c673cae
FG
3794 g_ceph_context, target_type,
3795 target_data_path, "", 0);
20effc67 3796 if (!targetfs) {
7c673cae
FG
3797 cerr << "Unable to open store of type " << target_type << std::endl;
3798 return 1;
3799 }
20effc67 3800 int r = dup(dpath, fs.get(), target_data_path, targetfs.get());
7c673cae
FG
3801 if (r < 0) {
3802 cerr << "dup failed: " << cpp_strerror(r) << std::endl;
3803 return 1;
3804 }
3805 return 0;
3806 }
3807
7c673cae
FG
3808 int ret = fs->mount();
3809 if (ret < 0) {
3810 if (ret == -EBUSY) {
3811 cerr << "OSD has the store locked" << std::endl;
3812 } else {
3813 cerr << "Mount failed with '" << cpp_strerror(ret) << "'" << std::endl;
3814 }
3815 return 1;
3816 }
3817
3818 if (op == "fuse") {
3819#ifdef HAVE_LIBFUSE
20effc67 3820 FuseStore fuse(fs.get(), mountpoint);
7c673cae
FG
3821 cout << "mounting fuse at " << mountpoint << " ..." << std::endl;
3822 int r = fuse.main();
20effc67 3823 fs->umount();
7c673cae
FG
3824 if (r < 0) {
3825 cerr << "failed to mount fuse: " << cpp_strerror(r) << std::endl;
3826 return 1;
3827 }
3828#else
3829 cerr << "fuse support not enabled" << std::endl;
3830#endif
3831 return 0;
3832 }
3833
3834 vector<coll_t> ls;
3835 vector<coll_t>::iterator it;
3836 CompatSet supported;
3837
3838#ifdef INTERNAL_TEST
3839 supported = get_test_compat_set();
3840#else
3841 supported = OSD::get_osd_compat_set();
3842#endif
3843
3844 bufferlist bl;
11fdf7f2 3845 auto ch = fs->open_collection(coll_t::meta());
9f95a23c
TL
3846 std::unique_ptr<OSDSuperblock> superblock;
3847 if (!no_superblock) {
3848 superblock.reset(new OSDSuperblock);
3849 bufferlist::const_iterator p;
3850 ret = fs->read(ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
3851 if (ret < 0) {
3852 cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl;
3853 goto out;
3854 }
7c673cae 3855
9f95a23c
TL
3856 p = bl.cbegin();
3857 decode(*superblock, p);
7c673cae 3858
9f95a23c
TL
3859 if (debug) {
3860 cerr << "Cluster fsid=" << superblock->cluster_fsid << std::endl;
3861 }
7c673cae 3862
9f95a23c
TL
3863 if (debug) {
3864 cerr << "Supported features: " << supported << std::endl;
3865 cerr << "On-disk features: " << superblock->compat_features << std::endl;
3866 }
3867 if (supported.compare(superblock->compat_features) == -1) {
3868 CompatSet unsupported = supported.unsupported(superblock->compat_features);
3869 cerr << "On-disk OSD incompatible features set "
3870 << unsupported << std::endl;
3871 ret = -EINVAL;
3872 goto out;
3873 }
7c673cae
FG
3874 }
3875
3876 if (op == "apply-layout-settings") {
1adf2230 3877 int target_level = 0;
91327a77
AA
3878 // Single positional argument with apply-layout-settings
3879 // for target_level.
3880 if (vm.count("object") && isdigit(object[0])) {
3881 target_level = atoi(object.c_str());
3882 // This requires --arg1 to be specified since
3883 // this is the third positional argument and normally
3884 // used with object operations.
3885 } else if (vm.count("arg1") && isdigit(arg1[0])) {
1adf2230
AA
3886 target_level = atoi(arg1.c_str());
3887 }
9f95a23c 3888 ceph_assert(superblock != nullptr);
20effc67 3889 ret = apply_layout_settings(fs.get(), *superblock, pool, pgid, dry_run, target_level);
7c673cae
FG
3890 goto out;
3891 }
3892
3893 if (op != "list" && vm.count("object")) {
3894 // Special case: Create pgmeta_oid if empty string specified
3895 // This can't conflict with any actual object names.
3896 if (object == "") {
3897 ghobj = pgid.make_pgmeta_oid();
3898 } else {
3899 json_spirit::Value v;
3900 try {
3901 if (!json_spirit::read(object, v) ||
3902 (v.type() != json_spirit::array_type && v.type() != json_spirit::obj_type)) {
3903 // Special: Need head/snapdir so set even if user didn't specify
3904 if (vm.count("objcmd") && (objcmd == "remove-clone-metadata"))
3905 head = true;
3906 lookup_ghobject lookup(object, nspace, head);
20effc67
TL
3907 if (pgidstr == "meta")
3908 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t::meta(), lookup, debug);
3909 else if (pgidstr.length())
3910 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t(pgid), lookup, debug);
3a9019d9 3911 else
20effc67 3912 ret = action_on_all_objects(fs.get(), lookup, debug);
3a9019d9 3913 if (ret) {
7c673cae
FG
3914 throw std::runtime_error("Internal error");
3915 } else {
3916 if (lookup.size() != 1) {
3917 stringstream ss;
3918 if (lookup.size() == 0)
3919 ss << "No object id '" << object << "' found or invalid JSON specified";
3920 else
3921 ss << "Found " << lookup.size() << " objects with id '" << object
3922 << "', please use a JSON spec from --op list instead";
3923 throw std::runtime_error(ss.str());
3924 }
3925 pair<coll_t, ghobject_t> found = lookup.pop();
3926 pgidstr = found.first.to_str();
3927 pgid.parse(pgidstr.c_str());
3928 ghobj = found.second;
3929 }
3930 } else {
3931 stringstream ss;
3932 if (pgidstr.length() == 0 && v.type() != json_spirit::array_type) {
3933 ss << "Without --pgid the object '" << object
3934 << "' must be a JSON array";
3935 throw std::runtime_error(ss.str());
3936 }
3937 if (v.type() == json_spirit::array_type) {
3938 json_spirit::Array array = v.get_array();
3939 if (array.size() != 2) {
3940 ss << "Object '" << object
3941 << "' must be a JSON array with 2 elements";
3942 throw std::runtime_error(ss.str());
3943 }
3944 vector<json_spirit::Value>::iterator i = array.begin();
11fdf7f2 3945 ceph_assert(i != array.end());
7c673cae
FG
3946 if (i->type() != json_spirit::str_type) {
3947 ss << "Object '" << object
3948 << "' must be a JSON array with the first element a string";
3949 throw std::runtime_error(ss.str());
3950 }
3951 string object_pgidstr = i->get_str();
3952 if (object_pgidstr != "meta") {
3953 spg_t object_pgid;
3954 object_pgid.parse(object_pgidstr.c_str());
3955 if (pgidstr.length() > 0) {
3956 if (object_pgid != pgid) {
3957 ss << "object '" << object
3958 << "' has a pgid different from the --pgid="
3959 << pgidstr << " option";
3960 throw std::runtime_error(ss.str());
3961 }
3962 } else {
3963 pgidstr = object_pgidstr;
3964 pgid = object_pgid;
3965 }
3966 } else {
3967 pgidstr = object_pgidstr;
3968 }
3969 ++i;
3970 v = *i;
3971 }
3972 try {
3973 ghobj.decode(v);
3974 } catch (std::runtime_error& e) {
3975 ss << "Decode object JSON error: " << e.what();
3976 throw std::runtime_error(ss.str());
3977 }
3978 if (pgidstr != "meta" && (uint64_t)pgid.pgid.m_pool != (uint64_t)ghobj.hobj.pool) {
3979 cerr << "Object pool and pgid pool don't match" << std::endl;
3980 ret = 1;
3981 goto out;
3982 }
9f95a23c
TL
3983 if (pgidstr != "meta") {
3984 auto ch = fs->open_collection(coll_t(pgid));
3985 if (!ghobj.match(fs->collection_bits(ch), pgid.ps())) {
3986 stringstream ss;
3987 ss << "object " << ghobj << " not contained by pg " << pgid;
3988 throw std::runtime_error(ss.str());
3989 }
3990 }
7c673cae
FG
3991 }
3992 } catch (std::runtime_error& e) {
3993 cerr << e.what() << std::endl;
3994 ret = 1;
3995 goto out;
3996 }
3997 }
3998 }
3999
4000 // The ops which require --pgid option are checked here and
4001 // mentioned in the usage for --pgid.
4002 if ((op == "info" || op == "log" || op == "remove" || op == "export"
11fdf7f2
TL
4003 || op == "export-remove" || op == "mark-complete"
4004 || op == "reset-last-complete"
33c7a0ef 4005 || op == "trim-pg-log"
2a845540 4006 || op == "pg-log-inject-dups") &&
7c673cae
FG
4007 pgidstr.length() == 0) {
4008 cerr << "Must provide pgid" << std::endl;
4009 usage(desc);
4010 ret = 1;
4011 goto out;
4012 }
4013
4014 if (op == "import") {
9f95a23c 4015 ceph_assert(superblock != nullptr);
7c673cae 4016 try {
20effc67 4017 ret = tool.do_import(fs.get(), *superblock, force, pgidstr);
7c673cae
FG
4018 }
4019 catch (const buffer::error &e) {
4020 cerr << "do_import threw exception error " << e.what() << std::endl;
4021 ret = -EFAULT;
4022 }
4023 if (ret == -EFAULT) {
4024 cerr << "Corrupt input for import" << std::endl;
4025 }
4026 if (ret == 0)
4027 cout << "Import successful" << std::endl;
4028 goto out;
4029 } else if (op == "dump-journal-mount") {
4030 // Undocumented feature to dump journal with mounted fs
4031 // This doesn't support the format option, but it uses the
4032 // ObjectStore::dump_journal() and mounts to get replay to run.
4033 ret = fs->dump_journal(cout);
4034 if (ret) {
4035 if (ret == -EOPNOTSUPP) {
4036 cerr << "Object store type \"" << type << "\" doesn't support journal dump" << std::endl;
4037 } else {
4038 cerr << "Journal dump failed with error " << cpp_strerror(ret) << std::endl;
4039 }
4040 }
4041 goto out;
4042 } else if (op == "get-osdmap") {
4043 bufferlist bl;
4044 OSDMap osdmap;
4045 if (epoch == 0) {
9f95a23c
TL
4046 ceph_assert(superblock != nullptr);
4047 epoch = superblock->current_epoch;
7c673cae 4048 }
20effc67 4049 ret = get_osdmap(fs.get(), epoch, osdmap, bl);
7c673cae
FG
4050 if (ret) {
4051 cerr << "Failed to get osdmap#" << epoch << ": "
4052 << cpp_strerror(ret) << std::endl;
4053 goto out;
4054 }
4055 ret = bl.write_fd(file_fd);
4056 if (ret) {
4057 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
4058 } else {
4059 cout << "osdmap#" << epoch << " exported." << std::endl;
4060 }
4061 goto out;
4062 } else if (op == "set-osdmap") {
4063 bufferlist bl;
4064 ret = get_fd_data(file_fd, bl);
4065 if (ret < 0) {
4066 cerr << "Failed to read osdmap " << cpp_strerror(ret) << std::endl;
4067 } else {
20effc67 4068 ret = set_osdmap(fs.get(), epoch, bl, force);
7c673cae
FG
4069 }
4070 goto out;
4071 } else if (op == "get-inc-osdmap") {
4072 bufferlist bl;
4073 if (epoch == 0) {
9f95a23c
TL
4074 ceph_assert(superblock != nullptr);
4075 epoch = superblock->current_epoch;
7c673cae 4076 }
20effc67 4077 ret = get_inc_osdmap(fs.get(), epoch, bl);
7c673cae
FG
4078 if (ret < 0) {
4079 cerr << "Failed to get incremental osdmap# " << epoch << ": "
4080 << cpp_strerror(ret) << std::endl;
4081 goto out;
4082 }
4083 ret = bl.write_fd(file_fd);
4084 if (ret) {
4085 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
4086 } else {
4087 cout << "inc-osdmap#" << epoch << " exported." << std::endl;
4088 }
4089 goto out;
4090 } else if (op == "set-inc-osdmap") {
4091 bufferlist bl;
4092 ret = get_fd_data(file_fd, bl);
4093 if (ret < 0) {
4094 cerr << "Failed to read incremental osdmap " << cpp_strerror(ret) << std::endl;
4095 goto out;
4096 } else {
20effc67 4097 ret = set_inc_osdmap(fs.get(), epoch, bl, force);
7c673cae
FG
4098 }
4099 goto out;
4100 } else if (op == "update-mon-db") {
4101 if (!vm.count("mon-store-path")) {
4102 cerr << "Please specify the path to monitor db to update" << std::endl;
4103 ret = -EINVAL;
4104 } else {
9f95a23c
TL
4105 ceph_assert(superblock != nullptr);
4106 ret = update_mon_db(*fs, *superblock, dpath + "/keyring", mon_store_path);
7c673cae
FG
4107 }
4108 goto out;
4109 }
4110
7c673cae 4111 if (op == "remove") {
3efd9988
FG
4112 if (!force && !dry_run) {
4113 cerr << "Please use export-remove or you must use --force option" << std::endl;
4114 ret = -EINVAL;
4115 goto out;
4116 }
20effc67 4117 ret = initiate_new_remove_pg(fs.get(), pgid);
7c673cae
FG
4118 if (ret < 0) {
4119 cerr << "PG '" << pgid << "' not found" << std::endl;
4120 goto out;
4121 }
4122 cout << "Remove successful" << std::endl;
4123 goto out;
4124 }
4125
4126 if (op == "fix-lost") {
4127 boost::scoped_ptr<action_on_object_t> action;
11fdf7f2 4128 action.reset(new do_fix_lost());
7c673cae 4129 if (pgidstr.length())
20effc67 4130 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t(pgid), *action, debug);
7c673cae 4131 else
20effc67 4132 ret = action_on_all_objects(fs.get(), *action, debug);
7c673cae
FG
4133 goto out;
4134 }
4135
4136 if (op == "list") {
20effc67 4137 ret = do_list(fs.get(), pgidstr, object, nspace, formatter, debug,
7c673cae
FG
4138 human_readable, head);
4139 if (ret < 0) {
4140 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
4141 }
4142 goto out;
4143 }
9f95a23c 4144 if (op == "list-slow-omap") {
20effc67 4145 ret = do_list_slow(fs.get(), pgidstr, object, slow_threshold, formatter, debug,
9f95a23c
TL
4146 human_readable);
4147 if (ret < 0) {
4148 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
4149 }
4150 goto out;
4151 }
7c673cae
FG
4152
4153 if (op == "dump-super") {
9f95a23c 4154 ceph_assert(superblock != nullptr);
7c673cae 4155 formatter->open_object_section("superblock");
9f95a23c 4156 superblock->dump(formatter);
7c673cae
FG
4157 formatter->close_section();
4158 formatter->flush(cout);
4159 cout << std::endl;
4160 goto out;
4161 }
4162
f6b5b4d7
TL
4163 if (op == "statfs") {
4164 store_statfs_t statsbuf;
4165 ret = fs->statfs(&statsbuf);
4166 if (ret < 0) {
4167 cerr << "error from statfs: " << cpp_strerror(ret) << std::endl;
4168 goto out;
4169 }
4170 formatter->open_object_section("statfs");
4171 statsbuf.dump(formatter);
4172 formatter->close_section();
4173 formatter->flush(cout);
4174 cout << std::endl;
4175 goto out;
4176 }
4177
7c673cae 4178 if (op == "meta-list") {
20effc67 4179 ret = do_meta(fs.get(), object, formatter, debug, human_readable);
7c673cae
FG
4180 if (ret < 0) {
4181 cerr << "do_meta failed: " << cpp_strerror(ret) << std::endl;
4182 }
4183 goto out;
4184 }
4185
4186 ret = fs->list_collections(ls);
4187 if (ret < 0) {
4188 cerr << "failed to list pgs: " << cpp_strerror(ret) << std::endl;
4189 goto out;
4190 }
4191
4192 if (debug && op == "list-pgs")
4193 cout << "Performing list-pgs operation" << std::endl;
4194
4195 // Find pg
4196 for (it = ls.begin(); it != ls.end(); ++it) {
4197 spg_t tmppgid;
4198
4199 if (pgidstr == "meta") {
4200 if (it->to_str() == "meta")
4201 break;
4202 else
4203 continue;
4204 }
4205
4206 if (!it->is_pg(&tmppgid)) {
4207 continue;
4208 }
4209
4210 if (it->is_temp(&tmppgid)) {
4211 continue;
4212 }
4213
4214 if (op != "list-pgs" && tmppgid != pgid) {
4215 continue;
4216 }
4217
4218 if (op != "list-pgs") {
4219 //Found!
4220 break;
4221 }
4222
4223 cout << tmppgid << std::endl;
4224 }
4225
4226 if (op == "list-pgs") {
4227 ret = 0;
4228 goto out;
4229 }
4230
4231 // If not an object command nor any of the ops handled below, then output this usage
4232 // before complaining about a bad pgid
2a845540 4233 if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups" && op != "pg-log-inject-dups") {
11fdf7f2 4234 cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
33c7a0ef 4235 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, trim-pg-log-dups statfs)"
7c673cae
FG
4236 << std::endl;
4237 usage(desc);
4238 ret = 1;
4239 goto out;
4240 }
4241 epoch_t map_epoch;
4242// The following code for export, info, log require omap or !skip-mount-omap
4243 if (it != ls.end()) {
4244
4245 coll_t coll = *it;
4246
4247 if (vm.count("objcmd")) {
4248 ret = 0;
4249 if (objcmd == "remove" || objcmd == "removeall") {
4250 bool all = (objcmd == "removeall");
11fdf7f2
TL
4251 enum rmtype type = BOTH;
4252 if (rmtypestr == "nosnapmap")
4253 type = NOSNAPMAP;
4254 else if (rmtypestr == "snapmap")
4255 type = SNAPMAP;
20effc67 4256 ret = do_remove_object(fs.get(), coll, ghobj, all, force, type);
7c673cae
FG
4257 goto out;
4258 } else if (objcmd == "list-attrs") {
20effc67 4259 ret = do_list_attrs(fs.get(), coll, ghobj);
7c673cae
FG
4260 goto out;
4261 } else if (objcmd == "list-omap") {
20effc67 4262 ret = do_list_omap(fs.get(), coll, ghobj);
7c673cae
FG
4263 goto out;
4264 } else if (objcmd == "get-bytes" || objcmd == "set-bytes") {
4265 if (objcmd == "get-bytes") {
4266 int fd;
4267 if (vm.count("arg1") == 0 || arg1 == "-") {
4268 fd = STDOUT_FILENO;
4269 } else {
4270 fd = open(arg1.c_str(), O_WRONLY|O_TRUNC|O_CREAT|O_EXCL|O_LARGEFILE, 0666);
4271 if (fd == -1) {
4272 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4273 ret = 1;
4274 goto out;
4275 }
4276 }
20effc67 4277 ret = do_get_bytes(fs.get(), coll, ghobj, fd);
7c673cae
FG
4278 if (fd != STDOUT_FILENO)
4279 close(fd);
4280 } else {
4281 int fd;
4282 if (vm.count("arg1") == 0 || arg1 == "-") {
4283 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4284 if (isatty(STDIN_FILENO)) {
4285 cerr << "stdin is a tty and no file specified" << std::endl;
4286 ret = 1;
4287 goto out;
4288 }
4289 fd = STDIN_FILENO;
4290 } else {
4291 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4292 if (fd == -1) {
4293 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4294 ret = 1;
4295 goto out;
4296 }
4297 }
20effc67 4298 ret = do_set_bytes(fs.get(), coll, ghobj, fd);
7c673cae
FG
4299 if (fd != STDIN_FILENO)
4300 close(fd);
4301 }
4302 goto out;
4303 } else if (objcmd == "get-attr") {
4304 if (vm.count("arg1") == 0) {
4305 usage(desc);
4306 ret = 1;
4307 goto out;
4308 }
20effc67 4309 ret = do_get_attr(fs.get(), coll, ghobj, arg1);
7c673cae
FG
4310 goto out;
4311 } else if (objcmd == "set-attr") {
4312 if (vm.count("arg1") == 0) {
4313 usage(desc);
4314 ret = 1;
4315 }
4316
4317 int fd;
4318 if (vm.count("arg2") == 0 || arg2 == "-") {
4319 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4320 if (isatty(STDIN_FILENO)) {
4321 cerr << "stdin is a tty and no file specified" << std::endl;
4322 ret = 1;
4323 goto out;
4324 }
4325 fd = STDIN_FILENO;
4326 } else {
4327 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4328 if (fd == -1) {
4329 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
4330 ret = 1;
4331 goto out;
4332 }
4333 }
20effc67 4334 ret = do_set_attr(fs.get(), coll, ghobj, arg1, fd);
7c673cae
FG
4335 if (fd != STDIN_FILENO)
4336 close(fd);
4337 goto out;
4338 } else if (objcmd == "rm-attr") {
4339 if (vm.count("arg1") == 0) {
4340 usage(desc);
4341 ret = 1;
4342 goto out;
4343 }
20effc67 4344 ret = do_rm_attr(fs.get(), coll, ghobj, arg1);
7c673cae
FG
4345 goto out;
4346 } else if (objcmd == "get-omap") {
4347 if (vm.count("arg1") == 0) {
4348 usage(desc);
4349 ret = 1;
4350 goto out;
4351 }
20effc67 4352 ret = do_get_omap(fs.get(), coll, ghobj, arg1);
7c673cae
FG
4353 goto out;
4354 } else if (objcmd == "set-omap") {
4355 if (vm.count("arg1") == 0) {
4356 usage(desc);
4357 ret = 1;
4358 goto out;
4359 }
4360 int fd;
4361 if (vm.count("arg2") == 0 || arg2 == "-") {
4362 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4363 if (isatty(STDIN_FILENO)) {
4364 cerr << "stdin is a tty and no file specified" << std::endl;
4365 ret = 1;
4366 goto out;
4367 }
4368 fd = STDIN_FILENO;
4369 } else {
4370 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4371 if (fd == -1) {
4372 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
4373 ret = 1;
4374 goto out;
4375 }
4376 }
20effc67 4377 ret = do_set_omap(fs.get(), coll, ghobj, arg1, fd);
7c673cae
FG
4378 if (fd != STDIN_FILENO)
4379 close(fd);
4380 goto out;
4381 } else if (objcmd == "rm-omap") {
4382 if (vm.count("arg1") == 0) {
4383 usage(desc);
4384 ret = 1;
4385 goto out;
4386 }
20effc67 4387 ret = do_rm_omap(fs.get(), coll, ghobj, arg1);
7c673cae
FG
4388 goto out;
4389 } else if (objcmd == "get-omaphdr") {
4390 if (vm.count("arg1")) {
4391 usage(desc);
4392 ret = 1;
4393 goto out;
4394 }
20effc67 4395 ret = do_get_omaphdr(fs.get(), coll, ghobj);
7c673cae
FG
4396 goto out;
4397 } else if (objcmd == "set-omaphdr") {
4398 // Extra arg
4399 if (vm.count("arg2")) {
4400 usage(desc);
4401 ret = 1;
4402 goto out;
4403 }
4404 int fd;
4405 if (vm.count("arg1") == 0 || arg1 == "-") {
4406 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4407 if (isatty(STDIN_FILENO)) {
4408 cerr << "stdin is a tty and no file specified" << std::endl;
4409 ret = 1;
4410 goto out;
4411 }
4412 fd = STDIN_FILENO;
4413 } else {
4414 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4415 if (fd == -1) {
4416 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4417 ret = 1;
4418 goto out;
4419 }
4420 }
20effc67 4421 ret = do_set_omaphdr(fs.get(), coll, ghobj, fd);
7c673cae
FG
4422 if (fd != STDIN_FILENO)
4423 close(fd);
4424 goto out;
4425 } else if (objcmd == "dump") {
4426 // There should not be any other arguments
4427 if (vm.count("arg1") || vm.count("arg2")) {
4428 usage(desc);
4429 ret = 1;
4430 goto out;
4431 }
20effc67 4432 ret = print_obj_info(fs.get(), coll, ghobj, formatter);
7c673cae 4433 goto out;
1adf2230
AA
4434 } else if (objcmd == "corrupt-info") { // Undocumented testing feature
4435 // There should not be any other arguments
4436 if (vm.count("arg1") || vm.count("arg2")) {
4437 usage(desc);
4438 ret = 1;
4439 goto out;
4440 }
20effc67 4441 ret = corrupt_info(fs.get(), coll, ghobj, formatter);
1adf2230 4442 goto out;
b5b8bbf5
FG
4443 } else if (objcmd == "set-size" || objcmd == "corrupt-size") {
4444 // Undocumented testing feature
4445 bool corrupt = (objcmd == "corrupt-size");
7c673cae
FG
4446 // Extra arg
4447 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4448 usage(desc);
4449 ret = 1;
4450 goto out;
4451 }
4452 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4453 cerr << "Invalid size '" << arg1 << "' specified" << std::endl;
4454 ret = 1;
4455 goto out;
4456 }
4457 uint64_t size = atoll(arg1.c_str());
20effc67 4458 ret = set_size(fs.get(), coll, ghobj, size, formatter, corrupt);
7c673cae 4459 goto out;
11fdf7f2 4460 } else if (objcmd == "clear-data-digest") {
20effc67 4461 ret = clear_data_digest(fs.get(), coll, ghobj);
11fdf7f2 4462 goto out;
7c673cae
FG
4463 } else if (objcmd == "clear-snapset") {
4464 // UNDOCUMENTED: For testing zap SnapSet
4465 // IGNORE extra args since not in usage anyway
4466 if (!ghobj.hobj.has_snapset()) {
4467 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4468 ret = 1;
4469 goto out;
4470 }
20effc67 4471 ret = clear_snapset(fs.get(), coll, ghobj, arg1);
7c673cae
FG
4472 goto out;
4473 } else if (objcmd == "remove-clone-metadata") {
4474 // Extra arg
4475 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4476 usage(desc);
4477 ret = 1;
4478 goto out;
4479 }
4480 if (!ghobj.hobj.has_snapset()) {
4481 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4482 ret = 1;
4483 goto out;
4484 }
4485 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4486 cerr << "Invalid cloneid '" << arg1 << "' specified" << std::endl;
4487 ret = 1;
4488 goto out;
4489 }
4490 snapid_t cloneid = atoi(arg1.c_str());
20effc67 4491 ret = remove_clone(fs.get(), coll, ghobj, cloneid, force);
7c673cae
FG
4492 goto out;
4493 }
4494 cerr << "Unknown object command '" << objcmd << "'" << std::endl;
4495 usage(desc);
4496 ret = 1;
4497 goto out;
4498 }
4499
7c673cae 4500 map_epoch = 0;
20effc67 4501 ret = PG::peek_map_epoch(fs.get(), pgid, &map_epoch);
7c673cae
FG
4502 if (ret < 0)
4503 cerr << "peek_map_epoch reports error" << std::endl;
4504 if (debug)
4505 cerr << "map_epoch " << map_epoch << std::endl;
4506
4507 pg_info_t info(pgid);
4508 PastIntervals past_intervals;
4509 __u8 struct_ver;
20effc67 4510 ret = PG::read_info(fs.get(), pgid, coll, info, past_intervals, struct_ver);
7c673cae
FG
4511 if (ret < 0) {
4512 cerr << "read_info error " << cpp_strerror(ret) << std::endl;
4513 goto out;
4514 }
11fdf7f2 4515 if (struct_ver < PG::get_compat_struct_v()) {
7c673cae
FG
4516 cerr << "PG is too old to upgrade, use older Ceph version" << std::endl;
4517 ret = -EFAULT;
4518 goto out;
4519 }
4520 if (debug)
4521 cerr << "struct_v " << (int)struct_ver << std::endl;
4522
3efd9988 4523 if (op == "export" || op == "export-remove") {
9f95a23c 4524 ceph_assert(superblock != nullptr);
2a845540 4525 ret = tool.do_export(cct.get(), fs.get(), coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals);
3efd9988 4526 if (ret == 0) {
7c673cae 4527 cerr << "Export successful" << std::endl;
3efd9988 4528 if (op == "export-remove") {
20effc67 4529 ret = initiate_new_remove_pg(fs.get(), pgid);
3efd9988 4530 // Export succeeded, so pgid is there
11fdf7f2 4531 ceph_assert(ret == 0);
3efd9988
FG
4532 cerr << "Remove successful" << std::endl;
4533 }
4534 }
7c673cae
FG
4535 } else if (op == "info") {
4536 formatter->open_object_section("info");
4537 info.dump(formatter);
4538 formatter->close_section();
4539 formatter->flush(cout);
4540 cout << std::endl;
4541 } else if (op == "log") {
4542 PGLog::IndexedLog log;
4543 pg_missing_t missing;
2a845540 4544 ret = get_log(cct.get(), fs.get(), struct_ver, pgid, info, log, missing);
7c673cae
FG
4545 if (ret < 0)
4546 goto out;
4547
4548 dump_log(formatter, cout, log, missing);
7c673cae
FG
4549 } else if (op == "mark-complete") {
4550 ObjectStore::Transaction tran;
4551 ObjectStore::Transaction *t = &tran;
4552
11fdf7f2 4553 if (struct_ver < PG::get_compat_struct_v()) {
7c673cae 4554 cerr << "Can't mark-complete, version mismatch " << (int)struct_ver
11fdf7f2 4555 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
7c673cae
FG
4556 << std::endl;
4557 ret = 1;
4558 goto out;
4559 }
4560
4561 cout << "Marking complete " << std::endl;
4562
9f95a23c
TL
4563 ceph_assert(superblock != nullptr);
4564 info.last_update = eversion_t(superblock->current_epoch, info.last_update.version + 1);
7c673cae 4565 info.last_backfill = hobject_t::get_max();
9f95a23c
TL
4566 info.last_epoch_started = superblock->current_epoch;
4567 info.history.last_epoch_started = superblock->current_epoch;
4568 info.history.last_epoch_clean = superblock->current_epoch;
7c673cae
FG
4569 past_intervals.clear();
4570
4571 if (!dry_run) {
4572 ret = write_info(*t, map_epoch, info, past_intervals);
4573 if (ret != 0)
4574 goto out;
11fdf7f2
TL
4575 auto ch = fs->open_collection(coll_t(pgid));
4576 fs->queue_transaction(ch, std::move(*t));
7c673cae
FG
4577 }
4578 cout << "Marking complete succeeded" << std::endl;
94b18763 4579 } else if (op == "trim-pg-log") {
20effc67 4580 ret = do_trim_pg_log(fs.get(), coll, info, pgid,
94b18763
FG
4581 map_epoch, past_intervals);
4582 if (ret < 0) {
4583 cerr << "Error trimming pg log: " << cpp_strerror(ret) << std::endl;
4584 goto out;
4585 }
4586 cout << "Finished trimming pg log" << std::endl;
33c7a0ef
TL
4587 goto out;
4588 } else if (op == "trim-pg-log-dups") {
4589 ret = do_trim_pg_log_dups(fs.get(), coll, info, pgid,
4590 map_epoch, past_intervals);
4591 if (ret < 0) {
4592 cerr << "Error trimming pg log dups: " << cpp_strerror(ret) << std::endl;
4593 goto out;
4594 }
4595 cout << "Finished trimming pg log dups" << std::endl;
94b18763 4596 goto out;
11fdf7f2
TL
4597 } else if (op == "reset-last-complete") {
4598 if (!force) {
4599 std::cerr << "WARNING: reset-last-complete is extremely dangerous and almost "
4600 << "certain to lead to permanent data loss unless you know exactly "
4601 << "what you are doing. Pass --force to proceed anyway."
4602 << std::endl;
4603 ret = -EINVAL;
4604 goto out;
4605 }
4606 ObjectStore::Transaction tran;
4607 ObjectStore::Transaction *t = &tran;
4608
4609 if (struct_ver < PG::get_compat_struct_v()) {
4610 cerr << "Can't reset-last-complete, version mismatch " << (int)struct_ver
4611 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4612 << std::endl;
4613 ret = 1;
4614 goto out;
4615 }
4616
4617 cout << "Reseting last_complete " << std::endl;
4618
4619 info.last_complete = info.last_update;
4620
4621 if (!dry_run) {
4622 ret = write_info(*t, map_epoch, info, past_intervals);
4623 if (ret != 0)
4624 goto out;
4625 fs->queue_transaction(ch, std::move(*t));
4626 }
4627 cout << "Reseting last_complete succeeded" << std::endl;
4628
2a845540
TL
4629 } else if (op == "pg-log-inject-dups") {
4630 if (!vm.count("file") || file == "-") {
4631 cerr << "Must provide file containing JSON dups entries" << std::endl;
4632 ret = 1;
4633 goto out;
4634 }
4635 if (debug)
4636 cerr << "opening file " << file << std::endl;
4637
4638 ifstream json_file_stream(file , std::ifstream::in);
4639 if (!json_file_stream.is_open()) {
4640 cerr << "unable to open file " << file << std::endl;
4641 ret = -1;
4642 goto out;
4643 }
4644 json_spirit::mValue result;
4645 try {
4646 if (!json_spirit::read(json_file_stream, result))
4647 throw std::runtime_error("unparseable JSON " + file);
4648 if (result.type() != json_spirit::array_type) {
4649 cerr << "result is not an array_type - type=" << result.type() << std::endl;
4650 throw std::runtime_error("not JSON array_type " + file);
4651 }
4652 do_dups_inject_from_json(fs.get(), pgid, result, debug);
4653 } catch (const std::runtime_error &e) {
4654 cerr << e.what() << std::endl;;
4655 return -1;
4656 }
7c673cae 4657 } else {
11fdf7f2 4658 ceph_assert(!"Should have already checked for valid --op");
7c673cae
FG
4659 }
4660 } else {
4661 cerr << "PG '" << pgid << "' not found" << std::endl;
4662 ret = -ENOENT;
4663 }
4664
4665out:
9f95a23c
TL
4666 if (debug) {
4667 ostringstream ostr;
4668 Formatter* f = Formatter::create("json-pretty", "json-pretty", "json-pretty");
4669 cct->get_perfcounters_collection()->dump_formatted(f, false);
4670 ostr << "ceph-objectstore-tool ";
4671 f->flush(ostr);
4672 delete f;
4673 cout << ostr.str() << std::endl;
4674 }
4675
7c673cae 4676 int r = fs->umount();
7c673cae
FG
4677 if (r < 0) {
4678 cerr << "umount failed: " << cpp_strerror(r) << std::endl;
4679 // If no previous error, then use umount() error
4680 if (ret == 0)
4681 ret = r;
4682 }
4683
4684 if (dry_run) {
4685 // Export output can go to stdout, so put this message on stderr
4686 if (op == "export")
4687 cerr << "dry-run: Nothing changed" << std::endl;
4688 else
4689 cout << "dry-run: Nothing changed" << std::endl;
4690 }
4691
4692 if (ret < 0)
4693 ret = 1;
4694 return ret;
4695}