1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2013 Inktank
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include <boost/program_options/variables_map.hpp>
16 #include <boost/program_options/parsers.hpp>
17 #include <boost/algorithm/string.hpp>
18 #include <boost/scoped_ptr.hpp>
19 #include <boost/optional.hpp>
24 #include "common/Formatter.h"
25 #include "common/errno.h"
26 #include "common/ceph_argparse.h"
27 #include "common/url_escape.h"
29 #include "global/global_init.h"
31 #include "os/ObjectStore.h"
32 #include "os/filestore/FileJournal.h"
33 #include "os/filestore/FileStore.h"
35 #include "os/FuseStore.h"
38 #include "osd/PGLog.h"
41 #include "osd/ECUtil.h"
43 #include "json_spirit/json_spirit_value.h"
44 #include "json_spirit/json_spirit_reader.h"
46 #include "rebuild_mondb.h"
47 #include "ceph_objectstore_tool.h"
48 #include "include/compat.h"
49 #include "include/util.h"
// Shorthand alias: lets Boost.Program_options names be written as po::...
52 namespace po
= boost::program_options
;
55 CompatSet
get_test_compat_set() {
56 CompatSet::FeatureSet ceph_osd_feature_compat
;
57 CompatSet::FeatureSet ceph_osd_feature_ro_compat
;
58 CompatSet::FeatureSet ceph_osd_feature_incompat
;
59 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE
);
60 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_PGINFO
);
61 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_OLOC
);
62 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_LEC
);
63 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES
);
64 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL
);
65 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO
);
66 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO
);
67 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG
);
69 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER
);
70 ceph_osd_feature_incompat
.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS
);
72 return CompatSet(ceph_osd_feature_compat
, ceph_osd_feature_ro_compat
,
73 ceph_osd_feature_incompat
);
// Byte count (1024 * 1024) used as a per-call read size in this file:
// it is the length passed to bl.read_fd() in get_fd_data() and the
// initial value of the read length in ObjectStoreTool::export_file().
77 const ssize_t max_read
= 1024 * 1024;
// Placeholder file-descriptor value (INT_MIN); file_fd is initialized
// to it.
78 const int fd_none
= INT_MIN
;
82 struct action_on_object_t
{
83 virtual ~action_on_object_t() {}
84 virtual void call(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, object_info_t
&oi
) = 0;
87 int _action_on_all_objects_in_pg(ObjectStore
*store
, coll_t coll
, action_on_object_t
&action
, bool debug
)
89 auto ch
= store
->open_collection(coll
);
91 unsigned LIST_AT_A_TIME
= 100;
93 while (!next
.is_max()) {
94 vector
<ghobject_t
> list
;
95 int r
= store
->collection_list(ch
,
97 ghobject_t::get_max(),
102 cerr
<< "Error listing collection: " << coll
<< ", "
103 << cpp_strerror(r
) << std::endl
;
106 for (vector
<ghobject_t
>::iterator obj
= list
.begin();
110 if (coll
!= coll_t::meta()) {
112 r
= store
->getattr(ch
, *obj
, OI_ATTR
, attr
);
114 cerr
<< "Error getting attr on : " << make_pair(coll
, *obj
) << ", "
115 << cpp_strerror(r
) << std::endl
;
117 auto bp
= attr
.cbegin();
122 cerr
<< "Error decoding attr on : " << make_pair(coll
, *obj
) << ", "
123 << cpp_strerror(r
) << std::endl
;
127 action
.call(store
, coll
, *obj
, oi
);
133 int action_on_all_objects_in_pg(ObjectStore
*store
, string pgidstr
, action_on_object_t
&action
, bool debug
)
136 // Scan collections in case this is an ec pool but no shard specified
137 unsigned scanned
= 0;
139 vector
<coll_t
> colls_to_check
;
140 vector
<coll_t
> candidates
;
142 r
= store
->list_collections(candidates
);
144 cerr
<< "Error listing collections: " << cpp_strerror(r
) << std::endl
;
147 pgid
.parse(pgidstr
.c_str());
148 for (vector
<coll_t
>::iterator i
= candidates
.begin();
149 i
!= candidates
.end();
152 if (i
->is_meta() && pgidstr
== "meta") {
153 colls_to_check
.push_back(*i
);
156 if (!i
->is_pg(&cand_pgid
))
159 // If an exact match or treat no shard as any shard
160 if (cand_pgid
== pgid
||
161 (pgid
.is_no_shard() && pgid
.pgid
== cand_pgid
.pgid
)) {
162 colls_to_check
.push_back(*i
);
167 cerr
<< colls_to_check
.size() << " pgs to scan" << std::endl
;
168 for (vector
<coll_t
>::iterator i
= colls_to_check
.begin();
169 i
!= colls_to_check
.end();
172 cerr
<< "Scanning " << *i
<< ", " << scanned
<< "/"
173 << colls_to_check
.size() << " completed" << std::endl
;
174 r
= _action_on_all_objects_in_pg(store
, *i
, action
, debug
);
181 int action_on_all_objects_in_exact_pg(ObjectStore
*store
, coll_t coll
, action_on_object_t
&action
, bool debug
)
183 int r
= _action_on_all_objects_in_pg(store
, coll
, action
, debug
);
187 int _action_on_all_objects(ObjectStore
*store
, action_on_object_t
&action
, bool debug
)
189 unsigned scanned
= 0;
191 vector
<coll_t
> colls_to_check
;
192 vector
<coll_t
> candidates
;
193 r
= store
->list_collections(candidates
);
195 cerr
<< "Error listing collections: " << cpp_strerror(r
) << std::endl
;
198 for (vector
<coll_t
>::iterator i
= candidates
.begin();
199 i
!= candidates
.end();
202 colls_to_check
.push_back(*i
);
207 cerr
<< colls_to_check
.size() << " pgs to scan" << std::endl
;
208 for (vector
<coll_t
>::iterator i
= colls_to_check
.begin();
209 i
!= colls_to_check
.end();
212 cerr
<< "Scanning " << *i
<< ", " << scanned
<< "/"
213 << colls_to_check
.size() << " completed" << std::endl
;
214 r
= _action_on_all_objects_in_pg(store
, *i
, action
, debug
);
221 int action_on_all_objects(ObjectStore
*store
, action_on_object_t
&action
, bool debug
)
223 int r
= _action_on_all_objects(store
, action
, debug
);
227 struct pgid_object_list
{
228 list
<pair
<coll_t
, ghobject_t
> > _objects
;
230 void insert(coll_t coll
, ghobject_t
&ghobj
) {
231 _objects
.push_back(make_pair(coll
, ghobj
));
234 void dump(Formatter
*f
, bool human_readable
) const {
236 f
->open_array_section("pgid_objects");
237 for (list
<pair
<coll_t
, ghobject_t
> >::const_iterator i
= _objects
.begin();
240 f
->open_array_section("pgid_object");
242 bool is_pg
= i
->first
.is_pg(&pgid
);
244 f
->dump_string("pgid", stringify(pgid
));
245 if (!is_pg
|| !human_readable
)
246 f
->dump_string("coll", i
->first
.to_str());
247 f
->open_object_section("ghobject");
251 if (human_readable
) {
256 if (!human_readable
) {
264 struct lookup_ghobject
: public action_on_object_t
{
265 pgid_object_list _objects
;
267 const boost::optional
<std::string
> _namespace
;
270 lookup_ghobject(const string
& name
, const boost::optional
<std::string
>& nspace
, bool need_snapset
= false) : _name(name
),
271 _namespace(nspace
), _need_snapset(need_snapset
) { }
273 void call(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, object_info_t
&oi
) override
{
274 if (_need_snapset
&& !ghobj
.hobj
.has_snapset())
276 if ((_name
.length() == 0 || ghobj
.hobj
.oid
.name
== _name
) &&
277 (!_namespace
|| ghobj
.hobj
.nspace
== _namespace
))
278 _objects
.insert(coll
, ghobj
);
283 return _objects
._objects
.size();
286 pair
<coll_t
, ghobject_t
> pop() {
287 pair
<coll_t
, ghobject_t
> front
= _objects
._objects
.front();
288 _objects
._objects
.pop_front();
292 void dump(Formatter
*f
, bool human_readable
) const {
293 _objects
.dump(f
, human_readable
);
297 struct lookup_slow_ghobject
: public action_on_object_t
{
310 lookup_slow_ghobject(const string
& name
, double _threshold
) :
311 _name(name
), threshold(_threshold
) { }
313 void call(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, object_info_t
&oi
) override
{
314 ObjectMap::ObjectMapIterator iter
;
315 auto start1
= mono_clock::now();
316 ceph::signedspan first_seek_time
= start1
- start1
;
317 ceph::signedspan last_seek_time
= first_seek_time
;
318 ceph::signedspan total_time
= first_seek_time
;
320 auto ch
= store
->open_collection(coll
);
321 iter
= store
->get_omap_iterator(ch
, ghobj
);
323 cerr
<< "omap_get_iterator: " << cpp_strerror(ENOENT
)
328 auto start
= mono_clock::now();
329 iter
->seek_to_first();
330 first_seek_time
= mono_clock::now() - start
;
332 while(iter
->valid()) {
333 start
= mono_clock::now();
335 last_seek_time
= mono_clock::now() - start
;
339 if (coll
!= last_coll
) {
340 cerr
<< ">>> inspecting coll" << coll
<< std::endl
;
344 total_time
= mono_clock::now() - start1
;
345 if ( total_time
>= make_timespan(threshold
)) {
346 _objects
.emplace_back(coll
, ghobj
,
347 first_seek_time
, last_seek_time
, total_time
,
348 url_escape(iter
->tail_key()));
349 cerr
<< ">>>>> found obj " << ghobj
350 << " first_seek_time "
351 << std::chrono::duration_cast
<std::chrono::seconds
>(first_seek_time
).count()
352 << " last_seek_time "
353 << std::chrono::duration_cast
<std::chrono::seconds
>(last_seek_time
).count()
355 << std::chrono::duration_cast
<std::chrono::seconds
>(total_time
).count()
356 << " tail key: " << url_escape(iter
->tail_key())
363 return _objects
.size();
366 void dump(Formatter
*f
, bool human_readable
) const {
368 f
->open_array_section("objects");
369 for (auto i
= _objects
.begin();
372 f
->open_array_section("object");
375 ceph::signedspan first_seek_time
;
376 ceph::signedspan last_seek_time
;
377 ceph::signedspan total_time
;
379 std::tie(coll
, ghobj
, first_seek_time
, last_seek_time
, total_time
, tail_key
) = *i
;
382 bool is_pg
= coll
.is_pg(&pgid
);
384 f
->dump_string("pgid", stringify(pgid
));
385 if (!is_pg
|| !human_readable
)
386 f
->dump_string("coll", coll
.to_str());
387 f
->dump_object("ghobject", ghobj
);
388 f
->open_object_section("times");
389 f
->dump_int("first_seek_time",
390 std::chrono::duration_cast
<std::chrono::seconds
>(first_seek_time
).count());
391 f
->dump_int("last_seek_time",
392 std::chrono::duration_cast
<std::chrono::seconds
>
393 (last_seek_time
).count());
394 f
->dump_int("total_time",
395 std::chrono::duration_cast
<std::chrono::seconds
>(total_time
).count());
396 f
->dump_string("tail_key", tail_key
);
400 if (human_readable
) {
405 if (!human_readable
) {
// Descriptor handed to write_section()/write_simple() by the export
// routines in this file; starts out as fd_none.
413 int file_fd
= fd_none
;
// NOTE(review): no use of this flag is visible in this part of the file;
// presumably it is set from a command-line option -- confirm.
416 bool no_superblock
= false;
420 static int get_fd_data(int fd
, bufferlist
&bl
)
424 ssize_t bytes
= bl
.read_fd(fd
, max_read
);
426 cerr
<< "read_fd error " << cpp_strerror(bytes
) << std::endl
;
436 ceph_assert(bl
.length() == total
);
440 int get_log(CephContext
*cct
, ObjectStore
*fs
, __u8 struct_ver
,
441 spg_t pgid
, const pg_info_t
&info
,
442 PGLog::IndexedLog
&log
, pg_missing_t
&missing
)
445 auto ch
= fs
->open_collection(coll_t(pgid
));
450 ceph_assert(struct_ver
> 0);
451 PGLog::read_log_and_missing(
453 pgid
.make_pgmeta_oid(),
456 g_ceph_context
->_conf
->osd_ignore_stale_divergent_priors
);
457 if (debug
&& oss
.str().size())
458 cerr
<< oss
.str() << std::endl
;
460 catch (const buffer::error
&e
) {
461 cerr
<< "read_log_and_missing threw exception error " << e
.what() << std::endl
;
467 void dump_log(Formatter
*formatter
, ostream
&out
, pg_log_t
&log
,
468 pg_missing_t
&missing
)
470 formatter
->open_object_section("op_log");
471 formatter
->open_object_section("pg_log_t");
473 formatter
->close_section();
474 formatter
->flush(out
);
475 formatter
->open_object_section("pg_missing_t");
476 missing
.dump(formatter
);
477 formatter
->close_section();
478 formatter
->close_section();
479 formatter
->flush(out
);
482 //Based on part of OSD::load_pgs()
483 int finish_remove_pgs(ObjectStore
*store
)
486 int r
= store
->list_collections(ls
);
488 cerr
<< "finish_remove_pgs: failed to list pgs: " << cpp_strerror(r
)
493 for (vector
<coll_t
>::iterator it
= ls
.begin();
498 if (it
->is_temp(&pgid
) ||
499 (it
->is_pg(&pgid
) && PG::_has_removal_flag(store
, pgid
))) {
500 cout
<< "finish_remove_pgs " << *it
<< " removing " << pgid
<< std::endl
;
501 OSD::recursive_remove_collection(g_ceph_context
, store
, pgid
, *it
);
505 //cout << "finish_remove_pgs ignoring unrecognized " << *it << std::endl;
// Save the current diagnostic state and ignore -Wdeprecated-declarations
// for the code that follows, up to the matching "diagnostic pop".
// -Wpragmas is ignored first, presumably so that compilers which do not
// recognize these pragmas stay quiet -- TODO confirm.
510 #pragma GCC diagnostic ignored "-Wpragmas"
511 #pragma GCC diagnostic push
512 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
514 int mark_pg_for_removal(ObjectStore
*fs
, spg_t pgid
, ObjectStore::Transaction
*t
)
516 pg_info_t
info(pgid
);
518 ghobject_t
pgmeta_oid(info
.pgid
.make_pgmeta_oid());
520 epoch_t map_epoch
= 0;
521 int r
= PG::peek_map_epoch(fs
, pgid
, &map_epoch
);
523 cerr
<< __func__
<< " warning: peek_map_epoch reported error" << std::endl
;
524 PastIntervals past_intervals
;
526 r
= PG::read_info(fs
, pgid
, coll
, info
, past_intervals
, struct_v
);
528 cerr
<< __func__
<< " error on read_info " << cpp_strerror(r
) << std::endl
;
531 ceph_assert(struct_v
>= 8);
533 cout
<< "setting '_remove' omap key" << std::endl
;
534 map
<string
,bufferlist
> values
;
535 encode((char)1, values
["_remove"]);
536 t
->omap_setkeys(coll
, pgmeta_oid
, values
);
// Restore the diagnostic state saved by the earlier "diagnostic push"
// and report -Wpragmas as a warning again.
540 #pragma GCC diagnostic pop
541 #pragma GCC diagnostic warning "-Wpragmas"
543 template<typename Func
>
544 void wait_until_done(ObjectStore::Transaction
* txn
, Func
&& func
)
546 bool finished
= false;
547 std::condition_variable cond
;
549 txn
->register_on_complete(make_lambda_context([&](int) {
550 std::unique_lock lock
{m
};
555 std::unique_lock lock
{m
};
556 cond
.wait(lock
, [&] {return finished
;});
559 int initiate_new_remove_pg(ObjectStore
*store
, spg_t r_pgid
)
562 finish_remove_pgs(store
);
563 if (!store
->collection_exists(coll_t(r_pgid
)))
566 cout
<< " marking collection for removal" << std::endl
;
569 ObjectStore::Transaction rmt
;
570 int r
= mark_pg_for_removal(store
, r_pgid
, &rmt
);
574 ObjectStore::CollectionHandle ch
= store
->open_collection(coll_t(r_pgid
));
575 store
->queue_transaction(ch
, std::move(rmt
));
576 finish_remove_pgs(store
);
580 int write_info(ObjectStore::Transaction
&t
, epoch_t epoch
, pg_info_t
&info
,
581 PastIntervals
&past_intervals
)
584 coll_t
coll(info
.pgid
);
585 ghobject_t
pgmeta_oid(info
.pgid
.make_pgmeta_oid());
586 map
<string
,bufferlist
> km
;
587 string key_to_remove
;
588 pg_info_t last_written_info
;
589 int ret
= prepare_info_keymap(
597 if (ret
) cerr
<< "Failed to write info" << std::endl
;
598 t
.omap_setkeys(coll
, pgmeta_oid
, km
);
599 if (!key_to_remove
.empty()) {
600 t
.omap_rmkey(coll
, pgmeta_oid
, key_to_remove
);
// Map keyed by eversion_t with hobject_t values; write_pg() takes one by
// reference as its `divergent` parameter.
605 typedef map
<eversion_t
, hobject_t
> divergent_priors_t
;
607 int write_pg(ObjectStore::Transaction
&t
, epoch_t epoch
, pg_info_t
&info
,
608 pg_log_t
&log
, PastIntervals
&past_intervals
,
609 divergent_priors_t
&divergent
,
610 pg_missing_t
&missing
)
612 cout
<< __func__
<< " epoch " << epoch
<< " info " << info
<< std::endl
;
613 int ret
= write_info(t
, epoch
, info
, past_intervals
);
617 coll_t
coll(info
.pgid
);
618 map
<string
,bufferlist
> km
;
619 const bool require_rollback
= !info
.pgid
.is_no_shard();
620 if (!divergent
.empty()) {
621 ceph_assert(missing
.get_items().empty());
622 PGLog::write_log_and_missing_wo_missing(
623 t
, &km
, log
, coll
, info
.pgid
.make_pgmeta_oid(), divergent
,
626 pg_missing_tracker_t
tmissing(missing
);
627 bool rebuilt_missing_set_with_deletes
= missing
.may_include_deletes
;
628 PGLog::write_log_and_missing(
629 t
, &km
, log
, coll
, info
.pgid
.make_pgmeta_oid(), tmissing
,
631 &rebuilt_missing_set_with_deletes
);
633 t
.omap_setkeys(coll
, info
.pgid
.make_pgmeta_oid(), km
);
637 int do_trim_pg_log(ObjectStore
*store
, const coll_t
&coll
,
638 pg_info_t
&info
, const spg_t
&pgid
,
640 PastIntervals
&past_intervals
)
642 ghobject_t oid
= pgid
.make_pgmeta_oid();
644 auto ch
= store
->open_collection(coll
);
645 int r
= store
->stat(ch
, oid
, &st
);
647 ceph_assert(st
.st_size
== 0);
649 cerr
<< "Log bounds are: " << "(" << info
.log_tail
<< ","
650 << info
.last_update
<< "]" << std::endl
;
652 uint64_t max_entries
= g_ceph_context
->_conf
->osd_max_pg_log_entries
;
653 if (info
.last_update
.version
- info
.log_tail
.version
<= max_entries
) {
654 cerr
<< "Log not larger than osd_max_pg_log_entries " << max_entries
<< std::endl
;
658 ceph_assert(info
.last_update
.version
> max_entries
);
659 version_t trim_to
= info
.last_update
.version
- max_entries
;
660 size_t trim_at_once
= g_ceph_context
->_conf
->osd_pg_log_trim_max
;
665 // gather keys so we can delete them in a batch without
666 // affecting the iterator
667 set
<string
> keys_to_trim
;
669 ObjectMap::ObjectMapIterator p
= store
->get_omap_iterator(ch
, oid
);
672 for (p
->seek_to_first(); p
->valid(); p
->next()) {
673 if (p
->key()[0] == '_')
675 if (p
->key() == "can_rollback_to")
677 if (p
->key() == "divergent_priors")
679 if (p
->key() == "rollback_info_trimmed_to")
681 if (p
->key() == "may_include_deletes_in_missing")
683 if (p
->key().substr(0, 7) == string("missing"))
685 if (p
->key().substr(0, 4) == string("dup_"))
688 bufferlist bl
= p
->value();
689 auto bp
= bl
.cbegin();
692 e
.decode_with_checksum(bp
);
693 } catch (const buffer::error
&e
) {
694 cerr
<< "Error reading pg log entry: " << e
.what() << std::endl
;
697 cerr
<< "read entry " << e
<< std::endl
;
699 if (e
.version
.version
> trim_to
) {
703 keys_to_trim
.insert(p
->key());
704 new_tail
= e
.version
;
705 if (keys_to_trim
.size() >= trim_at_once
)
711 } // deconstruct ObjectMapIterator
714 if (!dry_run
&& !keys_to_trim
.empty()) {
715 cout
<< "Removing keys " << *keys_to_trim
.begin() << " - " << *keys_to_trim
.rbegin() << std::endl
;
716 ObjectStore::Transaction t
;
717 t
.omap_rmkeys(coll
, oid
, keys_to_trim
);
718 store
->queue_transaction(ch
, std::move(t
));
723 // update pg info with new tail
724 if (!dry_run
&& new_tail
!= eversion_t()) {
725 info
.log_tail
= new_tail
;
726 ObjectStore::Transaction t
;
727 int ret
= write_info(t
, map_epoch
, info
, past_intervals
);
730 store
->queue_transaction(ch
, std::move(t
));
734 // compact the db since we just removed a bunch of data
735 cerr
<< "Finished trimming, now compacting..." << std::endl
;
741 int do_trim_pg_log_dups(ObjectStore
*store
, const coll_t
&coll
,
742 pg_info_t
&info
, const spg_t
&pgid
,
744 PastIntervals
&past_intervals
)
746 ghobject_t oid
= pgid
.make_pgmeta_oid();
748 auto ch
= store
->open_collection(coll
);
749 int r
= store
->stat(ch
, oid
, &st
);
751 ceph_assert(st
.st_size
== 0);
753 const size_t max_dup_entries
= g_ceph_context
->_conf
->osd_pg_log_dups_tracked
;
754 ceph_assert(max_dup_entries
> 0);
755 const size_t max_chunk_size
= g_ceph_context
->_conf
->osd_pg_log_trim_max
;
756 ceph_assert(max_chunk_size
> 0);
758 cout
<< "max_dup_entries=" << max_dup_entries
759 << " max_chunk_size=" << max_chunk_size
<< std::endl
;
761 cout
<< "Dry run enabled, so when many chunks are needed,"
762 << " the trimming will never stop!" << std::endl
;
765 set
<string
> keys_to_keep
;
766 size_t num_removed
= 0;
768 set
<string
> keys_to_trim
;
770 ObjectMap::ObjectMapIterator p
= store
->get_omap_iterator(ch
, oid
);
773 for (p
->seek_to_first(); p
->valid(); p
->next()) {
774 if (p
->key()[0] == '_')
776 if (p
->key() == "can_rollback_to")
778 if (p
->key() == "divergent_priors")
780 if (p
->key() == "rollback_info_trimmed_to")
782 if (p
->key() == "may_include_deletes_in_missing")
784 if (p
->key().substr(0, 7) == string("missing"))
786 if (p
->key().substr(0, 4) != string("dup_"))
788 keys_to_keep
.insert(p
->key());
789 if (keys_to_keep
.size() > max_dup_entries
) {
790 auto oldest_to_keep
= keys_to_keep
.begin();
791 keys_to_trim
.emplace(*oldest_to_keep
);
792 keys_to_keep
.erase(oldest_to_keep
);
794 if (keys_to_trim
.size() >= max_chunk_size
) {
798 } // deconstruct ObjectMapIterator
800 num_removed
= keys_to_trim
.size();
801 if (!dry_run
&& !keys_to_trim
.empty()) {
802 cout
<< "Removing keys " << *keys_to_trim
.begin() << " - " << *keys_to_trim
.rbegin() << std::endl
;
803 ObjectStore::Transaction t
;
804 t
.omap_rmkeys(coll
, oid
, keys_to_trim
);
805 store
->queue_transaction(ch
, std::move(t
));
808 } while (num_removed
== max_chunk_size
);
810 // compact the db since we just removed a bunch of data
811 cerr
<< "Finished trimming, now compacting..." << std::endl
;
// Upper bound on the number of omap entries get_omap_batch() inserts per
// call: its loop counter starts at this value and counts down to zero.
817 const int OMAP_BATCH_SIZE
= 25;
818 void get_omap_batch(ObjectMap::ObjectMapIterator
&iter
, map
<string
, bufferlist
> &oset
)
821 for (int count
= OMAP_BATCH_SIZE
; count
&& iter
->valid(); --count
, iter
->next()) {
822 oset
.insert(pair
<string
, bufferlist
>(iter
->key(), iter
->value()));
826 int ObjectStoreTool::export_file(ObjectStore
*store
, coll_t cid
, ghobject_t
&obj
)
832 auto ch
= store
->open_collection(cid
);
833 int ret
= store
->stat(ch
, obj
, &st
);
837 cerr
<< "Read " << obj
<< std::endl
;
841 cerr
<< "size=" << total
<< std::endl
;
843 object_begin
objb(obj
);
848 ret
= store
->getattr(ch
, obj
, OI_ATTR
, bp
);
850 cerr
<< "getattr failure object_info " << ret
<< std::endl
;
856 cerr
<< "object_info: " << objb
.oi
<< std::endl
;
859 // NOTE: we include whiteouts, lost, etc.
861 ret
= write_section(TYPE_OBJECT_BEGIN
, objb
, file_fd
);
866 bufferlist rawdatabl
;
869 mysize_t len
= max_read
;
873 ret
= store
->read(ch
, obj
, offset
, len
, rawdatabl
);
879 data_section
dblock(offset
, len
, rawdatabl
);
881 cerr
<< "data section offset=" << offset
<< " len=" << len
<< std::endl
;
886 ret
= write_section(TYPE_DATA
, dblock
, file_fd
);
890 //Handle attrs for this object
891 map
<string
,bufferptr
,less
<>> aset
;
892 ret
= store
->getattrs(ch
, obj
, aset
);
894 attr_section
as(aset
);
895 ret
= write_section(TYPE_ATTRS
, as
, file_fd
);
900 cerr
<< "attrs size " << aset
.size() << std::endl
;
903 //Handle omap information
905 ret
= store
->omap_get_header(ch
, obj
, &hdrbuf
, true);
907 cerr
<< "omap_get_header: " << cpp_strerror(ret
) << std::endl
;
911 omap_hdr_section
ohs(hdrbuf
);
912 ret
= write_section(TYPE_OMAP_HDR
, ohs
, file_fd
);
916 ObjectMap::ObjectMapIterator iter
= store
->get_omap_iterator(ch
, obj
);
919 cerr
<< "omap_get_iterator: " << cpp_strerror(ret
) << std::endl
;
922 iter
->seek_to_first();
924 map
<string
, bufferlist
> out
;
925 while(iter
->valid()) {
926 get_omap_batch(iter
, out
);
928 if (out
.empty()) break;
930 mapcount
+= out
.size();
931 omap_section
oms(out
);
932 ret
= write_section(TYPE_OMAP
, oms
, file_fd
);
937 cerr
<< "omap map size " << mapcount
<< std::endl
;
939 ret
= write_simple(TYPE_OBJECT_END
, file_fd
);
946 int ObjectStoreTool::export_files(ObjectStore
*store
, coll_t coll
)
949 auto ch
= store
->open_collection(coll
);
950 while (!next
.is_max()) {
951 vector
<ghobject_t
> objects
;
952 int r
= store
->collection_list(ch
, next
, ghobject_t::get_max(), 300,
956 for (vector
<ghobject_t
>::iterator i
= objects
.begin();
959 ceph_assert(!i
->hobj
.is_meta());
960 if (i
->is_pgmeta() || i
->hobj
.is_temp() || !i
->is_no_gen()) {
963 r
= export_file(store
, coll
, *i
);
971 int set_inc_osdmap(ObjectStore
*store
, epoch_t e
, bufferlist
& bl
, bool force
) {
972 OSDMap::Incremental inc
;
973 auto it
= bl
.cbegin();
977 } else if (e
!= inc
.epoch
) {
978 cerr
<< "incremental.epoch mismatch: "
979 << inc
.epoch
<< " != " << e
<< std::endl
;
981 cerr
<< "But will continue anyway." << std::endl
;
986 auto ch
= store
->open_collection(coll_t::meta());
987 const ghobject_t inc_oid
= OSD::get_inc_osdmap_pobject_name(e
);
988 if (!store
->exists(ch
, inc_oid
)) {
989 cerr
<< "inc-osdmap (" << inc_oid
<< ") does not exist." << std::endl
;
993 cout
<< "Creating a new epoch." << std::endl
;
997 ObjectStore::Transaction t
;
998 t
.write(coll_t::meta(), inc_oid
, 0, bl
.length(), bl
);
999 t
.truncate(coll_t::meta(), inc_oid
, bl
.length());
1000 store
->queue_transaction(ch
, std::move(t
));
1004 int get_inc_osdmap(ObjectStore
*store
, epoch_t e
, bufferlist
& bl
)
1006 auto ch
= store
->open_collection(coll_t::meta());
1008 OSD::get_inc_osdmap_pobject_name(e
),
1015 int set_osdmap(ObjectStore
*store
, epoch_t e
, bufferlist
& bl
, bool force
) {
1019 e
= osdmap
.get_epoch();
1020 } else if (e
!= osdmap
.get_epoch()) {
1021 cerr
<< "osdmap.epoch mismatch: "
1022 << e
<< " != " << osdmap
.get_epoch() << std::endl
;
1024 cerr
<< "But will continue anyway." << std::endl
;
1029 auto ch
= store
->open_collection(coll_t::meta());
1030 const ghobject_t full_oid
= OSD::get_osdmap_pobject_name(e
);
1031 if (!store
->exists(ch
, full_oid
)) {
1032 cerr
<< "osdmap (" << full_oid
<< ") does not exist." << std::endl
;
1036 cout
<< "Creating a new epoch." << std::endl
;
1040 ObjectStore::Transaction t
;
1041 t
.write(coll_t::meta(), full_oid
, 0, bl
.length(), bl
);
1042 t
.truncate(coll_t::meta(), full_oid
, bl
.length());
1043 store
->queue_transaction(ch
, std::move(t
));
1047 int get_osdmap(ObjectStore
*store
, epoch_t e
, OSDMap
&osdmap
, bufferlist
& bl
)
1049 ObjectStore::CollectionHandle ch
= store
->open_collection(coll_t::meta());
1050 bool found
= store
->read(
1051 ch
, OSD::get_osdmap_pobject_name(e
), 0, 0, bl
) >= 0;
1053 cerr
<< "Can't find OSDMap for pg epoch " << e
<< std::endl
;
1058 cerr
<< osdmap
<< std::endl
;
1062 int get_pg_num_history(ObjectStore
*store
, pool_pg_num_history_t
*h
)
1064 ObjectStore::CollectionHandle ch
= store
->open_collection(coll_t::meta());
1066 auto pghist
= OSD::make_pg_num_history_oid();
1067 int r
= store
->read(ch
, pghist
, 0, 0, bl
, 0);
1068 if (r
>= 0 && bl
.length() > 0) {
1069 auto p
= bl
.cbegin();
1072 cout
<< __func__
<< " pg_num_history " << *h
<< std::endl
;
1076 int add_osdmap(ObjectStore
*store
, metadata_section
&ms
)
1078 return get_osdmap(store
, ms
.map_epoch
, ms
.osdmap
, ms
.osdmap_bl
);
1081 int ObjectStoreTool::do_export(
1082 CephContext
*cct
, ObjectStore
*fs
, coll_t coll
, spg_t pgid
,
1083 pg_info_t
&info
, epoch_t map_epoch
, __u8 struct_ver
,
1084 const OSDSuperblock
& superblock
,
1085 PastIntervals
&past_intervals
)
1087 PGLog::IndexedLog log
;
1088 pg_missing_t missing
;
1090 cerr
<< "Exporting " << pgid
<< " info " << info
<< std::endl
;
1092 int ret
= get_log(cct
, fs
, struct_ver
, pgid
, info
, log
, missing
);
1097 Formatter
*formatter
= Formatter::create("json-pretty");
1098 ceph_assert(formatter
);
1099 dump_log(formatter
, cerr
, log
, missing
);
1104 pg_begin
pgb(pgid
, superblock
);
1105 // Special case: If replicated pg don't require the importing OSD to have shard feature
1106 if (pgid
.is_no_shard()) {
1107 pgb
.superblock
.compat_features
.incompat
.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS
);
1109 ret
= write_section(TYPE_PG_BEGIN
, pgb
, file_fd
);
1113 // The metadata_section is now before files, so import can detect
1114 // errors and abort without wasting time.
1115 metadata_section
ms(
1122 ret
= add_osdmap(fs
, ms
);
1125 ret
= write_section(TYPE_PG_METADATA
, ms
, file_fd
);
1129 ret
= export_files(fs
, coll
);
1131 cerr
<< "export_files error " << ret
<< std::endl
;
1135 ret
= write_simple(TYPE_PG_END
, file_fd
);
1142 int dump_data(Formatter
*formatter
, bufferlist
&bl
)
1144 auto ebliter
= bl
.cbegin();
1148 formatter
->open_object_section("data_block");
1149 formatter
->dump_unsigned("offset", ds
.offset
);
1150 formatter
->dump_unsigned("len", ds
.len
);
1151 // XXX: Add option to dump data like od -cx ?
1152 formatter
->close_section();
1153 formatter
->flush(cout
);
1157 int get_data(ObjectStore
*store
, coll_t coll
, ghobject_t hoid
,
1158 ObjectStore::Transaction
*t
, bufferlist
&bl
)
1160 auto ebliter
= bl
.cbegin();
1165 cerr
<< "\tdata: offset " << ds
.offset
<< " len " << ds
.len
<< std::endl
;
1166 t
->write(coll
, hoid
, ds
.offset
, ds
.len
, ds
.databl
);
1171 Formatter
*formatter
, ghobject_t hoid
,
1174 auto ebliter
= bl
.cbegin();
1178 // This could have been handled in the caller if we didn't need to
1179 // support exports that didn't include object_info_t in object_begin.
1180 if (hoid
.generation
== ghobject_t::NO_GEN
&&
1181 hoid
.hobj
.is_head()) {
1182 map
<string
,bufferlist
>::iterator mi
= as
.data
.find(SS_ATTR
);
1183 if (mi
!= as
.data
.end()) {
1185 auto p
= mi
->second
.cbegin();
1187 formatter
->open_object_section("snapset");
1188 snapset
.dump(formatter
);
1189 formatter
->close_section();
1191 formatter
->open_object_section("snapset");
1192 formatter
->dump_string("error", "missing SS_ATTR");
1193 formatter
->close_section();
1197 formatter
->open_object_section("attrs");
1198 formatter
->open_array_section("user");
1199 for (auto kv
: as
.data
) {
1200 // Skip system attributes
1201 if (('_' != kv
.first
.at(0)) || kv
.first
.size() == 1)
1203 formatter
->open_object_section("user_attr");
1204 formatter
->dump_string("name", kv
.first
.substr(1));
1206 formatter
->dump_string("value", cleanbin(kv
.second
, b64
));
1207 formatter
->dump_bool("Base64", b64
);
1208 formatter
->close_section();
1210 formatter
->close_section();
1211 formatter
->open_array_section("system");
1212 for (auto kv
: as
.data
) {
1213 // Skip user attributes
1214 if (('_' == kv
.first
.at(0)) && kv
.first
.size() != 1)
1216 formatter
->open_object_section("sys_attr");
1217 formatter
->dump_string("name", kv
.first
);
1218 formatter
->close_section();
1220 formatter
->close_section();
1221 formatter
->close_section();
1222 formatter
->flush(cout
);
1228 ObjectStore
*store
, coll_t coll
, ghobject_t hoid
,
1229 ObjectStore::Transaction
*t
, bufferlist
&bl
,
1230 OSDriver
&driver
, SnapMapper
&snap_mapper
)
1232 auto ebliter
= bl
.cbegin();
1236 auto ch
= store
->open_collection(coll
);
1238 cerr
<< "\tattrs: len " << as
.data
.size() << std::endl
;
1239 t
->setattrs(coll
, hoid
, as
.data
);
1241 // This could have been handled in the caller if we didn't need to
1242 // support exports that didn't include object_info_t in object_begin.
1243 if (hoid
.generation
== ghobject_t::NO_GEN
&&
1244 hoid
.hobj
.is_head()) {
1245 map
<string
,bufferlist
>::iterator mi
= as
.data
.find(SS_ATTR
);
1246 if (mi
!= as
.data
.end()) {
1248 auto p
= mi
->second
.cbegin();
1250 cout
<< "snapset " << snapset
<< std::endl
;
1251 for (auto& p
: snapset
.clone_snaps
) {
1252 ghobject_t clone
= hoid
;
1253 clone
.hobj
.snap
= p
.first
;
1254 set
<snapid_t
> snaps(p
.second
.begin(), p
.second
.end());
1255 if (!store
->exists(ch
, clone
)) {
1256 // no clone, skip. this is probably a cache pool. this works
1257 // because we use a separate transaction per object and clones
1258 // come before head in the archive.
1260 cerr
<< "\tskipping missing " << clone
<< " (snaps "
1261 << snaps
<< ")" << std::endl
;
1265 cerr
<< "\tsetting " << clone
.hobj
<< " snaps " << snaps
1267 OSDriver::OSTransaction
_t(driver
.get_transaction(t
));
1268 ceph_assert(!snaps
.empty());
1269 snap_mapper
.add_oid(clone
.hobj
, snaps
, &_t
);
1272 cerr
<< "missing SS_ATTR on " << hoid
<< std::endl
;
1278 int dump_omap_hdr(Formatter
*formatter
, bufferlist
&bl
)
1280 auto ebliter
= bl
.cbegin();
1281 omap_hdr_section oh
;
1284 formatter
->open_object_section("omap_header");
1285 formatter
->dump_string("value", string(oh
.hdr
.c_str(), oh
.hdr
.length()));
1286 formatter
->close_section();
1287 formatter
->flush(cout
);
1291 int get_omap_hdr(ObjectStore
*store
, coll_t coll
, ghobject_t hoid
,
1292 ObjectStore::Transaction
*t
, bufferlist
&bl
)
1294 auto ebliter
= bl
.cbegin();
1295 omap_hdr_section oh
;
1299 cerr
<< "\tomap header: " << string(oh
.hdr
.c_str(), oh
.hdr
.length())
1301 t
->omap_setheader(coll
, hoid
, oh
.hdr
);
1305 int dump_omap(Formatter
*formatter
, bufferlist
&bl
)
1307 auto ebliter
= bl
.cbegin();
1311 formatter
->open_object_section("omaps");
1312 formatter
->dump_unsigned("count", os
.omap
.size());
1313 formatter
->open_array_section("data");
1314 for (auto o
: os
.omap
) {
1315 formatter
->open_object_section("omap");
1316 formatter
->dump_string("name", o
.first
);
1318 formatter
->dump_string("value", cleanbin(o
.second
, b64
));
1319 formatter
->dump_bool("Base64", b64
);
1320 formatter
->close_section();
1322 formatter
->close_section();
1323 formatter
->close_section();
1324 formatter
->flush(cout
);
// Log the number of omap entries to cerr and add
// omap_setkeys(coll, hoid, os.omap) to transaction `t`.
// `store` is not referenced by any line visible in this excerpt.
// NOTE(review): the declaration/decoding of `os` is not visible here -- confirm.
1328 int get_omap(ObjectStore
*store
, coll_t coll
, ghobject_t hoid
,
1329 ObjectStore::Transaction
*t
, bufferlist
&bl
)
1331 auto ebliter
= bl
.cbegin();
1336 cerr
<< "\tomap: size " << os
.omap
.size() << std::endl
;
1337 t
->omap_setkeys(coll
, hoid
, os
.omap
);
// Dump one exported object through `formatter`.
// Visible flow:
//  - an export containing a temporary object is reported as an error;
//  - an "object" section is opened with nested "oid" (ob.hoid) and
//    "object_info" (ob.oi) sections;
//  - sections are read with read_section() and handed to dump_data,
//    dump_attrs, dump_omap_hdr or dump_omap; a non-zero result from any of
//    them is returned immediately;
//  - section types >= END_OF_TYPES are reported as skipped on cout.
// NOTE(review): the loop/switch framing and the declarations of `ob`, `type`
// and `ebl` are not visible in this excerpt.
1341 int ObjectStoreTool::dump_object(Formatter
*formatter
,
1344 auto ebliter
= bl
.cbegin();
1348 if (ob
.hoid
.hobj
.is_temp()) {
1349 cerr
<< "ERROR: Export contains temporary object '" << ob
.hoid
<< "'" << std::endl
;
1353 formatter
->open_object_section("object");
1354 formatter
->open_object_section("oid");
1355 ob
.hoid
.dump(formatter
);
1356 formatter
->close_section();
1357 formatter
->open_object_section("object_info");
1358 ob
.oi
.dump(formatter
);
1359 formatter
->close_section();
1365 int ret
= read_section(&type
, &ebl
);
1369 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1370 //cout << "\t\tsection size " << ebl.length() << std::endl;
1371 if (type
>= END_OF_TYPES
) {
1372 cout
<< "Skipping unknown object section type" << std::endl
;
1378 ret
= dump_data(formatter
, ebl
);
1379 if (ret
) return ret
;
1383 ret
= dump_attrs(formatter
, ob
.hoid
, ebl
);
1384 if (ret
) return ret
;
1388 ret
= dump_omap_hdr(formatter
, ebl
);
1389 if (ret
) return ret
;
1393 ret
= dump_omap(formatter
, ebl
);
1394 if (ret
) return ret
;
1396 case TYPE_OBJECT_END
:
1400 cerr
<< "Unknown section type " << type
<< std::endl
;
// Closes the "object" section opened above.
1404 formatter
->close_section();
// Import one exported object into collection `coll`.
// Visible flow:
//  - an export containing a temporary object is reported as an error;
//  - unless the object lives in the configured osd_hit_set_namespace, its PG
//    is recomputed from `origmap`; a collection that is not a PG or a shard
//    mismatch is reported as an internal error, and an object that maps to a
//    different PG is skipped with *skipped_objects set to true;
//  - the object is touched in a local transaction, then each section read by
//    read_section() is applied via get_data / get_attrs / get_omap_hdr /
//    get_omap (a non-zero result is returned immediately);
//  - finally the transaction is queued on `store` inside wait_until_done().
// NOTE(review): several parameters (driver, mapper, coll) and the loop/switch
// framing are on lines not visible in this excerpt.
1408 int ObjectStoreTool::get_object(ObjectStore
*store
,
1412 bufferlist
&bl
, OSDMap
&origmap
,
1413 bool *skipped_objects
)
1415 ObjectStore::Transaction tran
;
1416 ObjectStore::Transaction
*t
= &tran
;
1417 auto ebliter
= bl
.cbegin();
1421 if (ob
.hoid
.hobj
.is_temp()) {
1422 cerr
<< "ERROR: Export contains temporary object '" << ob
.hoid
<< "'" << std::endl
;
1425 ceph_assert(g_ceph_context
);
1427 auto ch
= store
->open_collection(coll
);
// Objects in the hit-set namespace bypass the PG-membership check below.
1428 if (ob
.hoid
.hobj
.nspace
!= g_ceph_context
->_conf
->osd_hit_set_namespace
) {
1429 object_t oid
= ob
.hoid
.hobj
.oid
;
1430 object_locator_t
loc(ob
.hoid
.hobj
);
1431 pg_t raw_pgid
= origmap
.object_locator_to_pg(oid
, loc
);
1432 pg_t pgid
= origmap
.raw_pg_to_pg(raw_pgid
);
1435 if (coll
.is_pg(&coll_pgid
) == false) {
1436 cerr
<< "INTERNAL ERROR: Bad collection during import" << std::endl
;
1439 if (coll_pgid
.shard
!= ob
.hoid
.shard_id
) {
1440 cerr
<< "INTERNAL ERROR: Importing shard " << coll_pgid
.shard
1441 << " but object shard is " << ob
.hoid
.shard_id
<< std::endl
;
// The object maps to another PG under origmap: record that something was skipped.
1445 if (coll_pgid
.pgid
!= pgid
) {
1446 cerr
<< "Skipping object '" << ob
.hoid
<< "' which belongs in pg " << pgid
<< std::endl
;
1447 *skipped_objects
= true;
1454 t
->touch(coll
, ob
.hoid
);
1456 cout
<< "Write " << ob
.hoid
<< std::endl
;
1462 int ret
= read_section(&type
, &ebl
);
1466 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1467 //cout << "\t\tsection size " << ebl.length() << std::endl;
1468 if (type
>= END_OF_TYPES
) {
1469 cout
<< "Skipping unknown object section type" << std::endl
;
1475 ret
= get_data(store
, coll
, ob
.hoid
, t
, ebl
);
1476 if (ret
) return ret
;
1480 ret
= get_attrs(store
, coll
, ob
.hoid
, t
, ebl
, driver
, mapper
);
1481 if (ret
) return ret
;
1485 ret
= get_omap_hdr(store
, coll
, ob
.hoid
, t
, ebl
);
1486 if (ret
) return ret
;
1490 ret
= get_omap(store
, coll
, ob
.hoid
, t
, ebl
);
1491 if (ret
) return ret
;
1493 case TYPE_OBJECT_END
:
1497 cerr
<< "Unknown section type " << type
<< std::endl
;
// All per-object changes were accumulated in *t; queue them in one transaction.
1502 wait_until_done(t
, [&] {
1503 store
->queue_transaction(ch
, std::move(*t
));
// Dump the PG metadata section `ms` through `formatter`.
// Emits a "metadata_section" object with "pg_disk_version", "map_epoch" and
// nested "OSDMap", "info", "log" and "pg_missing_t" sections, flushing to
// cout along the way. past_intervals is not dumped (see the XXX below).
// After dumping, a non-zero OSDMap epoch that differs from ms.map_epoch is
// reported as a fatal error on cerr.
// NOTE(review): how `ms` is populated from `bl` is not visible in this excerpt.
1510 int dump_pg_metadata(Formatter
*formatter
, bufferlist
&bl
, metadata_section
&ms
)
1512 auto ebliter
= bl
.cbegin();
1515 formatter
->open_object_section("metadata_section");
1517 formatter
->dump_unsigned("pg_disk_version", (int)ms
.struct_ver
);
1518 formatter
->dump_unsigned("map_epoch", ms
.map_epoch
);
1520 formatter
->open_object_section("OSDMap");
1521 ms
.osdmap
.dump(formatter
);
1522 formatter
->close_section();
1523 formatter
->flush(cout
);
1526 formatter
->open_object_section("info");
1527 ms
.info
.dump(formatter
);
1528 formatter
->close_section();
1529 formatter
->flush(cout
);
1531 formatter
->open_object_section("log");
1532 ms
.log
.dump(formatter
);
1533 formatter
->close_section();
1534 formatter
->flush(cout
);
1536 formatter
->open_object_section("pg_missing_t");
1537 ms
.missing
.dump(formatter
);
1538 formatter
->close_section();
1540 // XXX: ms.past_intervals?
1542 formatter
->close_section();
1543 formatter
->flush(cout
);
// An epoch of 0 is exempt from the consistency check.
1545 if (ms
.osdmap
.get_epoch() != 0 && ms
.map_epoch
!= ms
.osdmap
.get_epoch()) {
1546 cerr
<< "FATAL: Invalid OSDMap epoch in export data" << std::endl
;
// Validate the PG metadata section of an export against the local OSD
// superblock `sb` and retarget it at `pgid`.
// Visible flow:
//  - ms.info.pgid is overwritten with `pgid` (the exported value is kept in
//    old_pgid for the log output);
//  - pgid, struct version, map epoch, OSDMap, info and log are printed
//    (plain text on cout plus JSON through a locally created JSONFormatter);
//  - errors/warnings are reported when: the OSDMap epoch is non-zero but
//    differs from ms.map_epoch; ms.map_epoch is newer than sb.current_epoch;
//    the export carries no OSDMap (epoch 0); or the export's OSDMap is older
//    than sb.oldest_map (message mentions --force);
//  - finally the import pgid, past_intervals and same_interval_since are logged.
// NOTE(review): the formatter is created with `new`; no matching release is
// visible in this excerpt -- confirm ownership in the full file.
// `store` is not referenced by any visible line.
1553 int get_pg_metadata(ObjectStore
*store
, bufferlist
&bl
, metadata_section
&ms
,
1554 const OSDSuperblock
& sb
, spg_t pgid
)
1556 auto ebliter
= bl
.cbegin();
1558 spg_t old_pgid
= ms
.info
.pgid
;
1559 ms
.info
.pgid
= pgid
;
1562 cout
<< "export pgid " << old_pgid
<< std::endl
;
1563 cout
<< "struct_v " << (int)ms
.struct_ver
<< std::endl
;
1564 cout
<< "map epoch " << ms
.map_epoch
<< std::endl
;
1567 Formatter
*formatter
= new JSONFormatter(true);
1568 formatter
->open_object_section("stuff");
1570 formatter
->open_object_section("importing OSDMap");
1571 ms
.osdmap
.dump(formatter
);
1572 formatter
->close_section();
1573 formatter
->flush(cout
);
1576 cout
<< "osd current epoch " << sb
.current_epoch
<< std::endl
;
1578 formatter
->open_object_section("info");
1579 ms
.info
.dump(formatter
);
1580 formatter
->close_section();
1581 formatter
->flush(cout
);
1584 formatter
->open_object_section("log");
1585 ms
.log
.dump(formatter
);
1586 formatter
->close_section();
1587 formatter
->flush(cout
);
1590 formatter
->close_section();
1591 formatter
->flush(cout
);
1596 if (ms
.osdmap
.get_epoch() != 0 && ms
.map_epoch
!= ms
.osdmap
.get_epoch()) {
1597 cerr
<< "FATAL: Invalid OSDMap epoch in export data" << std::endl
;
// The export must not be newer than what this OSD has seen.
1601 if (ms
.map_epoch
> sb
.current_epoch
) {
1602 cerr
<< "ERROR: Export PG's map_epoch " << ms
.map_epoch
<< " > OSD's epoch " << sb
.current_epoch
<< std::endl
;
1603 cerr
<< "The OSD you are using is older than the exported PG" << std::endl
;
1604 cerr
<< "Either use another OSD or join selected OSD to cluster to update it first" << std::endl
;
1608 // Old exports didn't include OSDMap
1609 if (ms
.osdmap
.get_epoch() == 0) {
1610 cerr
<< "WARNING: No OSDMap in old export, this is an ancient export."
1611 " Not supported." << std::endl
;
1615 if (ms
.osdmap
.get_epoch() < sb
.oldest_map
) {
1616 cerr
<< "PG export's map " << ms
.osdmap
.get_epoch()
1617 << " is older than OSD's oldest_map " << sb
.oldest_map
<< std::endl
;
1619 cerr
<< " pass --force to proceed anyway (with incomplete PastIntervals)"
1625 cerr
<< "Import pgid " << ms
.info
.pgid
<< std::endl
;
1626 cerr
<< "Previous past_intervals " << ms
.past_intervals
<< std::endl
;
1627 cerr
<< "history.same_interval_since "
1628 << ms
.info
.history
.same_interval_since
<< std::endl
;
1634 // out: pg_log_t that only has entries that apply to import_pgid using curmap
1635 // reject: Entries rejected from "in" are in the reject.log. Other fields not set.
// NOTE(review): the two lines above speak of pg_log_t / reject.log, but the
// parameters of this function are all divergent_priors_t -- the wording looks
// carried over from a log-filtering routine; read "out"/"reject" as the
// divergent_priors_t outputs.
// Visible flow: walk every entry of `in`; entries whose object is temporary
// are rejected; for objects outside `hit_set_namespace` the PG is computed
// with `curmap` and compared against import_pgid.pgid.
// NOTE(review): the statements that insert into `out` / `reject` are not
// visible in this excerpt.
1636 void filter_divergent_priors(spg_t import_pgid
, const OSDMap
&curmap
,
1637 const string
&hit_set_namespace
, const divergent_priors_t
&in
,
1638 divergent_priors_t
&out
, divergent_priors_t
&reject
)
1643 for (divergent_priors_t::const_iterator i
= in
.begin();
1644 i
!= in
.end(); ++i
) {
1646 // Reject divergent priors for temporary objects
1647 if (i
->second
.is_temp()) {
1652 if (i
->second
.nspace
!= hit_set_namespace
) {
1653 object_t oid
= i
->second
.oid
;
1654 object_locator_t
loc(i
->second
);
1655 pg_t raw_pgid
= curmap
.object_locator_to_pg(oid
, loc
);
1656 pg_t pgid
= curmap
.raw_pg_to_pg(raw_pgid
);
1658 if (import_pgid
.pgid
== pgid
) {
// Dump the contents of a PG export through `formatter`.
// Visible flow:
//  - read_super(), then reject a bad magic number or an export format
//    version newer than super_header::super_ver;
//  - open the top-level "Export" section; the first section must be
//    TYPE_PG_BEGIN (pool exports, TYPE_POOL_BEGIN, are refused);
//  - decode the pg_begin record and dump "pgid", "cluster_fsid", "features";
//  - for each following section: objects are dumped with dump_object() inside
//    an "objects" array opened on the first object; metadata is dumped with
//    dump_pg_metadata() (a warning is printed if it arrives after objects);
//    unknown types are reported;
//  - a missing metadata section is reported as an error.
// NOTE(review): loop/switch framing and declarations of `type`, `ebl`, `pgb`
// are on lines not visible in this excerpt.
1669 int ObjectStoreTool::dump_export(Formatter
*formatter
)
1673 PGLog::IndexedLog log
;
1674 //bool skipped_objects = false;
1676 int ret
= read_super();
1680 if (sh
.magic
!= super_header::super_magic
) {
1681 cerr
<< "Invalid magic number" << std::endl
;
1685 if (sh
.version
> super_header::super_ver
) {
1686 cerr
<< "Can't handle export format version=" << sh
.version
<< std::endl
;
1690 formatter
->open_object_section("Export");
1692 //First section must be TYPE_PG_BEGIN
1694 ret
= read_section(&type
, &ebl
);
1697 if (type
== TYPE_POOL_BEGIN
) {
1698 cerr
<< "Dump of pool exports not supported" << std::endl
;
1700 } else if (type
!= TYPE_PG_BEGIN
) {
1701 cerr
<< "Invalid first section type " << std::to_string(type
) << std::endl
;
1705 auto ebliter
= ebl
.cbegin();
1707 pgb
.decode(ebliter
);
1708 spg_t pgid
= pgb
.pgid
;
1710 formatter
->dump_string("pgid", stringify(pgid
));
1711 formatter
->dump_string("cluster_fsid", stringify(pgb
.superblock
.cluster_fsid
));
1712 formatter
->dump_string("features", stringify(pgb
.superblock
.compat_features
));
1715 bool found_metadata
= false;
1716 metadata_section ms
;
1717 bool objects_started
= false;
1719 ret
= read_section(&type
, &ebl
);
1724 cerr
<< "dump_export: Section type " << std::to_string(type
) << std::endl
;
1726 if (type
>= END_OF_TYPES
) {
1727 cerr
<< "Skipping unknown section type" << std::endl
;
1731 case TYPE_OBJECT_BEGIN
:
// The "objects" array is opened lazily, on the first object encountered.
1732 if (!objects_started
) {
1733 formatter
->open_array_section("objects");
1734 objects_started
= true;
1736 ret
= dump_object(formatter
, ebl
);
1737 if (ret
) return ret
;
1739 case TYPE_PG_METADATA
:
1740 if (objects_started
)
1741 cerr
<< "WARNING: metadata_section out of order" << std::endl
;
1742 ret
= dump_pg_metadata(formatter
, ebl
, ms
);
1743 if (ret
) return ret
;
1744 found_metadata
= true;
1747 if (objects_started
) {
1748 formatter
->close_section();
1753 cerr
<< "Unknown section type " << std::to_string(type
) << std::endl
;
1758 if (!found_metadata
) {
1759 cerr
<< "Missing metadata section" << std::endl
;
1763 formatter
->close_section();
1764 formatter
->flush(cout
);
// Import a PG export into `store`.
// Visible flow:
//  - finish_remove_pgs(store), read_super(), magic/version validation;
//  - the first section must be TYPE_PG_BEGIN (pool exports are refused);
//    its pg_begin record yields the pgid, which must match `pgidstr` when
//    one was supplied;
//  - the export's cluster fsid (if non-zero) must match sb.cluster_fsid, and
//    its compat features are compared against the OSD's (exit status 11 is
//    returned on the path shown after the incompatible-features message);
//  - the latest local OSDMap is loaded; a pool that no longer exists yields
//    exit status 10; an already existing PG collection is reported;
//  - sections are then processed: TYPE_PG_METADATA goes through
//    get_pg_metadata(), checks pg_num history for conflicting merge targets
//    or split children, creates the collection and marks it with a "_remove"
//    omap key; TYPE_OBJECT_BEGIN goes through get_object() and asserts that
//    metadata was already seen;
//  - afterwards the log, divergent priors and missing set are filtered to
//    the objects that belong to `pgid`, stats are invalidated when objects
//    were skipped, and the "_remove" key is cleared in a final transaction.
// NOTE(review): many framing lines (loop/switch, returns, several
// declarations and call arguments) are not visible in this excerpt; the
// summary above covers only what the visible lines show.
1769 int ObjectStoreTool::do_import(ObjectStore
*store
, OSDSuperblock
& sb
,
1770 bool force
, std::string pgidstr
)
1774 PGLog::IndexedLog log
;
1775 bool skipped_objects
= false;
1778 finish_remove_pgs(store
);
1780 int ret
= read_super();
1784 if (sh
.magic
!= super_header::super_magic
) {
1785 cerr
<< "Invalid magic number" << std::endl
;
1789 if (sh
.version
> super_header::super_ver
) {
1790 cerr
<< "Can't handle export format version=" << sh
.version
<< std::endl
;
1794 //First section must be TYPE_PG_BEGIN
1796 ret
= read_section(&type
, &ebl
);
1799 if (type
== TYPE_POOL_BEGIN
) {
1800 cerr
<< "Pool exports cannot be imported into a PG" << std::endl
;
1802 } else if (type
!= TYPE_PG_BEGIN
) {
1803 cerr
<< "Invalid first section type " << std::to_string(type
) << std::endl
;
1807 auto ebliter
= ebl
.cbegin();
1809 pgb
.decode(ebliter
);
1810 spg_t pgid
= pgb
.pgid
;
1812 if (pgidstr
.length()) {
1815 bool ok
= user_pgid
.parse(pgidstr
.c_str());
1816 // This succeeded in main() already
1818 if (pgid
!= user_pgid
) {
1819 cerr
<< "specified pgid " << user_pgid
1820 << " does not match actual pgid " << pgid
<< std::endl
;
// A zero fsid in the export is accepted without comparison.
1825 if (!pgb
.superblock
.cluster_fsid
.is_zero()
1826 && pgb
.superblock
.cluster_fsid
!= sb
.cluster_fsid
) {
1827 cerr
<< "Export came from different cluster with fsid "
1828 << pgb
.superblock
.cluster_fsid
<< std::endl
;
1833 cerr
<< "Exported features: " << pgb
.superblock
.compat_features
<< std::endl
;
1836 // Special case: Old export has SHARDS incompat feature on replicated pg, remove it
1837 if (pgid
.is_no_shard())
1838 pgb
.superblock
.compat_features
.incompat
.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS
);
1840 if (sb
.compat_features
.compare(pgb
.superblock
.compat_features
) == -1) {
1841 CompatSet unsupported
= sb
.compat_features
.unsupported(pgb
.superblock
.compat_features
);
1843 cerr
<< "Export has incompatible features set " << unsupported
<< std::endl
;
1845 // Let them import if they specify the --force option
1847 return 11; // Positive return means exit status
1850 // we need the latest OSDMap to check for collisions
1853 ret
= get_osdmap(store
, sb
.current_epoch
, curmap
, bl
);
1855 cerr
<< "Can't find latest local OSDMap " << sb
.current_epoch
<< std::endl
;
1858 if (!curmap
.have_pg_pool(pgid
.pgid
.m_pool
)) {
1859 cerr
<< "Pool " << pgid
.pgid
.m_pool
<< " no longer exists" << std::endl
;
1860 // Special exit code for this error, used by test code
1861 return 10; // Positive return means exit status
1864 pool_pg_num_history_t pg_num_history
;
1865 get_pg_num_history(store
, &pg_num_history
);
1867 ghobject_t pgmeta_oid
= pgid
.make_pgmeta_oid();
1869 // Check for PG already present.
1871 if (store
->collection_exists(coll
)) {
1872 cerr
<< "pgid " << pgid
<< " already exists" << std::endl
;
1876 ObjectStore::CollectionHandle ch
;
1881 OSD::make_snapmapper_oid());
1882 SnapMapper
mapper(g_ceph_context
, &driver
, 0, 0, 0, pgid
.shard
);
1884 cout
<< "Importing pgid " << pgid
;
1888 bool found_metadata
= false;
1889 metadata_section ms
;
1891 ret
= read_section(&type
, &ebl
);
1896 cout
<< __func__
<< ": Section type " << std::to_string(type
) << std::endl
;
1898 if (type
>= END_OF_TYPES
) {
1899 cout
<< "Skipping unknown section type" << std::endl
;
1903 case TYPE_OBJECT_BEGIN
:
// Objects are only accepted once the metadata section has been processed.
1904 ceph_assert(found_metadata
);
1905 ret
= get_object(store
, driver
, mapper
, coll
, ebl
, ms
.osdmap
,
1907 if (ret
) return ret
;
1909 case TYPE_PG_METADATA
:
1910 ret
= get_pg_metadata(store
, ebl
, ms
, sb
, pgid
);
1911 if (ret
) return ret
;
1912 found_metadata
= true;
1914 if (pgid
!= ms
.info
.pgid
) {
1915 cerr
<< "specified pgid " << pgid
<< " does not match import file pgid "
1916 << ms
.info
.pgid
<< std::endl
;
1920 // make sure there are no conflicting splits or merges
1921 if (ms
.osdmap
.have_pg_pool(pgid
.pgid
.pool())) {
1922 auto p
= pg_num_history
.pg_nums
.find(pgid
.pgid
.m_pool
);
1923 if (p
!= pg_num_history
.pg_nums
.end() &&
1924 !p
->second
.empty()) {
1925 unsigned start_pg_num
= ms
.osdmap
.get_pg_num(pgid
.pgid
.pool());
1926 unsigned pg_num
= start_pg_num
;
// Walk the recorded pg_num changes starting at the export's map epoch.
1927 for (auto q
= p
->second
.lower_bound(ms
.map_epoch
);
1928 q
!= p
->second
.end();
1930 unsigned new_pg_num
= q
->second
;
1931 cout
<< "pool " << pgid
.pgid
.pool() << " pg_num " << pg_num
1932 << " -> " << new_pg_num
<< std::endl
;
1934 // check for merge target
1936 if (pgid
.is_merge_source(pg_num
, new_pg_num
, &target
)) {
1937 // FIXME: this check assumes the OSD's PG is at the OSD's
1938 // map epoch; it could be, say, at *our* epoch, pre-merge.
1939 coll_t
coll(target
);
1940 if (store
->collection_exists(coll
)) {
1941 cerr
<< "pgid " << pgid
<< " merges to target " << target
1942 << " which already exists" << std::endl
;
1947 // check for split children
1948 set
<spg_t
> children
;
// Note: the split check is made against start_pg_num, not the running pg_num.
1949 if (pgid
.is_split(start_pg_num
, new_pg_num
, &children
)) {
1950 cerr
<< " children are " << children
<< std::endl
;
1951 for (auto child
: children
) {
1953 if (store
->collection_exists(coll
)) {
1954 cerr
<< "pgid " << pgid
<< " splits to " << children
1955 << " and " << child
<< " exists" << std::endl
;
1960 pg_num
= new_pg_num
;
1964 cout
<< "pool " << pgid
.pgid
.pool() << " doesn't existing, not checking"
1965 << " for splits or mergers" << std::endl
;
1969 ObjectStore::Transaction t
;
1970 ch
= store
->create_new_collection(coll
);
1971 create_pg_collection(
1973 pgid
.get_split_bits(ms
.osdmap
.get_pg_pool(pgid
.pool())->get_pg_num()));
1974 init_pg_ondisk(t
, pgid
, NULL
);
1976 // mark this coll for removal until we're done
1977 map
<string
,bufferlist
> values
;
1978 encode((char)1, values
["_remove"]);
1979 t
.omap_setkeys(coll
, pgid
.make_pgmeta_oid(), values
);
1981 store
->queue_transaction(ch
, std::move(t
));
1986 ceph_assert(found_metadata
);
1990 cerr
<< "Unknown section type " << std::to_string(type
) << std::endl
;
1995 if (!found_metadata
) {
1996 cerr
<< "Missing metadata section" << std::endl
;
2000 ObjectStore::Transaction t
;
// Split the exported log into entries kept for this PG and entries rejected.
2002 pg_log_t newlog
, reject
;
2003 pg_log_t::filter_log(pgid
, ms
.osdmap
, g_ceph_context
->_conf
->osd_hit_set_namespace
,
2004 ms
.log
, newlog
, reject
);
2006 for (list
<pg_log_entry_t
>::iterator i
= newlog
.log
.begin();
2007 i
!= newlog
.log
.end(); ++i
)
2008 cerr
<< "Keeping log entry " << *i
<< std::endl
;
2009 for (list
<pg_log_entry_t
>::iterator i
= reject
.log
.begin();
2010 i
!= reject
.log
.end(); ++i
)
2011 cerr
<< "Skipping log entry " << *i
<< std::endl
;
2014 divergent_priors_t newdp
, rejectdp
;
2015 filter_divergent_priors(pgid
, ms
.osdmap
, g_ceph_context
->_conf
->osd_hit_set_namespace
,
2016 ms
.divergent_priors
, newdp
, rejectdp
);
2017 ms
.divergent_priors
= newdp
;
2019 for (divergent_priors_t::iterator i
= newdp
.begin();
2020 i
!= newdp
.end(); ++i
)
2021 cerr
<< "Keeping divergent_prior " << *i
<< std::endl
;
2022 for (divergent_priors_t::iterator i
= rejectdp
.begin();
2023 i
!= rejectdp
.end(); ++i
)
2024 cerr
<< "Skipping divergent_prior " << *i
<< std::endl
;
// The predicate's last visible statement returns true for objects that map to
// a different PG under ms.osdmap.
2027 ms
.missing
.filter_objects([&](const hobject_t
&obj
) {
2028 if (obj
.nspace
== g_ceph_context
->_conf
->osd_hit_set_namespace
)
2030 ceph_assert(!obj
.is_temp());
2031 object_t oid
= obj
.oid
;
2032 object_locator_t
loc(obj
);
2033 pg_t raw_pgid
= ms
.osdmap
.object_locator_to_pg(oid
, loc
);
2034 pg_t _pgid
= ms
.osdmap
.raw_pg_to_pg(raw_pgid
);
2036 return pgid
.pgid
!= _pgid
;
2041 pg_missing_t missing
;
2042 Formatter
*formatter
= Formatter::create("json-pretty");
2043 dump_log(formatter
, cerr
, newlog
, ms
.missing
);
2047 // Just like a split invalidate stats since the object count is changed
2048 if (skipped_objects
)
2049 ms
.info
.stats
.stats_invalid
= true;
2057 ms
.divergent_priors
,
2059 if (ret
) return ret
;
2062 // done, clear removal flag
2064 cerr
<< "done, clearing removal flag" << std::endl
;
2067 t
.omap_rmkey(coll
, pgid
.make_pgmeta_oid(), "_remove");
2068 wait_until_done(&t
, [&] {
2069 store
->queue_transaction(ch
, std::move(t
));
2070 // make sure we flush onreadable items before mapper/driver are destroyed.
// List objects matching `object` / `nspace` (and the `head` flag).
// A lookup_ghobject collector is run over one PG when `pgidstr` is non-empty,
// otherwise over all objects; the collected result is then dumped through
// `formatter` and flushed to cout.
// NOTE(review): declaration of `r` and the error handling / return are not
// visible in this excerpt.
2077 int do_list(ObjectStore
*store
, string pgidstr
, string object
, boost::optional
<std::string
> nspace
,
2078 Formatter
*formatter
, bool debug
, bool human_readable
, bool head
)
2081 lookup_ghobject
lookup(object
, nspace
, head
);
2082 if (pgidstr
.length() > 0) {
2083 r
= action_on_all_objects_in_pg(store
, pgidstr
, lookup
, debug
);
2085 r
= action_on_all_objects(store
, lookup
, debug
);
2089 lookup
.dump(formatter
, human_readable
);
2090 formatter
->flush(cout
);
// Same shape as do_list, but collects with lookup_slow_ghobject(object,
// threshold): one PG when `pgidstr` is non-empty, otherwise all objects; the
// result is dumped through `formatter` and flushed to cout.
// NOTE(review): the meaning/units of `threshold` are defined by
// lookup_slow_ghobject, which is not visible here.
2094 int do_list_slow(ObjectStore
*store
, string pgidstr
, string object
,
2095 double threshold
, Formatter
*formatter
, bool debug
, bool human_readable
)
2098 lookup_slow_ghobject
lookup(object
, threshold
);
2099 if (pgidstr
.length() > 0) {
2100 r
= action_on_all_objects_in_pg(store
, pgidstr
, lookup
, debug
);
2102 r
= action_on_all_objects(store
, lookup
, debug
);
2106 lookup
.dump(formatter
, human_readable
);
2107 formatter
->flush(cout
);
// List objects named `object` in the meta collection (coll_t::meta()).
// The namespace filter is left unset; results are dumped through `formatter`
// and flushed to cout.
2111 int do_meta(ObjectStore
*store
, string object
, Formatter
*formatter
, bool debug
, bool human_readable
)
2114 boost::optional
<std::string
> nspace
; // Not specified
2115 lookup_ghobject
lookup(object
, nspace
);
2116 r
= action_on_all_objects_in_exact_pg(store
, coll_t::meta(), lookup
, debug
);
2119 lookup
.dump(formatter
, human_readable
);
2120 formatter
->flush(cout
);
// Remove one object, its snap-mapper entry, or both, depending on `type`:
//  - BOTH or SNAPMAP:   mapper.remove_oid(ghobj.hobj, _t); a failure other
//                       than -ENOENT is reported on cerr;
//  - BOTH or NOSNAPMAP: t->remove(coll, ghobj) is added to the transaction.
// Nothing is queued here; the caller owns both transactions.
// NOTE(review): the `mapper` and `type` parameters are declared on lines not
// visible in this excerpt.
2130 int remove_object(coll_t coll
, ghobject_t
&ghobj
,
2132 MapCacher::Transaction
<std::string
, bufferlist
> *_t
,
2133 ObjectStore::Transaction
*t
,
2136 if (type
== BOTH
|| type
== SNAPMAP
) {
2137 int r
= mapper
.remove_oid(ghobj
.hobj
, _t
);
// A missing snap-mapper entry (-ENOENT) is tolerated.
2138 if (r
< 0 && r
!= -ENOENT
) {
2139 cerr
<< "remove_oid returned " << cpp_strerror(r
) << std::endl
;
2144 if (type
== BOTH
|| type
== NOSNAPMAP
) {
2145 t
->remove(coll
, ghobj
);
2150 int get_snapset(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, SnapSet
&ss
, bool silent
);
// Remove an object (and optionally its clones) from `coll`.
// Visible flow:
//  - stat the object; a failure is reported with a "remove: " prefix;
//  - for objects that carry a snapset, load it with get_snapset(); on failure
//    the visible condition only lets the caller continue when `force` is set
//    and `all` is not;
//  - when clones exist and `all` is false: with the path shown first, a
//    warning is printed and the clone list is cleared so only the head/snapdir
//    is removed; otherwise the user is told to use removeall;
//  - each remaining clone, then the object itself, is passed to
//    remove_object(); the combined transaction is queued on `store`.
// NOTE(review): the exact branch conditions around `force`, the return
// values, and the dry-run handling (if any) are on lines not visible here.
2152 int do_remove_object(ObjectStore
*store
, coll_t coll
,
2153 ghobject_t
&ghobj
, bool all
, bool force
, enum rmtype type
)
2155 auto ch
= store
->open_collection(coll
);
2157 coll
.is_pg_prefix(&pg
);
2161 OSD::make_snapmapper_oid());
2162 SnapMapper
mapper(g_ceph_context
, &driver
, 0, 0, 0, pg
.shard
);
2165 int r
= store
->stat(ch
, ghobj
, &st
);
2167 cerr
<< "remove: " << cpp_strerror(r
) << std::endl
;
2172 if (ghobj
.hobj
.has_snapset()) {
2173 r
= get_snapset(store
, coll
, ghobj
, ss
, false);
2175 cerr
<< "Can't get snapset error " << cpp_strerror(r
) << std::endl
;
2176 // If --force and bad snapset let them remove the head
2177 if (!(force
&& !all
))
2180 // cout << "snapset " << ss << std::endl;
2181 if (!ss
.clone_snaps
.empty() && !all
) {
2183 cout
<< "WARNING: only removing "
2184 << (ghobj
.hobj
.is_head() ? "head" : "snapdir")
2185 << " with clones present" << std::endl
;
// Clearing the list makes the clone-removal loop below a no-op.
2186 ss
.clone_snaps
.clear();
2188 cerr
<< "Clones are present, use removeall to delete everything"
2195 ObjectStore::Transaction t
;
2196 OSDriver::OSTransaction
_t(driver
.get_transaction(&t
));
// Reuse a copy of ghobj, rewriting only the snap id for each clone.
2198 ghobject_t snapobj
= ghobj
;
2199 for (auto& p
: ss
.clone_snaps
) {
2200 snapobj
.hobj
.snap
= p
.first
;
2201 cout
<< "remove clone " << snapobj
<< std::endl
;
2203 r
= remove_object(coll
, snapobj
, mapper
, &_t
, &t
, type
);
2209 cout
<< "remove " << ghobj
<< std::endl
;
2212 r
= remove_object(coll
, ghobj
, mapper
, &_t
, &t
, type
);
2218 wait_until_done(&t
, [&] {
2219 store
->queue_transaction(ch
, std::move(t
));
// Print the names of all xattrs of `ghobj`, one per line, on cout.
// Attribute names are fetched with store->getattrs(); a failure is reported
// with a "getattrs: " prefix. Each key is passed through cleanbin() before
// being printed.
// NOTE(review): whether cleanbin() is applied unconditionally is not visible
// in this excerpt (a guarding line may be missing).
2226 int do_list_attrs(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
)
2228 auto ch
= store
->open_collection(coll
);
2229 map
<string
,bufferptr
,less
<>> aset
;
2230 int r
= store
->getattrs(ch
, ghobj
, aset
);
2232 cerr
<< "getattrs: " << cpp_strerror(r
) << std::endl
;
// NOTE(review): `aset` uses the less<> comparator but the iterator below is
// spelled for the default-comparator map; this only works if both map
// instantiations share an iterator type -- `auto` would avoid the dependency.
2236 for (map
<string
,bufferptr
>::iterator i
= aset
.begin();i
!= aset
.end(); ++i
) {
2237 string
key(i
->first
);
2239 key
= cleanbin(key
);
2240 cout
<< key
<< std::endl
;
// Print all omap keys of `ghobj`, one per line, on cout.
// Keys are read in batches: get_omap_batch() fills `oset` from the iterator
// while it is valid, and each key of the batch is passed through cleanbin()
// and printed.
// The failure message for a missing iterator always reports ENOENT.
// NOTE(review): the check that triggers that message is not visible here.
2245 int do_list_omap(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
)
2247 auto ch
= store
->open_collection(coll
);
2248 ObjectMap::ObjectMapIterator iter
= store
->get_omap_iterator(ch
, ghobj
);
2250 cerr
<< "omap_get_iterator: " << cpp_strerror(ENOENT
) << std::endl
;
2253 iter
->seek_to_first();
2254 map
<string
, bufferlist
> oset
;
2255 while(iter
->valid()) {
2256 get_omap_batch(iter
, oset
);
2258 for (map
<string
,bufferlist
>::iterator i
= oset
.begin();i
!= oset
.end(); ++i
) {
2259 string
key(i
->first
);
2261 key
= cleanbin(key
);
2262 cout
<< key
<< std::endl
;
// Copy the data of `ghobj` to file descriptor `fd`.
// The object is stat'ed first (failure reported with a "get-bytes: " prefix),
// its size is logged, and the data is then read from the store in chunks of
// at most max_read bytes starting at offset 0 and written to `fd`.
// NOTE(review): the loop framing, the updates of `offset`/`total`, and the
// handling of short or failed write() calls are not visible in this excerpt.
2268 int do_get_bytes(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, int fd
)
2270 auto ch
= store
->open_collection(coll
);
2274 int ret
= store
->stat(ch
, ghobj
, &st
);
2276 cerr
<< "get-bytes: " << cpp_strerror(ret
) << std::endl
;
2282 cerr
<< "size=" << total
<< std::endl
;
2284 uint64_t offset
= 0;
2285 bufferlist rawdatabl
;
2288 mysize_t len
= max_read
;
2292 ret
= store
->read(ch
, ghobj
, offset
, len
, rawdatabl
);
2299 cerr
<< "data section offset=" << offset
<< " len=" << len
<< std::endl
;
// `ret` (the result of the read above) is used as the byte count and is then
// overwritten with write()'s result.
2304 ret
= write(fd
, rawdatabl
.c_str(), ret
);
// Replace the data of `ghobj` with the bytes read from `fd`.
// The transaction first touches the object and truncates it to 0, then
// appends write() operations for each chunk (at most max_read bytes) read
// from `fd`, and is finally queued on `store`.
// All writes are accumulated in a single transaction (see the XXX below).
// NOTE(review): loop framing, offset advancement, end-of-input detection and
// any dry-run guard are on lines not visible in this excerpt.
2314 int do_set_bytes(ObjectStore
*store
, coll_t coll
,
2315 ghobject_t
&ghobj
, int fd
)
2317 ObjectStore::Transaction tran
;
2318 ObjectStore::Transaction
*t
= &tran
;
2321 cerr
<< "Write " << ghobj
<< std::endl
;
2324 t
->touch(coll
, ghobj
);
2325 t
->truncate(coll
, ghobj
, 0);
2328 uint64_t offset
= 0;
2329 bufferlist rawdatabl
;
2332 ssize_t bytes
= rawdatabl
.read_fd(fd
, max_read
);
2334 cerr
<< "read_fd error " << cpp_strerror(bytes
) << std::endl
;
2342 cerr
<< "\tdata: offset " << offset
<< " bytes " << bytes
<< std::endl
;
2344 t
->write(coll
, ghobj
, offset
, bytes
, rawdatabl
);
2347 // XXX: Should we queue_transaction() every once in a while for very large files
2350 auto ch
= store
->open_collection(coll
);
2352 store
->queue_transaction(ch
, std::move(*t
));
// Fetch xattr `key` of `ghobj` and prepare it for output.
// A getattr failure is reported with a "getattr: " prefix. On success the
// raw bytes are copied into a string, passed through cleanbin(), and a
// trailing newline is appended.
// NOTE(review): where the resulting string is written, and whether cleanbin()
// is conditional, is not visible in this excerpt.
2356 int do_get_attr(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, string key
)
2358 auto ch
= store
->open_collection(coll
);
2361 int r
= store
->getattr(ch
, ghobj
, key
.c_str(), bp
);
2363 cerr
<< "getattr: " << cpp_strerror(r
) << std::endl
;
2367 string
value(bp
.c_str(), bp
.length());
2369 value
= cleanbin(value
);
2370 value
.push_back('\n');
// Set xattr `key` of `ghobj` to the data read from `fd`.
// The value is loaded with get_fd_data(fd, bl); the transaction touches the
// object, sets the attribute and is queued on `store`.
// NOTE(review): the handling of get_fd_data()'s result and any dry-run guard
// are on lines not visible in this excerpt.
2377 int do_set_attr(ObjectStore
*store
, coll_t coll
,
2378 ghobject_t
&ghobj
, string key
, int fd
)
2380 ObjectStore::Transaction tran
;
2381 ObjectStore::Transaction
*t
= &tran
;
2385 cerr
<< "Setattr " << ghobj
<< std::endl
;
2387 int ret
= get_fd_data(fd
, bl
);
2394 t
->touch(coll
, ghobj
);
2396 t
->setattr(coll
, ghobj
, key
, bl
);
2398 auto ch
= store
->open_collection(coll
);
2399 store
->queue_transaction(ch
, std::move(*t
));
// Remove xattr `key` from `ghobj`.
// Logs "Rmattr <object>" to cerr, adds rmattr(coll, ghobj, key) to a local
// transaction and queues it on `store`.
// NOTE(review): any dry-run guard and the return value are on lines not
// visible in this excerpt.
2403 int do_rm_attr(ObjectStore
*store
, coll_t coll
,
2404 ghobject_t
&ghobj
, string key
)
2406 ObjectStore::Transaction tran
;
2407 ObjectStore::Transaction
*t
= &tran
;
2410 cerr
<< "Rmattr " << ghobj
<< std::endl
;
2415 t
->rmattr(coll
, ghobj
, key
);
2417 auto ch
= store
->open_collection(coll
);
2418 store
->queue_transaction(ch
, std::move(*t
));
// Fetch the omap value stored under `key` for `ghobj` and prepare it for
// output.
// omap_get_values() failures are reported with an "omap_get_values: " prefix;
// a missing key yields "Key not found". On success exactly one entry is
// expected (asserted); its bytes are copied into a string, passed through
// cleanbin(), and a trailing newline is appended.
// NOTE(review): the construction of `keys` and the final write of `value`
// are on lines not visible in this excerpt.
2422 int do_get_omap(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, string key
)
2424 auto ch
= store
->open_collection(coll
);
2426 map
<string
, bufferlist
> out
;
2430 int r
= store
->omap_get_values(ch
, ghobj
, keys
, &out
);
2432 cerr
<< "omap_get_values: " << cpp_strerror(r
) << std::endl
;
2437 cerr
<< "Key not found" << std::endl
;
2441 ceph_assert(out
.size() == 1);
2443 bufferlist bl
= out
.begin()->second
;
2444 string
value(bl
.c_str(), bl
.length());
2446 value
= cleanbin(value
);
2447 value
.push_back('\n');
// Set omap key `key` of `ghobj` to the data read from `fd`.
// The value is loaded with get_fd_data(fd, valbl) and inserted into a
// one-entry map; the transaction touches the object, applies omap_setkeys()
// and is queued on `store`.
// NOTE(review): the handling of get_fd_data()'s result and any dry-run guard
// are on lines not visible in this excerpt.
2454 int do_set_omap(ObjectStore
*store
, coll_t coll
,
2455 ghobject_t
&ghobj
, string key
, int fd
)
2457 ObjectStore::Transaction tran
;
2458 ObjectStore::Transaction
*t
= &tran
;
2459 map
<string
, bufferlist
> attrset
;
2463 cerr
<< "Set_omap " << ghobj
<< std::endl
;
2465 int ret
= get_fd_data(fd
, valbl
);
2469 attrset
.insert(pair
<string
, bufferlist
>(key
, valbl
));
2474 t
->touch(coll
, ghobj
);
2476 t
->omap_setkeys(coll
, ghobj
, attrset
);
2478 auto ch
= store
->open_collection(coll
);
2479 store
->queue_transaction(ch
, std::move(*t
));
// Remove omap key `key` from `ghobj`.
// Logs "Rm_omap <object>" to cerr, adds omap_rmkey(coll, ghobj, key) to a
// local transaction and queues it on `store`.
// NOTE(review): any dry-run guard and the return value are on lines not
// visible in this excerpt.
2483 int do_rm_omap(ObjectStore
*store
, coll_t coll
,
2484 ghobject_t
&ghobj
, string key
)
2486 ObjectStore::Transaction tran
;
2487 ObjectStore::Transaction
*t
= &tran
;
2490 cerr
<< "Rm_omap " << ghobj
<< std::endl
;
2495 t
->omap_rmkey(coll
, ghobj
, key
);
2497 auto ch
= store
->open_collection(coll
);
2498 store
->queue_transaction(ch
, std::move(*t
));
// Fetch the omap header of `ghobj` and prepare it for output.
// omap_get_header() failures are reported with an "omap_get_header: " prefix.
// On success the header bytes are copied into a string, passed through
// cleanbin(), and a trailing newline is appended.
// NOTE(review): the final write of `header` and the meaning of the trailing
// `true` argument are not visible in this excerpt.
2502 int do_get_omaphdr(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
)
2504 auto ch
= store
->open_collection(coll
);
2507 int r
= store
->omap_get_header(ch
, ghobj
, &hdrbl
, true);
2509 cerr
<< "omap_get_header: " << cpp_strerror(r
) << std::endl
;
2513 string
header(hdrbl
.c_str(), hdrbl
.length());
2515 header
= cleanbin(header
);
2516 header
.push_back('\n');
// Set the omap header of `ghobj` to the data read from `fd`.
// The header is loaded with get_fd_data(fd, hdrbl); the transaction touches
// the object, applies omap_setheader() and is queued on `store`.
// NOTE(review): the handling of get_fd_data()'s result and any dry-run guard
// are on lines not visible in this excerpt.
2523 int do_set_omaphdr(ObjectStore
*store
, coll_t coll
,
2524 ghobject_t
&ghobj
, int fd
)
2526 ObjectStore::Transaction tran
;
2527 ObjectStore::Transaction
*t
= &tran
;
2531 cerr
<< "Omap_setheader " << ghobj
<< std::endl
;
2533 int ret
= get_fd_data(fd
, hdrbl
);
2540 t
->touch(coll
, ghobj
);
2542 t
->omap_setheader(coll
, ghobj
, hdrbl
);
2544 auto ch
= store
->open_collection(coll
);
2545 store
->queue_transaction(ch
, std::move(*t
));
// Per-object action that clears the LOST flag from an object's object_info.
// call() reports "<coll>/<object> is lost", clears FLAG_LOST on `oi`,
// re-encodes it and writes it back as the OI_ATTR xattr in its own
// transaction.
// NOTE(review): the condition that selects lost objects and any dry-run
// guard are on lines not visible in this excerpt.
2549 struct do_fix_lost
: public action_on_object_t
{
2550 void call(ObjectStore
*store
, coll_t coll
,
2551 ghobject_t
&ghobj
, object_info_t
&oi
) override
{
2553 cout
<< coll
<< "/" << ghobj
<< " is lost";
2559 oi
.clear_flag(object_info_t::FLAG_LOST
);
2561 encode(oi
, bl
, -1); /* fixme: using full features */
2562 ObjectStore::Transaction t
;
2563 t
.setattr(coll
, ghobj
, OI_ATTR
, bl
);
2564 auto ch
= store
->open_collection(coll
);
2565 store
->queue_transaction(ch
, std::move(t
));
// Load the SnapSet of `ghobj` from its SS_ATTR xattr into `ss`.
// Two failure messages are visible: one for the getattr step and one for
// the decode step, each naming the (coll, ghobj) pair.
// `silent` defaults to false.
// NOTE(review): how `silent` gates the messages, and the decode call itself,
// are on lines not visible in this excerpt.
2571 int get_snapset(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, SnapSet
&ss
, bool silent
= false)
2573 auto ch
= store
->open_collection(coll
);
2575 int r
= store
->getattr(ch
, ghobj
, SS_ATTR
, attr
);
2578 cerr
<< "Error getting snapset on : " << make_pair(coll
, ghobj
) << ", "
2579 << cpp_strerror(r
) << std::endl
;
2582 auto bp
= attr
.cbegin();
2587 cerr
<< "Error decoding snapset on : " << make_pair(coll
, ghobj
) << ", "
2588 << cpp_strerror(r
) << std::endl
;
// Dump everything known about `ghobj` through `formatter`.
// Inside an "obj" section the visible code emits:
//  - "id":      ghobj.dump();
//  - "info":    from the OI_ATTR xattr (errors reported for getattr/decode);
//  - "stat":    size, blksize, blocks, nlink from store->stat();
//  - "SnapSet": only for objects that carry a snapset, via get_snapset();
//  - "hinfo":   from the ECUtil hinfo xattr, when present;
//  - "onode":   via store->dump_onode().
// The formatter is flushed to cout at the end.
// NOTE(review): the error messages print cpp_strerror(r) while the visible
// calls store their results in gr / sr / snr; the lines that copy those into
// `r` are not shown in this excerpt -- confirm `r` is set before each message.
2594 int print_obj_info(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, Formatter
* formatter
)
2596 auto ch
= store
->open_collection(coll
);
2598 formatter
->open_object_section("obj");
2599 formatter
->open_object_section("id");
2600 ghobj
.dump(formatter
);
2601 formatter
->close_section();
2604 int gr
= store
->getattr(ch
, ghobj
, OI_ATTR
, attr
);
2607 cerr
<< "Error getting attr on : " << make_pair(coll
, ghobj
) << ", "
2608 << cpp_strerror(r
) << std::endl
;
2611 auto bp
= attr
.cbegin();
2614 formatter
->open_object_section("info");
2616 formatter
->close_section();
2619 cerr
<< "Error decoding attr on : " << make_pair(coll
, ghobj
) << ", "
2620 << cpp_strerror(r
) << std::endl
;
2624 int sr
= store
->stat(ch
, ghobj
, &st
, true);
2627 cerr
<< "Error stat on : " << make_pair(coll
, ghobj
) << ", "
2628 << cpp_strerror(r
) << std::endl
;
2630 formatter
->open_object_section("stat");
2631 formatter
->dump_int("size", st
.st_size
);
2632 formatter
->dump_int("blksize", st
.st_blksize
);
2633 formatter
->dump_int("blocks", st
.st_blocks
);
2634 formatter
->dump_int("nlink", st
.st_nlink
);
2635 formatter
->close_section();
2638 if (ghobj
.hobj
.has_snapset()) {
2640 int snr
= get_snapset(store
, coll
, ghobj
, ss
);
2644 formatter
->open_object_section("SnapSet");
2646 formatter
->close_section();
// `gr` is reused for the erasure-code hash-info attribute lookup.
2650 gr
= store
->getattr(ch
, ghobj
, ECUtil::get_hinfo_key(), hattr
);
2652 ECUtil::HashInfo hinfo
;
2653 auto hp
= hattr
.cbegin();
2656 formatter
->open_object_section("hinfo");
2657 hinfo
.dump(formatter
);
2658 formatter
->close_section();
2661 cerr
<< "Error decoding hinfo on : " << make_pair(coll
, ghobj
) << ", "
2662 << cpp_strerror(r
) << std::endl
;
2665 gr
= store
->dump_onode(ch
, ghobj
, "onode", formatter
);
2667 formatter
->close_section();
2668 formatter
->flush(cout
);
// Deliberately alter the stored object_info of `ghobj`.
// The OI_ATTR xattr is read and decoded, oi.alloc_hint_flags is increased by
// 0xff, and the re-encoded object_info is written back as OI_ATTR in a
// transaction queued on `store`. Failures of the read, decode and write steps
// are reported on cerr.
// Note that the decode-failure message reuses the "Error getting attr" text.
// `formatter` is not referenced by any line visible in this excerpt.
// NOTE(review): the decode call, any dry-run guard and the return values are
// on lines not visible here.
2673 int corrupt_info(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, Formatter
* formatter
)
2675 auto ch
= store
->open_collection(coll
);
2677 int r
= store
->getattr(ch
, ghobj
, OI_ATTR
, attr
);
2679 cerr
<< "Error getting attr on : " << make_pair(coll
, ghobj
) << ", "
2680 << cpp_strerror(r
) << std::endl
;
2684 auto bp
= attr
.cbegin();
2689 cerr
<< "Error getting attr on : " << make_pair(coll
, ghobj
) << ", "
2690 << cpp_strerror(r
) << std::endl
;
2695 oi
.alloc_hint_flags
+= 0xff;
2696 ObjectStore::Transaction t
;
2697 encode(oi
, attr
, -1); /* fixme: using full features */
2698 t
.setattr(coll
, ghobj
, OI_ATTR
, attr
);
2699 auto ch
= store
->open_collection(coll
);
2700 r
= store
->queue_transaction(ch
, std::move(t
));
2702 cerr
<< "Error writing object info: " << make_pair(coll
, ghobj
) << ", "
2703 << cpp_strerror(r
) << std::endl
;
// NOTE(review): the line carrying this function's return type and name is
// not visible in this excerpt; from the messages below it sets an object's
// size (presumably `set_size`) -- confirm against the full file.
// Visible flow:
//  - snapdir objects are refused;
//  - the object_info (OI_ATTR) is read and decoded, and the object is stat'ed;
//  - the SnapSet is looked up on the head object first (silently), falling
//    back to the snapdir object; for a snap object the clone_size entry for
//    its snap id must exist;
//  - if stat size, object_info size and (for snaps) clone_size already equal
//    `setsize`, this is reported and nothing more is shown to happen;
//  - otherwise the new sizes are logged, clone_size is updated for snaps,
//    the store object is truncated unless `corrupt` is set (clearing the data
//    digest), and the re-encoded object_info / SnapSet are written back as
//    OI_ATTR / SS_ATTR in one transaction.
// NOTE(review): the `corrupt` parameter, assignments to oi.size, the
// conditions around the SnapSet write and any dry-run guard are on lines not
// visible in this excerpt.
2711 ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, uint64_t setsize
, Formatter
* formatter
,
2714 auto ch
= store
->open_collection(coll
);
2715 if (ghobj
.hobj
.is_snapdir()) {
2716 cerr
<< "Can't set the size of a snapdir" << std::endl
;
2720 int r
= store
->getattr(ch
, ghobj
, OI_ATTR
, attr
);
2722 cerr
<< "Error getting attr on : " << make_pair(coll
, ghobj
) << ", "
2723 << cpp_strerror(r
) << std::endl
;
2727 auto bp
= attr
.cbegin();
2732 cerr
<< "Error getting attr on : " << make_pair(coll
, ghobj
) << ", "
2733 << cpp_strerror(r
) << std::endl
;
2737 r
= store
->stat(ch
, ghobj
, &st
, true);
2739 cerr
<< "Error stat on : " << make_pair(coll
, ghobj
) << ", "
2740 << cpp_strerror(r
) << std::endl
;
2742 ghobject_t
head(ghobj
);
2744 bool found_head
= true;
2745 map
<snapid_t
, uint64_t>::iterator csi
;
2746 bool is_snap
= ghobj
.hobj
.is_snap();
// First try the head object for the SnapSet, quietly.
2748 head
.hobj
= head
.hobj
.get_head();
2749 r
= get_snapset(store
, coll
, head
, ss
, true);
2750 if (r
< 0 && r
!= -ENOENT
) {
2751 // Requested get_snapset() silent, so if not -ENOENT show error
2752 cerr
<< "Error getting snapset on : " << make_pair(coll
, head
) << ", "
2753 << cpp_strerror(r
) << std::endl
;
// Fall back to the snapdir object (this lookup is not silent).
2757 head
.hobj
= head
.hobj
.get_snapdir();
2758 r
= get_snapset(store
, coll
, head
, ss
);
2765 csi
= ss
.clone_size
.find(ghobj
.hobj
.snap
);
2766 if (csi
== ss
.clone_size
.end()) {
2767 cerr
<< "SnapSet is missing clone_size for snap " << ghobj
.hobj
.snap
<< std::endl
;
2771 if ((uint64_t)st
.st_size
== setsize
&& oi
.size
== setsize
2772 && (!is_snap
|| csi
->second
== setsize
)) {
2773 cout
<< "Size of object is already " << setsize
<< std::endl
;
2776 cout
<< "Setting size to " << setsize
<< ", stat size " << st
.st_size
2777 << ", obj info size " << oi
.size
;
2779 cout
<< ", " << (found_head
? "head" : "snapdir")
2780 << " clone_size " << csi
->second
;
2781 csi
->second
= setsize
;
2787 ObjectStore::Transaction t
;
2788 // Only modify object info if we want to corrupt it
2789 if (!corrupt
&& (uint64_t)st
.st_size
!= setsize
) {
2790 t
.truncate(coll
, ghobj
, setsize
);
2791 // Changing objectstore size will invalidate data_digest, so clear it.
2792 oi
.clear_data_digest();
2794 encode(oi
, attr
, -1); /* fixme: using full features */
2795 t
.setattr(coll
, ghobj
, OI_ATTR
, attr
);
2797 bufferlist snapattr
;
2799 encode(ss
, snapattr
);
2800 t
.setattr(coll
, head
, SS_ATTR
, snapattr
);
2802 auto ch
= store
->open_collection(coll
);
2803 r
= store
->queue_transaction(ch
, std::move(t
));
2805 cerr
<< "Error writing object info: " << make_pair(coll
, ghobj
) << ", "
2806 << cpp_strerror(r
) << std::endl
;
// Clear the data digest recorded in an object's OI_ATTR object info and
// queue a transaction that writes the re-encoded attribute back.
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), so the declarations and error checks between the visible statements
// are not shown here.
2813 int clear_data_digest(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
) {
2814 auto ch
= store
->open_collection(coll
);
// Fetch the raw object-info attribute into attr.
2816 int r
= store
->getattr(ch
, ghobj
, OI_ATTR
, attr
);
2818 cerr
<< "Error getting attr on : " << make_pair(coll
, ghobj
) << ", "
2819 << cpp_strerror(r
) << std::endl
;
2823 auto bp
= attr
.cbegin();
2828 cerr
<< "Error getting attr on : " << make_pair(coll
, ghobj
) << ", "
2829 << cpp_strerror(r
) << std::endl
;
// Drop the digest, then re-encode oi into attr for the setattr below.
2834 oi
.clear_data_digest();
2835 encode(oi
, attr
, -1); /* fixme: using full features */
2836 ObjectStore::Transaction t
;
2837 t
.setattr(coll
, ghobj
, OI_ATTR
, attr
);
2838 auto ch
= store
->open_collection(coll
);
2839 r
= store
->queue_transaction(ch
, std::move(t
));
2841 cerr
<< "Error writing object info: " << make_pair(coll
, ghobj
) << ", "
2842 << cpp_strerror(r
) << std::endl
;
// Load the SnapSet of ghobj, clear or corrupt the parts selected by `arg`,
// and write it back as the SS_ATTR attribute.
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip): the rest of the parameter list and the statements guarded by several
// of the `if (arg == ...)` tests below are not visible here.
2849 int clear_snapset(ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
,
2853 int ret
= get_snapset(store
, coll
, ghobj
, ss
);
2857 // Use "corrupt" to clear entire SnapSet
2858 // Use "seq" to just corrupt SnapSet.seq
2859 if (arg
== "corrupt" || arg
== "seq")
2861 // Use "snaps" to just clear SnapSet.clone_snaps
2862 if (arg
== "corrupt" || arg
== "snaps")
2863 ss
.clone_snaps
.clear();
2864 // By default just clear clone, clone_overlap and clone_size
2865 if (arg
== "corrupt")
2867 if (arg
== "" || arg
== "clones")
2869 if (arg
== "" || arg
== "clone_overlap")
2870 ss
.clone_overlap
.clear();
2871 if (arg
== "" || arg
== "clone_size")
2872 ss
.clone_size
.clear();
2873 // Break all clone sizes by adding 1
2874 if (arg
== "size") {
2875 for (map
<snapid_t
, uint64_t>::iterator i
= ss
.clone_size
.begin();
2876 i
!= ss
.clone_size
.end(); ++i
)
// Store the buffer `bl` as the object's SS_ATTR.
2883 ObjectStore::Transaction t
;
2884 t
.setattr(coll
, ghobj
, SS_ATTR
, bl
);
2885 auto ch
= store
->open_collection(coll
);
2886 int r
= store
->queue_transaction(ch
, std::move(t
));
2888 cerr
<< "Error setting snapset on : " << make_pair(coll
, ghobj
) << ", "
2889 << cpp_strerror(r
) << std::endl
;
2896 vector
<snapid_t
>::iterator
find(vector
<snapid_t
> &v
, snapid_t clid
)
2898 return std::find(v
.begin(), v
.end(), clid
);
2901 map
<snapid_t
, interval_set
<uint64_t> >::iterator
2902 find(map
<snapid_t
, interval_set
<uint64_t> > &m
, snapid_t clid
)
2904 return m
.find(clid
);
2907 map
<snapid_t
, uint64_t>::iterator
find(map
<snapid_t
, uint64_t> &m
,
2910 return m
.find(clid
);
// Look up `cloneid` in container `mv` through the find() overloads; `name`
// labels the container in the diagnostic printed when the clone is absent.
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip) — the template header, the handling of a found entry and the return
// values are not visible.  `force` presumably downgrades a missing clone to a
// warning (see the " (ignored)" text) — confirm.
2914 int remove_from(T
&mv
, string name
, snapid_t cloneid
, bool force
)
2916 typename
T::iterator i
= find(mv
, cloneid
);
2917 if (i
!= mv
.end()) {
2920 cerr
<< "Clone " << cloneid
<< " doesn't exist in " << name
;
2922 cerr
<< " (ignored)" << std::endl
;
// Remove the metadata of clone `cloneid` from an object's SnapSet and write
// the SnapSet back as SS_ATTR; the "<object> remove-clone-metadata <cloneid>"
// entry printed by usage() appears to map to this code.
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), including the line that names the function — confirm the name and
// the hidden statements against the real file.
2932 ObjectStore
*store
, coll_t coll
, ghobject_t
&ghobj
, snapid_t cloneid
, bool force
)
2934 // XXX: Don't allow this if in a cache tier or former cache tier
2935 // bool allow_incomplete_clones() const {
2936 // return cache_mode != CACHEMODE_NONE || has_flag(FLAG_INCOMPLETE_CLONES);
2939 int ret
= get_snapset(store
, coll
, ghobj
, snapset
);
2943 // Derived from trim_object()
// Walk snapset.clones with p; p == end() below means the clone was not found.
2945 vector
<snapid_t
>::iterator p
;
2946 for (p
= snapset
.clones
.begin(); p
!= snapset
.clones
.end(); ++p
)
2949 if (p
== snapset
.clones
.end()) {
2950 cerr
<< "Clone " << cloneid
<< " not present";
2953 if (p
!= snapset
.clones
.begin()) {
2954 // not the oldest... merge overlap into next older clone
2955 vector
<snapid_t
>::iterator n
= p
- 1;
2956 hobject_t prev_coid
= ghobj
.hobj
;
2957 prev_coid
.snap
= *n
;
2958 //bool adjust_prev_bytes = is_present_clone(prev_coid);
2960 //if (adjust_prev_bytes)
2961 // ctx->delta_stats.num_bytes -= snapset.get_clone_bytes(*n);
2963 snapset
.clone_overlap
[*n
].intersection_of(
2964 snapset
.clone_overlap
[*p
]);
2966 //if (adjust_prev_bytes)
2967 // ctx->delta_stats.num_bytes += snapset.get_clone_bytes(*n);
// Drop the clone from each per-clone container; any non-zero result aborts.
2970 ret
= remove_from(snapset
.clones
, "clones", cloneid
, force
);
2971 if (ret
) return ret
;
2972 ret
= remove_from(snapset
.clone_overlap
, "clone_overlap", cloneid
, force
);
2973 if (ret
) return ret
;
2974 ret
= remove_from(snapset
.clone_size
, "clone_size", cloneid
, force
);
2975 if (ret
) return ret
;
// Persist the edited SnapSet.
2981 encode(snapset
, bl
);
2982 ObjectStore::Transaction t
;
2983 t
.setattr(coll
, ghobj
, SS_ATTR
, bl
);
2984 auto ch
= store
->open_collection(coll
);
2985 int r
= store
->queue_transaction(ch
, std::move(t
));
2987 cerr
<< "Error setting snapset on : " << make_pair(coll
, ghobj
) << ", "
2988 << cpp_strerror(r
) << std::endl
;
2991 cout
<< "Removal of clone " << cloneid
<< " complete" << std::endl
;
2992 cout
<< "Use pg repair after OSD restarted to correct stat information" << std::endl
;
// Copy the contents of object store `src` into object store `dst`.
// Visible flow: compare the two fsids, require that dst lists no
// collections, then for every src collection create it in dst and copy each
// object's attrs, data, omap header and omap keys; finally copy the keyring
// file from srcpath to dstpath and a fixed set of meta keys, and write the
// "ready" meta key on dst.
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), so mount calls, declarations and error checks between the visible
// statements are not shown here.
2996 int dup(string srcpath
, ObjectStore
*src
, string dstpath
, ObjectStore
*dst
)
2998 cout
<< "dup from " << src
->get_type() << ": " << srcpath
<< "\n"
2999 << " to " << dst
->get_type() << ": " << dstpath
3002 vector
<coll_t
> collections
;
3007 cerr
<< "failed to mount src: " << cpp_strerror(r
) << std::endl
;
3012 cerr
<< "failed to mount dst: " << cpp_strerror(r
) << std::endl
;
3016 if (src
->get_fsid() != dst
->get_fsid()) {
3017 cerr
<< "src fsid " << src
->get_fsid() << " != dest " << dst
->get_fsid()
3021 cout
<< "fsid " << src
->get_fsid() << std::endl
;
3023 // make sure dst is empty
3024 r
= dst
->list_collections(collections
);
3026 cerr
<< "error listing collections on dst: " << cpp_strerror(r
) << std::endl
;
3029 if (!collections
.empty()) {
3030 cerr
<< "destination store is not empty" << std::endl
;
// `collections` is reused for the source listing.
3034 r
= src
->list_collections(collections
);
3036 cerr
<< "error listing collections on src: " << cpp_strerror(r
) << std::endl
;
3040 num
= collections
.size();
3041 cout
<< num
<< " collections" << std::endl
;
// NOTE(review): `auto cid` takes each collection id by value.
3043 for (auto cid
: collections
) {
3044 cout
<< i
++ << "/" << num
<< " " << cid
<< std::endl
;
3045 auto ch
= src
->open_collection(cid
);
3046 auto dch
= dst
->create_new_collection(cid
);
3048 ObjectStore::Transaction t
;
3049 int bits
= src
->collection_bits(ch
);
3051 if (src
->get_type() == "filestore" && cid
.is_meta()) {
3054 cerr
<< "cannot get bit count for collection " << cid
<< ": "
3055 << cpp_strerror(bits
) << std::endl
;
// NOTE(review): the result of this queue_transaction is not captured on the
// visible line.
3059 t
.create_collection(cid
, bits
);
3060 dst
->queue_transaction(dch
, std::move(t
));
3065 uint64_t bytes
= 0, keys
= 0;
// List up to 1000 objects per call; `pos` is both the start and the
// returned next position.
3067 vector
<ghobject_t
> ls
;
3068 r
= src
->collection_list(ch
, pos
, ghobject_t::get_max(), 1000, &ls
, &pos
);
3070 cerr
<< "collection_list on " << cid
<< " from " << pos
<< " got: "
3071 << cpp_strerror(r
) << std::endl
;
3078 for (auto& oid
: ls
) {
3079 //cout << " " << cid << " " << oid << std::endl;
// Progress line terminated with "\r".
3081 cout
<< " " << std::setw(16) << n
<< " objects, "
3082 << std::setw(16) << bytes
<< " bytes, "
3083 << std::setw(16) << keys
<< " keys"
3084 << std::setw(1) << "\r" << std::flush
;
// One transaction per object: attrs, then data, then omap.
// NOTE(review): the results of getattrs/read/omap_get and of the
// queue_transaction below are not captured on the visible lines.
3088 ObjectStore::Transaction t
;
3091 map
<string
,bufferptr
,less
<>> attrs
;
3092 src
->getattrs(ch
, oid
, attrs
);
3093 if (!attrs
.empty()) {
3094 t
.setattrs(cid
, oid
, attrs
);
3098 src
->read(ch
, oid
, 0, 0, bl
);
3100 t
.write(cid
, oid
, 0, bl
.length(), bl
);
3101 bytes
+= bl
.length();
3105 map
<string
,bufferlist
> omap
;
3106 src
->omap_get(ch
, oid
, &header
, &omap
);
3107 if (header
.length()) {
3108 t
.omap_setheader(cid
, oid
, header
);
3111 if (!omap
.empty()) {
3112 keys
+= omap
.size();
3113 t
.omap_setkeys(cid
, oid
, omap
);
3116 dst
->queue_transaction(dch
, std::move(t
));
// Final totals line.
3119 cout
<< " " << std::setw(16) << n
<< " objects, "
3120 << std::setw(16) << bytes
<< " bytes, "
3121 << std::setw(16) << keys
<< " keys"
3122 << std::setw(1) << std::endl
;
// Copy <srcpath>/keyring to <dstpath>/keyring with mode 0600.
3126 cout
<< "keyring" << std::endl
;
3129 string s
= srcpath
+ "/keyring";
3131 r
= bl
.read_file(s
.c_str(), &err
);
3133 cerr
<< "failed to copy " << s
<< ": " << err
<< std::endl
;
3135 string d
= dstpath
+ "/keyring";
3136 bl
.write_file(d
.c_str(), 0600);
// Copy the listed meta keys from src to dst.
3141 cout
<< "duping osd metadata" << std::endl
;
3143 for (auto k
: {"magic", "whoami", "ceph_fsid", "fsid"}) {
3145 src
->read_meta(k
, &val
);
3146 dst
->write_meta(k
, val
);
3150 dst
->write_meta("ready", "ready");
3152 cout
<< "done." << std::endl
;
// Map an entity type name ("mds", "osd", "mon", "client", "mgr", "auth") to
// the matching CEPH_ENTITY_TYPE_* constant.
// NOTE(review): the value returned for an unrecognised name is on a line this
// listing omits — confirm against the real file.
3162 const int ceph_entity_name_type(const string name
)
3164 if (name
== "mds") return CEPH_ENTITY_TYPE_MDS
;
3165 if (name
== "osd") return CEPH_ENTITY_TYPE_OSD
;
3166 if (name
== "mon") return CEPH_ENTITY_TYPE_MON
;
3167 if (name
== "client") return CEPH_ENTITY_TYPE_CLIENT
;
3168 if (name
== "mgr") return CEPH_ENTITY_TYPE_MGR
;
3169 if (name
== "auth") return CEPH_ENTITY_TYPE_AUTH
;
// Parse an eversion_t from a string of the form "<epoch>'<version>".
// The string is split on the single-quote character and must yield exactly
// two fields; otherwise an "invalid format" message is printed.
// NOTE(review): the declaration of `e` and the return statements are on lines
// this listing omits.  atoi() reports no errors, so a non-numeric field
// silently parses as 0.
3173 eversion_t
get_eversion_from_str(const string
& s
) {
3175 vector
<string
> result
;
3176 boost::split(result
, s
, boost::is_any_of("'"));
3177 if (result
.size() != 2) {
3178 cerr
<< "eversion_t: invalid format: '" << s
<< "'" << std::endl
;
3181 e
.epoch
= atoi(result
[0].c_str());
3182 e
.version
= atoi(result
[1].c_str());
// Parse an osd_reqid_t from a string with four fields separated by '.' or
// ':' — entity type name, entity number, inc and tid, in that order.
// A string that does not split into exactly four fields yields a
// default-constructed osd_reqid_t after an "invalid format" message.
// NOTE(review): the declaration of `reqid` and the final return are on lines
// this listing omits.  atoi() reports no errors and returns int; if tid is
// wider than int, large values would not survive — confirm.
3186 osd_reqid_t
get_reqid_from_str(const string
& s
) {
3189 vector
<string
> result
;
3190 boost::split(result
, s
, boost::is_any_of(".:"));
3191 if (result
.size() != 4) {
3192 cerr
<< "reqid: invalid format " << s
<< std::endl
;
3193 return osd_reqid_t();
3195 reqid
.name
._type
= ceph_entity_name_type(result
[0]);
3196 reqid
.name
._num
= atoi(result
[1].c_str());
3198 reqid
.inc
= atoi(result
[2].c_str());
3199 reqid
.tid
= atoi(result
[3].c_str());
3203 void do_dups_inject_transction(ObjectStore
*store
, spg_t r_pgid
, map
<string
,bufferlist
> *new_dups
)
3205 ObjectStore::Transaction t
;
3206 coll_t
coll(r_pgid
);
3207 cerr
<< "injecting dups into pgid:" << r_pgid
<< " num of dups:" << new_dups
->size() << std::endl
;
3208 t
.omap_setkeys(coll
, r_pgid
.make_pgmeta_oid(), (*new_dups
));
3209 auto ch
= store
->open_collection(coll
);
3210 store
->queue_transaction(ch
, std::move(t
));
// Turn one JSON object into pg_log_dup_t entries stored in *new_dups under
// each entry's key name.
// Keys read from the JSON object: "generate" (optional, defaults to 0),
// "reqid", "version", "user_version" and "return_code"; all values are read
// with get_str().
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip) — what happens when a key is missing, how `bl` is filled, the
// per-iteration changes inside the generate loop and the return values are
// not visible here.
3214 int do_dups_inject_object(ObjectStore
*store
, spg_t r_pgid
, json_spirit::mObject
&in_json_obj
,
3215 map
<string
,bufferlist
> *new_dups
, bool debug
) {
3216 std::map
<std::string
, json_spirit::mValue
>::const_iterator it
= in_json_obj
.find("generate");
3217 int32_t generate
= 0;
3218 if (it
!= in_json_obj
.end()) {
3219 generate
= atoi(it
->second
.get_str().c_str());
3222 it
= in_json_obj
.find("reqid");
3223 if (it
== in_json_obj
.end()) {
3226 osd_reqid_t
reqid(get_reqid_from_str(it
->second
.get_str()));
3227 it
= in_json_obj
.find("version");
3228 if (it
== in_json_obj
.end()) {
3231 eversion_t
version(get_eversion_from_str(it
->second
.get_str()));
3232 it
= in_json_obj
.find("user_version");
3233 if (it
== in_json_obj
.end()) {
3236 version_t user_version
= atoi(it
->second
.get_str().c_str());
3237 it
= in_json_obj
.find("return_code");
3238 if (it
== in_json_obj
.end()) {
3241 int32_t return_code
= atoi(it
->second
.get_str().c_str());
// Generate path: runs `generate` times.
3243 for(auto i
= 0; i
< generate
; ++i
) {
3246 cout
<< "generate dups reqid " << reqid
<< " v=" << version
<< std::endl
;
3248 pg_log_dup_t
tmp(version
, user_version
, reqid
, return_code
);
3251 (*new_dups
)[tmp
.get_key_name()] = std::move(bl
);
// Flush to the store once more than 50000 entries have accumulated.
3252 if ( new_dups
->size() > 50000 ) {
3253 do_dups_inject_transction(store
, r_pgid
, new_dups
);
3254 cout
<< "inject of " << i
<< " dups into pgid:" << r_pgid
<< " done..." << std::endl
;
// Single-entry path: one dup built from the parsed fields.
3259 pg_log_dup_t
tmp(version
, user_version
, reqid
, return_code
);
3261 cout
<< "adding dup: " << tmp
<< "into key:" << tmp
.get_key_name() << std::endl
;
3265 (*new_dups
)[tmp
.get_key_name()] = std::move(bl
);
3270 void do_dups_inject_from_json(ObjectStore
*store
, spg_t r_pgid
, json_spirit::mValue
&inJson
, bool debug
)
3272 map
<string
,bufferlist
> new_dups
;
3273 const vector
<json_spirit::mValue
>& o
= inJson
.get_array();
3274 for (const auto& obj
: o
) {
3275 if (obj
.type() == json_spirit::obj_type
) {
3276 json_spirit::mObject Mobj
= obj
.get_obj();
3277 do_dups_inject_object(store
, r_pgid
, Mobj
, &new_dups
, debug
);
3279 throw std::runtime_error("JSON array/object not allowed type:" + std::to_string(obj
.type()));
3283 if (new_dups
.size() > 0) {
3284 do_dups_inject_transction(store
, r_pgid
, &new_dups
);
// Print the option description `desc` followed by the positional
// (object-command) syntax help; everything goes to stderr.
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), so any statements between the visible ones are not shown here.
3291 void usage(po::options_description
&desc
)
3294 cerr
<< desc
<< std::endl
;
3296 cerr
<< "Positional syntax:" << std::endl
;
// One line per supported object command.
3298 cerr
<< "ceph-objectstore-tool ... <object> (get|set)-bytes [file]" << std::endl
;
3299 cerr
<< "ceph-objectstore-tool ... <object> set-(attr|omap) <key> [file]" << std::endl
;
3300 cerr
<< "ceph-objectstore-tool ... <object> (get|rm)-(attr|omap) <key>" << std::endl
;
3301 cerr
<< "ceph-objectstore-tool ... <object> get-omaphdr" << std::endl
;
3302 cerr
<< "ceph-objectstore-tool ... <object> set-omaphdr [file]" << std::endl
;
3303 cerr
<< "ceph-objectstore-tool ... <object> list-attrs" << std::endl
;
3304 cerr
<< "ceph-objectstore-tool ... <object> list-omap" << std::endl
;
3305 cerr
<< "ceph-objectstore-tool ... <object> remove|removeall" << std::endl
;
3306 cerr
<< "ceph-objectstore-tool ... <object> dump" << std::endl
;
3307 cerr
<< "ceph-objectstore-tool ... <object> set-size" << std::endl
;
3308 cerr
<< "ceph-objectstore-tool ... <object> clear-data-digest" << std::endl
;
3309 cerr
<< "ceph-objectstore-tool ... <object> remove-clone-metadata <cloneid>" << std::endl
;
// Accepted forms of the <object> argument.
3311 cerr
<< "<object> can be a JSON object description as displayed" << std::endl
;
3312 cerr
<< "by --op list." << std::endl
;
3313 cerr
<< "<object> can be an object name which will be looked up in all" << std::endl
;
3314 cerr
<< "the OSD's PGs." << std::endl
;
3315 cerr
<< "<object> can be the empty string ('') which with a provided pgid " << std::endl
;
3316 cerr
<< "specifies the pgmeta object" << std::endl
;
// Meaning of the optional [file] argument.
3318 cerr
<< "The optional [file] argument will read stdin or write stdout" << std::endl
;
3319 cerr
<< "if not specified or if '-' specified." << std::endl
;
/**
 * Report whether @p check ends with @p ending.
 *
 * An empty @p ending matches every string; an @p ending longer than
 * @p check never matches.
 */
bool ends_with(const std::string& check, const std::string& ending)
{
  // Compare only the tail of `check`.  rfind() would keep scanning backwards
  // through the whole string before giving up when the suffix is absent.
  return check.size() >= ending.size() &&
    check.compare(check.size() - ending.size(), ending.size(), ending) == 0;
}
// Dump a filestore journal at `journalpath` through formatter `f` by
// constructing a FileJournal and calling its _fdump().
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip) — the declaration of `r`, the result for an empty journalpath and
// the return are not visible.  The journal is allocated with `new`; no
// release is visible in this listing — confirm it is freed.
3327 // Based on FileStore::dump_journal(), set-up enough to only dump
3328 int mydump_journal(Formatter
*f
, string journalpath
, bool m_journal_dio
)
3332 if (!journalpath
.length())
3335 FileJournal
*journal
= new FileJournal(g_ceph_context
, uuid_d(), NULL
, NULL
,
3336 journalpath
.c_str(), m_journal_dio
);
3337 r
= journal
->_fdump(*f
, false);
// Apply FileStore layout settings to the PG collections selected either by
// pool name or by a single pgid.
// Visible flow: return 0 for a store that is not a FileStore, load the OSDMap
// for superblock.current_epoch, resolve pool_name to a pool id when given,
// keep the PG collections whose pool or pgid matches, then call
// FileStore::apply_layout_settings() on each of them.
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip) — the rest of the parameter list (target_level is used below),
// several declarations, the dry_run branch and the error returns are not
// visible.
3342 int apply_layout_settings(ObjectStore
*os
, const OSDSuperblock
&superblock
,
3343 const string
&pool_name
, const spg_t
&pgid
, bool dry_run
,
// fs is the store viewed as a FileStore.
3348 FileStore
*fs
= dynamic_cast<FileStore
*>(os
);
3350 cerr
<< "Nothing to do for non-filestore backend" << std::endl
;
3351 return 0; // making this return success makes testing easier
3356 r
= get_osdmap(os
, superblock
.current_epoch
, curmap
, bl
);
3358 cerr
<< "Can't find local OSDMap: " << cpp_strerror(r
) << std::endl
;
// poolid stays -1 when no pool name was given; the filter below then matches
// on pgid only.
3362 int64_t poolid
= -1;
3363 if (pool_name
.length()) {
3364 poolid
= curmap
.lookup_pg_pool_name(pool_name
);
3366 cerr
<< "Couldn't find pool " << pool_name
<< ": " << cpp_strerror(poolid
)
3372 vector
<coll_t
> collections
, filtered_colls
;
3373 r
= os
->list_collections(collections
);
3375 cerr
<< "Error listing collections: " << cpp_strerror(r
) << std::endl
;
// Keep PG collections that belong to the pool or equal the requested pgid.
3379 for (auto const &coll
: collections
) {
3381 if (coll
.is_pg(&coll_pgid
) &&
3382 ((poolid
>= 0 && coll_pgid
.pool() == (uint64_t)poolid
) ||
3383 coll_pgid
== pgid
)) {
3384 filtered_colls
.push_back(coll
);
3388 size_t done
= 0, total
= filtered_colls
.size();
3389 for (auto const &coll
: filtered_colls
) {
3391 cerr
<< "Would apply layout settings to " << coll
<< std::endl
;
// Progress line terminated with "\r".
3393 cerr
<< "Finished " << done
<< "/" << total
<< " collections" << "\r";
3394 r
= fs
->apply_layout_settings(coll
, target_level
);
3396 cerr
<< "Error applying layout settings to " << coll
<< std::endl
;
3403 cerr
<< "Finished " << total
<< "/" << total
<< " collections" << "\r" << std::endl
;
3407 int main(int argc
, char **argv
)
3409 string dpath
, jpath
, pgidstr
, op
, file
, mountpoint
, mon_store_path
, object
;
3410 string target_data_path
, fsid
;
3411 string objcmd
, arg1
, arg2
, type
, format
, argnspace
, pool
, rmtypestr
;
3412 boost::optional
<std::string
> nspace
;
3415 unsigned slow_threshold
= 16;
3417 bool human_readable
;
3418 Formatter
*formatter
;
3421 po::options_description
desc("Allowed options");
3423 ("help", "produce help message")
3424 ("type", po::value
<string
>(&type
),
3425 "Arg is one of [bluestore (default), filestore, memstore]")
3426 ("data-path", po::value
<string
>(&dpath
),
3427 "path to object store, mandatory")
3428 ("journal-path", po::value
<string
>(&jpath
),
3429 "path to journal, use if tool can't find it")
3430 ("pgid", po::value
<string
>(&pgidstr
),
3431 "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, trim-pg-log-dups and mandatory for apply-layout-settings if --pool is not specified")
3432 ("pool", po::value
<string
>(&pool
),
3433 "Pool name, mandatory for apply-layout-settings if --pgid is not specified")
3434 ("op", po::value
<string
>(&op
),
3435 "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, list-slow-omap, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
3436 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log, trim-pg-log-dups statfs]")
3437 ("epoch", po::value
<unsigned>(&epoch
),
3438 "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
3439 ("file", po::value
<string
>(&file
),
3440 "path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap")
3441 ("mon-store-path", po::value
<string
>(&mon_store_path
),
3442 "path of monstore to update-mon-db")
3443 ("fsid", po::value
<string
>(&fsid
),
3444 "fsid for new store created by mkfs")
3445 ("target-data-path", po::value
<string
>(&target_data_path
),
3446 "path of target object store (for --op dup)")
3447 ("mountpoint", po::value
<string
>(&mountpoint
),
3449 ("format", po::value
<string
>(&format
)->default_value("json-pretty"),
3450 "Output format which may be json, json-pretty, xml, xml-pretty")
3451 ("debug", "Enable diagnostic output to stderr")
3452 ("no-mon-config", "Do not contact mons for config")
3453 ("no-superblock", "Do not read superblock")
3454 ("force", "Ignore some types of errors and proceed with operation - USE WITH CAUTION: CORRUPTION POSSIBLE NOW OR IN THE FUTURE")
3455 ("skip-journal-replay", "Disable journal replay")
3456 ("skip-mount-omap", "Disable mounting of omap")
3457 ("head", "Find head/snapdir when searching for objects by name")
3458 ("dry-run", "Don't modify the objectstore")
3459 ("tty", "Treat stdout as a tty (no binary data)")
3460 ("namespace", po::value
<string
>(&argnspace
), "Specify namespace when searching for objects")
3461 ("rmtype", po::value
<string
>(&rmtypestr
), "Specify corrupting object removal 'snapmap' or 'nosnapmap' - TESTING USE ONLY")
3462 ("slow-omap-threshold", po::value
<unsigned>(&slow_threshold
),
3463 "Threshold (in seconds) to consider omap listing slow (for op=list-slow-omap)")
3466 po::options_description
positional("Positional options");
3467 positional
.add_options()
3468 ("object", po::value
<string
>(&object
), "'' for pgmeta_oid, object name or ghobject in json")
3469 ("objcmd", po::value
<string
>(&objcmd
), "command [(get|set)-bytes, (get|set|rm)-(attr|omap), (get|set)-omaphdr, list-attrs, list-omap, remove]")
3470 ("arg1", po::value
<string
>(&arg1
), "arg1 based on cmd")
3471 ("arg2", po::value
<string
>(&arg2
), "arg2 based on cmd")
3474 po::options_description all
;
3475 all
.add(desc
).add(positional
);
3477 po::positional_options_description pd
;
3478 pd
.add("object", 1).add("objcmd", 1).add("arg1", 1).add("arg2", 1);
3480 vector
<string
> ceph_option_strings
;
3482 po::variables_map vm
;
3484 po::parsed_options parsed
=
3485 po::command_line_parser(argc
, argv
).options(all
).allow_unregistered().positional(pd
).run();
3486 po::store( parsed
, vm
);
3488 ceph_option_strings
= po::collect_unrecognized(parsed
.options
,
3489 po::include_positional
);
3490 } catch(po::error
&e
) {
3491 std::cerr
<< e
.what() << std::endl
;
3495 if (vm
.count("help")) {
3500 // Compatibility with previous option name
3501 if (op
== "dump-import")
3504 debug
= (vm
.count("debug") > 0);
3506 force
= (vm
.count("force") > 0);
3508 no_superblock
= (vm
.count("no-superblock") > 0);
3510 if (vm
.count("namespace"))
3513 dry_run
= (vm
.count("dry-run") > 0);
3514 tty
= (vm
.count("tty") > 0);
3516 osflagbits_t flags
= 0;
3517 if (dry_run
|| vm
.count("skip-journal-replay"))
3518 flags
|= SKIP_JOURNAL_REPLAY
;
3519 if (vm
.count("skip-mount-omap"))
3520 flags
|= SKIP_MOUNT_OMAP
;
3521 if (op
== "update-mon-db")
3522 flags
|= SKIP_JOURNAL_REPLAY
;
3524 head
= (vm
.count("head") > 0);
3526 // infer osd id so we can authenticate
3528 snprintf(fn
, sizeof(fn
), "%s/whoami", dpath
.c_str());
3529 int fd
= ::open(fn
, O_RDONLY
);
3533 string
s(bl
.c_str(), bl
.length());
3534 int whoami
= atoi(s
.c_str());
3536 // identify ourselves as this osd so we can auth and fetch our configs
3537 tmp
.push_back("-n");
3538 tmp
.push_back(string("osd.") + stringify(whoami
));
3539 // populate osd_data so that the default keyring location works
3540 tmp
.push_back("--osd-data");
3541 tmp
.push_back(dpath
);
3542 tmp
.insert(tmp
.end(), ceph_option_strings
.begin(),
3543 ceph_option_strings
.end());
3544 tmp
.swap(ceph_option_strings
);
3547 vector
<const char *> ceph_options
;
3548 ceph_options
.reserve(ceph_options
.size() + ceph_option_strings
.size());
3549 for (vector
<string
>::iterator i
= ceph_option_strings
.begin();
3550 i
!= ceph_option_strings
.end();
3552 ceph_options
.push_back(i
->c_str());
3555 snprintf(fn
, sizeof(fn
), "%s/type", dpath
.c_str());
3556 fd
= ::open(fn
, O_RDONLY
);
3561 string dp_type
= string(bl
.c_str(), bl
.length() - 1); // drop \n
3562 if (vm
.count("type") && dp_type
!= "" && type
!= dp_type
)
3563 cerr
<< "WARNING: Ignoring type \"" << type
<< "\" - found data-path type \""
3564 << dp_type
<< "\"" << std::endl
;
3566 //cout << "object store type is " << type << std::endl;
3571 if (!vm
.count("type") && type
== "") {
3574 if (!vm
.count("data-path") &&
3575 op
!= "dump-export" &&
3576 !(op
== "dump-journal" && type
== "filestore")) {
3577 cerr
<< "Must provide --data-path" << std::endl
;
3581 if (type
== "filestore" && !vm
.count("journal-path")) {
3582 jpath
= dpath
+ "/journal";
3584 if (!vm
.count("op") && !vm
.count("object")) {
3585 cerr
<< "Must provide --op or object command..." << std::endl
;
3589 if (op
!= "list" && op
!= "apply-layout-settings" &&
3590 vm
.count("op") && vm
.count("object")) {
3591 cerr
<< "Can't specify both --op and object command syntax" << std::endl
;
3595 if (op
== "apply-layout-settings" && !(vm
.count("pool") ^ vm
.count("pgid"))) {
3596 cerr
<< "apply-layout-settings requires either --pool or --pgid"
3601 if (op
!= "list" && op
!= "apply-layout-settings" && vm
.count("object") && !vm
.count("objcmd")) {
3602 cerr
<< "Invalid syntax, missing command" << std::endl
;
3606 if (op
== "fuse" && mountpoint
.length() == 0) {
3607 cerr
<< "Missing fuse mountpoint" << std::endl
;
3611 outistty
= isatty(STDOUT_FILENO
) || tty
;
3614 if ((op
== "export" || op
== "export-remove" || op
== "get-osdmap" || op
== "get-inc-osdmap") && !dry_run
) {
3615 if (!vm
.count("file") || file
== "-") {
3617 cerr
<< "stdout is a tty and no --file filename specified" << std::endl
;
3620 file_fd
= STDOUT_FILENO
;
3622 file_fd
= open(file
.c_str(), O_WRONLY
|O_CREAT
|O_TRUNC
, 0666);
3624 } else if (op
== "import" || op
== "dump-export" || op
== "set-osdmap" || op
== "set-inc-osdmap" || op
== "pg-log-inject-dups") {
3625 if (!vm
.count("file") || file
== "-") {
3626 if (isatty(STDIN_FILENO
)) {
3627 cerr
<< "stdin is a tty and no --file filename specified" << std::endl
;
3630 file_fd
= STDIN_FILENO
;
3632 file_fd
= open(file
.c_str(), O_RDONLY
);
3636 ObjectStoreTool tool
= ObjectStoreTool(file_fd
, dry_run
);
3638 if (vm
.count("file") && file_fd
== fd_none
&& !dry_run
) {
3639 cerr
<< "--file option only applies to import, dump-export, export, export-remove, "
3640 << "get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap" << std::endl
;
3644 if (file_fd
!= fd_none
&& file_fd
< 0) {
3645 string err
= string("file: ") + file
;
3646 perror(err
.c_str());
3650 if (vm
.count("no-mon-config") > 0) {
3651 init_flags
|= CINIT_FLAG_NO_MON_CONFIG
;
3654 auto cct
= global_init(
3656 CEPH_ENTITY_TYPE_OSD
,
3657 CODE_ENVIRONMENT_UTILITY_NODOUT
,
3659 common_init_finish(g_ceph_context
);
3661 g_conf().set_val_or_die("log_to_stderr", "true");
3662 g_conf().set_val_or_die("err_to_stderr", "true");
3664 g_conf().apply_changes(nullptr);
3666 // Special list handling. Treating pretty_format as human readable,
3667 // with one object per line and not an enclosing array.
3668 human_readable
= ends_with(format
, "-pretty");
3669 if ((op
== "list" || op
== "meta-list") && human_readable
) {
3670 // Remove -pretty from end of format which we know is there
3671 format
= format
.substr(0, format
.size() - strlen("-pretty"));
3674 formatter
= Formatter::create(format
);
3675 if (formatter
== NULL
) {
3676 cerr
<< "unrecognized format: " << format
<< std::endl
;
3680 // Special handling for filestore journal, so we can dump it without mounting
3681 if (op
== "dump-journal" && type
== "filestore") {
3682 int ret
= mydump_journal(formatter
, jpath
, g_conf()->journal_dio
);
3684 cerr
<< "journal-path: " << jpath
<< ": "
3685 << cpp_strerror(ret
) << std::endl
;
3688 formatter
->flush(cout
);
3692 if (op
== "dump-export") {
3693 int ret
= tool
.dump_export(formatter
);
3695 cerr
<< "dump-export: "
3696 << cpp_strerror(ret
) << std::endl
;
3702 //Verify that data-path really exists
3704 if (::stat(dpath
.c_str(), &st
) == -1) {
3705 string err
= string("data-path: ") + dpath
;
3706 perror(err
.c_str());
3710 if (pgidstr
.length() && pgidstr
!= "meta" && !pgid
.parse(pgidstr
.c_str())) {
3711 cerr
<< "Invalid pgid '" << pgidstr
<< "' specified" << std::endl
;
3715 //Verify that the journal-path really exists
3716 if (type
== "filestore") {
3717 if (::stat(jpath
.c_str(), &st
) == -1) {
3718 string err
= string("journal-path: ") + jpath
;
3719 perror(err
.c_str());
3722 if (S_ISDIR(st
.st_mode
)) {
3723 cerr
<< "journal-path: " << jpath
<< ": "
3724 << cpp_strerror(EISDIR
) << std::endl
;
3729 std::unique_ptr
<ObjectStore
> fs
= ObjectStore::create(g_ceph_context
, type
, dpath
, jpath
, flags
);
3731 cerr
<< "Unable to create store of type " << type
<< std::endl
;
3735 if (op
== "fsck" || op
== "fsck-deep") {
3736 int r
= fs
->fsck(op
== "fsck-deep");
3738 cerr
<< "fsck failed: " << cpp_strerror(r
) << std::endl
;
3742 cerr
<< "fsck status: " << r
<< " remaining error(s) and warning(s)" << std::endl
;
3745 cout
<< "fsck success" << std::endl
;
3748 if (op
== "repair" || op
== "repair-deep") {
3749 int r
= fs
->repair(op
== "repair-deep");
3751 cerr
<< "repair failed: " << cpp_strerror(r
) << std::endl
;
3755 cerr
<< "repair status: " << r
<< " remaining error(s) and warning(s)" << std::endl
;
3758 cout
<< "repair success" << std::endl
;
3762 if (fsid
.length()) {
3764 bool r
= f
.parse(fsid
.c_str());
3766 cerr
<< "failed to parse uuid '" << fsid
<< "'" << std::endl
;
3773 cerr
<< "mkfs failed: " << cpp_strerror(r
) << std::endl
;
3781 snprintf(fn
, sizeof(fn
), "%s/type", target_data_path
.c_str());
3782 int fd
= ::open(fn
, O_RDONLY
);
3784 cerr
<< "Unable to open " << target_data_path
<< "/type" << std::endl
;
3790 target_type
= string(bl
.c_str(), bl
.length() - 1); // drop \n
3793 unique_ptr
<ObjectStore
> targetfs
= ObjectStore::create(
3794 g_ceph_context
, target_type
,
3795 target_data_path
, "", 0);
3797 cerr
<< "Unable to open store of type " << target_type
<< std::endl
;
3800 int r
= dup(dpath
, fs
.get(), target_data_path
, targetfs
.get());
3802 cerr
<< "dup failed: " << cpp_strerror(r
) << std::endl
;
3808 int ret
= fs
->mount();
3810 if (ret
== -EBUSY
) {
3811 cerr
<< "OSD has the store locked" << std::endl
;
3813 cerr
<< "Mount failed with '" << cpp_strerror(ret
) << "'" << std::endl
;
3820 FuseStore
fuse(fs
.get(), mountpoint
);
3821 cout
<< "mounting fuse at " << mountpoint
<< " ..." << std::endl
;
3822 int r
= fuse
.main();
3825 cerr
<< "failed to mount fuse: " << cpp_strerror(r
) << std::endl
;
3829 cerr
<< "fuse support not enabled" << std::endl
;
3835 vector
<coll_t
>::iterator it
;
3836 CompatSet supported
;
3838 #ifdef INTERNAL_TEST
3839 supported
= get_test_compat_set();
3841 supported
= OSD::get_osd_compat_set();
3845 auto ch
= fs
->open_collection(coll_t::meta());
3846 std::unique_ptr
<OSDSuperblock
> superblock
;
3847 if (!no_superblock
) {
3848 superblock
.reset(new OSDSuperblock
);
3849 bufferlist::const_iterator p
;
3850 ret
= fs
->read(ch
, OSD_SUPERBLOCK_GOBJECT
, 0, 0, bl
);
3852 cerr
<< "Failure to read OSD superblock: " << cpp_strerror(ret
) << std::endl
;
3857 decode(*superblock
, p
);
3860 cerr
<< "Cluster fsid=" << superblock
->cluster_fsid
<< std::endl
;
3864 cerr
<< "Supported features: " << supported
<< std::endl
;
3865 cerr
<< "On-disk features: " << superblock
->compat_features
<< std::endl
;
3867 if (supported
.compare(superblock
->compat_features
) == -1) {
3868 CompatSet unsupported
= supported
.unsupported(superblock
->compat_features
);
3869 cerr
<< "On-disk OSD incompatible features set "
3870 << unsupported
<< std::endl
;
3876 if (op
== "apply-layout-settings") {
3877 int target_level
= 0;
3878 // Single positional argument with apply-layout-settings
3879 // for target_level.
3880 if (vm
.count("object") && isdigit(object
[0])) {
3881 target_level
= atoi(object
.c_str());
3882 // This requires --arg1 to be specified since
3883 // this is the third positional argument and normally
3884 // used with object operations.
3885 } else if (vm
.count("arg1") && isdigit(arg1
[0])) {
3886 target_level
= atoi(arg1
.c_str());
3888 ceph_assert(superblock
!= nullptr);
3889 ret
= apply_layout_settings(fs
.get(), *superblock
, pool
, pgid
, dry_run
, target_level
);
3893 if (op
!= "list" && vm
.count("object")) {
3894 // Special case: Create pgmeta_oid if empty string specified
3895 // This can't conflict with any actual object names.
3897 ghobj
= pgid
.make_pgmeta_oid();
3899 json_spirit::Value v
;
3901 if (!json_spirit::read(object
, v
) ||
3902 (v
.type() != json_spirit::array_type
&& v
.type() != json_spirit::obj_type
)) {
3903 // Special: Need head/snapdir so set even if user didn't specify
3904 if (vm
.count("objcmd") && (objcmd
== "remove-clone-metadata"))
3906 lookup_ghobject
lookup(object
, nspace
, head
);
3907 if (pgidstr
== "meta")
3908 ret
= action_on_all_objects_in_exact_pg(fs
.get(), coll_t::meta(), lookup
, debug
);
3909 else if (pgidstr
.length())
3910 ret
= action_on_all_objects_in_exact_pg(fs
.get(), coll_t(pgid
), lookup
, debug
);
3912 ret
= action_on_all_objects(fs
.get(), lookup
, debug
);
3914 throw std::runtime_error("Internal error");
3916 if (lookup
.size() != 1) {
3918 if (lookup
.size() == 0)
3919 ss
<< "No object id '" << object
<< "' found or invalid JSON specified";
3921 ss
<< "Found " << lookup
.size() << " objects with id '" << object
3922 << "', please use a JSON spec from --op list instead";
3923 throw std::runtime_error(ss
.str());
3925 pair
<coll_t
, ghobject_t
> found
= lookup
.pop();
3926 pgidstr
= found
.first
.to_str();
3927 pgid
.parse(pgidstr
.c_str());
3928 ghobj
= found
.second
;
3932 if (pgidstr
.length() == 0 && v
.type() != json_spirit::array_type
) {
3933 ss
<< "Without --pgid the object '" << object
3934 << "' must be a JSON array";
3935 throw std::runtime_error(ss
.str());
3937 if (v
.type() == json_spirit::array_type
) {
3938 json_spirit::Array array
= v
.get_array();
3939 if (array
.size() != 2) {
3940 ss
<< "Object '" << object
3941 << "' must be a JSON array with 2 elements";
3942 throw std::runtime_error(ss
.str());
3944 vector
<json_spirit::Value
>::iterator i
= array
.begin();
3945 ceph_assert(i
!= array
.end());
3946 if (i
->type() != json_spirit::str_type
) {
3947 ss
<< "Object '" << object
3948 << "' must be a JSON array with the first element a string";
3949 throw std::runtime_error(ss
.str());
3951 string object_pgidstr
= i
->get_str();
3952 if (object_pgidstr
!= "meta") {
3954 object_pgid
.parse(object_pgidstr
.c_str());
3955 if (pgidstr
.length() > 0) {
3956 if (object_pgid
!= pgid
) {
3957 ss
<< "object '" << object
3958 << "' has a pgid different from the --pgid="
3959 << pgidstr
<< " option";
3960 throw std::runtime_error(ss
.str());
3963 pgidstr
= object_pgidstr
;
3967 pgidstr
= object_pgidstr
;
3974 } catch (std::runtime_error
& e
) {
3975 ss
<< "Decode object JSON error: " << e
.what();
3976 throw std::runtime_error(ss
.str());
3978 if (pgidstr
!= "meta" && (uint64_t)pgid
.pgid
.m_pool
!= (uint64_t)ghobj
.hobj
.pool
) {
3979 cerr
<< "Object pool and pgid pool don't match" << std::endl
;
3983 if (pgidstr
!= "meta") {
3984 auto ch
= fs
->open_collection(coll_t(pgid
));
3985 if (!ghobj
.match(fs
->collection_bits(ch
), pgid
.ps())) {
3987 ss
<< "object " << ghobj
<< " not contained by pg " << pgid
;
3988 throw std::runtime_error(ss
.str());
3992 } catch (std::runtime_error
& e
) {
3993 cerr
<< e
.what() << std::endl
;
4000 // The ops which require --pgid option are checked here and
4001 // mentioned in the usage for --pgid.
4002 if ((op
== "info" || op
== "log" || op
== "remove" || op
== "export"
4003 || op
== "export-remove" || op
== "mark-complete"
4004 || op
== "reset-last-complete"
4005 || op
== "trim-pg-log"
4006 || op
== "pg-log-inject-dups") &&
4007 pgidstr
.length() == 0) {
4008 cerr
<< "Must provide pgid" << std::endl
;
4014 if (op
== "import") {
4015 ceph_assert(superblock
!= nullptr);
4017 ret
= tool
.do_import(fs
.get(), *superblock
, force
, pgidstr
);
4019 catch (const buffer::error
&e
) {
4020 cerr
<< "do_import threw exception error " << e
.what() << std::endl
;
4023 if (ret
== -EFAULT
) {
4024 cerr
<< "Corrupt input for import" << std::endl
;
4027 cout
<< "Import successful" << std::endl
;
4029 } else if (op
== "dump-journal-mount") {
4030 // Undocumented feature to dump journal with mounted fs
4031 // This doesn't support the format option, but it uses the
4032 // ObjectStore::dump_journal() and mounts to get replay to run.
4033 ret
= fs
->dump_journal(cout
);
4035 if (ret
== -EOPNOTSUPP
) {
4036 cerr
<< "Object store type \"" << type
<< "\" doesn't support journal dump" << std::endl
;
4038 cerr
<< "Journal dump failed with error " << cpp_strerror(ret
) << std::endl
;
4042 } else if (op
== "get-osdmap") {
4046 ceph_assert(superblock
!= nullptr);
4047 epoch
= superblock
->current_epoch
;
4049 ret
= get_osdmap(fs
.get(), epoch
, osdmap
, bl
);
4051 cerr
<< "Failed to get osdmap#" << epoch
<< ": "
4052 << cpp_strerror(ret
) << std::endl
;
4055 ret
= bl
.write_fd(file_fd
);
4057 cerr
<< "Failed to write to " << file
<< ": " << cpp_strerror(ret
) << std::endl
;
4059 cout
<< "osdmap#" << epoch
<< " exported." << std::endl
;
4062 } else if (op
== "set-osdmap") {
4064 ret
= get_fd_data(file_fd
, bl
);
4066 cerr
<< "Failed to read osdmap " << cpp_strerror(ret
) << std::endl
;
4068 ret
= set_osdmap(fs
.get(), epoch
, bl
, force
);
4071 } else if (op
== "get-inc-osdmap") {
4074 ceph_assert(superblock
!= nullptr);
4075 epoch
= superblock
->current_epoch
;
4077 ret
= get_inc_osdmap(fs
.get(), epoch
, bl
);
4079 cerr
<< "Failed to get incremental osdmap# " << epoch
<< ": "
4080 << cpp_strerror(ret
) << std::endl
;
4083 ret
= bl
.write_fd(file_fd
);
4085 cerr
<< "Failed to write to " << file
<< ": " << cpp_strerror(ret
) << std::endl
;
4087 cout
<< "inc-osdmap#" << epoch
<< " exported." << std::endl
;
4090 } else if (op
== "set-inc-osdmap") {
4092 ret
= get_fd_data(file_fd
, bl
);
4094 cerr
<< "Failed to read incremental osdmap " << cpp_strerror(ret
) << std::endl
;
4097 ret
= set_inc_osdmap(fs
.get(), epoch
, bl
, force
);
4100 } else if (op
== "update-mon-db") {
4101 if (!vm
.count("mon-store-path")) {
4102 cerr
<< "Please specify the path to monitor db to update" << std::endl
;
4105 ceph_assert(superblock
!= nullptr);
4106 ret
= update_mon_db(*fs
, *superblock
, dpath
+ "/keyring", mon_store_path
);
4111 if (op
== "remove") {
4112 if (!force
&& !dry_run
) {
4113 cerr
<< "Please use export-remove or you must use --force option" << std::endl
;
4117 ret
= initiate_new_remove_pg(fs
.get(), pgid
);
4119 cerr
<< "PG '" << pgid
<< "' not found" << std::endl
;
4122 cout
<< "Remove successful" << std::endl
;
4126 if (op
== "fix-lost") {
4127 boost::scoped_ptr
<action_on_object_t
> action
;
4128 action
.reset(new do_fix_lost());
4129 if (pgidstr
.length())
4130 ret
= action_on_all_objects_in_exact_pg(fs
.get(), coll_t(pgid
), *action
, debug
);
4132 ret
= action_on_all_objects(fs
.get(), *action
, debug
);
4137 ret
= do_list(fs
.get(), pgidstr
, object
, nspace
, formatter
, debug
,
4138 human_readable
, head
);
4140 cerr
<< "do_list failed: " << cpp_strerror(ret
) << std::endl
;
4144 if (op
== "list-slow-omap") {
4145 ret
= do_list_slow(fs
.get(), pgidstr
, object
, slow_threshold
, formatter
, debug
,
4148 cerr
<< "do_list failed: " << cpp_strerror(ret
) << std::endl
;
4153 if (op
== "dump-super") {
4154 ceph_assert(superblock
!= nullptr);
4155 formatter
->open_object_section("superblock");
4156 superblock
->dump(formatter
);
4157 formatter
->close_section();
4158 formatter
->flush(cout
);
4163 if (op
== "statfs") {
4164 store_statfs_t statsbuf
;
4165 ret
= fs
->statfs(&statsbuf
);
4167 cerr
<< "error from statfs: " << cpp_strerror(ret
) << std::endl
;
4170 formatter
->open_object_section("statfs");
4171 statsbuf
.dump(formatter
);
4172 formatter
->close_section();
4173 formatter
->flush(cout
);
4178 if (op
== "meta-list") {
4179 ret
= do_meta(fs
.get(), object
, formatter
, debug
, human_readable
);
4181 cerr
<< "do_meta failed: " << cpp_strerror(ret
) << std::endl
;
4186 ret
= fs
->list_collections(ls
);
4188 cerr
<< "failed to list pgs: " << cpp_strerror(ret
) << std::endl
;
4192 if (debug
&& op
== "list-pgs")
4193 cout
<< "Performing list-pgs operation" << std::endl
;
4196 for (it
= ls
.begin(); it
!= ls
.end(); ++it
) {
4199 if (pgidstr
== "meta") {
4200 if (it
->to_str() == "meta")
4206 if (!it
->is_pg(&tmppgid
)) {
4210 if (it
->is_temp(&tmppgid
)) {
4214 if (op
!= "list-pgs" && tmppgid
!= pgid
) {
4218 if (op
!= "list-pgs") {
4223 cout
<< tmppgid
<< std::endl
;
4226 if (op
== "list-pgs") {
4231 // If this is neither an object command nor one of the ops handled below, output this usage
4232 // before complaining about a bad pgid
4233 if (!vm
.count("objcmd") && op
!= "export" && op
!= "export-remove" && op
!= "info" && op
!= "log" && op
!= "mark-complete" && op
!= "trim-pg-log" && op
!= "trim-pg-log-dups" && op
!= "pg-log-inject-dups") {
4234 cerr
<< "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
4235 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, trim-pg-log-dups statfs)"
4242 // The following code for export, info, and log requires omap or !skip-mount-omap
4243 if (it
!= ls
.end()) {
4247 if (vm
.count("objcmd")) {
4249 if (objcmd
== "remove" || objcmd
== "removeall") {
4250 bool all
= (objcmd
== "removeall");
4251 enum rmtype type
= BOTH
;
4252 if (rmtypestr
== "nosnapmap")
4254 else if (rmtypestr
== "snapmap")
4256 ret
= do_remove_object(fs
.get(), coll
, ghobj
, all
, force
, type
);
4258 } else if (objcmd
== "list-attrs") {
4259 ret
= do_list_attrs(fs
.get(), coll
, ghobj
);
4261 } else if (objcmd
== "list-omap") {
4262 ret
= do_list_omap(fs
.get(), coll
, ghobj
);
4264 } else if (objcmd
== "get-bytes" || objcmd
== "set-bytes") {
4265 if (objcmd
== "get-bytes") {
4267 if (vm
.count("arg1") == 0 || arg1
== "-") {
4270 fd
= open(arg1
.c_str(), O_WRONLY
|O_TRUNC
|O_CREAT
|O_EXCL
|O_LARGEFILE
, 0666);
4272 cerr
<< "open " << arg1
<< " " << cpp_strerror(errno
) << std::endl
;
4277 ret
= do_get_bytes(fs
.get(), coll
, ghobj
, fd
);
4278 if (fd
!= STDOUT_FILENO
)
4282 if (vm
.count("arg1") == 0 || arg1
== "-") {
4283 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4284 if (isatty(STDIN_FILENO
)) {
4285 cerr
<< "stdin is a tty and no file specified" << std::endl
;
4291 fd
= open(arg1
.c_str(), O_RDONLY
|O_LARGEFILE
, 0666);
4293 cerr
<< "open " << arg1
<< " " << cpp_strerror(errno
) << std::endl
;
4298 ret
= do_set_bytes(fs
.get(), coll
, ghobj
, fd
);
4299 if (fd
!= STDIN_FILENO
)
4303 } else if (objcmd
== "get-attr") {
4304 if (vm
.count("arg1") == 0) {
4309 ret
= do_get_attr(fs
.get(), coll
, ghobj
, arg1
);
4311 } else if (objcmd
== "set-attr") {
4312 if (vm
.count("arg1") == 0) {
4318 if (vm
.count("arg2") == 0 || arg2
== "-") {
4319 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4320 if (isatty(STDIN_FILENO
)) {
4321 cerr
<< "stdin is a tty and no file specified" << std::endl
;
4327 fd
= open(arg2
.c_str(), O_RDONLY
|O_LARGEFILE
, 0666);
4329 cerr
<< "open " << arg2
<< " " << cpp_strerror(errno
) << std::endl
;
4334 ret
= do_set_attr(fs
.get(), coll
, ghobj
, arg1
, fd
);
4335 if (fd
!= STDIN_FILENO
)
4338 } else if (objcmd
== "rm-attr") {
4339 if (vm
.count("arg1") == 0) {
4344 ret
= do_rm_attr(fs
.get(), coll
, ghobj
, arg1
);
4346 } else if (objcmd
== "get-omap") {
4347 if (vm
.count("arg1") == 0) {
4352 ret
= do_get_omap(fs
.get(), coll
, ghobj
, arg1
);
4354 } else if (objcmd
== "set-omap") {
4355 if (vm
.count("arg1") == 0) {
4361 if (vm
.count("arg2") == 0 || arg2
== "-") {
4362 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4363 if (isatty(STDIN_FILENO
)) {
4364 cerr
<< "stdin is a tty and no file specified" << std::endl
;
4370 fd
= open(arg2
.c_str(), O_RDONLY
|O_LARGEFILE
, 0666);
4372 cerr
<< "open " << arg2
<< " " << cpp_strerror(errno
) << std::endl
;
4377 ret
= do_set_omap(fs
.get(), coll
, ghobj
, arg1
, fd
);
4378 if (fd
!= STDIN_FILENO
)
4381 } else if (objcmd
== "rm-omap") {
4382 if (vm
.count("arg1") == 0) {
4387 ret
= do_rm_omap(fs
.get(), coll
, ghobj
, arg1
);
4389 } else if (objcmd
== "get-omaphdr") {
4390 if (vm
.count("arg1")) {
4395 ret
= do_get_omaphdr(fs
.get(), coll
, ghobj
);
4397 } else if (objcmd
== "set-omaphdr") {
4399 if (vm
.count("arg2")) {
4405 if (vm
.count("arg1") == 0 || arg1
== "-") {
4406 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4407 if (isatty(STDIN_FILENO
)) {
4408 cerr
<< "stdin is a tty and no file specified" << std::endl
;
4414 fd
= open(arg1
.c_str(), O_RDONLY
|O_LARGEFILE
, 0666);
4416 cerr
<< "open " << arg1
<< " " << cpp_strerror(errno
) << std::endl
;
4421 ret
= do_set_omaphdr(fs
.get(), coll
, ghobj
, fd
);
4422 if (fd
!= STDIN_FILENO
)
4425 } else if (objcmd
== "dump") {
4426 // There should not be any other arguments
4427 if (vm
.count("arg1") || vm
.count("arg2")) {
4432 ret
= print_obj_info(fs
.get(), coll
, ghobj
, formatter
);
4434 } else if (objcmd
== "corrupt-info") { // Undocumented testing feature
4435 // There should not be any other arguments
4436 if (vm
.count("arg1") || vm
.count("arg2")) {
4441 ret
= corrupt_info(fs
.get(), coll
, ghobj
, formatter
);
4443 } else if (objcmd
== "set-size" || objcmd
== "corrupt-size") {
4444 // Undocumented testing feature
4445 bool corrupt
= (objcmd
== "corrupt-size");
4447 if (vm
.count("arg1") == 0 || vm
.count("arg2")) {
4452 if (arg1
.length() == 0 || !isdigit(arg1
.c_str()[0])) {
4453 cerr
<< "Invalid size '" << arg1
<< "' specified" << std::endl
;
4457 uint64_t size
= atoll(arg1
.c_str());
4458 ret
= set_size(fs
.get(), coll
, ghobj
, size
, formatter
, corrupt
);
4460 } else if (objcmd
== "clear-data-digest") {
4461 ret
= clear_data_digest(fs
.get(), coll
, ghobj
);
4463 } else if (objcmd
== "clear-snapset") {
4464 // UNDOCUMENTED: For testing, zap the SnapSet
4465 // IGNORE extra args since not in usage anyway
4466 if (!ghobj
.hobj
.has_snapset()) {
4467 cerr
<< "'" << objcmd
<< "' requires a head or snapdir object" << std::endl
;
4471 ret
= clear_snapset(fs
.get(), coll
, ghobj
, arg1
);
4473 } else if (objcmd
== "remove-clone-metadata") {
4475 if (vm
.count("arg1") == 0 || vm
.count("arg2")) {
4480 if (!ghobj
.hobj
.has_snapset()) {
4481 cerr
<< "'" << objcmd
<< "' requires a head or snapdir object" << std::endl
;
4485 if (arg1
.length() == 0 || !isdigit(arg1
.c_str()[0])) {
4486 cerr
<< "Invalid cloneid '" << arg1
<< "' specified" << std::endl
;
4490 snapid_t cloneid
= atoi(arg1
.c_str());
4491 ret
= remove_clone(fs
.get(), coll
, ghobj
, cloneid
, force
);
4494 cerr
<< "Unknown object command '" << objcmd
<< "'" << std::endl
;
4501 ret
= PG::peek_map_epoch(fs
.get(), pgid
, &map_epoch
);
4503 cerr
<< "peek_map_epoch reports error" << std::endl
;
4505 cerr
<< "map_epoch " << map_epoch
<< std::endl
;
4507 pg_info_t
info(pgid
);
4508 PastIntervals past_intervals
;
4510 ret
= PG::read_info(fs
.get(), pgid
, coll
, info
, past_intervals
, struct_ver
);
4512 cerr
<< "read_info error " << cpp_strerror(ret
) << std::endl
;
4515 if (struct_ver
< PG::get_compat_struct_v()) {
4516 cerr
<< "PG is too old to upgrade, use older Ceph version" << std::endl
;
4521 cerr
<< "struct_v " << (int)struct_ver
<< std::endl
;
4523 if (op
== "export" || op
== "export-remove") {
4524 ceph_assert(superblock
!= nullptr);
4525 ret
= tool
.do_export(cct
.get(), fs
.get(), coll
, pgid
, info
, map_epoch
, struct_ver
, *superblock
, past_intervals
);
4527 cerr
<< "Export successful" << std::endl
;
4528 if (op
== "export-remove") {
4529 ret
= initiate_new_remove_pg(fs
.get(), pgid
);
4530 // Export succeeded, so pgid is there
4531 ceph_assert(ret
== 0);
4532 cerr
<< "Remove successful" << std::endl
;
4535 } else if (op
== "info") {
4536 formatter
->open_object_section("info");
4537 info
.dump(formatter
);
4538 formatter
->close_section();
4539 formatter
->flush(cout
);
4541 } else if (op
== "log") {
4542 PGLog::IndexedLog log
;
4543 pg_missing_t missing
;
4544 ret
= get_log(cct
.get(), fs
.get(), struct_ver
, pgid
, info
, log
, missing
);
4548 dump_log(formatter
, cout
, log
, missing
);
4549 } else if (op
== "mark-complete") {
4550 ObjectStore::Transaction tran
;
4551 ObjectStore::Transaction
*t
= &tran
;
4553 if (struct_ver
< PG::get_compat_struct_v()) {
4554 cerr
<< "Can't mark-complete, version mismatch " << (int)struct_ver
4555 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4561 cout
<< "Marking complete " << std::endl
;
4563 ceph_assert(superblock
!= nullptr);
4564 info
.last_update
= eversion_t(superblock
->current_epoch
, info
.last_update
.version
+ 1);
4565 info
.last_backfill
= hobject_t::get_max();
4566 info
.last_epoch_started
= superblock
->current_epoch
;
4567 info
.history
.last_epoch_started
= superblock
->current_epoch
;
4568 info
.history
.last_epoch_clean
= superblock
->current_epoch
;
4569 past_intervals
.clear();
4572 ret
= write_info(*t
, map_epoch
, info
, past_intervals
);
4575 auto ch
= fs
->open_collection(coll_t(pgid
));
4576 fs
->queue_transaction(ch
, std::move(*t
));
4578 cout
<< "Marking complete succeeded" << std::endl
;
4579 } else if (op
== "trim-pg-log") {
4580 ret
= do_trim_pg_log(fs
.get(), coll
, info
, pgid
,
4581 map_epoch
, past_intervals
);
4583 cerr
<< "Error trimming pg log: " << cpp_strerror(ret
) << std::endl
;
4586 cout
<< "Finished trimming pg log" << std::endl
;
4588 } else if (op
== "trim-pg-log-dups") {
4589 ret
= do_trim_pg_log_dups(fs
.get(), coll
, info
, pgid
,
4590 map_epoch
, past_intervals
);
4592 cerr
<< "Error trimming pg log dups: " << cpp_strerror(ret
) << std::endl
;
4595 cout
<< "Finished trimming pg log dups" << std::endl
;
4597 } else if (op
== "reset-last-complete") {
4599 std::cerr
<< "WARNING: reset-last-complete is extremely dangerous and almost "
4600 << "certain to lead to permanent data loss unless you know exactly "
4601 << "what you are doing. Pass --force to proceed anyway."
4606 ObjectStore::Transaction tran
;
4607 ObjectStore::Transaction
*t
= &tran
;
4609 if (struct_ver
< PG::get_compat_struct_v()) {
4610 cerr
<< "Can't reset-last-complete, version mismatch " << (int)struct_ver
4611 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4617 cout
<< "Reseting last_complete " << std::endl
;
4619 info
.last_complete
= info
.last_update
;
4622 ret
= write_info(*t
, map_epoch
, info
, past_intervals
);
4625 fs
->queue_transaction(ch
, std::move(*t
));
4627 cout
<< "Reseting last_complete succeeded" << std::endl
;
4629 } else if (op
== "pg-log-inject-dups") {
4630 if (!vm
.count("file") || file
== "-") {
4631 cerr
<< "Must provide file containing JSON dups entries" << std::endl
;
4636 cerr
<< "opening file " << file
<< std::endl
;
4638 ifstream
json_file_stream(file
, std::ifstream::in
);
4639 if (!json_file_stream
.is_open()) {
4640 cerr
<< "unable to open file " << file
<< std::endl
;
4644 json_spirit::mValue result
;
4646 if (!json_spirit::read(json_file_stream
, result
))
4647 throw std::runtime_error("unparseable JSON " + file
);
4648 if (result
.type() != json_spirit::array_type
) {
4649 cerr
<< "result is not an array_type - type=" << result
.type() << std::endl
;
4650 throw std::runtime_error("not JSON array_type " + file
);
4652 do_dups_inject_from_json(fs
.get(), pgid
, result
, debug
);
4653 } catch (const std::runtime_error
&e
) {
4654 cerr
<< e
.what() << std::endl
;;
4658 ceph_assert(!"Should have already checked for valid --op");
4661 cerr
<< "PG '" << pgid
<< "' not found" << std::endl
;
4668 Formatter
* f
= Formatter::create("json-pretty", "json-pretty", "json-pretty");
4669 cct
->get_perfcounters_collection()->dump_formatted(f
, false);
4670 ostr
<< "ceph-objectstore-tool ";
4673 cout
<< ostr
.str() << std::endl
;
4676 int r
= fs
->umount();
4678 cerr
<< "umount failed: " << cpp_strerror(r
) << std::endl
;
4679 // If no previous error, then use umount() error
4685 // Export output can go to stdout, so put this message on stderr
4687 cerr
<< "dry-run: Nothing changed" << std::endl
;
4689 cout
<< "dry-run: Nothing changed" << std::endl
;