X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=ceph%2Fsrc%2Ftools%2Fceph_objectstore_tool.cc;h=c12a741e97afa29daa1790bf96de58284447cb5c;hb=2a845540123ad00df2e55947b8080306ebdcf410;hp=d17a74dbcbb0ec6c18864361365a84d75b5099b9;hpb=c07f9fc5a4f48397831383549fb0482b93480643;p=ceph.git diff --git a/ceph/src/tools/ceph_objectstore_tool.cc b/ceph/src/tools/ceph_objectstore_tool.cc index d17a74dbc..c12a741e9 100644 --- a/ceph/src/tools/ceph_objectstore_tool.cc +++ b/ceph/src/tools/ceph_objectstore_tool.cc @@ -14,14 +14,17 @@ #include #include +#include #include #include +#include #include #include "common/Formatter.h" #include "common/errno.h" #include "common/ceph_argparse.h" +#include "common/url_escape.h" #include "global/global_init.h" @@ -35,6 +38,7 @@ #include "osd/PGLog.h" #include "osd/OSD.h" #include "osd/PG.h" +#include "osd/ECUtil.h" #include "json_spirit/json_spirit_value.h" #include "json_spirit/json_spirit_reader.h" @@ -44,8 +48,8 @@ #include "include/compat.h" #include "include/util.h" -namespace po = boost::program_options; using namespace std; +namespace po = boost::program_options; #ifdef INTERNAL_TEST CompatSet get_test_compat_set() { @@ -73,21 +77,22 @@ CompatSet get_test_compat_set() { const ssize_t max_read = 1024 * 1024; const int fd_none = INT_MIN; bool outistty; -bool dry_run = false; +bool dry_run; struct action_on_object_t { virtual ~action_on_object_t() {} - virtual int call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) = 0; + virtual void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) = 0; }; int _action_on_all_objects_in_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug) { + auto ch = store->open_collection(coll); + unsigned LIST_AT_A_TIME = 100; ghobject_t next; while (!next.is_max()) { vector list; - int r = store->collection_list( - coll, + int r = store->collection_list(ch, next, ghobject_t::get_max(), LIST_AT_A_TIME, @@ -101,30 +106,25 @@ int 
_action_on_all_objects_in_pg(ObjectStore *store, coll_t coll, action_on_obje for (vector::iterator obj = list.begin(); obj != list.end(); ++obj) { - if (obj->is_pgmeta()) - continue; object_info_t oi; if (coll != coll_t::meta()) { bufferlist attr; - r = store->getattr(coll, *obj, OI_ATTR, attr); + r = store->getattr(ch, *obj, OI_ATTR, attr); if (r < 0) { cerr << "Error getting attr on : " << make_pair(coll, *obj) << ", " << cpp_strerror(r) << std::endl; - continue; - } - bufferlist::iterator bp = attr.begin(); - try { - ::decode(oi, bp); - } catch (...) { - r = -EINVAL; - cerr << "Error getting attr on : " << make_pair(coll, *obj) << ", " - << cpp_strerror(r) << std::endl; - continue; - } + } else { + auto bp = attr.cbegin(); + try { + decode(oi, bp); + } catch (...) { + r = -EINVAL; + cerr << "Error decoding attr on : " << make_pair(coll, *obj) << ", " + << cpp_strerror(r) << std::endl; + } + } } - r = action.call(store, coll, *obj, oi); - if (r < 0) - return r; + action.call(store, coll, *obj, oi); } } return 0; @@ -138,6 +138,7 @@ int action_on_all_objects_in_pg(ObjectStore *store, string pgidstr, action_on_ob int r = 0; vector colls_to_check; vector candidates; + r = store->list_collections(candidates); if (r < 0) { cerr << "Error listing collections: " << cpp_strerror(r) << std::endl; @@ -148,6 +149,10 @@ int action_on_all_objects_in_pg(ObjectStore *store, string pgidstr, action_on_ob i != candidates.end(); ++i) { spg_t cand_pgid; + if (i->is_meta() && pgidstr == "meta") { + colls_to_check.push_back(*i); + continue; + } if (!i->is_pg(&cand_pgid)) continue; @@ -265,13 +270,13 @@ struct lookup_ghobject : public action_on_object_t { lookup_ghobject(const string& name, const boost::optional& nspace, bool need_snapset = false) : _name(name), _namespace(nspace), _need_snapset(need_snapset) { } - int call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override { + void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) 
override { if (_need_snapset && !ghobj.hobj.has_snapset()) - return 0; + return; if ((_name.length() == 0 || ghobj.hobj.oid.name == _name) && (!_namespace || ghobj.hobj.nspace == _namespace)) _objects.insert(coll, ghobj); - return 0; + return; } int size() const { @@ -289,14 +294,128 @@ struct lookup_ghobject : public action_on_object_t { } }; -ghobject_t infos_oid = OSD::make_infos_oid(); -ghobject_t log_oid; -ghobject_t biginfo_oid; +struct lookup_slow_ghobject : public action_on_object_t { + list > _objects; + const string _name; + double threshold; + + coll_t last_coll; + + lookup_slow_ghobject(const string& name, double _threshold) : + _name(name), threshold(_threshold) { } + + void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override { + ObjectMap::ObjectMapIterator iter; + auto start1 = mono_clock::now(); + ceph::signedspan first_seek_time = start1 - start1; + ceph::signedspan last_seek_time = first_seek_time; + ceph::signedspan total_time = first_seek_time; + { + auto ch = store->open_collection(coll); + iter = store->get_omap_iterator(ch, ghobj); + if (!iter) { + cerr << "omap_get_iterator: " << cpp_strerror(ENOENT) + << " obj:" << ghobj + << std::endl; + return; + } + auto start = mono_clock::now(); + iter->seek_to_first(); + first_seek_time = mono_clock::now() - start; + + while(iter->valid()) { + start = mono_clock::now(); + iter->next(); + last_seek_time = mono_clock::now() - start; + } + } + + if (coll != last_coll) { + cerr << ">>> inspecting coll" << coll << std::endl; + last_coll = coll; + } + + total_time = mono_clock::now() - start1; + if ( total_time >= make_timespan(threshold)) { + _objects.emplace_back(coll, ghobj, + first_seek_time, last_seek_time, total_time, + url_escape(iter->tail_key())); + cerr << ">>>>> found obj " << ghobj + << " first_seek_time " + << std::chrono::duration_cast(first_seek_time).count() + << " last_seek_time " + << std::chrono::duration_cast(last_seek_time).count() + << " total_time " + 
<< std::chrono::duration_cast(total_time).count() + << " tail key: " << url_escape(iter->tail_key()) + << std::endl; + } + return; + } + + int size() const { + return _objects.size(); + } + + void dump(Formatter *f, bool human_readable) const { + if (!human_readable) + f->open_array_section("objects"); + for (auto i = _objects.begin(); + i != _objects.end(); + ++i) { + f->open_array_section("object"); + coll_t coll; + ghobject_t ghobj; + ceph::signedspan first_seek_time; + ceph::signedspan last_seek_time; + ceph::signedspan total_time; + string tail_key; + std::tie(coll, ghobj, first_seek_time, last_seek_time, total_time, tail_key) = *i; + + spg_t pgid; + bool is_pg = coll.is_pg(&pgid); + if (is_pg) + f->dump_string("pgid", stringify(pgid)); + if (!is_pg || !human_readable) + f->dump_string("coll", coll.to_str()); + f->dump_object("ghobject", ghobj); + f->open_object_section("times"); + f->dump_int("first_seek_time", + std::chrono::duration_cast(first_seek_time).count()); + f->dump_int("last_seek_time", + std::chrono::duration_cast + (last_seek_time).count()); + f->dump_int("total_time", + std::chrono::duration_cast(total_time).count()); + f->dump_string("tail_key", tail_key); + f->close_section(); + + f->close_section(); + if (human_readable) { + f->flush(cout); + cout << std::endl; + } + } + if (!human_readable) { + f->close_section(); + f->flush(cout); + cout << std::endl; + } + } +}; int file_fd = fd_none; -bool debug = false; +bool debug; +bool force = false; +bool no_superblock = false; + super_header sh; -uint64_t testalign; static int get_fd_data(int fd, bufferlist &bl) { @@ -314,22 +433,27 @@ static int get_fd_data(int fd, bufferlist &bl) total += bytes; } while(true); - assert(bl.length() == total); + ceph_assert(bl.length() == total); return 0; } -int get_log(ObjectStore *fs, __u8 struct_ver, - coll_t coll, spg_t pgid, const pg_info_t &info, +int get_log(CephContext *cct, ObjectStore *fs, __u8 struct_ver, + spg_t pgid, const pg_info_t &info, 
PGLog::IndexedLog &log, pg_missing_t &missing) { try { + auto ch = fs->open_collection(coll_t(pgid)); + if (!ch) { + return -ENOENT; + } ostringstream oss; - assert(struct_ver > 0); - PGLog::read_log_and_missing(fs, coll, - struct_ver >= 8 ? coll : coll_t::meta(), - struct_ver >= 8 ? pgid.make_pgmeta_oid() : log_oid, - info, log, missing, oss, - g_ceph_context->_conf->osd_ignore_stale_divergent_priors); + ceph_assert(struct_ver > 0); + PGLog::read_log_and_missing( + cct, fs, ch, + pgid.make_pgmeta_oid(), + info, log, missing, + oss, + g_ceph_context->_conf->osd_ignore_stale_divergent_priors); if (debug && oss.str().size()) cerr << oss.str() << std::endl; } @@ -351,9 +475,6 @@ void dump_log(Formatter *formatter, ostream &out, pg_log_t &log, formatter->open_object_section("pg_missing_t"); missing.dump(formatter); formatter->close_section(); - formatter->flush(out); - formatter->open_object_section("map"); - formatter->close_section(); formatter->close_section(); formatter->flush(out); } @@ -396,23 +517,22 @@ int mark_pg_for_removal(ObjectStore *fs, spg_t pgid, ObjectStore::Transaction *t coll_t coll(pgid); ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid()); - bufferlist bl; epoch_t map_epoch = 0; - int r = PG::peek_map_epoch(fs, pgid, &map_epoch, &bl); + int r = PG::peek_map_epoch(fs, pgid, &map_epoch); if (r < 0) cerr << __func__ << " warning: peek_map_epoch reported error" << std::endl; PastIntervals past_intervals; __u8 struct_v; - r = PG::read_info(fs, pgid, coll, bl, info, past_intervals, struct_v); + r = PG::read_info(fs, pgid, coll, info, past_intervals, struct_v); if (r < 0) { cerr << __func__ << " error on read_info " << cpp_strerror(r) << std::endl; return r; } - assert(struct_v >= 8); + ceph_assert(struct_v >= 8); // new omap key cout << "setting '_remove' omap key" << std::endl; map values; - ::encode((char)1, values["_remove"]); + encode((char)1, values["_remove"]); t->omap_setkeys(coll, pgmeta_oid, values); return 0; } @@ -420,8 +540,23 @@ int 
mark_pg_for_removal(ObjectStore *fs, spg_t pgid, ObjectStore::Transaction *t #pragma GCC diagnostic pop #pragma GCC diagnostic warning "-Wpragmas" -int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid, - ObjectStore::Sequencer &osr) +template +void wait_until_done(ObjectStore::Transaction* txn, Func&& func) +{ + bool finished = false; + std::condition_variable cond; + std::mutex m; + txn->register_on_complete(make_lambda_context([&](int) { + std::unique_lock lock{m}; + finished = true; + cond.notify_one(); + })); + std::move(func)(); + std::unique_lock lock{m}; + cond.wait(lock, [&] {return finished;}); +} + +int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid) { if (!dry_run) finish_remove_pgs(store); @@ -436,7 +571,8 @@ int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid, if (r < 0) { return r; } - store->apply_transaction(&osr, std::move(rmt)); + ObjectStore::CollectionHandle ch = store->open_collection(coll_t(r_pgid)); + store->queue_transaction(ch, std::move(rmt)); finish_remove_pgs(store); return r; } @@ -448,16 +584,21 @@ int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info, coll_t coll(info.pgid); ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid()); map km; + string key_to_remove; pg_info_t last_written_info; - int ret = PG::_prepare_write_info( + int ret = prepare_info_keymap( g_ceph_context, - &km, epoch, + &km, &key_to_remove, + epoch, info, last_written_info, past_intervals, true, true, false); if (ret) cerr << "Failed to write info" << std::endl; t.omap_setkeys(coll, pgmeta_oid, km); + if (!key_to_remove.empty()) { + t.omap_rmkey(coll, pgmeta_oid, key_to_remove); + } return ret; } @@ -468,27 +609,211 @@ int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info, divergent_priors_t &divergent, pg_missing_t &missing) { + cout << __func__ << " epoch " << epoch << " info " << info << std::endl; int ret = write_info(t, epoch, info, past_intervals); if (ret) return ret; + coll_t coll(info.pgid); map 
km; - + const bool require_rollback = !info.pgid.is_no_shard(); if (!divergent.empty()) { - assert(missing.get_items().empty()); + ceph_assert(missing.get_items().empty()); PGLog::write_log_and_missing_wo_missing( - t, &km, log, coll, info.pgid.make_pgmeta_oid(), divergent, true, true); + t, &km, log, coll, info.pgid.make_pgmeta_oid(), divergent, + require_rollback); } else { pg_missing_tracker_t tmissing(missing); bool rebuilt_missing_set_with_deletes = missing.may_include_deletes; PGLog::write_log_and_missing( - t, &km, log, coll, info.pgid.make_pgmeta_oid(), tmissing, true, true, + t, &km, log, coll, info.pgid.make_pgmeta_oid(), tmissing, + require_rollback, &rebuilt_missing_set_with_deletes); } t.omap_setkeys(coll, info.pgid.make_pgmeta_oid(), km); return 0; } +int do_trim_pg_log(ObjectStore *store, const coll_t &coll, + pg_info_t &info, const spg_t &pgid, + epoch_t map_epoch, + PastIntervals &past_intervals) +{ + ghobject_t oid = pgid.make_pgmeta_oid(); + struct stat st; + auto ch = store->open_collection(coll); + int r = store->stat(ch, oid, &st); + ceph_assert(r == 0); + ceph_assert(st.st_size == 0); + + cerr << "Log bounds are: " << "(" << info.log_tail << "," + << info.last_update << "]" << std::endl; + + uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries; + if (info.last_update.version - info.log_tail.version <= max_entries) { + cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl; + return 0; + } + + ceph_assert(info.last_update.version > max_entries); + version_t trim_to = info.last_update.version - max_entries; + size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max; + eversion_t new_tail; + bool done = false; + + while (!done) { + // gather keys so we can delete them in a batch without + // affecting the iterator + set keys_to_trim; + { + ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid); + if (!p) + break; + for (p->seek_to_first(); p->valid(); p->next()) { + if (p->key()[0] 
== '_') + continue; + if (p->key() == "can_rollback_to") + continue; + if (p->key() == "divergent_priors") + continue; + if (p->key() == "rollback_info_trimmed_to") + continue; + if (p->key() == "may_include_deletes_in_missing") + continue; + if (p->key().substr(0, 7) == string("missing")) + continue; + if (p->key().substr(0, 4) == string("dup_")) + continue; + + bufferlist bl = p->value(); + auto bp = bl.cbegin(); + pg_log_entry_t e; + try { + e.decode_with_checksum(bp); + } catch (const buffer::error &e) { + cerr << "Error reading pg log entry: " << e.what() << std::endl; + } + if (debug) { + cerr << "read entry " << e << std::endl; + } + if (e.version.version > trim_to) { + done = true; + break; + } + keys_to_trim.insert(p->key()); + new_tail = e.version; + if (keys_to_trim.size() >= trim_at_once) + break; + } + + if (!p->valid()) + done = true; + } // deconstruct ObjectMapIterator + + // delete the keys + if (!dry_run && !keys_to_trim.empty()) { + cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl; + ObjectStore::Transaction t; + t.omap_rmkeys(coll, oid, keys_to_trim); + store->queue_transaction(ch, std::move(t)); + ch->flush(); + } + } + + // update pg info with new tail + if (!dry_run && new_tail != eversion_t()) { + info.log_tail = new_tail; + ObjectStore::Transaction t; + int ret = write_info(t, map_epoch, info, past_intervals); + if (ret) + return ret; + store->queue_transaction(ch, std::move(t)); + ch->flush(); + } + + // compact the db since we just removed a bunch of data + cerr << "Finished trimming, now compacting..." 
<< std::endl; + if (!dry_run) + store->compact(); + return 0; +} + +int do_trim_pg_log_dups(ObjectStore *store, const coll_t &coll, + pg_info_t &info, const spg_t &pgid, + epoch_t map_epoch, + PastIntervals &past_intervals) +{ + ghobject_t oid = pgid.make_pgmeta_oid(); + struct stat st; + auto ch = store->open_collection(coll); + int r = store->stat(ch, oid, &st); + ceph_assert(r == 0); + ceph_assert(st.st_size == 0); + + const size_t max_dup_entries = g_ceph_context->_conf->osd_pg_log_dups_tracked; + ceph_assert(max_dup_entries > 0); + const size_t max_chunk_size = g_ceph_context->_conf->osd_pg_log_trim_max; + ceph_assert(max_chunk_size > 0); + + cout << "max_dup_entries=" << max_dup_entries + << " max_chunk_size=" << max_chunk_size << std::endl; + if (dry_run) { + cout << "Dry run enabled, so when many chunks are needed," + << " the trimming will never stop!" << std::endl; + } + + set keys_to_keep; + size_t num_removed = 0; + do { + set keys_to_trim; + { + ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid); + if (!p) + break; + for (p->seek_to_first(); p->valid(); p->next()) { + if (p->key()[0] == '_') + continue; + if (p->key() == "can_rollback_to") + continue; + if (p->key() == "divergent_priors") + continue; + if (p->key() == "rollback_info_trimmed_to") + continue; + if (p->key() == "may_include_deletes_in_missing") + continue; + if (p->key().substr(0, 7) == string("missing")) + continue; + if (p->key().substr(0, 4) != string("dup_")) + continue; + keys_to_keep.insert(p->key()); + if (keys_to_keep.size() > max_dup_entries) { + auto oldest_to_keep = keys_to_keep.begin(); + keys_to_trim.emplace(*oldest_to_keep); + keys_to_keep.erase(oldest_to_keep); + } + if (keys_to_trim.size() >= max_chunk_size) { + break; + } + } + } // deconstruct ObjectMapIterator + // delete the keys + num_removed = keys_to_trim.size(); + if (!dry_run && !keys_to_trim.empty()) { + cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << 
std::endl; + ObjectStore::Transaction t; + t.omap_rmkeys(coll, oid, keys_to_trim); + store->queue_transaction(ch, std::move(t)); + ch->flush(); + } + } while (num_removed == max_chunk_size); + + // compact the db since we just removed a bunch of data + cerr << "Finished trimming, now compacting..." << std::endl; + if (!dry_run) + store->compact(); + return 0; +} + const int OMAP_BATCH_SIZE = 25; void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map &oset) { @@ -504,7 +829,8 @@ int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj mysize_t total; footer ft; - int ret = store->stat(cid, obj, &st); + auto ch = store->open_collection(cid); + int ret = store->stat(ch, obj, &st); if (ret < 0) return ret; @@ -519,7 +845,7 @@ int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj { bufferptr bp; bufferlist bl; - ret = store->getattr(cid, obj, OI_ATTR, bp); + ret = store->getattr(ch, obj, OI_ATTR, bp); if (ret < 0) { cerr << "getattr failure object_info " << ret << std::endl; return ret; @@ -544,7 +870,7 @@ int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj if (len > total) len = total; - ret = store->read(cid, obj, offset, len, rawdatabl); + ret = store->read(ch, obj, offset, len, rawdatabl); if (ret < 0) return ret; if (ret == 0) @@ -562,8 +888,8 @@ int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj } //Handle attrs for this object - map aset; - ret = store->getattrs(cid, obj, aset); + map> aset; + ret = store->getattrs(ch, obj, aset); if (ret) return ret; attr_section as(aset); ret = write_section(TYPE_ATTRS, as, file_fd); @@ -576,7 +902,7 @@ int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj //Handle omap information bufferlist hdrbuf; - ret = store->omap_get_header(cid, obj, &hdrbuf, true); + ret = store->omap_get_header(ch, obj, &hdrbuf, true); if (ret < 0) { cerr << "omap_get_header: " << cpp_strerror(ret) << std::endl; 
return ret; @@ -587,7 +913,7 @@ int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj if (ret) return ret; - ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(cid, obj); + ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, obj); if (!iter) { ret = -ENOENT; cerr << "omap_get_iterator: " << cpp_strerror(ret) << std::endl; @@ -620,18 +946,18 @@ int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj int ObjectStoreTool::export_files(ObjectStore *store, coll_t coll) { ghobject_t next; - + auto ch = store->open_collection(coll); while (!next.is_max()) { vector objects; - int r = store->collection_list(coll, next, ghobject_t::get_max(), 300, + int r = store->collection_list(ch, next, ghobject_t::get_max(), 300, &objects, &next); if (r < 0) return r; for (vector::iterator i = objects.begin(); i != objects.end(); ++i) { - assert(!i->hobj.is_meta()); - if (i->is_pgmeta() || i->hobj.is_temp()) { + ceph_assert(!i->hobj.is_meta()); + if (i->is_pgmeta() || i->hobj.is_temp() || !i->is_no_gen()) { continue; } r = export_file(store, coll, *i); @@ -642,10 +968,9 @@ int ObjectStoreTool::export_files(ObjectStore *store, coll_t coll) return 0; } -int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force, - ObjectStore::Sequencer &osr) { +int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) { OSDMap::Incremental inc; - bufferlist::iterator it = bl.begin(); + auto it = bl.cbegin(); inc.decode(it); if (e == 0) { e = inc.epoch; @@ -658,8 +983,9 @@ int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force, return -EINVAL; } } + auto ch = store->open_collection(coll_t::meta()); const ghobject_t inc_oid = OSD::get_inc_osdmap_pobject_name(e); - if (!store->exists(coll_t::meta(), inc_oid)) { + if (!store->exists(ch, inc_oid)) { cerr << "inc-osdmap (" << inc_oid << ") does not exist." 
<< std::endl; if (!force) { return -ENOENT; @@ -671,18 +997,14 @@ int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force, ObjectStore::Transaction t; t.write(coll_t::meta(), inc_oid, 0, bl.length(), bl); t.truncate(coll_t::meta(), inc_oid, bl.length()); - int ret = store->apply_transaction(&osr, std::move(t)); - if (ret) { - cerr << "Failed to set inc-osdmap (" << inc_oid << "): " << ret << std::endl; - } else { - cout << "Wrote inc-osdmap." << inc.epoch << std::endl; - } - return ret; + store->queue_transaction(ch, std::move(t)); + return 0; } int get_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl) { - if (store->read(coll_t::meta(), + auto ch = store->open_collection(coll_t::meta()); + if (store->read(ch, OSD::get_inc_osdmap_pobject_name(e), 0, 0, bl) < 0) { return -ENOENT; @@ -690,8 +1012,7 @@ int get_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl) return 0; } -int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force, - ObjectStore::Sequencer &osr) { +int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) { OSDMap osdmap; osdmap.decode(bl); if (e == 0) { @@ -705,8 +1026,9 @@ int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force, return -EINVAL; } } + auto ch = store->open_collection(coll_t::meta()); const ghobject_t full_oid = OSD::get_osdmap_pobject_name(e); - if (!store->exists(coll_t::meta(), full_oid)) { + if (!store->exists(ch, full_oid)) { cerr << "osdmap (" << full_oid << ") does not exist." << std::endl; if (!force) { return -ENOENT; @@ -718,19 +1040,15 @@ int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force, ObjectStore::Transaction t; t.write(coll_t::meta(), full_oid, 0, bl.length(), bl); t.truncate(coll_t::meta(), full_oid, bl.length()); - int ret = store->apply_transaction(&osr, std::move(t)); - if (ret) { - cerr << "Failed to set osdmap (" << full_oid << "): " << ret << std::endl; - } else { - cout << "Wrote osdmap." 
<< osdmap.get_epoch() << std::endl; - } - return ret; + store->queue_transaction(ch, std::move(t)); + return 0; } int get_osdmap(ObjectStore *store, epoch_t e, OSDMap &osdmap, bufferlist& bl) { + ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta()); bool found = store->read( - coll_t::meta(), OSD::get_osdmap_pobject_name(e), 0, 0, bl) >= 0; + ch, OSD::get_osdmap_pobject_name(e), 0, 0, bl) >= 0; if (!found) { cerr << "Can't find OSDMap for pg epoch " << e << std::endl; return -ENOENT; @@ -741,12 +1059,27 @@ int get_osdmap(ObjectStore *store, epoch_t e, OSDMap &osdmap, bufferlist& bl) return 0; } +int get_pg_num_history(ObjectStore *store, pool_pg_num_history_t *h) +{ + ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta()); + bufferlist bl; + auto pghist = OSD::make_pg_num_history_oid(); + int r = store->read(ch, pghist, 0, 0, bl, 0); + if (r >= 0 && bl.length() > 0) { + auto p = bl.cbegin(); + decode(*h, p); + } + cout << __func__ << " pg_num_history " << *h << std::endl; + return 0; +} + int add_osdmap(ObjectStore *store, metadata_section &ms) { return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl); } -int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid, +int ObjectStoreTool::do_export( + CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid, pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, const OSDSuperblock& superblock, PastIntervals &past_intervals) @@ -754,15 +1087,15 @@ int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid, PGLog::IndexedLog log; pg_missing_t missing; - cerr << "Exporting " << pgid << std::endl; + cerr << "Exporting " << pgid << " info " << info << std::endl; - int ret = get_log(fs, struct_ver, coll, pgid, info, log, missing); + int ret = get_log(cct, fs, struct_ver, pgid, info, log, missing); if (ret > 0) return ret; if (debug) { Formatter *formatter = Formatter::create("json-pretty"); - assert(formatter); + ceph_assert(formatter); 
dump_log(formatter, cerr, log, missing); delete formatter; } @@ -806,10 +1139,25 @@ int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid, return 0; } +int dump_data(Formatter *formatter, bufferlist &bl) +{ + auto ebliter = bl.cbegin(); + data_section ds; + ds.decode(ebliter); + + formatter->open_object_section("data_block"); + formatter->dump_unsigned("offset", ds.offset); + formatter->dump_unsigned("len", ds.len); + // XXX: Add option to dump data like od -cx ? + formatter->close_section(); + formatter->flush(cout); + return 0; +} + int get_data(ObjectStore *store, coll_t coll, ghobject_t hoid, ObjectStore::Transaction *t, bufferlist &bl) { - bufferlist::iterator ebliter = bl.begin(); + auto ebliter = bl.cbegin(); data_section ds; ds.decode(ebliter); @@ -819,82 +1167,131 @@ int get_data(ObjectStore *store, coll_t coll, ghobject_t hoid, return 0; } +int dump_attrs( + Formatter *formatter, ghobject_t hoid, + bufferlist &bl) +{ + auto ebliter = bl.cbegin(); + attr_section as; + as.decode(ebliter); + + // This could have been handled in the caller if we didn't need to + // support exports that didn't include object_info_t in object_begin. 
+ if (hoid.generation == ghobject_t::NO_GEN && + hoid.hobj.is_head()) { + map::iterator mi = as.data.find(SS_ATTR); + if (mi != as.data.end()) { + SnapSet snapset; + auto p = mi->second.cbegin(); + snapset.decode(p); + formatter->open_object_section("snapset"); + snapset.dump(formatter); + formatter->close_section(); + } else { + formatter->open_object_section("snapset"); + formatter->dump_string("error", "missing SS_ATTR"); + formatter->close_section(); + } + } + + formatter->open_object_section("attrs"); + formatter->open_array_section("user"); + for (auto kv : as.data) { + // Skip system attributes + if (('_' != kv.first.at(0)) || kv.first.size() == 1) + continue; + formatter->open_object_section("user_attr"); + formatter->dump_string("name", kv.first.substr(1)); + bool b64; + formatter->dump_string("value", cleanbin(kv.second, b64)); + formatter->dump_bool("Base64", b64); + formatter->close_section(); + } + formatter->close_section(); + formatter->open_array_section("system"); + for (auto kv : as.data) { + // Skip user attributes + if (('_' == kv.first.at(0)) && kv.first.size() != 1) + continue; + formatter->open_object_section("sys_attr"); + formatter->dump_string("name", kv.first); + formatter->close_section(); + } + formatter->close_section(); + formatter->close_section(); + formatter->flush(cout); + + return 0; +} + int get_attrs( ObjectStore *store, coll_t coll, ghobject_t hoid, ObjectStore::Transaction *t, bufferlist &bl, OSDriver &driver, SnapMapper &snap_mapper) { - bufferlist::iterator ebliter = bl.begin(); + auto ebliter = bl.cbegin(); attr_section as; as.decode(ebliter); + auto ch = store->open_collection(coll); if (debug) cerr << "\tattrs: len " << as.data.size() << std::endl; t->setattrs(coll, hoid, as.data); // This could have been handled in the caller if we didn't need to // support exports that didn't include object_info_t in object_begin. 
- if (hoid.generation == ghobject_t::NO_GEN) { - if (hoid.hobj.snap < CEPH_MAXSNAP) { - map::iterator mi = as.data.find(OI_ATTR); - if (mi != as.data.end()) { - object_info_t oi(mi->second); - - if (debug) - cerr << "object_info " << oi << std::endl; - - OSDriver::OSTransaction _t(driver.get_transaction(t)); - set oi_snaps(oi.legacy_snaps.begin(), oi.legacy_snaps.end()); - if (!oi_snaps.empty()) { + if (hoid.generation == ghobject_t::NO_GEN && + hoid.hobj.is_head()) { + map::iterator mi = as.data.find(SS_ATTR); + if (mi != as.data.end()) { + SnapSet snapset; + auto p = mi->second.cbegin(); + snapset.decode(p); + cout << "snapset " << snapset << std::endl; + for (auto& p : snapset.clone_snaps) { + ghobject_t clone = hoid; + clone.hobj.snap = p.first; + set snaps(p.second.begin(), p.second.end()); + if (!store->exists(ch, clone)) { + // no clone, skip. this is probably a cache pool. this works + // because we use a separate transaction per object and clones + // come before head in the archive. if (debug) - cerr << "\tsetting legacy snaps " << oi_snaps << std::endl; - snap_mapper.add_oid(hoid.hobj, oi_snaps, &_t); + cerr << "\tskipping missing " << clone << " (snaps " + << snaps << ")" << std::endl; + continue; } + if (debug) + cerr << "\tsetting " << clone.hobj << " snaps " << snaps + << std::endl; + OSDriver::OSTransaction _t(driver.get_transaction(t)); + ceph_assert(!snaps.empty()); + snap_mapper.add_oid(clone.hobj, snaps, &_t); } } else { - if (hoid.hobj.is_head()) { - map::iterator mi = as.data.find(SS_ATTR); - if (mi != as.data.end()) { - SnapSet snapset; - auto p = mi->second.begin(); - snapset.decode(p); - cout << "snapset " << snapset << std::endl; - if (!snapset.is_legacy()) { - for (auto& p : snapset.clone_snaps) { - ghobject_t clone = hoid; - clone.hobj.snap = p.first; - set snaps(p.second.begin(), p.second.end()); - if (!store->exists(coll, clone)) { - // no clone, skip. this is probably a cache pool. 
this works - // because we use a separate transaction per object and clones - // come before head in the archive. - if (debug) - cerr << "\tskipping missing " << clone << " (snaps " - << snaps << ")" << std::endl; - continue; - } - if (debug) - cerr << "\tsetting " << clone.hobj << " snaps " << snaps - << std::endl; - OSDriver::OSTransaction _t(driver.get_transaction(t)); - assert(!snaps.empty()); - snap_mapper.add_oid(clone.hobj, snaps, &_t); - } - } - } else { - cerr << "missing SS_ATTR on " << hoid << std::endl; - } - } + cerr << "missing SS_ATTR on " << hoid << std::endl; } } + return 0; +} + +int dump_omap_hdr(Formatter *formatter, bufferlist &bl) +{ + auto ebliter = bl.cbegin(); + omap_hdr_section oh; + oh.decode(ebliter); + formatter->open_object_section("omap_header"); + formatter->dump_string("value", string(oh.hdr.c_str(), oh.hdr.length())); + formatter->close_section(); + formatter->flush(cout); return 0; } int get_omap_hdr(ObjectStore *store, coll_t coll, ghobject_t hoid, ObjectStore::Transaction *t, bufferlist &bl) { - bufferlist::iterator ebliter = bl.begin(); + auto ebliter = bl.cbegin(); omap_hdr_section oh; oh.decode(ebliter); @@ -905,10 +1302,33 @@ int get_omap_hdr(ObjectStore *store, coll_t coll, ghobject_t hoid, return 0; } +int dump_omap(Formatter *formatter, bufferlist &bl) +{ + auto ebliter = bl.cbegin(); + omap_section os; + os.decode(ebliter); + + formatter->open_object_section("omaps"); + formatter->dump_unsigned("count", os.omap.size()); + formatter->open_array_section("data"); + for (auto o : os.omap) { + formatter->open_object_section("omap"); + formatter->dump_string("name", o.first); + bool b64; + formatter->dump_string("value", cleanbin(o.second, b64)); + formatter->dump_bool("Base64", b64); + formatter->close_section(); + } + formatter->close_section(); + formatter->close_section(); + formatter->flush(cout); + return 0; +} + int get_omap(ObjectStore *store, coll_t coll, ghobject_t hoid, ObjectStore::Transaction *t, bufferlist &bl) { 
- bufferlist::iterator ebliter = bl.begin(); + auto ebliter = bl.cbegin(); omap_section os; os.decode(ebliter); @@ -918,34 +1338,98 @@ int get_omap(ObjectStore *store, coll_t coll, ghobject_t hoid, return 0; } -int ObjectStoreTool::get_object(ObjectStore *store, coll_t coll, - bufferlist &bl, OSDMap &curmap, - bool *skipped_objects, - ObjectStore::Sequencer &osr) +int ObjectStoreTool::dump_object(Formatter *formatter, + bufferlist &bl) +{ + auto ebliter = bl.cbegin(); + object_begin ob; + ob.decode(ebliter); + + if (ob.hoid.hobj.is_temp()) { + cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl; + return -EFAULT; + } + + formatter->open_object_section("object"); + formatter->open_object_section("oid"); + ob.hoid.dump(formatter); + formatter->close_section(); + formatter->open_object_section("object_info"); + ob.oi.dump(formatter); + formatter->close_section(); + + bufferlist ebl; + bool done = false; + while(!done) { + sectiontype_t type; + int ret = read_section(&type, &ebl); + if (ret) + return ret; + + //cout << "\tdo_object: Section type " << hex << type << dec << std::endl; + //cout << "\t\tsection size " << ebl.length() << std::endl; + if (type >= END_OF_TYPES) { + cout << "Skipping unknown object section type" << std::endl; + continue; + } + switch(type) { + case TYPE_DATA: + if (dry_run) break; + ret = dump_data(formatter, ebl); + if (ret) return ret; + break; + case TYPE_ATTRS: + if (dry_run) break; + ret = dump_attrs(formatter, ob.hoid, ebl); + if (ret) return ret; + break; + case TYPE_OMAP_HDR: + if (dry_run) break; + ret = dump_omap_hdr(formatter, ebl); + if (ret) return ret; + break; + case TYPE_OMAP: + if (dry_run) break; + ret = dump_omap(formatter, ebl); + if (ret) return ret; + break; + case TYPE_OBJECT_END: + done = true; + break; + default: + cerr << "Unknown section type " << type << std::endl; + return -EFAULT; + } + } + formatter->close_section(); + return 0; +} + +int ObjectStoreTool::get_object(ObjectStore 
*store, + OSDriver& driver, + SnapMapper& mapper, + coll_t coll, + bufferlist &bl, OSDMap &origmap, + bool *skipped_objects) { ObjectStore::Transaction tran; ObjectStore::Transaction *t = &tran; - bufferlist::iterator ebliter = bl.begin(); + auto ebliter = bl.cbegin(); object_begin ob; ob.decode(ebliter); - OSDriver driver( - store, - coll_t(), - OSD::make_snapmapper_oid()); - spg_t pg; - coll.is_pg_prefix(&pg); - SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pg.shard); if (ob.hoid.hobj.is_temp()) { cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl; return -EFAULT; } - assert(g_ceph_context); + ceph_assert(g_ceph_context); + + auto ch = store->open_collection(coll); if (ob.hoid.hobj.nspace != g_ceph_context->_conf->osd_hit_set_namespace) { object_t oid = ob.hoid.hobj.oid; object_locator_t loc(ob.hoid.hobj); - pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc); - pg_t pgid = curmap.raw_pg_to_pg(raw_pgid); + pg_t raw_pgid = origmap.object_locator_to_pg(oid, loc); + pg_t pgid = origmap.raw_pg_to_pg(raw_pgid); spg_t coll_pgid; if (coll.is_pg(&coll_pgid) == false) { @@ -1014,34 +1498,27 @@ int ObjectStoreTool::get_object(ObjectStore *store, coll_t coll, return -EFAULT; } } - if (!dry_run) - store->apply_transaction(&osr, std::move(*t)); + if (!dry_run) { + wait_until_done(t, [&] { + store->queue_transaction(ch, std::move(*t)); + ch->flush(); + }); + } return 0; } -int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms, - const OSDSuperblock& sb, OSDMap& curmap, spg_t pgid) +int dump_pg_metadata(Formatter *formatter, bufferlist &bl, metadata_section &ms) { - bufferlist::iterator ebliter = bl.begin(); + auto ebliter = bl.cbegin(); ms.decode(ebliter); - spg_t old_pgid = ms.info.pgid; - ms.info.pgid = pgid; -#if DIAGNOSTIC - Formatter *formatter = new JSONFormatter(true); - cout << "export pgid " << old_pgid << std::endl; - cout << "struct_v " << (int)ms.struct_ver << std::endl; - cout << "map epoch " << 
ms.map_epoch << std::endl; + formatter->open_object_section("metadata_section"); - formatter->open_object_section("importing OSDMap"); - ms.osdmap.dump(formatter); - formatter->close_section(); - formatter->flush(cout); - cout << std::endl; + formatter->dump_unsigned("pg_disk_version", (int)ms.struct_ver); + formatter->dump_unsigned("map_epoch", ms.map_epoch); - cout << "osd current epoch " << sb.current_epoch << std::endl; - formatter->open_object_section("current OSDMap"); - curmap.dump(formatter); + formatter->open_object_section("OSDMap"); + ms.osdmap.dump(formatter); formatter->close_section(); formatter->flush(cout); cout << std::endl; @@ -1050,17 +1527,71 @@ int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms, ms.info.dump(formatter); formatter->close_section(); formatter->flush(cout); - cout << std::endl; formatter->open_object_section("log"); ms.log.dump(formatter); formatter->close_section(); formatter->flush(cout); - cout << std::endl; + formatter->open_object_section("pg_missing_t"); + ms.missing.dump(formatter); + formatter->close_section(); + + // XXX: ms.past_intervals? 
+ + formatter->close_section(); formatter->flush(cout); - cout << std::endl; + + if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) { + cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl; + return -EFAULT; + } + + return 0; +} + +int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms, + const OSDSuperblock& sb, spg_t pgid) +{ + auto ebliter = bl.cbegin(); + ms.decode(ebliter); + spg_t old_pgid = ms.info.pgid; + ms.info.pgid = pgid; + + if (debug) { + cout << "export pgid " << old_pgid << std::endl; + cout << "struct_v " << (int)ms.struct_ver << std::endl; + cout << "map epoch " << ms.map_epoch << std::endl; + +#ifdef DIAGNOSTIC + Formatter *formatter = new JSONFormatter(true); + formatter->open_object_section("stuff"); + + formatter->open_object_section("importing OSDMap"); + ms.osdmap.dump(formatter); + formatter->close_section(); + formatter->flush(cout); + cout << std::endl; + + cout << "osd current epoch " << sb.current_epoch << std::endl; + + formatter->open_object_section("info"); + ms.info.dump(formatter); + formatter->close_section(); + formatter->flush(cout); + cout << std::endl; + + formatter->open_object_section("log"); + ms.log.dump(formatter); + formatter->close_section(); + formatter->flush(cout); + cout << std::endl; + + formatter->close_section(); + formatter->flush(cout); + cout << std::endl; #endif + } if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) { cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl; @@ -1074,71 +1605,29 @@ int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms, return -EINVAL; } - // Pool verified to exist for call to get_pg_num(). 
- unsigned new_pg_num = curmap.get_pg_num(pgid.pgid.pool()); - - if (pgid.pgid.ps() >= new_pg_num) { - cerr << "Illegal pgid, the seed is larger than current pg_num" << std::endl; - return -EINVAL; - } - - // Old exports didn't include OSDMap, see if we have a copy locally + // Old exports didn't include OSDMap if (ms.osdmap.get_epoch() == 0) { - OSDMap findmap; - bufferlist findmap_bl; - int ret = get_osdmap(store, ms.map_epoch, findmap, findmap_bl); - if (ret == 0) { - ms.osdmap.deepish_copy_from(findmap); - } else { - cerr << "WARNING: No OSDMap in old export," - " some objects may be ignored due to a split" << std::endl; - } - } - - // Make sure old_pg_num is 0 in the unusual case that OSDMap not in export - // nor can we find a local copy. - unsigned old_pg_num = 0; - if (ms.osdmap.get_epoch() != 0) - old_pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool()); - - if (debug) { - cerr << "old_pg_num " << old_pg_num << std::endl; - cerr << "new_pg_num " << new_pg_num << std::endl; - cerr << ms.osdmap << std::endl; - cerr << curmap << std::endl; + cerr << "WARNING: No OSDMap in old export, this is an ancient export." + " Not supported." 
<< std::endl; + return -EINVAL; } - // If we have managed to have a good OSDMap we can do these checks - if (old_pg_num) { - if (old_pgid.pgid.ps() >= old_pg_num) { - cerr << "FATAL: pgid invalid for original map epoch" << std::endl; - return -EFAULT; - } - if (pgid.pgid.ps() >= old_pg_num) { - cout << "NOTICE: Post split pgid specified" << std::endl; - } else { - spg_t parent(pgid); - if (parent.is_split(old_pg_num, new_pg_num, NULL)) { - cerr << "WARNING: Split occurred, some objects may be ignored" << std::endl; - } + if (ms.osdmap.get_epoch() < sb.oldest_map) { + cerr << "PG export's map " << ms.osdmap.get_epoch() + << " is older than OSD's oldest_map " << sb.oldest_map << std::endl; + if (!force) { + cerr << " pass --force to proceed anyway (with incomplete PastIntervals)" + << std::endl; + return -EINVAL; } } - if (debug) { cerr << "Import pgid " << ms.info.pgid << std::endl; - cerr << "Clearing past_intervals " << ms.past_intervals << std::endl; - cerr << "Zero same_interval_since " << ms.info.history.same_interval_since << std::endl; + cerr << "Previous past_intervals " << ms.past_intervals << std::endl; + cerr << "history.same_interval_since " + << ms.info.history.same_interval_since << std::endl; } - // Let osd recompute past_intervals and same_interval_since - ms.past_intervals.clear(); - ms.info.history.same_interval_since = 0; - - if (debug) - cerr << "Changing pg epoch " << ms.map_epoch << " to " << sb.current_epoch << std::endl; - - ms.map_epoch = sb.current_epoch; - return 0; } @@ -1177,9 +1666,108 @@ void filter_divergent_priors(spg_t import_pgid, const OSDMap &curmap, } } +int ObjectStoreTool::dump_export(Formatter *formatter) +{ + bufferlist ebl; + pg_info_t info; + PGLog::IndexedLog log; + //bool skipped_objects = false; + + int ret = read_super(); + if (ret) + return ret; + + if (sh.magic != super_header::super_magic) { + cerr << "Invalid magic number" << std::endl; + return -EFAULT; + } + + if (sh.version > super_header::super_ver) { + cerr << 
"Can't handle export format version=" << sh.version << std::endl; + return -EINVAL; + } + + formatter->open_object_section("Export"); + + //First section must be TYPE_PG_BEGIN + sectiontype_t type; + ret = read_section(&type, &ebl); + if (ret) + return ret; + if (type == TYPE_POOL_BEGIN) { + cerr << "Dump of pool exports not supported" << std::endl; + return -EINVAL; + } else if (type != TYPE_PG_BEGIN) { + cerr << "Invalid first section type " << std::to_string(type) << std::endl; + return -EFAULT; + } + + auto ebliter = ebl.cbegin(); + pg_begin pgb; + pgb.decode(ebliter); + spg_t pgid = pgb.pgid; + + formatter->dump_string("pgid", stringify(pgid)); + formatter->dump_string("cluster_fsid", stringify(pgb.superblock.cluster_fsid)); + formatter->dump_string("features", stringify(pgb.superblock.compat_features)); + + bool done = false; + bool found_metadata = false; + metadata_section ms; + bool objects_started = false; + while(!done) { + ret = read_section(&type, &ebl); + if (ret) + return ret; + + if (debug) { + cerr << "dump_export: Section type " << std::to_string(type) << std::endl; + } + if (type >= END_OF_TYPES) { + cerr << "Skipping unknown section type" << std::endl; + continue; + } + switch(type) { + case TYPE_OBJECT_BEGIN: + if (!objects_started) { + formatter->open_array_section("objects"); + objects_started = true; + } + ret = dump_object(formatter, ebl); + if (ret) return ret; + break; + case TYPE_PG_METADATA: + if (objects_started) + cerr << "WARNING: metadata_section out of order" << std::endl; + ret = dump_pg_metadata(formatter, ebl, ms); + if (ret) return ret; + found_metadata = true; + break; + case TYPE_PG_END: + if (objects_started) { + formatter->close_section(); + } + done = true; + break; + default: + cerr << "Unknown section type " << std::to_string(type) << std::endl; + return -EFAULT; + } + } + + if (!found_metadata) { + cerr << "Missing metadata section" << std::endl; + return -EFAULT; + } + + formatter->close_section(); + 
formatter->flush(cout); + + return 0; +} + int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, - bool force, std::string pgidstr, - ObjectStore::Sequencer &osr) + bool force, std::string pgidstr) { bufferlist ebl; pg_info_t info; @@ -1212,40 +1800,25 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, cerr << "Pool exports cannot be imported into a PG" << std::endl; return -EINVAL; } else if (type != TYPE_PG_BEGIN) { - cerr << "Invalid first section type " << type << std::endl; + cerr << "Invalid first section type " << std::to_string(type) << std::endl; return -EFAULT; } - bufferlist::iterator ebliter = ebl.begin(); + auto ebliter = ebl.cbegin(); pg_begin pgb; pgb.decode(ebliter); spg_t pgid = pgb.pgid; - spg_t orig_pgid = pgid; if (pgidstr.length()) { spg_t user_pgid; bool ok = user_pgid.parse(pgidstr.c_str()); // This succeeded in main() already - assert(ok); + ceph_assert(ok); if (pgid != user_pgid) { - if (pgid.pool() != user_pgid.pool()) { - cerr << "Can't specify a different pgid pool, must be " << pgid.pool() << std::endl; - return -EINVAL; - } - if (pgid.is_no_shard() && !user_pgid.is_no_shard()) { - cerr << "Can't specify a sharded pgid with a non-sharded export" << std::endl; - return -EINVAL; - } - // Get shard from export information if not specified - if (!pgid.is_no_shard() && user_pgid.is_no_shard()) { - user_pgid.shard = pgid.shard; - } - if (pgid.shard != user_pgid.shard) { - cerr << "Can't specify a different shard, must be " << pgid.shard << std::endl; - return -EINVAL; - } - pgid = user_pgid; + cerr << "specified pgid " << user_pgid + << " does not match actual pgid " << pgid << std::endl; + return -EINVAL; } } @@ -1260,7 +1833,7 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, cerr << "Exported features: " << pgb.superblock.compat_features << std::endl; } - // Special case: Old export has SHARDS incompat feature on replicated pg, remove it + // Special case: Old export has SHARDS 
incompat feature on replicated pg, remove it if (pgid.is_no_shard()) pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS); @@ -1274,12 +1847,12 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, return 11; // Positive return means exit status } - // Don't import if pool no longer exists + // we need the latest OSDMap to check for collisions OSDMap curmap; bufferlist bl; ret = get_osdmap(store, sb.current_epoch, curmap, bl); if (ret) { - cerr << "Can't find local OSDMap" << std::endl; + cerr << "Can't find latest local OSDMap " << sb.current_epoch << std::endl; return ret; } if (!curmap.have_pg_pool(pgid.pgid.m_pool)) { @@ -1288,35 +1861,27 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, return 10; // Positive return means exit status } + pool_pg_num_history_t pg_num_history; + get_pg_num_history(store, &pg_num_history); + ghobject_t pgmeta_oid = pgid.make_pgmeta_oid(); - log_oid = OSD::make_pg_log_oid(pgid); - biginfo_oid = OSD::make_pg_biginfo_oid(pgid); - //Check for PG already present. + // Check for PG already present. 
coll_t coll(pgid); if (store->collection_exists(coll)) { cerr << "pgid " << pgid << " already exists" << std::endl; return -EEXIST; } - if (!dry_run) { - ObjectStore::Transaction t; - PG::_create(t, pgid, - pgid.get_split_bits(curmap.get_pg_pool(pgid.pool())->get_pg_num())); - PG::_init(t, pgid, NULL); - - // mark this coll for removal until we're done - map values; - ::encode((char)1, values["_remove"]); - t.omap_setkeys(coll, pgid.make_pgmeta_oid(), values); + ObjectStore::CollectionHandle ch; - store->apply_transaction(&osr, std::move(t)); - } + OSDriver driver( + store, + coll_t(), + OSD::make_snapmapper_oid()); + SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pgid.shard); cout << "Importing pgid " << pgid; - if (orig_pgid != pgid) { - cout << " exported as " << orig_pgid; - } cout << std::endl; bool done = false; @@ -1327,26 +1892,102 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, if (ret) return ret; - //cout << "do_import: Section type " << hex << type << dec << std::endl; + if (debug) { + cout << __func__ << ": Section type " << std::to_string(type) << std::endl; + } if (type >= END_OF_TYPES) { cout << "Skipping unknown section type" << std::endl; continue; } switch(type) { case TYPE_OBJECT_BEGIN: - ret = get_object(store, coll, ebl, curmap, &skipped_objects, osr); + ceph_assert(found_metadata); + ret = get_object(store, driver, mapper, coll, ebl, ms.osdmap, + &skipped_objects); if (ret) return ret; break; case TYPE_PG_METADATA: - ret = get_pg_metadata(store, ebl, ms, sb, curmap, pgid); + ret = get_pg_metadata(store, ebl, ms, sb, pgid); if (ret) return ret; found_metadata = true; + + if (pgid != ms.info.pgid) { + cerr << "specified pgid " << pgid << " does not match import file pgid " + << ms.info.pgid << std::endl; + return -EINVAL; + } + + // make sure there are no conflicting splits or merges + if (ms.osdmap.have_pg_pool(pgid.pgid.pool())) { + auto p = pg_num_history.pg_nums.find(pgid.pgid.m_pool); + if (p != 
pg_num_history.pg_nums.end() && + !p->second.empty()) { + unsigned start_pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool()); + unsigned pg_num = start_pg_num; + for (auto q = p->second.lower_bound(ms.map_epoch); + q != p->second.end(); + ++q) { + unsigned new_pg_num = q->second; + cout << "pool " << pgid.pgid.pool() << " pg_num " << pg_num + << " -> " << new_pg_num << std::endl; + + // check for merge target + spg_t target; + if (pgid.is_merge_source(pg_num, new_pg_num, &target)) { + // FIXME: this checks assumes the OSD's PG is at the OSD's + // map epoch; it could be, say, at *our* epoch, pre-merge. + coll_t coll(target); + if (store->collection_exists(coll)) { + cerr << "pgid " << pgid << " merges to target " << target + << " which already exists" << std::endl; + return 12; + } + } + + // check for split children + set children; + if (pgid.is_split(start_pg_num, new_pg_num, &children)) { + cerr << " children are " << children << std::endl; + for (auto child : children) { + coll_t coll(child); + if (store->collection_exists(coll)) { + cerr << "pgid " << pgid << " splits to " << children + << " and " << child << " exists" << std::endl; + return 12; + } + } + } + pg_num = new_pg_num; + } + } + } else { + cout << "pool " << pgid.pgid.pool() << " doesn't existing, not checking" + << " for splits or mergers" << std::endl; + } + + if (!dry_run) { + ObjectStore::Transaction t; + ch = store->create_new_collection(coll); + create_pg_collection( + t, pgid, + pgid.get_split_bits(ms.osdmap.get_pg_pool(pgid.pool())->get_pg_num())); + init_pg_ondisk(t, pgid, NULL); + + // mark this coll for removal until we're done + map values; + encode((char)1, values["_remove"]); + t.omap_setkeys(coll, pgid.make_pgmeta_oid(), values); + + store->queue_transaction(ch, std::move(t)); + } + break; case TYPE_PG_END: + ceph_assert(found_metadata); done = true; break; default: - cerr << "Unknown section type " << type << std::endl; + cerr << "Unknown section type " << std::to_string(type) << 
std::endl; return -EFAULT; } } @@ -1359,7 +2000,7 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, ObjectStore::Transaction t; if (!dry_run) { pg_log_t newlog, reject; - pg_log_t::filter_log(pgid, curmap, g_ceph_context->_conf->osd_hit_set_namespace, + pg_log_t::filter_log(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace, ms.log, newlog, reject); if (debug) { for (list::iterator i = newlog.log.begin(); @@ -1371,7 +2012,7 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, } divergent_priors_t newdp, rejectdp; - filter_divergent_priors(pgid, curmap, g_ceph_context->_conf->osd_hit_set_namespace, + filter_divergent_priors(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace, ms.divergent_priors, newdp, rejectdp); ms.divergent_priors = newdp; if (debug) { @@ -1386,11 +2027,11 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, ms.missing.filter_objects([&](const hobject_t &obj) { if (obj.nspace == g_ceph_context->_conf->osd_hit_set_namespace) return false; - assert(!obj.is_temp()); + ceph_assert(!obj.is_temp()); object_t oid = obj.oid; object_locator_t loc(obj); - pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc); - pg_t _pgid = curmap.raw_pg_to_pg(raw_pgid); + pg_t raw_pgid = ms.osdmap.object_locator_to_pg(oid, loc); + pg_t _pgid = ms.osdmap.raw_pg_to_pg(raw_pgid); return pgid.pgid != _pgid; }); @@ -1423,12 +2064,13 @@ int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb, cerr << "done, clearing removal flag" << std::endl; if (!dry_run) { - set remove; - remove.insert("_remove"); - t.omap_rmkeys(coll, pgid.make_pgmeta_oid(), remove); - store->apply_transaction(&osr, std::move(t)); + t.omap_rmkey(coll, pgid.make_pgmeta_oid(), "_remove"); + wait_until_done(&t, [&] { + store->queue_transaction(ch, std::move(t)); + // make sure we flush onreadable items before mapper/driver are destroyed. 
+ ch->flush(); + }); } - return 0; } @@ -1449,6 +2091,23 @@ int do_list(ObjectStore *store, string pgidstr, string object, boost::optional 0) { + r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug); + } else { + r = action_on_all_objects(store, lookup, debug); + } + if (r) + return r; + lookup.dump(formatter, human_readable); + formatter->flush(cout); + return 0; +} + int do_meta(ObjectStore *store, string object, Formatter *formatter, bool debug, bool human_readable) { int r; @@ -1462,27 +2121,38 @@ int do_meta(ObjectStore *store, string object, Formatter *formatter, bool debug, return 0; } +enum rmtype { + BOTH, + SNAPMAP, + NOSNAPMAP +}; + int remove_object(coll_t coll, ghobject_t &ghobj, SnapMapper &mapper, MapCacher::Transaction *_t, - ObjectStore::Transaction *t) + ObjectStore::Transaction *t, + enum rmtype type) { - int r = mapper.remove_oid(ghobj.hobj, _t); - if (r < 0 && r != -ENOENT) { - cerr << "remove_oid returned " << cpp_strerror(r) << std::endl; - return r; + if (type == BOTH || type == SNAPMAP) { + int r = mapper.remove_oid(ghobj.hobj, _t); + if (r < 0 && r != -ENOENT) { + cerr << "remove_oid returned " << cpp_strerror(r) << std::endl; + return r; + } } - t->remove(coll, ghobj); + if (type == BOTH || type == NOSNAPMAP) { + t->remove(coll, ghobj); + } return 0; } int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent); int do_remove_object(ObjectStore *store, coll_t coll, - ghobject_t &ghobj, bool all, bool force, - ObjectStore::Sequencer &osr) + ghobject_t &ghobj, bool all, bool force, enum rmtype type) { + auto ch = store->open_collection(coll); spg_t pg; coll.is_pg_prefix(&pg); OSDriver driver( @@ -1492,7 +2162,7 @@ int do_remove_object(ObjectStore *store, coll_t coll, SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pg.shard); struct stat st; - int r = store->stat(coll, ghobj, &st); + int r = store->stat(ch, ghobj, &st); if (r < 0) { cerr << "remove: " << cpp_strerror(r) << std::endl; return r; 
@@ -1503,16 +2173,20 @@ int do_remove_object(ObjectStore *store, coll_t coll, r = get_snapset(store, coll, ghobj, ss, false); if (r < 0) { cerr << "Can't get snapset error " << cpp_strerror(r) << std::endl; - return r; + // If --force and bad snapset let them remove the head + if (!(force && !all)) + return r; } - if (!ss.snaps.empty() && !all) { +// cout << "snapset " << ss << std::endl; + if (!ss.clone_snaps.empty() && !all) { if (force) { cout << "WARNING: only removing " << (ghobj.hobj.is_head() ? "head" : "snapdir") - << " with snapshots present" << std::endl; - ss.snaps.clear(); + << " with clones present" << std::endl; + ss.clone_snaps.clear(); } else { - cerr << "Snapshots are present, use removeall to delete everything" << std::endl; + cerr << "Clones are present, use removeall to delete everything" + << std::endl; return -EINVAL; } } @@ -1521,36 +2195,39 @@ int do_remove_object(ObjectStore *store, coll_t coll, ObjectStore::Transaction t; OSDriver::OSTransaction _t(driver.get_transaction(&t)); - cout << "remove " << ghobj << std::endl; - - if (!dry_run) { - r = remove_object(coll, ghobj, mapper, &_t, &t); - if (r < 0) - return r; - } - ghobject_t snapobj = ghobj; - for (vector::iterator i = ss.snaps.begin() ; - i != ss.snaps.end() ; ++i) { - snapobj.hobj.snap = *i; - cout << "remove " << snapobj << std::endl; + for (auto& p : ss.clone_snaps) { + snapobj.hobj.snap = p.first; + cout << "remove clone " << snapobj << std::endl; if (!dry_run) { - r = remove_object(coll, snapobj, mapper, &_t, &t); + r = remove_object(coll, snapobj, mapper, &_t, &t, type); if (r < 0) return r; } } - if (!dry_run) - store->apply_transaction(&osr, std::move(t)); + cout << "remove " << ghobj << std::endl; + if (!dry_run) { + r = remove_object(coll, ghobj, mapper, &_t, &t, type); + if (r < 0) + return r; + } + + if (!dry_run) { + wait_until_done(&t, [&] { + store->queue_transaction(ch, std::move(t)); + ch->flush(); + }); + } return 0; } int do_list_attrs(ObjectStore *store, coll_t 
coll, ghobject_t &ghobj) { - map aset; - int r = store->getattrs(coll, ghobj, aset); + auto ch = store->open_collection(coll); + map> aset; + int r = store->getattrs(ch, ghobj, aset); if (r < 0) { cerr << "getattrs: " << cpp_strerror(r) << std::endl; return r; @@ -1567,7 +2244,8 @@ int do_list_attrs(ObjectStore *store, coll_t coll, ghobject_t &ghobj) int do_list_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj) { - ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(coll, ghobj); + auto ch = store->open_collection(coll); + ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, ghobj); if (!iter) { cerr << "omap_get_iterator: " << cpp_strerror(ENOENT) << std::endl; return -ENOENT; @@ -1589,10 +2267,11 @@ int do_list_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj) int do_get_bytes(ObjectStore *store, coll_t coll, ghobject_t &ghobj, int fd) { + auto ch = store->open_collection(coll); struct stat st; mysize_t total; - int ret = store->stat(coll, ghobj, &st); + int ret = store->stat(ch, ghobj, &st); if (ret < 0) { cerr << "get-bytes: " << cpp_strerror(ret) << std::endl; return ret; @@ -1610,7 +2289,7 @@ int do_get_bytes(ObjectStore *store, coll_t coll, ghobject_t &ghobj, int fd) if (len > total) len = total; - ret = store->read(coll, ghobj, offset, len, rawdatabl); + ret = store->read(ch, ghobj, offset, len, rawdatabl); if (ret < 0) return ret; if (ret == 0) @@ -1633,8 +2312,7 @@ int do_get_bytes(ObjectStore *store, coll_t coll, ghobject_t &ghobj, int fd) } int do_set_bytes(ObjectStore *store, coll_t coll, - ghobject_t &ghobj, int fd, - ObjectStore::Sequencer &osr) + ghobject_t &ghobj, int fd) { ObjectStore::Transaction tran; ObjectStore::Transaction *t = &tran; @@ -1666,19 +2344,21 @@ int do_set_bytes(ObjectStore *store, coll_t coll, t->write(coll, ghobj, offset, bytes, rawdatabl); offset += bytes; - // XXX: Should we apply_transaction() every once in a while for very large files + // XXX: Should we queue_transaction() every once in a 
while for very large files } while(true); + auto ch = store->open_collection(coll); if (!dry_run) - store->apply_transaction(&osr, std::move(*t)); + store->queue_transaction(ch, std::move(*t)); return 0; } int do_get_attr(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key) { + auto ch = store->open_collection(coll); bufferptr bp; - int r = store->getattr(coll, ghobj, key.c_str(), bp); + int r = store->getattr(ch, ghobj, key.c_str(), bp); if (r < 0) { cerr << "getattr: " << cpp_strerror(r) << std::endl; return r; @@ -1695,8 +2375,7 @@ int do_get_attr(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key) } int do_set_attr(ObjectStore *store, coll_t coll, - ghobject_t &ghobj, string key, int fd, - ObjectStore::Sequencer &osr) + ghobject_t &ghobj, string key, int fd) { ObjectStore::Transaction tran; ObjectStore::Transaction *t = &tran; @@ -1716,13 +2395,13 @@ int do_set_attr(ObjectStore *store, coll_t coll, t->setattr(coll, ghobj, key, bl); - store->apply_transaction(&osr, std::move(*t)); + auto ch = store->open_collection(coll); + store->queue_transaction(ch, std::move(*t)); return 0; } int do_rm_attr(ObjectStore *store, coll_t coll, - ghobject_t &ghobj, string key, - ObjectStore::Sequencer &osr) + ghobject_t &ghobj, string key) { ObjectStore::Transaction tran; ObjectStore::Transaction *t = &tran; @@ -1735,18 +2414,20 @@ int do_rm_attr(ObjectStore *store, coll_t coll, t->rmattr(coll, ghobj, key); - store->apply_transaction(&osr, std::move(*t)); + auto ch = store->open_collection(coll); + store->queue_transaction(ch, std::move(*t)); return 0; } int do_get_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key) { + auto ch = store->open_collection(coll); set keys; map out; keys.insert(key); - int r = store->omap_get_values(coll, ghobj, keys, &out); + int r = store->omap_get_values(ch, ghobj, keys, &out); if (r < 0) { cerr << "omap_get_values: " << cpp_strerror(r) << std::endl; return r; @@ -1757,7 +2438,7 @@ int do_get_omap(ObjectStore 
*store, coll_t coll, ghobject_t &ghobj, string key) return -ENOENT; } - assert(out.size() == 1); + ceph_assert(out.size() == 1); bufferlist bl = out.begin()->second; string value(bl.c_str(), bl.length()); @@ -1771,8 +2452,7 @@ int do_get_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key) } int do_set_omap(ObjectStore *store, coll_t coll, - ghobject_t &ghobj, string key, int fd, - ObjectStore::Sequencer &osr) + ghobject_t &ghobj, string key, int fd) { ObjectStore::Transaction tran; ObjectStore::Transaction *t = &tran; @@ -1795,19 +2475,16 @@ int do_set_omap(ObjectStore *store, coll_t coll, t->omap_setkeys(coll, ghobj, attrset); - store->apply_transaction(&osr, std::move(*t)); + auto ch = store->open_collection(coll); + store->queue_transaction(ch, std::move(*t)); return 0; } int do_rm_omap(ObjectStore *store, coll_t coll, - ghobject_t &ghobj, string key, - ObjectStore::Sequencer &osr) + ghobject_t &ghobj, string key) { ObjectStore::Transaction tran; ObjectStore::Transaction *t = &tran; - set keys; - - keys.insert(key); if (debug) cerr << "Rm_omap " << ghobj << std::endl; @@ -1815,17 +2492,19 @@ int do_rm_omap(ObjectStore *store, coll_t coll, if (dry_run) return 0; - t->omap_rmkeys(coll, ghobj, keys); + t->omap_rmkey(coll, ghobj, key); - store->apply_transaction(&osr, std::move(*t)); + auto ch = store->open_collection(coll); + store->queue_transaction(ch, std::move(*t)); return 0; } int do_get_omaphdr(ObjectStore *store, coll_t coll, ghobject_t &ghobj) { + auto ch = store->open_collection(coll); bufferlist hdrbl; - int r = store->omap_get_header(coll, ghobj, &hdrbl, true); + int r = store->omap_get_header(ch, ghobj, &hdrbl, true); if (r < 0) { cerr << "omap_get_header: " << cpp_strerror(r) << std::endl; return r; @@ -1842,8 +2521,7 @@ int do_get_omaphdr(ObjectStore *store, coll_t coll, ghobject_t &ghobj) } int do_set_omaphdr(ObjectStore *store, coll_t coll, - ghobject_t &ghobj, int fd, - ObjectStore::Sequencer &osr) + ghobject_t &ghobj, int fd) { 
ObjectStore::Transaction tran; ObjectStore::Transaction *t = &tran; @@ -1863,16 +2541,13 @@ int do_set_omaphdr(ObjectStore *store, coll_t coll, t->omap_setheader(coll, ghobj, hdrbl); - store->apply_transaction(&osr, std::move(*t)); + auto ch = store->open_collection(coll); + store->queue_transaction(ch, std::move(*t)); return 0; } struct do_fix_lost : public action_on_object_t { - ObjectStore::Sequencer *osr; - - explicit do_fix_lost(ObjectStore::Sequencer *_osr) : osr(_osr) {} - - int call(ObjectStore *store, coll_t coll, + void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override { if (oi.is_lost()) { cout << coll << "/" << ghobj << " is lost"; @@ -1880,37 +2555,33 @@ struct do_fix_lost : public action_on_object_t { cout << ", fixing"; cout << std::endl; if (dry_run) - return 0; + return; oi.clear_flag(object_info_t::FLAG_LOST); bufferlist bl; - ::encode(oi, bl, -1); /* fixme: using full features */ + encode(oi, bl, -1); /* fixme: using full features */ ObjectStore::Transaction t; t.setattr(coll, ghobj, OI_ATTR, bl); - int r = store->apply_transaction(osr, std::move(t)); - if (r < 0) { - cerr << "Error getting fixing attr on : " << make_pair(coll, ghobj) - << ", " - << cpp_strerror(r) << std::endl; - return r; - } + auto ch = store->open_collection(coll); + store->queue_transaction(ch, std::move(t)); } - return 0; + return; } }; int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent = false) { + auto ch = store->open_collection(coll); bufferlist attr; - int r = store->getattr(coll, ghobj, SS_ATTR, attr); + int r = store->getattr(ch, ghobj, SS_ATTR, attr); if (r < 0) { if (!silent) cerr << "Error getting snapset on : " << make_pair(coll, ghobj) << ", " << cpp_strerror(r) << std::endl; return r; } - bufferlist::iterator bp = attr.begin(); + auto bp = attr.cbegin(); try { - ::decode(ss, bp); + decode(ss, bp); } catch (...) 
{ r = -EINVAL; cerr << "Error decoding snapset on : " << make_pair(coll, ghobj) << ", " @@ -1922,6 +2593,7 @@ int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter) { + auto ch = store->open_collection(coll); int r = 0; formatter->open_object_section("obj"); formatter->open_object_section("id"); @@ -1929,16 +2601,16 @@ int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter formatter->close_section(); bufferlist attr; - int gr = store->getattr(coll, ghobj, OI_ATTR, attr); + int gr = store->getattr(ch, ghobj, OI_ATTR, attr); if (gr < 0) { r = gr; cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", " << cpp_strerror(r) << std::endl; } else { object_info_t oi; - bufferlist::iterator bp = attr.begin(); + auto bp = attr.cbegin(); try { - ::decode(oi, bp); + decode(oi, bp); formatter->open_object_section("info"); oi.dump(formatter); formatter->close_section(); @@ -1949,7 +2621,7 @@ int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter } } struct stat st; - int sr = store->stat(coll, ghobj, &st, true); + int sr = store->stat(ch, ghobj, &st, true); if (sr < 0) { r = sr; cerr << "Error stat on : " << make_pair(coll, ghobj) << ", " @@ -1974,30 +2646,87 @@ int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter formatter->close_section(); } } + bufferlist hattr; + gr = store->getattr(ch, ghobj, ECUtil::get_hinfo_key(), hattr); + if (gr == 0) { + ECUtil::HashInfo hinfo; + auto hp = hattr.cbegin(); + try { + decode(hinfo, hp); + formatter->open_object_section("hinfo"); + hinfo.dump(formatter); + formatter->close_section(); + } catch (...) 
{ + r = -EINVAL; + cerr << "Error decoding hinfo on : " << make_pair(coll, ghobj) << ", " + << cpp_strerror(r) << std::endl; + } + } + gr = store->dump_onode(ch, ghobj, "onode", formatter); + formatter->close_section(); formatter->flush(cout); cout << std::endl; return r; } -int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter, - ObjectStore::Sequencer &osr) +int corrupt_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter) +{ + auto ch = store->open_collection(coll); + bufferlist attr; + int r = store->getattr(ch, ghobj, OI_ATTR, attr); + if (r < 0) { + cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", " + << cpp_strerror(r) << std::endl; + return r; + } + object_info_t oi; + auto bp = attr.cbegin(); + try { + decode(oi, bp); + } catch (...) { + r = -EINVAL; + cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", " + << cpp_strerror(r) << std::endl; + return r; + } + if (!dry_run) { + attr.clear(); + oi.alloc_hint_flags += 0xff; + ObjectStore::Transaction t; + encode(oi, attr, -1); /* fixme: using full features */ + t.setattr(coll, ghobj, OI_ATTR, attr); + auto ch = store->open_collection(coll); + r = store->queue_transaction(ch, std::move(t)); + if (r < 0) { + cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", " + << cpp_strerror(r) << std::endl; + return r; + } + } + return 0; +} + +int set_size( + ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter, + bool corrupt) { + auto ch = store->open_collection(coll); if (ghobj.hobj.is_snapdir()) { cerr << "Can't set the size of a snapdir" << std::endl; return -EINVAL; } bufferlist attr; - int r = store->getattr(coll, ghobj, OI_ATTR, attr); + int r = store->getattr(ch, ghobj, OI_ATTR, attr); if (r < 0) { cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", " << cpp_strerror(r) << std::endl; return r; } object_info_t oi; - bufferlist::iterator bp 
= attr.begin(); + auto bp = attr.cbegin(); try { - ::decode(oi, bp); + decode(oi, bp); } catch (...) { r = -EINVAL; cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", " @@ -2005,7 +2734,7 @@ int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsiz return r; } struct stat st; - r = store->stat(coll, ghobj, &st, true); + r = store->stat(ch, ghobj, &st, true); if (r < 0) { cerr << "Error stat on : " << make_pair(coll, ghobj) << ", " << cpp_strerror(r) << std::endl; @@ -2055,17 +2784,59 @@ int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsiz if (!dry_run) { attr.clear(); oi.size = setsize; - ::encode(oi, attr, -1); /* fixme: using full features */ ObjectStore::Transaction t; + // Only modify object info if we want to corrupt it + if (!corrupt && (uint64_t)st.st_size != setsize) { + t.truncate(coll, ghobj, setsize); + // Changing objectstore size will invalidate data_digest, so clear it. + oi.clear_data_digest(); + } + encode(oi, attr, -1); /* fixme: using full features */ t.setattr(coll, ghobj, OI_ATTR, attr); - t.truncate(coll, ghobj, setsize); if (is_snap) { bufferlist snapattr; snapattr.clear(); - ::encode(ss, snapattr); + encode(ss, snapattr); t.setattr(coll, head, SS_ATTR, snapattr); } - r = store->apply_transaction(&osr, std::move(t)); + auto ch = store->open_collection(coll); + r = store->queue_transaction(ch, std::move(t)); + if (r < 0) { + cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", " + << cpp_strerror(r) << std::endl; + return r; + } + } + return 0; +} + +int clear_data_digest(ObjectStore *store, coll_t coll, ghobject_t &ghobj) { + auto ch = store->open_collection(coll); + bufferlist attr; + int r = store->getattr(ch, ghobj, OI_ATTR, attr); + if (r < 0) { + cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", " + << cpp_strerror(r) << std::endl; + return r; + } + object_info_t oi; + auto bp = attr.cbegin(); + try { + decode(oi, bp); + } catch (...) 
{ + r = -EINVAL; + cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", " + << cpp_strerror(r) << std::endl; + return r; + } + if (!dry_run) { + attr.clear(); + oi.clear_data_digest(); + encode(oi, attr, -1); /* fixme: using full features */ + ObjectStore::Transaction t; + t.setattr(coll, ghobj, OI_ATTR, attr); + auto ch = store->open_collection(coll); + r = store->queue_transaction(ch, std::move(t)); if (r < 0) { cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", " << cpp_strerror(r) << std::endl; @@ -2076,28 +2847,20 @@ int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsiz } int clear_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, - string arg, ObjectStore::Sequencer &osr) + string arg) { SnapSet ss; int ret = get_snapset(store, coll, ghobj, ss); if (ret < 0) return ret; - // Use "head" to set head_exists incorrectly - if (arg == "corrupt" || arg == "head") - ss.head_exists = !ghobj.hobj.is_head(); - else if (ss.head_exists != ghobj.hobj.is_head()) { - cerr << "Correcting head_exists, set to " - << (ghobj.hobj.is_head() ? 
"true" : "false") << std::endl; - ss.head_exists = ghobj.hobj.is_head(); - } // Use "corrupt" to clear entire SnapSet // Use "seq" to just corrupt SnapSet.seq if (arg == "corrupt" || arg == "seq") ss.seq = 0; - // Use "snaps" to just clear SnapSet.snaps + // Use "snaps" to just clear SnapSet.clone_snaps if (arg == "corrupt" || arg == "snaps") - ss.snaps.clear(); + ss.clone_snaps.clear(); // By default just clear clone, clone_overlap and clone_size if (arg == "corrupt") arg = ""; @@ -2116,10 +2879,11 @@ int clear_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, if (!dry_run) { bufferlist bl; - ::encode(ss, bl); + encode(ss, bl); ObjectStore::Transaction t; t.setattr(coll, ghobj, SS_ATTR, bl); - int r = store->apply_transaction(&osr, std::move(t)); + auto ch = store->open_collection(coll); + int r = store->queue_transaction(ch, std::move(t)); if (r < 0) { cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", " << cpp_strerror(r) << std::endl; @@ -2164,8 +2928,8 @@ int remove_from(T &mv, string name, snapid_t cloneid, bool force) return 0; } -int remove_clone(ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cloneid, bool force, - ObjectStore::Sequencer &osr) +int remove_clone( + ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cloneid, bool force) { // XXX: Don't allow this if in a cache tier or former cache tier // bool allow_incomplete_clones() const { @@ -2214,10 +2978,11 @@ int remove_clone(ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cl return 0; bufferlist bl; - ::encode(snapset, bl); + encode(snapset, bl); ObjectStore::Transaction t; t.setattr(coll, ghobj, SS_ATTR, bl); - int r = store->apply_transaction(&osr, std::move(t)); + auto ch = store->open_collection(coll); + int r = store->queue_transaction(ch, std::move(t)); if (r < 0) { cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", " << cpp_strerror(r) << std::endl; @@ -2233,7 +2998,6 @@ int dup(string srcpath, ObjectStore 
*src, string dstpath, ObjectStore *dst) cout << "dup from " << src->get_type() << ": " << srcpath << "\n" << " to " << dst->get_type() << ": " << dstpath << std::endl; - ObjectStore::Sequencer osr("dup"); int num, i; vector collections; int r; @@ -2278,16 +3042,22 @@ int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst) i = 1; for (auto cid : collections) { cout << i++ << "/" << num << " " << cid << std::endl; + auto ch = src->open_collection(cid); + auto dch = dst->create_new_collection(cid); { ObjectStore::Transaction t; - int bits = src->collection_bits(cid); + int bits = src->collection_bits(ch); if (bits < 0) { - cerr << "cannot get bit count for collection " << cid << ": " - << cpp_strerror(bits) << std::endl; - goto out; + if (src->get_type() == "filestore" && cid.is_meta()) { + bits = 0; + } else { + cerr << "cannot get bit count for collection " << cid << ": " + << cpp_strerror(bits) << std::endl; + goto out; + } } t.create_collection(cid, bits); - dst->apply_transaction(&osr, std::move(t)); + dst->queue_transaction(dch, std::move(t)); } ghobject_t pos; @@ -2295,7 +3065,7 @@ int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst) uint64_t bytes = 0, keys = 0; while (true) { vector ls; - r = src->collection_list(cid, pos, ghobject_t::get_max(), 1000, &ls, &pos); + r = src->collection_list(ch, pos, ghobject_t::get_max(), 1000, &ls, &pos); if (r < 0) { cerr << "collection_list on " << cid << " from " << pos << " got: " << cpp_strerror(r) << std::endl; @@ -2318,14 +3088,14 @@ int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst) ObjectStore::Transaction t; t.touch(cid, oid); - map attrs; - src->getattrs(cid, oid, attrs); + map> attrs; + src->getattrs(ch, oid, attrs); if (!attrs.empty()) { t.setattrs(cid, oid, attrs); } bufferlist bl; - src->read(cid, oid, 0, 0, bl); + src->read(ch, oid, 0, 0, bl); if (bl.length()) { t.write(cid, oid, 0, bl.length(), bl); bytes += bl.length(); @@ -2333,7 +3103,7 @@ 
int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst) bufferlist header; map omap; - src->omap_get(cid, oid, &header, &omap); + src->omap_get(ch, oid, &header, &omap); if (header.length()) { t.omap_setheader(cid, oid, header); ++keys; @@ -2343,7 +3113,7 @@ int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst) t.omap_setkeys(cid, oid, omap); } - dst->apply_transaction(&osr, std::move(t)); + dst->queue_transaction(dch, std::move(t)); } } cout << " " << std::setw(16) << n << " objects, " @@ -2388,6 +3158,136 @@ int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst) return r; } + +const int ceph_entity_name_type(const string name) +{ + if (name == "mds") return CEPH_ENTITY_TYPE_MDS; + if (name == "osd") return CEPH_ENTITY_TYPE_OSD; + if (name == "mon") return CEPH_ENTITY_TYPE_MON; + if (name == "client") return CEPH_ENTITY_TYPE_CLIENT; + if (name == "mgr") return CEPH_ENTITY_TYPE_MGR; + if (name == "auth") return CEPH_ENTITY_TYPE_AUTH; + return -1; +} + +eversion_t get_eversion_from_str(const string& s) { + eversion_t e; + vector result; + boost::split(result, s, boost::is_any_of("'")); + if (result.size() != 2) { + cerr << "eversion_t: invalid format: '" << s << "'" << std::endl; + return e; + } + e.epoch = atoi(result[0].c_str()); + e.version = atoi(result[1].c_str()); + return e; +} + +osd_reqid_t get_reqid_from_str(const string& s) { + osd_reqid_t reqid; + + vector result; + boost::split(result, s, boost::is_any_of(".:")); + if (result.size() != 4) { + cerr << "reqid: invalid format " << s << std::endl; + return osd_reqid_t(); + } + reqid.name._type = ceph_entity_name_type(result[0]); + reqid.name._num = atoi(result[1].c_str()); + + reqid.inc = atoi(result[2].c_str()); + reqid.tid = atoi(result[3].c_str()); + return reqid; +} + +void do_dups_inject_transction(ObjectStore *store, spg_t r_pgid, map *new_dups) +{ + ObjectStore::Transaction t; + coll_t coll(r_pgid); + cerr << "injecting dups into pgid:" 
<< r_pgid << " num of dups:" << new_dups->size() << std::endl; + t.omap_setkeys(coll, r_pgid.make_pgmeta_oid(), (*new_dups)); + auto ch = store->open_collection(coll); + store->queue_transaction(ch, std::move(t)); + new_dups->clear(); +} + +int do_dups_inject_object(ObjectStore *store, spg_t r_pgid, json_spirit::mObject &in_json_obj, + map *new_dups, bool debug) { + std::map::const_iterator it = in_json_obj.find("generate"); + int32_t generate = 0; + if (it != in_json_obj.end()) { + generate = atoi(it->second.get_str().c_str()); + } + + it = in_json_obj.find("reqid"); + if (it == in_json_obj.end()) { + return 1; + } + osd_reqid_t reqid(get_reqid_from_str(it->second.get_str())); + it = in_json_obj.find("version"); + if (it == in_json_obj.end()) { + return 1; + } + eversion_t version(get_eversion_from_str(it->second.get_str())); + it = in_json_obj.find("user_version"); + if (it == in_json_obj.end()) { + return 1; + } + version_t user_version = atoi(it->second.get_str().c_str()); + it = in_json_obj.find("return_code"); + if (it == in_json_obj.end()) { + return 1; + } + int32_t return_code = atoi(it->second.get_str().c_str()); + if (generate) { + for(auto i = 0; i < generate; ++i) { + version.version++; + if (debug) { + cout << "generate dups reqid " << reqid << " v=" << version << std::endl; + } + pg_log_dup_t tmp(version, user_version, reqid, return_code); + bufferlist bl; + encode(tmp, bl); + (*new_dups)[tmp.get_key_name()] = std::move(bl); + if ( new_dups->size() > 50000 ) { + do_dups_inject_transction(store, r_pgid, new_dups); + cout << "inject of " << i << " dups into pgid:" << r_pgid << " done..." 
<< std::endl; + } + } + return 0; + } else { + pg_log_dup_t tmp(version, user_version, reqid, return_code); + if (debug) { + cout << "adding dup: " << tmp << "into key:" << tmp.get_key_name() << std::endl; + } + bufferlist bl; + encode(tmp, bl); + (*new_dups)[tmp.get_key_name()] = std::move(bl); + } + return 0; +} + +void do_dups_inject_from_json(ObjectStore *store, spg_t r_pgid, json_spirit::mValue &inJson, bool debug) +{ + map new_dups; + const vector& o = inJson.get_array(); + for (const auto& obj : o) { + if (obj.type() == json_spirit::obj_type) { + json_spirit::mObject Mobj = obj.get_obj(); + do_dups_inject_object(store, r_pgid, Mobj, &new_dups, debug); + } else { + throw std::runtime_error("JSON array/object not allowed type:" + std::to_string(obj.type())); + return; + } + } + if (new_dups.size() > 0) { + do_dups_inject_transction(store, r_pgid, &new_dups); + } + + + return ; +} + void usage(po::options_description &desc) { cerr << std::endl; @@ -2405,6 +3305,7 @@ void usage(po::options_description &desc) cerr << "ceph-objectstore-tool ... remove|removeall" << std::endl; cerr << "ceph-objectstore-tool ... dump" << std::endl; cerr << "ceph-objectstore-tool ... set-size" << std::endl; + cerr << "ceph-objectstore-tool ... clear-data-digest" << std::endl; cerr << "ceph-objectstore-tool ... 
remove-clone-metadata " << std::endl; cerr << std::endl; cerr << " can be a JSON object description as displayed" << std::endl; @@ -2439,7 +3340,8 @@ int mydump_journal(Formatter *f, string journalpath, bool m_journal_dio) } int apply_layout_settings(ObjectStore *os, const OSDSuperblock &superblock, - const string &pool_name, const spg_t &pgid, bool dry_run) + const string &pool_name, const spg_t &pgid, bool dry_run, + int target_level) { int r = 0; @@ -2489,7 +3391,7 @@ int apply_layout_settings(ObjectStore *os, const OSDSuperblock &superblock, cerr << "Would apply layout settings to " << coll << std::endl; } else { cerr << "Finished " << done << "/" << total << " collections" << "\r"; - r = fs->apply_layout_settings(coll); + r = fs->apply_layout_settings(coll, target_level); if (r < 0) { cerr << "Error applying layout settings to " << coll << std::endl; return r; @@ -2506,36 +3408,36 @@ int main(int argc, char **argv) { string dpath, jpath, pgidstr, op, file, mountpoint, mon_store_path, object; string target_data_path, fsid; - string objcmd, arg1, arg2, type, format, argnspace, pool; + string objcmd, arg1, arg2, type, format, argnspace, pool, rmtypestr; boost::optional nspace; spg_t pgid; unsigned epoch = 0; + unsigned slow_threshold = 16; ghobject_t ghobj; bool human_readable; - bool force; Formatter *formatter; - bool head; + bool head, tty; po::options_description desc("Allowed options"); desc.add_options() ("help", "produce help message") ("type", po::value(&type), - "Arg is one of [bluestore, filestore (default), memstore]") + "Arg is one of [bluestore (default), filestore, memstore]") ("data-path", po::value(&dpath), "path to object store, mandatory") ("journal-path", po::value(&jpath), "path to journal, use if tool can't find it") ("pgid", po::value(&pgidstr), - "PG id, mandatory for info, log, remove, export, rm-past-intervals, mark-complete, and mandatory for apply-layout-settings if --pool is not specified") + "PG id, mandatory for info, log, remove, 
export, export-remove, mark-complete, trim-pg-log, trim-pg-log-dups and mandatory for apply-layout-settings if --pool is not specified") ("pool", po::value(&pool), "Pool name, mandatory for apply-layout-settings if --pgid is not specified") ("op", po::value(&op), - "Arg is one of [info, log, remove, mkfs, fsck, fuse, dup, export, import, list, fix-lost, list-pgs, rm-past-intervals, dump-journal, dump-super, meta-list, " - "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, apply-layout-settings, update-mon-db]") + "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, list-slow-omap, fix-lost, list-pgs, dump-journal, dump-super, meta-list, " + "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log, trim-pg-log-dups statfs]") ("epoch", po::value(&epoch), "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified") ("file", po::value(&file), - "path of file to export, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap") + "path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap") ("mon-store-path", po::value(&mon_store_path), "path of monstore to update-mon-db") ("fsid", po::value(&fsid), @@ -2547,12 +3449,18 @@ int main(int argc, char **argv) ("format", po::value(&format)->default_value("json-pretty"), "Output format which may be json, json-pretty, xml, xml-pretty") ("debug", "Enable diagnostic output to stderr") + ("no-mon-config", "Do not contact mons for config") + ("no-superblock", "Do not read superblock") ("force", "Ignore some types of errors and proceed with operation - USE WITH CAUTION: CORRUPTION POSSIBLE NOW OR IN THE FUTURE") ("skip-journal-replay", "Disable journal replay") ("skip-mount-omap", "Disable mounting of omap") ("head", "Find head/snapdir when searching for objects by name") ("dry-run", 
"Don't modify the objectstore") + ("tty", "Treat stdout as a tty (no binary data)") ("namespace", po::value(&argnspace), "Specify namespace when searching for objects") + ("rmtype", po::value(&rmtypestr), "Specify corrupting object removal 'snapmap' or 'nosnapmap' - TESTING USE ONLY") + ("slow-omap-threshold", po::value(&slow_threshold), + "Threshold (in seconds) to consider omap listing slow (for op=list-slow-omap)") ; po::options_description positional("Positional options"); @@ -2561,16 +3469,16 @@ int main(int argc, char **argv) ("objcmd", po::value(&objcmd), "command [(get|set)-bytes, (get|set|rm)-(attr|omap), (get|set)-omaphdr, list-attrs, list-omap, remove]") ("arg1", po::value(&arg1), "arg1 based on cmd") ("arg2", po::value(&arg2), "arg2 based on cmd") - ("test-align", po::value(&testalign)->default_value(0), "hidden align option for testing") ; - po::options_description all("All options"); + po::options_description all; all.add(desc).add(positional); po::positional_options_description pd; pd.add("object", 1).add("objcmd", 1).add("arg1", 1).add("arg2", 1); vector ceph_option_strings; + po::variables_map vm; try { po::parsed_options parsed = @@ -2585,27 +3493,26 @@ int main(int argc, char **argv) } if (vm.count("help")) { - usage(all); + usage(desc); return 1; } - if (!vm.count("debug")) { - debug = false; - } else { - debug = true; - } + // Compatibility with previous option name + if (op == "dump-import") + op = "dump-export"; - if (!vm.count("force")) { - force = false; - } else { - force = true; - } + debug = (vm.count("debug") > 0); + + force = (vm.count("force") > 0); + + no_superblock = (vm.count("no-superblock") > 0); if (vm.count("namespace")) nspace = argnspace; - if (vm.count("dry-run")) - dry_run = true; + dry_run = (vm.count("dry-run") > 0); + tty = (vm.count("tty") > 0); + osflagbits_t flags = 0; if (dry_run || vm.count("skip-journal-replay")) flags |= SKIP_JOURNAL_REPLAY; @@ -2613,10 +3520,31 @@ int main(int argc, char **argv) flags |= 
SKIP_MOUNT_OMAP; if (op == "update-mon-db") flags |= SKIP_JOURNAL_REPLAY; + head = (vm.count("head") > 0); + // infer osd id so we can authenticate + char fn[PATH_MAX]; + snprintf(fn, sizeof(fn), "%s/whoami", dpath.c_str()); + int fd = ::open(fn, O_RDONLY); + if (fd >= 0) { + bufferlist bl; + bl.read_fd(fd, 64); + string s(bl.c_str(), bl.length()); + int whoami = atoi(s.c_str()); + vector tmp; + // identify ourselves as this osd so we can auth and fetch our configs + tmp.push_back("-n"); + tmp.push_back(string("osd.") + stringify(whoami)); + // populate osd_data so that the default keyring location works + tmp.push_back("--osd-data"); + tmp.push_back(dpath); + tmp.insert(tmp.end(), ceph_option_strings.begin(), + ceph_option_strings.end()); + tmp.swap(ceph_option_strings); + } + vector ceph_options; - env_to_vec(ceph_options); ceph_options.reserve(ceph_options.size() + ceph_option_strings.size()); for (vector::iterator i = ceph_option_strings.begin(); i != ceph_option_strings.end(); @@ -2624,9 +3552,8 @@ int main(int argc, char **argv) ceph_options.push_back(i->c_str()); } - char fn[PATH_MAX]; snprintf(fn, sizeof(fn), "%s/type", dpath.c_str()); - int fd = ::open(fn, O_RDONLY); + fd = ::open(fn, O_RDONLY); if (fd >= 0) { bufferlist bl; bl.read_fd(fd, 64); @@ -2640,10 +3567,12 @@ int main(int argc, char **argv) } ::close(fd); } + if (!vm.count("type") && type == "") { - type = "filestore"; + type = "bluestore"; } if (!vm.count("data-path") && + op != "dump-export" && !(op == "dump-journal" && type == "filestore")) { cerr << "Must provide --data-path" << std::endl; usage(desc); @@ -2657,7 +3586,7 @@ int main(int argc, char **argv) usage(desc); return 1; } - if (op != "list" && + if (op != "list" && op != "apply-layout-settings" && vm.count("op") && vm.count("object")) { cerr << "Can't specify both --op and object command syntax" << std::endl; usage(desc); @@ -2669,7 +3598,7 @@ int main(int argc, char **argv) usage(desc); return 1; } - if (op != "list" && 
vm.count("object") && !vm.count("objcmd")) { + if (op != "list" && op != "apply-layout-settings" && vm.count("object") && !vm.count("objcmd")) { cerr << "Invalid syntax, missing command" << std::endl; usage(desc); return 1; @@ -2679,10 +3608,10 @@ int main(int argc, char **argv) usage(desc); return 1; } - outistty = isatty(STDOUT_FILENO); + outistty = isatty(STDOUT_FILENO) || tty; file_fd = fd_none; - if ((op == "export" || op == "get-osdmap" || op == "get-inc-osdmap") && !dry_run) { + if ((op == "export" || op == "export-remove" || op == "get-osdmap" || op == "get-inc-osdmap") && !dry_run) { if (!vm.count("file") || file == "-") { if (outistty) { cerr << "stdout is a tty and no --file filename specified" << std::endl; @@ -2692,7 +3621,7 @@ int main(int argc, char **argv) } else { file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666); } - } else if (op == "import" || op == "set-osdmap" || op == "set-inc-osdmap") { + } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap" || op == "pg-log-inject-dups") { if (!vm.count("file") || file == "-") { if (isatty(STDIN_FILENO)) { cerr << "stdin is a tty and no --file filename specified" << std::endl; @@ -2707,7 +3636,7 @@ int main(int argc, char **argv) ObjectStoreTool tool = ObjectStoreTool(file_fd, dry_run); if (vm.count("file") && file_fd == fd_none && !dry_run) { - cerr << "--file option only applies to import, export, " + cerr << "--file option only applies to import, dump-export, export, export-remove, " << "get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap" << std::endl; return 1; } @@ -2717,18 +3646,22 @@ int main(int argc, char **argv) perror(err.c_str()); return 1; } + int init_flags = 0; + if (vm.count("no-mon-config") > 0) { + init_flags |= CINIT_FLAG_NO_MON_CONFIG; + } auto cct = global_init( - NULL, ceph_options, CEPH_ENTITY_TYPE_OSD, - CODE_ENVIRONMENT_UTILITY_NODOUT, 0); - //CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + NULL, ceph_options, + CEPH_ENTITY_TYPE_OSD, 
+ CODE_ENVIRONMENT_UTILITY_NODOUT, + init_flags); common_init_finish(g_ceph_context); - g_conf = g_ceph_context->_conf; if (debug) { - g_conf->set_val_or_die("log_to_stderr", "true"); - g_conf->set_val_or_die("err_to_stderr", "true"); + g_conf().set_val_or_die("log_to_stderr", "true"); + g_conf().set_val_or_die("err_to_stderr", "true"); } - g_conf->apply_changes(NULL); + g_conf().apply_changes(nullptr); // Special list handling. Treating pretty_format as human readable, // with one object per line and not an enclosing array. @@ -2746,7 +3679,7 @@ int main(int argc, char **argv) // Special handling for filestore journal, so we can dump it without mounting if (op == "dump-journal" && type == "filestore") { - int ret = mydump_journal(formatter, jpath, g_conf->journal_dio); + int ret = mydump_journal(formatter, jpath, g_conf()->journal_dio); if (ret < 0) { cerr << "journal-path: " << jpath << ": " << cpp_strerror(ret) << std::endl; @@ -2756,6 +3689,16 @@ int main(int argc, char **argv) return 0; } + if (op == "dump-export") { + int ret = tool.dump_export(formatter); + if (ret < 0) { + cerr << "dump-export: " + << cpp_strerror(ret) << std::endl; + return 1; + } + return 0; + } + //Verify that data-path really exists struct stat st; if (::stat(dpath.c_str(), &st) == -1) { @@ -2764,13 +3707,27 @@ int main(int argc, char **argv) return 1; } - if (pgidstr.length() && !pgid.parse(pgidstr.c_str())) { + if (pgidstr.length() && pgidstr != "meta" && !pgid.parse(pgidstr.c_str())) { cerr << "Invalid pgid '" << pgidstr << "' specified" << std::endl; return 1; } - ObjectStore *fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags); - if (fs == NULL) { + //Verify that the journal-path really exists + if (type == "filestore") { + if (::stat(jpath.c_str(), &st) == -1) { + string err = string("journal-path: ") + jpath; + perror(err.c_str()); + return 1; + } + if (S_ISDIR(st.st_mode)) { + cerr << "journal-path: " << jpath << ": " + << cpp_strerror(EISDIR) << std::endl; + 
return 1; + } + } + + std::unique_ptr fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags); + if (!fs) { cerr << "Unable to create store of type " << type << std::endl; return 1; } @@ -2782,10 +3739,23 @@ int main(int argc, char **argv) return 1; } if (r > 0) { - cerr << "fsck found " << r << " errors" << std::endl; + cerr << "fsck status: " << r << " remaining error(s) and warning(s)" << std::endl; + return 1; + } + cout << "fsck success" << std::endl; + return 0; + } + if (op == "repair" || op == "repair-deep") { + int r = fs->repair(op == "repair-deep"); + if (r < 0) { + cerr << "repair failed: " << cpp_strerror(r) << std::endl; + return 1; + } + if (r > 0) { + cerr << "repair status: " << r << " remaining error(s) and warning(s)" << std::endl; return 1; } - cout << "fsck found no errors" << std::endl; + cout << "repair success" << std::endl; return 0; } if (op == "mkfs") { @@ -2800,7 +3770,7 @@ int main(int argc, char **argv) } int r = fs->mkfs(); if (r < 0) { - cerr << "fsck failed: " << cpp_strerror(r) << std::endl; + cerr << "mkfs failed: " << cpp_strerror(r) << std::endl; return 1; } return 0; @@ -2820,14 +3790,14 @@ int main(int argc, char **argv) target_type = string(bl.c_str(), bl.length() - 1); // drop \n } ::close(fd); - ObjectStore *targetfs = ObjectStore::create( + unique_ptr targetfs = ObjectStore::create( g_ceph_context, target_type, target_data_path, "", 0); - if (targetfs == NULL) { + if (!targetfs) { cerr << "Unable to open store of type " << target_type << std::endl; return 1; } - int r = dup(dpath, fs, target_data_path, targetfs); + int r = dup(dpath, fs.get(), target_data_path, targetfs.get()); if (r < 0) { cerr << "dup failed: " << cpp_strerror(r) << std::endl; return 1; @@ -2835,7 +3805,6 @@ int main(int argc, char **argv) return 0; } - ObjectStore::Sequencer *osr = new ObjectStore::Sequencer(__func__); int ret = fs->mount(); if (ret < 0) { if (ret == -EBUSY) { @@ -2848,9 +3817,10 @@ int main(int argc, char **argv) if (op == 
"fuse") { #ifdef HAVE_LIBFUSE - FuseStore fuse(fs, mountpoint); + FuseStore fuse(fs.get(), mountpoint); cout << "mounting fuse at " << mountpoint << " ..." << std::endl; int r = fuse.main(); + fs->umount(); if (r < 0) { cerr << "failed to mount fuse: " << cpp_strerror(r) << std::endl; return 1; @@ -2872,35 +3842,51 @@ int main(int argc, char **argv) #endif bufferlist bl; - OSDSuperblock superblock; - bufferlist::iterator p; - ret = fs->read(coll_t::meta(), OSD_SUPERBLOCK_GOBJECT, 0, 0, bl); - if (ret < 0) { - cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl; - goto out; - } + auto ch = fs->open_collection(coll_t::meta()); + std::unique_ptr superblock; + if (!no_superblock) { + superblock.reset(new OSDSuperblock); + bufferlist::const_iterator p; + ret = fs->read(ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl); + if (ret < 0) { + cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl; + goto out; + } - p = bl.begin(); - ::decode(superblock, p); + p = bl.cbegin(); + decode(*superblock, p); - if (debug) { - cerr << "Cluster fsid=" << superblock.cluster_fsid << std::endl; - } + if (debug) { + cerr << "Cluster fsid=" << superblock->cluster_fsid << std::endl; + } - if (debug) { - cerr << "Supported features: " << supported << std::endl; - cerr << "On-disk features: " << superblock.compat_features << std::endl; - } - if (supported.compare(superblock.compat_features) == -1) { - CompatSet unsupported = supported.unsupported(superblock.compat_features); - cerr << "On-disk OSD incompatible features set " - << unsupported << std::endl; - ret = -EINVAL; - goto out; + if (debug) { + cerr << "Supported features: " << supported << std::endl; + cerr << "On-disk features: " << superblock->compat_features << std::endl; + } + if (supported.compare(superblock->compat_features) == -1) { + CompatSet unsupported = supported.unsupported(superblock->compat_features); + cerr << "On-disk OSD incompatible features set " + << unsupported << std::endl; + 
ret = -EINVAL; + goto out; + } } if (op == "apply-layout-settings") { - ret = apply_layout_settings(fs, superblock, pool, pgid, dry_run); + int target_level = 0; + // Single positional argument with apply-layout-settings + // for target_level. + if (vm.count("object") && isdigit(object[0])) { + target_level = atoi(object.c_str()); + // This requires --arg1 to be specified since + // this is the third positional argument and normally + // used with object operations. + } else if (vm.count("arg1") && isdigit(arg1[0])) { + target_level = atoi(arg1.c_str()); + } + ceph_assert(superblock != nullptr); + ret = apply_layout_settings(fs.get(), *superblock, pool, pgid, dry_run, target_level); goto out; } @@ -2918,7 +3904,13 @@ int main(int argc, char **argv) if (vm.count("objcmd") && (objcmd == "remove-clone-metadata")) head = true; lookup_ghobject lookup(object, nspace, head); - if (action_on_all_objects(fs, lookup, debug)) { + if (pgidstr == "meta") + ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t::meta(), lookup, debug); + else if (pgidstr.length()) + ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t(pgid), lookup, debug); + else + ret = action_on_all_objects(fs.get(), lookup, debug); + if (ret) { throw std::runtime_error("Internal error"); } else { if (lookup.size() != 1) { @@ -2950,7 +3942,7 @@ int main(int argc, char **argv) throw std::runtime_error(ss.str()); } vector::iterator i = array.begin(); - assert(i != array.end()); + ceph_assert(i != array.end()); if (i->type() != json_spirit::str_type) { ss << "Object '" << object << "' must be a JSON array with the first element a string"; @@ -2988,6 +3980,14 @@ int main(int argc, char **argv) ret = 1; goto out; } + if (pgidstr != "meta") { + auto ch = fs->open_collection(coll_t(pgid)); + if (!ghobj.match(fs->collection_bits(ch), pgid.ps())) { + stringstream ss; + ss << "object " << ghobj << " not contained by pg " << pgid; + throw std::runtime_error(ss.str()); + } + } } } catch (std::runtime_error& e) { 
cerr << e.what() << std::endl; @@ -3000,7 +4000,10 @@ int main(int argc, char **argv) // The ops which require --pgid option are checked here and // mentioned in the usage for --pgid. if ((op == "info" || op == "log" || op == "remove" || op == "export" - || op == "rm-past-intervals" || op == "mark-complete") && + || op == "export-remove" || op == "mark-complete" + || op == "reset-last-complete" + || op == "trim-pg-log" + || op == "pg-log-inject-dups") && pgidstr.length() == 0) { cerr << "Must provide pgid" << std::endl; usage(desc); @@ -3009,9 +4012,9 @@ int main(int argc, char **argv) } if (op == "import") { - + ceph_assert(superblock != nullptr); try { - ret = tool.do_import(fs, superblock, force, pgidstr, *osr); + ret = tool.do_import(fs.get(), *superblock, force, pgidstr); } catch (const buffer::error &e) { cerr << "do_import threw exception error " << e.what() << std::endl; @@ -3040,9 +4043,10 @@ int main(int argc, char **argv) bufferlist bl; OSDMap osdmap; if (epoch == 0) { - epoch = superblock.current_epoch; + ceph_assert(superblock != nullptr); + epoch = superblock->current_epoch; } - ret = get_osdmap(fs, epoch, osdmap, bl); + ret = get_osdmap(fs.get(), epoch, osdmap, bl); if (ret) { cerr << "Failed to get osdmap#" << epoch << ": " << cpp_strerror(ret) << std::endl; @@ -3061,15 +4065,16 @@ int main(int argc, char **argv) if (ret < 0) { cerr << "Failed to read osdmap " << cpp_strerror(ret) << std::endl; } else { - ret = set_osdmap(fs, epoch, bl, force, *osr); + ret = set_osdmap(fs.get(), epoch, bl, force); } goto out; } else if (op == "get-inc-osdmap") { bufferlist bl; if (epoch == 0) { - epoch = superblock.current_epoch; + ceph_assert(superblock != nullptr); + epoch = superblock->current_epoch; } - ret = get_inc_osdmap(fs, epoch, bl); + ret = get_inc_osdmap(fs.get(), epoch, bl); if (ret < 0) { cerr << "Failed to get incremental osdmap# " << epoch << ": " << cpp_strerror(ret) << std::endl; @@ -3089,7 +4094,7 @@ int main(int argc, char **argv) cerr << "Failed 
to read incremental osdmap " << cpp_strerror(ret) << std::endl; goto out; } else { - ret = set_inc_osdmap(fs, epoch, bl, force, *osr); + ret = set_inc_osdmap(fs.get(), epoch, bl, force); } goto out; } else if (op == "update-mon-db") { @@ -3097,16 +4102,19 @@ int main(int argc, char **argv) cerr << "Please specify the path to monitor db to update" << std::endl; ret = -EINVAL; } else { - ret = update_mon_db(*fs, superblock, dpath + "/keyring", mon_store_path); + ceph_assert(superblock != nullptr); + ret = update_mon_db(*fs, *superblock, dpath + "/keyring", mon_store_path); } goto out; } - log_oid = OSD::make_pg_log_oid(pgid); - biginfo_oid = OSD::make_pg_biginfo_oid(pgid); - if (op == "remove") { - ret = initiate_new_remove_pg(fs, pgid, *osr); + if (!force && !dry_run) { + cerr << "Please use export-remove or you must use --force option" << std::endl; + ret = -EINVAL; + goto out; + } + ret = initiate_new_remove_pg(fs.get(), pgid); if (ret < 0) { cerr << "PG '" << pgid << "' not found" << std::endl; goto out; @@ -3117,34 +4125,58 @@ int main(int argc, char **argv) if (op == "fix-lost") { boost::scoped_ptr action; - action.reset(new do_fix_lost(osr)); + action.reset(new do_fix_lost()); if (pgidstr.length()) - ret = action_on_all_objects_in_exact_pg(fs, coll_t(pgid), *action, debug); + ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t(pgid), *action, debug); else - ret = action_on_all_objects(fs, *action, debug); + ret = action_on_all_objects(fs.get(), *action, debug); goto out; } if (op == "list") { - ret = do_list(fs, pgidstr, object, nspace, formatter, debug, + ret = do_list(fs.get(), pgidstr, object, nspace, formatter, debug, human_readable, head); if (ret < 0) { cerr << "do_list failed: " << cpp_strerror(ret) << std::endl; } goto out; } + if (op == "list-slow-omap") { + ret = do_list_slow(fs.get(), pgidstr, object, slow_threshold, formatter, debug, + human_readable); + if (ret < 0) { + cerr << "do_list failed: " << cpp_strerror(ret) << std::endl; + } + goto 
out; + } if (op == "dump-super") { + ceph_assert(superblock != nullptr); formatter->open_object_section("superblock"); - superblock.dump(formatter); + superblock->dump(formatter); formatter->close_section(); formatter->flush(cout); cout << std::endl; goto out; } + if (op == "statfs") { + store_statfs_t statsbuf; + ret = fs->statfs(&statsbuf); + if (ret < 0) { + cerr << "error from statfs: " << cpp_strerror(ret) << std::endl; + goto out; + } + formatter->open_object_section("statfs"); + statsbuf.dump(formatter); + formatter->close_section(); + formatter->flush(cout); + cout << std::endl; + goto out; + } + if (op == "meta-list") { - ret = do_meta(fs, object, formatter, debug, human_readable); + ret = do_meta(fs.get(), object, formatter, debug, human_readable); if (ret < 0) { cerr << "do_meta failed: " << cpp_strerror(ret) << std::endl; } @@ -3198,9 +4230,9 @@ int main(int argc, char **argv) // If not an object command nor any of the ops handled below, then output this usage // before complaining about a bad pgid - if (!vm.count("objcmd") && op != "export" && op != "info" && op != "log" && op != "rm-past-intervals" && op != "mark-complete") { - cerr << "Must provide --op (info, log, remove, mkfs, fsck, export, import, list, fix-lost, list-pgs, rm-past-intervals, dump-journal, dump-super, meta-list, " - "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete)" + if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups" && op != "pg-log-inject-dups") { + cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, " + "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, trim-pg-log-dups statfs)" << std::endl; usage(desc); ret = 1; @@ -3216,13 +4248,18 @@ int main(int argc, char 
**argv) ret = 0; if (objcmd == "remove" || objcmd == "removeall") { bool all = (objcmd == "removeall"); - ret = do_remove_object(fs, coll, ghobj, all, force, *osr); + enum rmtype type = BOTH; + if (rmtypestr == "nosnapmap") + type = NOSNAPMAP; + else if (rmtypestr == "snapmap") + type = SNAPMAP; + ret = do_remove_object(fs.get(), coll, ghobj, all, force, type); goto out; } else if (objcmd == "list-attrs") { - ret = do_list_attrs(fs, coll, ghobj); + ret = do_list_attrs(fs.get(), coll, ghobj); goto out; } else if (objcmd == "list-omap") { - ret = do_list_omap(fs, coll, ghobj); + ret = do_list_omap(fs.get(), coll, ghobj); goto out; } else if (objcmd == "get-bytes" || objcmd == "set-bytes") { if (objcmd == "get-bytes") { @@ -3237,7 +4274,7 @@ int main(int argc, char **argv) goto out; } } - ret = do_get_bytes(fs, coll, ghobj, fd); + ret = do_get_bytes(fs.get(), coll, ghobj, fd); if (fd != STDOUT_FILENO) close(fd); } else { @@ -3258,7 +4295,7 @@ int main(int argc, char **argv) goto out; } } - ret = do_set_bytes(fs, coll, ghobj, fd, *osr); + ret = do_set_bytes(fs.get(), coll, ghobj, fd); if (fd != STDIN_FILENO) close(fd); } @@ -3269,7 +4306,7 @@ int main(int argc, char **argv) ret = 1; goto out; } - ret = do_get_attr(fs, coll, ghobj, arg1); + ret = do_get_attr(fs.get(), coll, ghobj, arg1); goto out; } else if (objcmd == "set-attr") { if (vm.count("arg1") == 0) { @@ -3294,7 +4331,7 @@ int main(int argc, char **argv) goto out; } } - ret = do_set_attr(fs, coll, ghobj, arg1, fd, *osr); + ret = do_set_attr(fs.get(), coll, ghobj, arg1, fd); if (fd != STDIN_FILENO) close(fd); goto out; @@ -3304,7 +4341,7 @@ int main(int argc, char **argv) ret = 1; goto out; } - ret = do_rm_attr(fs, coll, ghobj, arg1, *osr); + ret = do_rm_attr(fs.get(), coll, ghobj, arg1); goto out; } else if (objcmd == "get-omap") { if (vm.count("arg1") == 0) { @@ -3312,7 +4349,7 @@ int main(int argc, char **argv) ret = 1; goto out; } - ret = do_get_omap(fs, coll, ghobj, arg1); + ret = do_get_omap(fs.get(), 
coll, ghobj, arg1); goto out; } else if (objcmd == "set-omap") { if (vm.count("arg1") == 0) { @@ -3337,7 +4374,7 @@ int main(int argc, char **argv) goto out; } } - ret = do_set_omap(fs, coll, ghobj, arg1, fd, *osr); + ret = do_set_omap(fs.get(), coll, ghobj, arg1, fd); if (fd != STDIN_FILENO) close(fd); goto out; @@ -3347,7 +4384,7 @@ int main(int argc, char **argv) ret = 1; goto out; } - ret = do_rm_omap(fs, coll, ghobj, arg1, *osr); + ret = do_rm_omap(fs.get(), coll, ghobj, arg1); goto out; } else if (objcmd == "get-omaphdr") { if (vm.count("arg1")) { @@ -3355,7 +4392,7 @@ int main(int argc, char **argv) ret = 1; goto out; } - ret = do_get_omaphdr(fs, coll, ghobj); + ret = do_get_omaphdr(fs.get(), coll, ghobj); goto out; } else if (objcmd == "set-omaphdr") { // Extra arg @@ -3381,7 +4418,7 @@ int main(int argc, char **argv) goto out; } } - ret = do_set_omaphdr(fs, coll, ghobj, fd, *osr); + ret = do_set_omaphdr(fs.get(), coll, ghobj, fd); if (fd != STDIN_FILENO) close(fd); goto out; @@ -3392,9 +4429,20 @@ int main(int argc, char **argv) ret = 1; goto out; } - ret = print_obj_info(fs, coll, ghobj, formatter); + ret = print_obj_info(fs.get(), coll, ghobj, formatter); goto out; - } else if (objcmd == "set-size") { + } else if (objcmd == "corrupt-info") { // Undocumented testing feature + // There should not be any other arguments + if (vm.count("arg1") || vm.count("arg2")) { + usage(desc); + ret = 1; + goto out; + } + ret = corrupt_info(fs.get(), coll, ghobj, formatter); + goto out; + } else if (objcmd == "set-size" || objcmd == "corrupt-size") { + // Undocumented testing feature + bool corrupt = (objcmd == "corrupt-size"); // Extra arg if (vm.count("arg1") == 0 || vm.count("arg2")) { usage(desc); @@ -3407,8 +4455,11 @@ int main(int argc, char **argv) goto out; } uint64_t size = atoll(arg1.c_str()); - ret = set_size(fs, coll, ghobj, size, formatter, *osr); + ret = set_size(fs.get(), coll, ghobj, size, formatter, corrupt); goto out; + } else if (objcmd == 
"clear-data-digest") { + ret = clear_data_digest(fs.get(), coll, ghobj); + goto out; } else if (objcmd == "clear-snapset") { // UNDOCUMENTED: For testing zap SnapSet // IGNORE extra args since not in usage anyway @@ -3417,7 +4468,7 @@ int main(int argc, char **argv) ret = 1; goto out; } - ret = clear_snapset(fs, coll, ghobj, arg1, *osr); + ret = clear_snapset(fs.get(), coll, ghobj, arg1); goto out; } else if (objcmd == "remove-clone-metadata") { // Extra arg @@ -3437,7 +4488,7 @@ int main(int argc, char **argv) goto out; } snapid_t cloneid = atoi(arg1.c_str()); - ret = remove_clone(fs, coll, ghobj, cloneid, force, *osr); + ret = remove_clone(fs.get(), coll, ghobj, cloneid, force); goto out; } cerr << "Unknown object command '" << objcmd << "'" << std::endl; @@ -3446,9 +4497,8 @@ int main(int argc, char **argv) goto out; } - bufferlist bl; map_epoch = 0; - ret = PG::peek_map_epoch(fs, pgid, &map_epoch, &bl); + ret = PG::peek_map_epoch(fs.get(), pgid, &map_epoch); if (ret < 0) cerr << "peek_map_epoch reports error" << std::endl; if (debug) @@ -3457,13 +4507,12 @@ int main(int argc, char **argv) pg_info_t info(pgid); PastIntervals past_intervals; __u8 struct_ver; - ret = PG::read_info(fs, pgid, coll, bl, info, past_intervals, - struct_ver); + ret = PG::read_info(fs.get(), pgid, coll, info, past_intervals, struct_ver); if (ret < 0) { cerr << "read_info error " << cpp_strerror(ret) << std::endl; goto out; } - if (struct_ver < PG::compat_struct_v) { + if (struct_ver < PG::get_compat_struct_v()) { cerr << "PG is too old to upgrade, use older Ceph version" << std::endl; ret = -EFAULT; goto out; @@ -3471,10 +4520,18 @@ int main(int argc, char **argv) if (debug) cerr << "struct_v " << (int)struct_ver << std::endl; - if (op == "export") { - ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, superblock, past_intervals); - if (ret == 0) + if (op == "export" || op == "export-remove") { + ceph_assert(superblock != nullptr); + ret = tool.do_export(cct.get(), 
fs.get(), coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals); + if (ret == 0) { cerr << "Export successful" << std::endl; + if (op == "export-remove") { + ret = initiate_new_remove_pg(fs.get(), pgid); + // Export succeeded, so pgid is there + ceph_assert(ret == 0); + cerr << "Remove successful" << std::endl; + } + } } else if (op == "info") { formatter->open_object_section("info"); info.dump(formatter); @@ -3484,66 +4541,121 @@ int main(int argc, char **argv) } else if (op == "log") { PGLog::IndexedLog log; pg_missing_t missing; - ret = get_log(fs, struct_ver, coll, pgid, info, log, missing); + ret = get_log(cct.get(), fs.get(), struct_ver, pgid, info, log, missing); if (ret < 0) goto out; dump_log(formatter, cout, log, missing); - } else if (op == "rm-past-intervals") { + } else if (op == "mark-complete") { ObjectStore::Transaction tran; ObjectStore::Transaction *t = &tran; - if (struct_ver < PG::compat_struct_v) { - cerr << "Can't remove past-intervals, version mismatch " << (int)struct_ver - << " (pg) < compat " << (int)PG::compat_struct_v << " (tool)" - << std::endl; - ret = -EFAULT; - goto out; + if (struct_ver < PG::get_compat_struct_v()) { + cerr << "Can't mark-complete, version mismatch " << (int)struct_ver + << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)" + << std::endl; + ret = 1; + goto out; } - cout << "Remove past-intervals " << past_intervals << std::endl; + cout << "Marking complete " << std::endl; + ceph_assert(superblock != nullptr); + info.last_update = eversion_t(superblock->current_epoch, info.last_update.version + 1); + info.last_backfill = hobject_t::get_max(); + info.last_epoch_started = superblock->current_epoch; + info.history.last_epoch_started = superblock->current_epoch; + info.history.last_epoch_clean = superblock->current_epoch; past_intervals.clear(); - if (dry_run) { - ret = 0; - goto out; - } - ret = write_info(*t, map_epoch, info, past_intervals); - if (ret == 0) { - fs->apply_transaction(osr, 
std::move(*t)); - cout << "Removal succeeded" << std::endl; + if (!dry_run) { + ret = write_info(*t, map_epoch, info, past_intervals); + if (ret != 0) + goto out; + auto ch = fs->open_collection(coll_t(pgid)); + fs->queue_transaction(ch, std::move(*t)); + } + cout << "Marking complete succeeded" << std::endl; + } else if (op == "trim-pg-log") { + ret = do_trim_pg_log(fs.get(), coll, info, pgid, + map_epoch, past_intervals); + if (ret < 0) { + cerr << "Error trimming pg log: " << cpp_strerror(ret) << std::endl; + goto out; + } + cout << "Finished trimming pg log" << std::endl; + goto out; + } else if (op == "trim-pg-log-dups") { + ret = do_trim_pg_log_dups(fs.get(), coll, info, pgid, + map_epoch, past_intervals); + if (ret < 0) { + cerr << "Error trimming pg log dups: " << cpp_strerror(ret) << std::endl; + goto out; + } + cout << "Finished trimming pg log dups" << std::endl; + goto out; + } else if (op == "reset-last-complete") { + if (!force) { + std::cerr << "WARNING: reset-last-complete is extremely dangerous and almost " + << "certain to lead to permanent data loss unless you know exactly " + << "what you are doing. Pass --force to proceed anyway." 
+ << std::endl; + ret = -EINVAL; + goto out; } - } else if (op == "mark-complete") { ObjectStore::Transaction tran; ObjectStore::Transaction *t = &tran; - if (struct_ver < PG::compat_struct_v) { - cerr << "Can't mark-complete, version mismatch " << (int)struct_ver - << " (pg) < compat " << (int)PG::compat_struct_v << " (tool)" + if (struct_ver < PG::get_compat_struct_v()) { + cerr << "Can't reset-last-complete, version mismatch " << (int)struct_ver + << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)" << std::endl; ret = 1; goto out; } - cout << "Marking complete " << std::endl; + cout << "Reseting last_complete " << std::endl; - info.last_update = eversion_t(superblock.current_epoch, info.last_update.version + 1); - info.last_backfill = hobject_t::get_max(); - info.last_epoch_started = superblock.current_epoch; - info.history.last_epoch_started = superblock.current_epoch; - info.history.last_epoch_clean = superblock.current_epoch; - past_intervals.clear(); + info.last_complete = info.last_update; if (!dry_run) { ret = write_info(*t, map_epoch, info, past_intervals); if (ret != 0) goto out; - fs->apply_transaction(osr, std::move(*t)); + fs->queue_transaction(ch, std::move(*t)); } - cout << "Marking complete succeeded" << std::endl; + cout << "Reseting last_complete succeeded" << std::endl; + + } else if (op == "pg-log-inject-dups") { + if (!vm.count("file") || file == "-") { + cerr << "Must provide file containing JSON dups entries" << std::endl; + ret = 1; + goto out; + } + if (debug) + cerr << "opening file " << file << std::endl; + + ifstream json_file_stream(file , std::ifstream::in); + if (!json_file_stream.is_open()) { + cerr << "unable to open file " << file << std::endl; + ret = -1; + goto out; + } + json_spirit::mValue result; + try { + if (!json_spirit::read(json_file_stream, result)) + throw std::runtime_error("unparseable JSON " + file); + if (result.type() != json_spirit::array_type) { + cerr << "result is not an array_type - type=" << 
result.type() << std::endl; + throw std::runtime_error("not JSON array_type " + file); + } + do_dups_inject_from_json(fs.get(), pgid, result, debug); + } catch (const std::runtime_error &e) { + cerr << e.what() << std::endl;; + return -1; + } } else { - assert(!"Should have already checked for valid --op"); + ceph_assert(!"Should have already checked for valid --op"); } } else { cerr << "PG '" << pgid << "' not found" << std::endl; @@ -3551,8 +4663,17 @@ int main(int argc, char **argv) } out: + if (debug) { + ostringstream ostr; + Formatter* f = Formatter::create("json-pretty", "json-pretty", "json-pretty"); + cct->get_perfcounters_collection()->dump_formatted(f, false); + ostr << "ceph-objectstore-tool "; + f->flush(ostr); + delete f; + cout << ostr.str() << std::endl; + } + int r = fs->umount(); - delete osr; if (r < 0) { cerr << "umount failed: " << cpp_strerror(r) << std::endl; // If no previous error, then use umount() error