#include "common/Formatter.h"
#include "common/errno.h"
#include "common/ceph_argparse.h"
+#include "common/url_escape.h"
#include "global/global_init.h"
}
};
+/**
+ * Object visitor that times a complete omap iteration of every object it is
+ * invoked on and remembers those whose total time reaches `threshold`.
+ *
+ * Each remembered entry holds, in order: the collection, the object id, the
+ * time spent in seek_to_first(), the time spent in the final next() call,
+ * the total elapsed time, and the URL-escaped tail key reported by the
+ * omap iterator.
+ */
+struct lookup_slow_ghobject : public action_on_object_t {
+  list<tuple<
+    coll_t,
+    ghobject_t,
+    ceph::signedspan,
+    ceph::signedspan,
+    ceph::signedspan,
+    string> > _objects;
+  const string _name;
+  // Compared against the total time via make_timespan(); the
+  // --slow-omap-threshold option help describes it as seconds.
+  double threshold;
+
+  // Last collection announced on stderr, so each one is announced once per run
+  // of consecutive objects.
+  coll_t last_coll;
+
+  lookup_slow_ghobject(const string& name, double _threshold) :
+    _name(name), threshold(_threshold) { }
+
+  /// Truncate a time span to whole seconds for reporting.
+  static auto whole_seconds(const ceph::signedspan& span) {
+    return std::chrono::duration_cast<std::chrono::seconds>(span).count();
+  }
+
+  /**
+   * Walk the omap of `ghobj` from the first key to the end, timing the walk.
+   *
+   * Prints an error to stderr and returns without recording anything when no
+   * omap iterator can be obtained. Otherwise, if the total elapsed time is at
+   * least `threshold`, the object is appended to `_objects` and reported on
+   * stderr. `oi` is not used.
+   */
+  void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
+    ObjectMap::ObjectMapIterator iter;
+    const auto call_start = mono_clock::now();
+    // Subtracting a time point from itself yields a zero-length span.
+    ceph::signedspan first_seek_time = call_start - call_start;
+    ceph::signedspan last_seek_time = first_seek_time;
+    {
+      auto ch = store->open_collection(coll);
+      iter = store->get_omap_iterator(ch, ghobj);
+      if (!iter) {
+        cerr << "omap_get_iterator: " << cpp_strerror(ENOENT)
+             << " obj:" << ghobj
+             << std::endl;
+        return;
+      }
+      auto step_start = mono_clock::now();
+      iter->seek_to_first();
+      first_seek_time = mono_clock::now() - step_start;
+
+      // Only the duration of the most recent next() is kept.
+      while (iter->valid()) {
+        step_start = mono_clock::now();
+        iter->next();
+        last_seek_time = mono_clock::now() - step_start;
+      }
+    }
+    // Sample the total before any logging so console I/O is not counted.
+    const ceph::signedspan total_time = mono_clock::now() - call_start;
+
+    if (coll != last_coll) {
+      cerr << ">>> inspecting coll" << coll << std::endl;
+      last_coll = coll;
+    }
+
+    if (total_time >= make_timespan(threshold)) {
+      // Escape once and reuse for both the stored record and the log line.
+      const string tail_key = url_escape(iter->tail_key());
+      _objects.emplace_back(coll, ghobj,
+                            first_seek_time, last_seek_time, total_time,
+                            tail_key);
+      cerr << ">>>>> found obj " << ghobj
+           << " first_seek_time " << whole_seconds(first_seek_time)
+           << " last_seek_time " << whole_seconds(last_seek_time)
+           << " total_time " << whole_seconds(total_time)
+           << " tail key: " << tail_key
+           << std::endl;
+    }
+  }
+
+  /// Number of objects recorded so far.
+  int size() const {
+    return static_cast<int>(_objects.size());
+  }
+
+  /**
+   * Write every recorded object to `f` and flush the result to stdout.
+   *
+   * With `human_readable` set, each object is flushed on its own line and the
+   * "coll" field is emitted only for collections that are not PGs. Otherwise
+   * all objects are wrapped in a single "objects" section that is flushed
+   * once at the end. Times are reported in whole seconds.
+   */
+  void dump(Formatter *f, bool human_readable) const {
+    if (!human_readable)
+      f->open_array_section("objects");
+    // Bind by const reference: no per-entry copy of the tuple fields.
+    for (const auto& [coll, ghobj, first_seek_time, last_seek_time,
+                      total_time, tail_key] : _objects) {
+      f->open_array_section("object");
+
+      spg_t pgid;
+      const bool is_pg = coll.is_pg(&pgid);
+      if (is_pg)
+        f->dump_string("pgid", stringify(pgid));
+      if (!is_pg || !human_readable)
+        f->dump_string("coll", coll.to_str());
+      f->dump_object("ghobject", ghobj);
+      f->open_object_section("times");
+      f->dump_int("first_seek_time", whole_seconds(first_seek_time));
+      f->dump_int("last_seek_time", whole_seconds(last_seek_time));
+      f->dump_int("total_time", whole_seconds(total_time));
+      f->dump_string("tail_key", tail_key);
+      f->close_section();
+
+      f->close_section();
+      if (human_readable) {
+        f->flush(cout);
+        cout << std::endl;
+      }
+    }
+    if (!human_readable) {
+      f->close_section();
+      f->flush(cout);
+      cout << std::endl;
+    }
+  }
+};
+
int file_fd = fd_none;
bool debug;
bool force = false;
+bool no_superblock = false;
+
super_header sh;
static int get_fd_data(int fd, bufferlist &bl)
bool finished = false;
std::condition_variable cond;
std::mutex m;
- txn->register_on_complete(make_lambda_context([&]() {
+ txn->register_on_complete(make_lambda_context([&](int) {
std::unique_lock lock{m};
finished = true;
cond.notify_one();
coll_t coll(info.pgid);
ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
map<string,bufferlist> km;
+ string key_to_remove;
pg_info_t last_written_info;
- int ret = PG::_prepare_write_info(
+ int ret = prepare_info_keymap(
g_ceph_context,
- &km, epoch,
+ &km, &key_to_remove,
+ epoch,
info,
last_written_info,
past_intervals,
true, true, false);
if (ret) cerr << "Failed to write info" << std::endl;
t.omap_setkeys(coll, pgmeta_oid, km);
+ if (!key_to_remove.empty()) {
+ t.omap_rmkey(coll, pgmeta_oid, key_to_remove);
+ }
return ret;
}
if (!dry_run) {
ObjectStore::Transaction t;
ch = store->create_new_collection(coll);
- PG::_create(
+ create_pg_collection(
t, pgid,
pgid.get_split_bits(ms.osdmap.get_pg_pool(pgid.pool())->get_pg_num()));
- PG::_init(t, pgid, NULL);
+ init_pg_ondisk(t, pgid, NULL);
// mark this coll for removal until we're done
map<string,bufferlist> values;
cerr << "done, clearing removal flag" << std::endl;
if (!dry_run) {
- set<string> remove;
- remove.insert("_remove");
- t.omap_rmkeys(coll, pgid.make_pgmeta_oid(), remove);
+ t.omap_rmkey(coll, pgid.make_pgmeta_oid(), "_remove");
wait_until_done(&t, [&] {
store->queue_transaction(ch, std::move(t));
// make sure we flush onreadable items before mapper/driver are destroyed.
return 0;
}
+/**
+ * Run the slow-omap lookup and print what it found.
+ *
+ * Visits the objects of the PG named by `pgidstr`, or every object in the
+ * store when `pgidstr` is empty, with a lookup_slow_ghobject built from
+ * `object` and `threshold`.
+ *
+ * @return the traversal's result if it is non-zero (nothing is dumped in that
+ *         case); otherwise 0 after dumping the findings through `formatter`.
+ */
+int do_list_slow(ObjectStore *store, string pgidstr, string object,
+                 double threshold, Formatter *formatter, bool debug, bool human_readable)
+{
+  lookup_slow_ghobject lookup(object, threshold);
+
+  // An empty pgid selects the whole store.
+  const int rc = pgidstr.empty()
+    ? action_on_all_objects(store, lookup, debug)
+    : action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
+  if (rc != 0)
+    return rc;
+
+  lookup.dump(formatter, human_readable);
+  formatter->flush(cout);
+  return 0;
+}
+
int do_meta(ObjectStore *store, string object, Formatter *formatter, bool debug, bool human_readable)
{
int r;
if (!(force && !all))
return r;
}
- if (!ss.snaps.empty() && !all) {
+// cout << "snapset " << ss << std::endl;
+ if (!ss.clone_snaps.empty() && !all) {
if (force) {
cout << "WARNING: only removing "
<< (ghobj.hobj.is_head() ? "head" : "snapdir")
- << " with snapshots present" << std::endl;
- ss.snaps.clear();
+ << " with clones present" << std::endl;
+ ss.clone_snaps.clear();
} else {
- cerr << "Snapshots are present, use removeall to delete everything" << std::endl;
+ cerr << "Clones are present, use removeall to delete everything"
+ << std::endl;
return -EINVAL;
}
}
OSDriver::OSTransaction _t(driver.get_transaction(&t));
ghobject_t snapobj = ghobj;
- for (vector<snapid_t>::iterator i = ss.snaps.begin() ;
- i != ss.snaps.end() ; ++i) {
- snapobj.hobj.snap = *i;
- cout << "remove " << snapobj << std::endl;
+ for (auto& p : ss.clone_snaps) {
+ snapobj.hobj.snap = p.first;
+ cout << "remove clone " << snapobj << std::endl;
if (!dry_run) {
r = remove_object(coll, snapobj, mapper, &_t, &t, type);
if (r < 0)
{
ObjectStore::Transaction tran;
ObjectStore::Transaction *t = &tran;
- set<string> keys;
-
- keys.insert(key);
if (debug)
cerr << "Rm_omap " << ghobj << std::endl;
if (dry_run)
return 0;
- t->omap_rmkeys(coll, ghobj, keys);
+ t->omap_rmkey(coll, ghobj, key);
auto ch = store->open_collection(coll);
store->queue_transaction(ch, std::move(*t));
<< cpp_strerror(r) << std::endl;
}
}
+ gr = store->dump_onode(ch, ghobj, "onode", formatter);
+
formatter->close_section();
formatter->flush(cout);
cout << std::endl;
// Use "seq" to just corrupt SnapSet.seq
if (arg == "corrupt" || arg == "seq")
ss.seq = 0;
- // Use "snaps" to just clear SnapSet.snaps
+ // Use "snaps" to just clear SnapSet.clone_snaps
if (arg == "corrupt" || arg == "snaps")
- ss.snaps.clear();
+ ss.clone_snaps.clear();
// By default just clear clone, clone_overlap and clone_size
if (arg == "corrupt")
arg = "";
boost::optional<std::string> nspace;
spg_t pgid;
unsigned epoch = 0;
+ unsigned slow_threshold = 16;
ghobject_t ghobj;
bool human_readable;
Formatter *formatter;
- bool head;
+ bool head, tty;
po::options_description desc("Allowed options");
desc.add_options()
("pool", po::value<string>(&pool),
"Pool name, mandatory for apply-layout-settings if --pgid is not specified")
("op", po::value<string>(&op),
- "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
+ "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, list-slow-omap, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
"get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log]")
("epoch", po::value<unsigned>(&epoch),
"epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
("format", po::value<string>(&format)->default_value("json-pretty"),
"Output format which may be json, json-pretty, xml, xml-pretty")
("debug", "Enable diagnostic output to stderr")
+ ("no-mon-config", "Do not contact mons for config")
+ ("no-superblock", "Do not read superblock")
("force", "Ignore some types of errors and proceed with operation - USE WITH CAUTION: CORRUPTION POSSIBLE NOW OR IN THE FUTURE")
("skip-journal-replay", "Disable journal replay")
("skip-mount-omap", "Disable mounting of omap")
("head", "Find head/snapdir when searching for objects by name")
("dry-run", "Don't modify the objectstore")
+ ("tty", "Treat stdout as a tty (no binary data)")
("namespace", po::value<string>(&argnspace), "Specify namespace when searching for objects")
("rmtype", po::value<string>(&rmtypestr), "Specify corrupting object removal 'snapmap' or 'nosnapmap' - TESTING USE ONLY")
+ ("slow-omap-threshold", po::value<unsigned>(&slow_threshold),
+ "Threshold (in seconds) to consider omap listing slow (for op=list-slow-omap)")
;
po::options_description positional("Positional options");
force = (vm.count("force") > 0);
+ no_superblock = (vm.count("no-superblock") > 0);
+
if (vm.count("namespace"))
nspace = argnspace;
dry_run = (vm.count("dry-run") > 0);
+ tty = (vm.count("tty") > 0);
osflagbits_t flags = 0;
if (dry_run || vm.count("skip-journal-replay"))
usage(desc);
return 1;
}
- outistty = isatty(STDOUT_FILENO);
+ outistty = isatty(STDOUT_FILENO) || tty;
file_fd = fd_none;
if ((op == "export" || op == "export-remove" || op == "get-osdmap" || op == "get-inc-osdmap") && !dry_run) {
perror(err.c_str());
return 1;
}
+ int init_flags = 0;
+ if (vm.count("no-mon-config") > 0) {
+ init_flags |= CINIT_FLAG_NO_MON_CONFIG;
+ }
auto cct = global_init(
NULL, ceph_options,
CEPH_ENTITY_TYPE_OSD,
CODE_ENVIRONMENT_UTILITY_NODOUT,
- 0);
+ init_flags);
common_init_finish(g_ceph_context);
if (debug) {
g_conf().set_val_or_die("log_to_stderr", "true");
return 1;
}
if (r > 0) {
- cerr << "fsck found " << r << " errors" << std::endl;
+ cerr << "fsck status: " << r << " remaining error(s) and warning(s)" << std::endl;
return 1;
}
- cout << "fsck found no errors" << std::endl;
+ cout << "fsck success" << std::endl;
return 0;
}
if (op == "repair" || op == "repair-deep") {
return 1;
}
if (r > 0) {
- cerr << "repair found " << r << " errors" << std::endl;
+ cerr << "repair status: " << r << " remaining error(s) and warning(s)" << std::endl;
return 1;
}
- cout << "repair found no errors" << std::endl;
+ cout << "repair success" << std::endl;
return 0;
}
if (op == "mkfs") {
#endif
bufferlist bl;
- OSDSuperblock superblock;
auto ch = fs->open_collection(coll_t::meta());
- bufferlist::const_iterator p;
- ret = fs->read(ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
- if (ret < 0) {
- cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl;
- goto out;
- }
+ std::unique_ptr<OSDSuperblock> superblock;
+ if (!no_superblock) {
+ superblock.reset(new OSDSuperblock);
+ bufferlist::const_iterator p;
+ ret = fs->read(ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
+ if (ret < 0) {
+ cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
- p = bl.cbegin();
- decode(superblock, p);
+ p = bl.cbegin();
+ decode(*superblock, p);
- if (debug) {
- cerr << "Cluster fsid=" << superblock.cluster_fsid << std::endl;
- }
+ if (debug) {
+ cerr << "Cluster fsid=" << superblock->cluster_fsid << std::endl;
+ }
- if (debug) {
- cerr << "Supported features: " << supported << std::endl;
- cerr << "On-disk features: " << superblock.compat_features << std::endl;
- }
- if (supported.compare(superblock.compat_features) == -1) {
- CompatSet unsupported = supported.unsupported(superblock.compat_features);
- cerr << "On-disk OSD incompatible features set "
- << unsupported << std::endl;
- ret = -EINVAL;
- goto out;
+ if (debug) {
+ cerr << "Supported features: " << supported << std::endl;
+ cerr << "On-disk features: " << superblock->compat_features << std::endl;
+ }
+ if (supported.compare(superblock->compat_features) == -1) {
+ CompatSet unsupported = supported.unsupported(superblock->compat_features);
+ cerr << "On-disk OSD incompatible features set "
+ << unsupported << std::endl;
+ ret = -EINVAL;
+ goto out;
+ }
}
if (op == "apply-layout-settings") {
} else if (vm.count("arg1") && isdigit(arg1[0])) {
target_level = atoi(arg1.c_str());
}
- ret = apply_layout_settings(fs, superblock, pool, pgid, dry_run, target_level);
+ ceph_assert(superblock != nullptr);
+ ret = apply_layout_settings(fs, *superblock, pool, pgid, dry_run, target_level);
goto out;
}
ret = 1;
goto out;
}
+ if (pgidstr != "meta") {
+ auto ch = fs->open_collection(coll_t(pgid));
+ if (!ghobj.match(fs->collection_bits(ch), pgid.ps())) {
+ stringstream ss;
+ ss << "object " << ghobj << " not contained by pg " << pgid;
+ throw std::runtime_error(ss.str());
+ }
+ }
}
} catch (std::runtime_error& e) {
cerr << e.what() << std::endl;
}
if (op == "import") {
-
+ ceph_assert(superblock != nullptr);
try {
- ret = tool.do_import(fs, superblock, force, pgidstr);
+ ret = tool.do_import(fs, *superblock, force, pgidstr);
}
catch (const buffer::error &e) {
cerr << "do_import threw exception error " << e.what() << std::endl;
bufferlist bl;
OSDMap osdmap;
if (epoch == 0) {
- epoch = superblock.current_epoch;
+ ceph_assert(superblock != nullptr);
+ epoch = superblock->current_epoch;
}
ret = get_osdmap(fs, epoch, osdmap, bl);
if (ret) {
} else if (op == "get-inc-osdmap") {
bufferlist bl;
if (epoch == 0) {
- epoch = superblock.current_epoch;
+ ceph_assert(superblock != nullptr);
+ epoch = superblock->current_epoch;
}
ret = get_inc_osdmap(fs, epoch, bl);
if (ret < 0) {
cerr << "Please specify the path to monitor db to update" << std::endl;
ret = -EINVAL;
} else {
- ret = update_mon_db(*fs, superblock, dpath + "/keyring", mon_store_path);
+ ceph_assert(superblock != nullptr);
+ ret = update_mon_db(*fs, *superblock, dpath + "/keyring", mon_store_path);
}
goto out;
}
}
goto out;
}
+ if (op == "list-slow-omap") {
+ ret = do_list_slow(fs, pgidstr, object, slow_threshold, formatter, debug,
+ human_readable);
+ if (ret < 0) {
+ cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
+ }
+ goto out;
+ }
if (op == "dump-super") {
+ ceph_assert(superblock != nullptr);
formatter->open_object_section("superblock");
- superblock.dump(formatter);
+ superblock->dump(formatter);
formatter->close_section();
formatter->flush(cout);
cout << std::endl;
cerr << "struct_v " << (int)struct_ver << std::endl;
if (op == "export" || op == "export-remove") {
- ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, superblock, past_intervals);
+ ceph_assert(superblock != nullptr);
+ ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals);
if (ret == 0) {
cerr << "Export successful" << std::endl;
if (op == "export-remove") {
cout << "Marking complete " << std::endl;
- info.last_update = eversion_t(superblock.current_epoch, info.last_update.version + 1);
+ ceph_assert(superblock != nullptr);
+ info.last_update = eversion_t(superblock->current_epoch, info.last_update.version + 1);
info.last_backfill = hobject_t::get_max();
- info.last_epoch_started = superblock.current_epoch;
- info.history.last_epoch_started = superblock.current_epoch;
- info.history.last_epoch_clean = superblock.current_epoch;
+ info.last_epoch_started = superblock->current_epoch;
+ info.history.last_epoch_started = superblock->current_epoch;
+ info.history.last_epoch_clean = superblock->current_epoch;
past_intervals.clear();
if (!dry_run) {
}
out:
+ if (debug) {
+ ostringstream ostr;
+ Formatter* f = Formatter::create("json-pretty", "json-pretty", "json-pretty");
+ cct->get_perfcounters_collection()->dump_formatted(f, false);
+ ostr << "ceph-objectstore-tool ";
+ f->flush(ostr);
+ delete f;
+ cout << ostr.str() << std::endl;
+ }
+
int r = fs->umount();
if (r < 0) {
cerr << "umount failed: " << cpp_strerror(r) << std::endl;