]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/ceph_objectstore_tool.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / tools / ceph_objectstore_tool.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <boost/program_options/variables_map.hpp>
16 #include <boost/program_options/parsers.hpp>
17 #include <boost/scoped_ptr.hpp>
18 #include <boost/optional.hpp>
19
20 #include <stdlib.h>
21
22 #include "common/Formatter.h"
23 #include "common/errno.h"
24 #include "common/ceph_argparse.h"
25 #include "common/url_escape.h"
26
27 #include "global/global_init.h"
28
29 #include "os/ObjectStore.h"
30 #include "os/filestore/FileJournal.h"
31 #include "os/filestore/FileStore.h"
32 #ifdef HAVE_LIBFUSE
33 #include "os/FuseStore.h"
34 #endif
35
36 #include "osd/PGLog.h"
37 #include "osd/OSD.h"
38 #include "osd/PG.h"
39 #include "osd/ECUtil.h"
40
41 #include "json_spirit/json_spirit_value.h"
42 #include "json_spirit/json_spirit_reader.h"
43
44 #include "rebuild_mondb.h"
45 #include "ceph_objectstore_tool.h"
46 #include "include/compat.h"
47 #include "include/util.h"
48
49 using namespace std;
50 namespace po = boost::program_options;
51
52 #ifdef INTERNAL_TEST
53 CompatSet get_test_compat_set() {
54 CompatSet::FeatureSet ceph_osd_feature_compat;
55 CompatSet::FeatureSet ceph_osd_feature_ro_compat;
56 CompatSet::FeatureSet ceph_osd_feature_incompat;
57 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
58 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_PGINFO);
59 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_OLOC);
60 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEC);
61 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES);
62 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL);
63 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
64 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
65 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
66 #ifdef INTERNAL_TEST2
67 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER);
68 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
69 #endif
70 return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
71 ceph_osd_feature_incompat);
72 }
73 #endif
74
// Upper bound, in bytes (1 MiB), on a single read: used as the chunk size
// for bufferlist::read_fd() and for ObjectStore::read() during export.
const ssize_t max_read = 1024 * 1024;
// Sentinel value meaning "no file descriptor assigned"; file_fd is
// initialised with it.
const int fd_none = INT_MIN;
// NOTE(review): presumably records whether stdout is a terminal; it is not
// set or read in the part of the file visible here — confirm.
bool outistty;
// When true, the helpers in this file skip the mutating steps (queuing
// transactions, compaction) and only report what would be done.
bool dry_run;
79
80 struct action_on_object_t {
81 virtual ~action_on_object_t() {}
82 virtual void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) = 0;
83 };
84
85 int _action_on_all_objects_in_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
86 {
87 auto ch = store->open_collection(coll);
88
89 unsigned LIST_AT_A_TIME = 100;
90 ghobject_t next;
91 while (!next.is_max()) {
92 vector<ghobject_t> list;
93 int r = store->collection_list(ch,
94 next,
95 ghobject_t::get_max(),
96 LIST_AT_A_TIME,
97 &list,
98 &next);
99 if (r < 0) {
100 cerr << "Error listing collection: " << coll << ", "
101 << cpp_strerror(r) << std::endl;
102 return r;
103 }
104 for (vector<ghobject_t>::iterator obj = list.begin();
105 obj != list.end();
106 ++obj) {
107 object_info_t oi;
108 if (coll != coll_t::meta()) {
109 bufferlist attr;
110 r = store->getattr(ch, *obj, OI_ATTR, attr);
111 if (r < 0) {
112 cerr << "Error getting attr on : " << make_pair(coll, *obj) << ", "
113 << cpp_strerror(r) << std::endl;
114 } else {
115 auto bp = attr.cbegin();
116 try {
117 decode(oi, bp);
118 } catch (...) {
119 r = -EINVAL;
120 cerr << "Error decoding attr on : " << make_pair(coll, *obj) << ", "
121 << cpp_strerror(r) << std::endl;
122 }
123 }
124 }
125 action.call(store, coll, *obj, oi);
126 }
127 }
128 return 0;
129 }
130
131 int action_on_all_objects_in_pg(ObjectStore *store, string pgidstr, action_on_object_t &action, bool debug)
132 {
133 spg_t pgid;
134 // Scan collections in case this is an ec pool but no shard specified
135 unsigned scanned = 0;
136 int r = 0;
137 vector<coll_t> colls_to_check;
138 vector<coll_t> candidates;
139
140 r = store->list_collections(candidates);
141 if (r < 0) {
142 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
143 return r;
144 }
145 pgid.parse(pgidstr.c_str());
146 for (vector<coll_t>::iterator i = candidates.begin();
147 i != candidates.end();
148 ++i) {
149 spg_t cand_pgid;
150 if (i->is_meta() && pgidstr == "meta") {
151 colls_to_check.push_back(*i);
152 continue;
153 }
154 if (!i->is_pg(&cand_pgid))
155 continue;
156
157 // If an exact match or treat no shard as any shard
158 if (cand_pgid == pgid ||
159 (pgid.is_no_shard() && pgid.pgid == cand_pgid.pgid)) {
160 colls_to_check.push_back(*i);
161 }
162 }
163
164 if (debug)
165 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
166 for (vector<coll_t>::iterator i = colls_to_check.begin();
167 i != colls_to_check.end();
168 ++i, ++scanned) {
169 if (debug)
170 cerr << "Scanning " << *i << ", " << scanned << "/"
171 << colls_to_check.size() << " completed" << std::endl;
172 r = _action_on_all_objects_in_pg(store, *i, action, debug);
173 if (r < 0)
174 break;
175 }
176 return r;
177 }
178
179 int action_on_all_objects_in_exact_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
180 {
181 int r = _action_on_all_objects_in_pg(store, coll, action, debug);
182 return r;
183 }
184
185 int _action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
186 {
187 unsigned scanned = 0;
188 int r = 0;
189 vector<coll_t> colls_to_check;
190 vector<coll_t> candidates;
191 r = store->list_collections(candidates);
192 if (r < 0) {
193 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
194 return r;
195 }
196 for (vector<coll_t>::iterator i = candidates.begin();
197 i != candidates.end();
198 ++i) {
199 if (i->is_pg()) {
200 colls_to_check.push_back(*i);
201 }
202 }
203
204 if (debug)
205 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
206 for (vector<coll_t>::iterator i = colls_to_check.begin();
207 i != colls_to_check.end();
208 ++i, ++scanned) {
209 if (debug)
210 cerr << "Scanning " << *i << ", " << scanned << "/"
211 << colls_to_check.size() << " completed" << std::endl;
212 r = _action_on_all_objects_in_pg(store, *i, action, debug);
213 if (r < 0)
214 return r;
215 }
216 return 0;
217 }
218
219 int action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
220 {
221 int r = _action_on_all_objects(store, action, debug);
222 return r;
223 }
224
225 struct pgid_object_list {
226 list<pair<coll_t, ghobject_t> > _objects;
227
228 void insert(coll_t coll, ghobject_t &ghobj) {
229 _objects.push_back(make_pair(coll, ghobj));
230 }
231
232 void dump(Formatter *f, bool human_readable) const {
233 if (!human_readable)
234 f->open_array_section("pgid_objects");
235 for (list<pair<coll_t, ghobject_t> >::const_iterator i = _objects.begin();
236 i != _objects.end();
237 ++i) {
238 f->open_array_section("pgid_object");
239 spg_t pgid;
240 bool is_pg = i->first.is_pg(&pgid);
241 if (is_pg)
242 f->dump_string("pgid", stringify(pgid));
243 if (!is_pg || !human_readable)
244 f->dump_string("coll", i->first.to_str());
245 f->open_object_section("ghobject");
246 i->second.dump(f);
247 f->close_section();
248 f->close_section();
249 if (human_readable) {
250 f->flush(cout);
251 cout << std::endl;
252 }
253 }
254 if (!human_readable) {
255 f->close_section();
256 f->flush(cout);
257 cout << std::endl;
258 }
259 }
260 };
261
262 struct lookup_ghobject : public action_on_object_t {
263 pgid_object_list _objects;
264 const string _name;
265 const boost::optional<std::string> _namespace;
266 bool _need_snapset;
267
268 lookup_ghobject(const string& name, const boost::optional<std::string>& nspace, bool need_snapset = false) : _name(name),
269 _namespace(nspace), _need_snapset(need_snapset) { }
270
271 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
272 if (_need_snapset && !ghobj.hobj.has_snapset())
273 return;
274 if ((_name.length() == 0 || ghobj.hobj.oid.name == _name) &&
275 (!_namespace || ghobj.hobj.nspace == _namespace))
276 _objects.insert(coll, ghobj);
277 return;
278 }
279
280 int size() const {
281 return _objects._objects.size();
282 }
283
284 pair<coll_t, ghobject_t> pop() {
285 pair<coll_t, ghobject_t> front = _objects._objects.front();
286 _objects._objects.pop_front();
287 return front;
288 }
289
290 void dump(Formatter *f, bool human_readable) const {
291 _objects.dump(f, human_readable);
292 }
293 };
294
295 struct lookup_slow_ghobject : public action_on_object_t {
296 list<tuple<
297 coll_t,
298 ghobject_t,
299 ceph::signedspan,
300 ceph::signedspan,
301 ceph::signedspan,
302 string> > _objects;
303 const string _name;
304 double threshold;
305
306 coll_t last_coll;
307
308 lookup_slow_ghobject(const string& name, double _threshold) :
309 _name(name), threshold(_threshold) { }
310
311 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
312 ObjectMap::ObjectMapIterator iter;
313 auto start1 = mono_clock::now();
314 ceph::signedspan first_seek_time = start1 - start1;
315 ceph::signedspan last_seek_time = first_seek_time;
316 ceph::signedspan total_time = first_seek_time;
317 {
318 auto ch = store->open_collection(coll);
319 iter = store->get_omap_iterator(ch, ghobj);
320 if (!iter) {
321 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT)
322 << " obj:" << ghobj
323 << std::endl;
324 return;
325 }
326 auto start = mono_clock::now();
327 iter->seek_to_first();
328 first_seek_time = mono_clock::now() - start;
329
330 while(iter->valid()) {
331 start = mono_clock::now();
332 iter->next();
333 last_seek_time = mono_clock::now() - start;
334 }
335 }
336
337 if (coll != last_coll) {
338 cerr << ">>> inspecting coll" << coll << std::endl;
339 last_coll = coll;
340 }
341
342 total_time = mono_clock::now() - start1;
343 if ( total_time >= make_timespan(threshold)) {
344 _objects.emplace_back(coll, ghobj,
345 first_seek_time, last_seek_time, total_time,
346 url_escape(iter->tail_key()));
347 cerr << ">>>>> found obj " << ghobj
348 << " first_seek_time "
349 << std::chrono::duration_cast<std::chrono::seconds>(first_seek_time).count()
350 << " last_seek_time "
351 << std::chrono::duration_cast<std::chrono::seconds>(last_seek_time).count()
352 << " total_time "
353 << std::chrono::duration_cast<std::chrono::seconds>(total_time).count()
354 << " tail key: " << url_escape(iter->tail_key())
355 << std::endl;
356 }
357 return;
358 }
359
360 int size() const {
361 return _objects.size();
362 }
363
364 void dump(Formatter *f, bool human_readable) const {
365 if (!human_readable)
366 f->open_array_section("objects");
367 for (auto i = _objects.begin();
368 i != _objects.end();
369 ++i) {
370 f->open_array_section("object");
371 coll_t coll;
372 ghobject_t ghobj;
373 ceph::signedspan first_seek_time;
374 ceph::signedspan last_seek_time;
375 ceph::signedspan total_time;
376 string tail_key;
377 std::tie(coll, ghobj, first_seek_time, last_seek_time, total_time, tail_key) = *i;
378
379 spg_t pgid;
380 bool is_pg = coll.is_pg(&pgid);
381 if (is_pg)
382 f->dump_string("pgid", stringify(pgid));
383 if (!is_pg || !human_readable)
384 f->dump_string("coll", coll.to_str());
385 f->dump_object("ghobject", ghobj);
386 f->open_object_section("times");
387 f->dump_int("first_seek_time",
388 std::chrono::duration_cast<std::chrono::seconds>(first_seek_time).count());
389 f->dump_int("last_seek_time",
390 std::chrono::duration_cast<std::chrono::seconds>
391 (last_seek_time).count());
392 f->dump_int("total_time",
393 std::chrono::duration_cast<std::chrono::seconds>(total_time).count());
394 f->dump_string("tail_key", tail_key);
395 f->close_section();
396
397 f->close_section();
398 if (human_readable) {
399 f->flush(cout);
400 cout << std::endl;
401 }
402 }
403 if (!human_readable) {
404 f->close_section();
405 f->flush(cout);
406 cout << std::endl;
407 }
408 }
409 };
410
// Descriptor the export helpers write their sections to; fd_none until a
// caller assigns a real descriptor.
int file_fd = fd_none;
// Enables the extra diagnostic output on stderr throughout this file.
bool debug;
// NOTE(review): presumably set from a command-line option to override safety
// checks; the functions visible here take their own `force` parameter —
// confirm where this global is read.
bool force = false;
// NOTE(review): presumably set when the OSD superblock should not be read or
// required; not referenced in the visible part of the file — confirm.
bool no_superblock = false;

// NOTE(review): export-file super header instance; its use is outside the
// visible part of the file — confirm.
super_header sh;
417
418 static int get_fd_data(int fd, bufferlist &bl)
419 {
420 uint64_t total = 0;
421 do {
422 ssize_t bytes = bl.read_fd(fd, max_read);
423 if (bytes < 0) {
424 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
425 return bytes;
426 }
427
428 if (bytes == 0)
429 break;
430
431 total += bytes;
432 } while(true);
433
434 ceph_assert(bl.length() == total);
435 return 0;
436 }
437
438 int get_log(ObjectStore *fs, __u8 struct_ver,
439 spg_t pgid, const pg_info_t &info,
440 PGLog::IndexedLog &log, pg_missing_t &missing)
441 {
442 try {
443 auto ch = fs->open_collection(coll_t(pgid));
444 if (!ch) {
445 return -ENOENT;
446 }
447 ostringstream oss;
448 ceph_assert(struct_ver > 0);
449 PGLog::read_log_and_missing(
450 fs, ch,
451 pgid.make_pgmeta_oid(),
452 info, log, missing,
453 oss,
454 g_ceph_context->_conf->osd_ignore_stale_divergent_priors);
455 if (debug && oss.str().size())
456 cerr << oss.str() << std::endl;
457 }
458 catch (const buffer::error &e) {
459 cerr << "read_log_and_missing threw exception error " << e.what() << std::endl;
460 return -EFAULT;
461 }
462 return 0;
463 }
464
465 void dump_log(Formatter *formatter, ostream &out, pg_log_t &log,
466 pg_missing_t &missing)
467 {
468 formatter->open_object_section("op_log");
469 formatter->open_object_section("pg_log_t");
470 log.dump(formatter);
471 formatter->close_section();
472 formatter->flush(out);
473 formatter->open_object_section("pg_missing_t");
474 missing.dump(formatter);
475 formatter->close_section();
476 formatter->close_section();
477 formatter->flush(out);
478 }
479
480 //Based on part of OSD::load_pgs()
481 int finish_remove_pgs(ObjectStore *store)
482 {
483 vector<coll_t> ls;
484 int r = store->list_collections(ls);
485 if (r < 0) {
486 cerr << "finish_remove_pgs: failed to list pgs: " << cpp_strerror(r)
487 << std::endl;
488 return r;
489 }
490
491 for (vector<coll_t>::iterator it = ls.begin();
492 it != ls.end();
493 ++it) {
494 spg_t pgid;
495
496 if (it->is_temp(&pgid) ||
497 (it->is_pg(&pgid) && PG::_has_removal_flag(store, pgid))) {
498 cout << "finish_remove_pgs " << *it << " removing " << pgid << std::endl;
499 OSD::recursive_remove_collection(g_ceph_context, store, pgid, *it);
500 continue;
501 }
502
503 //cout << "finish_remove_pgs ignoring unrecognized " << *it << std::endl;
504 }
505 return 0;
506 }
507
508 #pragma GCC diagnostic ignored "-Wpragmas"
509 #pragma GCC diagnostic push
510 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
511
512 int mark_pg_for_removal(ObjectStore *fs, spg_t pgid, ObjectStore::Transaction *t)
513 {
514 pg_info_t info(pgid);
515 coll_t coll(pgid);
516 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
517
518 epoch_t map_epoch = 0;
519 int r = PG::peek_map_epoch(fs, pgid, &map_epoch);
520 if (r < 0)
521 cerr << __func__ << " warning: peek_map_epoch reported error" << std::endl;
522 PastIntervals past_intervals;
523 __u8 struct_v;
524 r = PG::read_info(fs, pgid, coll, info, past_intervals, struct_v);
525 if (r < 0) {
526 cerr << __func__ << " error on read_info " << cpp_strerror(r) << std::endl;
527 return r;
528 }
529 ceph_assert(struct_v >= 8);
530 // new omap key
531 cout << "setting '_remove' omap key" << std::endl;
532 map<string,bufferlist> values;
533 encode((char)1, values["_remove"]);
534 t->omap_setkeys(coll, pgmeta_oid, values);
535 return 0;
536 }
537
538 #pragma GCC diagnostic pop
539 #pragma GCC diagnostic warning "-Wpragmas"
540
541 template<typename Func>
542 void wait_until_done(ObjectStore::Transaction* txn, Func&& func)
543 {
544 bool finished = false;
545 std::condition_variable cond;
546 std::mutex m;
547 txn->register_on_complete(make_lambda_context([&](int) {
548 std::unique_lock lock{m};
549 finished = true;
550 cond.notify_one();
551 }));
552 std::move(func)();
553 std::unique_lock lock{m};
554 cond.wait(lock, [&] {return finished;});
555 }
556
557 int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid)
558 {
559 if (!dry_run)
560 finish_remove_pgs(store);
561 if (!store->collection_exists(coll_t(r_pgid)))
562 return -ENOENT;
563
564 cout << " marking collection for removal" << std::endl;
565 if (dry_run)
566 return 0;
567 ObjectStore::Transaction rmt;
568 int r = mark_pg_for_removal(store, r_pgid, &rmt);
569 if (r < 0) {
570 return r;
571 }
572 ObjectStore::CollectionHandle ch = store->open_collection(coll_t(r_pgid));
573 store->queue_transaction(ch, std::move(rmt));
574 finish_remove_pgs(store);
575 return r;
576 }
577
578 int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
579 PastIntervals &past_intervals)
580 {
581 //Empty for this
582 coll_t coll(info.pgid);
583 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
584 map<string,bufferlist> km;
585 string key_to_remove;
586 pg_info_t last_written_info;
587 int ret = prepare_info_keymap(
588 g_ceph_context,
589 &km, &key_to_remove,
590 epoch,
591 info,
592 last_written_info,
593 past_intervals,
594 true, true, false);
595 if (ret) cerr << "Failed to write info" << std::endl;
596 t.omap_setkeys(coll, pgmeta_oid, km);
597 if (!key_to_remove.empty()) {
598 t.omap_rmkey(coll, pgmeta_oid, key_to_remove);
599 }
600 return ret;
601 }
602
603 typedef map<eversion_t, hobject_t> divergent_priors_t;
604
605 int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
606 pg_log_t &log, PastIntervals &past_intervals,
607 divergent_priors_t &divergent,
608 pg_missing_t &missing)
609 {
610 cout << __func__ << " epoch " << epoch << " info " << info << std::endl;
611 int ret = write_info(t, epoch, info, past_intervals);
612 if (ret)
613 return ret;
614
615 coll_t coll(info.pgid);
616 map<string,bufferlist> km;
617 const bool require_rollback = !info.pgid.is_no_shard();
618 if (!divergent.empty()) {
619 ceph_assert(missing.get_items().empty());
620 PGLog::write_log_and_missing_wo_missing(
621 t, &km, log, coll, info.pgid.make_pgmeta_oid(), divergent,
622 require_rollback);
623 } else {
624 pg_missing_tracker_t tmissing(missing);
625 bool rebuilt_missing_set_with_deletes = missing.may_include_deletes;
626 PGLog::write_log_and_missing(
627 t, &km, log, coll, info.pgid.make_pgmeta_oid(), tmissing,
628 require_rollback,
629 &rebuilt_missing_set_with_deletes);
630 }
631 t.omap_setkeys(coll, info.pgid.make_pgmeta_oid(), km);
632 return 0;
633 }
634
635 int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
636 pg_info_t &info, const spg_t &pgid,
637 epoch_t map_epoch,
638 PastIntervals &past_intervals)
639 {
640 ghobject_t oid = pgid.make_pgmeta_oid();
641 struct stat st;
642 auto ch = store->open_collection(coll);
643 int r = store->stat(ch, oid, &st);
644 ceph_assert(r == 0);
645 ceph_assert(st.st_size == 0);
646
647 cerr << "Log bounds are: " << "(" << info.log_tail << ","
648 << info.last_update << "]" << std::endl;
649
650 uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
651 if (info.last_update.version - info.log_tail.version <= max_entries) {
652 cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
653 return 0;
654 }
655
656 ceph_assert(info.last_update.version > max_entries);
657 version_t trim_to = info.last_update.version - max_entries;
658 size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
659 eversion_t new_tail;
660 bool done = false;
661
662 while (!done) {
663 // gather keys so we can delete them in a batch without
664 // affecting the iterator
665 set<string> keys_to_trim;
666 {
667 ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
668 if (!p)
669 break;
670 for (p->seek_to_first(); p->valid(); p->next()) {
671 if (p->key()[0] == '_')
672 continue;
673 if (p->key() == "can_rollback_to")
674 continue;
675 if (p->key() == "divergent_priors")
676 continue;
677 if (p->key() == "rollback_info_trimmed_to")
678 continue;
679 if (p->key() == "may_include_deletes_in_missing")
680 continue;
681 if (p->key().substr(0, 7) == string("missing"))
682 continue;
683 if (p->key().substr(0, 4) == string("dup_"))
684 continue;
685
686 bufferlist bl = p->value();
687 auto bp = bl.cbegin();
688 pg_log_entry_t e;
689 try {
690 e.decode_with_checksum(bp);
691 } catch (const buffer::error &e) {
692 cerr << "Error reading pg log entry: " << e.what() << std::endl;
693 }
694 if (debug) {
695 cerr << "read entry " << e << std::endl;
696 }
697 if (e.version.version > trim_to) {
698 done = true;
699 break;
700 }
701 keys_to_trim.insert(p->key());
702 new_tail = e.version;
703 if (keys_to_trim.size() >= trim_at_once)
704 break;
705 }
706
707 if (!p->valid())
708 done = true;
709 } // deconstruct ObjectMapIterator
710
711 // delete the keys
712 if (!dry_run && !keys_to_trim.empty()) {
713 cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
714 ObjectStore::Transaction t;
715 t.omap_rmkeys(coll, oid, keys_to_trim);
716 store->queue_transaction(ch, std::move(t));
717 ch->flush();
718 }
719 }
720
721 // update pg info with new tail
722 if (!dry_run && new_tail != eversion_t()) {
723 info.log_tail = new_tail;
724 ObjectStore::Transaction t;
725 int ret = write_info(t, map_epoch, info, past_intervals);
726 if (ret)
727 return ret;
728 store->queue_transaction(ch, std::move(t));
729 ch->flush();
730 }
731
732 // compact the db since we just removed a bunch of data
733 cerr << "Finished trimming, now compacting..." << std::endl;
734 if (!dry_run)
735 store->compact();
736 return 0;
737 }
738
739 const int OMAP_BATCH_SIZE = 25;
740 void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map<string, bufferlist> &oset)
741 {
742 oset.clear();
743 for (int count = OMAP_BATCH_SIZE; count && iter->valid(); --count, iter->next()) {
744 oset.insert(pair<string, bufferlist>(iter->key(), iter->value()));
745 }
746 }
747
748 int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj)
749 {
750 struct stat st;
751 mysize_t total;
752 footer ft;
753
754 auto ch = store->open_collection(cid);
755 int ret = store->stat(ch, obj, &st);
756 if (ret < 0)
757 return ret;
758
759 cerr << "Read " << obj << std::endl;
760
761 total = st.st_size;
762 if (debug)
763 cerr << "size=" << total << std::endl;
764
765 object_begin objb(obj);
766
767 {
768 bufferptr bp;
769 bufferlist bl;
770 ret = store->getattr(ch, obj, OI_ATTR, bp);
771 if (ret < 0) {
772 cerr << "getattr failure object_info " << ret << std::endl;
773 return ret;
774 }
775 bl.push_back(bp);
776 decode(objb.oi, bl);
777 if (debug)
778 cerr << "object_info: " << objb.oi << std::endl;
779 }
780
781 // NOTE: we include whiteouts, lost, etc.
782
783 ret = write_section(TYPE_OBJECT_BEGIN, objb, file_fd);
784 if (ret < 0)
785 return ret;
786
787 uint64_t offset = 0;
788 bufferlist rawdatabl;
789 while(total > 0) {
790 rawdatabl.clear();
791 mysize_t len = max_read;
792 if (len > total)
793 len = total;
794
795 ret = store->read(ch, obj, offset, len, rawdatabl);
796 if (ret < 0)
797 return ret;
798 if (ret == 0)
799 return -EINVAL;
800
801 data_section dblock(offset, len, rawdatabl);
802 if (debug)
803 cerr << "data section offset=" << offset << " len=" << len << std::endl;
804
805 total -= ret;
806 offset += ret;
807
808 ret = write_section(TYPE_DATA, dblock, file_fd);
809 if (ret) return ret;
810 }
811
812 //Handle attrs for this object
813 map<string,bufferptr,less<>> aset;
814 ret = store->getattrs(ch, obj, aset);
815 if (ret) return ret;
816 attr_section as(aset);
817 ret = write_section(TYPE_ATTRS, as, file_fd);
818 if (ret)
819 return ret;
820
821 if (debug) {
822 cerr << "attrs size " << aset.size() << std::endl;
823 }
824
825 //Handle omap information
826 bufferlist hdrbuf;
827 ret = store->omap_get_header(ch, obj, &hdrbuf, true);
828 if (ret < 0) {
829 cerr << "omap_get_header: " << cpp_strerror(ret) << std::endl;
830 return ret;
831 }
832
833 omap_hdr_section ohs(hdrbuf);
834 ret = write_section(TYPE_OMAP_HDR, ohs, file_fd);
835 if (ret)
836 return ret;
837
838 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, obj);
839 if (!iter) {
840 ret = -ENOENT;
841 cerr << "omap_get_iterator: " << cpp_strerror(ret) << std::endl;
842 return ret;
843 }
844 iter->seek_to_first();
845 int mapcount = 0;
846 map<string, bufferlist> out;
847 while(iter->valid()) {
848 get_omap_batch(iter, out);
849
850 if (out.empty()) break;
851
852 mapcount += out.size();
853 omap_section oms(out);
854 ret = write_section(TYPE_OMAP, oms, file_fd);
855 if (ret)
856 return ret;
857 }
858 if (debug)
859 cerr << "omap map size " << mapcount << std::endl;
860
861 ret = write_simple(TYPE_OBJECT_END, file_fd);
862 if (ret)
863 return ret;
864
865 return 0;
866 }
867
868 int ObjectStoreTool::export_files(ObjectStore *store, coll_t coll)
869 {
870 ghobject_t next;
871 auto ch = store->open_collection(coll);
872 while (!next.is_max()) {
873 vector<ghobject_t> objects;
874 int r = store->collection_list(ch, next, ghobject_t::get_max(), 300,
875 &objects, &next);
876 if (r < 0)
877 return r;
878 for (vector<ghobject_t>::iterator i = objects.begin();
879 i != objects.end();
880 ++i) {
881 ceph_assert(!i->hobj.is_meta());
882 if (i->is_pgmeta() || i->hobj.is_temp() || !i->is_no_gen()) {
883 continue;
884 }
885 r = export_file(store, coll, *i);
886 if (r < 0)
887 return r;
888 }
889 }
890 return 0;
891 }
892
893 int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
894 OSDMap::Incremental inc;
895 auto it = bl.cbegin();
896 inc.decode(it);
897 if (e == 0) {
898 e = inc.epoch;
899 } else if (e != inc.epoch) {
900 cerr << "incremental.epoch mismatch: "
901 << inc.epoch << " != " << e << std::endl;
902 if (force) {
903 cerr << "But will continue anyway." << std::endl;
904 } else {
905 return -EINVAL;
906 }
907 }
908 auto ch = store->open_collection(coll_t::meta());
909 const ghobject_t inc_oid = OSD::get_inc_osdmap_pobject_name(e);
910 if (!store->exists(ch, inc_oid)) {
911 cerr << "inc-osdmap (" << inc_oid << ") does not exist." << std::endl;
912 if (!force) {
913 return -ENOENT;
914 }
915 cout << "Creating a new epoch." << std::endl;
916 }
917 if (dry_run)
918 return 0;
919 ObjectStore::Transaction t;
920 t.write(coll_t::meta(), inc_oid, 0, bl.length(), bl);
921 t.truncate(coll_t::meta(), inc_oid, bl.length());
922 store->queue_transaction(ch, std::move(t));
923 return 0;
924 }
925
926 int get_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl)
927 {
928 auto ch = store->open_collection(coll_t::meta());
929 if (store->read(ch,
930 OSD::get_inc_osdmap_pobject_name(e),
931 0, 0, bl) < 0) {
932 return -ENOENT;
933 }
934 return 0;
935 }
936
937 int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
938 OSDMap osdmap;
939 osdmap.decode(bl);
940 if (e == 0) {
941 e = osdmap.get_epoch();
942 } else if (e != osdmap.get_epoch()) {
943 cerr << "osdmap.epoch mismatch: "
944 << e << " != " << osdmap.get_epoch() << std::endl;
945 if (force) {
946 cerr << "But will continue anyway." << std::endl;
947 } else {
948 return -EINVAL;
949 }
950 }
951 auto ch = store->open_collection(coll_t::meta());
952 const ghobject_t full_oid = OSD::get_osdmap_pobject_name(e);
953 if (!store->exists(ch, full_oid)) {
954 cerr << "osdmap (" << full_oid << ") does not exist." << std::endl;
955 if (!force) {
956 return -ENOENT;
957 }
958 cout << "Creating a new epoch." << std::endl;
959 }
960 if (dry_run)
961 return 0;
962 ObjectStore::Transaction t;
963 t.write(coll_t::meta(), full_oid, 0, bl.length(), bl);
964 t.truncate(coll_t::meta(), full_oid, bl.length());
965 store->queue_transaction(ch, std::move(t));
966 return 0;
967 }
968
969 int get_osdmap(ObjectStore *store, epoch_t e, OSDMap &osdmap, bufferlist& bl)
970 {
971 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
972 bool found = store->read(
973 ch, OSD::get_osdmap_pobject_name(e), 0, 0, bl) >= 0;
974 if (!found) {
975 cerr << "Can't find OSDMap for pg epoch " << e << std::endl;
976 return -ENOENT;
977 }
978 osdmap.decode(bl);
979 if (debug)
980 cerr << osdmap << std::endl;
981 return 0;
982 }
983
984 int get_pg_num_history(ObjectStore *store, pool_pg_num_history_t *h)
985 {
986 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
987 bufferlist bl;
988 auto pghist = OSD::make_pg_num_history_oid();
989 int r = store->read(ch, pghist, 0, 0, bl, 0);
990 if (r >= 0 && bl.length() > 0) {
991 auto p = bl.cbegin();
992 decode(*h, p);
993 }
994 cout << __func__ << " pg_num_history " << *h << std::endl;
995 return 0;
996 }
997
998 int add_osdmap(ObjectStore *store, metadata_section &ms)
999 {
1000 return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl);
1001 }
1002
1003 int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid,
1004 pg_info_t &info, epoch_t map_epoch, __u8 struct_ver,
1005 const OSDSuperblock& superblock,
1006 PastIntervals &past_intervals)
1007 {
1008 PGLog::IndexedLog log;
1009 pg_missing_t missing;
1010
1011 cerr << "Exporting " << pgid << " info " << info << std::endl;
1012
1013 int ret = get_log(fs, struct_ver, pgid, info, log, missing);
1014 if (ret > 0)
1015 return ret;
1016
1017 if (debug) {
1018 Formatter *formatter = Formatter::create("json-pretty");
1019 ceph_assert(formatter);
1020 dump_log(formatter, cerr, log, missing);
1021 delete formatter;
1022 }
1023 write_super();
1024
1025 pg_begin pgb(pgid, superblock);
1026 // Special case: If replicated pg don't require the importing OSD to have shard feature
1027 if (pgid.is_no_shard()) {
1028 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1029 }
1030 ret = write_section(TYPE_PG_BEGIN, pgb, file_fd);
1031 if (ret)
1032 return ret;
1033
1034 // The metadata_section is now before files, so import can detect
1035 // errors and abort without wasting time.
1036 metadata_section ms(
1037 struct_ver,
1038 map_epoch,
1039 info,
1040 log,
1041 past_intervals,
1042 missing);
1043 ret = add_osdmap(fs, ms);
1044 if (ret)
1045 return ret;
1046 ret = write_section(TYPE_PG_METADATA, ms, file_fd);
1047 if (ret)
1048 return ret;
1049
1050 ret = export_files(fs, coll);
1051 if (ret) {
1052 cerr << "export_files error " << ret << std::endl;
1053 return ret;
1054 }
1055
1056 ret = write_simple(TYPE_PG_END, file_fd);
1057 if (ret)
1058 return ret;
1059
1060 return 0;
1061 }
1062
1063 int dump_data(Formatter *formatter, bufferlist &bl)
1064 {
1065 auto ebliter = bl.cbegin();
1066 data_section ds;
1067 ds.decode(ebliter);
1068
1069 formatter->open_object_section("data_block");
1070 formatter->dump_unsigned("offset", ds.offset);
1071 formatter->dump_unsigned("len", ds.len);
1072 // XXX: Add option to dump data like od -cx ?
1073 formatter->close_section();
1074 formatter->flush(cout);
1075 return 0;
1076 }
1077
1078 int get_data(ObjectStore *store, coll_t coll, ghobject_t hoid,
1079 ObjectStore::Transaction *t, bufferlist &bl)
1080 {
1081 auto ebliter = bl.cbegin();
1082 data_section ds;
1083 ds.decode(ebliter);
1084
1085 if (debug)
1086 cerr << "\tdata: offset " << ds.offset << " len " << ds.len << std::endl;
1087 t->write(coll, hoid, ds.offset, ds.len, ds.databl);
1088 return 0;
1089 }
1090
1091 int dump_attrs(
1092 Formatter *formatter, ghobject_t hoid,
1093 bufferlist &bl)
1094 {
1095 auto ebliter = bl.cbegin();
1096 attr_section as;
1097 as.decode(ebliter);
1098
1099 // This could have been handled in the caller if we didn't need to
1100 // support exports that didn't include object_info_t in object_begin.
1101 if (hoid.generation == ghobject_t::NO_GEN &&
1102 hoid.hobj.is_head()) {
1103 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1104 if (mi != as.data.end()) {
1105 SnapSet snapset;
1106 auto p = mi->second.cbegin();
1107 snapset.decode(p);
1108 formatter->open_object_section("snapset");
1109 snapset.dump(formatter);
1110 formatter->close_section();
1111 } else {
1112 formatter->open_object_section("snapset");
1113 formatter->dump_string("error", "missing SS_ATTR");
1114 formatter->close_section();
1115 }
1116 }
1117
1118 formatter->open_object_section("attrs");
1119 formatter->open_array_section("user");
1120 for (auto kv : as.data) {
1121 // Skip system attributes
1122 if (('_' != kv.first.at(0)) || kv.first.size() == 1)
1123 continue;
1124 formatter->open_object_section("user_attr");
1125 formatter->dump_string("name", kv.first.substr(1));
1126 bool b64;
1127 formatter->dump_string("value", cleanbin(kv.second, b64));
1128 formatter->dump_bool("Base64", b64);
1129 formatter->close_section();
1130 }
1131 formatter->close_section();
1132 formatter->open_array_section("system");
1133 for (auto kv : as.data) {
1134 // Skip user attributes
1135 if (('_' == kv.first.at(0)) && kv.first.size() != 1)
1136 continue;
1137 formatter->open_object_section("sys_attr");
1138 formatter->dump_string("name", kv.first);
1139 formatter->close_section();
1140 }
1141 formatter->close_section();
1142 formatter->close_section();
1143 formatter->flush(cout);
1144
1145 return 0;
1146 }
1147
1148 int get_attrs(
1149 ObjectStore *store, coll_t coll, ghobject_t hoid,
1150 ObjectStore::Transaction *t, bufferlist &bl,
1151 OSDriver &driver, SnapMapper &snap_mapper)
1152 {
1153 auto ebliter = bl.cbegin();
1154 attr_section as;
1155 as.decode(ebliter);
1156
1157 auto ch = store->open_collection(coll);
1158 if (debug)
1159 cerr << "\tattrs: len " << as.data.size() << std::endl;
1160 t->setattrs(coll, hoid, as.data);
1161
1162 // This could have been handled in the caller if we didn't need to
1163 // support exports that didn't include object_info_t in object_begin.
1164 if (hoid.generation == ghobject_t::NO_GEN &&
1165 hoid.hobj.is_head()) {
1166 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1167 if (mi != as.data.end()) {
1168 SnapSet snapset;
1169 auto p = mi->second.cbegin();
1170 snapset.decode(p);
1171 cout << "snapset " << snapset << std::endl;
1172 for (auto& p : snapset.clone_snaps) {
1173 ghobject_t clone = hoid;
1174 clone.hobj.snap = p.first;
1175 set<snapid_t> snaps(p.second.begin(), p.second.end());
1176 if (!store->exists(ch, clone)) {
1177 // no clone, skip. this is probably a cache pool. this works
1178 // because we use a separate transaction per object and clones
1179 // come before head in the archive.
1180 if (debug)
1181 cerr << "\tskipping missing " << clone << " (snaps "
1182 << snaps << ")" << std::endl;
1183 continue;
1184 }
1185 if (debug)
1186 cerr << "\tsetting " << clone.hobj << " snaps " << snaps
1187 << std::endl;
1188 OSDriver::OSTransaction _t(driver.get_transaction(t));
1189 ceph_assert(!snaps.empty());
1190 snap_mapper.add_oid(clone.hobj, snaps, &_t);
1191 }
1192 } else {
1193 cerr << "missing SS_ATTR on " << hoid << std::endl;
1194 }
1195 }
1196 return 0;
1197 }
1198
1199 int dump_omap_hdr(Formatter *formatter, bufferlist &bl)
1200 {
1201 auto ebliter = bl.cbegin();
1202 omap_hdr_section oh;
1203 oh.decode(ebliter);
1204
1205 formatter->open_object_section("omap_header");
1206 formatter->dump_string("value", string(oh.hdr.c_str(), oh.hdr.length()));
1207 formatter->close_section();
1208 formatter->flush(cout);
1209 return 0;
1210 }
1211
1212 int get_omap_hdr(ObjectStore *store, coll_t coll, ghobject_t hoid,
1213 ObjectStore::Transaction *t, bufferlist &bl)
1214 {
1215 auto ebliter = bl.cbegin();
1216 omap_hdr_section oh;
1217 oh.decode(ebliter);
1218
1219 if (debug)
1220 cerr << "\tomap header: " << string(oh.hdr.c_str(), oh.hdr.length())
1221 << std::endl;
1222 t->omap_setheader(coll, hoid, oh.hdr);
1223 return 0;
1224 }
1225
1226 int dump_omap(Formatter *formatter, bufferlist &bl)
1227 {
1228 auto ebliter = bl.cbegin();
1229 omap_section os;
1230 os.decode(ebliter);
1231
1232 formatter->open_object_section("omaps");
1233 formatter->dump_unsigned("count", os.omap.size());
1234 formatter->open_array_section("data");
1235 for (auto o : os.omap) {
1236 formatter->open_object_section("omap");
1237 formatter->dump_string("name", o.first);
1238 bool b64;
1239 formatter->dump_string("value", cleanbin(o.second, b64));
1240 formatter->dump_bool("Base64", b64);
1241 formatter->close_section();
1242 }
1243 formatter->close_section();
1244 formatter->close_section();
1245 formatter->flush(cout);
1246 return 0;
1247 }
1248
1249 int get_omap(ObjectStore *store, coll_t coll, ghobject_t hoid,
1250 ObjectStore::Transaction *t, bufferlist &bl)
1251 {
1252 auto ebliter = bl.cbegin();
1253 omap_section os;
1254 os.decode(ebliter);
1255
1256 if (debug)
1257 cerr << "\tomap: size " << os.omap.size() << std::endl;
1258 t->omap_setkeys(coll, hoid, os.omap);
1259 return 0;
1260 }
1261
1262 int ObjectStoreTool::dump_object(Formatter *formatter,
1263 bufferlist &bl)
1264 {
1265 auto ebliter = bl.cbegin();
1266 object_begin ob;
1267 ob.decode(ebliter);
1268
1269 if (ob.hoid.hobj.is_temp()) {
1270 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1271 return -EFAULT;
1272 }
1273
1274 formatter->open_object_section("object");
1275 formatter->open_object_section("oid");
1276 ob.hoid.dump(formatter);
1277 formatter->close_section();
1278 formatter->open_object_section("object_info");
1279 ob.oi.dump(formatter);
1280 formatter->close_section();
1281
1282 bufferlist ebl;
1283 bool done = false;
1284 while(!done) {
1285 sectiontype_t type;
1286 int ret = read_section(&type, &ebl);
1287 if (ret)
1288 return ret;
1289
1290 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1291 //cout << "\t\tsection size " << ebl.length() << std::endl;
1292 if (type >= END_OF_TYPES) {
1293 cout << "Skipping unknown object section type" << std::endl;
1294 continue;
1295 }
1296 switch(type) {
1297 case TYPE_DATA:
1298 if (dry_run) break;
1299 ret = dump_data(formatter, ebl);
1300 if (ret) return ret;
1301 break;
1302 case TYPE_ATTRS:
1303 if (dry_run) break;
1304 ret = dump_attrs(formatter, ob.hoid, ebl);
1305 if (ret) return ret;
1306 break;
1307 case TYPE_OMAP_HDR:
1308 if (dry_run) break;
1309 ret = dump_omap_hdr(formatter, ebl);
1310 if (ret) return ret;
1311 break;
1312 case TYPE_OMAP:
1313 if (dry_run) break;
1314 ret = dump_omap(formatter, ebl);
1315 if (ret) return ret;
1316 break;
1317 case TYPE_OBJECT_END:
1318 done = true;
1319 break;
1320 default:
1321 cerr << "Unknown section type " << type << std::endl;
1322 return -EFAULT;
1323 }
1324 }
1325 formatter->close_section();
1326 return 0;
1327 }
1328
1329 int ObjectStoreTool::get_object(ObjectStore *store,
1330 OSDriver& driver,
1331 SnapMapper& mapper,
1332 coll_t coll,
1333 bufferlist &bl, OSDMap &origmap,
1334 bool *skipped_objects)
1335 {
1336 ObjectStore::Transaction tran;
1337 ObjectStore::Transaction *t = &tran;
1338 auto ebliter = bl.cbegin();
1339 object_begin ob;
1340 ob.decode(ebliter);
1341
1342 if (ob.hoid.hobj.is_temp()) {
1343 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1344 return -EFAULT;
1345 }
1346 ceph_assert(g_ceph_context);
1347
1348 auto ch = store->open_collection(coll);
1349 if (ob.hoid.hobj.nspace != g_ceph_context->_conf->osd_hit_set_namespace) {
1350 object_t oid = ob.hoid.hobj.oid;
1351 object_locator_t loc(ob.hoid.hobj);
1352 pg_t raw_pgid = origmap.object_locator_to_pg(oid, loc);
1353 pg_t pgid = origmap.raw_pg_to_pg(raw_pgid);
1354
1355 spg_t coll_pgid;
1356 if (coll.is_pg(&coll_pgid) == false) {
1357 cerr << "INTERNAL ERROR: Bad collection during import" << std::endl;
1358 return -EFAULT;
1359 }
1360 if (coll_pgid.shard != ob.hoid.shard_id) {
1361 cerr << "INTERNAL ERROR: Importing shard " << coll_pgid.shard
1362 << " but object shard is " << ob.hoid.shard_id << std::endl;
1363 return -EFAULT;
1364 }
1365
1366 if (coll_pgid.pgid != pgid) {
1367 cerr << "Skipping object '" << ob.hoid << "' which belongs in pg " << pgid << std::endl;
1368 *skipped_objects = true;
1369 skip_object(bl);
1370 return 0;
1371 }
1372 }
1373
1374 if (!dry_run)
1375 t->touch(coll, ob.hoid);
1376
1377 cout << "Write " << ob.hoid << std::endl;
1378
1379 bufferlist ebl;
1380 bool done = false;
1381 while(!done) {
1382 sectiontype_t type;
1383 int ret = read_section(&type, &ebl);
1384 if (ret)
1385 return ret;
1386
1387 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1388 //cout << "\t\tsection size " << ebl.length() << std::endl;
1389 if (type >= END_OF_TYPES) {
1390 cout << "Skipping unknown object section type" << std::endl;
1391 continue;
1392 }
1393 switch(type) {
1394 case TYPE_DATA:
1395 if (dry_run) break;
1396 ret = get_data(store, coll, ob.hoid, t, ebl);
1397 if (ret) return ret;
1398 break;
1399 case TYPE_ATTRS:
1400 if (dry_run) break;
1401 ret = get_attrs(store, coll, ob.hoid, t, ebl, driver, mapper);
1402 if (ret) return ret;
1403 break;
1404 case TYPE_OMAP_HDR:
1405 if (dry_run) break;
1406 ret = get_omap_hdr(store, coll, ob.hoid, t, ebl);
1407 if (ret) return ret;
1408 break;
1409 case TYPE_OMAP:
1410 if (dry_run) break;
1411 ret = get_omap(store, coll, ob.hoid, t, ebl);
1412 if (ret) return ret;
1413 break;
1414 case TYPE_OBJECT_END:
1415 done = true;
1416 break;
1417 default:
1418 cerr << "Unknown section type " << type << std::endl;
1419 return -EFAULT;
1420 }
1421 }
1422 if (!dry_run) {
1423 wait_until_done(t, [&] {
1424 store->queue_transaction(ch, std::move(*t));
1425 ch->flush();
1426 });
1427 }
1428 return 0;
1429 }
1430
1431 int dump_pg_metadata(Formatter *formatter, bufferlist &bl, metadata_section &ms)
1432 {
1433 auto ebliter = bl.cbegin();
1434 ms.decode(ebliter);
1435
1436 formatter->open_object_section("metadata_section");
1437
1438 formatter->dump_unsigned("pg_disk_version", (int)ms.struct_ver);
1439 formatter->dump_unsigned("map_epoch", ms.map_epoch);
1440
1441 formatter->open_object_section("OSDMap");
1442 ms.osdmap.dump(formatter);
1443 formatter->close_section();
1444 formatter->flush(cout);
1445 cout << std::endl;
1446
1447 formatter->open_object_section("info");
1448 ms.info.dump(formatter);
1449 formatter->close_section();
1450 formatter->flush(cout);
1451
1452 formatter->open_object_section("log");
1453 ms.log.dump(formatter);
1454 formatter->close_section();
1455 formatter->flush(cout);
1456
1457 formatter->open_object_section("pg_missing_t");
1458 ms.missing.dump(formatter);
1459 formatter->close_section();
1460
1461 // XXX: ms.past_intervals?
1462
1463 formatter->close_section();
1464 formatter->flush(cout);
1465
1466 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1467 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1468 return -EFAULT;
1469 }
1470
1471 return 0;
1472 }
1473
1474 int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms,
1475 const OSDSuperblock& sb, spg_t pgid)
1476 {
1477 auto ebliter = bl.cbegin();
1478 ms.decode(ebliter);
1479 spg_t old_pgid = ms.info.pgid;
1480 ms.info.pgid = pgid;
1481
1482 if (debug) {
1483 cout << "export pgid " << old_pgid << std::endl;
1484 cout << "struct_v " << (int)ms.struct_ver << std::endl;
1485 cout << "map epoch " << ms.map_epoch << std::endl;
1486
1487 #ifdef DIAGNOSTIC
1488 Formatter *formatter = new JSONFormatter(true);
1489 formatter->open_object_section("stuff");
1490
1491 formatter->open_object_section("importing OSDMap");
1492 ms.osdmap.dump(formatter);
1493 formatter->close_section();
1494 formatter->flush(cout);
1495 cout << std::endl;
1496
1497 cout << "osd current epoch " << sb.current_epoch << std::endl;
1498
1499 formatter->open_object_section("info");
1500 ms.info.dump(formatter);
1501 formatter->close_section();
1502 formatter->flush(cout);
1503 cout << std::endl;
1504
1505 formatter->open_object_section("log");
1506 ms.log.dump(formatter);
1507 formatter->close_section();
1508 formatter->flush(cout);
1509 cout << std::endl;
1510
1511 formatter->close_section();
1512 formatter->flush(cout);
1513 cout << std::endl;
1514 #endif
1515 }
1516
1517 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1518 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1519 return -EFAULT;
1520 }
1521
1522 if (ms.map_epoch > sb.current_epoch) {
1523 cerr << "ERROR: Export PG's map_epoch " << ms.map_epoch << " > OSD's epoch " << sb.current_epoch << std::endl;
1524 cerr << "The OSD you are using is older than the exported PG" << std::endl;
1525 cerr << "Either use another OSD or join selected OSD to cluster to update it first" << std::endl;
1526 return -EINVAL;
1527 }
1528
1529 // Old exports didn't include OSDMap
1530 if (ms.osdmap.get_epoch() == 0) {
1531 cerr << "WARNING: No OSDMap in old export, this is an ancient export."
1532 " Not supported." << std::endl;
1533 return -EINVAL;
1534 }
1535
1536 if (ms.osdmap.get_epoch() < sb.oldest_map) {
1537 cerr << "PG export's map " << ms.osdmap.get_epoch()
1538 << " is older than OSD's oldest_map " << sb.oldest_map << std::endl;
1539 if (!force) {
1540 cerr << " pass --force to proceed anyway (with incomplete PastIntervals)"
1541 << std::endl;
1542 return -EINVAL;
1543 }
1544 }
1545 if (debug) {
1546 cerr << "Import pgid " << ms.info.pgid << std::endl;
1547 cerr << "Previous past_intervals " << ms.past_intervals << std::endl;
1548 cerr << "history.same_interval_since "
1549 << ms.info.history.same_interval_since << std::endl;
1550 }
1551
1552 return 0;
1553 }
1554
1555 // out: pg_log_t that only has entries that apply to import_pgid using curmap
1556 // reject: Entries rejected from "in" are in the reject.log. Other fields not set.
1557 void filter_divergent_priors(spg_t import_pgid, const OSDMap &curmap,
1558 const string &hit_set_namespace, const divergent_priors_t &in,
1559 divergent_priors_t &out, divergent_priors_t &reject)
1560 {
1561 out.clear();
1562 reject.clear();
1563
1564 for (divergent_priors_t::const_iterator i = in.begin();
1565 i != in.end(); ++i) {
1566
1567 // Reject divergent priors for temporary objects
1568 if (i->second.is_temp()) {
1569 reject.insert(*i);
1570 continue;
1571 }
1572
1573 if (i->second.nspace != hit_set_namespace) {
1574 object_t oid = i->second.oid;
1575 object_locator_t loc(i->second);
1576 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
1577 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
1578
1579 if (import_pgid.pgid == pgid) {
1580 out.insert(*i);
1581 } else {
1582 reject.insert(*i);
1583 }
1584 } else {
1585 out.insert(*i);
1586 }
1587 }
1588 }
1589
1590 int ObjectStoreTool::dump_export(Formatter *formatter)
1591 {
1592 bufferlist ebl;
1593 pg_info_t info;
1594 PGLog::IndexedLog log;
1595 //bool skipped_objects = false;
1596
1597 int ret = read_super();
1598 if (ret)
1599 return ret;
1600
1601 if (sh.magic != super_header::super_magic) {
1602 cerr << "Invalid magic number" << std::endl;
1603 return -EFAULT;
1604 }
1605
1606 if (sh.version > super_header::super_ver) {
1607 cerr << "Can't handle export format version=" << sh.version << std::endl;
1608 return -EINVAL;
1609 }
1610
1611 formatter->open_object_section("Export");
1612
1613 //First section must be TYPE_PG_BEGIN
1614 sectiontype_t type;
1615 ret = read_section(&type, &ebl);
1616 if (ret)
1617 return ret;
1618 if (type == TYPE_POOL_BEGIN) {
1619 cerr << "Dump of pool exports not supported" << std::endl;
1620 return -EINVAL;
1621 } else if (type != TYPE_PG_BEGIN) {
1622 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
1623 return -EFAULT;
1624 }
1625
1626 auto ebliter = ebl.cbegin();
1627 pg_begin pgb;
1628 pgb.decode(ebliter);
1629 spg_t pgid = pgb.pgid;
1630
1631 formatter->dump_string("pgid", stringify(pgid));
1632 formatter->dump_string("cluster_fsid", stringify(pgb.superblock.cluster_fsid));
1633 formatter->dump_string("features", stringify(pgb.superblock.compat_features));
1634
1635 bool done = false;
1636 bool found_metadata = false;
1637 metadata_section ms;
1638 bool objects_started = false;
1639 while(!done) {
1640 ret = read_section(&type, &ebl);
1641 if (ret)
1642 return ret;
1643
1644 if (debug) {
1645 cerr << "dump_export: Section type " << std::to_string(type) << std::endl;
1646 }
1647 if (type >= END_OF_TYPES) {
1648 cerr << "Skipping unknown section type" << std::endl;
1649 continue;
1650 }
1651 switch(type) {
1652 case TYPE_OBJECT_BEGIN:
1653 if (!objects_started) {
1654 formatter->open_array_section("objects");
1655 objects_started = true;
1656 }
1657 ret = dump_object(formatter, ebl);
1658 if (ret) return ret;
1659 break;
1660 case TYPE_PG_METADATA:
1661 if (objects_started)
1662 cerr << "WARNING: metadata_section out of order" << std::endl;
1663 ret = dump_pg_metadata(formatter, ebl, ms);
1664 if (ret) return ret;
1665 found_metadata = true;
1666 break;
1667 case TYPE_PG_END:
1668 if (objects_started) {
1669 formatter->close_section();
1670 }
1671 done = true;
1672 break;
1673 default:
1674 cerr << "Unknown section type " << std::to_string(type) << std::endl;
1675 return -EFAULT;
1676 }
1677 }
1678
1679 if (!found_metadata) {
1680 cerr << "Missing metadata section" << std::endl;
1681 return -EFAULT;
1682 }
1683
1684 formatter->close_section();
1685 formatter->flush(cout);
1686
1687 return 0;
1688 }
1689
1690 int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb,
1691 bool force, std::string pgidstr)
1692 {
1693 bufferlist ebl;
1694 pg_info_t info;
1695 PGLog::IndexedLog log;
1696 bool skipped_objects = false;
1697
1698 if (!dry_run)
1699 finish_remove_pgs(store);
1700
1701 int ret = read_super();
1702 if (ret)
1703 return ret;
1704
1705 if (sh.magic != super_header::super_magic) {
1706 cerr << "Invalid magic number" << std::endl;
1707 return -EFAULT;
1708 }
1709
1710 if (sh.version > super_header::super_ver) {
1711 cerr << "Can't handle export format version=" << sh.version << std::endl;
1712 return -EINVAL;
1713 }
1714
1715 //First section must be TYPE_PG_BEGIN
1716 sectiontype_t type;
1717 ret = read_section(&type, &ebl);
1718 if (ret)
1719 return ret;
1720 if (type == TYPE_POOL_BEGIN) {
1721 cerr << "Pool exports cannot be imported into a PG" << std::endl;
1722 return -EINVAL;
1723 } else if (type != TYPE_PG_BEGIN) {
1724 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
1725 return -EFAULT;
1726 }
1727
1728 auto ebliter = ebl.cbegin();
1729 pg_begin pgb;
1730 pgb.decode(ebliter);
1731 spg_t pgid = pgb.pgid;
1732
1733 if (pgidstr.length()) {
1734 spg_t user_pgid;
1735
1736 bool ok = user_pgid.parse(pgidstr.c_str());
1737 // This succeeded in main() already
1738 ceph_assert(ok);
1739 if (pgid != user_pgid) {
1740 cerr << "specified pgid " << user_pgid
1741 << " does not match actual pgid " << pgid << std::endl;
1742 return -EINVAL;
1743 }
1744 }
1745
1746 if (!pgb.superblock.cluster_fsid.is_zero()
1747 && pgb.superblock.cluster_fsid != sb.cluster_fsid) {
1748 cerr << "Export came from different cluster with fsid "
1749 << pgb.superblock.cluster_fsid << std::endl;
1750 return -EINVAL;
1751 }
1752
1753 if (debug) {
1754 cerr << "Exported features: " << pgb.superblock.compat_features << std::endl;
1755 }
1756
1757 // Special case: Old export has SHARDS incompat feature on replicated pg, removqqe it
1758 if (pgid.is_no_shard())
1759 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1760
1761 if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) {
1762 CompatSet unsupported = sb.compat_features.unsupported(pgb.superblock.compat_features);
1763
1764 cerr << "Export has incompatible features set " << unsupported << std::endl;
1765
1766 // Let them import if they specify the --force option
1767 if (!force)
1768 return 11; // Positive return means exit status
1769 }
1770
1771 // we need the latest OSDMap to check for collisions
1772 OSDMap curmap;
1773 bufferlist bl;
1774 ret = get_osdmap(store, sb.current_epoch, curmap, bl);
1775 if (ret) {
1776 cerr << "Can't find latest local OSDMap " << sb.current_epoch << std::endl;
1777 return ret;
1778 }
1779 if (!curmap.have_pg_pool(pgid.pgid.m_pool)) {
1780 cerr << "Pool " << pgid.pgid.m_pool << " no longer exists" << std::endl;
1781 // Special exit code for this error, used by test code
1782 return 10; // Positive return means exit status
1783 }
1784
1785 pool_pg_num_history_t pg_num_history;
1786 get_pg_num_history(store, &pg_num_history);
1787
1788 ghobject_t pgmeta_oid = pgid.make_pgmeta_oid();
1789
1790 // Check for PG already present.
1791 coll_t coll(pgid);
1792 if (store->collection_exists(coll)) {
1793 cerr << "pgid " << pgid << " already exists" << std::endl;
1794 return -EEXIST;
1795 }
1796
1797 ObjectStore::CollectionHandle ch;
1798
1799 OSDriver driver(
1800 store,
1801 coll_t(),
1802 OSD::make_snapmapper_oid());
1803 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pgid.shard);
1804
1805 cout << "Importing pgid " << pgid;
1806 cout << std::endl;
1807
1808 bool done = false;
1809 bool found_metadata = false;
1810 metadata_section ms;
1811 while(!done) {
1812 ret = read_section(&type, &ebl);
1813 if (ret)
1814 return ret;
1815
1816 if (debug) {
1817 cout << __func__ << ": Section type " << std::to_string(type) << std::endl;
1818 }
1819 if (type >= END_OF_TYPES) {
1820 cout << "Skipping unknown section type" << std::endl;
1821 continue;
1822 }
1823 switch(type) {
1824 case TYPE_OBJECT_BEGIN:
1825 ceph_assert(found_metadata);
1826 ret = get_object(store, driver, mapper, coll, ebl, ms.osdmap,
1827 &skipped_objects);
1828 if (ret) return ret;
1829 break;
1830 case TYPE_PG_METADATA:
1831 ret = get_pg_metadata(store, ebl, ms, sb, pgid);
1832 if (ret) return ret;
1833 found_metadata = true;
1834
1835 if (pgid != ms.info.pgid) {
1836 cerr << "specified pgid " << pgid << " does not match import file pgid "
1837 << ms.info.pgid << std::endl;
1838 return -EINVAL;
1839 }
1840
1841 // make sure there are no conflicting splits or merges
1842 if (ms.osdmap.have_pg_pool(pgid.pgid.pool())) {
1843 auto p = pg_num_history.pg_nums.find(pgid.pgid.m_pool);
1844 if (p != pg_num_history.pg_nums.end() &&
1845 !p->second.empty()) {
1846 unsigned start_pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool());
1847 unsigned pg_num = start_pg_num;
1848 for (auto q = p->second.lower_bound(ms.map_epoch);
1849 q != p->second.end();
1850 ++q) {
1851 unsigned new_pg_num = q->second;
1852 cout << "pool " << pgid.pgid.pool() << " pg_num " << pg_num
1853 << " -> " << new_pg_num << std::endl;
1854
1855 // check for merge target
1856 spg_t target;
1857 if (pgid.is_merge_source(pg_num, new_pg_num, &target)) {
1858 // FIXME: this checks assumes the OSD's PG is at the OSD's
1859 // map epoch; it could be, say, at *our* epoch, pre-merge.
1860 coll_t coll(target);
1861 if (store->collection_exists(coll)) {
1862 cerr << "pgid " << pgid << " merges to target " << target
1863 << " which already exists" << std::endl;
1864 return 12;
1865 }
1866 }
1867
1868 // check for split children
1869 set<spg_t> children;
1870 if (pgid.is_split(start_pg_num, new_pg_num, &children)) {
1871 cerr << " children are " << children << std::endl;
1872 for (auto child : children) {
1873 coll_t coll(child);
1874 if (store->collection_exists(coll)) {
1875 cerr << "pgid " << pgid << " splits to " << children
1876 << " and " << child << " exists" << std::endl;
1877 return 12;
1878 }
1879 }
1880 }
1881 pg_num = new_pg_num;
1882 }
1883 }
1884 } else {
1885 cout << "pool " << pgid.pgid.pool() << " doesn't existing, not checking"
1886 << " for splits or mergers" << std::endl;
1887 }
1888
1889 if (!dry_run) {
1890 ObjectStore::Transaction t;
1891 ch = store->create_new_collection(coll);
1892 create_pg_collection(
1893 t, pgid,
1894 pgid.get_split_bits(ms.osdmap.get_pg_pool(pgid.pool())->get_pg_num()));
1895 init_pg_ondisk(t, pgid, NULL);
1896
1897 // mark this coll for removal until we're done
1898 map<string,bufferlist> values;
1899 encode((char)1, values["_remove"]);
1900 t.omap_setkeys(coll, pgid.make_pgmeta_oid(), values);
1901
1902 store->queue_transaction(ch, std::move(t));
1903 }
1904
1905 break;
1906 case TYPE_PG_END:
1907 ceph_assert(found_metadata);
1908 done = true;
1909 break;
1910 default:
1911 cerr << "Unknown section type " << std::to_string(type) << std::endl;
1912 return -EFAULT;
1913 }
1914 }
1915
1916 if (!found_metadata) {
1917 cerr << "Missing metadata section" << std::endl;
1918 return -EFAULT;
1919 }
1920
1921 ObjectStore::Transaction t;
1922 if (!dry_run) {
1923 pg_log_t newlog, reject;
1924 pg_log_t::filter_log(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
1925 ms.log, newlog, reject);
1926 if (debug) {
1927 for (list<pg_log_entry_t>::iterator i = newlog.log.begin();
1928 i != newlog.log.end(); ++i)
1929 cerr << "Keeping log entry " << *i << std::endl;
1930 for (list<pg_log_entry_t>::iterator i = reject.log.begin();
1931 i != reject.log.end(); ++i)
1932 cerr << "Skipping log entry " << *i << std::endl;
1933 }
1934
1935 divergent_priors_t newdp, rejectdp;
1936 filter_divergent_priors(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
1937 ms.divergent_priors, newdp, rejectdp);
1938 ms.divergent_priors = newdp;
1939 if (debug) {
1940 for (divergent_priors_t::iterator i = newdp.begin();
1941 i != newdp.end(); ++i)
1942 cerr << "Keeping divergent_prior " << *i << std::endl;
1943 for (divergent_priors_t::iterator i = rejectdp.begin();
1944 i != rejectdp.end(); ++i)
1945 cerr << "Skipping divergent_prior " << *i << std::endl;
1946 }
1947
1948 ms.missing.filter_objects([&](const hobject_t &obj) {
1949 if (obj.nspace == g_ceph_context->_conf->osd_hit_set_namespace)
1950 return false;
1951 ceph_assert(!obj.is_temp());
1952 object_t oid = obj.oid;
1953 object_locator_t loc(obj);
1954 pg_t raw_pgid = ms.osdmap.object_locator_to_pg(oid, loc);
1955 pg_t _pgid = ms.osdmap.raw_pg_to_pg(raw_pgid);
1956
1957 return pgid.pgid != _pgid;
1958 });
1959
1960
1961 if (debug) {
1962 pg_missing_t missing;
1963 Formatter *formatter = Formatter::create("json-pretty");
1964 dump_log(formatter, cerr, newlog, ms.missing);
1965 delete formatter;
1966 }
1967
1968 // Just like a split invalidate stats since the object count is changed
1969 if (skipped_objects)
1970 ms.info.stats.stats_invalid = true;
1971
1972 ret = write_pg(
1973 t,
1974 ms.map_epoch,
1975 ms.info,
1976 newlog,
1977 ms.past_intervals,
1978 ms.divergent_priors,
1979 ms.missing);
1980 if (ret) return ret;
1981 }
1982
1983 // done, clear removal flag
1984 if (debug)
1985 cerr << "done, clearing removal flag" << std::endl;
1986
1987 if (!dry_run) {
1988 t.omap_rmkey(coll, pgid.make_pgmeta_oid(), "_remove");
1989 wait_until_done(&t, [&] {
1990 store->queue_transaction(ch, std::move(t));
1991 // make sure we flush onreadable items before mapper/driver are destroyed.
1992 ch->flush();
1993 });
1994 }
1995 return 0;
1996 }
1997
1998 int do_list(ObjectStore *store, string pgidstr, string object, boost::optional<std::string> nspace,
1999 Formatter *formatter, bool debug, bool human_readable, bool head)
2000 {
2001 int r;
2002 lookup_ghobject lookup(object, nspace, head);
2003 if (pgidstr.length() > 0) {
2004 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
2005 } else {
2006 r = action_on_all_objects(store, lookup, debug);
2007 }
2008 if (r)
2009 return r;
2010 lookup.dump(formatter, human_readable);
2011 formatter->flush(cout);
2012 return 0;
2013 }
2014
2015 int do_list_slow(ObjectStore *store, string pgidstr, string object,
2016 double threshold, Formatter *formatter, bool debug, bool human_readable)
2017 {
2018 int r;
2019 lookup_slow_ghobject lookup(object, threshold);
2020 if (pgidstr.length() > 0) {
2021 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
2022 } else {
2023 r = action_on_all_objects(store, lookup, debug);
2024 }
2025 if (r)
2026 return r;
2027 lookup.dump(formatter, human_readable);
2028 formatter->flush(cout);
2029 return 0;
2030 }
2031
2032 int do_meta(ObjectStore *store, string object, Formatter *formatter, bool debug, bool human_readable)
2033 {
2034 int r;
2035 boost::optional<std::string> nspace; // Not specified
2036 lookup_ghobject lookup(object, nspace);
2037 r = action_on_all_objects_in_exact_pg(store, coll_t::meta(), lookup, debug);
2038 if (r)
2039 return r;
2040 lookup.dump(formatter, human_readable);
2041 formatter->flush(cout);
2042 return 0;
2043 }
2044
// Selects what remove_object() deletes.
enum rmtype {
  BOTH = 0,       // snap mapper entry and the object itself
  SNAPMAP = 1,    // snap mapper entry only
  NOSNAPMAP = 2   // the object only, snap mapper left untouched
};
2050
2051 int remove_object(coll_t coll, ghobject_t &ghobj,
2052 SnapMapper &mapper,
2053 MapCacher::Transaction<std::string, bufferlist> *_t,
2054 ObjectStore::Transaction *t,
2055 enum rmtype type)
2056 {
2057 if (type == BOTH || type == SNAPMAP) {
2058 int r = mapper.remove_oid(ghobj.hobj, _t);
2059 if (r < 0 && r != -ENOENT) {
2060 cerr << "remove_oid returned " << cpp_strerror(r) << std::endl;
2061 return r;
2062 }
2063 }
2064
2065 if (type == BOTH || type == NOSNAPMAP) {
2066 t->remove(coll, ghobj);
2067 }
2068 return 0;
2069 }
2070
2071 int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent);
2072
2073 int do_remove_object(ObjectStore *store, coll_t coll,
2074 ghobject_t &ghobj, bool all, bool force, enum rmtype type)
2075 {
2076 auto ch = store->open_collection(coll);
2077 spg_t pg;
2078 coll.is_pg_prefix(&pg);
2079 OSDriver driver(
2080 store,
2081 coll_t(),
2082 OSD::make_snapmapper_oid());
2083 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pg.shard);
2084 struct stat st;
2085
2086 int r = store->stat(ch, ghobj, &st);
2087 if (r < 0) {
2088 cerr << "remove: " << cpp_strerror(r) << std::endl;
2089 return r;
2090 }
2091
2092 SnapSet ss;
2093 if (ghobj.hobj.has_snapset()) {
2094 r = get_snapset(store, coll, ghobj, ss, false);
2095 if (r < 0) {
2096 cerr << "Can't get snapset error " << cpp_strerror(r) << std::endl;
2097 // If --force and bad snapset let them remove the head
2098 if (!(force && !all))
2099 return r;
2100 }
2101 // cout << "snapset " << ss << std::endl;
2102 if (!ss.clone_snaps.empty() && !all) {
2103 if (force) {
2104 cout << "WARNING: only removing "
2105 << (ghobj.hobj.is_head() ? "head" : "snapdir")
2106 << " with clones present" << std::endl;
2107 ss.clone_snaps.clear();
2108 } else {
2109 cerr << "Clones are present, use removeall to delete everything"
2110 << std::endl;
2111 return -EINVAL;
2112 }
2113 }
2114 }
2115
2116 ObjectStore::Transaction t;
2117 OSDriver::OSTransaction _t(driver.get_transaction(&t));
2118
2119 ghobject_t snapobj = ghobj;
2120 for (auto& p : ss.clone_snaps) {
2121 snapobj.hobj.snap = p.first;
2122 cout << "remove clone " << snapobj << std::endl;
2123 if (!dry_run) {
2124 r = remove_object(coll, snapobj, mapper, &_t, &t, type);
2125 if (r < 0)
2126 return r;
2127 }
2128 }
2129
2130 cout << "remove " << ghobj << std::endl;
2131
2132 if (!dry_run) {
2133 r = remove_object(coll, ghobj, mapper, &_t, &t, type);
2134 if (r < 0)
2135 return r;
2136 }
2137
2138 if (!dry_run) {
2139 wait_until_done(&t, [&] {
2140 store->queue_transaction(ch, std::move(t));
2141 ch->flush();
2142 });
2143 }
2144 return 0;
2145 }
2146
2147 int do_list_attrs(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2148 {
2149 auto ch = store->open_collection(coll);
2150 map<string,bufferptr,less<>> aset;
2151 int r = store->getattrs(ch, ghobj, aset);
2152 if (r < 0) {
2153 cerr << "getattrs: " << cpp_strerror(r) << std::endl;
2154 return r;
2155 }
2156
2157 for (map<string,bufferptr>::iterator i = aset.begin();i != aset.end(); ++i) {
2158 string key(i->first);
2159 if (outistty)
2160 key = cleanbin(key);
2161 cout << key << std::endl;
2162 }
2163 return 0;
2164 }
2165
2166 int do_list_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2167 {
2168 auto ch = store->open_collection(coll);
2169 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, ghobj);
2170 if (!iter) {
2171 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT) << std::endl;
2172 return -ENOENT;
2173 }
2174 iter->seek_to_first();
2175 map<string, bufferlist> oset;
2176 while(iter->valid()) {
2177 get_omap_batch(iter, oset);
2178
2179 for (map<string,bufferlist>::iterator i = oset.begin();i != oset.end(); ++i) {
2180 string key(i->first);
2181 if (outistty)
2182 key = cleanbin(key);
2183 cout << key << std::endl;
2184 }
2185 }
2186 return 0;
2187 }
2188
2189 int do_get_bytes(ObjectStore *store, coll_t coll, ghobject_t &ghobj, int fd)
2190 {
2191 auto ch = store->open_collection(coll);
2192 struct stat st;
2193 mysize_t total;
2194
2195 int ret = store->stat(ch, ghobj, &st);
2196 if (ret < 0) {
2197 cerr << "get-bytes: " << cpp_strerror(ret) << std::endl;
2198 return ret;
2199 }
2200
2201 total = st.st_size;
2202 if (debug)
2203 cerr << "size=" << total << std::endl;
2204
2205 uint64_t offset = 0;
2206 bufferlist rawdatabl;
2207 while(total > 0) {
2208 rawdatabl.clear();
2209 mysize_t len = max_read;
2210 if (len > total)
2211 len = total;
2212
2213 ret = store->read(ch, ghobj, offset, len, rawdatabl);
2214 if (ret < 0)
2215 return ret;
2216 if (ret == 0)
2217 return -EINVAL;
2218
2219 if (debug)
2220 cerr << "data section offset=" << offset << " len=" << len << std::endl;
2221
2222 total -= ret;
2223 offset += ret;
2224
2225 ret = write(fd, rawdatabl.c_str(), ret);
2226 if (ret == -1) {
2227 perror("write");
2228 return -errno;
2229 }
2230 }
2231
2232 return 0;
2233 }
2234
2235 int do_set_bytes(ObjectStore *store, coll_t coll,
2236 ghobject_t &ghobj, int fd)
2237 {
2238 ObjectStore::Transaction tran;
2239 ObjectStore::Transaction *t = &tran;
2240
2241 if (debug)
2242 cerr << "Write " << ghobj << std::endl;
2243
2244 if (!dry_run) {
2245 t->touch(coll, ghobj);
2246 t->truncate(coll, ghobj, 0);
2247 }
2248
2249 uint64_t offset = 0;
2250 bufferlist rawdatabl;
2251 do {
2252 rawdatabl.clear();
2253 ssize_t bytes = rawdatabl.read_fd(fd, max_read);
2254 if (bytes < 0) {
2255 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
2256 return bytes;
2257 }
2258
2259 if (bytes == 0)
2260 break;
2261
2262 if (debug)
2263 cerr << "\tdata: offset " << offset << " bytes " << bytes << std::endl;
2264 if (!dry_run)
2265 t->write(coll, ghobj, offset, bytes, rawdatabl);
2266
2267 offset += bytes;
2268 // XXX: Should we queue_transaction() every once in a while for very large files
2269 } while(true);
2270
2271 auto ch = store->open_collection(coll);
2272 if (!dry_run)
2273 store->queue_transaction(ch, std::move(*t));
2274 return 0;
2275 }
2276
2277 int do_get_attr(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2278 {
2279 auto ch = store->open_collection(coll);
2280 bufferptr bp;
2281
2282 int r = store->getattr(ch, ghobj, key.c_str(), bp);
2283 if (r < 0) {
2284 cerr << "getattr: " << cpp_strerror(r) << std::endl;
2285 return r;
2286 }
2287
2288 string value(bp.c_str(), bp.length());
2289 if (outistty) {
2290 value = cleanbin(value);
2291 value.push_back('\n');
2292 }
2293 cout << value;
2294
2295 return 0;
2296 }
2297
2298 int do_set_attr(ObjectStore *store, coll_t coll,
2299 ghobject_t &ghobj, string key, int fd)
2300 {
2301 ObjectStore::Transaction tran;
2302 ObjectStore::Transaction *t = &tran;
2303 bufferlist bl;
2304
2305 if (debug)
2306 cerr << "Setattr " << ghobj << std::endl;
2307
2308 int ret = get_fd_data(fd, bl);
2309 if (ret < 0)
2310 return ret;
2311
2312 if (dry_run)
2313 return 0;
2314
2315 t->touch(coll, ghobj);
2316
2317 t->setattr(coll, ghobj, key, bl);
2318
2319 auto ch = store->open_collection(coll);
2320 store->queue_transaction(ch, std::move(*t));
2321 return 0;
2322 }
2323
2324 int do_rm_attr(ObjectStore *store, coll_t coll,
2325 ghobject_t &ghobj, string key)
2326 {
2327 ObjectStore::Transaction tran;
2328 ObjectStore::Transaction *t = &tran;
2329
2330 if (debug)
2331 cerr << "Rmattr " << ghobj << std::endl;
2332
2333 if (dry_run)
2334 return 0;
2335
2336 t->rmattr(coll, ghobj, key);
2337
2338 auto ch = store->open_collection(coll);
2339 store->queue_transaction(ch, std::move(*t));
2340 return 0;
2341 }
2342
2343 int do_get_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2344 {
2345 auto ch = store->open_collection(coll);
2346 set<string> keys;
2347 map<string, bufferlist> out;
2348
2349 keys.insert(key);
2350
2351 int r = store->omap_get_values(ch, ghobj, keys, &out);
2352 if (r < 0) {
2353 cerr << "omap_get_values: " << cpp_strerror(r) << std::endl;
2354 return r;
2355 }
2356
2357 if (out.empty()) {
2358 cerr << "Key not found" << std::endl;
2359 return -ENOENT;
2360 }
2361
2362 ceph_assert(out.size() == 1);
2363
2364 bufferlist bl = out.begin()->second;
2365 string value(bl.c_str(), bl.length());
2366 if (outistty) {
2367 value = cleanbin(value);
2368 value.push_back('\n');
2369 }
2370 cout << value;
2371
2372 return 0;
2373 }
2374
2375 int do_set_omap(ObjectStore *store, coll_t coll,
2376 ghobject_t &ghobj, string key, int fd)
2377 {
2378 ObjectStore::Transaction tran;
2379 ObjectStore::Transaction *t = &tran;
2380 map<string, bufferlist> attrset;
2381 bufferlist valbl;
2382
2383 if (debug)
2384 cerr << "Set_omap " << ghobj << std::endl;
2385
2386 int ret = get_fd_data(fd, valbl);
2387 if (ret < 0)
2388 return ret;
2389
2390 attrset.insert(pair<string, bufferlist>(key, valbl));
2391
2392 if (dry_run)
2393 return 0;
2394
2395 t->touch(coll, ghobj);
2396
2397 t->omap_setkeys(coll, ghobj, attrset);
2398
2399 auto ch = store->open_collection(coll);
2400 store->queue_transaction(ch, std::move(*t));
2401 return 0;
2402 }
2403
2404 int do_rm_omap(ObjectStore *store, coll_t coll,
2405 ghobject_t &ghobj, string key)
2406 {
2407 ObjectStore::Transaction tran;
2408 ObjectStore::Transaction *t = &tran;
2409
2410 if (debug)
2411 cerr << "Rm_omap " << ghobj << std::endl;
2412
2413 if (dry_run)
2414 return 0;
2415
2416 t->omap_rmkey(coll, ghobj, key);
2417
2418 auto ch = store->open_collection(coll);
2419 store->queue_transaction(ch, std::move(*t));
2420 return 0;
2421 }
2422
2423 int do_get_omaphdr(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2424 {
2425 auto ch = store->open_collection(coll);
2426 bufferlist hdrbl;
2427
2428 int r = store->omap_get_header(ch, ghobj, &hdrbl, true);
2429 if (r < 0) {
2430 cerr << "omap_get_header: " << cpp_strerror(r) << std::endl;
2431 return r;
2432 }
2433
2434 string header(hdrbl.c_str(), hdrbl.length());
2435 if (outistty) {
2436 header = cleanbin(header);
2437 header.push_back('\n');
2438 }
2439 cout << header;
2440
2441 return 0;
2442 }
2443
2444 int do_set_omaphdr(ObjectStore *store, coll_t coll,
2445 ghobject_t &ghobj, int fd)
2446 {
2447 ObjectStore::Transaction tran;
2448 ObjectStore::Transaction *t = &tran;
2449 bufferlist hdrbl;
2450
2451 if (debug)
2452 cerr << "Omap_setheader " << ghobj << std::endl;
2453
2454 int ret = get_fd_data(fd, hdrbl);
2455 if (ret)
2456 return ret;
2457
2458 if (dry_run)
2459 return 0;
2460
2461 t->touch(coll, ghobj);
2462
2463 t->omap_setheader(coll, ghobj, hdrbl);
2464
2465 auto ch = store->open_collection(coll);
2466 store->queue_transaction(ch, std::move(*t));
2467 return 0;
2468 }
2469
2470 struct do_fix_lost : public action_on_object_t {
2471 void call(ObjectStore *store, coll_t coll,
2472 ghobject_t &ghobj, object_info_t &oi) override {
2473 if (oi.is_lost()) {
2474 cout << coll << "/" << ghobj << " is lost";
2475 if (!dry_run)
2476 cout << ", fixing";
2477 cout << std::endl;
2478 if (dry_run)
2479 return;
2480 oi.clear_flag(object_info_t::FLAG_LOST);
2481 bufferlist bl;
2482 encode(oi, bl, -1); /* fixme: using full features */
2483 ObjectStore::Transaction t;
2484 t.setattr(coll, ghobj, OI_ATTR, bl);
2485 auto ch = store->open_collection(coll);
2486 store->queue_transaction(ch, std::move(t));
2487 }
2488 return;
2489 }
2490 };
2491
2492 int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent = false)
2493 {
2494 auto ch = store->open_collection(coll);
2495 bufferlist attr;
2496 int r = store->getattr(ch, ghobj, SS_ATTR, attr);
2497 if (r < 0) {
2498 if (!silent)
2499 cerr << "Error getting snapset on : " << make_pair(coll, ghobj) << ", "
2500 << cpp_strerror(r) << std::endl;
2501 return r;
2502 }
2503 auto bp = attr.cbegin();
2504 try {
2505 decode(ss, bp);
2506 } catch (...) {
2507 r = -EINVAL;
2508 cerr << "Error decoding snapset on : " << make_pair(coll, ghobj) << ", "
2509 << cpp_strerror(r) << std::endl;
2510 return r;
2511 }
2512 return 0;
2513 }
2514
2515 int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
2516 {
2517 auto ch = store->open_collection(coll);
2518 int r = 0;
2519 formatter->open_object_section("obj");
2520 formatter->open_object_section("id");
2521 ghobj.dump(formatter);
2522 formatter->close_section();
2523
2524 bufferlist attr;
2525 int gr = store->getattr(ch, ghobj, OI_ATTR, attr);
2526 if (gr < 0) {
2527 r = gr;
2528 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2529 << cpp_strerror(r) << std::endl;
2530 } else {
2531 object_info_t oi;
2532 auto bp = attr.cbegin();
2533 try {
2534 decode(oi, bp);
2535 formatter->open_object_section("info");
2536 oi.dump(formatter);
2537 formatter->close_section();
2538 } catch (...) {
2539 r = -EINVAL;
2540 cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
2541 << cpp_strerror(r) << std::endl;
2542 }
2543 }
2544 struct stat st;
2545 int sr = store->stat(ch, ghobj, &st, true);
2546 if (sr < 0) {
2547 r = sr;
2548 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2549 << cpp_strerror(r) << std::endl;
2550 } else {
2551 formatter->open_object_section("stat");
2552 formatter->dump_int("size", st.st_size);
2553 formatter->dump_int("blksize", st.st_blksize);
2554 formatter->dump_int("blocks", st.st_blocks);
2555 formatter->dump_int("nlink", st.st_nlink);
2556 formatter->close_section();
2557 }
2558
2559 if (ghobj.hobj.has_snapset()) {
2560 SnapSet ss;
2561 int snr = get_snapset(store, coll, ghobj, ss);
2562 if (snr < 0) {
2563 r = snr;
2564 } else {
2565 formatter->open_object_section("SnapSet");
2566 ss.dump(formatter);
2567 formatter->close_section();
2568 }
2569 }
2570 bufferlist hattr;
2571 gr = store->getattr(ch, ghobj, ECUtil::get_hinfo_key(), hattr);
2572 if (gr == 0) {
2573 ECUtil::HashInfo hinfo;
2574 auto hp = hattr.cbegin();
2575 try {
2576 decode(hinfo, hp);
2577 formatter->open_object_section("hinfo");
2578 hinfo.dump(formatter);
2579 formatter->close_section();
2580 } catch (...) {
2581 r = -EINVAL;
2582 cerr << "Error decoding hinfo on : " << make_pair(coll, ghobj) << ", "
2583 << cpp_strerror(r) << std::endl;
2584 }
2585 }
2586 gr = store->dump_onode(ch, ghobj, "onode", formatter);
2587
2588 formatter->close_section();
2589 formatter->flush(cout);
2590 cout << std::endl;
2591 return r;
2592 }
2593
2594 int corrupt_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
2595 {
2596 auto ch = store->open_collection(coll);
2597 bufferlist attr;
2598 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2599 if (r < 0) {
2600 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2601 << cpp_strerror(r) << std::endl;
2602 return r;
2603 }
2604 object_info_t oi;
2605 auto bp = attr.cbegin();
2606 try {
2607 decode(oi, bp);
2608 } catch (...) {
2609 r = -EINVAL;
2610 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2611 << cpp_strerror(r) << std::endl;
2612 return r;
2613 }
2614 if (!dry_run) {
2615 attr.clear();
2616 oi.alloc_hint_flags += 0xff;
2617 ObjectStore::Transaction t;
2618 encode(oi, attr, -1); /* fixme: using full features */
2619 t.setattr(coll, ghobj, OI_ATTR, attr);
2620 auto ch = store->open_collection(coll);
2621 r = store->queue_transaction(ch, std::move(t));
2622 if (r < 0) {
2623 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2624 << cpp_strerror(r) << std::endl;
2625 return r;
2626 }
2627 }
2628 return 0;
2629 }
2630
2631 int set_size(
2632 ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter,
2633 bool corrupt)
2634 {
2635 auto ch = store->open_collection(coll);
2636 if (ghobj.hobj.is_snapdir()) {
2637 cerr << "Can't set the size of a snapdir" << std::endl;
2638 return -EINVAL;
2639 }
2640 bufferlist attr;
2641 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2642 if (r < 0) {
2643 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2644 << cpp_strerror(r) << std::endl;
2645 return r;
2646 }
2647 object_info_t oi;
2648 auto bp = attr.cbegin();
2649 try {
2650 decode(oi, bp);
2651 } catch (...) {
2652 r = -EINVAL;
2653 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2654 << cpp_strerror(r) << std::endl;
2655 return r;
2656 }
2657 struct stat st;
2658 r = store->stat(ch, ghobj, &st, true);
2659 if (r < 0) {
2660 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2661 << cpp_strerror(r) << std::endl;
2662 }
2663 ghobject_t head(ghobj);
2664 SnapSet ss;
2665 bool found_head = true;
2666 map<snapid_t, uint64_t>::iterator csi;
2667 bool is_snap = ghobj.hobj.is_snap();
2668 if (is_snap) {
2669 head.hobj = head.hobj.get_head();
2670 r = get_snapset(store, coll, head, ss, true);
2671 if (r < 0 && r != -ENOENT) {
2672 // Requested get_snapset() silent, so if not -ENOENT show error
2673 cerr << "Error getting snapset on : " << make_pair(coll, head) << ", "
2674 << cpp_strerror(r) << std::endl;
2675 return r;
2676 }
2677 if (r == -ENOENT) {
2678 head.hobj = head.hobj.get_snapdir();
2679 r = get_snapset(store, coll, head, ss);
2680 if (r < 0)
2681 return r;
2682 found_head = false;
2683 } else {
2684 found_head = true;
2685 }
2686 csi = ss.clone_size.find(ghobj.hobj.snap);
2687 if (csi == ss.clone_size.end()) {
2688 cerr << "SnapSet is missing clone_size for snap " << ghobj.hobj.snap << std::endl;
2689 return -EINVAL;
2690 }
2691 }
2692 if ((uint64_t)st.st_size == setsize && oi.size == setsize
2693 && (!is_snap || csi->second == setsize)) {
2694 cout << "Size of object is already " << setsize << std::endl;
2695 return 0;
2696 }
2697 cout << "Setting size to " << setsize << ", stat size " << st.st_size
2698 << ", obj info size " << oi.size;
2699 if (is_snap) {
2700 cout << ", " << (found_head ? "head" : "snapdir")
2701 << " clone_size " << csi->second;
2702 csi->second = setsize;
2703 }
2704 cout << std::endl;
2705 if (!dry_run) {
2706 attr.clear();
2707 oi.size = setsize;
2708 ObjectStore::Transaction t;
2709 // Only modify object info if we want to corrupt it
2710 if (!corrupt && (uint64_t)st.st_size != setsize) {
2711 t.truncate(coll, ghobj, setsize);
2712 // Changing objectstore size will invalidate data_digest, so clear it.
2713 oi.clear_data_digest();
2714 }
2715 encode(oi, attr, -1); /* fixme: using full features */
2716 t.setattr(coll, ghobj, OI_ATTR, attr);
2717 if (is_snap) {
2718 bufferlist snapattr;
2719 snapattr.clear();
2720 encode(ss, snapattr);
2721 t.setattr(coll, head, SS_ATTR, snapattr);
2722 }
2723 auto ch = store->open_collection(coll);
2724 r = store->queue_transaction(ch, std::move(t));
2725 if (r < 0) {
2726 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2727 << cpp_strerror(r) << std::endl;
2728 return r;
2729 }
2730 }
2731 return 0;
2732 }
2733
2734 int clear_data_digest(ObjectStore *store, coll_t coll, ghobject_t &ghobj) {
2735 auto ch = store->open_collection(coll);
2736 bufferlist attr;
2737 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2738 if (r < 0) {
2739 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2740 << cpp_strerror(r) << std::endl;
2741 return r;
2742 }
2743 object_info_t oi;
2744 auto bp = attr.cbegin();
2745 try {
2746 decode(oi, bp);
2747 } catch (...) {
2748 r = -EINVAL;
2749 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2750 << cpp_strerror(r) << std::endl;
2751 return r;
2752 }
2753 if (!dry_run) {
2754 attr.clear();
2755 oi.clear_data_digest();
2756 encode(oi, attr, -1); /* fixme: using full features */
2757 ObjectStore::Transaction t;
2758 t.setattr(coll, ghobj, OI_ATTR, attr);
2759 auto ch = store->open_collection(coll);
2760 r = store->queue_transaction(ch, std::move(t));
2761 if (r < 0) {
2762 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2763 << cpp_strerror(r) << std::endl;
2764 return r;
2765 }
2766 }
2767 return 0;
2768 }
2769
2770 int clear_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj,
2771 string arg)
2772 {
2773 SnapSet ss;
2774 int ret = get_snapset(store, coll, ghobj, ss);
2775 if (ret < 0)
2776 return ret;
2777
2778 // Use "corrupt" to clear entire SnapSet
2779 // Use "seq" to just corrupt SnapSet.seq
2780 if (arg == "corrupt" || arg == "seq")
2781 ss.seq = 0;
2782 // Use "snaps" to just clear SnapSet.clone_snaps
2783 if (arg == "corrupt" || arg == "snaps")
2784 ss.clone_snaps.clear();
2785 // By default just clear clone, clone_overlap and clone_size
2786 if (arg == "corrupt")
2787 arg = "";
2788 if (arg == "" || arg == "clones")
2789 ss.clones.clear();
2790 if (arg == "" || arg == "clone_overlap")
2791 ss.clone_overlap.clear();
2792 if (arg == "" || arg == "clone_size")
2793 ss.clone_size.clear();
2794 // Break all clone sizes by adding 1
2795 if (arg == "size") {
2796 for (map<snapid_t, uint64_t>::iterator i = ss.clone_size.begin();
2797 i != ss.clone_size.end(); ++i)
2798 ++(i->second);
2799 }
2800
2801 if (!dry_run) {
2802 bufferlist bl;
2803 encode(ss, bl);
2804 ObjectStore::Transaction t;
2805 t.setattr(coll, ghobj, SS_ATTR, bl);
2806 auto ch = store->open_collection(coll);
2807 int r = store->queue_transaction(ch, std::move(t));
2808 if (r < 0) {
2809 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2810 << cpp_strerror(r) << std::endl;
2811 return r;
2812 }
2813 }
2814 return 0;
2815 }
2816
2817 vector<snapid_t>::iterator find(vector<snapid_t> &v, snapid_t clid)
2818 {
2819 return std::find(v.begin(), v.end(), clid);
2820 }
2821
2822 map<snapid_t, interval_set<uint64_t> >::iterator
2823 find(map<snapid_t, interval_set<uint64_t> > &m, snapid_t clid)
2824 {
2825 return m.find(clid);
2826 }
2827
2828 map<snapid_t, uint64_t>::iterator find(map<snapid_t, uint64_t> &m,
2829 snapid_t clid)
2830 {
2831 return m.find(clid);
2832 }
2833
2834 template<class T>
2835 int remove_from(T &mv, string name, snapid_t cloneid, bool force)
2836 {
2837 typename T::iterator i = find(mv, cloneid);
2838 if (i != mv.end()) {
2839 mv.erase(i);
2840 } else {
2841 cerr << "Clone " << cloneid << " doesn't exist in " << name;
2842 if (force) {
2843 cerr << " (ignored)" << std::endl;
2844 return 0;
2845 }
2846 cerr << std::endl;
2847 return -EINVAL;
2848 }
2849 return 0;
2850 }
2851
2852 int remove_clone(
2853 ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cloneid, bool force)
2854 {
2855 // XXX: Don't allow this if in a cache tier or former cache tier
2856 // bool allow_incomplete_clones() const {
2857 // return cache_mode != CACHEMODE_NONE || has_flag(FLAG_INCOMPLETE_CLONES);
2858
2859 SnapSet snapset;
2860 int ret = get_snapset(store, coll, ghobj, snapset);
2861 if (ret < 0)
2862 return ret;
2863
2864 // Derived from trim_object()
2865 // ...from snapset
2866 vector<snapid_t>::iterator p;
2867 for (p = snapset.clones.begin(); p != snapset.clones.end(); ++p)
2868 if (*p == cloneid)
2869 break;
2870 if (p == snapset.clones.end()) {
2871 cerr << "Clone " << cloneid << " not present";
2872 return -ENOENT;
2873 }
2874 if (p != snapset.clones.begin()) {
2875 // not the oldest... merge overlap into next older clone
2876 vector<snapid_t>::iterator n = p - 1;
2877 hobject_t prev_coid = ghobj.hobj;
2878 prev_coid.snap = *n;
2879 //bool adjust_prev_bytes = is_present_clone(prev_coid);
2880
2881 //if (adjust_prev_bytes)
2882 // ctx->delta_stats.num_bytes -= snapset.get_clone_bytes(*n);
2883
2884 snapset.clone_overlap[*n].intersection_of(
2885 snapset.clone_overlap[*p]);
2886
2887 //if (adjust_prev_bytes)
2888 // ctx->delta_stats.num_bytes += snapset.get_clone_bytes(*n);
2889 }
2890
2891 ret = remove_from(snapset.clones, "clones", cloneid, force);
2892 if (ret) return ret;
2893 ret = remove_from(snapset.clone_overlap, "clone_overlap", cloneid, force);
2894 if (ret) return ret;
2895 ret = remove_from(snapset.clone_size, "clone_size", cloneid, force);
2896 if (ret) return ret;
2897
2898 if (dry_run)
2899 return 0;
2900
2901 bufferlist bl;
2902 encode(snapset, bl);
2903 ObjectStore::Transaction t;
2904 t.setattr(coll, ghobj, SS_ATTR, bl);
2905 auto ch = store->open_collection(coll);
2906 int r = store->queue_transaction(ch, std::move(t));
2907 if (r < 0) {
2908 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2909 << cpp_strerror(r) << std::endl;
2910 return r;
2911 }
2912 cout << "Removal of clone " << cloneid << " complete" << std::endl;
2913 cout << "Use pg repair after OSD restarted to correct stat information" << std::endl;
2914 return 0;
2915 }
2916
2917 int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst)
2918 {
2919 cout << "dup from " << src->get_type() << ": " << srcpath << "\n"
2920 << " to " << dst->get_type() << ": " << dstpath
2921 << std::endl;
2922 int num, i;
2923 vector<coll_t> collections;
2924 int r;
2925
2926 r = src->mount();
2927 if (r < 0) {
2928 cerr << "failed to mount src: " << cpp_strerror(r) << std::endl;
2929 return r;
2930 }
2931 r = dst->mount();
2932 if (r < 0) {
2933 cerr << "failed to mount dst: " << cpp_strerror(r) << std::endl;
2934 goto out_src;
2935 }
2936
2937 if (src->get_fsid() != dst->get_fsid()) {
2938 cerr << "src fsid " << src->get_fsid() << " != dest " << dst->get_fsid()
2939 << std::endl;
2940 goto out;
2941 }
2942 cout << "fsid " << src->get_fsid() << std::endl;
2943
2944 // make sure dst is empty
2945 r = dst->list_collections(collections);
2946 if (r < 0) {
2947 cerr << "error listing collections on dst: " << cpp_strerror(r) << std::endl;
2948 goto out;
2949 }
2950 if (!collections.empty()) {
2951 cerr << "destination store is not empty" << std::endl;
2952 goto out;
2953 }
2954
2955 r = src->list_collections(collections);
2956 if (r < 0) {
2957 cerr << "error listing collections on src: " << cpp_strerror(r) << std::endl;
2958 goto out;
2959 }
2960
2961 num = collections.size();
2962 cout << num << " collections" << std::endl;
2963 i = 1;
2964 for (auto cid : collections) {
2965 cout << i++ << "/" << num << " " << cid << std::endl;
2966 auto ch = src->open_collection(cid);
2967 auto dch = dst->create_new_collection(cid);
2968 {
2969 ObjectStore::Transaction t;
2970 int bits = src->collection_bits(ch);
2971 if (bits < 0) {
2972 if (src->get_type() == "filestore" && cid.is_meta()) {
2973 bits = 0;
2974 } else {
2975 cerr << "cannot get bit count for collection " << cid << ": "
2976 << cpp_strerror(bits) << std::endl;
2977 goto out;
2978 }
2979 }
2980 t.create_collection(cid, bits);
2981 dst->queue_transaction(dch, std::move(t));
2982 }
2983
2984 ghobject_t pos;
2985 uint64_t n = 0;
2986 uint64_t bytes = 0, keys = 0;
2987 while (true) {
2988 vector<ghobject_t> ls;
2989 r = src->collection_list(ch, pos, ghobject_t::get_max(), 1000, &ls, &pos);
2990 if (r < 0) {
2991 cerr << "collection_list on " << cid << " from " << pos << " got: "
2992 << cpp_strerror(r) << std::endl;
2993 goto out;
2994 }
2995 if (ls.empty()) {
2996 break;
2997 }
2998
2999 for (auto& oid : ls) {
3000 //cout << " " << cid << " " << oid << std::endl;
3001 if (n % 100 == 0) {
3002 cout << " " << std::setw(16) << n << " objects, "
3003 << std::setw(16) << bytes << " bytes, "
3004 << std::setw(16) << keys << " keys"
3005 << std::setw(1) << "\r" << std::flush;
3006 }
3007 n++;
3008
3009 ObjectStore::Transaction t;
3010 t.touch(cid, oid);
3011
3012 map<string,bufferptr,less<>> attrs;
3013 src->getattrs(ch, oid, attrs);
3014 if (!attrs.empty()) {
3015 t.setattrs(cid, oid, attrs);
3016 }
3017
3018 bufferlist bl;
3019 src->read(ch, oid, 0, 0, bl);
3020 if (bl.length()) {
3021 t.write(cid, oid, 0, bl.length(), bl);
3022 bytes += bl.length();
3023 }
3024
3025 bufferlist header;
3026 map<string,bufferlist> omap;
3027 src->omap_get(ch, oid, &header, &omap);
3028 if (header.length()) {
3029 t.omap_setheader(cid, oid, header);
3030 ++keys;
3031 }
3032 if (!omap.empty()) {
3033 keys += omap.size();
3034 t.omap_setkeys(cid, oid, omap);
3035 }
3036
3037 dst->queue_transaction(dch, std::move(t));
3038 }
3039 }
3040 cout << " " << std::setw(16) << n << " objects, "
3041 << std::setw(16) << bytes << " bytes, "
3042 << std::setw(16) << keys << " keys"
3043 << std::setw(1) << std::endl;
3044 }
3045
3046 // keyring
3047 cout << "keyring" << std::endl;
3048 {
3049 bufferlist bl;
3050 string s = srcpath + "/keyring";
3051 string err;
3052 r = bl.read_file(s.c_str(), &err);
3053 if (r < 0) {
3054 cerr << "failed to copy " << s << ": " << err << std::endl;
3055 } else {
3056 string d = dstpath + "/keyring";
3057 bl.write_file(d.c_str(), 0600);
3058 }
3059 }
3060
3061 // osd metadata
3062 cout << "duping osd metadata" << std::endl;
3063 {
3064 for (auto k : {"magic", "whoami", "ceph_fsid", "fsid"}) {
3065 string val;
3066 src->read_meta(k, &val);
3067 dst->write_meta(k, val);
3068 }
3069 }
3070
3071 dst->write_meta("ready", "ready");
3072
3073 cout << "done." << std::endl;
3074 r = 0;
3075 out:
3076 dst->umount();
3077 out_src:
3078 src->umount();
3079 return r;
3080 }
3081
3082 void usage(po::options_description &desc)
3083 {
3084 cerr << std::endl;
3085 cerr << desc << std::endl;
3086 cerr << std::endl;
3087 cerr << "Positional syntax:" << std::endl;
3088 cerr << std::endl;
3089 cerr << "ceph-objectstore-tool ... <object> (get|set)-bytes [file]" << std::endl;
3090 cerr << "ceph-objectstore-tool ... <object> set-(attr|omap) <key> [file]" << std::endl;
3091 cerr << "ceph-objectstore-tool ... <object> (get|rm)-(attr|omap) <key>" << std::endl;
3092 cerr << "ceph-objectstore-tool ... <object> get-omaphdr" << std::endl;
3093 cerr << "ceph-objectstore-tool ... <object> set-omaphdr [file]" << std::endl;
3094 cerr << "ceph-objectstore-tool ... <object> list-attrs" << std::endl;
3095 cerr << "ceph-objectstore-tool ... <object> list-omap" << std::endl;
3096 cerr << "ceph-objectstore-tool ... <object> remove|removeall" << std::endl;
3097 cerr << "ceph-objectstore-tool ... <object> dump" << std::endl;
3098 cerr << "ceph-objectstore-tool ... <object> set-size" << std::endl;
3099 cerr << "ceph-objectstore-tool ... <object> clear-data-digest" << std::endl;
3100 cerr << "ceph-objectstore-tool ... <object> remove-clone-metadata <cloneid>" << std::endl;
3101 cerr << std::endl;
3102 cerr << "<object> can be a JSON object description as displayed" << std::endl;
3103 cerr << "by --op list." << std::endl;
3104 cerr << "<object> can be an object name which will be looked up in all" << std::endl;
3105 cerr << "the OSD's PGs." << std::endl;
3106 cerr << "<object> can be the empty string ('') which with a provided pgid " << std::endl;
3107 cerr << "specifies the pgmeta object" << std::endl;
3108 cerr << std::endl;
3109 cerr << "The optional [file] argument will read stdin or write stdout" << std::endl;
3110 cerr << "if not specified or if '-' specified." << std::endl;
3111 }
3112
// Return true when `check` ends with `ending` (an empty `ending` always
// matches).
bool ends_with(const string& check, const string& ending)
{
  const string::size_type tail = ending.size();
  if (check.size() < tail)
    return false;
  // Compare just the last `tail` characters against the suffix.
  return check.compare(check.size() - tail, tail, ending) == 0;
}
3117
3118 // Based on FileStore::dump_journal(), set-up enough to only dump
3119 int mydump_journal(Formatter *f, string journalpath, bool m_journal_dio)
3120 {
3121 int r;
3122
3123 if (!journalpath.length())
3124 return -EINVAL;
3125
3126 FileJournal *journal = new FileJournal(g_ceph_context, uuid_d(), NULL, NULL,
3127 journalpath.c_str(), m_journal_dio);
3128 r = journal->_fdump(*f, false);
3129 delete journal;
3130 return r;
3131 }
3132
3133 int apply_layout_settings(ObjectStore *os, const OSDSuperblock &superblock,
3134 const string &pool_name, const spg_t &pgid, bool dry_run,
3135 int target_level)
3136 {
3137 int r = 0;
3138
3139 FileStore *fs = dynamic_cast<FileStore*>(os);
3140 if (!fs) {
3141 cerr << "Nothing to do for non-filestore backend" << std::endl;
3142 return 0; // making this return success makes testing easier
3143 }
3144
3145 OSDMap curmap;
3146 bufferlist bl;
3147 r = get_osdmap(os, superblock.current_epoch, curmap, bl);
3148 if (r) {
3149 cerr << "Can't find local OSDMap: " << cpp_strerror(r) << std::endl;
3150 return r;
3151 }
3152
3153 int64_t poolid = -1;
3154 if (pool_name.length()) {
3155 poolid = curmap.lookup_pg_pool_name(pool_name);
3156 if (poolid < 0) {
3157 cerr << "Couldn't find pool " << pool_name << ": " << cpp_strerror(poolid)
3158 << std::endl;
3159 return poolid;
3160 }
3161 }
3162
3163 vector<coll_t> collections, filtered_colls;
3164 r = os->list_collections(collections);
3165 if (r < 0) {
3166 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
3167 return r;
3168 }
3169
3170 for (auto const &coll : collections) {
3171 spg_t coll_pgid;
3172 if (coll.is_pg(&coll_pgid) &&
3173 ((poolid >= 0 && coll_pgid.pool() == (uint64_t)poolid) ||
3174 coll_pgid == pgid)) {
3175 filtered_colls.push_back(coll);
3176 }
3177 }
3178
3179 size_t done = 0, total = filtered_colls.size();
3180 for (auto const &coll : filtered_colls) {
3181 if (dry_run) {
3182 cerr << "Would apply layout settings to " << coll << std::endl;
3183 } else {
3184 cerr << "Finished " << done << "/" << total << " collections" << "\r";
3185 r = fs->apply_layout_settings(coll, target_level);
3186 if (r < 0) {
3187 cerr << "Error applying layout settings to " << coll << std::endl;
3188 return r;
3189 }
3190 }
3191 ++done;
3192 }
3193
3194 cerr << "Finished " << total << "/" << total << " collections" << "\r" << std::endl;
3195 return r;
3196 }
3197
3198 int main(int argc, char **argv)
3199 {
3200 string dpath, jpath, pgidstr, op, file, mountpoint, mon_store_path, object;
3201 string target_data_path, fsid;
3202 string objcmd, arg1, arg2, type, format, argnspace, pool, rmtypestr;
3203 boost::optional<std::string> nspace;
3204 spg_t pgid;
3205 unsigned epoch = 0;
3206 unsigned slow_threshold = 16;
3207 ghobject_t ghobj;
3208 bool human_readable;
3209 Formatter *formatter;
3210 bool head, tty;
3211
3212 po::options_description desc("Allowed options");
3213 desc.add_options()
3214 ("help", "produce help message")
3215 ("type", po::value<string>(&type),
3216 "Arg is one of [bluestore (default), filestore, memstore]")
3217 ("data-path", po::value<string>(&dpath),
3218 "path to object store, mandatory")
3219 ("journal-path", po::value<string>(&jpath),
3220 "path to journal, use if tool can't find it")
3221 ("pgid", po::value<string>(&pgidstr),
3222 "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, and mandatory for apply-layout-settings if --pool is not specified")
3223 ("pool", po::value<string>(&pool),
3224 "Pool name, mandatory for apply-layout-settings if --pgid is not specified")
3225 ("op", po::value<string>(&op),
3226 "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, list-slow-omap, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
3227 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log, statfs]")
3228 ("epoch", po::value<unsigned>(&epoch),
3229 "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
3230 ("file", po::value<string>(&file),
3231 "path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap")
3232 ("mon-store-path", po::value<string>(&mon_store_path),
3233 "path of monstore to update-mon-db")
3234 ("fsid", po::value<string>(&fsid),
3235 "fsid for new store created by mkfs")
3236 ("target-data-path", po::value<string>(&target_data_path),
3237 "path of target object store (for --op dup)")
3238 ("mountpoint", po::value<string>(&mountpoint),
3239 "fuse mountpoint")
3240 ("format", po::value<string>(&format)->default_value("json-pretty"),
3241 "Output format which may be json, json-pretty, xml, xml-pretty")
3242 ("debug", "Enable diagnostic output to stderr")
3243 ("no-mon-config", "Do not contact mons for config")
3244 ("no-superblock", "Do not read superblock")
3245 ("force", "Ignore some types of errors and proceed with operation - USE WITH CAUTION: CORRUPTION POSSIBLE NOW OR IN THE FUTURE")
3246 ("skip-journal-replay", "Disable journal replay")
3247 ("skip-mount-omap", "Disable mounting of omap")
3248 ("head", "Find head/snapdir when searching for objects by name")
3249 ("dry-run", "Don't modify the objectstore")
3250 ("tty", "Treat stdout as a tty (no binary data)")
3251 ("namespace", po::value<string>(&argnspace), "Specify namespace when searching for objects")
3252 ("rmtype", po::value<string>(&rmtypestr), "Specify corrupting object removal 'snapmap' or 'nosnapmap' - TESTING USE ONLY")
3253 ("slow-omap-threshold", po::value<unsigned>(&slow_threshold),
3254 "Threshold (in seconds) to consider omap listing slow (for op=list-slow-omap)")
3255 ;
3256
3257 po::options_description positional("Positional options");
3258 positional.add_options()
3259 ("object", po::value<string>(&object), "'' for pgmeta_oid, object name or ghobject in json")
3260 ("objcmd", po::value<string>(&objcmd), "command [(get|set)-bytes, (get|set|rm)-(attr|omap), (get|set)-omaphdr, list-attrs, list-omap, remove]")
3261 ("arg1", po::value<string>(&arg1), "arg1 based on cmd")
3262 ("arg2", po::value<string>(&arg2), "arg2 based on cmd")
3263 ;
3264
3265 po::options_description all;
3266 all.add(desc).add(positional);
3267
3268 po::positional_options_description pd;
3269 pd.add("object", 1).add("objcmd", 1).add("arg1", 1).add("arg2", 1);
3270
3271 vector<string> ceph_option_strings;
3272
3273 po::variables_map vm;
3274 try {
3275 po::parsed_options parsed =
3276 po::command_line_parser(argc, argv).options(all).allow_unregistered().positional(pd).run();
3277 po::store( parsed, vm);
3278 po::notify(vm);
3279 ceph_option_strings = po::collect_unrecognized(parsed.options,
3280 po::include_positional);
3281 } catch(po::error &e) {
3282 std::cerr << e.what() << std::endl;
3283 return 1;
3284 }
3285
3286 if (vm.count("help")) {
3287 usage(desc);
3288 return 1;
3289 }
3290
3291 // Compatibility with previous option name
3292 if (op == "dump-import")
3293 op = "dump-export";
3294
3295 debug = (vm.count("debug") > 0);
3296
3297 force = (vm.count("force") > 0);
3298
3299 no_superblock = (vm.count("no-superblock") > 0);
3300
3301 if (vm.count("namespace"))
3302 nspace = argnspace;
3303
3304 dry_run = (vm.count("dry-run") > 0);
3305 tty = (vm.count("tty") > 0);
3306
3307 osflagbits_t flags = 0;
3308 if (dry_run || vm.count("skip-journal-replay"))
3309 flags |= SKIP_JOURNAL_REPLAY;
3310 if (vm.count("skip-mount-omap"))
3311 flags |= SKIP_MOUNT_OMAP;
3312 if (op == "update-mon-db")
3313 flags |= SKIP_JOURNAL_REPLAY;
3314
3315 head = (vm.count("head") > 0);
3316
3317 // infer osd id so we can authenticate
3318 char fn[PATH_MAX];
3319 snprintf(fn, sizeof(fn), "%s/whoami", dpath.c_str());
3320 int fd = ::open(fn, O_RDONLY);
3321 if (fd >= 0) {
3322 bufferlist bl;
3323 bl.read_fd(fd, 64);
3324 string s(bl.c_str(), bl.length());
3325 int whoami = atoi(s.c_str());
3326 vector<string> tmp;
3327 // identify ourselves as this osd so we can auth and fetch our configs
3328 tmp.push_back("-n");
3329 tmp.push_back(string("osd.") + stringify(whoami));
3330 // populate osd_data so that the default keyring location works
3331 tmp.push_back("--osd-data");
3332 tmp.push_back(dpath);
3333 tmp.insert(tmp.end(), ceph_option_strings.begin(),
3334 ceph_option_strings.end());
3335 tmp.swap(ceph_option_strings);
3336 }
3337
3338 vector<const char *> ceph_options;
3339 ceph_options.reserve(ceph_options.size() + ceph_option_strings.size());
3340 for (vector<string>::iterator i = ceph_option_strings.begin();
3341 i != ceph_option_strings.end();
3342 ++i) {
3343 ceph_options.push_back(i->c_str());
3344 }
3345
3346 snprintf(fn, sizeof(fn), "%s/type", dpath.c_str());
3347 fd = ::open(fn, O_RDONLY);
3348 if (fd >= 0) {
3349 bufferlist bl;
3350 bl.read_fd(fd, 64);
3351 if (bl.length()) {
3352 string dp_type = string(bl.c_str(), bl.length() - 1); // drop \n
3353 if (vm.count("type") && dp_type != "" && type != dp_type)
3354 cerr << "WARNING: Ignoring type \"" << type << "\" - found data-path type \""
3355 << dp_type << "\"" << std::endl;
3356 type = dp_type;
3357 //cout << "object store type is " << type << std::endl;
3358 }
3359 ::close(fd);
3360 }
3361
3362 if (!vm.count("type") && type == "") {
3363 type = "bluestore";
3364 }
3365 if (!vm.count("data-path") &&
3366 op != "dump-export" &&
3367 !(op == "dump-journal" && type == "filestore")) {
3368 cerr << "Must provide --data-path" << std::endl;
3369 usage(desc);
3370 return 1;
3371 }
3372 if (type == "filestore" && !vm.count("journal-path")) {
3373 jpath = dpath + "/journal";
3374 }
3375 if (!vm.count("op") && !vm.count("object")) {
3376 cerr << "Must provide --op or object command..." << std::endl;
3377 usage(desc);
3378 return 1;
3379 }
3380 if (op != "list" && op != "apply-layout-settings" &&
3381 vm.count("op") && vm.count("object")) {
3382 cerr << "Can't specify both --op and object command syntax" << std::endl;
3383 usage(desc);
3384 return 1;
3385 }
3386 if (op == "apply-layout-settings" && !(vm.count("pool") ^ vm.count("pgid"))) {
3387 cerr << "apply-layout-settings requires either --pool or --pgid"
3388 << std::endl;
3389 usage(desc);
3390 return 1;
3391 }
3392 if (op != "list" && op != "apply-layout-settings" && vm.count("object") && !vm.count("objcmd")) {
3393 cerr << "Invalid syntax, missing command" << std::endl;
3394 usage(desc);
3395 return 1;
3396 }
3397 if (op == "fuse" && mountpoint.length() == 0) {
3398 cerr << "Missing fuse mountpoint" << std::endl;
3399 usage(desc);
3400 return 1;
3401 }
3402 outistty = isatty(STDOUT_FILENO) || tty;
3403
3404 file_fd = fd_none;
3405 if ((op == "export" || op == "export-remove" || op == "get-osdmap" || op == "get-inc-osdmap") && !dry_run) {
3406 if (!vm.count("file") || file == "-") {
3407 if (outistty) {
3408 cerr << "stdout is a tty and no --file filename specified" << std::endl;
3409 return 1;
3410 }
3411 file_fd = STDOUT_FILENO;
3412 } else {
3413 file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
3414 }
3415 } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap") {
3416 if (!vm.count("file") || file == "-") {
3417 if (isatty(STDIN_FILENO)) {
3418 cerr << "stdin is a tty and no --file filename specified" << std::endl;
3419 return 1;
3420 }
3421 file_fd = STDIN_FILENO;
3422 } else {
3423 file_fd = open(file.c_str(), O_RDONLY);
3424 }
3425 }
3426
3427 ObjectStoreTool tool = ObjectStoreTool(file_fd, dry_run);
3428
3429 if (vm.count("file") && file_fd == fd_none && !dry_run) {
3430 cerr << "--file option only applies to import, dump-export, export, export-remove, "
3431 << "get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap" << std::endl;
3432 return 1;
3433 }
3434
3435 if (file_fd != fd_none && file_fd < 0) {
3436 string err = string("file: ") + file;
3437 perror(err.c_str());
3438 return 1;
3439 }
3440 int init_flags = 0;
3441 if (vm.count("no-mon-config") > 0) {
3442 init_flags |= CINIT_FLAG_NO_MON_CONFIG;
3443 }
3444
3445 auto cct = global_init(
3446 NULL, ceph_options,
3447 CEPH_ENTITY_TYPE_OSD,
3448 CODE_ENVIRONMENT_UTILITY_NODOUT,
3449 init_flags);
3450 common_init_finish(g_ceph_context);
3451 if (debug) {
3452 g_conf().set_val_or_die("log_to_stderr", "true");
3453 g_conf().set_val_or_die("err_to_stderr", "true");
3454 }
3455 g_conf().apply_changes(nullptr);
3456
3457 // Special list handling. Treating pretty_format as human readable,
3458 // with one object per line and not an enclosing array.
3459 human_readable = ends_with(format, "-pretty");
3460 if ((op == "list" || op == "meta-list") && human_readable) {
3461 // Remove -pretty from end of format which we know is there
3462 format = format.substr(0, format.size() - strlen("-pretty"));
3463 }
3464
3465 formatter = Formatter::create(format);
3466 if (formatter == NULL) {
3467 cerr << "unrecognized format: " << format << std::endl;
3468 return 1;
3469 }
3470
3471 // Special handling for filestore journal, so we can dump it without mounting
3472 if (op == "dump-journal" && type == "filestore") {
3473 int ret = mydump_journal(formatter, jpath, g_conf()->journal_dio);
3474 if (ret < 0) {
3475 cerr << "journal-path: " << jpath << ": "
3476 << cpp_strerror(ret) << std::endl;
3477 return 1;
3478 }
3479 formatter->flush(cout);
3480 return 0;
3481 }
3482
3483 if (op == "dump-export") {
3484 int ret = tool.dump_export(formatter);
3485 if (ret < 0) {
3486 cerr << "dump-export: "
3487 << cpp_strerror(ret) << std::endl;
3488 return 1;
3489 }
3490 return 0;
3491 }
3492
3493 //Verify that data-path really exists
3494 struct stat st;
3495 if (::stat(dpath.c_str(), &st) == -1) {
3496 string err = string("data-path: ") + dpath;
3497 perror(err.c_str());
3498 return 1;
3499 }
3500
3501 if (pgidstr.length() && pgidstr != "meta" && !pgid.parse(pgidstr.c_str())) {
3502 cerr << "Invalid pgid '" << pgidstr << "' specified" << std::endl;
3503 return 1;
3504 }
3505
3506 //Verify that the journal-path really exists
3507 if (type == "filestore") {
3508 if (::stat(jpath.c_str(), &st) == -1) {
3509 string err = string("journal-path: ") + jpath;
3510 perror(err.c_str());
3511 return 1;
3512 }
3513 if (S_ISDIR(st.st_mode)) {
3514 cerr << "journal-path: " << jpath << ": "
3515 << cpp_strerror(EISDIR) << std::endl;
3516 return 1;
3517 }
3518 }
3519
3520 std::unique_ptr<ObjectStore> fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags);
3521 if (!fs) {
3522 cerr << "Unable to create store of type " << type << std::endl;
3523 return 1;
3524 }
3525
3526 if (op == "fsck" || op == "fsck-deep") {
3527 int r = fs->fsck(op == "fsck-deep");
3528 if (r < 0) {
3529 cerr << "fsck failed: " << cpp_strerror(r) << std::endl;
3530 return 1;
3531 }
3532 if (r > 0) {
3533 cerr << "fsck status: " << r << " remaining error(s) and warning(s)" << std::endl;
3534 return 1;
3535 }
3536 cout << "fsck success" << std::endl;
3537 return 0;
3538 }
3539 if (op == "repair" || op == "repair-deep") {
3540 int r = fs->repair(op == "repair-deep");
3541 if (r < 0) {
3542 cerr << "repair failed: " << cpp_strerror(r) << std::endl;
3543 return 1;
3544 }
3545 if (r > 0) {
3546 cerr << "repair status: " << r << " remaining error(s) and warning(s)" << std::endl;
3547 return 1;
3548 }
3549 cout << "repair success" << std::endl;
3550 return 0;
3551 }
3552 if (op == "mkfs") {
3553 if (fsid.length()) {
3554 uuid_d f;
3555 bool r = f.parse(fsid.c_str());
3556 if (!r) {
3557 cerr << "failed to parse uuid '" << fsid << "'" << std::endl;
3558 return 1;
3559 }
3560 fs->set_fsid(f);
3561 }
3562 int r = fs->mkfs();
3563 if (r < 0) {
3564 cerr << "mkfs failed: " << cpp_strerror(r) << std::endl;
3565 return 1;
3566 }
3567 return 0;
3568 }
3569 if (op == "dup") {
3570 string target_type;
3571 char fn[PATH_MAX];
3572 snprintf(fn, sizeof(fn), "%s/type", target_data_path.c_str());
3573 int fd = ::open(fn, O_RDONLY);
3574 if (fd < 0) {
3575 cerr << "Unable to open " << target_data_path << "/type" << std::endl;
3576 exit(1);
3577 }
3578 bufferlist bl;
3579 bl.read_fd(fd, 64);
3580 if (bl.length()) {
3581 target_type = string(bl.c_str(), bl.length() - 1); // drop \n
3582 }
3583 ::close(fd);
3584 unique_ptr<ObjectStore> targetfs = ObjectStore::create(
3585 g_ceph_context, target_type,
3586 target_data_path, "", 0);
3587 if (!targetfs) {
3588 cerr << "Unable to open store of type " << target_type << std::endl;
3589 return 1;
3590 }
3591 int r = dup(dpath, fs.get(), target_data_path, targetfs.get());
3592 if (r < 0) {
3593 cerr << "dup failed: " << cpp_strerror(r) << std::endl;
3594 return 1;
3595 }
3596 return 0;
3597 }
3598
3599 int ret = fs->mount();
3600 if (ret < 0) {
3601 if (ret == -EBUSY) {
3602 cerr << "OSD has the store locked" << std::endl;
3603 } else {
3604 cerr << "Mount failed with '" << cpp_strerror(ret) << "'" << std::endl;
3605 }
3606 return 1;
3607 }
3608
3609 if (op == "fuse") {
3610 #ifdef HAVE_LIBFUSE
3611 FuseStore fuse(fs.get(), mountpoint);
3612 cout << "mounting fuse at " << mountpoint << " ..." << std::endl;
3613 int r = fuse.main();
3614 fs->umount();
3615 if (r < 0) {
3616 cerr << "failed to mount fuse: " << cpp_strerror(r) << std::endl;
3617 return 1;
3618 }
3619 #else
3620 cerr << "fuse support not enabled" << std::endl;
3621 #endif
3622 return 0;
3623 }
3624
3625 vector<coll_t> ls;
3626 vector<coll_t>::iterator it;
3627 CompatSet supported;
3628
3629 #ifdef INTERNAL_TEST
3630 supported = get_test_compat_set();
3631 #else
3632 supported = OSD::get_osd_compat_set();
3633 #endif
3634
3635 bufferlist bl;
3636 auto ch = fs->open_collection(coll_t::meta());
3637 std::unique_ptr<OSDSuperblock> superblock;
3638 if (!no_superblock) {
3639 superblock.reset(new OSDSuperblock);
3640 bufferlist::const_iterator p;
3641 ret = fs->read(ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
3642 if (ret < 0) {
3643 cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl;
3644 goto out;
3645 }
3646
3647 p = bl.cbegin();
3648 decode(*superblock, p);
3649
3650 if (debug) {
3651 cerr << "Cluster fsid=" << superblock->cluster_fsid << std::endl;
3652 }
3653
3654 if (debug) {
3655 cerr << "Supported features: " << supported << std::endl;
3656 cerr << "On-disk features: " << superblock->compat_features << std::endl;
3657 }
3658 if (supported.compare(superblock->compat_features) == -1) {
3659 CompatSet unsupported = supported.unsupported(superblock->compat_features);
3660 cerr << "On-disk OSD incompatible features set "
3661 << unsupported << std::endl;
3662 ret = -EINVAL;
3663 goto out;
3664 }
3665 }
3666
3667 if (op == "apply-layout-settings") {
3668 int target_level = 0;
3669 // Single positional argument with apply-layout-settings
3670 // for target_level.
3671 if (vm.count("object") && isdigit(object[0])) {
3672 target_level = atoi(object.c_str());
3673 // This requires --arg1 to be specified since
3674 // this is the third positional argument and normally
3675 // used with object operations.
3676 } else if (vm.count("arg1") && isdigit(arg1[0])) {
3677 target_level = atoi(arg1.c_str());
3678 }
3679 ceph_assert(superblock != nullptr);
3680 ret = apply_layout_settings(fs.get(), *superblock, pool, pgid, dry_run, target_level);
3681 goto out;
3682 }
3683
3684 if (op != "list" && vm.count("object")) {
3685 // Special case: Create pgmeta_oid if empty string specified
3686 // This can't conflict with any actual object names.
3687 if (object == "") {
3688 ghobj = pgid.make_pgmeta_oid();
3689 } else {
3690 json_spirit::Value v;
3691 try {
3692 if (!json_spirit::read(object, v) ||
3693 (v.type() != json_spirit::array_type && v.type() != json_spirit::obj_type)) {
3694 // Special: Need head/snapdir so set even if user didn't specify
3695 if (vm.count("objcmd") && (objcmd == "remove-clone-metadata"))
3696 head = true;
3697 lookup_ghobject lookup(object, nspace, head);
3698 if (pgidstr == "meta")
3699 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t::meta(), lookup, debug);
3700 else if (pgidstr.length())
3701 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t(pgid), lookup, debug);
3702 else
3703 ret = action_on_all_objects(fs.get(), lookup, debug);
3704 if (ret) {
3705 throw std::runtime_error("Internal error");
3706 } else {
3707 if (lookup.size() != 1) {
3708 stringstream ss;
3709 if (lookup.size() == 0)
3710 ss << "No object id '" << object << "' found or invalid JSON specified";
3711 else
3712 ss << "Found " << lookup.size() << " objects with id '" << object
3713 << "', please use a JSON spec from --op list instead";
3714 throw std::runtime_error(ss.str());
3715 }
3716 pair<coll_t, ghobject_t> found = lookup.pop();
3717 pgidstr = found.first.to_str();
3718 pgid.parse(pgidstr.c_str());
3719 ghobj = found.second;
3720 }
3721 } else {
3722 stringstream ss;
3723 if (pgidstr.length() == 0 && v.type() != json_spirit::array_type) {
3724 ss << "Without --pgid the object '" << object
3725 << "' must be a JSON array";
3726 throw std::runtime_error(ss.str());
3727 }
3728 if (v.type() == json_spirit::array_type) {
3729 json_spirit::Array array = v.get_array();
3730 if (array.size() != 2) {
3731 ss << "Object '" << object
3732 << "' must be a JSON array with 2 elements";
3733 throw std::runtime_error(ss.str());
3734 }
3735 vector<json_spirit::Value>::iterator i = array.begin();
3736 ceph_assert(i != array.end());
3737 if (i->type() != json_spirit::str_type) {
3738 ss << "Object '" << object
3739 << "' must be a JSON array with the first element a string";
3740 throw std::runtime_error(ss.str());
3741 }
3742 string object_pgidstr = i->get_str();
3743 if (object_pgidstr != "meta") {
3744 spg_t object_pgid;
3745 object_pgid.parse(object_pgidstr.c_str());
3746 if (pgidstr.length() > 0) {
3747 if (object_pgid != pgid) {
3748 ss << "object '" << object
3749 << "' has a pgid different from the --pgid="
3750 << pgidstr << " option";
3751 throw std::runtime_error(ss.str());
3752 }
3753 } else {
3754 pgidstr = object_pgidstr;
3755 pgid = object_pgid;
3756 }
3757 } else {
3758 pgidstr = object_pgidstr;
3759 }
3760 ++i;
3761 v = *i;
3762 }
3763 try {
3764 ghobj.decode(v);
3765 } catch (std::runtime_error& e) {
3766 ss << "Decode object JSON error: " << e.what();
3767 throw std::runtime_error(ss.str());
3768 }
3769 if (pgidstr != "meta" && (uint64_t)pgid.pgid.m_pool != (uint64_t)ghobj.hobj.pool) {
3770 cerr << "Object pool and pgid pool don't match" << std::endl;
3771 ret = 1;
3772 goto out;
3773 }
3774 if (pgidstr != "meta") {
3775 auto ch = fs->open_collection(coll_t(pgid));
3776 if (!ghobj.match(fs->collection_bits(ch), pgid.ps())) {
3777 stringstream ss;
3778 ss << "object " << ghobj << " not contained by pg " << pgid;
3779 throw std::runtime_error(ss.str());
3780 }
3781 }
3782 }
3783 } catch (std::runtime_error& e) {
3784 cerr << e.what() << std::endl;
3785 ret = 1;
3786 goto out;
3787 }
3788 }
3789 }
3790
3791 // The ops which require --pgid option are checked here and
3792 // mentioned in the usage for --pgid.
3793 if ((op == "info" || op == "log" || op == "remove" || op == "export"
3794 || op == "export-remove" || op == "mark-complete"
3795 || op == "reset-last-complete"
3796 || op == "trim-pg-log") &&
3797 pgidstr.length() == 0) {
3798 cerr << "Must provide pgid" << std::endl;
3799 usage(desc);
3800 ret = 1;
3801 goto out;
3802 }
3803
3804 if (op == "import") {
3805 ceph_assert(superblock != nullptr);
3806 try {
3807 ret = tool.do_import(fs.get(), *superblock, force, pgidstr);
3808 }
3809 catch (const buffer::error &e) {
3810 cerr << "do_import threw exception error " << e.what() << std::endl;
3811 ret = -EFAULT;
3812 }
3813 if (ret == -EFAULT) {
3814 cerr << "Corrupt input for import" << std::endl;
3815 }
3816 if (ret == 0)
3817 cout << "Import successful" << std::endl;
3818 goto out;
3819 } else if (op == "dump-journal-mount") {
3820 // Undocumented feature to dump journal with mounted fs
3821 // This doesn't support the format option, but it uses the
3822 // ObjectStore::dump_journal() and mounts to get replay to run.
3823 ret = fs->dump_journal(cout);
3824 if (ret) {
3825 if (ret == -EOPNOTSUPP) {
3826 cerr << "Object store type \"" << type << "\" doesn't support journal dump" << std::endl;
3827 } else {
3828 cerr << "Journal dump failed with error " << cpp_strerror(ret) << std::endl;
3829 }
3830 }
3831 goto out;
3832 } else if (op == "get-osdmap") {
3833 bufferlist bl;
3834 OSDMap osdmap;
3835 if (epoch == 0) {
3836 ceph_assert(superblock != nullptr);
3837 epoch = superblock->current_epoch;
3838 }
3839 ret = get_osdmap(fs.get(), epoch, osdmap, bl);
3840 if (ret) {
3841 cerr << "Failed to get osdmap#" << epoch << ": "
3842 << cpp_strerror(ret) << std::endl;
3843 goto out;
3844 }
3845 ret = bl.write_fd(file_fd);
3846 if (ret) {
3847 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
3848 } else {
3849 cout << "osdmap#" << epoch << " exported." << std::endl;
3850 }
3851 goto out;
3852 } else if (op == "set-osdmap") {
3853 bufferlist bl;
3854 ret = get_fd_data(file_fd, bl);
3855 if (ret < 0) {
3856 cerr << "Failed to read osdmap " << cpp_strerror(ret) << std::endl;
3857 } else {
3858 ret = set_osdmap(fs.get(), epoch, bl, force);
3859 }
3860 goto out;
3861 } else if (op == "get-inc-osdmap") {
3862 bufferlist bl;
3863 if (epoch == 0) {
3864 ceph_assert(superblock != nullptr);
3865 epoch = superblock->current_epoch;
3866 }
3867 ret = get_inc_osdmap(fs.get(), epoch, bl);
3868 if (ret < 0) {
3869 cerr << "Failed to get incremental osdmap# " << epoch << ": "
3870 << cpp_strerror(ret) << std::endl;
3871 goto out;
3872 }
3873 ret = bl.write_fd(file_fd);
3874 if (ret) {
3875 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
3876 } else {
3877 cout << "inc-osdmap#" << epoch << " exported." << std::endl;
3878 }
3879 goto out;
3880 } else if (op == "set-inc-osdmap") {
3881 bufferlist bl;
3882 ret = get_fd_data(file_fd, bl);
3883 if (ret < 0) {
3884 cerr << "Failed to read incremental osdmap " << cpp_strerror(ret) << std::endl;
3885 goto out;
3886 } else {
3887 ret = set_inc_osdmap(fs.get(), epoch, bl, force);
3888 }
3889 goto out;
3890 } else if (op == "update-mon-db") {
3891 if (!vm.count("mon-store-path")) {
3892 cerr << "Please specify the path to monitor db to update" << std::endl;
3893 ret = -EINVAL;
3894 } else {
3895 ceph_assert(superblock != nullptr);
3896 ret = update_mon_db(*fs, *superblock, dpath + "/keyring", mon_store_path);
3897 }
3898 goto out;
3899 }
3900
3901 if (op == "remove") {
3902 if (!force && !dry_run) {
3903 cerr << "Please use export-remove or you must use --force option" << std::endl;
3904 ret = -EINVAL;
3905 goto out;
3906 }
3907 ret = initiate_new_remove_pg(fs.get(), pgid);
3908 if (ret < 0) {
3909 cerr << "PG '" << pgid << "' not found" << std::endl;
3910 goto out;
3911 }
3912 cout << "Remove successful" << std::endl;
3913 goto out;
3914 }
3915
3916 if (op == "fix-lost") {
3917 boost::scoped_ptr<action_on_object_t> action;
3918 action.reset(new do_fix_lost());
3919 if (pgidstr.length())
3920 ret = action_on_all_objects_in_exact_pg(fs.get(), coll_t(pgid), *action, debug);
3921 else
3922 ret = action_on_all_objects(fs.get(), *action, debug);
3923 goto out;
3924 }
3925
3926 if (op == "list") {
3927 ret = do_list(fs.get(), pgidstr, object, nspace, formatter, debug,
3928 human_readable, head);
3929 if (ret < 0) {
3930 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
3931 }
3932 goto out;
3933 }
3934 if (op == "list-slow-omap") {
3935 ret = do_list_slow(fs.get(), pgidstr, object, slow_threshold, formatter, debug,
3936 human_readable);
3937 if (ret < 0) {
3938 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
3939 }
3940 goto out;
3941 }
3942
3943 if (op == "dump-super") {
3944 ceph_assert(superblock != nullptr);
3945 formatter->open_object_section("superblock");
3946 superblock->dump(formatter);
3947 formatter->close_section();
3948 formatter->flush(cout);
3949 cout << std::endl;
3950 goto out;
3951 }
3952
3953 if (op == "statfs") {
3954 store_statfs_t statsbuf;
3955 ret = fs->statfs(&statsbuf);
3956 if (ret < 0) {
3957 cerr << "error from statfs: " << cpp_strerror(ret) << std::endl;
3958 goto out;
3959 }
3960 formatter->open_object_section("statfs");
3961 statsbuf.dump(formatter);
3962 formatter->close_section();
3963 formatter->flush(cout);
3964 cout << std::endl;
3965 goto out;
3966 }
3967
3968 if (op == "meta-list") {
3969 ret = do_meta(fs.get(), object, formatter, debug, human_readable);
3970 if (ret < 0) {
3971 cerr << "do_meta failed: " << cpp_strerror(ret) << std::endl;
3972 }
3973 goto out;
3974 }
3975
3976 ret = fs->list_collections(ls);
3977 if (ret < 0) {
3978 cerr << "failed to list pgs: " << cpp_strerror(ret) << std::endl;
3979 goto out;
3980 }
3981
3982 if (debug && op == "list-pgs")
3983 cout << "Performing list-pgs operation" << std::endl;
3984
3985 // Find pg
3986 for (it = ls.begin(); it != ls.end(); ++it) {
3987 spg_t tmppgid;
3988
3989 if (pgidstr == "meta") {
3990 if (it->to_str() == "meta")
3991 break;
3992 else
3993 continue;
3994 }
3995
3996 if (!it->is_pg(&tmppgid)) {
3997 continue;
3998 }
3999
4000 if (it->is_temp(&tmppgid)) {
4001 continue;
4002 }
4003
4004 if (op != "list-pgs" && tmppgid != pgid) {
4005 continue;
4006 }
4007
4008 if (op != "list-pgs") {
4009 //Found!
4010 break;
4011 }
4012
4013 cout << tmppgid << std::endl;
4014 }
4015
4016 if (op == "list-pgs") {
4017 ret = 0;
4018 goto out;
4019 }
4020
4021 // If not an object command nor any of the ops handled below, then output this usage
4022 // before complaining about a bad pgid
4023 if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log") {
4024 cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
4025 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, statfs)"
4026 << std::endl;
4027 usage(desc);
4028 ret = 1;
4029 goto out;
4030 }
4031 epoch_t map_epoch;
4032 // The following code for export, info, log require omap or !skip-mount-omap
4033 if (it != ls.end()) {
4034
4035 coll_t coll = *it;
4036
4037 if (vm.count("objcmd")) {
4038 ret = 0;
4039 if (objcmd == "remove" || objcmd == "removeall") {
4040 bool all = (objcmd == "removeall");
4041 enum rmtype type = BOTH;
4042 if (rmtypestr == "nosnapmap")
4043 type = NOSNAPMAP;
4044 else if (rmtypestr == "snapmap")
4045 type = SNAPMAP;
4046 ret = do_remove_object(fs.get(), coll, ghobj, all, force, type);
4047 goto out;
4048 } else if (objcmd == "list-attrs") {
4049 ret = do_list_attrs(fs.get(), coll, ghobj);
4050 goto out;
4051 } else if (objcmd == "list-omap") {
4052 ret = do_list_omap(fs.get(), coll, ghobj);
4053 goto out;
4054 } else if (objcmd == "get-bytes" || objcmd == "set-bytes") {
4055 if (objcmd == "get-bytes") {
4056 int fd;
4057 if (vm.count("arg1") == 0 || arg1 == "-") {
4058 fd = STDOUT_FILENO;
4059 } else {
4060 fd = open(arg1.c_str(), O_WRONLY|O_TRUNC|O_CREAT|O_EXCL|O_LARGEFILE, 0666);
4061 if (fd == -1) {
4062 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4063 ret = 1;
4064 goto out;
4065 }
4066 }
4067 ret = do_get_bytes(fs.get(), coll, ghobj, fd);
4068 if (fd != STDOUT_FILENO)
4069 close(fd);
4070 } else {
4071 int fd;
4072 if (vm.count("arg1") == 0 || arg1 == "-") {
4073 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4074 if (isatty(STDIN_FILENO)) {
4075 cerr << "stdin is a tty and no file specified" << std::endl;
4076 ret = 1;
4077 goto out;
4078 }
4079 fd = STDIN_FILENO;
4080 } else {
4081 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4082 if (fd == -1) {
4083 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4084 ret = 1;
4085 goto out;
4086 }
4087 }
4088 ret = do_set_bytes(fs.get(), coll, ghobj, fd);
4089 if (fd != STDIN_FILENO)
4090 close(fd);
4091 }
4092 goto out;
4093 } else if (objcmd == "get-attr") {
4094 if (vm.count("arg1") == 0) {
4095 usage(desc);
4096 ret = 1;
4097 goto out;
4098 }
4099 ret = do_get_attr(fs.get(), coll, ghobj, arg1);
4100 goto out;
4101 } else if (objcmd == "set-attr") {
4102 if (vm.count("arg1") == 0) {
4103 usage(desc);
4104 ret = 1;
4105 }
4106
4107 int fd;
4108 if (vm.count("arg2") == 0 || arg2 == "-") {
4109 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4110 if (isatty(STDIN_FILENO)) {
4111 cerr << "stdin is a tty and no file specified" << std::endl;
4112 ret = 1;
4113 goto out;
4114 }
4115 fd = STDIN_FILENO;
4116 } else {
4117 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4118 if (fd == -1) {
4119 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
4120 ret = 1;
4121 goto out;
4122 }
4123 }
4124 ret = do_set_attr(fs.get(), coll, ghobj, arg1, fd);
4125 if (fd != STDIN_FILENO)
4126 close(fd);
4127 goto out;
4128 } else if (objcmd == "rm-attr") {
4129 if (vm.count("arg1") == 0) {
4130 usage(desc);
4131 ret = 1;
4132 goto out;
4133 }
4134 ret = do_rm_attr(fs.get(), coll, ghobj, arg1);
4135 goto out;
4136 } else if (objcmd == "get-omap") {
4137 if (vm.count("arg1") == 0) {
4138 usage(desc);
4139 ret = 1;
4140 goto out;
4141 }
4142 ret = do_get_omap(fs.get(), coll, ghobj, arg1);
4143 goto out;
4144 } else if (objcmd == "set-omap") {
4145 if (vm.count("arg1") == 0) {
4146 usage(desc);
4147 ret = 1;
4148 goto out;
4149 }
4150 int fd;
4151 if (vm.count("arg2") == 0 || arg2 == "-") {
4152 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4153 if (isatty(STDIN_FILENO)) {
4154 cerr << "stdin is a tty and no file specified" << std::endl;
4155 ret = 1;
4156 goto out;
4157 }
4158 fd = STDIN_FILENO;
4159 } else {
4160 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4161 if (fd == -1) {
4162 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
4163 ret = 1;
4164 goto out;
4165 }
4166 }
4167 ret = do_set_omap(fs.get(), coll, ghobj, arg1, fd);
4168 if (fd != STDIN_FILENO)
4169 close(fd);
4170 goto out;
4171 } else if (objcmd == "rm-omap") {
4172 if (vm.count("arg1") == 0) {
4173 usage(desc);
4174 ret = 1;
4175 goto out;
4176 }
4177 ret = do_rm_omap(fs.get(), coll, ghobj, arg1);
4178 goto out;
4179 } else if (objcmd == "get-omaphdr") {
4180 if (vm.count("arg1")) {
4181 usage(desc);
4182 ret = 1;
4183 goto out;
4184 }
4185 ret = do_get_omaphdr(fs.get(), coll, ghobj);
4186 goto out;
4187 } else if (objcmd == "set-omaphdr") {
4188 // Extra arg
4189 if (vm.count("arg2")) {
4190 usage(desc);
4191 ret = 1;
4192 goto out;
4193 }
4194 int fd;
4195 if (vm.count("arg1") == 0 || arg1 == "-") {
4196 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4197 if (isatty(STDIN_FILENO)) {
4198 cerr << "stdin is a tty and no file specified" << std::endl;
4199 ret = 1;
4200 goto out;
4201 }
4202 fd = STDIN_FILENO;
4203 } else {
4204 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4205 if (fd == -1) {
4206 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4207 ret = 1;
4208 goto out;
4209 }
4210 }
4211 ret = do_set_omaphdr(fs.get(), coll, ghobj, fd);
4212 if (fd != STDIN_FILENO)
4213 close(fd);
4214 goto out;
4215 } else if (objcmd == "dump") {
4216 // There should not be any other arguments
4217 if (vm.count("arg1") || vm.count("arg2")) {
4218 usage(desc);
4219 ret = 1;
4220 goto out;
4221 }
4222 ret = print_obj_info(fs.get(), coll, ghobj, formatter);
4223 goto out;
4224 } else if (objcmd == "corrupt-info") { // Undocumented testing feature
4225 // There should not be any other arguments
4226 if (vm.count("arg1") || vm.count("arg2")) {
4227 usage(desc);
4228 ret = 1;
4229 goto out;
4230 }
4231 ret = corrupt_info(fs.get(), coll, ghobj, formatter);
4232 goto out;
4233 } else if (objcmd == "set-size" || objcmd == "corrupt-size") {
4234 // Undocumented testing feature
4235 bool corrupt = (objcmd == "corrupt-size");
4236 // Extra arg
4237 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4238 usage(desc);
4239 ret = 1;
4240 goto out;
4241 }
4242 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4243 cerr << "Invalid size '" << arg1 << "' specified" << std::endl;
4244 ret = 1;
4245 goto out;
4246 }
4247 uint64_t size = atoll(arg1.c_str());
4248 ret = set_size(fs.get(), coll, ghobj, size, formatter, corrupt);
4249 goto out;
4250 } else if (objcmd == "clear-data-digest") {
4251 ret = clear_data_digest(fs.get(), coll, ghobj);
4252 goto out;
4253 } else if (objcmd == "clear-snapset") {
4254 // UNDOCUMENTED: For testing zap SnapSet
4255 // IGNORE extra args since not in usage anyway
4256 if (!ghobj.hobj.has_snapset()) {
4257 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4258 ret = 1;
4259 goto out;
4260 }
4261 ret = clear_snapset(fs.get(), coll, ghobj, arg1);
4262 goto out;
4263 } else if (objcmd == "remove-clone-metadata") {
4264 // Extra arg
4265 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4266 usage(desc);
4267 ret = 1;
4268 goto out;
4269 }
4270 if (!ghobj.hobj.has_snapset()) {
4271 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4272 ret = 1;
4273 goto out;
4274 }
4275 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4276 cerr << "Invalid cloneid '" << arg1 << "' specified" << std::endl;
4277 ret = 1;
4278 goto out;
4279 }
4280 snapid_t cloneid = atoi(arg1.c_str());
4281 ret = remove_clone(fs.get(), coll, ghobj, cloneid, force);
4282 goto out;
4283 }
4284 cerr << "Unknown object command '" << objcmd << "'" << std::endl;
4285 usage(desc);
4286 ret = 1;
4287 goto out;
4288 }
4289
4290 map_epoch = 0;
4291 ret = PG::peek_map_epoch(fs.get(), pgid, &map_epoch);
4292 if (ret < 0)
4293 cerr << "peek_map_epoch reports error" << std::endl;
4294 if (debug)
4295 cerr << "map_epoch " << map_epoch << std::endl;
4296
4297 pg_info_t info(pgid);
4298 PastIntervals past_intervals;
4299 __u8 struct_ver;
4300 ret = PG::read_info(fs.get(), pgid, coll, info, past_intervals, struct_ver);
4301 if (ret < 0) {
4302 cerr << "read_info error " << cpp_strerror(ret) << std::endl;
4303 goto out;
4304 }
4305 if (struct_ver < PG::get_compat_struct_v()) {
4306 cerr << "PG is too old to upgrade, use older Ceph version" << std::endl;
4307 ret = -EFAULT;
4308 goto out;
4309 }
4310 if (debug)
4311 cerr << "struct_v " << (int)struct_ver << std::endl;
4312
4313 if (op == "export" || op == "export-remove") {
4314 ceph_assert(superblock != nullptr);
4315 ret = tool.do_export(fs.get(), coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals);
4316 if (ret == 0) {
4317 cerr << "Export successful" << std::endl;
4318 if (op == "export-remove") {
4319 ret = initiate_new_remove_pg(fs.get(), pgid);
4320 // Export succeeded, so pgid is there
4321 ceph_assert(ret == 0);
4322 cerr << "Remove successful" << std::endl;
4323 }
4324 }
4325 } else if (op == "info") {
4326 formatter->open_object_section("info");
4327 info.dump(formatter);
4328 formatter->close_section();
4329 formatter->flush(cout);
4330 cout << std::endl;
4331 } else if (op == "log") {
4332 PGLog::IndexedLog log;
4333 pg_missing_t missing;
4334 ret = get_log(fs.get(), struct_ver, pgid, info, log, missing);
4335 if (ret < 0)
4336 goto out;
4337
4338 dump_log(formatter, cout, log, missing);
4339 } else if (op == "mark-complete") {
4340 ObjectStore::Transaction tran;
4341 ObjectStore::Transaction *t = &tran;
4342
4343 if (struct_ver < PG::get_compat_struct_v()) {
4344 cerr << "Can't mark-complete, version mismatch " << (int)struct_ver
4345 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4346 << std::endl;
4347 ret = 1;
4348 goto out;
4349 }
4350
4351 cout << "Marking complete " << std::endl;
4352
4353 ceph_assert(superblock != nullptr);
4354 info.last_update = eversion_t(superblock->current_epoch, info.last_update.version + 1);
4355 info.last_backfill = hobject_t::get_max();
4356 info.last_epoch_started = superblock->current_epoch;
4357 info.history.last_epoch_started = superblock->current_epoch;
4358 info.history.last_epoch_clean = superblock->current_epoch;
4359 past_intervals.clear();
4360
4361 if (!dry_run) {
4362 ret = write_info(*t, map_epoch, info, past_intervals);
4363 if (ret != 0)
4364 goto out;
4365 auto ch = fs->open_collection(coll_t(pgid));
4366 fs->queue_transaction(ch, std::move(*t));
4367 }
4368 cout << "Marking complete succeeded" << std::endl;
4369 } else if (op == "trim-pg-log") {
4370 ret = do_trim_pg_log(fs.get(), coll, info, pgid,
4371 map_epoch, past_intervals);
4372 if (ret < 0) {
4373 cerr << "Error trimming pg log: " << cpp_strerror(ret) << std::endl;
4374 goto out;
4375 }
4376 cout << "Finished trimming pg log" << std::endl;
4377 goto out;
4378 } else if (op == "reset-last-complete") {
4379 if (!force) {
4380 std::cerr << "WARNING: reset-last-complete is extremely dangerous and almost "
4381 << "certain to lead to permanent data loss unless you know exactly "
4382 << "what you are doing. Pass --force to proceed anyway."
4383 << std::endl;
4384 ret = -EINVAL;
4385 goto out;
4386 }
4387 ObjectStore::Transaction tran;
4388 ObjectStore::Transaction *t = &tran;
4389
4390 if (struct_ver < PG::get_compat_struct_v()) {
4391 cerr << "Can't reset-last-complete, version mismatch " << (int)struct_ver
4392 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4393 << std::endl;
4394 ret = 1;
4395 goto out;
4396 }
4397
4398 cout << "Reseting last_complete " << std::endl;
4399
4400 info.last_complete = info.last_update;
4401
4402 if (!dry_run) {
4403 ret = write_info(*t, map_epoch, info, past_intervals);
4404 if (ret != 0)
4405 goto out;
4406 fs->queue_transaction(ch, std::move(*t));
4407 }
4408 cout << "Reseting last_complete succeeded" << std::endl;
4409
4410 } else {
4411 ceph_assert(!"Should have already checked for valid --op");
4412 }
4413 } else {
4414 cerr << "PG '" << pgid << "' not found" << std::endl;
4415 ret = -ENOENT;
4416 }
4417
4418 out:
4419 if (debug) {
4420 ostringstream ostr;
4421 Formatter* f = Formatter::create("json-pretty", "json-pretty", "json-pretty");
4422 cct->get_perfcounters_collection()->dump_formatted(f, false);
4423 ostr << "ceph-objectstore-tool ";
4424 f->flush(ostr);
4425 delete f;
4426 cout << ostr.str() << std::endl;
4427 }
4428
4429 int r = fs->umount();
4430 if (r < 0) {
4431 cerr << "umount failed: " << cpp_strerror(r) << std::endl;
4432 // If no previous error, then use umount() error
4433 if (ret == 0)
4434 ret = r;
4435 }
4436
4437 if (dry_run) {
4438 // Export output can go to stdout, so put this message on stderr
4439 if (op == "export")
4440 cerr << "dry-run: Nothing changed" << std::endl;
4441 else
4442 cout << "dry-run: Nothing changed" << std::endl;
4443 }
4444
4445 if (ret < 0)
4446 ret = 1;
4447 return ret;
4448 }