]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/ceph_objectstore_tool.cc
import 15.2.5
[ceph.git] / ceph / src / tools / ceph_objectstore_tool.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <boost/program_options/variables_map.hpp>
16 #include <boost/program_options/parsers.hpp>
17 #include <boost/scoped_ptr.hpp>
18 #include <boost/optional.hpp>
19
20 #include <stdlib.h>
21
22 #include "common/Formatter.h"
23 #include "common/errno.h"
24 #include "common/ceph_argparse.h"
25 #include "common/url_escape.h"
26
27 #include "global/global_init.h"
28
29 #include "os/ObjectStore.h"
30 #include "os/filestore/FileJournal.h"
31 #include "os/filestore/FileStore.h"
32 #ifdef HAVE_LIBFUSE
33 #include "os/FuseStore.h"
34 #endif
35
36 #include "osd/PGLog.h"
37 #include "osd/OSD.h"
38 #include "osd/PG.h"
39 #include "osd/ECUtil.h"
40
41 #include "json_spirit/json_spirit_value.h"
42 #include "json_spirit/json_spirit_reader.h"
43
44 #include "rebuild_mondb.h"
45 #include "ceph_objectstore_tool.h"
46 #include "include/compat.h"
47 #include "include/util.h"
48
49 namespace po = boost::program_options;
50
51 #ifdef INTERNAL_TEST
52 CompatSet get_test_compat_set() {
53 CompatSet::FeatureSet ceph_osd_feature_compat;
54 CompatSet::FeatureSet ceph_osd_feature_ro_compat;
55 CompatSet::FeatureSet ceph_osd_feature_incompat;
56 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BASE);
57 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_PGINFO);
58 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_OLOC);
59 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEC);
60 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_CATEGORIES);
61 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_HOBJECTPOOL);
62 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
63 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
64 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
65 #ifdef INTERNAL_TEST2
66 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER);
67 ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
68 #endif
69 return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
70 ceph_osd_feature_incompat);
71 }
72 #endif
73
// Upper bound, in bytes, on a single read issued by this tool (1 MiB).
const ssize_t max_read = 1 << 20;
// Sentinel meaning "no file descriptor has been opened".
const int fd_none = INT_MIN;
// NOTE(review): presumably records whether stdout is a terminal; it is set and
// read outside the part of the file visible here -- confirm.
bool outistty = false;
// When true, operations report what they would do but skip store mutations.
bool dry_run = false;
78
79 struct action_on_object_t {
80 virtual ~action_on_object_t() {}
81 virtual void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) = 0;
82 };
83
84 int _action_on_all_objects_in_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
85 {
86 auto ch = store->open_collection(coll);
87 unsigned LIST_AT_A_TIME = 100;
88 ghobject_t next;
89 while (!next.is_max()) {
90 vector<ghobject_t> list;
91 int r = store->collection_list(ch,
92 next,
93 ghobject_t::get_max(),
94 LIST_AT_A_TIME,
95 &list,
96 &next);
97 if (r < 0) {
98 cerr << "Error listing collection: " << coll << ", "
99 << cpp_strerror(r) << std::endl;
100 return r;
101 }
102 for (vector<ghobject_t>::iterator obj = list.begin();
103 obj != list.end();
104 ++obj) {
105 if (obj->is_pgmeta())
106 continue;
107 object_info_t oi;
108 if (coll != coll_t::meta()) {
109 bufferlist attr;
110 r = store->getattr(ch, *obj, OI_ATTR, attr);
111 if (r < 0) {
112 cerr << "Error getting attr on : " << make_pair(coll, *obj) << ", "
113 << cpp_strerror(r) << std::endl;
114 } else {
115 auto bp = attr.cbegin();
116 try {
117 decode(oi, bp);
118 } catch (...) {
119 r = -EINVAL;
120 cerr << "Error decoding attr on : " << make_pair(coll, *obj) << ", "
121 << cpp_strerror(r) << std::endl;
122 }
123 }
124 }
125 action.call(store, coll, *obj, oi);
126 }
127 }
128 return 0;
129 }
130
131 int action_on_all_objects_in_pg(ObjectStore *store, string pgidstr, action_on_object_t &action, bool debug)
132 {
133 spg_t pgid;
134 // Scan collections in case this is an ec pool but no shard specified
135 unsigned scanned = 0;
136 int r = 0;
137 vector<coll_t> colls_to_check;
138 vector<coll_t> candidates;
139 r = store->list_collections(candidates);
140 if (r < 0) {
141 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
142 return r;
143 }
144 pgid.parse(pgidstr.c_str());
145 for (vector<coll_t>::iterator i = candidates.begin();
146 i != candidates.end();
147 ++i) {
148 spg_t cand_pgid;
149 if (!i->is_pg(&cand_pgid))
150 continue;
151
152 // If an exact match or treat no shard as any shard
153 if (cand_pgid == pgid ||
154 (pgid.is_no_shard() && pgid.pgid == cand_pgid.pgid)) {
155 colls_to_check.push_back(*i);
156 }
157 }
158
159 if (debug)
160 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
161 for (vector<coll_t>::iterator i = colls_to_check.begin();
162 i != colls_to_check.end();
163 ++i, ++scanned) {
164 if (debug)
165 cerr << "Scanning " << *i << ", " << scanned << "/"
166 << colls_to_check.size() << " completed" << std::endl;
167 r = _action_on_all_objects_in_pg(store, *i, action, debug);
168 if (r < 0)
169 break;
170 }
171 return r;
172 }
173
174 int action_on_all_objects_in_exact_pg(ObjectStore *store, coll_t coll, action_on_object_t &action, bool debug)
175 {
176 int r = _action_on_all_objects_in_pg(store, coll, action, debug);
177 return r;
178 }
179
180 int _action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
181 {
182 unsigned scanned = 0;
183 int r = 0;
184 vector<coll_t> colls_to_check;
185 vector<coll_t> candidates;
186 r = store->list_collections(candidates);
187 if (r < 0) {
188 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
189 return r;
190 }
191 for (vector<coll_t>::iterator i = candidates.begin();
192 i != candidates.end();
193 ++i) {
194 if (i->is_pg()) {
195 colls_to_check.push_back(*i);
196 }
197 }
198
199 if (debug)
200 cerr << colls_to_check.size() << " pgs to scan" << std::endl;
201 for (vector<coll_t>::iterator i = colls_to_check.begin();
202 i != colls_to_check.end();
203 ++i, ++scanned) {
204 if (debug)
205 cerr << "Scanning " << *i << ", " << scanned << "/"
206 << colls_to_check.size() << " completed" << std::endl;
207 r = _action_on_all_objects_in_pg(store, *i, action, debug);
208 if (r < 0)
209 return r;
210 }
211 return 0;
212 }
213
214 int action_on_all_objects(ObjectStore *store, action_on_object_t &action, bool debug)
215 {
216 int r = _action_on_all_objects(store, action, debug);
217 return r;
218 }
219
220 struct pgid_object_list {
221 list<pair<coll_t, ghobject_t> > _objects;
222
223 void insert(coll_t coll, ghobject_t &ghobj) {
224 _objects.push_back(make_pair(coll, ghobj));
225 }
226
227 void dump(Formatter *f, bool human_readable) const {
228 if (!human_readable)
229 f->open_array_section("pgid_objects");
230 for (list<pair<coll_t, ghobject_t> >::const_iterator i = _objects.begin();
231 i != _objects.end();
232 ++i) {
233 f->open_array_section("pgid_object");
234 spg_t pgid;
235 bool is_pg = i->first.is_pg(&pgid);
236 if (is_pg)
237 f->dump_string("pgid", stringify(pgid));
238 if (!is_pg || !human_readable)
239 f->dump_string("coll", i->first.to_str());
240 f->open_object_section("ghobject");
241 i->second.dump(f);
242 f->close_section();
243 f->close_section();
244 if (human_readable) {
245 f->flush(cout);
246 cout << std::endl;
247 }
248 }
249 if (!human_readable) {
250 f->close_section();
251 f->flush(cout);
252 cout << std::endl;
253 }
254 }
255 };
256
257 struct lookup_ghobject : public action_on_object_t {
258 pgid_object_list _objects;
259 const string _name;
260 const boost::optional<std::string> _namespace;
261 bool _need_snapset;
262
263 lookup_ghobject(const string& name, const boost::optional<std::string>& nspace, bool need_snapset = false) : _name(name),
264 _namespace(nspace), _need_snapset(need_snapset) { }
265
266 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
267 if (_need_snapset && !ghobj.hobj.has_snapset())
268 return;
269 if ((_name.length() == 0 || ghobj.hobj.oid.name == _name) &&
270 (!_namespace || ghobj.hobj.nspace == _namespace))
271 _objects.insert(coll, ghobj);
272 return;
273 }
274
275 int size() const {
276 return _objects._objects.size();
277 }
278
279 pair<coll_t, ghobject_t> pop() {
280 pair<coll_t, ghobject_t> front = _objects._objects.front();
281 _objects._objects.pop_front();
282 return front;
283 }
284
285 void dump(Formatter *f, bool human_readable) const {
286 _objects.dump(f, human_readable);
287 }
288 };
289
290 struct lookup_slow_ghobject : public action_on_object_t {
291 list<tuple<
292 coll_t,
293 ghobject_t,
294 ceph::signedspan,
295 ceph::signedspan,
296 ceph::signedspan,
297 string> > _objects;
298 const string _name;
299 double threshold;
300
301 coll_t last_coll;
302
303 lookup_slow_ghobject(const string& name, double _threshold) :
304 _name(name), threshold(_threshold) { }
305
306 void call(ObjectStore *store, coll_t coll, ghobject_t &ghobj, object_info_t &oi) override {
307 ObjectMap::ObjectMapIterator iter;
308 auto start1 = mono_clock::now();
309 ceph::signedspan first_seek_time = start1 - start1;
310 ceph::signedspan last_seek_time = first_seek_time;
311 ceph::signedspan total_time = first_seek_time;
312 {
313 auto ch = store->open_collection(coll);
314 iter = store->get_omap_iterator(ch, ghobj);
315 if (!iter) {
316 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT)
317 << " obj:" << ghobj
318 << std::endl;
319 return;
320 }
321 auto start = mono_clock::now();
322 iter->seek_to_first();
323 first_seek_time = mono_clock::now() - start;
324
325 while(iter->valid()) {
326 start = mono_clock::now();
327 iter->next();
328 last_seek_time = mono_clock::now() - start;
329 }
330 }
331
332 if (coll != last_coll) {
333 cerr << ">>> inspecting coll" << coll << std::endl;
334 last_coll = coll;
335 }
336
337 total_time = mono_clock::now() - start1;
338 if ( total_time >= make_timespan(threshold)) {
339 _objects.emplace_back(coll, ghobj,
340 first_seek_time, last_seek_time, total_time,
341 url_escape(iter->tail_key()));
342 cerr << ">>>>> found obj " << ghobj
343 << " first_seek_time "
344 << std::chrono::duration_cast<std::chrono::seconds>(first_seek_time).count()
345 << " last_seek_time "
346 << std::chrono::duration_cast<std::chrono::seconds>(last_seek_time).count()
347 << " total_time "
348 << std::chrono::duration_cast<std::chrono::seconds>(total_time).count()
349 << " tail key: " << url_escape(iter->tail_key())
350 << std::endl;
351 }
352 return;
353 }
354
355 int size() const {
356 return _objects.size();
357 }
358
359 void dump(Formatter *f, bool human_readable) const {
360 if (!human_readable)
361 f->open_array_section("objects");
362 for (auto i = _objects.begin();
363 i != _objects.end();
364 ++i) {
365 f->open_array_section("object");
366 coll_t coll;
367 ghobject_t ghobj;
368 ceph::signedspan first_seek_time;
369 ceph::signedspan last_seek_time;
370 ceph::signedspan total_time;
371 string tail_key;
372 std::tie(coll, ghobj, first_seek_time, last_seek_time, total_time, tail_key) = *i;
373
374 spg_t pgid;
375 bool is_pg = coll.is_pg(&pgid);
376 if (is_pg)
377 f->dump_string("pgid", stringify(pgid));
378 if (!is_pg || !human_readable)
379 f->dump_string("coll", coll.to_str());
380 f->dump_object("ghobject", ghobj);
381 f->open_object_section("times");
382 f->dump_int("first_seek_time",
383 std::chrono::duration_cast<std::chrono::seconds>(first_seek_time).count());
384 f->dump_int("last_seek_time",
385 std::chrono::duration_cast<std::chrono::seconds>
386 (last_seek_time).count());
387 f->dump_int("total_time",
388 std::chrono::duration_cast<std::chrono::seconds>(total_time).count());
389 f->dump_string("tail_key", tail_key);
390 f->close_section();
391
392 f->close_section();
393 if (human_readable) {
394 f->flush(cout);
395 cout << std::endl;
396 }
397 }
398 if (!human_readable) {
399 f->close_section();
400 f->flush(cout);
401 cout << std::endl;
402 }
403 }
404 };
405
406 int file_fd = fd_none;
407 bool debug;
408 bool force = false;
409 bool no_superblock = false;
410
411 super_header sh;
412
413 static int get_fd_data(int fd, bufferlist &bl)
414 {
415 uint64_t total = 0;
416 do {
417 ssize_t bytes = bl.read_fd(fd, max_read);
418 if (bytes < 0) {
419 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
420 return bytes;
421 }
422
423 if (bytes == 0)
424 break;
425
426 total += bytes;
427 } while(true);
428
429 ceph_assert(bl.length() == total);
430 return 0;
431 }
432
433 int get_log(ObjectStore *fs, __u8 struct_ver,
434 spg_t pgid, const pg_info_t &info,
435 PGLog::IndexedLog &log, pg_missing_t &missing)
436 {
437 try {
438 auto ch = fs->open_collection(coll_t(pgid));
439 if (!ch) {
440 return -ENOENT;
441 }
442 ostringstream oss;
443 ceph_assert(struct_ver > 0);
444 PGLog::read_log_and_missing(
445 fs, ch,
446 pgid.make_pgmeta_oid(),
447 info, log, missing,
448 oss,
449 g_ceph_context->_conf->osd_ignore_stale_divergent_priors);
450 if (debug && oss.str().size())
451 cerr << oss.str() << std::endl;
452 }
453 catch (const buffer::error &e) {
454 cerr << "read_log_and_missing threw exception error " << e.what() << std::endl;
455 return -EFAULT;
456 }
457 return 0;
458 }
459
460 void dump_log(Formatter *formatter, ostream &out, pg_log_t &log,
461 pg_missing_t &missing)
462 {
463 formatter->open_object_section("op_log");
464 formatter->open_object_section("pg_log_t");
465 log.dump(formatter);
466 formatter->close_section();
467 formatter->flush(out);
468 formatter->open_object_section("pg_missing_t");
469 missing.dump(formatter);
470 formatter->close_section();
471 formatter->close_section();
472 formatter->flush(out);
473 }
474
475 //Based on part of OSD::load_pgs()
476 int finish_remove_pgs(ObjectStore *store)
477 {
478 vector<coll_t> ls;
479 int r = store->list_collections(ls);
480 if (r < 0) {
481 cerr << "finish_remove_pgs: failed to list pgs: " << cpp_strerror(r)
482 << std::endl;
483 return r;
484 }
485
486 for (vector<coll_t>::iterator it = ls.begin();
487 it != ls.end();
488 ++it) {
489 spg_t pgid;
490
491 if (it->is_temp(&pgid) ||
492 (it->is_pg(&pgid) && PG::_has_removal_flag(store, pgid))) {
493 cout << "finish_remove_pgs " << *it << " removing " << pgid << std::endl;
494 OSD::recursive_remove_collection(g_ceph_context, store, pgid, *it);
495 continue;
496 }
497
498 //cout << "finish_remove_pgs ignoring unrecognized " << *it << std::endl;
499 }
500 return 0;
501 }
502
503 #pragma GCC diagnostic ignored "-Wpragmas"
504 #pragma GCC diagnostic push
505 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
506
507 int mark_pg_for_removal(ObjectStore *fs, spg_t pgid, ObjectStore::Transaction *t)
508 {
509 pg_info_t info(pgid);
510 coll_t coll(pgid);
511 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
512
513 epoch_t map_epoch = 0;
514 int r = PG::peek_map_epoch(fs, pgid, &map_epoch);
515 if (r < 0)
516 cerr << __func__ << " warning: peek_map_epoch reported error" << std::endl;
517 PastIntervals past_intervals;
518 __u8 struct_v;
519 r = PG::read_info(fs, pgid, coll, info, past_intervals, struct_v);
520 if (r < 0) {
521 cerr << __func__ << " error on read_info " << cpp_strerror(r) << std::endl;
522 return r;
523 }
524 ceph_assert(struct_v >= 8);
525 // new omap key
526 cout << "setting '_remove' omap key" << std::endl;
527 map<string,bufferlist> values;
528 encode((char)1, values["_remove"]);
529 t->omap_setkeys(coll, pgmeta_oid, values);
530 return 0;
531 }
532
533 #pragma GCC diagnostic pop
534 #pragma GCC diagnostic warning "-Wpragmas"
535
536 template<typename Func>
537 void wait_until_done(ObjectStore::Transaction* txn, Func&& func)
538 {
539 bool finished = false;
540 std::condition_variable cond;
541 std::mutex m;
542 txn->register_on_complete(make_lambda_context([&](int) {
543 std::unique_lock lock{m};
544 finished = true;
545 cond.notify_one();
546 }));
547 std::move(func)();
548 std::unique_lock lock{m};
549 cond.wait(lock, [&] {return finished;});
550 }
551
552 int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid)
553 {
554 if (!dry_run)
555 finish_remove_pgs(store);
556 if (!store->collection_exists(coll_t(r_pgid)))
557 return -ENOENT;
558
559 cout << " marking collection for removal" << std::endl;
560 if (dry_run)
561 return 0;
562 ObjectStore::Transaction rmt;
563 int r = mark_pg_for_removal(store, r_pgid, &rmt);
564 if (r < 0) {
565 return r;
566 }
567 ObjectStore::CollectionHandle ch = store->open_collection(coll_t(r_pgid));
568 store->queue_transaction(ch, std::move(rmt));
569 finish_remove_pgs(store);
570 return r;
571 }
572
573 int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
574 PastIntervals &past_intervals)
575 {
576 //Empty for this
577 coll_t coll(info.pgid);
578 ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
579 map<string,bufferlist> km;
580 string key_to_remove;
581 pg_info_t last_written_info;
582 int ret = prepare_info_keymap(
583 g_ceph_context,
584 &km, &key_to_remove,
585 epoch,
586 info,
587 last_written_info,
588 past_intervals,
589 true, true, false);
590 if (ret) cerr << "Failed to write info" << std::endl;
591 t.omap_setkeys(coll, pgmeta_oid, km);
592 if (!key_to_remove.empty()) {
593 t.omap_rmkey(coll, pgmeta_oid, key_to_remove);
594 }
595 return ret;
596 }
597
598 typedef map<eversion_t, hobject_t> divergent_priors_t;
599
600 int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
601 pg_log_t &log, PastIntervals &past_intervals,
602 divergent_priors_t &divergent,
603 pg_missing_t &missing)
604 {
605 cout << __func__ << " epoch " << epoch << " info " << info << std::endl;
606 int ret = write_info(t, epoch, info, past_intervals);
607 if (ret)
608 return ret;
609 coll_t coll(info.pgid);
610 map<string,bufferlist> km;
611
612 if (!divergent.empty()) {
613 ceph_assert(missing.get_items().empty());
614 PGLog::write_log_and_missing_wo_missing(
615 t, &km, log, coll, info.pgid.make_pgmeta_oid(), divergent, true);
616 } else {
617 pg_missing_tracker_t tmissing(missing);
618 bool rebuilt_missing_set_with_deletes = missing.may_include_deletes;
619 PGLog::write_log_and_missing(
620 t, &km, log, coll, info.pgid.make_pgmeta_oid(), tmissing, true,
621 &rebuilt_missing_set_with_deletes);
622 }
623 t.omap_setkeys(coll, info.pgid.make_pgmeta_oid(), km);
624 return 0;
625 }
626
627 int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
628 pg_info_t &info, const spg_t &pgid,
629 epoch_t map_epoch,
630 PastIntervals &past_intervals)
631 {
632 ghobject_t oid = pgid.make_pgmeta_oid();
633 struct stat st;
634 auto ch = store->open_collection(coll);
635 int r = store->stat(ch, oid, &st);
636 ceph_assert(r == 0);
637 ceph_assert(st.st_size == 0);
638
639 cerr << "Log bounds are: " << "(" << info.log_tail << ","
640 << info.last_update << "]" << std::endl;
641
642 uint64_t max_entries = g_ceph_context->_conf->osd_max_pg_log_entries;
643 if (info.last_update.version - info.log_tail.version <= max_entries) {
644 cerr << "Log not larger than osd_max_pg_log_entries " << max_entries << std::endl;
645 return 0;
646 }
647
648 ceph_assert(info.last_update.version > max_entries);
649 version_t trim_to = info.last_update.version - max_entries;
650 size_t trim_at_once = g_ceph_context->_conf->osd_pg_log_trim_max;
651 eversion_t new_tail;
652 bool done = false;
653
654 while (!done) {
655 // gather keys so we can delete them in a batch without
656 // affecting the iterator
657 set<string> keys_to_trim;
658 {
659 ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
660 if (!p)
661 break;
662 for (p->seek_to_first(); p->valid(); p->next()) {
663 if (p->key()[0] == '_')
664 continue;
665 if (p->key() == "can_rollback_to")
666 continue;
667 if (p->key() == "divergent_priors")
668 continue;
669 if (p->key() == "rollback_info_trimmed_to")
670 continue;
671 if (p->key() == "may_include_deletes_in_missing")
672 continue;
673 if (p->key().substr(0, 7) == string("missing"))
674 continue;
675 if (p->key().substr(0, 4) == string("dup_"))
676 continue;
677
678 bufferlist bl = p->value();
679 auto bp = bl.cbegin();
680 pg_log_entry_t e;
681 try {
682 e.decode_with_checksum(bp);
683 } catch (const buffer::error &e) {
684 cerr << "Error reading pg log entry: " << e << std::endl;
685 }
686 if (debug) {
687 cerr << "read entry " << e << std::endl;
688 }
689 if (e.version.version > trim_to) {
690 done = true;
691 break;
692 }
693 keys_to_trim.insert(p->key());
694 new_tail = e.version;
695 if (keys_to_trim.size() >= trim_at_once)
696 break;
697 }
698
699 if (!p->valid())
700 done = true;
701 } // deconstruct ObjectMapIterator
702
703 // delete the keys
704 if (!dry_run && !keys_to_trim.empty()) {
705 cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
706 ObjectStore::Transaction t;
707 t.omap_rmkeys(coll, oid, keys_to_trim);
708 store->queue_transaction(ch, std::move(t));
709 ch->flush();
710 }
711 }
712
713 // update pg info with new tail
714 if (!dry_run && new_tail != eversion_t()) {
715 info.log_tail = new_tail;
716 ObjectStore::Transaction t;
717 int ret = write_info(t, map_epoch, info, past_intervals);
718 if (ret)
719 return ret;
720 store->queue_transaction(ch, std::move(t));
721 ch->flush();
722 }
723
724 // compact the db since we just removed a bunch of data
725 cerr << "Finished trimming, now compacting..." << std::endl;
726 if (!dry_run)
727 store->compact();
728 return 0;
729 }
730
731 const int OMAP_BATCH_SIZE = 25;
732 void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map<string, bufferlist> &oset)
733 {
734 oset.clear();
735 for (int count = OMAP_BATCH_SIZE; count && iter->valid(); --count, iter->next()) {
736 oset.insert(pair<string, bufferlist>(iter->key(), iter->value()));
737 }
738 }
739
740 int ObjectStoreTool::export_file(ObjectStore *store, coll_t cid, ghobject_t &obj)
741 {
742 struct stat st;
743 mysize_t total;
744 footer ft;
745
746 auto ch = store->open_collection(cid);
747 int ret = store->stat(ch, obj, &st);
748 if (ret < 0)
749 return ret;
750
751 cerr << "Read " << obj << std::endl;
752
753 total = st.st_size;
754 if (debug)
755 cerr << "size=" << total << std::endl;
756
757 object_begin objb(obj);
758
759 {
760 bufferptr bp;
761 bufferlist bl;
762 ret = store->getattr(ch, obj, OI_ATTR, bp);
763 if (ret < 0) {
764 cerr << "getattr failure object_info " << ret << std::endl;
765 return ret;
766 }
767 bl.push_back(bp);
768 decode(objb.oi, bl);
769 if (debug)
770 cerr << "object_info: " << objb.oi << std::endl;
771 }
772
773 // NOTE: we include whiteouts, lost, etc.
774
775 ret = write_section(TYPE_OBJECT_BEGIN, objb, file_fd);
776 if (ret < 0)
777 return ret;
778
779 uint64_t offset = 0;
780 bufferlist rawdatabl;
781 while(total > 0) {
782 rawdatabl.clear();
783 mysize_t len = max_read;
784 if (len > total)
785 len = total;
786
787 ret = store->read(ch, obj, offset, len, rawdatabl);
788 if (ret < 0)
789 return ret;
790 if (ret == 0)
791 return -EINVAL;
792
793 data_section dblock(offset, len, rawdatabl);
794 if (debug)
795 cerr << "data section offset=" << offset << " len=" << len << std::endl;
796
797 total -= ret;
798 offset += ret;
799
800 ret = write_section(TYPE_DATA, dblock, file_fd);
801 if (ret) return ret;
802 }
803
804 //Handle attrs for this object
805 map<string,bufferptr> aset;
806 ret = store->getattrs(ch, obj, aset);
807 if (ret) return ret;
808 attr_section as(aset);
809 ret = write_section(TYPE_ATTRS, as, file_fd);
810 if (ret)
811 return ret;
812
813 if (debug) {
814 cerr << "attrs size " << aset.size() << std::endl;
815 }
816
817 //Handle omap information
818 bufferlist hdrbuf;
819 ret = store->omap_get_header(ch, obj, &hdrbuf, true);
820 if (ret < 0) {
821 cerr << "omap_get_header: " << cpp_strerror(ret) << std::endl;
822 return ret;
823 }
824
825 omap_hdr_section ohs(hdrbuf);
826 ret = write_section(TYPE_OMAP_HDR, ohs, file_fd);
827 if (ret)
828 return ret;
829
830 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, obj);
831 if (!iter) {
832 ret = -ENOENT;
833 cerr << "omap_get_iterator: " << cpp_strerror(ret) << std::endl;
834 return ret;
835 }
836 iter->seek_to_first();
837 int mapcount = 0;
838 map<string, bufferlist> out;
839 while(iter->valid()) {
840 get_omap_batch(iter, out);
841
842 if (out.empty()) break;
843
844 mapcount += out.size();
845 omap_section oms(out);
846 ret = write_section(TYPE_OMAP, oms, file_fd);
847 if (ret)
848 return ret;
849 }
850 if (debug)
851 cerr << "omap map size " << mapcount << std::endl;
852
853 ret = write_simple(TYPE_OBJECT_END, file_fd);
854 if (ret)
855 return ret;
856
857 return 0;
858 }
859
860 int ObjectStoreTool::export_files(ObjectStore *store, coll_t coll)
861 {
862 ghobject_t next;
863 auto ch = store->open_collection(coll);
864 while (!next.is_max()) {
865 vector<ghobject_t> objects;
866 int r = store->collection_list(ch, next, ghobject_t::get_max(), 300,
867 &objects, &next);
868 if (r < 0)
869 return r;
870 for (vector<ghobject_t>::iterator i = objects.begin();
871 i != objects.end();
872 ++i) {
873 ceph_assert(!i->hobj.is_meta());
874 if (i->is_pgmeta() || i->hobj.is_temp() || !i->is_no_gen()) {
875 continue;
876 }
877 r = export_file(store, coll, *i);
878 if (r < 0)
879 return r;
880 }
881 }
882 return 0;
883 }
884
885 int set_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
886 OSDMap::Incremental inc;
887 auto it = bl.cbegin();
888 inc.decode(it);
889 if (e == 0) {
890 e = inc.epoch;
891 } else if (e != inc.epoch) {
892 cerr << "incremental.epoch mismatch: "
893 << inc.epoch << " != " << e << std::endl;
894 if (force) {
895 cerr << "But will continue anyway." << std::endl;
896 } else {
897 return -EINVAL;
898 }
899 }
900 auto ch = store->open_collection(coll_t::meta());
901 const ghobject_t inc_oid = OSD::get_inc_osdmap_pobject_name(e);
902 if (!store->exists(ch, inc_oid)) {
903 cerr << "inc-osdmap (" << inc_oid << ") does not exist." << std::endl;
904 if (!force) {
905 return -ENOENT;
906 }
907 cout << "Creating a new epoch." << std::endl;
908 }
909 if (dry_run)
910 return 0;
911 ObjectStore::Transaction t;
912 t.write(coll_t::meta(), inc_oid, 0, bl.length(), bl);
913 t.truncate(coll_t::meta(), inc_oid, bl.length());
914 store->queue_transaction(ch, std::move(t));
915 return 0;
916 }
917
918 int get_inc_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl)
919 {
920 auto ch = store->open_collection(coll_t::meta());
921 if (store->read(ch,
922 OSD::get_inc_osdmap_pobject_name(e),
923 0, 0, bl) < 0) {
924 return -ENOENT;
925 }
926 return 0;
927 }
928
929 int set_osdmap(ObjectStore *store, epoch_t e, bufferlist& bl, bool force) {
930 OSDMap osdmap;
931 osdmap.decode(bl);
932 if (e == 0) {
933 e = osdmap.get_epoch();
934 } else if (e != osdmap.get_epoch()) {
935 cerr << "osdmap.epoch mismatch: "
936 << e << " != " << osdmap.get_epoch() << std::endl;
937 if (force) {
938 cerr << "But will continue anyway." << std::endl;
939 } else {
940 return -EINVAL;
941 }
942 }
943 auto ch = store->open_collection(coll_t::meta());
944 const ghobject_t full_oid = OSD::get_osdmap_pobject_name(e);
945 if (!store->exists(ch, full_oid)) {
946 cerr << "osdmap (" << full_oid << ") does not exist." << std::endl;
947 if (!force) {
948 return -ENOENT;
949 }
950 cout << "Creating a new epoch." << std::endl;
951 }
952 if (dry_run)
953 return 0;
954 ObjectStore::Transaction t;
955 t.write(coll_t::meta(), full_oid, 0, bl.length(), bl);
956 t.truncate(coll_t::meta(), full_oid, bl.length());
957 store->queue_transaction(ch, std::move(t));
958 return 0;
959 }
960
961 int get_osdmap(ObjectStore *store, epoch_t e, OSDMap &osdmap, bufferlist& bl)
962 {
963 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
964 bool found = store->read(
965 ch, OSD::get_osdmap_pobject_name(e), 0, 0, bl) >= 0;
966 if (!found) {
967 cerr << "Can't find OSDMap for pg epoch " << e << std::endl;
968 return -ENOENT;
969 }
970 osdmap.decode(bl);
971 if (debug)
972 cerr << osdmap << std::endl;
973 return 0;
974 }
975
976 int get_pg_num_history(ObjectStore *store, pool_pg_num_history_t *h)
977 {
978 ObjectStore::CollectionHandle ch = store->open_collection(coll_t::meta());
979 bufferlist bl;
980 auto pghist = OSD::make_pg_num_history_oid();
981 int r = store->read(ch, pghist, 0, 0, bl, 0);
982 if (r >= 0 && bl.length() > 0) {
983 auto p = bl.cbegin();
984 decode(*h, p);
985 }
986 cout << __func__ << " pg_num_history " << *h << std::endl;
987 return 0;
988 }
989
990 int add_osdmap(ObjectStore *store, metadata_section &ms)
991 {
992 return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl);
993 }
994
995 int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid,
996 pg_info_t &info, epoch_t map_epoch, __u8 struct_ver,
997 const OSDSuperblock& superblock,
998 PastIntervals &past_intervals)
999 {
1000 PGLog::IndexedLog log;
1001 pg_missing_t missing;
1002
1003 cerr << "Exporting " << pgid << " info " << info << std::endl;
1004
1005 int ret = get_log(fs, struct_ver, pgid, info, log, missing);
1006 if (ret > 0)
1007 return ret;
1008
1009 if (debug) {
1010 Formatter *formatter = Formatter::create("json-pretty");
1011 ceph_assert(formatter);
1012 dump_log(formatter, cerr, log, missing);
1013 delete formatter;
1014 }
1015 write_super();
1016
1017 pg_begin pgb(pgid, superblock);
1018 // Special case: If replicated pg don't require the importing OSD to have shard feature
1019 if (pgid.is_no_shard()) {
1020 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1021 }
1022 ret = write_section(TYPE_PG_BEGIN, pgb, file_fd);
1023 if (ret)
1024 return ret;
1025
1026 // The metadata_section is now before files, so import can detect
1027 // errors and abort without wasting time.
1028 metadata_section ms(
1029 struct_ver,
1030 map_epoch,
1031 info,
1032 log,
1033 past_intervals,
1034 missing);
1035 ret = add_osdmap(fs, ms);
1036 if (ret)
1037 return ret;
1038 ret = write_section(TYPE_PG_METADATA, ms, file_fd);
1039 if (ret)
1040 return ret;
1041
1042 ret = export_files(fs, coll);
1043 if (ret) {
1044 cerr << "export_files error " << ret << std::endl;
1045 return ret;
1046 }
1047
1048 ret = write_simple(TYPE_PG_END, file_fd);
1049 if (ret)
1050 return ret;
1051
1052 return 0;
1053 }
1054
1055 int dump_data(Formatter *formatter, bufferlist &bl)
1056 {
1057 auto ebliter = bl.cbegin();
1058 data_section ds;
1059 ds.decode(ebliter);
1060
1061 formatter->open_object_section("data_block");
1062 formatter->dump_unsigned("offset", ds.offset);
1063 formatter->dump_unsigned("len", ds.len);
1064 // XXX: Add option to dump data like od -cx ?
1065 formatter->close_section();
1066 formatter->flush(cout);
1067 return 0;
1068 }
1069
1070 int get_data(ObjectStore *store, coll_t coll, ghobject_t hoid,
1071 ObjectStore::Transaction *t, bufferlist &bl)
1072 {
1073 auto ebliter = bl.cbegin();
1074 data_section ds;
1075 ds.decode(ebliter);
1076
1077 if (debug)
1078 cerr << "\tdata: offset " << ds.offset << " len " << ds.len << std::endl;
1079 t->write(coll, hoid, ds.offset, ds.len, ds.databl);
1080 return 0;
1081 }
1082
1083 int dump_attrs(
1084 Formatter *formatter, ghobject_t hoid,
1085 bufferlist &bl)
1086 {
1087 auto ebliter = bl.cbegin();
1088 attr_section as;
1089 as.decode(ebliter);
1090
1091 // This could have been handled in the caller if we didn't need to
1092 // support exports that didn't include object_info_t in object_begin.
1093 if (hoid.generation == ghobject_t::NO_GEN &&
1094 hoid.hobj.is_head()) {
1095 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1096 if (mi != as.data.end()) {
1097 SnapSet snapset;
1098 auto p = mi->second.cbegin();
1099 snapset.decode(p);
1100 formatter->open_object_section("snapset");
1101 snapset.dump(formatter);
1102 formatter->close_section();
1103 } else {
1104 formatter->open_object_section("snapset");
1105 formatter->dump_string("error", "missing SS_ATTR");
1106 formatter->close_section();
1107 }
1108 }
1109
1110 formatter->open_object_section("attrs");
1111 formatter->open_array_section("user");
1112 for (auto kv : as.data) {
1113 // Skip system attributes
1114 if (('_' != kv.first.at(0)) || kv.first.size() == 1)
1115 continue;
1116 formatter->open_object_section("user_attr");
1117 formatter->dump_string("name", kv.first.substr(1));
1118 bool b64;
1119 formatter->dump_string("value", cleanbin(kv.second, b64));
1120 formatter->dump_bool("Base64", b64);
1121 formatter->close_section();
1122 }
1123 formatter->close_section();
1124 formatter->open_array_section("system");
1125 for (auto kv : as.data) {
1126 // Skip user attributes
1127 if (('_' == kv.first.at(0)) && kv.first.size() != 1)
1128 continue;
1129 formatter->open_object_section("sys_attr");
1130 formatter->dump_string("name", kv.first);
1131 formatter->close_section();
1132 }
1133 formatter->close_section();
1134 formatter->close_section();
1135 formatter->flush(cout);
1136
1137 return 0;
1138 }
1139
1140 int get_attrs(
1141 ObjectStore *store, coll_t coll, ghobject_t hoid,
1142 ObjectStore::Transaction *t, bufferlist &bl,
1143 OSDriver &driver, SnapMapper &snap_mapper)
1144 {
1145 auto ebliter = bl.cbegin();
1146 attr_section as;
1147 as.decode(ebliter);
1148
1149 auto ch = store->open_collection(coll);
1150 if (debug)
1151 cerr << "\tattrs: len " << as.data.size() << std::endl;
1152 t->setattrs(coll, hoid, as.data);
1153
1154 // This could have been handled in the caller if we didn't need to
1155 // support exports that didn't include object_info_t in object_begin.
1156 if (hoid.generation == ghobject_t::NO_GEN &&
1157 hoid.hobj.is_head()) {
1158 map<string,bufferlist>::iterator mi = as.data.find(SS_ATTR);
1159 if (mi != as.data.end()) {
1160 SnapSet snapset;
1161 auto p = mi->second.cbegin();
1162 snapset.decode(p);
1163 cout << "snapset " << snapset << std::endl;
1164 for (auto& p : snapset.clone_snaps) {
1165 ghobject_t clone = hoid;
1166 clone.hobj.snap = p.first;
1167 set<snapid_t> snaps(p.second.begin(), p.second.end());
1168 if (!store->exists(ch, clone)) {
1169 // no clone, skip. this is probably a cache pool. this works
1170 // because we use a separate transaction per object and clones
1171 // come before head in the archive.
1172 if (debug)
1173 cerr << "\tskipping missing " << clone << " (snaps "
1174 << snaps << ")" << std::endl;
1175 continue;
1176 }
1177 if (debug)
1178 cerr << "\tsetting " << clone.hobj << " snaps " << snaps
1179 << std::endl;
1180 OSDriver::OSTransaction _t(driver.get_transaction(t));
1181 ceph_assert(!snaps.empty());
1182 snap_mapper.add_oid(clone.hobj, snaps, &_t);
1183 }
1184 } else {
1185 cerr << "missing SS_ATTR on " << hoid << std::endl;
1186 }
1187 }
1188 return 0;
1189 }
1190
1191 int dump_omap_hdr(Formatter *formatter, bufferlist &bl)
1192 {
1193 auto ebliter = bl.cbegin();
1194 omap_hdr_section oh;
1195 oh.decode(ebliter);
1196
1197 formatter->open_object_section("omap_header");
1198 formatter->dump_string("value", string(oh.hdr.c_str(), oh.hdr.length()));
1199 formatter->close_section();
1200 formatter->flush(cout);
1201 return 0;
1202 }
1203
1204 int get_omap_hdr(ObjectStore *store, coll_t coll, ghobject_t hoid,
1205 ObjectStore::Transaction *t, bufferlist &bl)
1206 {
1207 auto ebliter = bl.cbegin();
1208 omap_hdr_section oh;
1209 oh.decode(ebliter);
1210
1211 if (debug)
1212 cerr << "\tomap header: " << string(oh.hdr.c_str(), oh.hdr.length())
1213 << std::endl;
1214 t->omap_setheader(coll, hoid, oh.hdr);
1215 return 0;
1216 }
1217
1218 int dump_omap(Formatter *formatter, bufferlist &bl)
1219 {
1220 auto ebliter = bl.cbegin();
1221 omap_section os;
1222 os.decode(ebliter);
1223
1224 formatter->open_object_section("omaps");
1225 formatter->dump_unsigned("count", os.omap.size());
1226 formatter->open_array_section("data");
1227 for (auto o : os.omap) {
1228 formatter->open_object_section("omap");
1229 formatter->dump_string("name", o.first);
1230 bool b64;
1231 formatter->dump_string("value", cleanbin(o.second, b64));
1232 formatter->dump_bool("Base64", b64);
1233 formatter->close_section();
1234 }
1235 formatter->close_section();
1236 formatter->close_section();
1237 formatter->flush(cout);
1238 return 0;
1239 }
1240
1241 int get_omap(ObjectStore *store, coll_t coll, ghobject_t hoid,
1242 ObjectStore::Transaction *t, bufferlist &bl)
1243 {
1244 auto ebliter = bl.cbegin();
1245 omap_section os;
1246 os.decode(ebliter);
1247
1248 if (debug)
1249 cerr << "\tomap: size " << os.omap.size() << std::endl;
1250 t->omap_setkeys(coll, hoid, os.omap);
1251 return 0;
1252 }
1253
1254 int ObjectStoreTool::dump_object(Formatter *formatter,
1255 bufferlist &bl)
1256 {
1257 auto ebliter = bl.cbegin();
1258 object_begin ob;
1259 ob.decode(ebliter);
1260
1261 if (ob.hoid.hobj.is_temp()) {
1262 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1263 return -EFAULT;
1264 }
1265
1266 formatter->open_object_section("object");
1267 formatter->open_object_section("oid");
1268 ob.hoid.dump(formatter);
1269 formatter->close_section();
1270 formatter->open_object_section("object_info");
1271 ob.oi.dump(formatter);
1272 formatter->close_section();
1273
1274 bufferlist ebl;
1275 bool done = false;
1276 while(!done) {
1277 sectiontype_t type;
1278 int ret = read_section(&type, &ebl);
1279 if (ret)
1280 return ret;
1281
1282 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1283 //cout << "\t\tsection size " << ebl.length() << std::endl;
1284 if (type >= END_OF_TYPES) {
1285 cout << "Skipping unknown object section type" << std::endl;
1286 continue;
1287 }
1288 switch(type) {
1289 case TYPE_DATA:
1290 if (dry_run) break;
1291 ret = dump_data(formatter, ebl);
1292 if (ret) return ret;
1293 break;
1294 case TYPE_ATTRS:
1295 if (dry_run) break;
1296 ret = dump_attrs(formatter, ob.hoid, ebl);
1297 if (ret) return ret;
1298 break;
1299 case TYPE_OMAP_HDR:
1300 if (dry_run) break;
1301 ret = dump_omap_hdr(formatter, ebl);
1302 if (ret) return ret;
1303 break;
1304 case TYPE_OMAP:
1305 if (dry_run) break;
1306 ret = dump_omap(formatter, ebl);
1307 if (ret) return ret;
1308 break;
1309 case TYPE_OBJECT_END:
1310 done = true;
1311 break;
1312 default:
1313 cerr << "Unknown section type " << type << std::endl;
1314 return -EFAULT;
1315 }
1316 }
1317 formatter->close_section();
1318 return 0;
1319 }
1320
1321 int ObjectStoreTool::get_object(ObjectStore *store,
1322 OSDriver& driver,
1323 SnapMapper& mapper,
1324 coll_t coll,
1325 bufferlist &bl, OSDMap &origmap,
1326 bool *skipped_objects)
1327 {
1328 ObjectStore::Transaction tran;
1329 ObjectStore::Transaction *t = &tran;
1330 auto ebliter = bl.cbegin();
1331 object_begin ob;
1332 ob.decode(ebliter);
1333
1334 if (ob.hoid.hobj.is_temp()) {
1335 cerr << "ERROR: Export contains temporary object '" << ob.hoid << "'" << std::endl;
1336 return -EFAULT;
1337 }
1338 ceph_assert(g_ceph_context);
1339
1340 auto ch = store->open_collection(coll);
1341 if (ob.hoid.hobj.nspace != g_ceph_context->_conf->osd_hit_set_namespace) {
1342 object_t oid = ob.hoid.hobj.oid;
1343 object_locator_t loc(ob.hoid.hobj);
1344 pg_t raw_pgid = origmap.object_locator_to_pg(oid, loc);
1345 pg_t pgid = origmap.raw_pg_to_pg(raw_pgid);
1346
1347 spg_t coll_pgid;
1348 if (coll.is_pg(&coll_pgid) == false) {
1349 cerr << "INTERNAL ERROR: Bad collection during import" << std::endl;
1350 return -EFAULT;
1351 }
1352 if (coll_pgid.shard != ob.hoid.shard_id) {
1353 cerr << "INTERNAL ERROR: Importing shard " << coll_pgid.shard
1354 << " but object shard is " << ob.hoid.shard_id << std::endl;
1355 return -EFAULT;
1356 }
1357
1358 if (coll_pgid.pgid != pgid) {
1359 cerr << "Skipping object '" << ob.hoid << "' which belongs in pg " << pgid << std::endl;
1360 *skipped_objects = true;
1361 skip_object(bl);
1362 return 0;
1363 }
1364 }
1365
1366 if (!dry_run)
1367 t->touch(coll, ob.hoid);
1368
1369 cout << "Write " << ob.hoid << std::endl;
1370
1371 bufferlist ebl;
1372 bool done = false;
1373 while(!done) {
1374 sectiontype_t type;
1375 int ret = read_section(&type, &ebl);
1376 if (ret)
1377 return ret;
1378
1379 //cout << "\tdo_object: Section type " << hex << type << dec << std::endl;
1380 //cout << "\t\tsection size " << ebl.length() << std::endl;
1381 if (type >= END_OF_TYPES) {
1382 cout << "Skipping unknown object section type" << std::endl;
1383 continue;
1384 }
1385 switch(type) {
1386 case TYPE_DATA:
1387 if (dry_run) break;
1388 ret = get_data(store, coll, ob.hoid, t, ebl);
1389 if (ret) return ret;
1390 break;
1391 case TYPE_ATTRS:
1392 if (dry_run) break;
1393 ret = get_attrs(store, coll, ob.hoid, t, ebl, driver, mapper);
1394 if (ret) return ret;
1395 break;
1396 case TYPE_OMAP_HDR:
1397 if (dry_run) break;
1398 ret = get_omap_hdr(store, coll, ob.hoid, t, ebl);
1399 if (ret) return ret;
1400 break;
1401 case TYPE_OMAP:
1402 if (dry_run) break;
1403 ret = get_omap(store, coll, ob.hoid, t, ebl);
1404 if (ret) return ret;
1405 break;
1406 case TYPE_OBJECT_END:
1407 done = true;
1408 break;
1409 default:
1410 cerr << "Unknown section type " << type << std::endl;
1411 return -EFAULT;
1412 }
1413 }
1414 if (!dry_run) {
1415 wait_until_done(t, [&] {
1416 store->queue_transaction(ch, std::move(*t));
1417 ch->flush();
1418 });
1419 }
1420 return 0;
1421 }
1422
1423 int dump_pg_metadata(Formatter *formatter, bufferlist &bl, metadata_section &ms)
1424 {
1425 auto ebliter = bl.cbegin();
1426 ms.decode(ebliter);
1427
1428 formatter->open_object_section("metadata_section");
1429
1430 formatter->dump_unsigned("pg_disk_version", (int)ms.struct_ver);
1431 formatter->dump_unsigned("map_epoch", ms.map_epoch);
1432
1433 formatter->open_object_section("OSDMap");
1434 ms.osdmap.dump(formatter);
1435 formatter->close_section();
1436 formatter->flush(cout);
1437 cout << std::endl;
1438
1439 formatter->open_object_section("info");
1440 ms.info.dump(formatter);
1441 formatter->close_section();
1442 formatter->flush(cout);
1443
1444 formatter->open_object_section("log");
1445 ms.log.dump(formatter);
1446 formatter->close_section();
1447 formatter->flush(cout);
1448
1449 formatter->open_object_section("pg_missing_t");
1450 ms.missing.dump(formatter);
1451 formatter->close_section();
1452
1453 // XXX: ms.past_intervals?
1454
1455 formatter->close_section();
1456 formatter->flush(cout);
1457
1458 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1459 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1460 return -EFAULT;
1461 }
1462
1463 return 0;
1464 }
1465
1466 int get_pg_metadata(ObjectStore *store, bufferlist &bl, metadata_section &ms,
1467 const OSDSuperblock& sb, spg_t pgid)
1468 {
1469 auto ebliter = bl.cbegin();
1470 ms.decode(ebliter);
1471 spg_t old_pgid = ms.info.pgid;
1472 ms.info.pgid = pgid;
1473
1474 if (debug) {
1475 cout << "export pgid " << old_pgid << std::endl;
1476 cout << "struct_v " << (int)ms.struct_ver << std::endl;
1477 cout << "map epoch " << ms.map_epoch << std::endl;
1478
1479 #ifdef DIAGNOSTIC
1480 Formatter *formatter = new JSONFormatter(true);
1481 formatter->open_object_section("stuff");
1482
1483 formatter->open_object_section("importing OSDMap");
1484 ms.osdmap.dump(formatter);
1485 formatter->close_section();
1486 formatter->flush(cout);
1487 cout << std::endl;
1488
1489 cout << "osd current epoch " << sb.current_epoch << std::endl;
1490
1491 formatter->open_object_section("info");
1492 ms.info.dump(formatter);
1493 formatter->close_section();
1494 formatter->flush(cout);
1495 cout << std::endl;
1496
1497 formatter->open_object_section("log");
1498 ms.log.dump(formatter);
1499 formatter->close_section();
1500 formatter->flush(cout);
1501 cout << std::endl;
1502
1503 formatter->close_section();
1504 formatter->flush(cout);
1505 cout << std::endl;
1506 #endif
1507 }
1508
1509 if (ms.osdmap.get_epoch() != 0 && ms.map_epoch != ms.osdmap.get_epoch()) {
1510 cerr << "FATAL: Invalid OSDMap epoch in export data" << std::endl;
1511 return -EFAULT;
1512 }
1513
1514 if (ms.map_epoch > sb.current_epoch) {
1515 cerr << "ERROR: Export PG's map_epoch " << ms.map_epoch << " > OSD's epoch " << sb.current_epoch << std::endl;
1516 cerr << "The OSD you are using is older than the exported PG" << std::endl;
1517 cerr << "Either use another OSD or join selected OSD to cluster to update it first" << std::endl;
1518 return -EINVAL;
1519 }
1520
1521 // Old exports didn't include OSDMap
1522 if (ms.osdmap.get_epoch() == 0) {
1523 cerr << "WARNING: No OSDMap in old export, this is an ancient export."
1524 " Not supported." << std::endl;
1525 return -EINVAL;
1526 }
1527
1528 if (ms.osdmap.get_epoch() < sb.oldest_map) {
1529 cerr << "PG export's map " << ms.osdmap.get_epoch()
1530 << " is older than OSD's oldest_map " << sb.oldest_map << std::endl;
1531 if (!force) {
1532 cerr << " pass --force to proceed anyway (with incomplete PastIntervals)"
1533 << std::endl;
1534 return -EINVAL;
1535 }
1536 }
1537 if (debug) {
1538 cerr << "Import pgid " << ms.info.pgid << std::endl;
1539 cerr << "Previous past_intervals " << ms.past_intervals << std::endl;
1540 cerr << "history.same_interval_since "
1541 << ms.info.history.same_interval_since << std::endl;
1542 }
1543
1544 return 0;
1545 }
1546
1547 // out: pg_log_t that only has entries that apply to import_pgid using curmap
1548 // reject: Entries rejected from "in" are in the reject.log. Other fields not set.
1549 void filter_divergent_priors(spg_t import_pgid, const OSDMap &curmap,
1550 const string &hit_set_namespace, const divergent_priors_t &in,
1551 divergent_priors_t &out, divergent_priors_t &reject)
1552 {
1553 out.clear();
1554 reject.clear();
1555
1556 for (divergent_priors_t::const_iterator i = in.begin();
1557 i != in.end(); ++i) {
1558
1559 // Reject divergent priors for temporary objects
1560 if (i->second.is_temp()) {
1561 reject.insert(*i);
1562 continue;
1563 }
1564
1565 if (i->second.nspace != hit_set_namespace) {
1566 object_t oid = i->second.oid;
1567 object_locator_t loc(i->second);
1568 pg_t raw_pgid = curmap.object_locator_to_pg(oid, loc);
1569 pg_t pgid = curmap.raw_pg_to_pg(raw_pgid);
1570
1571 if (import_pgid.pgid == pgid) {
1572 out.insert(*i);
1573 } else {
1574 reject.insert(*i);
1575 }
1576 } else {
1577 out.insert(*i);
1578 }
1579 }
1580 }
1581
1582 int ObjectStoreTool::dump_export(Formatter *formatter)
1583 {
1584 bufferlist ebl;
1585 pg_info_t info;
1586 PGLog::IndexedLog log;
1587 //bool skipped_objects = false;
1588
1589 int ret = read_super();
1590 if (ret)
1591 return ret;
1592
1593 if (sh.magic != super_header::super_magic) {
1594 cerr << "Invalid magic number" << std::endl;
1595 return -EFAULT;
1596 }
1597
1598 if (sh.version > super_header::super_ver) {
1599 cerr << "Can't handle export format version=" << sh.version << std::endl;
1600 return -EINVAL;
1601 }
1602
1603 formatter->open_object_section("Export");
1604
1605 //First section must be TYPE_PG_BEGIN
1606 sectiontype_t type;
1607 ret = read_section(&type, &ebl);
1608 if (ret)
1609 return ret;
1610 if (type == TYPE_POOL_BEGIN) {
1611 cerr << "Dump of pool exports not supported" << std::endl;
1612 return -EINVAL;
1613 } else if (type != TYPE_PG_BEGIN) {
1614 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
1615 return -EFAULT;
1616 }
1617
1618 auto ebliter = ebl.cbegin();
1619 pg_begin pgb;
1620 pgb.decode(ebliter);
1621 spg_t pgid = pgb.pgid;
1622
1623 formatter->dump_string("pgid", stringify(pgid));
1624 formatter->dump_string("cluster_fsid", stringify(pgb.superblock.cluster_fsid));
1625 formatter->dump_string("features", stringify(pgb.superblock.compat_features));
1626
1627 bool done = false;
1628 bool found_metadata = false;
1629 metadata_section ms;
1630 bool objects_started = false;
1631 while(!done) {
1632 ret = read_section(&type, &ebl);
1633 if (ret)
1634 return ret;
1635
1636 if (debug) {
1637 cerr << "dump_export: Section type " << std::to_string(type) << std::endl;
1638 }
1639 if (type >= END_OF_TYPES) {
1640 cerr << "Skipping unknown section type" << std::endl;
1641 continue;
1642 }
1643 switch(type) {
1644 case TYPE_OBJECT_BEGIN:
1645 if (!objects_started) {
1646 formatter->open_array_section("objects");
1647 objects_started = true;
1648 }
1649 ret = dump_object(formatter, ebl);
1650 if (ret) return ret;
1651 break;
1652 case TYPE_PG_METADATA:
1653 if (objects_started)
1654 cerr << "WARNING: metadata_section out of order" << std::endl;
1655 ret = dump_pg_metadata(formatter, ebl, ms);
1656 if (ret) return ret;
1657 found_metadata = true;
1658 break;
1659 case TYPE_PG_END:
1660 if (objects_started) {
1661 formatter->close_section();
1662 }
1663 done = true;
1664 break;
1665 default:
1666 cerr << "Unknown section type " << std::to_string(type) << std::endl;
1667 return -EFAULT;
1668 }
1669 }
1670
1671 if (!found_metadata) {
1672 cerr << "Missing metadata section" << std::endl;
1673 return -EFAULT;
1674 }
1675
1676 formatter->close_section();
1677 formatter->flush(cout);
1678
1679 return 0;
1680 }
1681
1682 int ObjectStoreTool::do_import(ObjectStore *store, OSDSuperblock& sb,
1683 bool force, std::string pgidstr)
1684 {
1685 bufferlist ebl;
1686 pg_info_t info;
1687 PGLog::IndexedLog log;
1688 bool skipped_objects = false;
1689
1690 if (!dry_run)
1691 finish_remove_pgs(store);
1692
1693 int ret = read_super();
1694 if (ret)
1695 return ret;
1696
1697 if (sh.magic != super_header::super_magic) {
1698 cerr << "Invalid magic number" << std::endl;
1699 return -EFAULT;
1700 }
1701
1702 if (sh.version > super_header::super_ver) {
1703 cerr << "Can't handle export format version=" << sh.version << std::endl;
1704 return -EINVAL;
1705 }
1706
1707 //First section must be TYPE_PG_BEGIN
1708 sectiontype_t type;
1709 ret = read_section(&type, &ebl);
1710 if (ret)
1711 return ret;
1712 if (type == TYPE_POOL_BEGIN) {
1713 cerr << "Pool exports cannot be imported into a PG" << std::endl;
1714 return -EINVAL;
1715 } else if (type != TYPE_PG_BEGIN) {
1716 cerr << "Invalid first section type " << std::to_string(type) << std::endl;
1717 return -EFAULT;
1718 }
1719
1720 auto ebliter = ebl.cbegin();
1721 pg_begin pgb;
1722 pgb.decode(ebliter);
1723 spg_t pgid = pgb.pgid;
1724
1725 if (pgidstr.length()) {
1726 spg_t user_pgid;
1727
1728 bool ok = user_pgid.parse(pgidstr.c_str());
1729 // This succeeded in main() already
1730 ceph_assert(ok);
1731 if (pgid != user_pgid) {
1732 cerr << "specified pgid " << user_pgid
1733 << " does not match actual pgid " << pgid << std::endl;
1734 return -EINVAL;
1735 }
1736 }
1737
1738 if (!pgb.superblock.cluster_fsid.is_zero()
1739 && pgb.superblock.cluster_fsid != sb.cluster_fsid) {
1740 cerr << "Export came from different cluster with fsid "
1741 << pgb.superblock.cluster_fsid << std::endl;
1742 return -EINVAL;
1743 }
1744
1745 if (debug) {
1746 cerr << "Exported features: " << pgb.superblock.compat_features << std::endl;
1747 }
1748
1749 // Special case: Old export has SHARDS incompat feature on replicated pg, removqqe it
1750 if (pgid.is_no_shard())
1751 pgb.superblock.compat_features.incompat.remove(CEPH_OSD_FEATURE_INCOMPAT_SHARDS);
1752
1753 if (sb.compat_features.compare(pgb.superblock.compat_features) == -1) {
1754 CompatSet unsupported = sb.compat_features.unsupported(pgb.superblock.compat_features);
1755
1756 cerr << "Export has incompatible features set " << unsupported << std::endl;
1757
1758 // Let them import if they specify the --force option
1759 if (!force)
1760 return 11; // Positive return means exit status
1761 }
1762
1763 // we need the latest OSDMap to check for collisions
1764 OSDMap curmap;
1765 bufferlist bl;
1766 ret = get_osdmap(store, sb.current_epoch, curmap, bl);
1767 if (ret) {
1768 cerr << "Can't find latest local OSDMap " << sb.current_epoch << std::endl;
1769 return ret;
1770 }
1771 if (!curmap.have_pg_pool(pgid.pgid.m_pool)) {
1772 cerr << "Pool " << pgid.pgid.m_pool << " no longer exists" << std::endl;
1773 // Special exit code for this error, used by test code
1774 return 10; // Positive return means exit status
1775 }
1776
1777 pool_pg_num_history_t pg_num_history;
1778 get_pg_num_history(store, &pg_num_history);
1779
1780 ghobject_t pgmeta_oid = pgid.make_pgmeta_oid();
1781
1782 // Check for PG already present.
1783 coll_t coll(pgid);
1784 if (store->collection_exists(coll)) {
1785 cerr << "pgid " << pgid << " already exists" << std::endl;
1786 return -EEXIST;
1787 }
1788
1789 ObjectStore::CollectionHandle ch;
1790
1791 OSDriver driver(
1792 store,
1793 coll_t(),
1794 OSD::make_snapmapper_oid());
1795 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pgid.shard);
1796
1797 cout << "Importing pgid " << pgid;
1798 cout << std::endl;
1799
1800 bool done = false;
1801 bool found_metadata = false;
1802 metadata_section ms;
1803 while(!done) {
1804 ret = read_section(&type, &ebl);
1805 if (ret)
1806 return ret;
1807
1808 if (debug) {
1809 cout << __func__ << ": Section type " << std::to_string(type) << std::endl;
1810 }
1811 if (type >= END_OF_TYPES) {
1812 cout << "Skipping unknown section type" << std::endl;
1813 continue;
1814 }
1815 switch(type) {
1816 case TYPE_OBJECT_BEGIN:
1817 ceph_assert(found_metadata);
1818 ret = get_object(store, driver, mapper, coll, ebl, ms.osdmap,
1819 &skipped_objects);
1820 if (ret) return ret;
1821 break;
1822 case TYPE_PG_METADATA:
1823 ret = get_pg_metadata(store, ebl, ms, sb, pgid);
1824 if (ret) return ret;
1825 found_metadata = true;
1826
1827 if (pgid != ms.info.pgid) {
1828 cerr << "specified pgid " << pgid << " does not match import file pgid "
1829 << ms.info.pgid << std::endl;
1830 return -EINVAL;
1831 }
1832
1833 // make sure there are no conflicting splits or merges
1834 if (ms.osdmap.have_pg_pool(pgid.pgid.pool())) {
1835 auto p = pg_num_history.pg_nums.find(pgid.pgid.m_pool);
1836 if (p != pg_num_history.pg_nums.end() &&
1837 !p->second.empty()) {
1838 unsigned start_pg_num = ms.osdmap.get_pg_num(pgid.pgid.pool());
1839 unsigned pg_num = start_pg_num;
1840 for (auto q = p->second.lower_bound(ms.map_epoch);
1841 q != p->second.end();
1842 ++q) {
1843 unsigned new_pg_num = q->second;
1844 cout << "pool " << pgid.pgid.pool() << " pg_num " << pg_num
1845 << " -> " << new_pg_num << std::endl;
1846
1847 // check for merge target
1848 spg_t target;
1849 if (pgid.is_merge_source(pg_num, new_pg_num, &target)) {
1850 // FIXME: this checks assumes the OSD's PG is at the OSD's
1851 // map epoch; it could be, say, at *our* epoch, pre-merge.
1852 coll_t coll(target);
1853 if (store->collection_exists(coll)) {
1854 cerr << "pgid " << pgid << " merges to target " << target
1855 << " which already exists" << std::endl;
1856 return 12;
1857 }
1858 }
1859
1860 // check for split children
1861 set<spg_t> children;
1862 if (pgid.is_split(start_pg_num, new_pg_num, &children)) {
1863 cerr << " children are " << children << std::endl;
1864 for (auto child : children) {
1865 coll_t coll(child);
1866 if (store->collection_exists(coll)) {
1867 cerr << "pgid " << pgid << " splits to " << children
1868 << " and " << child << " exists" << std::endl;
1869 return 12;
1870 }
1871 }
1872 }
1873 pg_num = new_pg_num;
1874 }
1875 }
1876 } else {
1877 cout << "pool " << pgid.pgid.pool() << " doesn't existing, not checking"
1878 << " for splits or mergers" << std::endl;
1879 }
1880
1881 if (!dry_run) {
1882 ObjectStore::Transaction t;
1883 ch = store->create_new_collection(coll);
1884 create_pg_collection(
1885 t, pgid,
1886 pgid.get_split_bits(ms.osdmap.get_pg_pool(pgid.pool())->get_pg_num()));
1887 init_pg_ondisk(t, pgid, NULL);
1888
1889 // mark this coll for removal until we're done
1890 map<string,bufferlist> values;
1891 encode((char)1, values["_remove"]);
1892 t.omap_setkeys(coll, pgid.make_pgmeta_oid(), values);
1893
1894 store->queue_transaction(ch, std::move(t));
1895 }
1896
1897 break;
1898 case TYPE_PG_END:
1899 ceph_assert(found_metadata);
1900 done = true;
1901 break;
1902 default:
1903 cerr << "Unknown section type " << std::to_string(type) << std::endl;
1904 return -EFAULT;
1905 }
1906 }
1907
1908 if (!found_metadata) {
1909 cerr << "Missing metadata section" << std::endl;
1910 return -EFAULT;
1911 }
1912
1913 ObjectStore::Transaction t;
1914 if (!dry_run) {
1915 pg_log_t newlog, reject;
1916 pg_log_t::filter_log(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
1917 ms.log, newlog, reject);
1918 if (debug) {
1919 for (list<pg_log_entry_t>::iterator i = newlog.log.begin();
1920 i != newlog.log.end(); ++i)
1921 cerr << "Keeping log entry " << *i << std::endl;
1922 for (list<pg_log_entry_t>::iterator i = reject.log.begin();
1923 i != reject.log.end(); ++i)
1924 cerr << "Skipping log entry " << *i << std::endl;
1925 }
1926
1927 divergent_priors_t newdp, rejectdp;
1928 filter_divergent_priors(pgid, ms.osdmap, g_ceph_context->_conf->osd_hit_set_namespace,
1929 ms.divergent_priors, newdp, rejectdp);
1930 ms.divergent_priors = newdp;
1931 if (debug) {
1932 for (divergent_priors_t::iterator i = newdp.begin();
1933 i != newdp.end(); ++i)
1934 cerr << "Keeping divergent_prior " << *i << std::endl;
1935 for (divergent_priors_t::iterator i = rejectdp.begin();
1936 i != rejectdp.end(); ++i)
1937 cerr << "Skipping divergent_prior " << *i << std::endl;
1938 }
1939
1940 ms.missing.filter_objects([&](const hobject_t &obj) {
1941 if (obj.nspace == g_ceph_context->_conf->osd_hit_set_namespace)
1942 return false;
1943 ceph_assert(!obj.is_temp());
1944 object_t oid = obj.oid;
1945 object_locator_t loc(obj);
1946 pg_t raw_pgid = ms.osdmap.object_locator_to_pg(oid, loc);
1947 pg_t _pgid = ms.osdmap.raw_pg_to_pg(raw_pgid);
1948
1949 return pgid.pgid != _pgid;
1950 });
1951
1952
1953 if (debug) {
1954 pg_missing_t missing;
1955 Formatter *formatter = Formatter::create("json-pretty");
1956 dump_log(formatter, cerr, newlog, ms.missing);
1957 delete formatter;
1958 }
1959
1960 // Just like a split invalidate stats since the object count is changed
1961 if (skipped_objects)
1962 ms.info.stats.stats_invalid = true;
1963
1964 ret = write_pg(
1965 t,
1966 ms.map_epoch,
1967 ms.info,
1968 newlog,
1969 ms.past_intervals,
1970 ms.divergent_priors,
1971 ms.missing);
1972 if (ret) return ret;
1973 }
1974
1975 // done, clear removal flag
1976 if (debug)
1977 cerr << "done, clearing removal flag" << std::endl;
1978
1979 if (!dry_run) {
1980 t.omap_rmkey(coll, pgid.make_pgmeta_oid(), "_remove");
1981 wait_until_done(&t, [&] {
1982 store->queue_transaction(ch, std::move(t));
1983 // make sure we flush onreadable items before mapper/driver are destroyed.
1984 ch->flush();
1985 });
1986 }
1987 return 0;
1988 }
1989
1990 int do_list(ObjectStore *store, string pgidstr, string object, boost::optional<std::string> nspace,
1991 Formatter *formatter, bool debug, bool human_readable, bool head)
1992 {
1993 int r;
1994 lookup_ghobject lookup(object, nspace, head);
1995 if (pgidstr.length() > 0) {
1996 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
1997 } else {
1998 r = action_on_all_objects(store, lookup, debug);
1999 }
2000 if (r)
2001 return r;
2002 lookup.dump(formatter, human_readable);
2003 formatter->flush(cout);
2004 return 0;
2005 }
2006
2007 int do_list_slow(ObjectStore *store, string pgidstr, string object,
2008 double threshold, Formatter *formatter, bool debug, bool human_readable)
2009 {
2010 int r;
2011 lookup_slow_ghobject lookup(object, threshold);
2012 if (pgidstr.length() > 0) {
2013 r = action_on_all_objects_in_pg(store, pgidstr, lookup, debug);
2014 } else {
2015 r = action_on_all_objects(store, lookup, debug);
2016 }
2017 if (r)
2018 return r;
2019 lookup.dump(formatter, human_readable);
2020 formatter->flush(cout);
2021 return 0;
2022 }
2023
2024 int do_meta(ObjectStore *store, string object, Formatter *formatter, bool debug, bool human_readable)
2025 {
2026 int r;
2027 boost::optional<std::string> nspace; // Not specified
2028 lookup_ghobject lookup(object, nspace);
2029 r = action_on_all_objects_in_exact_pg(store, coll_t::meta(), lookup, debug);
2030 if (r)
2031 return r;
2032 lookup.dump(formatter, human_readable);
2033 formatter->flush(cout);
2034 return 0;
2035 }
2036
// Selects what remove_object() deletes.
enum rmtype {
  BOTH = 0,       // the snap mapper entry and the object itself
  SNAPMAP = 1,    // only the snap mapper entry
  NOSNAPMAP = 2   // only the object, leaving the snap mapper untouched
};
2042
2043 int remove_object(coll_t coll, ghobject_t &ghobj,
2044 SnapMapper &mapper,
2045 MapCacher::Transaction<std::string, bufferlist> *_t,
2046 ObjectStore::Transaction *t,
2047 enum rmtype type)
2048 {
2049 if (type == BOTH || type == SNAPMAP) {
2050 int r = mapper.remove_oid(ghobj.hobj, _t);
2051 if (r < 0 && r != -ENOENT) {
2052 cerr << "remove_oid returned " << cpp_strerror(r) << std::endl;
2053 return r;
2054 }
2055 }
2056
2057 if (type == BOTH || type == NOSNAPMAP) {
2058 t->remove(coll, ghobj);
2059 }
2060 return 0;
2061 }
2062
2063 int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent);
2064
2065 int do_remove_object(ObjectStore *store, coll_t coll,
2066 ghobject_t &ghobj, bool all, bool force, enum rmtype type)
2067 {
2068 auto ch = store->open_collection(coll);
2069 spg_t pg;
2070 coll.is_pg_prefix(&pg);
2071 OSDriver driver(
2072 store,
2073 coll_t(),
2074 OSD::make_snapmapper_oid());
2075 SnapMapper mapper(g_ceph_context, &driver, 0, 0, 0, pg.shard);
2076 struct stat st;
2077
2078 int r = store->stat(ch, ghobj, &st);
2079 if (r < 0) {
2080 cerr << "remove: " << cpp_strerror(r) << std::endl;
2081 return r;
2082 }
2083
2084 SnapSet ss;
2085 if (ghobj.hobj.has_snapset()) {
2086 r = get_snapset(store, coll, ghobj, ss, false);
2087 if (r < 0) {
2088 cerr << "Can't get snapset error " << cpp_strerror(r) << std::endl;
2089 // If --force and bad snapset let them remove the head
2090 if (!(force && !all))
2091 return r;
2092 }
2093 // cout << "snapset " << ss << std::endl;
2094 if (!ss.clone_snaps.empty() && !all) {
2095 if (force) {
2096 cout << "WARNING: only removing "
2097 << (ghobj.hobj.is_head() ? "head" : "snapdir")
2098 << " with clones present" << std::endl;
2099 ss.clone_snaps.clear();
2100 } else {
2101 cerr << "Clones are present, use removeall to delete everything"
2102 << std::endl;
2103 return -EINVAL;
2104 }
2105 }
2106 }
2107
2108 ObjectStore::Transaction t;
2109 OSDriver::OSTransaction _t(driver.get_transaction(&t));
2110
2111 ghobject_t snapobj = ghobj;
2112 for (auto& p : ss.clone_snaps) {
2113 snapobj.hobj.snap = p.first;
2114 cout << "remove clone " << snapobj << std::endl;
2115 if (!dry_run) {
2116 r = remove_object(coll, snapobj, mapper, &_t, &t, type);
2117 if (r < 0)
2118 return r;
2119 }
2120 }
2121
2122 cout << "remove " << ghobj << std::endl;
2123
2124 if (!dry_run) {
2125 r = remove_object(coll, ghobj, mapper, &_t, &t, type);
2126 if (r < 0)
2127 return r;
2128 }
2129
2130 if (!dry_run) {
2131 wait_until_done(&t, [&] {
2132 store->queue_transaction(ch, std::move(t));
2133 ch->flush();
2134 });
2135 }
2136 return 0;
2137 }
2138
2139 int do_list_attrs(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2140 {
2141 auto ch = store->open_collection(coll);
2142 map<string,bufferptr> aset;
2143 int r = store->getattrs(ch, ghobj, aset);
2144 if (r < 0) {
2145 cerr << "getattrs: " << cpp_strerror(r) << std::endl;
2146 return r;
2147 }
2148
2149 for (map<string,bufferptr>::iterator i = aset.begin();i != aset.end(); ++i) {
2150 string key(i->first);
2151 if (outistty)
2152 key = cleanbin(key);
2153 cout << key << std::endl;
2154 }
2155 return 0;
2156 }
2157
2158 int do_list_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2159 {
2160 auto ch = store->open_collection(coll);
2161 ObjectMap::ObjectMapIterator iter = store->get_omap_iterator(ch, ghobj);
2162 if (!iter) {
2163 cerr << "omap_get_iterator: " << cpp_strerror(ENOENT) << std::endl;
2164 return -ENOENT;
2165 }
2166 iter->seek_to_first();
2167 map<string, bufferlist> oset;
2168 while(iter->valid()) {
2169 get_omap_batch(iter, oset);
2170
2171 for (map<string,bufferlist>::iterator i = oset.begin();i != oset.end(); ++i) {
2172 string key(i->first);
2173 if (outistty)
2174 key = cleanbin(key);
2175 cout << key << std::endl;
2176 }
2177 }
2178 return 0;
2179 }
2180
2181 int do_get_bytes(ObjectStore *store, coll_t coll, ghobject_t &ghobj, int fd)
2182 {
2183 auto ch = store->open_collection(coll);
2184 struct stat st;
2185 mysize_t total;
2186
2187 int ret = store->stat(ch, ghobj, &st);
2188 if (ret < 0) {
2189 cerr << "get-bytes: " << cpp_strerror(ret) << std::endl;
2190 return ret;
2191 }
2192
2193 total = st.st_size;
2194 if (debug)
2195 cerr << "size=" << total << std::endl;
2196
2197 uint64_t offset = 0;
2198 bufferlist rawdatabl;
2199 while(total > 0) {
2200 rawdatabl.clear();
2201 mysize_t len = max_read;
2202 if (len > total)
2203 len = total;
2204
2205 ret = store->read(ch, ghobj, offset, len, rawdatabl);
2206 if (ret < 0)
2207 return ret;
2208 if (ret == 0)
2209 return -EINVAL;
2210
2211 if (debug)
2212 cerr << "data section offset=" << offset << " len=" << len << std::endl;
2213
2214 total -= ret;
2215 offset += ret;
2216
2217 ret = write(fd, rawdatabl.c_str(), ret);
2218 if (ret == -1) {
2219 perror("write");
2220 return -errno;
2221 }
2222 }
2223
2224 return 0;
2225 }
2226
2227 int do_set_bytes(ObjectStore *store, coll_t coll,
2228 ghobject_t &ghobj, int fd)
2229 {
2230 ObjectStore::Transaction tran;
2231 ObjectStore::Transaction *t = &tran;
2232
2233 if (debug)
2234 cerr << "Write " << ghobj << std::endl;
2235
2236 if (!dry_run) {
2237 t->touch(coll, ghobj);
2238 t->truncate(coll, ghobj, 0);
2239 }
2240
2241 uint64_t offset = 0;
2242 bufferlist rawdatabl;
2243 do {
2244 rawdatabl.clear();
2245 ssize_t bytes = rawdatabl.read_fd(fd, max_read);
2246 if (bytes < 0) {
2247 cerr << "read_fd error " << cpp_strerror(bytes) << std::endl;
2248 return bytes;
2249 }
2250
2251 if (bytes == 0)
2252 break;
2253
2254 if (debug)
2255 cerr << "\tdata: offset " << offset << " bytes " << bytes << std::endl;
2256 if (!dry_run)
2257 t->write(coll, ghobj, offset, bytes, rawdatabl);
2258
2259 offset += bytes;
2260 // XXX: Should we queue_transaction() every once in a while for very large files
2261 } while(true);
2262
2263 auto ch = store->open_collection(coll);
2264 if (!dry_run)
2265 store->queue_transaction(ch, std::move(*t));
2266 return 0;
2267 }
2268
2269 int do_get_attr(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2270 {
2271 auto ch = store->open_collection(coll);
2272 bufferptr bp;
2273
2274 int r = store->getattr(ch, ghobj, key.c_str(), bp);
2275 if (r < 0) {
2276 cerr << "getattr: " << cpp_strerror(r) << std::endl;
2277 return r;
2278 }
2279
2280 string value(bp.c_str(), bp.length());
2281 if (outistty) {
2282 value = cleanbin(value);
2283 value.push_back('\n');
2284 }
2285 cout << value;
2286
2287 return 0;
2288 }
2289
2290 int do_set_attr(ObjectStore *store, coll_t coll,
2291 ghobject_t &ghobj, string key, int fd)
2292 {
2293 ObjectStore::Transaction tran;
2294 ObjectStore::Transaction *t = &tran;
2295 bufferlist bl;
2296
2297 if (debug)
2298 cerr << "Setattr " << ghobj << std::endl;
2299
2300 int ret = get_fd_data(fd, bl);
2301 if (ret < 0)
2302 return ret;
2303
2304 if (dry_run)
2305 return 0;
2306
2307 t->touch(coll, ghobj);
2308
2309 t->setattr(coll, ghobj, key, bl);
2310
2311 auto ch = store->open_collection(coll);
2312 store->queue_transaction(ch, std::move(*t));
2313 return 0;
2314 }
2315
2316 int do_rm_attr(ObjectStore *store, coll_t coll,
2317 ghobject_t &ghobj, string key)
2318 {
2319 ObjectStore::Transaction tran;
2320 ObjectStore::Transaction *t = &tran;
2321
2322 if (debug)
2323 cerr << "Rmattr " << ghobj << std::endl;
2324
2325 if (dry_run)
2326 return 0;
2327
2328 t->rmattr(coll, ghobj, key);
2329
2330 auto ch = store->open_collection(coll);
2331 store->queue_transaction(ch, std::move(*t));
2332 return 0;
2333 }
2334
2335 int do_get_omap(ObjectStore *store, coll_t coll, ghobject_t &ghobj, string key)
2336 {
2337 auto ch = store->open_collection(coll);
2338 set<string> keys;
2339 map<string, bufferlist> out;
2340
2341 keys.insert(key);
2342
2343 int r = store->omap_get_values(ch, ghobj, keys, &out);
2344 if (r < 0) {
2345 cerr << "omap_get_values: " << cpp_strerror(r) << std::endl;
2346 return r;
2347 }
2348
2349 if (out.empty()) {
2350 cerr << "Key not found" << std::endl;
2351 return -ENOENT;
2352 }
2353
2354 ceph_assert(out.size() == 1);
2355
2356 bufferlist bl = out.begin()->second;
2357 string value(bl.c_str(), bl.length());
2358 if (outistty) {
2359 value = cleanbin(value);
2360 value.push_back('\n');
2361 }
2362 cout << value;
2363
2364 return 0;
2365 }
2366
2367 int do_set_omap(ObjectStore *store, coll_t coll,
2368 ghobject_t &ghobj, string key, int fd)
2369 {
2370 ObjectStore::Transaction tran;
2371 ObjectStore::Transaction *t = &tran;
2372 map<string, bufferlist> attrset;
2373 bufferlist valbl;
2374
2375 if (debug)
2376 cerr << "Set_omap " << ghobj << std::endl;
2377
2378 int ret = get_fd_data(fd, valbl);
2379 if (ret < 0)
2380 return ret;
2381
2382 attrset.insert(pair<string, bufferlist>(key, valbl));
2383
2384 if (dry_run)
2385 return 0;
2386
2387 t->touch(coll, ghobj);
2388
2389 t->omap_setkeys(coll, ghobj, attrset);
2390
2391 auto ch = store->open_collection(coll);
2392 store->queue_transaction(ch, std::move(*t));
2393 return 0;
2394 }
2395
2396 int do_rm_omap(ObjectStore *store, coll_t coll,
2397 ghobject_t &ghobj, string key)
2398 {
2399 ObjectStore::Transaction tran;
2400 ObjectStore::Transaction *t = &tran;
2401
2402 if (debug)
2403 cerr << "Rm_omap " << ghobj << std::endl;
2404
2405 if (dry_run)
2406 return 0;
2407
2408 t->omap_rmkey(coll, ghobj, key);
2409
2410 auto ch = store->open_collection(coll);
2411 store->queue_transaction(ch, std::move(*t));
2412 return 0;
2413 }
2414
2415 int do_get_omaphdr(ObjectStore *store, coll_t coll, ghobject_t &ghobj)
2416 {
2417 auto ch = store->open_collection(coll);
2418 bufferlist hdrbl;
2419
2420 int r = store->omap_get_header(ch, ghobj, &hdrbl, true);
2421 if (r < 0) {
2422 cerr << "omap_get_header: " << cpp_strerror(r) << std::endl;
2423 return r;
2424 }
2425
2426 string header(hdrbl.c_str(), hdrbl.length());
2427 if (outistty) {
2428 header = cleanbin(header);
2429 header.push_back('\n');
2430 }
2431 cout << header;
2432
2433 return 0;
2434 }
2435
2436 int do_set_omaphdr(ObjectStore *store, coll_t coll,
2437 ghobject_t &ghobj, int fd)
2438 {
2439 ObjectStore::Transaction tran;
2440 ObjectStore::Transaction *t = &tran;
2441 bufferlist hdrbl;
2442
2443 if (debug)
2444 cerr << "Omap_setheader " << ghobj << std::endl;
2445
2446 int ret = get_fd_data(fd, hdrbl);
2447 if (ret)
2448 return ret;
2449
2450 if (dry_run)
2451 return 0;
2452
2453 t->touch(coll, ghobj);
2454
2455 t->omap_setheader(coll, ghobj, hdrbl);
2456
2457 auto ch = store->open_collection(coll);
2458 store->queue_transaction(ch, std::move(*t));
2459 return 0;
2460 }
2461
2462 struct do_fix_lost : public action_on_object_t {
2463 void call(ObjectStore *store, coll_t coll,
2464 ghobject_t &ghobj, object_info_t &oi) override {
2465 if (oi.is_lost()) {
2466 cout << coll << "/" << ghobj << " is lost";
2467 if (!dry_run)
2468 cout << ", fixing";
2469 cout << std::endl;
2470 if (dry_run)
2471 return;
2472 oi.clear_flag(object_info_t::FLAG_LOST);
2473 bufferlist bl;
2474 encode(oi, bl, -1); /* fixme: using full features */
2475 ObjectStore::Transaction t;
2476 t.setattr(coll, ghobj, OI_ATTR, bl);
2477 auto ch = store->open_collection(coll);
2478 store->queue_transaction(ch, std::move(t));
2479 }
2480 return;
2481 }
2482 };
2483
2484 int get_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj, SnapSet &ss, bool silent = false)
2485 {
2486 auto ch = store->open_collection(coll);
2487 bufferlist attr;
2488 int r = store->getattr(ch, ghobj, SS_ATTR, attr);
2489 if (r < 0) {
2490 if (!silent)
2491 cerr << "Error getting snapset on : " << make_pair(coll, ghobj) << ", "
2492 << cpp_strerror(r) << std::endl;
2493 return r;
2494 }
2495 auto bp = attr.cbegin();
2496 try {
2497 decode(ss, bp);
2498 } catch (...) {
2499 r = -EINVAL;
2500 cerr << "Error decoding snapset on : " << make_pair(coll, ghobj) << ", "
2501 << cpp_strerror(r) << std::endl;
2502 return r;
2503 }
2504 return 0;
2505 }
2506
2507 int print_obj_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
2508 {
2509 auto ch = store->open_collection(coll);
2510 int r = 0;
2511 formatter->open_object_section("obj");
2512 formatter->open_object_section("id");
2513 ghobj.dump(formatter);
2514 formatter->close_section();
2515
2516 bufferlist attr;
2517 int gr = store->getattr(ch, ghobj, OI_ATTR, attr);
2518 if (gr < 0) {
2519 r = gr;
2520 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2521 << cpp_strerror(r) << std::endl;
2522 } else {
2523 object_info_t oi;
2524 auto bp = attr.cbegin();
2525 try {
2526 decode(oi, bp);
2527 formatter->open_object_section("info");
2528 oi.dump(formatter);
2529 formatter->close_section();
2530 } catch (...) {
2531 r = -EINVAL;
2532 cerr << "Error decoding attr on : " << make_pair(coll, ghobj) << ", "
2533 << cpp_strerror(r) << std::endl;
2534 }
2535 }
2536 struct stat st;
2537 int sr = store->stat(ch, ghobj, &st, true);
2538 if (sr < 0) {
2539 r = sr;
2540 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2541 << cpp_strerror(r) << std::endl;
2542 } else {
2543 formatter->open_object_section("stat");
2544 formatter->dump_int("size", st.st_size);
2545 formatter->dump_int("blksize", st.st_blksize);
2546 formatter->dump_int("blocks", st.st_blocks);
2547 formatter->dump_int("nlink", st.st_nlink);
2548 formatter->close_section();
2549 }
2550
2551 if (ghobj.hobj.has_snapset()) {
2552 SnapSet ss;
2553 int snr = get_snapset(store, coll, ghobj, ss);
2554 if (snr < 0) {
2555 r = snr;
2556 } else {
2557 formatter->open_object_section("SnapSet");
2558 ss.dump(formatter);
2559 formatter->close_section();
2560 }
2561 }
2562 bufferlist hattr;
2563 gr = store->getattr(ch, ghobj, ECUtil::get_hinfo_key(), hattr);
2564 if (gr == 0) {
2565 ECUtil::HashInfo hinfo;
2566 auto hp = hattr.cbegin();
2567 try {
2568 decode(hinfo, hp);
2569 formatter->open_object_section("hinfo");
2570 hinfo.dump(formatter);
2571 formatter->close_section();
2572 } catch (...) {
2573 r = -EINVAL;
2574 cerr << "Error decoding hinfo on : " << make_pair(coll, ghobj) << ", "
2575 << cpp_strerror(r) << std::endl;
2576 }
2577 }
2578 gr = store->dump_onode(ch, ghobj, "onode", formatter);
2579
2580 formatter->close_section();
2581 formatter->flush(cout);
2582 cout << std::endl;
2583 return r;
2584 }
2585
2586 int corrupt_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter)
2587 {
2588 auto ch = store->open_collection(coll);
2589 bufferlist attr;
2590 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2591 if (r < 0) {
2592 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2593 << cpp_strerror(r) << std::endl;
2594 return r;
2595 }
2596 object_info_t oi;
2597 auto bp = attr.cbegin();
2598 try {
2599 decode(oi, bp);
2600 } catch (...) {
2601 r = -EINVAL;
2602 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2603 << cpp_strerror(r) << std::endl;
2604 return r;
2605 }
2606 if (!dry_run) {
2607 attr.clear();
2608 oi.alloc_hint_flags += 0xff;
2609 ObjectStore::Transaction t;
2610 encode(oi, attr, -1); /* fixme: using full features */
2611 t.setattr(coll, ghobj, OI_ATTR, attr);
2612 auto ch = store->open_collection(coll);
2613 r = store->queue_transaction(ch, std::move(t));
2614 if (r < 0) {
2615 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2616 << cpp_strerror(r) << std::endl;
2617 return r;
2618 }
2619 }
2620 return 0;
2621 }
2622
2623 int set_size(
2624 ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter,
2625 bool corrupt)
2626 {
2627 auto ch = store->open_collection(coll);
2628 if (ghobj.hobj.is_snapdir()) {
2629 cerr << "Can't set the size of a snapdir" << std::endl;
2630 return -EINVAL;
2631 }
2632 bufferlist attr;
2633 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2634 if (r < 0) {
2635 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2636 << cpp_strerror(r) << std::endl;
2637 return r;
2638 }
2639 object_info_t oi;
2640 auto bp = attr.cbegin();
2641 try {
2642 decode(oi, bp);
2643 } catch (...) {
2644 r = -EINVAL;
2645 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2646 << cpp_strerror(r) << std::endl;
2647 return r;
2648 }
2649 struct stat st;
2650 r = store->stat(ch, ghobj, &st, true);
2651 if (r < 0) {
2652 cerr << "Error stat on : " << make_pair(coll, ghobj) << ", "
2653 << cpp_strerror(r) << std::endl;
2654 }
2655 ghobject_t head(ghobj);
2656 SnapSet ss;
2657 bool found_head = true;
2658 map<snapid_t, uint64_t>::iterator csi;
2659 bool is_snap = ghobj.hobj.is_snap();
2660 if (is_snap) {
2661 head.hobj = head.hobj.get_head();
2662 r = get_snapset(store, coll, head, ss, true);
2663 if (r < 0 && r != -ENOENT) {
2664 // Requested get_snapset() silent, so if not -ENOENT show error
2665 cerr << "Error getting snapset on : " << make_pair(coll, head) << ", "
2666 << cpp_strerror(r) << std::endl;
2667 return r;
2668 }
2669 if (r == -ENOENT) {
2670 head.hobj = head.hobj.get_snapdir();
2671 r = get_snapset(store, coll, head, ss);
2672 if (r < 0)
2673 return r;
2674 found_head = false;
2675 } else {
2676 found_head = true;
2677 }
2678 csi = ss.clone_size.find(ghobj.hobj.snap);
2679 if (csi == ss.clone_size.end()) {
2680 cerr << "SnapSet is missing clone_size for snap " << ghobj.hobj.snap << std::endl;
2681 return -EINVAL;
2682 }
2683 }
2684 if ((uint64_t)st.st_size == setsize && oi.size == setsize
2685 && (!is_snap || csi->second == setsize)) {
2686 cout << "Size of object is already " << setsize << std::endl;
2687 return 0;
2688 }
2689 cout << "Setting size to " << setsize << ", stat size " << st.st_size
2690 << ", obj info size " << oi.size;
2691 if (is_snap) {
2692 cout << ", " << (found_head ? "head" : "snapdir")
2693 << " clone_size " << csi->second;
2694 csi->second = setsize;
2695 }
2696 cout << std::endl;
2697 if (!dry_run) {
2698 attr.clear();
2699 oi.size = setsize;
2700 ObjectStore::Transaction t;
2701 // Only modify object info if we want to corrupt it
2702 if (!corrupt && (uint64_t)st.st_size != setsize) {
2703 t.truncate(coll, ghobj, setsize);
2704 // Changing objectstore size will invalidate data_digest, so clear it.
2705 oi.clear_data_digest();
2706 }
2707 encode(oi, attr, -1); /* fixme: using full features */
2708 t.setattr(coll, ghobj, OI_ATTR, attr);
2709 if (is_snap) {
2710 bufferlist snapattr;
2711 snapattr.clear();
2712 encode(ss, snapattr);
2713 t.setattr(coll, head, SS_ATTR, snapattr);
2714 }
2715 auto ch = store->open_collection(coll);
2716 r = store->queue_transaction(ch, std::move(t));
2717 if (r < 0) {
2718 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2719 << cpp_strerror(r) << std::endl;
2720 return r;
2721 }
2722 }
2723 return 0;
2724 }
2725
2726 int clear_data_digest(ObjectStore *store, coll_t coll, ghobject_t &ghobj) {
2727 auto ch = store->open_collection(coll);
2728 bufferlist attr;
2729 int r = store->getattr(ch, ghobj, OI_ATTR, attr);
2730 if (r < 0) {
2731 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2732 << cpp_strerror(r) << std::endl;
2733 return r;
2734 }
2735 object_info_t oi;
2736 auto bp = attr.cbegin();
2737 try {
2738 decode(oi, bp);
2739 } catch (...) {
2740 r = -EINVAL;
2741 cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
2742 << cpp_strerror(r) << std::endl;
2743 return r;
2744 }
2745 if (!dry_run) {
2746 attr.clear();
2747 oi.clear_data_digest();
2748 encode(oi, attr, -1); /* fixme: using full features */
2749 ObjectStore::Transaction t;
2750 t.setattr(coll, ghobj, OI_ATTR, attr);
2751 auto ch = store->open_collection(coll);
2752 r = store->queue_transaction(ch, std::move(t));
2753 if (r < 0) {
2754 cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
2755 << cpp_strerror(r) << std::endl;
2756 return r;
2757 }
2758 }
2759 return 0;
2760 }
2761
2762 int clear_snapset(ObjectStore *store, coll_t coll, ghobject_t &ghobj,
2763 string arg)
2764 {
2765 SnapSet ss;
2766 int ret = get_snapset(store, coll, ghobj, ss);
2767 if (ret < 0)
2768 return ret;
2769
2770 // Use "corrupt" to clear entire SnapSet
2771 // Use "seq" to just corrupt SnapSet.seq
2772 if (arg == "corrupt" || arg == "seq")
2773 ss.seq = 0;
2774 // Use "snaps" to just clear SnapSet.clone_snaps
2775 if (arg == "corrupt" || arg == "snaps")
2776 ss.clone_snaps.clear();
2777 // By default just clear clone, clone_overlap and clone_size
2778 if (arg == "corrupt")
2779 arg = "";
2780 if (arg == "" || arg == "clones")
2781 ss.clones.clear();
2782 if (arg == "" || arg == "clone_overlap")
2783 ss.clone_overlap.clear();
2784 if (arg == "" || arg == "clone_size")
2785 ss.clone_size.clear();
2786 // Break all clone sizes by adding 1
2787 if (arg == "size") {
2788 for (map<snapid_t, uint64_t>::iterator i = ss.clone_size.begin();
2789 i != ss.clone_size.end(); ++i)
2790 ++(i->second);
2791 }
2792
2793 if (!dry_run) {
2794 bufferlist bl;
2795 encode(ss, bl);
2796 ObjectStore::Transaction t;
2797 t.setattr(coll, ghobj, SS_ATTR, bl);
2798 auto ch = store->open_collection(coll);
2799 int r = store->queue_transaction(ch, std::move(t));
2800 if (r < 0) {
2801 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2802 << cpp_strerror(r) << std::endl;
2803 return r;
2804 }
2805 }
2806 return 0;
2807 }
2808
2809 vector<snapid_t>::iterator find(vector<snapid_t> &v, snapid_t clid)
2810 {
2811 return std::find(v.begin(), v.end(), clid);
2812 }
2813
2814 map<snapid_t, interval_set<uint64_t> >::iterator
2815 find(map<snapid_t, interval_set<uint64_t> > &m, snapid_t clid)
2816 {
2817 return m.find(clid);
2818 }
2819
2820 map<snapid_t, uint64_t>::iterator find(map<snapid_t, uint64_t> &m,
2821 snapid_t clid)
2822 {
2823 return m.find(clid);
2824 }
2825
2826 template<class T>
2827 int remove_from(T &mv, string name, snapid_t cloneid, bool force)
2828 {
2829 typename T::iterator i = find(mv, cloneid);
2830 if (i != mv.end()) {
2831 mv.erase(i);
2832 } else {
2833 cerr << "Clone " << cloneid << " doesn't exist in " << name;
2834 if (force) {
2835 cerr << " (ignored)" << std::endl;
2836 return 0;
2837 }
2838 cerr << std::endl;
2839 return -EINVAL;
2840 }
2841 return 0;
2842 }
2843
2844 int remove_clone(
2845 ObjectStore *store, coll_t coll, ghobject_t &ghobj, snapid_t cloneid, bool force)
2846 {
2847 // XXX: Don't allow this if in a cache tier or former cache tier
2848 // bool allow_incomplete_clones() const {
2849 // return cache_mode != CACHEMODE_NONE || has_flag(FLAG_INCOMPLETE_CLONES);
2850
2851 SnapSet snapset;
2852 int ret = get_snapset(store, coll, ghobj, snapset);
2853 if (ret < 0)
2854 return ret;
2855
2856 // Derived from trim_object()
2857 // ...from snapset
2858 vector<snapid_t>::iterator p;
2859 for (p = snapset.clones.begin(); p != snapset.clones.end(); ++p)
2860 if (*p == cloneid)
2861 break;
2862 if (p == snapset.clones.end()) {
2863 cerr << "Clone " << cloneid << " not present";
2864 return -ENOENT;
2865 }
2866 if (p != snapset.clones.begin()) {
2867 // not the oldest... merge overlap into next older clone
2868 vector<snapid_t>::iterator n = p - 1;
2869 hobject_t prev_coid = ghobj.hobj;
2870 prev_coid.snap = *n;
2871 //bool adjust_prev_bytes = is_present_clone(prev_coid);
2872
2873 //if (adjust_prev_bytes)
2874 // ctx->delta_stats.num_bytes -= snapset.get_clone_bytes(*n);
2875
2876 snapset.clone_overlap[*n].intersection_of(
2877 snapset.clone_overlap[*p]);
2878
2879 //if (adjust_prev_bytes)
2880 // ctx->delta_stats.num_bytes += snapset.get_clone_bytes(*n);
2881 }
2882
2883 ret = remove_from(snapset.clones, "clones", cloneid, force);
2884 if (ret) return ret;
2885 ret = remove_from(snapset.clone_overlap, "clone_overlap", cloneid, force);
2886 if (ret) return ret;
2887 ret = remove_from(snapset.clone_size, "clone_size", cloneid, force);
2888 if (ret) return ret;
2889
2890 if (dry_run)
2891 return 0;
2892
2893 bufferlist bl;
2894 encode(snapset, bl);
2895 ObjectStore::Transaction t;
2896 t.setattr(coll, ghobj, SS_ATTR, bl);
2897 auto ch = store->open_collection(coll);
2898 int r = store->queue_transaction(ch, std::move(t));
2899 if (r < 0) {
2900 cerr << "Error setting snapset on : " << make_pair(coll, ghobj) << ", "
2901 << cpp_strerror(r) << std::endl;
2902 return r;
2903 }
2904 cout << "Removal of clone " << cloneid << " complete" << std::endl;
2905 cout << "Use pg repair after OSD restarted to correct stat information" << std::endl;
2906 return 0;
2907 }
2908
2909 int dup(string srcpath, ObjectStore *src, string dstpath, ObjectStore *dst)
2910 {
2911 cout << "dup from " << src->get_type() << ": " << srcpath << "\n"
2912 << " to " << dst->get_type() << ": " << dstpath
2913 << std::endl;
2914 int num, i;
2915 vector<coll_t> collections;
2916 int r;
2917
2918 r = src->mount();
2919 if (r < 0) {
2920 cerr << "failed to mount src: " << cpp_strerror(r) << std::endl;
2921 return r;
2922 }
2923 r = dst->mount();
2924 if (r < 0) {
2925 cerr << "failed to mount dst: " << cpp_strerror(r) << std::endl;
2926 goto out_src;
2927 }
2928
2929 if (src->get_fsid() != dst->get_fsid()) {
2930 cerr << "src fsid " << src->get_fsid() << " != dest " << dst->get_fsid()
2931 << std::endl;
2932 goto out;
2933 }
2934 cout << "fsid " << src->get_fsid() << std::endl;
2935
2936 // make sure dst is empty
2937 r = dst->list_collections(collections);
2938 if (r < 0) {
2939 cerr << "error listing collections on dst: " << cpp_strerror(r) << std::endl;
2940 goto out;
2941 }
2942 if (!collections.empty()) {
2943 cerr << "destination store is not empty" << std::endl;
2944 goto out;
2945 }
2946
2947 r = src->list_collections(collections);
2948 if (r < 0) {
2949 cerr << "error listing collections on src: " << cpp_strerror(r) << std::endl;
2950 goto out;
2951 }
2952
2953 num = collections.size();
2954 cout << num << " collections" << std::endl;
2955 i = 1;
2956 for (auto cid : collections) {
2957 cout << i++ << "/" << num << " " << cid << std::endl;
2958 auto ch = src->open_collection(cid);
2959 auto dch = dst->create_new_collection(cid);
2960 {
2961 ObjectStore::Transaction t;
2962 int bits = src->collection_bits(ch);
2963 if (bits < 0) {
2964 if (src->get_type() == "filestore" && cid.is_meta()) {
2965 bits = 0;
2966 } else {
2967 cerr << "cannot get bit count for collection " << cid << ": "
2968 << cpp_strerror(bits) << std::endl;
2969 goto out;
2970 }
2971 }
2972 t.create_collection(cid, bits);
2973 dst->queue_transaction(dch, std::move(t));
2974 }
2975
2976 ghobject_t pos;
2977 uint64_t n = 0;
2978 uint64_t bytes = 0, keys = 0;
2979 while (true) {
2980 vector<ghobject_t> ls;
2981 r = src->collection_list(ch, pos, ghobject_t::get_max(), 1000, &ls, &pos);
2982 if (r < 0) {
2983 cerr << "collection_list on " << cid << " from " << pos << " got: "
2984 << cpp_strerror(r) << std::endl;
2985 goto out;
2986 }
2987 if (ls.empty()) {
2988 break;
2989 }
2990
2991 for (auto& oid : ls) {
2992 //cout << " " << cid << " " << oid << std::endl;
2993 if (n % 100 == 0) {
2994 cout << " " << std::setw(16) << n << " objects, "
2995 << std::setw(16) << bytes << " bytes, "
2996 << std::setw(16) << keys << " keys"
2997 << std::setw(1) << "\r" << std::flush;
2998 }
2999 n++;
3000
3001 ObjectStore::Transaction t;
3002 t.touch(cid, oid);
3003
3004 map<string,bufferptr> attrs;
3005 src->getattrs(ch, oid, attrs);
3006 if (!attrs.empty()) {
3007 t.setattrs(cid, oid, attrs);
3008 }
3009
3010 bufferlist bl;
3011 src->read(ch, oid, 0, 0, bl);
3012 if (bl.length()) {
3013 t.write(cid, oid, 0, bl.length(), bl);
3014 bytes += bl.length();
3015 }
3016
3017 bufferlist header;
3018 map<string,bufferlist> omap;
3019 src->omap_get(ch, oid, &header, &omap);
3020 if (header.length()) {
3021 t.omap_setheader(cid, oid, header);
3022 ++keys;
3023 }
3024 if (!omap.empty()) {
3025 keys += omap.size();
3026 t.omap_setkeys(cid, oid, omap);
3027 }
3028
3029 dst->queue_transaction(dch, std::move(t));
3030 }
3031 }
3032 cout << " " << std::setw(16) << n << " objects, "
3033 << std::setw(16) << bytes << " bytes, "
3034 << std::setw(16) << keys << " keys"
3035 << std::setw(1) << std::endl;
3036 }
3037
3038 // keyring
3039 cout << "keyring" << std::endl;
3040 {
3041 bufferlist bl;
3042 string s = srcpath + "/keyring";
3043 string err;
3044 r = bl.read_file(s.c_str(), &err);
3045 if (r < 0) {
3046 cerr << "failed to copy " << s << ": " << err << std::endl;
3047 } else {
3048 string d = dstpath + "/keyring";
3049 bl.write_file(d.c_str(), 0600);
3050 }
3051 }
3052
3053 // osd metadata
3054 cout << "duping osd metadata" << std::endl;
3055 {
3056 for (auto k : {"magic", "whoami", "ceph_fsid", "fsid"}) {
3057 string val;
3058 src->read_meta(k, &val);
3059 dst->write_meta(k, val);
3060 }
3061 }
3062
3063 dst->write_meta("ready", "ready");
3064
3065 cout << "done." << std::endl;
3066 r = 0;
3067 out:
3068 dst->umount();
3069 out_src:
3070 src->umount();
3071 return r;
3072 }
3073
3074 void usage(po::options_description &desc)
3075 {
3076 cerr << std::endl;
3077 cerr << desc << std::endl;
3078 cerr << std::endl;
3079 cerr << "Positional syntax:" << std::endl;
3080 cerr << std::endl;
3081 cerr << "ceph-objectstore-tool ... <object> (get|set)-bytes [file]" << std::endl;
3082 cerr << "ceph-objectstore-tool ... <object> set-(attr|omap) <key> [file]" << std::endl;
3083 cerr << "ceph-objectstore-tool ... <object> (get|rm)-(attr|omap) <key>" << std::endl;
3084 cerr << "ceph-objectstore-tool ... <object> get-omaphdr" << std::endl;
3085 cerr << "ceph-objectstore-tool ... <object> set-omaphdr [file]" << std::endl;
3086 cerr << "ceph-objectstore-tool ... <object> list-attrs" << std::endl;
3087 cerr << "ceph-objectstore-tool ... <object> list-omap" << std::endl;
3088 cerr << "ceph-objectstore-tool ... <object> remove|removeall" << std::endl;
3089 cerr << "ceph-objectstore-tool ... <object> dump" << std::endl;
3090 cerr << "ceph-objectstore-tool ... <object> set-size" << std::endl;
3091 cerr << "ceph-objectstore-tool ... <object> clear-data-digest" << std::endl;
3092 cerr << "ceph-objectstore-tool ... <object> remove-clone-metadata <cloneid>" << std::endl;
3093 cerr << std::endl;
3094 cerr << "<object> can be a JSON object description as displayed" << std::endl;
3095 cerr << "by --op list." << std::endl;
3096 cerr << "<object> can be an object name which will be looked up in all" << std::endl;
3097 cerr << "the OSD's PGs." << std::endl;
3098 cerr << "<object> can be the empty string ('') which with a provided pgid " << std::endl;
3099 cerr << "specifies the pgmeta object" << std::endl;
3100 cerr << std::endl;
3101 cerr << "The optional [file] argument will read stdin or write stdout" << std::endl;
3102 cerr << "if not specified or if '-' specified." << std::endl;
3103 }
3104
// Return true if `check` ends with `ending`.  An empty `ending` matches
// every string, including the empty one.
bool ends_with(const string& check, const string& ending)
{
  if (ending.size() > check.size())
    return false;
  // Compare only the tail of `check` that is as long as `ending`.
  const string::size_type tail = check.size() - ending.size();
  return check.compare(tail, ending.size(), ending) == 0;
}
3109
3110 // Based on FileStore::dump_journal(), set-up enough to only dump
3111 int mydump_journal(Formatter *f, string journalpath, bool m_journal_dio)
3112 {
3113 int r;
3114
3115 if (!journalpath.length())
3116 return -EINVAL;
3117
3118 FileJournal *journal = new FileJournal(g_ceph_context, uuid_d(), NULL, NULL,
3119 journalpath.c_str(), m_journal_dio);
3120 r = journal->_fdump(*f, false);
3121 delete journal;
3122 return r;
3123 }
3124
3125 int apply_layout_settings(ObjectStore *os, const OSDSuperblock &superblock,
3126 const string &pool_name, const spg_t &pgid, bool dry_run,
3127 int target_level)
3128 {
3129 int r = 0;
3130
3131 FileStore *fs = dynamic_cast<FileStore*>(os);
3132 if (!fs) {
3133 cerr << "Nothing to do for non-filestore backend" << std::endl;
3134 return 0; // making this return success makes testing easier
3135 }
3136
3137 OSDMap curmap;
3138 bufferlist bl;
3139 r = get_osdmap(os, superblock.current_epoch, curmap, bl);
3140 if (r) {
3141 cerr << "Can't find local OSDMap: " << cpp_strerror(r) << std::endl;
3142 return r;
3143 }
3144
3145 int64_t poolid = -1;
3146 if (pool_name.length()) {
3147 poolid = curmap.lookup_pg_pool_name(pool_name);
3148 if (poolid < 0) {
3149 cerr << "Couldn't find pool " << pool_name << ": " << cpp_strerror(poolid)
3150 << std::endl;
3151 return poolid;
3152 }
3153 }
3154
3155 vector<coll_t> collections, filtered_colls;
3156 r = os->list_collections(collections);
3157 if (r < 0) {
3158 cerr << "Error listing collections: " << cpp_strerror(r) << std::endl;
3159 return r;
3160 }
3161
3162 for (auto const &coll : collections) {
3163 spg_t coll_pgid;
3164 if (coll.is_pg(&coll_pgid) &&
3165 ((poolid >= 0 && coll_pgid.pool() == (uint64_t)poolid) ||
3166 coll_pgid == pgid)) {
3167 filtered_colls.push_back(coll);
3168 }
3169 }
3170
3171 size_t done = 0, total = filtered_colls.size();
3172 for (auto const &coll : filtered_colls) {
3173 if (dry_run) {
3174 cerr << "Would apply layout settings to " << coll << std::endl;
3175 } else {
3176 cerr << "Finished " << done << "/" << total << " collections" << "\r";
3177 r = fs->apply_layout_settings(coll, target_level);
3178 if (r < 0) {
3179 cerr << "Error applying layout settings to " << coll << std::endl;
3180 return r;
3181 }
3182 }
3183 ++done;
3184 }
3185
3186 cerr << "Finished " << total << "/" << total << " collections" << "\r" << std::endl;
3187 return r;
3188 }
3189
3190 int main(int argc, char **argv)
3191 {
3192 string dpath, jpath, pgidstr, op, file, mountpoint, mon_store_path, object;
3193 string target_data_path, fsid;
3194 string objcmd, arg1, arg2, type, format, argnspace, pool, rmtypestr;
3195 boost::optional<std::string> nspace;
3196 spg_t pgid;
3197 unsigned epoch = 0;
3198 unsigned slow_threshold = 16;
3199 ghobject_t ghobj;
3200 bool human_readable;
3201 Formatter *formatter;
3202 bool head, tty;
3203
3204 po::options_description desc("Allowed options");
3205 desc.add_options()
3206 ("help", "produce help message")
3207 ("type", po::value<string>(&type),
3208 "Arg is one of [bluestore (default), filestore, memstore]")
3209 ("data-path", po::value<string>(&dpath),
3210 "path to object store, mandatory")
3211 ("journal-path", po::value<string>(&jpath),
3212 "path to journal, use if tool can't find it")
3213 ("pgid", po::value<string>(&pgidstr),
3214 "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, and mandatory for apply-layout-settings if --pool is not specified")
3215 ("pool", po::value<string>(&pool),
3216 "Pool name, mandatory for apply-layout-settings if --pgid is not specified")
3217 ("op", po::value<string>(&op),
3218 "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, list-slow-omap, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
3219 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log, statfs]")
3220 ("epoch", po::value<unsigned>(&epoch),
3221 "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
3222 ("file", po::value<string>(&file),
3223 "path of file to export, export-remove, import, get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap")
3224 ("mon-store-path", po::value<string>(&mon_store_path),
3225 "path of monstore to update-mon-db")
3226 ("fsid", po::value<string>(&fsid),
3227 "fsid for new store created by mkfs")
3228 ("target-data-path", po::value<string>(&target_data_path),
3229 "path of target object store (for --op dup)")
3230 ("mountpoint", po::value<string>(&mountpoint),
3231 "fuse mountpoint")
3232 ("format", po::value<string>(&format)->default_value("json-pretty"),
3233 "Output format which may be json, json-pretty, xml, xml-pretty")
3234 ("debug", "Enable diagnostic output to stderr")
3235 ("no-mon-config", "Do not contact mons for config")
3236 ("no-superblock", "Do not read superblock")
3237 ("force", "Ignore some types of errors and proceed with operation - USE WITH CAUTION: CORRUPTION POSSIBLE NOW OR IN THE FUTURE")
3238 ("skip-journal-replay", "Disable journal replay")
3239 ("skip-mount-omap", "Disable mounting of omap")
3240 ("head", "Find head/snapdir when searching for objects by name")
3241 ("dry-run", "Don't modify the objectstore")
3242 ("tty", "Treat stdout as a tty (no binary data)")
3243 ("namespace", po::value<string>(&argnspace), "Specify namespace when searching for objects")
3244 ("rmtype", po::value<string>(&rmtypestr), "Specify corrupting object removal 'snapmap' or 'nosnapmap' - TESTING USE ONLY")
3245 ("slow-omap-threshold", po::value<unsigned>(&slow_threshold),
3246 "Threshold (in seconds) to consider omap listing slow (for op=list-slow-omap)")
3247 ;
3248
3249 po::options_description positional("Positional options");
3250 positional.add_options()
3251 ("object", po::value<string>(&object), "'' for pgmeta_oid, object name or ghobject in json")
3252 ("objcmd", po::value<string>(&objcmd), "command [(get|set)-bytes, (get|set|rm)-(attr|omap), (get|set)-omaphdr, list-attrs, list-omap, remove]")
3253 ("arg1", po::value<string>(&arg1), "arg1 based on cmd")
3254 ("arg2", po::value<string>(&arg2), "arg2 based on cmd")
3255 ;
3256
3257 po::options_description all;
3258 all.add(desc).add(positional);
3259
3260 po::positional_options_description pd;
3261 pd.add("object", 1).add("objcmd", 1).add("arg1", 1).add("arg2", 1);
3262
3263 vector<string> ceph_option_strings;
3264
3265 po::variables_map vm;
3266 try {
3267 po::parsed_options parsed =
3268 po::command_line_parser(argc, argv).options(all).allow_unregistered().positional(pd).run();
3269 po::store( parsed, vm);
3270 po::notify(vm);
3271 ceph_option_strings = po::collect_unrecognized(parsed.options,
3272 po::include_positional);
3273 } catch(po::error &e) {
3274 std::cerr << e.what() << std::endl;
3275 return 1;
3276 }
3277
3278 if (vm.count("help")) {
3279 usage(desc);
3280 return 1;
3281 }
3282
3283 // Compatibility with previous option name
3284 if (op == "dump-import")
3285 op = "dump-export";
3286
3287 debug = (vm.count("debug") > 0);
3288
3289 force = (vm.count("force") > 0);
3290
3291 no_superblock = (vm.count("no-superblock") > 0);
3292
3293 if (vm.count("namespace"))
3294 nspace = argnspace;
3295
3296 dry_run = (vm.count("dry-run") > 0);
3297 tty = (vm.count("tty") > 0);
3298
3299 osflagbits_t flags = 0;
3300 if (dry_run || vm.count("skip-journal-replay"))
3301 flags |= SKIP_JOURNAL_REPLAY;
3302 if (vm.count("skip-mount-omap"))
3303 flags |= SKIP_MOUNT_OMAP;
3304 if (op == "update-mon-db")
3305 flags |= SKIP_JOURNAL_REPLAY;
3306
3307 head = (vm.count("head") > 0);
3308
3309 // infer osd id so we can authenticate
3310 char fn[PATH_MAX];
3311 snprintf(fn, sizeof(fn), "%s/whoami", dpath.c_str());
3312 int fd = ::open(fn, O_RDONLY);
3313 if (fd >= 0) {
3314 bufferlist bl;
3315 bl.read_fd(fd, 64);
3316 string s(bl.c_str(), bl.length());
3317 int whoami = atoi(s.c_str());
3318 vector<string> tmp;
3319 // identify ourselves as this osd so we can auth and fetch our configs
3320 tmp.push_back("-n");
3321 tmp.push_back(string("osd.") + stringify(whoami));
3322 // populate osd_data so that the default keyring location works
3323 tmp.push_back("--osd-data");
3324 tmp.push_back(dpath);
3325 tmp.insert(tmp.end(), ceph_option_strings.begin(),
3326 ceph_option_strings.end());
3327 tmp.swap(ceph_option_strings);
3328 }
3329
3330 vector<const char *> ceph_options;
3331 ceph_options.reserve(ceph_options.size() + ceph_option_strings.size());
3332 for (vector<string>::iterator i = ceph_option_strings.begin();
3333 i != ceph_option_strings.end();
3334 ++i) {
3335 ceph_options.push_back(i->c_str());
3336 }
3337
3338 snprintf(fn, sizeof(fn), "%s/type", dpath.c_str());
3339 fd = ::open(fn, O_RDONLY);
3340 if (fd >= 0) {
3341 bufferlist bl;
3342 bl.read_fd(fd, 64);
3343 if (bl.length()) {
3344 string dp_type = string(bl.c_str(), bl.length() - 1); // drop \n
3345 if (vm.count("type") && dp_type != "" && type != dp_type)
3346 cerr << "WARNING: Ignoring type \"" << type << "\" - found data-path type \""
3347 << dp_type << "\"" << std::endl;
3348 type = dp_type;
3349 //cout << "object store type is " << type << std::endl;
3350 }
3351 ::close(fd);
3352 }
3353
3354 if (!vm.count("type") && type == "") {
3355 type = "bluestore";
3356 }
3357 if (!vm.count("data-path") &&
3358 op != "dump-export" &&
3359 !(op == "dump-journal" && type == "filestore")) {
3360 cerr << "Must provide --data-path" << std::endl;
3361 usage(desc);
3362 return 1;
3363 }
3364 if (type == "filestore" && !vm.count("journal-path")) {
3365 jpath = dpath + "/journal";
3366 }
3367 if (!vm.count("op") && !vm.count("object")) {
3368 cerr << "Must provide --op or object command..." << std::endl;
3369 usage(desc);
3370 return 1;
3371 }
3372 if (op != "list" && op != "apply-layout-settings" &&
3373 vm.count("op") && vm.count("object")) {
3374 cerr << "Can't specify both --op and object command syntax" << std::endl;
3375 usage(desc);
3376 return 1;
3377 }
3378 if (op == "apply-layout-settings" && !(vm.count("pool") ^ vm.count("pgid"))) {
3379 cerr << "apply-layout-settings requires either --pool or --pgid"
3380 << std::endl;
3381 usage(desc);
3382 return 1;
3383 }
3384 if (op != "list" && op != "apply-layout-settings" && vm.count("object") && !vm.count("objcmd")) {
3385 cerr << "Invalid syntax, missing command" << std::endl;
3386 usage(desc);
3387 return 1;
3388 }
3389 if (op == "fuse" && mountpoint.length() == 0) {
3390 cerr << "Missing fuse mountpoint" << std::endl;
3391 usage(desc);
3392 return 1;
3393 }
3394 outistty = isatty(STDOUT_FILENO) || tty;
3395
3396 file_fd = fd_none;
3397 if ((op == "export" || op == "export-remove" || op == "get-osdmap" || op == "get-inc-osdmap") && !dry_run) {
3398 if (!vm.count("file") || file == "-") {
3399 if (outistty) {
3400 cerr << "stdout is a tty and no --file filename specified" << std::endl;
3401 return 1;
3402 }
3403 file_fd = STDOUT_FILENO;
3404 } else {
3405 file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
3406 }
3407 } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap") {
3408 if (!vm.count("file") || file == "-") {
3409 if (isatty(STDIN_FILENO)) {
3410 cerr << "stdin is a tty and no --file filename specified" << std::endl;
3411 return 1;
3412 }
3413 file_fd = STDIN_FILENO;
3414 } else {
3415 file_fd = open(file.c_str(), O_RDONLY);
3416 }
3417 }
3418
3419 ObjectStoreTool tool = ObjectStoreTool(file_fd, dry_run);
3420
3421 if (vm.count("file") && file_fd == fd_none && !dry_run) {
3422 cerr << "--file option only applies to import, dump-export, export, export-remove, "
3423 << "get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap" << std::endl;
3424 return 1;
3425 }
3426
3427 if (file_fd != fd_none && file_fd < 0) {
3428 string err = string("file: ") + file;
3429 perror(err.c_str());
3430 return 1;
3431 }
3432 int init_flags = 0;
3433 if (vm.count("no-mon-config") > 0) {
3434 init_flags |= CINIT_FLAG_NO_MON_CONFIG;
3435 }
3436
3437 auto cct = global_init(
3438 NULL, ceph_options,
3439 CEPH_ENTITY_TYPE_OSD,
3440 CODE_ENVIRONMENT_UTILITY_NODOUT,
3441 init_flags);
3442 common_init_finish(g_ceph_context);
3443 if (debug) {
3444 g_conf().set_val_or_die("log_to_stderr", "true");
3445 g_conf().set_val_or_die("err_to_stderr", "true");
3446 }
3447 g_conf().apply_changes(nullptr);
3448
3449 // Special list handling. Treating pretty_format as human readable,
3450 // with one object per line and not an enclosing array.
3451 human_readable = ends_with(format, "-pretty");
3452 if ((op == "list" || op == "meta-list") && human_readable) {
3453 // Remove -pretty from end of format which we know is there
3454 format = format.substr(0, format.size() - strlen("-pretty"));
3455 }
3456
3457 formatter = Formatter::create(format);
3458 if (formatter == NULL) {
3459 cerr << "unrecognized format: " << format << std::endl;
3460 return 1;
3461 }
3462
3463 // Special handling for filestore journal, so we can dump it without mounting
3464 if (op == "dump-journal" && type == "filestore") {
3465 int ret = mydump_journal(formatter, jpath, g_conf()->journal_dio);
3466 if (ret < 0) {
3467 cerr << "journal-path: " << jpath << ": "
3468 << cpp_strerror(ret) << std::endl;
3469 return 1;
3470 }
3471 formatter->flush(cout);
3472 return 0;
3473 }
3474
3475 if (op == "dump-export") {
3476 int ret = tool.dump_export(formatter);
3477 if (ret < 0) {
3478 cerr << "dump-export: "
3479 << cpp_strerror(ret) << std::endl;
3480 return 1;
3481 }
3482 return 0;
3483 }
3484
3485 //Verify that data-path really exists
3486 struct stat st;
3487 if (::stat(dpath.c_str(), &st) == -1) {
3488 string err = string("data-path: ") + dpath;
3489 perror(err.c_str());
3490 return 1;
3491 }
3492
3493 if (pgidstr.length() && !pgid.parse(pgidstr.c_str())) {
3494 cerr << "Invalid pgid '" << pgidstr << "' specified" << std::endl;
3495 return 1;
3496 }
3497
3498 //Verify that the journal-path really exists
3499 if (type == "filestore") {
3500 if (::stat(jpath.c_str(), &st) == -1) {
3501 string err = string("journal-path: ") + jpath;
3502 perror(err.c_str());
3503 return 1;
3504 }
3505 if (S_ISDIR(st.st_mode)) {
3506 cerr << "journal-path: " << jpath << ": "
3507 << cpp_strerror(EISDIR) << std::endl;
3508 return 1;
3509 }
3510 }
3511
3512 ObjectStore *fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags);
3513 if (fs == NULL) {
3514 cerr << "Unable to create store of type " << type << std::endl;
3515 return 1;
3516 }
3517
3518 if (op == "fsck" || op == "fsck-deep") {
3519 int r = fs->fsck(op == "fsck-deep");
3520 if (r < 0) {
3521 cerr << "fsck failed: " << cpp_strerror(r) << std::endl;
3522 return 1;
3523 }
3524 if (r > 0) {
3525 cerr << "fsck status: " << r << " remaining error(s) and warning(s)" << std::endl;
3526 return 1;
3527 }
3528 cout << "fsck success" << std::endl;
3529 return 0;
3530 }
3531 if (op == "repair" || op == "repair-deep") {
3532 int r = fs->repair(op == "repair-deep");
3533 if (r < 0) {
3534 cerr << "repair failed: " << cpp_strerror(r) << std::endl;
3535 return 1;
3536 }
3537 if (r > 0) {
3538 cerr << "repair status: " << r << " remaining error(s) and warning(s)" << std::endl;
3539 return 1;
3540 }
3541 cout << "repair success" << std::endl;
3542 return 0;
3543 }
3544 if (op == "mkfs") {
3545 if (fsid.length()) {
3546 uuid_d f;
3547 bool r = f.parse(fsid.c_str());
3548 if (!r) {
3549 cerr << "failed to parse uuid '" << fsid << "'" << std::endl;
3550 return 1;
3551 }
3552 fs->set_fsid(f);
3553 }
3554 int r = fs->mkfs();
3555 if (r < 0) {
3556 cerr << "mkfs failed: " << cpp_strerror(r) << std::endl;
3557 return 1;
3558 }
3559 return 0;
3560 }
3561 if (op == "dup") {
3562 string target_type;
3563 char fn[PATH_MAX];
3564 snprintf(fn, sizeof(fn), "%s/type", target_data_path.c_str());
3565 int fd = ::open(fn, O_RDONLY);
3566 if (fd < 0) {
3567 cerr << "Unable to open " << target_data_path << "/type" << std::endl;
3568 exit(1);
3569 }
3570 bufferlist bl;
3571 bl.read_fd(fd, 64);
3572 if (bl.length()) {
3573 target_type = string(bl.c_str(), bl.length() - 1); // drop \n
3574 }
3575 ::close(fd);
3576 ObjectStore *targetfs = ObjectStore::create(
3577 g_ceph_context, target_type,
3578 target_data_path, "", 0);
3579 if (targetfs == NULL) {
3580 cerr << "Unable to open store of type " << target_type << std::endl;
3581 return 1;
3582 }
3583 int r = dup(dpath, fs, target_data_path, targetfs);
3584 if (r < 0) {
3585 cerr << "dup failed: " << cpp_strerror(r) << std::endl;
3586 return 1;
3587 }
3588 return 0;
3589 }
3590
3591 int ret = fs->mount();
3592 if (ret < 0) {
3593 if (ret == -EBUSY) {
3594 cerr << "OSD has the store locked" << std::endl;
3595 } else {
3596 cerr << "Mount failed with '" << cpp_strerror(ret) << "'" << std::endl;
3597 }
3598 return 1;
3599 }
3600
3601 if (op == "fuse") {
3602 #ifdef HAVE_LIBFUSE
3603 FuseStore fuse(fs, mountpoint);
3604 cout << "mounting fuse at " << mountpoint << " ..." << std::endl;
3605 int r = fuse.main();
3606 if (r < 0) {
3607 cerr << "failed to mount fuse: " << cpp_strerror(r) << std::endl;
3608 return 1;
3609 }
3610 #else
3611 cerr << "fuse support not enabled" << std::endl;
3612 #endif
3613 return 0;
3614 }
3615
3616 vector<coll_t> ls;
3617 vector<coll_t>::iterator it;
3618 CompatSet supported;
3619
3620 #ifdef INTERNAL_TEST
3621 supported = get_test_compat_set();
3622 #else
3623 supported = OSD::get_osd_compat_set();
3624 #endif
3625
3626 bufferlist bl;
3627 auto ch = fs->open_collection(coll_t::meta());
3628 std::unique_ptr<OSDSuperblock> superblock;
3629 if (!no_superblock) {
3630 superblock.reset(new OSDSuperblock);
3631 bufferlist::const_iterator p;
3632 ret = fs->read(ch, OSD_SUPERBLOCK_GOBJECT, 0, 0, bl);
3633 if (ret < 0) {
3634 cerr << "Failure to read OSD superblock: " << cpp_strerror(ret) << std::endl;
3635 goto out;
3636 }
3637
3638 p = bl.cbegin();
3639 decode(*superblock, p);
3640
3641 if (debug) {
3642 cerr << "Cluster fsid=" << superblock->cluster_fsid << std::endl;
3643 }
3644
3645 if (debug) {
3646 cerr << "Supported features: " << supported << std::endl;
3647 cerr << "On-disk features: " << superblock->compat_features << std::endl;
3648 }
3649 if (supported.compare(superblock->compat_features) == -1) {
3650 CompatSet unsupported = supported.unsupported(superblock->compat_features);
3651 cerr << "On-disk OSD incompatible features set "
3652 << unsupported << std::endl;
3653 ret = -EINVAL;
3654 goto out;
3655 }
3656 }
3657
3658 if (op == "apply-layout-settings") {
3659 int target_level = 0;
3660 // Single positional argument with apply-layout-settings
3661 // for target_level.
3662 if (vm.count("object") && isdigit(object[0])) {
3663 target_level = atoi(object.c_str());
3664 // This requires --arg1 to be specified since
3665 // this is the third positional argument and normally
3666 // used with object operations.
3667 } else if (vm.count("arg1") && isdigit(arg1[0])) {
3668 target_level = atoi(arg1.c_str());
3669 }
3670 ceph_assert(superblock != nullptr);
3671 ret = apply_layout_settings(fs, *superblock, pool, pgid, dry_run, target_level);
3672 goto out;
3673 }
3674
3675 if (op != "list" && vm.count("object")) {
3676 // Special case: Create pgmeta_oid if empty string specified
3677 // This can't conflict with any actual object names.
3678 if (object == "") {
3679 ghobj = pgid.make_pgmeta_oid();
3680 } else {
3681 json_spirit::Value v;
3682 try {
3683 if (!json_spirit::read(object, v) ||
3684 (v.type() != json_spirit::array_type && v.type() != json_spirit::obj_type)) {
3685 // Special: Need head/snapdir so set even if user didn't specify
3686 if (vm.count("objcmd") && (objcmd == "remove-clone-metadata"))
3687 head = true;
3688 lookup_ghobject lookup(object, nspace, head);
3689 if (pgidstr.length())
3690 ret = action_on_all_objects_in_exact_pg(fs, coll_t(pgid), lookup, debug);
3691 else
3692 ret = action_on_all_objects(fs, lookup, debug);
3693 if (ret) {
3694 throw std::runtime_error("Internal error");
3695 } else {
3696 if (lookup.size() != 1) {
3697 stringstream ss;
3698 if (lookup.size() == 0)
3699 ss << "No object id '" << object << "' found or invalid JSON specified";
3700 else
3701 ss << "Found " << lookup.size() << " objects with id '" << object
3702 << "', please use a JSON spec from --op list instead";
3703 throw std::runtime_error(ss.str());
3704 }
3705 pair<coll_t, ghobject_t> found = lookup.pop();
3706 pgidstr = found.first.to_str();
3707 pgid.parse(pgidstr.c_str());
3708 ghobj = found.second;
3709 }
3710 } else {
3711 stringstream ss;
3712 if (pgidstr.length() == 0 && v.type() != json_spirit::array_type) {
3713 ss << "Without --pgid the object '" << object
3714 << "' must be a JSON array";
3715 throw std::runtime_error(ss.str());
3716 }
3717 if (v.type() == json_spirit::array_type) {
3718 json_spirit::Array array = v.get_array();
3719 if (array.size() != 2) {
3720 ss << "Object '" << object
3721 << "' must be a JSON array with 2 elements";
3722 throw std::runtime_error(ss.str());
3723 }
3724 vector<json_spirit::Value>::iterator i = array.begin();
3725 ceph_assert(i != array.end());
3726 if (i->type() != json_spirit::str_type) {
3727 ss << "Object '" << object
3728 << "' must be a JSON array with the first element a string";
3729 throw std::runtime_error(ss.str());
3730 }
3731 string object_pgidstr = i->get_str();
3732 if (object_pgidstr != "meta") {
3733 spg_t object_pgid;
3734 object_pgid.parse(object_pgidstr.c_str());
3735 if (pgidstr.length() > 0) {
3736 if (object_pgid != pgid) {
3737 ss << "object '" << object
3738 << "' has a pgid different from the --pgid="
3739 << pgidstr << " option";
3740 throw std::runtime_error(ss.str());
3741 }
3742 } else {
3743 pgidstr = object_pgidstr;
3744 pgid = object_pgid;
3745 }
3746 } else {
3747 pgidstr = object_pgidstr;
3748 }
3749 ++i;
3750 v = *i;
3751 }
3752 try {
3753 ghobj.decode(v);
3754 } catch (std::runtime_error& e) {
3755 ss << "Decode object JSON error: " << e.what();
3756 throw std::runtime_error(ss.str());
3757 }
3758 if (pgidstr != "meta" && (uint64_t)pgid.pgid.m_pool != (uint64_t)ghobj.hobj.pool) {
3759 cerr << "Object pool and pgid pool don't match" << std::endl;
3760 ret = 1;
3761 goto out;
3762 }
3763 if (pgidstr != "meta") {
3764 auto ch = fs->open_collection(coll_t(pgid));
3765 if (!ghobj.match(fs->collection_bits(ch), pgid.ps())) {
3766 stringstream ss;
3767 ss << "object " << ghobj << " not contained by pg " << pgid;
3768 throw std::runtime_error(ss.str());
3769 }
3770 }
3771 }
3772 } catch (std::runtime_error& e) {
3773 cerr << e.what() << std::endl;
3774 ret = 1;
3775 goto out;
3776 }
3777 }
3778 }
3779
3780 // The ops which require --pgid option are checked here and
3781 // mentioned in the usage for --pgid.
3782 if ((op == "info" || op == "log" || op == "remove" || op == "export"
3783 || op == "export-remove" || op == "mark-complete"
3784 || op == "reset-last-complete"
3785 || op == "trim-pg-log") &&
3786 pgidstr.length() == 0) {
3787 cerr << "Must provide pgid" << std::endl;
3788 usage(desc);
3789 ret = 1;
3790 goto out;
3791 }
3792
3793 if (op == "import") {
3794 ceph_assert(superblock != nullptr);
3795 try {
3796 ret = tool.do_import(fs, *superblock, force, pgidstr);
3797 }
3798 catch (const buffer::error &e) {
3799 cerr << "do_import threw exception error " << e.what() << std::endl;
3800 ret = -EFAULT;
3801 }
3802 if (ret == -EFAULT) {
3803 cerr << "Corrupt input for import" << std::endl;
3804 }
3805 if (ret == 0)
3806 cout << "Import successful" << std::endl;
3807 goto out;
3808 } else if (op == "dump-journal-mount") {
3809 // Undocumented feature to dump journal with mounted fs
3810 // This doesn't support the format option, but it uses the
3811 // ObjectStore::dump_journal() and mounts to get replay to run.
3812 ret = fs->dump_journal(cout);
3813 if (ret) {
3814 if (ret == -EOPNOTSUPP) {
3815 cerr << "Object store type \"" << type << "\" doesn't support journal dump" << std::endl;
3816 } else {
3817 cerr << "Journal dump failed with error " << cpp_strerror(ret) << std::endl;
3818 }
3819 }
3820 goto out;
3821 } else if (op == "get-osdmap") {
3822 bufferlist bl;
3823 OSDMap osdmap;
3824 if (epoch == 0) {
3825 ceph_assert(superblock != nullptr);
3826 epoch = superblock->current_epoch;
3827 }
3828 ret = get_osdmap(fs, epoch, osdmap, bl);
3829 if (ret) {
3830 cerr << "Failed to get osdmap#" << epoch << ": "
3831 << cpp_strerror(ret) << std::endl;
3832 goto out;
3833 }
3834 ret = bl.write_fd(file_fd);
3835 if (ret) {
3836 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
3837 } else {
3838 cout << "osdmap#" << epoch << " exported." << std::endl;
3839 }
3840 goto out;
3841 } else if (op == "set-osdmap") {
3842 bufferlist bl;
3843 ret = get_fd_data(file_fd, bl);
3844 if (ret < 0) {
3845 cerr << "Failed to read osdmap " << cpp_strerror(ret) << std::endl;
3846 } else {
3847 ret = set_osdmap(fs, epoch, bl, force);
3848 }
3849 goto out;
3850 } else if (op == "get-inc-osdmap") {
3851 bufferlist bl;
3852 if (epoch == 0) {
3853 ceph_assert(superblock != nullptr);
3854 epoch = superblock->current_epoch;
3855 }
3856 ret = get_inc_osdmap(fs, epoch, bl);
3857 if (ret < 0) {
3858 cerr << "Failed to get incremental osdmap# " << epoch << ": "
3859 << cpp_strerror(ret) << std::endl;
3860 goto out;
3861 }
3862 ret = bl.write_fd(file_fd);
3863 if (ret) {
3864 cerr << "Failed to write to " << file << ": " << cpp_strerror(ret) << std::endl;
3865 } else {
3866 cout << "inc-osdmap#" << epoch << " exported." << std::endl;
3867 }
3868 goto out;
3869 } else if (op == "set-inc-osdmap") {
3870 bufferlist bl;
3871 ret = get_fd_data(file_fd, bl);
3872 if (ret < 0) {
3873 cerr << "Failed to read incremental osdmap " << cpp_strerror(ret) << std::endl;
3874 goto out;
3875 } else {
3876 ret = set_inc_osdmap(fs, epoch, bl, force);
3877 }
3878 goto out;
3879 } else if (op == "update-mon-db") {
3880 if (!vm.count("mon-store-path")) {
3881 cerr << "Please specify the path to monitor db to update" << std::endl;
3882 ret = -EINVAL;
3883 } else {
3884 ceph_assert(superblock != nullptr);
3885 ret = update_mon_db(*fs, *superblock, dpath + "/keyring", mon_store_path);
3886 }
3887 goto out;
3888 }
3889
3890 if (op == "remove") {
3891 if (!force && !dry_run) {
3892 cerr << "Please use export-remove or you must use --force option" << std::endl;
3893 ret = -EINVAL;
3894 goto out;
3895 }
3896 ret = initiate_new_remove_pg(fs, pgid);
3897 if (ret < 0) {
3898 cerr << "PG '" << pgid << "' not found" << std::endl;
3899 goto out;
3900 }
3901 cout << "Remove successful" << std::endl;
3902 goto out;
3903 }
3904
3905 if (op == "fix-lost") {
3906 boost::scoped_ptr<action_on_object_t> action;
3907 action.reset(new do_fix_lost());
3908 if (pgidstr.length())
3909 ret = action_on_all_objects_in_exact_pg(fs, coll_t(pgid), *action, debug);
3910 else
3911 ret = action_on_all_objects(fs, *action, debug);
3912 goto out;
3913 }
3914
3915 if (op == "list") {
3916 ret = do_list(fs, pgidstr, object, nspace, formatter, debug,
3917 human_readable, head);
3918 if (ret < 0) {
3919 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
3920 }
3921 goto out;
3922 }
3923 if (op == "list-slow-omap") {
3924 ret = do_list_slow(fs, pgidstr, object, slow_threshold, formatter, debug,
3925 human_readable);
3926 if (ret < 0) {
3927 cerr << "do_list failed: " << cpp_strerror(ret) << std::endl;
3928 }
3929 goto out;
3930 }
3931
3932 if (op == "dump-super") {
3933 ceph_assert(superblock != nullptr);
3934 formatter->open_object_section("superblock");
3935 superblock->dump(formatter);
3936 formatter->close_section();
3937 formatter->flush(cout);
3938 cout << std::endl;
3939 goto out;
3940 }
3941
3942 if (op == "statfs") {
3943 store_statfs_t statsbuf;
3944 ret = fs->statfs(&statsbuf);
3945 if (ret < 0) {
3946 cerr << "error from statfs: " << cpp_strerror(ret) << std::endl;
3947 goto out;
3948 }
3949 formatter->open_object_section("statfs");
3950 statsbuf.dump(formatter);
3951 formatter->close_section();
3952 formatter->flush(cout);
3953 cout << std::endl;
3954 goto out;
3955 }
3956
3957 if (op == "meta-list") {
3958 ret = do_meta(fs, object, formatter, debug, human_readable);
3959 if (ret < 0) {
3960 cerr << "do_meta failed: " << cpp_strerror(ret) << std::endl;
3961 }
3962 goto out;
3963 }
3964
3965 ret = fs->list_collections(ls);
3966 if (ret < 0) {
3967 cerr << "failed to list pgs: " << cpp_strerror(ret) << std::endl;
3968 goto out;
3969 }
3970
3971 if (debug && op == "list-pgs")
3972 cout << "Performing list-pgs operation" << std::endl;
3973
3974 // Find pg
3975 for (it = ls.begin(); it != ls.end(); ++it) {
3976 spg_t tmppgid;
3977
3978 if (pgidstr == "meta") {
3979 if (it->to_str() == "meta")
3980 break;
3981 else
3982 continue;
3983 }
3984
3985 if (!it->is_pg(&tmppgid)) {
3986 continue;
3987 }
3988
3989 if (it->is_temp(&tmppgid)) {
3990 continue;
3991 }
3992
3993 if (op != "list-pgs" && tmppgid != pgid) {
3994 continue;
3995 }
3996
3997 if (op != "list-pgs") {
3998 //Found!
3999 break;
4000 }
4001
4002 cout << tmppgid << std::endl;
4003 }
4004
4005 if (op == "list-pgs") {
4006 ret = 0;
4007 goto out;
4008 }
4009
4010 // If not an object command nor any of the ops handled below, then output this usage
4011 // before complaining about a bad pgid
4012 if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log") {
4013 cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
4014 "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, statfs)"
4015 << std::endl;
4016 usage(desc);
4017 ret = 1;
4018 goto out;
4019 }
4020 epoch_t map_epoch;
4021 // The following code for export, info, log require omap or !skip-mount-omap
4022 if (it != ls.end()) {
4023
4024 coll_t coll = *it;
4025
4026 if (vm.count("objcmd")) {
4027 ret = 0;
4028 if (objcmd == "remove" || objcmd == "removeall") {
4029 bool all = (objcmd == "removeall");
4030 enum rmtype type = BOTH;
4031 if (rmtypestr == "nosnapmap")
4032 type = NOSNAPMAP;
4033 else if (rmtypestr == "snapmap")
4034 type = SNAPMAP;
4035 ret = do_remove_object(fs, coll, ghobj, all, force, type);
4036 goto out;
4037 } else if (objcmd == "list-attrs") {
4038 ret = do_list_attrs(fs, coll, ghobj);
4039 goto out;
4040 } else if (objcmd == "list-omap") {
4041 ret = do_list_omap(fs, coll, ghobj);
4042 goto out;
4043 } else if (objcmd == "get-bytes" || objcmd == "set-bytes") {
4044 if (objcmd == "get-bytes") {
4045 int fd;
4046 if (vm.count("arg1") == 0 || arg1 == "-") {
4047 fd = STDOUT_FILENO;
4048 } else {
4049 fd = open(arg1.c_str(), O_WRONLY|O_TRUNC|O_CREAT|O_EXCL|O_LARGEFILE, 0666);
4050 if (fd == -1) {
4051 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4052 ret = 1;
4053 goto out;
4054 }
4055 }
4056 ret = do_get_bytes(fs, coll, ghobj, fd);
4057 if (fd != STDOUT_FILENO)
4058 close(fd);
4059 } else {
4060 int fd;
4061 if (vm.count("arg1") == 0 || arg1 == "-") {
4062 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4063 if (isatty(STDIN_FILENO)) {
4064 cerr << "stdin is a tty and no file specified" << std::endl;
4065 ret = 1;
4066 goto out;
4067 }
4068 fd = STDIN_FILENO;
4069 } else {
4070 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4071 if (fd == -1) {
4072 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4073 ret = 1;
4074 goto out;
4075 }
4076 }
4077 ret = do_set_bytes(fs, coll, ghobj, fd);
4078 if (fd != STDIN_FILENO)
4079 close(fd);
4080 }
4081 goto out;
4082 } else if (objcmd == "get-attr") {
4083 if (vm.count("arg1") == 0) {
4084 usage(desc);
4085 ret = 1;
4086 goto out;
4087 }
4088 ret = do_get_attr(fs, coll, ghobj, arg1);
4089 goto out;
4090 } else if (objcmd == "set-attr") {
4091 if (vm.count("arg1") == 0) {
4092 usage(desc);
4093 ret = 1;
4094 }
4095
4096 int fd;
4097 if (vm.count("arg2") == 0 || arg2 == "-") {
4098 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4099 if (isatty(STDIN_FILENO)) {
4100 cerr << "stdin is a tty and no file specified" << std::endl;
4101 ret = 1;
4102 goto out;
4103 }
4104 fd = STDIN_FILENO;
4105 } else {
4106 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4107 if (fd == -1) {
4108 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
4109 ret = 1;
4110 goto out;
4111 }
4112 }
4113 ret = do_set_attr(fs, coll, ghobj, arg1, fd);
4114 if (fd != STDIN_FILENO)
4115 close(fd);
4116 goto out;
4117 } else if (objcmd == "rm-attr") {
4118 if (vm.count("arg1") == 0) {
4119 usage(desc);
4120 ret = 1;
4121 goto out;
4122 }
4123 ret = do_rm_attr(fs, coll, ghobj, arg1);
4124 goto out;
4125 } else if (objcmd == "get-omap") {
4126 if (vm.count("arg1") == 0) {
4127 usage(desc);
4128 ret = 1;
4129 goto out;
4130 }
4131 ret = do_get_omap(fs, coll, ghobj, arg1);
4132 goto out;
4133 } else if (objcmd == "set-omap") {
4134 if (vm.count("arg1") == 0) {
4135 usage(desc);
4136 ret = 1;
4137 goto out;
4138 }
4139 int fd;
4140 if (vm.count("arg2") == 0 || arg2 == "-") {
4141 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4142 if (isatty(STDIN_FILENO)) {
4143 cerr << "stdin is a tty and no file specified" << std::endl;
4144 ret = 1;
4145 goto out;
4146 }
4147 fd = STDIN_FILENO;
4148 } else {
4149 fd = open(arg2.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4150 if (fd == -1) {
4151 cerr << "open " << arg2 << " " << cpp_strerror(errno) << std::endl;
4152 ret = 1;
4153 goto out;
4154 }
4155 }
4156 ret = do_set_omap(fs, coll, ghobj, arg1, fd);
4157 if (fd != STDIN_FILENO)
4158 close(fd);
4159 goto out;
4160 } else if (objcmd == "rm-omap") {
4161 if (vm.count("arg1") == 0) {
4162 usage(desc);
4163 ret = 1;
4164 goto out;
4165 }
4166 ret = do_rm_omap(fs, coll, ghobj, arg1);
4167 goto out;
4168 } else if (objcmd == "get-omaphdr") {
4169 if (vm.count("arg1")) {
4170 usage(desc);
4171 ret = 1;
4172 goto out;
4173 }
4174 ret = do_get_omaphdr(fs, coll, ghobj);
4175 goto out;
4176 } else if (objcmd == "set-omaphdr") {
4177 // Extra arg
4178 if (vm.count("arg2")) {
4179 usage(desc);
4180 ret = 1;
4181 goto out;
4182 }
4183 int fd;
4184 if (vm.count("arg1") == 0 || arg1 == "-") {
4185 // Since read_fd() doesn't handle ^D from a tty stdin, don't allow it.
4186 if (isatty(STDIN_FILENO)) {
4187 cerr << "stdin is a tty and no file specified" << std::endl;
4188 ret = 1;
4189 goto out;
4190 }
4191 fd = STDIN_FILENO;
4192 } else {
4193 fd = open(arg1.c_str(), O_RDONLY|O_LARGEFILE, 0666);
4194 if (fd == -1) {
4195 cerr << "open " << arg1 << " " << cpp_strerror(errno) << std::endl;
4196 ret = 1;
4197 goto out;
4198 }
4199 }
4200 ret = do_set_omaphdr(fs, coll, ghobj, fd);
4201 if (fd != STDIN_FILENO)
4202 close(fd);
4203 goto out;
4204 } else if (objcmd == "dump") {
4205 // There should not be any other arguments
4206 if (vm.count("arg1") || vm.count("arg2")) {
4207 usage(desc);
4208 ret = 1;
4209 goto out;
4210 }
4211 ret = print_obj_info(fs, coll, ghobj, formatter);
4212 goto out;
4213 } else if (objcmd == "corrupt-info") { // Undocumented testing feature
4214 // There should not be any other arguments
4215 if (vm.count("arg1") || vm.count("arg2")) {
4216 usage(desc);
4217 ret = 1;
4218 goto out;
4219 }
4220 ret = corrupt_info(fs, coll, ghobj, formatter);
4221 goto out;
4222 } else if (objcmd == "set-size" || objcmd == "corrupt-size") {
4223 // Undocumented testing feature
4224 bool corrupt = (objcmd == "corrupt-size");
4225 // Extra arg
4226 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4227 usage(desc);
4228 ret = 1;
4229 goto out;
4230 }
4231 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4232 cerr << "Invalid size '" << arg1 << "' specified" << std::endl;
4233 ret = 1;
4234 goto out;
4235 }
4236 uint64_t size = atoll(arg1.c_str());
4237 ret = set_size(fs, coll, ghobj, size, formatter, corrupt);
4238 goto out;
4239 } else if (objcmd == "clear-data-digest") {
4240 ret = clear_data_digest(fs, coll, ghobj);
4241 goto out;
4242 } else if (objcmd == "clear-snapset") {
4243 // UNDOCUMENTED: For testing zap SnapSet
4244 // IGNORE extra args since not in usage anyway
4245 if (!ghobj.hobj.has_snapset()) {
4246 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4247 ret = 1;
4248 goto out;
4249 }
4250 ret = clear_snapset(fs, coll, ghobj, arg1);
4251 goto out;
4252 } else if (objcmd == "remove-clone-metadata") {
4253 // Extra arg
4254 if (vm.count("arg1") == 0 || vm.count("arg2")) {
4255 usage(desc);
4256 ret = 1;
4257 goto out;
4258 }
4259 if (!ghobj.hobj.has_snapset()) {
4260 cerr << "'" << objcmd << "' requires a head or snapdir object" << std::endl;
4261 ret = 1;
4262 goto out;
4263 }
4264 if (arg1.length() == 0 || !isdigit(arg1.c_str()[0])) {
4265 cerr << "Invalid cloneid '" << arg1 << "' specified" << std::endl;
4266 ret = 1;
4267 goto out;
4268 }
4269 snapid_t cloneid = atoi(arg1.c_str());
4270 ret = remove_clone(fs, coll, ghobj, cloneid, force);
4271 goto out;
4272 }
4273 cerr << "Unknown object command '" << objcmd << "'" << std::endl;
4274 usage(desc);
4275 ret = 1;
4276 goto out;
4277 }
4278
4279 map_epoch = 0;
4280 ret = PG::peek_map_epoch(fs, pgid, &map_epoch);
4281 if (ret < 0)
4282 cerr << "peek_map_epoch reports error" << std::endl;
4283 if (debug)
4284 cerr << "map_epoch " << map_epoch << std::endl;
4285
4286 pg_info_t info(pgid);
4287 PastIntervals past_intervals;
4288 __u8 struct_ver;
4289 ret = PG::read_info(fs, pgid, coll, info, past_intervals, struct_ver);
4290 if (ret < 0) {
4291 cerr << "read_info error " << cpp_strerror(ret) << std::endl;
4292 goto out;
4293 }
4294 if (struct_ver < PG::get_compat_struct_v()) {
4295 cerr << "PG is too old to upgrade, use older Ceph version" << std::endl;
4296 ret = -EFAULT;
4297 goto out;
4298 }
4299 if (debug)
4300 cerr << "struct_v " << (int)struct_ver << std::endl;
4301
4302 if (op == "export" || op == "export-remove") {
4303 ceph_assert(superblock != nullptr);
4304 ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals);
4305 if (ret == 0) {
4306 cerr << "Export successful" << std::endl;
4307 if (op == "export-remove") {
4308 ret = initiate_new_remove_pg(fs, pgid);
4309 // Export succeeded, so pgid is there
4310 ceph_assert(ret == 0);
4311 cerr << "Remove successful" << std::endl;
4312 }
4313 }
4314 } else if (op == "info") {
4315 formatter->open_object_section("info");
4316 info.dump(formatter);
4317 formatter->close_section();
4318 formatter->flush(cout);
4319 cout << std::endl;
4320 } else if (op == "log") {
4321 PGLog::IndexedLog log;
4322 pg_missing_t missing;
4323 ret = get_log(fs, struct_ver, pgid, info, log, missing);
4324 if (ret < 0)
4325 goto out;
4326
4327 dump_log(formatter, cout, log, missing);
4328 } else if (op == "mark-complete") {
4329 ObjectStore::Transaction tran;
4330 ObjectStore::Transaction *t = &tran;
4331
4332 if (struct_ver < PG::get_compat_struct_v()) {
4333 cerr << "Can't mark-complete, version mismatch " << (int)struct_ver
4334 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4335 << std::endl;
4336 ret = 1;
4337 goto out;
4338 }
4339
4340 cout << "Marking complete " << std::endl;
4341
4342 ceph_assert(superblock != nullptr);
4343 info.last_update = eversion_t(superblock->current_epoch, info.last_update.version + 1);
4344 info.last_backfill = hobject_t::get_max();
4345 info.last_epoch_started = superblock->current_epoch;
4346 info.history.last_epoch_started = superblock->current_epoch;
4347 info.history.last_epoch_clean = superblock->current_epoch;
4348 past_intervals.clear();
4349
4350 if (!dry_run) {
4351 ret = write_info(*t, map_epoch, info, past_intervals);
4352 if (ret != 0)
4353 goto out;
4354 auto ch = fs->open_collection(coll_t(pgid));
4355 fs->queue_transaction(ch, std::move(*t));
4356 }
4357 cout << "Marking complete succeeded" << std::endl;
4358 } else if (op == "trim-pg-log") {
4359 ret = do_trim_pg_log(fs, coll, info, pgid,
4360 map_epoch, past_intervals);
4361 if (ret < 0) {
4362 cerr << "Error trimming pg log: " << cpp_strerror(ret) << std::endl;
4363 goto out;
4364 }
4365 cout << "Finished trimming pg log" << std::endl;
4366 goto out;
4367 } else if (op == "reset-last-complete") {
4368 if (!force) {
4369 std::cerr << "WARNING: reset-last-complete is extremely dangerous and almost "
4370 << "certain to lead to permanent data loss unless you know exactly "
4371 << "what you are doing. Pass --force to proceed anyway."
4372 << std::endl;
4373 ret = -EINVAL;
4374 goto out;
4375 }
4376 ObjectStore::Transaction tran;
4377 ObjectStore::Transaction *t = &tran;
4378
4379 if (struct_ver < PG::get_compat_struct_v()) {
4380 cerr << "Can't reset-last-complete, version mismatch " << (int)struct_ver
4381 << " (pg) < compat " << (int)PG::get_compat_struct_v() << " (tool)"
4382 << std::endl;
4383 ret = 1;
4384 goto out;
4385 }
4386
4387 cout << "Reseting last_complete " << std::endl;
4388
4389 info.last_complete = info.last_update;
4390
4391 if (!dry_run) {
4392 ret = write_info(*t, map_epoch, info, past_intervals);
4393 if (ret != 0)
4394 goto out;
4395 fs->queue_transaction(ch, std::move(*t));
4396 }
4397 cout << "Reseting last_complete succeeded" << std::endl;
4398
4399 } else {
4400 ceph_assert(!"Should have already checked for valid --op");
4401 }
4402 } else {
4403 cerr << "PG '" << pgid << "' not found" << std::endl;
4404 ret = -ENOENT;
4405 }
4406
4407 out:
4408 if (debug) {
4409 ostringstream ostr;
4410 Formatter* f = Formatter::create("json-pretty", "json-pretty", "json-pretty");
4411 cct->get_perfcounters_collection()->dump_formatted(f, false);
4412 ostr << "ceph-objectstore-tool ";
4413 f->flush(ostr);
4414 delete f;
4415 cout << ostr.str() << std::endl;
4416 }
4417
4418 int r = fs->umount();
4419 if (r < 0) {
4420 cerr << "umount failed: " << cpp_strerror(r) << std::endl;
4421 // If no previous error, then use umount() error
4422 if (ret == 0)
4423 ret = r;
4424 }
4425
4426 if (dry_run) {
4427 // Export output can go to stdout, so put this message on stderr
4428 if (op == "export")
4429 cerr << "dry-run: Nothing changed" << std::endl;
4430 else
4431 cout << "dry-run: Nothing changed" << std::endl;
4432 }
4433
4434 if (ret < 0)
4435 ret = 1;
4436 return ret;
4437 }