]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_metadata.cc
5a286b4aeb67509c714342c8d0859753ce72e293
[ceph.git] / ceph / src / rgw / rgw_metadata.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include <boost/intrusive_ptr.hpp>
5 #include "common/ceph_json.h"
6 #include "common/errno.h"
7 #include "rgw_metadata.h"
8 #include "rgw_coroutine.h"
9 #include "cls/version/cls_version_types.h"
10
11 #include "rgw_rados.h"
12 #include "rgw_tools.h"
13
14 #include "rgw_cr_rados.h"
15
16 #include "include/assert.h"
17 #include <boost/asio/yield.hpp>
18
19 #define dout_subsys ceph_subsys_rgw
20
21 void LogStatusDump::dump(Formatter *f) const {
22 string s;
23 switch (status) {
24 case MDLOG_STATUS_WRITE:
25 s = "write";
26 break;
27 case MDLOG_STATUS_SETATTRS:
28 s = "set_attrs";
29 break;
30 case MDLOG_STATUS_REMOVE:
31 s = "remove";
32 break;
33 case MDLOG_STATUS_COMPLETE:
34 s = "complete";
35 break;
36 case MDLOG_STATUS_ABORT:
37 s = "abort";
38 break;
39 default:
40 s = "unknown";
41 break;
42 }
43 encode_json("status", s, f);
44 }
45
46 void RGWMetadataLogData::encode(bufferlist& bl) const {
47 ENCODE_START(1, 1, bl);
48 ::encode(read_version, bl);
49 ::encode(write_version, bl);
50 uint32_t s = (uint32_t)status;
51 ::encode(s, bl);
52 ENCODE_FINISH(bl);
53 }
54
55 void RGWMetadataLogData::decode(bufferlist::iterator& bl) {
56 DECODE_START(1, bl);
57 ::decode(read_version, bl);
58 ::decode(write_version, bl);
59 uint32_t s;
60 ::decode(s, bl);
61 status = (RGWMDLogStatus)s;
62 DECODE_FINISH(bl);
63 }
64
65 void RGWMetadataLogData::dump(Formatter *f) const {
66 encode_json("read_version", read_version, f);
67 encode_json("write_version", write_version, f);
68 encode_json("status", LogStatusDump(status), f);
69 }
70
71 void decode_json_obj(RGWMDLogStatus& status, JSONObj *obj) {
72 string s;
73 JSONDecoder::decode_json("status", s, obj);
74 if (s == "complete") {
75 status = MDLOG_STATUS_COMPLETE;
76 } else if (s == "write") {
77 status = MDLOG_STATUS_WRITE;
78 } else if (s == "remove") {
79 status = MDLOG_STATUS_REMOVE;
80 } else if (s == "set_attrs") {
81 status = MDLOG_STATUS_SETATTRS;
82 } else if (s == "abort") {
83 status = MDLOG_STATUS_ABORT;
84 } else {
85 status = MDLOG_STATUS_UNKNOWN;
86 }
87 }
88
// Populate this record from a JSON object by decoding the "read_version",
// "write_version" and "status" fields.
void RGWMetadataLogData::decode_json(JSONObj *obj) {
  JSONDecoder::decode_json("read_version", read_version, obj);
  JSONDecoder::decode_json("write_version", write_version, obj);
  JSONDecoder::decode_json("status", status, obj);
}
94
95
96 int RGWMetadataLog::add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl) {
97 if (!store->need_to_log_metadata())
98 return 0;
99
100 string oid;
101
102 string hash_key;
103 handler->get_hash_key(section, key, hash_key);
104
105 int shard_id;
106 store->shard_name(prefix, cct->_conf->rgw_md_log_max_shards, hash_key, oid, &shard_id);
107 mark_modified(shard_id);
108 real_time now = real_clock::now();
109 return store->time_log_add(oid, now, section, key, bl);
110 }
111
112 int RGWMetadataLog::store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion)
113 {
114 string oid;
115
116 mark_modified(shard_id);
117 store->shard_name(prefix, shard_id, oid);
118 return store->time_log_add(oid, entries, completion, false);
119 }
120
121 void RGWMetadataLog::init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time,
122 string& marker, void **handle)
123 {
124 LogListCtx *ctx = new LogListCtx();
125
126 ctx->cur_shard = shard_id;
127 ctx->from_time = from_time;
128 ctx->end_time = end_time;
129 ctx->marker = marker;
130
131 get_shard_oid(ctx->cur_shard, ctx->cur_oid);
132
133 *handle = (void *)ctx;
134 }
135
136 void RGWMetadataLog::complete_list_entries(void *handle) {
137 LogListCtx *ctx = static_cast<LogListCtx *>(handle);
138 delete ctx;
139 }
140
141 int RGWMetadataLog::list_entries(void *handle,
142 int max_entries,
143 list<cls_log_entry>& entries,
144 string *last_marker,
145 bool *truncated) {
146 LogListCtx *ctx = static_cast<LogListCtx *>(handle);
147
148 if (!max_entries) {
149 *truncated = false;
150 return 0;
151 }
152
153 int ret = store->time_log_list(ctx->cur_oid, ctx->from_time, ctx->end_time,
154 max_entries, entries, ctx->marker,
155 last_marker, truncated);
156 if ((ret < 0) && (ret != -ENOENT))
157 return ret;
158
159 if (ret == -ENOENT)
160 *truncated = false;
161
162 return 0;
163 }
164
165 int RGWMetadataLog::get_info(int shard_id, RGWMetadataLogInfo *info)
166 {
167 string oid;
168 get_shard_oid(shard_id, oid);
169
170 cls_log_header header;
171
172 int ret = store->time_log_info(oid, &header);
173 if ((ret < 0) && (ret != -ENOENT))
174 return ret;
175
176 info->marker = header.max_marker;
177 info->last_update = header.max_time.to_real_time();
178
179 return 0;
180 }
181
182 static void _mdlog_info_completion(librados::completion_t cb, void *arg)
183 {
184 auto infoc = static_cast<RGWMetadataLogInfoCompletion *>(arg);
185 infoc->finish(cb);
186 infoc->put(); // drop the ref from get_info_async()
187 }
188
189 RGWMetadataLogInfoCompletion::RGWMetadataLogInfoCompletion(info_callback_t cb)
190 : completion(librados::Rados::aio_create_completion((void *)this, nullptr,
191 _mdlog_info_completion)),
192 callback(cb)
193 {
194 }
195
// Release the librados completion that was created in the constructor.
RGWMetadataLogInfoCompletion::~RGWMetadataLogInfoCompletion()
{
  completion->release();
}
200
201 int RGWMetadataLog::get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion)
202 {
203 string oid;
204 get_shard_oid(shard_id, oid);
205
206 completion->get(); // hold a ref until the completion fires
207
208 return store->time_log_info_async(completion->get_io_ctx(), oid,
209 &completion->get_header(),
210 completion->get_completion());
211 }
212
213 int RGWMetadataLog::trim(int shard_id, const real_time& from_time, const real_time& end_time,
214 const string& start_marker, const string& end_marker)
215 {
216 string oid;
217 get_shard_oid(shard_id, oid);
218
219 int ret;
220
221 ret = store->time_log_trim(oid, from_time, end_time, start_marker, end_marker);
222
223 if (ret == -ENOENT || ret == -ENODATA)
224 ret = 0;
225
226 return ret;
227 }
228
229 int RGWMetadataLog::lock_exclusive(int shard_id, timespan duration, string& zone_id, string& owner_id) {
230 string oid;
231 get_shard_oid(shard_id, oid);
232
233 return store->lock_exclusive(store->get_zone_params().log_pool, oid, duration, zone_id, owner_id);
234 }
235
236 int RGWMetadataLog::unlock(int shard_id, string& zone_id, string& owner_id) {
237 string oid;
238 get_shard_oid(shard_id, oid);
239
240 return store->unlock(store->get_zone_params().log_pool, oid, zone_id, owner_id);
241 }
242
243 void RGWMetadataLog::mark_modified(int shard_id)
244 {
245 lock.get_read();
246 if (modified_shards.find(shard_id) != modified_shards.end()) {
247 lock.unlock();
248 return;
249 }
250 lock.unlock();
251
252 RWLock::WLocker wl(lock);
253 modified_shards.insert(shard_id);
254 }
255
// Hand the set of modified shards to the caller and reset the internal set.
// Runs under the write lock.
void RGWMetadataLog::read_clear_modified(set<int> &modified)
{
  RWLock::WLocker wl(lock);
  // after the swap, modified_shards holds whatever the caller passed in
  modified.swap(modified_shards);
  modified_shards.clear();
}
262
// Return a mutable reference to the object's version.
obj_version& RGWMetadataObject::get_version()
{
  return objv;
}
267
268 class RGWMetadataTopHandler : public RGWMetadataHandler {
269 struct iter_data {
270 list<string> sections;
271 list<string>::iterator iter;
272 };
273
274 public:
275 RGWMetadataTopHandler() {}
276
277 string get_type() override { return string(); }
278
279 int get(RGWRados *store, string& entry, RGWMetadataObject **obj) override { return -ENOTSUP; }
280 int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
281 real_time mtime, JSONObj *obj, sync_type_t sync_type) override { return -ENOTSUP; }
282
283 virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) override {}
284
285 int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override { return -ENOTSUP; }
286
287 int list_keys_init(RGWRados *store, void **phandle) override {
288 iter_data *data = new iter_data;
289 store->meta_mgr->get_sections(data->sections);
290 data->iter = data->sections.begin();
291
292 *phandle = data;
293
294 return 0;
295 }
296 int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) override {
297 iter_data *data = static_cast<iter_data *>(handle);
298 for (int i = 0; i < max && data->iter != data->sections.end(); ++i, ++(data->iter)) {
299 keys.push_back(*data->iter);
300 }
301
302 *truncated = (data->iter != data->sections.end());
303
304 return 0;
305 }
306 void list_keys_complete(void *handle) override {
307 iter_data *data = static_cast<iter_data *>(handle);
308
309 delete data;
310 }
311 };
312
// file-local handler instance; find_handler() returns it for an empty type
static RGWMetadataTopHandler md_top_handler;
314
315
// Store the context and store pointers; no other work is done here.
RGWMetadataManager::RGWMetadataManager(CephContext *_cct, RGWRados *_store)
  : cct(_cct), store(_store)
{
}
320
321 RGWMetadataManager::~RGWMetadataManager()
322 {
323 map<string, RGWMetadataHandler *>::iterator iter;
324
325 for (iter = handlers.begin(); iter != handlers.end(); ++iter) {
326 delete iter->second;
327 }
328
329 handlers.clear();
330 }
331
// name of the object that stores the mdlog history
const std::string RGWMetadataLogHistory::oid = "meta.history";
333
334 namespace {
335
336 int read_history(RGWRados *store, RGWMetadataLogHistory *state,
337 RGWObjVersionTracker *objv_tracker)
338 {
339 RGWObjectCtx ctx{store};
340 auto& pool = store->get_zone_params().log_pool;
341 const auto& oid = RGWMetadataLogHistory::oid;
342 bufferlist bl;
343 int ret = rgw_get_system_obj(store, ctx, pool, oid, bl, objv_tracker, nullptr);
344 if (ret < 0) {
345 return ret;
346 }
347 try {
348 auto p = bl.begin();
349 state->decode(p);
350 } catch (buffer::error& e) {
351 ldout(store->ctx(), 1) << "failed to decode the mdlog history: "
352 << e.what() << dendl;
353 return -EIO;
354 }
355 return 0;
356 }
357
358 int write_history(RGWRados *store, const RGWMetadataLogHistory& state,
359 RGWObjVersionTracker *objv_tracker, bool exclusive = false)
360 {
361 bufferlist bl;
362 state.encode(bl);
363
364 auto& pool = store->get_zone_params().log_pool;
365 const auto& oid = RGWMetadataLogHistory::oid;
366 return rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(),
367 exclusive, objv_tracker, real_time{});
368 }
369
// shorthand for the period history cursor type used throughout this file
using Cursor = RGWPeriodHistory::Cursor;
371
/// read the mdlog history and use it to initialize the given cursor
///
/// The coroutine fails with the read error if the history object cannot be
/// read, and with the cursor's own error if the period history lookup for
/// the stored realm epoch returns an empty cursor.
class ReadHistoryCR : public RGWCoroutine {
  RGWRados *store;
  Cursor *cursor; //< output: cursor looked up from the stored realm epoch
  RGWObjVersionTracker *objv_tracker; //< passed through to the read
  RGWMetadataLogHistory state; //< decoded history contents
 public:
  ReadHistoryCR(RGWRados *store, Cursor *cursor,
                RGWObjVersionTracker *objv_tracker)
    : RGWCoroutine(store->ctx()), store(store), cursor(cursor),
      objv_tracker(objv_tracker)
  {}

  int operate() {
    reenter(this) {
      // read the history object from the log pool into 'state'
      yield {
        rgw_raw_obj obj{store->get_zone_params().log_pool,
                        RGWMetadataLogHistory::oid};
        constexpr bool empty_on_enoent = false;

        using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>;
        call(new ReadCR(store->get_async_rados(), store, obj,
                        &state, empty_on_enoent, objv_tracker));
      }
      if (retcode < 0) {
        ldout(cct, 1) << "failed to read mdlog history: "
            << cpp_strerror(retcode) << dendl;
        return set_cr_error(retcode);
      }
      // translate the stored realm epoch into a period history cursor
      *cursor = store->period_history->lookup(state.oldest_realm_epoch);
      if (!*cursor) {
        return set_cr_error(cursor->get_error());
      }

      ldout(cct, 10) << "read mdlog history with oldest period id="
          << state.oldest_period_id << " realm_epoch="
          << state.oldest_realm_epoch << dendl;
      return set_cr_done();
    }
    return 0;
  }
};
414
/// write the given cursor to the mdlog history
///
/// The history is filled from the cursor's period id and epoch and written
/// to the history object in the log pool; a write error fails the coroutine.
class WriteHistoryCR : public RGWCoroutine {
  RGWRados *store;
  Cursor cursor; //< cursor whose period id and epoch are written
  RGWObjVersionTracker *objv; //< passed through to the write
  RGWMetadataLogHistory state; //< history contents built from the cursor
 public:
  WriteHistoryCR(RGWRados *store, const Cursor& cursor,
                 RGWObjVersionTracker *objv)
    : RGWCoroutine(store->ctx()), store(store), cursor(cursor), objv(objv)
  {}

  int operate() {
    reenter(this) {
      // build the history record from the cursor
      state.oldest_period_id = cursor.get_period().get_id();
      state.oldest_realm_epoch = cursor.get_epoch();

      // write it to the history object in the log pool
      yield {
        rgw_raw_obj obj{store->get_zone_params().log_pool,
                        RGWMetadataLogHistory::oid};

        using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>;
        call(new WriteCR(store->get_async_rados(), store, obj, state, objv));
      }
      if (retcode < 0) {
        ldout(cct, 1) << "failed to write mdlog history: "
            << cpp_strerror(retcode) << dendl;
        return set_cr_error(retcode);
      }

      ldout(cct, 10) << "wrote mdlog history with oldest period id="
          << state.oldest_period_id << " realm_epoch="
          << state.oldest_realm_epoch << dendl;
      return set_cr_done();
    }
    return 0;
  }
};
453
/// update the mdlog history to reflect trimmed logs
///
/// Reads the existing history, fails with -ECANCELED if the trimmed period's
/// epoch is older than the one already recorded, and otherwise writes the
/// period that follows the trimmed one as the new oldest log period.
class TrimHistoryCR : public RGWCoroutine {
  RGWRados *store;
  const Cursor cursor; //< cursor to trimmed period
  RGWObjVersionTracker *objv; //< to prevent racing updates
  Cursor next; //< target cursor for oldest log period
  Cursor existing; //< existing cursor read from disk

 public:
  TrimHistoryCR(RGWRados *store, Cursor cursor, RGWObjVersionTracker *objv)
    : RGWCoroutine(store->ctx()),
      store(store), cursor(cursor), objv(objv), next(cursor)
  {
    next.next(); // advance past cursor
  }

  int operate() {
    reenter(this) {
      // read an existing history, and write the new history if it's newer
      yield call(new ReadHistoryCR(store, &existing, objv));
      if (retcode < 0) {
        return set_cr_error(retcode);
      }
      // reject older trims with ECANCELED
      if (cursor.get_epoch() < existing.get_epoch()) {
        ldout(cct, 4) << "found oldest log epoch=" << existing.get_epoch()
            << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl;
        return set_cr_error(-ECANCELED);
      }
      // overwrite with updated history
      yield call(new WriteHistoryCR(store, next, objv));
      if (retcode < 0) {
        return set_cr_error(retcode);
      }
      return set_cr_done();
    }
    return 0;
  }
};
493
// traverse all the way back to the beginning of the period history, and
// return a cursor to the first period in a fully attached history
//
// Periods missing from the in-memory history are pulled by id through the
// period puller and inserted into the history. On a pull or insert failure
// the returned cursor carries the error (or is the failed insert result).
Cursor find_oldest_period(RGWRados *store)
{
  auto cct = store->ctx();
  auto cursor = store->period_history->get_current();

  while (cursor) {
    // advance to the period's predecessor
    if (!cursor.has_prev()) {
      auto& predecessor = cursor.get_period().get_predecessor();
      if (predecessor.empty()) {
        // this is the first period, so our logs must start here
        ldout(cct, 10) << "find_oldest_period returning first "
            "period " << cursor.get_period().get_id() << dendl;
        return cursor;
      }
      // pull the predecessor and add it to our history
      RGWPeriod period;
      int r = store->period_puller->pull(predecessor, period);
      if (r < 0) {
        return Cursor{r};
      }
      auto prev = store->period_history->insert(std::move(period));
      if (!prev) {
        return prev;
      }
      ldout(cct, 20) << "find_oldest_period advancing to "
          "predecessor period " << predecessor << dendl;
      // the insert above is expected to have linked in the predecessor
      assert(cursor.has_prev());
    }
    cursor.prev();
  }
  // only reached when the cursor tests false, including an initially empty
  // current cursor
  ldout(cct, 10) << "find_oldest_period returning empty cursor" << dendl;
  return cursor;
}
530
531 } // anonymous namespace
532
// Determine the oldest period that has metadata logs.
//
// If no mdlog history object exists yet, the oldest period is found by
// walking the period history and is written as the initial history.
// Otherwise the stored period is looked up in the period history, or pulled
// by id and attached if it is not there. On failure the returned cursor
// carries the error code.
Cursor RGWMetadataManager::init_oldest_log_period()
{
  // read the mdlog history
  RGWMetadataLogHistory state;
  RGWObjVersionTracker objv;
  int ret = read_history(store, &state, &objv);

  if (ret == -ENOENT) {
    // initialize the mdlog history and write it
    ldout(cct, 10) << "initializing mdlog history" << dendl;
    auto cursor = find_oldest_period(store);
    if (!cursor) {
      return cursor;
    }

    // write the initial history
    state.oldest_realm_epoch = cursor.get_epoch();
    state.oldest_period_id = cursor.get_period().get_id();

    constexpr bool exclusive = true; // don't overwrite
    // note: this 'ret' shadows the outer one; -EEXIST is not treated as a
    // failure
    int ret = write_history(store, state, &objv, exclusive);
    if (ret < 0 && ret != -EEXIST) {
      ldout(cct, 1) << "failed to write mdlog history: "
          << cpp_strerror(ret) << dendl;
      return Cursor{ret};
    }
    return cursor;
  } else if (ret < 0) {
    ldout(cct, 1) << "failed to read mdlog history: "
        << cpp_strerror(ret) << dendl;
    return Cursor{ret};
  }

  // if it's already in the history, return it
  auto cursor = store->period_history->lookup(state.oldest_realm_epoch);
  if (cursor) {
    return cursor;
  }
  // pull the oldest period by id
  RGWPeriod period;
  ret = store->period_puller->pull(state.oldest_period_id, period);
  if (ret < 0) {
    ldout(cct, 1) << "failed to read period id=" << state.oldest_period_id
        << " for mdlog history: " << cpp_strerror(ret) << dendl;
    return Cursor{ret};
  }
  // verify its realm_epoch
  if (period.get_realm_epoch() != state.oldest_realm_epoch) {
    ldout(cct, 1) << "inconsistent mdlog history: read period id="
        << period.get_id() << " with realm_epoch=" << period.get_realm_epoch()
        << ", expected realm_epoch=" << state.oldest_realm_epoch << dendl;
    return Cursor{-EINVAL};
  }
  // attach the period to our history
  return store->period_history->attach(std::move(period));
}
589
590 Cursor RGWMetadataManager::read_oldest_log_period() const
591 {
592 RGWMetadataLogHistory state;
593 int ret = read_history(store, &state, nullptr);
594 if (ret < 0) {
595 ldout(store->ctx(), 1) << "failed to read mdlog history: "
596 << cpp_strerror(ret) << dendl;
597 return Cursor{ret};
598 }
599
600 ldout(store->ctx(), 10) << "read mdlog history with oldest period id="
601 << state.oldest_period_id << " realm_epoch="
602 << state.oldest_realm_epoch << dendl;
603
604 return store->period_history->lookup(state.oldest_realm_epoch);
605 }
606
// Allocate a coroutine that reads the mdlog history and stores the
// resulting cursor in *period. The coroutine is allocated with new and
// returned as a raw pointer.
RGWCoroutine* RGWMetadataManager::read_oldest_log_period_cr(Cursor *period,
        RGWObjVersionTracker *objv) const
{
  return new ReadHistoryCR(store, period, objv);
}
612
// Allocate a coroutine that updates the mdlog history after the given
// period has been trimmed. The coroutine is allocated with new and returned
// as a raw pointer.
RGWCoroutine* RGWMetadataManager::trim_log_period_cr(Cursor period,
        RGWObjVersionTracker *objv) const
{
  return new TrimHistoryCR(store, period, objv);
}
618
// Set current_log to the log of the given period. Always returns 0.
// pre_modify() and post_modify() assert that this has been called.
int RGWMetadataManager::init(const std::string& current_period)
{
  // open a log for the current period
  current_log = get_log(current_period);
  return 0;
}
625
626 RGWMetadataLog* RGWMetadataManager::get_log(const std::string& period)
627 {
628 // construct the period's log in place if it doesn't exist
629 auto insert = md_logs.emplace(std::piecewise_construct,
630 std::forward_as_tuple(period),
631 std::forward_as_tuple(cct, store, period));
632 return &insert.first->second;
633 }
634
635 int RGWMetadataManager::register_handler(RGWMetadataHandler *handler)
636 {
637 string type = handler->get_type();
638
639 if (handlers.find(type) != handlers.end())
640 return -EINVAL;
641
642 handlers[type] = handler;
643
644 return 0;
645 }
646
647 RGWMetadataHandler *RGWMetadataManager::get_handler(const string& type)
648 {
649 map<string, RGWMetadataHandler *>::iterator iter = handlers.find(type);
650 if (iter == handlers.end())
651 return NULL;
652
653 return iter->second;
654 }
655
656 void RGWMetadataManager::parse_metadata_key(const string& metadata_key, string& type, string& entry)
657 {
658 auto pos = metadata_key.find(':');
659 if (pos == string::npos) {
660 type = metadata_key;
661 } else {
662 type = metadata_key.substr(0, pos);
663 entry = metadata_key.substr(pos + 1);
664 }
665 }
666
667 int RGWMetadataManager::find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry)
668 {
669 string type;
670
671 parse_metadata_key(metadata_key, type, entry);
672
673 if (type.empty()) {
674 *handler = &md_top_handler;
675 return 0;
676 }
677
678 map<string, RGWMetadataHandler *>::iterator iter = handlers.find(type);
679 if (iter == handlers.end())
680 return -ENOENT;
681
682 *handler = iter->second;
683
684 return 0;
685
686 }
687
688 int RGWMetadataManager::get(string& metadata_key, Formatter *f)
689 {
690 RGWMetadataHandler *handler;
691 string entry;
692 int ret = find_handler(metadata_key, &handler, entry);
693 if (ret < 0) {
694 return ret;
695 }
696
697 RGWMetadataObject *obj;
698
699 ret = handler->get(store, entry, &obj);
700 if (ret < 0) {
701 return ret;
702 }
703
704 f->open_object_section("metadata_info");
705 encode_json("key", metadata_key, f);
706 encode_json("ver", obj->get_version(), f);
707 real_time mtime = obj->get_mtime();
708 if (!real_clock::is_zero(mtime)) {
709 utime_t ut(mtime);
710 encode_json("mtime", ut, f);
711 }
712 encode_json("data", *obj, f);
713 f->close_section();
714
715 delete obj;
716
717 return 0;
718 }
719
720 int RGWMetadataManager::put(string& metadata_key, bufferlist& bl,
721 RGWMetadataHandler::sync_type_t sync_type,
722 obj_version *existing_version)
723 {
724 RGWMetadataHandler *handler;
725 string entry;
726
727 int ret = find_handler(metadata_key, &handler, entry);
728 if (ret < 0)
729 return ret;
730
731 JSONParser parser;
732 if (!parser.parse(bl.c_str(), bl.length())) {
733 return -EINVAL;
734 }
735
736 RGWObjVersionTracker objv_tracker;
737
738 obj_version *objv = &objv_tracker.write_version;
739
740 utime_t mtime;
741
742 try {
743 JSONDecoder::decode_json("key", metadata_key, &parser);
744 JSONDecoder::decode_json("ver", *objv, &parser);
745 JSONDecoder::decode_json("mtime", mtime, &parser);
746 } catch (JSONDecoder::err& e) {
747 return -EINVAL;
748 }
749
750 JSONObj *jo = parser.find_obj("data");
751 if (!jo) {
752 return -EINVAL;
753 }
754
755 ret = handler->put(store, entry, objv_tracker, mtime.to_real_time(), jo, sync_type);
756 if (existing_version) {
757 *existing_version = objv_tracker.read_version;
758 }
759 return ret;
760 }
761
762 int RGWMetadataManager::remove(string& metadata_key)
763 {
764 RGWMetadataHandler *handler;
765 string entry;
766
767 int ret = find_handler(metadata_key, &handler, entry);
768 if (ret < 0)
769 return ret;
770
771 RGWMetadataObject *obj;
772
773 ret = handler->get(store, entry, &obj);
774 if (ret < 0) {
775 return ret;
776 }
777
778 RGWObjVersionTracker objv_tracker;
779
780 objv_tracker.read_version = obj->get_version();
781
782 delete obj;
783
784 return handler->remove(store, entry, objv_tracker);
785 }
786
787 int RGWMetadataManager::lock_exclusive(string& metadata_key, timespan duration, string& owner_id) {
788 RGWMetadataHandler *handler;
789 string entry;
790 string zone_id;
791
792 int ret = find_handler(metadata_key, &handler, entry);
793 if (ret < 0)
794 return ret;
795
796 rgw_pool pool;
797 string oid;
798
799 handler->get_pool_and_oid(store, entry, pool, oid);
800
801 return store->lock_exclusive(pool, oid, duration, zone_id, owner_id);
802 }
803
804 int RGWMetadataManager::unlock(string& metadata_key, string& owner_id) {
805 librados::IoCtx io_ctx;
806 RGWMetadataHandler *handler;
807 string entry;
808 string zone_id;
809
810 int ret = find_handler(metadata_key, &handler, entry);
811 if (ret < 0)
812 return ret;
813
814 rgw_pool pool;
815 string oid;
816
817 handler->get_pool_and_oid(store, entry, pool, oid);
818
819 return store->unlock(pool, oid, zone_id, owner_id);
820 }
821
// Listing state returned to callers of RGWMetadataManager::list_keys_init()
// as an opaque handle.
struct list_keys_handle {
  void *handle; // handle produced by the handler's own list_keys_init()
  RGWMetadataHandler *handler; // handler that owns 'handle'
};
826
827
828 int RGWMetadataManager::list_keys_init(string& section, void **handle)
829 {
830 string entry;
831 RGWMetadataHandler *handler;
832
833 int ret;
834
835 ret = find_handler(section, &handler, entry);
836 if (ret < 0) {
837 return -ENOENT;
838 }
839
840 list_keys_handle *h = new list_keys_handle;
841 h->handler = handler;
842 ret = handler->list_keys_init(store, &h->handle);
843 if (ret < 0) {
844 delete h;
845 return ret;
846 }
847
848 *handle = (void *)h;
849
850 return 0;
851 }
852
853 int RGWMetadataManager::list_keys_next(void *handle, int max, list<string>& keys, bool *truncated)
854 {
855 list_keys_handle *h = static_cast<list_keys_handle *>(handle);
856
857 RGWMetadataHandler *handler = h->handler;
858
859 return handler->list_keys_next(h->handle, max, keys, truncated);
860 }
861
862
863 void RGWMetadataManager::list_keys_complete(void *handle)
864 {
865 list_keys_handle *h = static_cast<list_keys_handle *>(handle);
866
867 RGWMetadataHandler *handler = h->handler;
868
869 handler->list_keys_complete(h->handle);
870 delete h;
871 }
872
873 void RGWMetadataManager::dump_log_entry(cls_log_entry& entry, Formatter *f)
874 {
875 f->open_object_section("entry");
876 f->dump_string("id", entry.id);
877 f->dump_string("section", entry.section);
878 f->dump_string("name", entry.name);
879 entry.timestamp.gmtime_nsec(f->dump_stream("timestamp"));
880
881 try {
882 RGWMetadataLogData log_data;
883 bufferlist::iterator iter = entry.data.begin();
884 ::decode(log_data, iter);
885
886 encode_json("data", log_data, f);
887 } catch (buffer::error& err) {
888 lderr(cct) << "failed to decode log entry: " << entry.section << ":" << entry.name<< " ts=" << entry.timestamp << dendl;
889 }
890 f->close_section();
891 }
892
893 void RGWMetadataManager::get_sections(list<string>& sections)
894 {
895 for (map<string, RGWMetadataHandler *>::iterator iter = handlers.begin(); iter != handlers.end(); ++iter) {
896 sections.push_back(iter->first);
897 }
898 }
899
900 int RGWMetadataManager::pre_modify(RGWMetadataHandler *handler, string& section, const string& key,
901 RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker,
902 RGWMDLogStatus op_type)
903 {
904 section = handler->get_type();
905
906 /* if write version has not been set, and there's a read version, set it so that we can
907 * log it
908 */
909 if (objv_tracker) {
910 if (objv_tracker->read_version.ver && !objv_tracker->write_version.ver) {
911 objv_tracker->write_version = objv_tracker->read_version;
912 objv_tracker->write_version.ver++;
913 }
914 log_data.read_version = objv_tracker->read_version;
915 log_data.write_version = objv_tracker->write_version;
916 }
917
918 log_data.status = op_type;
919
920 bufferlist logbl;
921 ::encode(log_data, logbl);
922
923 assert(current_log); // must have called init()
924 int ret = current_log->add_entry(handler, section, key, logbl);
925 if (ret < 0)
926 return ret;
927
928 return 0;
929 }
930
931 int RGWMetadataManager::post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data,
932 RGWObjVersionTracker *objv_tracker, int ret)
933 {
934 if (ret >= 0)
935 log_data.status = MDLOG_STATUS_COMPLETE;
936 else
937 log_data.status = MDLOG_STATUS_ABORT;
938
939 bufferlist logbl;
940 ::encode(log_data, logbl);
941
942 assert(current_log); // must have called init()
943 int r = current_log->add_entry(handler, section, key, logbl);
944 if (ret < 0)
945 return ret;
946
947 if (r < 0)
948 return r;
949
950 return 0;
951 }
952
953 string RGWMetadataManager::heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv)
954 {
955 char buf[objv.tag.size() + 32];
956 snprintf(buf, sizeof(buf), "%s:%lld", objv.tag.c_str(), (long long)objv.ver);
957 return string(".meta:") + handler->get_type() + ":" + key + ":" + buf;
958 }
959
960 int RGWMetadataManager::store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl,
961 RGWObjVersionTracker *objv_tracker, real_time mtime,
962 map<string, bufferlist> *pattrs)
963 {
964 if (!objv_tracker) {
965 return -EINVAL;
966 }
967
968 rgw_pool heap_pool(store->get_zone_params().metadata_heap);
969
970 if (heap_pool.empty()) {
971 return 0;
972 }
973
974 RGWObjVersionTracker otracker;
975 otracker.write_version = objv_tracker->write_version;
976 string oid = heap_oid(handler, key, objv_tracker->write_version);
977 int ret = rgw_put_system_obj(store, heap_pool, oid,
978 bl.c_str(), bl.length(), false,
979 &otracker, mtime, pattrs);
980 if (ret < 0) {
981 ldout(store->ctx(), 0) << "ERROR: rgw_put_system_obj() oid=" << oid << ") returned ret=" << ret << dendl;
982 return ret;
983 }
984
985 return 0;
986 }
987
988 int RGWMetadataManager::remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker)
989 {
990 if (!objv_tracker) {
991 return -EINVAL;
992 }
993
994 rgw_pool heap_pool(store->get_zone_params().metadata_heap);
995
996 if (heap_pool.empty()) {
997 return 0;
998 }
999
1000 string oid = heap_oid(handler, key, objv_tracker->write_version);
1001 rgw_raw_obj obj(heap_pool, oid);
1002 int ret = store->delete_system_obj(obj);
1003 if (ret < 0) {
1004 ldout(store->ctx(), 0) << "ERROR: store->delete_system_obj()=" << oid << ") returned ret=" << ret << dendl;
1005 return ret;
1006 }
1007
1008 return 0;
1009 }
1010
1011 int RGWMetadataManager::put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive,
1012 RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs)
1013 {
1014 string section;
1015 RGWMetadataLogData log_data;
1016 int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_WRITE);
1017 if (ret < 0)
1018 return ret;
1019
1020 string oid;
1021 rgw_pool pool;
1022
1023 handler->get_pool_and_oid(store, key, pool, oid);
1024
1025 ret = store_in_heap(handler, key, bl, objv_tracker, mtime, pattrs);
1026 if (ret < 0) {
1027 ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": store_in_heap() key=" << key << " returned ret=" << ret << dendl;
1028 goto done;
1029 }
1030
1031 ret = rgw_put_system_obj(store, pool, oid,
1032 bl.c_str(), bl.length(), exclusive,
1033 objv_tracker, mtime, pattrs);
1034
1035 if (ret < 0) {
1036 int r = remove_from_heap(handler, key, objv_tracker);
1037 if (r < 0) {
1038 ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": remove_from_heap() key=" << key << " returned ret=" << r << dendl;
1039 }
1040 }
1041 done:
1042 /* cascading ret into post_modify() */
1043
1044 ret = post_modify(handler, section, key, log_data, objv_tracker, ret);
1045 if (ret < 0)
1046 return ret;
1047
1048 return 0;
1049 }
1050
1051 int RGWMetadataManager::remove_entry(RGWMetadataHandler *handler, string& key, RGWObjVersionTracker *objv_tracker)
1052 {
1053 string section;
1054 RGWMetadataLogData log_data;
1055 int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_REMOVE);
1056 if (ret < 0)
1057 return ret;
1058
1059 string oid;
1060 rgw_pool pool;
1061
1062 handler->get_pool_and_oid(store, key, pool, oid);
1063
1064 rgw_raw_obj obj(pool, oid);
1065
1066 ret = store->delete_system_obj(obj, objv_tracker);
1067 /* cascading ret into post_modify() */
1068
1069 ret = post_modify(handler, section, key, log_data, objv_tracker, ret);
1070 if (ret < 0)
1071 return ret;
1072
1073 return 0;
1074 }
1075
1076 int RGWMetadataManager::get_log_shard_id(const string& section,
1077 const string& key, int *shard_id)
1078 {
1079 RGWMetadataHandler *handler = get_handler(section);
1080 if (!handler) {
1081 return -EINVAL;
1082 }
1083 string hash_key;
1084 handler->get_hash_key(section, key, hash_key);
1085 *shard_id = store->key_to_shard_id(hash_key, cct->_conf->rgw_md_log_max_shards);
1086 return 0;
1087 }