]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw_metadata.cc
update sources to v12.1.1
[ceph.git] / ceph / src / rgw / rgw_metadata.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include <boost/intrusive_ptr.hpp>
5#include "common/ceph_json.h"
6#include "common/errno.h"
7#include "rgw_metadata.h"
8#include "rgw_coroutine.h"
9#include "cls/version/cls_version_types.h"
10
11#include "rgw_rados.h"
12#include "rgw_tools.h"
13
14#include "rgw_cr_rados.h"
7c673cae
FG
15
16#include "include/assert.h"
31f18b77 17#include <boost/asio/yield.hpp>
7c673cae
FG
18
19#define dout_subsys ceph_subsys_rgw
20
21void LogStatusDump::dump(Formatter *f) const {
22 string s;
23 switch (status) {
24 case MDLOG_STATUS_WRITE:
25 s = "write";
26 break;
27 case MDLOG_STATUS_SETATTRS:
28 s = "set_attrs";
29 break;
30 case MDLOG_STATUS_REMOVE:
31 s = "remove";
32 break;
33 case MDLOG_STATUS_COMPLETE:
34 s = "complete";
35 break;
36 case MDLOG_STATUS_ABORT:
37 s = "abort";
38 break;
39 default:
40 s = "unknown";
41 break;
42 }
43 encode_json("status", s, f);
44}
45
46void RGWMetadataLogData::encode(bufferlist& bl) const {
47 ENCODE_START(1, 1, bl);
48 ::encode(read_version, bl);
49 ::encode(write_version, bl);
50 uint32_t s = (uint32_t)status;
51 ::encode(s, bl);
52 ENCODE_FINISH(bl);
53}
54
55void RGWMetadataLogData::decode(bufferlist::iterator& bl) {
56 DECODE_START(1, bl);
57 ::decode(read_version, bl);
58 ::decode(write_version, bl);
59 uint32_t s;
60 ::decode(s, bl);
61 status = (RGWMDLogStatus)s;
62 DECODE_FINISH(bl);
63}
64
65void RGWMetadataLogData::dump(Formatter *f) const {
66 encode_json("read_version", read_version, f);
67 encode_json("write_version", write_version, f);
68 encode_json("status", LogStatusDump(status), f);
69}
70
71void decode_json_obj(RGWMDLogStatus& status, JSONObj *obj) {
72 string s;
73 JSONDecoder::decode_json("status", s, obj);
74 if (s == "complete") {
75 status = MDLOG_STATUS_COMPLETE;
76 } else if (s == "write") {
77 status = MDLOG_STATUS_WRITE;
78 } else if (s == "remove") {
79 status = MDLOG_STATUS_REMOVE;
80 } else if (s == "set_attrs") {
81 status = MDLOG_STATUS_SETATTRS;
82 } else if (s == "abort") {
83 status = MDLOG_STATUS_ABORT;
84 } else {
85 status = MDLOG_STATUS_UNKNOWN;
86 }
87}
88
89void RGWMetadataLogData::decode_json(JSONObj *obj) {
90 JSONDecoder::decode_json("read_version", read_version, obj);
91 JSONDecoder::decode_json("write_version", write_version, obj);
92 JSONDecoder::decode_json("status", status, obj);
93}
94
95
96int RGWMetadataLog::add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl) {
97 if (!store->need_to_log_metadata())
98 return 0;
99
100 string oid;
101
102 string hash_key;
103 handler->get_hash_key(section, key, hash_key);
104
105 int shard_id;
106 store->shard_name(prefix, cct->_conf->rgw_md_log_max_shards, hash_key, oid, &shard_id);
107 mark_modified(shard_id);
108 real_time now = real_clock::now();
109 return store->time_log_add(oid, now, section, key, bl);
110}
111
112int RGWMetadataLog::store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion)
113{
114 string oid;
115
116 mark_modified(shard_id);
117 store->shard_name(prefix, shard_id, oid);
118 return store->time_log_add(oid, entries, completion, false);
119}
120
121void RGWMetadataLog::init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time,
122 string& marker, void **handle)
123{
124 LogListCtx *ctx = new LogListCtx();
125
126 ctx->cur_shard = shard_id;
127 ctx->from_time = from_time;
128 ctx->end_time = end_time;
129 ctx->marker = marker;
130
131 get_shard_oid(ctx->cur_shard, ctx->cur_oid);
132
133 *handle = (void *)ctx;
134}
135
136void RGWMetadataLog::complete_list_entries(void *handle) {
137 LogListCtx *ctx = static_cast<LogListCtx *>(handle);
138 delete ctx;
139}
140
141int RGWMetadataLog::list_entries(void *handle,
142 int max_entries,
143 list<cls_log_entry>& entries,
144 string *last_marker,
145 bool *truncated) {
146 LogListCtx *ctx = static_cast<LogListCtx *>(handle);
147
148 if (!max_entries) {
149 *truncated = false;
150 return 0;
151 }
152
224ce89b 153 std::string next_marker;
7c673cae
FG
154 int ret = store->time_log_list(ctx->cur_oid, ctx->from_time, ctx->end_time,
155 max_entries, entries, ctx->marker,
224ce89b 156 &next_marker, truncated);
7c673cae
FG
157 if ((ret < 0) && (ret != -ENOENT))
158 return ret;
159
224ce89b
WB
160 ctx->marker = std::move(next_marker);
161 if (last_marker) {
162 *last_marker = ctx->marker;
163 }
164
7c673cae
FG
165 if (ret == -ENOENT)
166 *truncated = false;
167
168 return 0;
169}
170
171int RGWMetadataLog::get_info(int shard_id, RGWMetadataLogInfo *info)
172{
173 string oid;
174 get_shard_oid(shard_id, oid);
175
176 cls_log_header header;
177
178 int ret = store->time_log_info(oid, &header);
179 if ((ret < 0) && (ret != -ENOENT))
180 return ret;
181
182 info->marker = header.max_marker;
183 info->last_update = header.max_time.to_real_time();
184
185 return 0;
186}
187
188static void _mdlog_info_completion(librados::completion_t cb, void *arg)
189{
190 auto infoc = static_cast<RGWMetadataLogInfoCompletion *>(arg);
191 infoc->finish(cb);
192 infoc->put(); // drop the ref from get_info_async()
193}
194
195RGWMetadataLogInfoCompletion::RGWMetadataLogInfoCompletion(info_callback_t cb)
196 : completion(librados::Rados::aio_create_completion((void *)this, nullptr,
197 _mdlog_info_completion)),
198 callback(cb)
199{
200}
201
202RGWMetadataLogInfoCompletion::~RGWMetadataLogInfoCompletion()
203{
204 completion->release();
205}
206
207int RGWMetadataLog::get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion)
208{
209 string oid;
210 get_shard_oid(shard_id, oid);
211
212 completion->get(); // hold a ref until the completion fires
213
214 return store->time_log_info_async(completion->get_io_ctx(), oid,
215 &completion->get_header(),
216 completion->get_completion());
217}
218
219int RGWMetadataLog::trim(int shard_id, const real_time& from_time, const real_time& end_time,
220 const string& start_marker, const string& end_marker)
221{
222 string oid;
223 get_shard_oid(shard_id, oid);
224
225 int ret;
226
227 ret = store->time_log_trim(oid, from_time, end_time, start_marker, end_marker);
228
31f18b77 229 if (ret == -ENOENT || ret == -ENODATA)
7c673cae
FG
230 ret = 0;
231
232 return ret;
233}
234
235int RGWMetadataLog::lock_exclusive(int shard_id, timespan duration, string& zone_id, string& owner_id) {
236 string oid;
237 get_shard_oid(shard_id, oid);
238
239 return store->lock_exclusive(store->get_zone_params().log_pool, oid, duration, zone_id, owner_id);
240}
241
242int RGWMetadataLog::unlock(int shard_id, string& zone_id, string& owner_id) {
243 string oid;
244 get_shard_oid(shard_id, oid);
245
246 return store->unlock(store->get_zone_params().log_pool, oid, zone_id, owner_id);
247}
248
249void RGWMetadataLog::mark_modified(int shard_id)
250{
251 lock.get_read();
252 if (modified_shards.find(shard_id) != modified_shards.end()) {
253 lock.unlock();
254 return;
255 }
256 lock.unlock();
257
258 RWLock::WLocker wl(lock);
259 modified_shards.insert(shard_id);
260}
261
262void RGWMetadataLog::read_clear_modified(set<int> &modified)
263{
264 RWLock::WLocker wl(lock);
265 modified.swap(modified_shards);
266 modified_shards.clear();
267}
268
269obj_version& RGWMetadataObject::get_version()
270{
271 return objv;
272}
273
274class RGWMetadataTopHandler : public RGWMetadataHandler {
275 struct iter_data {
276 list<string> sections;
277 list<string>::iterator iter;
278 };
279
280public:
281 RGWMetadataTopHandler() {}
282
283 string get_type() override { return string(); }
284
285 int get(RGWRados *store, string& entry, RGWMetadataObject **obj) override { return -ENOTSUP; }
286 int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
287 real_time mtime, JSONObj *obj, sync_type_t sync_type) override { return -ENOTSUP; }
288
289 virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) override {}
290
291 int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override { return -ENOTSUP; }
292
293 int list_keys_init(RGWRados *store, void **phandle) override {
294 iter_data *data = new iter_data;
295 store->meta_mgr->get_sections(data->sections);
296 data->iter = data->sections.begin();
297
298 *phandle = data;
299
300 return 0;
301 }
302 int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) override {
303 iter_data *data = static_cast<iter_data *>(handle);
304 for (int i = 0; i < max && data->iter != data->sections.end(); ++i, ++(data->iter)) {
305 keys.push_back(*data->iter);
306 }
307
308 *truncated = (data->iter != data->sections.end());
309
310 return 0;
311 }
312 void list_keys_complete(void *handle) override {
313 iter_data *data = static_cast<iter_data *>(handle);
314
315 delete data;
316 }
317};
318
319static RGWMetadataTopHandler md_top_handler;
320
321
322RGWMetadataManager::RGWMetadataManager(CephContext *_cct, RGWRados *_store)
323 : cct(_cct), store(_store)
324{
325}
326
327RGWMetadataManager::~RGWMetadataManager()
328{
329 map<string, RGWMetadataHandler *>::iterator iter;
330
331 for (iter = handlers.begin(); iter != handlers.end(); ++iter) {
332 delete iter->second;
333 }
334
335 handlers.clear();
336}
337
338const std::string RGWMetadataLogHistory::oid = "meta.history";
339
340namespace {
341
342int read_history(RGWRados *store, RGWMetadataLogHistory *state,
343 RGWObjVersionTracker *objv_tracker)
344{
345 RGWObjectCtx ctx{store};
346 auto& pool = store->get_zone_params().log_pool;
347 const auto& oid = RGWMetadataLogHistory::oid;
348 bufferlist bl;
349 int ret = rgw_get_system_obj(store, ctx, pool, oid, bl, objv_tracker, nullptr);
350 if (ret < 0) {
351 return ret;
352 }
353 try {
354 auto p = bl.begin();
355 state->decode(p);
356 } catch (buffer::error& e) {
357 ldout(store->ctx(), 1) << "failed to decode the mdlog history: "
358 << e.what() << dendl;
359 return -EIO;
360 }
361 return 0;
362}
363
364int write_history(RGWRados *store, const RGWMetadataLogHistory& state,
365 RGWObjVersionTracker *objv_tracker, bool exclusive = false)
366{
367 bufferlist bl;
368 state.encode(bl);
369
370 auto& pool = store->get_zone_params().log_pool;
371 const auto& oid = RGWMetadataLogHistory::oid;
372 return rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(),
373 exclusive, objv_tracker, real_time{});
374}
375
376using Cursor = RGWPeriodHistory::Cursor;
377
378/// read the mdlog history and use it to initialize the given cursor
379class ReadHistoryCR : public RGWCoroutine {
380 RGWRados *store;
381 Cursor *cursor;
382 RGWObjVersionTracker *objv_tracker;
383 RGWMetadataLogHistory state;
384 public:
385 ReadHistoryCR(RGWRados *store, Cursor *cursor,
386 RGWObjVersionTracker *objv_tracker)
387 : RGWCoroutine(store->ctx()), store(store), cursor(cursor),
388 objv_tracker(objv_tracker)
389 {}
390
391 int operate() {
392 reenter(this) {
393 yield {
394 rgw_raw_obj obj{store->get_zone_params().log_pool,
395 RGWMetadataLogHistory::oid};
396 constexpr bool empty_on_enoent = false;
397
398 using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>;
399 call(new ReadCR(store->get_async_rados(), store, obj,
400 &state, empty_on_enoent, objv_tracker));
401 }
402 if (retcode < 0) {
403 ldout(cct, 1) << "failed to read mdlog history: "
404 << cpp_strerror(retcode) << dendl;
405 return set_cr_error(retcode);
406 }
407 *cursor = store->period_history->lookup(state.oldest_realm_epoch);
408 if (!*cursor) {
409 return set_cr_error(cursor->get_error());
410 }
411
412 ldout(cct, 10) << "read mdlog history with oldest period id="
413 << state.oldest_period_id << " realm_epoch="
414 << state.oldest_realm_epoch << dendl;
415 return set_cr_done();
416 }
417 return 0;
418 }
419};
420
421/// write the given cursor to the mdlog history
422class WriteHistoryCR : public RGWCoroutine {
423 RGWRados *store;
424 Cursor cursor;
425 RGWObjVersionTracker *objv;
426 RGWMetadataLogHistory state;
427 public:
428 WriteHistoryCR(RGWRados *store, const Cursor& cursor,
429 RGWObjVersionTracker *objv)
430 : RGWCoroutine(store->ctx()), store(store), cursor(cursor), objv(objv)
431 {}
432
433 int operate() {
434 reenter(this) {
435 state.oldest_period_id = cursor.get_period().get_id();
436 state.oldest_realm_epoch = cursor.get_epoch();
437
438 yield {
439 rgw_raw_obj obj{store->get_zone_params().log_pool,
440 RGWMetadataLogHistory::oid};
441
442 using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>;
443 call(new WriteCR(store->get_async_rados(), store, obj, state, objv));
444 }
445 if (retcode < 0) {
446 ldout(cct, 1) << "failed to write mdlog history: "
447 << cpp_strerror(retcode) << dendl;
448 return set_cr_error(retcode);
449 }
450
451 ldout(cct, 10) << "wrote mdlog history with oldest period id="
452 << state.oldest_period_id << " realm_epoch="
453 << state.oldest_realm_epoch << dendl;
454 return set_cr_done();
455 }
456 return 0;
457 }
458};
459
460/// update the mdlog history to reflect trimmed logs
461class TrimHistoryCR : public RGWCoroutine {
462 RGWRados *store;
463 const Cursor cursor; //< cursor to trimmed period
464 RGWObjVersionTracker *objv; //< to prevent racing updates
465 Cursor next; //< target cursor for oldest log period
466 Cursor existing; //< existing cursor read from disk
467
468 public:
469 TrimHistoryCR(RGWRados *store, Cursor cursor, RGWObjVersionTracker *objv)
470 : RGWCoroutine(store->ctx()),
471 store(store), cursor(cursor), objv(objv), next(cursor)
472 {
473 next.next(); // advance past cursor
474 }
475
476 int operate() {
477 reenter(this) {
478 // read an existing history, and write the new history if it's newer
479 yield call(new ReadHistoryCR(store, &existing, objv));
480 if (retcode < 0) {
481 return set_cr_error(retcode);
482 }
483 // reject older trims with ECANCELED
484 if (cursor.get_epoch() < existing.get_epoch()) {
485 ldout(cct, 4) << "found oldest log epoch=" << existing.get_epoch()
486 << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl;
487 return set_cr_error(-ECANCELED);
488 }
489 // overwrite with updated history
490 yield call(new WriteHistoryCR(store, next, objv));
491 if (retcode < 0) {
492 return set_cr_error(retcode);
493 }
494 return set_cr_done();
495 }
496 return 0;
497 }
498};
499
500// traverse all the way back to the beginning of the period history, and
501// return a cursor to the first period in a fully attached history
502Cursor find_oldest_period(RGWRados *store)
503{
504 auto cct = store->ctx();
505 auto cursor = store->period_history->get_current();
506
507 while (cursor) {
508 // advance to the period's predecessor
509 if (!cursor.has_prev()) {
510 auto& predecessor = cursor.get_period().get_predecessor();
511 if (predecessor.empty()) {
512 // this is the first period, so our logs must start here
513 ldout(cct, 10) << "find_oldest_period returning first "
514 "period " << cursor.get_period().get_id() << dendl;
515 return cursor;
516 }
517 // pull the predecessor and add it to our history
518 RGWPeriod period;
519 int r = store->period_puller->pull(predecessor, period);
520 if (r < 0) {
521 return Cursor{r};
522 }
523 auto prev = store->period_history->insert(std::move(period));
524 if (!prev) {
525 return prev;
526 }
527 ldout(cct, 20) << "find_oldest_period advancing to "
528 "predecessor period " << predecessor << dendl;
529 assert(cursor.has_prev());
530 }
531 cursor.prev();
532 }
533 ldout(cct, 10) << "find_oldest_period returning empty cursor" << dendl;
534 return cursor;
535}
536
537} // anonymous namespace
538
539Cursor RGWMetadataManager::init_oldest_log_period()
540{
541 // read the mdlog history
542 RGWMetadataLogHistory state;
543 RGWObjVersionTracker objv;
544 int ret = read_history(store, &state, &objv);
545
546 if (ret == -ENOENT) {
547 // initialize the mdlog history and write it
548 ldout(cct, 10) << "initializing mdlog history" << dendl;
549 auto cursor = find_oldest_period(store);
550 if (!cursor) {
551 return cursor;
552 }
553
554 // write the initial history
555 state.oldest_realm_epoch = cursor.get_epoch();
556 state.oldest_period_id = cursor.get_period().get_id();
557
558 constexpr bool exclusive = true; // don't overwrite
559 int ret = write_history(store, state, &objv, exclusive);
560 if (ret < 0 && ret != -EEXIST) {
561 ldout(cct, 1) << "failed to write mdlog history: "
562 << cpp_strerror(ret) << dendl;
563 return Cursor{ret};
564 }
565 return cursor;
566 } else if (ret < 0) {
567 ldout(cct, 1) << "failed to read mdlog history: "
568 << cpp_strerror(ret) << dendl;
569 return Cursor{ret};
570 }
571
572 // if it's already in the history, return it
573 auto cursor = store->period_history->lookup(state.oldest_realm_epoch);
574 if (cursor) {
575 return cursor;
576 }
577 // pull the oldest period by id
578 RGWPeriod period;
579 ret = store->period_puller->pull(state.oldest_period_id, period);
580 if (ret < 0) {
581 ldout(cct, 1) << "failed to read period id=" << state.oldest_period_id
582 << " for mdlog history: " << cpp_strerror(ret) << dendl;
583 return Cursor{ret};
584 }
585 // verify its realm_epoch
586 if (period.get_realm_epoch() != state.oldest_realm_epoch) {
587 ldout(cct, 1) << "inconsistent mdlog history: read period id="
588 << period.get_id() << " with realm_epoch=" << period.get_realm_epoch()
589 << ", expected realm_epoch=" << state.oldest_realm_epoch << dendl;
590 return Cursor{-EINVAL};
591 }
592 // attach the period to our history
593 return store->period_history->attach(std::move(period));
594}
595
596Cursor RGWMetadataManager::read_oldest_log_period() const
597{
598 RGWMetadataLogHistory state;
599 int ret = read_history(store, &state, nullptr);
600 if (ret < 0) {
601 ldout(store->ctx(), 1) << "failed to read mdlog history: "
602 << cpp_strerror(ret) << dendl;
603 return Cursor{ret};
604 }
605
606 ldout(store->ctx(), 10) << "read mdlog history with oldest period id="
607 << state.oldest_period_id << " realm_epoch="
608 << state.oldest_realm_epoch << dendl;
609
610 return store->period_history->lookup(state.oldest_realm_epoch);
611}
612
613RGWCoroutine* RGWMetadataManager::read_oldest_log_period_cr(Cursor *period,
614 RGWObjVersionTracker *objv) const
615{
616 return new ReadHistoryCR(store, period, objv);
617}
618
619RGWCoroutine* RGWMetadataManager::trim_log_period_cr(Cursor period,
620 RGWObjVersionTracker *objv) const
621{
622 return new TrimHistoryCR(store, period, objv);
623}
624
625int RGWMetadataManager::init(const std::string& current_period)
626{
627 // open a log for the current period
628 current_log = get_log(current_period);
629 return 0;
630}
631
632RGWMetadataLog* RGWMetadataManager::get_log(const std::string& period)
633{
634 // construct the period's log in place if it doesn't exist
635 auto insert = md_logs.emplace(std::piecewise_construct,
636 std::forward_as_tuple(period),
637 std::forward_as_tuple(cct, store, period));
638 return &insert.first->second;
639}
640
641int RGWMetadataManager::register_handler(RGWMetadataHandler *handler)
642{
643 string type = handler->get_type();
644
645 if (handlers.find(type) != handlers.end())
646 return -EINVAL;
647
648 handlers[type] = handler;
649
650 return 0;
651}
652
653RGWMetadataHandler *RGWMetadataManager::get_handler(const string& type)
654{
655 map<string, RGWMetadataHandler *>::iterator iter = handlers.find(type);
656 if (iter == handlers.end())
657 return NULL;
658
659 return iter->second;
660}
661
662void RGWMetadataManager::parse_metadata_key(const string& metadata_key, string& type, string& entry)
663{
664 auto pos = metadata_key.find(':');
665 if (pos == string::npos) {
666 type = metadata_key;
667 } else {
668 type = metadata_key.substr(0, pos);
669 entry = metadata_key.substr(pos + 1);
670 }
671}
672
673int RGWMetadataManager::find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry)
674{
675 string type;
676
677 parse_metadata_key(metadata_key, type, entry);
678
679 if (type.empty()) {
680 *handler = &md_top_handler;
681 return 0;
682 }
683
684 map<string, RGWMetadataHandler *>::iterator iter = handlers.find(type);
685 if (iter == handlers.end())
686 return -ENOENT;
687
688 *handler = iter->second;
689
690 return 0;
691
692}
693
694int RGWMetadataManager::get(string& metadata_key, Formatter *f)
695{
696 RGWMetadataHandler *handler;
697 string entry;
698 int ret = find_handler(metadata_key, &handler, entry);
699 if (ret < 0) {
700 return ret;
701 }
702
703 RGWMetadataObject *obj;
704
705 ret = handler->get(store, entry, &obj);
706 if (ret < 0) {
707 return ret;
708 }
709
710 f->open_object_section("metadata_info");
711 encode_json("key", metadata_key, f);
712 encode_json("ver", obj->get_version(), f);
713 real_time mtime = obj->get_mtime();
714 if (!real_clock::is_zero(mtime)) {
715 utime_t ut(mtime);
716 encode_json("mtime", ut, f);
717 }
718 encode_json("data", *obj, f);
719 f->close_section();
720
721 delete obj;
722
723 return 0;
724}
725
726int RGWMetadataManager::put(string& metadata_key, bufferlist& bl,
727 RGWMetadataHandler::sync_type_t sync_type,
728 obj_version *existing_version)
729{
730 RGWMetadataHandler *handler;
731 string entry;
732
733 int ret = find_handler(metadata_key, &handler, entry);
734 if (ret < 0)
735 return ret;
736
737 JSONParser parser;
738 if (!parser.parse(bl.c_str(), bl.length())) {
739 return -EINVAL;
740 }
741
742 RGWObjVersionTracker objv_tracker;
743
744 obj_version *objv = &objv_tracker.write_version;
745
746 utime_t mtime;
747
748 try {
749 JSONDecoder::decode_json("key", metadata_key, &parser);
750 JSONDecoder::decode_json("ver", *objv, &parser);
751 JSONDecoder::decode_json("mtime", mtime, &parser);
752 } catch (JSONDecoder::err& e) {
753 return -EINVAL;
754 }
755
756 JSONObj *jo = parser.find_obj("data");
757 if (!jo) {
758 return -EINVAL;
759 }
760
761 ret = handler->put(store, entry, objv_tracker, mtime.to_real_time(), jo, sync_type);
762 if (existing_version) {
763 *existing_version = objv_tracker.read_version;
764 }
765 return ret;
766}
767
768int RGWMetadataManager::remove(string& metadata_key)
769{
770 RGWMetadataHandler *handler;
771 string entry;
772
773 int ret = find_handler(metadata_key, &handler, entry);
774 if (ret < 0)
775 return ret;
776
777 RGWMetadataObject *obj;
778
779 ret = handler->get(store, entry, &obj);
780 if (ret < 0) {
781 return ret;
782 }
783
784 RGWObjVersionTracker objv_tracker;
785
786 objv_tracker.read_version = obj->get_version();
787
788 delete obj;
789
790 return handler->remove(store, entry, objv_tracker);
791}
792
793int RGWMetadataManager::lock_exclusive(string& metadata_key, timespan duration, string& owner_id) {
794 RGWMetadataHandler *handler;
795 string entry;
796 string zone_id;
797
798 int ret = find_handler(metadata_key, &handler, entry);
799 if (ret < 0)
800 return ret;
801
802 rgw_pool pool;
803 string oid;
804
805 handler->get_pool_and_oid(store, entry, pool, oid);
806
807 return store->lock_exclusive(pool, oid, duration, zone_id, owner_id);
808}
809
810int RGWMetadataManager::unlock(string& metadata_key, string& owner_id) {
811 librados::IoCtx io_ctx;
812 RGWMetadataHandler *handler;
813 string entry;
814 string zone_id;
815
816 int ret = find_handler(metadata_key, &handler, entry);
817 if (ret < 0)
818 return ret;
819
820 rgw_pool pool;
821 string oid;
822
823 handler->get_pool_and_oid(store, entry, pool, oid);
824
825 return store->unlock(pool, oid, zone_id, owner_id);
826}
827
828struct list_keys_handle {
829 void *handle;
830 RGWMetadataHandler *handler;
831};
832
833
834int RGWMetadataManager::list_keys_init(string& section, void **handle)
835{
836 string entry;
837 RGWMetadataHandler *handler;
838
839 int ret;
840
841 ret = find_handler(section, &handler, entry);
842 if (ret < 0) {
843 return -ENOENT;
844 }
845
846 list_keys_handle *h = new list_keys_handle;
847 h->handler = handler;
848 ret = handler->list_keys_init(store, &h->handle);
849 if (ret < 0) {
850 delete h;
851 return ret;
852 }
853
854 *handle = (void *)h;
855
856 return 0;
857}
858
859int RGWMetadataManager::list_keys_next(void *handle, int max, list<string>& keys, bool *truncated)
860{
861 list_keys_handle *h = static_cast<list_keys_handle *>(handle);
862
863 RGWMetadataHandler *handler = h->handler;
864
865 return handler->list_keys_next(h->handle, max, keys, truncated);
866}
867
868
869void RGWMetadataManager::list_keys_complete(void *handle)
870{
871 list_keys_handle *h = static_cast<list_keys_handle *>(handle);
872
873 RGWMetadataHandler *handler = h->handler;
874
875 handler->list_keys_complete(h->handle);
876 delete h;
877}
878
879void RGWMetadataManager::dump_log_entry(cls_log_entry& entry, Formatter *f)
880{
881 f->open_object_section("entry");
882 f->dump_string("id", entry.id);
883 f->dump_string("section", entry.section);
884 f->dump_string("name", entry.name);
885 entry.timestamp.gmtime_nsec(f->dump_stream("timestamp"));
886
887 try {
888 RGWMetadataLogData log_data;
889 bufferlist::iterator iter = entry.data.begin();
890 ::decode(log_data, iter);
891
892 encode_json("data", log_data, f);
893 } catch (buffer::error& err) {
894 lderr(cct) << "failed to decode log entry: " << entry.section << ":" << entry.name<< " ts=" << entry.timestamp << dendl;
895 }
896 f->close_section();
897}
898
899void RGWMetadataManager::get_sections(list<string>& sections)
900{
901 for (map<string, RGWMetadataHandler *>::iterator iter = handlers.begin(); iter != handlers.end(); ++iter) {
902 sections.push_back(iter->first);
903 }
904}
905
906int RGWMetadataManager::pre_modify(RGWMetadataHandler *handler, string& section, const string& key,
907 RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker,
908 RGWMDLogStatus op_type)
909{
910 section = handler->get_type();
911
912 /* if write version has not been set, and there's a read version, set it so that we can
913 * log it
914 */
915 if (objv_tracker) {
916 if (objv_tracker->read_version.ver && !objv_tracker->write_version.ver) {
917 objv_tracker->write_version = objv_tracker->read_version;
918 objv_tracker->write_version.ver++;
919 }
920 log_data.read_version = objv_tracker->read_version;
921 log_data.write_version = objv_tracker->write_version;
922 }
923
924 log_data.status = op_type;
925
926 bufferlist logbl;
927 ::encode(log_data, logbl);
928
929 assert(current_log); // must have called init()
930 int ret = current_log->add_entry(handler, section, key, logbl);
931 if (ret < 0)
932 return ret;
933
934 return 0;
935}
936
937int RGWMetadataManager::post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data,
938 RGWObjVersionTracker *objv_tracker, int ret)
939{
940 if (ret >= 0)
941 log_data.status = MDLOG_STATUS_COMPLETE;
942 else
943 log_data.status = MDLOG_STATUS_ABORT;
944
945 bufferlist logbl;
946 ::encode(log_data, logbl);
947
948 assert(current_log); // must have called init()
949 int r = current_log->add_entry(handler, section, key, logbl);
950 if (ret < 0)
951 return ret;
952
953 if (r < 0)
954 return r;
955
956 return 0;
957}
958
959string RGWMetadataManager::heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv)
960{
961 char buf[objv.tag.size() + 32];
962 snprintf(buf, sizeof(buf), "%s:%lld", objv.tag.c_str(), (long long)objv.ver);
963 return string(".meta:") + handler->get_type() + ":" + key + ":" + buf;
964}
965
966int RGWMetadataManager::store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl,
967 RGWObjVersionTracker *objv_tracker, real_time mtime,
968 map<string, bufferlist> *pattrs)
969{
970 if (!objv_tracker) {
971 return -EINVAL;
972 }
973
974 rgw_pool heap_pool(store->get_zone_params().metadata_heap);
975
976 if (heap_pool.empty()) {
977 return 0;
978 }
979
980 RGWObjVersionTracker otracker;
981 otracker.write_version = objv_tracker->write_version;
982 string oid = heap_oid(handler, key, objv_tracker->write_version);
983 int ret = rgw_put_system_obj(store, heap_pool, oid,
984 bl.c_str(), bl.length(), false,
985 &otracker, mtime, pattrs);
986 if (ret < 0) {
987 ldout(store->ctx(), 0) << "ERROR: rgw_put_system_obj() oid=" << oid << ") returned ret=" << ret << dendl;
988 return ret;
989 }
990
991 return 0;
992}
993
994int RGWMetadataManager::remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker)
995{
996 if (!objv_tracker) {
997 return -EINVAL;
998 }
999
1000 rgw_pool heap_pool(store->get_zone_params().metadata_heap);
1001
1002 if (heap_pool.empty()) {
1003 return 0;
1004 }
1005
1006 string oid = heap_oid(handler, key, objv_tracker->write_version);
1007 rgw_raw_obj obj(heap_pool, oid);
1008 int ret = store->delete_system_obj(obj);
1009 if (ret < 0) {
1010 ldout(store->ctx(), 0) << "ERROR: store->delete_system_obj()=" << oid << ") returned ret=" << ret << dendl;
1011 return ret;
1012 }
1013
1014 return 0;
1015}
1016
1017int RGWMetadataManager::put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive,
1018 RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs)
1019{
1020 string section;
1021 RGWMetadataLogData log_data;
1022 int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_WRITE);
1023 if (ret < 0)
1024 return ret;
1025
1026 string oid;
1027 rgw_pool pool;
1028
1029 handler->get_pool_and_oid(store, key, pool, oid);
1030
1031 ret = store_in_heap(handler, key, bl, objv_tracker, mtime, pattrs);
1032 if (ret < 0) {
1033 ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": store_in_heap() key=" << key << " returned ret=" << ret << dendl;
1034 goto done;
1035 }
1036
1037 ret = rgw_put_system_obj(store, pool, oid,
1038 bl.c_str(), bl.length(), exclusive,
1039 objv_tracker, mtime, pattrs);
1040
1041 if (ret < 0) {
1042 int r = remove_from_heap(handler, key, objv_tracker);
1043 if (r < 0) {
1044 ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": remove_from_heap() key=" << key << " returned ret=" << r << dendl;
1045 }
1046 }
1047done:
1048 /* cascading ret into post_modify() */
1049
1050 ret = post_modify(handler, section, key, log_data, objv_tracker, ret);
1051 if (ret < 0)
1052 return ret;
1053
1054 return 0;
1055}
1056
1057int RGWMetadataManager::remove_entry(RGWMetadataHandler *handler, string& key, RGWObjVersionTracker *objv_tracker)
1058{
1059 string section;
1060 RGWMetadataLogData log_data;
1061 int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_REMOVE);
1062 if (ret < 0)
1063 return ret;
1064
1065 string oid;
1066 rgw_pool pool;
1067
1068 handler->get_pool_and_oid(store, key, pool, oid);
1069
1070 rgw_raw_obj obj(pool, oid);
1071
1072 ret = store->delete_system_obj(obj, objv_tracker);
1073 /* cascading ret into post_modify() */
1074
1075 ret = post_modify(handler, section, key, log_data, objv_tracker, ret);
1076 if (ret < 0)
1077 return ret;
1078
1079 return 0;
1080}
1081
1082int RGWMetadataManager::get_log_shard_id(const string& section,
1083 const string& key, int *shard_id)
1084{
1085 RGWMetadataHandler *handler = get_handler(section);
1086 if (!handler) {
1087 return -EINVAL;
1088 }
1089 string hash_key;
1090 handler->get_hash_key(section, key, hash_key);
1091 *shard_id = store->key_to_shard_id(hash_key, cct->_conf->rgw_md_log_max_shards);
1092 return 0;
1093}