]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_metadata.cc
update sources to v12.2.3
[ceph.git] / ceph / src / rgw / rgw_metadata.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include <boost/intrusive_ptr.hpp>
5 #include "common/ceph_json.h"
6 #include "common/errno.h"
7 #include "rgw_metadata.h"
8 #include "rgw_coroutine.h"
9 #include "cls/version/cls_version_types.h"
10
11 #include "rgw_rados.h"
12 #include "rgw_tools.h"
13
14 #include "rgw_cr_rados.h"
15
16 #include "include/assert.h"
17 #include <boost/asio/yield.hpp>
18
19 #define dout_subsys ceph_subsys_rgw
20
21 void LogStatusDump::dump(Formatter *f) const {
22 string s;
23 switch (status) {
24 case MDLOG_STATUS_WRITE:
25 s = "write";
26 break;
27 case MDLOG_STATUS_SETATTRS:
28 s = "set_attrs";
29 break;
30 case MDLOG_STATUS_REMOVE:
31 s = "remove";
32 break;
33 case MDLOG_STATUS_COMPLETE:
34 s = "complete";
35 break;
36 case MDLOG_STATUS_ABORT:
37 s = "abort";
38 break;
39 default:
40 s = "unknown";
41 break;
42 }
43 encode_json("status", s, f);
44 }
45
46 void RGWMetadataLogData::encode(bufferlist& bl) const {
47 ENCODE_START(1, 1, bl);
48 ::encode(read_version, bl);
49 ::encode(write_version, bl);
50 uint32_t s = (uint32_t)status;
51 ::encode(s, bl);
52 ENCODE_FINISH(bl);
53 }
54
55 void RGWMetadataLogData::decode(bufferlist::iterator& bl) {
56 DECODE_START(1, bl);
57 ::decode(read_version, bl);
58 ::decode(write_version, bl);
59 uint32_t s;
60 ::decode(s, bl);
61 status = (RGWMDLogStatus)s;
62 DECODE_FINISH(bl);
63 }
64
65 void RGWMetadataLogData::dump(Formatter *f) const {
66 encode_json("read_version", read_version, f);
67 encode_json("write_version", write_version, f);
68 encode_json("status", LogStatusDump(status), f);
69 }
70
71 void decode_json_obj(RGWMDLogStatus& status, JSONObj *obj) {
72 string s;
73 JSONDecoder::decode_json("status", s, obj);
74 if (s == "complete") {
75 status = MDLOG_STATUS_COMPLETE;
76 } else if (s == "write") {
77 status = MDLOG_STATUS_WRITE;
78 } else if (s == "remove") {
79 status = MDLOG_STATUS_REMOVE;
80 } else if (s == "set_attrs") {
81 status = MDLOG_STATUS_SETATTRS;
82 } else if (s == "abort") {
83 status = MDLOG_STATUS_ABORT;
84 } else {
85 status = MDLOG_STATUS_UNKNOWN;
86 }
87 }
88
89 void RGWMetadataLogData::decode_json(JSONObj *obj) {
90 JSONDecoder::decode_json("read_version", read_version, obj);
91 JSONDecoder::decode_json("write_version", write_version, obj);
92 JSONDecoder::decode_json("status", status, obj);
93 }
94
95
96 int RGWMetadataLog::add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl) {
97 if (!store->need_to_log_metadata())
98 return 0;
99
100 string oid;
101
102 string hash_key;
103 handler->get_hash_key(section, key, hash_key);
104
105 int shard_id;
106 store->shard_name(prefix, cct->_conf->rgw_md_log_max_shards, hash_key, oid, &shard_id);
107 mark_modified(shard_id);
108 real_time now = real_clock::now();
109 return store->time_log_add(oid, now, section, key, bl);
110 }
111
112 int RGWMetadataLog::store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion)
113 {
114 string oid;
115
116 mark_modified(shard_id);
117 store->shard_name(prefix, shard_id, oid);
118 return store->time_log_add(oid, entries, completion, false);
119 }
120
121 void RGWMetadataLog::init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time,
122 string& marker, void **handle)
123 {
124 LogListCtx *ctx = new LogListCtx();
125
126 ctx->cur_shard = shard_id;
127 ctx->from_time = from_time;
128 ctx->end_time = end_time;
129 ctx->marker = marker;
130
131 get_shard_oid(ctx->cur_shard, ctx->cur_oid);
132
133 *handle = (void *)ctx;
134 }
135
136 void RGWMetadataLog::complete_list_entries(void *handle) {
137 LogListCtx *ctx = static_cast<LogListCtx *>(handle);
138 delete ctx;
139 }
140
141 int RGWMetadataLog::list_entries(void *handle,
142 int max_entries,
143 list<cls_log_entry>& entries,
144 string *last_marker,
145 bool *truncated) {
146 LogListCtx *ctx = static_cast<LogListCtx *>(handle);
147
148 if (!max_entries) {
149 *truncated = false;
150 return 0;
151 }
152
153 std::string next_marker;
154 int ret = store->time_log_list(ctx->cur_oid, ctx->from_time, ctx->end_time,
155 max_entries, entries, ctx->marker,
156 &next_marker, truncated);
157 if ((ret < 0) && (ret != -ENOENT))
158 return ret;
159
160 ctx->marker = std::move(next_marker);
161 if (last_marker) {
162 *last_marker = ctx->marker;
163 }
164
165 if (ret == -ENOENT)
166 *truncated = false;
167
168 return 0;
169 }
170
171 int RGWMetadataLog::get_info(int shard_id, RGWMetadataLogInfo *info)
172 {
173 string oid;
174 get_shard_oid(shard_id, oid);
175
176 cls_log_header header;
177
178 int ret = store->time_log_info(oid, &header);
179 if ((ret < 0) && (ret != -ENOENT))
180 return ret;
181
182 info->marker = header.max_marker;
183 info->last_update = header.max_time.to_real_time();
184
185 return 0;
186 }
187
188 static void _mdlog_info_completion(librados::completion_t cb, void *arg)
189 {
190 auto infoc = static_cast<RGWMetadataLogInfoCompletion *>(arg);
191 infoc->finish(cb);
192 infoc->put(); // drop the ref from get_info_async()
193 }
194
195 RGWMetadataLogInfoCompletion::RGWMetadataLogInfoCompletion(info_callback_t cb)
196 : completion(librados::Rados::aio_create_completion((void *)this, nullptr,
197 _mdlog_info_completion)),
198 callback(cb)
199 {
200 }
201
202 RGWMetadataLogInfoCompletion::~RGWMetadataLogInfoCompletion()
203 {
204 completion->release();
205 }
206
207 int RGWMetadataLog::get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion)
208 {
209 string oid;
210 get_shard_oid(shard_id, oid);
211
212 completion->get(); // hold a ref until the completion fires
213
214 return store->time_log_info_async(completion->get_io_ctx(), oid,
215 &completion->get_header(),
216 completion->get_completion());
217 }
218
219 int RGWMetadataLog::trim(int shard_id, const real_time& from_time, const real_time& end_time,
220 const string& start_marker, const string& end_marker)
221 {
222 string oid;
223 get_shard_oid(shard_id, oid);
224
225 int ret;
226
227 ret = store->time_log_trim(oid, from_time, end_time, start_marker, end_marker);
228
229 if (ret == -ENOENT || ret == -ENODATA)
230 ret = 0;
231
232 return ret;
233 }
234
235 int RGWMetadataLog::lock_exclusive(int shard_id, timespan duration, string& zone_id, string& owner_id) {
236 string oid;
237 get_shard_oid(shard_id, oid);
238
239 return store->lock_exclusive(store->get_zone_params().log_pool, oid, duration, zone_id, owner_id);
240 }
241
242 int RGWMetadataLog::unlock(int shard_id, string& zone_id, string& owner_id) {
243 string oid;
244 get_shard_oid(shard_id, oid);
245
246 return store->unlock(store->get_zone_params().log_pool, oid, zone_id, owner_id);
247 }
248
249 void RGWMetadataLog::mark_modified(int shard_id)
250 {
251 lock.get_read();
252 if (modified_shards.find(shard_id) != modified_shards.end()) {
253 lock.unlock();
254 return;
255 }
256 lock.unlock();
257
258 RWLock::WLocker wl(lock);
259 modified_shards.insert(shard_id);
260 }
261
262 void RGWMetadataLog::read_clear_modified(set<int> &modified)
263 {
264 RWLock::WLocker wl(lock);
265 modified.swap(modified_shards);
266 modified_shards.clear();
267 }
268
269 obj_version& RGWMetadataObject::get_version()
270 {
271 return objv;
272 }
273
274 class RGWMetadataTopHandler : public RGWMetadataHandler {
275 struct iter_data {
276 set<string> sections;
277 set<string>::iterator iter;
278 };
279
280 public:
281 RGWMetadataTopHandler() {}
282
283 string get_type() override { return string(); }
284
285 int get(RGWRados *store, string& entry, RGWMetadataObject **obj) override { return -ENOTSUP; }
286 int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
287 real_time mtime, JSONObj *obj, sync_type_t sync_type) override { return -ENOTSUP; }
288
289 virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) override {}
290
291 int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override { return -ENOTSUP; }
292
293 int list_keys_init(RGWRados *store, const string& marker, void **phandle) override {
294 iter_data *data = new iter_data;
295 list<string> sections;
296 store->meta_mgr->get_sections(sections);
297 for (auto& s : sections) {
298 data->sections.insert(s);
299 }
300 data->iter = data->sections.lower_bound(marker);
301
302 *phandle = data;
303
304 return 0;
305 }
306 int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) override {
307 iter_data *data = static_cast<iter_data *>(handle);
308 for (int i = 0; i < max && data->iter != data->sections.end(); ++i, ++(data->iter)) {
309 keys.push_back(*data->iter);
310 }
311
312 *truncated = (data->iter != data->sections.end());
313
314 return 0;
315 }
316 void list_keys_complete(void *handle) override {
317 iter_data *data = static_cast<iter_data *>(handle);
318
319 delete data;
320 }
321
322 virtual string get_marker(void *handle) {
323 iter_data *data = static_cast<iter_data *>(handle);
324
325 if (data->iter != data->sections.end()) {
326 return *(data->iter);
327 }
328
329 return string();
330 }
331 };
332
333 static RGWMetadataTopHandler md_top_handler;
334
335
336 RGWMetadataManager::RGWMetadataManager(CephContext *_cct, RGWRados *_store)
337 : cct(_cct), store(_store)
338 {
339 }
340
341 RGWMetadataManager::~RGWMetadataManager()
342 {
343 map<string, RGWMetadataHandler *>::iterator iter;
344
345 for (iter = handlers.begin(); iter != handlers.end(); ++iter) {
346 delete iter->second;
347 }
348
349 handlers.clear();
350 }
351
// name of the object holding the mdlog history; read_history() and
// write_history() access it in the zone's log pool
const std::string RGWMetadataLogHistory::oid = "meta.history";
353
354 namespace {
355
356 int read_history(RGWRados *store, RGWMetadataLogHistory *state,
357 RGWObjVersionTracker *objv_tracker)
358 {
359 RGWObjectCtx ctx{store};
360 auto& pool = store->get_zone_params().log_pool;
361 const auto& oid = RGWMetadataLogHistory::oid;
362 bufferlist bl;
363 int ret = rgw_get_system_obj(store, ctx, pool, oid, bl, objv_tracker, nullptr);
364 if (ret < 0) {
365 return ret;
366 }
367 try {
368 auto p = bl.begin();
369 state->decode(p);
370 } catch (buffer::error& e) {
371 ldout(store->ctx(), 1) << "failed to decode the mdlog history: "
372 << e.what() << dendl;
373 return -EIO;
374 }
375 return 0;
376 }
377
378 int write_history(RGWRados *store, const RGWMetadataLogHistory& state,
379 RGWObjVersionTracker *objv_tracker, bool exclusive = false)
380 {
381 bufferlist bl;
382 state.encode(bl);
383
384 auto& pool = store->get_zone_params().log_pool;
385 const auto& oid = RGWMetadataLogHistory::oid;
386 return rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(),
387 exclusive, objv_tracker, real_time{});
388 }
389
390 using Cursor = RGWPeriodHistory::Cursor;
391
/// read the mdlog history and use it to initialize the given cursor
///
/// Steps performed by operate():
///  1. read the history object (RGWMetadataLogHistory::oid in the zone's log
///     pool) into `state` through an RGWSimpleRadosReadCR
///  2. look up state.oldest_realm_epoch in store->period_history and store
///     the result in *cursor
/// The coroutine fails with the read's retcode, or with cursor->get_error()
/// when the lookup produces a cursor that tests false.
class ReadHistoryCR : public RGWCoroutine {
  RGWRados *store;
  Cursor *cursor; // output: receives the result of the period history lookup
  RGWObjVersionTracker *objv_tracker; // forwarded to the read coroutine
  RGWMetadataLogHistory state; // history decoded by the read coroutine
 public:
  ReadHistoryCR(RGWRados *store, Cursor *cursor,
                RGWObjVersionTracker *objv_tracker)
    : RGWCoroutine(store->ctx()), store(store), cursor(cursor),
      objv_tracker(objv_tracker)
  {}

  int operate() {
    reenter(this) {
      yield {
        rgw_raw_obj obj{store->get_zone_params().log_pool,
                        RGWMetadataLogHistory::oid};
        // NOTE(review): presumably makes a missing object an error rather
        // than an empty result -- confirm against RGWSimpleRadosReadCR
        constexpr bool empty_on_enoent = false;

        using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>;
        call(new ReadCR(store->get_async_rados(), store, obj,
                        &state, empty_on_enoent, objv_tracker));
      }
      // retcode holds the result of the read coroutine called above
      if (retcode < 0) {
        ldout(cct, 1) << "failed to read mdlog history: "
            << cpp_strerror(retcode) << dendl;
        return set_cr_error(retcode);
      }
      *cursor = store->period_history->lookup(state.oldest_realm_epoch);
      if (!*cursor) {
        return set_cr_error(cursor->get_error());
      }

      ldout(cct, 10) << "read mdlog history with oldest period id="
          << state.oldest_period_id << " realm_epoch="
          << state.oldest_realm_epoch << dendl;
      return set_cr_done();
    }
    return 0;
  }
};
434
/// write the given cursor to the mdlog history
///
/// operate() fills `state` from the cursor's period id and epoch, then writes
/// it to the history object (RGWMetadataLogHistory::oid in the zone's log
/// pool) through an RGWSimpleRadosWriteCR. The coroutine fails with the
/// write's retcode if that is negative.
class WriteHistoryCR : public RGWCoroutine {
  RGWRados *store;
  Cursor cursor; // copy of the cursor whose period becomes the oldest one
  RGWObjVersionTracker *objv; // forwarded to the write coroutine
  RGWMetadataLogHistory state; // history object to be written
 public:
  WriteHistoryCR(RGWRados *store, const Cursor& cursor,
                 RGWObjVersionTracker *objv)
    : RGWCoroutine(store->ctx()), store(store), cursor(cursor), objv(objv)
  {}

  int operate() {
    reenter(this) {
      state.oldest_period_id = cursor.get_period().get_id();
      state.oldest_realm_epoch = cursor.get_epoch();

      yield {
        rgw_raw_obj obj{store->get_zone_params().log_pool,
                        RGWMetadataLogHistory::oid};

        using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>;
        call(new WriteCR(store->get_async_rados(), store, obj, state, objv));
      }
      // retcode holds the result of the write coroutine called above
      if (retcode < 0) {
        ldout(cct, 1) << "failed to write mdlog history: "
            << cpp_strerror(retcode) << dendl;
        return set_cr_error(retcode);
      }

      ldout(cct, 10) << "wrote mdlog history with oldest period id="
          << state.oldest_period_id << " realm_epoch="
          << state.oldest_realm_epoch << dendl;
      return set_cr_done();
    }
    return 0;
  }
};
473
/// update the mdlog history to reflect trimmed logs
///
/// operate() reads the existing history with ReadHistoryCR, fails with
/// -ECANCELED if the trimmed cursor's epoch is older than the epoch already
/// recorded, and otherwise writes `next` (the cursor following the trimmed
/// period) with WriteHistoryCR. Errors from either sub-coroutine are
/// propagated unchanged.
class TrimHistoryCR : public RGWCoroutine {
  RGWRados *store;
  const Cursor cursor; //< cursor to trimmed period
  RGWObjVersionTracker *objv; //< to prevent racing updates
  Cursor next; //< target cursor for oldest log period
  Cursor existing; //< existing cursor read from disk

 public:
  TrimHistoryCR(RGWRados *store, Cursor cursor, RGWObjVersionTracker *objv)
    : RGWCoroutine(store->ctx()),
      store(store), cursor(cursor), objv(objv), next(cursor)
  {
    next.next(); // advance past cursor
  }

  int operate() {
    reenter(this) {
      // read an existing history, and write the new history if it's newer
      yield call(new ReadHistoryCR(store, &existing, objv));
      if (retcode < 0) {
        return set_cr_error(retcode);
      }
      // reject older trims with ECANCELED
      if (cursor.get_epoch() < existing.get_epoch()) {
        ldout(cct, 4) << "found oldest log epoch=" << existing.get_epoch()
            << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl;
        return set_cr_error(-ECANCELED);
      }
      // overwrite with updated history
      yield call(new WriteHistoryCR(store, next, objv));
      if (retcode < 0) {
        return set_cr_error(retcode);
      }
      return set_cr_done();
    }
    return 0;
  }
};
513
514 // traverse all the way back to the beginning of the period history, and
515 // return a cursor to the first period in a fully attached history
516 Cursor find_oldest_period(RGWRados *store)
517 {
518 auto cct = store->ctx();
519 auto cursor = store->period_history->get_current();
520
521 while (cursor) {
522 // advance to the period's predecessor
523 if (!cursor.has_prev()) {
524 auto& predecessor = cursor.get_period().get_predecessor();
525 if (predecessor.empty()) {
526 // this is the first period, so our logs must start here
527 ldout(cct, 10) << "find_oldest_period returning first "
528 "period " << cursor.get_period().get_id() << dendl;
529 return cursor;
530 }
531 // pull the predecessor and add it to our history
532 RGWPeriod period;
533 int r = store->period_puller->pull(predecessor, period);
534 if (r < 0) {
535 return Cursor{r};
536 }
537 auto prev = store->period_history->insert(std::move(period));
538 if (!prev) {
539 return prev;
540 }
541 ldout(cct, 20) << "find_oldest_period advancing to "
542 "predecessor period " << predecessor << dendl;
543 assert(cursor.has_prev());
544 }
545 cursor.prev();
546 }
547 ldout(cct, 10) << "find_oldest_period returning empty cursor" << dendl;
548 return cursor;
549 }
550
551 } // anonymous namespace
552
553 Cursor RGWMetadataManager::init_oldest_log_period()
554 {
555 // read the mdlog history
556 RGWMetadataLogHistory state;
557 RGWObjVersionTracker objv;
558 int ret = read_history(store, &state, &objv);
559
560 if (ret == -ENOENT) {
561 // initialize the mdlog history and write it
562 ldout(cct, 10) << "initializing mdlog history" << dendl;
563 auto cursor = find_oldest_period(store);
564 if (!cursor) {
565 return cursor;
566 }
567
568 // write the initial history
569 state.oldest_realm_epoch = cursor.get_epoch();
570 state.oldest_period_id = cursor.get_period().get_id();
571
572 constexpr bool exclusive = true; // don't overwrite
573 int ret = write_history(store, state, &objv, exclusive);
574 if (ret < 0 && ret != -EEXIST) {
575 ldout(cct, 1) << "failed to write mdlog history: "
576 << cpp_strerror(ret) << dendl;
577 return Cursor{ret};
578 }
579 return cursor;
580 } else if (ret < 0) {
581 ldout(cct, 1) << "failed to read mdlog history: "
582 << cpp_strerror(ret) << dendl;
583 return Cursor{ret};
584 }
585
586 // if it's already in the history, return it
587 auto cursor = store->period_history->lookup(state.oldest_realm_epoch);
588 if (cursor) {
589 return cursor;
590 }
591 // pull the oldest period by id
592 RGWPeriod period;
593 ret = store->period_puller->pull(state.oldest_period_id, period);
594 if (ret < 0) {
595 ldout(cct, 1) << "failed to read period id=" << state.oldest_period_id
596 << " for mdlog history: " << cpp_strerror(ret) << dendl;
597 return Cursor{ret};
598 }
599 // verify its realm_epoch
600 if (period.get_realm_epoch() != state.oldest_realm_epoch) {
601 ldout(cct, 1) << "inconsistent mdlog history: read period id="
602 << period.get_id() << " with realm_epoch=" << period.get_realm_epoch()
603 << ", expected realm_epoch=" << state.oldest_realm_epoch << dendl;
604 return Cursor{-EINVAL};
605 }
606 // attach the period to our history
607 return store->period_history->attach(std::move(period));
608 }
609
610 Cursor RGWMetadataManager::read_oldest_log_period() const
611 {
612 RGWMetadataLogHistory state;
613 int ret = read_history(store, &state, nullptr);
614 if (ret < 0) {
615 ldout(store->ctx(), 1) << "failed to read mdlog history: "
616 << cpp_strerror(ret) << dendl;
617 return Cursor{ret};
618 }
619
620 ldout(store->ctx(), 10) << "read mdlog history with oldest period id="
621 << state.oldest_period_id << " realm_epoch="
622 << state.oldest_realm_epoch << dendl;
623
624 return store->period_history->lookup(state.oldest_realm_epoch);
625 }
626
627 RGWCoroutine* RGWMetadataManager::read_oldest_log_period_cr(Cursor *period,
628 RGWObjVersionTracker *objv) const
629 {
630 return new ReadHistoryCR(store, period, objv);
631 }
632
633 RGWCoroutine* RGWMetadataManager::trim_log_period_cr(Cursor period,
634 RGWObjVersionTracker *objv) const
635 {
636 return new TrimHistoryCR(store, period, objv);
637 }
638
639 int RGWMetadataManager::init(const std::string& current_period)
640 {
641 // open a log for the current period
642 current_log = get_log(current_period);
643 return 0;
644 }
645
646 RGWMetadataLog* RGWMetadataManager::get_log(const std::string& period)
647 {
648 // construct the period's log in place if it doesn't exist
649 auto insert = md_logs.emplace(std::piecewise_construct,
650 std::forward_as_tuple(period),
651 std::forward_as_tuple(cct, store, period));
652 return &insert.first->second;
653 }
654
655 int RGWMetadataManager::register_handler(RGWMetadataHandler *handler)
656 {
657 string type = handler->get_type();
658
659 if (handlers.find(type) != handlers.end())
660 return -EINVAL;
661
662 handlers[type] = handler;
663
664 return 0;
665 }
666
667 RGWMetadataHandler *RGWMetadataManager::get_handler(const string& type)
668 {
669 map<string, RGWMetadataHandler *>::iterator iter = handlers.find(type);
670 if (iter == handlers.end())
671 return NULL;
672
673 return iter->second;
674 }
675
676 void RGWMetadataManager::parse_metadata_key(const string& metadata_key, string& type, string& entry)
677 {
678 auto pos = metadata_key.find(':');
679 if (pos == string::npos) {
680 type = metadata_key;
681 } else {
682 type = metadata_key.substr(0, pos);
683 entry = metadata_key.substr(pos + 1);
684 }
685 }
686
687 int RGWMetadataManager::find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry)
688 {
689 string type;
690
691 parse_metadata_key(metadata_key, type, entry);
692
693 if (type.empty()) {
694 *handler = &md_top_handler;
695 return 0;
696 }
697
698 map<string, RGWMetadataHandler *>::iterator iter = handlers.find(type);
699 if (iter == handlers.end())
700 return -ENOENT;
701
702 *handler = iter->second;
703
704 return 0;
705
706 }
707
708 int RGWMetadataManager::get(string& metadata_key, Formatter *f)
709 {
710 RGWMetadataHandler *handler;
711 string entry;
712 int ret = find_handler(metadata_key, &handler, entry);
713 if (ret < 0) {
714 return ret;
715 }
716
717 RGWMetadataObject *obj;
718
719 ret = handler->get(store, entry, &obj);
720 if (ret < 0) {
721 return ret;
722 }
723
724 f->open_object_section("metadata_info");
725 encode_json("key", metadata_key, f);
726 encode_json("ver", obj->get_version(), f);
727 real_time mtime = obj->get_mtime();
728 if (!real_clock::is_zero(mtime)) {
729 utime_t ut(mtime);
730 encode_json("mtime", ut, f);
731 }
732 encode_json("data", *obj, f);
733 f->close_section();
734
735 delete obj;
736
737 return 0;
738 }
739
740 int RGWMetadataManager::put(string& metadata_key, bufferlist& bl,
741 RGWMetadataHandler::sync_type_t sync_type,
742 obj_version *existing_version)
743 {
744 RGWMetadataHandler *handler;
745 string entry;
746
747 int ret = find_handler(metadata_key, &handler, entry);
748 if (ret < 0)
749 return ret;
750
751 JSONParser parser;
752 if (!parser.parse(bl.c_str(), bl.length())) {
753 return -EINVAL;
754 }
755
756 RGWObjVersionTracker objv_tracker;
757
758 obj_version *objv = &objv_tracker.write_version;
759
760 utime_t mtime;
761
762 try {
763 JSONDecoder::decode_json("key", metadata_key, &parser);
764 JSONDecoder::decode_json("ver", *objv, &parser);
765 JSONDecoder::decode_json("mtime", mtime, &parser);
766 } catch (JSONDecoder::err& e) {
767 return -EINVAL;
768 }
769
770 JSONObj *jo = parser.find_obj("data");
771 if (!jo) {
772 return -EINVAL;
773 }
774
775 ret = handler->put(store, entry, objv_tracker, mtime.to_real_time(), jo, sync_type);
776 if (existing_version) {
777 *existing_version = objv_tracker.read_version;
778 }
779 return ret;
780 }
781
782 int RGWMetadataManager::remove(string& metadata_key)
783 {
784 RGWMetadataHandler *handler;
785 string entry;
786
787 int ret = find_handler(metadata_key, &handler, entry);
788 if (ret < 0)
789 return ret;
790
791 RGWMetadataObject *obj;
792
793 ret = handler->get(store, entry, &obj);
794 if (ret < 0) {
795 return ret;
796 }
797
798 RGWObjVersionTracker objv_tracker;
799
800 objv_tracker.read_version = obj->get_version();
801
802 delete obj;
803
804 return handler->remove(store, entry, objv_tracker);
805 }
806
807 int RGWMetadataManager::lock_exclusive(string& metadata_key, timespan duration, string& owner_id) {
808 RGWMetadataHandler *handler;
809 string entry;
810 string zone_id;
811
812 int ret = find_handler(metadata_key, &handler, entry);
813 if (ret < 0)
814 return ret;
815
816 rgw_pool pool;
817 string oid;
818
819 handler->get_pool_and_oid(store, entry, pool, oid);
820
821 return store->lock_exclusive(pool, oid, duration, zone_id, owner_id);
822 }
823
824 int RGWMetadataManager::unlock(string& metadata_key, string& owner_id) {
825 librados::IoCtx io_ctx;
826 RGWMetadataHandler *handler;
827 string entry;
828 string zone_id;
829
830 int ret = find_handler(metadata_key, &handler, entry);
831 if (ret < 0)
832 return ret;
833
834 rgw_pool pool;
835 string oid;
836
837 handler->get_pool_and_oid(store, entry, pool, oid);
838
839 return store->unlock(pool, oid, zone_id, owner_id);
840 }
841
// opaque state behind the void* handle returned by
// RGWMetadataManager::list_keys_init()
struct list_keys_handle {
  void *handle; // handler-specific listing state, filled in by the handler's list_keys_init()
  RGWMetadataHandler *handler; // handler that owns `handle`
};
846
847 int RGWMetadataManager::list_keys_init(const string& section, void **handle)
848 {
849 return list_keys_init(section, string(), handle);
850 }
851
852 int RGWMetadataManager::list_keys_init(const string& section,
853 const string& marker, void **handle)
854 {
855 string entry;
856 RGWMetadataHandler *handler;
857
858 int ret;
859
860 ret = find_handler(section, &handler, entry);
861 if (ret < 0) {
862 return -ENOENT;
863 }
864
865 list_keys_handle *h = new list_keys_handle;
866 h->handler = handler;
867 ret = handler->list_keys_init(store, marker, &h->handle);
868 if (ret < 0) {
869 delete h;
870 return ret;
871 }
872
873 *handle = (void *)h;
874
875 return 0;
876 }
877
878 int RGWMetadataManager::list_keys_next(void *handle, int max, list<string>& keys, bool *truncated)
879 {
880 list_keys_handle *h = static_cast<list_keys_handle *>(handle);
881
882 RGWMetadataHandler *handler = h->handler;
883
884 return handler->list_keys_next(h->handle, max, keys, truncated);
885 }
886
887 void RGWMetadataManager::list_keys_complete(void *handle)
888 {
889 list_keys_handle *h = static_cast<list_keys_handle *>(handle);
890
891 RGWMetadataHandler *handler = h->handler;
892
893 handler->list_keys_complete(h->handle);
894 delete h;
895 }
896
897 string RGWMetadataManager::get_marker(void *handle)
898 {
899 list_keys_handle *h = static_cast<list_keys_handle *>(handle);
900
901 return h->handler->get_marker(h->handle);
902 }
903
904 void RGWMetadataManager::dump_log_entry(cls_log_entry& entry, Formatter *f)
905 {
906 f->open_object_section("entry");
907 f->dump_string("id", entry.id);
908 f->dump_string("section", entry.section);
909 f->dump_string("name", entry.name);
910 entry.timestamp.gmtime_nsec(f->dump_stream("timestamp"));
911
912 try {
913 RGWMetadataLogData log_data;
914 bufferlist::iterator iter = entry.data.begin();
915 ::decode(log_data, iter);
916
917 encode_json("data", log_data, f);
918 } catch (buffer::error& err) {
919 lderr(cct) << "failed to decode log entry: " << entry.section << ":" << entry.name<< " ts=" << entry.timestamp << dendl;
920 }
921 f->close_section();
922 }
923
924 void RGWMetadataManager::get_sections(list<string>& sections)
925 {
926 for (map<string, RGWMetadataHandler *>::iterator iter = handlers.begin(); iter != handlers.end(); ++iter) {
927 sections.push_back(iter->first);
928 }
929 }
930
931 int RGWMetadataManager::pre_modify(RGWMetadataHandler *handler, string& section, const string& key,
932 RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker,
933 RGWMDLogStatus op_type)
934 {
935 section = handler->get_type();
936
937 /* if write version has not been set, and there's a read version, set it so that we can
938 * log it
939 */
940 if (objv_tracker) {
941 if (objv_tracker->read_version.ver && !objv_tracker->write_version.ver) {
942 objv_tracker->write_version = objv_tracker->read_version;
943 objv_tracker->write_version.ver++;
944 }
945 log_data.read_version = objv_tracker->read_version;
946 log_data.write_version = objv_tracker->write_version;
947 }
948
949 log_data.status = op_type;
950
951 bufferlist logbl;
952 ::encode(log_data, logbl);
953
954 assert(current_log); // must have called init()
955 int ret = current_log->add_entry(handler, section, key, logbl);
956 if (ret < 0)
957 return ret;
958
959 return 0;
960 }
961
962 int RGWMetadataManager::post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data,
963 RGWObjVersionTracker *objv_tracker, int ret)
964 {
965 if (ret >= 0)
966 log_data.status = MDLOG_STATUS_COMPLETE;
967 else
968 log_data.status = MDLOG_STATUS_ABORT;
969
970 bufferlist logbl;
971 ::encode(log_data, logbl);
972
973 assert(current_log); // must have called init()
974 int r = current_log->add_entry(handler, section, key, logbl);
975 if (ret < 0)
976 return ret;
977
978 if (r < 0)
979 return r;
980
981 return 0;
982 }
983
984 string RGWMetadataManager::heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv)
985 {
986 char buf[objv.tag.size() + 32];
987 snprintf(buf, sizeof(buf), "%s:%lld", objv.tag.c_str(), (long long)objv.ver);
988 return string(".meta:") + handler->get_type() + ":" + key + ":" + buf;
989 }
990
991 int RGWMetadataManager::store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl,
992 RGWObjVersionTracker *objv_tracker, real_time mtime,
993 map<string, bufferlist> *pattrs)
994 {
995 if (!objv_tracker) {
996 return -EINVAL;
997 }
998
999 rgw_pool heap_pool(store->get_zone_params().metadata_heap);
1000
1001 if (heap_pool.empty()) {
1002 return 0;
1003 }
1004
1005 RGWObjVersionTracker otracker;
1006 otracker.write_version = objv_tracker->write_version;
1007 string oid = heap_oid(handler, key, objv_tracker->write_version);
1008 int ret = rgw_put_system_obj(store, heap_pool, oid,
1009 bl.c_str(), bl.length(), false,
1010 &otracker, mtime, pattrs);
1011 if (ret < 0) {
1012 ldout(store->ctx(), 0) << "ERROR: rgw_put_system_obj() oid=" << oid << ") returned ret=" << ret << dendl;
1013 return ret;
1014 }
1015
1016 return 0;
1017 }
1018
1019 int RGWMetadataManager::remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker)
1020 {
1021 if (!objv_tracker) {
1022 return -EINVAL;
1023 }
1024
1025 rgw_pool heap_pool(store->get_zone_params().metadata_heap);
1026
1027 if (heap_pool.empty()) {
1028 return 0;
1029 }
1030
1031 string oid = heap_oid(handler, key, objv_tracker->write_version);
1032 rgw_raw_obj obj(heap_pool, oid);
1033 int ret = store->delete_system_obj(obj);
1034 if (ret < 0) {
1035 ldout(store->ctx(), 0) << "ERROR: store->delete_system_obj()=" << oid << ") returned ret=" << ret << dendl;
1036 return ret;
1037 }
1038
1039 return 0;
1040 }
1041
1042 int RGWMetadataManager::put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive,
1043 RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs)
1044 {
1045 string section;
1046 RGWMetadataLogData log_data;
1047 int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_WRITE);
1048 if (ret < 0)
1049 return ret;
1050
1051 string oid;
1052 rgw_pool pool;
1053
1054 handler->get_pool_and_oid(store, key, pool, oid);
1055
1056 ret = store_in_heap(handler, key, bl, objv_tracker, mtime, pattrs);
1057 if (ret < 0) {
1058 ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": store_in_heap() key=" << key << " returned ret=" << ret << dendl;
1059 goto done;
1060 }
1061
1062 ret = rgw_put_system_obj(store, pool, oid,
1063 bl.c_str(), bl.length(), exclusive,
1064 objv_tracker, mtime, pattrs);
1065
1066 if (ret < 0) {
1067 int r = remove_from_heap(handler, key, objv_tracker);
1068 if (r < 0) {
1069 ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": remove_from_heap() key=" << key << " returned ret=" << r << dendl;
1070 }
1071 }
1072 done:
1073 /* cascading ret into post_modify() */
1074
1075 ret = post_modify(handler, section, key, log_data, objv_tracker, ret);
1076 if (ret < 0)
1077 return ret;
1078
1079 return 0;
1080 }
1081
1082 int RGWMetadataManager::remove_entry(RGWMetadataHandler *handler, string& key, RGWObjVersionTracker *objv_tracker)
1083 {
1084 string section;
1085 RGWMetadataLogData log_data;
1086 int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_REMOVE);
1087 if (ret < 0)
1088 return ret;
1089
1090 string oid;
1091 rgw_pool pool;
1092
1093 handler->get_pool_and_oid(store, key, pool, oid);
1094
1095 rgw_raw_obj obj(pool, oid);
1096
1097 ret = store->delete_system_obj(obj, objv_tracker);
1098 /* cascading ret into post_modify() */
1099
1100 ret = post_modify(handler, section, key, log_data, objv_tracker, ret);
1101 if (ret < 0)
1102 return ret;
1103
1104 return 0;
1105 }
1106
1107 int RGWMetadataManager::get_log_shard_id(const string& section,
1108 const string& key, int *shard_id)
1109 {
1110 RGWMetadataHandler *handler = get_handler(section);
1111 if (!handler) {
1112 return -EINVAL;
1113 }
1114 string hash_key;
1115 handler->get_hash_key(section, key, hash_key);
1116 *shard_id = store->key_to_shard_id(hash_key, cct->_conf->rgw_md_log_max_shards);
1117 return 0;
1118 }