1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
4 #include <boost/intrusive_ptr.hpp>
5 #include "common/ceph_json.h"
6 #include "common/errno.h"
7 #include "rgw_metadata.h"
8 #include "rgw_coroutine.h"
9 #include "cls/version/cls_version_types.h"
12 #include "rgw_tools.h"
13 #include "rgw_mdlog.h"
16 #include "rgw_cr_rados.h"
18 #include "services/svc_zone.h"
19 #include "services/svc_meta.h"
20 #include "services/svc_meta_be.h"
21 #include "services/svc_meta_be_sobj.h"
22 #include "services/svc_cls.h"
24 #include "include/ceph_assert.h"
26 #include <boost/asio/yield.hpp>
28 #define dout_subsys ceph_subsys_rgw
30 const std::string
RGWMetadataLogHistory::oid
= "meta.history";
32 void LogStatusDump::dump(Formatter
*f
) const {
35 case MDLOG_STATUS_WRITE
:
38 case MDLOG_STATUS_SETATTRS
:
41 case MDLOG_STATUS_REMOVE
:
44 case MDLOG_STATUS_COMPLETE
:
47 case MDLOG_STATUS_ABORT
:
54 encode_json("status", s
, f
);
57 void RGWMetadataLogData::encode(bufferlist
& bl
) const {
58 ENCODE_START(1, 1, bl
);
59 encode(read_version
, bl
);
60 encode(write_version
, bl
);
61 uint32_t s
= (uint32_t)status
;
66 void RGWMetadataLogData::decode(bufferlist::const_iterator
& bl
) {
68 decode(read_version
, bl
);
69 decode(write_version
, bl
);
72 status
= (RGWMDLogStatus
)s
;
76 void RGWMetadataLogData::dump(Formatter
*f
) const {
77 encode_json("read_version", read_version
, f
);
78 encode_json("write_version", write_version
, f
);
79 encode_json("status", LogStatusDump(status
), f
);
82 void decode_json_obj(RGWMDLogStatus
& status
, JSONObj
*obj
) {
84 JSONDecoder::decode_json("status", s
, obj
);
85 if (s
== "complete") {
86 status
= MDLOG_STATUS_COMPLETE
;
87 } else if (s
== "write") {
88 status
= MDLOG_STATUS_WRITE
;
89 } else if (s
== "remove") {
90 status
= MDLOG_STATUS_REMOVE
;
91 } else if (s
== "set_attrs") {
92 status
= MDLOG_STATUS_SETATTRS
;
93 } else if (s
== "abort") {
94 status
= MDLOG_STATUS_ABORT
;
96 status
= MDLOG_STATUS_UNKNOWN
;
100 void RGWMetadataLogData::decode_json(JSONObj
*obj
) {
101 JSONDecoder::decode_json("read_version", read_version
, obj
);
102 JSONDecoder::decode_json("write_version", write_version
, obj
);
103 JSONDecoder::decode_json("status", status
, obj
);
107 int RGWMetadataLog::add_entry(const string
& hash_key
, const string
& section
, const string
& key
, bufferlist
& bl
) {
108 if (!svc
.zone
->need_to_log_metadata())
114 rgw_shard_name(prefix
, cct
->_conf
->rgw_md_log_max_shards
, hash_key
, oid
, &shard_id
);
115 mark_modified(shard_id
);
116 real_time now
= real_clock::now();
117 return svc
.cls
->timelog
.add(oid
, now
, section
, key
, bl
, null_yield
);
120 int RGWMetadataLog::get_shard_id(const string
& hash_key
, int *shard_id
)
124 rgw_shard_name(prefix
, cct
->_conf
->rgw_md_log_max_shards
, hash_key
, oid
, shard_id
);
128 int RGWMetadataLog::store_entries_in_shard(list
<cls_log_entry
>& entries
, int shard_id
, librados::AioCompletion
*completion
)
132 mark_modified(shard_id
);
133 rgw_shard_name(prefix
, shard_id
, oid
);
134 return svc
.cls
->timelog
.add(oid
, entries
, completion
, false, null_yield
);
137 void RGWMetadataLog::init_list_entries(int shard_id
, const real_time
& from_time
, const real_time
& end_time
,
138 const string
& marker
, void **handle
)
140 LogListCtx
*ctx
= new LogListCtx();
142 ctx
->cur_shard
= shard_id
;
143 ctx
->from_time
= from_time
;
144 ctx
->end_time
= end_time
;
145 ctx
->marker
= marker
;
147 get_shard_oid(ctx
->cur_shard
, ctx
->cur_oid
);
149 *handle
= (void *)ctx
;
152 void RGWMetadataLog::complete_list_entries(void *handle
) {
153 LogListCtx
*ctx
= static_cast<LogListCtx
*>(handle
);
157 int RGWMetadataLog::list_entries(void *handle
,
159 list
<cls_log_entry
>& entries
,
162 LogListCtx
*ctx
= static_cast<LogListCtx
*>(handle
);
169 std::string next_marker
;
170 int ret
= svc
.cls
->timelog
.list(ctx
->cur_oid
, ctx
->from_time
, ctx
->end_time
,
171 max_entries
, entries
, ctx
->marker
,
172 &next_marker
, truncated
, null_yield
);
173 if ((ret
< 0) && (ret
!= -ENOENT
))
176 ctx
->marker
= std::move(next_marker
);
178 *last_marker
= ctx
->marker
;
187 int RGWMetadataLog::get_info(int shard_id
, RGWMetadataLogInfo
*info
)
190 get_shard_oid(shard_id
, oid
);
192 cls_log_header header
;
194 int ret
= svc
.cls
->timelog
.info(oid
, &header
, null_yield
);
195 if ((ret
< 0) && (ret
!= -ENOENT
))
198 info
->marker
= header
.max_marker
;
199 info
->last_update
= header
.max_time
.to_real_time();
204 static void _mdlog_info_completion(librados::completion_t cb
, void *arg
)
206 auto infoc
= static_cast<RGWMetadataLogInfoCompletion
*>(arg
);
208 infoc
->put(); // drop the ref from get_info_async()
211 RGWMetadataLogInfoCompletion::RGWMetadataLogInfoCompletion(info_callback_t cb
)
212 : completion(librados::Rados::aio_create_completion((void *)this,
213 _mdlog_info_completion
)),
218 RGWMetadataLogInfoCompletion::~RGWMetadataLogInfoCompletion()
220 completion
->release();
223 int RGWMetadataLog::get_info_async(int shard_id
, RGWMetadataLogInfoCompletion
*completion
)
226 get_shard_oid(shard_id
, oid
);
228 completion
->get(); // hold a ref until the completion fires
230 return svc
.cls
->timelog
.info_async(completion
->get_io_obj(), oid
,
231 &completion
->get_header(),
232 completion
->get_completion());
235 int RGWMetadataLog::trim(int shard_id
, const real_time
& from_time
, const real_time
& end_time
,
236 const string
& start_marker
, const string
& end_marker
)
239 get_shard_oid(shard_id
, oid
);
241 return svc
.cls
->timelog
.trim(oid
, from_time
, end_time
, start_marker
,
242 end_marker
, nullptr, null_yield
);
245 int RGWMetadataLog::lock_exclusive(int shard_id
, timespan duration
, string
& zone_id
, string
& owner_id
) {
247 get_shard_oid(shard_id
, oid
);
249 return svc
.cls
->lock
.lock_exclusive(svc
.zone
->get_zone_params().log_pool
, oid
, duration
, zone_id
, owner_id
);
252 int RGWMetadataLog::unlock(int shard_id
, string
& zone_id
, string
& owner_id
) {
254 get_shard_oid(shard_id
, oid
);
256 return svc
.cls
->lock
.unlock(svc
.zone
->get_zone_params().log_pool
, oid
, zone_id
, owner_id
);
259 void RGWMetadataLog::mark_modified(int shard_id
)
262 if (modified_shards
.find(shard_id
) != modified_shards
.end()) {
268 std::unique_lock wl
{lock
};
269 modified_shards
.insert(shard_id
);
272 void RGWMetadataLog::read_clear_modified(set
<int> &modified
)
274 std::unique_lock wl
{lock
};
275 modified
.swap(modified_shards
);
276 modified_shards
.clear();
279 obj_version
& RGWMetadataObject::get_version()
284 class RGWMetadataTopHandler
: public RGWMetadataHandler
{
286 set
<string
> sections
;
287 set
<string
>::iterator iter
;
291 RGWSI_Meta
*meta
{nullptr};
294 RGWMetadataManager
*mgr
;
297 RGWMetadataTopHandler(RGWSI_Meta
*meta_svc
,
298 RGWMetadataManager
*_mgr
) : mgr(_mgr
) {
299 base_init(meta_svc
->ctx());
303 string
get_type() override
{ return string(); }
305 RGWMetadataObject
*get_meta_obj(JSONObj
*jo
, const obj_version
& objv
, const ceph::real_time
& mtime
) {
306 return new RGWMetadataObject
;
309 int get(string
& entry
, RGWMetadataObject
**obj
, optional_yield y
) override
{
313 int put(string
& entry
, RGWMetadataObject
*obj
, RGWObjVersionTracker
& objv_tracker
,
314 optional_yield y
, RGWMDLogSyncType type
, bool from_remote_zone
) override
{
318 int remove(string
& entry
, RGWObjVersionTracker
& objv_tracker
, optional_yield y
) override
{
322 int mutate(const string
& entry
,
323 const ceph::real_time
& mtime
,
324 RGWObjVersionTracker
*objv_tracker
,
326 RGWMDLogStatus op_type
,
327 std::function
<int()> f
) {
331 int list_keys_init(const string
& marker
, void **phandle
) override
{
332 iter_data
*data
= new iter_data
;
333 list
<string
> sections
;
334 mgr
->get_sections(sections
);
335 for (auto& s
: sections
) {
336 data
->sections
.insert(s
);
338 data
->iter
= data
->sections
.lower_bound(marker
);
344 int list_keys_next(void *handle
, int max
, list
<string
>& keys
, bool *truncated
) override
{
345 iter_data
*data
= static_cast<iter_data
*>(handle
);
346 for (int i
= 0; i
< max
&& data
->iter
!= data
->sections
.end(); ++i
, ++(data
->iter
)) {
347 keys
.push_back(*data
->iter
);
350 *truncated
= (data
->iter
!= data
->sections
.end());
354 void list_keys_complete(void *handle
) override
{
355 iter_data
*data
= static_cast<iter_data
*>(handle
);
360 virtual string
get_marker(void *handle
) override
{
361 iter_data
*data
= static_cast<iter_data
*>(handle
);
363 if (data
->iter
!= data
->sections
.end()) {
364 return *(data
->iter
);
371 RGWMetadataManager::RGWMetadataManager(RGWSI_Meta
*_meta_svc
)
372 : cct(_meta_svc
->ctx()), meta_svc(_meta_svc
)
374 md_top_handler
.reset(new RGWMetadataTopHandler(meta_svc
, this));
377 RGWMetadataManager::~RGWMetadataManager()
381 int RGWMetadataHandler::attach(RGWMetadataManager
*manager
)
383 return manager
->register_handler(this);
386 RGWMetadataHandler_GenericMetaBE::Put::Put(RGWMetadataHandler_GenericMetaBE
*_handler
,
387 RGWSI_MetaBackend_Handler::Op
*_op
,
388 string
& _entry
, RGWMetadataObject
*_obj
,
389 RGWObjVersionTracker
& _objv_tracker
,
391 RGWMDLogSyncType _type
, bool _from_remote_zone
):
392 handler(_handler
), op(_op
),
393 entry(_entry
), obj(_obj
),
394 objv_tracker(_objv_tracker
),
397 from_remote_zone(_from_remote_zone
)
401 RGWMetadataHandlerPut_SObj::RGWMetadataHandlerPut_SObj(RGWMetadataHandler_GenericMetaBE
*handler
, RGWSI_MetaBackend_Handler::Op
*op
,
402 string
& entry
, RGWMetadataObject
*obj
, RGWObjVersionTracker
& objv_tracker
,
404 RGWMDLogSyncType type
, bool from_remote_zone
) : Put(handler
, op
, entry
, obj
, objv_tracker
, y
, type
, from_remote_zone
) {
407 RGWMetadataHandlerPut_SObj::~RGWMetadataHandlerPut_SObj() {
410 int RGWMetadataHandlerPut_SObj::put_pre()
412 int ret
= get(&old_obj
);
413 if (ret
< 0 && ret
!= -ENOENT
) {
416 exists
= (ret
!= -ENOENT
);
420 auto old_ver
= (!old_obj
? obj_version() : old_obj
->get_version());
421 auto old_mtime
= (!old_obj
? ceph::real_time() : old_obj
->get_mtime());
423 // are we actually going to perform this put, or is it too old?
424 if (!handler
->check_versions(exists
, old_ver
, old_mtime
,
425 objv_tracker
.write_version
, obj
->get_mtime(),
427 return STATUS_NO_APPLY
;
430 objv_tracker
.read_version
= old_ver
; /* maintain the obj version we just read */
435 int RGWMetadataHandlerPut_SObj::put()
437 int ret
= put_check();
442 return put_checked();
445 int RGWMetadataHandlerPut_SObj::put_checked()
447 RGWSI_MBSObj_PutParams
params(obj
->get_pattrs(), obj
->get_mtime());
449 encode_obj(&params
.bl
);
451 int ret
= op
->put(entry
, params
, &objv_tracker
, y
);
459 int RGWMetadataHandler_GenericMetaBE::do_put_operate(Put
*put_op
)
461 int r
= put_op
->put_pre();
462 if (r
!= 0) { /* r can also be STATUS_NO_APPLY */
471 r
= put_op
->put_post();
472 if (r
!= 0) { /* e.g., -error or STATUS_APPLIED */
479 int RGWMetadataHandler_GenericMetaBE::get(string
& entry
, RGWMetadataObject
**obj
, optional_yield y
)
481 return be_handler
->call([&](RGWSI_MetaBackend_Handler::Op
*op
) {
482 return do_get(op
, entry
, obj
, y
);
486 int RGWMetadataHandler_GenericMetaBE::put(string
& entry
, RGWMetadataObject
*obj
, RGWObjVersionTracker
& objv_tracker
,
487 optional_yield y
, RGWMDLogSyncType type
, bool from_remote_zone
)
489 return be_handler
->call([&](RGWSI_MetaBackend_Handler::Op
*op
) {
490 return do_put(op
, entry
, obj
, objv_tracker
, y
, type
, from_remote_zone
);
494 int RGWMetadataHandler_GenericMetaBE::remove(string
& entry
, RGWObjVersionTracker
& objv_tracker
, optional_yield y
)
496 return be_handler
->call([&](RGWSI_MetaBackend_Handler::Op
*op
) {
497 return do_remove(op
, entry
, objv_tracker
, y
);
501 int RGWMetadataHandler_GenericMetaBE::mutate(const string
& entry
,
502 const ceph::real_time
& mtime
,
503 RGWObjVersionTracker
*objv_tracker
,
505 RGWMDLogStatus op_type
,
506 std::function
<int()> f
)
508 return be_handler
->call([&](RGWSI_MetaBackend_Handler::Op
*op
) {
509 RGWSI_MetaBackend::MutateParams
params(mtime
, op_type
);
510 return op
->mutate(entry
,
518 int RGWMetadataHandler_GenericMetaBE::get_shard_id(const string
& entry
, int *shard_id
)
520 return be_handler
->call([&](RGWSI_MetaBackend_Handler::Op
*op
) {
521 return op
->get_shard_id(entry
, shard_id
);
525 int RGWMetadataHandler_GenericMetaBE::list_keys_init(const string
& marker
, void **phandle
)
527 auto op
= std::make_unique
<RGWSI_MetaBackend_Handler::Op_ManagedCtx
>(be_handler
);
529 int ret
= op
->list_init(marker
);
534 *phandle
= (void *)op
.release();
539 int RGWMetadataHandler_GenericMetaBE::list_keys_next(void *handle
, int max
, list
<string
>& keys
, bool *truncated
)
541 auto op
= static_cast<RGWSI_MetaBackend_Handler::Op_ManagedCtx
*>(handle
);
543 int ret
= op
->list_next(max
, &keys
, truncated
);
544 if (ret
< 0 && ret
!= -ENOENT
) {
547 if (ret
== -ENOENT
) {
557 void RGWMetadataHandler_GenericMetaBE::list_keys_complete(void *handle
)
559 auto op
= static_cast<RGWSI_MetaBackend_Handler::Op_ManagedCtx
*>(handle
);
563 string
RGWMetadataHandler_GenericMetaBE::get_marker(void *handle
)
565 auto op
= static_cast<RGWSI_MetaBackend_Handler::Op_ManagedCtx
*>(handle
);
567 int r
= op
->list_get_marker(&marker
);
569 ldout(cct
, 0) << "ERROR: " << __func__
<< "(): list_get_marker() returned: r=" << r
<< dendl
;
570 /* not much else to do */
576 int RGWMetadataManager::register_handler(RGWMetadataHandler
*handler
)
578 string type
= handler
->get_type();
580 if (handlers
.find(type
) != handlers
.end())
583 handlers
[type
] = handler
;
588 RGWMetadataHandler
*RGWMetadataManager::get_handler(const string
& type
)
590 map
<string
, RGWMetadataHandler
*>::iterator iter
= handlers
.find(type
);
591 if (iter
== handlers
.end())
597 void RGWMetadataManager::parse_metadata_key(const string
& metadata_key
, string
& type
, string
& entry
)
599 auto pos
= metadata_key
.find(':');
600 if (pos
== string::npos
) {
603 type
= metadata_key
.substr(0, pos
);
604 entry
= metadata_key
.substr(pos
+ 1);
608 int RGWMetadataManager::find_handler(const string
& metadata_key
, RGWMetadataHandler
**handler
, string
& entry
)
612 parse_metadata_key(metadata_key
, type
, entry
);
615 *handler
= md_top_handler
.get();
619 map
<string
, RGWMetadataHandler
*>::iterator iter
= handlers
.find(type
);
620 if (iter
== handlers
.end())
623 *handler
= iter
->second
;
629 int RGWMetadataManager::get(string
& metadata_key
, Formatter
*f
, optional_yield y
)
631 RGWMetadataHandler
*handler
;
633 int ret
= find_handler(metadata_key
, &handler
, entry
);
638 RGWMetadataObject
*obj
;
640 ret
= handler
->get(entry
, &obj
, y
);
645 f
->open_object_section("metadata_info");
646 encode_json("key", metadata_key
, f
);
647 encode_json("ver", obj
->get_version(), f
);
648 real_time mtime
= obj
->get_mtime();
649 if (!real_clock::is_zero(mtime
)) {
651 encode_json("mtime", ut
, f
);
653 encode_json("data", *obj
, f
);
661 int RGWMetadataManager::put(string
& metadata_key
, bufferlist
& bl
,
663 RGWMDLogSyncType sync_type
,
664 bool from_remote_zone
,
665 obj_version
*existing_version
)
667 RGWMetadataHandler
*handler
;
670 int ret
= find_handler(metadata_key
, &handler
, entry
);
676 if (!parser
.parse(bl
.c_str(), bl
.length())) {
680 RGWObjVersionTracker objv_tracker
;
682 obj_version
*objv
= &objv_tracker
.write_version
;
687 JSONDecoder::decode_json("key", metadata_key
, &parser
);
688 JSONDecoder::decode_json("ver", *objv
, &parser
);
689 JSONDecoder::decode_json("mtime", mtime
, &parser
);
690 } catch (JSONDecoder::err
& e
) {
694 JSONObj
*jo
= parser
.find_obj("data");
699 RGWMetadataObject
*obj
= handler
->get_meta_obj(jo
, *objv
, mtime
.to_real_time());
704 ret
= handler
->put(entry
, obj
, objv_tracker
, y
, sync_type
, from_remote_zone
);
705 if (existing_version
) {
706 *existing_version
= objv_tracker
.read_version
;
714 int RGWMetadataManager::remove(string
& metadata_key
, optional_yield y
)
716 RGWMetadataHandler
*handler
;
719 int ret
= find_handler(metadata_key
, &handler
, entry
);
724 RGWMetadataObject
*obj
;
725 ret
= handler
->get(entry
, &obj
, y
);
729 RGWObjVersionTracker objv_tracker
;
730 objv_tracker
.read_version
= obj
->get_version();
733 return handler
->remove(entry
, objv_tracker
, y
);
736 int RGWMetadataManager::mutate(const string
& metadata_key
,
737 const ceph::real_time
& mtime
,
738 RGWObjVersionTracker
*objv_tracker
,
740 RGWMDLogStatus op_type
,
741 std::function
<int()> f
)
743 RGWMetadataHandler
*handler
;
746 int ret
= find_handler(metadata_key
, &handler
, entry
);
751 return handler
->mutate(entry
, mtime
, objv_tracker
, y
, op_type
, f
);
754 int RGWMetadataManager::get_shard_id(const string
& section
, const string
& entry
, int *shard_id
)
756 RGWMetadataHandler
*handler
= get_handler(section
);
761 return handler
->get_shard_id(entry
, shard_id
);
764 struct list_keys_handle
{
766 RGWMetadataHandler
*handler
;
769 int RGWMetadataManager::list_keys_init(const string
& section
, void **handle
)
771 return list_keys_init(section
, string(), handle
);
774 int RGWMetadataManager::list_keys_init(const string
& section
,
775 const string
& marker
, void **handle
)
778 RGWMetadataHandler
*handler
;
782 ret
= find_handler(section
, &handler
, entry
);
787 list_keys_handle
*h
= new list_keys_handle
;
788 h
->handler
= handler
;
789 ret
= handler
->list_keys_init(marker
, &h
->handle
);
800 int RGWMetadataManager::list_keys_next(void *handle
, int max
, list
<string
>& keys
, bool *truncated
)
802 list_keys_handle
*h
= static_cast<list_keys_handle
*>(handle
);
804 RGWMetadataHandler
*handler
= h
->handler
;
806 return handler
->list_keys_next(h
->handle
, max
, keys
, truncated
);
809 void RGWMetadataManager::list_keys_complete(void *handle
)
811 list_keys_handle
*h
= static_cast<list_keys_handle
*>(handle
);
813 RGWMetadataHandler
*handler
= h
->handler
;
815 handler
->list_keys_complete(h
->handle
);
819 string
RGWMetadataManager::get_marker(void *handle
)
821 list_keys_handle
*h
= static_cast<list_keys_handle
*>(handle
);
823 return h
->handler
->get_marker(h
->handle
);
826 void RGWMetadataManager::dump_log_entry(cls_log_entry
& entry
, Formatter
*f
)
828 f
->open_object_section("entry");
829 f
->dump_string("id", entry
.id
);
830 f
->dump_string("section", entry
.section
);
831 f
->dump_string("name", entry
.name
);
832 entry
.timestamp
.gmtime_nsec(f
->dump_stream("timestamp"));
835 RGWMetadataLogData log_data
;
836 auto iter
= entry
.data
.cbegin();
837 decode(log_data
, iter
);
839 encode_json("data", log_data
, f
);
840 } catch (buffer::error
& err
) {
841 lderr(cct
) << "failed to decode log entry: " << entry
.section
<< ":" << entry
.name
<< " ts=" << entry
.timestamp
<< dendl
;
846 void RGWMetadataManager::get_sections(list
<string
>& sections
)
848 for (map
<string
, RGWMetadataHandler
*>::iterator iter
= handlers
.begin(); iter
!= handlers
.end(); ++iter
) {
849 sections
.push_back(iter
->first
);