1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #ifndef CEPH_RGW_METADATA_H
5 #define CEPH_RGW_METADATA_H
8 #include <boost/optional.hpp>
10 #include "include/types.h"
11 #include "rgw_common.h"
12 #include "rgw_period_history.h"
13 #include "cls/version/cls_version_types.h"
14 #include "cls/log/cls_log_types.h"
15 #include "common/RWLock.h"
16 #include "common/RefCountedObj.h"
17 #include "common/ceph_time.h"
23 struct RGWObjVersionTracker
;
31 MDLOG_STATUS_SETATTRS
,
33 MDLOG_STATUS_COMPLETE
,
// Abstract base for a metadata object: it carries a modification time,
// exposes version/mtime accessors and requires subclasses to implement dump().
// NOTE(review): access specifiers, any further members and the closing brace
// are not visible in this excerpt.
37 class RGWMetadataObject
{
// modification timestamp handed out by get_mtime()
40 ceph::real_time mtime
;
43 RGWMetadataObject() {}
// virtual destructor, so deleting through a base pointer is well defined
44 virtual ~RGWMetadataObject() {}
// returns a mutable reference to an obj_version; defined outside this excerpt
45 obj_version
& get_version();
// returns a copy of mtime
46 real_time
get_mtime() { return mtime
; }
// pure virtual: each concrete object supplies its own dump into the Formatter
48 virtual void dump(Formatter
*f
) const = 0;
51 class RGWMetadataManager
;
53 class RGWMetadataHandler
{
54 friend class RGWMetadataManager
;
// Compares sync_string against three spellings: "update-by-version",
// "update-by-timestamp" and "always".
// NOTE(review): the remaining parameter(s), what each branch does and the
// return statements are not visible in this excerpt - presumably each name
// selects a sync_type_t value and the bool result reports whether the name
// was recognised; confirm against the full file.
62 static bool string_to_sync_type(const string
& sync_string
,
64 if (sync_string
.compare("update-by-version") == 0)
66 else if (sync_string
.compare("update-by-timestamp") == 0)
68 else if (sync_string
.compare("always") == 0)
// Virtual interface of a metadata handler. Everything below except the
// destructor is pure virtual, so each concrete handler must implement it.
74 virtual ~RGWMetadataHandler() {}
// returns a string identifying the handler (presumably the metadata type/section name - TODO confirm)
75 virtual string
get_type() = 0;
// takes the store, an entry name and an out-pointer (**obj) for an RGWMetadataObject
77 virtual int get(RGWRados
*store
, string
& entry
, RGWMetadataObject
**obj
) = 0;
// takes the entry name, a version tracker, an mtime, a JSON object and a sync type
78 virtual int put(RGWRados
*store
, string
& entry
, RGWObjVersionTracker
& objv_tracker
,
79 real_time mtime
, JSONObj
*obj
, sync_type_t type
) = 0;
// takes the entry name and a version tracker
80 virtual int remove(RGWRados
*store
, string
& entry
, RGWObjVersionTracker
& objv_tracker
) = 0;
// Key listing is split into init / next / complete around an opaque void* handle.
// init receives a marker string and writes the handle through phandle.
82 virtual int list_keys_init(RGWRados
*store
, const string
& marker
, void **phandle
) = 0;
// next takes a max count, a list<string> for keys and a bool* truncated flag
83 virtual int list_keys_next(void *handle
, int max
, list
<string
>& keys
, bool *truncated
) = 0;
// complete takes the handle and returns nothing
84 virtual void list_keys_complete(void *handle
) = 0;
// returns a string for the given listing handle (presumably the resume marker - TODO confirm)
86 virtual string
get_marker(void *handle
) = 0;
88 /* key to use for hashing entries for log shard placement */
// Default implementation: writes section + ":" + key into hash_key.
// It is virtual, so a handler can substitute a different key.
// NOTE(review): the closing brace is not visible in this excerpt.
89 virtual void get_hash_key(const string
& section
, const string
& key
, string
& hash_key
) {
90 hash_key
= section
+ ":" + key
;
// Pure virtual: given the store and a key, the handler fills in the two
// reference parameters, pool and oid.
// NOTE(review): presumably these locate the RADOS object holding the entry - confirm.
94 virtual void get_pool_and_oid(RGWRados
*store
, const string
& key
, rgw_pool
& pool
, string
& oid
) = 0;
96 * Compare an incoming versus on-disk tag/version+mtime combo against
97 * the sync mode to see if the new one should replace the on-disk one.
99 * @return true if the update should proceed, false otherwise.
// Takes the on-disk and incoming version and timestamp, plus a sync mode, and
// returns a bool.
// NOTE(review): the switch header, the other case labels and every return are
// missing from this excerpt, so the outcome of each test below cannot be
// confirmed here.
101 bool check_versions(const obj_version
& ondisk
, const real_time
& ondisk_time
,
102 const obj_version
& incoming
, const real_time
& incoming_time
,
103 sync_type_t sync_mode
) {
// version test: the tags differ, or the on-disk version is not older than the incoming one
106 if ((ondisk
.tag
!= incoming
.tag
) ||
107 (ondisk
.ver
>= incoming
.ver
))
// timestamp test: the on-disk time is not older than the incoming time
111 if (ondisk_time
>= incoming_time
)
114 case APPLY_ALWAYS
: //deliberate fall-thru -- we always apply!
121 * The tenant_name is always returned on purpose. May be empty, of course.
// Splits the bucket string into pieces written through the tenant_name,
// bucket_name and bucket_instance pointers; bucket_instance defaults to
// nullptr and is only written when non-null.
// NOTE(review): the declarations of tenant_name and bucket_name, the branch
// conditions and the second delimiter search are not visible in this excerpt.
123 static void parse_bucket(const string
& bucket
,
126 string
*bucket_instance
= nullptr /* optional */)
// NOTE(review): find() returns string::size_type but the result is held in an
// int - check how the "not found" case is tested in the missing lines.
128 int pos
= bucket
.find('/');
// the text before the '/' goes to *tenant_name
130 *tenant_name
= bucket
.substr(0, pos
);
// alternative path empties the tenant (presumably when no '/' was found - confirm)
132 tenant_name
->clear();
// bn is everything after position pos
134 string bn
= bucket
.substr(pos
+ 1);
// one path hands the whole of bn to *bucket_name
137 *bucket_name
= std::move(bn
);
// the other path splits bn at pos: the prefix is the bucket name, the suffix the instance
140 *bucket_name
= bn
.substr(0, pos
);
141 if (bucket_instance
) {
142 *bucket_instance
= bn
.substr(pos
+ 1);
147 #define META_LOG_OBJ_PREFIX "meta.log."
// Information record about a metadata log; the field visible here is the
// last_update timestamp. dump() and decode_json() are only declared here.
// NOTE(review): other members and the closing brace are not visible in this excerpt.
149 struct RGWMetadataLogInfo
{
151 real_time last_update
;
// const: takes a Formatter to write into
153 void dump(Formatter
*f
) const;
// non-const: takes a JSONObj to read from
154 void decode_json(JSONObj
*obj
);
157 class RGWCompletionManager
;
// Ref-counted state for a log-info request: it holds the cls_log_header, the
// IoCtx, the AioCompletion and an optional callback that finish() invokes.
// NOTE(review): access specifiers and several lines of the inline bodies are
// not visible in this excerpt.
159 class RGWMetadataLogInfoCompletion
: public RefCountedObject
{
// callback signature: (int, const cls_log_header&); finish() passes the
// completion's return value and the header
161 using info_callback_t
= std::function
<void(int, const cls_log_header
&)>;
163 cls_log_header header
;
164 librados::IoCtx io_ctx
;
165 librados::AioCompletion
*completion
;
166 std::mutex mutex
; //< protects callback between cancel/complete
167 boost::optional
<info_callback_t
> callback
; //< cleared on cancel
// constructor and destructor are defined outside this excerpt
169 RGWMetadataLogInfoCompletion(info_callback_t callback
);
170 ~RGWMetadataLogInfoCompletion() override
;
// accessors returning the stored members by reference / pointer
172 librados::IoCtx
& get_io_ctx() { return io_ctx
; }
173 cls_log_header
& get_header() { return header
; }
174 librados::AioCompletion
* get_completion() { return completion
; }
// finish(): takes the mutex, then calls the stored callback with the
// completion's return value and the header.
// NOTE(review): any check that callback is still set is not visible here.
176 void finish(librados::completion_t cb
) {
177 std::lock_guard
<std::mutex
> lock(mutex
);
179 (*callback
)(completion
->get_return_value(), header
);
// Takes the mutex and resets callback to boost::none.
// NOTE(review): the header of the enclosing function is missing from this
// excerpt; the member comments above suggest this is the cancel path.
183 std::lock_guard
<std::mutex
> lock(mutex
);
184 callback
= boost::none
;
188 class RGWMetadataLog
{
// Builds a prefix string from the period. Two results are visible: the bare
// META_LOG_OBJ_PREFIX, or META_LOG_OBJ_PREFIX + period + ".".
// NOTE(review): the condition choosing the first return is not visible in
// this excerpt - presumably an empty period; confirm.
193 static std::string
make_prefix(const std::string
& period
) {
195 return META_LOG_OBJ_PREFIX
;
196 return META_LOG_OBJ_PREFIX
+ period
+ ".";
// set of int shard ids; presumably filled by mark_modified() and drained by
// read_clear_modified() - both are defined outside this excerpt, confirm there
200 set
<int> modified_shards
;
// takes a shard id; declaration only
202 void mark_modified(int shard_id
);
204 RGWMetadataLog(CephContext
*_cct
, RGWRados
*_store
, const std::string
& period
)
205 : cct(_cct
), store(_store
),
206 prefix(make_prefix(period
)),
207 lock("RGWMetaLog::lock") {}
// Takes a shard id and an output string oid. The visible step formats id as a
// decimal number into the local buffer buf.
// NOTE(review): the declaration of buf and the assignment to oid are not
// visible in this excerpt - presumably oid is the prefix followed by buf.
209 void get_shard_oid(int id
, string
& oid
) const {
211 snprintf(buf
, sizeof(buf
), "%d", id
);
// Log operations. All are declarations only; their definitions live outside
// this excerpt, so the notes below describe signatures, not behaviour.
// add_entry: takes the handler, a section, a key and a bufferlist
215 int add_entry(RGWMetadataHandler
*handler
, const string
& section
, const string
& key
, bufferlist
& bl
);
// store_entries_in_shard: takes a list of cls_log_entry, a shard id and an AioCompletion
216 int store_entries_in_shard(list
<cls_log_entry
>& entries
, int shard_id
, librados::AioCompletion
*completion
);
// Constructor of a nested LogListCtx type: starts at shard 0 with done == false.
// NOTE(review): the header and fields of that nested type are missing from this excerpt.
228 LogListCtx() : cur_shard(0), done(false) {}
// Listing is split into init / list / complete around an opaque void* handle.
// init takes a shard id, a from/end time pair and a marker, and writes the handle.
231 void init_list_entries(int shard_id
, const real_time
& from_time
, const real_time
& end_time
, string
& marker
, void **handle
);
232 void complete_list_entries(void *handle
);
// NOTE(review): only part of list_entries' parameter list is visible here
233 int list_entries(void *handle
,
235 list
<cls_log_entry
>& entries
,
// trim: takes a shard id, a from/end time pair and a start/end marker pair
239 int trim(int shard_id
, const real_time
& from_time
, const real_time
& end_time
, const string
& start_marker
, const string
& end_marker
);
// get_info: synchronous form, writes into *info
240 int get_info(int shard_id
, RGWMetadataLogInfo
*info
);
// get_info_async: takes an RGWMetadataLogInfoCompletion instead of an output struct
241 int get_info_async(int shard_id
, RGWMetadataLogInfoCompletion
*completion
);
// lock_exclusive / unlock: per-shard, identified by zone_id and owner_id;
// lock_exclusive also takes a duration
242 int lock_exclusive(int shard_id
, timespan duration
, string
&zone_id
, string
& owner_id
);
243 int unlock(int shard_id
, string
& zone_id
, string
& owner_id
);
// update_shards: takes a list of int shard ids
245 int update_shards(list
<int>& shards
);
// read_clear_modified: takes an output set<int>
// (presumably returns and resets modified_shards - confirm in the definition)
247 void read_clear_modified(set
<int> &modified
);
// Small wrapper around an RGWMDLogStatus value that gives it a dump() method.
// NOTE(review): the closing brace is not visible in this excerpt.
250 struct LogStatusDump
{
251 RGWMDLogStatus status
;
// explicit, so a status value is never converted to LogStatusDump implicitly
253 explicit LogStatusDump(RGWMDLogStatus _status
) : status(_status
) {}
// declaration only; defined outside this excerpt
254 void dump(Formatter
*f
) const;
// Log payload holding a read version, a write version and a status.
// The four encode/decode/dump/decode_json methods are only declared here.
// NOTE(review): the closing brace is not visible in this excerpt.
257 struct RGWMetadataLogData
{
258 obj_version read_version
;
259 obj_version write_version
;
260 RGWMDLogStatus status
;
// a default-constructed record starts with status MDLOG_STATUS_UNKNOWN
262 RGWMetadataLogData() : status(MDLOG_STATUS_UNKNOWN
) {}
// binary form: encode writes to a bufferlist, decode reads from a bufferlist iterator
264 void encode(bufferlist
& bl
) const;
265 void decode(bufferlist::iterator
& bl
);
// Formatter / JSON form
266 void dump(Formatter
*f
) const;
267 void decode_json(JSONObj
*obj
);
269 WRITE_CLASS_ENCODER(RGWMetadataLogData
)
// Record of the oldest log period: a realm epoch and a period id.
// encode() and decode() handle the two fields in the same order.
// NOTE(review): the closing parts of encode()/decode() and of the struct are
// not visible in this excerpt.
271 struct RGWMetadataLogHistory
{
272 epoch_t oldest_realm_epoch
;
273 std::string oldest_period_id
;
// encode: opens with ENCODE_START(1, 1, bl), then writes the epoch followed by the period id
275 void encode(bufferlist
& bl
) const {
276 ENCODE_START(1, 1, bl
);
277 ::encode(oldest_realm_epoch
, bl
);
278 ::encode(oldest_period_id
, bl
);
// decode: reads the epoch followed by the period id, matching encode()
281 void decode(bufferlist::iterator
& p
) {
283 ::decode(oldest_realm_epoch
, p
);
284 ::decode(oldest_period_id
, p
);
// static string defined outside this excerpt
// (presumably the object id under which this record is stored - confirm)
288 static const std::string oid
;
290 WRITE_CLASS_ENCODER(RGWMetadataLogHistory
)
292 class RGWMetadataManager
{
// handlers: map from a string key to a handler pointer
// (presumably keyed by the handler's get_type() - confirm in register_handler)
293 map
<string
, RGWMetadataHandler
*> handlers
;
297 // maintain a separate metadata log for each period
// md_logs stores the RGWMetadataLog objects by value, keyed by a std::string
298 std::map
<std::string
, RGWMetadataLog
> md_logs
;
299 // use the current period's log for mutating operations
// starts as nullptr
300 RGWMetadataLog
* current_log
= nullptr;
// Helper declarations; all are defined outside this excerpt, so the notes
// below describe signatures only.
// parse_metadata_key: takes a metadata key and fills the type and entry strings
302 void parse_metadata_key(const string
& metadata_key
, string
& type
, string
& entry
);
// find_handler: takes a metadata key, writes a handler pointer and an entry string
304 int find_handler(const string
& metadata_key
, RGWMetadataHandler
**handler
, string
& entry
);
// pre_modify / post_modify share an RGWMetadataLogData and a version tracker;
// post_modify also receives an int ret
// (presumably they bracket a mutating operation - confirm in the definitions)
305 int pre_modify(RGWMetadataHandler
*handler
, string
& section
, const string
& key
,
306 RGWMetadataLogData
& log_data
, RGWObjVersionTracker
*objv_tracker
,
307 RGWMDLogStatus op_type
);
308 int post_modify(RGWMetadataHandler
*handler
, const string
& section
, const string
& key
, RGWMetadataLogData
& log_data
,
309 RGWObjVersionTracker
*objv_tracker
, int ret
);
// heap helpers: heap_oid returns a string built from handler, key and version;
// store_in_heap / remove_from_heap take the handler, key and version tracker
311 string
heap_oid(RGWMetadataHandler
*handler
, const string
& key
, const obj_version
& objv
);
312 int store_in_heap(RGWMetadataHandler
*handler
, const string
& key
, bufferlist
& bl
,
313 RGWObjVersionTracker
*objv_tracker
, real_time mtime
,
314 map
<string
, bufferlist
> *pattrs
);
315 int remove_from_heap(RGWMetadataHandler
*handler
, const string
& key
, RGWObjVersionTracker
*objv_tracker
);
// Construction, initialisation and log-period API; declarations only.
// The constructor takes the CephContext and the RGWRados store.
317 RGWMetadataManager(CephContext
*_cct
, RGWRados
*_store
);
318 ~RGWMetadataManager();
// init: takes the id of the current period
320 int init(const std::string
& current_period
);
322 /// initialize the oldest log period if it doesn't exist, and attach it to
323 /// our current history
324 RGWPeriodHistory::Cursor
init_oldest_log_period();
326 /// read the oldest log period, and return a cursor to it in our existing
328 RGWPeriodHistory::Cursor
read_oldest_log_period() const;
330 /// read the oldest log period asynchronously and write its result to the
331 /// given cursor pointer
332 RGWCoroutine
* read_oldest_log_period_cr(RGWPeriodHistory::Cursor
*period
,
333 RGWObjVersionTracker
*objv
) const;
335 /// try to advance the oldest log period when the given period is trimmed,
336 /// using a rados lock to provide atomicity
337 RGWCoroutine
* trim_log_period_cr(RGWPeriodHistory::Cursor period
,
338 RGWObjVersionTracker
*objv
) const;
340 /// find or create the metadata log for the given period
341 RGWMetadataLog
* get_log(const std::string
& period
);
// register_handler takes a handler pointer; get_handler looks one up by a type string
343 int register_handler(RGWMetadataHandler
*handler
);
345 RGWMetadataHandler
*get_handler(const string
& type
);
347 int put_entry(RGWMetadataHandler
*handler
, const string
& key
, bufferlist
& bl
, bool exclusive
,
348 RGWObjVersionTracker
*objv_tracker
, real_time mtime
, map
<string
, bufferlist
> *pattrs
= NULL
);
// Declarations only; defined outside this excerpt.
// remove_entry: takes the handler, the entry key and a version tracker
349 int remove_entry(RGWMetadataHandler
*handler
, string
& key
, RGWObjVersionTracker
*objv_tracker
);
// get: takes a metadata key and a Formatter
// (presumably the entry is dumped into the Formatter - confirm in the definition)
350 int get(string
& metadata_key
, Formatter
*f
);
351 int put(string
& metadata_key
, bufferlist
& bl
,
352 RGWMetadataHandler::sync_type_t sync_mode
,
353 obj_version
*existing_version
= NULL
);
// Remaining manager operations; declarations only, defined outside this excerpt.
// remove: takes a metadata key
354 int remove(string
& metadata_key
);
// Key listing by section, using an opaque void* handle; the second
// list_keys_init overload additionally takes a marker string.
356 int list_keys_init(string
& section
, void **phandle
);
357 int list_keys_init(string
& section
, const string
& marker
, void **phandle
);
// list_keys_next takes a max count, a list<string> for keys and a bool* truncated flag
358 int list_keys_next(void *handle
, int max
, list
<string
>& keys
, bool *truncated
);
359 void list_keys_complete(void *handle
);
// returns a string for the given listing handle
361 string
get_marker(void *handle
);
// dump_log_entry: takes a cls_log_entry and a Formatter
363 void dump_log_entry(cls_log_entry
& entry
, Formatter
*f
);
// get_sections: takes an output list<string>
// (presumably the registered handler types - confirm in the definition)
365 void get_sections(list
<string
>& sections
);
// lock_exclusive / unlock: addressed by metadata key and owner id;
// lock_exclusive also takes a duration
366 int lock_exclusive(string
& metadata_key
, timespan duration
, string
& owner_id
);
367 int unlock(string
& metadata_key
, string
& owner_id
);
// get_log_shard_id: takes a section and key, writes the result through *shard_id
369 int get_log_shard_id(const string
& section
, const string
& key
, int *shard_id
);