1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #ifndef CEPH_RGW_METADATA_H
5 #define CEPH_RGW_METADATA_H
8 #include <boost/optional.hpp>
10 #include "include/types.h"
11 #include "rgw_common.h"
12 #include "rgw_period_history.h"
13 #include "cls/version/cls_version_types.h"
14 #include "cls/log/cls_log_types.h"
15 #include "common/RWLock.h"
16 #include "common/RefCountedObj.h"
17 #include "common/ceph_time.h"
23 struct RGWObjVersionTracker
;
// NOTE(review): these two enumerators appear without their enclosing enum
// header in this listing. Presumably they belong to RGWMDLogStatus, the type
// used for the status fields further down -- confirm against the full file.
// Other enumerators (for example MDLOG_STATUS_UNKNOWN, used below) are not
// visible here.
31 MDLOG_STATUS_SETATTRS
,
33 MDLOG_STATUS_COMPLETE
,
// Abstract base for metadata objects: holds a modification time and leaves
// dump() for subclasses to implement.
// NOTE(review): this listing appears to drop some lines of the class (access
// specifiers, the closing brace, possibly other members) -- confirm against
// the full header before relying on this view.
37 class RGWMetadataObject
{
// Modification time handed back by get_mtime().
40 ceph::real_time mtime
;
43 RGWMetadataObject() {}
44 virtual ~RGWMetadataObject() {}
// Returns a mutable reference to an obj_version. Only declared here; what it
// refers to is not visible in this listing.
45 obj_version
& get_version();
// Returns mtime by value.
46 real_time
get_mtime() { return mtime
; }
// Pure virtual; takes the Formatter to dump into (presumably writes the
// fields of the object to it -- the contract is set by the overrides).
48 virtual void dump(Formatter
*f
) const = 0;
51 class RGWMetadataManager
;
// Abstract handler interface: most of the member functions declared in this
// class are pure virtual. RGWMetadataManager is declared a friend.
53 class RGWMetadataHandler
{
54 friend class RGWMetadataManager
;
// Maps a sync-mode string onto a sync type. The visible branches recognise
// the strings update-by-version, update-by-timestamp and always, compared
// exactly with string::compare.
// NOTE(review): the second parameter, the statements executed in each branch
// and the return statements are missing from this listing. Presumably each
// branch assigns the matching sync_type_t and an unknown string yields
// false -- confirm against the full header.
62 static bool string_to_sync_type(const string
& sync_string
,
64 if (sync_string
.compare("update-by-version") == 0)
66 else if (sync_string
.compare("update-by-timestamp") == 0)
68 else if (sync_string
.compare("always") == 0)
// Virtual destructor with an empty body.
74 virtual ~RGWMetadataHandler() {}
// Pure virtual; returns a string naming the handler type (presumably the
// section name the handler serves -- confirm).
75 virtual string
get_type() = 0;
// Pure virtual per-entry operations. Each takes the store and the entry name
// and returns an int; the meaning of the return codes is not visible here.
// get: hands a RGWMetadataObject back through the obj out-pointer.
77 virtual int get(RGWRados
*store
, string
& entry
, RGWMetadataObject
**obj
) = 0;
// put: takes a version tracker, an mtime, a JSONObj and the sync type.
78 virtual int put(RGWRados
*store
, string
& entry
, RGWObjVersionTracker
& objv_tracker
,
79 real_time mtime
, JSONObj
*obj
, sync_type_t type
) = 0;
// remove: takes the entry name and a version tracker.
80 virtual int remove(RGWRados
*store
, string
& entry
, RGWObjVersionTracker
& objv_tracker
) = 0;
// Pure virtual key-listing hooks built around an opaque void* handle:
// list_keys_init yields the handle through phandle, list_keys_next fills keys
// and sets *truncated (max is presumably an upper bound on returned keys),
// list_keys_complete takes the handle back (presumably to release it).
82 virtual int list_keys_init(RGWRados
*store
, void **phandle
) = 0;
83 virtual int list_keys_next(void *handle
, int max
, list
<string
>& keys
, bool *truncated
) = 0;
84 virtual void list_keys_complete(void *handle
) = 0;
86 /* key to use for hashing entries for log shard placement */
// Default implementation: hash_key becomes section, a colon, then key.
// Virtual, so a handler may override it.
// NOTE(review): the closing brace of this body is missing from the listing.
87 virtual void get_hash_key(const string
& section
, const string
& key
, string
& hash_key
) {
88 hash_key
= section
+ ":" + key
;
// Pure virtual; fills the pool and oid out-parameters for the given key.
92 virtual void get_pool_and_oid(RGWRados
*store
, const string
& key
, rgw_pool
& pool
, string
& oid
) = 0;
// NOTE(review): the three lines below are the interior of a block comment
// whose opening and closing markers are missing from this listing.
94 * Compare an incoming versus on-disk tag/version+mtime combo against
95 * the sync mode to see if the new one should replace the on-disk one.
97 * @return true if the update should proceed, false otherwise.
// NOTE(review): only fragments of the body survive. The visible conditions
// are: tag differs or on-disk ver >= incoming ver, and on-disk time >=
// incoming time. The switch over sync_mode, the other case labels and every
// return statement are not visible; APPLY_ALWAYS is the only case label
// shown. Which condition belongs to which sync mode must be confirmed
// against the full header.
99 bool check_versions(const obj_version
& ondisk
, const real_time
& ondisk_time
,
100 const obj_version
& incoming
, const real_time
& incoming_time
,
101 sync_type_t sync_mode
) {
104 if ((ondisk
.tag
!= incoming
.tag
) ||
105 (ondisk
.ver
>= incoming
.ver
))
109 if (ondisk_time
>= incoming_time
)
112 case APPLY_ALWAYS
: //deliberate fall-thru -- we always apply!
// NOTE(review): the line below is the tail of a block comment whose other
// lines are missing from this listing.
119 * The tenant_name is always returned on purpose. May be empty, of course.
// Splits a bucket string into tenant name, bucket name and an optional
// bucket instance, written through pointer out-parameters.
// Visible steps: search bucket for the first slash; either copy the text
// before it into *tenant_name or clear tenant_name; take the remainder as bn;
// then either move bn whole into *bucket_name, or copy its first pos
// characters into *bucket_name and, when bucket_instance is non-null, copy
// the text after pos into *bucket_instance.
// NOTE(review): the tenant_name and bucket_name parameters, every branch
// condition, and the statement that recomputes pos for the second split are
// missing from this listing -- confirm against the full header.
// NOTE(review): pos is an int receiving the result of string::find;
// presumably the not-found case is detected as a negative value -- confirm.
121 static void parse_bucket(const string
& bucket
,
124 string
*bucket_instance
= nullptr /* optional */)
126 int pos
= bucket
.find('/');
128 *tenant_name
= bucket
.substr(0, pos
);
130 tenant_name
->clear();
132 string bn
= bucket
.substr(pos
+ 1);
135 *bucket_name
= std::move(bn
);
138 *bucket_name
= bn
.substr(0, pos
);
139 if (bucket_instance
) {
140 *bucket_instance
= bn
.substr(pos
+ 1);
145 #define META_LOG_OBJ_PREFIX "meta.log."
// Log info record: a last-update timestamp plus dump and decode_json
// declarations (both defined elsewhere).
// NOTE(review): inner line 148 is absent from this listing, so the struct may
// have another member that is not shown; the closing brace is also missing.
147 struct RGWMetadataLogInfo
{
149 real_time last_update
;
// Takes the Formatter to dump into.
151 void dump(Formatter
*f
) const;
// Takes the JSONObj to decode from.
152 void decode_json(JSONObj
*obj
);
155 class RGWCompletionManager
;
// Ref-counted holder for an asynchronous log-info request: owns the header
// buffer, the IoCtx, the AioCompletion pointer and an optional callback that
// receives (int, const cls_log_header&).
// NOTE(review): access specifiers and closing braces are missing from this
// listing.
157 class RGWMetadataLogInfoCompletion
: public RefCountedObject
{
// Callback signature: an int plus the log header.
159 using info_callback_t
= std::function
<void(int, const cls_log_header
&)>;
161 cls_log_header header
;
162 librados::IoCtx io_ctx
;
163 librados::AioCompletion
*completion
;
164 std::mutex mutex
; //< protects callback between cancel/complete
165 boost::optional
<info_callback_t
> callback
; //< cleared on cancel
// Constructor and destructor are declared only; definitions are elsewhere.
167 RGWMetadataLogInfoCompletion(info_callback_t callback
);
168 ~RGWMetadataLogInfoCompletion() override
;
// Accessors returning mutable references / the raw completion pointer.
170 librados::IoCtx
& get_io_ctx() { return io_ctx
; }
171 cls_log_header
& get_header() { return header
; }
172 librados::AioCompletion
* get_completion() { return completion
; }
// Takes the mutex, then invokes the stored callback with the return value of
// the completion and the header. The cb argument is not used in the visible
// lines.
// NOTE(review): inner line 176, between the lock and the call, is missing;
// presumably it checks that the optional callback is still set -- confirm.
174 void finish(librados::completion_t cb
) {
175 std::lock_guard
<std::mutex
> lock(mutex
);
177 (*callback
)(completion
->get_return_value(), header
);
// NOTE(review): the header of the function owning the next two statements is
// missing. Going by the member comments above this is presumably the cancel
// path: it takes the mutex and resets callback to boost::none.
181 std::lock_guard
<std::mutex
> lock(mutex
);
182 callback
= boost::none
;
// Metadata log bound to one period string; the constructor derives an object
// name prefix from the period via make_prefix.
// NOTE(review): several member declarations (cct, store, prefix, lock, the
// access specifiers) are initialised below but not visible in this listing.
186 class RGWMetadataLog
{
// Builds the prefix from META_LOG_OBJ_PREFIX. Two return statements are
// visible: the bare prefix, and the prefix followed by period and a dot.
// NOTE(review): the condition choosing between them (inner line 192) is
// missing; presumably the bare prefix is used for an empty period -- confirm.
191 static std::string
make_prefix(const std::string
& period
) {
193 return META_LOG_OBJ_PREFIX
;
194 return META_LOG_OBJ_PREFIX
+ period
+ ".";
// Set of shard ids; see mark_modified and read_clear_modified.
198 set
<int> modified_shards
;
// Declared only; presumably records shard_id in modified_shards -- confirm.
200 void mark_modified(int shard_id
);
// Stores the context and store pointers, computes the prefix from period and
// constructs the lock with the name RGWMetaLog::lock.
202 RGWMetadataLog(CephContext
*_cct
, RGWRados
*_store
, const std::string
& period
)
203 : cct(_cct
), store(_store
),
204 prefix(make_prefix(period
)),
205 lock("RGWMetaLog::lock") {}
// Formats the shard id as decimal text into a local buffer.
// NOTE(review): the buffer declaration and the statement that assigns oid
// are missing; presumably oid becomes the prefix followed by that text.
207 void get_shard_oid(int id
, string
& oid
) const {
209 snprintf(buf
, sizeof(buf
), "%d", id
);
// Member declarations of the metadata log; definitions are not in this
// listing, so return-code meanings are not visible here.
// add_entry: takes the handler, section, key and a bufferlist payload.
213 int add_entry(RGWMetadataHandler
*handler
, const string
& section
, const string
& key
, bufferlist
& bl
);
// store_entries_in_shard: takes a list of log entries, the target shard id
// and an AioCompletion pointer.
214 int store_entries_in_shard(list
<cls_log_entry
>& entries
, int shard_id
, librados::AioCompletion
*completion
);
// NOTE(review): constructor of a nested LogListCtx type whose declaration is
// missing from this listing; it zeroes cur_shard and clears done.
226 LogListCtx() : cur_shard(0), done(false) {}
// Listing API built around an opaque void* handle: init_list_entries yields
// the handle for a shard, time range and marker; complete_list_entries takes
// the handle back (presumably to release it).
229 void init_list_entries(int shard_id
, const real_time
& from_time
, const real_time
& end_time
, string
& marker
, void **handle
);
230 void complete_list_entries(void *handle
);
// NOTE(review): the parameter list of list_entries is only partly visible
// (inner lines 232 and 234-236 are missing).
231 int list_entries(void *handle
,
233 list
<cls_log_entry
>& entries
,
// trim: takes a shard id, a time range and start/end markers.
237 int trim(int shard_id
, const real_time
& from_time
, const real_time
& end_time
, const string
& start_marker
, const string
& end_marker
);
// get_info fills *info for a shard; get_info_async takes a completion object
// instead.
238 int get_info(int shard_id
, RGWMetadataLogInfo
*info
);
239 int get_info_async(int shard_id
, RGWMetadataLogInfoCompletion
*completion
);
// Per-shard lock and unlock, identified by zone id and owner id;
// lock_exclusive also takes a duration.
240 int lock_exclusive(int shard_id
, timespan duration
, string
&zone_id
, string
& owner_id
);
241 int unlock(int shard_id
, string
& zone_id
, string
& owner_id
);
// update_shards: takes a list of shard ids.
243 int update_shards(list
<int>& shards
);
// read_clear_modified: takes a set of ints by reference; presumably hands
// back modified_shards and empties it -- confirm.
245 void read_clear_modified(set
<int> &modified
);
// Wrapper holding one RGWMDLogStatus value together with a dump declaration
// (defined elsewhere).
// NOTE(review): the closing brace is missing from this listing.
248 struct LogStatusDump
{
249 RGWMDLogStatus status
;
// explicit, so an RGWMDLogStatus does not convert implicitly.
251 explicit LogStatusDump(RGWMDLogStatus _status
) : status(_status
) {}
// Takes the Formatter to dump into.
252 void dump(Formatter
*f
) const;
// Log payload: a read version, a write version and a status value, with
// encode/decode and dump/decode_json declarations (all defined elsewhere).
// NOTE(review): the closing brace is missing from this listing.
255 struct RGWMetadataLogData
{
256 obj_version read_version
;
257 obj_version write_version
;
258 RGWMDLogStatus status
;
// Default state: status starts as MDLOG_STATUS_UNKNOWN.
260 RGWMetadataLogData() : status(MDLOG_STATUS_UNKNOWN
) {}
262 void encode(bufferlist
& bl
) const;
263 void decode(bufferlist::iterator
& bl
);
264 void dump(Formatter
*f
) const;
265 void decode_json(JSONObj
*obj
);
// Macro invocation on the type; presumably generates the free encode/decode
// wrappers -- the macro is defined outside this file.
267 WRITE_CLASS_ENCODER(RGWMetadataLogData
)
// Record of the oldest log period: a realm epoch and a period id, with inline
// encode and decode.
// NOTE(review): closing braces and some body lines are missing from this
// listing (see the notes on encode and decode).
269 struct RGWMetadataLogHistory
{
270 epoch_t oldest_realm_epoch
;
271 std::string oldest_period_id
;
// Encodes the epoch first, then the period id, after ENCODE_START(1, 1, bl).
// NOTE(review): inner lines 277-278 are missing; presumably the matching
// finish macro and the closing brace -- confirm.
273 void encode(bufferlist
& bl
) const {
274 ENCODE_START(1, 1, bl
);
275 ::encode(oldest_realm_epoch
, bl
);
276 ::encode(oldest_period_id
, bl
);
// Decodes the two fields in the same order they are encoded.
// NOTE(review): inner lines 280 and 283-284 are missing; presumably the
// start and finish macros matching the encoder -- confirm.
279 void decode(bufferlist::iterator
& p
) {
281 ::decode(oldest_realm_epoch
, p
);
282 ::decode(oldest_period_id
, p
);
// Static string declared here and defined elsewhere.
286 static const std::string oid
;
// Macro invocation on the type; presumably generates the free encode/decode
// wrappers -- the macro is defined outside this file.
288 WRITE_CLASS_ENCODER(RGWMetadataLogHistory
)
// Manager that keeps a map of handlers keyed by string and a map of metadata
// logs keyed by period string.
// NOTE(review): inner lines 292-294 and the access specifiers are missing
// from this listing, and the class continues past the end of this view.
290 class RGWMetadataManager
{
// Handlers are held as raw pointers; ownership is not visible here.
291 map
<string
, RGWMetadataHandler
*> handlers
;
295 // maintain a separate metadata log for each period
296 std::map
<std::string
, RGWMetadataLog
> md_logs
;
297 // use the current period's log for mutating operations
// Starts out null.
298 RGWMetadataLog
* current_log
= nullptr;
// Helper declarations; definitions are not in this listing, so return-code
// meanings are not visible here.
// parse_metadata_key: takes a metadata key and fills the type and entry
// out-parameters (the key format is not visible here).
300 void parse_metadata_key(const string
& metadata_key
, string
& type
, string
& entry
);
// find_handler: takes a metadata key, yields a handler pointer and the entry.
302 int find_handler(const string
& metadata_key
, RGWMetadataHandler
**handler
, string
& entry
);
// pre_modify and post_modify share the handler, section, key, log data and
// version tracker parameters; pre_modify also takes the operation status,
// post_modify takes an int ret (presumably the result of the modification
// performed between the two calls -- confirm).
303 int pre_modify(RGWMetadataHandler
*handler
, string
& section
, const string
& key
,
304 RGWMetadataLogData
& log_data
, RGWObjVersionTracker
*objv_tracker
,
305 RGWMDLogStatus op_type
);
306 int post_modify(RGWMetadataHandler
*handler
, const string
& section
, const string
& key
, RGWMetadataLogData
& log_data
,
307 RGWObjVersionTracker
*objv_tracker
, int ret
);
// Heap helpers: heap_oid returns a string built from handler, key and object
// version; store_in_heap and remove_from_heap take the handler, key and a
// version tracker (store also takes the payload, mtime and attribute map).
309 string
heap_oid(RGWMetadataHandler
*handler
, const string
& key
, const obj_version
& objv
);
310 int store_in_heap(RGWMetadataHandler
*handler
, const string
& key
, bufferlist
& bl
,
311 RGWObjVersionTracker
*objv_tracker
, real_time mtime
,
312 map
<string
, bufferlist
> *pattrs
);
313 int remove_from_heap(RGWMetadataHandler
*handler
, const string
& key
, RGWObjVersionTracker
*objv_tracker
);
// Construction, initialisation and per-period log access. All declared only;
// definitions are not in this listing.
315 RGWMetadataManager(CephContext
*_cct
, RGWRados
*_store
);
316 ~RGWMetadataManager();
// init: takes the current period string and returns an int status.
318 int init(const std::string
& current_period
);
320 /// initialize the oldest log period if it doesn't exist, and attach it to
321 /// our current history
322 RGWPeriodHistory::Cursor
init_oldest_log_period();
324 /// read the oldest log period, and return a cursor to it in our existing
// NOTE(review): the end of the comment above (inner line 325) is missing
// from this listing.
326 RGWPeriodHistory::Cursor
read_oldest_log_period() const;
328 /// read the oldest log period asynchronously and write its result to the
329 /// given cursor pointer
330 RGWCoroutine
* read_oldest_log_period_cr(RGWPeriodHistory::Cursor
*period
,
331 RGWObjVersionTracker
*objv
) const;
333 /// try to advance the oldest log period when the given period is trimmed,
334 /// using a rados lock to provide atomicity
335 RGWCoroutine
* trim_log_period_cr(RGWPeriodHistory::Cursor period
,
336 RGWObjVersionTracker
*objv
) const;
338 /// find or create the metadata log for the given period
339 RGWMetadataLog
* get_log(const std::string
& period
);
// Handler registration and entry operations. All declared only; definitions
// are not in this listing, so return-code meanings are not visible here.
// register_handler takes a handler pointer; get_handler looks one up by type
// string.
341 int register_handler(RGWMetadataHandler
*handler
);
343 RGWMetadataHandler
*get_handler(const string
& type
);
// put_entry: takes the handler, key, payload, an exclusive flag, a version
// tracker, an mtime and an optional attribute map (defaults to NULL).
345 int put_entry(RGWMetadataHandler
*handler
, const string
& key
, bufferlist
& bl
, bool exclusive
,
346 RGWObjVersionTracker
*objv_tracker
, real_time mtime
, map
<string
, bufferlist
> *pattrs
= NULL
);
347 int remove_entry(RGWMetadataHandler
*handler
, string
& key
, RGWObjVersionTracker
*objv_tracker
);
// Operations addressed by metadata key: get takes a Formatter; put takes the
// payload, the sync mode and an optional obj_version pointer (defaults to
// NULL); remove takes only the key.
348 int get(string
& metadata_key
, Formatter
*f
);
349 int put(string
& metadata_key
, bufferlist
& bl
,
350 RGWMetadataHandler::sync_type_t sync_mode
,
351 obj_version
*existing_version
= NULL
);
352 int remove(string
& metadata_key
);
// Key listing for a section, built around an opaque void* handle; the same
// init / next / complete shape as the handler interface.
354 int list_keys_init(string
& section
, void **phandle
);
355 int list_keys_next(void *handle
, int max
, list
<string
>& keys
, bool *truncated
);
356 void list_keys_complete(void *handle
);
// dump_log_entry: takes a log entry and the Formatter to dump into.
358 void dump_log_entry(cls_log_entry
& entry
, Formatter
*f
);
// get_sections: fills the given list of strings.
360 void get_sections(list
<string
>& sections
);
// Lock and unlock addressed by metadata key and owner id; lock_exclusive
// also takes a duration.
361 int lock_exclusive(string
& metadata_key
, timespan duration
, string
& owner_id
);
362 int unlock(string
& metadata_key
, string
& owner_id
);
// get_log_shard_id: takes a section and key and writes the shard id through
// the shard_id pointer.
364 int get_log_shard_id(const string
& section
, const string
& key
, int *shard_id
);