]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw_metadata.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rgw / rgw_metadata.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#ifndef CEPH_RGW_METADATA_H
5#define CEPH_RGW_METADATA_H
6
7#include <string>
11fdf7f2 8#include <utility>
7c673cae
FG
9#include <boost/optional.hpp>
10
11#include "include/types.h"
12#include "rgw_common.h"
13#include "rgw_period_history.h"
14#include "cls/version/cls_version_types.h"
15#include "cls/log/cls_log_types.h"
16#include "common/RWLock.h"
17#include "common/RefCountedObj.h"
18#include "common/ceph_time.h"
19
20
21class RGWRados;
22class RGWCoroutine;
23class JSONObj;
24struct RGWObjVersionTracker;
25
26struct obj_version;
27
28
/// Status values carried by RGWMetadataLogData and LogStatusDump.
/// The numeric values are spelled out explicitly; they are the same values
/// the compiler assigns implicitly (0 through 5, in declaration order).
enum RGWMDLogStatus {
  MDLOG_STATUS_UNKNOWN  = 0,
  MDLOG_STATUS_WRITE    = 1,
  MDLOG_STATUS_SETATTRS = 2,
  MDLOG_STATUS_REMOVE   = 3,
  MDLOG_STATUS_COMPLETE = 4,
  MDLOG_STATUS_ABORT    = 5,
};
37
38class RGWMetadataObject {
39protected:
40 obj_version objv;
41 ceph::real_time mtime;
42
43public:
44 RGWMetadataObject() {}
45 virtual ~RGWMetadataObject() {}
46 obj_version& get_version();
47 real_time get_mtime() { return mtime; }
48
49 virtual void dump(Formatter *f) const = 0;
50};
51
52class RGWMetadataManager;
53
54class RGWMetadataHandler {
55 friend class RGWMetadataManager;
56
57public:
58 enum sync_type_t {
59 APPLY_ALWAYS,
60 APPLY_UPDATES,
61 APPLY_NEWER
62 };
63 static bool string_to_sync_type(const string& sync_string,
64 sync_type_t& type) {
65 if (sync_string.compare("update-by-version") == 0)
66 type = APPLY_UPDATES;
67 else if (sync_string.compare("update-by-timestamp") == 0)
68 type = APPLY_NEWER;
69 else if (sync_string.compare("always") == 0)
70 type = APPLY_ALWAYS;
71 else
72 return false;
73 return true;
74 }
11fdf7f2 75
7c673cae
FG
76 virtual ~RGWMetadataHandler() {}
77 virtual string get_type() = 0;
78
79 virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) = 0;
80 virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
81 real_time mtime, JSONObj *obj, sync_type_t type) = 0;
82 virtual int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) = 0;
83
181888fb 84 virtual int list_keys_init(RGWRados *store, const string& marker, void **phandle) = 0;
7c673cae
FG
85 virtual int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) = 0;
86 virtual void list_keys_complete(void *handle) = 0;
87
181888fb
FG
88 virtual string get_marker(void *handle) = 0;
89
7c673cae
FG
90 /* key to use for hashing entries for log shard placement */
91 virtual void get_hash_key(const string& section, const string& key, string& hash_key) {
92 hash_key = section + ":" + key;
93 }
94
95protected:
96 virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) = 0;
97 /**
98 * Compare an incoming versus on-disk tag/version+mtime combo against
99 * the sync mode to see if the new one should replace the on-disk one.
100 *
101 * @return true if the update should proceed, false otherwise.
102 */
11fdf7f2
TL
103 static bool check_versions(const obj_version& ondisk, const real_time& ondisk_time,
104 const obj_version& incoming, const real_time& incoming_time,
105 sync_type_t sync_mode) {
7c673cae
FG
106 switch (sync_mode) {
107 case APPLY_UPDATES:
108 if ((ondisk.tag != incoming.tag) ||
109 (ondisk.ver >= incoming.ver))
110 return false;
111 break;
112 case APPLY_NEWER:
113 if (ondisk_time >= incoming_time)
114 return false;
115 break;
116 case APPLY_ALWAYS: //deliberate fall-thru -- we always apply!
117 default: break;
118 }
119 return true;
120 }
121
122 /*
123 * The tenant_name is always returned on purpose. May be empty, of course.
124 */
125 static void parse_bucket(const string& bucket,
126 string *tenant_name,
127 string *bucket_name,
128 string *bucket_instance = nullptr /* optional */)
129 {
130 int pos = bucket.find('/');
131 if (pos >= 0) {
132 *tenant_name = bucket.substr(0, pos);
133 } else {
134 tenant_name->clear();
135 }
136 string bn = bucket.substr(pos + 1);
137 pos = bn.find (':');
138 if (pos < 0) {
139 *bucket_name = std::move(bn);
140 return;
141 }
142 *bucket_name = bn.substr(0, pos);
143 if (bucket_instance) {
144 *bucket_instance = bn.substr(pos + 1);
145 }
146 }
147};
148
149#define META_LOG_OBJ_PREFIX "meta.log."
150
151struct RGWMetadataLogInfo {
152 string marker;
153 real_time last_update;
154
155 void dump(Formatter *f) const;
156 void decode_json(JSONObj *obj);
157};
158
159class RGWCompletionManager;
160
161class RGWMetadataLogInfoCompletion : public RefCountedObject {
162 public:
163 using info_callback_t = std::function<void(int, const cls_log_header&)>;
164 private:
165 cls_log_header header;
166 librados::IoCtx io_ctx;
167 librados::AioCompletion *completion;
168 std::mutex mutex; //< protects callback between cancel/complete
169 boost::optional<info_callback_t> callback; //< cleared on cancel
170 public:
11fdf7f2 171 explicit RGWMetadataLogInfoCompletion(info_callback_t callback);
7c673cae
FG
172 ~RGWMetadataLogInfoCompletion() override;
173
174 librados::IoCtx& get_io_ctx() { return io_ctx; }
175 cls_log_header& get_header() { return header; }
176 librados::AioCompletion* get_completion() { return completion; }
177
178 void finish(librados::completion_t cb) {
179 std::lock_guard<std::mutex> lock(mutex);
180 if (callback) {
181 (*callback)(completion->get_return_value(), header);
182 }
183 }
184 void cancel() {
185 std::lock_guard<std::mutex> lock(mutex);
186 callback = boost::none;
187 }
188};
189
190class RGWMetadataLog {
191 CephContext *cct;
192 RGWRados *store;
193 const string prefix;
194
195 static std::string make_prefix(const std::string& period) {
196 if (period.empty())
197 return META_LOG_OBJ_PREFIX;
198 return META_LOG_OBJ_PREFIX + period + ".";
199 }
200
201 RWLock lock;
202 set<int> modified_shards;
203
204 void mark_modified(int shard_id);
205public:
206 RGWMetadataLog(CephContext *_cct, RGWRados *_store, const std::string& period)
207 : cct(_cct), store(_store),
208 prefix(make_prefix(period)),
209 lock("RGWMetaLog::lock") {}
210
211 void get_shard_oid(int id, string& oid) const {
212 char buf[16];
213 snprintf(buf, sizeof(buf), "%d", id);
214 oid = prefix + buf;
215 }
216
217 int add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl);
218 int store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion);
219
220 struct LogListCtx {
221 int cur_shard;
222 string marker;
223 real_time from_time;
224 real_time end_time;
225
226 string cur_oid;
227
228 bool done;
229
230 LogListCtx() : cur_shard(0), done(false) {}
231 };
232
233 void init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time, string& marker, void **handle);
234 void complete_list_entries(void *handle);
235 int list_entries(void *handle,
236 int max_entries,
237 list<cls_log_entry>& entries,
238 string *out_marker,
239 bool *truncated);
240
241 int trim(int shard_id, const real_time& from_time, const real_time& end_time, const string& start_marker, const string& end_marker);
242 int get_info(int shard_id, RGWMetadataLogInfo *info);
243 int get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion);
244 int lock_exclusive(int shard_id, timespan duration, string&zone_id, string& owner_id);
245 int unlock(int shard_id, string& zone_id, string& owner_id);
246
247 int update_shards(list<int>& shards);
248
249 void read_clear_modified(set<int> &modified);
250};
251
252struct LogStatusDump {
253 RGWMDLogStatus status;
254
255 explicit LogStatusDump(RGWMDLogStatus _status) : status(_status) {}
256 void dump(Formatter *f) const;
257};
258
259struct RGWMetadataLogData {
260 obj_version read_version;
261 obj_version write_version;
262 RGWMDLogStatus status;
263
264 RGWMetadataLogData() : status(MDLOG_STATUS_UNKNOWN) {}
265
266 void encode(bufferlist& bl) const;
11fdf7f2 267 void decode(bufferlist::const_iterator& bl);
7c673cae
FG
268 void dump(Formatter *f) const;
269 void decode_json(JSONObj *obj);
270};
271WRITE_CLASS_ENCODER(RGWMetadataLogData)
272
273struct RGWMetadataLogHistory {
274 epoch_t oldest_realm_epoch;
275 std::string oldest_period_id;
276
277 void encode(bufferlist& bl) const {
278 ENCODE_START(1, 1, bl);
11fdf7f2
TL
279 encode(oldest_realm_epoch, bl);
280 encode(oldest_period_id, bl);
7c673cae
FG
281 ENCODE_FINISH(bl);
282 }
11fdf7f2 283 void decode(bufferlist::const_iterator& p) {
7c673cae 284 DECODE_START(1, p);
11fdf7f2
TL
285 decode(oldest_realm_epoch, p);
286 decode(oldest_period_id, p);
7c673cae
FG
287 DECODE_FINISH(p);
288 }
289
290 static const std::string oid;
291};
292WRITE_CLASS_ENCODER(RGWMetadataLogHistory)
293
294class RGWMetadataManager {
295 map<string, RGWMetadataHandler *> handlers;
296 CephContext *cct;
297 RGWRados *store;
298
299 // maintain a separate metadata log for each period
300 std::map<std::string, RGWMetadataLog> md_logs;
301 // use the current period's log for mutating operations
302 RGWMetadataLog* current_log = nullptr;
303
7c673cae
FG
304 int find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry);
305 int pre_modify(RGWMetadataHandler *handler, string& section, const string& key,
306 RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker,
307 RGWMDLogStatus op_type);
308 int post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data,
309 RGWObjVersionTracker *objv_tracker, int ret);
310
311 string heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv);
312 int store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl,
313 RGWObjVersionTracker *objv_tracker, real_time mtime,
314 map<string, bufferlist> *pattrs);
315 int remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker);
11fdf7f2
TL
316 int prepare_mutate(RGWRados *store, rgw_pool& pool, const string& oid,
317 const real_time& mtime,
318 RGWObjVersionTracker *objv_tracker,
319 RGWMetadataHandler::sync_type_t sync_mode);
320
7c673cae
FG
321public:
322 RGWMetadataManager(CephContext *_cct, RGWRados *_store);
323 ~RGWMetadataManager();
324
11fdf7f2
TL
325 RGWRados* get_store() { return store; }
326
7c673cae
FG
327 int init(const std::string& current_period);
328
329 /// initialize the oldest log period if it doesn't exist, and attach it to
330 /// our current history
331 RGWPeriodHistory::Cursor init_oldest_log_period();
332
333 /// read the oldest log period, and return a cursor to it in our existing
334 /// period history
335 RGWPeriodHistory::Cursor read_oldest_log_period() const;
336
337 /// read the oldest log period asynchronously and write its result to the
338 /// given cursor pointer
339 RGWCoroutine* read_oldest_log_period_cr(RGWPeriodHistory::Cursor *period,
340 RGWObjVersionTracker *objv) const;
341
342 /// try to advance the oldest log period when the given period is trimmed,
343 /// using a rados lock to provide atomicity
344 RGWCoroutine* trim_log_period_cr(RGWPeriodHistory::Cursor period,
345 RGWObjVersionTracker *objv) const;
346
347 /// find or create the metadata log for the given period
348 RGWMetadataLog* get_log(const std::string& period);
349
350 int register_handler(RGWMetadataHandler *handler);
351
11fdf7f2
TL
352 template <typename F>
353 int mutate(RGWMetadataHandler *handler, const string& key,
354 const ceph::real_time& mtime, RGWObjVersionTracker *objv_tracker,
355 RGWMDLogStatus op_type,
356 RGWMetadataHandler::sync_type_t sync_mode,
357 F&& f);
358
7c673cae
FG
359 RGWMetadataHandler *get_handler(const string& type);
360
361 int put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive,
362 RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs = NULL);
f64942e4
AA
363 int remove_entry(RGWMetadataHandler *handler,
364 const string& key,
365 RGWObjVersionTracker *objv_tracker);
7c673cae
FG
366 int get(string& metadata_key, Formatter *f);
367 int put(string& metadata_key, bufferlist& bl,
368 RGWMetadataHandler::sync_type_t sync_mode,
369 obj_version *existing_version = NULL);
370 int remove(string& metadata_key);
371
b32b8144
FG
372 int list_keys_init(const string& section, void **phandle);
373 int list_keys_init(const string& section, const string& marker, void **phandle);
7c673cae
FG
374 int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated);
375 void list_keys_complete(void *handle);
376
181888fb
FG
377 string get_marker(void *handle);
378
7c673cae
FG
379 void dump_log_entry(cls_log_entry& entry, Formatter *f);
380
381 void get_sections(list<string>& sections);
382 int lock_exclusive(string& metadata_key, timespan duration, string& owner_id);
383 int unlock(string& metadata_key, string& owner_id);
384
385 int get_log_shard_id(const string& section, const string& key, int *shard_id);
11fdf7f2
TL
386
387 void parse_metadata_key(const string& metadata_key, string& type, string& entry);
7c673cae
FG
388};
389
11fdf7f2
TL
390template <typename F>
391int RGWMetadataManager::mutate(RGWMetadataHandler *handler, const string& key,
392 const ceph::real_time& mtime, RGWObjVersionTracker *objv_tracker,
393 RGWMDLogStatus op_type,
394 RGWMetadataHandler::sync_type_t sync_mode,
395 F&& f)
396{
397 string oid;
398 rgw_pool pool;
399
400 handler->get_pool_and_oid(store, key, pool, oid);
401
402 int ret = prepare_mutate(store, pool, oid, mtime, objv_tracker, sync_mode);
403 if (ret < 0 ||
404 ret == STATUS_NO_APPLY) {
405 return ret;
406 }
407
408 string section;
409 RGWMetadataLogData log_data;
410 ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_WRITE);
411 if (ret < 0) {
412 return ret;
413 }
414
415 ret = std::forward<F>(f)();
416
417 /* cascading ret into post_modify() */
418
419 ret = post_modify(handler, section, key, log_data, objv_tracker, ret);
420 if (ret < 0)
421 return ret;
422
423 return 0;
424}
425
7c673cae 426#endif