]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_metadata.h
update sources to v12.2.1
[ceph.git] / ceph / src / rgw / rgw_metadata.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #ifndef CEPH_RGW_METADATA_H
5 #define CEPH_RGW_METADATA_H
6
7 #include <string>
8 #include <boost/optional.hpp>
9
10 #include "include/types.h"
11 #include "rgw_common.h"
12 #include "rgw_period_history.h"
13 #include "cls/version/cls_version_types.h"
14 #include "cls/log/cls_log_types.h"
15 #include "common/RWLock.h"
16 #include "common/RefCountedObj.h"
17 #include "common/ceph_time.h"
18
19
20 class RGWRados;
21 class RGWCoroutine;
22 class JSONObj;
23 struct RGWObjVersionTracker;
24
25 struct obj_version;
26
27
/*
 * Status values for metadata log records. RGWMetadataLogData carries one of
 * these in its `status` field and RGWMetadataManager::pre_modify() takes one
 * as its op_type.
 *
 * The explicit values match the default sequential numbering (0..5).
 */
enum RGWMDLogStatus {
  MDLOG_STATUS_UNKNOWN  = 0,
  MDLOG_STATUS_WRITE    = 1,
  MDLOG_STATUS_SETATTRS = 2,
  MDLOG_STATUS_REMOVE   = 3,
  MDLOG_STATUS_COMPLETE = 4,
  MDLOG_STATUS_ABORT    = 5,
};
36
37 class RGWMetadataObject {
38 protected:
39 obj_version objv;
40 ceph::real_time mtime;
41
42 public:
43 RGWMetadataObject() {}
44 virtual ~RGWMetadataObject() {}
45 obj_version& get_version();
46 real_time get_mtime() { return mtime; }
47
48 virtual void dump(Formatter *f) const = 0;
49 };
50
51 class RGWMetadataManager;
52
53 class RGWMetadataHandler {
54 friend class RGWMetadataManager;
55
56 public:
57 enum sync_type_t {
58 APPLY_ALWAYS,
59 APPLY_UPDATES,
60 APPLY_NEWER
61 };
62 static bool string_to_sync_type(const string& sync_string,
63 sync_type_t& type) {
64 if (sync_string.compare("update-by-version") == 0)
65 type = APPLY_UPDATES;
66 else if (sync_string.compare("update-by-timestamp") == 0)
67 type = APPLY_NEWER;
68 else if (sync_string.compare("always") == 0)
69 type = APPLY_ALWAYS;
70 else
71 return false;
72 return true;
73 }
74 virtual ~RGWMetadataHandler() {}
75 virtual string get_type() = 0;
76
77 virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) = 0;
78 virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker,
79 real_time mtime, JSONObj *obj, sync_type_t type) = 0;
80 virtual int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) = 0;
81
82 virtual int list_keys_init(RGWRados *store, const string& marker, void **phandle) = 0;
83 virtual int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) = 0;
84 virtual void list_keys_complete(void *handle) = 0;
85
86 virtual string get_marker(void *handle) = 0;
87
88 /* key to use for hashing entries for log shard placement */
89 virtual void get_hash_key(const string& section, const string& key, string& hash_key) {
90 hash_key = section + ":" + key;
91 }
92
93 protected:
94 virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) = 0;
95 /**
96 * Compare an incoming versus on-disk tag/version+mtime combo against
97 * the sync mode to see if the new one should replace the on-disk one.
98 *
99 * @return true if the update should proceed, false otherwise.
100 */
101 bool check_versions(const obj_version& ondisk, const real_time& ondisk_time,
102 const obj_version& incoming, const real_time& incoming_time,
103 sync_type_t sync_mode) {
104 switch (sync_mode) {
105 case APPLY_UPDATES:
106 if ((ondisk.tag != incoming.tag) ||
107 (ondisk.ver >= incoming.ver))
108 return false;
109 break;
110 case APPLY_NEWER:
111 if (ondisk_time >= incoming_time)
112 return false;
113 break;
114 case APPLY_ALWAYS: //deliberate fall-thru -- we always apply!
115 default: break;
116 }
117 return true;
118 }
119
120 /*
121 * The tenant_name is always returned on purpose. May be empty, of course.
122 */
123 static void parse_bucket(const string& bucket,
124 string *tenant_name,
125 string *bucket_name,
126 string *bucket_instance = nullptr /* optional */)
127 {
128 int pos = bucket.find('/');
129 if (pos >= 0) {
130 *tenant_name = bucket.substr(0, pos);
131 } else {
132 tenant_name->clear();
133 }
134 string bn = bucket.substr(pos + 1);
135 pos = bn.find (':');
136 if (pos < 0) {
137 *bucket_name = std::move(bn);
138 return;
139 }
140 *bucket_name = bn.substr(0, pos);
141 if (bucket_instance) {
142 *bucket_instance = bn.substr(pos + 1);
143 }
144 }
145 };
146
/* Common prefix of metadata log object names; RGWMetadataLog::make_prefix()
 * appends "<period>." to it when a period id is given. */
#define META_LOG_OBJ_PREFIX "meta.log."
148
/*
 * A marker string plus a last-update timestamp. RGWMetadataLog::get_info()
 * takes a pointer to one of these as its output argument (presumably
 * describing a single log shard -- confirm against the implementation).
 */
struct RGWMetadataLogInfo {
  string marker;
  real_time last_update;

  /* write this struct to the given formatter */
  void dump(Formatter *f) const;
  /* populate this struct from a parsed JSON object */
  void decode_json(JSONObj *obj);
};
156
157 class RGWCompletionManager;
158
159 class RGWMetadataLogInfoCompletion : public RefCountedObject {
160 public:
161 using info_callback_t = std::function<void(int, const cls_log_header&)>;
162 private:
163 cls_log_header header;
164 librados::IoCtx io_ctx;
165 librados::AioCompletion *completion;
166 std::mutex mutex; //< protects callback between cancel/complete
167 boost::optional<info_callback_t> callback; //< cleared on cancel
168 public:
169 RGWMetadataLogInfoCompletion(info_callback_t callback);
170 ~RGWMetadataLogInfoCompletion() override;
171
172 librados::IoCtx& get_io_ctx() { return io_ctx; }
173 cls_log_header& get_header() { return header; }
174 librados::AioCompletion* get_completion() { return completion; }
175
176 void finish(librados::completion_t cb) {
177 std::lock_guard<std::mutex> lock(mutex);
178 if (callback) {
179 (*callback)(completion->get_return_value(), header);
180 }
181 }
182 void cancel() {
183 std::lock_guard<std::mutex> lock(mutex);
184 callback = boost::none;
185 }
186 };
187
188 class RGWMetadataLog {
189 CephContext *cct;
190 RGWRados *store;
191 const string prefix;
192
193 static std::string make_prefix(const std::string& period) {
194 if (period.empty())
195 return META_LOG_OBJ_PREFIX;
196 return META_LOG_OBJ_PREFIX + period + ".";
197 }
198
199 RWLock lock;
200 set<int> modified_shards;
201
202 void mark_modified(int shard_id);
203 public:
204 RGWMetadataLog(CephContext *_cct, RGWRados *_store, const std::string& period)
205 : cct(_cct), store(_store),
206 prefix(make_prefix(period)),
207 lock("RGWMetaLog::lock") {}
208
209 void get_shard_oid(int id, string& oid) const {
210 char buf[16];
211 snprintf(buf, sizeof(buf), "%d", id);
212 oid = prefix + buf;
213 }
214
215 int add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl);
216 int store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion);
217
218 struct LogListCtx {
219 int cur_shard;
220 string marker;
221 real_time from_time;
222 real_time end_time;
223
224 string cur_oid;
225
226 bool done;
227
228 LogListCtx() : cur_shard(0), done(false) {}
229 };
230
231 void init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time, string& marker, void **handle);
232 void complete_list_entries(void *handle);
233 int list_entries(void *handle,
234 int max_entries,
235 list<cls_log_entry>& entries,
236 string *out_marker,
237 bool *truncated);
238
239 int trim(int shard_id, const real_time& from_time, const real_time& end_time, const string& start_marker, const string& end_marker);
240 int get_info(int shard_id, RGWMetadataLogInfo *info);
241 int get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion);
242 int lock_exclusive(int shard_id, timespan duration, string&zone_id, string& owner_id);
243 int unlock(int shard_id, string& zone_id, string& owner_id);
244
245 int update_shards(list<int>& shards);
246
247 void read_clear_modified(set<int> &modified);
248 };
249
250 struct LogStatusDump {
251 RGWMDLogStatus status;
252
253 explicit LogStatusDump(RGWMDLogStatus _status) : status(_status) {}
254 void dump(Formatter *f) const;
255 };
256
257 struct RGWMetadataLogData {
258 obj_version read_version;
259 obj_version write_version;
260 RGWMDLogStatus status;
261
262 RGWMetadataLogData() : status(MDLOG_STATUS_UNKNOWN) {}
263
264 void encode(bufferlist& bl) const;
265 void decode(bufferlist::iterator& bl);
266 void dump(Formatter *f) const;
267 void decode_json(JSONObj *obj);
268 };
269 WRITE_CLASS_ENCODER(RGWMetadataLogData)
270
271 struct RGWMetadataLogHistory {
272 epoch_t oldest_realm_epoch;
273 std::string oldest_period_id;
274
275 void encode(bufferlist& bl) const {
276 ENCODE_START(1, 1, bl);
277 ::encode(oldest_realm_epoch, bl);
278 ::encode(oldest_period_id, bl);
279 ENCODE_FINISH(bl);
280 }
281 void decode(bufferlist::iterator& p) {
282 DECODE_START(1, p);
283 ::decode(oldest_realm_epoch, p);
284 ::decode(oldest_period_id, p);
285 DECODE_FINISH(p);
286 }
287
288 static const std::string oid;
289 };
290 WRITE_CLASS_ENCODER(RGWMetadataLogHistory)
291
292 class RGWMetadataManager {
293 map<string, RGWMetadataHandler *> handlers;
294 CephContext *cct;
295 RGWRados *store;
296
297 // maintain a separate metadata log for each period
298 std::map<std::string, RGWMetadataLog> md_logs;
299 // use the current period's log for mutating operations
300 RGWMetadataLog* current_log = nullptr;
301
302 void parse_metadata_key(const string& metadata_key, string& type, string& entry);
303
304 int find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry);
305 int pre_modify(RGWMetadataHandler *handler, string& section, const string& key,
306 RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker,
307 RGWMDLogStatus op_type);
308 int post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data,
309 RGWObjVersionTracker *objv_tracker, int ret);
310
311 string heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv);
312 int store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl,
313 RGWObjVersionTracker *objv_tracker, real_time mtime,
314 map<string, bufferlist> *pattrs);
315 int remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker);
316 public:
317 RGWMetadataManager(CephContext *_cct, RGWRados *_store);
318 ~RGWMetadataManager();
319
320 int init(const std::string& current_period);
321
322 /// initialize the oldest log period if it doesn't exist, and attach it to
323 /// our current history
324 RGWPeriodHistory::Cursor init_oldest_log_period();
325
326 /// read the oldest log period, and return a cursor to it in our existing
327 /// period history
328 RGWPeriodHistory::Cursor read_oldest_log_period() const;
329
330 /// read the oldest log period asynchronously and write its result to the
331 /// given cursor pointer
332 RGWCoroutine* read_oldest_log_period_cr(RGWPeriodHistory::Cursor *period,
333 RGWObjVersionTracker *objv) const;
334
335 /// try to advance the oldest log period when the given period is trimmed,
336 /// using a rados lock to provide atomicity
337 RGWCoroutine* trim_log_period_cr(RGWPeriodHistory::Cursor period,
338 RGWObjVersionTracker *objv) const;
339
340 /// find or create the metadata log for the given period
341 RGWMetadataLog* get_log(const std::string& period);
342
343 int register_handler(RGWMetadataHandler *handler);
344
345 RGWMetadataHandler *get_handler(const string& type);
346
347 int put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive,
348 RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs = NULL);
349 int remove_entry(RGWMetadataHandler *handler, string& key, RGWObjVersionTracker *objv_tracker);
350 int get(string& metadata_key, Formatter *f);
351 int put(string& metadata_key, bufferlist& bl,
352 RGWMetadataHandler::sync_type_t sync_mode,
353 obj_version *existing_version = NULL);
354 int remove(string& metadata_key);
355
356 int list_keys_init(string& section, void **phandle);
357 int list_keys_init(string& section, const string& marker, void **phandle);
358 int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated);
359 void list_keys_complete(void *handle);
360
361 string get_marker(void *handle);
362
363 void dump_log_entry(cls_log_entry& entry, Formatter *f);
364
365 void get_sections(list<string>& sections);
366 int lock_exclusive(string& metadata_key, timespan duration, string& owner_id);
367 int unlock(string& metadata_key, string& owner_id);
368
369 int get_log_shard_id(const string& section, const string& key, int *shard_id);
370 };
371
372 #endif