]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #ifndef CEPH_RGW_METADATA_H | |
5 | #define CEPH_RGW_METADATA_H | |
6 | ||
7 | #include <string> | |
11fdf7f2 | 8 | #include <utility> |
7c673cae FG |
9 | #include <boost/optional.hpp> |
10 | ||
11 | #include "include/types.h" | |
12 | #include "rgw_common.h" | |
13 | #include "rgw_period_history.h" | |
14 | #include "cls/version/cls_version_types.h" | |
15 | #include "cls/log/cls_log_types.h" | |
16 | #include "common/RWLock.h" | |
17 | #include "common/RefCountedObj.h" | |
18 | #include "common/ceph_time.h" | |
19 | ||
20 | ||
21 | class RGWRados; | |
22 | class RGWCoroutine; | |
23 | class JSONObj; | |
24 | struct RGWObjVersionTracker; | |
25 | ||
26 | struct obj_version; | |
27 | ||
28 | ||
/// Status value carried by metadata log data (see RGWMetadataLogData::status).
/// The numeric values are spelled out explicitly; they match the implicit
/// numbering of the original declaration order.
enum RGWMDLogStatus {
  MDLOG_STATUS_UNKNOWN  = 0,
  MDLOG_STATUS_WRITE    = 1,
  MDLOG_STATUS_SETATTRS = 2,
  MDLOG_STATUS_REMOVE   = 3,
  MDLOG_STATUS_COMPLETE = 4,
  MDLOG_STATUS_ABORT    = 5,
};
37 | ||
38 | class RGWMetadataObject { | |
39 | protected: | |
40 | obj_version objv; | |
41 | ceph::real_time mtime; | |
42 | ||
43 | public: | |
44 | RGWMetadataObject() {} | |
45 | virtual ~RGWMetadataObject() {} | |
46 | obj_version& get_version(); | |
47 | real_time get_mtime() { return mtime; } | |
48 | ||
49 | virtual void dump(Formatter *f) const = 0; | |
50 | }; | |
51 | ||
52 | class RGWMetadataManager; | |
53 | ||
54 | class RGWMetadataHandler { | |
55 | friend class RGWMetadataManager; | |
56 | ||
57 | public: | |
58 | enum sync_type_t { | |
59 | APPLY_ALWAYS, | |
60 | APPLY_UPDATES, | |
61 | APPLY_NEWER | |
62 | }; | |
63 | static bool string_to_sync_type(const string& sync_string, | |
64 | sync_type_t& type) { | |
65 | if (sync_string.compare("update-by-version") == 0) | |
66 | type = APPLY_UPDATES; | |
67 | else if (sync_string.compare("update-by-timestamp") == 0) | |
68 | type = APPLY_NEWER; | |
69 | else if (sync_string.compare("always") == 0) | |
70 | type = APPLY_ALWAYS; | |
71 | else | |
72 | return false; | |
73 | return true; | |
74 | } | |
11fdf7f2 | 75 | |
7c673cae FG |
76 | virtual ~RGWMetadataHandler() {} |
77 | virtual string get_type() = 0; | |
78 | ||
79 | virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) = 0; | |
80 | virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, | |
81 | real_time mtime, JSONObj *obj, sync_type_t type) = 0; | |
82 | virtual int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) = 0; | |
83 | ||
181888fb | 84 | virtual int list_keys_init(RGWRados *store, const string& marker, void **phandle) = 0; |
7c673cae FG |
85 | virtual int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) = 0; |
86 | virtual void list_keys_complete(void *handle) = 0; | |
87 | ||
181888fb FG |
88 | virtual string get_marker(void *handle) = 0; |
89 | ||
7c673cae FG |
90 | /* key to use for hashing entries for log shard placement */ |
91 | virtual void get_hash_key(const string& section, const string& key, string& hash_key) { | |
92 | hash_key = section + ":" + key; | |
93 | } | |
94 | ||
95 | protected: | |
96 | virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) = 0; | |
97 | /** | |
98 | * Compare an incoming versus on-disk tag/version+mtime combo against | |
99 | * the sync mode to see if the new one should replace the on-disk one. | |
100 | * | |
101 | * @return true if the update should proceed, false otherwise. | |
102 | */ | |
11fdf7f2 TL |
103 | static bool check_versions(const obj_version& ondisk, const real_time& ondisk_time, |
104 | const obj_version& incoming, const real_time& incoming_time, | |
105 | sync_type_t sync_mode) { | |
7c673cae FG |
106 | switch (sync_mode) { |
107 | case APPLY_UPDATES: | |
108 | if ((ondisk.tag != incoming.tag) || | |
109 | (ondisk.ver >= incoming.ver)) | |
110 | return false; | |
111 | break; | |
112 | case APPLY_NEWER: | |
113 | if (ondisk_time >= incoming_time) | |
114 | return false; | |
115 | break; | |
116 | case APPLY_ALWAYS: //deliberate fall-thru -- we always apply! | |
117 | default: break; | |
118 | } | |
119 | return true; | |
120 | } | |
121 | ||
122 | /* | |
123 | * The tenant_name is always returned on purpose. May be empty, of course. | |
124 | */ | |
125 | static void parse_bucket(const string& bucket, | |
126 | string *tenant_name, | |
127 | string *bucket_name, | |
128 | string *bucket_instance = nullptr /* optional */) | |
129 | { | |
130 | int pos = bucket.find('/'); | |
131 | if (pos >= 0) { | |
132 | *tenant_name = bucket.substr(0, pos); | |
133 | } else { | |
134 | tenant_name->clear(); | |
135 | } | |
136 | string bn = bucket.substr(pos + 1); | |
137 | pos = bn.find (':'); | |
138 | if (pos < 0) { | |
139 | *bucket_name = std::move(bn); | |
140 | return; | |
141 | } | |
142 | *bucket_name = bn.substr(0, pos); | |
143 | if (bucket_instance) { | |
144 | *bucket_instance = bn.substr(pos + 1); | |
145 | } | |
146 | } | |
147 | }; | |
148 | ||
149 | #define META_LOG_OBJ_PREFIX "meta.log." | |
150 | ||
151 | struct RGWMetadataLogInfo { | |
152 | string marker; | |
153 | real_time last_update; | |
154 | ||
155 | void dump(Formatter *f) const; | |
156 | void decode_json(JSONObj *obj); | |
157 | }; | |
158 | ||
159 | class RGWCompletionManager; | |
160 | ||
161 | class RGWMetadataLogInfoCompletion : public RefCountedObject { | |
162 | public: | |
163 | using info_callback_t = std::function<void(int, const cls_log_header&)>; | |
164 | private: | |
165 | cls_log_header header; | |
166 | librados::IoCtx io_ctx; | |
167 | librados::AioCompletion *completion; | |
168 | std::mutex mutex; //< protects callback between cancel/complete | |
169 | boost::optional<info_callback_t> callback; //< cleared on cancel | |
170 | public: | |
11fdf7f2 | 171 | explicit RGWMetadataLogInfoCompletion(info_callback_t callback); |
7c673cae FG |
172 | ~RGWMetadataLogInfoCompletion() override; |
173 | ||
174 | librados::IoCtx& get_io_ctx() { return io_ctx; } | |
175 | cls_log_header& get_header() { return header; } | |
176 | librados::AioCompletion* get_completion() { return completion; } | |
177 | ||
178 | void finish(librados::completion_t cb) { | |
179 | std::lock_guard<std::mutex> lock(mutex); | |
180 | if (callback) { | |
181 | (*callback)(completion->get_return_value(), header); | |
182 | } | |
183 | } | |
184 | void cancel() { | |
185 | std::lock_guard<std::mutex> lock(mutex); | |
186 | callback = boost::none; | |
187 | } | |
188 | }; | |
189 | ||
190 | class RGWMetadataLog { | |
191 | CephContext *cct; | |
192 | RGWRados *store; | |
193 | const string prefix; | |
194 | ||
195 | static std::string make_prefix(const std::string& period) { | |
196 | if (period.empty()) | |
197 | return META_LOG_OBJ_PREFIX; | |
198 | return META_LOG_OBJ_PREFIX + period + "."; | |
199 | } | |
200 | ||
201 | RWLock lock; | |
202 | set<int> modified_shards; | |
203 | ||
204 | void mark_modified(int shard_id); | |
205 | public: | |
206 | RGWMetadataLog(CephContext *_cct, RGWRados *_store, const std::string& period) | |
207 | : cct(_cct), store(_store), | |
208 | prefix(make_prefix(period)), | |
209 | lock("RGWMetaLog::lock") {} | |
210 | ||
211 | void get_shard_oid(int id, string& oid) const { | |
212 | char buf[16]; | |
213 | snprintf(buf, sizeof(buf), "%d", id); | |
214 | oid = prefix + buf; | |
215 | } | |
216 | ||
217 | int add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl); | |
218 | int store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion); | |
219 | ||
220 | struct LogListCtx { | |
221 | int cur_shard; | |
222 | string marker; | |
223 | real_time from_time; | |
224 | real_time end_time; | |
225 | ||
226 | string cur_oid; | |
227 | ||
228 | bool done; | |
229 | ||
230 | LogListCtx() : cur_shard(0), done(false) {} | |
231 | }; | |
232 | ||
233 | void init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time, string& marker, void **handle); | |
234 | void complete_list_entries(void *handle); | |
235 | int list_entries(void *handle, | |
236 | int max_entries, | |
237 | list<cls_log_entry>& entries, | |
238 | string *out_marker, | |
239 | bool *truncated); | |
240 | ||
241 | int trim(int shard_id, const real_time& from_time, const real_time& end_time, const string& start_marker, const string& end_marker); | |
242 | int get_info(int shard_id, RGWMetadataLogInfo *info); | |
243 | int get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion); | |
244 | int lock_exclusive(int shard_id, timespan duration, string&zone_id, string& owner_id); | |
245 | int unlock(int shard_id, string& zone_id, string& owner_id); | |
246 | ||
247 | int update_shards(list<int>& shards); | |
248 | ||
249 | void read_clear_modified(set<int> &modified); | |
250 | }; | |
251 | ||
252 | struct LogStatusDump { | |
253 | RGWMDLogStatus status; | |
254 | ||
255 | explicit LogStatusDump(RGWMDLogStatus _status) : status(_status) {} | |
256 | void dump(Formatter *f) const; | |
257 | }; | |
258 | ||
259 | struct RGWMetadataLogData { | |
260 | obj_version read_version; | |
261 | obj_version write_version; | |
262 | RGWMDLogStatus status; | |
263 | ||
264 | RGWMetadataLogData() : status(MDLOG_STATUS_UNKNOWN) {} | |
265 | ||
266 | void encode(bufferlist& bl) const; | |
11fdf7f2 | 267 | void decode(bufferlist::const_iterator& bl); |
7c673cae FG |
268 | void dump(Formatter *f) const; |
269 | void decode_json(JSONObj *obj); | |
270 | }; | |
271 | WRITE_CLASS_ENCODER(RGWMetadataLogData) | |
272 | ||
273 | struct RGWMetadataLogHistory { | |
274 | epoch_t oldest_realm_epoch; | |
275 | std::string oldest_period_id; | |
276 | ||
277 | void encode(bufferlist& bl) const { | |
278 | ENCODE_START(1, 1, bl); | |
11fdf7f2 TL |
279 | encode(oldest_realm_epoch, bl); |
280 | encode(oldest_period_id, bl); | |
7c673cae FG |
281 | ENCODE_FINISH(bl); |
282 | } | |
11fdf7f2 | 283 | void decode(bufferlist::const_iterator& p) { |
7c673cae | 284 | DECODE_START(1, p); |
11fdf7f2 TL |
285 | decode(oldest_realm_epoch, p); |
286 | decode(oldest_period_id, p); | |
7c673cae FG |
287 | DECODE_FINISH(p); |
288 | } | |
289 | ||
290 | static const std::string oid; | |
291 | }; | |
292 | WRITE_CLASS_ENCODER(RGWMetadataLogHistory) | |
293 | ||
294 | class RGWMetadataManager { | |
295 | map<string, RGWMetadataHandler *> handlers; | |
296 | CephContext *cct; | |
297 | RGWRados *store; | |
298 | ||
299 | // maintain a separate metadata log for each period | |
300 | std::map<std::string, RGWMetadataLog> md_logs; | |
301 | // use the current period's log for mutating operations | |
302 | RGWMetadataLog* current_log = nullptr; | |
303 | ||
7c673cae FG |
304 | int find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry); |
305 | int pre_modify(RGWMetadataHandler *handler, string& section, const string& key, | |
306 | RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker, | |
307 | RGWMDLogStatus op_type); | |
308 | int post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data, | |
309 | RGWObjVersionTracker *objv_tracker, int ret); | |
310 | ||
311 | string heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv); | |
312 | int store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl, | |
313 | RGWObjVersionTracker *objv_tracker, real_time mtime, | |
314 | map<string, bufferlist> *pattrs); | |
315 | int remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker); | |
11fdf7f2 TL |
316 | int prepare_mutate(RGWRados *store, rgw_pool& pool, const string& oid, |
317 | const real_time& mtime, | |
318 | RGWObjVersionTracker *objv_tracker, | |
319 | RGWMetadataHandler::sync_type_t sync_mode); | |
320 | ||
7c673cae FG |
321 | public: |
322 | RGWMetadataManager(CephContext *_cct, RGWRados *_store); | |
323 | ~RGWMetadataManager(); | |
324 | ||
11fdf7f2 TL |
325 | RGWRados* get_store() { return store; } |
326 | ||
7c673cae FG |
327 | int init(const std::string& current_period); |
328 | ||
329 | /// initialize the oldest log period if it doesn't exist, and attach it to | |
330 | /// our current history | |
331 | RGWPeriodHistory::Cursor init_oldest_log_period(); | |
332 | ||
333 | /// read the oldest log period, and return a cursor to it in our existing | |
334 | /// period history | |
335 | RGWPeriodHistory::Cursor read_oldest_log_period() const; | |
336 | ||
337 | /// read the oldest log period asynchronously and write its result to the | |
338 | /// given cursor pointer | |
339 | RGWCoroutine* read_oldest_log_period_cr(RGWPeriodHistory::Cursor *period, | |
340 | RGWObjVersionTracker *objv) const; | |
341 | ||
342 | /// try to advance the oldest log period when the given period is trimmed, | |
343 | /// using a rados lock to provide atomicity | |
344 | RGWCoroutine* trim_log_period_cr(RGWPeriodHistory::Cursor period, | |
345 | RGWObjVersionTracker *objv) const; | |
346 | ||
347 | /// find or create the metadata log for the given period | |
348 | RGWMetadataLog* get_log(const std::string& period); | |
349 | ||
350 | int register_handler(RGWMetadataHandler *handler); | |
351 | ||
11fdf7f2 TL |
352 | template <typename F> |
353 | int mutate(RGWMetadataHandler *handler, const string& key, | |
354 | const ceph::real_time& mtime, RGWObjVersionTracker *objv_tracker, | |
355 | RGWMDLogStatus op_type, | |
356 | RGWMetadataHandler::sync_type_t sync_mode, | |
357 | F&& f); | |
358 | ||
7c673cae FG |
359 | RGWMetadataHandler *get_handler(const string& type); |
360 | ||
361 | int put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive, | |
362 | RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs = NULL); | |
f64942e4 AA |
363 | int remove_entry(RGWMetadataHandler *handler, |
364 | const string& key, | |
365 | RGWObjVersionTracker *objv_tracker); | |
7c673cae FG |
366 | int get(string& metadata_key, Formatter *f); |
367 | int put(string& metadata_key, bufferlist& bl, | |
368 | RGWMetadataHandler::sync_type_t sync_mode, | |
369 | obj_version *existing_version = NULL); | |
370 | int remove(string& metadata_key); | |
371 | ||
b32b8144 FG |
372 | int list_keys_init(const string& section, void **phandle); |
373 | int list_keys_init(const string& section, const string& marker, void **phandle); | |
7c673cae FG |
374 | int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated); |
375 | void list_keys_complete(void *handle); | |
376 | ||
181888fb FG |
377 | string get_marker(void *handle); |
378 | ||
7c673cae FG |
379 | void dump_log_entry(cls_log_entry& entry, Formatter *f); |
380 | ||
381 | void get_sections(list<string>& sections); | |
382 | int lock_exclusive(string& metadata_key, timespan duration, string& owner_id); | |
383 | int unlock(string& metadata_key, string& owner_id); | |
384 | ||
385 | int get_log_shard_id(const string& section, const string& key, int *shard_id); | |
11fdf7f2 TL |
386 | |
387 | void parse_metadata_key(const string& metadata_key, string& type, string& entry); | |
7c673cae FG |
388 | }; |
389 | ||
11fdf7f2 TL |
390 | template <typename F> |
391 | int RGWMetadataManager::mutate(RGWMetadataHandler *handler, const string& key, | |
392 | const ceph::real_time& mtime, RGWObjVersionTracker *objv_tracker, | |
393 | RGWMDLogStatus op_type, | |
394 | RGWMetadataHandler::sync_type_t sync_mode, | |
395 | F&& f) | |
396 | { | |
397 | string oid; | |
398 | rgw_pool pool; | |
399 | ||
400 | handler->get_pool_and_oid(store, key, pool, oid); | |
401 | ||
402 | int ret = prepare_mutate(store, pool, oid, mtime, objv_tracker, sync_mode); | |
403 | if (ret < 0 || | |
404 | ret == STATUS_NO_APPLY) { | |
405 | return ret; | |
406 | } | |
407 | ||
408 | string section; | |
409 | RGWMetadataLogData log_data; | |
410 | ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_WRITE); | |
411 | if (ret < 0) { | |
412 | return ret; | |
413 | } | |
414 | ||
415 | ret = std::forward<F>(f)(); | |
416 | ||
417 | /* cascading ret into post_modify() */ | |
418 | ||
419 | ret = post_modify(handler, section, key, log_data, objv_tracker, ret); | |
420 | if (ret < 0) | |
421 | return ret; | |
422 | ||
423 | return 0; | |
424 | } | |
425 | ||
7c673cae | 426 | #endif |