]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #ifndef CEPH_RGW_METADATA_H | |
5 | #define CEPH_RGW_METADATA_H | |
6 | ||
7 | #include <string> | |
8 | #include <boost/optional.hpp> | |
9 | ||
10 | #include "include/types.h" | |
11 | #include "rgw_common.h" | |
12 | #include "rgw_period_history.h" | |
13 | #include "cls/version/cls_version_types.h" | |
14 | #include "cls/log/cls_log_types.h" | |
15 | #include "common/RWLock.h" | |
16 | #include "common/RefCountedObj.h" | |
17 | #include "common/ceph_time.h" | |
18 | ||
19 | ||
20 | class RGWRados; | |
21 | class RGWCoroutine; | |
22 | class JSONObj; | |
23 | struct RGWObjVersionTracker; | |
24 | ||
25 | struct obj_version; | |
26 | ||
27 | ||
/**
 * Status value carried by a metadata log record (see the `status` member of
 * RGWMetadataLogData). The enumerators are numbered explicitly; the values
 * are the same ones the implicit numbering produced (0 through 5).
 */
enum RGWMDLogStatus {
  MDLOG_STATUS_UNKNOWN  = 0,
  MDLOG_STATUS_WRITE    = 1,
  MDLOG_STATUS_SETATTRS = 2,
  MDLOG_STATUS_REMOVE   = 3,
  MDLOG_STATUS_COMPLETE = 4,
  MDLOG_STATUS_ABORT    = 5,
};
36 | ||
37 | class RGWMetadataObject { | |
38 | protected: | |
39 | obj_version objv; | |
40 | ceph::real_time mtime; | |
41 | ||
42 | public: | |
43 | RGWMetadataObject() {} | |
44 | virtual ~RGWMetadataObject() {} | |
45 | obj_version& get_version(); | |
46 | real_time get_mtime() { return mtime; } | |
47 | ||
48 | virtual void dump(Formatter *f) const = 0; | |
49 | }; | |
50 | ||
51 | class RGWMetadataManager; | |
52 | ||
53 | class RGWMetadataHandler { | |
54 | friend class RGWMetadataManager; | |
55 | ||
56 | public: | |
57 | enum sync_type_t { | |
58 | APPLY_ALWAYS, | |
59 | APPLY_UPDATES, | |
60 | APPLY_NEWER | |
61 | }; | |
62 | static bool string_to_sync_type(const string& sync_string, | |
63 | sync_type_t& type) { | |
64 | if (sync_string.compare("update-by-version") == 0) | |
65 | type = APPLY_UPDATES; | |
66 | else if (sync_string.compare("update-by-timestamp") == 0) | |
67 | type = APPLY_NEWER; | |
68 | else if (sync_string.compare("always") == 0) | |
69 | type = APPLY_ALWAYS; | |
70 | else | |
71 | return false; | |
72 | return true; | |
73 | } | |
74 | virtual ~RGWMetadataHandler() {} | |
75 | virtual string get_type() = 0; | |
76 | ||
77 | virtual int get(RGWRados *store, string& entry, RGWMetadataObject **obj) = 0; | |
78 | virtual int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, | |
79 | real_time mtime, JSONObj *obj, sync_type_t type) = 0; | |
80 | virtual int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) = 0; | |
81 | ||
181888fb | 82 | virtual int list_keys_init(RGWRados *store, const string& marker, void **phandle) = 0; |
7c673cae FG |
83 | virtual int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) = 0; |
84 | virtual void list_keys_complete(void *handle) = 0; | |
85 | ||
181888fb FG |
86 | virtual string get_marker(void *handle) = 0; |
87 | ||
7c673cae FG |
88 | /* key to use for hashing entries for log shard placement */ |
89 | virtual void get_hash_key(const string& section, const string& key, string& hash_key) { | |
90 | hash_key = section + ":" + key; | |
91 | } | |
92 | ||
93 | protected: | |
94 | virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) = 0; | |
95 | /** | |
96 | * Compare an incoming versus on-disk tag/version+mtime combo against | |
97 | * the sync mode to see if the new one should replace the on-disk one. | |
98 | * | |
99 | * @return true if the update should proceed, false otherwise. | |
100 | */ | |
101 | bool check_versions(const obj_version& ondisk, const real_time& ondisk_time, | |
102 | const obj_version& incoming, const real_time& incoming_time, | |
103 | sync_type_t sync_mode) { | |
104 | switch (sync_mode) { | |
105 | case APPLY_UPDATES: | |
106 | if ((ondisk.tag != incoming.tag) || | |
107 | (ondisk.ver >= incoming.ver)) | |
108 | return false; | |
109 | break; | |
110 | case APPLY_NEWER: | |
111 | if (ondisk_time >= incoming_time) | |
112 | return false; | |
113 | break; | |
114 | case APPLY_ALWAYS: //deliberate fall-thru -- we always apply! | |
115 | default: break; | |
116 | } | |
117 | return true; | |
118 | } | |
119 | ||
120 | /* | |
121 | * The tenant_name is always returned on purpose. May be empty, of course. | |
122 | */ | |
123 | static void parse_bucket(const string& bucket, | |
124 | string *tenant_name, | |
125 | string *bucket_name, | |
126 | string *bucket_instance = nullptr /* optional */) | |
127 | { | |
128 | int pos = bucket.find('/'); | |
129 | if (pos >= 0) { | |
130 | *tenant_name = bucket.substr(0, pos); | |
131 | } else { | |
132 | tenant_name->clear(); | |
133 | } | |
134 | string bn = bucket.substr(pos + 1); | |
135 | pos = bn.find (':'); | |
136 | if (pos < 0) { | |
137 | *bucket_name = std::move(bn); | |
138 | return; | |
139 | } | |
140 | *bucket_name = bn.substr(0, pos); | |
141 | if (bucket_instance) { | |
142 | *bucket_instance = bn.substr(pos + 1); | |
143 | } | |
144 | } | |
145 | }; | |
146 | ||
147 | #define META_LOG_OBJ_PREFIX "meta.log." | |
148 | ||
149 | struct RGWMetadataLogInfo { | |
150 | string marker; | |
151 | real_time last_update; | |
152 | ||
153 | void dump(Formatter *f) const; | |
154 | void decode_json(JSONObj *obj); | |
155 | }; | |
156 | ||
157 | class RGWCompletionManager; | |
158 | ||
159 | class RGWMetadataLogInfoCompletion : public RefCountedObject { | |
160 | public: | |
161 | using info_callback_t = std::function<void(int, const cls_log_header&)>; | |
162 | private: | |
163 | cls_log_header header; | |
164 | librados::IoCtx io_ctx; | |
165 | librados::AioCompletion *completion; | |
166 | std::mutex mutex; //< protects callback between cancel/complete | |
167 | boost::optional<info_callback_t> callback; //< cleared on cancel | |
168 | public: | |
169 | RGWMetadataLogInfoCompletion(info_callback_t callback); | |
170 | ~RGWMetadataLogInfoCompletion() override; | |
171 | ||
172 | librados::IoCtx& get_io_ctx() { return io_ctx; } | |
173 | cls_log_header& get_header() { return header; } | |
174 | librados::AioCompletion* get_completion() { return completion; } | |
175 | ||
176 | void finish(librados::completion_t cb) { | |
177 | std::lock_guard<std::mutex> lock(mutex); | |
178 | if (callback) { | |
179 | (*callback)(completion->get_return_value(), header); | |
180 | } | |
181 | } | |
182 | void cancel() { | |
183 | std::lock_guard<std::mutex> lock(mutex); | |
184 | callback = boost::none; | |
185 | } | |
186 | }; | |
187 | ||
188 | class RGWMetadataLog { | |
189 | CephContext *cct; | |
190 | RGWRados *store; | |
191 | const string prefix; | |
192 | ||
193 | static std::string make_prefix(const std::string& period) { | |
194 | if (period.empty()) | |
195 | return META_LOG_OBJ_PREFIX; | |
196 | return META_LOG_OBJ_PREFIX + period + "."; | |
197 | } | |
198 | ||
199 | RWLock lock; | |
200 | set<int> modified_shards; | |
201 | ||
202 | void mark_modified(int shard_id); | |
203 | public: | |
204 | RGWMetadataLog(CephContext *_cct, RGWRados *_store, const std::string& period) | |
205 | : cct(_cct), store(_store), | |
206 | prefix(make_prefix(period)), | |
207 | lock("RGWMetaLog::lock") {} | |
208 | ||
209 | void get_shard_oid(int id, string& oid) const { | |
210 | char buf[16]; | |
211 | snprintf(buf, sizeof(buf), "%d", id); | |
212 | oid = prefix + buf; | |
213 | } | |
214 | ||
215 | int add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl); | |
216 | int store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion); | |
217 | ||
218 | struct LogListCtx { | |
219 | int cur_shard; | |
220 | string marker; | |
221 | real_time from_time; | |
222 | real_time end_time; | |
223 | ||
224 | string cur_oid; | |
225 | ||
226 | bool done; | |
227 | ||
228 | LogListCtx() : cur_shard(0), done(false) {} | |
229 | }; | |
230 | ||
231 | void init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time, string& marker, void **handle); | |
232 | void complete_list_entries(void *handle); | |
233 | int list_entries(void *handle, | |
234 | int max_entries, | |
235 | list<cls_log_entry>& entries, | |
236 | string *out_marker, | |
237 | bool *truncated); | |
238 | ||
239 | int trim(int shard_id, const real_time& from_time, const real_time& end_time, const string& start_marker, const string& end_marker); | |
240 | int get_info(int shard_id, RGWMetadataLogInfo *info); | |
241 | int get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion); | |
242 | int lock_exclusive(int shard_id, timespan duration, string&zone_id, string& owner_id); | |
243 | int unlock(int shard_id, string& zone_id, string& owner_id); | |
244 | ||
245 | int update_shards(list<int>& shards); | |
246 | ||
247 | void read_clear_modified(set<int> &modified); | |
248 | }; | |
249 | ||
250 | struct LogStatusDump { | |
251 | RGWMDLogStatus status; | |
252 | ||
253 | explicit LogStatusDump(RGWMDLogStatus _status) : status(_status) {} | |
254 | void dump(Formatter *f) const; | |
255 | }; | |
256 | ||
257 | struct RGWMetadataLogData { | |
258 | obj_version read_version; | |
259 | obj_version write_version; | |
260 | RGWMDLogStatus status; | |
261 | ||
262 | RGWMetadataLogData() : status(MDLOG_STATUS_UNKNOWN) {} | |
263 | ||
264 | void encode(bufferlist& bl) const; | |
265 | void decode(bufferlist::iterator& bl); | |
266 | void dump(Formatter *f) const; | |
267 | void decode_json(JSONObj *obj); | |
268 | }; | |
269 | WRITE_CLASS_ENCODER(RGWMetadataLogData) | |
270 | ||
271 | struct RGWMetadataLogHistory { | |
272 | epoch_t oldest_realm_epoch; | |
273 | std::string oldest_period_id; | |
274 | ||
275 | void encode(bufferlist& bl) const { | |
276 | ENCODE_START(1, 1, bl); | |
277 | ::encode(oldest_realm_epoch, bl); | |
278 | ::encode(oldest_period_id, bl); | |
279 | ENCODE_FINISH(bl); | |
280 | } | |
281 | void decode(bufferlist::iterator& p) { | |
282 | DECODE_START(1, p); | |
283 | ::decode(oldest_realm_epoch, p); | |
284 | ::decode(oldest_period_id, p); | |
285 | DECODE_FINISH(p); | |
286 | } | |
287 | ||
288 | static const std::string oid; | |
289 | }; | |
290 | WRITE_CLASS_ENCODER(RGWMetadataLogHistory) | |
291 | ||
292 | class RGWMetadataManager { | |
293 | map<string, RGWMetadataHandler *> handlers; | |
294 | CephContext *cct; | |
295 | RGWRados *store; | |
296 | ||
297 | // maintain a separate metadata log for each period | |
298 | std::map<std::string, RGWMetadataLog> md_logs; | |
299 | // use the current period's log for mutating operations | |
300 | RGWMetadataLog* current_log = nullptr; | |
301 | ||
302 | void parse_metadata_key(const string& metadata_key, string& type, string& entry); | |
303 | ||
304 | int find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry); | |
305 | int pre_modify(RGWMetadataHandler *handler, string& section, const string& key, | |
306 | RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker, | |
307 | RGWMDLogStatus op_type); | |
308 | int post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data, | |
309 | RGWObjVersionTracker *objv_tracker, int ret); | |
310 | ||
311 | string heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv); | |
312 | int store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl, | |
313 | RGWObjVersionTracker *objv_tracker, real_time mtime, | |
314 | map<string, bufferlist> *pattrs); | |
315 | int remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker); | |
316 | public: | |
317 | RGWMetadataManager(CephContext *_cct, RGWRados *_store); | |
318 | ~RGWMetadataManager(); | |
319 | ||
320 | int init(const std::string& current_period); | |
321 | ||
322 | /// initialize the oldest log period if it doesn't exist, and attach it to | |
323 | /// our current history | |
324 | RGWPeriodHistory::Cursor init_oldest_log_period(); | |
325 | ||
326 | /// read the oldest log period, and return a cursor to it in our existing | |
327 | /// period history | |
328 | RGWPeriodHistory::Cursor read_oldest_log_period() const; | |
329 | ||
330 | /// read the oldest log period asynchronously and write its result to the | |
331 | /// given cursor pointer | |
332 | RGWCoroutine* read_oldest_log_period_cr(RGWPeriodHistory::Cursor *period, | |
333 | RGWObjVersionTracker *objv) const; | |
334 | ||
335 | /// try to advance the oldest log period when the given period is trimmed, | |
336 | /// using a rados lock to provide atomicity | |
337 | RGWCoroutine* trim_log_period_cr(RGWPeriodHistory::Cursor period, | |
338 | RGWObjVersionTracker *objv) const; | |
339 | ||
340 | /// find or create the metadata log for the given period | |
341 | RGWMetadataLog* get_log(const std::string& period); | |
342 | ||
343 | int register_handler(RGWMetadataHandler *handler); | |
344 | ||
345 | RGWMetadataHandler *get_handler(const string& type); | |
346 | ||
347 | int put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive, | |
348 | RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs = NULL); | |
f64942e4 AA |
349 | int remove_entry(RGWMetadataHandler *handler, |
350 | const string& key, | |
351 | RGWObjVersionTracker *objv_tracker); | |
7c673cae FG |
352 | int get(string& metadata_key, Formatter *f); |
353 | int put(string& metadata_key, bufferlist& bl, | |
354 | RGWMetadataHandler::sync_type_t sync_mode, | |
355 | obj_version *existing_version = NULL); | |
356 | int remove(string& metadata_key); | |
357 | ||
b32b8144 FG |
358 | int list_keys_init(const string& section, void **phandle); |
359 | int list_keys_init(const string& section, const string& marker, void **phandle); | |
7c673cae FG |
360 | int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated); |
361 | void list_keys_complete(void *handle); | |
362 | ||
181888fb FG |
363 | string get_marker(void *handle); |
364 | ||
7c673cae FG |
365 | void dump_log_entry(cls_log_entry& entry, Formatter *f); |
366 | ||
367 | void get_sections(list<string>& sections); | |
368 | int lock_exclusive(string& metadata_key, timespan duration, string& owner_id); | |
369 | int unlock(string& metadata_key, string& owner_id); | |
370 | ||
371 | int get_log_shard_id(const string& section, const string& key, int *shard_id); | |
372 | }; | |
373 | ||
374 | #endif |