]>
Commit | Line | Data |
---|---|---|
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- | |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include <boost/intrusive_ptr.hpp> | |
5 | #include "common/ceph_json.h" | |
6 | #include "common/errno.h" | |
7 | #include "rgw_metadata.h" | |
8 | #include "rgw_coroutine.h" | |
9 | #include "cls/version/cls_version_types.h" | |
10 | ||
11 | #include "rgw_rados.h" | |
12 | #include "rgw_tools.h" | |
13 | ||
14 | #include "rgw_cr_rados.h" | |
15 | ||
16 | #include "include/assert.h" | |
17 | #include <boost/asio/yield.hpp> | |
18 | ||
19 | #define dout_subsys ceph_subsys_rgw | |
20 | ||
21 | void LogStatusDump::dump(Formatter *f) const { | |
22 | string s; | |
23 | switch (status) { | |
24 | case MDLOG_STATUS_WRITE: | |
25 | s = "write"; | |
26 | break; | |
27 | case MDLOG_STATUS_SETATTRS: | |
28 | s = "set_attrs"; | |
29 | break; | |
30 | case MDLOG_STATUS_REMOVE: | |
31 | s = "remove"; | |
32 | break; | |
33 | case MDLOG_STATUS_COMPLETE: | |
34 | s = "complete"; | |
35 | break; | |
36 | case MDLOG_STATUS_ABORT: | |
37 | s = "abort"; | |
38 | break; | |
39 | default: | |
40 | s = "unknown"; | |
41 | break; | |
42 | } | |
43 | encode_json("status", s, f); | |
44 | } | |
45 | ||
46 | void RGWMetadataLogData::encode(bufferlist& bl) const { | |
47 | ENCODE_START(1, 1, bl); | |
48 | ::encode(read_version, bl); | |
49 | ::encode(write_version, bl); | |
50 | uint32_t s = (uint32_t)status; | |
51 | ::encode(s, bl); | |
52 | ENCODE_FINISH(bl); | |
53 | } | |
54 | ||
55 | void RGWMetadataLogData::decode(bufferlist::iterator& bl) { | |
56 | DECODE_START(1, bl); | |
57 | ::decode(read_version, bl); | |
58 | ::decode(write_version, bl); | |
59 | uint32_t s; | |
60 | ::decode(s, bl); | |
61 | status = (RGWMDLogStatus)s; | |
62 | DECODE_FINISH(bl); | |
63 | } | |
64 | ||
65 | void RGWMetadataLogData::dump(Formatter *f) const { | |
66 | encode_json("read_version", read_version, f); | |
67 | encode_json("write_version", write_version, f); | |
68 | encode_json("status", LogStatusDump(status), f); | |
69 | } | |
70 | ||
71 | void decode_json_obj(RGWMDLogStatus& status, JSONObj *obj) { | |
72 | string s; | |
73 | JSONDecoder::decode_json("status", s, obj); | |
74 | if (s == "complete") { | |
75 | status = MDLOG_STATUS_COMPLETE; | |
76 | } else if (s == "write") { | |
77 | status = MDLOG_STATUS_WRITE; | |
78 | } else if (s == "remove") { | |
79 | status = MDLOG_STATUS_REMOVE; | |
80 | } else if (s == "set_attrs") { | |
81 | status = MDLOG_STATUS_SETATTRS; | |
82 | } else if (s == "abort") { | |
83 | status = MDLOG_STATUS_ABORT; | |
84 | } else { | |
85 | status = MDLOG_STATUS_UNKNOWN; | |
86 | } | |
87 | } | |
88 | ||
89 | void RGWMetadataLogData::decode_json(JSONObj *obj) { | |
90 | JSONDecoder::decode_json("read_version", read_version, obj); | |
91 | JSONDecoder::decode_json("write_version", write_version, obj); | |
92 | JSONDecoder::decode_json("status", status, obj); | |
93 | } | |
94 | ||
95 | ||
96 | int RGWMetadataLog::add_entry(RGWMetadataHandler *handler, const string& section, const string& key, bufferlist& bl) { | |
97 | if (!store->need_to_log_metadata()) | |
98 | return 0; | |
99 | ||
100 | string oid; | |
101 | ||
102 | string hash_key; | |
103 | handler->get_hash_key(section, key, hash_key); | |
104 | ||
105 | int shard_id; | |
106 | store->shard_name(prefix, cct->_conf->rgw_md_log_max_shards, hash_key, oid, &shard_id); | |
107 | mark_modified(shard_id); | |
108 | real_time now = real_clock::now(); | |
109 | return store->time_log_add(oid, now, section, key, bl); | |
110 | } | |
111 | ||
112 | int RGWMetadataLog::store_entries_in_shard(list<cls_log_entry>& entries, int shard_id, librados::AioCompletion *completion) | |
113 | { | |
114 | string oid; | |
115 | ||
116 | mark_modified(shard_id); | |
117 | store->shard_name(prefix, shard_id, oid); | |
118 | return store->time_log_add(oid, entries, completion, false); | |
119 | } | |
120 | ||
121 | void RGWMetadataLog::init_list_entries(int shard_id, const real_time& from_time, const real_time& end_time, | |
122 | string& marker, void **handle) | |
123 | { | |
124 | LogListCtx *ctx = new LogListCtx(); | |
125 | ||
126 | ctx->cur_shard = shard_id; | |
127 | ctx->from_time = from_time; | |
128 | ctx->end_time = end_time; | |
129 | ctx->marker = marker; | |
130 | ||
131 | get_shard_oid(ctx->cur_shard, ctx->cur_oid); | |
132 | ||
133 | *handle = (void *)ctx; | |
134 | } | |
135 | ||
136 | void RGWMetadataLog::complete_list_entries(void *handle) { | |
137 | LogListCtx *ctx = static_cast<LogListCtx *>(handle); | |
138 | delete ctx; | |
139 | } | |
140 | ||
141 | int RGWMetadataLog::list_entries(void *handle, | |
142 | int max_entries, | |
143 | list<cls_log_entry>& entries, | |
144 | string *last_marker, | |
145 | bool *truncated) { | |
146 | LogListCtx *ctx = static_cast<LogListCtx *>(handle); | |
147 | ||
148 | if (!max_entries) { | |
149 | *truncated = false; | |
150 | return 0; | |
151 | } | |
152 | ||
153 | std::string next_marker; | |
154 | int ret = store->time_log_list(ctx->cur_oid, ctx->from_time, ctx->end_time, | |
155 | max_entries, entries, ctx->marker, | |
156 | &next_marker, truncated); | |
157 | if ((ret < 0) && (ret != -ENOENT)) | |
158 | return ret; | |
159 | ||
160 | ctx->marker = std::move(next_marker); | |
161 | if (last_marker) { | |
162 | *last_marker = ctx->marker; | |
163 | } | |
164 | ||
165 | if (ret == -ENOENT) | |
166 | *truncated = false; | |
167 | ||
168 | return 0; | |
169 | } | |
170 | ||
171 | int RGWMetadataLog::get_info(int shard_id, RGWMetadataLogInfo *info) | |
172 | { | |
173 | string oid; | |
174 | get_shard_oid(shard_id, oid); | |
175 | ||
176 | cls_log_header header; | |
177 | ||
178 | int ret = store->time_log_info(oid, &header); | |
179 | if ((ret < 0) && (ret != -ENOENT)) | |
180 | return ret; | |
181 | ||
182 | info->marker = header.max_marker; | |
183 | info->last_update = header.max_time.to_real_time(); | |
184 | ||
185 | return 0; | |
186 | } | |
187 | ||
188 | static void _mdlog_info_completion(librados::completion_t cb, void *arg) | |
189 | { | |
190 | auto infoc = static_cast<RGWMetadataLogInfoCompletion *>(arg); | |
191 | infoc->finish(cb); | |
192 | infoc->put(); // drop the ref from get_info_async() | |
193 | } | |
194 | ||
195 | RGWMetadataLogInfoCompletion::RGWMetadataLogInfoCompletion(info_callback_t cb) | |
196 | : completion(librados::Rados::aio_create_completion((void *)this, nullptr, | |
197 | _mdlog_info_completion)), | |
198 | callback(cb) | |
199 | { | |
200 | } | |
201 | ||
202 | RGWMetadataLogInfoCompletion::~RGWMetadataLogInfoCompletion() | |
203 | { | |
204 | completion->release(); | |
205 | } | |
206 | ||
207 | int RGWMetadataLog::get_info_async(int shard_id, RGWMetadataLogInfoCompletion *completion) | |
208 | { | |
209 | string oid; | |
210 | get_shard_oid(shard_id, oid); | |
211 | ||
212 | completion->get(); // hold a ref until the completion fires | |
213 | ||
214 | return store->time_log_info_async(completion->get_io_ctx(), oid, | |
215 | &completion->get_header(), | |
216 | completion->get_completion()); | |
217 | } | |
218 | ||
219 | int RGWMetadataLog::trim(int shard_id, const real_time& from_time, const real_time& end_time, | |
220 | const string& start_marker, const string& end_marker) | |
221 | { | |
222 | string oid; | |
223 | get_shard_oid(shard_id, oid); | |
224 | ||
225 | int ret; | |
226 | ||
227 | ret = store->time_log_trim(oid, from_time, end_time, start_marker, end_marker); | |
228 | ||
229 | if (ret == -ENOENT || ret == -ENODATA) | |
230 | ret = 0; | |
231 | ||
232 | return ret; | |
233 | } | |
234 | ||
235 | int RGWMetadataLog::lock_exclusive(int shard_id, timespan duration, string& zone_id, string& owner_id) { | |
236 | string oid; | |
237 | get_shard_oid(shard_id, oid); | |
238 | ||
239 | return store->lock_exclusive(store->get_zone_params().log_pool, oid, duration, zone_id, owner_id); | |
240 | } | |
241 | ||
242 | int RGWMetadataLog::unlock(int shard_id, string& zone_id, string& owner_id) { | |
243 | string oid; | |
244 | get_shard_oid(shard_id, oid); | |
245 | ||
246 | return store->unlock(store->get_zone_params().log_pool, oid, zone_id, owner_id); | |
247 | } | |
248 | ||
249 | void RGWMetadataLog::mark_modified(int shard_id) | |
250 | { | |
251 | lock.get_read(); | |
252 | if (modified_shards.find(shard_id) != modified_shards.end()) { | |
253 | lock.unlock(); | |
254 | return; | |
255 | } | |
256 | lock.unlock(); | |
257 | ||
258 | RWLock::WLocker wl(lock); | |
259 | modified_shards.insert(shard_id); | |
260 | } | |
261 | ||
262 | void RGWMetadataLog::read_clear_modified(set<int> &modified) | |
263 | { | |
264 | RWLock::WLocker wl(lock); | |
265 | modified.swap(modified_shards); | |
266 | modified_shards.clear(); | |
267 | } | |
268 | ||
269 | obj_version& RGWMetadataObject::get_version() | |
270 | { | |
271 | return objv; | |
272 | } | |
273 | ||
274 | class RGWMetadataTopHandler : public RGWMetadataHandler { | |
275 | struct iter_data { | |
276 | list<string> sections; | |
277 | list<string>::iterator iter; | |
278 | }; | |
279 | ||
280 | public: | |
281 | RGWMetadataTopHandler() {} | |
282 | ||
283 | string get_type() override { return string(); } | |
284 | ||
285 | int get(RGWRados *store, string& entry, RGWMetadataObject **obj) override { return -ENOTSUP; } | |
286 | int put(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker, | |
287 | real_time mtime, JSONObj *obj, sync_type_t sync_type) override { return -ENOTSUP; } | |
288 | ||
289 | virtual void get_pool_and_oid(RGWRados *store, const string& key, rgw_pool& pool, string& oid) override {} | |
290 | ||
291 | int remove(RGWRados *store, string& entry, RGWObjVersionTracker& objv_tracker) override { return -ENOTSUP; } | |
292 | ||
293 | int list_keys_init(RGWRados *store, void **phandle) override { | |
294 | iter_data *data = new iter_data; | |
295 | store->meta_mgr->get_sections(data->sections); | |
296 | data->iter = data->sections.begin(); | |
297 | ||
298 | *phandle = data; | |
299 | ||
300 | return 0; | |
301 | } | |
302 | int list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) override { | |
303 | iter_data *data = static_cast<iter_data *>(handle); | |
304 | for (int i = 0; i < max && data->iter != data->sections.end(); ++i, ++(data->iter)) { | |
305 | keys.push_back(*data->iter); | |
306 | } | |
307 | ||
308 | *truncated = (data->iter != data->sections.end()); | |
309 | ||
310 | return 0; | |
311 | } | |
312 | void list_keys_complete(void *handle) override { | |
313 | iter_data *data = static_cast<iter_data *>(handle); | |
314 | ||
315 | delete data; | |
316 | } | |
317 | }; | |
318 | ||
319 | static RGWMetadataTopHandler md_top_handler; | |
320 | ||
321 | ||
322 | RGWMetadataManager::RGWMetadataManager(CephContext *_cct, RGWRados *_store) | |
323 | : cct(_cct), store(_store) | |
324 | { | |
325 | } | |
326 | ||
327 | RGWMetadataManager::~RGWMetadataManager() | |
328 | { | |
329 | map<string, RGWMetadataHandler *>::iterator iter; | |
330 | ||
331 | for (iter = handlers.begin(); iter != handlers.end(); ++iter) { | |
332 | delete iter->second; | |
333 | } | |
334 | ||
335 | handlers.clear(); | |
336 | } | |
337 | ||
338 | const std::string RGWMetadataLogHistory::oid = "meta.history"; | |
339 | ||
340 | namespace { | |
341 | ||
342 | int read_history(RGWRados *store, RGWMetadataLogHistory *state, | |
343 | RGWObjVersionTracker *objv_tracker) | |
344 | { | |
345 | RGWObjectCtx ctx{store}; | |
346 | auto& pool = store->get_zone_params().log_pool; | |
347 | const auto& oid = RGWMetadataLogHistory::oid; | |
348 | bufferlist bl; | |
349 | int ret = rgw_get_system_obj(store, ctx, pool, oid, bl, objv_tracker, nullptr); | |
350 | if (ret < 0) { | |
351 | return ret; | |
352 | } | |
353 | try { | |
354 | auto p = bl.begin(); | |
355 | state->decode(p); | |
356 | } catch (buffer::error& e) { | |
357 | ldout(store->ctx(), 1) << "failed to decode the mdlog history: " | |
358 | << e.what() << dendl; | |
359 | return -EIO; | |
360 | } | |
361 | return 0; | |
362 | } | |
363 | ||
364 | int write_history(RGWRados *store, const RGWMetadataLogHistory& state, | |
365 | RGWObjVersionTracker *objv_tracker, bool exclusive = false) | |
366 | { | |
367 | bufferlist bl; | |
368 | state.encode(bl); | |
369 | ||
370 | auto& pool = store->get_zone_params().log_pool; | |
371 | const auto& oid = RGWMetadataLogHistory::oid; | |
372 | return rgw_put_system_obj(store, pool, oid, bl.c_str(), bl.length(), | |
373 | exclusive, objv_tracker, real_time{}); | |
374 | } | |
375 | ||
376 | using Cursor = RGWPeriodHistory::Cursor; | |
377 | ||
378 | /// read the mdlog history and use it to initialize the given cursor | |
379 | class ReadHistoryCR : public RGWCoroutine { | |
380 | RGWRados *store; | |
381 | Cursor *cursor; | |
382 | RGWObjVersionTracker *objv_tracker; | |
383 | RGWMetadataLogHistory state; | |
384 | public: | |
385 | ReadHistoryCR(RGWRados *store, Cursor *cursor, | |
386 | RGWObjVersionTracker *objv_tracker) | |
387 | : RGWCoroutine(store->ctx()), store(store), cursor(cursor), | |
388 | objv_tracker(objv_tracker) | |
389 | {} | |
390 | ||
391 | int operate() { | |
392 | reenter(this) { | |
393 | yield { | |
394 | rgw_raw_obj obj{store->get_zone_params().log_pool, | |
395 | RGWMetadataLogHistory::oid}; | |
396 | constexpr bool empty_on_enoent = false; | |
397 | ||
398 | using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>; | |
399 | call(new ReadCR(store->get_async_rados(), store, obj, | |
400 | &state, empty_on_enoent, objv_tracker)); | |
401 | } | |
402 | if (retcode < 0) { | |
403 | ldout(cct, 1) << "failed to read mdlog history: " | |
404 | << cpp_strerror(retcode) << dendl; | |
405 | return set_cr_error(retcode); | |
406 | } | |
407 | *cursor = store->period_history->lookup(state.oldest_realm_epoch); | |
408 | if (!*cursor) { | |
409 | return set_cr_error(cursor->get_error()); | |
410 | } | |
411 | ||
412 | ldout(cct, 10) << "read mdlog history with oldest period id=" | |
413 | << state.oldest_period_id << " realm_epoch=" | |
414 | << state.oldest_realm_epoch << dendl; | |
415 | return set_cr_done(); | |
416 | } | |
417 | return 0; | |
418 | } | |
419 | }; | |
420 | ||
421 | /// write the given cursor to the mdlog history | |
422 | class WriteHistoryCR : public RGWCoroutine { | |
423 | RGWRados *store; | |
424 | Cursor cursor; | |
425 | RGWObjVersionTracker *objv; | |
426 | RGWMetadataLogHistory state; | |
427 | public: | |
428 | WriteHistoryCR(RGWRados *store, const Cursor& cursor, | |
429 | RGWObjVersionTracker *objv) | |
430 | : RGWCoroutine(store->ctx()), store(store), cursor(cursor), objv(objv) | |
431 | {} | |
432 | ||
433 | int operate() { | |
434 | reenter(this) { | |
435 | state.oldest_period_id = cursor.get_period().get_id(); | |
436 | state.oldest_realm_epoch = cursor.get_epoch(); | |
437 | ||
438 | yield { | |
439 | rgw_raw_obj obj{store->get_zone_params().log_pool, | |
440 | RGWMetadataLogHistory::oid}; | |
441 | ||
442 | using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>; | |
443 | call(new WriteCR(store->get_async_rados(), store, obj, state, objv)); | |
444 | } | |
445 | if (retcode < 0) { | |
446 | ldout(cct, 1) << "failed to write mdlog history: " | |
447 | << cpp_strerror(retcode) << dendl; | |
448 | return set_cr_error(retcode); | |
449 | } | |
450 | ||
451 | ldout(cct, 10) << "wrote mdlog history with oldest period id=" | |
452 | << state.oldest_period_id << " realm_epoch=" | |
453 | << state.oldest_realm_epoch << dendl; | |
454 | return set_cr_done(); | |
455 | } | |
456 | return 0; | |
457 | } | |
458 | }; | |
459 | ||
460 | /// update the mdlog history to reflect trimmed logs | |
461 | class TrimHistoryCR : public RGWCoroutine { | |
462 | RGWRados *store; | |
463 | const Cursor cursor; //< cursor to trimmed period | |
464 | RGWObjVersionTracker *objv; //< to prevent racing updates | |
465 | Cursor next; //< target cursor for oldest log period | |
466 | Cursor existing; //< existing cursor read from disk | |
467 | ||
468 | public: | |
469 | TrimHistoryCR(RGWRados *store, Cursor cursor, RGWObjVersionTracker *objv) | |
470 | : RGWCoroutine(store->ctx()), | |
471 | store(store), cursor(cursor), objv(objv), next(cursor) | |
472 | { | |
473 | next.next(); // advance past cursor | |
474 | } | |
475 | ||
476 | int operate() { | |
477 | reenter(this) { | |
478 | // read an existing history, and write the new history if it's newer | |
479 | yield call(new ReadHistoryCR(store, &existing, objv)); | |
480 | if (retcode < 0) { | |
481 | return set_cr_error(retcode); | |
482 | } | |
483 | // reject older trims with ECANCELED | |
484 | if (cursor.get_epoch() < existing.get_epoch()) { | |
485 | ldout(cct, 4) << "found oldest log epoch=" << existing.get_epoch() | |
486 | << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl; | |
487 | return set_cr_error(-ECANCELED); | |
488 | } | |
489 | // overwrite with updated history | |
490 | yield call(new WriteHistoryCR(store, next, objv)); | |
491 | if (retcode < 0) { | |
492 | return set_cr_error(retcode); | |
493 | } | |
494 | return set_cr_done(); | |
495 | } | |
496 | return 0; | |
497 | } | |
498 | }; | |
499 | ||
500 | // traverse all the way back to the beginning of the period history, and | |
501 | // return a cursor to the first period in a fully attached history | |
502 | Cursor find_oldest_period(RGWRados *store) | |
503 | { | |
504 | auto cct = store->ctx(); | |
505 | auto cursor = store->period_history->get_current(); | |
506 | ||
507 | while (cursor) { | |
508 | // advance to the period's predecessor | |
509 | if (!cursor.has_prev()) { | |
510 | auto& predecessor = cursor.get_period().get_predecessor(); | |
511 | if (predecessor.empty()) { | |
512 | // this is the first period, so our logs must start here | |
513 | ldout(cct, 10) << "find_oldest_period returning first " | |
514 | "period " << cursor.get_period().get_id() << dendl; | |
515 | return cursor; | |
516 | } | |
517 | // pull the predecessor and add it to our history | |
518 | RGWPeriod period; | |
519 | int r = store->period_puller->pull(predecessor, period); | |
520 | if (r < 0) { | |
521 | return Cursor{r}; | |
522 | } | |
523 | auto prev = store->period_history->insert(std::move(period)); | |
524 | if (!prev) { | |
525 | return prev; | |
526 | } | |
527 | ldout(cct, 20) << "find_oldest_period advancing to " | |
528 | "predecessor period " << predecessor << dendl; | |
529 | assert(cursor.has_prev()); | |
530 | } | |
531 | cursor.prev(); | |
532 | } | |
533 | ldout(cct, 10) << "find_oldest_period returning empty cursor" << dendl; | |
534 | return cursor; | |
535 | } | |
536 | ||
537 | } // anonymous namespace | |
538 | ||
539 | Cursor RGWMetadataManager::init_oldest_log_period() | |
540 | { | |
541 | // read the mdlog history | |
542 | RGWMetadataLogHistory state; | |
543 | RGWObjVersionTracker objv; | |
544 | int ret = read_history(store, &state, &objv); | |
545 | ||
546 | if (ret == -ENOENT) { | |
547 | // initialize the mdlog history and write it | |
548 | ldout(cct, 10) << "initializing mdlog history" << dendl; | |
549 | auto cursor = find_oldest_period(store); | |
550 | if (!cursor) { | |
551 | return cursor; | |
552 | } | |
553 | ||
554 | // write the initial history | |
555 | state.oldest_realm_epoch = cursor.get_epoch(); | |
556 | state.oldest_period_id = cursor.get_period().get_id(); | |
557 | ||
558 | constexpr bool exclusive = true; // don't overwrite | |
559 | int ret = write_history(store, state, &objv, exclusive); | |
560 | if (ret < 0 && ret != -EEXIST) { | |
561 | ldout(cct, 1) << "failed to write mdlog history: " | |
562 | << cpp_strerror(ret) << dendl; | |
563 | return Cursor{ret}; | |
564 | } | |
565 | return cursor; | |
566 | } else if (ret < 0) { | |
567 | ldout(cct, 1) << "failed to read mdlog history: " | |
568 | << cpp_strerror(ret) << dendl; | |
569 | return Cursor{ret}; | |
570 | } | |
571 | ||
572 | // if it's already in the history, return it | |
573 | auto cursor = store->period_history->lookup(state.oldest_realm_epoch); | |
574 | if (cursor) { | |
575 | return cursor; | |
576 | } | |
577 | // pull the oldest period by id | |
578 | RGWPeriod period; | |
579 | ret = store->period_puller->pull(state.oldest_period_id, period); | |
580 | if (ret < 0) { | |
581 | ldout(cct, 1) << "failed to read period id=" << state.oldest_period_id | |
582 | << " for mdlog history: " << cpp_strerror(ret) << dendl; | |
583 | return Cursor{ret}; | |
584 | } | |
585 | // verify its realm_epoch | |
586 | if (period.get_realm_epoch() != state.oldest_realm_epoch) { | |
587 | ldout(cct, 1) << "inconsistent mdlog history: read period id=" | |
588 | << period.get_id() << " with realm_epoch=" << period.get_realm_epoch() | |
589 | << ", expected realm_epoch=" << state.oldest_realm_epoch << dendl; | |
590 | return Cursor{-EINVAL}; | |
591 | } | |
592 | // attach the period to our history | |
593 | return store->period_history->attach(std::move(period)); | |
594 | } | |
595 | ||
596 | Cursor RGWMetadataManager::read_oldest_log_period() const | |
597 | { | |
598 | RGWMetadataLogHistory state; | |
599 | int ret = read_history(store, &state, nullptr); | |
600 | if (ret < 0) { | |
601 | ldout(store->ctx(), 1) << "failed to read mdlog history: " | |
602 | << cpp_strerror(ret) << dendl; | |
603 | return Cursor{ret}; | |
604 | } | |
605 | ||
606 | ldout(store->ctx(), 10) << "read mdlog history with oldest period id=" | |
607 | << state.oldest_period_id << " realm_epoch=" | |
608 | << state.oldest_realm_epoch << dendl; | |
609 | ||
610 | return store->period_history->lookup(state.oldest_realm_epoch); | |
611 | } | |
612 | ||
613 | RGWCoroutine* RGWMetadataManager::read_oldest_log_period_cr(Cursor *period, | |
614 | RGWObjVersionTracker *objv) const | |
615 | { | |
616 | return new ReadHistoryCR(store, period, objv); | |
617 | } | |
618 | ||
619 | RGWCoroutine* RGWMetadataManager::trim_log_period_cr(Cursor period, | |
620 | RGWObjVersionTracker *objv) const | |
621 | { | |
622 | return new TrimHistoryCR(store, period, objv); | |
623 | } | |
624 | ||
625 | int RGWMetadataManager::init(const std::string& current_period) | |
626 | { | |
627 | // open a log for the current period | |
628 | current_log = get_log(current_period); | |
629 | return 0; | |
630 | } | |
631 | ||
632 | RGWMetadataLog* RGWMetadataManager::get_log(const std::string& period) | |
633 | { | |
634 | // construct the period's log in place if it doesn't exist | |
635 | auto insert = md_logs.emplace(std::piecewise_construct, | |
636 | std::forward_as_tuple(period), | |
637 | std::forward_as_tuple(cct, store, period)); | |
638 | return &insert.first->second; | |
639 | } | |
640 | ||
641 | int RGWMetadataManager::register_handler(RGWMetadataHandler *handler) | |
642 | { | |
643 | string type = handler->get_type(); | |
644 | ||
645 | if (handlers.find(type) != handlers.end()) | |
646 | return -EINVAL; | |
647 | ||
648 | handlers[type] = handler; | |
649 | ||
650 | return 0; | |
651 | } | |
652 | ||
653 | RGWMetadataHandler *RGWMetadataManager::get_handler(const string& type) | |
654 | { | |
655 | map<string, RGWMetadataHandler *>::iterator iter = handlers.find(type); | |
656 | if (iter == handlers.end()) | |
657 | return NULL; | |
658 | ||
659 | return iter->second; | |
660 | } | |
661 | ||
662 | void RGWMetadataManager::parse_metadata_key(const string& metadata_key, string& type, string& entry) | |
663 | { | |
664 | auto pos = metadata_key.find(':'); | |
665 | if (pos == string::npos) { | |
666 | type = metadata_key; | |
667 | } else { | |
668 | type = metadata_key.substr(0, pos); | |
669 | entry = metadata_key.substr(pos + 1); | |
670 | } | |
671 | } | |
672 | ||
673 | int RGWMetadataManager::find_handler(const string& metadata_key, RGWMetadataHandler **handler, string& entry) | |
674 | { | |
675 | string type; | |
676 | ||
677 | parse_metadata_key(metadata_key, type, entry); | |
678 | ||
679 | if (type.empty()) { | |
680 | *handler = &md_top_handler; | |
681 | return 0; | |
682 | } | |
683 | ||
684 | map<string, RGWMetadataHandler *>::iterator iter = handlers.find(type); | |
685 | if (iter == handlers.end()) | |
686 | return -ENOENT; | |
687 | ||
688 | *handler = iter->second; | |
689 | ||
690 | return 0; | |
691 | ||
692 | } | |
693 | ||
694 | int RGWMetadataManager::get(string& metadata_key, Formatter *f) | |
695 | { | |
696 | RGWMetadataHandler *handler; | |
697 | string entry; | |
698 | int ret = find_handler(metadata_key, &handler, entry); | |
699 | if (ret < 0) { | |
700 | return ret; | |
701 | } | |
702 | ||
703 | RGWMetadataObject *obj; | |
704 | ||
705 | ret = handler->get(store, entry, &obj); | |
706 | if (ret < 0) { | |
707 | return ret; | |
708 | } | |
709 | ||
710 | f->open_object_section("metadata_info"); | |
711 | encode_json("key", metadata_key, f); | |
712 | encode_json("ver", obj->get_version(), f); | |
713 | real_time mtime = obj->get_mtime(); | |
714 | if (!real_clock::is_zero(mtime)) { | |
715 | utime_t ut(mtime); | |
716 | encode_json("mtime", ut, f); | |
717 | } | |
718 | encode_json("data", *obj, f); | |
719 | f->close_section(); | |
720 | ||
721 | delete obj; | |
722 | ||
723 | return 0; | |
724 | } | |
725 | ||
726 | int RGWMetadataManager::put(string& metadata_key, bufferlist& bl, | |
727 | RGWMetadataHandler::sync_type_t sync_type, | |
728 | obj_version *existing_version) | |
729 | { | |
730 | RGWMetadataHandler *handler; | |
731 | string entry; | |
732 | ||
733 | int ret = find_handler(metadata_key, &handler, entry); | |
734 | if (ret < 0) | |
735 | return ret; | |
736 | ||
737 | JSONParser parser; | |
738 | if (!parser.parse(bl.c_str(), bl.length())) { | |
739 | return -EINVAL; | |
740 | } | |
741 | ||
742 | RGWObjVersionTracker objv_tracker; | |
743 | ||
744 | obj_version *objv = &objv_tracker.write_version; | |
745 | ||
746 | utime_t mtime; | |
747 | ||
748 | try { | |
749 | JSONDecoder::decode_json("key", metadata_key, &parser); | |
750 | JSONDecoder::decode_json("ver", *objv, &parser); | |
751 | JSONDecoder::decode_json("mtime", mtime, &parser); | |
752 | } catch (JSONDecoder::err& e) { | |
753 | return -EINVAL; | |
754 | } | |
755 | ||
756 | JSONObj *jo = parser.find_obj("data"); | |
757 | if (!jo) { | |
758 | return -EINVAL; | |
759 | } | |
760 | ||
761 | ret = handler->put(store, entry, objv_tracker, mtime.to_real_time(), jo, sync_type); | |
762 | if (existing_version) { | |
763 | *existing_version = objv_tracker.read_version; | |
764 | } | |
765 | return ret; | |
766 | } | |
767 | ||
768 | int RGWMetadataManager::remove(string& metadata_key) | |
769 | { | |
770 | RGWMetadataHandler *handler; | |
771 | string entry; | |
772 | ||
773 | int ret = find_handler(metadata_key, &handler, entry); | |
774 | if (ret < 0) | |
775 | return ret; | |
776 | ||
777 | RGWMetadataObject *obj; | |
778 | ||
779 | ret = handler->get(store, entry, &obj); | |
780 | if (ret < 0) { | |
781 | return ret; | |
782 | } | |
783 | ||
784 | RGWObjVersionTracker objv_tracker; | |
785 | ||
786 | objv_tracker.read_version = obj->get_version(); | |
787 | ||
788 | delete obj; | |
789 | ||
790 | return handler->remove(store, entry, objv_tracker); | |
791 | } | |
792 | ||
793 | int RGWMetadataManager::lock_exclusive(string& metadata_key, timespan duration, string& owner_id) { | |
794 | RGWMetadataHandler *handler; | |
795 | string entry; | |
796 | string zone_id; | |
797 | ||
798 | int ret = find_handler(metadata_key, &handler, entry); | |
799 | if (ret < 0) | |
800 | return ret; | |
801 | ||
802 | rgw_pool pool; | |
803 | string oid; | |
804 | ||
805 | handler->get_pool_and_oid(store, entry, pool, oid); | |
806 | ||
807 | return store->lock_exclusive(pool, oid, duration, zone_id, owner_id); | |
808 | } | |
809 | ||
810 | int RGWMetadataManager::unlock(string& metadata_key, string& owner_id) { | |
811 | librados::IoCtx io_ctx; | |
812 | RGWMetadataHandler *handler; | |
813 | string entry; | |
814 | string zone_id; | |
815 | ||
816 | int ret = find_handler(metadata_key, &handler, entry); | |
817 | if (ret < 0) | |
818 | return ret; | |
819 | ||
820 | rgw_pool pool; | |
821 | string oid; | |
822 | ||
823 | handler->get_pool_and_oid(store, entry, pool, oid); | |
824 | ||
825 | return store->unlock(pool, oid, zone_id, owner_id); | |
826 | } | |
827 | ||
828 | struct list_keys_handle { | |
829 | void *handle; | |
830 | RGWMetadataHandler *handler; | |
831 | }; | |
832 | ||
833 | ||
834 | int RGWMetadataManager::list_keys_init(string& section, void **handle) | |
835 | { | |
836 | string entry; | |
837 | RGWMetadataHandler *handler; | |
838 | ||
839 | int ret; | |
840 | ||
841 | ret = find_handler(section, &handler, entry); | |
842 | if (ret < 0) { | |
843 | return -ENOENT; | |
844 | } | |
845 | ||
846 | list_keys_handle *h = new list_keys_handle; | |
847 | h->handler = handler; | |
848 | ret = handler->list_keys_init(store, &h->handle); | |
849 | if (ret < 0) { | |
850 | delete h; | |
851 | return ret; | |
852 | } | |
853 | ||
854 | *handle = (void *)h; | |
855 | ||
856 | return 0; | |
857 | } | |
858 | ||
859 | int RGWMetadataManager::list_keys_next(void *handle, int max, list<string>& keys, bool *truncated) | |
860 | { | |
861 | list_keys_handle *h = static_cast<list_keys_handle *>(handle); | |
862 | ||
863 | RGWMetadataHandler *handler = h->handler; | |
864 | ||
865 | return handler->list_keys_next(h->handle, max, keys, truncated); | |
866 | } | |
867 | ||
868 | ||
869 | void RGWMetadataManager::list_keys_complete(void *handle) | |
870 | { | |
871 | list_keys_handle *h = static_cast<list_keys_handle *>(handle); | |
872 | ||
873 | RGWMetadataHandler *handler = h->handler; | |
874 | ||
875 | handler->list_keys_complete(h->handle); | |
876 | delete h; | |
877 | } | |
878 | ||
879 | void RGWMetadataManager::dump_log_entry(cls_log_entry& entry, Formatter *f) | |
880 | { | |
881 | f->open_object_section("entry"); | |
882 | f->dump_string("id", entry.id); | |
883 | f->dump_string("section", entry.section); | |
884 | f->dump_string("name", entry.name); | |
885 | entry.timestamp.gmtime_nsec(f->dump_stream("timestamp")); | |
886 | ||
887 | try { | |
888 | RGWMetadataLogData log_data; | |
889 | bufferlist::iterator iter = entry.data.begin(); | |
890 | ::decode(log_data, iter); | |
891 | ||
892 | encode_json("data", log_data, f); | |
893 | } catch (buffer::error& err) { | |
894 | lderr(cct) << "failed to decode log entry: " << entry.section << ":" << entry.name<< " ts=" << entry.timestamp << dendl; | |
895 | } | |
896 | f->close_section(); | |
897 | } | |
898 | ||
899 | void RGWMetadataManager::get_sections(list<string>& sections) | |
900 | { | |
901 | for (map<string, RGWMetadataHandler *>::iterator iter = handlers.begin(); iter != handlers.end(); ++iter) { | |
902 | sections.push_back(iter->first); | |
903 | } | |
904 | } | |
905 | ||
906 | int RGWMetadataManager::pre_modify(RGWMetadataHandler *handler, string& section, const string& key, | |
907 | RGWMetadataLogData& log_data, RGWObjVersionTracker *objv_tracker, | |
908 | RGWMDLogStatus op_type) | |
909 | { | |
910 | section = handler->get_type(); | |
911 | ||
912 | /* if write version has not been set, and there's a read version, set it so that we can | |
913 | * log it | |
914 | */ | |
915 | if (objv_tracker) { | |
916 | if (objv_tracker->read_version.ver && !objv_tracker->write_version.ver) { | |
917 | objv_tracker->write_version = objv_tracker->read_version; | |
918 | objv_tracker->write_version.ver++; | |
919 | } | |
920 | log_data.read_version = objv_tracker->read_version; | |
921 | log_data.write_version = objv_tracker->write_version; | |
922 | } | |
923 | ||
924 | log_data.status = op_type; | |
925 | ||
926 | bufferlist logbl; | |
927 | ::encode(log_data, logbl); | |
928 | ||
929 | assert(current_log); // must have called init() | |
930 | int ret = current_log->add_entry(handler, section, key, logbl); | |
931 | if (ret < 0) | |
932 | return ret; | |
933 | ||
934 | return 0; | |
935 | } | |
936 | ||
937 | int RGWMetadataManager::post_modify(RGWMetadataHandler *handler, const string& section, const string& key, RGWMetadataLogData& log_data, | |
938 | RGWObjVersionTracker *objv_tracker, int ret) | |
939 | { | |
940 | if (ret >= 0) | |
941 | log_data.status = MDLOG_STATUS_COMPLETE; | |
942 | else | |
943 | log_data.status = MDLOG_STATUS_ABORT; | |
944 | ||
945 | bufferlist logbl; | |
946 | ::encode(log_data, logbl); | |
947 | ||
948 | assert(current_log); // must have called init() | |
949 | int r = current_log->add_entry(handler, section, key, logbl); | |
950 | if (ret < 0) | |
951 | return ret; | |
952 | ||
953 | if (r < 0) | |
954 | return r; | |
955 | ||
956 | return 0; | |
957 | } | |
958 | ||
959 | string RGWMetadataManager::heap_oid(RGWMetadataHandler *handler, const string& key, const obj_version& objv) | |
960 | { | |
961 | char buf[objv.tag.size() + 32]; | |
962 | snprintf(buf, sizeof(buf), "%s:%lld", objv.tag.c_str(), (long long)objv.ver); | |
963 | return string(".meta:") + handler->get_type() + ":" + key + ":" + buf; | |
964 | } | |
965 | ||
966 | int RGWMetadataManager::store_in_heap(RGWMetadataHandler *handler, const string& key, bufferlist& bl, | |
967 | RGWObjVersionTracker *objv_tracker, real_time mtime, | |
968 | map<string, bufferlist> *pattrs) | |
969 | { | |
970 | if (!objv_tracker) { | |
971 | return -EINVAL; | |
972 | } | |
973 | ||
974 | rgw_pool heap_pool(store->get_zone_params().metadata_heap); | |
975 | ||
976 | if (heap_pool.empty()) { | |
977 | return 0; | |
978 | } | |
979 | ||
980 | RGWObjVersionTracker otracker; | |
981 | otracker.write_version = objv_tracker->write_version; | |
982 | string oid = heap_oid(handler, key, objv_tracker->write_version); | |
983 | int ret = rgw_put_system_obj(store, heap_pool, oid, | |
984 | bl.c_str(), bl.length(), false, | |
985 | &otracker, mtime, pattrs); | |
986 | if (ret < 0) { | |
987 | ldout(store->ctx(), 0) << "ERROR: rgw_put_system_obj() oid=" << oid << ") returned ret=" << ret << dendl; | |
988 | return ret; | |
989 | } | |
990 | ||
991 | return 0; | |
992 | } | |
993 | ||
994 | int RGWMetadataManager::remove_from_heap(RGWMetadataHandler *handler, const string& key, RGWObjVersionTracker *objv_tracker) | |
995 | { | |
996 | if (!objv_tracker) { | |
997 | return -EINVAL; | |
998 | } | |
999 | ||
1000 | rgw_pool heap_pool(store->get_zone_params().metadata_heap); | |
1001 | ||
1002 | if (heap_pool.empty()) { | |
1003 | return 0; | |
1004 | } | |
1005 | ||
1006 | string oid = heap_oid(handler, key, objv_tracker->write_version); | |
1007 | rgw_raw_obj obj(heap_pool, oid); | |
1008 | int ret = store->delete_system_obj(obj); | |
1009 | if (ret < 0) { | |
1010 | ldout(store->ctx(), 0) << "ERROR: store->delete_system_obj()=" << oid << ") returned ret=" << ret << dendl; | |
1011 | return ret; | |
1012 | } | |
1013 | ||
1014 | return 0; | |
1015 | } | |
1016 | ||
1017 | int RGWMetadataManager::put_entry(RGWMetadataHandler *handler, const string& key, bufferlist& bl, bool exclusive, | |
1018 | RGWObjVersionTracker *objv_tracker, real_time mtime, map<string, bufferlist> *pattrs) | |
1019 | { | |
1020 | string section; | |
1021 | RGWMetadataLogData log_data; | |
1022 | int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_WRITE); | |
1023 | if (ret < 0) | |
1024 | return ret; | |
1025 | ||
1026 | string oid; | |
1027 | rgw_pool pool; | |
1028 | ||
1029 | handler->get_pool_and_oid(store, key, pool, oid); | |
1030 | ||
1031 | ret = store_in_heap(handler, key, bl, objv_tracker, mtime, pattrs); | |
1032 | if (ret < 0) { | |
1033 | ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": store_in_heap() key=" << key << " returned ret=" << ret << dendl; | |
1034 | goto done; | |
1035 | } | |
1036 | ||
1037 | ret = rgw_put_system_obj(store, pool, oid, | |
1038 | bl.c_str(), bl.length(), exclusive, | |
1039 | objv_tracker, mtime, pattrs); | |
1040 | ||
1041 | if (ret < 0) { | |
1042 | int r = remove_from_heap(handler, key, objv_tracker); | |
1043 | if (r < 0) { | |
1044 | ldout(store->ctx(), 0) << "ERROR: " << __func__ << ": remove_from_heap() key=" << key << " returned ret=" << r << dendl; | |
1045 | } | |
1046 | } | |
1047 | done: | |
1048 | /* cascading ret into post_modify() */ | |
1049 | ||
1050 | ret = post_modify(handler, section, key, log_data, objv_tracker, ret); | |
1051 | if (ret < 0) | |
1052 | return ret; | |
1053 | ||
1054 | return 0; | |
1055 | } | |
1056 | ||
1057 | int RGWMetadataManager::remove_entry(RGWMetadataHandler *handler, string& key, RGWObjVersionTracker *objv_tracker) | |
1058 | { | |
1059 | string section; | |
1060 | RGWMetadataLogData log_data; | |
1061 | int ret = pre_modify(handler, section, key, log_data, objv_tracker, MDLOG_STATUS_REMOVE); | |
1062 | if (ret < 0) | |
1063 | return ret; | |
1064 | ||
1065 | string oid; | |
1066 | rgw_pool pool; | |
1067 | ||
1068 | handler->get_pool_and_oid(store, key, pool, oid); | |
1069 | ||
1070 | rgw_raw_obj obj(pool, oid); | |
1071 | ||
1072 | ret = store->delete_system_obj(obj, objv_tracker); | |
1073 | /* cascading ret into post_modify() */ | |
1074 | ||
1075 | ret = post_modify(handler, section, key, log_data, objv_tracker, ret); | |
1076 | if (ret < 0) | |
1077 | return ret; | |
1078 | ||
1079 | return 0; | |
1080 | } | |
1081 | ||
1082 | int RGWMetadataManager::get_log_shard_id(const string& section, | |
1083 | const string& key, int *shard_id) | |
1084 | { | |
1085 | RGWMetadataHandler *handler = get_handler(section); | |
1086 | if (!handler) { | |
1087 | return -EINVAL; | |
1088 | } | |
1089 | string hash_key; | |
1090 | handler->get_hash_key(section, key, hash_key); | |
1091 | *shard_id = store->key_to_shard_id(hash_key, cct->_conf->rgw_md_log_max_shards); | |
1092 | return 0; | |
1093 | } |