]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab ft=cpp | |
3 | ||
4 | #include "svc_mdlog.h" | |
5 | #include "svc_rados.h" | |
6 | #include "svc_zone.h" | |
7 | #include "svc_sys_obj.h" | |
8 | ||
9 | #include "rgw/rgw_tools.h" | |
10 | #include "rgw/rgw_mdlog.h" | |
11 | #include "rgw/rgw_coroutine.h" | |
12 | #include "rgw/rgw_cr_rados.h" | |
13 | #include "rgw/rgw_zone.h" | |
14 | ||
15 | #include "common/errno.h" | |
16 | ||
17 | #include <boost/asio/yield.hpp> | |
18 | ||
19 | #define dout_subsys ceph_subsys_rgw | |
20 | ||
20effc67 TL |
21 | using namespace std; |
22 | ||
9f95a23c TL |
23 | using Svc = RGWSI_MDLog::Svc; |
24 | using Cursor = RGWPeriodHistory::Cursor; | |
25 | ||
26 | RGWSI_MDLog::RGWSI_MDLog(CephContext *cct, bool _run_sync) : RGWServiceInstance(cct), run_sync(_run_sync) { | |
27 | } | |
28 | ||
29 | RGWSI_MDLog::~RGWSI_MDLog() { | |
30 | } | |
31 | ||
32 | int RGWSI_MDLog::init(RGWSI_RADOS *_rados_svc, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, RGWSI_Cls *_cls_svc) | |
33 | { | |
34 | svc.zone = _zone_svc; | |
35 | svc.sysobj = _sysobj_svc; | |
36 | svc.mdlog = this; | |
37 | svc.rados = _rados_svc; | |
38 | svc.cls = _cls_svc; | |
39 | ||
40 | return 0; | |
41 | } | |
42 | ||
b3b6e05e | 43 | int RGWSI_MDLog::do_start(optional_yield y, const DoutPrefixProvider *dpp) |
9f95a23c TL |
44 | { |
45 | auto& current_period = svc.zone->get_current_period(); | |
46 | ||
47 | current_log = get_log(current_period.get_id()); | |
48 | ||
49 | period_puller.reset(new RGWPeriodPuller(svc.zone, svc.sysobj)); | |
50 | period_history.reset(new RGWPeriodHistory(cct, period_puller.get(), | |
51 | current_period)); | |
52 | ||
53 | if (run_sync && | |
54 | svc.zone->need_to_sync()) { | |
55 | // initialize the log period history | |
b3b6e05e | 56 | svc.mdlog->init_oldest_log_period(y, dpp); |
9f95a23c TL |
57 | } |
58 | return 0; | |
59 | } | |
60 | ||
61 | int RGWSI_MDLog::read_history(RGWMetadataLogHistory *state, | |
f67539c2 | 62 | RGWObjVersionTracker *objv_tracker, |
b3b6e05e TL |
63 | optional_yield y, |
64 | const DoutPrefixProvider *dpp) const | |
9f95a23c TL |
65 | { |
66 | auto obj_ctx = svc.sysobj->init_obj_ctx(); | |
67 | auto& pool = svc.zone->get_zone_params().log_pool; | |
68 | const auto& oid = RGWMetadataLogHistory::oid; | |
69 | bufferlist bl; | |
b3b6e05e | 70 | int ret = rgw_get_system_obj(obj_ctx, pool, oid, bl, objv_tracker, nullptr, y, dpp); |
9f95a23c TL |
71 | if (ret < 0) { |
72 | return ret; | |
73 | } | |
74 | if (bl.length() == 0) { | |
75 | /* bad history object, remove it */ | |
76 | rgw_raw_obj obj(pool, oid); | |
77 | auto sysobj = obj_ctx.get_obj(obj); | |
b3b6e05e | 78 | ret = sysobj.wop().remove(dpp, y); |
9f95a23c | 79 | if (ret < 0) { |
b3b6e05e | 80 | ldpp_dout(dpp, 0) << "ERROR: meta history is empty, but cannot remove it (" << cpp_strerror(-ret) << ")" << dendl; |
9f95a23c TL |
81 | return ret; |
82 | } | |
83 | return -ENOENT; | |
84 | } | |
85 | try { | |
86 | auto p = bl.cbegin(); | |
87 | state->decode(p); | |
88 | } catch (buffer::error& e) { | |
b3b6e05e | 89 | ldpp_dout(dpp, 1) << "failed to decode the mdlog history: " |
9f95a23c TL |
90 | << e.what() << dendl; |
91 | return -EIO; | |
92 | } | |
93 | return 0; | |
94 | } | |
95 | ||
b3b6e05e TL |
96 | int RGWSI_MDLog::write_history(const DoutPrefixProvider *dpp, |
97 | const RGWMetadataLogHistory& state, | |
9f95a23c | 98 | RGWObjVersionTracker *objv_tracker, |
f67539c2 | 99 | optional_yield y, bool exclusive) |
9f95a23c TL |
100 | { |
101 | bufferlist bl; | |
102 | state.encode(bl); | |
103 | ||
104 | auto& pool = svc.zone->get_zone_params().log_pool; | |
105 | const auto& oid = RGWMetadataLogHistory::oid; | |
106 | auto obj_ctx = svc.sysobj->init_obj_ctx(); | |
b3b6e05e | 107 | return rgw_put_system_obj(dpp, obj_ctx, pool, oid, bl, |
f67539c2 | 108 | exclusive, objv_tracker, real_time{}, y); |
9f95a23c TL |
109 | } |
110 | ||
111 | namespace mdlog { | |
112 | ||
113 | using Cursor = RGWPeriodHistory::Cursor; | |
114 | ||
115 | /// read the mdlog history and use it to initialize the given cursor | |
116 | class ReadHistoryCR : public RGWCoroutine { | |
b3b6e05e | 117 | const DoutPrefixProvider *dpp; |
9f95a23c TL |
118 | Svc svc; |
119 | Cursor *cursor; | |
120 | RGWObjVersionTracker *objv_tracker; | |
121 | RGWMetadataLogHistory state; | |
122 | RGWAsyncRadosProcessor *async_processor; | |
123 | ||
124 | public: | |
b3b6e05e TL |
125 | ReadHistoryCR(const DoutPrefixProvider *dpp, |
126 | const Svc& svc, | |
9f95a23c TL |
127 | Cursor *cursor, |
128 | RGWObjVersionTracker *objv_tracker) | |
b3b6e05e | 129 | : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc), |
9f95a23c TL |
130 | cursor(cursor), |
131 | objv_tracker(objv_tracker), | |
132 | async_processor(svc.rados->get_async_processor()) | |
133 | {} | |
134 | ||
b3b6e05e | 135 | int operate(const DoutPrefixProvider *dpp) { |
9f95a23c TL |
136 | reenter(this) { |
137 | yield { | |
138 | rgw_raw_obj obj{svc.zone->get_zone_params().log_pool, | |
139 | RGWMetadataLogHistory::oid}; | |
140 | constexpr bool empty_on_enoent = false; | |
141 | ||
142 | using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>; | |
b3b6e05e | 143 | call(new ReadCR(dpp, async_processor, svc.sysobj, obj, |
9f95a23c TL |
144 | &state, empty_on_enoent, objv_tracker)); |
145 | } | |
146 | if (retcode < 0) { | |
b3b6e05e | 147 | ldpp_dout(dpp, 1) << "failed to read mdlog history: " |
9f95a23c TL |
148 | << cpp_strerror(retcode) << dendl; |
149 | return set_cr_error(retcode); | |
150 | } | |
151 | *cursor = svc.mdlog->period_history->lookup(state.oldest_realm_epoch); | |
152 | if (!*cursor) { | |
153 | return set_cr_error(cursor->get_error()); | |
154 | } | |
155 | ||
b3b6e05e | 156 | ldpp_dout(dpp, 10) << "read mdlog history with oldest period id=" |
9f95a23c TL |
157 | << state.oldest_period_id << " realm_epoch=" |
158 | << state.oldest_realm_epoch << dendl; | |
159 | return set_cr_done(); | |
160 | } | |
161 | return 0; | |
162 | } | |
163 | }; | |
164 | ||
165 | /// write the given cursor to the mdlog history | |
166 | class WriteHistoryCR : public RGWCoroutine { | |
b3b6e05e | 167 | const DoutPrefixProvider *dpp; |
9f95a23c TL |
168 | Svc svc; |
169 | Cursor cursor; | |
170 | RGWObjVersionTracker *objv; | |
171 | RGWMetadataLogHistory state; | |
172 | RGWAsyncRadosProcessor *async_processor; | |
173 | ||
174 | public: | |
b3b6e05e TL |
175 | WriteHistoryCR(const DoutPrefixProvider *dpp, |
176 | Svc& svc, | |
9f95a23c TL |
177 | const Cursor& cursor, |
178 | RGWObjVersionTracker *objv) | |
b3b6e05e | 179 | : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc), |
9f95a23c TL |
180 | cursor(cursor), objv(objv), |
181 | async_processor(svc.rados->get_async_processor()) | |
182 | {} | |
183 | ||
b3b6e05e | 184 | int operate(const DoutPrefixProvider *dpp) { |
9f95a23c TL |
185 | reenter(this) { |
186 | state.oldest_period_id = cursor.get_period().get_id(); | |
187 | state.oldest_realm_epoch = cursor.get_epoch(); | |
188 | ||
189 | yield { | |
190 | rgw_raw_obj obj{svc.zone->get_zone_params().log_pool, | |
191 | RGWMetadataLogHistory::oid}; | |
192 | ||
193 | using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>; | |
b3b6e05e | 194 | call(new WriteCR(dpp, async_processor, svc.sysobj, obj, state, objv)); |
9f95a23c TL |
195 | } |
196 | if (retcode < 0) { | |
b3b6e05e | 197 | ldpp_dout(dpp, 1) << "failed to write mdlog history: " |
9f95a23c TL |
198 | << cpp_strerror(retcode) << dendl; |
199 | return set_cr_error(retcode); | |
200 | } | |
201 | ||
b3b6e05e | 202 | ldpp_dout(dpp, 10) << "wrote mdlog history with oldest period id=" |
9f95a23c TL |
203 | << state.oldest_period_id << " realm_epoch=" |
204 | << state.oldest_realm_epoch << dendl; | |
205 | return set_cr_done(); | |
206 | } | |
207 | return 0; | |
208 | } | |
209 | }; | |
210 | ||
211 | /// update the mdlog history to reflect trimmed logs | |
212 | class TrimHistoryCR : public RGWCoroutine { | |
b3b6e05e | 213 | const DoutPrefixProvider *dpp; |
9f95a23c TL |
214 | Svc svc; |
215 | const Cursor cursor; //< cursor to trimmed period | |
216 | RGWObjVersionTracker *objv; //< to prevent racing updates | |
217 | Cursor next; //< target cursor for oldest log period | |
218 | Cursor existing; //< existing cursor read from disk | |
219 | ||
220 | public: | |
b3b6e05e TL |
221 | TrimHistoryCR(const DoutPrefixProvider *dpp, const Svc& svc, Cursor cursor, RGWObjVersionTracker *objv) |
222 | : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc), | |
9f95a23c TL |
223 | cursor(cursor), objv(objv), next(cursor) { |
224 | next.next(); // advance past cursor | |
225 | } | |
226 | ||
b3b6e05e | 227 | int operate(const DoutPrefixProvider *dpp) { |
9f95a23c TL |
228 | reenter(this) { |
229 | // read an existing history, and write the new history if it's newer | |
b3b6e05e | 230 | yield call(new ReadHistoryCR(dpp, svc, &existing, objv)); |
9f95a23c TL |
231 | if (retcode < 0) { |
232 | return set_cr_error(retcode); | |
233 | } | |
234 | // reject older trims with ECANCELED | |
235 | if (cursor.get_epoch() < existing.get_epoch()) { | |
b3b6e05e | 236 | ldpp_dout(dpp, 4) << "found oldest log epoch=" << existing.get_epoch() |
9f95a23c TL |
237 | << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl; |
238 | return set_cr_error(-ECANCELED); | |
239 | } | |
240 | // overwrite with updated history | |
b3b6e05e | 241 | yield call(new WriteHistoryCR(dpp, svc, next, objv)); |
9f95a23c TL |
242 | if (retcode < 0) { |
243 | return set_cr_error(retcode); | |
244 | } | |
245 | return set_cr_done(); | |
246 | } | |
247 | return 0; | |
248 | } | |
249 | }; | |
250 | ||
251 | } // mdlog namespace | |
252 | ||
253 | // traverse all the way back to the beginning of the period history, and | |
254 | // return a cursor to the first period in a fully attached history | |
b3b6e05e | 255 | Cursor RGWSI_MDLog::find_oldest_period(const DoutPrefixProvider *dpp, optional_yield y) |
9f95a23c TL |
256 | { |
257 | auto cursor = period_history->get_current(); | |
258 | ||
259 | while (cursor) { | |
260 | // advance to the period's predecessor | |
261 | if (!cursor.has_prev()) { | |
262 | auto& predecessor = cursor.get_period().get_predecessor(); | |
263 | if (predecessor.empty()) { | |
264 | // this is the first period, so our logs must start here | |
b3b6e05e | 265 | ldpp_dout(dpp, 10) << "find_oldest_period returning first " |
9f95a23c TL |
266 | "period " << cursor.get_period().get_id() << dendl; |
267 | return cursor; | |
268 | } | |
269 | // pull the predecessor and add it to our history | |
270 | RGWPeriod period; | |
b3b6e05e | 271 | int r = period_puller->pull(dpp, predecessor, period, y); |
9f95a23c TL |
272 | if (r < 0) { |
273 | return cursor; | |
274 | } | |
275 | auto prev = period_history->insert(std::move(period)); | |
276 | if (!prev) { | |
277 | return prev; | |
278 | } | |
b3b6e05e | 279 | ldpp_dout(dpp, 20) << "find_oldest_period advancing to " |
9f95a23c TL |
280 | "predecessor period " << predecessor << dendl; |
281 | ceph_assert(cursor.has_prev()); | |
282 | } | |
283 | cursor.prev(); | |
284 | } | |
b3b6e05e | 285 | ldpp_dout(dpp, 10) << "find_oldest_period returning empty cursor" << dendl; |
9f95a23c TL |
286 | return cursor; |
287 | } | |
288 | ||
b3b6e05e | 289 | Cursor RGWSI_MDLog::init_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp) |
9f95a23c TL |
290 | { |
291 | // read the mdlog history | |
292 | RGWMetadataLogHistory state; | |
293 | RGWObjVersionTracker objv; | |
b3b6e05e | 294 | int ret = read_history(&state, &objv, y, dpp); |
9f95a23c TL |
295 | |
296 | if (ret == -ENOENT) { | |
297 | // initialize the mdlog history and write it | |
b3b6e05e TL |
298 | ldpp_dout(dpp, 10) << "initializing mdlog history" << dendl; |
299 | auto cursor = find_oldest_period(dpp, y); | |
9f95a23c TL |
300 | if (!cursor) { |
301 | return cursor; | |
302 | } | |
303 | // write the initial history | |
304 | state.oldest_realm_epoch = cursor.get_epoch(); | |
305 | state.oldest_period_id = cursor.get_period().get_id(); | |
306 | ||
307 | constexpr bool exclusive = true; // don't overwrite | |
b3b6e05e | 308 | int ret = write_history(dpp, state, &objv, y, exclusive); |
9f95a23c | 309 | if (ret < 0 && ret != -EEXIST) { |
b3b6e05e | 310 | ldpp_dout(dpp, 1) << "failed to write mdlog history: " |
9f95a23c TL |
311 | << cpp_strerror(ret) << dendl; |
312 | return Cursor{ret}; | |
313 | } | |
314 | return cursor; | |
315 | } else if (ret < 0) { | |
b3b6e05e | 316 | ldpp_dout(dpp, 1) << "failed to read mdlog history: " |
9f95a23c TL |
317 | << cpp_strerror(ret) << dendl; |
318 | return Cursor{ret}; | |
319 | } | |
320 | ||
321 | // if it's already in the history, return it | |
322 | auto cursor = period_history->lookup(state.oldest_realm_epoch); | |
323 | if (cursor) { | |
324 | return cursor; | |
325 | } else { | |
b3b6e05e | 326 | cursor = find_oldest_period(dpp, y); |
9f95a23c TL |
327 | state.oldest_realm_epoch = cursor.get_epoch(); |
328 | state.oldest_period_id = cursor.get_period().get_id(); | |
b3b6e05e TL |
329 | ldpp_dout(dpp, 10) << "rewriting mdlog history" << dendl; |
330 | ret = write_history(dpp, state, &objv, y); | |
9f95a23c | 331 | if (ret < 0 && ret != -ECANCELED) { |
b3b6e05e | 332 | ldpp_dout(dpp, 1) << "failed to write mdlog history: " |
9f95a23c TL |
333 | << cpp_strerror(ret) << dendl; |
334 | return Cursor{ret}; | |
335 | } | |
336 | return cursor; | |
337 | } | |
338 | ||
339 | // pull the oldest period by id | |
340 | RGWPeriod period; | |
b3b6e05e | 341 | ret = period_puller->pull(dpp, state.oldest_period_id, period, y); |
9f95a23c | 342 | if (ret < 0) { |
b3b6e05e | 343 | ldpp_dout(dpp, 1) << "failed to read period id=" << state.oldest_period_id |
9f95a23c TL |
344 | << " for mdlog history: " << cpp_strerror(ret) << dendl; |
345 | return Cursor{ret}; | |
346 | } | |
347 | // verify its realm_epoch | |
348 | if (period.get_realm_epoch() != state.oldest_realm_epoch) { | |
b3b6e05e | 349 | ldpp_dout(dpp, 1) << "inconsistent mdlog history: read period id=" |
9f95a23c TL |
350 | << period.get_id() << " with realm_epoch=" << period.get_realm_epoch() |
351 | << ", expected realm_epoch=" << state.oldest_realm_epoch << dendl; | |
352 | return Cursor{-EINVAL}; | |
353 | } | |
354 | // attach the period to our history | |
b3b6e05e | 355 | return period_history->attach(dpp, std::move(period), y); |
9f95a23c TL |
356 | } |
357 | ||
b3b6e05e | 358 | Cursor RGWSI_MDLog::read_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp) const |
9f95a23c TL |
359 | { |
360 | RGWMetadataLogHistory state; | |
b3b6e05e | 361 | int ret = read_history(&state, nullptr, y, dpp); |
9f95a23c | 362 | if (ret < 0) { |
b3b6e05e | 363 | ldpp_dout(dpp, 1) << "failed to read mdlog history: " |
9f95a23c TL |
364 | << cpp_strerror(ret) << dendl; |
365 | return Cursor{ret}; | |
366 | } | |
367 | ||
b3b6e05e | 368 | ldpp_dout(dpp, 10) << "read mdlog history with oldest period id=" |
9f95a23c TL |
369 | << state.oldest_period_id << " realm_epoch=" |
370 | << state.oldest_realm_epoch << dendl; | |
371 | ||
372 | return period_history->lookup(state.oldest_realm_epoch); | |
373 | } | |
374 | ||
b3b6e05e TL |
375 | RGWCoroutine* RGWSI_MDLog::read_oldest_log_period_cr(const DoutPrefixProvider *dpp, |
376 | Cursor *period, RGWObjVersionTracker *objv) const | |
9f95a23c | 377 | { |
b3b6e05e | 378 | return new mdlog::ReadHistoryCR(dpp, svc, period, objv); |
9f95a23c TL |
379 | } |
380 | ||
b3b6e05e TL |
381 | RGWCoroutine* RGWSI_MDLog::trim_log_period_cr(const DoutPrefixProvider *dpp, |
382 | Cursor period, RGWObjVersionTracker *objv) const | |
9f95a23c | 383 | { |
b3b6e05e | 384 | return new mdlog::TrimHistoryCR(dpp, svc, period, objv); |
9f95a23c TL |
385 | } |
386 | ||
387 | RGWMetadataLog* RGWSI_MDLog::get_log(const std::string& period) | |
388 | { | |
389 | // construct the period's log in place if it doesn't exist | |
390 | auto insert = md_logs.emplace(std::piecewise_construct, | |
391 | std::forward_as_tuple(period), | |
392 | std::forward_as_tuple(cct, svc.zone, svc.cls, period)); | |
393 | return &insert.first->second; | |
394 | } | |
395 | ||
b3b6e05e | 396 | int RGWSI_MDLog::add_entry(const DoutPrefixProvider *dpp, const string& hash_key, const string& section, const string& key, bufferlist& bl) |
9f95a23c TL |
397 | { |
398 | ceph_assert(current_log); // must have called init() | |
b3b6e05e | 399 | return current_log->add_entry(dpp, hash_key, section, key, bl); |
9f95a23c TL |
400 | } |
401 | ||
402 | int RGWSI_MDLog::get_shard_id(const string& hash_key, int *shard_id) | |
403 | { | |
404 | ceph_assert(current_log); // must have called init() | |
405 | return current_log->get_shard_id(hash_key, shard_id); | |
406 | } | |
407 | ||
b3b6e05e | 408 | int RGWSI_MDLog::pull_period(const DoutPrefixProvider *dpp, const std::string& period_id, RGWPeriod& period, |
f67539c2 | 409 | optional_yield y) |
9f95a23c | 410 | { |
b3b6e05e | 411 | return period_puller->pull(dpp, period_id, period, y); |
9f95a23c TL |
412 | } |
413 |