]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/services/svc_mdlog.cc
cbc729ecfb796fd7db50b8fffe383075e4806190
[ceph.git] / ceph / src / rgw / services / svc_mdlog.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
3
4 #include "svc_mdlog.h"
5 #include "svc_rados.h"
6 #include "svc_zone.h"
7 #include "svc_sys_obj.h"
8
9 #include "rgw/rgw_tools.h"
10 #include "rgw/rgw_mdlog.h"
11 #include "rgw/rgw_coroutine.h"
12 #include "rgw/rgw_cr_rados.h"
13 #include "rgw/rgw_zone.h"
14
15 #include "common/errno.h"
16
17 #include <boost/asio/yield.hpp>
18
19 #define dout_subsys ceph_subsys_rgw
20
21 using Svc = RGWSI_MDLog::Svc;
22 using Cursor = RGWPeriodHistory::Cursor;
23
24 RGWSI_MDLog::RGWSI_MDLog(CephContext *cct, bool _run_sync) : RGWServiceInstance(cct), run_sync(_run_sync) {
25 }
26
27 RGWSI_MDLog::~RGWSI_MDLog() {
28 }
29
30 int RGWSI_MDLog::init(RGWSI_RADOS *_rados_svc, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, RGWSI_Cls *_cls_svc)
31 {
32 svc.zone = _zone_svc;
33 svc.sysobj = _sysobj_svc;
34 svc.mdlog = this;
35 svc.rados = _rados_svc;
36 svc.cls = _cls_svc;
37
38 return 0;
39 }
40
41 int RGWSI_MDLog::do_start(optional_yield y)
42 {
43 auto& current_period = svc.zone->get_current_period();
44
45 current_log = get_log(current_period.get_id());
46
47 period_puller.reset(new RGWPeriodPuller(svc.zone, svc.sysobj));
48 period_history.reset(new RGWPeriodHistory(cct, period_puller.get(),
49 current_period));
50
51 if (run_sync &&
52 svc.zone->need_to_sync()) {
53 // initialize the log period history
54 svc.mdlog->init_oldest_log_period(y);
55 }
56 return 0;
57 }
58
59 int RGWSI_MDLog::read_history(RGWMetadataLogHistory *state,
60 RGWObjVersionTracker *objv_tracker,
61 optional_yield y) const
62 {
63 auto obj_ctx = svc.sysobj->init_obj_ctx();
64 auto& pool = svc.zone->get_zone_params().log_pool;
65 const auto& oid = RGWMetadataLogHistory::oid;
66 bufferlist bl;
67 int ret = rgw_get_system_obj(obj_ctx, pool, oid, bl, objv_tracker, nullptr, y);
68 if (ret < 0) {
69 return ret;
70 }
71 if (bl.length() == 0) {
72 /* bad history object, remove it */
73 rgw_raw_obj obj(pool, oid);
74 auto sysobj = obj_ctx.get_obj(obj);
75 ret = sysobj.wop().remove(y);
76 if (ret < 0) {
77 ldout(cct, 0) << "ERROR: meta history is empty, but cannot remove it (" << cpp_strerror(-ret) << ")" << dendl;
78 return ret;
79 }
80 return -ENOENT;
81 }
82 try {
83 auto p = bl.cbegin();
84 state->decode(p);
85 } catch (buffer::error& e) {
86 ldout(cct, 1) << "failed to decode the mdlog history: "
87 << e.what() << dendl;
88 return -EIO;
89 }
90 return 0;
91 }
92
93 int RGWSI_MDLog::write_history(const RGWMetadataLogHistory& state,
94 RGWObjVersionTracker *objv_tracker,
95 optional_yield y, bool exclusive)
96 {
97 bufferlist bl;
98 state.encode(bl);
99
100 auto& pool = svc.zone->get_zone_params().log_pool;
101 const auto& oid = RGWMetadataLogHistory::oid;
102 auto obj_ctx = svc.sysobj->init_obj_ctx();
103 return rgw_put_system_obj(obj_ctx, pool, oid, bl,
104 exclusive, objv_tracker, real_time{}, y);
105 }
106
107 namespace mdlog {
108
109 using Cursor = RGWPeriodHistory::Cursor;
110
111 /// read the mdlog history and use it to initialize the given cursor
112 class ReadHistoryCR : public RGWCoroutine {
113 Svc svc;
114 Cursor *cursor;
115 RGWObjVersionTracker *objv_tracker;
116 RGWMetadataLogHistory state;
117 RGWAsyncRadosProcessor *async_processor;
118
119 public:
120 ReadHistoryCR(const Svc& svc,
121 Cursor *cursor,
122 RGWObjVersionTracker *objv_tracker)
123 : RGWCoroutine(svc.zone->ctx()), svc(svc),
124 cursor(cursor),
125 objv_tracker(objv_tracker),
126 async_processor(svc.rados->get_async_processor())
127 {}
128
129 int operate() {
130 reenter(this) {
131 yield {
132 rgw_raw_obj obj{svc.zone->get_zone_params().log_pool,
133 RGWMetadataLogHistory::oid};
134 constexpr bool empty_on_enoent = false;
135
136 using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>;
137 call(new ReadCR(async_processor, svc.sysobj, obj,
138 &state, empty_on_enoent, objv_tracker));
139 }
140 if (retcode < 0) {
141 ldout(cct, 1) << "failed to read mdlog history: "
142 << cpp_strerror(retcode) << dendl;
143 return set_cr_error(retcode);
144 }
145 *cursor = svc.mdlog->period_history->lookup(state.oldest_realm_epoch);
146 if (!*cursor) {
147 return set_cr_error(cursor->get_error());
148 }
149
150 ldout(cct, 10) << "read mdlog history with oldest period id="
151 << state.oldest_period_id << " realm_epoch="
152 << state.oldest_realm_epoch << dendl;
153 return set_cr_done();
154 }
155 return 0;
156 }
157 };
158
159 /// write the given cursor to the mdlog history
160 class WriteHistoryCR : public RGWCoroutine {
161 Svc svc;
162 Cursor cursor;
163 RGWObjVersionTracker *objv;
164 RGWMetadataLogHistory state;
165 RGWAsyncRadosProcessor *async_processor;
166
167 public:
168 WriteHistoryCR(Svc& svc,
169 const Cursor& cursor,
170 RGWObjVersionTracker *objv)
171 : RGWCoroutine(svc.zone->ctx()), svc(svc),
172 cursor(cursor), objv(objv),
173 async_processor(svc.rados->get_async_processor())
174 {}
175
176 int operate() {
177 reenter(this) {
178 state.oldest_period_id = cursor.get_period().get_id();
179 state.oldest_realm_epoch = cursor.get_epoch();
180
181 yield {
182 rgw_raw_obj obj{svc.zone->get_zone_params().log_pool,
183 RGWMetadataLogHistory::oid};
184
185 using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>;
186 call(new WriteCR(async_processor, svc.sysobj, obj, state, objv));
187 }
188 if (retcode < 0) {
189 ldout(cct, 1) << "failed to write mdlog history: "
190 << cpp_strerror(retcode) << dendl;
191 return set_cr_error(retcode);
192 }
193
194 ldout(cct, 10) << "wrote mdlog history with oldest period id="
195 << state.oldest_period_id << " realm_epoch="
196 << state.oldest_realm_epoch << dendl;
197 return set_cr_done();
198 }
199 return 0;
200 }
201 };
202
203 /// update the mdlog history to reflect trimmed logs
204 class TrimHistoryCR : public RGWCoroutine {
205 Svc svc;
206 const Cursor cursor; //< cursor to trimmed period
207 RGWObjVersionTracker *objv; //< to prevent racing updates
208 Cursor next; //< target cursor for oldest log period
209 Cursor existing; //< existing cursor read from disk
210
211 public:
212 TrimHistoryCR(const Svc& svc, Cursor cursor, RGWObjVersionTracker *objv)
213 : RGWCoroutine(svc.zone->ctx()), svc(svc),
214 cursor(cursor), objv(objv), next(cursor) {
215 next.next(); // advance past cursor
216 }
217
218 int operate() {
219 reenter(this) {
220 // read an existing history, and write the new history if it's newer
221 yield call(new ReadHistoryCR(svc, &existing, objv));
222 if (retcode < 0) {
223 return set_cr_error(retcode);
224 }
225 // reject older trims with ECANCELED
226 if (cursor.get_epoch() < existing.get_epoch()) {
227 ldout(cct, 4) << "found oldest log epoch=" << existing.get_epoch()
228 << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl;
229 return set_cr_error(-ECANCELED);
230 }
231 // overwrite with updated history
232 yield call(new WriteHistoryCR(svc, next, objv));
233 if (retcode < 0) {
234 return set_cr_error(retcode);
235 }
236 return set_cr_done();
237 }
238 return 0;
239 }
240 };
241
242 } // mdlog namespace
243
244 // traverse all the way back to the beginning of the period history, and
245 // return a cursor to the first period in a fully attached history
246 Cursor RGWSI_MDLog::find_oldest_period(optional_yield y)
247 {
248 auto cursor = period_history->get_current();
249
250 while (cursor) {
251 // advance to the period's predecessor
252 if (!cursor.has_prev()) {
253 auto& predecessor = cursor.get_period().get_predecessor();
254 if (predecessor.empty()) {
255 // this is the first period, so our logs must start here
256 ldout(cct, 10) << "find_oldest_period returning first "
257 "period " << cursor.get_period().get_id() << dendl;
258 return cursor;
259 }
260 // pull the predecessor and add it to our history
261 RGWPeriod period;
262 int r = period_puller->pull(predecessor, period, y);
263 if (r < 0) {
264 return cursor;
265 }
266 auto prev = period_history->insert(std::move(period));
267 if (!prev) {
268 return prev;
269 }
270 ldout(cct, 20) << "find_oldest_period advancing to "
271 "predecessor period " << predecessor << dendl;
272 ceph_assert(cursor.has_prev());
273 }
274 cursor.prev();
275 }
276 ldout(cct, 10) << "find_oldest_period returning empty cursor" << dendl;
277 return cursor;
278 }
279
280 Cursor RGWSI_MDLog::init_oldest_log_period(optional_yield y)
281 {
282 // read the mdlog history
283 RGWMetadataLogHistory state;
284 RGWObjVersionTracker objv;
285 int ret = read_history(&state, &objv, y);
286
287 if (ret == -ENOENT) {
288 // initialize the mdlog history and write it
289 ldout(cct, 10) << "initializing mdlog history" << dendl;
290 auto cursor = find_oldest_period(y);
291 if (!cursor) {
292 return cursor;
293 }
294 // write the initial history
295 state.oldest_realm_epoch = cursor.get_epoch();
296 state.oldest_period_id = cursor.get_period().get_id();
297
298 constexpr bool exclusive = true; // don't overwrite
299 int ret = write_history(state, &objv, y, exclusive);
300 if (ret < 0 && ret != -EEXIST) {
301 ldout(cct, 1) << "failed to write mdlog history: "
302 << cpp_strerror(ret) << dendl;
303 return Cursor{ret};
304 }
305 return cursor;
306 } else if (ret < 0) {
307 ldout(cct, 1) << "failed to read mdlog history: "
308 << cpp_strerror(ret) << dendl;
309 return Cursor{ret};
310 }
311
312 // if it's already in the history, return it
313 auto cursor = period_history->lookup(state.oldest_realm_epoch);
314 if (cursor) {
315 return cursor;
316 } else {
317 cursor = find_oldest_period(y);
318 state.oldest_realm_epoch = cursor.get_epoch();
319 state.oldest_period_id = cursor.get_period().get_id();
320 ldout(cct, 10) << "rewriting mdlog history" << dendl;
321 ret = write_history(state, &objv, y);
322 if (ret < 0 && ret != -ECANCELED) {
323 ldout(cct, 1) << "failed to write mdlog history: "
324 << cpp_strerror(ret) << dendl;
325 return Cursor{ret};
326 }
327 return cursor;
328 }
329
330 // pull the oldest period by id
331 RGWPeriod period;
332 ret = period_puller->pull(state.oldest_period_id, period, y);
333 if (ret < 0) {
334 ldout(cct, 1) << "failed to read period id=" << state.oldest_period_id
335 << " for mdlog history: " << cpp_strerror(ret) << dendl;
336 return Cursor{ret};
337 }
338 // verify its realm_epoch
339 if (period.get_realm_epoch() != state.oldest_realm_epoch) {
340 ldout(cct, 1) << "inconsistent mdlog history: read period id="
341 << period.get_id() << " with realm_epoch=" << period.get_realm_epoch()
342 << ", expected realm_epoch=" << state.oldest_realm_epoch << dendl;
343 return Cursor{-EINVAL};
344 }
345 // attach the period to our history
346 return period_history->attach(std::move(period), y);
347 }
348
349 Cursor RGWSI_MDLog::read_oldest_log_period(optional_yield y) const
350 {
351 RGWMetadataLogHistory state;
352 int ret = read_history(&state, nullptr, y);
353 if (ret < 0) {
354 ldout(cct, 1) << "failed to read mdlog history: "
355 << cpp_strerror(ret) << dendl;
356 return Cursor{ret};
357 }
358
359 ldout(cct, 10) << "read mdlog history with oldest period id="
360 << state.oldest_period_id << " realm_epoch="
361 << state.oldest_realm_epoch << dendl;
362
363 return period_history->lookup(state.oldest_realm_epoch);
364 }
365
366 RGWCoroutine* RGWSI_MDLog::read_oldest_log_period_cr(Cursor *period,
367 RGWObjVersionTracker *objv) const
368 {
369 return new mdlog::ReadHistoryCR(svc, period, objv);
370 }
371
372 RGWCoroutine* RGWSI_MDLog::trim_log_period_cr(Cursor period,
373 RGWObjVersionTracker *objv) const
374 {
375 return new mdlog::TrimHistoryCR(svc, period, objv);
376 }
377
378 RGWMetadataLog* RGWSI_MDLog::get_log(const std::string& period)
379 {
380 // construct the period's log in place if it doesn't exist
381 auto insert = md_logs.emplace(std::piecewise_construct,
382 std::forward_as_tuple(period),
383 std::forward_as_tuple(cct, svc.zone, svc.cls, period));
384 return &insert.first->second;
385 }
386
387 int RGWSI_MDLog::add_entry(const string& hash_key, const string& section, const string& key, bufferlist& bl)
388 {
389 ceph_assert(current_log); // must have called init()
390 return current_log->add_entry(hash_key, section, key, bl);
391 }
392
393 int RGWSI_MDLog::get_shard_id(const string& hash_key, int *shard_id)
394 {
395 ceph_assert(current_log); // must have called init()
396 return current_log->get_shard_id(hash_key, shard_id);
397 }
398
399 int RGWSI_MDLog::pull_period(const std::string& period_id, RGWPeriod& period,
400 optional_yield y)
401 {
402 return period_puller->pull(period_id, period, y);
403 }
404