]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/services/svc_mdlog.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / rgw / services / svc_mdlog.cc
CommitLineData
9f95a23c
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab ft=cpp
3
4#include "svc_mdlog.h"
5#include "svc_rados.h"
6#include "svc_zone.h"
7#include "svc_sys_obj.h"
8
9#include "rgw/rgw_tools.h"
10#include "rgw/rgw_mdlog.h"
11#include "rgw/rgw_coroutine.h"
12#include "rgw/rgw_cr_rados.h"
13#include "rgw/rgw_zone.h"
14
15#include "common/errno.h"
16
17#include <boost/asio/yield.hpp>
18
19#define dout_subsys ceph_subsys_rgw
20
20effc67
TL
21using namespace std;
22
9f95a23c
TL
23using Svc = RGWSI_MDLog::Svc;
24using Cursor = RGWPeriodHistory::Cursor;
25
26RGWSI_MDLog::RGWSI_MDLog(CephContext *cct, bool _run_sync) : RGWServiceInstance(cct), run_sync(_run_sync) {
27}
28
29RGWSI_MDLog::~RGWSI_MDLog() {
30}
31
32int RGWSI_MDLog::init(RGWSI_RADOS *_rados_svc, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, RGWSI_Cls *_cls_svc)
33{
34 svc.zone = _zone_svc;
35 svc.sysobj = _sysobj_svc;
36 svc.mdlog = this;
37 svc.rados = _rados_svc;
38 svc.cls = _cls_svc;
39
40 return 0;
41}
42
b3b6e05e 43int RGWSI_MDLog::do_start(optional_yield y, const DoutPrefixProvider *dpp)
9f95a23c
TL
44{
45 auto& current_period = svc.zone->get_current_period();
46
47 current_log = get_log(current_period.get_id());
48
49 period_puller.reset(new RGWPeriodPuller(svc.zone, svc.sysobj));
50 period_history.reset(new RGWPeriodHistory(cct, period_puller.get(),
51 current_period));
52
53 if (run_sync &&
54 svc.zone->need_to_sync()) {
55 // initialize the log period history
b3b6e05e 56 svc.mdlog->init_oldest_log_period(y, dpp);
9f95a23c
TL
57 }
58 return 0;
59}
60
61int RGWSI_MDLog::read_history(RGWMetadataLogHistory *state,
f67539c2 62 RGWObjVersionTracker *objv_tracker,
b3b6e05e
TL
63 optional_yield y,
64 const DoutPrefixProvider *dpp) const
9f95a23c
TL
65{
66 auto obj_ctx = svc.sysobj->init_obj_ctx();
67 auto& pool = svc.zone->get_zone_params().log_pool;
68 const auto& oid = RGWMetadataLogHistory::oid;
69 bufferlist bl;
b3b6e05e 70 int ret = rgw_get_system_obj(obj_ctx, pool, oid, bl, objv_tracker, nullptr, y, dpp);
9f95a23c
TL
71 if (ret < 0) {
72 return ret;
73 }
74 if (bl.length() == 0) {
75 /* bad history object, remove it */
76 rgw_raw_obj obj(pool, oid);
77 auto sysobj = obj_ctx.get_obj(obj);
b3b6e05e 78 ret = sysobj.wop().remove(dpp, y);
9f95a23c 79 if (ret < 0) {
b3b6e05e 80 ldpp_dout(dpp, 0) << "ERROR: meta history is empty, but cannot remove it (" << cpp_strerror(-ret) << ")" << dendl;
9f95a23c
TL
81 return ret;
82 }
83 return -ENOENT;
84 }
85 try {
86 auto p = bl.cbegin();
87 state->decode(p);
88 } catch (buffer::error& e) {
b3b6e05e 89 ldpp_dout(dpp, 1) << "failed to decode the mdlog history: "
9f95a23c
TL
90 << e.what() << dendl;
91 return -EIO;
92 }
93 return 0;
94}
95
b3b6e05e
TL
96int RGWSI_MDLog::write_history(const DoutPrefixProvider *dpp,
97 const RGWMetadataLogHistory& state,
9f95a23c 98 RGWObjVersionTracker *objv_tracker,
f67539c2 99 optional_yield y, bool exclusive)
9f95a23c
TL
100{
101 bufferlist bl;
102 state.encode(bl);
103
104 auto& pool = svc.zone->get_zone_params().log_pool;
105 const auto& oid = RGWMetadataLogHistory::oid;
106 auto obj_ctx = svc.sysobj->init_obj_ctx();
b3b6e05e 107 return rgw_put_system_obj(dpp, obj_ctx, pool, oid, bl,
f67539c2 108 exclusive, objv_tracker, real_time{}, y);
9f95a23c
TL
109}
110
111namespace mdlog {
112
113using Cursor = RGWPeriodHistory::Cursor;
114
115/// read the mdlog history and use it to initialize the given cursor
116class ReadHistoryCR : public RGWCoroutine {
b3b6e05e 117 const DoutPrefixProvider *dpp;
9f95a23c
TL
118 Svc svc;
119 Cursor *cursor;
120 RGWObjVersionTracker *objv_tracker;
121 RGWMetadataLogHistory state;
122 RGWAsyncRadosProcessor *async_processor;
123
124 public:
b3b6e05e
TL
125 ReadHistoryCR(const DoutPrefixProvider *dpp,
126 const Svc& svc,
9f95a23c
TL
127 Cursor *cursor,
128 RGWObjVersionTracker *objv_tracker)
b3b6e05e 129 : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc),
9f95a23c
TL
130 cursor(cursor),
131 objv_tracker(objv_tracker),
132 async_processor(svc.rados->get_async_processor())
133 {}
134
b3b6e05e 135 int operate(const DoutPrefixProvider *dpp) {
9f95a23c
TL
136 reenter(this) {
137 yield {
138 rgw_raw_obj obj{svc.zone->get_zone_params().log_pool,
139 RGWMetadataLogHistory::oid};
140 constexpr bool empty_on_enoent = false;
141
142 using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>;
b3b6e05e 143 call(new ReadCR(dpp, async_processor, svc.sysobj, obj,
9f95a23c
TL
144 &state, empty_on_enoent, objv_tracker));
145 }
146 if (retcode < 0) {
b3b6e05e 147 ldpp_dout(dpp, 1) << "failed to read mdlog history: "
9f95a23c
TL
148 << cpp_strerror(retcode) << dendl;
149 return set_cr_error(retcode);
150 }
151 *cursor = svc.mdlog->period_history->lookup(state.oldest_realm_epoch);
152 if (!*cursor) {
153 return set_cr_error(cursor->get_error());
154 }
155
b3b6e05e 156 ldpp_dout(dpp, 10) << "read mdlog history with oldest period id="
9f95a23c
TL
157 << state.oldest_period_id << " realm_epoch="
158 << state.oldest_realm_epoch << dendl;
159 return set_cr_done();
160 }
161 return 0;
162 }
163};
164
165/// write the given cursor to the mdlog history
166class WriteHistoryCR : public RGWCoroutine {
b3b6e05e 167 const DoutPrefixProvider *dpp;
9f95a23c
TL
168 Svc svc;
169 Cursor cursor;
170 RGWObjVersionTracker *objv;
171 RGWMetadataLogHistory state;
172 RGWAsyncRadosProcessor *async_processor;
173
174 public:
b3b6e05e
TL
175 WriteHistoryCR(const DoutPrefixProvider *dpp,
176 Svc& svc,
9f95a23c
TL
177 const Cursor& cursor,
178 RGWObjVersionTracker *objv)
b3b6e05e 179 : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc),
9f95a23c
TL
180 cursor(cursor), objv(objv),
181 async_processor(svc.rados->get_async_processor())
182 {}
183
b3b6e05e 184 int operate(const DoutPrefixProvider *dpp) {
9f95a23c
TL
185 reenter(this) {
186 state.oldest_period_id = cursor.get_period().get_id();
187 state.oldest_realm_epoch = cursor.get_epoch();
188
189 yield {
190 rgw_raw_obj obj{svc.zone->get_zone_params().log_pool,
191 RGWMetadataLogHistory::oid};
192
193 using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>;
b3b6e05e 194 call(new WriteCR(dpp, async_processor, svc.sysobj, obj, state, objv));
9f95a23c
TL
195 }
196 if (retcode < 0) {
b3b6e05e 197 ldpp_dout(dpp, 1) << "failed to write mdlog history: "
9f95a23c
TL
198 << cpp_strerror(retcode) << dendl;
199 return set_cr_error(retcode);
200 }
201
b3b6e05e 202 ldpp_dout(dpp, 10) << "wrote mdlog history with oldest period id="
9f95a23c
TL
203 << state.oldest_period_id << " realm_epoch="
204 << state.oldest_realm_epoch << dendl;
205 return set_cr_done();
206 }
207 return 0;
208 }
209};
210
211/// update the mdlog history to reflect trimmed logs
212class TrimHistoryCR : public RGWCoroutine {
b3b6e05e 213 const DoutPrefixProvider *dpp;
9f95a23c
TL
214 Svc svc;
215 const Cursor cursor; //< cursor to trimmed period
216 RGWObjVersionTracker *objv; //< to prevent racing updates
217 Cursor next; //< target cursor for oldest log period
218 Cursor existing; //< existing cursor read from disk
219
220 public:
b3b6e05e
TL
221 TrimHistoryCR(const DoutPrefixProvider *dpp, const Svc& svc, Cursor cursor, RGWObjVersionTracker *objv)
222 : RGWCoroutine(svc.zone->ctx()), dpp(dpp), svc(svc),
9f95a23c
TL
223 cursor(cursor), objv(objv), next(cursor) {
224 next.next(); // advance past cursor
225 }
226
b3b6e05e 227 int operate(const DoutPrefixProvider *dpp) {
9f95a23c
TL
228 reenter(this) {
229 // read an existing history, and write the new history if it's newer
b3b6e05e 230 yield call(new ReadHistoryCR(dpp, svc, &existing, objv));
9f95a23c
TL
231 if (retcode < 0) {
232 return set_cr_error(retcode);
233 }
234 // reject older trims with ECANCELED
235 if (cursor.get_epoch() < existing.get_epoch()) {
b3b6e05e 236 ldpp_dout(dpp, 4) << "found oldest log epoch=" << existing.get_epoch()
9f95a23c
TL
237 << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl;
238 return set_cr_error(-ECANCELED);
239 }
240 // overwrite with updated history
b3b6e05e 241 yield call(new WriteHistoryCR(dpp, svc, next, objv));
9f95a23c
TL
242 if (retcode < 0) {
243 return set_cr_error(retcode);
244 }
245 return set_cr_done();
246 }
247 return 0;
248 }
249};
250
251} // mdlog namespace
252
253// traverse all the way back to the beginning of the period history, and
254// return a cursor to the first period in a fully attached history
b3b6e05e 255Cursor RGWSI_MDLog::find_oldest_period(const DoutPrefixProvider *dpp, optional_yield y)
9f95a23c
TL
256{
257 auto cursor = period_history->get_current();
258
259 while (cursor) {
260 // advance to the period's predecessor
261 if (!cursor.has_prev()) {
262 auto& predecessor = cursor.get_period().get_predecessor();
263 if (predecessor.empty()) {
264 // this is the first period, so our logs must start here
b3b6e05e 265 ldpp_dout(dpp, 10) << "find_oldest_period returning first "
9f95a23c
TL
266 "period " << cursor.get_period().get_id() << dendl;
267 return cursor;
268 }
269 // pull the predecessor and add it to our history
270 RGWPeriod period;
b3b6e05e 271 int r = period_puller->pull(dpp, predecessor, period, y);
9f95a23c
TL
272 if (r < 0) {
273 return cursor;
274 }
275 auto prev = period_history->insert(std::move(period));
276 if (!prev) {
277 return prev;
278 }
b3b6e05e 279 ldpp_dout(dpp, 20) << "find_oldest_period advancing to "
9f95a23c
TL
280 "predecessor period " << predecessor << dendl;
281 ceph_assert(cursor.has_prev());
282 }
283 cursor.prev();
284 }
b3b6e05e 285 ldpp_dout(dpp, 10) << "find_oldest_period returning empty cursor" << dendl;
9f95a23c
TL
286 return cursor;
287}
288
b3b6e05e 289Cursor RGWSI_MDLog::init_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp)
9f95a23c
TL
290{
291 // read the mdlog history
292 RGWMetadataLogHistory state;
293 RGWObjVersionTracker objv;
b3b6e05e 294 int ret = read_history(&state, &objv, y, dpp);
9f95a23c
TL
295
296 if (ret == -ENOENT) {
297 // initialize the mdlog history and write it
b3b6e05e
TL
298 ldpp_dout(dpp, 10) << "initializing mdlog history" << dendl;
299 auto cursor = find_oldest_period(dpp, y);
9f95a23c
TL
300 if (!cursor) {
301 return cursor;
302 }
303 // write the initial history
304 state.oldest_realm_epoch = cursor.get_epoch();
305 state.oldest_period_id = cursor.get_period().get_id();
306
307 constexpr bool exclusive = true; // don't overwrite
b3b6e05e 308 int ret = write_history(dpp, state, &objv, y, exclusive);
9f95a23c 309 if (ret < 0 && ret != -EEXIST) {
b3b6e05e 310 ldpp_dout(dpp, 1) << "failed to write mdlog history: "
9f95a23c
TL
311 << cpp_strerror(ret) << dendl;
312 return Cursor{ret};
313 }
314 return cursor;
315 } else if (ret < 0) {
b3b6e05e 316 ldpp_dout(dpp, 1) << "failed to read mdlog history: "
9f95a23c
TL
317 << cpp_strerror(ret) << dendl;
318 return Cursor{ret};
319 }
320
321 // if it's already in the history, return it
322 auto cursor = period_history->lookup(state.oldest_realm_epoch);
323 if (cursor) {
324 return cursor;
325 } else {
b3b6e05e 326 cursor = find_oldest_period(dpp, y);
9f95a23c
TL
327 state.oldest_realm_epoch = cursor.get_epoch();
328 state.oldest_period_id = cursor.get_period().get_id();
b3b6e05e
TL
329 ldpp_dout(dpp, 10) << "rewriting mdlog history" << dendl;
330 ret = write_history(dpp, state, &objv, y);
9f95a23c 331 if (ret < 0 && ret != -ECANCELED) {
b3b6e05e 332 ldpp_dout(dpp, 1) << "failed to write mdlog history: "
9f95a23c
TL
333 << cpp_strerror(ret) << dendl;
334 return Cursor{ret};
335 }
336 return cursor;
337 }
338
339 // pull the oldest period by id
340 RGWPeriod period;
b3b6e05e 341 ret = period_puller->pull(dpp, state.oldest_period_id, period, y);
9f95a23c 342 if (ret < 0) {
b3b6e05e 343 ldpp_dout(dpp, 1) << "failed to read period id=" << state.oldest_period_id
9f95a23c
TL
344 << " for mdlog history: " << cpp_strerror(ret) << dendl;
345 return Cursor{ret};
346 }
347 // verify its realm_epoch
348 if (period.get_realm_epoch() != state.oldest_realm_epoch) {
b3b6e05e 349 ldpp_dout(dpp, 1) << "inconsistent mdlog history: read period id="
9f95a23c
TL
350 << period.get_id() << " with realm_epoch=" << period.get_realm_epoch()
351 << ", expected realm_epoch=" << state.oldest_realm_epoch << dendl;
352 return Cursor{-EINVAL};
353 }
354 // attach the period to our history
b3b6e05e 355 return period_history->attach(dpp, std::move(period), y);
9f95a23c
TL
356}
357
b3b6e05e 358Cursor RGWSI_MDLog::read_oldest_log_period(optional_yield y, const DoutPrefixProvider *dpp) const
9f95a23c
TL
359{
360 RGWMetadataLogHistory state;
b3b6e05e 361 int ret = read_history(&state, nullptr, y, dpp);
9f95a23c 362 if (ret < 0) {
b3b6e05e 363 ldpp_dout(dpp, 1) << "failed to read mdlog history: "
9f95a23c
TL
364 << cpp_strerror(ret) << dendl;
365 return Cursor{ret};
366 }
367
b3b6e05e 368 ldpp_dout(dpp, 10) << "read mdlog history with oldest period id="
9f95a23c
TL
369 << state.oldest_period_id << " realm_epoch="
370 << state.oldest_realm_epoch << dendl;
371
372 return period_history->lookup(state.oldest_realm_epoch);
373}
374
b3b6e05e
TL
375RGWCoroutine* RGWSI_MDLog::read_oldest_log_period_cr(const DoutPrefixProvider *dpp,
376 Cursor *period, RGWObjVersionTracker *objv) const
9f95a23c 377{
b3b6e05e 378 return new mdlog::ReadHistoryCR(dpp, svc, period, objv);
9f95a23c
TL
379}
380
b3b6e05e
TL
381RGWCoroutine* RGWSI_MDLog::trim_log_period_cr(const DoutPrefixProvider *dpp,
382 Cursor period, RGWObjVersionTracker *objv) const
9f95a23c 383{
b3b6e05e 384 return new mdlog::TrimHistoryCR(dpp, svc, period, objv);
9f95a23c
TL
385}
386
387RGWMetadataLog* RGWSI_MDLog::get_log(const std::string& period)
388{
389 // construct the period's log in place if it doesn't exist
390 auto insert = md_logs.emplace(std::piecewise_construct,
391 std::forward_as_tuple(period),
392 std::forward_as_tuple(cct, svc.zone, svc.cls, period));
393 return &insert.first->second;
394}
395
b3b6e05e 396int RGWSI_MDLog::add_entry(const DoutPrefixProvider *dpp, const string& hash_key, const string& section, const string& key, bufferlist& bl)
9f95a23c
TL
397{
398 ceph_assert(current_log); // must have called init()
b3b6e05e 399 return current_log->add_entry(dpp, hash_key, section, key, bl);
9f95a23c
TL
400}
401
402int RGWSI_MDLog::get_shard_id(const string& hash_key, int *shard_id)
403{
404 ceph_assert(current_log); // must have called init()
405 return current_log->get_shard_id(hash_key, shard_id);
406}
407
b3b6e05e 408int RGWSI_MDLog::pull_period(const DoutPrefixProvider *dpp, const std::string& period_id, RGWPeriod& period,
f67539c2 409 optional_yield y)
9f95a23c 410{
b3b6e05e 411 return period_puller->pull(dpp, period_id, period, y);
9f95a23c
TL
412}
413