git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/services/svc_mdlog.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / rgw / services / svc_mdlog.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
3
4 #include "svc_mdlog.h"
5 #include "svc_rados.h"
6 #include "svc_zone.h"
7 #include "svc_sys_obj.h"
8
9 #include "rgw/rgw_tools.h"
10 #include "rgw/rgw_mdlog.h"
11 #include "rgw/rgw_coroutine.h"
12 #include "rgw/rgw_cr_rados.h"
13 #include "rgw/rgw_zone.h"
14
15 #include "common/errno.h"
16
17 #include <boost/asio/yield.hpp>
18
19 #define dout_subsys ceph_subsys_rgw
20
21 using Svc = RGWSI_MDLog::Svc;
22 using Cursor = RGWPeriodHistory::Cursor;
23
24 RGWSI_MDLog::RGWSI_MDLog(CephContext *cct, bool _run_sync) : RGWServiceInstance(cct), run_sync(_run_sync) {
25 }
26
27 RGWSI_MDLog::~RGWSI_MDLog() {
28 }
29
30 int RGWSI_MDLog::init(RGWSI_RADOS *_rados_svc, RGWSI_Zone *_zone_svc, RGWSI_SysObj *_sysobj_svc, RGWSI_Cls *_cls_svc)
31 {
32 svc.zone = _zone_svc;
33 svc.sysobj = _sysobj_svc;
34 svc.mdlog = this;
35 svc.rados = _rados_svc;
36 svc.cls = _cls_svc;
37
38 return 0;
39 }
40
41 int RGWSI_MDLog::do_start()
42 {
43 auto& current_period = svc.zone->get_current_period();
44
45 current_log = get_log(current_period.get_id());
46
47 period_puller.reset(new RGWPeriodPuller(svc.zone, svc.sysobj));
48 period_history.reset(new RGWPeriodHistory(cct, period_puller.get(),
49 current_period));
50
51 if (run_sync &&
52 svc.zone->need_to_sync()) {
53 // initialize the log period history
54 svc.mdlog->init_oldest_log_period();
55 }
56 return 0;
57 }
58
59 int RGWSI_MDLog::read_history(RGWMetadataLogHistory *state,
60 RGWObjVersionTracker *objv_tracker) const
61 {
62 auto obj_ctx = svc.sysobj->init_obj_ctx();
63 auto& pool = svc.zone->get_zone_params().log_pool;
64 const auto& oid = RGWMetadataLogHistory::oid;
65 bufferlist bl;
66 int ret = rgw_get_system_obj(obj_ctx, pool, oid, bl, objv_tracker, nullptr, null_yield);
67 if (ret < 0) {
68 return ret;
69 }
70 if (bl.length() == 0) {
71 /* bad history object, remove it */
72 rgw_raw_obj obj(pool, oid);
73 auto sysobj = obj_ctx.get_obj(obj);
74 ret = sysobj.wop().remove(null_yield);
75 if (ret < 0) {
76 ldout(cct, 0) << "ERROR: meta history is empty, but cannot remove it (" << cpp_strerror(-ret) << ")" << dendl;
77 return ret;
78 }
79 return -ENOENT;
80 }
81 try {
82 auto p = bl.cbegin();
83 state->decode(p);
84 } catch (buffer::error& e) {
85 ldout(cct, 1) << "failed to decode the mdlog history: "
86 << e.what() << dendl;
87 return -EIO;
88 }
89 return 0;
90 }
91
92 int RGWSI_MDLog::write_history(const RGWMetadataLogHistory& state,
93 RGWObjVersionTracker *objv_tracker,
94 bool exclusive)
95 {
96 bufferlist bl;
97 state.encode(bl);
98
99 auto& pool = svc.zone->get_zone_params().log_pool;
100 const auto& oid = RGWMetadataLogHistory::oid;
101 auto obj_ctx = svc.sysobj->init_obj_ctx();
102 return rgw_put_system_obj(obj_ctx, pool, oid, bl,
103 exclusive, objv_tracker, real_time{});
104 }
105
106 namespace mdlog {
107
108 using Cursor = RGWPeriodHistory::Cursor;
109
110 /// read the mdlog history and use it to initialize the given cursor
111 class ReadHistoryCR : public RGWCoroutine {
112 Svc svc;
113 Cursor *cursor;
114 RGWObjVersionTracker *objv_tracker;
115 RGWMetadataLogHistory state;
116 RGWAsyncRadosProcessor *async_processor;
117
118 public:
119 ReadHistoryCR(const Svc& svc,
120 Cursor *cursor,
121 RGWObjVersionTracker *objv_tracker)
122 : RGWCoroutine(svc.zone->ctx()), svc(svc),
123 cursor(cursor),
124 objv_tracker(objv_tracker),
125 async_processor(svc.rados->get_async_processor())
126 {}
127
128 int operate() {
129 reenter(this) {
130 yield {
131 rgw_raw_obj obj{svc.zone->get_zone_params().log_pool,
132 RGWMetadataLogHistory::oid};
133 constexpr bool empty_on_enoent = false;
134
135 using ReadCR = RGWSimpleRadosReadCR<RGWMetadataLogHistory>;
136 call(new ReadCR(async_processor, svc.sysobj, obj,
137 &state, empty_on_enoent, objv_tracker));
138 }
139 if (retcode < 0) {
140 ldout(cct, 1) << "failed to read mdlog history: "
141 << cpp_strerror(retcode) << dendl;
142 return set_cr_error(retcode);
143 }
144 *cursor = svc.mdlog->period_history->lookup(state.oldest_realm_epoch);
145 if (!*cursor) {
146 return set_cr_error(cursor->get_error());
147 }
148
149 ldout(cct, 10) << "read mdlog history with oldest period id="
150 << state.oldest_period_id << " realm_epoch="
151 << state.oldest_realm_epoch << dendl;
152 return set_cr_done();
153 }
154 return 0;
155 }
156 };
157
158 /// write the given cursor to the mdlog history
159 class WriteHistoryCR : public RGWCoroutine {
160 Svc svc;
161 Cursor cursor;
162 RGWObjVersionTracker *objv;
163 RGWMetadataLogHistory state;
164 RGWAsyncRadosProcessor *async_processor;
165
166 public:
167 WriteHistoryCR(Svc& svc,
168 const Cursor& cursor,
169 RGWObjVersionTracker *objv)
170 : RGWCoroutine(svc.zone->ctx()), svc(svc),
171 cursor(cursor), objv(objv),
172 async_processor(svc.rados->get_async_processor())
173 {}
174
175 int operate() {
176 reenter(this) {
177 state.oldest_period_id = cursor.get_period().get_id();
178 state.oldest_realm_epoch = cursor.get_epoch();
179
180 yield {
181 rgw_raw_obj obj{svc.zone->get_zone_params().log_pool,
182 RGWMetadataLogHistory::oid};
183
184 using WriteCR = RGWSimpleRadosWriteCR<RGWMetadataLogHistory>;
185 call(new WriteCR(async_processor, svc.sysobj, obj, state, objv));
186 }
187 if (retcode < 0) {
188 ldout(cct, 1) << "failed to write mdlog history: "
189 << cpp_strerror(retcode) << dendl;
190 return set_cr_error(retcode);
191 }
192
193 ldout(cct, 10) << "wrote mdlog history with oldest period id="
194 << state.oldest_period_id << " realm_epoch="
195 << state.oldest_realm_epoch << dendl;
196 return set_cr_done();
197 }
198 return 0;
199 }
200 };
201
202 /// update the mdlog history to reflect trimmed logs
203 class TrimHistoryCR : public RGWCoroutine {
204 Svc svc;
205 const Cursor cursor; //< cursor to trimmed period
206 RGWObjVersionTracker *objv; //< to prevent racing updates
207 Cursor next; //< target cursor for oldest log period
208 Cursor existing; //< existing cursor read from disk
209
210 public:
211 TrimHistoryCR(const Svc& svc, Cursor cursor, RGWObjVersionTracker *objv)
212 : RGWCoroutine(svc.zone->ctx()), svc(svc),
213 cursor(cursor), objv(objv), next(cursor) {
214 next.next(); // advance past cursor
215 }
216
217 int operate() {
218 reenter(this) {
219 // read an existing history, and write the new history if it's newer
220 yield call(new ReadHistoryCR(svc, &existing, objv));
221 if (retcode < 0) {
222 return set_cr_error(retcode);
223 }
224 // reject older trims with ECANCELED
225 if (cursor.get_epoch() < existing.get_epoch()) {
226 ldout(cct, 4) << "found oldest log epoch=" << existing.get_epoch()
227 << ", rejecting trim at epoch=" << cursor.get_epoch() << dendl;
228 return set_cr_error(-ECANCELED);
229 }
230 // overwrite with updated history
231 yield call(new WriteHistoryCR(svc, next, objv));
232 if (retcode < 0) {
233 return set_cr_error(retcode);
234 }
235 return set_cr_done();
236 }
237 return 0;
238 }
239 };
240
241 } // mdlog namespace
242
243 // traverse all the way back to the beginning of the period history, and
244 // return a cursor to the first period in a fully attached history
245 Cursor RGWSI_MDLog::find_oldest_period()
246 {
247 auto cursor = period_history->get_current();
248
249 while (cursor) {
250 // advance to the period's predecessor
251 if (!cursor.has_prev()) {
252 auto& predecessor = cursor.get_period().get_predecessor();
253 if (predecessor.empty()) {
254 // this is the first period, so our logs must start here
255 ldout(cct, 10) << "find_oldest_period returning first "
256 "period " << cursor.get_period().get_id() << dendl;
257 return cursor;
258 }
259 // pull the predecessor and add it to our history
260 RGWPeriod period;
261 int r = period_puller->pull(predecessor, period);
262 if (r < 0) {
263 return cursor;
264 }
265 auto prev = period_history->insert(std::move(period));
266 if (!prev) {
267 return prev;
268 }
269 ldout(cct, 20) << "find_oldest_period advancing to "
270 "predecessor period " << predecessor << dendl;
271 ceph_assert(cursor.has_prev());
272 }
273 cursor.prev();
274 }
275 ldout(cct, 10) << "find_oldest_period returning empty cursor" << dendl;
276 return cursor;
277 }
278
279 Cursor RGWSI_MDLog::init_oldest_log_period()
280 {
281 // read the mdlog history
282 RGWMetadataLogHistory state;
283 RGWObjVersionTracker objv;
284 int ret = read_history(&state, &objv);
285
286 if (ret == -ENOENT) {
287 // initialize the mdlog history and write it
288 ldout(cct, 10) << "initializing mdlog history" << dendl;
289 auto cursor = find_oldest_period();
290 if (!cursor) {
291 return cursor;
292 }
293 // write the initial history
294 state.oldest_realm_epoch = cursor.get_epoch();
295 state.oldest_period_id = cursor.get_period().get_id();
296
297 constexpr bool exclusive = true; // don't overwrite
298 int ret = write_history(state, &objv, exclusive);
299 if (ret < 0 && ret != -EEXIST) {
300 ldout(cct, 1) << "failed to write mdlog history: "
301 << cpp_strerror(ret) << dendl;
302 return Cursor{ret};
303 }
304 return cursor;
305 } else if (ret < 0) {
306 ldout(cct, 1) << "failed to read mdlog history: "
307 << cpp_strerror(ret) << dendl;
308 return Cursor{ret};
309 }
310
311 // if it's already in the history, return it
312 auto cursor = period_history->lookup(state.oldest_realm_epoch);
313 if (cursor) {
314 return cursor;
315 } else {
316 cursor = find_oldest_period();
317 state.oldest_realm_epoch = cursor.get_epoch();
318 state.oldest_period_id = cursor.get_period().get_id();
319 ldout(cct, 10) << "rewriting mdlog history" << dendl;
320 ret = write_history(state, &objv);
321 if (ret < 0 && ret != -ECANCELED) {
322 ldout(cct, 1) << "failed to write mdlog history: "
323 << cpp_strerror(ret) << dendl;
324 return Cursor{ret};
325 }
326 return cursor;
327 }
328
329 // pull the oldest period by id
330 RGWPeriod period;
331 ret = period_puller->pull(state.oldest_period_id, period);
332 if (ret < 0) {
333 ldout(cct, 1) << "failed to read period id=" << state.oldest_period_id
334 << " for mdlog history: " << cpp_strerror(ret) << dendl;
335 return Cursor{ret};
336 }
337 // verify its realm_epoch
338 if (period.get_realm_epoch() != state.oldest_realm_epoch) {
339 ldout(cct, 1) << "inconsistent mdlog history: read period id="
340 << period.get_id() << " with realm_epoch=" << period.get_realm_epoch()
341 << ", expected realm_epoch=" << state.oldest_realm_epoch << dendl;
342 return Cursor{-EINVAL};
343 }
344 // attach the period to our history
345 return period_history->attach(std::move(period));
346 }
347
348 Cursor RGWSI_MDLog::read_oldest_log_period() const
349 {
350 RGWMetadataLogHistory state;
351 int ret = read_history(&state, nullptr);
352 if (ret < 0) {
353 ldout(cct, 1) << "failed to read mdlog history: "
354 << cpp_strerror(ret) << dendl;
355 return Cursor{ret};
356 }
357
358 ldout(cct, 10) << "read mdlog history with oldest period id="
359 << state.oldest_period_id << " realm_epoch="
360 << state.oldest_realm_epoch << dendl;
361
362 return period_history->lookup(state.oldest_realm_epoch);
363 }
364
365 RGWCoroutine* RGWSI_MDLog::read_oldest_log_period_cr(Cursor *period,
366 RGWObjVersionTracker *objv) const
367 {
368 return new mdlog::ReadHistoryCR(svc, period, objv);
369 }
370
371 RGWCoroutine* RGWSI_MDLog::trim_log_period_cr(Cursor period,
372 RGWObjVersionTracker *objv) const
373 {
374 return new mdlog::TrimHistoryCR(svc, period, objv);
375 }
376
377 RGWMetadataLog* RGWSI_MDLog::get_log(const std::string& period)
378 {
379 // construct the period's log in place if it doesn't exist
380 auto insert = md_logs.emplace(std::piecewise_construct,
381 std::forward_as_tuple(period),
382 std::forward_as_tuple(cct, svc.zone, svc.cls, period));
383 return &insert.first->second;
384 }
385
386 int RGWSI_MDLog::add_entry(const string& hash_key, const string& section, const string& key, bufferlist& bl)
387 {
388 ceph_assert(current_log); // must have called init()
389 return current_log->add_entry(hash_key, section, key, bl);
390 }
391
392 int RGWSI_MDLog::get_shard_id(const string& hash_key, int *shard_id)
393 {
394 ceph_assert(current_log); // must have called init()
395 return current_log->get_shard_id(hash_key, shard_id);
396 }
397
398 int RGWSI_MDLog::pull_period(const std::string& period_id, RGWPeriod& period)
399 {
400 return period_puller->pull(period_id, period);
401 }
402