1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
7 #include "svc_sys_obj.h"
9 #include "rgw/rgw_tools.h"
10 #include "rgw/rgw_mdlog.h"
11 #include "rgw/rgw_coroutine.h"
12 #include "rgw/rgw_cr_rados.h"
13 #include "rgw/rgw_zone.h"
15 #include "common/errno.h"
17 #include <boost/asio/yield.hpp>
19 #define dout_subsys ceph_subsys_rgw
// File-local shorthand for the Svc type nested in RGWSI_MDLog; below it is
// used as the bundle of service handles (svc.zone, svc.sysobj, svc.rados, ...).
21 using Svc
= RGWSI_MDLog::Svc
;
// File-local shorthand for the cursor type nested in RGWPeriodHistory.
22 using Cursor
= RGWPeriodHistory::Cursor
;
24 RGWSI_MDLog::RGWSI_MDLog(CephContext
*cct
, bool _run_sync
) : RGWServiceInstance(cct
), run_sync(_run_sync
) {
27 RGWSI_MDLog::~RGWSI_MDLog() {
// Record the service handles this instance depends on in the `svc` bundle.
// Only the sysobj and rados assignments are visible in this excerpt;
// _zone_svc and _cls_svc are presumably stored the same way (svc.zone and
// svc.cls are read elsewhere in this file) -- TODO confirm.
30 int RGWSI_MDLog::init(RGWSI_RADOS
*_rados_svc
, RGWSI_Zone
*_zone_svc
, RGWSI_SysObj
*_sysobj_svc
, RGWSI_Cls
*_cls_svc
)
33 svc
.sysobj
= _sysobj_svc
;
35 svc
.rados
= _rados_svc
;
// Start-up hook: selects the log for the zone's current period, creates the
// period puller and the period history, and then (under a condition that
// includes svc.zone->need_to_sync()) initializes the oldest log period.
41 int RGWSI_MDLog::do_start()
43 auto& current_period
= svc
.zone
->get_current_period();
// current_log is what add_entry()/get_shard_id() later assert on
45 current_log
= get_log(current_period
.get_id());
// the puller is handed the zone and sysobj services
47 period_puller
.reset(new RGWPeriodPuller(svc
.zone
, svc
.sysobj
));
// the history receives a non-owning pointer to the puller created above
48 period_history
.reset(new RGWPeriodHistory(cct
, period_puller
.get(),
// NOTE(review): the start of this condition is not visible in this excerpt
52 svc
.zone
->need_to_sync()) {
53 // initialize the log period history
54 svc
.mdlog
->init_oldest_log_period();
// Read the mdlog history object (RGWMetadataLogHistory::oid in the zone's
// log pool) through the sysobj service.
//   state        - output; presumably filled by decoding the buffer read
//                  (the decode itself is not visible here -- TODO confirm)
//   objv_tracker - forwarded to rgw_get_system_obj()
// An empty object is treated as a bad history object and removed.
59 int RGWSI_MDLog::read_history(RGWMetadataLogHistory
*state
,
60 RGWObjVersionTracker
*objv_tracker
) const
62 auto obj_ctx
= svc
.sysobj
->init_obj_ctx();
63 auto& pool
= svc
.zone
->get_zone_params().log_pool
;
64 const auto& oid
= RGWMetadataLogHistory::oid
;
// synchronous read: null_yield is passed as the yield context
66 int ret
= rgw_get_system_obj(obj_ctx
, pool
, oid
, bl
, objv_tracker
, nullptr, null_yield
);
70 if (bl
.length() == 0) {
71 /* bad history object, remove it */
72 rgw_raw_obj
obj(pool
, oid
);
73 auto sysobj
= obj_ctx
.get_obj(obj
);
74 ret
= sysobj
.wop().remove(null_yield
);
// logged at level 0 when the removal itself fails
76 ldout(cct
, 0) << "ERROR: meta history is empty, but cannot remove it (" << cpp_strerror(-ret
) << ")" << dendl
;
// decode failures surface as buffer::error and are logged at level 1
84 } catch (buffer::error
& e
) {
85 ldout(cct
, 1) << "failed to decode the mdlog history: "
// Write the mdlog history object (RGWMetadataLogHistory::oid in the zone's
// log pool) and return the result of rgw_put_system_obj().
//   state        - history to store; presumably encoded into `bl`
//                  (the encode is not visible here -- TODO confirm)
//   objv_tracker - forwarded to rgw_put_system_obj()
// `exclusive` is also forwarded to rgw_put_system_obj(); its declaration is
// not visible in this excerpt.
92 int RGWSI_MDLog::write_history(const RGWMetadataLogHistory
& state
,
93 RGWObjVersionTracker
*objv_tracker
,
99 auto& pool
= svc
.zone
->get_zone_params().log_pool
;
100 const auto& oid
= RGWMetadataLogHistory::oid
;
101 auto obj_ctx
= svc
.sysobj
->init_obj_ctx();
// a default-constructed real_time is passed as the time argument
102 return rgw_put_system_obj(obj_ctx
, pool
, oid
, bl
,
103 exclusive
, objv_tracker
, real_time
{});
// Same alias as at file scope, repeated here presumably because the helper
// classes below live in namespace mdlog (they are referenced later as
// mdlog::ReadHistoryCR / mdlog::TrimHistoryCR) -- TODO confirm.
108 using Cursor
= RGWPeriodHistory::Cursor
;
110 /// read the mdlog history and use it to initialize the given cursor
// Coroutine flow visible here: issue an RGWSimpleRadosReadCR for the history
// object, fail with `retcode` if the read failed, otherwise look up
// state.oldest_realm_epoch in the period history and store the result in
// *cursor.
111 class ReadHistoryCR
: public RGWCoroutine
{
114 RGWObjVersionTracker
*objv_tracker
;
115 RGWMetadataLogHistory state
;
116 RGWAsyncRadosProcessor
*async_processor
;
119 ReadHistoryCR(const Svc
& svc
,
121 RGWObjVersionTracker
*objv_tracker
)
122 : RGWCoroutine(svc
.zone
->ctx()), svc(svc
),
124 objv_tracker(objv_tracker
),
125 async_processor(svc
.rados
->get_async_processor())
// the history object lives in the zone's log pool
131 rgw_raw_obj obj
{svc
.zone
->get_zone_params().log_pool
,
132 RGWMetadataLogHistory::oid
};
// false is passed as the read's empty_on_enoent argument
133 constexpr bool empty_on_enoent
= false;
135 using ReadCR
= RGWSimpleRadosReadCR
<RGWMetadataLogHistory
>;
136 call(new ReadCR(async_processor
, svc
.sysobj
, obj
,
137 &state
, empty_on_enoent
, objv_tracker
));
140 ldout(cct
, 1) << "failed to read mdlog history: "
141 << cpp_strerror(retcode
) << dendl
;
142 return set_cr_error(retcode
);
// translate the stored realm epoch into a cursor in the in-memory history
144 *cursor
= svc
.mdlog
->period_history
->lookup(state
.oldest_realm_epoch
);
// the cursor carries its own error code when the lookup did not succeed
146 return set_cr_error(cursor
->get_error());
149 ldout(cct
, 10) << "read mdlog history with oldest period id="
150 << state
.oldest_period_id
<< " realm_epoch="
151 << state
.oldest_realm_epoch
<< dendl
;
152 return set_cr_done();
158 /// write the given cursor to the mdlog history
// Coroutine flow visible here: copy the cursor's period id and epoch into
// `state`, issue an RGWSimpleRadosWriteCR for the history object, then
// finish with `retcode` on failure or set_cr_done() on success.
159 class WriteHistoryCR
: public RGWCoroutine
{
162 RGWObjVersionTracker
*objv
;
163 RGWMetadataLogHistory state
;
164 RGWAsyncRadosProcessor
*async_processor
;
167 WriteHistoryCR(Svc
& svc
,
168 const Cursor
& cursor
,
169 RGWObjVersionTracker
*objv
)
170 : RGWCoroutine(svc
.zone
->ctx()), svc(svc
),
171 cursor(cursor
), objv(objv
),
172 async_processor(svc
.rados
->get_async_processor())
// the history records the cursor's period id and realm epoch
177 state
.oldest_period_id
= cursor
.get_period().get_id();
178 state
.oldest_realm_epoch
= cursor
.get_epoch();
// same object that ReadHistoryCR reads: history oid in the zone's log pool
181 rgw_raw_obj obj
{svc
.zone
->get_zone_params().log_pool
,
182 RGWMetadataLogHistory::oid
};
184 using WriteCR
= RGWSimpleRadosWriteCR
<RGWMetadataLogHistory
>;
185 call(new WriteCR(async_processor
, svc
.sysobj
, obj
, state
, objv
));
188 ldout(cct
, 1) << "failed to write mdlog history: "
189 << cpp_strerror(retcode
) << dendl
;
190 return set_cr_error(retcode
);
193 ldout(cct
, 10) << "wrote mdlog history with oldest period id="
194 << state
.oldest_period_id
<< " realm_epoch="
195 << state
.oldest_realm_epoch
<< dendl
;
196 return set_cr_done();
202 /// update the mdlog history to reflect trimmed logs
// Coroutine flow visible here: read the existing history into `existing`,
// fail with -ECANCELED when the trimmed cursor's epoch is older than the
// stored one, otherwise write `next` (the cursor advanced by one) back as
// the new history. Both sub-coroutines are given the same objv tracker.
203 class TrimHistoryCR
: public RGWCoroutine
{
205 const Cursor cursor
; //< cursor to trimmed period
206 RGWObjVersionTracker
*objv
; //< to prevent racing updates
207 Cursor next
; //< target cursor for oldest log period
208 Cursor existing
; //< existing cursor read from disk
211 TrimHistoryCR(const Svc
& svc
, Cursor cursor
, RGWObjVersionTracker
*objv
)
212 : RGWCoroutine(svc
.zone
->ctx()), svc(svc
),
213 cursor(cursor
), objv(objv
), next(cursor
) {
214 next
.next(); // advance past cursor
219 // read an existing history, and write the new history if it's newer
220 yield
call(new ReadHistoryCR(svc
, &existing
, objv
));
// propagate the read failure unchanged
222 return set_cr_error(retcode
);
224 // reject older trims with ECANCELED
225 if (cursor
.get_epoch() < existing
.get_epoch()) {
226 ldout(cct
, 4) << "found oldest log epoch=" << existing
.get_epoch()
227 << ", rejecting trim at epoch=" << cursor
.get_epoch() << dendl
;
228 return set_cr_error(-ECANCELED
);
230 // overwrite with updated history
231 yield
call(new WriteHistoryCR(svc
, next
, objv
));
// propagate the write failure unchanged
233 return set_cr_error(retcode
);
235 return set_cr_done();
243 // traverse all the way back to the beginning of the period history, and
244 // return a cursor to the first period in a fully attached history
// Starts from period_history->get_current(). Whenever the cursor has no
// predecessor attached, the predecessor period is pulled via period_puller
// and inserted into period_history. A period with an empty predecessor id
// is treated as the first period.
245 Cursor
RGWSI_MDLog::find_oldest_period()
247 auto cursor
= period_history
->get_current();
250 // advance to the period's predecessor
251 if (!cursor
.has_prev()) {
252 auto& predecessor
= cursor
.get_period().get_predecessor();
253 if (predecessor
.empty()) {
254 // this is the first period, so our logs must start here
255 ldout(cct
, 10) << "find_oldest_period returning first "
256 "period " << cursor
.get_period().get_id() << dendl
;
259 // pull the predecessor and add it to our history
261 int r
= period_puller
->pull(predecessor
, period
);
// the pulled period is moved into the history
265 auto prev
= period_history
->insert(std::move(period
));
269 ldout(cct
, 20) << "find_oldest_period advancing to "
270 "predecessor period " << predecessor
<< dendl
;
// after a successful insert the cursor must have a predecessor attached
271 ceph_assert(cursor
.has_prev());
275 ldout(cct
, 10) << "find_oldest_period returning empty cursor" << dendl
;
// Establish the oldest log period, using the stored mdlog history.
// Paths visible in this excerpt:
//  - history object missing (-ENOENT): find the oldest period, then write
//    an initial history exclusively; -EEXIST from that write is not
//    treated as a failure.
//  - other read errors: logged at level 1.
//  - history read: look up the stored realm epoch in period_history. The
//    code below may rewrite the history from find_oldest_period(), or pull
//    the stored period by id, verify its realm_epoch and attach it. The
//    conditions selecting between these are not visible here.
279 Cursor
RGWSI_MDLog::init_oldest_log_period()
281 // read the mdlog history
282 RGWMetadataLogHistory state
;
283 RGWObjVersionTracker objv
;
284 int ret
= read_history(&state
, &objv
);
286 if (ret
== -ENOENT
) {
287 // initialize the mdlog history and write it
288 ldout(cct
, 10) << "initializing mdlog history" << dendl
;
289 auto cursor
= find_oldest_period();
293 // write the initial history
294 state
.oldest_realm_epoch
= cursor
.get_epoch();
295 state
.oldest_period_id
= cursor
.get_period().get_id();
297 constexpr bool exclusive
= true; // don't overwrite
// NOTE(review): this `ret` shadows the outer `ret` declared above
298 int ret
= write_history(state
, &objv
, exclusive
);
299 if (ret
< 0 && ret
!= -EEXIST
) {
300 ldout(cct
, 1) << "failed to write mdlog history: "
301 << cpp_strerror(ret
) << dendl
;
305 } else if (ret
< 0) {
306 ldout(cct
, 1) << "failed to read mdlog history: "
307 << cpp_strerror(ret
) << dendl
;
311 // if it's already in the history, return it
312 auto cursor
= period_history
->lookup(state
.oldest_realm_epoch
);
// recompute the oldest period and store it as the new history
316 cursor
= find_oldest_period();
317 state
.oldest_realm_epoch
= cursor
.get_epoch();
318 state
.oldest_period_id
= cursor
.get_period().get_id();
319 ldout(cct
, 10) << "rewriting mdlog history" << dendl
;
// non-exclusive write; -ECANCELED is not logged as a failure
320 ret
= write_history(state
, &objv
);
321 if (ret
< 0 && ret
!= -ECANCELED
) {
322 ldout(cct
, 1) << "failed to write mdlog history: "
323 << cpp_strerror(ret
) << dendl
;
329 // pull the oldest period by id
331 ret
= period_puller
->pull(state
.oldest_period_id
, period
);
333 ldout(cct
, 1) << "failed to read period id=" << state
.oldest_period_id
334 << " for mdlog history: " << cpp_strerror(ret
) << dendl
;
337 // verify its realm_epoch
338 if (period
.get_realm_epoch() != state
.oldest_realm_epoch
) {
339 ldout(cct
, 1) << "inconsistent mdlog history: read period id="
340 << period
.get_id() << " with realm_epoch=" << period
.get_realm_epoch()
341 << ", expected realm_epoch=" << state
.oldest_realm_epoch
<< dendl
;
// an error cursor carrying -EINVAL signals the mismatch to the caller
342 return Cursor
{-EINVAL
};
344 // attach the period to our history
345 return period_history
->attach(std::move(period
));
// Read the stored mdlog history and return the period-history cursor for
// its oldest_realm_epoch. No version tracker is used for the read (nullptr
// is passed). A read failure is logged at level 1; the value returned in
// that case is not visible in this excerpt.
348 Cursor
RGWSI_MDLog::read_oldest_log_period() const
350 RGWMetadataLogHistory state
;
351 int ret
= read_history(&state
, nullptr);
353 ldout(cct
, 1) << "failed to read mdlog history: "
354 << cpp_strerror(ret
) << dendl
;
358 ldout(cct
, 10) << "read mdlog history with oldest period id="
359 << state
.oldest_period_id
<< " realm_epoch="
360 << state
.oldest_realm_epoch
<< dendl
;
362 return period_history
->lookup(state
.oldest_realm_epoch
);
365 RGWCoroutine
* RGWSI_MDLog::read_oldest_log_period_cr(Cursor
*period
,
366 RGWObjVersionTracker
*objv
) const
368 return new mdlog::ReadHistoryCR(svc
, period
, objv
);
371 RGWCoroutine
* RGWSI_MDLog::trim_log_period_cr(Cursor period
,
372 RGWObjVersionTracker
*objv
) const
374 return new mdlog::TrimHistoryCR(svc
, period
, objv
);
377 RGWMetadataLog
* RGWSI_MDLog::get_log(const std::string
& period
)
379 // construct the period's log in place if it doesn't exist
380 auto insert
= md_logs
.emplace(std::piecewise_construct
,
381 std::forward_as_tuple(period
),
382 std::forward_as_tuple(cct
, svc
.zone
, svc
.cls
, period
));
383 return &insert
.first
->second
;
386 int RGWSI_MDLog::add_entry(const string
& hash_key
, const string
& section
, const string
& key
, bufferlist
& bl
)
388 ceph_assert(current_log
); // must have called init()
389 return current_log
->add_entry(hash_key
, section
, key
, bl
);
392 int RGWSI_MDLog::get_shard_id(const string
& hash_key
, int *shard_id
)
394 ceph_assert(current_log
); // must have called init()
395 return current_log
->get_shard_id(hash_key
, shard_id
);
398 int RGWSI_MDLog::pull_period(const std::string
& period_id
, RGWPeriod
& period
)
400 return period_puller
->pull(period_id
, period
);