1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
7 #include "svc_sys_obj.h"
9 #include "rgw/rgw_tools.h"
10 #include "rgw/rgw_mdlog.h"
11 #include "rgw/rgw_coroutine.h"
12 #include "rgw/rgw_cr_rados.h"
13 #include "rgw/rgw_zone.h"
15 #include "common/errno.h"
17 #include <boost/asio/yield.hpp>
19 #define dout_subsys ceph_subsys_rgw
// File-local shorthand: Svc names the service bundle type declared inside
// RGWSI_MDLog, Cursor names the cursor type of RGWPeriodHistory.
21 using Svc
= RGWSI_MDLog::Svc
;
22 using Cursor
= RGWPeriodHistory::Cursor
;
// Construct the mdlog service instance.
// @param cct        forwarded to the RGWServiceInstance base
// @param _run_sync  stored in the run_sync member
// NOTE(review): the constructor body/closing brace is not visible in this
// excerpt; presumably the body is empty - confirm.
24 RGWSI_MDLog::RGWSI_MDLog(CephContext
*cct
, bool _run_sync
) : RGWServiceInstance(cct
), run_sync(_run_sync
) {
// Out-of-line destructor definition.
// NOTE(review): the body is not visible in this excerpt; presumably empty - confirm.
27 RGWSI_MDLog::~RGWSI_MDLog() {
// Store the service pointers this instance depends on in the svc bundle.
// @param _rados_svc   assigned to svc.rados
// @param _zone_svc    NOTE(review): assignment not visible in this excerpt
// @param _sysobj_svc  assigned to svc.sysobj
// @param _cls_svc     NOTE(review): assignment not visible in this excerpt
// NOTE(review): the return statement is not visible here. Other code in this
// file reads svc.zone, svc.cls and svc.mdlog, so those are presumably set on
// the missing lines - confirm.
30 int RGWSI_MDLog::init(RGWSI_RADOS
*_rados_svc
, RGWSI_Zone
*_zone_svc
, RGWSI_SysObj
*_sysobj_svc
, RGWSI_Cls
*_cls_svc
)
33 svc
.sysobj
= _sysobj_svc
;
35 svc
.rados
= _rados_svc
;
// Start-up hook for the mdlog service. Visible steps:
//  - select the log of the zone's current period as current_log (get_log()
//    creates it if needed)
//  - create the period puller from svc.zone and svc.sysobj
//  - create the period history, handing it cct and the puller
//  - under a condition that includes svc.zone->need_to_sync(), initialize the
//    oldest log period
// @param y  passed through to init_oldest_log_period()
// NOTE(review): the last RGWPeriodHistory constructor argument, the first half
// of the condition and the return statement are not visible in this excerpt.
41 int RGWSI_MDLog::do_start(optional_yield y
)
43 auto& current_period
= svc
.zone
->get_current_period();
45 current_log
= get_log(current_period
.get_id());
47 period_puller
.reset(new RGWPeriodPuller(svc
.zone
, svc
.sysobj
));
48 period_history
.reset(new RGWPeriodHistory(cct
, period_puller
.get(),
52 svc
.zone
->need_to_sync()) {
53 // initialize the log period history
// NOTE(review): the Cursor returned by this call is discarded on this line, so
// an error cursor would go unnoticed here - confirm that is intended.
54 svc
.mdlog
->init_oldest_log_period(y
);
// Read the metadata log history object (RGWMetadataLogHistory::oid) from the
// zone's log pool.
// @param state         output history; presumably filled by decoding the bytes
//                      read into bl (the decode call is not visible - confirm)
// @param objv_tracker  forwarded to rgw_get_system_obj()
// @param y             forwarded to the read and to the remove below
// An object that reads back with zero length is treated as a bad history
// object and removed; a failure of that removal is logged at level 0.
// A buffer::error raised while decoding is caught and logged at level 1.
// NOTE(review): the declaration of bl and all return statements are not
// visible in this excerpt.
59 int RGWSI_MDLog::read_history(RGWMetadataLogHistory
*state
,
60 RGWObjVersionTracker
*objv_tracker
,
61 optional_yield y
) const
63 auto obj_ctx
= svc
.sysobj
->init_obj_ctx();
64 auto& pool
= svc
.zone
->get_zone_params().log_pool
;
65 const auto& oid
= RGWMetadataLogHistory::oid
;
67 int ret
= rgw_get_system_obj(obj_ctx
, pool
, oid
, bl
, objv_tracker
, nullptr, y
);
71 if (bl
.length() == 0) {
72 /* bad history object, remove it */
73 rgw_raw_obj
obj(pool
, oid
);
74 auto sysobj
= obj_ctx
.get_obj(obj
);
75 ret
= sysobj
.wop().remove(y
);
// ret holds a negative errno here, hence the negation for cpp_strerror()
77 ldout(cct
, 0) << "ERROR: meta history is empty, but cannot remove it (" << cpp_strerror(-ret
) << ")" << dendl
;
85 } catch (buffer::error
& e
) {
86 ldout(cct
, 1) << "failed to decode the mdlog history: "
// Write the metadata log history object (RGWMetadataLogHistory::oid) to the
// zone's log pool.
// @param state         history to store. NOTE(review): the step that encodes
//                      state into bl is not visible in this excerpt
// @param objv_tracker  forwarded to rgw_put_system_obj()
// @param y             forwarded to rgw_put_system_obj()
// @param exclusive     forwarded to rgw_put_system_obj(); one caller in this
//                      file passes true with the remark "don't overwrite",
//                      another omits it, so the declaration presumably
//                      supplies a default
// @return the result of rgw_put_system_obj()
93 int RGWSI_MDLog::write_history(const RGWMetadataLogHistory
& state
,
94 RGWObjVersionTracker
*objv_tracker
,
95 optional_yield y
, bool exclusive
)
100 auto& pool
= svc
.zone
->get_zone_params().log_pool
;
101 const auto& oid
= RGWMetadataLogHistory::oid
;
102 auto obj_ctx
= svc
.sysobj
->init_obj_ctx();
// a default-constructed real_time is passed for the time argument
103 return rgw_put_system_obj(obj_ctx
, pool
, oid
, bl
,
104 exclusive
, objv_tracker
, real_time
{}, y
);
// Cursor alias for the coroutine classes that follow.
// NOTE(review): these classes are later referred to as mdlog::..., so this is
// presumably inside namespace mdlog, whose opening line is not visible - confirm.
109 using Cursor
= RGWPeriodHistory::Cursor
;
111 /// read the mdlog history and use it to initialize the given cursor
// Flow visible in this excerpt:
//  1. read the history object from the zone's log pool with
//     RGWSimpleRadosReadCR<RGWMetadataLogHistory> into `state`
//  2. a read failure is logged and reported through set_cr_error(retcode)
//  3. look up state.oldest_realm_epoch in the period history and store the
//     result through `cursor`; the cursor's own error is reported through
//     set_cr_error(cursor->get_error())
//  4. on success, log the oldest period id and realm epoch, then set_cr_done()
// NOTE(review): the svc and cursor members, the cursor constructor parameter,
// the operate() header and the conditions guarding the error returns are not
// visible in this excerpt.
112 class ReadHistoryCR
: public RGWCoroutine
{
115 RGWObjVersionTracker
*objv_tracker
;
116 RGWMetadataLogHistory state
;
117 RGWAsyncRadosProcessor
*async_processor
;
120 ReadHistoryCR(const Svc
& svc
,
122 RGWObjVersionTracker
*objv_tracker
)
123 : RGWCoroutine(svc
.zone
->ctx()), svc(svc
),
125 objv_tracker(objv_tracker
),
126 async_processor(svc
.rados
->get_async_processor())
// the history object lives in the zone's log pool under a fixed oid
132 rgw_raw_obj obj
{svc
.zone
->get_zone_params().log_pool
,
133 RGWMetadataLogHistory::oid
};
// presumably makes a missing object an error instead of an empty result - confirm
134 constexpr bool empty_on_enoent
= false;
136 using ReadCR
= RGWSimpleRadosReadCR
<RGWMetadataLogHistory
>;
137 call(new ReadCR(async_processor
, svc
.sysobj
, obj
,
138 &state
, empty_on_enoent
, objv_tracker
));
141 ldout(cct
, 1) << "failed to read mdlog history: "
142 << cpp_strerror(retcode
) << dendl
;
143 return set_cr_error(retcode
);
// translate the stored realm epoch into a cursor via the period history
145 *cursor
= svc
.mdlog
->period_history
->lookup(state
.oldest_realm_epoch
);
147 return set_cr_error(cursor
->get_error());
150 ldout(cct
, 10) << "read mdlog history with oldest period id="
151 << state
.oldest_period_id
<< " realm_epoch="
152 << state
.oldest_realm_epoch
<< dendl
;
153 return set_cr_done();
159 /// write the given cursor to the mdlog history
// Flow visible in this excerpt:
//  1. copy the cursor's period id and epoch into `state`
//  2. write `state` to the history object in the zone's log pool with
//     RGWSimpleRadosWriteCR<RGWMetadataLogHistory>, passing objv along
//  3. a write failure is logged and reported through set_cr_error(retcode)
//  4. on success, log the written period id and realm epoch, then set_cr_done()
// NOTE(review): the svc and cursor members, the operate() header and the
// condition guarding the error return are not visible in this excerpt.
160 class WriteHistoryCR
: public RGWCoroutine
{
163 RGWObjVersionTracker
*objv
;
164 RGWMetadataLogHistory state
;
165 RGWAsyncRadosProcessor
*async_processor
;
168 WriteHistoryCR(Svc
& svc
,
169 const Cursor
& cursor
,
170 RGWObjVersionTracker
*objv
)
171 : RGWCoroutine(svc
.zone
->ctx()), svc(svc
),
172 cursor(cursor
), objv(objv
),
173 async_processor(svc
.rados
->get_async_processor())
// the history records the cursor's period id and epoch
178 state
.oldest_period_id
= cursor
.get_period().get_id();
179 state
.oldest_realm_epoch
= cursor
.get_epoch();
182 rgw_raw_obj obj
{svc
.zone
->get_zone_params().log_pool
,
183 RGWMetadataLogHistory::oid
};
185 using WriteCR
= RGWSimpleRadosWriteCR
<RGWMetadataLogHistory
>;
186 call(new WriteCR(async_processor
, svc
.sysobj
, obj
, state
, objv
));
189 ldout(cct
, 1) << "failed to write mdlog history: "
190 << cpp_strerror(retcode
) << dendl
;
191 return set_cr_error(retcode
);
194 ldout(cct
, 10) << "wrote mdlog history with oldest period id="
195 << state
.oldest_period_id
<< " realm_epoch="
196 << state
.oldest_realm_epoch
<< dendl
;
197 return set_cr_done();
203 /// update the mdlog history to reflect trimmed logs
// Flow visible in this excerpt:
//  1. the constructor copies `cursor` into `next` and advances `next` by one
//  2. read the existing history into `existing` with ReadHistoryCR; a failure
//     is reported through set_cr_error(retcode)
//  3. if cursor's epoch is lower than the existing epoch, the trim is rejected
//     with -ECANCELED
//  4. otherwise write `next` as the new history with WriteHistoryCR; a failure
//     is reported through set_cr_error(retcode), success through set_cr_done()
// NOTE(review): the svc member, the operate() header and the conditions
// guarding the retcode error returns are not visible in this excerpt.
204 class TrimHistoryCR
: public RGWCoroutine
{
206 const Cursor cursor
; //< cursor to trimmed period
207 RGWObjVersionTracker
*objv
; //< to prevent racing updates
208 Cursor next
; //< target cursor for oldest log period
209 Cursor existing
; //< existing cursor read from disk
212 TrimHistoryCR(const Svc
& svc
, Cursor cursor
, RGWObjVersionTracker
*objv
)
213 : RGWCoroutine(svc
.zone
->ctx()), svc(svc
),
214 cursor(cursor
), objv(objv
), next(cursor
) {
215 next
.next(); // advance past cursor
220 // read an existing history, and write the new history if it's newer
221 yield
call(new ReadHistoryCR(svc
, &existing
, objv
));
223 return set_cr_error(retcode
);
225 // reject older trims with ECANCELED
226 if (cursor
.get_epoch() < existing
.get_epoch()) {
227 ldout(cct
, 4) << "found oldest log epoch=" << existing
.get_epoch()
228 << ", rejecting trim at epoch=" << cursor
.get_epoch() << dendl
;
229 return set_cr_error(-ECANCELED
);
231 // overwrite with updated history
232 yield
call(new WriteHistoryCR(svc
, next
, objv
));
234 return set_cr_error(retcode
);
236 return set_cr_done();
244 // traverse all the way back to the beginning of the period history, and
245 // return a cursor to the first period in a fully attached history
// @param y  forwarded to period_puller->pull() when a predecessor has to be
//           fetched
// Starts from period_history->get_current(). When the cursor has no previous
// entry, the predecessor id is read from the cursor's period: an empty id
// means this is the first period, otherwise the predecessor is pulled and
// inserted into the history.
// NOTE(review): the loop header, the declaration of `period`, the checks on
// `r` and `prev`, and all return statements are not visible in this excerpt.
246 Cursor
RGWSI_MDLog::find_oldest_period(optional_yield y
)
248 auto cursor
= period_history
->get_current();
251 // advance to the period's predecessor
252 if (!cursor
.has_prev()) {
253 auto& predecessor
= cursor
.get_period().get_predecessor();
254 if (predecessor
.empty()) {
255 // this is the first period, so our logs must start here
256 ldout(cct
, 10) << "find_oldest_period returning first "
257 "period " << cursor
.get_period().get_id() << dendl
;
260 // pull the predecessor and add it to our history
262 int r
= period_puller
->pull(predecessor
, period
, y
);
266 auto prev
= period_history
->insert(std::move(period
));
270 ldout(cct
, 20) << "find_oldest_period advancing to "
271 "predecessor period " << predecessor
<< dendl
;
// after the insert the cursor is expected to have a previous entry
272 ceph_assert(cursor
.has_prev());
276 ldout(cct
, 10) << "find_oldest_period returning empty cursor" << dendl
;
// Read the mdlog history and produce a cursor for the oldest log period,
// writing the history object where needed. Steps visible in this excerpt:
//  - read_history() returns -ENOENT: find the oldest period, record its epoch
//    and id in `state`, and write the history exclusively; a write result of
//    -EEXIST is not logged as a failure
//  - any other negative read result is logged at level 1
//  - look up state.oldest_realm_epoch in the period history
//  - a path that calls find_oldest_period() again and rewrites the history;
//    a write result of -ECANCELED is not logged as a failure
//  - pull the oldest period by id, return Cursor{-EINVAL} when its realm epoch
//    differs from the stored one, otherwise attach it to the period history
// @param y  forwarded to every read, write, pull and attach call
// NOTE(review): several conditions and return statements are not visible in
// this excerpt, so the exact branch structure cannot be confirmed from here.
280 Cursor
RGWSI_MDLog::init_oldest_log_period(optional_yield y
)
282 // read the mdlog history
283 RGWMetadataLogHistory state
;
284 RGWObjVersionTracker objv
;
285 int ret
= read_history(&state
, &objv
, y
);
287 if (ret
== -ENOENT
) {
288 // initialize the mdlog history and write it
289 ldout(cct
, 10) << "initializing mdlog history" << dendl
;
290 auto cursor
= find_oldest_period(y
);
294 // write the initial history
295 state
.oldest_realm_epoch
= cursor
.get_epoch();
296 state
.oldest_period_id
= cursor
.get_period().get_id();
298 constexpr bool exclusive
= true; // don't overwrite
// NOTE(review): this declares a second `ret` inside the -ENOENT branch, which
// shadows the one assigned from read_history() above - confirm it is intended.
299 int ret
= write_history(state
, &objv
, y
, exclusive
);
300 if (ret
< 0 && ret
!= -EEXIST
) {
301 ldout(cct
, 1) << "failed to write mdlog history: "
302 << cpp_strerror(ret
) << dendl
;
306 } else if (ret
< 0) {
307 ldout(cct
, 1) << "failed to read mdlog history: "
308 << cpp_strerror(ret
) << dendl
;
312 // if it's already in the history, return it
313 auto cursor
= period_history
->lookup(state
.oldest_realm_epoch
);
317 cursor
= find_oldest_period(y
);
318 state
.oldest_realm_epoch
= cursor
.get_epoch();
319 state
.oldest_period_id
= cursor
.get_period().get_id();
320 ldout(cct
, 10) << "rewriting mdlog history" << dendl
;
321 ret
= write_history(state
, &objv
, y
);
322 if (ret
< 0 && ret
!= -ECANCELED
) {
323 ldout(cct
, 1) << "failed to write mdlog history: "
324 << cpp_strerror(ret
) << dendl
;
330 // pull the oldest period by id
332 ret
= period_puller
->pull(state
.oldest_period_id
, period
, y
);
334 ldout(cct
, 1) << "failed to read period id=" << state
.oldest_period_id
335 << " for mdlog history: " << cpp_strerror(ret
) << dendl
;
338 // verify its realm_epoch
339 if (period
.get_realm_epoch() != state
.oldest_realm_epoch
) {
340 ldout(cct
, 1) << "inconsistent mdlog history: read period id="
341 << period
.get_id() << " with realm_epoch=" << period
.get_realm_epoch()
342 << ", expected realm_epoch=" << state
.oldest_realm_epoch
<< dendl
;
343 return Cursor
{-EINVAL
};
345 // attach the period to our history
346 return period_history
->attach(std::move(period
), y
);
// Read the mdlog history and return the period-history cursor for its oldest
// realm epoch.
// @param y  forwarded to read_history()
// @return the result of period_history->lookup(state.oldest_realm_epoch)
// No version tracker is used for this read (nullptr is passed).
// NOTE(review): the condition around the failure log and the value returned
// on that path are not visible in this excerpt.
349 Cursor
RGWSI_MDLog::read_oldest_log_period(optional_yield y
) const
351 RGWMetadataLogHistory state
;
352 int ret
= read_history(&state
, nullptr, y
);
354 ldout(cct
, 1) << "failed to read mdlog history: "
355 << cpp_strerror(ret
) << dendl
;
359 ldout(cct
, 10) << "read mdlog history with oldest period id="
360 << state
.oldest_period_id
<< " realm_epoch="
361 << state
.oldest_realm_epoch
<< dendl
;
363 return period_history
->lookup(state
.oldest_realm_epoch
);
366 RGWCoroutine
* RGWSI_MDLog::read_oldest_log_period_cr(Cursor
*period
,
367 RGWObjVersionTracker
*objv
) const
369 return new mdlog::ReadHistoryCR(svc
, period
, objv
);
372 RGWCoroutine
* RGWSI_MDLog::trim_log_period_cr(Cursor period
,
373 RGWObjVersionTracker
*objv
) const
375 return new mdlog::TrimHistoryCR(svc
, period
, objv
);
378 RGWMetadataLog
* RGWSI_MDLog::get_log(const std::string
& period
)
380 // construct the period's log in place if it doesn't exist
381 auto insert
= md_logs
.emplace(std::piecewise_construct
,
382 std::forward_as_tuple(period
),
383 std::forward_as_tuple(cct
, svc
.zone
, svc
.cls
, period
));
384 return &insert
.first
->second
;
387 int RGWSI_MDLog::add_entry(const string
& hash_key
, const string
& section
, const string
& key
, bufferlist
& bl
)
389 ceph_assert(current_log
); // must have called init()
390 return current_log
->add_entry(hash_key
, section
, key
, bl
);
393 int RGWSI_MDLog::get_shard_id(const string
& hash_key
, int *shard_id
)
395 ceph_assert(current_log
); // must have called init()
396 return current_log
->get_shard_id(hash_key
, shard_id
);
// Fetch a period by id through the period puller.
// @param period_id  id of the period to pull
// @param period     output object handed to period_puller->pull()
// @return the result of period_puller->pull()
// NOTE(review): the rest of the parameter list is not visible in this excerpt;
// `y` is presumably an optional_yield parameter declared there - confirm.
399 int RGWSI_MDLog::pull_period(const std::string
& period_id
, RGWPeriod
& period
,
402 return period_puller
->pull(period_id
, period
, y
);