]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
9f95a23c | 2 | // vim: ts=8 sw=2 smarttab ft=cpp |
7c673cae FG |
3 | |
4 | #include "rgw_gc.h" | |
494da23a | 5 | |
494da23a | 6 | #include "rgw_tools.h" |
9f95a23c | 7 | #include "include/scope_guard.h" |
7c673cae FG |
8 | #include "include/rados/librados.hpp" |
9 | #include "cls/rgw/cls_rgw_client.h" | |
9f95a23c | 10 | #include "cls/rgw_gc/cls_rgw_gc_client.h" |
7c673cae | 11 | #include "cls/refcount/cls_refcount_client.h" |
9f95a23c | 12 | #include "cls/version/cls_version_client.h" |
494da23a | 13 | #include "rgw_perf_counters.h" |
7c673cae | 14 | #include "cls/lock/cls_lock_client.h" |
11fdf7f2 | 15 | #include "include/random.h" |
9f95a23c | 16 | #include "rgw_gc_log.h" |
7c673cae | 17 | |
494da23a | 18 | #include <list> // XXX |
11fdf7f2 | 19 | #include <sstream> |
7c673cae FG |
20 | |
21 | #define dout_context g_ceph_context | |
22 | #define dout_subsys ceph_subsys_rgw | |
23 | ||
7c673cae FG |
24 | using namespace librados; |
25 | ||
26 | static string gc_oid_prefix = "gc"; | |
27 | static string gc_index_lock_name = "gc_process"; | |
28 | ||
7c673cae FG |
29 | void RGWGC::initialize(CephContext *_cct, RGWRados *_store) { |
30 | cct = _cct; | |
31 | store = _store; | |
32 | ||
c07f9fc5 | 33 | max_objs = min(static_cast<int>(cct->_conf->rgw_gc_max_objs), rgw_shards_max()); |
7c673cae FG |
34 | |
35 | obj_names = new string[max_objs]; | |
36 | ||
37 | for (int i = 0; i < max_objs; i++) { | |
38 | obj_names[i] = gc_oid_prefix; | |
39 | char buf[32]; | |
40 | snprintf(buf, 32, ".%d", i); | |
41 | obj_names[i].append(buf); | |
9f95a23c TL |
42 | |
43 | auto it = transitioned_objects_cache.begin() + i; | |
44 | transitioned_objects_cache.insert(it, false); | |
45 | ||
46 | //version = 0 -> not ready for transition | |
47 | //version = 1 -> marked ready for transition | |
48 | librados::ObjectWriteOperation op; | |
49 | op.create(false); | |
50 | const uint64_t queue_size = cct->_conf->rgw_gc_max_queue_size, num_deferred_entries = cct->_conf->rgw_gc_max_deferred; | |
51 | gc_log_init2(op, queue_size, num_deferred_entries); | |
52 | store->gc_operate(obj_names[i], &op); | |
7c673cae FG |
53 | } |
54 | } | |
55 | ||
// Release the shard object name array allocated in initialize().
void RGWGC::finalize()
{
  delete[] obj_names;
}
60 | ||
// Map a gc tag to the index of the gc shard object responsible for it,
// via stable hashing over max_objs shards.
int RGWGC::tag_index(const string& tag)
{
  return rgw_shard_id(tag, max_objs);
}
65 | ||
9f95a23c | 66 | int RGWGC::send_chain(cls_rgw_obj_chain& chain, const string& tag) |
7c673cae | 67 | { |
9f95a23c | 68 | ObjectWriteOperation op; |
7c673cae FG |
69 | cls_rgw_gc_obj_info info; |
70 | info.chain = chain; | |
71 | info.tag = tag; | |
9f95a23c | 72 | gc_log_enqueue2(op, cct->_conf->rgw_gc_obj_min_wait, info); |
7c673cae | 73 | |
9f95a23c TL |
74 | int i = tag_index(tag); |
75 | ||
76 | ldpp_dout(this, 20) << "RGWGC::send_chain - on object name: " << obj_names[i] << "tag is: " << tag << dendl; | |
77 | ||
78 | auto ret = store->gc_operate(obj_names[i], &op); | |
79 | if (ret != -ECANCELED && ret != -EPERM) { | |
80 | return ret; | |
81 | } | |
82 | ObjectWriteOperation set_entry_op; | |
83 | cls_rgw_gc_set_entry(set_entry_op, cct->_conf->rgw_gc_obj_min_wait, info); | |
84 | return store->gc_operate(obj_names[i], &set_entry_op); | |
7c673cae FG |
85 | } |
86 | ||
9f95a23c TL |
// State carried across an async defer operation. Ownership is transferred to
// async_defer_callback() once the aio is successfully submitted; the
// destructor releases the completion if it was never handed off.
struct defer_chain_state {
  librados::AioCompletion* completion = nullptr;
  // TODO: hold a reference on the state in RGWGC to avoid use-after-free if
  // RGWGC destructs before this completion fires
  RGWGC* gc = nullptr;
  cls_rgw_gc_obj_info info;

  ~defer_chain_state() {
    if (completion) {
      completion->release();
    }
  }
};
100 | ||
// Completion callback for RGWGC::async_defer_chain(). Takes ownership of the
// defer_chain_state (freed when the unique_ptr goes out of scope). ECANCELED
// means the shard object has transitioned to the cls_rgw_gc queue, so the
// defer is re-driven through the queue path.
static void async_defer_callback(librados::completion_t, void* arg)
{
  std::unique_ptr<defer_chain_state> state{static_cast<defer_chain_state*>(arg)};
  if (state->completion->get_return_value() == -ECANCELED) {
    state->gc->on_defer_canceled(state->info);
  }
}
7c673cae | 108 | |
9f95a23c TL |
109 | void RGWGC::on_defer_canceled(const cls_rgw_gc_obj_info& info) |
110 | { | |
111 | const std::string& tag = info.tag; | |
112 | const int i = tag_index(tag); | |
113 | ||
114 | // ECANCELED from cls_version_check() tells us that we've transitioned | |
115 | transitioned_objects_cache[i] = true; | |
7c673cae | 116 | |
9f95a23c TL |
117 | ObjectWriteOperation op; |
118 | cls_rgw_gc_queue_defer_entry(op, cct->_conf->rgw_gc_obj_min_wait, info); | |
119 | cls_rgw_gc_remove(op, {tag}); | |
7c673cae | 120 | |
9f95a23c TL |
121 | auto c = librados::Rados::aio_create_completion(nullptr, nullptr); |
122 | store->gc_aio_operate(obj_names[i], c, &op); | |
123 | c->release(); | |
7c673cae FG |
124 | } |
125 | ||
// Asynchronously extend (defer) the expiration of a gc entry so that its tail
// objects are not reclaimed while still in use. Routes to either the
// cls_rgw_gc queue or the legacy omap log depending on whether the shard has
// transitioned, and installs a callback to detect an unseen transition.
int RGWGC::async_defer_chain(const string& tag, const cls_rgw_obj_chain& chain)
{
  const int i = tag_index(tag);
  cls_rgw_gc_obj_info info;
  info.chain = chain;
  info.tag = tag;

  // if we've transitioned this shard object, we can rely on the cls_rgw_gc queue
  if (transitioned_objects_cache[i]) {
    ObjectWriteOperation op;
    cls_rgw_gc_queue_defer_entry(op, cct->_conf->rgw_gc_obj_min_wait, info);

    // this tag may still be present in omap, so remove it once the cls_rgw_gc
    // enqueue succeeds
    cls_rgw_gc_remove(op, {tag});

    // fire-and-forget; the completion is not consulted
    auto c = librados::Rados::aio_create_completion(nullptr, nullptr);
    int ret = store->gc_aio_operate(obj_names[i], c, &op);
    c->release();
    return ret;
  }

  // if we haven't seen the transition yet, write the defer to omap with cls_rgw
  ObjectWriteOperation op;

  // assert that we haven't initialized cls_rgw_gc queue. this prevents us
  // from writing new entries to omap after the transition
  gc_log_defer1(op, cct->_conf->rgw_gc_obj_min_wait, info);

  // prepare a callback to detect the transition via ECANCELED from cls_version_check()
  auto state = std::make_unique<defer_chain_state>();
  state->gc = this;
  state->info.chain = chain;
  state->info.tag = tag;
  state->completion = librados::Rados::aio_create_completion(
      state.get(), async_defer_callback);

  int ret = store->gc_aio_operate(obj_names[i], state->completion, &op);
  if (ret == 0) {
    state.release(); // release ownership until async_defer_callback()
  }
  return ret;
}
169 | ||
// Asynchronously remove the given tags from the omap-based gc log on shard
// 'index'. On success, ownership of the AioCompletion is handed to the caller
// via *pc (caller must wait/release); on failure it is released here and *pc
// is left untouched.
int RGWGC::remove(int index, const std::vector<string>& tags, AioCompletion **pc)
{
  ObjectWriteOperation op;
  cls_rgw_gc_remove(op, tags);

  auto c = librados::Rados::aio_create_completion(nullptr, nullptr);
  int ret = store->gc_aio_operate(obj_names[index], c, &op);
  if (ret < 0) {
    c->release();
  } else {
    *pc = c;
  }
  return ret;
}
184 | ||
// Synchronously trim the first num_entries entries from the cls_rgw_gc queue
// on shard 'index'. Used after the entries have been fully processed.
int RGWGC::remove(int index, int num_entries)
{
  ObjectWriteOperation op;
  cls_rgw_gc_queue_remove_entries(op, num_entries);

  return store->gc_operate(obj_names[index], &op);
}
192 | ||
// Paginated listing of gc entries across shard objects, merging results from
// the legacy omap log and the cls_rgw_gc queue as each shard may be in either
// format. *index/marker form the resume cursor; processing_queue is an in/out
// flag telling the next call that the current shard's queue still has entries.
// Also detects and caches per-shard transitions to the queue format.
int RGWGC::list(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated, bool& processing_queue)
{
  result.clear();
  string next_marker;
  bool check_queue = false;

  // iterate shards; marker and check_queue reset whenever we move to the next shard
  for (; *index < max_objs && result.size() < max; (*index)++, marker.clear(), check_queue = false) {
    std::list<cls_rgw_gc_obj_info> entries, queue_entries;
    int ret = 0;

    //processing_queue is set to true from previous iteration if the queue was under process and probably has more elements in it.
    if (! transitioned_objects_cache[*index] && ! check_queue && ! processing_queue) {
      // shard believed to still be omap-based: list from the omap gc log
      ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[*index], marker, max - result.size(), expired_only, entries, truncated, next_marker);
      if (ret != -ENOENT && ret < 0) {
        return ret;
      }
      // read the shard object's version to learn its transition state
      // (ver == 0: not transitioned; ver >= 1: queue is initialized)
      obj_version objv;
      cls_version_read(store->gc_pool_ctx, obj_names[*index], &objv);
      if (ret == -ENOENT) {
        if (objv.ver == 0) {
          // shard object doesn't exist at all: nothing to list here
          continue;
        } else {
          if (! expired_only) {
            // omap is empty and the queue exists: shard has transitioned
            transitioned_objects_cache[*index] = true;
            marker.clear();
          } else {
            // expired-only listing came back empty; confirm the omap holds no
            // entries at all before declaring the shard transitioned.
            // NOTE(review): ret of this probe list is not checked before
            // inspecting non_expired_entries — presumably a failed probe
            // (empty list) would mark the shard transitioned prematurely;
            // verify against upstream behavior.
            std::list<cls_rgw_gc_obj_info> non_expired_entries;
            ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[*index], marker, 1, false, non_expired_entries, truncated, next_marker);
            if (non_expired_entries.size() == 0) {
              transitioned_objects_cache[*index] = true;
              marker.clear();
            }
          }
        }
      }
      // queue exists and omap didn't fill our budget: also drain the queue
      if ((objv.ver == 1) && (entries.size() < max - result.size())) {
        check_queue = true;
        marker.clear();
      }
    }
    if (transitioned_objects_cache[*index] || check_queue || processing_queue) {
      processing_queue = false;
      ret = cls_rgw_gc_queue_list_entries(store->gc_pool_ctx, obj_names[*index], marker, (max - result.size()) - entries.size(), expired_only, queue_entries, truncated, next_marker);
      if (ret < 0) {
        return ret;
      }
    }
    if (entries.size() == 0 && queue_entries.size() == 0)
      continue;

    // merge omap entries first, then queue entries, into the result
    std::list<cls_rgw_gc_obj_info>::iterator iter;
    for (iter = entries.begin(); iter != entries.end(); ++iter) {
      result.push_back(*iter);
    }

    for (iter = queue_entries.begin(); iter != queue_entries.end(); ++iter) {
      result.push_back(*iter);
    }

    marker = next_marker;

    if (*index == max_objs - 1) {
      /* we cut short here, truncated will hold the correct value */
      return 0;
    }

    if (result.size() == max) {
      // budget exhausted: either stay on this shard's queue (more entries
      // pending) or advance the cursor to the next shard
      if (queue_entries.size() > 0 && *truncated) {
        processing_queue = true;
      } else {
        processing_queue = false;
        *index += 1; //move to next gc object
      }

      /* close approximation, it might be that the next of the objects don't hold
       * anything, in this case truncated should have been false, but we can find
       * that out on the next iteration
       */
      *truncated = true;
      return 0;
    }
  }
  *truncated = false;
  processing_queue = false;

  return 0;
}
280 | ||
11fdf7f2 TL |
// Manages the bounded window of in-flight async deletions issued while
// processing gc entries, and batches up the tag removals that follow once all
// of a tag's tail ("shadow") objects have been deleted.
class RGWGCIOManager {
  const DoutPrefixProvider* dpp;
  CephContext *cct;
  RGWGC *gc;

  // one in-flight aio: either a tail-object deletion or an index (tag
  // cleanup) operation on a gc shard
  struct IO {
    enum Type {
      UnknownIO = 0,
      TailIO = 1,
      IndexIO = 2,
    } type{UnknownIO};
    librados::AioCompletion *c{nullptr};
    string oid;
    int index{-1};
    string tag;
  };

  deque<IO> ios;                         // FIFO of in-flight aios
  vector<std::vector<string> > remove_tags;  // per-shard tags pending removal
  /* tracks the number of remaining shadow objects for a given tag in order to
   * only remove the tag once all shadow objects have themselves been removed
   */
  vector<map<string, size_t> > tag_io_size;

#define MAX_AIO_DEFAULT 10
  size_t max_aio{MAX_AIO_DEFAULT};       // in-flight window size

public:
  RGWGCIOManager(const DoutPrefixProvider* _dpp, CephContext *_cct, RGWGC *_gc) : dpp(_dpp),
                                                  cct(_cct),
                                                  gc(_gc),
                                                  remove_tags(cct->_conf->rgw_gc_max_objs),
                                                  tag_io_size(cct->_conf->rgw_gc_max_objs) {
    max_aio = cct->_conf->rgw_gc_max_concurrent_io;
  }

  // release (without waiting on) any completions still in flight
  ~RGWGCIOManager() {
    for (auto io : ios) {
      io.c->release();
    }
  }

  // Submit an async deletion for 'oid', first reaping completed aios until
  // the in-flight count drops back under max_aio.
  int schedule_io(IoCtx *ioctx, const string& oid, ObjectWriteOperation *op,
		  int index, const string& tag) {
    while (ios.size() > max_aio) {
      if (gc->going_down()) {
        return 0;
      }
      auto ret = handle_next_completion();
      //Return error if we are using queue, else ignore it
      if (gc->transitioned_objects_cache[index] && ret < 0) {
        return ret;
      }
    }

    auto c = librados::Rados::aio_create_completion(nullptr, nullptr);
    int ret = ioctx->aio_operate(oid, c, op);
    if (ret < 0) {
      return ret;
    }
    ios.push_back(IO{IO::TailIO, c, oid, index, tag});

    return 0;
  }

  // Wait for the oldest in-flight aio, handle its outcome, and pop it.
  // Returns the aio's result (ENOENT is treated as success).
  int handle_next_completion() {
    ceph_assert(!ios.empty());
    IO& io = ios.front();
    io.c->wait_for_complete();
    int ret = io.c->get_return_value();
    io.c->release();

    if (ret == -ENOENT) {
      ret = 0;
    }

    // index (tag-removal) ops on non-transitioned shards: warn and move on
    if (io.type == IO::IndexIO && ! gc->transitioned_objects_cache[io.index]) {
      if (ret < 0) {
        ldpp_dout(dpp, 0) << "WARNING: gc cleanup of tags on gc shard index=" <<
	  io.index << " returned error, ret=" << ret << dendl;
      }
      goto done;
    }

    if (ret < 0) {
      ldpp_dout(dpp, 0) << "WARNING: gc could not remove oid=" << io.oid <<
	", ret=" << ret << dendl;
      goto done;
    }

    // a tail object was removed; account for it toward its tag's removal
    if (! gc->transitioned_objects_cache[io.index]) {
      schedule_tag_removal(io.index, io.tag);
    }

  done:
    ios.pop_front();
    return ret;
  }

  /* This is a request to schedule a tag removal. It will be called once when
   * there are no shadow objects. But it will also be called for every shadow
   * object when there are any. Since we do not want the tag to be removed
   * until all shadow objects have been successfully removed, the scheduling
   * will not happen until the shadow object count goes down to zero
   */
  void schedule_tag_removal(int index, string tag) {
    auto& ts = tag_io_size[index];
    auto ts_it = ts.find(tag);
    if (ts_it != ts.end()) {
      auto& size = ts_it->second;
      --size;
      // wait all shadow obj delete return
      if (size != 0)
        return;

      ts.erase(ts_it);
    }

    auto& rt = remove_tags[index];

    rt.push_back(tag);
    // flush in chunks to bound the size of a single removal op
    if (rt.size() >= (size_t)cct->_conf->rgw_gc_max_trim_chunk) {
      flush_remove_tags(index, rt);
    }
  }

  // Record how many shadow objects must be deleted before 'tag' can go.
  void add_tag_io_size(int index, string tag, size_t size) {
    auto& ts = tag_io_size[index];
    ts.emplace(tag, size);
  }

  // Reap all in-flight aios; returns the last error seen (0 if none),
  // or -EAGAIN if shutdown was requested mid-drain.
  int drain_ios() {
    int ret_val = 0;
    while (!ios.empty()) {
      if (gc->going_down()) {
        return -EAGAIN;
      }
      auto ret = handle_next_completion();
      if (ret < 0) {
        ret_val = ret;
      }
    }
    return ret_val;
  }

  void drain() {
    drain_ios();
    flush_remove_tags();
    /* the tags draining might have generated more ios, drain those too */
    drain_ios();
  }

  // Issue one async removal of all pending tags for shard 'index'.
  // rt is cleared unconditionally (scope guard) to avoid unbounded growth
  // when the backend persistently fails.
  void flush_remove_tags(int index, vector<string>& rt) {
    IO index_io;
    index_io.type = IO::IndexIO;
    index_io.index = index;

    ldpp_dout(dpp, 20) << __func__ <<
      " removing entries from gc log shard index=" << index << ", size=" <<
      rt.size() << ", entries=" << rt << dendl;

    auto rt_guard = make_scope_guard(
      [&]
      {
	rt.clear();
      }
     );

    int ret = gc->remove(index, rt, &index_io.c);
    if (ret < 0) {
      /* we already cleared list of tags, this prevents us from
       * ballooning in case of a persistent problem
       */
      ldpp_dout(dpp, 0) << "WARNING: failed to remove tags on gc shard index=" <<
	index << " ret=" << ret << dendl;
      return;
    }
    if (perfcounter) {
      /* log the count of tags retired for rate estimation */
      perfcounter->inc(l_rgw_gc_retire, rt.size());
    }
    ios.push_back(index_io);
  }

  // Flush pending tag removals for every shard still using the omap log.
  void flush_remove_tags() {
    int index = 0;
    for (auto& rt : remove_tags) {
      if (! gc->transitioned_objects_cache[index]) {
        flush_remove_tags(index, rt);
      }
      ++index;
    }
  }

  // Synchronously trim processed entries from a shard's cls_rgw_gc queue.
  int remove_queue_entries(int index, int num_entries) {
    int ret = gc->remove(index, num_entries);
    if (ret < 0) {
      ldpp_dout(dpp, 0) << "ERROR: failed to remove queue entries on index=" <<
	index << " ret=" << ret << dendl;
      return ret;
    }
    return 0;
  }
}; // class RGWGCIOManager
485 | ||
// Process one gc shard: take the shard's exclusive lock, list expired
// entries (from omap or the cls_rgw_gc queue depending on the shard's
// transition state), delete each entry's tail objects via the io_manager,
// then retire the entries. Bounded by max_secs; returns 0 on lock
// contention so another processor's progress isn't treated as an error.
int RGWGC::process(int index, int max_secs, bool expired_only,
                   RGWGCIOManager& io_manager)
{
  ldpp_dout(this, 20) << "RGWGC::process entered with GC index_shard=" <<
    index << ", max_secs=" << max_secs << ", expired_only=" <<
    expired_only << dendl;

  rados::cls::lock::Lock l(gc_index_lock_name);
  utime_t end = ceph_clock_now();

  /* max_secs should be greater than zero. We don't want a zero max_secs
   * to be translated as no timeout, since we'd then need to break the
   * lock and that would require a manual intervention. In this case
   * we can just wait it out. */
  if (max_secs <= 0)
    return -EAGAIN;

  end += max_secs;
  utime_t time(max_secs, 0);
  l.set_duration(time);

  int ret = l.lock_exclusive(&store->gc_pool_ctx, obj_names[index]);
  if (ret == -EBUSY) { /* already locked by another gc processor */
    ldpp_dout(this, 10) << "RGWGC::process failed to acquire lock on " <<
      obj_names[index] << dendl;
    return 0;
  }
  if (ret < 0)
    return ret;

  string marker;
  string next_marker;
  bool truncated;
  // heap-allocated so it can be swapped per-pool below; freed at 'done'
  IoCtx *ctx = new IoCtx;
  do {
    int max = 100;
    std::list<cls_rgw_gc_obj_info> entries;

    int ret = 0;

    if (! transitioned_objects_cache[index]) {
      // shard still believed omap-based: list from the omap gc log
      ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[index], marker, max, expired_only, entries, &truncated, next_marker);
      ldpp_dout(this, 20) <<
	"RGWGC::process cls_rgw_gc_list returned with returned:" << ret <<
	", entries.size=" << entries.size() << ", truncated=" << truncated <<
	", next_marker='" << next_marker << "'" << dendl;
      // consult the shard object version to detect a completed transition
      obj_version objv;
      cls_version_read(store->gc_pool_ctx, obj_names[index], &objv);
      if ((objv.ver == 1) && entries.size() == 0) {
        // queue exists and no expired omap entries; only switch over once the
        // omap holds no entries at all
        std::list<cls_rgw_gc_obj_info> non_expired_entries;
        ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[index], marker, 1, false, non_expired_entries, &truncated, next_marker);
        if (non_expired_entries.size() == 0) {
          transitioned_objects_cache[index] = true;
          marker.clear();
          ldpp_dout(this, 20) << "RGWGC::process cls_rgw_gc_list returned ENOENT for non expired entries, so setting cache entry to TRUE" << dendl;
        } else {
          ret = 0;
          goto done;
        }
      }
      if ((objv.ver == 0) && (ret == -ENOENT)) {
        // shard object doesn't exist yet: nothing to process
        ret = 0;
        goto done;
      }
    }

    if (transitioned_objects_cache[index]) {
      ret = cls_rgw_gc_queue_list_entries(store->gc_pool_ctx, obj_names[index], marker, max, expired_only, entries, &truncated, next_marker);
      ldpp_dout(this, 20) <<
	"RGWGC::process cls_rgw_gc_queue_list_entries returned with return value:" << ret <<
	", entries.size=" << entries.size() << ", truncated=" << truncated <<
	", next_marker='" << next_marker << "'" << dendl;
      if (entries.size() == 0) {
        ret = 0;
        goto done;
      }
    }

    if (ret < 0)
      goto done;

    marker = next_marker;

    string last_pool;
    std::list<cls_rgw_gc_obj_info>::iterator iter;
    for (iter = entries.begin(); iter != entries.end(); ++iter) {
      cls_rgw_gc_obj_info& info = *iter;

      ldpp_dout(this, 20) << "RGWGC::process iterating over entry tag='" <<
	info.tag << "', time=" << info.time << ", chain.objs.size()=" <<
	info.chain.objs.size() << dendl;

      std::list<cls_rgw_obj>::iterator liter;
      cls_rgw_obj_chain& chain = info.chain;

      // stop when the time budget (and lock lease) is about to run out
      utime_t now = ceph_clock_now();
      if (now >= end) {
        goto done;
      }
      if (! transitioned_objects_cache[index]) {
        if (chain.objs.empty()) {
          // nothing to delete: the tag itself can be retired right away
          io_manager.schedule_tag_removal(index, info.tag);
        } else {
          // tag is retired only after all its tail objects are deleted
          io_manager.add_tag_io_size(index, info.tag, chain.objs.size());
        }
      }
      if (! chain.objs.empty()) {
	for (liter = chain.objs.begin(); liter != chain.objs.end(); ++liter) {
	  cls_rgw_obj& obj = *liter;

	  // (re)open an IoCtx whenever the tail object's pool changes
	  if (obj.pool != last_pool) {
	    delete ctx;
	    ctx = new IoCtx;
	    ret = rgw_init_ioctx(store->get_rados_handle(), obj.pool, *ctx);
	    if (ret < 0) {
              if (transitioned_objects_cache[index]) {
                goto done;
              }
	      last_pool = "";
	      ldpp_dout(this, 0) << "ERROR: failed to create ioctx pool=" <<
		obj.pool << dendl;
	      continue;
	    }
	    last_pool = obj.pool;
	  }

	  ctx->locator_set_key(obj.loc);

	  const string& oid = obj.key.name; /* just stored raw oid there */

	  ldpp_dout(this, 5) << "RGWGC::process removing " << obj.pool <<
	    ":" << obj.key.name << dendl;
	  ObjectWriteOperation op;
	  cls_refcount_put(op, info.tag, true);

	  ret = io_manager.schedule_io(ctx, oid, &op, index, info.tag);
	  if (ret < 0) {
	    ldpp_dout(this, 0) <<
	      "WARNING: failed to schedule deletion for oid=" << oid << dendl;
            if (transitioned_objects_cache[index]) {
              //If deleting oid failed for any of them, we will not delete queue entries
              goto done;
            }
	  }
	  if (going_down()) {
	    // leave early, even if tag isn't removed, it's ok since it
	    // will be picked up next time around
	    goto done;
	  }
	} // chains loop
      } // else -- chains not empty
    } // entries loop
    if (transitioned_objects_cache[index] && entries.size() > 0) {
      // queue path: wait for all deletions before trimming the queue entries
      ret = io_manager.drain_ios();
      if (ret < 0) {
        goto done;
      }
      //Remove the entries from the queue
      ldpp_dout(this, 5) << "RGWGC::process removing entries, marker: " << marker << dendl;
      ret = io_manager.remove_queue_entries(index, entries.size());
      if (ret < 0) {
        ldpp_dout(this, 0) <<
          "WARNING: failed to remove queue entries" << dendl;
        goto done;
      }
    }
  } while (truncated);

done:
  /* we don't drain here, because if we're going down we don't want to
   * hold the system if backend is unresponsive
   */
  l.unlock(&store->gc_pool_ctx, obj_names[index]);
  delete ctx;

  return 0;
}
663 | ||
11fdf7f2 | 664 | int RGWGC::process(bool expired_only) |
7c673cae FG |
665 | { |
666 | int max_secs = cct->_conf->rgw_gc_processor_max_time; | |
667 | ||
11fdf7f2 TL |
668 | const int start = ceph::util::generate_random_number(0, max_objs - 1); |
669 | ||
670 | RGWGCIOManager io_manager(this, store->ctx(), this); | |
7c673cae FG |
671 | |
672 | for (int i = 0; i < max_objs; i++) { | |
673 | int index = (i + start) % max_objs; | |
11fdf7f2 | 674 | int ret = process(index, max_secs, expired_only, io_manager); |
7c673cae FG |
675 | if (ret < 0) |
676 | return ret; | |
677 | } | |
11fdf7f2 TL |
678 | if (!going_down()) { |
679 | io_manager.drain(); | |
680 | } | |
7c673cae FG |
681 | |
682 | return 0; | |
683 | } | |
684 | ||
// True once stop_processor() has requested shutdown.
bool RGWGC::going_down()
{
  return down_flag;
}
689 | ||
// Spawn the background gc worker thread ("rgw_gc").
void RGWGC::start_processor()
{
  worker = new GCWorker(this, cct, this);
  worker->create("rgw_gc");
}
695 | ||
// Signal shutdown, wake the worker, and join/destroy it. Safe to call when
// no worker was started (worker is NULL).
void RGWGC::stop_processor()
{
  down_flag = true;
  if (worker) {
    worker->stop();
    worker->join();
  }
  delete worker;
  worker = NULL;
}
706 | ||
11fdf7f2 TL |
// DoutPrefixProvider: log subsystem for gc messages.
unsigned RGWGC::get_subsys() const
{
  return dout_subsys;
}
711 | ||
// DoutPrefixProvider: prefix prepended to every gc log line.
std::ostream& RGWGC::gen_prefix(std::ostream& out) const
{
  return out << "garbage collection: ";
}
716 | ||
7c673cae FG |
// Worker thread main loop: run an expired-only gc pass, then sleep out the
// remainder of rgw_gc_processor_period (interruptible via stop()), until
// shutdown is requested.
void *RGWGC::GCWorker::entry() {
  do {
    utime_t start = ceph_clock_now();
    ldpp_dout(dpp, 2) << "garbage collection: start" << dendl;
    int r = gc->process(true);
    if (r < 0) {
      ldpp_dout(dpp, 0) << "ERROR: garbage collection process() returned error r=" << r << dendl;
    }
    ldpp_dout(dpp, 2) << "garbage collection: stop" << dendl;

    if (gc->going_down())
      break;

    // subtract elapsed pass time from the configured period
    utime_t end = ceph_clock_now();
    end -= start;
    int secs = cct->_conf->rgw_gc_processor_period;

    if (secs <= end.sec())
      continue; // next round

    secs -= end.sec();

    // interruptible sleep: stop() notifies the condvar to wake us early
    std::unique_lock locker{lock};
    cond.wait_for(locker, std::chrono::seconds(secs));
  } while (!gc->going_down());

  return NULL;
}
745 | ||
// Wake the worker out of its inter-pass sleep so it can observe the gc
// down flag and exit promptly.
void RGWGC::GCWorker::stop()
{
  std::lock_guard l{lock};
  cond.notify_all();
}