]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
9f95a23c | 2 | // vim: ts=8 sw=2 smarttab ft=cpp |
7c673cae FG |
3 | |
4 | #include "rgw_gc.h" | |
494da23a | 5 | |
494da23a | 6 | #include "rgw_tools.h" |
9f95a23c | 7 | #include "include/scope_guard.h" |
7c673cae FG |
8 | #include "include/rados/librados.hpp" |
9 | #include "cls/rgw/cls_rgw_client.h" | |
9f95a23c | 10 | #include "cls/rgw_gc/cls_rgw_gc_client.h" |
7c673cae | 11 | #include "cls/refcount/cls_refcount_client.h" |
9f95a23c | 12 | #include "cls/version/cls_version_client.h" |
494da23a | 13 | #include "rgw_perf_counters.h" |
7c673cae | 14 | #include "cls/lock/cls_lock_client.h" |
11fdf7f2 | 15 | #include "include/random.h" |
9f95a23c | 16 | #include "rgw_gc_log.h" |
7c673cae | 17 | |
494da23a | 18 | #include <list> // XXX |
11fdf7f2 | 19 | #include <sstream> |
f67539c2 | 20 | #include "xxhash.h" |
7c673cae FG |
21 | |
22 | #define dout_context g_ceph_context | |
23 | #define dout_subsys ceph_subsys_rgw | |
24 | ||
7c673cae FG |
25 | using namespace librados; |
26 | ||
27 | static string gc_oid_prefix = "gc"; | |
28 | static string gc_index_lock_name = "gc_process"; | |
29 | ||
7c673cae FG |
30 | void RGWGC::initialize(CephContext *_cct, RGWRados *_store) { |
31 | cct = _cct; | |
32 | store = _store; | |
33 | ||
c07f9fc5 | 34 | max_objs = min(static_cast<int>(cct->_conf->rgw_gc_max_objs), rgw_shards_max()); |
7c673cae FG |
35 | |
36 | obj_names = new string[max_objs]; | |
37 | ||
38 | for (int i = 0; i < max_objs; i++) { | |
39 | obj_names[i] = gc_oid_prefix; | |
40 | char buf[32]; | |
41 | snprintf(buf, 32, ".%d", i); | |
42 | obj_names[i].append(buf); | |
9f95a23c TL |
43 | |
44 | auto it = transitioned_objects_cache.begin() + i; | |
45 | transitioned_objects_cache.insert(it, false); | |
46 | ||
47 | //version = 0 -> not ready for transition | |
48 | //version = 1 -> marked ready for transition | |
49 | librados::ObjectWriteOperation op; | |
50 | op.create(false); | |
51 | const uint64_t queue_size = cct->_conf->rgw_gc_max_queue_size, num_deferred_entries = cct->_conf->rgw_gc_max_deferred; | |
52 | gc_log_init2(op, queue_size, num_deferred_entries); | |
b3b6e05e | 53 | store->gc_operate(this, obj_names[i], &op); |
7c673cae FG |
54 | } |
55 | } | |
56 | ||
// Release the per-shard object name array allocated in initialize().
void RGWGC::finalize()
{
  delete[] obj_names;
}
61 | ||
62 | int RGWGC::tag_index(const string& tag) | |
63 | { | |
f67539c2 | 64 | return rgw_shards_mod(XXH64(tag.c_str(), tag.size(), seed), max_objs); |
7c673cae FG |
65 | } |
66 | ||
9f95a23c | 67 | int RGWGC::send_chain(cls_rgw_obj_chain& chain, const string& tag) |
7c673cae | 68 | { |
9f95a23c | 69 | ObjectWriteOperation op; |
7c673cae FG |
70 | cls_rgw_gc_obj_info info; |
71 | info.chain = chain; | |
72 | info.tag = tag; | |
9f95a23c | 73 | gc_log_enqueue2(op, cct->_conf->rgw_gc_obj_min_wait, info); |
7c673cae | 74 | |
9f95a23c TL |
75 | int i = tag_index(tag); |
76 | ||
77 | ldpp_dout(this, 20) << "RGWGC::send_chain - on object name: " << obj_names[i] << "tag is: " << tag << dendl; | |
78 | ||
b3b6e05e | 79 | auto ret = store->gc_operate(this, obj_names[i], &op); |
9f95a23c TL |
80 | if (ret != -ECANCELED && ret != -EPERM) { |
81 | return ret; | |
82 | } | |
83 | ObjectWriteOperation set_entry_op; | |
84 | cls_rgw_gc_set_entry(set_entry_op, cct->_conf->rgw_gc_obj_min_wait, info); | |
b3b6e05e | 85 | return store->gc_operate(this, obj_names[i], &set_entry_op); |
7c673cae FG |
86 | } |
87 | ||
9f95a23c TL |
// Heap-allocated context handed to async_defer_callback(); owns the aio
// completion so it is released exactly once, in the destructor.
struct defer_chain_state {
  librados::AioCompletion* completion = nullptr;
  // TODO: hold a reference on the state in RGWGC to avoid use-after-free if
  // RGWGC destructs before this completion fires
  RGWGC* gc = nullptr;
  // copy of the deferred entry, replayed via on_defer_canceled() if the
  // shard turns out to have transitioned
  cls_rgw_gc_obj_info info;

  ~defer_chain_state() {
    if (completion) {
      completion->release();
    }
  }
};
101 | ||
102 | static void async_defer_callback(librados::completion_t, void* arg) | |
7c673cae | 103 | { |
9f95a23c TL |
104 | std::unique_ptr<defer_chain_state> state{static_cast<defer_chain_state*>(arg)}; |
105 | if (state->completion->get_return_value() == -ECANCELED) { | |
106 | state->gc->on_defer_canceled(state->info); | |
107 | } | |
108 | } | |
7c673cae | 109 | |
9f95a23c TL |
110 | void RGWGC::on_defer_canceled(const cls_rgw_gc_obj_info& info) |
111 | { | |
112 | const std::string& tag = info.tag; | |
113 | const int i = tag_index(tag); | |
114 | ||
115 | // ECANCELED from cls_version_check() tells us that we've transitioned | |
116 | transitioned_objects_cache[i] = true; | |
7c673cae | 117 | |
9f95a23c TL |
118 | ObjectWriteOperation op; |
119 | cls_rgw_gc_queue_defer_entry(op, cct->_conf->rgw_gc_obj_min_wait, info); | |
120 | cls_rgw_gc_remove(op, {tag}); | |
7c673cae | 121 | |
9f95a23c TL |
122 | auto c = librados::Rados::aio_create_completion(nullptr, nullptr); |
123 | store->gc_aio_operate(obj_names[i], c, &op); | |
124 | c->release(); | |
7c673cae FG |
125 | } |
126 | ||
9f95a23c | 127 | int RGWGC::async_defer_chain(const string& tag, const cls_rgw_obj_chain& chain) |
7c673cae | 128 | { |
9f95a23c TL |
129 | const int i = tag_index(tag); |
130 | cls_rgw_gc_obj_info info; | |
131 | info.chain = chain; | |
132 | info.tag = tag; | |
7c673cae | 133 | |
9f95a23c TL |
134 | // if we've transitioned this shard object, we can rely on the cls_rgw_gc queue |
135 | if (transitioned_objects_cache[i]) { | |
136 | ObjectWriteOperation op; | |
137 | cls_rgw_gc_queue_defer_entry(op, cct->_conf->rgw_gc_obj_min_wait, info); | |
138 | ||
139 | // this tag may still be present in omap, so remove it once the cls_rgw_gc | |
140 | // enqueue succeeds | |
141 | cls_rgw_gc_remove(op, {tag}); | |
142 | ||
143 | auto c = librados::Rados::aio_create_completion(nullptr, nullptr); | |
144 | int ret = store->gc_aio_operate(obj_names[i], c, &op); | |
145 | c->release(); | |
146 | return ret; | |
147 | } | |
7c673cae | 148 | |
9f95a23c TL |
149 | // if we haven't seen the transition yet, write the defer to omap with cls_rgw |
150 | ObjectWriteOperation op; | |
7c673cae | 151 | |
9f95a23c TL |
152 | // assert that we haven't initialized cls_rgw_gc queue. this prevents us |
153 | // from writing new entries to omap after the transition | |
154 | gc_log_defer1(op, cct->_conf->rgw_gc_obj_min_wait, info); | |
155 | ||
156 | // prepare a callback to detect the transition via ECANCELED from cls_version_check() | |
157 | auto state = std::make_unique<defer_chain_state>(); | |
158 | state->gc = this; | |
159 | state->info.chain = chain; | |
160 | state->info.tag = tag; | |
161 | state->completion = librados::Rados::aio_create_completion( | |
162 | state.get(), async_defer_callback); | |
163 | ||
164 | int ret = store->gc_aio_operate(obj_names[i], state->completion, &op); | |
165 | if (ret == 0) { | |
166 | state.release(); // release ownership until async_defer_callback() | |
167 | } | |
168 | return ret; | |
7c673cae FG |
169 | } |
170 | ||
11fdf7f2 | 171 | int RGWGC::remove(int index, const std::vector<string>& tags, AioCompletion **pc) |
7c673cae FG |
172 | { |
173 | ObjectWriteOperation op; | |
174 | cls_rgw_gc_remove(op, tags); | |
9f95a23c TL |
175 | |
176 | auto c = librados::Rados::aio_create_completion(nullptr, nullptr); | |
177 | int ret = store->gc_aio_operate(obj_names[index], c, &op); | |
178 | if (ret < 0) { | |
179 | c->release(); | |
180 | } else { | |
181 | *pc = c; | |
182 | } | |
183 | return ret; | |
184 | } | |
185 | ||
186 | int RGWGC::remove(int index, int num_entries) | |
187 | { | |
188 | ObjectWriteOperation op; | |
189 | cls_rgw_gc_queue_remove_entries(op, num_entries); | |
190 | ||
b3b6e05e | 191 | return store->gc_operate(this, obj_names[index], &op); |
7c673cae FG |
192 | } |
193 | ||
// Gather up to 'max' gc entries for listing, resuming from *index/marker.
// Entries may come from the legacy omap log (cls_rgw_gc_list) and/or the
// cls_rgw_gc queue, depending on each shard's transition state. Resume
// state is carried back through *index, marker and processing_queue;
// *truncated reports whether more entries remain.
int RGWGC::list(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated, bool& processing_queue)
{
  result.clear();
  string next_marker;
  bool check_queue = false;

  // marker and check_queue are per-shard state; reset both when advancing
  // to the next shard object
  for (; *index < max_objs && result.size() < max; (*index)++, marker.clear(), check_queue = false) {
    std::list<cls_rgw_gc_obj_info> entries, queue_entries;
    int ret = 0;

    //processing_queue is set to true from previous iteration if the queue was under process and probably has more elements in it.
    if (! transitioned_objects_cache[*index] && ! check_queue && ! processing_queue) {
      // shard still (apparently) on the omap log
      ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[*index], marker, max - result.size(), expired_only, entries, truncated, next_marker);
      if (ret != -ENOENT && ret < 0) {
        return ret;
      }
      // shard object version: 0 = not ready for transition, 1 = marked
      // ready for transition (see initialize())
      obj_version objv;
      cls_version_read(store->gc_pool_ctx, obj_names[*index], &objv);
      if (ret == -ENOENT || entries.size() == 0) {
        if (objv.ver == 0) {
          continue;
        } else {
          if (! expired_only) {
            transitioned_objects_cache[*index] = true;
            marker.clear();
          } else {
            // expired-only listing returned nothing; only treat the shard
            // as transitioned if there are no unexpired omap entries either
            std::list<cls_rgw_gc_obj_info> non_expired_entries;
            ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[*index], marker, 1, false, non_expired_entries, truncated, next_marker);
            if (non_expired_entries.size() == 0) {
              transitioned_objects_cache[*index] = true;
              marker.clear();
            }
          }
        }
      }
      // shard mid-transition with budget left over: also pull from the
      // queue below
      if ((objv.ver == 1) && (entries.size() < max - result.size())) {
        check_queue = true;
        marker.clear();
      }
    }
    if (transitioned_objects_cache[*index] || check_queue || processing_queue) {
      processing_queue = false;
      ret = cls_rgw_gc_queue_list_entries(store->gc_pool_ctx, obj_names[*index], marker, (max - result.size()) - entries.size(), expired_only, queue_entries, truncated, next_marker);
      if (ret < 0) {
        return ret;
      }
    }
    if (entries.size() == 0 && queue_entries.size() == 0)
      continue;

    // append omap entries first, then queue entries
    std::list<cls_rgw_gc_obj_info>::iterator iter;
    for (iter = entries.begin(); iter != entries.end(); ++iter) {
      result.push_back(*iter);
    }

    for (iter = queue_entries.begin(); iter != queue_entries.end(); ++iter) {
      result.push_back(*iter);
    }

    marker = next_marker;

    if (*index == max_objs - 1) {
      // last shard: tell the caller whether the queue still has more
      if (queue_entries.size() > 0 && *truncated) {
        processing_queue = true;
      } else {
        processing_queue = false;
      }
      /* we cut short here, truncated will hold the correct value */
      return 0;
    }

    if (result.size() == max) {
      if (queue_entries.size() > 0 && *truncated) {
        processing_queue = true;
      } else {
        processing_queue = false;
        *index += 1; //move to next gc object
      }

      /* close approximation, it might be that the next of the objects don't hold
       * anything, in this case truncated should have been false, but we can find
       * that out on the next iteration
       */
      *truncated = true;
      return 0;
    }
  }
  *truncated = false;
  processing_queue = false;

  return 0;
}
286 | ||
11fdf7f2 TL |
// Bounded-concurrency manager for the aio operations issued while
// processing gc entries: tail-object deletions (TailIO) and batched tag
// removals from the gc log shards (IndexIO).
class RGWGCIOManager {
  const DoutPrefixProvider* dpp;
  CephContext *cct;
  RGWGC *gc;

  struct IO {
    enum Type {
      UnknownIO = 0,
      TailIO = 1,   // deletion of one tail/shadow object
      IndexIO = 2,  // batched tag removal on a gc log shard
    } type{UnknownIO};
    librados::AioCompletion *c{nullptr};
    string oid;
    int index{-1};  // gc shard index the IO belongs to
    string tag;
  };

  // in-flight IOs, completed in FIFO order
  deque<IO> ios;
  // per-shard batches of tags whose objects are all deleted
  vector<std::vector<string> > remove_tags;
  /* tracks the number of remaining shadow objects for a given tag in order to
   * only remove the tag once all shadow objects have themselves been removed
   */
  vector<map<string, size_t> > tag_io_size;

#define MAX_AIO_DEFAULT 10
  size_t max_aio{MAX_AIO_DEFAULT};

public:
  RGWGCIOManager(const DoutPrefixProvider* _dpp, CephContext *_cct, RGWGC *_gc) : dpp(_dpp),
    cct(_cct),
    gc(_gc) {
    max_aio = cct->_conf->rgw_gc_max_concurrent_io;
    // one slot per gc shard, mirroring RGWGC::initialize()'s shard count
    remove_tags.resize(min(static_cast<int>(cct->_conf->rgw_gc_max_objs), rgw_shards_max()));
    tag_io_size.resize(min(static_cast<int>(cct->_conf->rgw_gc_max_objs), rgw_shards_max()));
  }

  ~RGWGCIOManager() {
    // release any completions still pending at teardown
    for (auto io : ios) {
      io.c->release();
    }
  }

  // Issue one tail-object deletion, first reaping completed IOs until we
  // are under the concurrency cap. Returns 0 (without scheduling) if the
  // gc is shutting down.
  int schedule_io(IoCtx *ioctx, const string& oid, ObjectWriteOperation *op,
                  int index, const string& tag) {
    while (ios.size() > max_aio) {
      if (gc->going_down()) {
        return 0;
      }
      auto ret = handle_next_completion();
      //Return error if we are using queue, else ignore it
      if (gc->transitioned_objects_cache[index] && ret < 0) {
        return ret;
      }
    }

    auto c = librados::Rados::aio_create_completion(nullptr, nullptr);
    int ret = ioctx->aio_operate(oid, c, op);
    if (ret < 0) {
      return ret;
    }
    ios.push_back(IO{IO::TailIO, c, oid, index, tag});

    return 0;
  }

  // Wait on the oldest in-flight IO, log failures, and — for successful
  // tail deletions on non-transitioned shards — schedule the tag removal.
  int handle_next_completion() {
    ceph_assert(!ios.empty());
    IO& io = ios.front();
    io.c->wait_for_complete();
    int ret = io.c->get_return_value();
    io.c->release();

    // a missing tail object is not an error; treat as removed
    if (ret == -ENOENT) {
      ret = 0;
    }

    if (io.type == IO::IndexIO && ! gc->transitioned_objects_cache[io.index]) {
      if (ret < 0) {
        ldpp_dout(dpp, 0) << "WARNING: gc cleanup of tags on gc shard index=" <<
          io.index << " returned error, ret=" << ret << dendl;
      }
      goto done;
    }

    if (ret < 0) {
      ldpp_dout(dpp, 0) << "WARNING: gc could not remove oid=" << io.oid <<
        ", ret=" << ret << dendl;
      goto done;
    }

    if (! gc->transitioned_objects_cache[io.index]) {
      schedule_tag_removal(io.index, io.tag);
    }

  done:
    ios.pop_front();
    return ret;
  }

  /* This is a request to schedule a tag removal. It will be called once when
   * there are no shadow objects. But it will also be called for every shadow
   * object when there are any. Since we do not want the tag to be removed
   * until all shadow objects have been successfully removed, the scheduling
   * will not happen until the shadow object count goes down to zero
   */
  void schedule_tag_removal(int index, string tag) {
    auto& ts = tag_io_size[index];
    auto ts_it = ts.find(tag);
    if (ts_it != ts.end()) {
      auto& size = ts_it->second;
      --size;
      // wait all shadow obj delete return
      if (size != 0)
        return;

      ts.erase(ts_it);
    }

    auto& rt = remove_tags[index];

    rt.push_back(tag);
    // flush the batch once it reaches the configured trim chunk size
    if (rt.size() >= (size_t)cct->_conf->rgw_gc_max_trim_chunk) {
      flush_remove_tags(index, rt);
    }
  }

  // Record how many shadow objects a tag still has outstanding; see
  // schedule_tag_removal() for how the count is drained.
  void add_tag_io_size(int index, string tag, size_t size) {
    auto& ts = tag_io_size[index];
    ts.emplace(tag, size);
  }

  // Reap all in-flight IOs; returns -EAGAIN if shutdown interrupts, else
  // the last error seen (0 if none).
  int drain_ios() {
    int ret_val = 0;
    while (!ios.empty()) {
      if (gc->going_down()) {
        return -EAGAIN;
      }
      auto ret = handle_next_completion();
      if (ret < 0) {
        ret_val = ret;
      }
    }
    return ret_val;
  }

  void drain() {
    drain_ios();
    flush_remove_tags();
    /* the tags draining might have generated more ios, drain those too */
    drain_ios();
  }

  // Issue one IndexIO that removes the accumulated tag batch 'rt' from
  // shard 'index'. The batch is cleared on every exit path (scope guard).
  void flush_remove_tags(int index, vector<string>& rt) {
    IO index_io;
    index_io.type = IO::IndexIO;
    index_io.index = index;

    ldpp_dout(dpp, 20) << __func__ <<
      " removing entries from gc log shard index=" << index << ", size=" <<
      rt.size() << ", entries=" << rt << dendl;

    auto rt_guard = make_scope_guard(
      [&]
      {
        rt.clear();
      }
      );

    int ret = gc->remove(index, rt, &index_io.c);
    if (ret < 0) {
      /* we already cleared list of tags, this prevents us from
       * ballooning in case of a persistent problem
       */
      ldpp_dout(dpp, 0) << "WARNING: failed to remove tags on gc shard index=" <<
        index << " ret=" << ret << dendl;
      return;
    }
    if (perfcounter) {
      /* log the count of tags retired for rate estimation */
      perfcounter->inc(l_rgw_gc_retire, rt.size());
    }
    ios.push_back(index_io);
  }

  // Flush the pending tag batches for every non-transitioned shard.
  void flush_remove_tags() {
    int index = 0;
    for (auto& rt : remove_tags) {
      if (! gc->transitioned_objects_cache[index]) {
        flush_remove_tags(index, rt);
      }
      ++index;
    }
  }

  // Synchronously trim num_entries entries from a transitioned shard's
  // cls_rgw_gc queue.
  int remove_queue_entries(int index, int num_entries) {
    int ret = gc->remove(index, num_entries);
    if (ret < 0) {
      ldpp_dout(dpp, 0) << "ERROR: failed to remove queue entries on index=" <<
        index << " ret=" << ret << dendl;
      return ret;
    }
    if (perfcounter) {
      /* log the count of tags retired for rate estimation */
      perfcounter->inc(l_rgw_gc_retire, num_entries);
    }
    return 0;
  }
}; // class RGWGCIOManger
495 | ||
// Process one gc shard: take the shard's exclusive lock, list entries
// (omap log or cls_rgw_gc queue depending on transition state), delete
// each chained tail object through io_manager, and schedule tag /
// queue-entry removal. Returns -EAGAIN for non-positive max_secs, 0 when
// the shard is locked by another gc processor or when processing
// completes/times out.
int RGWGC::process(int index, int max_secs, bool expired_only,
                   RGWGCIOManager& io_manager)
{
  ldpp_dout(this, 20) << "RGWGC::process entered with GC index_shard=" <<
    index << ", max_secs=" << max_secs << ", expired_only=" <<
    expired_only << dendl;

  rados::cls::lock::Lock l(gc_index_lock_name);
  utime_t end = ceph_clock_now();

  /* max_secs should be greater than zero. We don't want a zero max_secs
   * to be translated as no timeout, since we'd then need to break the
   * lock and that would require a manual intervention. In this case
   * we can just wait it out. */
  if (max_secs <= 0)
    return -EAGAIN;

  end += max_secs;
  utime_t time(max_secs, 0);
  l.set_duration(time);

  int ret = l.lock_exclusive(&store->gc_pool_ctx, obj_names[index]);
  if (ret == -EBUSY) { /* already locked by another gc processor */
    ldpp_dout(this, 10) << "RGWGC::process failed to acquire lock on " <<
      obj_names[index] << dendl;
    return 0;
  }
  if (ret < 0)
    return ret;

  string marker;
  string next_marker;
  bool truncated;
  IoCtx *ctx = new IoCtx;
  do {
    int max = 100;
    std::list<cls_rgw_gc_obj_info> entries;

    int ret = 0;

    if (! transitioned_objects_cache[index]) {
      // shard still on the omap log
      ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[index], marker, max, expired_only, entries, &truncated, next_marker);
      ldpp_dout(this, 20) <<
        "RGWGC::process cls_rgw_gc_list returned with returned:" << ret <<
        ", entries.size=" << entries.size() << ", truncated=" << truncated <<
        ", next_marker='" << next_marker << "'" << dendl;
      // objv.ver == 1 means the shard is marked ready for transition; once
      // no unexpired omap entries remain, flip the cache and use the queue
      obj_version objv;
      cls_version_read(store->gc_pool_ctx, obj_names[index], &objv);
      if ((objv.ver == 1) && entries.size() == 0) {
        std::list<cls_rgw_gc_obj_info> non_expired_entries;
        ret = cls_rgw_gc_list(store->gc_pool_ctx, obj_names[index], marker, 1, false, non_expired_entries, &truncated, next_marker);
        if (non_expired_entries.size() == 0) {
          transitioned_objects_cache[index] = true;
          marker.clear();
          ldpp_dout(this, 20) << "RGWGC::process cls_rgw_gc_list returned NO non expired entries, so setting cache entry to TRUE" << dendl;
        } else {
          ret = 0;
          goto done;
        }
      }
      // legacy shard with nothing to do
      if ((objv.ver == 0) && (ret == -ENOENT || entries.size() == 0)) {
        ret = 0;
        goto done;
      }
    }

    if (transitioned_objects_cache[index]) {
      ret = cls_rgw_gc_queue_list_entries(store->gc_pool_ctx, obj_names[index], marker, max, expired_only, entries, &truncated, next_marker);
      ldpp_dout(this, 20) <<
        "RGWGC::process cls_rgw_gc_queue_list_entries returned with return value:" << ret <<
        ", entries.size=" << entries.size() << ", truncated=" << truncated <<
        ", next_marker='" << next_marker << "'" << dendl;
      if (entries.size() == 0) {
        ret = 0;
        goto done;
      }
    }

    if (ret < 0)
      goto done;

    marker = next_marker;

    string last_pool;
    std::list<cls_rgw_gc_obj_info>::iterator iter;
    for (iter = entries.begin(); iter != entries.end(); ++iter) {
      cls_rgw_gc_obj_info& info = *iter;

      ldpp_dout(this, 20) << "RGWGC::process iterating over entry tag='" <<
        info.tag << "', time=" << info.time << ", chain.objs.size()=" <<
        info.chain.objs.size() << dendl;

      std::list<cls_rgw_obj>::iterator liter;
      cls_rgw_obj_chain& chain = info.chain;

      // stop before the lock duration expires
      utime_t now = ceph_clock_now();
      if (now >= end) {
        goto done;
      }
      if (! transitioned_objects_cache[index]) {
        if (chain.objs.empty()) {
          io_manager.schedule_tag_removal(index, info.tag);
        } else {
          // tag can only be removed once all its objects are deleted
          io_manager.add_tag_io_size(index, info.tag, chain.objs.size());
        }
      }
      if (! chain.objs.empty()) {
        for (liter = chain.objs.begin(); liter != chain.objs.end(); ++liter) {
          cls_rgw_obj& obj = *liter;

          // re-open the ioctx only when the pool changes between objects
          if (obj.pool != last_pool) {
            delete ctx;
            ctx = new IoCtx;
            ret = rgw_init_ioctx(this, store->get_rados_handle(), obj.pool, *ctx);
            if (ret < 0) {
              if (transitioned_objects_cache[index]) {
                goto done;
              }
              last_pool = "";
              ldpp_dout(this, 0) << "ERROR: failed to create ioctx pool=" <<
                obj.pool << dendl;
              continue;
            }
            last_pool = obj.pool;
          }

          ctx->locator_set_key(obj.loc);

          const string& oid = obj.key.name; /* just stored raw oid there */

          ldpp_dout(this, 5) << "RGWGC::process removing " << obj.pool <<
            ":" << obj.key.name << dendl;
          ObjectWriteOperation op;
          cls_refcount_put(op, info.tag, true);

          ret = io_manager.schedule_io(ctx, oid, &op, index, info.tag);
          if (ret < 0) {
            ldpp_dout(this, 0) <<
              "WARNING: failed to schedule deletion for oid=" << oid << dendl;
            if (transitioned_objects_cache[index]) {
              //If deleting oid failed for any of them, we will not delete queue entries
              goto done;
            }
          }
          if (going_down()) {
            // leave early, even if tag isn't removed, it's ok since it
            // will be picked up next time around
            goto done;
          }
        } // chains loop
      } // else -- chains not empty
    } // entries loop
    if (transitioned_objects_cache[index] && entries.size() > 0) {
      // queue path: wait for all deletions before trimming the queue
      ret = io_manager.drain_ios();
      if (ret < 0) {
        goto done;
      }
      //Remove the entries from the queue
      ldpp_dout(this, 5) << "RGWGC::process removing entries, marker: " << marker << dendl;
      ret = io_manager.remove_queue_entries(index, entries.size());
      if (ret < 0) {
        ldpp_dout(this, 0) <<
          "WARNING: failed to remove queue entries" << dendl;
        goto done;
      }
    }
  } while (truncated);

done:
  /* we don't drain here, because if we're going down we don't want to
   * hold the system if backend is unresponsive
   */
  l.unlock(&store->gc_pool_ctx, obj_names[index]);
  delete ctx;

  return 0;
}
673 | ||
11fdf7f2 | 674 | int RGWGC::process(bool expired_only) |
7c673cae FG |
675 | { |
676 | int max_secs = cct->_conf->rgw_gc_processor_max_time; | |
677 | ||
11fdf7f2 TL |
678 | const int start = ceph::util::generate_random_number(0, max_objs - 1); |
679 | ||
680 | RGWGCIOManager io_manager(this, store->ctx(), this); | |
7c673cae FG |
681 | |
682 | for (int i = 0; i < max_objs; i++) { | |
683 | int index = (i + start) % max_objs; | |
11fdf7f2 | 684 | int ret = process(index, max_secs, expired_only, io_manager); |
7c673cae FG |
685 | if (ret < 0) |
686 | return ret; | |
687 | } | |
11fdf7f2 TL |
688 | if (!going_down()) { |
689 | io_manager.drain(); | |
690 | } | |
7c673cae FG |
691 | |
692 | return 0; | |
693 | } | |
694 | ||
// True once stop_processor() has flagged shutdown; polled throughout gc
// processing so long-running passes can bail out early.
bool RGWGC::going_down()
{
  return down_flag;
}
699 | ||
// Spawn the background GC worker thread (named "rgw_gc").
void RGWGC::start_processor()
{
  worker = new GCWorker(this, cct, this);
  worker->create("rgw_gc");
}
705 | ||
706 | void RGWGC::stop_processor() | |
707 | { | |
708 | down_flag = true; | |
709 | if (worker) { | |
710 | worker->stop(); | |
711 | worker->join(); | |
712 | } | |
713 | delete worker; | |
714 | worker = NULL; | |
715 | } | |
716 | ||
11fdf7f2 TL |
// DoutPrefixProvider: log under the rgw subsystem.
unsigned RGWGC::get_subsys() const
{
  return dout_subsys;
}
721 | ||
// DoutPrefixProvider: prefix for every log line emitted via ldpp_dout(this, ...).
std::ostream& RGWGC::gen_prefix(std::ostream& out) const
{
  return out << "garbage collection: ";
}
726 | ||
7c673cae FG |
// GC worker thread body: repeatedly run an expired-only gc pass, then sleep
// out the remainder of rgw_gc_processor_period (interruptible via stop()),
// until the gc is going down.
void *RGWGC::GCWorker::entry() {
  do {
    utime_t start = ceph_clock_now();
    ldpp_dout(dpp, 2) << "garbage collection: start" << dendl;
    int r = gc->process(true);
    if (r < 0) {
      ldpp_dout(dpp, 0) << "ERROR: garbage collection process() returned error r=" << r << dendl;
    }
    ldpp_dout(dpp, 2) << "garbage collection: stop" << dendl;

    if (gc->going_down())
      break;

    // compute how long the pass took
    utime_t end = ceph_clock_now();
    end -= start;
    int secs = cct->_conf->rgw_gc_processor_period;

    // pass ran longer than the period: start the next round immediately
    if (secs <= end.sec())
      continue; // next round

    secs -= end.sec();

    // sleep the remainder; stop() notifies the condvar to wake us early
    std::unique_lock locker{lock};
    cond.wait_for(locker, std::chrono::seconds(secs));
  } while (!gc->going_down());

  return NULL;
}
755 | ||
// Wake the worker if it is sleeping between gc passes. The shutdown flag
// itself is set by RGWGC::stop_processor() before this is called.
void RGWGC::GCWorker::stop()
{
  std::lock_guard l{lock};
  cond.notify_all();
}