]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/rbd_mirror/ImageDeleter.cc
update sources to v12.1.0
[ceph.git] / ceph / src / tools / rbd_mirror / ImageDeleter.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2016 SUSE LINUX GmbH
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14#include <boost/bind.hpp>
15#include <map>
16#include <set>
17#include <sstream>
18
19#include "include/rados/librados.hpp"
20#include "common/Formatter.h"
21#include "common/admin_socket.h"
22#include "common/debug.h"
23#include "common/errno.h"
24#include "common/WorkQueue.h"
25#include "global/global_context.h"
26#include "librbd/internal.h"
27#include "librbd/ImageCtx.h"
28#include "librbd/ImageState.h"
29#include "librbd/Journal.h"
30#include "librbd/Operations.h"
31#include "librbd/journal/Policy.h"
32#include "cls/rbd/cls_rbd_client.h"
33#include "cls/rbd/cls_rbd_types.h"
34#include "librbd/Utils.h"
35#include "ImageDeleter.h"
36
37#define dout_context g_ceph_context
38#define dout_subsys ceph_subsys_rbd_mirror
39#undef dout_prefix
40#define dout_prefix *_dout << "rbd::mirror::ImageDeleter: " << this << " " \
41 << __func__ << ": "
42
43using std::string;
44using std::map;
45using std::stringstream;
46using std::vector;
47using std::pair;
48using std::make_pair;
49
50using librados::IoCtx;
51using namespace librbd;
52
53namespace rbd {
54namespace mirror {
55
56namespace {
57
58class ImageDeleterAdminSocketCommand {
59public:
60 virtual ~ImageDeleterAdminSocketCommand() {}
61 virtual bool call(Formatter *f, stringstream *ss) = 0;
62};
63
64class StatusCommand : public ImageDeleterAdminSocketCommand {
65public:
66 explicit StatusCommand(ImageDeleter *image_del) : image_del(image_del) {}
67
68 bool call(Formatter *f, stringstream *ss) override {
69 image_del->print_status(f, ss);
70 return true;
71 }
72
73private:
74 ImageDeleter *image_del;
75};
76
77struct DeleteJournalPolicy : public librbd::journal::Policy {
78 bool append_disabled() const override {
79 return true;
80 }
81 bool journal_disabled() const override {
82 return false;
83 }
84
85 void allocate_tag_on_lock(Context *on_finish) override {
86 on_finish->complete(0);
87 }
88};
89
90} // anonymous namespace
91
92class ImageDeleterAdminSocketHook : public AdminSocketHook {
93public:
94 ImageDeleterAdminSocketHook(CephContext *cct, ImageDeleter *image_del) :
95 admin_socket(cct->get_admin_socket()) {
96
97 std::string command;
98 int r;
99
100 command = "rbd mirror deletion status";
101 r = admin_socket->register_command(command, command, this,
102 "get status for image deleter");
103 if (r == 0) {
104 commands[command] = new StatusCommand(image_del);
105 }
106
107 }
108
109 ~ImageDeleterAdminSocketHook() override {
110 for (Commands::const_iterator i = commands.begin(); i != commands.end();
111 ++i) {
112 (void)admin_socket->unregister_command(i->first);
113 delete i->second;
114 }
115 }
116
117 bool call(std::string command, cmdmap_t& cmdmap, std::string format,
118 bufferlist& out) override {
119 Commands::const_iterator i = commands.find(command);
120 assert(i != commands.end());
121 Formatter *f = Formatter::create(format);
122 stringstream ss;
123 bool r = i->second->call(f, &ss);
124 delete f;
125 out.append(ss);
126 return r;
127 }
128
129private:
130 typedef std::map<std::string, ImageDeleterAdminSocketCommand*> Commands;
131 AdminSocket *admin_socket;
132 Commands commands;
133};
134
135ImageDeleter::ImageDeleter(ContextWQ *work_queue, SafeTimer *timer,
136 Mutex *timer_lock)
137 : m_running(true),
138 m_work_queue(work_queue),
139 m_delete_lock("rbd::mirror::ImageDeleter::Delete"),
140 m_image_deleter_thread(this),
141 m_failed_timer(timer),
142 m_failed_timer_lock(timer_lock),
143 m_asok_hook(new ImageDeleterAdminSocketHook(g_ceph_context, this))
144{
145 set_failed_timer_interval(g_ceph_context->_conf->rbd_mirror_delete_retry_interval);
146 m_image_deleter_thread.create("image_deleter");
147}
148
149ImageDeleter::~ImageDeleter() {
150 dout(20) << "enter" << dendl;
151
152 m_running = false;
153 {
154 Mutex::Locker l (m_delete_lock);
155 m_delete_queue_cond.Signal();
156 }
157 if (m_image_deleter_thread.is_started()) {
158 m_image_deleter_thread.join();
159 }
160
161 delete m_asok_hook;
162 dout(20) << "return" << dendl;
163}
164
165void ImageDeleter::run() {
166 dout(20) << "enter" << dendl;
167 while(m_running) {
168 m_delete_lock.Lock();
169 while (m_delete_queue.empty()) {
170 dout(20) << "waiting for delete requests" << dendl;
171 m_delete_queue_cond.Wait(m_delete_lock);
172
173 if (!m_running) {
174 m_delete_lock.Unlock();
175 dout(20) << "return" << dendl;
176 return;
177 }
178 }
179
180 m_active_delete = std::move(m_delete_queue.back());
181 m_delete_queue.pop_back();
182 m_delete_lock.Unlock();
183
184 bool move_to_next = process_image_delete();
185 if (!move_to_next) {
186 if (!m_running) {
187 dout(20) << "return" << dendl;
188 return;
189 }
190
191 Mutex::Locker l(m_delete_lock);
192 if (m_delete_queue.size() == 1) {
193 m_delete_queue_cond.Wait(m_delete_lock);
194 }
195 }
196 }
197}
198
199void ImageDeleter::schedule_image_delete(RadosRef local_rados,
200 int64_t local_pool_id,
201 const std::string& global_image_id) {
202 dout(20) << "enter" << dendl;
203
204 Mutex::Locker locker(m_delete_lock);
205
206 auto del_info = find_delete_info(local_pool_id, global_image_id);
207 if (del_info != nullptr) {
208 dout(20) << "image " << global_image_id << " "
209 << "was already scheduled for deletion" << dendl;
210 return;
211 }
212
213 m_delete_queue.push_front(
214 unique_ptr<DeleteInfo>(new DeleteInfo(local_rados, local_pool_id,
215 global_image_id)));
216 m_delete_queue_cond.Signal();
217}
218
219void ImageDeleter::wait_for_scheduled_deletion(int64_t local_pool_id,
220 const std::string &global_image_id,
221 Context *ctx,
222 bool notify_on_failed_retry) {
223
224 ctx = new FunctionContext([this, ctx](int r) {
225 m_work_queue->queue(ctx, r);
226 });
227
228 Mutex::Locker locker(m_delete_lock);
229 auto del_info = find_delete_info(local_pool_id, global_image_id);
230 if (!del_info) {
231 // image not scheduled for deletion
232 ctx->complete(0);
233 return;
234 }
235
236 dout(20) << "local_pool_id=" << local_pool_id << ", "
237 << "global_image_id=" << global_image_id << dendl;
238
239 if ((*del_info)->on_delete != nullptr) {
240 (*del_info)->on_delete->complete(-ESTALE);
241 }
242 (*del_info)->on_delete = ctx;
243 (*del_info)->notify_on_failed_retry = notify_on_failed_retry;
244}
245
246void ImageDeleter::cancel_waiter(int64_t local_pool_id,
247 const std::string &global_image_id) {
248 Mutex::Locker locker(m_delete_lock);
249 auto del_info = find_delete_info(local_pool_id, global_image_id);
250 if (!del_info) {
251 return;
252 }
253
254 if ((*del_info)->on_delete != nullptr) {
255 (*del_info)->on_delete->complete(-ECANCELED);
256 (*del_info)->on_delete = nullptr;
257 }
258}
259
260bool ImageDeleter::process_image_delete() {
261
262 stringstream ss;
263 m_active_delete->to_string(ss);
264 std::string del_info_str = ss.str();
265 dout(10) << "start processing delete request: " << del_info_str << dendl;
266 int r;
267 cls::rbd::MirrorImage mirror_image;
268
269 // remote image was disabled, now we need to delete local image
270 IoCtx ioctx;
271 r = m_active_delete->local_rados->ioctx_create2(
272 m_active_delete->local_pool_id, ioctx);
273 if (r < 0) {
274 derr << "error accessing local pool " << m_active_delete->local_pool_id
275 << ": " << cpp_strerror(r) << dendl;
276 enqueue_failed_delete(r);
277 return true;
278 }
279
280 dout(20) << "connected to local pool: " << ioctx.get_pool_name() << dendl;
281
282 auto &global_image_id = m_active_delete->global_image_id;
283 std::string local_image_id;
284 r = librbd::cls_client::mirror_image_get_image_id(
285 &ioctx, global_image_id, &local_image_id);
286 if (r == -ENOENT) {
287 dout(10) << "image " << global_image_id << " is not mirrored" << dendl;
288 complete_active_delete(r);
289 return true;
290 } else if (r < 0) {
291 derr << "error retrieving local id for image " << global_image_id
292 << ": " << cpp_strerror(r) << dendl;
293 enqueue_failed_delete(r);
294 return true;
295 }
296
297 bool is_primary = false;
298 C_SaferCond tag_owner_ctx;
299 Journal<>::is_tag_owner(ioctx, local_image_id, &is_primary,
300 m_work_queue, &tag_owner_ctx);
301 r = tag_owner_ctx.wait();
302 if (r < 0 && r != -ENOENT) {
303 derr << "error retrieving image primary info for image " << global_image_id
304 << ": " << cpp_strerror(r) << dendl;
305 enqueue_failed_delete(r);
306 return true;
307 }
308 if (is_primary) {
309 dout(10) << "image " << global_image_id << " is local primary" << dendl;
310 complete_active_delete(-EISPRM);
311 return true;
312 }
313
314 dout(20) << "local image is not the primary" << dendl;
315
316 bool has_snapshots;
317 r = image_has_snapshots_and_children(&ioctx, local_image_id, &has_snapshots);
318 if (r < 0) {
319 enqueue_failed_delete(r);
320 return true;
321 }
322
323 mirror_image.global_image_id = global_image_id;
324 mirror_image.state = cls::rbd::MIRROR_IMAGE_STATE_DISABLING;
325 r = cls_client::mirror_image_set(&ioctx, local_image_id, mirror_image);
326 if (r == -ENOENT) {
327 dout(10) << "local image is not mirrored, aborting deletion..." << dendl;
328 complete_active_delete(r);
329 return true;
330 } else if (r == -EEXIST || r == -EINVAL) {
331 derr << "cannot disable mirroring for image " << global_image_id
332 << ": global_image_id has changed/reused: "
333 << cpp_strerror(r) << dendl;
334 complete_active_delete(r);
335 return true;
336 } else if (r < 0) {
337 derr << "cannot disable mirroring for image " << global_image_id
338 << ": " << cpp_strerror(r) << dendl;
339 enqueue_failed_delete(r);
340 return true;
341 }
342
343 dout(20) << "set local image mirroring to disable" << dendl;
344
345 if (has_snapshots) {
346 dout(20) << "local image has snapshots" << dendl;
347
348 ImageCtx *imgctx = new ImageCtx("", local_image_id, nullptr, ioctx, false);
349 r = imgctx->state->open(false);
350 if (r < 0) {
351 derr << "error opening image " << global_image_id << " ("
352 << local_image_id << "): " << cpp_strerror(r) << dendl;
353 enqueue_failed_delete(r);
354 return true;
355 }
356
357 {
358 RWLock::WLocker snap_locker(imgctx->snap_lock);
359 imgctx->set_journal_policy(new DeleteJournalPolicy());
360 }
361
362 std::vector<librbd::snap_info_t> snaps;
363 r = librbd::snap_list(imgctx, snaps);
364 if (r < 0) {
365 derr << "error listing snapshot of image " << imgctx->name
366 << cpp_strerror(r) << dendl;
367 imgctx->state->close();
368 enqueue_failed_delete(r);
369 return true;
370 }
371
372 for (const auto& snap : snaps) {
373 dout(20) << "processing deletion of snapshot " << imgctx->name << "@"
374 << snap.name << dendl;
375
376 bool is_protected;
377 r = librbd::snap_is_protected(imgctx, snap.name.c_str(), &is_protected);
378 if (r < 0) {
379 derr << "error checking snapshot protection of snapshot "
380 << imgctx->name << "@" << snap.name << ": " << cpp_strerror(r)
381 << dendl;
382 imgctx->state->close();
383 enqueue_failed_delete(r);
384 return true;
385 }
386 if (is_protected) {
387 dout(20) << "snapshot " << imgctx->name << "@" << snap.name
388 << " is protected, issuing unprotect command" << dendl;
389
390 r = imgctx->operations->snap_unprotect(
391 cls::rbd::UserSnapshotNamespace(), snap.name.c_str());
392 if (r == -EBUSY) {
393 // there are still clones of snapshots of this image, therefore send
394 // the delete request to the end of the queue
395 dout(10) << "local image id " << local_image_id << " has "
396 << "snapshots with cloned children, postponing deletion..."
397 << dendl;
398 imgctx->state->close();
399 Mutex::Locker l(m_delete_lock);
400 m_active_delete->notify(r);
401 m_delete_queue.push_front(std::move(m_active_delete));
402 return false;
403 } else if (r < 0) {
404 derr << "error unprotecting snapshot " << imgctx->name << "@"
405 << snap.name << ": " << cpp_strerror(r) << dendl;
406 imgctx->state->close();
407 enqueue_failed_delete(r);
408 return true;
409 }
410 }
411
412 r = imgctx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(),
413 snap.name.c_str());
414 if (r < 0) {
415 derr << "error removing snapshot " << imgctx->name << "@"
416 << snap.name << ": " << cpp_strerror(r) << dendl;
417 imgctx->state->close();
418 enqueue_failed_delete(r);
419 return true;
420 }
421
422 dout(10) << "snapshot " << imgctx->name << "@" << snap.name
423 << " was deleted" << dendl;
424 }
425
426 imgctx->state->close();
427 }
428
429 librbd::NoOpProgressContext ctx;
430 r = librbd::remove(ioctx, "", local_image_id, ctx, true);
431 if (r < 0 && r != -ENOENT) {
432 derr << "error removing image " << global_image_id << " "
433 << "(" << local_image_id << ") from local pool: "
434 << cpp_strerror(r) << dendl;
435 enqueue_failed_delete(r);
436 return true;
437 }
438
439 // image was already deleted from rbd_directory, now we will make sure
440 // that will be also removed from rbd_mirroring
441 if (r == -ENOENT) {
442 dout(20) << "local image does not exist, removing image from rbd_mirroring"
443 << dendl;
444 }
445
446 r = cls_client::mirror_image_remove(&ioctx, local_image_id);
447 if (r < 0 && r != -ENOENT) {
448 derr << "error removing image from mirroring directory: "
449 << cpp_strerror(r) << dendl;
450 enqueue_failed_delete(r);
451 return true;
452 }
453
454 dout(10) << "Successfully deleted image "
455 << global_image_id << " " << "(" << local_image_id << ")" << dendl;
456
457 complete_active_delete(0);
458 return true;
459}
460
461int ImageDeleter::image_has_snapshots_and_children(IoCtx *ioctx,
462 string& image_id,
463 bool *has_snapshots) {
464
465 string header_oid = librbd::util::header_name(image_id);
466 ::SnapContext snapc;
467 int r = cls_client::get_snapcontext(ioctx, header_oid, &snapc);
468 if (r < 0 && r != -ENOENT) {
469 derr << "error retrieving snapshot context for image id " << image_id
470 << ": " << cpp_strerror(r) << dendl;
471 return r;
472 }
473
474 *has_snapshots = !snapc.snaps.empty();
475
476 return 0;
477}
478
479void ImageDeleter::complete_active_delete(int r) {
480 dout(20) << dendl;
481
31f18b77
FG
482 Mutex::Locker delete_locker(m_delete_lock);
483 m_active_delete->notify(r);
484 m_active_delete.reset();
7c673cae
FG
485}
486
487void ImageDeleter::enqueue_failed_delete(int error_code) {
488 dout(20) << "enter" << dendl;
489
490 if (error_code == -EBLACKLISTED) {
491 derr << "blacklisted while deleting local image" << dendl;
492 complete_active_delete(error_code);
493 return;
494 }
495
496 m_delete_lock.Lock();
497 if (m_active_delete->notify_on_failed_retry) {
498 m_active_delete->notify(error_code);
499 }
500 m_active_delete->error_code = error_code;
501 bool was_empty = m_failed_queue.empty();
502 m_failed_queue.push_front(std::move(m_active_delete));
503 m_delete_lock.Unlock();
504 if (was_empty) {
505 FunctionContext *ctx = new FunctionContext(
506 boost::bind(&ImageDeleter::retry_failed_deletions, this));
507 Mutex::Locker l(*m_failed_timer_lock);
508 m_failed_timer->add_event_after(m_failed_interval, ctx);
509 }
510}
511
512void ImageDeleter::retry_failed_deletions() {
513 dout(20) << "enter" << dendl;
514
515 Mutex::Locker l(m_delete_lock);
516
517 bool empty = m_failed_queue.empty();
518 while (!m_failed_queue.empty()) {
519 m_delete_queue.push_back(std::move(m_failed_queue.back()));
520 m_delete_queue.back()->retries++;
521 m_failed_queue.pop_back();
522 }
523 if (!empty) {
524 m_delete_queue_cond.Signal();
525 }
526}
527
528unique_ptr<ImageDeleter::DeleteInfo> const* ImageDeleter::find_delete_info(
529 int64_t local_pool_id, const std::string &global_image_id) {
530 assert(m_delete_lock.is_locked());
531
532 if (m_active_delete && m_active_delete->match(local_pool_id,
533 global_image_id)) {
534 return &m_active_delete;
535 }
536
537 for (const auto& del_info : m_delete_queue) {
538 if (del_info->match(local_pool_id, global_image_id)) {
539 return &del_info;
540 }
541 }
542
543 for (const auto& del_info : m_failed_queue) {
544 if (del_info->match(local_pool_id, global_image_id)) {
545 return &del_info;
546 }
547 }
548
549 return nullptr;
550}
551
552void ImageDeleter::print_status(Formatter *f, stringstream *ss) {
553 dout(20) << "enter" << dendl;
554
555 if (f) {
556 f->open_object_section("image_deleter_status");
557 f->open_array_section("delete_images_queue");
558 }
559
560 Mutex::Locker l(m_delete_lock);
561 for (const auto& image : m_delete_queue) {
562 image->print_status(f, ss);
563 }
564
565 if (f) {
566 f->close_section();
567 f->open_array_section("failed_deletes_queue");
568 }
569
570 for (const auto& image : m_failed_queue) {
571 image->print_status(f, ss, true);
572 }
573
574 if (f) {
575 f->close_section();
576 f->close_section();
577 f->flush(*ss);
578 }
579}
580
581void ImageDeleter::DeleteInfo::notify(int r) {
582 if (on_delete) {
583 dout(20) << "executing image deletion handler r=" << r << dendl;
584
585 Context *ctx = on_delete;
586 on_delete = nullptr;
587 ctx->complete(r);
588 }
589}
590
591void ImageDeleter::DeleteInfo::to_string(stringstream& ss) {
592 ss << "[" << "local_pool_id=" << local_pool_id << ", ";
593 ss << "global_image_id=" << global_image_id << "]";
594}
595
596void ImageDeleter::DeleteInfo::print_status(Formatter *f, stringstream *ss,
597 bool print_failure_info) {
598 if (f) {
599 f->open_object_section("delete_info");
600 f->dump_int("local_pool_id", local_pool_id);
601 f->dump_string("global_image_id", global_image_id);
602 if (print_failure_info) {
603 f->dump_string("error_code", cpp_strerror(error_code));
604 f->dump_int("retries", retries);
605 }
606 f->close_section();
607 f->flush(*ss);
608 } else {
609 this->to_string(*ss);
610 }
611}
612
613vector<string> ImageDeleter::get_delete_queue_items() {
614 vector<string> items;
615
616 Mutex::Locker l(m_delete_lock);
617 for (const auto& del_info : m_delete_queue) {
618 items.push_back(del_info->global_image_id);
619 }
620
621 return items;
622}
623
624vector<pair<string, int> > ImageDeleter::get_failed_queue_items() {
625 vector<pair<string, int> > items;
626
627 Mutex::Locker l(m_delete_lock);
628 for (const auto& del_info : m_failed_queue) {
629 items.push_back(make_pair(del_info->global_image_id,
630 del_info->error_code));
631 }
632
633 return items;
634}
635
636void ImageDeleter::set_failed_timer_interval(double interval) {
637 this->m_failed_interval = interval;
638}
639
640} // namespace mirror
641} // namespace rbd