]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/internal.cc
patches: remove fuzz and re-format
[ceph.git] / ceph / src / librbd / internal.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#include "include/int_types.h"
4
5#include <errno.h>
6#include <limits.h>
7
8#include "include/types.h"
9#include "include/uuid.h"
10#include "common/ceph_context.h"
11#include "common/dout.h"
12#include "common/errno.h"
13#include "common/Throttle.h"
14#include "common/event_socket.h"
15#include "cls/lock/cls_lock_client.h"
16#include "include/stringify.h"
17
18#include "cls/rbd/cls_rbd.h"
19#include "cls/rbd/cls_rbd_types.h"
20#include "cls/rbd/cls_rbd_client.h"
21#include "cls/journal/cls_journal_types.h"
22#include "cls/journal/cls_journal_client.h"
23
24#include "librbd/ExclusiveLock.h"
25#include "librbd/ImageCtx.h"
26#include "librbd/ImageState.h"
27#include "librbd/internal.h"
28#include "librbd/Journal.h"
29#include "librbd/ObjectMap.h"
30#include "librbd/Operations.h"
31#include "librbd/Types.h"
32#include "librbd/Utils.h"
33#include "librbd/api/Image.h"
34#include "librbd/exclusive_lock/AutomaticPolicy.h"
35#include "librbd/exclusive_lock/StandardPolicy.h"
36#include "librbd/image/CloneRequest.h"
37#include "librbd/image/CreateRequest.h"
38#include "librbd/image/RemoveRequest.h"
39#include "librbd/io/AioCompletion.h"
40#include "librbd/io/ImageRequest.h"
41#include "librbd/io/ImageRequestWQ.h"
42#include "librbd/io/ObjectRequest.h"
43#include "librbd/io/ReadResult.h"
44#include "librbd/journal/Types.h"
45#include "librbd/managed_lock/Types.h"
46#include "librbd/mirror/EnableRequest.h"
47#include "librbd/operation/TrimRequest.h"
48
49#include "journal/Journaler.h"
50
51#include <boost/scope_exit.hpp>
52#include <boost/variant.hpp>
53#include "include/assert.h"
54
55#define dout_subsys ceph_subsys_rbd
56#undef dout_prefix
57#define dout_prefix *_dout << "librbd: "
58
59#define rbd_howmany(x, y) (((x) + (y) - 1) / (y))
60
61using std::map;
62using std::pair;
63using std::set;
64using std::string;
65using std::vector;
66// list binds to list() here, so std::list is explicitly used below
67
68using ceph::bufferlist;
69using librados::snap_t;
70using librados::IoCtx;
71using librados::Rados;
72
73namespace librbd {
74
75namespace {
76
77int validate_pool(IoCtx &io_ctx, CephContext *cct) {
181888fb 78 if (!cct->_conf->get_val<bool>("rbd_validate_pool")) {
7c673cae
FG
79 return 0;
80 }
81
82 int r = io_ctx.stat(RBD_DIRECTORY, NULL, NULL);
83 if (r == 0) {
84 return 0;
85 } else if (r < 0 && r != -ENOENT) {
86 lderr(cct) << "failed to stat RBD directory: " << cpp_strerror(r) << dendl;
87 return r;
88 }
89
90 // allocate a self-managed snapshot id if this a new pool to force
91 // self-managed snapshot mode
92 uint64_t snap_id;
93 r = io_ctx.selfmanaged_snap_create(&snap_id);
94 if (r == -EINVAL) {
95 lderr(cct) << "pool not configured for self-managed RBD snapshot support"
96 << dendl;
97 return r;
98 } else if (r < 0) {
99 lderr(cct) << "failed to allocate self-managed snapshot: "
100 << cpp_strerror(r) << dendl;
101 return r;
102 }
103
104 r = io_ctx.selfmanaged_snap_remove(snap_id);
105 if (r < 0) {
106 lderr(cct) << "failed to release self-managed snapshot " << snap_id
107 << ": " << cpp_strerror(r) << dendl;
108 }
109 return 0;
110}
111
112
113} // anonymous namespace
114
115 int detect_format(IoCtx &io_ctx, const string &name,
116 bool *old_format, uint64_t *size)
117 {
118 CephContext *cct = (CephContext *)io_ctx.cct();
119 if (old_format)
120 *old_format = true;
121 int r = io_ctx.stat(util::old_header_name(name), size, NULL);
122 if (r == -ENOENT) {
123 if (old_format)
124 *old_format = false;
125 r = io_ctx.stat(util::id_obj_name(name), size, NULL);
126 if (r < 0)
127 return r;
128 } else if (r < 0) {
129 return r;
130 }
131
132 ldout(cct, 20) << "detect format of " << name << " : "
133 << (old_format ? (*old_format ? "old" : "new") :
134 "don't care") << dendl;
135 return 0;
136 }
137
// Returns true when a parent pool is set (parent_pool_id != -1) and the
// offset `off` does not lie beyond the parent `overlap`.
bool has_parent(int64_t parent_pool_id, uint64_t off, uint64_t overlap)
{
  return (parent_pool_id != -1 && off <= overlap);
}
142
143 void init_rbd_header(struct rbd_obj_header_ondisk& ondisk,
144 uint64_t size, int order, uint64_t bid)
145 {
146 uint32_t hi = bid >> 32;
147 uint32_t lo = bid & 0xFFFFFFFF;
148 uint32_t extra = rand() % 0xFFFFFFFF;
149 memset(&ondisk, 0, sizeof(ondisk));
150
151 memcpy(&ondisk.text, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT));
152 memcpy(&ondisk.signature, RBD_HEADER_SIGNATURE,
153 sizeof(RBD_HEADER_SIGNATURE));
154 memcpy(&ondisk.version, RBD_HEADER_VERSION, sizeof(RBD_HEADER_VERSION));
155
156 snprintf(ondisk.block_name, sizeof(ondisk.block_name), "rb.%x.%x.%x",
157 hi, lo, extra);
158
159 ondisk.image_size = size;
160 ondisk.options.order = order;
161 ondisk.options.crypt_type = RBD_CRYPT_NONE;
162 ondisk.options.comp_type = RBD_COMP_NONE;
163 ondisk.snap_seq = 0;
164 ondisk.snap_count = 0;
165 ondisk.reserved = 0;
166 ondisk.snap_names_len = 0;
167 }
168
169 void image_info(ImageCtx *ictx, image_info_t& info, size_t infosize)
170 {
171 int obj_order = ictx->order;
172 ictx->snap_lock.get_read();
173 info.size = ictx->get_image_size(ictx->snap_id);
174 ictx->snap_lock.put_read();
175 info.obj_size = 1ULL << obj_order;
176 info.num_objs = Striper::get_num_objects(ictx->layout, info.size);
177 info.order = obj_order;
178 strncpy(info.block_name_prefix, ictx->object_prefix.c_str(),
179 RBD_MAX_BLOCK_NAME_SIZE);
180 info.block_name_prefix[RBD_MAX_BLOCK_NAME_SIZE - 1] = '\0';
181
182 // clear deprecated fields
183 info.parent_pool = -1L;
184 info.parent_name[0] = '\0';
185 }
186
// Extracts the object number from an object name of the form
// "<object_prefix><separator><hex number>".
//
// The prefix and one separator character are skipped and the remainder is
// parsed as hexadecimal. Returns 0 when nothing can be parsed, including
// when `oid` is shorter than the prefix plus separator.
uint64_t oid_to_object_no(const std::string& oid,
                          const std::string& object_prefix)
{
  std::istringstream iss(oid);
  // skip object prefix and separator
  iss.ignore(object_prefix.length() + 1);
  // must be initialised: if ignore() already hit end-of-stream the
  // extraction below fails without touching the variable
  uint64_t num = 0;
  iss >> std::hex >> num;
  return num;
}
196
197 void trim_image(ImageCtx *ictx, uint64_t newsize, ProgressContext& prog_ctx)
198 {
199 assert(ictx->owner_lock.is_locked());
200 assert(ictx->exclusive_lock == nullptr ||
201 ictx->exclusive_lock->is_lock_owner());
202
203 C_SaferCond ctx;
204 ictx->snap_lock.get_read();
205 operation::TrimRequest<> *req = operation::TrimRequest<>::create(
206 *ictx, &ctx, ictx->size, newsize, prog_ctx);
207 ictx->snap_lock.put_read();
208 req->send();
209
210 int r = ctx.wait();
211 if (r < 0) {
212 lderr(ictx->cct) << "warning: failed to remove some object(s): "
213 << cpp_strerror(r) << dendl;
214 }
215 }
216
217 int read_header_bl(IoCtx& io_ctx, const string& header_oid,
218 bufferlist& header, uint64_t *ver)
219 {
220 int r;
221 uint64_t off = 0;
222#define READ_SIZE 4096
223 do {
224 bufferlist bl;
225 r = io_ctx.read(header_oid, bl, READ_SIZE, off);
226 if (r < 0)
227 return r;
228 header.claim_append(bl);
229 off += r;
230 } while (r == READ_SIZE);
231
232 if (header.length() < sizeof(RBD_HEADER_TEXT) ||
233 memcmp(RBD_HEADER_TEXT, header.c_str(), sizeof(RBD_HEADER_TEXT))) {
234 CephContext *cct = (CephContext *)io_ctx.cct();
235 lderr(cct) << "unrecognized header format" << dendl;
236 return -ENXIO;
237 }
238
239 if (ver)
240 *ver = io_ctx.get_last_version();
241
242 return 0;
243 }
244
245 int read_header(IoCtx& io_ctx, const string& header_oid,
246 struct rbd_obj_header_ondisk *header, uint64_t *ver)
247 {
248 bufferlist header_bl;
249 int r = read_header_bl(io_ctx, header_oid, header_bl, ver);
250 if (r < 0)
251 return r;
252 if (header_bl.length() < (int)sizeof(*header))
253 return -EIO;
254 memcpy(header, header_bl.c_str(), sizeof(*header));
255
256 return 0;
257 }
258
259 int tmap_set(IoCtx& io_ctx, const string& imgname)
260 {
261 bufferlist cmdbl, emptybl;
262 __u8 c = CEPH_OSD_TMAP_SET;
263 ::encode(c, cmdbl);
264 ::encode(imgname, cmdbl);
265 ::encode(emptybl, cmdbl);
266 return io_ctx.tmap_update(RBD_DIRECTORY, cmdbl);
267 }
268
269 int tmap_rm(IoCtx& io_ctx, const string& imgname)
270 {
271 bufferlist cmdbl;
272 __u8 c = CEPH_OSD_TMAP_RM;
273 ::encode(c, cmdbl);
274 ::encode(imgname, cmdbl);
275 return io_ctx.tmap_update(RBD_DIRECTORY, cmdbl);
276 }
277
278 typedef boost::variant<std::string,uint64_t> image_option_value_t;
279 typedef std::map<int,image_option_value_t> image_options_t;
280 typedef std::shared_ptr<image_options_t> image_options_ref;
281
282 enum image_option_type_t {
283 STR,
284 UINT64,
285 };
286
287 const std::map<int, image_option_type_t> IMAGE_OPTIONS_TYPE_MAPPING = {
288 {RBD_IMAGE_OPTION_FORMAT, UINT64},
289 {RBD_IMAGE_OPTION_FEATURES, UINT64},
290 {RBD_IMAGE_OPTION_ORDER, UINT64},
291 {RBD_IMAGE_OPTION_STRIPE_UNIT, UINT64},
292 {RBD_IMAGE_OPTION_STRIPE_COUNT, UINT64},
293 {RBD_IMAGE_OPTION_JOURNAL_ORDER, UINT64},
294 {RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH, UINT64},
295 {RBD_IMAGE_OPTION_JOURNAL_POOL, STR},
296 {RBD_IMAGE_OPTION_FEATURES_SET, UINT64},
297 {RBD_IMAGE_OPTION_FEATURES_CLEAR, UINT64},
298 {RBD_IMAGE_OPTION_DATA_POOL, STR},
299 };
300
301 std::string image_option_name(int optname) {
302 switch (optname) {
303 case RBD_IMAGE_OPTION_FORMAT:
304 return "format";
305 case RBD_IMAGE_OPTION_FEATURES:
306 return "features";
307 case RBD_IMAGE_OPTION_ORDER:
308 return "order";
309 case RBD_IMAGE_OPTION_STRIPE_UNIT:
310 return "stripe_unit";
311 case RBD_IMAGE_OPTION_STRIPE_COUNT:
312 return "stripe_count";
313 case RBD_IMAGE_OPTION_JOURNAL_ORDER:
314 return "journal_order";
315 case RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH:
316 return "journal_splay_width";
317 case RBD_IMAGE_OPTION_JOURNAL_POOL:
318 return "journal_pool";
319 case RBD_IMAGE_OPTION_FEATURES_SET:
320 return "features_set";
321 case RBD_IMAGE_OPTION_FEATURES_CLEAR:
322 return "features_clear";
323 case RBD_IMAGE_OPTION_DATA_POOL:
324 return "data_pool";
325 default:
326 return "unknown (" + stringify(optname) + ")";
327 }
328 }
329
31f18b77 330 std::ostream &operator<<(std::ostream &os, const ImageOptions &opts) {
7c673cae
FG
331 os << "[";
332
333 const char *delimiter = "";
334 for (auto &i : IMAGE_OPTIONS_TYPE_MAPPING) {
335 if (i.second == STR) {
336 std::string val;
337 if (opts.get(i.first, &val) == 0) {
338 os << delimiter << image_option_name(i.first) << "=" << val;
339 delimiter = ", ";
340 }
341 } else if (i.second == UINT64) {
342 uint64_t val;
343 if (opts.get(i.first, &val) == 0) {
344 os << delimiter << image_option_name(i.first) << "=" << val;
345 delimiter = ", ";
346 }
347 }
348 }
349
350 os << "]";
351
352 return os;
353 }
354
355 void image_options_create(rbd_image_options_t* opts)
356 {
357 image_options_ref* opts_ = new image_options_ref(new image_options_t());
358
359 *opts = static_cast<rbd_image_options_t>(opts_);
360 }
361
362 void image_options_create_ref(rbd_image_options_t* opts,
363 rbd_image_options_t orig)
364 {
365 image_options_ref* orig_ = static_cast<image_options_ref*>(orig);
366 image_options_ref* opts_ = new image_options_ref(*orig_);
367
368 *opts = static_cast<rbd_image_options_t>(opts_);
369 }
370
371 void image_options_copy(rbd_image_options_t* opts,
372 const ImageOptions &orig)
373 {
374 image_options_ref* opts_ = new image_options_ref(new image_options_t());
375
376 *opts = static_cast<rbd_image_options_t>(opts_);
377
378 std::string str_val;
379 uint64_t uint64_val;
380 for (auto &i : IMAGE_OPTIONS_TYPE_MAPPING) {
381 switch (i.second) {
382 case STR:
383 if (orig.get(i.first, &str_val) == 0) {
384 image_options_set(*opts, i.first, str_val);
385 }
386 continue;
387 case UINT64:
388 if (orig.get(i.first, &uint64_val) == 0) {
389 image_options_set(*opts, i.first, uint64_val);
390 }
391 continue;
392 }
393 }
394 }
395
396 void image_options_destroy(rbd_image_options_t opts)
397 {
398 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
399
400 delete opts_;
401 }
402
403 int image_options_set(rbd_image_options_t opts, int optname,
404 const std::string& optval)
405 {
406 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
407
408 std::map<int, image_option_type_t>::const_iterator i =
409 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
410
411 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != STR) {
412 return -EINVAL;
413 }
414
415 (*opts_->get())[optname] = optval;
416 return 0;
417 }
418
419 int image_options_set(rbd_image_options_t opts, int optname, uint64_t optval)
420 {
421 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
422
423 std::map<int, image_option_type_t>::const_iterator i =
424 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
425
426 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != UINT64) {
427 return -EINVAL;
428 }
429
430 (*opts_->get())[optname] = optval;
431 return 0;
432 }
433
434 int image_options_get(rbd_image_options_t opts, int optname,
435 std::string* optval)
436 {
437 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
438
439 std::map<int, image_option_type_t>::const_iterator i =
440 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
441
442 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != STR) {
443 return -EINVAL;
444 }
445
446 image_options_t::const_iterator j = (*opts_)->find(optname);
447
448 if (j == (*opts_)->end()) {
449 return -ENOENT;
450 }
451
452 *optval = boost::get<std::string>(j->second);
453 return 0;
454 }
455
456 int image_options_get(rbd_image_options_t opts, int optname, uint64_t* optval)
457 {
458 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
459
460 std::map<int, image_option_type_t>::const_iterator i =
461 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
462
463 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != UINT64) {
464 return -EINVAL;
465 }
466
467 image_options_t::const_iterator j = (*opts_)->find(optname);
468
469 if (j == (*opts_)->end()) {
470 return -ENOENT;
471 }
472
473 *optval = boost::get<uint64_t>(j->second);
474 return 0;
475 }
476
477 int image_options_is_set(rbd_image_options_t opts, int optname,
478 bool* is_set)
479 {
480 if (IMAGE_OPTIONS_TYPE_MAPPING.find(optname) ==
481 IMAGE_OPTIONS_TYPE_MAPPING.end()) {
482 return -EINVAL;
483 }
484
485 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
486 *is_set = ((*opts_)->find(optname) != (*opts_)->end());
487 return 0;
488 }
489
490 int image_options_unset(rbd_image_options_t opts, int optname)
491 {
492 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
493
494 std::map<int, image_option_type_t>::const_iterator i =
495 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
496
497 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end()) {
498 assert((*opts_)->find(optname) == (*opts_)->end());
499 return -EINVAL;
500 }
501
502 image_options_t::const_iterator j = (*opts_)->find(optname);
503
504 if (j == (*opts_)->end()) {
505 return -ENOENT;
506 }
507
508 (*opts_)->erase(j);
509 return 0;
510 }
511
512 void image_options_clear(rbd_image_options_t opts)
513 {
514 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
515
516 (*opts_)->clear();
517 }
518
519 bool image_options_is_empty(rbd_image_options_t opts)
520 {
521 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
522
523 return (*opts_)->empty();
524 }
525
526 int list(IoCtx& io_ctx, vector<string>& names)
527 {
528 CephContext *cct = (CephContext *)io_ctx.cct();
529 ldout(cct, 20) << "list " << &io_ctx << dendl;
530
531 bufferlist bl;
532 int r = io_ctx.read(RBD_DIRECTORY, bl, 0, 0);
31f18b77
FG
533 if (r < 0) {
534 if (r == -ENOENT) {
535 r = 0;
536 }
7c673cae 537 return r;
31f18b77 538 }
7c673cae
FG
539
540 // old format images are in a tmap
541 if (bl.length()) {
542 bufferlist::iterator p = bl.begin();
543 bufferlist header;
544 map<string,bufferlist> m;
545 ::decode(header, p);
546 ::decode(m, p);
547 for (map<string,bufferlist>::iterator q = m.begin(); q != m.end(); ++q) {
548 names.push_back(q->first);
549 }
550 }
551
552 map<string, string> images;
553 r = api::Image<>::list_images(io_ctx, &images);
554 if (r < 0) {
555 lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl;
556 return r;
557 }
558 for (const auto& img_pair : images) {
559 names.push_back(img_pair.first);
560 }
561
562 return 0;
563 }
564
565 int flatten_children(ImageCtx *ictx, const char* snap_name,
566 ProgressContext& pctx)
567 {
568 CephContext *cct = ictx->cct;
569 ldout(cct, 20) << "children flatten " << ictx->name << dendl;
570
b32b8144
FG
571 int r = ictx->state->refresh_if_required();
572 if (r < 0) {
573 return r;
574 }
575
7c673cae
FG
576 RWLock::RLocker l(ictx->snap_lock);
577 snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name);
578 ParentSpec parent_spec(ictx->md_ctx.get_id(), ictx->id, snap_id);
579 map< pair<int64_t, string>, set<string> > image_info;
580
b32b8144 581 r = api::Image<>::list_children(ictx, parent_spec, &image_info);
7c673cae
FG
582 if (r < 0) {
583 return r;
584 }
585
586 size_t size = image_info.size();
587 if (size == 0)
588 return 0;
589
590 size_t i = 0;
591 Rados rados(ictx->md_ctx);
592 for ( auto &info : image_info){
593 string pool = info.first.second;
594 IoCtx ioctx;
595 r = rados.ioctx_create2(info.first.first, ioctx);
596 if (r < 0) {
597 lderr(cct) << "Error accessing child image pool " << pool
598 << dendl;
599 return r;
600 }
601
602 for (auto &id_it : info.second) {
603 ImageCtx *imctx = new ImageCtx("", id_it, NULL, ioctx, false);
604 int r = imctx->state->open(false);
605 if (r < 0) {
606 lderr(cct) << "error opening image: "
607 << cpp_strerror(r) << dendl;
608 return r;
609 }
610
b32b8144
FG
611 if ((imctx->features & RBD_FEATURE_DEEP_FLATTEN) == 0 &&
612 !imctx->snaps.empty()) {
613 lderr(cct) << "snapshot in-use by " << pool << "/" << imctx->name
614 << dendl;
615 imctx->state->close();
616 return -EBUSY;
617 }
618
7c673cae
FG
619 librbd::NoOpProgressContext prog_ctx;
620 r = imctx->operations->flatten(prog_ctx);
621 if (r < 0) {
622 lderr(cct) << "error flattening image: " << pool << "/" << id_it
623 << cpp_strerror(r) << dendl;
624 imctx->state->close();
625 return r;
626 }
627
7c673cae
FG
628 r = imctx->state->close();
629 if (r < 0) {
630 lderr(cct) << "failed to close image: " << cpp_strerror(r) << dendl;
631 return r;
632 }
633 }
634 pctx.update_progress(++i, size);
635 assert(i <= size);
636 }
637
638 return 0;
639 }
640
641 int list_children(ImageCtx *ictx, set<pair<string, string> >& names)
642 {
643 CephContext *cct = ictx->cct;
644 ldout(cct, 20) << "children list " << ictx->name << dendl;
645
b32b8144
FG
646 int r = ictx->state->refresh_if_required();
647 if (r < 0) {
648 return r;
649 }
650
7c673cae
FG
651 RWLock::RLocker l(ictx->snap_lock);
652 ParentSpec parent_spec(ictx->md_ctx.get_id(), ictx->id, ictx->snap_id);
653 map< pair<int64_t, string>, set<string> > image_info;
654
b32b8144 655 r = api::Image<>::list_children(ictx, parent_spec, &image_info);
7c673cae
FG
656 if (r < 0) {
657 return r;
658 }
659
660 Rados rados(ictx->md_ctx);
661 for ( auto &info : image_info){
662 IoCtx ioctx;
663 r = rados.ioctx_create2(info.first.first, ioctx);
664 if (r < 0) {
665 lderr(cct) << "Error accessing child image pool " << info.first.second
666 << dendl;
667 return r;
668 }
669
670 for (auto &id_it : info.second) {
671 string name;
672 r = cls_client::dir_get_name(&ioctx, RBD_DIRECTORY, id_it, &name);
673 if (r < 0) {
674 lderr(cct) << "Error looking up name for image id " << id_it
675 << " in pool " << info.first.second << dendl;
676 return r;
677 }
678 names.insert(make_pair(info.first.second, name));
679 }
680 }
681
682 return 0;
683 }
684
685 int get_snap_namespace(ImageCtx *ictx,
686 const char *snap_name,
687 cls::rbd::SnapshotNamespace *snap_namespace) {
688 ldout(ictx->cct, 20) << "get_snap_namespace " << ictx << " " << snap_name
689 << dendl;
690
691 int r = ictx->state->refresh_if_required();
692 if (r < 0)
693 return r;
694 RWLock::RLocker l(ictx->snap_lock);
695 snap_t snap_id = ictx->get_snap_id(*snap_namespace, snap_name);
696 if (snap_id == CEPH_NOSNAP)
697 return -ENOENT;
698 r = ictx->get_snap_namespace(snap_id, snap_namespace);
699 return r;
700 }
701
702 int snap_is_protected(ImageCtx *ictx, const char *snap_name, bool *is_protected)
703 {
704 ldout(ictx->cct, 20) << "snap_is_protected " << ictx << " " << snap_name
705 << dendl;
706
707 int r = ictx->state->refresh_if_required();
708 if (r < 0)
709 return r;
710
711 RWLock::RLocker l(ictx->snap_lock);
712 snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name);
713 if (snap_id == CEPH_NOSNAP)
714 return -ENOENT;
715 bool is_unprotected;
716 r = ictx->is_snap_unprotected(snap_id, &is_unprotected);
717 // consider both PROTECTED or UNPROTECTING to be 'protected',
718 // since in either state they can't be deleted
719 *is_protected = !is_unprotected;
720 return r;
721 }
722
723 int create_v1(IoCtx& io_ctx, const char *imgname, uint64_t size, int order)
724 {
725 CephContext *cct = (CephContext *)io_ctx.cct();
726
727 ldout(cct, 20) << __func__ << " " << &io_ctx << " name = " << imgname
728 << " size = " << size << " order = " << order << dendl;
729 int r = validate_pool(io_ctx, cct);
730 if (r < 0) {
731 return r;
732 }
733
734 ldout(cct, 2) << "adding rbd image to directory..." << dendl;
735 r = tmap_set(io_ctx, imgname);
736 if (r < 0) {
737 lderr(cct) << "error adding image to directory: " << cpp_strerror(r)
738 << dendl;
739 return r;
740 }
741
742 Rados rados(io_ctx);
743 uint64_t bid = rados.get_instance_id();
744
745 ldout(cct, 2) << "creating rbd image..." << dendl;
746 struct rbd_obj_header_ondisk header;
747 init_rbd_header(header, size, order, bid);
748
749 bufferlist bl;
750 bl.append((const char *)&header, sizeof(header));
751
752 string header_oid = util::old_header_name(imgname);
753 r = io_ctx.write(header_oid, bl, bl.length(), 0);
754 if (r < 0) {
755 lderr(cct) << "Error writing image header: " << cpp_strerror(r)
756 << dendl;
757 int remove_r = tmap_rm(io_ctx, imgname);
758 if (remove_r < 0) {
759 lderr(cct) << "Could not remove image from directory after "
760 << "header creation failed: "
761 << cpp_strerror(remove_r) << dendl;
762 }
763 return r;
764 }
765
766 ldout(cct, 2) << "done." << dendl;
767 return 0;
768 }
769
770 int create(librados::IoCtx& io_ctx, const char *imgname, uint64_t size,
771 int *order)
772 {
773 uint64_t order_ = *order;
774 ImageOptions opts;
775
776 int r = opts.set(RBD_IMAGE_OPTION_ORDER, order_);
777 assert(r == 0);
778
779 r = create(io_ctx, imgname, "", size, opts, "", "", false);
780
781 int r1 = opts.get(RBD_IMAGE_OPTION_ORDER, &order_);
782 assert(r1 == 0);
783 *order = order_;
784
785 return r;
786 }
787
788 int create(IoCtx& io_ctx, const char *imgname, uint64_t size,
789 bool old_format, uint64_t features, int *order,
790 uint64_t stripe_unit, uint64_t stripe_count)
791 {
792 if (!order)
793 return -EINVAL;
794
795 uint64_t order_ = *order;
796 uint64_t format = old_format ? 1 : 2;
797 ImageOptions opts;
798 int r;
799
800 r = opts.set(RBD_IMAGE_OPTION_FORMAT, format);
801 assert(r == 0);
802 r = opts.set(RBD_IMAGE_OPTION_FEATURES, features);
803 assert(r == 0);
804 r = opts.set(RBD_IMAGE_OPTION_ORDER, order_);
805 assert(r == 0);
806 r = opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
807 assert(r == 0);
808 r = opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
809 assert(r == 0);
810
811 r = create(io_ctx, imgname, "", size, opts, "", "", false);
812
813 int r1 = opts.get(RBD_IMAGE_OPTION_ORDER, &order_);
814 assert(r1 == 0);
815 *order = order_;
816
817 return r;
818 }
819
820 int create(IoCtx& io_ctx, const std::string &image_name,
821 const std::string &image_id, uint64_t size,
822 ImageOptions& opts,
823 const std::string &non_primary_global_image_id,
824 const std::string &primary_mirror_uuid,
825 bool skip_mirror_enable)
826 {
827 std::string id(image_id);
828 if (id.empty()) {
829 id = util::generate_image_id(io_ctx);
830 }
831
832 CephContext *cct = (CephContext *)io_ctx.cct();
833 ldout(cct, 10) << __func__ << " name=" << image_name << ", "
834 << "id= " << id << ", "
835 << "size=" << size << ", opts=" << opts << dendl;
836
837 uint64_t format;
838 if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0)
181888fb 839 format = cct->_conf->get_val<int64_t>("rbd_default_format");
7c673cae
FG
840 bool old_format = format == 1;
841
842 // make sure it doesn't already exist, in either format
843 int r = detect_format(io_ctx, image_name, NULL, NULL);
844 if (r != -ENOENT) {
845 if (r) {
846 lderr(cct) << "Could not tell if " << image_name << " already exists"
847 << dendl;
848 return r;
849 }
850 lderr(cct) << "rbd image " << image_name << " already exists" << dendl;
851 return -EEXIST;
852 }
853
854 uint64_t order = 0;
855 if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0 || order == 0) {
181888fb 856 order = cct->_conf->get_val<int64_t>("rbd_default_order");
7c673cae
FG
857 }
858 r = image::CreateRequest<>::validate_order(cct, order);
859 if (r < 0) {
860 return r;
861 }
862
863 if (old_format) {
864 r = create_v1(io_ctx, image_name.c_str(), size, order);
865 } else {
866 ThreadPool *thread_pool;
867 ContextWQ *op_work_queue;
868 ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
869
870 C_SaferCond cond;
871 image::CreateRequest<> *req = image::CreateRequest<>::create(
872 io_ctx, image_name, id, size, opts, non_primary_global_image_id,
873 primary_mirror_uuid, skip_mirror_enable, op_work_queue, &cond);
874 req->send();
875
876 r = cond.wait();
877 }
878
879 int r1 = opts.set(RBD_IMAGE_OPTION_ORDER, order);
880 assert(r1 == 0);
881
882 return r;
883 }
884
885 /*
886 * Parent may be in different pool, hence different IoCtx
887 */
888 int clone(IoCtx& p_ioctx, const char *p_name, const char *p_snap_name,
889 IoCtx& c_ioctx, const char *c_name,
890 uint64_t features, int *c_order,
891 uint64_t stripe_unit, int stripe_count)
892 {
893 uint64_t order = *c_order;
894
895 ImageOptions opts;
896 opts.set(RBD_IMAGE_OPTION_FEATURES, features);
897 opts.set(RBD_IMAGE_OPTION_ORDER, order);
898 opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
899 opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
900
901 int r = clone(p_ioctx, p_name, p_snap_name, c_ioctx, c_name, opts);
902 opts.get(RBD_IMAGE_OPTION_ORDER, &order);
903 *c_order = order;
904 return r;
905 }
906
907 int clone(IoCtx& p_ioctx, const char *p_name, const char *p_snap_name,
908 IoCtx& c_ioctx, const char *c_name, ImageOptions& c_opts)
909 {
910 CephContext *cct = (CephContext *)p_ioctx.cct();
911 if (p_snap_name == NULL) {
912 lderr(cct) << "image to be cloned must be a snapshot" << dendl;
913 return -EINVAL;
914 }
915
916 // make sure parent snapshot exists
917 ImageCtx *p_imctx = new ImageCtx(p_name, "", p_snap_name, p_ioctx, true);
918 int r = p_imctx->state->open(false);
919 if (r < 0) {
920 lderr(cct) << "error opening parent image: "
921 << cpp_strerror(r) << dendl;
922 return r;
923 }
924
925 r = clone(p_imctx, c_ioctx, c_name, "", c_opts, "", "");
926
927 int close_r = p_imctx->state->close();
928 if (r == 0 && close_r < 0) {
929 r = close_r;
930 }
931
932 if (r < 0) {
933 return r;
934 }
935 return 0;
936 }
937
938 int clone(ImageCtx *p_imctx, IoCtx& c_ioctx, const std::string &c_name,
939 const std::string &c_id, ImageOptions& c_opts,
940 const std::string &non_primary_global_image_id,
941 const std::string &primary_mirror_uuid)
942 {
943 std::string id(c_id);
944 if (id.empty()) {
945 id = util::generate_image_id(c_ioctx);
946 }
947
948 CephContext *cct = (CephContext *)c_ioctx.cct();
949 ldout(cct, 10) << __func__ << " "
950 << "c_name=" << c_name << ", "
951 << "c_id= " << c_id << ", "
952 << "c_opts=" << c_opts << dendl;
953
954 ThreadPool *thread_pool;
955 ContextWQ *op_work_queue;
956 ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
957
958 C_SaferCond cond;
959 auto *req = image::CloneRequest<>::create(
960 p_imctx, c_ioctx, c_name, id, c_opts,
961 non_primary_global_image_id, primary_mirror_uuid, op_work_queue, &cond);
962 req->send();
963
964 return cond.wait();
965 }
966
967 int rename(IoCtx& io_ctx, const char *srcname, const char *dstname)
968 {
969 CephContext *cct = (CephContext *)io_ctx.cct();
970 ldout(cct, 20) << "rename " << &io_ctx << " " << srcname << " -> "
971 << dstname << dendl;
972
973 ImageCtx *ictx = new ImageCtx(srcname, "", "", io_ctx, false);
974 int r = ictx->state->open(false);
975 if (r < 0) {
181888fb 976 lderr(cct) << "error opening source image: " << cpp_strerror(r) << dendl;
7c673cae
FG
977 return r;
978 }
979 BOOST_SCOPE_EXIT((ictx)) {
980 ictx->state->close();
981 } BOOST_SCOPE_EXIT_END
982
983 return ictx->operations->rename(dstname);
984 }
985
986 int info(ImageCtx *ictx, image_info_t& info, size_t infosize)
987 {
988 ldout(ictx->cct, 20) << "info " << ictx << dendl;
989
990 int r = ictx->state->refresh_if_required();
991 if (r < 0)
992 return r;
993
994 image_info(ictx, info, infosize);
995 return 0;
996 }
997
998 int get_old_format(ImageCtx *ictx, uint8_t *old)
999 {
1000 int r = ictx->state->refresh_if_required();
1001 if (r < 0)
1002 return r;
1003 *old = ictx->old_format;
1004 return 0;
1005 }
1006
1007 int get_size(ImageCtx *ictx, uint64_t *size)
1008 {
1009 int r = ictx->state->refresh_if_required();
1010 if (r < 0)
1011 return r;
1012 RWLock::RLocker l2(ictx->snap_lock);
1013 *size = ictx->get_image_size(ictx->snap_id);
1014 return 0;
1015 }
1016
1017 int get_features(ImageCtx *ictx, uint64_t *features)
1018 {
1019 int r = ictx->state->refresh_if_required();
1020 if (r < 0)
1021 return r;
1022 RWLock::RLocker l(ictx->snap_lock);
1023 *features = ictx->features;
1024 return 0;
1025 }
1026
1027 int get_overlap(ImageCtx *ictx, uint64_t *overlap)
1028 {
1029 int r = ictx->state->refresh_if_required();
1030 if (r < 0)
1031 return r;
1032 RWLock::RLocker l(ictx->snap_lock);
1033 RWLock::RLocker l2(ictx->parent_lock);
1034 return ictx->get_parent_overlap(ictx->snap_id, overlap);
1035 }
1036
1037 int get_parent_info(ImageCtx *ictx, string *parent_pool_name,
1038 string *parent_name, string *parent_id,
1039 string *parent_snap_name)
1040 {
1041 int r = ictx->state->refresh_if_required();
1042 if (r < 0)
1043 return r;
1044
1045 RWLock::RLocker l(ictx->snap_lock);
1046 RWLock::RLocker l2(ictx->parent_lock);
1047 if (ictx->parent == NULL) {
1048 return -ENOENT;
1049 }
1050
1051 ParentSpec parent_spec;
1052
1053 if (ictx->snap_id == CEPH_NOSNAP) {
1054 parent_spec = ictx->parent_md.spec;
1055 } else {
1056 r = ictx->get_parent_spec(ictx->snap_id, &parent_spec);
1057 if (r < 0) {
1058 lderr(ictx->cct) << "Can't find snapshot id = " << ictx->snap_id
1059 << dendl;
1060 return r;
1061 }
1062 if (parent_spec.pool_id == -1)
1063 return -ENOENT;
1064 }
1065 if (parent_pool_name) {
1066 Rados rados(ictx->md_ctx);
1067 r = rados.pool_reverse_lookup(parent_spec.pool_id,
1068 parent_pool_name);
1069 if (r < 0) {
1070 lderr(ictx->cct) << "error looking up pool name: " << cpp_strerror(r)
1071 << dendl;
1072 return r;
1073 }
1074 }
1075
1076 if (parent_snap_name) {
1077 RWLock::RLocker l(ictx->parent->snap_lock);
1078 r = ictx->parent->get_snap_name(parent_spec.snap_id,
1079 parent_snap_name);
1080 if (r < 0) {
1081 lderr(ictx->cct) << "error finding parent snap name: "
1082 << cpp_strerror(r) << dendl;
1083 return r;
1084 }
1085 }
1086
1087 if (parent_name) {
1088 RWLock::RLocker snap_locker(ictx->parent->snap_lock);
1089 *parent_name = ictx->parent->name;
1090 }
1091 if (parent_id) {
1092 *parent_id = ictx->parent->id;
1093 }
1094
1095 return 0;
1096 }
1097
1098 int get_flags(ImageCtx *ictx, uint64_t *flags)
1099 {
1100 int r = ictx->state->refresh_if_required();
1101 if (r < 0) {
1102 return r;
1103 }
1104
1105 RWLock::RLocker l2(ictx->snap_lock);
1106 return ictx->get_flags(ictx->snap_id, flags);
1107 }
1108
1109 int set_image_notification(ImageCtx *ictx, int fd, int type)
1110 {
1111 CephContext *cct = ictx->cct;
1112 ldout(cct, 20) << __func__ << " " << ictx << " fd " << fd << " type" << type << dendl;
1113
1114 int r = ictx->state->refresh_if_required();
1115 if (r < 0) {
1116 return r;
1117 }
1118
1119 if (ictx->event_socket.is_valid())
1120 return -EINVAL;
1121 return ictx->event_socket.init(fd, type);
1122 }
1123
1124 int is_exclusive_lock_owner(ImageCtx *ictx, bool *is_owner)
1125 {
1126 *is_owner = false;
1127
1128 RWLock::RLocker owner_locker(ictx->owner_lock);
1129 if (ictx->exclusive_lock == nullptr ||
1130 !ictx->exclusive_lock->is_lock_owner()) {
1131 return 0;
1132 }
1133
1134 // might have been blacklisted by peer -- ensure we still own
1135 // the lock by pinging the OSD
1136 int r = ictx->exclusive_lock->assert_header_locked();
31f18b77
FG
1137 if (r == -EBUSY || r == -ENOENT) {
1138 return 0;
1139 } else if (r < 0) {
7c673cae
FG
1140 return r;
1141 }
1142
1143 *is_owner = true;
1144 return 0;
1145 }
1146
1147 int lock_acquire(ImageCtx *ictx, rbd_lock_mode_t lock_mode)
1148 {
1149 CephContext *cct = ictx->cct;
1150 ldout(cct, 20) << __func__ << ": ictx=" << ictx << ", "
1151 << "lock_mode=" << lock_mode << dendl;
1152
1153 if (lock_mode != RBD_LOCK_MODE_EXCLUSIVE) {
1154 return -EOPNOTSUPP;
1155 }
1156
1157 C_SaferCond lock_ctx;
1158 {
1159 RWLock::WLocker l(ictx->owner_lock);
1160
1161 if (ictx->exclusive_lock == nullptr) {
1162 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1163 return -EINVAL;
1164 }
1165
1166 if (ictx->get_exclusive_lock_policy()->may_auto_request_lock()) {
1167 ictx->set_exclusive_lock_policy(
1168 new exclusive_lock::StandardPolicy(ictx));
1169 }
1170
1171 if (ictx->exclusive_lock->is_lock_owner()) {
1172 return 0;
1173 }
1174
1175 ictx->exclusive_lock->acquire_lock(&lock_ctx);
1176 }
1177
1178 int r = lock_ctx.wait();
1179 if (r < 0) {
1180 lderr(cct) << "failed to request exclusive lock: " << cpp_strerror(r)
1181 << dendl;
1182 return r;
1183 }
1184
1185 RWLock::RLocker l(ictx->owner_lock);
1186
1187 if (ictx->exclusive_lock == nullptr ||
1188 !ictx->exclusive_lock->is_lock_owner()) {
1189 lderr(cct) << "failed to acquire exclusive lock" << dendl;
1190 return -EROFS;
1191 }
1192
1193 return 0;
1194 }
1195
1196 int lock_release(ImageCtx *ictx)
1197 {
1198 CephContext *cct = ictx->cct;
1199 ldout(cct, 20) << __func__ << ": ictx=" << ictx << dendl;
1200
1201 C_SaferCond lock_ctx;
1202 {
1203 RWLock::WLocker l(ictx->owner_lock);
1204
1205 if (ictx->exclusive_lock == nullptr ||
1206 !ictx->exclusive_lock->is_lock_owner()) {
1207 lderr(cct) << "not exclusive lock owner" << dendl;
1208 return -EINVAL;
1209 }
1210
1211 ictx->exclusive_lock->release_lock(&lock_ctx);
1212 }
1213
1214 int r = lock_ctx.wait();
1215 if (r < 0) {
1216 lderr(cct) << "failed to release exclusive lock: " << cpp_strerror(r)
1217 << dendl;
1218 return r;
1219 }
1220 return 0;
1221 }
1222
1223 int lock_get_owners(ImageCtx *ictx, rbd_lock_mode_t *lock_mode,
1224 std::list<std::string> *lock_owners)
1225 {
1226 CephContext *cct = ictx->cct;
1227 ldout(cct, 20) << __func__ << ": ictx=" << ictx << dendl;
1228
1229 if (!ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
1230 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1231 return -EINVAL;
1232 }
1233
1234 managed_lock::Locker locker;
1235 C_SaferCond get_owner_ctx;
1236 ExclusiveLock<>(*ictx).get_locker(&locker, &get_owner_ctx);
1237 int r = get_owner_ctx.wait();
1238 if (r == -ENOENT) {
1239 return r;
1240 } else if (r < 0) {
1241 lderr(cct) << "failed to determine current lock owner: "
1242 << cpp_strerror(r) << dendl;
1243 return r;
1244 }
1245
1246 *lock_mode = RBD_LOCK_MODE_EXCLUSIVE;
1247 lock_owners->clear();
1248 lock_owners->emplace_back(locker.address);
1249 return 0;
1250 }
1251
1252 int lock_break(ImageCtx *ictx, rbd_lock_mode_t lock_mode,
1253 const std::string &lock_owner)
1254 {
1255 CephContext *cct = ictx->cct;
1256 ldout(cct, 20) << __func__ << ": ictx=" << ictx << ", "
1257 << "lock_mode=" << lock_mode << ", "
1258 << "lock_owner=" << lock_owner << dendl;
1259
1260 if (lock_mode != RBD_LOCK_MODE_EXCLUSIVE) {
1261 return -EOPNOTSUPP;
1262 }
1263
1264 if (ictx->read_only) {
1265 return -EROFS;
1266 }
1267
1268 managed_lock::Locker locker;
1269 C_SaferCond get_owner_ctx;
1270 {
1271 RWLock::RLocker l(ictx->owner_lock);
1272
1273 if (ictx->exclusive_lock == nullptr) {
1274 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1275 return -EINVAL;
1276 }
1277
1278 ictx->exclusive_lock->get_locker(&locker, &get_owner_ctx);
1279 }
1280 int r = get_owner_ctx.wait();
1281 if (r == -ENOENT) {
1282 return r;
1283 } else if (r < 0) {
1284 lderr(cct) << "failed to determine current lock owner: "
1285 << cpp_strerror(r) << dendl;
1286 return r;
1287 }
1288
1289 if (locker.address != lock_owner) {
1290 return -EBUSY;
1291 }
1292
1293 C_SaferCond break_ctx;
1294 {
1295 RWLock::RLocker l(ictx->owner_lock);
1296
1297 if (ictx->exclusive_lock == nullptr) {
1298 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1299 return -EINVAL;
1300 }
1301
1302 ictx->exclusive_lock->break_lock(locker, true, &break_ctx);
1303 }
1304 r = break_ctx.wait();
1305 if (r == -ENOENT) {
1306 return r;
1307 } else if (r < 0) {
1308 lderr(cct) << "failed to break lock: " << cpp_strerror(r) << dendl;
1309 return r;
1310 }
1311 return 0;
1312 }
1313
1314 int remove(IoCtx& io_ctx, const std::string &image_name,
1315 const std::string &image_id, ProgressContext& prog_ctx,
1316 bool force, bool from_trash_remove)
1317 {
1318 CephContext *cct((CephContext *)io_ctx.cct());
1319 ldout(cct, 20) << "remove " << &io_ctx << " "
1320 << (image_id.empty() ? image_name : image_id) << dendl;
1321
1322 ThreadPool *thread_pool;
1323 ContextWQ *op_work_queue;
1324 ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
1325
1326 C_SaferCond cond;
1327 auto req = librbd::image::RemoveRequest<>::create(
1328 io_ctx, image_name, image_id, force, from_trash_remove, prog_ctx,
1329 op_work_queue, &cond);
1330 req->send();
1331
1332 return cond.wait();
1333 }
1334
1335 int trash_move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
1336 const std::string &image_name, uint64_t delay) {
1337 CephContext *cct((CephContext *)io_ctx.cct());
1338 ldout(cct, 20) << "trash_move " << &io_ctx << " " << image_name
1339 << dendl;
1340
1341 std::string image_id;
1342 ImageCtx *ictx = new ImageCtx(image_name, "", nullptr, io_ctx, false);
1343 int r = ictx->state->open(true);
1344 if (r < 0) {
1345 ictx = nullptr;
1346
1347 if (r != -ENOENT) {
1348 ldout(cct, 2) << "error opening image: " << cpp_strerror(-r) << dendl;
1349 return r;
1350 }
1351
1352 // try to get image id from the directory
1353 r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name, &image_id);
1354 if (r < 0) {
1355 if (r != -ENOENT) {
1356 ldout(cct, 2) << "error reading image id from dirctory: "
1357 << cpp_strerror(-r) << dendl;
1358 }
1359 return r;
1360 }
1361 } else {
1362 if (ictx->old_format) {
1363 ictx->state->close();
1364 return -EOPNOTSUPP;
1365 }
1366
1367 image_id = ictx->id;
1368 ictx->owner_lock.get_read();
1369 if (ictx->exclusive_lock != nullptr) {
b32b8144
FG
1370 r = ictx->operations->prepare_image_update(false);
1371 if (r < 0) {
7c673cae
FG
1372 lderr(cct) << "cannot obtain exclusive lock - not removing" << dendl;
1373 ictx->owner_lock.put_read();
1374 ictx->state->close();
1375 return -EBUSY;
1376 }
1377 }
1378 }
1379
1380 BOOST_SCOPE_EXIT_ALL(ictx, cct) {
1381 if (ictx == nullptr)
1382 return;
1383
1384 bool is_locked = ictx->exclusive_lock != nullptr &&
1385 ictx->exclusive_lock->is_lock_owner();
1386 if (is_locked) {
1387 C_SaferCond ctx;
31f18b77
FG
1388 auto exclusive_lock = ictx->exclusive_lock;
1389 exclusive_lock->shut_down(&ctx);
7c673cae
FG
1390 ictx->owner_lock.put_read();
1391 int r = ctx.wait();
1392 if (r < 0) {
1393 lderr(cct) << "error shutting down exclusive lock" << dendl;
1394 }
31f18b77 1395 delete exclusive_lock;
7c673cae
FG
1396 } else {
1397 ictx->owner_lock.put_read();
1398 }
1399 ictx->state->close();
1400 };
1401
1402 ldout(cct, 2) << "adding image entry to rbd_trash" << dendl;
1403 utime_t ts = ceph_clock_now();
1404 utime_t deferment_end_time = ts;
1405 deferment_end_time += (double)delay;
1406 cls::rbd::TrashImageSource trash_source =
1407 static_cast<cls::rbd::TrashImageSource>(source);
1408 cls::rbd::TrashImageSpec trash_spec(trash_source, image_name, ts,
1409 deferment_end_time);
1410 r = cls_client::trash_add(&io_ctx, image_id, trash_spec);
1411 if (r < 0 && r != -EEXIST) {
1412 lderr(cct) << "error adding image " << image_name << " to rbd_trash"
1413 << dendl;
1414 return r;
1415 } else if (r == -EEXIST) {
1416 ldout(cct, 10) << "found previous unfinished deferred remove for image:"
1417 << image_id << dendl;
1418 // continue with removing image from directory
1419 }
1420
1421 ldout(cct, 2) << "removing id object..." << dendl;
1422 r = io_ctx.remove(util::id_obj_name(image_name));
1423 if (r < 0 && r != -ENOENT) {
1424 lderr(cct) << "error removing id object: " << cpp_strerror(r)
1425 << dendl;
1426 return r;
1427 }
1428
1429 ldout(cct, 2) << "removing rbd image from v2 directory..." << dendl;
1430 r = cls_client::dir_remove_image(&io_ctx, RBD_DIRECTORY, image_name,
1431 image_id);
1432 if (r < 0) {
1433 if (r != -ENOENT) {
1434 lderr(cct) << "error removing image from v2 directory: "
1435 << cpp_strerror(-r) << dendl;
1436 }
1437 return r;
1438 }
1439
1440 return 0;
1441 }
1442
1443 int trash_get(IoCtx &io_ctx, const std::string &id,
1444 trash_image_info_t *info) {
1445 CephContext *cct((CephContext *)io_ctx.cct());
1446 ldout(cct, 20) << __func__ << " " << &io_ctx << dendl;
1447
1448 cls::rbd::TrashImageSpec spec;
1449 int r = cls_client::trash_get(&io_ctx, id, &spec);
1450 if (r == -ENOENT) {
1451 return r;
1452 } else if (r < 0) {
1453 lderr(cct) << "error retrieving trash entry: " << cpp_strerror(r)
1454 << dendl;
1455 return r;
1456 }
1457
1458 rbd_trash_image_source_t source = static_cast<rbd_trash_image_source_t>(
1459 spec.source);
1460 *info = trash_image_info_t{id, spec.name, source, spec.deletion_time.sec(),
1461 spec.deferment_end_time.sec()};
1462 return 0;
1463 }
1464
1465 int trash_list(IoCtx &io_ctx, vector<trash_image_info_t> &entries) {
1466 CephContext *cct((CephContext *)io_ctx.cct());
1467 ldout(cct, 20) << "trash_list " << &io_ctx << dendl;
1468
c07f9fc5
FG
1469 bool more_entries;
1470 uint32_t max_read = 1024;
1471 std::string last_read = "";
1472 do {
1473 map<string, cls::rbd::TrashImageSpec> trash_entries;
1474 int r = cls_client::trash_list(&io_ctx, last_read, max_read,
1475 &trash_entries);
1476 if (r < 0 && r != -ENOENT) {
1477 lderr(cct) << "error listing rbd trash entries: " << cpp_strerror(r)
7c673cae 1478 << dendl;
c07f9fc5
FG
1479 return r;
1480 } else if (r == -ENOENT) {
1481 break;
7c673cae 1482 }
7c673cae 1483
c07f9fc5
FG
1484 if (trash_entries.empty()) {
1485 break;
1486 }
1487
1488 for (const auto &entry : trash_entries) {
1489 rbd_trash_image_source_t source =
1490 static_cast<rbd_trash_image_source_t>(entry.second.source);
1491 entries.push_back({entry.first, entry.second.name, source,
1492 entry.second.deletion_time.sec(),
1493 entry.second.deferment_end_time.sec()});
1494 }
1495 last_read = trash_entries.rbegin()->first;
1496 more_entries = (trash_entries.size() >= max_read);
1497 } while (more_entries);
1498
7c673cae
FG
1499 return 0;
1500 }
1501
1502 int trash_remove(IoCtx &io_ctx, const std::string &image_id, bool force,
1503 ProgressContext& prog_ctx) {
1504 CephContext *cct((CephContext *)io_ctx.cct());
1505 ldout(cct, 20) << "trash_remove " << &io_ctx << " " << image_id
1506 << " " << force << dendl;
1507
1508 cls::rbd::TrashImageSpec trash_spec;
1509 int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec);
1510 if (r < 0) {
1511 lderr(cct) << "error getting image id " << image_id
1512 << " info from trash: " << cpp_strerror(r) << dendl;
1513 return r;
1514 }
1515
1516 utime_t now = ceph_clock_now();
1517 if (now < trash_spec.deferment_end_time && !force) {
1518 lderr(cct) << "error: deferment time has not expired." << dendl;
1519 return -EPERM;
1520 }
1521
1522 r = remove(io_ctx, "", image_id, prog_ctx, false, true);
1523 if (r < 0) {
1524 lderr(cct) << "error removing image " << image_id
1525 << ", which is pending deletion" << dendl;
1526 return r;
1527 }
1528 r = cls_client::trash_remove(&io_ctx, image_id);
1529 if (r < 0 && r != -ENOENT) {
1530 lderr(cct) << "error removing image " << image_id
1531 << " from rbd_trash object" << dendl;
1532 return r;
1533 }
1534 return 0;
1535 }
1536
1537 int trash_restore(librados::IoCtx &io_ctx, const std::string &image_id,
1538 const std::string &image_new_name) {
1539 CephContext *cct((CephContext *)io_ctx.cct());
1540 ldout(cct, 20) << "trash_restore " << &io_ctx << " " << image_id << " "
1541 << image_new_name << dendl;
1542
1543 cls::rbd::TrashImageSpec trash_spec;
1544 int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec);
1545 if (r < 0) {
1546 lderr(cct) << "error getting image id " << image_id
1547 << " info from trash: " << cpp_strerror(r) << dendl;
1548 return r;
1549 }
1550
1551 std::string image_name = image_new_name;
1552 if (image_name.empty()) {
1553 // if user didn't specify a new name, let's try using the old name
1554 image_name = trash_spec.name;
1555 ldout(cct, 20) << "restoring image id " << image_id << " with name "
1556 << image_name << dendl;
1557 }
1558
1559 // check if no image exists with the same name
1560 bool create_id_obj = true;
1561 std::string existing_id;
1562 r = cls_client::get_id(&io_ctx, util::id_obj_name(image_name), &existing_id);
1563 if (r < 0 && r != -ENOENT) {
1564 lderr(cct) << "error checking if image " << image_name << " exists: "
1565 << cpp_strerror(r) << dendl;
1566 return r;
1567 } else if (r != -ENOENT){
1568 // checking if we are recovering from an incomplete restore
1569 if (existing_id != image_id) {
1570 ldout(cct, 2) << "an image with the same name already exists" << dendl;
1571 return -EEXIST;
1572 }
1573 create_id_obj = false;
1574 }
1575
1576 if (create_id_obj) {
1577 ldout(cct, 2) << "adding id object" << dendl;
1578 librados::ObjectWriteOperation op;
1579 op.create(true);
1580 cls_client::set_id(&op, image_id);
1581 r = io_ctx.operate(util::id_obj_name(image_name), &op);
1582 if (r < 0) {
1583 lderr(cct) << "error adding id object for image " << image_name
1584 << ": " << cpp_strerror(r) << dendl;
1585 return r;
1586 }
1587 }
1588
1589 ldout(cct, 2) << "adding rbd image from v2 directory..." << dendl;
1590 r = cls_client::dir_add_image(&io_ctx, RBD_DIRECTORY, image_name,
1591 image_id);
1592 if (r < 0 && r != -EEXIST) {
1593 lderr(cct) << "error adding image to v2 directory: "
1594 << cpp_strerror(r) << dendl;
1595 return r;
1596 }
1597
1598 ldout(cct, 2) << "removing image from trash..." << dendl;
1599 r = cls_client::trash_remove(&io_ctx, image_id);
1600 if (r < 0 && r != -ENOENT) {
1601 lderr(cct) << "error removing image id " << image_id << " from trash: "
1602 << cpp_strerror(r) << dendl;
1603 return r;
1604 }
1605
1606 return 0;
1607 }
1608
1609 int snap_list(ImageCtx *ictx, vector<snap_info_t>& snaps)
1610 {
1611 ldout(ictx->cct, 20) << "snap_list " << ictx << dendl;
1612
1613 int r = ictx->state->refresh_if_required();
1614 if (r < 0)
1615 return r;
1616
1617 RWLock::RLocker l(ictx->snap_lock);
1618 for (map<snap_t, SnapInfo>::iterator it = ictx->snap_info.begin();
1619 it != ictx->snap_info.end(); ++it) {
1620 snap_info_t info;
1621 info.name = it->second.name;
1622 info.id = it->first;
1623 info.size = it->second.size;
1624 snaps.push_back(info);
1625 }
1626
1627 return 0;
1628 }
1629
1630 int snap_exists(ImageCtx *ictx, const cls::rbd::SnapshotNamespace& snap_namespace,
1631 const char *snap_name, bool *exists)
1632 {
1633 ldout(ictx->cct, 20) << "snap_exists " << ictx << " " << snap_name << dendl;
1634
1635 int r = ictx->state->refresh_if_required();
1636 if (r < 0)
1637 return r;
1638
1639 RWLock::RLocker l(ictx->snap_lock);
1640 *exists = ictx->get_snap_id(snap_namespace, snap_name) != CEPH_NOSNAP;
1641 return 0;
1642 }
1643
1644 int snap_remove(ImageCtx *ictx, const char *snap_name, uint32_t flags,
1645 ProgressContext& pctx)
1646 {
1647 ldout(ictx->cct, 20) << "snap_remove " << ictx << " " << snap_name << " flags: " << flags << dendl;
1648
1649 int r = 0;
1650
1651 r = ictx->state->refresh_if_required();
1652 if (r < 0)
1653 return r;
1654
1655 if (flags & RBD_SNAP_REMOVE_FLATTEN) {
1656 r = flatten_children(ictx, snap_name, pctx);
1657 if (r < 0) {
1658 return r;
1659 }
1660 }
1661
1662 bool is_protected;
1663 r = snap_is_protected(ictx, snap_name, &is_protected);
1664 if (r < 0) {
1665 return r;
1666 }
1667
1668 if (is_protected && flags & RBD_SNAP_REMOVE_UNPROTECT) {
1669 r = ictx->operations->snap_unprotect(cls::rbd::UserSnapshotNamespace(), snap_name);
1670 if (r < 0) {
1671 lderr(ictx->cct) << "failed to unprotect snapshot: " << snap_name << dendl;
1672 return r;
1673 }
1674
1675 r = snap_is_protected(ictx, snap_name, &is_protected);
1676 if (r < 0) {
1677 return r;
1678 }
1679 if (is_protected) {
1680 lderr(ictx->cct) << "snapshot is still protected after unprotection" << dendl;
1681 ceph_abort();
1682 }
1683 }
1684
1685 C_SaferCond ctx;
1686 ictx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(), snap_name, &ctx);
1687
1688 r = ctx.wait();
1689 return r;
1690 }
1691
1692 int snap_get_timestamp(ImageCtx *ictx, uint64_t snap_id, struct timespec *timestamp)
1693 {
1694 std::map<librados::snap_t, SnapInfo>::iterator snap_it = ictx->snap_info.find(snap_id);
1695 assert(snap_it != ictx->snap_info.end());
1696 utime_t time = snap_it->second.timestamp;
1697 time.to_timespec(timestamp);
1698 return 0;
1699 }
1700
1701 int snap_get_limit(ImageCtx *ictx, uint64_t *limit)
1702 {
1703 int r = cls_client::snapshot_get_limit(&ictx->md_ctx, ictx->header_oid,
1704 limit);
1705 if (r == -EOPNOTSUPP) {
1706 *limit = UINT64_MAX;
1707 r = 0;
1708 }
1709 return r;
1710 }
1711
1712 int snap_set_limit(ImageCtx *ictx, uint64_t limit)
1713 {
1714 return ictx->operations->snap_set_limit(limit);
1715 }
1716
1717 struct CopyProgressCtx {
1718 explicit CopyProgressCtx(ProgressContext &p)
1719 : destictx(NULL), src_size(0), prog_ctx(p)
1720 { }
1721
1722 ImageCtx *destictx;
1723 uint64_t src_size;
1724 ProgressContext &prog_ctx;
1725 };
1726
1727 int copy(ImageCtx *src, IoCtx& dest_md_ctx, const char *destname,
1728 ImageOptions& opts, ProgressContext &prog_ctx, size_t sparse_size)
1729 {
1730 CephContext *cct = (CephContext *)dest_md_ctx.cct();
1731 ldout(cct, 20) << "copy " << src->name
1732 << (src->snap_name.length() ? "@" + src->snap_name : "")
1733 << " -> " << destname << " opts = " << opts << dendl;
1734
1735 src->snap_lock.get_read();
1736 uint64_t features = src->features;
1737 uint64_t src_size = src->get_image_size(src->snap_id);
1738 src->snap_lock.put_read();
1739 uint64_t format = src->old_format ? 1 : 2;
1740 if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) {
1741 opts.set(RBD_IMAGE_OPTION_FORMAT, format);
1742 }
1743 uint64_t stripe_unit = src->stripe_unit;
1744 if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) {
1745 opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
1746 }
1747 uint64_t stripe_count = src->stripe_count;
1748 if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) {
1749 opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
1750 }
1751 uint64_t order = src->order;
1752 if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) {
1753 opts.set(RBD_IMAGE_OPTION_ORDER, order);
1754 }
1755 if (opts.get(RBD_IMAGE_OPTION_FEATURES, &features) != 0) {
1756 opts.set(RBD_IMAGE_OPTION_FEATURES, features);
1757 }
1758 if (features & ~RBD_FEATURES_ALL) {
1759 lderr(cct) << "librbd does not support requested features" << dendl;
1760 return -ENOSYS;
1761 }
1762
1763 int r = create(dest_md_ctx, destname, "", src_size, opts, "", "", false);
1764 if (r < 0) {
1765 lderr(cct) << "header creation failed" << dendl;
1766 return r;
1767 }
1768 opts.set(RBD_IMAGE_OPTION_ORDER, static_cast<uint64_t>(order));
1769
1770 ImageCtx *dest = new librbd::ImageCtx(destname, "", NULL,
1771 dest_md_ctx, false);
1772 r = dest->state->open(false);
1773 if (r < 0) {
1774 lderr(cct) << "failed to read newly created header" << dendl;
1775 return r;
1776 }
1777
1778 r = copy(src, dest, prog_ctx, sparse_size);
1779
1780 int close_r = dest->state->close();
1781 if (r == 0 && close_r < 0) {
1782 r = close_r;
1783 }
1784 return r;
1785 }
1786
1787 class C_CopyWrite : public Context {
1788 public:
1789 C_CopyWrite(bufferlist *bl, Context* ctx)
1790 : m_bl(bl), m_ctx(ctx) {}
1791 void finish(int r) override {
1792 delete m_bl;
1793 m_ctx->complete(r);
1794 }
1795 private:
1796 bufferlist *m_bl;
1797 Context *m_ctx;
1798 };
1799
1800 class C_CopyRead : public Context {
1801 public:
1802 C_CopyRead(SimpleThrottle *throttle, ImageCtx *dest, uint64_t offset,
1803 bufferlist *bl, size_t sparse_size)
1804 : m_throttle(throttle), m_dest(dest), m_offset(offset), m_bl(bl),
1805 m_sparse_size(sparse_size) {
1806 m_throttle->start_op();
1807 }
1808 void finish(int r) override {
1809 if (r < 0) {
1810 lderr(m_dest->cct) << "error reading from source image at offset "
1811 << m_offset << ": " << cpp_strerror(r) << dendl;
1812 delete m_bl;
1813 m_throttle->end_op(r);
1814 return;
1815 }
1816 assert(m_bl->length() == (size_t)r);
1817
1818 if (m_bl->is_zero()) {
1819 delete m_bl;
1820 m_throttle->end_op(r);
1821 return;
1822 }
1823
1824 if (!m_sparse_size) {
1825 m_sparse_size = (1 << m_dest->order);
1826 }
1827
1828 auto *throttle = m_throttle;
1829 auto *end_op_ctx = new FunctionContext([throttle](int r) {
1830 throttle->end_op(r);
1831 });
1832 auto gather_ctx = new C_Gather(m_dest->cct, end_op_ctx);
1833
1834 bufferptr m_ptr(m_bl->length());
1835 m_bl->rebuild(m_ptr);
1836 size_t write_offset = 0;
1837 size_t write_length = 0;
1838 size_t offset = 0;
1839 size_t length = m_bl->length();
1840 while (offset < length) {
1841 if (util::calc_sparse_extent(m_ptr,
1842 m_sparse_size,
1843 length,
1844 &write_offset,
1845 &write_length,
1846 &offset)) {
1847 bufferptr write_ptr(m_ptr, write_offset, write_length);
1848 bufferlist *write_bl = new bufferlist();
1849 write_bl->push_back(write_ptr);
1850 Context *ctx = new C_CopyWrite(write_bl, gather_ctx->new_sub());
1851 auto comp = io::AioCompletion::create(ctx);
1852
1853 // coordinate through AIO WQ to ensure lock is acquired if needed
1854 m_dest->io_work_queue->aio_write(comp, m_offset + write_offset,
1855 write_length,
1856 std::move(*write_bl),
31f18b77
FG
1857 LIBRADOS_OP_FLAG_FADVISE_DONTNEED,
1858 std::move(read_trace));
7c673cae
FG
1859 write_offset = offset;
1860 write_length = 0;
1861 }
1862 }
1863 delete m_bl;
1864 assert(gather_ctx->get_sub_created_count() > 0);
1865 gather_ctx->activate();
1866 }
1867
31f18b77
FG
1868 ZTracer::Trace read_trace;
1869
7c673cae
FG
1870 private:
1871 SimpleThrottle *m_throttle;
1872 ImageCtx *m_dest;
1873 uint64_t m_offset;
1874 bufferlist *m_bl;
1875 size_t m_sparse_size;
1876 };
1877
1878 int copy(ImageCtx *src, ImageCtx *dest, ProgressContext &prog_ctx, size_t sparse_size)
1879 {
1880 src->snap_lock.get_read();
1881 uint64_t src_size = src->get_image_size(src->snap_id);
1882 src->snap_lock.put_read();
1883
1884 dest->snap_lock.get_read();
1885 uint64_t dest_size = dest->get_image_size(dest->snap_id);
1886 dest->snap_lock.put_read();
1887
1888 CephContext *cct = src->cct;
1889 if (dest_size < src_size) {
1890 lderr(cct) << " src size " << src_size << " > dest size "
1891 << dest_size << dendl;
1892 return -EINVAL;
1893 }
1894 int r;
b32b8144 1895 const uint32_t MAX_KEYS = 64;
7c673cae 1896 map<string, bufferlist> pairs;
b32b8144
FG
1897 std::string last_key = "";
1898 bool more_results = true;
7c673cae 1899
b32b8144
FG
1900 while (more_results) {
1901 r = cls_client::metadata_list(&src->md_ctx, src->header_oid, last_key, 0, &pairs);
1902 if (r < 0 && r != -EOPNOTSUPP && r != -EIO) {
1903 lderr(cct) << "couldn't list metadata: " << cpp_strerror(r) << dendl;
7c673cae 1904 return r;
b32b8144
FG
1905 } else if (r == 0 && !pairs.empty()) {
1906 r = cls_client::metadata_set(&dest->md_ctx, dest->header_oid, pairs);
1907 if (r < 0) {
1908 lderr(cct) << "couldn't set metadata: " << cpp_strerror(r) << dendl;
1909 return r;
1910 }
1911
1912 last_key = pairs.rbegin()->first;
7c673cae 1913 }
b32b8144
FG
1914
1915 more_results = (pairs.size() == MAX_KEYS);
1916 pairs.clear();
7c673cae
FG
1917 }
1918
31f18b77 1919 ZTracer::Trace trace;
181888fb 1920 if (src->blkin_trace_all) {
31f18b77
FG
1921 trace.init("copy", &src->trace_endpoint);
1922 }
1923
7c673cae
FG
1924 RWLock::RLocker owner_lock(src->owner_lock);
1925 SimpleThrottle throttle(src->concurrent_management_ops, false);
1926 uint64_t period = src->get_stripe_period();
31f18b77
FG
1927 unsigned fadvise_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
1928 LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
7c673cae
FG
1929 for (uint64_t offset = 0; offset < src_size; offset += period) {
1930 if (throttle.pending_error()) {
1931 return throttle.wait_for_ret();
1932 }
1933
1934 uint64_t len = min(period, src_size - offset);
1935 bufferlist *bl = new bufferlist();
31f18b77
FG
1936 auto ctx = new C_CopyRead(&throttle, dest, offset, bl, sparse_size);
1937 auto comp = io::AioCompletion::create_and_start<Context>(
1938 ctx, src, io::AIO_TYPE_READ);
1939
1940 io::ImageReadRequest<> req(*src, comp, {{offset, len}},
1941 io::ReadResult{bl}, fadvise_flags,
1942 std::move(trace));
1943 ctx->read_trace = req.get_trace();
1944
1945 req.send();
7c673cae
FG
1946 prog_ctx.update_progress(offset, src_size);
1947 }
1948
1949 r = throttle.wait_for_ret();
1950 if (r >= 0)
1951 prog_ctx.update_progress(src_size, src_size);
1952 return r;
1953 }
1954
1955 int snap_set(ImageCtx *ictx, const cls::rbd::SnapshotNamespace &snap_namespace,
1956 const char *snap_name)
1957 {
1958 ldout(ictx->cct, 20) << "snap_set " << ictx << " snap = "
1959 << (snap_name ? snap_name : "NULL") << dendl;
1960
1961 // ignore return value, since we may be set to a non-existent
1962 // snapshot and the user is trying to fix that
1963 ictx->state->refresh_if_required();
1964
1965 C_SaferCond ctx;
1966 std::string name(snap_name == nullptr ? "" : snap_name);
1967 ictx->state->snap_set(snap_namespace, name, &ctx);
1968
1969 int r = ctx.wait();
1970 if (r < 0) {
1971 if (r != -ENOENT) {
1972 lderr(ictx->cct) << "failed to " << (name.empty() ? "un" : "") << "set "
1973 << "snapshot: " << cpp_strerror(r) << dendl;
1974 }
1975 return r;
1976 }
1977
1978 return 0;
1979 }
1980
1981 int list_lockers(ImageCtx *ictx,
1982 std::list<locker_t> *lockers,
1983 bool *exclusive,
1984 string *tag)
1985 {
1986 ldout(ictx->cct, 20) << "list_locks on image " << ictx << dendl;
1987
1988 int r = ictx->state->refresh_if_required();
1989 if (r < 0)
1990 return r;
1991
1992 RWLock::RLocker locker(ictx->md_lock);
1993 if (exclusive)
1994 *exclusive = ictx->exclusive_locked;
1995 if (tag)
1996 *tag = ictx->lock_tag;
1997 if (lockers) {
1998 lockers->clear();
1999 map<rados::cls::lock::locker_id_t,
2000 rados::cls::lock::locker_info_t>::const_iterator it;
2001 for (it = ictx->lockers.begin(); it != ictx->lockers.end(); ++it) {
2002 locker_t locker;
2003 locker.client = stringify(it->first.locker);
2004 locker.cookie = it->first.cookie;
2005 locker.address = stringify(it->second.addr);
2006 lockers->push_back(locker);
2007 }
2008 }
2009
2010 return 0;
2011 }
2012
2013 int lock(ImageCtx *ictx, bool exclusive, const string& cookie,
2014 const string& tag)
2015 {
2016 ldout(ictx->cct, 20) << "lock image " << ictx << " exclusive=" << exclusive
2017 << " cookie='" << cookie << "' tag='" << tag << "'"
2018 << dendl;
2019
2020 int r = ictx->state->refresh_if_required();
2021 if (r < 0)
2022 return r;
2023
2024 /**
2025 * If we wanted we could do something more intelligent, like local
2026 * checks that we think we will succeed. But for now, let's not
2027 * duplicate that code.
2028 */
2029 {
2030 RWLock::RLocker locker(ictx->md_lock);
2031 r = rados::cls::lock::lock(&ictx->md_ctx, ictx->header_oid, RBD_LOCK_NAME,
2032 exclusive ? LOCK_EXCLUSIVE : LOCK_SHARED,
2033 cookie, tag, "", utime_t(), 0);
2034 if (r < 0) {
2035 return r;
2036 }
2037 }
2038
2039 ictx->notify_update();
2040 return 0;
2041 }
2042
2043 int unlock(ImageCtx *ictx, const string& cookie)
2044 {
2045 ldout(ictx->cct, 20) << "unlock image " << ictx
2046 << " cookie='" << cookie << "'" << dendl;
2047
2048 int r = ictx->state->refresh_if_required();
2049 if (r < 0)
2050 return r;
2051
2052 {
2053 RWLock::RLocker locker(ictx->md_lock);
2054 r = rados::cls::lock::unlock(&ictx->md_ctx, ictx->header_oid,
2055 RBD_LOCK_NAME, cookie);
2056 if (r < 0) {
2057 return r;
2058 }
2059 }
2060
2061 ictx->notify_update();
2062 return 0;
2063 }
2064
2065 int break_lock(ImageCtx *ictx, const string& client,
2066 const string& cookie)
2067 {
2068 ldout(ictx->cct, 20) << "break_lock image " << ictx << " client='" << client
2069 << "' cookie='" << cookie << "'" << dendl;
2070
2071 int r = ictx->state->refresh_if_required();
2072 if (r < 0)
2073 return r;
2074
2075 entity_name_t lock_client;
2076 if (!lock_client.parse(client)) {
2077 lderr(ictx->cct) << "Unable to parse client '" << client
2078 << "'" << dendl;
2079 return -EINVAL;
2080 }
2081
2082 if (ictx->blacklist_on_break_lock) {
2083 typedef std::map<rados::cls::lock::locker_id_t,
2084 rados::cls::lock::locker_info_t> Lockers;
2085 Lockers lockers;
2086 ClsLockType lock_type;
2087 std::string lock_tag;
2088 r = rados::cls::lock::get_lock_info(&ictx->md_ctx, ictx->header_oid,
2089 RBD_LOCK_NAME, &lockers, &lock_type,
2090 &lock_tag);
2091 if (r < 0) {
2092 lderr(ictx->cct) << "unable to retrieve lock info: " << cpp_strerror(r)
2093 << dendl;
2094 return r;
2095 }
2096
2097 std::string client_address;
2098 for (Lockers::iterator it = lockers.begin();
2099 it != lockers.end(); ++it) {
2100 if (it->first.locker == lock_client) {
2101 client_address = stringify(it->second.addr);
2102 break;
2103 }
2104 }
2105 if (client_address.empty()) {
2106 return -ENOENT;
2107 }
2108
2109 RWLock::RLocker locker(ictx->md_lock);
2110 librados::Rados rados(ictx->md_ctx);
2111 r = rados.blacklist_add(client_address,
2112 ictx->blacklist_expire_seconds);
2113 if (r < 0) {
2114 lderr(ictx->cct) << "unable to blacklist client: " << cpp_strerror(r)
2115 << dendl;
2116 return r;
2117 }
2118 }
2119
2120 r = rados::cls::lock::break_lock(&ictx->md_ctx, ictx->header_oid,
2121 RBD_LOCK_NAME, cookie, lock_client);
2122 if (r < 0)
2123 return r;
2124 ictx->notify_update();
2125 return 0;
2126 }
2127
2128 void rbd_ctx_cb(completion_t cb, void *arg)
2129 {
2130 Context *ctx = reinterpret_cast<Context *>(arg);
2131 auto comp = reinterpret_cast<io::AioCompletion *>(cb);
2132 ctx->complete(comp->get_return_value());
2133 comp->release();
2134 }
2135
2136 int64_t read_iterate(ImageCtx *ictx, uint64_t off, uint64_t len,
2137 int (*cb)(uint64_t, size_t, const char *, void *),
2138 void *arg)
2139 {
2140 utime_t start_time, elapsed;
2141
2142 ldout(ictx->cct, 20) << "read_iterate " << ictx << " off = " << off
2143 << " len = " << len << dendl;
2144
2145 int r = ictx->state->refresh_if_required();
2146 if (r < 0)
2147 return r;
2148
2149 uint64_t mylen = len;
2150 ictx->snap_lock.get_read();
2151 r = clip_io(ictx, off, &mylen);
2152 ictx->snap_lock.put_read();
2153 if (r < 0)
2154 return r;
2155
2156 int64_t total_read = 0;
2157 uint64_t period = ictx->get_stripe_period();
2158 uint64_t left = mylen;
2159
31f18b77 2160 ZTracer::Trace trace;
181888fb 2161 if (ictx->blkin_trace_all) {
31f18b77
FG
2162 trace.init("read_iterate", &ictx->trace_endpoint);
2163 }
2164
7c673cae
FG
2165 RWLock::RLocker owner_locker(ictx->owner_lock);
2166 start_time = ceph_clock_now();
2167 while (left > 0) {
2168 uint64_t period_off = off - (off % period);
2169 uint64_t read_len = min(period_off + period - off, left);
2170
2171 bufferlist bl;
2172
2173 C_SaferCond ctx;
2174 auto c = io::AioCompletion::create_and_start(&ctx, ictx,
2175 io::AIO_TYPE_READ);
2176 io::ImageRequest<>::aio_read(ictx, c, {{off, read_len}},
31f18b77 2177 io::ReadResult{&bl}, 0, std::move(trace));
7c673cae
FG
2178
2179 int ret = ctx.wait();
2180 if (ret < 0) {
2181 return ret;
2182 }
2183
2184 r = cb(total_read, ret, bl.c_str(), arg);
2185 if (r < 0) {
2186 return r;
2187 }
2188
2189 total_read += ret;
2190 left -= ret;
2191 off += ret;
2192 }
2193
2194 elapsed = ceph_clock_now() - start_time;
2195 ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed);
2196 ictx->perfcounter->inc(l_librbd_rd);
2197 ictx->perfcounter->inc(l_librbd_rd_bytes, mylen);
2198 return total_read;
2199 }
2200
2201 // validate extent against image size; clip to image size if necessary
2202 int clip_io(ImageCtx *ictx, uint64_t off, uint64_t *len)
2203 {
2204 assert(ictx->snap_lock.is_locked());
2205 uint64_t image_size = ictx->get_image_size(ictx->snap_id);
2206 bool snap_exists = ictx->snap_exists;
2207
2208 if (!snap_exists)
2209 return -ENOENT;
2210
2211 // special-case "len == 0" requests: always valid
2212 if (*len == 0)
2213 return 0;
2214
2215 // can't start past end
2216 if (off >= image_size)
2217 return -EINVAL;
2218
2219 // clip requests that extend past end to just end
2220 if ((off + *len) > image_size)
2221 *len = (size_t)(image_size - off);
2222
2223 return 0;
2224 }
2225
2226 int flush(ImageCtx *ictx)
2227 {
2228 CephContext *cct = ictx->cct;
2229 ldout(cct, 20) << "flush " << ictx << dendl;
2230
2231 int r = ictx->state->refresh_if_required();
2232 if (r < 0) {
2233 return r;
2234 }
2235
2236 ictx->user_flushed();
2237 C_SaferCond ctx;
2238 {
2239 RWLock::RLocker owner_locker(ictx->owner_lock);
2240 ictx->flush(&ctx);
2241 }
2242 r = ctx.wait();
2243
2244 ictx->perfcounter->inc(l_librbd_flush);
2245 return r;
2246 }
2247
2248 int invalidate_cache(ImageCtx *ictx)
2249 {
2250 CephContext *cct = ictx->cct;
2251 ldout(cct, 20) << "invalidate_cache " << ictx << dendl;
2252
2253 int r = ictx->state->refresh_if_required();
2254 if (r < 0) {
2255 return r;
2256 }
2257
2258 RWLock::RLocker owner_locker(ictx->owner_lock);
7c673cae
FG
2259 r = ictx->invalidate_cache(false);
2260 ictx->perfcounter->inc(l_librbd_invalidate_cache);
2261 return r;
2262 }
2263
2264 int poll_io_events(ImageCtx *ictx, io::AioCompletion **comps, int numcomp)
2265 {
2266 if (numcomp <= 0)
2267 return -EINVAL;
2268 CephContext *cct = ictx->cct;
2269 ldout(cct, 20) << __func__ << " " << ictx << " numcomp = " << numcomp
2270 << dendl;
2271 int i = 0;
2272 Mutex::Locker l(ictx->completed_reqs_lock);
2273 while (i < numcomp) {
2274 if (ictx->completed_reqs.empty())
2275 break;
2276 comps[i++] = ictx->completed_reqs.front();
2277 ictx->completed_reqs.pop_front();
2278 }
2279 return i;
2280 }
2281
2282 int metadata_get(ImageCtx *ictx, const string &key, string *value)
2283 {
2284 CephContext *cct = ictx->cct;
2285 ldout(cct, 20) << "metadata_get " << ictx << " key=" << key << dendl;
2286
2287 int r = ictx->state->refresh_if_required();
2288 if (r < 0) {
2289 return r;
2290 }
2291
2292 return cls_client::metadata_get(&ictx->md_ctx, ictx->header_oid, key, value);
2293 }
2294
2295 int metadata_list(ImageCtx *ictx, const string &start, uint64_t max, map<string, bufferlist> *pairs)
2296 {
2297 CephContext *cct = ictx->cct;
2298 ldout(cct, 20) << "metadata_list " << ictx << dendl;
2299
2300 int r = ictx->state->refresh_if_required();
2301 if (r < 0) {
2302 return r;
2303 }
2304
2305 return cls_client::metadata_list(&ictx->md_ctx, ictx->header_oid, start, max, pairs);
2306 }
2307
2308 struct C_RBD_Readahead : public Context {
2309 ImageCtx *ictx;
2310 object_t oid;
2311 uint64_t offset;
2312 uint64_t length;
2313 C_RBD_Readahead(ImageCtx *ictx, object_t oid, uint64_t offset, uint64_t length)
2314 : ictx(ictx), oid(oid), offset(offset), length(length) { }
2315 void finish(int r) override {
2316 ldout(ictx->cct, 20) << "C_RBD_Readahead on " << oid << ": " << offset << "+" << length << dendl;
2317 ictx->readahead.dec_pending();
2318 }
2319 };
2320
2321 void readahead(ImageCtx *ictx,
2322 const vector<pair<uint64_t,uint64_t> >& image_extents)
2323 {
2324 uint64_t total_bytes = 0;
2325 for (vector<pair<uint64_t,uint64_t> >::const_iterator p = image_extents.begin();
2326 p != image_extents.end();
2327 ++p) {
2328 total_bytes += p->second;
2329 }
2330
2331 ictx->md_lock.get_write();
2332 bool abort = ictx->readahead_disable_after_bytes != 0 &&
2333 ictx->total_bytes_read > ictx->readahead_disable_after_bytes;
2334 if (abort) {
2335 ictx->md_lock.put_write();
2336 return;
2337 }
2338 ictx->total_bytes_read += total_bytes;
2339 ictx->snap_lock.get_read();
2340 uint64_t image_size = ictx->get_image_size(ictx->snap_id);
2341 ictx->snap_lock.put_read();
2342 ictx->md_lock.put_write();
2343
2344 pair<uint64_t, uint64_t> readahead_extent = ictx->readahead.update(image_extents, image_size);
2345 uint64_t readahead_offset = readahead_extent.first;
2346 uint64_t readahead_length = readahead_extent.second;
2347
2348 if (readahead_length > 0) {
2349 ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~" << readahead_length << dendl;
2350 map<object_t,vector<ObjectExtent> > readahead_object_extents;
2351 Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout,
2352 readahead_offset, readahead_length, 0, readahead_object_extents);
2353 for (map<object_t,vector<ObjectExtent> >::iterator p = readahead_object_extents.begin(); p != readahead_object_extents.end(); ++p) {
2354 for (vector<ObjectExtent>::iterator q = p->second.begin(); q != p->second.end(); ++q) {
2355 ldout(ictx->cct, 20) << "(readahead) oid " << q->oid << " " << q->offset << "~" << q->length << dendl;
2356
2357 Context *req_comp = new C_RBD_Readahead(ictx, q->oid, q->offset, q->length);
2358 ictx->readahead.inc_pending();
2359 ictx->aio_read_from_cache(q->oid, q->objectno, NULL,
2360 q->length, q->offset,
31f18b77 2361 req_comp, 0, nullptr);
7c673cae
FG
2362 }
2363 }
2364 ictx->perfcounter->inc(l_librbd_readahead);
2365 ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length);
2366 }
2367 }
2368
2369
2370
2371}