]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/internal.cc
70c706c6d4beec42e45430c0e2f894f157ad6363
[ceph.git] / ceph / src / librbd / internal.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #include "include/int_types.h"
4
5 #include <errno.h>
6 #include <limits.h>
7
8 #include "include/types.h"
9 #include "include/uuid.h"
10 #include "common/ceph_context.h"
11 #include "common/dout.h"
12 #include "common/errno.h"
13 #include "common/Throttle.h"
14 #include "common/event_socket.h"
15 #include "cls/lock/cls_lock_client.h"
16 #include "include/stringify.h"
17
18 #include "cls/rbd/cls_rbd.h"
19 #include "cls/rbd/cls_rbd_types.h"
20 #include "cls/rbd/cls_rbd_client.h"
21 #include "cls/journal/cls_journal_types.h"
22 #include "cls/journal/cls_journal_client.h"
23
24 #include "librbd/ExclusiveLock.h"
25 #include "librbd/ImageCtx.h"
26 #include "librbd/ImageState.h"
27 #include "librbd/internal.h"
28 #include "librbd/Journal.h"
29 #include "librbd/ObjectMap.h"
30 #include "librbd/Operations.h"
31 #include "librbd/Types.h"
32 #include "librbd/Utils.h"
33 #include "librbd/api/Image.h"
34 #include "librbd/exclusive_lock/AutomaticPolicy.h"
35 #include "librbd/exclusive_lock/StandardPolicy.h"
36 #include "librbd/image/CloneRequest.h"
37 #include "librbd/image/CreateRequest.h"
38 #include "librbd/image/RemoveRequest.h"
39 #include "librbd/io/AioCompletion.h"
40 #include "librbd/io/ImageRequest.h"
41 #include "librbd/io/ImageRequestWQ.h"
42 #include "librbd/io/ObjectRequest.h"
43 #include "librbd/io/ReadResult.h"
44 #include "librbd/journal/Types.h"
45 #include "librbd/managed_lock/Types.h"
46 #include "librbd/mirror/EnableRequest.h"
47 #include "librbd/operation/TrimRequest.h"
48
49 #include "journal/Journaler.h"
50
51 #include <boost/scope_exit.hpp>
52 #include <boost/variant.hpp>
53 #include "include/assert.h"
54
55 #define dout_subsys ceph_subsys_rbd
56 #undef dout_prefix
57 #define dout_prefix *_dout << "librbd: "
58
59 #define rbd_howmany(x, y) (((x) + (y) - 1) / (y))
60
61 using std::map;
62 using std::pair;
63 using std::set;
64 using std::string;
65 using std::vector;
66 // list binds to list() here, so std::list is explicitly used below
67
68 using ceph::bufferlist;
69 using librados::snap_t;
70 using librados::IoCtx;
71 using librados::Rados;
72
73 namespace librbd {
74
75 namespace {
76
77 int validate_pool(IoCtx &io_ctx, CephContext *cct) {
78 if (!cct->_conf->rbd_validate_pool) {
79 return 0;
80 }
81
82 int r = io_ctx.stat(RBD_DIRECTORY, NULL, NULL);
83 if (r == 0) {
84 return 0;
85 } else if (r < 0 && r != -ENOENT) {
86 lderr(cct) << "failed to stat RBD directory: " << cpp_strerror(r) << dendl;
87 return r;
88 }
89
90 // allocate a self-managed snapshot id if this a new pool to force
91 // self-managed snapshot mode
92 uint64_t snap_id;
93 r = io_ctx.selfmanaged_snap_create(&snap_id);
94 if (r == -EINVAL) {
95 lderr(cct) << "pool not configured for self-managed RBD snapshot support"
96 << dendl;
97 return r;
98 } else if (r < 0) {
99 lderr(cct) << "failed to allocate self-managed snapshot: "
100 << cpp_strerror(r) << dendl;
101 return r;
102 }
103
104 r = io_ctx.selfmanaged_snap_remove(snap_id);
105 if (r < 0) {
106 lderr(cct) << "failed to release self-managed snapshot " << snap_id
107 << ": " << cpp_strerror(r) << dendl;
108 }
109 return 0;
110 }
111
112
113 } // anonymous namespace
114
115 int detect_format(IoCtx &io_ctx, const string &name,
116 bool *old_format, uint64_t *size)
117 {
118 CephContext *cct = (CephContext *)io_ctx.cct();
119 if (old_format)
120 *old_format = true;
121 int r = io_ctx.stat(util::old_header_name(name), size, NULL);
122 if (r == -ENOENT) {
123 if (old_format)
124 *old_format = false;
125 r = io_ctx.stat(util::id_obj_name(name), size, NULL);
126 if (r < 0)
127 return r;
128 } else if (r < 0) {
129 return r;
130 }
131
132 ldout(cct, 20) << "detect format of " << name << " : "
133 << (old_format ? (*old_format ? "old" : "new") :
134 "don't care") << dendl;
135 return 0;
136 }
137
/**
 * Tell whether an offset is backed by a parent image.
 *
 * @param parent_pool_id pool id of the parent, or -1 when there is no parent
 * @param off            offset being checked
 * @param overlap        parent overlap
 * @return true when a parent exists and off is not greater than overlap
 */
bool has_parent(int64_t parent_pool_id, uint64_t off, uint64_t overlap)
{
  if (parent_pool_id == -1) {
    return false;
  }
  return off <= overlap;
}
142
143 void init_rbd_header(struct rbd_obj_header_ondisk& ondisk,
144 uint64_t size, int order, uint64_t bid)
145 {
146 uint32_t hi = bid >> 32;
147 uint32_t lo = bid & 0xFFFFFFFF;
148 uint32_t extra = rand() % 0xFFFFFFFF;
149 memset(&ondisk, 0, sizeof(ondisk));
150
151 memcpy(&ondisk.text, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT));
152 memcpy(&ondisk.signature, RBD_HEADER_SIGNATURE,
153 sizeof(RBD_HEADER_SIGNATURE));
154 memcpy(&ondisk.version, RBD_HEADER_VERSION, sizeof(RBD_HEADER_VERSION));
155
156 snprintf(ondisk.block_name, sizeof(ondisk.block_name), "rb.%x.%x.%x",
157 hi, lo, extra);
158
159 ondisk.image_size = size;
160 ondisk.options.order = order;
161 ondisk.options.crypt_type = RBD_CRYPT_NONE;
162 ondisk.options.comp_type = RBD_COMP_NONE;
163 ondisk.snap_seq = 0;
164 ondisk.snap_count = 0;
165 ondisk.reserved = 0;
166 ondisk.snap_names_len = 0;
167 }
168
169 void image_info(ImageCtx *ictx, image_info_t& info, size_t infosize)
170 {
171 int obj_order = ictx->order;
172 ictx->snap_lock.get_read();
173 info.size = ictx->get_image_size(ictx->snap_id);
174 ictx->snap_lock.put_read();
175 info.obj_size = 1ULL << obj_order;
176 info.num_objs = Striper::get_num_objects(ictx->layout, info.size);
177 info.order = obj_order;
178 strncpy(info.block_name_prefix, ictx->object_prefix.c_str(),
179 RBD_MAX_BLOCK_NAME_SIZE);
180 info.block_name_prefix[RBD_MAX_BLOCK_NAME_SIZE - 1] = '\0';
181
182 // clear deprecated fields
183 info.parent_pool = -1L;
184 info.parent_name[0] = '\0';
185 }
186
/**
 * Extract the object number from a data object name.
 *
 * The name is expected to be the object prefix, one separator character and
 * then the object number in hexadecimal.
 *
 * @param oid           full object name
 * @param object_prefix prefix to skip at the front of oid
 * @return the parsed object number, or 0 when no number can be parsed
 */
uint64_t oid_to_object_no(const std::string& oid,
                          const std::string& object_prefix)
{
  std::istringstream iss(oid);
  // skip object prefix and separator
  iss.ignore(object_prefix.length() + 1);

  // initialised so that a name shorter than prefix + separator (stream
  // already at EOF, extraction never writes) yields 0 instead of garbage
  uint64_t num = 0;
  iss >> std::hex >> num;
  return num;
}
196
197 void trim_image(ImageCtx *ictx, uint64_t newsize, ProgressContext& prog_ctx)
198 {
199 assert(ictx->owner_lock.is_locked());
200 assert(ictx->exclusive_lock == nullptr ||
201 ictx->exclusive_lock->is_lock_owner());
202
203 C_SaferCond ctx;
204 ictx->snap_lock.get_read();
205 operation::TrimRequest<> *req = operation::TrimRequest<>::create(
206 *ictx, &ctx, ictx->size, newsize, prog_ctx);
207 ictx->snap_lock.put_read();
208 req->send();
209
210 int r = ctx.wait();
211 if (r < 0) {
212 lderr(ictx->cct) << "warning: failed to remove some object(s): "
213 << cpp_strerror(r) << dendl;
214 }
215 }
216
217 int read_header_bl(IoCtx& io_ctx, const string& header_oid,
218 bufferlist& header, uint64_t *ver)
219 {
220 int r;
221 uint64_t off = 0;
222 #define READ_SIZE 4096
223 do {
224 bufferlist bl;
225 r = io_ctx.read(header_oid, bl, READ_SIZE, off);
226 if (r < 0)
227 return r;
228 header.claim_append(bl);
229 off += r;
230 } while (r == READ_SIZE);
231
232 if (header.length() < sizeof(RBD_HEADER_TEXT) ||
233 memcmp(RBD_HEADER_TEXT, header.c_str(), sizeof(RBD_HEADER_TEXT))) {
234 CephContext *cct = (CephContext *)io_ctx.cct();
235 lderr(cct) << "unrecognized header format" << dendl;
236 return -ENXIO;
237 }
238
239 if (ver)
240 *ver = io_ctx.get_last_version();
241
242 return 0;
243 }
244
245 int read_header(IoCtx& io_ctx, const string& header_oid,
246 struct rbd_obj_header_ondisk *header, uint64_t *ver)
247 {
248 bufferlist header_bl;
249 int r = read_header_bl(io_ctx, header_oid, header_bl, ver);
250 if (r < 0)
251 return r;
252 if (header_bl.length() < (int)sizeof(*header))
253 return -EIO;
254 memcpy(header, header_bl.c_str(), sizeof(*header));
255
256 return 0;
257 }
258
259 int tmap_set(IoCtx& io_ctx, const string& imgname)
260 {
261 bufferlist cmdbl, emptybl;
262 __u8 c = CEPH_OSD_TMAP_SET;
263 ::encode(c, cmdbl);
264 ::encode(imgname, cmdbl);
265 ::encode(emptybl, cmdbl);
266 return io_ctx.tmap_update(RBD_DIRECTORY, cmdbl);
267 }
268
269 int tmap_rm(IoCtx& io_ctx, const string& imgname)
270 {
271 bufferlist cmdbl;
272 __u8 c = CEPH_OSD_TMAP_RM;
273 ::encode(c, cmdbl);
274 ::encode(imgname, cmdbl);
275 return io_ctx.tmap_update(RBD_DIRECTORY, cmdbl);
276 }
277
278 typedef boost::variant<std::string,uint64_t> image_option_value_t;
279 typedef std::map<int,image_option_value_t> image_options_t;
280 typedef std::shared_ptr<image_options_t> image_options_ref;
281
282 enum image_option_type_t {
283 STR,
284 UINT64,
285 };
286
287 const std::map<int, image_option_type_t> IMAGE_OPTIONS_TYPE_MAPPING = {
288 {RBD_IMAGE_OPTION_FORMAT, UINT64},
289 {RBD_IMAGE_OPTION_FEATURES, UINT64},
290 {RBD_IMAGE_OPTION_ORDER, UINT64},
291 {RBD_IMAGE_OPTION_STRIPE_UNIT, UINT64},
292 {RBD_IMAGE_OPTION_STRIPE_COUNT, UINT64},
293 {RBD_IMAGE_OPTION_JOURNAL_ORDER, UINT64},
294 {RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH, UINT64},
295 {RBD_IMAGE_OPTION_JOURNAL_POOL, STR},
296 {RBD_IMAGE_OPTION_FEATURES_SET, UINT64},
297 {RBD_IMAGE_OPTION_FEATURES_CLEAR, UINT64},
298 {RBD_IMAGE_OPTION_DATA_POOL, STR},
299 };
300
301 std::string image_option_name(int optname) {
302 switch (optname) {
303 case RBD_IMAGE_OPTION_FORMAT:
304 return "format";
305 case RBD_IMAGE_OPTION_FEATURES:
306 return "features";
307 case RBD_IMAGE_OPTION_ORDER:
308 return "order";
309 case RBD_IMAGE_OPTION_STRIPE_UNIT:
310 return "stripe_unit";
311 case RBD_IMAGE_OPTION_STRIPE_COUNT:
312 return "stripe_count";
313 case RBD_IMAGE_OPTION_JOURNAL_ORDER:
314 return "journal_order";
315 case RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH:
316 return "journal_splay_width";
317 case RBD_IMAGE_OPTION_JOURNAL_POOL:
318 return "journal_pool";
319 case RBD_IMAGE_OPTION_FEATURES_SET:
320 return "features_set";
321 case RBD_IMAGE_OPTION_FEATURES_CLEAR:
322 return "features_clear";
323 case RBD_IMAGE_OPTION_DATA_POOL:
324 return "data_pool";
325 default:
326 return "unknown (" + stringify(optname) + ")";
327 }
328 }
329
330 std::ostream &operator<<(std::ostream &os, const ImageOptions &opts) {
331 os << "[";
332
333 const char *delimiter = "";
334 for (auto &i : IMAGE_OPTIONS_TYPE_MAPPING) {
335 if (i.second == STR) {
336 std::string val;
337 if (opts.get(i.first, &val) == 0) {
338 os << delimiter << image_option_name(i.first) << "=" << val;
339 delimiter = ", ";
340 }
341 } else if (i.second == UINT64) {
342 uint64_t val;
343 if (opts.get(i.first, &val) == 0) {
344 os << delimiter << image_option_name(i.first) << "=" << val;
345 delimiter = ", ";
346 }
347 }
348 }
349
350 os << "]";
351
352 return os;
353 }
354
355 void image_options_create(rbd_image_options_t* opts)
356 {
357 image_options_ref* opts_ = new image_options_ref(new image_options_t());
358
359 *opts = static_cast<rbd_image_options_t>(opts_);
360 }
361
362 void image_options_create_ref(rbd_image_options_t* opts,
363 rbd_image_options_t orig)
364 {
365 image_options_ref* orig_ = static_cast<image_options_ref*>(orig);
366 image_options_ref* opts_ = new image_options_ref(*orig_);
367
368 *opts = static_cast<rbd_image_options_t>(opts_);
369 }
370
371 void image_options_copy(rbd_image_options_t* opts,
372 const ImageOptions &orig)
373 {
374 image_options_ref* opts_ = new image_options_ref(new image_options_t());
375
376 *opts = static_cast<rbd_image_options_t>(opts_);
377
378 std::string str_val;
379 uint64_t uint64_val;
380 for (auto &i : IMAGE_OPTIONS_TYPE_MAPPING) {
381 switch (i.second) {
382 case STR:
383 if (orig.get(i.first, &str_val) == 0) {
384 image_options_set(*opts, i.first, str_val);
385 }
386 continue;
387 case UINT64:
388 if (orig.get(i.first, &uint64_val) == 0) {
389 image_options_set(*opts, i.first, uint64_val);
390 }
391 continue;
392 }
393 }
394 }
395
396 void image_options_destroy(rbd_image_options_t opts)
397 {
398 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
399
400 delete opts_;
401 }
402
403 int image_options_set(rbd_image_options_t opts, int optname,
404 const std::string& optval)
405 {
406 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
407
408 std::map<int, image_option_type_t>::const_iterator i =
409 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
410
411 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != STR) {
412 return -EINVAL;
413 }
414
415 (*opts_->get())[optname] = optval;
416 return 0;
417 }
418
419 int image_options_set(rbd_image_options_t opts, int optname, uint64_t optval)
420 {
421 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
422
423 std::map<int, image_option_type_t>::const_iterator i =
424 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
425
426 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != UINT64) {
427 return -EINVAL;
428 }
429
430 (*opts_->get())[optname] = optval;
431 return 0;
432 }
433
434 int image_options_get(rbd_image_options_t opts, int optname,
435 std::string* optval)
436 {
437 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
438
439 std::map<int, image_option_type_t>::const_iterator i =
440 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
441
442 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != STR) {
443 return -EINVAL;
444 }
445
446 image_options_t::const_iterator j = (*opts_)->find(optname);
447
448 if (j == (*opts_)->end()) {
449 return -ENOENT;
450 }
451
452 *optval = boost::get<std::string>(j->second);
453 return 0;
454 }
455
456 int image_options_get(rbd_image_options_t opts, int optname, uint64_t* optval)
457 {
458 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
459
460 std::map<int, image_option_type_t>::const_iterator i =
461 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
462
463 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != UINT64) {
464 return -EINVAL;
465 }
466
467 image_options_t::const_iterator j = (*opts_)->find(optname);
468
469 if (j == (*opts_)->end()) {
470 return -ENOENT;
471 }
472
473 *optval = boost::get<uint64_t>(j->second);
474 return 0;
475 }
476
477 int image_options_is_set(rbd_image_options_t opts, int optname,
478 bool* is_set)
479 {
480 if (IMAGE_OPTIONS_TYPE_MAPPING.find(optname) ==
481 IMAGE_OPTIONS_TYPE_MAPPING.end()) {
482 return -EINVAL;
483 }
484
485 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
486 *is_set = ((*opts_)->find(optname) != (*opts_)->end());
487 return 0;
488 }
489
490 int image_options_unset(rbd_image_options_t opts, int optname)
491 {
492 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
493
494 std::map<int, image_option_type_t>::const_iterator i =
495 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
496
497 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end()) {
498 assert((*opts_)->find(optname) == (*opts_)->end());
499 return -EINVAL;
500 }
501
502 image_options_t::const_iterator j = (*opts_)->find(optname);
503
504 if (j == (*opts_)->end()) {
505 return -ENOENT;
506 }
507
508 (*opts_)->erase(j);
509 return 0;
510 }
511
512 void image_options_clear(rbd_image_options_t opts)
513 {
514 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
515
516 (*opts_)->clear();
517 }
518
519 bool image_options_is_empty(rbd_image_options_t opts)
520 {
521 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
522
523 return (*opts_)->empty();
524 }
525
526 int list(IoCtx& io_ctx, vector<string>& names)
527 {
528 CephContext *cct = (CephContext *)io_ctx.cct();
529 ldout(cct, 20) << "list " << &io_ctx << dendl;
530
531 bufferlist bl;
532 int r = io_ctx.read(RBD_DIRECTORY, bl, 0, 0);
533 if (r < 0) {
534 if (r == -ENOENT) {
535 r = 0;
536 }
537 return r;
538 }
539
540 // old format images are in a tmap
541 if (bl.length()) {
542 bufferlist::iterator p = bl.begin();
543 bufferlist header;
544 map<string,bufferlist> m;
545 ::decode(header, p);
546 ::decode(m, p);
547 for (map<string,bufferlist>::iterator q = m.begin(); q != m.end(); ++q) {
548 names.push_back(q->first);
549 }
550 }
551
552 map<string, string> images;
553 r = api::Image<>::list_images(io_ctx, &images);
554 if (r < 0) {
555 lderr(cct) << "error listing v2 images: " << cpp_strerror(r) << dendl;
556 return r;
557 }
558 for (const auto& img_pair : images) {
559 names.push_back(img_pair.first);
560 }
561
562 return 0;
563 }
564
565 int flatten_children(ImageCtx *ictx, const char* snap_name,
566 ProgressContext& pctx)
567 {
568 CephContext *cct = ictx->cct;
569 ldout(cct, 20) << "children flatten " << ictx->name << dendl;
570
571 RWLock::RLocker l(ictx->snap_lock);
572 snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name);
573 ParentSpec parent_spec(ictx->md_ctx.get_id(), ictx->id, snap_id);
574 map< pair<int64_t, string>, set<string> > image_info;
575
576 int r = api::Image<>::list_children(ictx, parent_spec, &image_info);
577 if (r < 0) {
578 return r;
579 }
580
581 size_t size = image_info.size();
582 if (size == 0)
583 return 0;
584
585 size_t i = 0;
586 Rados rados(ictx->md_ctx);
587 for ( auto &info : image_info){
588 string pool = info.first.second;
589 IoCtx ioctx;
590 r = rados.ioctx_create2(info.first.first, ioctx);
591 if (r < 0) {
592 lderr(cct) << "Error accessing child image pool " << pool
593 << dendl;
594 return r;
595 }
596
597 for (auto &id_it : info.second) {
598 ImageCtx *imctx = new ImageCtx("", id_it, NULL, ioctx, false);
599 int r = imctx->state->open(false);
600 if (r < 0) {
601 lderr(cct) << "error opening image: "
602 << cpp_strerror(r) << dendl;
603 return r;
604 }
605
606 librbd::NoOpProgressContext prog_ctx;
607 r = imctx->operations->flatten(prog_ctx);
608 if (r < 0) {
609 lderr(cct) << "error flattening image: " << pool << "/" << id_it
610 << cpp_strerror(r) << dendl;
611 imctx->state->close();
612 return r;
613 }
614
615 if ((imctx->features & RBD_FEATURE_DEEP_FLATTEN) == 0 &&
616 !imctx->snaps.empty()) {
617 imctx->parent_lock.get_read();
618 ParentInfo parent_info = imctx->parent_md;
619 imctx->parent_lock.put_read();
620
621 r = cls_client::remove_child(&imctx->md_ctx, RBD_CHILDREN,
622 parent_info.spec, imctx->id);
623 if (r < 0 && r != -ENOENT) {
624 lderr(cct) << "error removing child from children list" << dendl;
625 imctx->state->close();
626 return r;
627 }
628 }
629
630 r = imctx->state->close();
631 if (r < 0) {
632 lderr(cct) << "failed to close image: " << cpp_strerror(r) << dendl;
633 return r;
634 }
635 }
636 pctx.update_progress(++i, size);
637 assert(i <= size);
638 }
639
640 return 0;
641 }
642
643 int list_children(ImageCtx *ictx, set<pair<string, string> >& names)
644 {
645 CephContext *cct = ictx->cct;
646 ldout(cct, 20) << "children list " << ictx->name << dendl;
647
648 RWLock::RLocker l(ictx->snap_lock);
649 ParentSpec parent_spec(ictx->md_ctx.get_id(), ictx->id, ictx->snap_id);
650 map< pair<int64_t, string>, set<string> > image_info;
651
652 int r = api::Image<>::list_children(ictx, parent_spec, &image_info);
653 if (r < 0) {
654 return r;
655 }
656
657 Rados rados(ictx->md_ctx);
658 for ( auto &info : image_info){
659 IoCtx ioctx;
660 r = rados.ioctx_create2(info.first.first, ioctx);
661 if (r < 0) {
662 lderr(cct) << "Error accessing child image pool " << info.first.second
663 << dendl;
664 return r;
665 }
666
667 for (auto &id_it : info.second) {
668 string name;
669 r = cls_client::dir_get_name(&ioctx, RBD_DIRECTORY, id_it, &name);
670 if (r < 0) {
671 lderr(cct) << "Error looking up name for image id " << id_it
672 << " in pool " << info.first.second << dendl;
673 return r;
674 }
675 names.insert(make_pair(info.first.second, name));
676 }
677 }
678
679 return 0;
680 }
681
682 int get_snap_namespace(ImageCtx *ictx,
683 const char *snap_name,
684 cls::rbd::SnapshotNamespace *snap_namespace) {
685 ldout(ictx->cct, 20) << "get_snap_namespace " << ictx << " " << snap_name
686 << dendl;
687
688 int r = ictx->state->refresh_if_required();
689 if (r < 0)
690 return r;
691 RWLock::RLocker l(ictx->snap_lock);
692 snap_t snap_id = ictx->get_snap_id(*snap_namespace, snap_name);
693 if (snap_id == CEPH_NOSNAP)
694 return -ENOENT;
695 r = ictx->get_snap_namespace(snap_id, snap_namespace);
696 return r;
697 }
698
699 int snap_is_protected(ImageCtx *ictx, const char *snap_name, bool *is_protected)
700 {
701 ldout(ictx->cct, 20) << "snap_is_protected " << ictx << " " << snap_name
702 << dendl;
703
704 int r = ictx->state->refresh_if_required();
705 if (r < 0)
706 return r;
707
708 RWLock::RLocker l(ictx->snap_lock);
709 snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name);
710 if (snap_id == CEPH_NOSNAP)
711 return -ENOENT;
712 bool is_unprotected;
713 r = ictx->is_snap_unprotected(snap_id, &is_unprotected);
714 // consider both PROTECTED or UNPROTECTING to be 'protected',
715 // since in either state they can't be deleted
716 *is_protected = !is_unprotected;
717 return r;
718 }
719
720 int create_v1(IoCtx& io_ctx, const char *imgname, uint64_t size, int order)
721 {
722 CephContext *cct = (CephContext *)io_ctx.cct();
723
724 ldout(cct, 20) << __func__ << " " << &io_ctx << " name = " << imgname
725 << " size = " << size << " order = " << order << dendl;
726 int r = validate_pool(io_ctx, cct);
727 if (r < 0) {
728 return r;
729 }
730
731 ldout(cct, 2) << "adding rbd image to directory..." << dendl;
732 r = tmap_set(io_ctx, imgname);
733 if (r < 0) {
734 lderr(cct) << "error adding image to directory: " << cpp_strerror(r)
735 << dendl;
736 return r;
737 }
738
739 Rados rados(io_ctx);
740 uint64_t bid = rados.get_instance_id();
741
742 ldout(cct, 2) << "creating rbd image..." << dendl;
743 struct rbd_obj_header_ondisk header;
744 init_rbd_header(header, size, order, bid);
745
746 bufferlist bl;
747 bl.append((const char *)&header, sizeof(header));
748
749 string header_oid = util::old_header_name(imgname);
750 r = io_ctx.write(header_oid, bl, bl.length(), 0);
751 if (r < 0) {
752 lderr(cct) << "Error writing image header: " << cpp_strerror(r)
753 << dendl;
754 int remove_r = tmap_rm(io_ctx, imgname);
755 if (remove_r < 0) {
756 lderr(cct) << "Could not remove image from directory after "
757 << "header creation failed: "
758 << cpp_strerror(remove_r) << dendl;
759 }
760 return r;
761 }
762
763 ldout(cct, 2) << "done." << dendl;
764 return 0;
765 }
766
767 int create(librados::IoCtx& io_ctx, const char *imgname, uint64_t size,
768 int *order)
769 {
770 uint64_t order_ = *order;
771 ImageOptions opts;
772
773 int r = opts.set(RBD_IMAGE_OPTION_ORDER, order_);
774 assert(r == 0);
775
776 r = create(io_ctx, imgname, "", size, opts, "", "", false);
777
778 int r1 = opts.get(RBD_IMAGE_OPTION_ORDER, &order_);
779 assert(r1 == 0);
780 *order = order_;
781
782 return r;
783 }
784
785 int create(IoCtx& io_ctx, const char *imgname, uint64_t size,
786 bool old_format, uint64_t features, int *order,
787 uint64_t stripe_unit, uint64_t stripe_count)
788 {
789 if (!order)
790 return -EINVAL;
791
792 uint64_t order_ = *order;
793 uint64_t format = old_format ? 1 : 2;
794 ImageOptions opts;
795 int r;
796
797 r = opts.set(RBD_IMAGE_OPTION_FORMAT, format);
798 assert(r == 0);
799 r = opts.set(RBD_IMAGE_OPTION_FEATURES, features);
800 assert(r == 0);
801 r = opts.set(RBD_IMAGE_OPTION_ORDER, order_);
802 assert(r == 0);
803 r = opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
804 assert(r == 0);
805 r = opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
806 assert(r == 0);
807
808 r = create(io_ctx, imgname, "", size, opts, "", "", false);
809
810 int r1 = opts.get(RBD_IMAGE_OPTION_ORDER, &order_);
811 assert(r1 == 0);
812 *order = order_;
813
814 return r;
815 }
816
817 int create(IoCtx& io_ctx, const std::string &image_name,
818 const std::string &image_id, uint64_t size,
819 ImageOptions& opts,
820 const std::string &non_primary_global_image_id,
821 const std::string &primary_mirror_uuid,
822 bool skip_mirror_enable)
823 {
824 std::string id(image_id);
825 if (id.empty()) {
826 id = util::generate_image_id(io_ctx);
827 }
828
829 CephContext *cct = (CephContext *)io_ctx.cct();
830 ldout(cct, 10) << __func__ << " name=" << image_name << ", "
831 << "id= " << id << ", "
832 << "size=" << size << ", opts=" << opts << dendl;
833
834 uint64_t format;
835 if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0)
836 format = cct->_conf->rbd_default_format;
837 bool old_format = format == 1;
838
839 // make sure it doesn't already exist, in either format
840 int r = detect_format(io_ctx, image_name, NULL, NULL);
841 if (r != -ENOENT) {
842 if (r) {
843 lderr(cct) << "Could not tell if " << image_name << " already exists"
844 << dendl;
845 return r;
846 }
847 lderr(cct) << "rbd image " << image_name << " already exists" << dendl;
848 return -EEXIST;
849 }
850
851 uint64_t order = 0;
852 if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0 || order == 0) {
853 order = cct->_conf->rbd_default_order;
854 }
855 r = image::CreateRequest<>::validate_order(cct, order);
856 if (r < 0) {
857 return r;
858 }
859
860 if (old_format) {
861 r = create_v1(io_ctx, image_name.c_str(), size, order);
862 } else {
863 ThreadPool *thread_pool;
864 ContextWQ *op_work_queue;
865 ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
866
867 C_SaferCond cond;
868 image::CreateRequest<> *req = image::CreateRequest<>::create(
869 io_ctx, image_name, id, size, opts, non_primary_global_image_id,
870 primary_mirror_uuid, skip_mirror_enable, op_work_queue, &cond);
871 req->send();
872
873 r = cond.wait();
874 }
875
876 int r1 = opts.set(RBD_IMAGE_OPTION_ORDER, order);
877 assert(r1 == 0);
878
879 return r;
880 }
881
882 /*
883 * Parent may be in different pool, hence different IoCtx
884 */
885 int clone(IoCtx& p_ioctx, const char *p_name, const char *p_snap_name,
886 IoCtx& c_ioctx, const char *c_name,
887 uint64_t features, int *c_order,
888 uint64_t stripe_unit, int stripe_count)
889 {
890 uint64_t order = *c_order;
891
892 ImageOptions opts;
893 opts.set(RBD_IMAGE_OPTION_FEATURES, features);
894 opts.set(RBD_IMAGE_OPTION_ORDER, order);
895 opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
896 opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
897
898 int r = clone(p_ioctx, p_name, p_snap_name, c_ioctx, c_name, opts);
899 opts.get(RBD_IMAGE_OPTION_ORDER, &order);
900 *c_order = order;
901 return r;
902 }
903
904 int clone(IoCtx& p_ioctx, const char *p_name, const char *p_snap_name,
905 IoCtx& c_ioctx, const char *c_name, ImageOptions& c_opts)
906 {
907 CephContext *cct = (CephContext *)p_ioctx.cct();
908 if (p_snap_name == NULL) {
909 lderr(cct) << "image to be cloned must be a snapshot" << dendl;
910 return -EINVAL;
911 }
912
913 // make sure parent snapshot exists
914 ImageCtx *p_imctx = new ImageCtx(p_name, "", p_snap_name, p_ioctx, true);
915 int r = p_imctx->state->open(false);
916 if (r < 0) {
917 lderr(cct) << "error opening parent image: "
918 << cpp_strerror(r) << dendl;
919 return r;
920 }
921
922 r = clone(p_imctx, c_ioctx, c_name, "", c_opts, "", "");
923
924 int close_r = p_imctx->state->close();
925 if (r == 0 && close_r < 0) {
926 r = close_r;
927 }
928
929 if (r < 0) {
930 return r;
931 }
932 return 0;
933 }
934
935 int clone(ImageCtx *p_imctx, IoCtx& c_ioctx, const std::string &c_name,
936 const std::string &c_id, ImageOptions& c_opts,
937 const std::string &non_primary_global_image_id,
938 const std::string &primary_mirror_uuid)
939 {
940 std::string id(c_id);
941 if (id.empty()) {
942 id = util::generate_image_id(c_ioctx);
943 }
944
945 CephContext *cct = (CephContext *)c_ioctx.cct();
946 ldout(cct, 10) << __func__ << " "
947 << "c_name=" << c_name << ", "
948 << "c_id= " << c_id << ", "
949 << "c_opts=" << c_opts << dendl;
950
951 ThreadPool *thread_pool;
952 ContextWQ *op_work_queue;
953 ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
954
955 C_SaferCond cond;
956 auto *req = image::CloneRequest<>::create(
957 p_imctx, c_ioctx, c_name, id, c_opts,
958 non_primary_global_image_id, primary_mirror_uuid, op_work_queue, &cond);
959 req->send();
960
961 return cond.wait();
962 }
963
964 int rename(IoCtx& io_ctx, const char *srcname, const char *dstname)
965 {
966 CephContext *cct = (CephContext *)io_ctx.cct();
967 ldout(cct, 20) << "rename " << &io_ctx << " " << srcname << " -> "
968 << dstname << dendl;
969
970 ImageCtx *ictx = new ImageCtx(srcname, "", "", io_ctx, false);
971 int r = ictx->state->open(false);
972 if (r < 0) {
973 lderr(ictx->cct) << "error opening source image: " << cpp_strerror(r)
974 << dendl;
975 return r;
976 }
977 BOOST_SCOPE_EXIT((ictx)) {
978 ictx->state->close();
979 } BOOST_SCOPE_EXIT_END
980
981 return ictx->operations->rename(dstname);
982 }
983
984 int info(ImageCtx *ictx, image_info_t& info, size_t infosize)
985 {
986 ldout(ictx->cct, 20) << "info " << ictx << dendl;
987
988 int r = ictx->state->refresh_if_required();
989 if (r < 0)
990 return r;
991
992 image_info(ictx, info, infosize);
993 return 0;
994 }
995
996 int get_old_format(ImageCtx *ictx, uint8_t *old)
997 {
998 int r = ictx->state->refresh_if_required();
999 if (r < 0)
1000 return r;
1001 *old = ictx->old_format;
1002 return 0;
1003 }
1004
1005 int get_size(ImageCtx *ictx, uint64_t *size)
1006 {
1007 int r = ictx->state->refresh_if_required();
1008 if (r < 0)
1009 return r;
1010 RWLock::RLocker l2(ictx->snap_lock);
1011 *size = ictx->get_image_size(ictx->snap_id);
1012 return 0;
1013 }
1014
1015 int get_features(ImageCtx *ictx, uint64_t *features)
1016 {
1017 int r = ictx->state->refresh_if_required();
1018 if (r < 0)
1019 return r;
1020 RWLock::RLocker l(ictx->snap_lock);
1021 *features = ictx->features;
1022 return 0;
1023 }
1024
1025 int get_overlap(ImageCtx *ictx, uint64_t *overlap)
1026 {
1027 int r = ictx->state->refresh_if_required();
1028 if (r < 0)
1029 return r;
1030 RWLock::RLocker l(ictx->snap_lock);
1031 RWLock::RLocker l2(ictx->parent_lock);
1032 return ictx->get_parent_overlap(ictx->snap_id, overlap);
1033 }
1034
1035 int get_parent_info(ImageCtx *ictx, string *parent_pool_name,
1036 string *parent_name, string *parent_id,
1037 string *parent_snap_name)
1038 {
1039 int r = ictx->state->refresh_if_required();
1040 if (r < 0)
1041 return r;
1042
1043 RWLock::RLocker l(ictx->snap_lock);
1044 RWLock::RLocker l2(ictx->parent_lock);
1045 if (ictx->parent == NULL) {
1046 return -ENOENT;
1047 }
1048
1049 ParentSpec parent_spec;
1050
1051 if (ictx->snap_id == CEPH_NOSNAP) {
1052 parent_spec = ictx->parent_md.spec;
1053 } else {
1054 r = ictx->get_parent_spec(ictx->snap_id, &parent_spec);
1055 if (r < 0) {
1056 lderr(ictx->cct) << "Can't find snapshot id = " << ictx->snap_id
1057 << dendl;
1058 return r;
1059 }
1060 if (parent_spec.pool_id == -1)
1061 return -ENOENT;
1062 }
1063 if (parent_pool_name) {
1064 Rados rados(ictx->md_ctx);
1065 r = rados.pool_reverse_lookup(parent_spec.pool_id,
1066 parent_pool_name);
1067 if (r < 0) {
1068 lderr(ictx->cct) << "error looking up pool name: " << cpp_strerror(r)
1069 << dendl;
1070 return r;
1071 }
1072 }
1073
1074 if (parent_snap_name) {
1075 RWLock::RLocker l(ictx->parent->snap_lock);
1076 r = ictx->parent->get_snap_name(parent_spec.snap_id,
1077 parent_snap_name);
1078 if (r < 0) {
1079 lderr(ictx->cct) << "error finding parent snap name: "
1080 << cpp_strerror(r) << dendl;
1081 return r;
1082 }
1083 }
1084
1085 if (parent_name) {
1086 RWLock::RLocker snap_locker(ictx->parent->snap_lock);
1087 *parent_name = ictx->parent->name;
1088 }
1089 if (parent_id) {
1090 *parent_id = ictx->parent->id;
1091 }
1092
1093 return 0;
1094 }
1095
1096 int get_flags(ImageCtx *ictx, uint64_t *flags)
1097 {
1098 int r = ictx->state->refresh_if_required();
1099 if (r < 0) {
1100 return r;
1101 }
1102
1103 RWLock::RLocker l2(ictx->snap_lock);
1104 return ictx->get_flags(ictx->snap_id, flags);
1105 }
1106
1107 int set_image_notification(ImageCtx *ictx, int fd, int type)
1108 {
1109 CephContext *cct = ictx->cct;
1110 ldout(cct, 20) << __func__ << " " << ictx << " fd " << fd << " type" << type << dendl;
1111
1112 int r = ictx->state->refresh_if_required();
1113 if (r < 0) {
1114 return r;
1115 }
1116
1117 if (ictx->event_socket.is_valid())
1118 return -EINVAL;
1119 return ictx->event_socket.init(fd, type);
1120 }
1121
1122 int is_exclusive_lock_owner(ImageCtx *ictx, bool *is_owner)
1123 {
1124 *is_owner = false;
1125
1126 RWLock::RLocker owner_locker(ictx->owner_lock);
1127 if (ictx->exclusive_lock == nullptr ||
1128 !ictx->exclusive_lock->is_lock_owner()) {
1129 return 0;
1130 }
1131
1132 // might have been blacklisted by peer -- ensure we still own
1133 // the lock by pinging the OSD
1134 int r = ictx->exclusive_lock->assert_header_locked();
1135 if (r == -EBUSY || r == -ENOENT) {
1136 return 0;
1137 } else if (r < 0) {
1138 return r;
1139 }
1140
1141 *is_owner = true;
1142 return 0;
1143 }
1144
1145 int lock_acquire(ImageCtx *ictx, rbd_lock_mode_t lock_mode)
1146 {
1147 CephContext *cct = ictx->cct;
1148 ldout(cct, 20) << __func__ << ": ictx=" << ictx << ", "
1149 << "lock_mode=" << lock_mode << dendl;
1150
1151 if (lock_mode != RBD_LOCK_MODE_EXCLUSIVE) {
1152 return -EOPNOTSUPP;
1153 }
1154
1155 C_SaferCond lock_ctx;
1156 {
1157 RWLock::WLocker l(ictx->owner_lock);
1158
1159 if (ictx->exclusive_lock == nullptr) {
1160 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1161 return -EINVAL;
1162 }
1163
1164 if (ictx->get_exclusive_lock_policy()->may_auto_request_lock()) {
1165 ictx->set_exclusive_lock_policy(
1166 new exclusive_lock::StandardPolicy(ictx));
1167 }
1168
1169 if (ictx->exclusive_lock->is_lock_owner()) {
1170 return 0;
1171 }
1172
1173 ictx->exclusive_lock->acquire_lock(&lock_ctx);
1174 }
1175
1176 int r = lock_ctx.wait();
1177 if (r < 0) {
1178 lderr(cct) << "failed to request exclusive lock: " << cpp_strerror(r)
1179 << dendl;
1180 return r;
1181 }
1182
1183 RWLock::RLocker l(ictx->owner_lock);
1184
1185 if (ictx->exclusive_lock == nullptr ||
1186 !ictx->exclusive_lock->is_lock_owner()) {
1187 lderr(cct) << "failed to acquire exclusive lock" << dendl;
1188 return -EROFS;
1189 }
1190
1191 return 0;
1192 }
1193
1194 int lock_release(ImageCtx *ictx)
1195 {
1196 CephContext *cct = ictx->cct;
1197 ldout(cct, 20) << __func__ << ": ictx=" << ictx << dendl;
1198
1199 C_SaferCond lock_ctx;
1200 {
1201 RWLock::WLocker l(ictx->owner_lock);
1202
1203 if (ictx->exclusive_lock == nullptr ||
1204 !ictx->exclusive_lock->is_lock_owner()) {
1205 lderr(cct) << "not exclusive lock owner" << dendl;
1206 return -EINVAL;
1207 }
1208
1209 ictx->exclusive_lock->release_lock(&lock_ctx);
1210 }
1211
1212 int r = lock_ctx.wait();
1213 if (r < 0) {
1214 lderr(cct) << "failed to release exclusive lock: " << cpp_strerror(r)
1215 << dendl;
1216 return r;
1217 }
1218 return 0;
1219 }
1220
1221 int lock_get_owners(ImageCtx *ictx, rbd_lock_mode_t *lock_mode,
1222 std::list<std::string> *lock_owners)
1223 {
1224 CephContext *cct = ictx->cct;
1225 ldout(cct, 20) << __func__ << ": ictx=" << ictx << dendl;
1226
1227 if (!ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
1228 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1229 return -EINVAL;
1230 }
1231
1232 managed_lock::Locker locker;
1233 C_SaferCond get_owner_ctx;
1234 ExclusiveLock<>(*ictx).get_locker(&locker, &get_owner_ctx);
1235 int r = get_owner_ctx.wait();
1236 if (r == -ENOENT) {
1237 return r;
1238 } else if (r < 0) {
1239 lderr(cct) << "failed to determine current lock owner: "
1240 << cpp_strerror(r) << dendl;
1241 return r;
1242 }
1243
1244 *lock_mode = RBD_LOCK_MODE_EXCLUSIVE;
1245 lock_owners->clear();
1246 lock_owners->emplace_back(locker.address);
1247 return 0;
1248 }
1249
1250 int lock_break(ImageCtx *ictx, rbd_lock_mode_t lock_mode,
1251 const std::string &lock_owner)
1252 {
1253 CephContext *cct = ictx->cct;
1254 ldout(cct, 20) << __func__ << ": ictx=" << ictx << ", "
1255 << "lock_mode=" << lock_mode << ", "
1256 << "lock_owner=" << lock_owner << dendl;
1257
1258 if (lock_mode != RBD_LOCK_MODE_EXCLUSIVE) {
1259 return -EOPNOTSUPP;
1260 }
1261
1262 if (ictx->read_only) {
1263 return -EROFS;
1264 }
1265
1266 managed_lock::Locker locker;
1267 C_SaferCond get_owner_ctx;
1268 {
1269 RWLock::RLocker l(ictx->owner_lock);
1270
1271 if (ictx->exclusive_lock == nullptr) {
1272 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1273 return -EINVAL;
1274 }
1275
1276 ictx->exclusive_lock->get_locker(&locker, &get_owner_ctx);
1277 }
1278 int r = get_owner_ctx.wait();
1279 if (r == -ENOENT) {
1280 return r;
1281 } else if (r < 0) {
1282 lderr(cct) << "failed to determine current lock owner: "
1283 << cpp_strerror(r) << dendl;
1284 return r;
1285 }
1286
1287 if (locker.address != lock_owner) {
1288 return -EBUSY;
1289 }
1290
1291 C_SaferCond break_ctx;
1292 {
1293 RWLock::RLocker l(ictx->owner_lock);
1294
1295 if (ictx->exclusive_lock == nullptr) {
1296 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1297 return -EINVAL;
1298 }
1299
1300 ictx->exclusive_lock->break_lock(locker, true, &break_ctx);
1301 }
1302 r = break_ctx.wait();
1303 if (r == -ENOENT) {
1304 return r;
1305 } else if (r < 0) {
1306 lderr(cct) << "failed to break lock: " << cpp_strerror(r) << dendl;
1307 return r;
1308 }
1309 return 0;
1310 }
1311
1312 int remove(IoCtx& io_ctx, const std::string &image_name,
1313 const std::string &image_id, ProgressContext& prog_ctx,
1314 bool force, bool from_trash_remove)
1315 {
1316 CephContext *cct((CephContext *)io_ctx.cct());
1317 ldout(cct, 20) << "remove " << &io_ctx << " "
1318 << (image_id.empty() ? image_name : image_id) << dendl;
1319
1320 ThreadPool *thread_pool;
1321 ContextWQ *op_work_queue;
1322 ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
1323
1324 C_SaferCond cond;
1325 auto req = librbd::image::RemoveRequest<>::create(
1326 io_ctx, image_name, image_id, force, from_trash_remove, prog_ctx,
1327 op_work_queue, &cond);
1328 req->send();
1329
1330 return cond.wait();
1331 }
1332
1333 int trash_move(librados::IoCtx &io_ctx, rbd_trash_image_source_t source,
1334 const std::string &image_name, uint64_t delay) {
1335 CephContext *cct((CephContext *)io_ctx.cct());
1336 ldout(cct, 20) << "trash_move " << &io_ctx << " " << image_name
1337 << dendl;
1338
1339 std::string image_id;
1340 ImageCtx *ictx = new ImageCtx(image_name, "", nullptr, io_ctx, false);
1341 int r = ictx->state->open(true);
1342 if (r < 0) {
1343 ictx = nullptr;
1344
1345 if (r != -ENOENT) {
1346 ldout(cct, 2) << "error opening image: " << cpp_strerror(-r) << dendl;
1347 return r;
1348 }
1349
1350 // try to get image id from the directory
1351 r = cls_client::dir_get_id(&io_ctx, RBD_DIRECTORY, image_name, &image_id);
1352 if (r < 0) {
1353 if (r != -ENOENT) {
1354 ldout(cct, 2) << "error reading image id from dirctory: "
1355 << cpp_strerror(-r) << dendl;
1356 }
1357 return r;
1358 }
1359 } else {
1360 if (ictx->old_format) {
1361 ictx->state->close();
1362 return -EOPNOTSUPP;
1363 }
1364
1365 image_id = ictx->id;
1366 ictx->owner_lock.get_read();
1367 if (ictx->exclusive_lock != nullptr) {
1368 r = ictx->operations->prepare_image_update();
1369 if (r < 0 || (ictx->exclusive_lock != nullptr &&
1370 !ictx->exclusive_lock->is_lock_owner())) {
1371 lderr(cct) << "cannot obtain exclusive lock - not removing" << dendl;
1372 ictx->owner_lock.put_read();
1373 ictx->state->close();
1374 return -EBUSY;
1375 }
1376 }
1377 }
1378
1379 BOOST_SCOPE_EXIT_ALL(ictx, cct) {
1380 if (ictx == nullptr)
1381 return;
1382
1383 bool is_locked = ictx->exclusive_lock != nullptr &&
1384 ictx->exclusive_lock->is_lock_owner();
1385 if (is_locked) {
1386 C_SaferCond ctx;
1387 auto exclusive_lock = ictx->exclusive_lock;
1388 exclusive_lock->shut_down(&ctx);
1389 ictx->owner_lock.put_read();
1390 int r = ctx.wait();
1391 if (r < 0) {
1392 lderr(cct) << "error shutting down exclusive lock" << dendl;
1393 }
1394 delete exclusive_lock;
1395 } else {
1396 ictx->owner_lock.put_read();
1397 }
1398 ictx->state->close();
1399 };
1400
1401 ldout(cct, 2) << "adding image entry to rbd_trash" << dendl;
1402 utime_t ts = ceph_clock_now();
1403 utime_t deferment_end_time = ts;
1404 deferment_end_time += (double)delay;
1405 cls::rbd::TrashImageSource trash_source =
1406 static_cast<cls::rbd::TrashImageSource>(source);
1407 cls::rbd::TrashImageSpec trash_spec(trash_source, image_name, ts,
1408 deferment_end_time);
1409 r = cls_client::trash_add(&io_ctx, image_id, trash_spec);
1410 if (r < 0 && r != -EEXIST) {
1411 lderr(cct) << "error adding image " << image_name << " to rbd_trash"
1412 << dendl;
1413 return r;
1414 } else if (r == -EEXIST) {
1415 ldout(cct, 10) << "found previous unfinished deferred remove for image:"
1416 << image_id << dendl;
1417 // continue with removing image from directory
1418 }
1419
1420 ldout(cct, 2) << "removing id object..." << dendl;
1421 r = io_ctx.remove(util::id_obj_name(image_name));
1422 if (r < 0 && r != -ENOENT) {
1423 lderr(cct) << "error removing id object: " << cpp_strerror(r)
1424 << dendl;
1425 return r;
1426 }
1427
1428 ldout(cct, 2) << "removing rbd image from v2 directory..." << dendl;
1429 r = cls_client::dir_remove_image(&io_ctx, RBD_DIRECTORY, image_name,
1430 image_id);
1431 if (r < 0) {
1432 if (r != -ENOENT) {
1433 lderr(cct) << "error removing image from v2 directory: "
1434 << cpp_strerror(-r) << dendl;
1435 }
1436 return r;
1437 }
1438
1439 return 0;
1440 }
1441
1442 int trash_get(IoCtx &io_ctx, const std::string &id,
1443 trash_image_info_t *info) {
1444 CephContext *cct((CephContext *)io_ctx.cct());
1445 ldout(cct, 20) << __func__ << " " << &io_ctx << dendl;
1446
1447 cls::rbd::TrashImageSpec spec;
1448 int r = cls_client::trash_get(&io_ctx, id, &spec);
1449 if (r == -ENOENT) {
1450 return r;
1451 } else if (r < 0) {
1452 lderr(cct) << "error retrieving trash entry: " << cpp_strerror(r)
1453 << dendl;
1454 return r;
1455 }
1456
1457 rbd_trash_image_source_t source = static_cast<rbd_trash_image_source_t>(
1458 spec.source);
1459 *info = trash_image_info_t{id, spec.name, source, spec.deletion_time.sec(),
1460 spec.deferment_end_time.sec()};
1461 return 0;
1462 }
1463
1464 int trash_list(IoCtx &io_ctx, vector<trash_image_info_t> &entries) {
1465 CephContext *cct((CephContext *)io_ctx.cct());
1466 ldout(cct, 20) << "trash_list " << &io_ctx << dendl;
1467
1468 bool more_entries;
1469 uint32_t max_read = 1024;
1470 std::string last_read = "";
1471 do {
1472 map<string, cls::rbd::TrashImageSpec> trash_entries;
1473 int r = cls_client::trash_list(&io_ctx, last_read, max_read,
1474 &trash_entries);
1475 if (r < 0 && r != -ENOENT) {
1476 lderr(cct) << "error listing rbd trash entries: " << cpp_strerror(r)
1477 << dendl;
1478 return r;
1479 } else if (r == -ENOENT) {
1480 break;
1481 }
1482
1483 if (trash_entries.empty()) {
1484 break;
1485 }
1486
1487 for (const auto &entry : trash_entries) {
1488 rbd_trash_image_source_t source =
1489 static_cast<rbd_trash_image_source_t>(entry.second.source);
1490 entries.push_back({entry.first, entry.second.name, source,
1491 entry.second.deletion_time.sec(),
1492 entry.second.deferment_end_time.sec()});
1493 }
1494 last_read = trash_entries.rbegin()->first;
1495 more_entries = (trash_entries.size() >= max_read);
1496 } while (more_entries);
1497
1498 return 0;
1499 }
1500
1501 int trash_remove(IoCtx &io_ctx, const std::string &image_id, bool force,
1502 ProgressContext& prog_ctx) {
1503 CephContext *cct((CephContext *)io_ctx.cct());
1504 ldout(cct, 20) << "trash_remove " << &io_ctx << " " << image_id
1505 << " " << force << dendl;
1506
1507 cls::rbd::TrashImageSpec trash_spec;
1508 int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec);
1509 if (r < 0) {
1510 lderr(cct) << "error getting image id " << image_id
1511 << " info from trash: " << cpp_strerror(r) << dendl;
1512 return r;
1513 }
1514
1515 utime_t now = ceph_clock_now();
1516 if (now < trash_spec.deferment_end_time && !force) {
1517 lderr(cct) << "error: deferment time has not expired." << dendl;
1518 return -EPERM;
1519 }
1520
1521 r = remove(io_ctx, "", image_id, prog_ctx, false, true);
1522 if (r < 0) {
1523 lderr(cct) << "error removing image " << image_id
1524 << ", which is pending deletion" << dendl;
1525 return r;
1526 }
1527 r = cls_client::trash_remove(&io_ctx, image_id);
1528 if (r < 0 && r != -ENOENT) {
1529 lderr(cct) << "error removing image " << image_id
1530 << " from rbd_trash object" << dendl;
1531 return r;
1532 }
1533 return 0;
1534 }
1535
1536 int trash_restore(librados::IoCtx &io_ctx, const std::string &image_id,
1537 const std::string &image_new_name) {
1538 CephContext *cct((CephContext *)io_ctx.cct());
1539 ldout(cct, 20) << "trash_restore " << &io_ctx << " " << image_id << " "
1540 << image_new_name << dendl;
1541
1542 cls::rbd::TrashImageSpec trash_spec;
1543 int r = cls_client::trash_get(&io_ctx, image_id, &trash_spec);
1544 if (r < 0) {
1545 lderr(cct) << "error getting image id " << image_id
1546 << " info from trash: " << cpp_strerror(r) << dendl;
1547 return r;
1548 }
1549
1550 std::string image_name = image_new_name;
1551 if (image_name.empty()) {
1552 // if user didn't specify a new name, let's try using the old name
1553 image_name = trash_spec.name;
1554 ldout(cct, 20) << "restoring image id " << image_id << " with name "
1555 << image_name << dendl;
1556 }
1557
1558 // check if no image exists with the same name
1559 bool create_id_obj = true;
1560 std::string existing_id;
1561 r = cls_client::get_id(&io_ctx, util::id_obj_name(image_name), &existing_id);
1562 if (r < 0 && r != -ENOENT) {
1563 lderr(cct) << "error checking if image " << image_name << " exists: "
1564 << cpp_strerror(r) << dendl;
1565 return r;
1566 } else if (r != -ENOENT){
1567 // checking if we are recovering from an incomplete restore
1568 if (existing_id != image_id) {
1569 ldout(cct, 2) << "an image with the same name already exists" << dendl;
1570 return -EEXIST;
1571 }
1572 create_id_obj = false;
1573 }
1574
1575 if (create_id_obj) {
1576 ldout(cct, 2) << "adding id object" << dendl;
1577 librados::ObjectWriteOperation op;
1578 op.create(true);
1579 cls_client::set_id(&op, image_id);
1580 r = io_ctx.operate(util::id_obj_name(image_name), &op);
1581 if (r < 0) {
1582 lderr(cct) << "error adding id object for image " << image_name
1583 << ": " << cpp_strerror(r) << dendl;
1584 return r;
1585 }
1586 }
1587
1588 ldout(cct, 2) << "adding rbd image from v2 directory..." << dendl;
1589 r = cls_client::dir_add_image(&io_ctx, RBD_DIRECTORY, image_name,
1590 image_id);
1591 if (r < 0 && r != -EEXIST) {
1592 lderr(cct) << "error adding image to v2 directory: "
1593 << cpp_strerror(r) << dendl;
1594 return r;
1595 }
1596
1597 ldout(cct, 2) << "removing image from trash..." << dendl;
1598 r = cls_client::trash_remove(&io_ctx, image_id);
1599 if (r < 0 && r != -ENOENT) {
1600 lderr(cct) << "error removing image id " << image_id << " from trash: "
1601 << cpp_strerror(r) << dendl;
1602 return r;
1603 }
1604
1605 return 0;
1606 }
1607
1608 int snap_list(ImageCtx *ictx, vector<snap_info_t>& snaps)
1609 {
1610 ldout(ictx->cct, 20) << "snap_list " << ictx << dendl;
1611
1612 int r = ictx->state->refresh_if_required();
1613 if (r < 0)
1614 return r;
1615
1616 RWLock::RLocker l(ictx->snap_lock);
1617 for (map<snap_t, SnapInfo>::iterator it = ictx->snap_info.begin();
1618 it != ictx->snap_info.end(); ++it) {
1619 snap_info_t info;
1620 info.name = it->second.name;
1621 info.id = it->first;
1622 info.size = it->second.size;
1623 snaps.push_back(info);
1624 }
1625
1626 return 0;
1627 }
1628
1629 int snap_exists(ImageCtx *ictx, const cls::rbd::SnapshotNamespace& snap_namespace,
1630 const char *snap_name, bool *exists)
1631 {
1632 ldout(ictx->cct, 20) << "snap_exists " << ictx << " " << snap_name << dendl;
1633
1634 int r = ictx->state->refresh_if_required();
1635 if (r < 0)
1636 return r;
1637
1638 RWLock::RLocker l(ictx->snap_lock);
1639 *exists = ictx->get_snap_id(snap_namespace, snap_name) != CEPH_NOSNAP;
1640 return 0;
1641 }
1642
1643 int snap_remove(ImageCtx *ictx, const char *snap_name, uint32_t flags,
1644 ProgressContext& pctx)
1645 {
1646 ldout(ictx->cct, 20) << "snap_remove " << ictx << " " << snap_name << " flags: " << flags << dendl;
1647
1648 int r = 0;
1649
1650 r = ictx->state->refresh_if_required();
1651 if (r < 0)
1652 return r;
1653
1654 if (flags & RBD_SNAP_REMOVE_FLATTEN) {
1655 r = flatten_children(ictx, snap_name, pctx);
1656 if (r < 0) {
1657 return r;
1658 }
1659 }
1660
1661 bool is_protected;
1662 r = snap_is_protected(ictx, snap_name, &is_protected);
1663 if (r < 0) {
1664 return r;
1665 }
1666
1667 if (is_protected && flags & RBD_SNAP_REMOVE_UNPROTECT) {
1668 r = ictx->operations->snap_unprotect(cls::rbd::UserSnapshotNamespace(), snap_name);
1669 if (r < 0) {
1670 lderr(ictx->cct) << "failed to unprotect snapshot: " << snap_name << dendl;
1671 return r;
1672 }
1673
1674 r = snap_is_protected(ictx, snap_name, &is_protected);
1675 if (r < 0) {
1676 return r;
1677 }
1678 if (is_protected) {
1679 lderr(ictx->cct) << "snapshot is still protected after unprotection" << dendl;
1680 ceph_abort();
1681 }
1682 }
1683
1684 C_SaferCond ctx;
1685 ictx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(), snap_name, &ctx);
1686
1687 r = ctx.wait();
1688 return r;
1689 }
1690
1691 int snap_get_timestamp(ImageCtx *ictx, uint64_t snap_id, struct timespec *timestamp)
1692 {
1693 std::map<librados::snap_t, SnapInfo>::iterator snap_it = ictx->snap_info.find(snap_id);
1694 assert(snap_it != ictx->snap_info.end());
1695 utime_t time = snap_it->second.timestamp;
1696 time.to_timespec(timestamp);
1697 return 0;
1698 }
1699
1700 int snap_get_limit(ImageCtx *ictx, uint64_t *limit)
1701 {
1702 int r = cls_client::snapshot_get_limit(&ictx->md_ctx, ictx->header_oid,
1703 limit);
1704 if (r == -EOPNOTSUPP) {
1705 *limit = UINT64_MAX;
1706 r = 0;
1707 }
1708 return r;
1709 }
1710
1711 int snap_set_limit(ImageCtx *ictx, uint64_t limit)
1712 {
1713 return ictx->operations->snap_set_limit(limit);
1714 }
1715
1716 struct CopyProgressCtx {
1717 explicit CopyProgressCtx(ProgressContext &p)
1718 : destictx(NULL), src_size(0), prog_ctx(p)
1719 { }
1720
1721 ImageCtx *destictx;
1722 uint64_t src_size;
1723 ProgressContext &prog_ctx;
1724 };
1725
1726 int copy(ImageCtx *src, IoCtx& dest_md_ctx, const char *destname,
1727 ImageOptions& opts, ProgressContext &prog_ctx, size_t sparse_size)
1728 {
1729 CephContext *cct = (CephContext *)dest_md_ctx.cct();
1730 ldout(cct, 20) << "copy " << src->name
1731 << (src->snap_name.length() ? "@" + src->snap_name : "")
1732 << " -> " << destname << " opts = " << opts << dendl;
1733
1734 src->snap_lock.get_read();
1735 uint64_t features = src->features;
1736 uint64_t src_size = src->get_image_size(src->snap_id);
1737 src->snap_lock.put_read();
1738 uint64_t format = src->old_format ? 1 : 2;
1739 if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) {
1740 opts.set(RBD_IMAGE_OPTION_FORMAT, format);
1741 }
1742 uint64_t stripe_unit = src->stripe_unit;
1743 if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) {
1744 opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
1745 }
1746 uint64_t stripe_count = src->stripe_count;
1747 if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) {
1748 opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
1749 }
1750 uint64_t order = src->order;
1751 if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) {
1752 opts.set(RBD_IMAGE_OPTION_ORDER, order);
1753 }
1754 if (opts.get(RBD_IMAGE_OPTION_FEATURES, &features) != 0) {
1755 opts.set(RBD_IMAGE_OPTION_FEATURES, features);
1756 }
1757 if (features & ~RBD_FEATURES_ALL) {
1758 lderr(cct) << "librbd does not support requested features" << dendl;
1759 return -ENOSYS;
1760 }
1761
1762 int r = create(dest_md_ctx, destname, "", src_size, opts, "", "", false);
1763 if (r < 0) {
1764 lderr(cct) << "header creation failed" << dendl;
1765 return r;
1766 }
1767 opts.set(RBD_IMAGE_OPTION_ORDER, static_cast<uint64_t>(order));
1768
1769 ImageCtx *dest = new librbd::ImageCtx(destname, "", NULL,
1770 dest_md_ctx, false);
1771 r = dest->state->open(false);
1772 if (r < 0) {
1773 lderr(cct) << "failed to read newly created header" << dendl;
1774 return r;
1775 }
1776
1777 r = copy(src, dest, prog_ctx, sparse_size);
1778
1779 int close_r = dest->state->close();
1780 if (r == 0 && close_r < 0) {
1781 r = close_r;
1782 }
1783 return r;
1784 }
1785
1786 class C_CopyWrite : public Context {
1787 public:
1788 C_CopyWrite(bufferlist *bl, Context* ctx)
1789 : m_bl(bl), m_ctx(ctx) {}
1790 void finish(int r) override {
1791 delete m_bl;
1792 m_ctx->complete(r);
1793 }
1794 private:
1795 bufferlist *m_bl;
1796 Context *m_ctx;
1797 };
1798
1799 class C_CopyRead : public Context {
1800 public:
1801 C_CopyRead(SimpleThrottle *throttle, ImageCtx *dest, uint64_t offset,
1802 bufferlist *bl, size_t sparse_size)
1803 : m_throttle(throttle), m_dest(dest), m_offset(offset), m_bl(bl),
1804 m_sparse_size(sparse_size) {
1805 m_throttle->start_op();
1806 }
1807 void finish(int r) override {
1808 if (r < 0) {
1809 lderr(m_dest->cct) << "error reading from source image at offset "
1810 << m_offset << ": " << cpp_strerror(r) << dendl;
1811 delete m_bl;
1812 m_throttle->end_op(r);
1813 return;
1814 }
1815 assert(m_bl->length() == (size_t)r);
1816
1817 if (m_bl->is_zero()) {
1818 delete m_bl;
1819 m_throttle->end_op(r);
1820 return;
1821 }
1822
1823 if (!m_sparse_size) {
1824 m_sparse_size = (1 << m_dest->order);
1825 }
1826
1827 auto *throttle = m_throttle;
1828 auto *end_op_ctx = new FunctionContext([throttle](int r) {
1829 throttle->end_op(r);
1830 });
1831 auto gather_ctx = new C_Gather(m_dest->cct, end_op_ctx);
1832
1833 bufferptr m_ptr(m_bl->length());
1834 m_bl->rebuild(m_ptr);
1835 size_t write_offset = 0;
1836 size_t write_length = 0;
1837 size_t offset = 0;
1838 size_t length = m_bl->length();
1839 while (offset < length) {
1840 if (util::calc_sparse_extent(m_ptr,
1841 m_sparse_size,
1842 length,
1843 &write_offset,
1844 &write_length,
1845 &offset)) {
1846 bufferptr write_ptr(m_ptr, write_offset, write_length);
1847 bufferlist *write_bl = new bufferlist();
1848 write_bl->push_back(write_ptr);
1849 Context *ctx = new C_CopyWrite(write_bl, gather_ctx->new_sub());
1850 auto comp = io::AioCompletion::create(ctx);
1851
1852 // coordinate through AIO WQ to ensure lock is acquired if needed
1853 m_dest->io_work_queue->aio_write(comp, m_offset + write_offset,
1854 write_length,
1855 std::move(*write_bl),
1856 LIBRADOS_OP_FLAG_FADVISE_DONTNEED,
1857 std::move(read_trace));
1858 write_offset = offset;
1859 write_length = 0;
1860 }
1861 }
1862 delete m_bl;
1863 assert(gather_ctx->get_sub_created_count() > 0);
1864 gather_ctx->activate();
1865 }
1866
1867 ZTracer::Trace read_trace;
1868
1869 private:
1870 SimpleThrottle *m_throttle;
1871 ImageCtx *m_dest;
1872 uint64_t m_offset;
1873 bufferlist *m_bl;
1874 size_t m_sparse_size;
1875 };
1876
1877 int copy(ImageCtx *src, ImageCtx *dest, ProgressContext &prog_ctx, size_t sparse_size)
1878 {
1879 src->snap_lock.get_read();
1880 uint64_t src_size = src->get_image_size(src->snap_id);
1881 src->snap_lock.put_read();
1882
1883 dest->snap_lock.get_read();
1884 uint64_t dest_size = dest->get_image_size(dest->snap_id);
1885 dest->snap_lock.put_read();
1886
1887 CephContext *cct = src->cct;
1888 if (dest_size < src_size) {
1889 lderr(cct) << " src size " << src_size << " > dest size "
1890 << dest_size << dendl;
1891 return -EINVAL;
1892 }
1893 int r;
1894 map<string, bufferlist> pairs;
1895
1896 r = cls_client::metadata_list(&src->md_ctx, src->header_oid, "", 0, &pairs);
1897 if (r < 0 && r != -EOPNOTSUPP && r != -EIO) {
1898 lderr(cct) << "couldn't list metadata: " << cpp_strerror(r) << dendl;
1899 return r;
1900 } else if (r == 0 && !pairs.empty()) {
1901 r = cls_client::metadata_set(&dest->md_ctx, dest->header_oid, pairs);
1902 if (r < 0) {
1903 lderr(cct) << "couldn't set metadata: " << cpp_strerror(r) << dendl;
1904 return r;
1905 }
1906 }
1907
1908 ZTracer::Trace trace;
1909 if (cct->_conf->rbd_blkin_trace_all) {
1910 trace.init("copy", &src->trace_endpoint);
1911 }
1912
1913 RWLock::RLocker owner_lock(src->owner_lock);
1914 SimpleThrottle throttle(src->concurrent_management_ops, false);
1915 uint64_t period = src->get_stripe_period();
1916 unsigned fadvise_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
1917 LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
1918 for (uint64_t offset = 0; offset < src_size; offset += period) {
1919 if (throttle.pending_error()) {
1920 return throttle.wait_for_ret();
1921 }
1922
1923 uint64_t len = min(period, src_size - offset);
1924 bufferlist *bl = new bufferlist();
1925 auto ctx = new C_CopyRead(&throttle, dest, offset, bl, sparse_size);
1926 auto comp = io::AioCompletion::create_and_start<Context>(
1927 ctx, src, io::AIO_TYPE_READ);
1928
1929 io::ImageReadRequest<> req(*src, comp, {{offset, len}},
1930 io::ReadResult{bl}, fadvise_flags,
1931 std::move(trace));
1932 ctx->read_trace = req.get_trace();
1933
1934 req.send();
1935 prog_ctx.update_progress(offset, src_size);
1936 }
1937
1938 r = throttle.wait_for_ret();
1939 if (r >= 0)
1940 prog_ctx.update_progress(src_size, src_size);
1941 return r;
1942 }
1943
1944 int snap_set(ImageCtx *ictx, const cls::rbd::SnapshotNamespace &snap_namespace,
1945 const char *snap_name)
1946 {
1947 ldout(ictx->cct, 20) << "snap_set " << ictx << " snap = "
1948 << (snap_name ? snap_name : "NULL") << dendl;
1949
1950 // ignore return value, since we may be set to a non-existent
1951 // snapshot and the user is trying to fix that
1952 ictx->state->refresh_if_required();
1953
1954 C_SaferCond ctx;
1955 std::string name(snap_name == nullptr ? "" : snap_name);
1956 ictx->state->snap_set(snap_namespace, name, &ctx);
1957
1958 int r = ctx.wait();
1959 if (r < 0) {
1960 if (r != -ENOENT) {
1961 lderr(ictx->cct) << "failed to " << (name.empty() ? "un" : "") << "set "
1962 << "snapshot: " << cpp_strerror(r) << dendl;
1963 }
1964 return r;
1965 }
1966
1967 return 0;
1968 }
1969
1970 int list_lockers(ImageCtx *ictx,
1971 std::list<locker_t> *lockers,
1972 bool *exclusive,
1973 string *tag)
1974 {
1975 ldout(ictx->cct, 20) << "list_locks on image " << ictx << dendl;
1976
1977 int r = ictx->state->refresh_if_required();
1978 if (r < 0)
1979 return r;
1980
1981 RWLock::RLocker locker(ictx->md_lock);
1982 if (exclusive)
1983 *exclusive = ictx->exclusive_locked;
1984 if (tag)
1985 *tag = ictx->lock_tag;
1986 if (lockers) {
1987 lockers->clear();
1988 map<rados::cls::lock::locker_id_t,
1989 rados::cls::lock::locker_info_t>::const_iterator it;
1990 for (it = ictx->lockers.begin(); it != ictx->lockers.end(); ++it) {
1991 locker_t locker;
1992 locker.client = stringify(it->first.locker);
1993 locker.cookie = it->first.cookie;
1994 locker.address = stringify(it->second.addr);
1995 lockers->push_back(locker);
1996 }
1997 }
1998
1999 return 0;
2000 }
2001
2002 int lock(ImageCtx *ictx, bool exclusive, const string& cookie,
2003 const string& tag)
2004 {
2005 ldout(ictx->cct, 20) << "lock image " << ictx << " exclusive=" << exclusive
2006 << " cookie='" << cookie << "' tag='" << tag << "'"
2007 << dendl;
2008
2009 int r = ictx->state->refresh_if_required();
2010 if (r < 0)
2011 return r;
2012
2013 /**
2014 * If we wanted we could do something more intelligent, like local
2015 * checks that we think we will succeed. But for now, let's not
2016 * duplicate that code.
2017 */
2018 {
2019 RWLock::RLocker locker(ictx->md_lock);
2020 r = rados::cls::lock::lock(&ictx->md_ctx, ictx->header_oid, RBD_LOCK_NAME,
2021 exclusive ? LOCK_EXCLUSIVE : LOCK_SHARED,
2022 cookie, tag, "", utime_t(), 0);
2023 if (r < 0) {
2024 return r;
2025 }
2026 }
2027
2028 ictx->notify_update();
2029 return 0;
2030 }
2031
2032 int unlock(ImageCtx *ictx, const string& cookie)
2033 {
2034 ldout(ictx->cct, 20) << "unlock image " << ictx
2035 << " cookie='" << cookie << "'" << dendl;
2036
2037 int r = ictx->state->refresh_if_required();
2038 if (r < 0)
2039 return r;
2040
2041 {
2042 RWLock::RLocker locker(ictx->md_lock);
2043 r = rados::cls::lock::unlock(&ictx->md_ctx, ictx->header_oid,
2044 RBD_LOCK_NAME, cookie);
2045 if (r < 0) {
2046 return r;
2047 }
2048 }
2049
2050 ictx->notify_update();
2051 return 0;
2052 }
2053
2054 int break_lock(ImageCtx *ictx, const string& client,
2055 const string& cookie)
2056 {
2057 ldout(ictx->cct, 20) << "break_lock image " << ictx << " client='" << client
2058 << "' cookie='" << cookie << "'" << dendl;
2059
2060 int r = ictx->state->refresh_if_required();
2061 if (r < 0)
2062 return r;
2063
2064 entity_name_t lock_client;
2065 if (!lock_client.parse(client)) {
2066 lderr(ictx->cct) << "Unable to parse client '" << client
2067 << "'" << dendl;
2068 return -EINVAL;
2069 }
2070
2071 if (ictx->blacklist_on_break_lock) {
2072 typedef std::map<rados::cls::lock::locker_id_t,
2073 rados::cls::lock::locker_info_t> Lockers;
2074 Lockers lockers;
2075 ClsLockType lock_type;
2076 std::string lock_tag;
2077 r = rados::cls::lock::get_lock_info(&ictx->md_ctx, ictx->header_oid,
2078 RBD_LOCK_NAME, &lockers, &lock_type,
2079 &lock_tag);
2080 if (r < 0) {
2081 lderr(ictx->cct) << "unable to retrieve lock info: " << cpp_strerror(r)
2082 << dendl;
2083 return r;
2084 }
2085
2086 std::string client_address;
2087 for (Lockers::iterator it = lockers.begin();
2088 it != lockers.end(); ++it) {
2089 if (it->first.locker == lock_client) {
2090 client_address = stringify(it->second.addr);
2091 break;
2092 }
2093 }
2094 if (client_address.empty()) {
2095 return -ENOENT;
2096 }
2097
2098 RWLock::RLocker locker(ictx->md_lock);
2099 librados::Rados rados(ictx->md_ctx);
2100 r = rados.blacklist_add(client_address,
2101 ictx->blacklist_expire_seconds);
2102 if (r < 0) {
2103 lderr(ictx->cct) << "unable to blacklist client: " << cpp_strerror(r)
2104 << dendl;
2105 return r;
2106 }
2107 }
2108
2109 r = rados::cls::lock::break_lock(&ictx->md_ctx, ictx->header_oid,
2110 RBD_LOCK_NAME, cookie, lock_client);
2111 if (r < 0)
2112 return r;
2113 ictx->notify_update();
2114 return 0;
2115 }
2116
2117 void rbd_ctx_cb(completion_t cb, void *arg)
2118 {
2119 Context *ctx = reinterpret_cast<Context *>(arg);
2120 auto comp = reinterpret_cast<io::AioCompletion *>(cb);
2121 ctx->complete(comp->get_return_value());
2122 comp->release();
2123 }
2124
2125 int64_t read_iterate(ImageCtx *ictx, uint64_t off, uint64_t len,
2126 int (*cb)(uint64_t, size_t, const char *, void *),
2127 void *arg)
2128 {
2129 utime_t start_time, elapsed;
2130
2131 ldout(ictx->cct, 20) << "read_iterate " << ictx << " off = " << off
2132 << " len = " << len << dendl;
2133
2134 int r = ictx->state->refresh_if_required();
2135 if (r < 0)
2136 return r;
2137
2138 uint64_t mylen = len;
2139 ictx->snap_lock.get_read();
2140 r = clip_io(ictx, off, &mylen);
2141 ictx->snap_lock.put_read();
2142 if (r < 0)
2143 return r;
2144
2145 int64_t total_read = 0;
2146 uint64_t period = ictx->get_stripe_period();
2147 uint64_t left = mylen;
2148
2149 ZTracer::Trace trace;
2150 if (ictx->cct->_conf->rbd_blkin_trace_all) {
2151 trace.init("read_iterate", &ictx->trace_endpoint);
2152 }
2153
2154 RWLock::RLocker owner_locker(ictx->owner_lock);
2155 start_time = ceph_clock_now();
2156 while (left > 0) {
2157 uint64_t period_off = off - (off % period);
2158 uint64_t read_len = min(period_off + period - off, left);
2159
2160 bufferlist bl;
2161
2162 C_SaferCond ctx;
2163 auto c = io::AioCompletion::create_and_start(&ctx, ictx,
2164 io::AIO_TYPE_READ);
2165 io::ImageRequest<>::aio_read(ictx, c, {{off, read_len}},
2166 io::ReadResult{&bl}, 0, std::move(trace));
2167
2168 int ret = ctx.wait();
2169 if (ret < 0) {
2170 return ret;
2171 }
2172
2173 r = cb(total_read, ret, bl.c_str(), arg);
2174 if (r < 0) {
2175 return r;
2176 }
2177
2178 total_read += ret;
2179 left -= ret;
2180 off += ret;
2181 }
2182
2183 elapsed = ceph_clock_now() - start_time;
2184 ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed);
2185 ictx->perfcounter->inc(l_librbd_rd);
2186 ictx->perfcounter->inc(l_librbd_rd_bytes, mylen);
2187 return total_read;
2188 }
2189
2190 // validate extent against image size; clip to image size if necessary
2191 int clip_io(ImageCtx *ictx, uint64_t off, uint64_t *len)
2192 {
2193 assert(ictx->snap_lock.is_locked());
2194 uint64_t image_size = ictx->get_image_size(ictx->snap_id);
2195 bool snap_exists = ictx->snap_exists;
2196
2197 if (!snap_exists)
2198 return -ENOENT;
2199
2200 // special-case "len == 0" requests: always valid
2201 if (*len == 0)
2202 return 0;
2203
2204 // can't start past end
2205 if (off >= image_size)
2206 return -EINVAL;
2207
2208 // clip requests that extend past end to just end
2209 if ((off + *len) > image_size)
2210 *len = (size_t)(image_size - off);
2211
2212 return 0;
2213 }
2214
2215 int flush(ImageCtx *ictx)
2216 {
2217 CephContext *cct = ictx->cct;
2218 ldout(cct, 20) << "flush " << ictx << dendl;
2219
2220 int r = ictx->state->refresh_if_required();
2221 if (r < 0) {
2222 return r;
2223 }
2224
2225 ictx->user_flushed();
2226 C_SaferCond ctx;
2227 {
2228 RWLock::RLocker owner_locker(ictx->owner_lock);
2229 ictx->flush(&ctx);
2230 }
2231 r = ctx.wait();
2232
2233 ictx->perfcounter->inc(l_librbd_flush);
2234 return r;
2235 }
2236
2237 int invalidate_cache(ImageCtx *ictx)
2238 {
2239 CephContext *cct = ictx->cct;
2240 ldout(cct, 20) << "invalidate_cache " << ictx << dendl;
2241
2242 int r = ictx->state->refresh_if_required();
2243 if (r < 0) {
2244 return r;
2245 }
2246
2247 RWLock::RLocker owner_locker(ictx->owner_lock);
2248 RWLock::WLocker md_locker(ictx->md_lock);
2249 r = ictx->invalidate_cache(false);
2250 ictx->perfcounter->inc(l_librbd_invalidate_cache);
2251 return r;
2252 }
2253
2254 int poll_io_events(ImageCtx *ictx, io::AioCompletion **comps, int numcomp)
2255 {
2256 if (numcomp <= 0)
2257 return -EINVAL;
2258 CephContext *cct = ictx->cct;
2259 ldout(cct, 20) << __func__ << " " << ictx << " numcomp = " << numcomp
2260 << dendl;
2261 int i = 0;
2262 Mutex::Locker l(ictx->completed_reqs_lock);
2263 while (i < numcomp) {
2264 if (ictx->completed_reqs.empty())
2265 break;
2266 comps[i++] = ictx->completed_reqs.front();
2267 ictx->completed_reqs.pop_front();
2268 }
2269 return i;
2270 }
2271
2272 int metadata_get(ImageCtx *ictx, const string &key, string *value)
2273 {
2274 CephContext *cct = ictx->cct;
2275 ldout(cct, 20) << "metadata_get " << ictx << " key=" << key << dendl;
2276
2277 int r = ictx->state->refresh_if_required();
2278 if (r < 0) {
2279 return r;
2280 }
2281
2282 return cls_client::metadata_get(&ictx->md_ctx, ictx->header_oid, key, value);
2283 }
2284
2285 int metadata_list(ImageCtx *ictx, const string &start, uint64_t max, map<string, bufferlist> *pairs)
2286 {
2287 CephContext *cct = ictx->cct;
2288 ldout(cct, 20) << "metadata_list " << ictx << dendl;
2289
2290 int r = ictx->state->refresh_if_required();
2291 if (r < 0) {
2292 return r;
2293 }
2294
2295 return cls_client::metadata_list(&ictx->md_ctx, ictx->header_oid, start, max, pairs);
2296 }
2297
2298 struct C_RBD_Readahead : public Context {
2299 ImageCtx *ictx;
2300 object_t oid;
2301 uint64_t offset;
2302 uint64_t length;
2303 C_RBD_Readahead(ImageCtx *ictx, object_t oid, uint64_t offset, uint64_t length)
2304 : ictx(ictx), oid(oid), offset(offset), length(length) { }
2305 void finish(int r) override {
2306 ldout(ictx->cct, 20) << "C_RBD_Readahead on " << oid << ": " << offset << "+" << length << dendl;
2307 ictx->readahead.dec_pending();
2308 }
2309 };
2310
2311 void readahead(ImageCtx *ictx,
2312 const vector<pair<uint64_t,uint64_t> >& image_extents)
2313 {
2314 uint64_t total_bytes = 0;
2315 for (vector<pair<uint64_t,uint64_t> >::const_iterator p = image_extents.begin();
2316 p != image_extents.end();
2317 ++p) {
2318 total_bytes += p->second;
2319 }
2320
2321 ictx->md_lock.get_write();
2322 bool abort = ictx->readahead_disable_after_bytes != 0 &&
2323 ictx->total_bytes_read > ictx->readahead_disable_after_bytes;
2324 if (abort) {
2325 ictx->md_lock.put_write();
2326 return;
2327 }
2328 ictx->total_bytes_read += total_bytes;
2329 ictx->snap_lock.get_read();
2330 uint64_t image_size = ictx->get_image_size(ictx->snap_id);
2331 ictx->snap_lock.put_read();
2332 ictx->md_lock.put_write();
2333
2334 pair<uint64_t, uint64_t> readahead_extent = ictx->readahead.update(image_extents, image_size);
2335 uint64_t readahead_offset = readahead_extent.first;
2336 uint64_t readahead_length = readahead_extent.second;
2337
2338 if (readahead_length > 0) {
2339 ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~" << readahead_length << dendl;
2340 map<object_t,vector<ObjectExtent> > readahead_object_extents;
2341 Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout,
2342 readahead_offset, readahead_length, 0, readahead_object_extents);
2343 for (map<object_t,vector<ObjectExtent> >::iterator p = readahead_object_extents.begin(); p != readahead_object_extents.end(); ++p) {
2344 for (vector<ObjectExtent>::iterator q = p->second.begin(); q != p->second.end(); ++q) {
2345 ldout(ictx->cct, 20) << "(readahead) oid " << q->oid << " " << q->offset << "~" << q->length << dendl;
2346
2347 Context *req_comp = new C_RBD_Readahead(ictx, q->oid, q->offset, q->length);
2348 ictx->readahead.inc_pending();
2349 ictx->aio_read_from_cache(q->oid, q->objectno, NULL,
2350 q->length, q->offset,
2351 req_comp, 0, nullptr);
2352 }
2353 }
2354 ictx->perfcounter->inc(l_librbd_readahead);
2355 ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length);
2356 }
2357 }
2358
2359
2360
2361 }