]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/internal.cc
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / librbd / internal.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#include "include/int_types.h"
4
5#include <errno.h>
6#include <limits.h>
7
8#include "include/types.h"
9#include "include/uuid.h"
10#include "common/ceph_context.h"
11#include "common/dout.h"
12#include "common/errno.h"
13#include "common/Throttle.h"
14#include "common/event_socket.h"
11fdf7f2
TL
15#include "common/perf_counters.h"
16#include "osdc/Striper.h"
7c673cae
FG
17#include "include/stringify.h"
18
11fdf7f2 19#include "cls/lock/cls_lock_client.h"
7c673cae
FG
20#include "cls/rbd/cls_rbd.h"
21#include "cls/rbd/cls_rbd_types.h"
22#include "cls/rbd/cls_rbd_client.h"
23#include "cls/journal/cls_journal_types.h"
24#include "cls/journal/cls_journal_client.h"
25
26#include "librbd/ExclusiveLock.h"
27#include "librbd/ImageCtx.h"
28#include "librbd/ImageState.h"
29#include "librbd/internal.h"
30#include "librbd/Journal.h"
31#include "librbd/ObjectMap.h"
32#include "librbd/Operations.h"
33#include "librbd/Types.h"
34#include "librbd/Utils.h"
11fdf7f2 35#include "librbd/api/Config.h"
7c673cae
FG
36#include "librbd/api/Image.h"
37#include "librbd/exclusive_lock/AutomaticPolicy.h"
38#include "librbd/exclusive_lock/StandardPolicy.h"
39#include "librbd/image/CloneRequest.h"
40#include "librbd/image/CreateRequest.h"
7c673cae
FG
41#include "librbd/io/AioCompletion.h"
42#include "librbd/io/ImageRequest.h"
43#include "librbd/io/ImageRequestWQ.h"
11fdf7f2
TL
44#include "librbd/io/ObjectDispatcher.h"
45#include "librbd/io/ObjectDispatchSpec.h"
7c673cae
FG
46#include "librbd/io/ObjectRequest.h"
47#include "librbd/io/ReadResult.h"
48#include "librbd/journal/Types.h"
49#include "librbd/managed_lock/Types.h"
50#include "librbd/mirror/EnableRequest.h"
51#include "librbd/operation/TrimRequest.h"
52
53#include "journal/Journaler.h"
54
55#include <boost/scope_exit.hpp>
56#include <boost/variant.hpp>
11fdf7f2 57#include "include/ceph_assert.h"
7c673cae
FG
58
59#define dout_subsys ceph_subsys_rbd
60#undef dout_prefix
61#define dout_prefix *_dout << "librbd: "
62
63#define rbd_howmany(x, y) (((x) + (y) - 1) / (y))
64
65using std::map;
66using std::pair;
67using std::set;
68using std::string;
69using std::vector;
70// list binds to list() here, so std::list is explicitly used below
71
72using ceph::bufferlist;
73using librados::snap_t;
74using librados::IoCtx;
75using librados::Rados;
76
77namespace librbd {
78
79namespace {
80
81int validate_pool(IoCtx &io_ctx, CephContext *cct) {
11fdf7f2 82 if (!cct->_conf.get_val<bool>("rbd_validate_pool")) {
7c673cae
FG
83 return 0;
84 }
85
86 int r = io_ctx.stat(RBD_DIRECTORY, NULL, NULL);
87 if (r == 0) {
88 return 0;
89 } else if (r < 0 && r != -ENOENT) {
90 lderr(cct) << "failed to stat RBD directory: " << cpp_strerror(r) << dendl;
91 return r;
92 }
93
94 // allocate a self-managed snapshot id if this a new pool to force
95 // self-managed snapshot mode
96 uint64_t snap_id;
97 r = io_ctx.selfmanaged_snap_create(&snap_id);
98 if (r == -EINVAL) {
99 lderr(cct) << "pool not configured for self-managed RBD snapshot support"
100 << dendl;
101 return r;
102 } else if (r < 0) {
103 lderr(cct) << "failed to allocate self-managed snapshot: "
104 << cpp_strerror(r) << dendl;
105 return r;
106 }
107
108 r = io_ctx.selfmanaged_snap_remove(snap_id);
109 if (r < 0) {
110 lderr(cct) << "failed to release self-managed snapshot " << snap_id
111 << ": " << cpp_strerror(r) << dendl;
112 }
113 return 0;
114}
115
7c673cae
FG
116} // anonymous namespace
117
118 int detect_format(IoCtx &io_ctx, const string &name,
119 bool *old_format, uint64_t *size)
120 {
121 CephContext *cct = (CephContext *)io_ctx.cct();
122 if (old_format)
123 *old_format = true;
124 int r = io_ctx.stat(util::old_header_name(name), size, NULL);
125 if (r == -ENOENT) {
126 if (old_format)
127 *old_format = false;
128 r = io_ctx.stat(util::id_obj_name(name), size, NULL);
129 if (r < 0)
130 return r;
131 } else if (r < 0) {
132 return r;
133 }
134
135 ldout(cct, 20) << "detect format of " << name << " : "
136 << (old_format ? (*old_format ? "old" : "new") :
137 "don't care") << dendl;
138 return 0;
139 }
140
// True when a parent pool is set (id other than -1) and `off` does not
// exceed `overlap`.
bool has_parent(int64_t parent_pool_id, uint64_t off, uint64_t overlap)
{
  if (parent_pool_id == -1) {
    return false;
  }
  return off <= overlap;
}
145
146 void init_rbd_header(struct rbd_obj_header_ondisk& ondisk,
147 uint64_t size, int order, uint64_t bid)
148 {
149 uint32_t hi = bid >> 32;
150 uint32_t lo = bid & 0xFFFFFFFF;
151 uint32_t extra = rand() % 0xFFFFFFFF;
92f5a8d4 152 // FIPS zeroization audit 20191117: this memset is not security related.
7c673cae
FG
153 memset(&ondisk, 0, sizeof(ondisk));
154
155 memcpy(&ondisk.text, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT));
156 memcpy(&ondisk.signature, RBD_HEADER_SIGNATURE,
157 sizeof(RBD_HEADER_SIGNATURE));
158 memcpy(&ondisk.version, RBD_HEADER_VERSION, sizeof(RBD_HEADER_VERSION));
159
160 snprintf(ondisk.block_name, sizeof(ondisk.block_name), "rb.%x.%x.%x",
161 hi, lo, extra);
162
163 ondisk.image_size = size;
164 ondisk.options.order = order;
165 ondisk.options.crypt_type = RBD_CRYPT_NONE;
166 ondisk.options.comp_type = RBD_COMP_NONE;
167 ondisk.snap_seq = 0;
168 ondisk.snap_count = 0;
169 ondisk.reserved = 0;
170 ondisk.snap_names_len = 0;
171 }
172
173 void image_info(ImageCtx *ictx, image_info_t& info, size_t infosize)
174 {
175 int obj_order = ictx->order;
176 ictx->snap_lock.get_read();
177 info.size = ictx->get_image_size(ictx->snap_id);
178 ictx->snap_lock.put_read();
179 info.obj_size = 1ULL << obj_order;
180 info.num_objs = Striper::get_num_objects(ictx->layout, info.size);
181 info.order = obj_order;
182 strncpy(info.block_name_prefix, ictx->object_prefix.c_str(),
183 RBD_MAX_BLOCK_NAME_SIZE);
184 info.block_name_prefix[RBD_MAX_BLOCK_NAME_SIZE - 1] = '\0';
185
186 // clear deprecated fields
187 info.parent_pool = -1L;
188 info.parent_name[0] = '\0';
189 }
190
// Extracts the object number from a data object name.
//
// The first object_prefix.length() + 1 characters of `oid` (prefix plus one
// separator character) are skipped and the remainder is parsed as a
// hexadecimal number.  Returns 0 when `oid` is too short to hold anything
// after the prefix or when no hex digits follow it.
uint64_t oid_to_object_no(const std::string& oid,
                          const std::string& object_prefix)
{
  std::istringstream iss(oid);
  // skip object prefix and separator
  iss.ignore(object_prefix.length() + 1);

  // Must be initialized: if ignore() ran into end-of-input the extraction
  // below fails before storing anything, and the value would otherwise be
  // returned uninitialized.
  uint64_t num = 0;
  iss >> std::hex >> num;
  return num;
}
200
201 void trim_image(ImageCtx *ictx, uint64_t newsize, ProgressContext& prog_ctx)
202 {
11fdf7f2
TL
203 ceph_assert(ictx->owner_lock.is_locked());
204 ceph_assert(ictx->exclusive_lock == nullptr ||
205 ictx->exclusive_lock->is_lock_owner());
7c673cae
FG
206
207 C_SaferCond ctx;
208 ictx->snap_lock.get_read();
209 operation::TrimRequest<> *req = operation::TrimRequest<>::create(
210 *ictx, &ctx, ictx->size, newsize, prog_ctx);
211 ictx->snap_lock.put_read();
212 req->send();
213
214 int r = ctx.wait();
215 if (r < 0) {
216 lderr(ictx->cct) << "warning: failed to remove some object(s): "
217 << cpp_strerror(r) << dendl;
218 }
219 }
220
221 int read_header_bl(IoCtx& io_ctx, const string& header_oid,
222 bufferlist& header, uint64_t *ver)
223 {
224 int r;
225 uint64_t off = 0;
226#define READ_SIZE 4096
227 do {
228 bufferlist bl;
229 r = io_ctx.read(header_oid, bl, READ_SIZE, off);
230 if (r < 0)
231 return r;
232 header.claim_append(bl);
233 off += r;
234 } while (r == READ_SIZE);
235
11fdf7f2
TL
236 static_assert(sizeof(RBD_HEADER_TEXT) == sizeof(RBD_MIGRATE_HEADER_TEXT),
237 "length of rbd headers must be the same");
238
7c673cae 239 if (header.length() < sizeof(RBD_HEADER_TEXT) ||
11fdf7f2
TL
240 (memcmp(RBD_HEADER_TEXT, header.c_str(),
241 sizeof(RBD_HEADER_TEXT)) != 0 &&
242 memcmp(RBD_MIGRATE_HEADER_TEXT, header.c_str(),
243 sizeof(RBD_MIGRATE_HEADER_TEXT)) != 0)) {
7c673cae
FG
244 CephContext *cct = (CephContext *)io_ctx.cct();
245 lderr(cct) << "unrecognized header format" << dendl;
246 return -ENXIO;
247 }
248
249 if (ver)
250 *ver = io_ctx.get_last_version();
251
252 return 0;
253 }
254
255 int read_header(IoCtx& io_ctx, const string& header_oid,
256 struct rbd_obj_header_ondisk *header, uint64_t *ver)
257 {
258 bufferlist header_bl;
259 int r = read_header_bl(io_ctx, header_oid, header_bl, ver);
260 if (r < 0)
261 return r;
262 if (header_bl.length() < (int)sizeof(*header))
263 return -EIO;
264 memcpy(header, header_bl.c_str(), sizeof(*header));
265
266 return 0;
267 }
268
269 int tmap_set(IoCtx& io_ctx, const string& imgname)
270 {
271 bufferlist cmdbl, emptybl;
272 __u8 c = CEPH_OSD_TMAP_SET;
11fdf7f2
TL
273 encode(c, cmdbl);
274 encode(imgname, cmdbl);
275 encode(emptybl, cmdbl);
7c673cae
FG
276 return io_ctx.tmap_update(RBD_DIRECTORY, cmdbl);
277 }
278
279 int tmap_rm(IoCtx& io_ctx, const string& imgname)
280 {
281 bufferlist cmdbl;
282 __u8 c = CEPH_OSD_TMAP_RM;
11fdf7f2
TL
283 encode(c, cmdbl);
284 encode(imgname, cmdbl);
7c673cae
FG
285 return io_ctx.tmap_update(RBD_DIRECTORY, cmdbl);
286 }
287
288 typedef boost::variant<std::string,uint64_t> image_option_value_t;
289 typedef std::map<int,image_option_value_t> image_options_t;
290 typedef std::shared_ptr<image_options_t> image_options_ref;
291
292 enum image_option_type_t {
293 STR,
294 UINT64,
295 };
296
297 const std::map<int, image_option_type_t> IMAGE_OPTIONS_TYPE_MAPPING = {
298 {RBD_IMAGE_OPTION_FORMAT, UINT64},
299 {RBD_IMAGE_OPTION_FEATURES, UINT64},
300 {RBD_IMAGE_OPTION_ORDER, UINT64},
301 {RBD_IMAGE_OPTION_STRIPE_UNIT, UINT64},
302 {RBD_IMAGE_OPTION_STRIPE_COUNT, UINT64},
303 {RBD_IMAGE_OPTION_JOURNAL_ORDER, UINT64},
304 {RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH, UINT64},
305 {RBD_IMAGE_OPTION_JOURNAL_POOL, STR},
306 {RBD_IMAGE_OPTION_FEATURES_SET, UINT64},
307 {RBD_IMAGE_OPTION_FEATURES_CLEAR, UINT64},
308 {RBD_IMAGE_OPTION_DATA_POOL, STR},
11fdf7f2 309 {RBD_IMAGE_OPTION_FLATTEN, UINT64},
92f5a8d4 310 {RBD_IMAGE_OPTION_CLONE_FORMAT, UINT64},
7c673cae
FG
311 };
312
313 std::string image_option_name(int optname) {
314 switch (optname) {
315 case RBD_IMAGE_OPTION_FORMAT:
316 return "format";
317 case RBD_IMAGE_OPTION_FEATURES:
318 return "features";
319 case RBD_IMAGE_OPTION_ORDER:
320 return "order";
321 case RBD_IMAGE_OPTION_STRIPE_UNIT:
322 return "stripe_unit";
323 case RBD_IMAGE_OPTION_STRIPE_COUNT:
324 return "stripe_count";
325 case RBD_IMAGE_OPTION_JOURNAL_ORDER:
326 return "journal_order";
327 case RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH:
328 return "journal_splay_width";
329 case RBD_IMAGE_OPTION_JOURNAL_POOL:
330 return "journal_pool";
331 case RBD_IMAGE_OPTION_FEATURES_SET:
332 return "features_set";
333 case RBD_IMAGE_OPTION_FEATURES_CLEAR:
334 return "features_clear";
335 case RBD_IMAGE_OPTION_DATA_POOL:
336 return "data_pool";
11fdf7f2
TL
337 case RBD_IMAGE_OPTION_FLATTEN:
338 return "flatten";
92f5a8d4
TL
339 case RBD_IMAGE_OPTION_CLONE_FORMAT:
340 return "clone_format";
7c673cae
FG
341 default:
342 return "unknown (" + stringify(optname) + ")";
343 }
344 }
345
7c673cae
FG
346 void image_options_create(rbd_image_options_t* opts)
347 {
348 image_options_ref* opts_ = new image_options_ref(new image_options_t());
349
350 *opts = static_cast<rbd_image_options_t>(opts_);
351 }
352
353 void image_options_create_ref(rbd_image_options_t* opts,
354 rbd_image_options_t orig)
355 {
356 image_options_ref* orig_ = static_cast<image_options_ref*>(orig);
357 image_options_ref* opts_ = new image_options_ref(*orig_);
358
359 *opts = static_cast<rbd_image_options_t>(opts_);
360 }
361
362 void image_options_copy(rbd_image_options_t* opts,
363 const ImageOptions &orig)
364 {
365 image_options_ref* opts_ = new image_options_ref(new image_options_t());
366
367 *opts = static_cast<rbd_image_options_t>(opts_);
368
369 std::string str_val;
370 uint64_t uint64_val;
371 for (auto &i : IMAGE_OPTIONS_TYPE_MAPPING) {
372 switch (i.second) {
373 case STR:
374 if (orig.get(i.first, &str_val) == 0) {
375 image_options_set(*opts, i.first, str_val);
376 }
377 continue;
378 case UINT64:
379 if (orig.get(i.first, &uint64_val) == 0) {
380 image_options_set(*opts, i.first, uint64_val);
381 }
382 continue;
383 }
384 }
385 }
386
387 void image_options_destroy(rbd_image_options_t opts)
388 {
389 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
390
391 delete opts_;
392 }
393
394 int image_options_set(rbd_image_options_t opts, int optname,
395 const std::string& optval)
396 {
397 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
398
399 std::map<int, image_option_type_t>::const_iterator i =
400 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
401
402 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != STR) {
403 return -EINVAL;
404 }
405
406 (*opts_->get())[optname] = optval;
407 return 0;
408 }
409
410 int image_options_set(rbd_image_options_t opts, int optname, uint64_t optval)
411 {
412 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
413
414 std::map<int, image_option_type_t>::const_iterator i =
415 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
416
417 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != UINT64) {
418 return -EINVAL;
419 }
420
421 (*opts_->get())[optname] = optval;
422 return 0;
423 }
424
425 int image_options_get(rbd_image_options_t opts, int optname,
426 std::string* optval)
427 {
428 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
429
430 std::map<int, image_option_type_t>::const_iterator i =
431 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
432
433 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != STR) {
434 return -EINVAL;
435 }
436
437 image_options_t::const_iterator j = (*opts_)->find(optname);
438
439 if (j == (*opts_)->end()) {
440 return -ENOENT;
441 }
442
443 *optval = boost::get<std::string>(j->second);
444 return 0;
445 }
446
447 int image_options_get(rbd_image_options_t opts, int optname, uint64_t* optval)
448 {
449 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
450
451 std::map<int, image_option_type_t>::const_iterator i =
452 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
453
454 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end() || i->second != UINT64) {
455 return -EINVAL;
456 }
457
458 image_options_t::const_iterator j = (*opts_)->find(optname);
459
460 if (j == (*opts_)->end()) {
461 return -ENOENT;
462 }
463
464 *optval = boost::get<uint64_t>(j->second);
465 return 0;
466 }
467
468 int image_options_is_set(rbd_image_options_t opts, int optname,
469 bool* is_set)
470 {
471 if (IMAGE_OPTIONS_TYPE_MAPPING.find(optname) ==
472 IMAGE_OPTIONS_TYPE_MAPPING.end()) {
473 return -EINVAL;
474 }
475
476 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
477 *is_set = ((*opts_)->find(optname) != (*opts_)->end());
478 return 0;
479 }
480
481 int image_options_unset(rbd_image_options_t opts, int optname)
482 {
483 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
484
485 std::map<int, image_option_type_t>::const_iterator i =
486 IMAGE_OPTIONS_TYPE_MAPPING.find(optname);
487
488 if (i == IMAGE_OPTIONS_TYPE_MAPPING.end()) {
11fdf7f2 489 ceph_assert((*opts_)->find(optname) == (*opts_)->end());
7c673cae
FG
490 return -EINVAL;
491 }
492
493 image_options_t::const_iterator j = (*opts_)->find(optname);
494
495 if (j == (*opts_)->end()) {
496 return -ENOENT;
497 }
498
499 (*opts_)->erase(j);
500 return 0;
501 }
502
503 void image_options_clear(rbd_image_options_t opts)
504 {
505 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
506
507 (*opts_)->clear();
508 }
509
510 bool image_options_is_empty(rbd_image_options_t opts)
511 {
512 image_options_ref* opts_ = static_cast<image_options_ref*>(opts);
513
514 return (*opts_)->empty();
515 }
516
7c673cae
FG
517 int flatten_children(ImageCtx *ictx, const char* snap_name,
518 ProgressContext& pctx)
519 {
520 CephContext *cct = ictx->cct;
521 ldout(cct, 20) << "children flatten " << ictx->name << dendl;
522
b32b8144
FG
523 int r = ictx->state->refresh_if_required();
524 if (r < 0) {
525 return r;
526 }
527
7c673cae 528 RWLock::RLocker l(ictx->snap_lock);
11fdf7f2
TL
529 snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(),
530 snap_name);
531
532 cls::rbd::ParentImageSpec parent_spec{ictx->md_ctx.get_id(),
533 ictx->md_ctx.get_namespace(),
534 ictx->id, snap_id};
535 std::vector<librbd::linked_image_spec_t> child_images;
536 r = api::Image<>::list_children(ictx, parent_spec, &child_images);
7c673cae
FG
537 if (r < 0) {
538 return r;
539 }
540
11fdf7f2
TL
541 size_t size = child_images.size();
542 if (size == 0) {
7c673cae 543 return 0;
11fdf7f2 544 }
7c673cae 545
11fdf7f2
TL
546 librados::IoCtx child_io_ctx;
547 int64_t child_pool_id = -1;
7c673cae 548 size_t i = 0;
11fdf7f2
TL
549 for (auto &child_image : child_images){
550 std::string pool = child_image.pool_name;
551 if (child_pool_id == -1 ||
552 child_pool_id != child_image.pool_id ||
553 child_io_ctx.get_namespace() != child_image.pool_namespace) {
554 r = util::create_ioctx(ictx->md_ctx, "child image",
555 child_image.pool_id, child_image.pool_namespace,
556 &child_io_ctx);
7c673cae 557 if (r < 0) {
7c673cae
FG
558 return r;
559 }
7c673cae 560
11fdf7f2
TL
561 child_pool_id = child_image.pool_id;
562 }
b32b8144 563
11fdf7f2
TL
564 ImageCtx *imctx = new ImageCtx("", child_image.image_id, nullptr,
565 child_io_ctx, false);
566 r = imctx->state->open(0);
567 if (r < 0) {
568 lderr(cct) << "error opening image: " << cpp_strerror(r) << dendl;
569 return r;
570 }
7c673cae 571
11fdf7f2
TL
572 if ((imctx->features & RBD_FEATURE_DEEP_FLATTEN) == 0 &&
573 !imctx->snaps.empty()) {
574 lderr(cct) << "snapshot in-use by " << pool << "/" << imctx->name
575 << dendl;
576 imctx->state->close();
577 return -EBUSY;
578 }
7c673cae 579
11fdf7f2
TL
580 librbd::NoOpProgressContext prog_ctx;
581 r = imctx->operations->flatten(prog_ctx);
7c673cae 582 if (r < 0) {
11fdf7f2
TL
583 lderr(cct) << "error flattening image: " << pool << "/"
584 << (child_image.pool_namespace.empty() ?
585 "" : "/" + child_image.pool_namespace)
586 << child_image.image_name << cpp_strerror(r) << dendl;
587 imctx->state->close();
7c673cae
FG
588 return r;
589 }
590
11fdf7f2
TL
591 r = imctx->state->close();
592 if (r < 0) {
593 lderr(cct) << "failed to close image: " << cpp_strerror(r) << dendl;
594 return r;
7c673cae 595 }
11fdf7f2
TL
596
597 pctx.update_progress(++i, size);
598 ceph_assert(i <= size);
7c673cae 599 }
11fdf7f2 600
7c673cae
FG
601 return 0;
602 }
603
604 int get_snap_namespace(ImageCtx *ictx,
605 const char *snap_name,
606 cls::rbd::SnapshotNamespace *snap_namespace) {
607 ldout(ictx->cct, 20) << "get_snap_namespace " << ictx << " " << snap_name
608 << dendl;
609
610 int r = ictx->state->refresh_if_required();
611 if (r < 0)
612 return r;
613 RWLock::RLocker l(ictx->snap_lock);
614 snap_t snap_id = ictx->get_snap_id(*snap_namespace, snap_name);
615 if (snap_id == CEPH_NOSNAP)
616 return -ENOENT;
617 r = ictx->get_snap_namespace(snap_id, snap_namespace);
618 return r;
619 }
620
621 int snap_is_protected(ImageCtx *ictx, const char *snap_name, bool *is_protected)
622 {
623 ldout(ictx->cct, 20) << "snap_is_protected " << ictx << " " << snap_name
624 << dendl;
625
626 int r = ictx->state->refresh_if_required();
627 if (r < 0)
628 return r;
629
630 RWLock::RLocker l(ictx->snap_lock);
631 snap_t snap_id = ictx->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name);
632 if (snap_id == CEPH_NOSNAP)
633 return -ENOENT;
634 bool is_unprotected;
635 r = ictx->is_snap_unprotected(snap_id, &is_unprotected);
636 // consider both PROTECTED or UNPROTECTING to be 'protected',
637 // since in either state they can't be deleted
638 *is_protected = !is_unprotected;
639 return r;
640 }
641
642 int create_v1(IoCtx& io_ctx, const char *imgname, uint64_t size, int order)
643 {
644 CephContext *cct = (CephContext *)io_ctx.cct();
645
646 ldout(cct, 20) << __func__ << " " << &io_ctx << " name = " << imgname
647 << " size = " << size << " order = " << order << dendl;
648 int r = validate_pool(io_ctx, cct);
649 if (r < 0) {
650 return r;
651 }
652
11fdf7f2
TL
653 if (!io_ctx.get_namespace().empty()) {
654 lderr(cct) << "attempting to add v1 image to namespace" << dendl;
655 return -EINVAL;
656 }
657
7c673cae
FG
658 ldout(cct, 2) << "adding rbd image to directory..." << dendl;
659 r = tmap_set(io_ctx, imgname);
660 if (r < 0) {
661 lderr(cct) << "error adding image to directory: " << cpp_strerror(r)
662 << dendl;
663 return r;
664 }
665
666 Rados rados(io_ctx);
667 uint64_t bid = rados.get_instance_id();
668
669 ldout(cct, 2) << "creating rbd image..." << dendl;
670 struct rbd_obj_header_ondisk header;
671 init_rbd_header(header, size, order, bid);
672
673 bufferlist bl;
674 bl.append((const char *)&header, sizeof(header));
675
676 string header_oid = util::old_header_name(imgname);
677 r = io_ctx.write(header_oid, bl, bl.length(), 0);
678 if (r < 0) {
679 lderr(cct) << "Error writing image header: " << cpp_strerror(r)
680 << dendl;
681 int remove_r = tmap_rm(io_ctx, imgname);
682 if (remove_r < 0) {
683 lderr(cct) << "Could not remove image from directory after "
684 << "header creation failed: "
685 << cpp_strerror(remove_r) << dendl;
686 }
687 return r;
688 }
689
690 ldout(cct, 2) << "done." << dendl;
691 return 0;
692 }
693
694 int create(librados::IoCtx& io_ctx, const char *imgname, uint64_t size,
695 int *order)
696 {
697 uint64_t order_ = *order;
698 ImageOptions opts;
699
700 int r = opts.set(RBD_IMAGE_OPTION_ORDER, order_);
11fdf7f2 701 ceph_assert(r == 0);
7c673cae
FG
702
703 r = create(io_ctx, imgname, "", size, opts, "", "", false);
704
705 int r1 = opts.get(RBD_IMAGE_OPTION_ORDER, &order_);
11fdf7f2 706 ceph_assert(r1 == 0);
7c673cae
FG
707 *order = order_;
708
709 return r;
710 }
711
712 int create(IoCtx& io_ctx, const char *imgname, uint64_t size,
713 bool old_format, uint64_t features, int *order,
714 uint64_t stripe_unit, uint64_t stripe_count)
715 {
716 if (!order)
717 return -EINVAL;
718
719 uint64_t order_ = *order;
720 uint64_t format = old_format ? 1 : 2;
721 ImageOptions opts;
722 int r;
723
724 r = opts.set(RBD_IMAGE_OPTION_FORMAT, format);
11fdf7f2 725 ceph_assert(r == 0);
7c673cae 726 r = opts.set(RBD_IMAGE_OPTION_FEATURES, features);
11fdf7f2 727 ceph_assert(r == 0);
7c673cae 728 r = opts.set(RBD_IMAGE_OPTION_ORDER, order_);
11fdf7f2 729 ceph_assert(r == 0);
7c673cae 730 r = opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
11fdf7f2 731 ceph_assert(r == 0);
7c673cae 732 r = opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
11fdf7f2 733 ceph_assert(r == 0);
7c673cae
FG
734
735 r = create(io_ctx, imgname, "", size, opts, "", "", false);
736
737 int r1 = opts.get(RBD_IMAGE_OPTION_ORDER, &order_);
11fdf7f2 738 ceph_assert(r1 == 0);
7c673cae
FG
739 *order = order_;
740
741 return r;
742 }
743
744 int create(IoCtx& io_ctx, const std::string &image_name,
745 const std::string &image_id, uint64_t size,
746 ImageOptions& opts,
747 const std::string &non_primary_global_image_id,
748 const std::string &primary_mirror_uuid,
749 bool skip_mirror_enable)
750 {
751 std::string id(image_id);
752 if (id.empty()) {
753 id = util::generate_image_id(io_ctx);
754 }
755
756 CephContext *cct = (CephContext *)io_ctx.cct();
92f5a8d4
TL
757 uint64_t option;
758 if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &option) == 0) {
11fdf7f2
TL
759 lderr(cct) << "create does not support 'flatten' image option" << dendl;
760 return -EINVAL;
761 }
92f5a8d4
TL
762 if (opts.get(RBD_IMAGE_OPTION_CLONE_FORMAT, &option) == 0) {
763 lderr(cct) << "create does not support 'clone_format' image option"
764 << dendl;
765 return -EINVAL;
766 }
11fdf7f2 767
7c673cae
FG
768 ldout(cct, 10) << __func__ << " name=" << image_name << ", "
769 << "id= " << id << ", "
770 << "size=" << size << ", opts=" << opts << dendl;
771
772 uint64_t format;
773 if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0)
11fdf7f2 774 format = cct->_conf.get_val<uint64_t>("rbd_default_format");
7c673cae
FG
775 bool old_format = format == 1;
776
777 // make sure it doesn't already exist, in either format
778 int r = detect_format(io_ctx, image_name, NULL, NULL);
779 if (r != -ENOENT) {
780 if (r) {
781 lderr(cct) << "Could not tell if " << image_name << " already exists"
782 << dendl;
783 return r;
784 }
785 lderr(cct) << "rbd image " << image_name << " already exists" << dendl;
786 return -EEXIST;
787 }
788
789 uint64_t order = 0;
790 if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0 || order == 0) {
11fdf7f2 791 order = cct->_conf.get_val<uint64_t>("rbd_default_order");
7c673cae
FG
792 }
793 r = image::CreateRequest<>::validate_order(cct, order);
794 if (r < 0) {
795 return r;
796 }
797
798 if (old_format) {
11fdf7f2
TL
799 if ( !getenv("RBD_FORCE_ALLOW_V1") ) {
800 lderr(cct) << "Format 1 image creation unsupported. " << dendl;
801 return -EINVAL;
802 }
803 lderr(cct) << "Forced V1 image creation. " << dendl;
7c673cae
FG
804 r = create_v1(io_ctx, image_name.c_str(), size, order);
805 } else {
806 ThreadPool *thread_pool;
807 ContextWQ *op_work_queue;
808 ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
809
11fdf7f2
TL
810 ConfigProxy config{cct->_conf};
811 api::Config<>::apply_pool_overrides(io_ctx, &config);
812
7c673cae
FG
813 C_SaferCond cond;
814 image::CreateRequest<> *req = image::CreateRequest<>::create(
11fdf7f2 815 config, io_ctx, image_name, id, size, opts, non_primary_global_image_id,
7c673cae
FG
816 primary_mirror_uuid, skip_mirror_enable, op_work_queue, &cond);
817 req->send();
818
819 r = cond.wait();
820 }
821
822 int r1 = opts.set(RBD_IMAGE_OPTION_ORDER, order);
11fdf7f2 823 ceph_assert(r1 == 0);
7c673cae
FG
824
825 return r;
826 }
827
828 /*
829 * Parent may be in different pool, hence different IoCtx
830 */
831 int clone(IoCtx& p_ioctx, const char *p_name, const char *p_snap_name,
832 IoCtx& c_ioctx, const char *c_name,
833 uint64_t features, int *c_order,
834 uint64_t stripe_unit, int stripe_count)
835 {
836 uint64_t order = *c_order;
837
838 ImageOptions opts;
839 opts.set(RBD_IMAGE_OPTION_FEATURES, features);
840 opts.set(RBD_IMAGE_OPTION_ORDER, order);
841 opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
842 opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
843
11fdf7f2
TL
844 int r = clone(p_ioctx, nullptr, p_name, p_snap_name, c_ioctx, nullptr,
845 c_name, opts, "", "");
7c673cae
FG
846 opts.get(RBD_IMAGE_OPTION_ORDER, &order);
847 *c_order = order;
848 return r;
849 }
850
11fdf7f2
TL
851 int clone(IoCtx& p_ioctx, const char *p_id, const char *p_name,
852 const char *p_snap_name, IoCtx& c_ioctx, const char *c_id,
853 const char *c_name, ImageOptions& c_opts,
854 const std::string &non_primary_global_image_id,
855 const std::string &primary_mirror_uuid)
7c673cae 856 {
11fdf7f2
TL
857 ceph_assert((p_id == nullptr) ^ (p_name == nullptr));
858
7c673cae 859 CephContext *cct = (CephContext *)p_ioctx.cct();
11fdf7f2 860 if (p_snap_name == nullptr) {
7c673cae
FG
861 lderr(cct) << "image to be cloned must be a snapshot" << dendl;
862 return -EINVAL;
863 }
864
11fdf7f2
TL
865 uint64_t flatten;
866 if (c_opts.get(RBD_IMAGE_OPTION_FLATTEN, &flatten) == 0) {
867 lderr(cct) << "clone does not support 'flatten' image option" << dendl;
868 return -EINVAL;
7c673cae
FG
869 }
870
11fdf7f2
TL
871 int r;
872 std::string parent_id;
873 if (p_id == nullptr) {
874 r = cls_client::dir_get_id(&p_ioctx, RBD_DIRECTORY, p_name,
875 &parent_id);
876 if (r < 0) {
877 if (r != -ENOENT) {
878 lderr(cct) << "failed to retrieve parent image id: "
879 << cpp_strerror(r) << dendl;
880 }
881 return r;
882 }
883 } else {
884 parent_id = p_id;
7c673cae 885 }
7c673cae 886
11fdf7f2
TL
887 std::string clone_id;
888 if (c_id == nullptr) {
889 clone_id = util::generate_image_id(c_ioctx);
890 } else {
891 clone_id = c_id;
7c673cae
FG
892 }
893
7c673cae
FG
894 ldout(cct, 10) << __func__ << " "
895 << "c_name=" << c_name << ", "
11fdf7f2 896 << "c_id= " << clone_id << ", "
7c673cae
FG
897 << "c_opts=" << c_opts << dendl;
898
11fdf7f2
TL
899 ConfigProxy config{reinterpret_cast<CephContext *>(c_ioctx.cct())->_conf};
900 api::Config<>::apply_pool_overrides(c_ioctx, &config);
901
7c673cae
FG
902 ThreadPool *thread_pool;
903 ContextWQ *op_work_queue;
904 ImageCtx::get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
905
906 C_SaferCond cond;
907 auto *req = image::CloneRequest<>::create(
11fdf7f2
TL
908 config, p_ioctx, parent_id, p_snap_name, CEPH_NOSNAP, c_ioctx, c_name,
909 clone_id, c_opts, non_primary_global_image_id, primary_mirror_uuid,
910 op_work_queue, &cond);
7c673cae
FG
911 req->send();
912
11fdf7f2
TL
913 r = cond.wait();
914 if (r < 0) {
915 return r;
916 }
917
918 return 0;
7c673cae
FG
919 }
920
921 int rename(IoCtx& io_ctx, const char *srcname, const char *dstname)
922 {
923 CephContext *cct = (CephContext *)io_ctx.cct();
924 ldout(cct, 20) << "rename " << &io_ctx << " " << srcname << " -> "
925 << dstname << dendl;
926
927 ImageCtx *ictx = new ImageCtx(srcname, "", "", io_ctx, false);
11fdf7f2 928 int r = ictx->state->open(0);
7c673cae 929 if (r < 0) {
181888fb 930 lderr(cct) << "error opening source image: " << cpp_strerror(r) << dendl;
7c673cae
FG
931 return r;
932 }
933 BOOST_SCOPE_EXIT((ictx)) {
934 ictx->state->close();
935 } BOOST_SCOPE_EXIT_END
936
937 return ictx->operations->rename(dstname);
938 }
939
940 int info(ImageCtx *ictx, image_info_t& info, size_t infosize)
941 {
942 ldout(ictx->cct, 20) << "info " << ictx << dendl;
943
944 int r = ictx->state->refresh_if_required();
945 if (r < 0)
946 return r;
947
948 image_info(ictx, info, infosize);
949 return 0;
950 }
951
952 int get_old_format(ImageCtx *ictx, uint8_t *old)
953 {
954 int r = ictx->state->refresh_if_required();
955 if (r < 0)
956 return r;
957 *old = ictx->old_format;
958 return 0;
959 }
960
961 int get_size(ImageCtx *ictx, uint64_t *size)
962 {
963 int r = ictx->state->refresh_if_required();
964 if (r < 0)
965 return r;
966 RWLock::RLocker l2(ictx->snap_lock);
967 *size = ictx->get_image_size(ictx->snap_id);
968 return 0;
969 }
970
971 int get_features(ImageCtx *ictx, uint64_t *features)
972 {
973 int r = ictx->state->refresh_if_required();
974 if (r < 0)
975 return r;
976 RWLock::RLocker l(ictx->snap_lock);
977 *features = ictx->features;
978 return 0;
979 }
980
981 int get_overlap(ImageCtx *ictx, uint64_t *overlap)
982 {
983 int r = ictx->state->refresh_if_required();
984 if (r < 0)
985 return r;
986 RWLock::RLocker l(ictx->snap_lock);
987 RWLock::RLocker l2(ictx->parent_lock);
988 return ictx->get_parent_overlap(ictx->snap_id, overlap);
989 }
990
7c673cae
FG
991 int get_flags(ImageCtx *ictx, uint64_t *flags)
992 {
993 int r = ictx->state->refresh_if_required();
994 if (r < 0) {
995 return r;
996 }
997
998 RWLock::RLocker l2(ictx->snap_lock);
999 return ictx->get_flags(ictx->snap_id, flags);
1000 }
1001
1002 int set_image_notification(ImageCtx *ictx, int fd, int type)
1003 {
1004 CephContext *cct = ictx->cct;
1005 ldout(cct, 20) << __func__ << " " << ictx << " fd " << fd << " type" << type << dendl;
1006
1007 int r = ictx->state->refresh_if_required();
1008 if (r < 0) {
1009 return r;
1010 }
1011
1012 if (ictx->event_socket.is_valid())
1013 return -EINVAL;
1014 return ictx->event_socket.init(fd, type);
1015 }
1016
1017 int is_exclusive_lock_owner(ImageCtx *ictx, bool *is_owner)
1018 {
91327a77
AA
1019 CephContext *cct = ictx->cct;
1020 ldout(cct, 20) << __func__ << ": ictx=" << ictx << dendl;
7c673cae
FG
1021 *is_owner = false;
1022
1023 RWLock::RLocker owner_locker(ictx->owner_lock);
91327a77 1024 if (ictx->exclusive_lock == nullptr) {
7c673cae
FG
1025 return 0;
1026 }
1027
1028 // might have been blacklisted by peer -- ensure we still own
1029 // the lock by pinging the OSD
1030 int r = ictx->exclusive_lock->assert_header_locked();
31f18b77
FG
1031 if (r == -EBUSY || r == -ENOENT) {
1032 return 0;
1033 } else if (r < 0) {
7c673cae
FG
1034 return r;
1035 }
1036
1037 *is_owner = true;
1038 return 0;
1039 }
1040
1041 int lock_acquire(ImageCtx *ictx, rbd_lock_mode_t lock_mode)
1042 {
1043 CephContext *cct = ictx->cct;
1044 ldout(cct, 20) << __func__ << ": ictx=" << ictx << ", "
1045 << "lock_mode=" << lock_mode << dendl;
1046
1047 if (lock_mode != RBD_LOCK_MODE_EXCLUSIVE) {
1048 return -EOPNOTSUPP;
1049 }
1050
1051 C_SaferCond lock_ctx;
1052 {
1053 RWLock::WLocker l(ictx->owner_lock);
1054
1055 if (ictx->exclusive_lock == nullptr) {
1056 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1057 return -EINVAL;
1058 }
1059
1060 if (ictx->get_exclusive_lock_policy()->may_auto_request_lock()) {
1061 ictx->set_exclusive_lock_policy(
1062 new exclusive_lock::StandardPolicy(ictx));
1063 }
1064
1065 if (ictx->exclusive_lock->is_lock_owner()) {
1066 return 0;
1067 }
1068
1069 ictx->exclusive_lock->acquire_lock(&lock_ctx);
1070 }
1071
1072 int r = lock_ctx.wait();
1073 if (r < 0) {
1074 lderr(cct) << "failed to request exclusive lock: " << cpp_strerror(r)
1075 << dendl;
1076 return r;
1077 }
1078
1079 RWLock::RLocker l(ictx->owner_lock);
91327a77
AA
1080 if (ictx->exclusive_lock == nullptr) {
1081 return -EINVAL;
1082 } else if (!ictx->exclusive_lock->is_lock_owner()) {
7c673cae 1083 lderr(cct) << "failed to acquire exclusive lock" << dendl;
91327a77 1084 return ictx->exclusive_lock->get_unlocked_op_error();
7c673cae
FG
1085 }
1086
1087 return 0;
1088 }
1089
1090 int lock_release(ImageCtx *ictx)
1091 {
1092 CephContext *cct = ictx->cct;
1093 ldout(cct, 20) << __func__ << ": ictx=" << ictx << dendl;
1094
1095 C_SaferCond lock_ctx;
1096 {
1097 RWLock::WLocker l(ictx->owner_lock);
1098
1099 if (ictx->exclusive_lock == nullptr ||
1100 !ictx->exclusive_lock->is_lock_owner()) {
1101 lderr(cct) << "not exclusive lock owner" << dendl;
1102 return -EINVAL;
1103 }
1104
1105 ictx->exclusive_lock->release_lock(&lock_ctx);
1106 }
1107
1108 int r = lock_ctx.wait();
1109 if (r < 0) {
1110 lderr(cct) << "failed to release exclusive lock: " << cpp_strerror(r)
1111 << dendl;
1112 return r;
1113 }
1114 return 0;
1115 }
1116
1117 int lock_get_owners(ImageCtx *ictx, rbd_lock_mode_t *lock_mode,
1118 std::list<std::string> *lock_owners)
1119 {
1120 CephContext *cct = ictx->cct;
1121 ldout(cct, 20) << __func__ << ": ictx=" << ictx << dendl;
1122
1123 if (!ictx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
1124 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1125 return -EINVAL;
1126 }
1127
1128 managed_lock::Locker locker;
1129 C_SaferCond get_owner_ctx;
1130 ExclusiveLock<>(*ictx).get_locker(&locker, &get_owner_ctx);
1131 int r = get_owner_ctx.wait();
1132 if (r == -ENOENT) {
1133 return r;
1134 } else if (r < 0) {
1135 lderr(cct) << "failed to determine current lock owner: "
1136 << cpp_strerror(r) << dendl;
1137 return r;
1138 }
1139
1140 *lock_mode = RBD_LOCK_MODE_EXCLUSIVE;
1141 lock_owners->clear();
1142 lock_owners->emplace_back(locker.address);
1143 return 0;
1144 }
1145
1146 int lock_break(ImageCtx *ictx, rbd_lock_mode_t lock_mode,
11fdf7f2 1147 const std::string &lock_owner) {
7c673cae
FG
1148 CephContext *cct = ictx->cct;
1149 ldout(cct, 20) << __func__ << ": ictx=" << ictx << ", "
1150 << "lock_mode=" << lock_mode << ", "
1151 << "lock_owner=" << lock_owner << dendl;
1152
1153 if (lock_mode != RBD_LOCK_MODE_EXCLUSIVE) {
1154 return -EOPNOTSUPP;
1155 }
1156
1157 if (ictx->read_only) {
1158 return -EROFS;
1159 }
1160
1161 managed_lock::Locker locker;
1162 C_SaferCond get_owner_ctx;
1163 {
1164 RWLock::RLocker l(ictx->owner_lock);
1165
1166 if (ictx->exclusive_lock == nullptr) {
1167 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1168 return -EINVAL;
1169 }
1170
1171 ictx->exclusive_lock->get_locker(&locker, &get_owner_ctx);
1172 }
1173 int r = get_owner_ctx.wait();
1174 if (r == -ENOENT) {
1175 return r;
1176 } else if (r < 0) {
1177 lderr(cct) << "failed to determine current lock owner: "
1178 << cpp_strerror(r) << dendl;
1179 return r;
1180 }
1181
1182 if (locker.address != lock_owner) {
1183 return -EBUSY;
1184 }
1185
1186 C_SaferCond break_ctx;
1187 {
1188 RWLock::RLocker l(ictx->owner_lock);
1189
1190 if (ictx->exclusive_lock == nullptr) {
1191 lderr(cct) << "exclusive-lock feature is not enabled" << dendl;
1192 return -EINVAL;
1193 }
1194
1195 ictx->exclusive_lock->break_lock(locker, true, &break_ctx);
1196 }
1197 r = break_ctx.wait();
1198 if (r == -ENOENT) {
1199 return r;
1200 } else if (r < 0) {
1201 lderr(cct) << "failed to break lock: " << cpp_strerror(r) << dendl;
1202 return r;
1203 }
1204 return 0;
1205 }
1206
7c673cae
FG
1207 int snap_list(ImageCtx *ictx, vector<snap_info_t>& snaps)
1208 {
1209 ldout(ictx->cct, 20) << "snap_list " << ictx << dendl;
1210
1211 int r = ictx->state->refresh_if_required();
1212 if (r < 0)
1213 return r;
1214
1215 RWLock::RLocker l(ictx->snap_lock);
1216 for (map<snap_t, SnapInfo>::iterator it = ictx->snap_info.begin();
1217 it != ictx->snap_info.end(); ++it) {
1218 snap_info_t info;
1219 info.name = it->second.name;
1220 info.id = it->first;
1221 info.size = it->second.size;
1222 snaps.push_back(info);
1223 }
1224
1225 return 0;
1226 }
1227
1228 int snap_exists(ImageCtx *ictx, const cls::rbd::SnapshotNamespace& snap_namespace,
1229 const char *snap_name, bool *exists)
1230 {
1231 ldout(ictx->cct, 20) << "snap_exists " << ictx << " " << snap_name << dendl;
1232
1233 int r = ictx->state->refresh_if_required();
1234 if (r < 0)
1235 return r;
1236
1237 RWLock::RLocker l(ictx->snap_lock);
1238 *exists = ictx->get_snap_id(snap_namespace, snap_name) != CEPH_NOSNAP;
1239 return 0;
1240 }
1241
1242 int snap_remove(ImageCtx *ictx, const char *snap_name, uint32_t flags,
1243 ProgressContext& pctx)
1244 {
1245 ldout(ictx->cct, 20) << "snap_remove " << ictx << " " << snap_name << " flags: " << flags << dendl;
1246
1247 int r = 0;
1248
1249 r = ictx->state->refresh_if_required();
1250 if (r < 0)
1251 return r;
1252
1253 if (flags & RBD_SNAP_REMOVE_FLATTEN) {
1254 r = flatten_children(ictx, snap_name, pctx);
1255 if (r < 0) {
1256 return r;
1257 }
1258 }
1259
1260 bool is_protected;
1261 r = snap_is_protected(ictx, snap_name, &is_protected);
1262 if (r < 0) {
1263 return r;
1264 }
1265
1266 if (is_protected && flags & RBD_SNAP_REMOVE_UNPROTECT) {
1267 r = ictx->operations->snap_unprotect(cls::rbd::UserSnapshotNamespace(), snap_name);
1268 if (r < 0) {
1269 lderr(ictx->cct) << "failed to unprotect snapshot: " << snap_name << dendl;
1270 return r;
1271 }
1272
1273 r = snap_is_protected(ictx, snap_name, &is_protected);
1274 if (r < 0) {
1275 return r;
1276 }
1277 if (is_protected) {
1278 lderr(ictx->cct) << "snapshot is still protected after unprotection" << dendl;
1279 ceph_abort();
1280 }
1281 }
1282
1283 C_SaferCond ctx;
1284 ictx->operations->snap_remove(cls::rbd::UserSnapshotNamespace(), snap_name, &ctx);
1285
1286 r = ctx.wait();
1287 return r;
1288 }
1289
1290 int snap_get_timestamp(ImageCtx *ictx, uint64_t snap_id, struct timespec *timestamp)
1291 {
1292 std::map<librados::snap_t, SnapInfo>::iterator snap_it = ictx->snap_info.find(snap_id);
11fdf7f2 1293 ceph_assert(snap_it != ictx->snap_info.end());
7c673cae
FG
1294 utime_t time = snap_it->second.timestamp;
1295 time.to_timespec(timestamp);
1296 return 0;
1297 }
1298
1299 int snap_get_limit(ImageCtx *ictx, uint64_t *limit)
1300 {
1301 int r = cls_client::snapshot_get_limit(&ictx->md_ctx, ictx->header_oid,
1302 limit);
1303 if (r == -EOPNOTSUPP) {
1304 *limit = UINT64_MAX;
1305 r = 0;
1306 }
1307 return r;
1308 }
1309
1310 int snap_set_limit(ImageCtx *ictx, uint64_t limit)
1311 {
1312 return ictx->operations->snap_set_limit(limit);
1313 }
1314
1315 struct CopyProgressCtx {
1316 explicit CopyProgressCtx(ProgressContext &p)
1317 : destictx(NULL), src_size(0), prog_ctx(p)
1318 { }
1319
1320 ImageCtx *destictx;
1321 uint64_t src_size;
1322 ProgressContext &prog_ctx;
1323 };
1324
1325 int copy(ImageCtx *src, IoCtx& dest_md_ctx, const char *destname,
1326 ImageOptions& opts, ProgressContext &prog_ctx, size_t sparse_size)
1327 {
1328 CephContext *cct = (CephContext *)dest_md_ctx.cct();
92f5a8d4
TL
1329 uint64_t option;
1330 if (opts.get(RBD_IMAGE_OPTION_FLATTEN, &option) == 0) {
11fdf7f2
TL
1331 lderr(cct) << "copy does not support 'flatten' image option" << dendl;
1332 return -EINVAL;
1333 }
92f5a8d4
TL
1334 if (opts.get(RBD_IMAGE_OPTION_CLONE_FORMAT, &option) == 0) {
1335 lderr(cct) << "copy does not support 'clone_format' image option"
1336 << dendl;
1337 return -EINVAL;
1338 }
11fdf7f2 1339
7c673cae
FG
1340 ldout(cct, 20) << "copy " << src->name
1341 << (src->snap_name.length() ? "@" + src->snap_name : "")
1342 << " -> " << destname << " opts = " << opts << dendl;
1343
1344 src->snap_lock.get_read();
1345 uint64_t features = src->features;
1346 uint64_t src_size = src->get_image_size(src->snap_id);
1347 src->snap_lock.put_read();
1348 uint64_t format = src->old_format ? 1 : 2;
1349 if (opts.get(RBD_IMAGE_OPTION_FORMAT, &format) != 0) {
1350 opts.set(RBD_IMAGE_OPTION_FORMAT, format);
1351 }
1352 uint64_t stripe_unit = src->stripe_unit;
1353 if (opts.get(RBD_IMAGE_OPTION_STRIPE_UNIT, &stripe_unit) != 0) {
1354 opts.set(RBD_IMAGE_OPTION_STRIPE_UNIT, stripe_unit);
1355 }
1356 uint64_t stripe_count = src->stripe_count;
1357 if (opts.get(RBD_IMAGE_OPTION_STRIPE_COUNT, &stripe_count) != 0) {
1358 opts.set(RBD_IMAGE_OPTION_STRIPE_COUNT, stripe_count);
1359 }
1360 uint64_t order = src->order;
1361 if (opts.get(RBD_IMAGE_OPTION_ORDER, &order) != 0) {
1362 opts.set(RBD_IMAGE_OPTION_ORDER, order);
1363 }
1364 if (opts.get(RBD_IMAGE_OPTION_FEATURES, &features) != 0) {
1365 opts.set(RBD_IMAGE_OPTION_FEATURES, features);
1366 }
1367 if (features & ~RBD_FEATURES_ALL) {
1368 lderr(cct) << "librbd does not support requested features" << dendl;
1369 return -ENOSYS;
1370 }
1371
1372 int r = create(dest_md_ctx, destname, "", src_size, opts, "", "", false);
1373 if (r < 0) {
1374 lderr(cct) << "header creation failed" << dendl;
1375 return r;
1376 }
1377 opts.set(RBD_IMAGE_OPTION_ORDER, static_cast<uint64_t>(order));
1378
11fdf7f2
TL
1379 ImageCtx *dest = new librbd::ImageCtx(destname, "", nullptr, dest_md_ctx,
1380 false);
1381 r = dest->state->open(0);
7c673cae
FG
1382 if (r < 0) {
1383 lderr(cct) << "failed to read newly created header" << dendl;
1384 return r;
1385 }
1386
1387 r = copy(src, dest, prog_ctx, sparse_size);
1388
1389 int close_r = dest->state->close();
1390 if (r == 0 && close_r < 0) {
1391 r = close_r;
1392 }
1393 return r;
1394 }
1395
1396 class C_CopyWrite : public Context {
1397 public:
1398 C_CopyWrite(bufferlist *bl, Context* ctx)
1399 : m_bl(bl), m_ctx(ctx) {}
1400 void finish(int r) override {
1401 delete m_bl;
1402 m_ctx->complete(r);
1403 }
1404 private:
1405 bufferlist *m_bl;
1406 Context *m_ctx;
1407 };
1408
1409 class C_CopyRead : public Context {
1410 public:
1411 C_CopyRead(SimpleThrottle *throttle, ImageCtx *dest, uint64_t offset,
1412 bufferlist *bl, size_t sparse_size)
1413 : m_throttle(throttle), m_dest(dest), m_offset(offset), m_bl(bl),
1414 m_sparse_size(sparse_size) {
1415 m_throttle->start_op();
1416 }
1417 void finish(int r) override {
1418 if (r < 0) {
1419 lderr(m_dest->cct) << "error reading from source image at offset "
1420 << m_offset << ": " << cpp_strerror(r) << dendl;
1421 delete m_bl;
1422 m_throttle->end_op(r);
1423 return;
1424 }
11fdf7f2 1425 ceph_assert(m_bl->length() == (size_t)r);
7c673cae
FG
1426
1427 if (m_bl->is_zero()) {
1428 delete m_bl;
1429 m_throttle->end_op(r);
1430 return;
1431 }
1432
1433 if (!m_sparse_size) {
1434 m_sparse_size = (1 << m_dest->order);
1435 }
1436
1437 auto *throttle = m_throttle;
1438 auto *end_op_ctx = new FunctionContext([throttle](int r) {
1439 throttle->end_op(r);
1440 });
1441 auto gather_ctx = new C_Gather(m_dest->cct, end_op_ctx);
1442
11fdf7f2 1443 m_bl->rebuild(buffer::ptr_node::create(m_bl->length()));
7c673cae
FG
1444 size_t write_offset = 0;
1445 size_t write_length = 0;
1446 size_t offset = 0;
1447 size_t length = m_bl->length();
11fdf7f2 1448 const auto& m_ptr = m_bl->front();
7c673cae
FG
1449 while (offset < length) {
1450 if (util::calc_sparse_extent(m_ptr,
1451 m_sparse_size,
1452 length,
1453 &write_offset,
1454 &write_length,
1455 &offset)) {
7c673cae 1456 bufferlist *write_bl = new bufferlist();
11fdf7f2
TL
1457 write_bl->push_back(
1458 buffer::ptr_node::create(m_ptr, write_offset, write_length));
7c673cae
FG
1459 Context *ctx = new C_CopyWrite(write_bl, gather_ctx->new_sub());
1460 auto comp = io::AioCompletion::create(ctx);
1461
1462 // coordinate through AIO WQ to ensure lock is acquired if needed
1463 m_dest->io_work_queue->aio_write(comp, m_offset + write_offset,
1464 write_length,
1465 std::move(*write_bl),
31f18b77
FG
1466 LIBRADOS_OP_FLAG_FADVISE_DONTNEED,
1467 std::move(read_trace));
7c673cae
FG
1468 write_offset = offset;
1469 write_length = 0;
1470 }
1471 }
1472 delete m_bl;
11fdf7f2 1473 ceph_assert(gather_ctx->get_sub_created_count() > 0);
7c673cae
FG
1474 gather_ctx->activate();
1475 }
1476
31f18b77
FG
1477 ZTracer::Trace read_trace;
1478
7c673cae
FG
1479 private:
1480 SimpleThrottle *m_throttle;
1481 ImageCtx *m_dest;
1482 uint64_t m_offset;
1483 bufferlist *m_bl;
1484 size_t m_sparse_size;
1485 };
1486
1487 int copy(ImageCtx *src, ImageCtx *dest, ProgressContext &prog_ctx, size_t sparse_size)
1488 {
1489 src->snap_lock.get_read();
1490 uint64_t src_size = src->get_image_size(src->snap_id);
1491 src->snap_lock.put_read();
1492
1493 dest->snap_lock.get_read();
1494 uint64_t dest_size = dest->get_image_size(dest->snap_id);
1495 dest->snap_lock.put_read();
1496
1497 CephContext *cct = src->cct;
1498 if (dest_size < src_size) {
1499 lderr(cct) << " src size " << src_size << " > dest size "
1500 << dest_size << dendl;
1501 return -EINVAL;
1502 }
1503 int r;
b32b8144 1504 const uint32_t MAX_KEYS = 64;
7c673cae 1505 map<string, bufferlist> pairs;
b32b8144
FG
1506 std::string last_key = "";
1507 bool more_results = true;
7c673cae 1508
b32b8144
FG
1509 while (more_results) {
1510 r = cls_client::metadata_list(&src->md_ctx, src->header_oid, last_key, 0, &pairs);
1511 if (r < 0 && r != -EOPNOTSUPP && r != -EIO) {
1512 lderr(cct) << "couldn't list metadata: " << cpp_strerror(r) << dendl;
7c673cae 1513 return r;
b32b8144
FG
1514 } else if (r == 0 && !pairs.empty()) {
1515 r = cls_client::metadata_set(&dest->md_ctx, dest->header_oid, pairs);
1516 if (r < 0) {
1517 lderr(cct) << "couldn't set metadata: " << cpp_strerror(r) << dendl;
1518 return r;
1519 }
1520
1521 last_key = pairs.rbegin()->first;
7c673cae 1522 }
b32b8144
FG
1523
1524 more_results = (pairs.size() == MAX_KEYS);
1525 pairs.clear();
7c673cae
FG
1526 }
1527
31f18b77 1528 ZTracer::Trace trace;
181888fb 1529 if (src->blkin_trace_all) {
31f18b77
FG
1530 trace.init("copy", &src->trace_endpoint);
1531 }
1532
7c673cae 1533 RWLock::RLocker owner_lock(src->owner_lock);
11fdf7f2 1534 SimpleThrottle throttle(src->config.get_val<uint64_t>("rbd_concurrent_management_ops"), false);
7c673cae 1535 uint64_t period = src->get_stripe_period();
31f18b77
FG
1536 unsigned fadvise_flags = LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL |
1537 LIBRADOS_OP_FLAG_FADVISE_NOCACHE;
11fdf7f2 1538 uint64_t object_id = 0;
7c673cae
FG
1539 for (uint64_t offset = 0; offset < src_size; offset += period) {
1540 if (throttle.pending_error()) {
1541 return throttle.wait_for_ret();
1542 }
1543
11fdf7f2
TL
1544 {
1545 RWLock::RLocker snap_locker(src->snap_lock);
1546 if (src->object_map != nullptr) {
1547 bool skip = true;
1548 // each period is related to src->stripe_count objects, check them all
1549 for (uint64_t i=0; i < src->stripe_count; i++) {
1550 if (object_id < src->object_map->size() &&
1551 src->object_map->object_may_exist(object_id)) {
1552 skip = false;
1553 }
1554 ++object_id;
1555 }
1556
1557 if (skip) continue;
1558 } else {
1559 object_id += src->stripe_count;
1560 }
1561 }
1562
7c673cae
FG
1563 uint64_t len = min(period, src_size - offset);
1564 bufferlist *bl = new bufferlist();
31f18b77
FG
1565 auto ctx = new C_CopyRead(&throttle, dest, offset, bl, sparse_size);
1566 auto comp = io::AioCompletion::create_and_start<Context>(
1567 ctx, src, io::AIO_TYPE_READ);
1568
1569 io::ImageReadRequest<> req(*src, comp, {{offset, len}},
1570 io::ReadResult{bl}, fadvise_flags,
1571 std::move(trace));
1572 ctx->read_trace = req.get_trace();
1573
1574 req.send();
7c673cae
FG
1575 prog_ctx.update_progress(offset, src_size);
1576 }
1577
1578 r = throttle.wait_for_ret();
1579 if (r >= 0)
1580 prog_ctx.update_progress(src_size, src_size);
1581 return r;
1582 }
1583
7c673cae
FG
1584 int list_lockers(ImageCtx *ictx,
1585 std::list<locker_t> *lockers,
1586 bool *exclusive,
1587 string *tag)
1588 {
1589 ldout(ictx->cct, 20) << "list_locks on image " << ictx << dendl;
1590
1591 int r = ictx->state->refresh_if_required();
1592 if (r < 0)
1593 return r;
1594
1595 RWLock::RLocker locker(ictx->md_lock);
1596 if (exclusive)
1597 *exclusive = ictx->exclusive_locked;
1598 if (tag)
1599 *tag = ictx->lock_tag;
1600 if (lockers) {
1601 lockers->clear();
1602 map<rados::cls::lock::locker_id_t,
1603 rados::cls::lock::locker_info_t>::const_iterator it;
1604 for (it = ictx->lockers.begin(); it != ictx->lockers.end(); ++it) {
1605 locker_t locker;
1606 locker.client = stringify(it->first.locker);
1607 locker.cookie = it->first.cookie;
11fdf7f2 1608 locker.address = it->second.addr.get_legacy_str();
7c673cae
FG
1609 lockers->push_back(locker);
1610 }
1611 }
1612
1613 return 0;
1614 }
1615
1616 int lock(ImageCtx *ictx, bool exclusive, const string& cookie,
1617 const string& tag)
1618 {
1619 ldout(ictx->cct, 20) << "lock image " << ictx << " exclusive=" << exclusive
1620 << " cookie='" << cookie << "' tag='" << tag << "'"
1621 << dendl;
1622
1623 int r = ictx->state->refresh_if_required();
1624 if (r < 0)
1625 return r;
1626
1627 /**
1628 * If we wanted we could do something more intelligent, like local
1629 * checks that we think we will succeed. But for now, let's not
1630 * duplicate that code.
1631 */
1632 {
1633 RWLock::RLocker locker(ictx->md_lock);
1634 r = rados::cls::lock::lock(&ictx->md_ctx, ictx->header_oid, RBD_LOCK_NAME,
1635 exclusive ? LOCK_EXCLUSIVE : LOCK_SHARED,
1636 cookie, tag, "", utime_t(), 0);
1637 if (r < 0) {
1638 return r;
1639 }
1640 }
1641
1642 ictx->notify_update();
1643 return 0;
1644 }
1645
1646 int unlock(ImageCtx *ictx, const string& cookie)
1647 {
1648 ldout(ictx->cct, 20) << "unlock image " << ictx
1649 << " cookie='" << cookie << "'" << dendl;
1650
1651 int r = ictx->state->refresh_if_required();
1652 if (r < 0)
1653 return r;
1654
1655 {
1656 RWLock::RLocker locker(ictx->md_lock);
1657 r = rados::cls::lock::unlock(&ictx->md_ctx, ictx->header_oid,
1658 RBD_LOCK_NAME, cookie);
1659 if (r < 0) {
1660 return r;
1661 }
1662 }
1663
1664 ictx->notify_update();
1665 return 0;
1666 }
1667
1668 int break_lock(ImageCtx *ictx, const string& client,
1669 const string& cookie)
1670 {
1671 ldout(ictx->cct, 20) << "break_lock image " << ictx << " client='" << client
1672 << "' cookie='" << cookie << "'" << dendl;
1673
1674 int r = ictx->state->refresh_if_required();
1675 if (r < 0)
1676 return r;
1677
1678 entity_name_t lock_client;
1679 if (!lock_client.parse(client)) {
1680 lderr(ictx->cct) << "Unable to parse client '" << client
1681 << "'" << dendl;
1682 return -EINVAL;
1683 }
1684
11fdf7f2 1685 if (ictx->config.get_val<bool>("rbd_blacklist_on_break_lock")) {
7c673cae
FG
1686 typedef std::map<rados::cls::lock::locker_id_t,
1687 rados::cls::lock::locker_info_t> Lockers;
1688 Lockers lockers;
1689 ClsLockType lock_type;
1690 std::string lock_tag;
1691 r = rados::cls::lock::get_lock_info(&ictx->md_ctx, ictx->header_oid,
1692 RBD_LOCK_NAME, &lockers, &lock_type,
1693 &lock_tag);
1694 if (r < 0) {
1695 lderr(ictx->cct) << "unable to retrieve lock info: " << cpp_strerror(r)
1696 << dendl;
1697 return r;
1698 }
1699
1700 std::string client_address;
1701 for (Lockers::iterator it = lockers.begin();
1702 it != lockers.end(); ++it) {
1703 if (it->first.locker == lock_client) {
11fdf7f2 1704 client_address = it->second.addr.get_legacy_str();
7c673cae
FG
1705 break;
1706 }
1707 }
1708 if (client_address.empty()) {
1709 return -ENOENT;
1710 }
1711
1712 RWLock::RLocker locker(ictx->md_lock);
1713 librados::Rados rados(ictx->md_ctx);
11fdf7f2
TL
1714 r = rados.blacklist_add(
1715 client_address,
1716 ictx->config.get_val<uint64_t>("rbd_blacklist_expire_seconds"));
7c673cae
FG
1717 if (r < 0) {
1718 lderr(ictx->cct) << "unable to blacklist client: " << cpp_strerror(r)
1719 << dendl;
1720 return r;
1721 }
1722 }
1723
1724 r = rados::cls::lock::break_lock(&ictx->md_ctx, ictx->header_oid,
1725 RBD_LOCK_NAME, cookie, lock_client);
1726 if (r < 0)
1727 return r;
1728 ictx->notify_update();
1729 return 0;
1730 }
1731
1732 void rbd_ctx_cb(completion_t cb, void *arg)
1733 {
1734 Context *ctx = reinterpret_cast<Context *>(arg);
1735 auto comp = reinterpret_cast<io::AioCompletion *>(cb);
1736 ctx->complete(comp->get_return_value());
1737 comp->release();
1738 }
1739
1740 int64_t read_iterate(ImageCtx *ictx, uint64_t off, uint64_t len,
1741 int (*cb)(uint64_t, size_t, const char *, void *),
1742 void *arg)
1743 {
11fdf7f2
TL
1744 coarse_mono_time start_time;
1745 ceph::timespan elapsed;
7c673cae
FG
1746
1747 ldout(ictx->cct, 20) << "read_iterate " << ictx << " off = " << off
1748 << " len = " << len << dendl;
1749
1750 int r = ictx->state->refresh_if_required();
1751 if (r < 0)
1752 return r;
1753
1754 uint64_t mylen = len;
1755 ictx->snap_lock.get_read();
1756 r = clip_io(ictx, off, &mylen);
1757 ictx->snap_lock.put_read();
1758 if (r < 0)
1759 return r;
1760
1761 int64_t total_read = 0;
1762 uint64_t period = ictx->get_stripe_period();
1763 uint64_t left = mylen;
1764
31f18b77 1765 ZTracer::Trace trace;
181888fb 1766 if (ictx->blkin_trace_all) {
31f18b77
FG
1767 trace.init("read_iterate", &ictx->trace_endpoint);
1768 }
1769
7c673cae 1770 RWLock::RLocker owner_locker(ictx->owner_lock);
11fdf7f2 1771 start_time = coarse_mono_clock::now();
7c673cae
FG
1772 while (left > 0) {
1773 uint64_t period_off = off - (off % period);
1774 uint64_t read_len = min(period_off + period - off, left);
1775
1776 bufferlist bl;
1777
1778 C_SaferCond ctx;
1779 auto c = io::AioCompletion::create_and_start(&ctx, ictx,
1780 io::AIO_TYPE_READ);
1781 io::ImageRequest<>::aio_read(ictx, c, {{off, read_len}},
31f18b77 1782 io::ReadResult{&bl}, 0, std::move(trace));
7c673cae
FG
1783
1784 int ret = ctx.wait();
1785 if (ret < 0) {
1786 return ret;
1787 }
1788
1789 r = cb(total_read, ret, bl.c_str(), arg);
1790 if (r < 0) {
1791 return r;
1792 }
1793
1794 total_read += ret;
1795 left -= ret;
1796 off += ret;
1797 }
1798
11fdf7f2 1799 elapsed = coarse_mono_clock::now() - start_time;
7c673cae
FG
1800 ictx->perfcounter->tinc(l_librbd_rd_latency, elapsed);
1801 ictx->perfcounter->inc(l_librbd_rd);
1802 ictx->perfcounter->inc(l_librbd_rd_bytes, mylen);
1803 return total_read;
1804 }
1805
1806 // validate extent against image size; clip to image size if necessary
1807 int clip_io(ImageCtx *ictx, uint64_t off, uint64_t *len)
1808 {
11fdf7f2 1809 ceph_assert(ictx->snap_lock.is_locked());
7c673cae
FG
1810 uint64_t image_size = ictx->get_image_size(ictx->snap_id);
1811 bool snap_exists = ictx->snap_exists;
1812
1813 if (!snap_exists)
1814 return -ENOENT;
1815
1816 // special-case "len == 0" requests: always valid
1817 if (*len == 0)
1818 return 0;
1819
1820 // can't start past end
1821 if (off >= image_size)
1822 return -EINVAL;
1823
1824 // clip requests that extend past end to just end
1825 if ((off + *len) > image_size)
1826 *len = (size_t)(image_size - off);
1827
1828 return 0;
1829 }
1830
11fdf7f2 1831 int invalidate_cache(ImageCtx *ictx)
7c673cae
FG
1832 {
1833 CephContext *cct = ictx->cct;
11fdf7f2 1834 ldout(cct, 20) << "invalidate_cache " << ictx << dendl;
7c673cae
FG
1835
1836 int r = ictx->state->refresh_if_required();
1837 if (r < 0) {
1838 return r;
1839 }
1840
7c673cae
FG
1841 C_SaferCond ctx;
1842 {
1843 RWLock::RLocker owner_locker(ictx->owner_lock);
11fdf7f2 1844 ictx->io_object_dispatcher->invalidate_cache(&ctx);
7c673cae
FG
1845 }
1846 r = ctx.wait();
7c673cae
FG
1847 ictx->perfcounter->inc(l_librbd_invalidate_cache);
1848 return r;
1849 }
1850
1851 int poll_io_events(ImageCtx *ictx, io::AioCompletion **comps, int numcomp)
1852 {
1853 if (numcomp <= 0)
1854 return -EINVAL;
1855 CephContext *cct = ictx->cct;
1856 ldout(cct, 20) << __func__ << " " << ictx << " numcomp = " << numcomp
1857 << dendl;
1858 int i = 0;
1859 Mutex::Locker l(ictx->completed_reqs_lock);
11fdf7f2 1860 numcomp = std::min(numcomp, (int)ictx->completed_reqs.size());
7c673cae 1861 while (i < numcomp) {
7c673cae
FG
1862 comps[i++] = ictx->completed_reqs.front();
1863 ictx->completed_reqs.pop_front();
1864 }
1865 return i;
1866 }
1867
1868 int metadata_get(ImageCtx *ictx, const string &key, string *value)
1869 {
1870 CephContext *cct = ictx->cct;
1871 ldout(cct, 20) << "metadata_get " << ictx << " key=" << key << dendl;
1872
1873 int r = ictx->state->refresh_if_required();
1874 if (r < 0) {
1875 return r;
1876 }
1877
1878 return cls_client::metadata_get(&ictx->md_ctx, ictx->header_oid, key, value);
1879 }
1880
1881 int metadata_list(ImageCtx *ictx, const string &start, uint64_t max, map<string, bufferlist> *pairs)
1882 {
1883 CephContext *cct = ictx->cct;
1884 ldout(cct, 20) << "metadata_list " << ictx << dendl;
1885
1886 int r = ictx->state->refresh_if_required();
1887 if (r < 0) {
1888 return r;
1889 }
1890
1891 return cls_client::metadata_list(&ictx->md_ctx, ictx->header_oid, start, max, pairs);
1892 }
1893
1894 struct C_RBD_Readahead : public Context {
1895 ImageCtx *ictx;
1896 object_t oid;
1897 uint64_t offset;
1898 uint64_t length;
11fdf7f2
TL
1899
1900 bufferlist read_data;
1901 io::ExtentMap extent_map;
1902
7c673cae 1903 C_RBD_Readahead(ImageCtx *ictx, object_t oid, uint64_t offset, uint64_t length)
11fdf7f2
TL
1904 : ictx(ictx), oid(oid), offset(offset), length(length) {
1905 ictx->readahead.inc_pending();
1906 }
1907
7c673cae 1908 void finish(int r) override {
11fdf7f2
TL
1909 ldout(ictx->cct, 20) << "C_RBD_Readahead on " << oid << ": "
1910 << offset << "~" << length << dendl;
7c673cae
FG
1911 ictx->readahead.dec_pending();
1912 }
1913 };
1914
1915 void readahead(ImageCtx *ictx,
1916 const vector<pair<uint64_t,uint64_t> >& image_extents)
1917 {
1918 uint64_t total_bytes = 0;
1919 for (vector<pair<uint64_t,uint64_t> >::const_iterator p = image_extents.begin();
1920 p != image_extents.end();
1921 ++p) {
1922 total_bytes += p->second;
1923 }
11fdf7f2 1924
7c673cae
FG
1925 ictx->md_lock.get_write();
1926 bool abort = ictx->readahead_disable_after_bytes != 0 &&
1927 ictx->total_bytes_read > ictx->readahead_disable_after_bytes;
1928 if (abort) {
1929 ictx->md_lock.put_write();
1930 return;
1931 }
1932 ictx->total_bytes_read += total_bytes;
1933 ictx->snap_lock.get_read();
1934 uint64_t image_size = ictx->get_image_size(ictx->snap_id);
11fdf7f2 1935 auto snap_id = ictx->snap_id;
7c673cae
FG
1936 ictx->snap_lock.put_read();
1937 ictx->md_lock.put_write();
11fdf7f2 1938
7c673cae
FG
1939 pair<uint64_t, uint64_t> readahead_extent = ictx->readahead.update(image_extents, image_size);
1940 uint64_t readahead_offset = readahead_extent.first;
1941 uint64_t readahead_length = readahead_extent.second;
1942
1943 if (readahead_length > 0) {
1944 ldout(ictx->cct, 20) << "(readahead logical) " << readahead_offset << "~" << readahead_length << dendl;
1945 map<object_t,vector<ObjectExtent> > readahead_object_extents;
1946 Striper::file_to_extents(ictx->cct, ictx->format_string, &ictx->layout,
1947 readahead_offset, readahead_length, 0, readahead_object_extents);
1948 for (map<object_t,vector<ObjectExtent> >::iterator p = readahead_object_extents.begin(); p != readahead_object_extents.end(); ++p) {
1949 for (vector<ObjectExtent>::iterator q = p->second.begin(); q != p->second.end(); ++q) {
1950 ldout(ictx->cct, 20) << "(readahead) oid " << q->oid << " " << q->offset << "~" << q->length << dendl;
1951
11fdf7f2
TL
1952 auto req_comp = new C_RBD_Readahead(ictx, q->oid, q->offset,
1953 q->length);
1954 auto req = io::ObjectDispatchSpec::create_read(
1955 ictx, io::OBJECT_DISPATCH_LAYER_NONE, q->oid.name, q->objectno,
1956 q->offset, q->length, snap_id, 0, {}, &req_comp->read_data,
1957 &req_comp->extent_map, req_comp);
1958 req->send();
7c673cae
FG
1959 }
1960 }
1961 ictx->perfcounter->inc(l_librbd_readahead);
1962 ictx->perfcounter->inc(l_librbd_readahead_bytes, readahead_length);
1963 }
1964 }
1965
11fdf7f2
TL
1966 int list_watchers(ImageCtx *ictx,
1967 std::list<librbd::image_watcher_t> &watchers)
1968 {
1969 int r;
1970 std::string header_oid;
1971 std::list<obj_watch_t> obj_watchers;
1972
1973 if (ictx->old_format) {
1974 header_oid = util::old_header_name(ictx->name);
1975 } else {
1976 header_oid = util::header_name(ictx->id);
1977 }
1978
1979 r = ictx->md_ctx.list_watchers(header_oid, &obj_watchers);
1980 if (r < 0) {
1981 return r;
1982 }
1983
1984 for (auto i = obj_watchers.begin(); i != obj_watchers.end(); ++i) {
1985 librbd::image_watcher_t watcher;
1986 watcher.addr = i->addr;
1987 watcher.id = i->watcher_id;
1988 watcher.cookie = i->cookie;
1989
1990 watchers.push_back(watcher);
1991 }
1992
1993 return 0;
1994 }
1995
1996}
1997
1998std::ostream &operator<<(std::ostream &os, const librbd::ImageOptions &opts) {
1999 os << "[";
2000
2001 const char *delimiter = "";
2002 for (auto &i : librbd::IMAGE_OPTIONS_TYPE_MAPPING) {
2003 if (i.second == librbd::STR) {
2004 std::string val;
2005 if (opts.get(i.first, &val) == 0) {
2006 os << delimiter << librbd::image_option_name(i.first) << "=" << val;
2007 delimiter = ", ";
2008 }
2009 } else if (i.second == librbd::UINT64) {
2010 uint64_t val;
2011 if (opts.get(i.first, &val) == 0) {
2012 os << delimiter << librbd::image_option_name(i.first) << "=" << val;
2013 delimiter = ", ";
2014 }
2015 }
2016 }
7c673cae 2017
11fdf7f2 2018 os << "]";
7c673cae 2019
11fdf7f2 2020 return os;
7c673cae 2021}