1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #include "include/int_types.h"
8 #include "include/types.h"
9 #include "include/uuid.h"
10 #include "common/ceph_context.h"
11 #include "common/dout.h"
12 #include "common/errno.h"
13 #include "common/Throttle.h"
14 #include "common/event_socket.h"
15 #include "common/perf_counters.h"
16 #include "osdc/Striper.h"
17 #include "include/stringify.h"
19 #include "cls/lock/cls_lock_client.h"
20 #include "cls/rbd/cls_rbd.h"
21 #include "cls/rbd/cls_rbd_types.h"
22 #include "cls/rbd/cls_rbd_client.h"
23 #include "cls/journal/cls_journal_types.h"
24 #include "cls/journal/cls_journal_client.h"
26 #include "librbd/AsioEngine.h"
27 #include "librbd/ExclusiveLock.h"
28 #include "librbd/ImageCtx.h"
29 #include "librbd/ImageState.h"
30 #include "librbd/internal.h"
31 #include "librbd/Journal.h"
32 #include "librbd/ObjectMap.h"
33 #include "librbd/Operations.h"
34 #include "librbd/PluginRegistry.h"
35 #include "librbd/Types.h"
36 #include "librbd/Utils.h"
37 #include "librbd/api/Config.h"
38 #include "librbd/api/Image.h"
39 #include "librbd/api/Io.h"
40 #include "librbd/cache/Utils.h"
41 #include "librbd/exclusive_lock/AutomaticPolicy.h"
42 #include "librbd/exclusive_lock/StandardPolicy.h"
43 #include "librbd/deep_copy/MetadataCopyRequest.h"
44 #include "librbd/image/CloneRequest.h"
45 #include "librbd/image/CreateRequest.h"
46 #include "librbd/image/GetMetadataRequest.h"
47 #include "librbd/image/Types.h"
48 #include "librbd/io/AioCompletion.h"
49 #include "librbd/io/ImageDispatchSpec.h"
50 #include "librbd/io/ImageDispatcherInterface.h"
51 #include "librbd/io/ObjectDispatcherInterface.h"
52 #include "librbd/io/ObjectRequest.h"
53 #include "librbd/io/ReadResult.h"
54 #include "librbd/journal/Types.h"
55 #include "librbd/managed_lock/Types.h"
56 #include "librbd/mirror/EnableRequest.h"
57 #include "librbd/operation/TrimRequest.h"
59 #include "journal/Journaler.h"
61 #include <boost/scope_exit.hpp>
62 #include <boost/variant.hpp>
63 #include "include/ceph_assert.h"
65 #define dout_subsys ceph_subsys_rbd
67 #define dout_prefix *_dout << "librbd: "
69 #define rbd_howmany(x, y) (((x) + (y) - 1) / (y))
71 using std::istringstream
;
77 // list binds to list() here, so std::list is explicitly used below
79 using ceph::bufferlist
;
80 using librados::snap_t
;
81 using librados::IoCtx
;
82 using librados::Rados
;
88 int validate_pool(IoCtx
&io_ctx
, CephContext
*cct
) {
89 if (!cct
->_conf
.get_val
<bool>("rbd_validate_pool")) {
93 int r
= io_ctx
.stat(RBD_DIRECTORY
, NULL
, NULL
);
96 } else if (r
< 0 && r
!= -ENOENT
) {
97 lderr(cct
) << "failed to stat RBD directory: " << cpp_strerror(r
) << dendl
;
// allocate a self-managed snapshot id if this is a new pool, to force
// self-managed snapshot mode
104 r
= io_ctx
.selfmanaged_snap_create(&snap_id
);
106 lderr(cct
) << "pool not configured for self-managed RBD snapshot support"
110 lderr(cct
) << "failed to allocate self-managed snapshot: "
111 << cpp_strerror(r
) << dendl
;
115 r
= io_ctx
.selfmanaged_snap_remove(snap_id
);
117 lderr(cct
) << "failed to release self-managed snapshot " << snap_id
118 << ": " << cpp_strerror(r
) << dendl
;
123 } // anonymous namespace
125 int detect_format(IoCtx
&io_ctx
, const string
&name
,
126 bool *old_format
, uint64_t *size
)
128 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
131 int r
= io_ctx
.stat(util::old_header_name(name
), size
, NULL
);
135 r
= io_ctx
.stat(util::id_obj_name(name
), size
, NULL
);
142 ldout(cct
, 20) << "detect format of " << name
<< " : "
143 << (old_format
? (*old_format
? "old" : "new") :
144 "don't care") << dendl
;
// Returns true when a parent pool is recorded (parent_pool_id is not -1)
// and the offset `off` does not exceed `overlap`. The bound is inclusive:
// off == overlap still yields true.
bool has_parent(int64_t parent_pool_id, uint64_t off, uint64_t overlap)
{
  if (parent_pool_id == -1) {
    // -1 is the "no parent pool" marker
    return false;
  }
  return !(off > overlap);
}
153 void init_rbd_header(struct rbd_obj_header_ondisk
& ondisk
,
154 uint64_t size
, int order
, uint64_t bid
)
156 uint32_t hi
= bid
>> 32;
157 uint32_t lo
= bid
& 0xFFFFFFFF;
158 uint32_t extra
= rand() % 0xFFFFFFFF;
159 // FIPS zeroization audit 20191117: this memset is not security related.
160 memset(&ondisk
, 0, sizeof(ondisk
));
162 memcpy(&ondisk
.text
, RBD_HEADER_TEXT
, sizeof(RBD_HEADER_TEXT
));
163 memcpy(&ondisk
.signature
, RBD_HEADER_SIGNATURE
,
164 sizeof(RBD_HEADER_SIGNATURE
));
165 memcpy(&ondisk
.version
, RBD_HEADER_VERSION
, sizeof(RBD_HEADER_VERSION
));
167 snprintf(ondisk
.block_name
, sizeof(ondisk
.block_name
), "rb.%x.%x.%x",
170 ondisk
.image_size
= size
;
171 ondisk
.options
.order
= order
;
172 ondisk
.options
.crypt_type
= RBD_CRYPT_NONE
;
173 ondisk
.options
.comp_type
= RBD_COMP_NONE
;
175 ondisk
.snap_count
= 0;
177 ondisk
.snap_names_len
= 0;
180 void image_info(ImageCtx
*ictx
, image_info_t
& info
, size_t infosize
)
182 int obj_order
= ictx
->order
;
184 std::shared_lock locker
{ictx
->image_lock
};
185 info
.size
= ictx
->get_area_size(io::ImageArea::DATA
);
187 info
.obj_size
= 1ULL << obj_order
;
188 info
.num_objs
= Striper::get_num_objects(ictx
->layout
, info
.size
);
189 info
.order
= obj_order
;
190 strncpy(info
.block_name_prefix
, ictx
->object_prefix
.c_str(),
191 RBD_MAX_BLOCK_NAME_SIZE
);
192 info
.block_name_prefix
[RBD_MAX_BLOCK_NAME_SIZE
- 1] = '\0';
194 // clear deprecated fields
195 info
.parent_pool
= -1L;
196 info
.parent_name
[0] = '\0';
// Parses the object number out of an object name.
//
// The name is expected to consist of `object_prefix`, one separator
// character, and then the object number in hexadecimal.
//
// @param oid            full object name
// @param object_prefix  prefix shared by the image's objects
// @return the parsed object number, or 0 when nothing could be parsed
//         (for example when `oid` is not longer than the prefix plus
//         separator)
uint64_t oid_to_object_no(const std::string& oid,
                          const std::string& object_prefix)
{
  std::istringstream iss(oid);
  // skip object prefix and separator
  iss.ignore(object_prefix.length() + 1);

  // Must be initialised: if ignore() already hit end-of-stream, the
  // extraction below fails in its sentry and never stores into `num`.
  uint64_t num = 0;
  iss >> std::hex >> num;
  return num;
}
209 int read_header_bl(IoCtx
& io_ctx
, const string
& header_oid
,
210 bufferlist
& header
, uint64_t *ver
)
214 #define READ_SIZE 4096
217 r
= io_ctx
.read(header_oid
, bl
, READ_SIZE
, off
);
220 header
.claim_append(bl
);
222 } while (r
== READ_SIZE
);
224 static_assert(sizeof(RBD_HEADER_TEXT
) == sizeof(RBD_MIGRATE_HEADER_TEXT
),
225 "length of rbd headers must be the same");
227 if (header
.length() < sizeof(RBD_HEADER_TEXT
) ||
228 (memcmp(RBD_HEADER_TEXT
, header
.c_str(),
229 sizeof(RBD_HEADER_TEXT
)) != 0 &&
230 memcmp(RBD_MIGRATE_HEADER_TEXT
, header
.c_str(),
231 sizeof(RBD_MIGRATE_HEADER_TEXT
)) != 0)) {
232 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
233 lderr(cct
) << "unrecognized header format" << dendl
;
238 *ver
= io_ctx
.get_last_version();
243 int read_header(IoCtx
& io_ctx
, const string
& header_oid
,
244 struct rbd_obj_header_ondisk
*header
, uint64_t *ver
)
246 bufferlist header_bl
;
247 int r
= read_header_bl(io_ctx
, header_oid
, header_bl
, ver
);
250 if (header_bl
.length() < (int)sizeof(*header
))
252 memcpy(header
, header_bl
.c_str(), sizeof(*header
));
257 int tmap_set(IoCtx
& io_ctx
, const string
& imgname
)
259 bufferlist cmdbl
, emptybl
;
260 __u8 c
= CEPH_OSD_TMAP_SET
;
262 encode(imgname
, cmdbl
);
263 encode(emptybl
, cmdbl
);
264 return io_ctx
.tmap_update(RBD_DIRECTORY
, cmdbl
);
267 int tmap_rm(IoCtx
& io_ctx
, const string
& imgname
)
270 __u8 c
= CEPH_OSD_TMAP_RM
;
272 encode(imgname
, cmdbl
);
273 return io_ctx
.tmap_update(RBD_DIRECTORY
, cmdbl
);
276 typedef boost::variant
<std::string
,uint64_t> image_option_value_t
;
277 typedef std::map
<int,image_option_value_t
> image_options_t
;
278 typedef std::shared_ptr
<image_options_t
> image_options_ref
;
280 enum image_option_type_t
{
285 const std::map
<int, image_option_type_t
> IMAGE_OPTIONS_TYPE_MAPPING
= {
286 {RBD_IMAGE_OPTION_FORMAT
, UINT64
},
287 {RBD_IMAGE_OPTION_FEATURES
, UINT64
},
288 {RBD_IMAGE_OPTION_ORDER
, UINT64
},
289 {RBD_IMAGE_OPTION_STRIPE_UNIT
, UINT64
},
290 {RBD_IMAGE_OPTION_STRIPE_COUNT
, UINT64
},
291 {RBD_IMAGE_OPTION_JOURNAL_ORDER
, UINT64
},
292 {RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH
, UINT64
},
293 {RBD_IMAGE_OPTION_JOURNAL_POOL
, STR
},
294 {RBD_IMAGE_OPTION_FEATURES_SET
, UINT64
},
295 {RBD_IMAGE_OPTION_FEATURES_CLEAR
, UINT64
},
296 {RBD_IMAGE_OPTION_DATA_POOL
, STR
},
297 {RBD_IMAGE_OPTION_FLATTEN
, UINT64
},
298 {RBD_IMAGE_OPTION_CLONE_FORMAT
, UINT64
},
299 {RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE
, UINT64
},
302 std::string
image_option_name(int optname
) {
304 case RBD_IMAGE_OPTION_FORMAT
:
306 case RBD_IMAGE_OPTION_FEATURES
:
308 case RBD_IMAGE_OPTION_ORDER
:
310 case RBD_IMAGE_OPTION_STRIPE_UNIT
:
311 return "stripe_unit";
312 case RBD_IMAGE_OPTION_STRIPE_COUNT
:
313 return "stripe_count";
314 case RBD_IMAGE_OPTION_JOURNAL_ORDER
:
315 return "journal_order";
316 case RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH
:
317 return "journal_splay_width";
318 case RBD_IMAGE_OPTION_JOURNAL_POOL
:
319 return "journal_pool";
320 case RBD_IMAGE_OPTION_FEATURES_SET
:
321 return "features_set";
322 case RBD_IMAGE_OPTION_FEATURES_CLEAR
:
323 return "features_clear";
324 case RBD_IMAGE_OPTION_DATA_POOL
:
326 case RBD_IMAGE_OPTION_FLATTEN
:
328 case RBD_IMAGE_OPTION_CLONE_FORMAT
:
329 return "clone_format";
330 case RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE
:
331 return "mirror_image_mode";
333 return "unknown (" + stringify(optname
) + ")";
337 void image_options_create(rbd_image_options_t
* opts
)
339 image_options_ref
* opts_
= new image_options_ref(new image_options_t());
341 *opts
= static_cast<rbd_image_options_t
>(opts_
);
344 void image_options_create_ref(rbd_image_options_t
* opts
,
345 rbd_image_options_t orig
)
347 image_options_ref
* orig_
= static_cast<image_options_ref
*>(orig
);
348 image_options_ref
* opts_
= new image_options_ref(*orig_
);
350 *opts
= static_cast<rbd_image_options_t
>(opts_
);
353 void image_options_copy(rbd_image_options_t
* opts
,
354 const ImageOptions
&orig
)
356 image_options_ref
* opts_
= new image_options_ref(new image_options_t());
358 *opts
= static_cast<rbd_image_options_t
>(opts_
);
362 for (auto &i
: IMAGE_OPTIONS_TYPE_MAPPING
) {
365 if (orig
.get(i
.first
, &str_val
) == 0) {
366 image_options_set(*opts
, i
.first
, str_val
);
370 if (orig
.get(i
.first
, &uint64_val
) == 0) {
371 image_options_set(*opts
, i
.first
, uint64_val
);
378 void image_options_destroy(rbd_image_options_t opts
)
380 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
385 int image_options_set(rbd_image_options_t opts
, int optname
,
386 const std::string
& optval
)
388 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
390 std::map
<int, image_option_type_t
>::const_iterator i
=
391 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
393 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= STR
) {
397 (*opts_
->get())[optname
] = optval
;
401 int image_options_set(rbd_image_options_t opts
, int optname
, uint64_t optval
)
403 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
405 std::map
<int, image_option_type_t
>::const_iterator i
=
406 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
408 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= UINT64
) {
412 (*opts_
->get())[optname
] = optval
;
416 int image_options_get(rbd_image_options_t opts
, int optname
,
419 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
421 std::map
<int, image_option_type_t
>::const_iterator i
=
422 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
424 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= STR
) {
428 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
430 if (j
== (*opts_
)->end()) {
434 *optval
= boost::get
<std::string
>(j
->second
);
438 int image_options_get(rbd_image_options_t opts
, int optname
, uint64_t* optval
)
440 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
442 std::map
<int, image_option_type_t
>::const_iterator i
=
443 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
445 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= UINT64
) {
449 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
451 if (j
== (*opts_
)->end()) {
455 *optval
= boost::get
<uint64_t>(j
->second
);
459 int image_options_is_set(rbd_image_options_t opts
, int optname
,
462 if (IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
) ==
463 IMAGE_OPTIONS_TYPE_MAPPING
.end()) {
467 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
468 *is_set
= ((*opts_
)->find(optname
) != (*opts_
)->end());
472 int image_options_unset(rbd_image_options_t opts
, int optname
)
474 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
476 std::map
<int, image_option_type_t
>::const_iterator i
=
477 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
479 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end()) {
480 ceph_assert((*opts_
)->find(optname
) == (*opts_
)->end());
484 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
486 if (j
== (*opts_
)->end()) {
494 void image_options_clear(rbd_image_options_t opts
)
496 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
501 bool image_options_is_empty(rbd_image_options_t opts
)
503 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
505 return (*opts_
)->empty();
508 int create_v1(IoCtx
& io_ctx
, const char *imgname
, uint64_t size
, int order
)
510 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
512 ldout(cct
, 20) << __func__
<< " " << &io_ctx
<< " name = " << imgname
513 << " size = " << size
<< " order = " << order
<< dendl
;
514 int r
= validate_pool(io_ctx
, cct
);
519 if (!io_ctx
.get_namespace().empty()) {
520 lderr(cct
) << "attempting to add v1 image to namespace" << dendl
;
524 ldout(cct
, 2) << "adding rbd image to directory..." << dendl
;
525 r
= tmap_set(io_ctx
, imgname
);
527 lderr(cct
) << "error adding image to directory: " << cpp_strerror(r
)
533 uint64_t bid
= rados
.get_instance_id();
535 ldout(cct
, 2) << "creating rbd image..." << dendl
;
536 struct rbd_obj_header_ondisk header
;
537 init_rbd_header(header
, size
, order
, bid
);
540 bl
.append((const char *)&header
, sizeof(header
));
542 string header_oid
= util::old_header_name(imgname
);
543 r
= io_ctx
.write(header_oid
, bl
, bl
.length(), 0);
545 lderr(cct
) << "Error writing image header: " << cpp_strerror(r
)
547 int remove_r
= tmap_rm(io_ctx
, imgname
);
549 lderr(cct
) << "Could not remove image from directory after "
550 << "header creation failed: "
551 << cpp_strerror(remove_r
) << dendl
;
556 ldout(cct
, 2) << "done." << dendl
;
560 int create(librados::IoCtx
& io_ctx
, const char *imgname
, uint64_t size
,
563 uint64_t order_
= *order
;
566 int r
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order_
);
569 r
= create(io_ctx
, imgname
, "", size
, opts
, "", "", false);
571 int r1
= opts
.get(RBD_IMAGE_OPTION_ORDER
, &order_
);
572 ceph_assert(r1
== 0);
578 int create(IoCtx
& io_ctx
, const char *imgname
, uint64_t size
,
579 bool old_format
, uint64_t features
, int *order
,
580 uint64_t stripe_unit
, uint64_t stripe_count
)
585 uint64_t order_
= *order
;
586 uint64_t format
= old_format
? 1 : 2;
590 r
= opts
.set(RBD_IMAGE_OPTION_FORMAT
, format
);
592 r
= opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
594 r
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order_
);
596 r
= opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
598 r
= opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
601 r
= create(io_ctx
, imgname
, "", size
, opts
, "", "", false);
603 int r1
= opts
.get(RBD_IMAGE_OPTION_ORDER
, &order_
);
604 ceph_assert(r1
== 0);
610 int create(IoCtx
& io_ctx
, const std::string
&image_name
,
611 const std::string
&image_id
, uint64_t size
,
613 const std::string
&non_primary_global_image_id
,
614 const std::string
&primary_mirror_uuid
,
615 bool skip_mirror_enable
)
617 std::string
id(image_id
);
619 id
= util::generate_image_id(io_ctx
);
622 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
624 if (opts
.get(RBD_IMAGE_OPTION_FLATTEN
, &option
) == 0) {
625 lderr(cct
) << "create does not support 'flatten' image option" << dendl
;
628 if (opts
.get(RBD_IMAGE_OPTION_CLONE_FORMAT
, &option
) == 0) {
629 lderr(cct
) << "create does not support 'clone_format' image option"
634 ldout(cct
, 10) << __func__
<< " name=" << image_name
<< ", "
635 << "id= " << id
<< ", "
636 << "size=" << size
<< ", opts=" << opts
<< dendl
;
639 if (opts
.get(RBD_IMAGE_OPTION_FORMAT
, &format
) != 0)
640 format
= cct
->_conf
.get_val
<uint64_t>("rbd_default_format");
641 bool old_format
= format
== 1;
643 // make sure it doesn't already exist, in either format
644 int r
= detect_format(io_ctx
, image_name
, NULL
, NULL
);
647 lderr(cct
) << "Could not tell if " << image_name
<< " already exists"
651 lderr(cct
) << "rbd image " << image_name
<< " already exists" << dendl
;
656 if (opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
) != 0 || order
== 0) {
657 order
= cct
->_conf
.get_val
<uint64_t>("rbd_default_order");
659 r
= image::CreateRequest
<>::validate_order(cct
, order
);
665 if ( !getenv("RBD_FORCE_ALLOW_V1") ) {
666 lderr(cct
) << "Format 1 image creation unsupported. " << dendl
;
669 lderr(cct
) << "Forced V1 image creation. " << dendl
;
670 r
= create_v1(io_ctx
, image_name
.c_str(), size
, order
);
672 AsioEngine
asio_engine(io_ctx
);
674 ConfigProxy config
{cct
->_conf
};
675 api::Config
<>::apply_pool_overrides(io_ctx
, &config
);
677 uint32_t create_flags
= 0U;
678 uint64_t mirror_image_mode
= RBD_MIRROR_IMAGE_MODE_JOURNAL
;
679 if (skip_mirror_enable
) {
680 create_flags
= image::CREATE_FLAG_SKIP_MIRROR_ENABLE
;
681 } else if (opts
.get(RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE
,
682 &mirror_image_mode
) == 0) {
683 create_flags
= image::CREATE_FLAG_FORCE_MIRROR_ENABLE
;
687 image::CreateRequest
<> *req
= image::CreateRequest
<>::create(
688 config
, io_ctx
, image_name
, id
, size
, opts
, create_flags
,
689 static_cast<cls::rbd::MirrorImageMode
>(mirror_image_mode
),
690 non_primary_global_image_id
, primary_mirror_uuid
,
691 asio_engine
.get_work_queue(), &cond
);
697 int r1
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
698 ceph_assert(r1
== 0);
704 * Parent may be in different pool, hence different IoCtx
706 int clone(IoCtx
& p_ioctx
, const char *p_name
, const char *p_snap_name
,
707 IoCtx
& c_ioctx
, const char *c_name
,
708 uint64_t features
, int *c_order
,
709 uint64_t stripe_unit
, int stripe_count
)
711 uint64_t order
= *c_order
;
714 opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
715 opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
716 opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
717 opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
719 int r
= clone(p_ioctx
, nullptr, p_name
, CEPH_NOSNAP
, p_snap_name
,
720 c_ioctx
, nullptr, c_name
, opts
, "", "");
721 opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
);
726 int clone(IoCtx
& p_ioctx
, const char *p_id
, const char *p_name
,
727 uint64_t p_snap_id
, const char *p_snap_name
, IoCtx
& c_ioctx
,
728 const char *c_id
, const char *c_name
, ImageOptions
& c_opts
,
729 const std::string
&non_primary_global_image_id
,
730 const std::string
&primary_mirror_uuid
)
732 CephContext
*cct
= (CephContext
*)p_ioctx
.cct();
733 ldout(cct
, 10) << __func__
734 << " p_id=" << (p_id
?: "")
735 << ", p_name=" << (p_name
?: "")
736 << ", p_snap_id=" << p_snap_id
737 << ", p_snap_name=" << (p_snap_name
?: "")
738 << ", c_id=" << (c_id
?: "")
739 << ", c_name=" << c_name
740 << ", c_opts=" << c_opts
741 << ", non_primary_global_image_id=" << non_primary_global_image_id
742 << ", primary_mirror_uuid=" << primary_mirror_uuid
745 if (((p_id
== nullptr) ^ (p_name
== nullptr)) == 0) {
746 lderr(cct
) << "must specify either parent image id or parent image name"
750 if (((p_snap_id
== CEPH_NOSNAP
) ^ (p_snap_name
== nullptr)) == 0) {
751 lderr(cct
) << "must specify either parent snap id or parent snap name"
757 if (c_opts
.get(RBD_IMAGE_OPTION_FLATTEN
, &flatten
) == 0) {
758 lderr(cct
) << "clone does not support 'flatten' image option" << dendl
;
763 std::string parent_id
;
764 if (p_id
== nullptr) {
765 r
= cls_client::dir_get_id(&p_ioctx
, RBD_DIRECTORY
, p_name
,
769 lderr(cct
) << "failed to retrieve parent image id: "
770 << cpp_strerror(r
) << dendl
;
778 std::string clone_id
;
779 if (c_id
== nullptr) {
780 clone_id
= util::generate_image_id(c_ioctx
);
785 ldout(cct
, 10) << __func__
<< " parent_id=" << parent_id
786 << ", clone_id=" << clone_id
<< dendl
;
788 ConfigProxy config
{reinterpret_cast<CephContext
*>(c_ioctx
.cct())->_conf
};
789 api::Config
<>::apply_pool_overrides(c_ioctx
, &config
);
791 AsioEngine
asio_engine(p_ioctx
);
794 auto *req
= image::CloneRequest
<>::create(
795 config
, p_ioctx
, parent_id
, (p_snap_name
?: ""),
796 {cls::rbd::UserSnapshotNamespace
{}}, p_snap_id
, c_ioctx
, c_name
,
797 clone_id
, c_opts
, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL
,
798 non_primary_global_image_id
, primary_mirror_uuid
,
799 asio_engine
.get_work_queue(), &cond
);
810 int rename(IoCtx
& io_ctx
, const char *srcname
, const char *dstname
)
812 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
813 ldout(cct
, 20) << "rename " << &io_ctx
<< " " << srcname
<< " -> "
816 ImageCtx
*ictx
= new ImageCtx(srcname
, "", "", io_ctx
, false);
817 int r
= ictx
->state
->open(0);
819 lderr(cct
) << "error opening source image: " << cpp_strerror(r
) << dendl
;
822 BOOST_SCOPE_EXIT((ictx
)) {
823 ictx
->state
->close();
824 } BOOST_SCOPE_EXIT_END
826 return ictx
->operations
->rename(dstname
);
829 int info(ImageCtx
*ictx
, image_info_t
& info
, size_t infosize
)
831 ldout(ictx
->cct
, 20) << "info " << ictx
<< dendl
;
833 int r
= ictx
->state
->refresh_if_required();
837 image_info(ictx
, info
, infosize
);
841 int get_old_format(ImageCtx
*ictx
, uint8_t *old
)
843 int r
= ictx
->state
->refresh_if_required();
846 *old
= ictx
->old_format
;
850 int get_size(ImageCtx
*ictx
, uint64_t *size
)
852 int r
= ictx
->state
->refresh_if_required();
855 std::shared_lock l2
{ictx
->image_lock
};
856 *size
= ictx
->get_area_size(io::ImageArea::DATA
);
860 int get_features(ImageCtx
*ictx
, uint64_t *features
)
862 int r
= ictx
->state
->refresh_if_required();
865 std::shared_lock l
{ictx
->image_lock
};
866 *features
= ictx
->features
;
870 int get_overlap(ImageCtx
*ictx
, uint64_t *overlap
)
872 int r
= ictx
->state
->refresh_if_required();
876 std::shared_lock image_locker
{ictx
->image_lock
};
877 uint64_t raw_overlap
;
878 r
= ictx
->get_parent_overlap(ictx
->snap_id
, &raw_overlap
);
882 auto _overlap
= ictx
->reduce_parent_overlap(raw_overlap
, false);
883 *overlap
= (_overlap
.second
== io::ImageArea::DATA
? _overlap
.first
: 0);
887 int get_flags(ImageCtx
*ictx
, uint64_t *flags
)
889 int r
= ictx
->state
->refresh_if_required();
894 std::shared_lock l2
{ictx
->image_lock
};
895 return ictx
->get_flags(ictx
->snap_id
, flags
);
898 int set_image_notification(ImageCtx
*ictx
, int fd
, int type
)
900 CephContext
*cct
= ictx
->cct
;
901 ldout(cct
, 20) << __func__
<< " " << ictx
<< " fd " << fd
<< " type" << type
<< dendl
;
903 int r
= ictx
->state
->refresh_if_required();
908 if (ictx
->event_socket
.is_valid())
910 return ictx
->event_socket
.init(fd
, type
);
913 int is_exclusive_lock_owner(ImageCtx
*ictx
, bool *is_owner
)
915 CephContext
*cct
= ictx
->cct
;
916 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< dendl
;
919 std::shared_lock owner_locker
{ictx
->owner_lock
};
920 if (ictx
->exclusive_lock
== nullptr) {
924 // might have been blocklisted by peer -- ensure we still own
925 // the lock by pinging the OSD
926 int r
= ictx
->exclusive_lock
->assert_header_locked();
927 if (r
== -EBUSY
|| r
== -ENOENT
) {
937 int lock_acquire(ImageCtx
*ictx
, rbd_lock_mode_t lock_mode
)
939 CephContext
*cct
= ictx
->cct
;
940 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< ", "
941 << "lock_mode=" << lock_mode
<< dendl
;
943 if (lock_mode
!= RBD_LOCK_MODE_EXCLUSIVE
) {
947 C_SaferCond lock_ctx
;
949 std::unique_lock l
{ictx
->owner_lock
};
951 if (ictx
->exclusive_lock
== nullptr) {
952 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
956 if (ictx
->get_exclusive_lock_policy()->may_auto_request_lock()) {
957 ictx
->set_exclusive_lock_policy(
958 new exclusive_lock::StandardPolicy(ictx
));
961 if (ictx
->exclusive_lock
->is_lock_owner()) {
965 ictx
->exclusive_lock
->acquire_lock(&lock_ctx
);
968 int r
= lock_ctx
.wait();
970 lderr(cct
) << "failed to request exclusive lock: " << cpp_strerror(r
)
975 std::shared_lock l
{ictx
->owner_lock
};
976 if (ictx
->exclusive_lock
== nullptr) {
978 } else if (!ictx
->exclusive_lock
->is_lock_owner()) {
979 lderr(cct
) << "failed to acquire exclusive lock" << dendl
;
980 return ictx
->exclusive_lock
->get_unlocked_op_error();
986 int lock_release(ImageCtx
*ictx
)
988 CephContext
*cct
= ictx
->cct
;
989 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< dendl
;
991 C_SaferCond lock_ctx
;
993 std::unique_lock l
{ictx
->owner_lock
};
995 if (ictx
->exclusive_lock
== nullptr ||
996 !ictx
->exclusive_lock
->is_lock_owner()) {
997 lderr(cct
) << "not exclusive lock owner" << dendl
;
1001 ictx
->exclusive_lock
->release_lock(&lock_ctx
);
1004 int r
= lock_ctx
.wait();
1006 lderr(cct
) << "failed to release exclusive lock: " << cpp_strerror(r
)
1013 int lock_get_owners(ImageCtx
*ictx
, rbd_lock_mode_t
*lock_mode
,
1014 std::list
<std::string
> *lock_owners
)
1016 CephContext
*cct
= ictx
->cct
;
1017 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< dendl
;
1019 managed_lock::Locker locker
;
1020 C_SaferCond get_owner_ctx
;
1022 std::shared_lock owner_locker
{ictx
->owner_lock
};
1024 if (ictx
->exclusive_lock
== nullptr) {
1025 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1029 ictx
->exclusive_lock
->get_locker(&locker
, &get_owner_ctx
);
1032 int r
= get_owner_ctx
.wait();
1036 lderr(cct
) << "failed to determine current lock owner: "
1037 << cpp_strerror(r
) << dendl
;
1041 *lock_mode
= RBD_LOCK_MODE_EXCLUSIVE
;
1042 lock_owners
->clear();
1043 lock_owners
->emplace_back(locker
.address
);
1047 int lock_break(ImageCtx
*ictx
, rbd_lock_mode_t lock_mode
,
1048 const std::string
&lock_owner
) {
1049 CephContext
*cct
= ictx
->cct
;
1050 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< ", "
1051 << "lock_mode=" << lock_mode
<< ", "
1052 << "lock_owner=" << lock_owner
<< dendl
;
1054 if (lock_mode
!= RBD_LOCK_MODE_EXCLUSIVE
) {
1058 if (ictx
->read_only
) {
1062 managed_lock::Locker locker
;
1063 C_SaferCond get_owner_ctx
;
1065 std::shared_lock l
{ictx
->owner_lock
};
1067 if (ictx
->exclusive_lock
== nullptr) {
1068 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1072 ictx
->exclusive_lock
->get_locker(&locker
, &get_owner_ctx
);
1074 int r
= get_owner_ctx
.wait();
1078 lderr(cct
) << "failed to determine current lock owner: "
1079 << cpp_strerror(r
) << dendl
;
1083 if (locker
.address
!= lock_owner
) {
1087 C_SaferCond break_ctx
;
1089 std::shared_lock l
{ictx
->owner_lock
};
1091 if (ictx
->exclusive_lock
== nullptr) {
1092 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1096 ictx
->exclusive_lock
->break_lock(locker
, true, &break_ctx
);
1098 r
= break_ctx
.wait();
1102 lderr(cct
) << "failed to break lock: " << cpp_strerror(r
) << dendl
;
1108 int copy(ImageCtx
*src
, IoCtx
& dest_md_ctx
, const char *destname
,
1109 ImageOptions
& opts
, ProgressContext
&prog_ctx
, size_t sparse_size
)
1111 CephContext
*cct
= (CephContext
*)dest_md_ctx
.cct();
1113 if (opts
.get(RBD_IMAGE_OPTION_FLATTEN
, &option
) == 0) {
1114 lderr(cct
) << "copy does not support 'flatten' image option" << dendl
;
1117 if (opts
.get(RBD_IMAGE_OPTION_CLONE_FORMAT
, &option
) == 0) {
1118 lderr(cct
) << "copy does not support 'clone_format' image option"
1123 ldout(cct
, 20) << "copy " << src
->name
1124 << (src
->snap_name
.length() ? "@" + src
->snap_name
: "")
1125 << " -> " << destname
<< " opts = " << opts
<< dendl
;
1127 src
->image_lock
.lock_shared();
1128 uint64_t features
= src
->features
;
1129 uint64_t src_size
= src
->get_image_size(src
->snap_id
);
1130 src
->image_lock
.unlock_shared();
1131 uint64_t format
= 2;
1132 if (opts
.get(RBD_IMAGE_OPTION_FORMAT
, &format
) != 0) {
1133 opts
.set(RBD_IMAGE_OPTION_FORMAT
, format
);
1135 uint64_t stripe_unit
= src
->stripe_unit
;
1136 if (opts
.get(RBD_IMAGE_OPTION_STRIPE_UNIT
, &stripe_unit
) != 0) {
1137 opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
1139 uint64_t stripe_count
= src
->stripe_count
;
1140 if (opts
.get(RBD_IMAGE_OPTION_STRIPE_COUNT
, &stripe_count
) != 0) {
1141 opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
1143 uint64_t order
= src
->order
;
1144 if (opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
) != 0) {
1145 opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
1147 if (opts
.get(RBD_IMAGE_OPTION_FEATURES
, &features
) != 0) {
1148 opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
1150 if (features
& ~RBD_FEATURES_ALL
) {
1151 lderr(cct
) << "librbd does not support requested features" << dendl
;
1155 int r
= create(dest_md_ctx
, destname
, "", src_size
, opts
, "", "", false);
1157 lderr(cct
) << "header creation failed" << dendl
;
1160 opts
.set(RBD_IMAGE_OPTION_ORDER
, static_cast<uint64_t>(order
));
1162 ImageCtx
*dest
= new librbd::ImageCtx(destname
, "", nullptr, dest_md_ctx
,
1164 r
= dest
->state
->open(0);
1166 lderr(cct
) << "failed to read newly created header" << dendl
;
1170 r
= copy(src
, dest
, prog_ctx
, sparse_size
);
1172 int close_r
= dest
->state
->close();
1173 if (r
== 0 && close_r
< 0) {
1179 class C_CopyWrite
: public Context
{
1181 C_CopyWrite(bufferlist
*bl
, Context
* ctx
)
1182 : m_bl(bl
), m_ctx(ctx
) {}
1183 void finish(int r
) override
{
1192 class C_CopyRead
: public Context
{
1194 C_CopyRead(SimpleThrottle
*throttle
, ImageCtx
*dest
, uint64_t offset
,
1195 bufferlist
*bl
, size_t sparse_size
)
1196 : m_throttle(throttle
), m_dest(dest
), m_offset(offset
), m_bl(bl
),
1197 m_sparse_size(sparse_size
) {
1198 m_throttle
->start_op();
1200 void finish(int r
) override
{
1202 lderr(m_dest
->cct
) << "error reading from source image at offset "
1203 << m_offset
<< ": " << cpp_strerror(r
) << dendl
;
1205 m_throttle
->end_op(r
);
1208 ceph_assert(m_bl
->length() == (size_t)r
);
1210 if (m_bl
->is_zero()) {
1212 m_throttle
->end_op(r
);
1216 if (!m_sparse_size
) {
1217 m_sparse_size
= (1 << m_dest
->order
);
1220 auto *throttle
= m_throttle
;
1221 auto *end_op_ctx
= new LambdaContext([throttle
](int r
) {
1222 throttle
->end_op(r
);
1224 auto gather_ctx
= new C_Gather(m_dest
->cct
, end_op_ctx
);
1226 m_bl
->rebuild(buffer::ptr_node::create(m_bl
->length()));
1227 size_t write_offset
= 0;
1228 size_t write_length
= 0;
1230 size_t length
= m_bl
->length();
1231 const auto& m_ptr
= m_bl
->front();
1232 while (offset
< length
) {
1233 if (util::calc_sparse_extent(m_ptr
,
1239 bufferlist
*write_bl
= new bufferlist();
1240 write_bl
->push_back(
1241 buffer::ptr_node::create(m_ptr
, write_offset
, write_length
));
1242 Context
*ctx
= new C_CopyWrite(write_bl
, gather_ctx
->new_sub());
1243 auto comp
= io::AioCompletion::create(ctx
);
1245 // coordinate through AIO WQ to ensure lock is acquired if needed
1246 api::Io
<>::aio_write(*m_dest
, comp
, m_offset
+ write_offset
,
1247 write_length
, std::move(*write_bl
),
1248 LIBRADOS_OP_FLAG_FADVISE_DONTNEED
,
1249 std::move(read_trace
));
1250 write_offset
= offset
;
1255 ceph_assert(gather_ctx
->get_sub_created_count() > 0);
1256 gather_ctx
->activate();
1259 ZTracer::Trace read_trace
;
1262 SimpleThrottle
*m_throttle
;
1266 size_t m_sparse_size
;
// Copy the data of image `src` into image `dest`.
//
// Steps visible in this listing:
//  - sample both image sizes, each under its own shared image_lock, and log
//    an error when dest is smaller than src;
//  - issue an internal flush on src and wait for it;
//  - create a deep_copy::MetadataCopyRequest and log if copying metadata fails;
//  - walk src one stripe period at a time, issuing one read per period whose
//    completion context is a C_CopyRead built with dest, the offset and
//    sparse_size, and report progress through prog_ctx.
//
// NOTE(review): the listing omits several original lines (the embedded
// numbering has gaps, e.g. 1282 -> 1286 and 1296 -> 1302), so the return
// statements, closing braces and some calls are not visible here.
1269 int copy(ImageCtx
*src
, ImageCtx
*dest
, ProgressContext
&prog_ctx
, size_t sparse_size
)
// Read the source size while holding src's image_lock in shared mode.
1271 src
->image_lock
.lock_shared();
1272 uint64_t src_size
= src
->get_image_size(src
->snap_id
);
1273 src
->image_lock
.unlock_shared();
// Same for the destination size.
1275 dest
->image_lock
.lock_shared();
1276 uint64_t dest_size
= dest
->get_image_size(dest
->snap_id
);
1277 dest
->image_lock
.unlock_shared();
1279 CephContext
*cct
= src
->cct
;
// A destination smaller than the source is reported as an error.
// NOTE(review): the value returned on this path is not visible in the listing.
1280 if (dest_size
< src_size
) {
1281 lderr(cct
) << " src size " << src_size
<< " > dest size "
1282 << dest_size
<< dendl
;
1286 // ensure previous writes are visible to dest
1287 C_SaferCond flush_ctx
;
1289 auto aio_comp
= io::AioCompletion::create_and_start(&flush_ctx
, src
,
1290 io::AIO_TYPE_FLUSH
);
1291 auto req
= io::ImageDispatchSpec::create_flush(
1292 *src
, io::IMAGE_DISPATCH_LAYER_INTERNAL_START
,
1293 aio_comp
, io::FLUSH_SOURCE_INTERNAL
, {});
// Block until the flush completion fires.
1296 int r
= flush_ctx
.wait();
// Metadata copy; its arguments are not visible in this listing.
1302 auto req
= deep_copy::MetadataCopyRequest
<>::create(
1308 lderr(cct
) << "failed to copy metadata: " << cpp_strerror(r
) << dendl
;
// The trace is only initialised when blkin tracing is enabled on src.
1312 ZTracer::Trace trace
;
1313 if (src
->blkin_trace_all
) {
1314 trace
.init("copy", &src
->trace_endpoint
);
// Throttle sized from the rbd_concurrent_management_ops config value.
1317 SimpleThrottle
throttle(src
->config
.get_val
<uint64_t>("rbd_concurrent_management_ops"), false);
1318 uint64_t period
= src
->get_stripe_period();
1319 unsigned fadvise_flags
= LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
|
1320 LIBRADOS_OP_FLAG_FADVISE_NOCACHE
;
1321 uint64_t object_id
= 0;
// One iteration per stripe period of the source image.
1322 for (uint64_t offset
= 0; offset
< src_size
; offset
+= period
) {
// Stop early and return the throttle's result once an error is pending.
1323 if (throttle
.pending_error()) {
1324 return throttle
.wait_for_ret();
// The object map is consulted under the shared image_lock.
// NOTE(review): what is done with the result of the check below is not
// visible in this listing.
1328 std::shared_lock image_locker
{src
->image_lock
};
1329 if (src
->object_map
!= nullptr) {
1331 // each period is related to src->stripe_count objects, check them all
1332 for (uint64_t i
=0; i
< src
->stripe_count
; i
++) {
1333 if (object_id
< src
->object_map
->size() &&
1334 src
->object_map
->object_may_exist(object_id
)) {
1342 object_id
+= src
->stripe_count
;
// The last period may be shorter than a full stripe period.
1346 uint64_t len
= std::min(period
, src_size
- offset
);
1347 bufferlist
*bl
= new bufferlist();
1348 auto ctx
= new C_CopyRead(&throttle
, dest
, offset
, bl
, sparse_size
);
1349 auto comp
= io::AioCompletion::create_and_start
<Context
>(
1350 ctx
, src
, io::AIO_TYPE_READ
);
1351 auto req
= io::ImageDispatchSpec::create_read(
1352 *src
, io::IMAGE_DISPATCH_LAYER_NONE
, comp
,
1353 {{offset
, len
}}, io::ImageArea::DATA
, io::ReadResult
{bl
},
1354 src
->get_data_io_context(), fadvise_flags
, 0, trace
);
// Hand the same trace to the read completion context.
1356 ctx
->read_trace
= trace
;
1359 prog_ctx
.update_progress(offset
, src_size
);
// Collect the throttle's final result.
1362 r
= throttle
.wait_for_ret();
// Final progress update reports the full source size.
1364 prog_ctx
.update_progress(src_size
, src_size
);
// Report the lock state held on the image context.
//
// Visible behaviour: refreshes the image state if required, then, holding
// image_lock in shared mode, assigns ictx->exclusive_locked to *exclusive and
// ictx->lock_tag to *tag, and appends one entry (client, cookie, address) to
// *lockers for every element of ictx->lockers.
//
// NOTE(review): the declarations of `exclusive` and `tag`, any guards around
// the two assignments, and the return statements are on lines missing from
// this listing (embedded numbering skips 1370-1372, 1380, 1382, ...).
1368 int list_lockers(ImageCtx
*ictx
,
1369 std::list
<locker_t
> *lockers
,
1373 ldout(ictx
->cct
, 20) << "list_locks on image " << ictx
<< dendl
;
1375 int r
= ictx
->state
->refresh_if_required();
// Everything below reads ictx fields under the shared image_lock.
1379 std::shared_lock locker
{ictx
->image_lock
};
1381 *exclusive
= ictx
->exclusive_locked
;
1383 *tag
= ictx
->lock_tag
;
1386 map
<rados::cls::lock::locker_id_t
,
1387 rados::cls::lock::locker_info_t
>::const_iterator it
;
// Convert each (locker id, locker info) pair into the caller-facing entry.
1388 for (it
= ictx
->lockers
.begin(); it
!= ictx
->lockers
.end(); ++it
) {
1390 locker
.client
= stringify(it
->first
.locker
);
1391 locker
.cookie
= it
->first
.cookie
;
1392 locker
.address
= it
->second
.addr
.get_legacy_str();
1393 lockers
->push_back(locker
);
// Take the RBD_LOCK_NAME lock on the image header object.
//
// Visible behaviour: logs the request, refreshes the image state if required,
// then, holding image_lock in shared mode, calls rados::cls::lock::lock on
// ictx->header_oid with ClsLockType::EXCLUSIVE or ClsLockType::SHARED chosen
// by `exclusive`, passing `cookie` and `tag`; ictx->notify_update() is called
// further down.
//
// NOTE(review): the `tag` parameter declaration, the error checks, the
// delimiters of the block comment below and the return statements are on
// lines missing from this listing, so whether notify_update() is conditional
// on success cannot be confirmed from here.
1400 int lock(ImageCtx
*ictx
, bool exclusive
, const string
& cookie
,
1403 ldout(ictx
->cct
, 20) << "lock image " << ictx
<< " exclusive=" << exclusive
1404 << " cookie='" << cookie
<< "' tag='" << tag
<< "'"
1407 int r
= ictx
->state
->refresh_if_required();
1412 * If we wanted we could do something more intelligent, like local
1413 * checks that we think we will succeed. But for now, let's not
1414 * duplicate that code.
1417 std::shared_lock locker
{ictx
->image_lock
};
1418 r
= rados::cls::lock::lock(&ictx
->md_ctx
, ictx
->header_oid
, RBD_LOCK_NAME
,
1419 exclusive
? ClsLockType::EXCLUSIVE
: ClsLockType::SHARED
,
1420 cookie
, tag
, "", utime_t(), 0);
1426 ictx
->notify_update();
// Release the RBD_LOCK_NAME lock identified by `cookie` on the image header
// object.
//
// Visible behaviour: logs the request, refreshes the image state if required,
// calls rados::cls::lock::unlock on ictx->header_oid while holding image_lock
// in shared mode, and calls ictx->notify_update() further down.
//
// NOTE(review): error checks and return statements are on lines missing from
// this listing; whether notify_update() runs only on success is not visible.
1430 int unlock(ImageCtx
*ictx
, const string
& cookie
)
1432 ldout(ictx
->cct
, 20) << "unlock image " << ictx
1433 << " cookie='" << cookie
<< "'" << dendl
;
1435 int r
= ictx
->state
->refresh_if_required();
// The cls call is made under the shared image_lock.
1440 std::shared_lock locker
{ictx
->image_lock
};
1441 r
= rados::cls::lock::unlock(&ictx
->md_ctx
, ictx
->header_oid
,
1442 RBD_LOCK_NAME
, cookie
);
1448 ictx
->notify_update();
// Break the RBD_LOCK_NAME lock held by `client` with `cookie` on the image
// header object.
//
// Visible behaviour:
//  - refreshes the image state if required;
//  - parses `client` into an entity_name_t and logs when parsing fails;
//  - when the rbd_blocklist_on_break_lock config option is set, fetches the
//    current lock info, looks up the address of the locker equal to the
//    parsed client, and calls Rados::blocklist_add with the
//    rbd_blocklist_expire_seconds config value;
//  - calls rados::cls::lock::break_lock and then ictx->notify_update().
//
// NOTE(review): the error-handling branches and return values are on lines
// missing from this listing (embedded numbering has gaps throughout).
1452 int break_lock(ImageCtx
*ictx
, const string
& client
,
1453 const string
& cookie
)
1455 ldout(ictx
->cct
, 20) << "break_lock image " << ictx
<< " client='" << client
1456 << "' cookie='" << cookie
<< "'" << dendl
;
1458 int r
= ictx
->state
->refresh_if_required();
1462 entity_name_t lock_client
;
1463 if (!lock_client
.parse(client
)) {
1464 lderr(ictx
->cct
) << "Unable to parse client '" << client
// Optional blocklisting of the current lock holder, driven by config.
1469 if (ictx
->config
.get_val
<bool>("rbd_blocklist_on_break_lock")) {
1470 typedef std::map
<rados::cls::lock::locker_id_t
,
1471 rados::cls::lock::locker_info_t
> Lockers
;
1473 ClsLockType lock_type
;
1474 std::string lock_tag
;
1475 r
= rados::cls::lock::get_lock_info(&ictx
->md_ctx
, ictx
->header_oid
,
1476 RBD_LOCK_NAME
, &lockers
, &lock_type
,
1479 lderr(ictx
->cct
) << "unable to retrieve lock info: " << cpp_strerror(r
)
// Find the address registered for the locker that matches the parsed client.
1484 std::string client_address
;
1485 for (Lockers::iterator it
= lockers
.begin();
1486 it
!= lockers
.end(); ++it
) {
1487 if (it
->first
.locker
== lock_client
) {
1488 client_address
= it
->second
.addr
.get_legacy_str();
// NOTE(review): the handling of "no matching locker found" is not visible.
1492 if (client_address
.empty()) {
1496 librados::Rados
rados(ictx
->md_ctx
);
1497 r
= rados
.blocklist_add(
1499 ictx
->config
.get_val
<uint64_t>("rbd_blocklist_expire_seconds"));
1501 lderr(ictx
->cct
) << "unable to blocklist client: " << cpp_strerror(r
)
// Break the lock itself, then notify.
1507 r
= rados::cls::lock::break_lock(&ictx
->md_ctx
, ictx
->header_oid
,
1508 RBD_LOCK_NAME
, cookie
, lock_client
);
1511 ictx
->notify_update();
// Completion callback adapter: reinterprets `arg` as a Context* and `cb` as
// an io::AioCompletion*, then completes the context with the completion's
// return value.
// NOTE(review): the embedded numbering shows original lines after 1519 that
// are missing from this listing; any further handling of `comp` is not
// visible here.
1515 void rbd_ctx_cb(completion_t cb
, void *arg
)
1517 Context
*ctx
= reinterpret_cast<Context
*>(arg
);
1518 auto comp
= reinterpret_cast<io::AioCompletion
*>(cb
);
1519 ctx
->complete(comp
->get_return_value());
// Read the extent [off, off + len) of the image's data area and pass each
// chunk that was read to the callback `cb`.
//
// Visible behaviour:
//  - refreshes the image state if required;
//  - clips the requested length against the data area via clip_io() while
//    holding image_lock in shared mode;
//  - issues reads that end at or before the next stripe-period boundary
//    (read_len is the smaller of the bytes left and the distance to the end
//    of the current period), waits for each, and invokes
//    cb(total_read, ret, data, arg);
//  - updates the l_librbd_rd_latency, l_librbd_rd and l_librbd_rd_bytes
//    perf counters.
//
// NOTE(review): the loop header, the `arg` parameter declaration, the
// bookkeeping of off/left/total_read, error checks and the return value are
// on lines missing from this listing.
1523 int64_t read_iterate(ImageCtx
*ictx
, uint64_t off
, uint64_t len
,
1524 int (*cb
)(uint64_t, size_t, const char *, void *),
1527 coarse_mono_time start_time
;
1528 ceph::timespan elapsed
;
1530 ldout(ictx
->cct
, 20) << "read_iterate " << ictx
<< " off = " << off
1531 << " len = " << len
<< dendl
;
1533 int r
= ictx
->state
->refresh_if_required();
// Clip a local copy of the length so the caller's `len` is left untouched.
1537 uint64_t mylen
= len
;
1538 ictx
->image_lock
.lock_shared();
1539 r
= clip_io(ictx
, off
, &mylen
, io::ImageArea::DATA
);
1540 ictx
->image_lock
.unlock_shared();
1544 int64_t total_read
= 0;
1545 uint64_t period
= ictx
->get_stripe_period();
1546 uint64_t left
= mylen
;
// The trace is only initialised when blkin tracing is enabled on the image.
1548 ZTracer::Trace trace
;
1549 if (ictx
->blkin_trace_all
) {
1550 trace
.init("read_iterate", &ictx
->trace_endpoint
);
// owner_lock is held in shared mode from here on; timing starts here.
1553 std::shared_lock owner_locker
{ictx
->owner_lock
};
1554 start_time
= coarse_mono_clock::now();
// Start of the stripe period containing `off`.
1556 uint64_t period_off
= off
- (off
% period
);
// Never read across the end of the current period or past what is left.
1557 uint64_t read_len
= std::min(period_off
+ period
- off
, left
);
1562 auto c
= io::AioCompletion::create_and_start(&ctx
, ictx
,
1564 auto req
= io::ImageDispatchSpec::create_read(
1565 *ictx
, io::IMAGE_DISPATCH_LAYER_NONE
, c
,
1566 {{off
, read_len
}}, io::ImageArea::DATA
, io::ReadResult
{&bl
},
1567 ictx
->get_data_io_context(), 0, 0, trace
);
// Wait for this read to complete before invoking the callback.
1570 int ret
= ctx
.wait();
1575 r
= cb(total_read
, ret
, bl
.c_str(), arg
);
// Account latency, operation count and the clipped byte count.
1585 elapsed
= coarse_mono_clock::now() - start_time
;
1586 ictx
->perfcounter
->tinc(l_librbd_rd_latency
, elapsed
);
1587 ictx
->perfcounter
->inc(l_librbd_rd
);
1588 ictx
->perfcounter
->inc(l_librbd_rd_bytes
, mylen
);
1592 // validate extent against area size; clip to area size if necessary
// Preconditions visible below: the caller must hold ictx->image_lock
// (asserted). `*len` is an in/out value: it is shortened in place when
// off + *len extends past the size of `area`.
// NOTE(review): the return statements of each check are on lines missing
// from this listing, so the exact error codes are not visible here.
1593 int clip_io(ImageCtx
* ictx
, uint64_t off
, uint64_t* len
, io::ImageArea area
) {
1594 ceph_assert(ceph_mutex_is_locked(ictx
->image_lock
));
// A snapshot is selected but no snap info exists for it.
1596 if (ictx
->snap_id
!= CEPH_NOSNAP
&&
1597 ictx
->get_snap_info(ictx
->snap_id
) == nullptr) {
1601 // special-case "len == 0" requests: always valid
1605 uint64_t area_size
= ictx
->get_area_size(area
);
1607 // can't start past end
1608 if (off
>= area_size
)
1611 // clip requests that extend past end to just end
1612 if ((off
+ *len
) > area_size
)
1613 *len
= (size_t)(area_size
- off
);
// Invalidate the image cache for `ictx`.
//
// Visible behaviour: refreshes the image state if required, asks
// ictx->io_image_dispatcher to invalidate the cache, logs when that fails,
// increments the l_librbd_invalidate_cache perf counter, and, when this
// client is not the exclusive-lock owner (or there is no exclusive lock) and
// the image has RBD_FEATURE_DIRTY_CACHE, calls plugin_registry->discard().
//
// NOTE(review): the declarations of `ctx`/`ctx3`, the waits on them, the
// error checks and the return value are on lines missing from this listing.
1618 int invalidate_cache(ImageCtx
*ictx
)
1620 CephContext
*cct
= ictx
->cct
;
1621 ldout(cct
, 20) << "invalidate_cache " << ictx
<< dendl
;
1623 int r
= ictx
->state
->refresh_if_required();
1630 ictx
->io_image_dispatcher
->invalidate_cache(&ctx
);
1635 ldout(cct
, 20) << "failed to invalidate image cache" << dendl
;
1639 ictx
->perfcounter
->inc(l_librbd_invalidate_cache
);
1641 // Delete writeback cache if it is not initialized
1642 if ((!ictx
->exclusive_lock
||
1643 !ictx
->exclusive_lock
->is_lock_owner()) &&
1644 ictx
->test_features(RBD_FEATURE_DIRTY_CACHE
)) {
1646 ictx
->plugin_registry
->discard(&ctx3
);
// Drain up to `numcomp` entries from ictx->event_socket_completions into the
// caller-provided `comps` array.
// NOTE(review): the declaration of `i`, any argument validation, the loop
// body and the return value are on lines missing from this listing.
1652 int poll_io_events(ImageCtx
*ictx
, io::AioCompletion
**comps
, int numcomp
)
1656 CephContext
*cct
= ictx
->cct
;
1657 ldout(cct
, 20) << __func__
<< " " << ictx
<< " numcomp = " << numcomp
// Stops when the array is full or pop() reports nothing more to take.
1660 while (i
< numcomp
&& ictx
->event_socket_completions
.pop(comps
[i
])) {
// Fetch the image metadata value stored under `key` into `*value`.
// Refreshes the image state if required, then returns the result of
// cls_client::metadata_get on the image header object.
// NOTE(review): the handling of a failed refresh is on lines missing from
// this listing.
1667 int metadata_get(ImageCtx
*ictx
, const string
&key
, string
*value
)
1669 CephContext
*cct
= ictx
->cct
;
1670 ldout(cct
, 20) << "metadata_get " << ictx
<< " key=" << key
<< dendl
;
1672 int r
= ictx
->state
->refresh_if_required();
1677 return cls_client::metadata_get(&ictx
->md_ctx
, ictx
->header_oid
, key
, value
);
// List image metadata into `*pairs`, starting from `start` and limited by
// `max`, by creating an image::GetMetadataRequest on the image header object
// after refreshing the image state if required.
// NOTE(review): the declaration of `ctx`, sending the request, waiting for
// it and the return value are on lines missing from this listing; the
// meaning of the `false` and "" arguments is defined by GetMetadataRequest
// and is not visible here.
1680 int metadata_list(ImageCtx
*ictx
, const string
&start
, uint64_t max
, map
<string
, bufferlist
> *pairs
)
1682 CephContext
*cct
= ictx
->cct
;
1683 ldout(cct
, 20) << "metadata_list " << ictx
<< dendl
;
1685 int r
= ictx
->state
->refresh_if_required();
1691 auto req
= image::GetMetadataRequest
<>::create(
1692 ictx
->md_ctx
, ictx
->header_oid
, false, "", start
, max
, pairs
, &ctx
);
// Append the watchers of the image header object to `watchers`.
//
// Visible behaviour: derives the header object name from ictx->name (old
// format) or ictx->id, calls md_ctx.list_watchers on it, and converts every
// obj_watch_t into a librbd::image_watcher_t (addr, id, cookie).
//
// NOTE(review): the declaration of `r`, the else branch delimiter, the error
// check after list_watchers and the return value are on lines missing from
// this listing.
1698 int list_watchers(ImageCtx
*ictx
,
1699 std::list
<librbd::image_watcher_t
> &watchers
)
1702 std::string header_oid
;
1703 std::list
<obj_watch_t
> obj_watchers
;
// The header object name depends on the image format.
1705 if (ictx
->old_format
) {
1706 header_oid
= util::old_header_name(ictx
->name
);
1708 header_oid
= util::header_name(ictx
->id
);
1711 r
= ictx
->md_ctx
.list_watchers(header_oid
, &obj_watchers
);
// Field-by-field conversion into the public watcher type.
1717 for (auto i
= obj_watchers
.begin(); i
!= obj_watchers
.end(); ++i
) {
1718 librbd::image_watcher_t watcher
;
1719 watcher
.addr
= i
->addr
;
1720 watcher
.id
= i
->watcher_id
;
1721 watcher
.cookie
= i
->cookie
;
1723 watchers
.push_back(watcher
);
1731 std::ostream
&operator<<(std::ostream
&os
, const librbd::ImageOptions
&opts
) {
1734 const char *delimiter
= "";
1735 for (auto &i
: librbd::IMAGE_OPTIONS_TYPE_MAPPING
) {
1736 if (i
.second
== librbd::STR
) {
1738 if (opts
.get(i
.first
, &val
) == 0) {
1739 os
<< delimiter
<< librbd::image_option_name(i
.first
) << "=" << val
;
1742 } else if (i
.second
== librbd::UINT64
) {
1744 if (opts
.get(i
.first
, &val
) == 0) {
1745 os
<< delimiter
<< librbd::image_option_name(i
.first
) << "=" << val
;