1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #include "include/int_types.h"
8 #include "include/types.h"
9 #include "include/uuid.h"
10 #include "common/ceph_context.h"
11 #include "common/dout.h"
12 #include "common/errno.h"
13 #include "common/Throttle.h"
14 #include "common/event_socket.h"
15 #include "common/perf_counters.h"
16 #include "osdc/Striper.h"
17 #include "include/stringify.h"
19 #include "cls/lock/cls_lock_client.h"
20 #include "cls/rbd/cls_rbd.h"
21 #include "cls/rbd/cls_rbd_types.h"
22 #include "cls/rbd/cls_rbd_client.h"
23 #include "cls/journal/cls_journal_types.h"
24 #include "cls/journal/cls_journal_client.h"
26 #include "librbd/AsioEngine.h"
27 #include "librbd/ExclusiveLock.h"
28 #include "librbd/ImageCtx.h"
29 #include "librbd/ImageState.h"
30 #include "librbd/internal.h"
31 #include "librbd/Journal.h"
32 #include "librbd/ObjectMap.h"
33 #include "librbd/Operations.h"
34 #include "librbd/PluginRegistry.h"
35 #include "librbd/Types.h"
36 #include "librbd/Utils.h"
37 #include "librbd/api/Config.h"
38 #include "librbd/api/Image.h"
39 #include "librbd/api/Io.h"
40 #include "librbd/cache/Utils.h"
41 #include "librbd/exclusive_lock/AutomaticPolicy.h"
42 #include "librbd/exclusive_lock/StandardPolicy.h"
43 #include "librbd/deep_copy/MetadataCopyRequest.h"
44 #include "librbd/image/CloneRequest.h"
45 #include "librbd/image/CreateRequest.h"
46 #include "librbd/image/GetMetadataRequest.h"
47 #include "librbd/image/Types.h"
48 #include "librbd/io/AioCompletion.h"
49 #include "librbd/io/ImageDispatchSpec.h"
50 #include "librbd/io/ImageDispatcherInterface.h"
51 #include "librbd/io/ObjectDispatcherInterface.h"
52 #include "librbd/io/ObjectRequest.h"
53 #include "librbd/io/ReadResult.h"
54 #include "librbd/journal/Types.h"
55 #include "librbd/managed_lock/Types.h"
56 #include "librbd/mirror/EnableRequest.h"
57 #include "librbd/operation/TrimRequest.h"
59 #include "journal/Journaler.h"
61 #include <boost/scope_exit.hpp>
62 #include <boost/variant.hpp>
63 #include "include/ceph_assert.h"
65 #define dout_subsys ceph_subsys_rbd
67 #define dout_prefix *_dout << "librbd: "
// Integer division of x by y with the quotient rounded up: (y - 1) is added
// to x before dividing. Intended for non-negative integer operands; note that
// the macro expands y more than once.
69 #define rbd_howmany(x, y) (((x) + (y) - 1) / (y))
71 using std::istringstream
;
77 // list binds to list() here, so std::list is explicitly used below
79 using ceph::bufferlist
;
80 using librados::snap_t
;
81 using librados::IoCtx
;
82 using librados::Rados
;
// Checks whether the pool behind io_ctx can be used for RBD images.
// The visible steps are: read the "rbd_validate_pool" config option, stat the
// RBD_DIRECTORY object, then create and remove a self-managed snapshot id.
// Every failing step is reported through lderr(cct).
// NOTE(review): the return statements are not visible in this view;
// presumably 0 on success and a negative errno on failure -- confirm.
88 int validate_pool(IoCtx
&io_ctx
, CephContext
*cct
) {
// validation is conditional on the rbd_validate_pool option
89 if (!cct
->_conf
.get_val
<bool>("rbd_validate_pool")) {
// size and mtime are not needed, only the stat result code
93 int r
= io_ctx
.stat(RBD_DIRECTORY
, NULL
, NULL
);
// a stat failure other than -ENOENT is logged as an error
96 } else if (r
< 0 && r
!= -ENOENT
) {
97 lderr(cct
) << "failed to stat RBD directory: " << cpp_strerror(r
) << dendl
;
101 // allocate a self-managed snapshot id if this is a new pool to force
102 // self-managed snapshot mode
104 r
= io_ctx
.selfmanaged_snap_create(&snap_id
);
106 lderr(cct
) << "pool not configured for self-managed RBD snapshot support"
110 lderr(cct
) << "failed to allocate self-managed snapshot: "
111 << cpp_strerror(r
) << dendl
;
// the snapshot id allocated above is released again
115 r
= io_ctx
.selfmanaged_snap_remove(snap_id
);
117 lderr(cct
) << "failed to release self-managed snapshot " << snap_id
118 << ": " << cpp_strerror(r
) << dendl
;
123 } // anonymous namespace
// Probes the pool for an image called `name` by stat()-ing two candidate
// objects: util::old_header_name(name) and util::id_obj_name(name).
// `size` is handed straight to stat() as its size output.
// `old_format` may be null: the debug line below prints "don't care" then.
// NOTE(review): the code that assigns *old_format and the return statements
// are not visible in this view -- confirm the exact result contract.
125 int detect_format(IoCtx
&io_ctx
, const string
&name
,
126 bool *old_format
, uint64_t *size
)
128 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
// first probe: the old-style header object
131 int r
= io_ctx
.stat(util::old_header_name(name
), size
, NULL
);
// second probe: the id object
135 r
= io_ctx
.stat(util::id_obj_name(name
), size
, NULL
);
142 ldout(cct
, 20) << "detect format of " << name
<< " : "
143 << (old_format
? (*old_format
? "old" : "new") :
144 "don't care") << dendl
;
/**
 * Report whether an offset falls within the part of an image that is
 * backed by a parent.
 *
 * @param parent_pool_id  pool id of the parent; -1 is treated as "no parent"
 * @param off             offset to test
 * @param overlap         parent overlap to test against
 * @return true when parent_pool_id is not -1 and off <= overlap
 */
bool has_parent(int64_t parent_pool_id, uint64_t off, uint64_t overlap)
{
  return (parent_pool_id != -1 && off <= overlap);
}
// Initialises `ondisk` as a fresh header: the struct is zeroed, the
// RBD_HEADER_TEXT / RBD_HEADER_SIGNATURE / RBD_HEADER_VERSION constants are
// copied in, block_name is formatted as "rb.%x.%x.%x", and the size, order,
// crypt/comp types and snapshot counters are set.
//   size  - stored in ondisk.image_size
//   order - stored in ondisk.options.order
//   bid   - 64-bit id that is split into 32-bit halves (hi / lo)
153 void init_rbd_header(struct rbd_obj_header_ondisk
& ondisk
,
154 uint64_t size
, int order
, uint64_t bid
)
// upper and lower 32 bits of bid
156 uint32_t hi
= bid
>> 32;
157 uint32_t lo
= bid
& 0xFFFFFFFF;
// value derived from rand(); always below 0xFFFFFFFF because of the modulo
158 uint32_t extra
= rand() % 0xFFFFFFFF;
159 // FIPS zeroization audit 20191117: this memset is not security related.
160 memset(&ondisk
, 0, sizeof(ondisk
));
// sizeof on the constants is used as the copy length for each field
162 memcpy(&ondisk
.text
, RBD_HEADER_TEXT
, sizeof(RBD_HEADER_TEXT
));
163 memcpy(&ondisk
.signature
, RBD_HEADER_SIGNATURE
,
164 sizeof(RBD_HEADER_SIGNATURE
));
165 memcpy(&ondisk
.version
, RBD_HEADER_VERSION
, sizeof(RBD_HEADER_VERSION
));
// NOTE(review): the argument list of this snprintf is not visible in this
// view; presumably hi, lo and extra fill the three %x fields -- confirm.
167 snprintf(ondisk
.block_name
, sizeof(ondisk
.block_name
), "rb.%x.%x.%x",
170 ondisk
.image_size
= size
;
171 ondisk
.options
.order
= order
;
172 ondisk
.options
.crypt_type
= RBD_CRYPT_NONE
;
173 ondisk
.options
.comp_type
= RBD_COMP_NONE
;
// a new header starts without snapshots
175 ondisk
.snap_count
= 0;
177 ondisk
.snap_names_len
= 0;
// Fills `info` from the state of `ictx`: effective image size at the current
// snap_id, object size (1 << order), object count, order and the object name
// prefix. The deprecated parent fields are reset.
// `infosize` is not referenced by any line visible in this view.
180 void image_info(ImageCtx
*ictx
, image_info_t
& info
, size_t infosize
)
182 int obj_order
= ictx
->order
;
// the size is read while holding image_lock in shared mode
184 std::shared_lock locker
{ictx
->image_lock
};
185 info
.size
= ictx
->get_effective_image_size(ictx
->snap_id
);
187 info
.obj_size
= 1ULL << obj_order
;
188 info
.num_objs
= Striper::get_num_objects(ictx
->layout
, info
.size
);
189 info
.order
= obj_order
;
// strncpy does not terminate the destination when the source fills it, so
// the last byte is set to NUL explicitly afterwards
190 strncpy(info
.block_name_prefix
, ictx
->object_prefix
.c_str(),
191 RBD_MAX_BLOCK_NAME_SIZE
);
192 info
.block_name_prefix
[RBD_MAX_BLOCK_NAME_SIZE
- 1] = '\0';
194 // clear deprecated fields
195 info
.parent_pool
= -1L;
196 info
.parent_name
[0] = '\0';
/**
 * Extract the object number from a data object name.
 *
 * The name is expected to consist of the object prefix, a single separator
 * character and the object number written in hexadecimal.
 *
 * @param oid            full object name
 * @param object_prefix  prefix shared by the image's objects; only its
 *                       length is used
 * @return the parsed number, or 0 when nothing parseable follows the prefix
 *         and separator
 */
uint64_t oid_to_object_no(const std::string& oid,
                          const std::string& object_prefix)
{
  std::istringstream iss(oid);
  // skip object prefix and separator
  iss.ignore(object_prefix.length() + 1);
  // Start from a defined value: when the stream is already exhausted the
  // extraction below does not touch its target at all.
  uint64_t num = 0;
  iss >> std::hex >> num;
  return num;
}
// Builds an operation::TrimRequest that trims the image from its current
// size (ictx->size) to `newsize`, reporting progress through `prog_ctx`.
// Preconditions asserted below: the caller holds ictx->owner_lock, and the
// exclusive lock is either absent or owned by this client.
// A failure is only logged as a warning.
// NOTE(review): the lines that send the request and wait for `ctx` are not
// visible in this view.
209 void trim_image(ImageCtx
*ictx
, uint64_t newsize
, ProgressContext
& prog_ctx
)
211 ceph_assert(ceph_mutex_is_locked(ictx
->owner_lock
));
212 ceph_assert(ictx
->exclusive_lock
== nullptr ||
213 ictx
->exclusive_lock
->is_lock_owner());
// image_lock is held in shared mode only while ictx->size is read to
// construct the request
216 ictx
->image_lock
.lock_shared();
217 operation::TrimRequest
<> *req
= operation::TrimRequest
<>::create(
218 *ictx
, &ctx
, ictx
->size
, newsize
, prog_ctx
);
219 ictx
->image_lock
.unlock_shared();
224 lderr(ictx
->cct
) << "warning: failed to remove some object(s): "
225 << cpp_strerror(r
) << dendl
;
// Reads the object `header_oid` into `header` in READ_SIZE-byte chunks and
// checks that the data starts with one of the two known header texts.
//   header - receives the concatenated chunks
//   ver    - receives io_ctx.get_last_version()
// NOTE(review): error returns and any null check on `ver` are not visible in
// this view -- confirm before relying on them.
229 int read_header_bl(IoCtx
& io_ctx
, const string
& header_oid
,
230 bufferlist
& header
, uint64_t *ver
)
234 #define READ_SIZE 4096
237 r
= io_ctx
.read(header_oid
, bl
, READ_SIZE
, off
);
240 header
.claim_append(bl
);
// keep reading while the previous read returned exactly READ_SIZE
242 } while (r
== READ_SIZE
);
// both header texts are compared with the same length below
244 static_assert(sizeof(RBD_HEADER_TEXT
) == sizeof(RBD_MIGRATE_HEADER_TEXT
),
245 "length of rbd headers must be the same");
// reject data that is too short or matches neither header text
247 if (header
.length() < sizeof(RBD_HEADER_TEXT
) ||
248 (memcmp(RBD_HEADER_TEXT
, header
.c_str(),
249 sizeof(RBD_HEADER_TEXT
)) != 0 &&
250 memcmp(RBD_MIGRATE_HEADER_TEXT
, header
.c_str(),
251 sizeof(RBD_MIGRATE_HEADER_TEXT
)) != 0)) {
252 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
253 lderr(cct
) << "unrecognized header format" << dendl
;
258 *ver
= io_ctx
.get_last_version();
// Reads the header object through read_header_bl() and copies the leading
// sizeof(*header) bytes into the caller's `header` struct.
//   ver - forwarded unchanged to read_header_bl()
// NOTE(review): the return statements are not visible in this view.
263 int read_header(IoCtx
& io_ctx
, const string
& header_oid
,
264 struct rbd_obj_header_ondisk
*header
, uint64_t *ver
)
266 bufferlist header_bl
;
267 int r
= read_header_bl(io_ctx
, header_oid
, header_bl
, ver
);
// the data must be at least as large as the struct before it is copied
270 if (header_bl
.length() < (int)sizeof(*header
))
272 memcpy(header
, header_bl
.c_str(), sizeof(*header
));
// Builds a tmap command for `imgname` with an empty value and applies it to
// the RBD_DIRECTORY object; returns the result of tmap_update().
277 int tmap_set(IoCtx
& io_ctx
, const string
& imgname
)
279 bufferlist cmdbl
, emptybl
;
// opcode of the command
// NOTE(review): the statement that encodes `c` into cmdbl is not visible in
// this view; presumably it precedes the key below -- confirm.
280 __u8 c
= CEPH_OSD_TMAP_SET
;
// the image name, then an empty bufferlist, are appended to the command
282 encode(imgname
, cmdbl
);
283 encode(emptybl
, cmdbl
);
284 return io_ctx
.tmap_update(RBD_DIRECTORY
, cmdbl
);
// Builds a tmap command for `imgname` using the CEPH_OSD_TMAP_RM opcode and
// applies it to the RBD_DIRECTORY object; returns the result of tmap_update().
// NOTE(review): the declaration of cmdbl and the statement that encodes `c`
// are not visible in this view.
287 int tmap_rm(IoCtx
& io_ctx
, const string
& imgname
)
290 __u8 c
= CEPH_OSD_TMAP_RM
;
292 encode(imgname
, cmdbl
);
293 return io_ctx
.tmap_update(RBD_DIRECTORY
, cmdbl
);
296 typedef boost::variant
<std::string
,uint64_t> image_option_value_t
;
297 typedef std::map
<int,image_option_value_t
> image_options_t
;
298 typedef std::shared_ptr
<image_options_t
> image_options_ref
;
300 enum image_option_type_t
{
305 const std::map
<int, image_option_type_t
> IMAGE_OPTIONS_TYPE_MAPPING
= {
306 {RBD_IMAGE_OPTION_FORMAT
, UINT64
},
307 {RBD_IMAGE_OPTION_FEATURES
, UINT64
},
308 {RBD_IMAGE_OPTION_ORDER
, UINT64
},
309 {RBD_IMAGE_OPTION_STRIPE_UNIT
, UINT64
},
310 {RBD_IMAGE_OPTION_STRIPE_COUNT
, UINT64
},
311 {RBD_IMAGE_OPTION_JOURNAL_ORDER
, UINT64
},
312 {RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH
, UINT64
},
313 {RBD_IMAGE_OPTION_JOURNAL_POOL
, STR
},
314 {RBD_IMAGE_OPTION_FEATURES_SET
, UINT64
},
315 {RBD_IMAGE_OPTION_FEATURES_CLEAR
, UINT64
},
316 {RBD_IMAGE_OPTION_DATA_POOL
, STR
},
317 {RBD_IMAGE_OPTION_FLATTEN
, UINT64
},
318 {RBD_IMAGE_OPTION_CLONE_FORMAT
, UINT64
},
319 {RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE
, UINT64
},
322 std::string
image_option_name(int optname
) {
324 case RBD_IMAGE_OPTION_FORMAT
:
326 case RBD_IMAGE_OPTION_FEATURES
:
328 case RBD_IMAGE_OPTION_ORDER
:
330 case RBD_IMAGE_OPTION_STRIPE_UNIT
:
331 return "stripe_unit";
332 case RBD_IMAGE_OPTION_STRIPE_COUNT
:
333 return "stripe_count";
334 case RBD_IMAGE_OPTION_JOURNAL_ORDER
:
335 return "journal_order";
336 case RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH
:
337 return "journal_splay_width";
338 case RBD_IMAGE_OPTION_JOURNAL_POOL
:
339 return "journal_pool";
340 case RBD_IMAGE_OPTION_FEATURES_SET
:
341 return "features_set";
342 case RBD_IMAGE_OPTION_FEATURES_CLEAR
:
343 return "features_clear";
344 case RBD_IMAGE_OPTION_DATA_POOL
:
346 case RBD_IMAGE_OPTION_FLATTEN
:
348 case RBD_IMAGE_OPTION_CLONE_FORMAT
:
349 return "clone_format";
350 case RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE
:
351 return "mirror_image_mode";
353 return "unknown (" + stringify(optname
) + ")";
357 void image_options_create(rbd_image_options_t
* opts
)
359 image_options_ref
* opts_
= new image_options_ref(new image_options_t());
361 *opts
= static_cast<rbd_image_options_t
>(opts_
);
364 void image_options_create_ref(rbd_image_options_t
* opts
,
365 rbd_image_options_t orig
)
367 image_options_ref
* orig_
= static_cast<image_options_ref
*>(orig
);
368 image_options_ref
* opts_
= new image_options_ref(*orig_
);
370 *opts
= static_cast<rbd_image_options_t
>(opts_
);
373 void image_options_copy(rbd_image_options_t
* opts
,
374 const ImageOptions
&orig
)
376 image_options_ref
* opts_
= new image_options_ref(new image_options_t());
378 *opts
= static_cast<rbd_image_options_t
>(opts_
);
382 for (auto &i
: IMAGE_OPTIONS_TYPE_MAPPING
) {
385 if (orig
.get(i
.first
, &str_val
) == 0) {
386 image_options_set(*opts
, i
.first
, str_val
);
390 if (orig
.get(i
.first
, &uint64_val
) == 0) {
391 image_options_set(*opts
, i
.first
, uint64_val
);
398 void image_options_destroy(rbd_image_options_t opts
)
400 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
405 int image_options_set(rbd_image_options_t opts
, int optname
,
406 const std::string
& optval
)
408 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
410 std::map
<int, image_option_type_t
>::const_iterator i
=
411 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
413 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= STR
) {
417 (*opts_
->get())[optname
] = optval
;
421 int image_options_set(rbd_image_options_t opts
, int optname
, uint64_t optval
)
423 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
425 std::map
<int, image_option_type_t
>::const_iterator i
=
426 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
428 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= UINT64
) {
432 (*opts_
->get())[optname
] = optval
;
436 int image_options_get(rbd_image_options_t opts
, int optname
,
439 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
441 std::map
<int, image_option_type_t
>::const_iterator i
=
442 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
444 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= STR
) {
448 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
450 if (j
== (*opts_
)->end()) {
454 *optval
= boost::get
<std::string
>(j
->second
);
458 int image_options_get(rbd_image_options_t opts
, int optname
, uint64_t* optval
)
460 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
462 std::map
<int, image_option_type_t
>::const_iterator i
=
463 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
465 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= UINT64
) {
469 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
471 if (j
== (*opts_
)->end()) {
475 *optval
= boost::get
<uint64_t>(j
->second
);
479 int image_options_is_set(rbd_image_options_t opts
, int optname
,
482 if (IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
) ==
483 IMAGE_OPTIONS_TYPE_MAPPING
.end()) {
487 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
488 *is_set
= ((*opts_
)->find(optname
) != (*opts_
)->end());
492 int image_options_unset(rbd_image_options_t opts
, int optname
)
494 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
496 std::map
<int, image_option_type_t
>::const_iterator i
=
497 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
499 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end()) {
500 ceph_assert((*opts_
)->find(optname
) == (*opts_
)->end());
504 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
506 if (j
== (*opts_
)->end()) {
514 void image_options_clear(rbd_image_options_t opts
)
516 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
521 bool image_options_is_empty(rbd_image_options_t opts
)
523 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
525 return (*opts_
)->empty();
528 int create_v1(IoCtx
& io_ctx
, const char *imgname
, uint64_t size
, int order
)
530 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
532 ldout(cct
, 20) << __func__
<< " " << &io_ctx
<< " name = " << imgname
533 << " size = " << size
<< " order = " << order
<< dendl
;
534 int r
= validate_pool(io_ctx
, cct
);
539 if (!io_ctx
.get_namespace().empty()) {
540 lderr(cct
) << "attempting to add v1 image to namespace" << dendl
;
544 ldout(cct
, 2) << "adding rbd image to directory..." << dendl
;
545 r
= tmap_set(io_ctx
, imgname
);
547 lderr(cct
) << "error adding image to directory: " << cpp_strerror(r
)
553 uint64_t bid
= rados
.get_instance_id();
555 ldout(cct
, 2) << "creating rbd image..." << dendl
;
556 struct rbd_obj_header_ondisk header
;
557 init_rbd_header(header
, size
, order
, bid
);
560 bl
.append((const char *)&header
, sizeof(header
));
562 string header_oid
= util::old_header_name(imgname
);
563 r
= io_ctx
.write(header_oid
, bl
, bl
.length(), 0);
565 lderr(cct
) << "Error writing image header: " << cpp_strerror(r
)
567 int remove_r
= tmap_rm(io_ctx
, imgname
);
569 lderr(cct
) << "Could not remove image from directory after "
570 << "header creation failed: "
571 << cpp_strerror(remove_r
) << dendl
;
576 ldout(cct
, 2) << "done." << dendl
;
580 int create(librados::IoCtx
& io_ctx
, const char *imgname
, uint64_t size
,
583 uint64_t order_
= *order
;
586 int r
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order_
);
589 r
= create(io_ctx
, imgname
, "", size
, opts
, "", "", false);
591 int r1
= opts
.get(RBD_IMAGE_OPTION_ORDER
, &order_
);
592 ceph_assert(r1
== 0);
598 int create(IoCtx
& io_ctx
, const char *imgname
, uint64_t size
,
599 bool old_format
, uint64_t features
, int *order
,
600 uint64_t stripe_unit
, uint64_t stripe_count
)
605 uint64_t order_
= *order
;
606 uint64_t format
= old_format
? 1 : 2;
610 r
= opts
.set(RBD_IMAGE_OPTION_FORMAT
, format
);
612 r
= opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
614 r
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order_
);
616 r
= opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
618 r
= opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
621 r
= create(io_ctx
, imgname
, "", size
, opts
, "", "", false);
623 int r1
= opts
.get(RBD_IMAGE_OPTION_ORDER
, &order_
);
624 ceph_assert(r1
== 0);
630 int create(IoCtx
& io_ctx
, const std::string
&image_name
,
631 const std::string
&image_id
, uint64_t size
,
633 const std::string
&non_primary_global_image_id
,
634 const std::string
&primary_mirror_uuid
,
635 bool skip_mirror_enable
)
637 std::string
id(image_id
);
639 id
= util::generate_image_id(io_ctx
);
642 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
644 if (opts
.get(RBD_IMAGE_OPTION_FLATTEN
, &option
) == 0) {
645 lderr(cct
) << "create does not support 'flatten' image option" << dendl
;
648 if (opts
.get(RBD_IMAGE_OPTION_CLONE_FORMAT
, &option
) == 0) {
649 lderr(cct
) << "create does not support 'clone_format' image option"
654 ldout(cct
, 10) << __func__
<< " name=" << image_name
<< ", "
655 << "id= " << id
<< ", "
656 << "size=" << size
<< ", opts=" << opts
<< dendl
;
659 if (opts
.get(RBD_IMAGE_OPTION_FORMAT
, &format
) != 0)
660 format
= cct
->_conf
.get_val
<uint64_t>("rbd_default_format");
661 bool old_format
= format
== 1;
663 // make sure it doesn't already exist, in either format
664 int r
= detect_format(io_ctx
, image_name
, NULL
, NULL
);
667 lderr(cct
) << "Could not tell if " << image_name
<< " already exists"
671 lderr(cct
) << "rbd image " << image_name
<< " already exists" << dendl
;
676 if (opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
) != 0 || order
== 0) {
677 order
= cct
->_conf
.get_val
<uint64_t>("rbd_default_order");
679 r
= image::CreateRequest
<>::validate_order(cct
, order
);
685 if ( !getenv("RBD_FORCE_ALLOW_V1") ) {
686 lderr(cct
) << "Format 1 image creation unsupported. " << dendl
;
689 lderr(cct
) << "Forced V1 image creation. " << dendl
;
690 r
= create_v1(io_ctx
, image_name
.c_str(), size
, order
);
692 AsioEngine
asio_engine(io_ctx
);
694 ConfigProxy config
{cct
->_conf
};
695 api::Config
<>::apply_pool_overrides(io_ctx
, &config
);
697 uint32_t create_flags
= 0U;
698 uint64_t mirror_image_mode
= RBD_MIRROR_IMAGE_MODE_JOURNAL
;
699 if (skip_mirror_enable
) {
700 create_flags
= image::CREATE_FLAG_SKIP_MIRROR_ENABLE
;
701 } else if (opts
.get(RBD_IMAGE_OPTION_MIRROR_IMAGE_MODE
,
702 &mirror_image_mode
) == 0) {
703 create_flags
= image::CREATE_FLAG_FORCE_MIRROR_ENABLE
;
707 image::CreateRequest
<> *req
= image::CreateRequest
<>::create(
708 config
, io_ctx
, image_name
, id
, size
, opts
, create_flags
,
709 static_cast<cls::rbd::MirrorImageMode
>(mirror_image_mode
),
710 non_primary_global_image_id
, primary_mirror_uuid
,
711 asio_engine
.get_work_queue(), &cond
);
717 int r1
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
718 ceph_assert(r1
== 0);
724 * Parent may be in different pool, hence different IoCtx
726 int clone(IoCtx
& p_ioctx
, const char *p_name
, const char *p_snap_name
,
727 IoCtx
& c_ioctx
, const char *c_name
,
728 uint64_t features
, int *c_order
,
729 uint64_t stripe_unit
, int stripe_count
)
731 uint64_t order
= *c_order
;
734 opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
735 opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
736 opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
737 opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
739 int r
= clone(p_ioctx
, nullptr, p_name
, p_snap_name
, c_ioctx
, nullptr,
740 c_name
, opts
, "", "");
741 opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
);
746 int clone(IoCtx
& p_ioctx
, const char *p_id
, const char *p_name
,
747 const char *p_snap_name
, IoCtx
& c_ioctx
, const char *c_id
,
748 const char *c_name
, ImageOptions
& c_opts
,
749 const std::string
&non_primary_global_image_id
,
750 const std::string
&primary_mirror_uuid
)
752 ceph_assert((p_id
== nullptr) ^ (p_name
== nullptr));
754 CephContext
*cct
= (CephContext
*)p_ioctx
.cct();
755 if (p_snap_name
== nullptr) {
756 lderr(cct
) << "image to be cloned must be a snapshot" << dendl
;
761 if (c_opts
.get(RBD_IMAGE_OPTION_FLATTEN
, &flatten
) == 0) {
762 lderr(cct
) << "clone does not support 'flatten' image option" << dendl
;
767 std::string parent_id
;
768 if (p_id
== nullptr) {
769 r
= cls_client::dir_get_id(&p_ioctx
, RBD_DIRECTORY
, p_name
,
773 lderr(cct
) << "failed to retrieve parent image id: "
774 << cpp_strerror(r
) << dendl
;
782 std::string clone_id
;
783 if (c_id
== nullptr) {
784 clone_id
= util::generate_image_id(c_ioctx
);
789 ldout(cct
, 10) << __func__
<< " "
790 << "c_name=" << c_name
<< ", "
791 << "c_id= " << clone_id
<< ", "
792 << "c_opts=" << c_opts
<< dendl
;
794 ConfigProxy config
{reinterpret_cast<CephContext
*>(c_ioctx
.cct())->_conf
};
795 api::Config
<>::apply_pool_overrides(c_ioctx
, &config
);
797 AsioEngine
asio_engine(p_ioctx
);
800 auto *req
= image::CloneRequest
<>::create(
801 config
, p_ioctx
, parent_id
, p_snap_name
,
802 {cls::rbd::UserSnapshotNamespace
{}}, CEPH_NOSNAP
, c_ioctx
, c_name
,
803 clone_id
, c_opts
, cls::rbd::MIRROR_IMAGE_MODE_JOURNAL
,
804 non_primary_global_image_id
, primary_mirror_uuid
,
805 asio_engine
.get_work_queue(), &cond
);
816 int rename(IoCtx
& io_ctx
, const char *srcname
, const char *dstname
)
818 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
819 ldout(cct
, 20) << "rename " << &io_ctx
<< " " << srcname
<< " -> "
822 ImageCtx
*ictx
= new ImageCtx(srcname
, "", "", io_ctx
, false);
823 int r
= ictx
->state
->open(0);
825 lderr(cct
) << "error opening source image: " << cpp_strerror(r
) << dendl
;
828 BOOST_SCOPE_EXIT((ictx
)) {
829 ictx
->state
->close();
830 } BOOST_SCOPE_EXIT_END
832 return ictx
->operations
->rename(dstname
);
// Refreshes the image state if required and then fills `info` by calling
// image_info() with the same arguments.
// NOTE(review): the handling of the refresh result and the return value are
// not visible in this view.
835 int info(ImageCtx
*ictx
, image_info_t
& info
, size_t infosize
)
837 ldout(ictx
->cct
, 20) << "info " << ictx
<< dendl
;
839 int r
= ictx
->state
->refresh_if_required();
843 image_info(ictx
, info
, infosize
);
// Refreshes the image state if required and stores ictx->old_format in *old.
// NOTE(review): the handling of the refresh result and the return value are
// not visible in this view.
847 int get_old_format(ImageCtx
*ictx
, uint8_t *old
)
849 int r
= ictx
->state
->refresh_if_required();
852 *old
= ictx
->old_format
;
// Refreshes the image state if required and stores the effective image size
// at the current snap_id in *size.
// NOTE(review): the handling of the refresh result and the return value are
// not visible in this view.
856 int get_size(ImageCtx
*ictx
, uint64_t *size
)
858 int r
= ictx
->state
->refresh_if_required();
// the size is read while holding image_lock in shared mode
861 std::shared_lock l2
{ictx
->image_lock
};
862 *size
= ictx
->get_effective_image_size(ictx
->snap_id
);
// Refreshes the image state if required and stores ictx->features in
// *features.
// NOTE(review): the handling of the refresh result and the return value are
// not visible in this view.
866 int get_features(ImageCtx
*ictx
, uint64_t *features
)
868 int r
= ictx
->state
->refresh_if_required();
// the feature bits are read while holding image_lock in shared mode
871 std::shared_lock l
{ictx
->image_lock
};
872 *features
= ictx
->features
;
// Refreshes the image state if required, then returns the result of
// ictx->get_parent_overlap() for the current snap_id; that call receives
// `overlap` as its output argument.
// NOTE(review): the handling of the refresh result is not visible in this
// view.
876 int get_overlap(ImageCtx
*ictx
, uint64_t *overlap
)
878 int r
= ictx
->state
->refresh_if_required();
// the overlap is queried while holding image_lock in shared mode
881 std::shared_lock image_locker
{ictx
->image_lock
};
882 return ictx
->get_parent_overlap(ictx
->snap_id
, overlap
);
// Refreshes the image state if required, then returns the result of
// ictx->get_flags() for the current snap_id; that call receives `flags` as
// its output argument.
// NOTE(review): the handling of the refresh result is not visible in this
// view.
885 int get_flags(ImageCtx
*ictx
, uint64_t *flags
)
887 int r
= ictx
->state
->refresh_if_required();
// the flags are queried while holding image_lock in shared mode
892 std::shared_lock l2
{ictx
->image_lock
};
893 return ictx
->get_flags(ictx
->snap_id
, flags
);
896 int set_image_notification(ImageCtx
*ictx
, int fd
, int type
)
898 CephContext
*cct
= ictx
->cct
;
899 ldout(cct
, 20) << __func__
<< " " << ictx
<< " fd " << fd
<< " type" << type
<< dendl
;
901 int r
= ictx
->state
->refresh_if_required();
906 if (ictx
->event_socket
.is_valid())
908 return ictx
->event_socket
.init(fd
, type
);
911 int is_exclusive_lock_owner(ImageCtx
*ictx
, bool *is_owner
)
913 CephContext
*cct
= ictx
->cct
;
914 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< dendl
;
917 std::shared_lock owner_locker
{ictx
->owner_lock
};
918 if (ictx
->exclusive_lock
== nullptr) {
922 // might have been blocklisted by peer -- ensure we still own
923 // the lock by pinging the OSD
924 int r
= ictx
->exclusive_lock
->assert_header_locked();
925 if (r
== -EBUSY
|| r
== -ENOENT
) {
935 int lock_acquire(ImageCtx
*ictx
, rbd_lock_mode_t lock_mode
)
937 CephContext
*cct
= ictx
->cct
;
938 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< ", "
939 << "lock_mode=" << lock_mode
<< dendl
;
941 if (lock_mode
!= RBD_LOCK_MODE_EXCLUSIVE
) {
945 C_SaferCond lock_ctx
;
947 std::unique_lock l
{ictx
->owner_lock
};
949 if (ictx
->exclusive_lock
== nullptr) {
950 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
954 if (ictx
->get_exclusive_lock_policy()->may_auto_request_lock()) {
955 ictx
->set_exclusive_lock_policy(
956 new exclusive_lock::StandardPolicy(ictx
));
959 if (ictx
->exclusive_lock
->is_lock_owner()) {
963 ictx
->exclusive_lock
->acquire_lock(&lock_ctx
);
966 int r
= lock_ctx
.wait();
968 lderr(cct
) << "failed to request exclusive lock: " << cpp_strerror(r
)
973 std::shared_lock l
{ictx
->owner_lock
};
974 if (ictx
->exclusive_lock
== nullptr) {
976 } else if (!ictx
->exclusive_lock
->is_lock_owner()) {
977 lderr(cct
) << "failed to acquire exclusive lock" << dendl
;
978 return ictx
->exclusive_lock
->get_unlocked_op_error();
984 int lock_release(ImageCtx
*ictx
)
986 CephContext
*cct
= ictx
->cct
;
987 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< dendl
;
989 C_SaferCond lock_ctx
;
991 std::unique_lock l
{ictx
->owner_lock
};
993 if (ictx
->exclusive_lock
== nullptr ||
994 !ictx
->exclusive_lock
->is_lock_owner()) {
995 lderr(cct
) << "not exclusive lock owner" << dendl
;
999 ictx
->exclusive_lock
->release_lock(&lock_ctx
);
1002 int r
= lock_ctx
.wait();
1004 lderr(cct
) << "failed to release exclusive lock: " << cpp_strerror(r
)
1011 int lock_get_owners(ImageCtx
*ictx
, rbd_lock_mode_t
*lock_mode
,
1012 std::list
<std::string
> *lock_owners
)
1014 CephContext
*cct
= ictx
->cct
;
1015 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< dendl
;
1017 managed_lock::Locker locker
;
1018 C_SaferCond get_owner_ctx
;
1020 std::shared_lock owner_locker
{ictx
->owner_lock
};
1022 if (ictx
->exclusive_lock
== nullptr) {
1023 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1027 ictx
->exclusive_lock
->get_locker(&locker
, &get_owner_ctx
);
1030 int r
= get_owner_ctx
.wait();
1034 lderr(cct
) << "failed to determine current lock owner: "
1035 << cpp_strerror(r
) << dendl
;
1039 *lock_mode
= RBD_LOCK_MODE_EXCLUSIVE
;
1040 lock_owners
->clear();
1041 lock_owners
->emplace_back(locker
.address
);
1045 int lock_break(ImageCtx
*ictx
, rbd_lock_mode_t lock_mode
,
1046 const std::string
&lock_owner
) {
1047 CephContext
*cct
= ictx
->cct
;
1048 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< ", "
1049 << "lock_mode=" << lock_mode
<< ", "
1050 << "lock_owner=" << lock_owner
<< dendl
;
1052 if (lock_mode
!= RBD_LOCK_MODE_EXCLUSIVE
) {
1056 if (ictx
->read_only
) {
1060 managed_lock::Locker locker
;
1061 C_SaferCond get_owner_ctx
;
1063 std::shared_lock l
{ictx
->owner_lock
};
1065 if (ictx
->exclusive_lock
== nullptr) {
1066 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1070 ictx
->exclusive_lock
->get_locker(&locker
, &get_owner_ctx
);
1072 int r
= get_owner_ctx
.wait();
1076 lderr(cct
) << "failed to determine current lock owner: "
1077 << cpp_strerror(r
) << dendl
;
1081 if (locker
.address
!= lock_owner
) {
1085 C_SaferCond break_ctx
;
1087 std::shared_lock l
{ictx
->owner_lock
};
1089 if (ictx
->exclusive_lock
== nullptr) {
1090 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1094 ictx
->exclusive_lock
->break_lock(locker
, true, &break_ctx
);
1096 r
= break_ctx
.wait();
1100 lderr(cct
) << "failed to break lock: " << cpp_strerror(r
) << dendl
;
1106 int copy(ImageCtx
*src
, IoCtx
& dest_md_ctx
, const char *destname
,
1107 ImageOptions
& opts
, ProgressContext
&prog_ctx
, size_t sparse_size
)
1109 CephContext
*cct
= (CephContext
*)dest_md_ctx
.cct();
1111 if (opts
.get(RBD_IMAGE_OPTION_FLATTEN
, &option
) == 0) {
1112 lderr(cct
) << "copy does not support 'flatten' image option" << dendl
;
1115 if (opts
.get(RBD_IMAGE_OPTION_CLONE_FORMAT
, &option
) == 0) {
1116 lderr(cct
) << "copy does not support 'clone_format' image option"
1121 ldout(cct
, 20) << "copy " << src
->name
1122 << (src
->snap_name
.length() ? "@" + src
->snap_name
: "")
1123 << " -> " << destname
<< " opts = " << opts
<< dendl
;
1125 src
->image_lock
.lock_shared();
1126 uint64_t features
= src
->features
;
1127 uint64_t src_size
= src
->get_image_size(src
->snap_id
);
1128 src
->image_lock
.unlock_shared();
1129 uint64_t format
= 2;
1130 if (opts
.get(RBD_IMAGE_OPTION_FORMAT
, &format
) != 0) {
1131 opts
.set(RBD_IMAGE_OPTION_FORMAT
, format
);
1133 uint64_t stripe_unit
= src
->stripe_unit
;
1134 if (opts
.get(RBD_IMAGE_OPTION_STRIPE_UNIT
, &stripe_unit
) != 0) {
1135 opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
1137 uint64_t stripe_count
= src
->stripe_count
;
1138 if (opts
.get(RBD_IMAGE_OPTION_STRIPE_COUNT
, &stripe_count
) != 0) {
1139 opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
1141 uint64_t order
= src
->order
;
1142 if (opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
) != 0) {
1143 opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
1145 if (opts
.get(RBD_IMAGE_OPTION_FEATURES
, &features
) != 0) {
1146 opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
1148 if (features
& ~RBD_FEATURES_ALL
) {
1149 lderr(cct
) << "librbd does not support requested features" << dendl
;
1153 int r
= create(dest_md_ctx
, destname
, "", src_size
, opts
, "", "", false);
1155 lderr(cct
) << "header creation failed" << dendl
;
1158 opts
.set(RBD_IMAGE_OPTION_ORDER
, static_cast<uint64_t>(order
));
1160 ImageCtx
*dest
= new librbd::ImageCtx(destname
, "", nullptr, dest_md_ctx
,
1162 r
= dest
->state
->open(0);
1164 lderr(cct
) << "failed to read newly created header" << dendl
;
1168 r
= copy(src
, dest
, prog_ctx
, sparse_size
);
1170 int close_r
= dest
->state
->close();
1171 if (r
== 0 && close_r
< 0) {
1177 class C_CopyWrite
: public Context
{
1179 C_CopyWrite(bufferlist
*bl
, Context
* ctx
)
1180 : m_bl(bl
), m_ctx(ctx
) {}
1181 void finish(int r
) override
{
1190 class C_CopyRead
: public Context
{
1192 C_CopyRead(SimpleThrottle
*throttle
, ImageCtx
*dest
, uint64_t offset
,
1193 bufferlist
*bl
, size_t sparse_size
)
1194 : m_throttle(throttle
), m_dest(dest
), m_offset(offset
), m_bl(bl
),
1195 m_sparse_size(sparse_size
) {
1196 m_throttle
->start_op();
1198 void finish(int r
) override
{
1200 lderr(m_dest
->cct
) << "error reading from source image at offset "
1201 << m_offset
<< ": " << cpp_strerror(r
) << dendl
;
1203 m_throttle
->end_op(r
);
1206 ceph_assert(m_bl
->length() == (size_t)r
);
1208 if (m_bl
->is_zero()) {
1210 m_throttle
->end_op(r
);
1214 if (!m_sparse_size
) {
1215 m_sparse_size
= (1 << m_dest
->order
);
1218 auto *throttle
= m_throttle
;
1219 auto *end_op_ctx
= new LambdaContext([throttle
](int r
) {
1220 throttle
->end_op(r
);
1222 auto gather_ctx
= new C_Gather(m_dest
->cct
, end_op_ctx
);
1224 m_bl
->rebuild(buffer::ptr_node::create(m_bl
->length()));
1225 size_t write_offset
= 0;
1226 size_t write_length
= 0;
1228 size_t length
= m_bl
->length();
1229 const auto& m_ptr
= m_bl
->front();
1230 while (offset
< length
) {
1231 if (util::calc_sparse_extent(m_ptr
,
1237 bufferlist
*write_bl
= new bufferlist();
1238 write_bl
->push_back(
1239 buffer::ptr_node::create(m_ptr
, write_offset
, write_length
));
1240 Context
*ctx
= new C_CopyWrite(write_bl
, gather_ctx
->new_sub());
1241 auto comp
= io::AioCompletion::create(ctx
);
1243 // coordinate through AIO WQ to ensure lock is acquired if needed
1244 api::Io
<>::aio_write(*m_dest
, comp
, m_offset
+ write_offset
,
1245 write_length
, std::move(*write_bl
),
1246 LIBRADOS_OP_FLAG_FADVISE_DONTNEED
,
1247 std::move(read_trace
));
1248 write_offset
= offset
;
1253 ceph_assert(gather_ctx
->get_sub_created_count() > 0);
1254 gather_ctx
->activate();
1257 ZTracer::Trace read_trace
;
1260 SimpleThrottle
*m_throttle
;
1264 size_t m_sparse_size
;
// Copy the contents of image src into image dest.
//
// src         - source image; its size is read at src->snap_id and all reads
//               are issued against it
// dest        - destination image; its size is read at dest->snap_id and it is
//               handed to every C_CopyRead
// prog_ctx    - receives update_progress(offset, src_size) while chunks are
//               issued and update_progress(src_size, src_size) at the end
// sparse_size - forwarded unchanged to every C_CopyRead
//
// Steps visible here: compare the two image sizes, flush src and wait for the
// flush, create a deep_copy::MetadataCopyRequest, then walk src one stripe
// period at a time, creating one C_CopyRead plus one image read per chunk.
// A SimpleThrottle sized by rbd_concurrent_management_ops is shared by all
// chunk contexts; a pending throttle error ends the loop early with
// throttle.wait_for_ret() as the return value.
1267 int copy(ImageCtx
*src
, ImageCtx
*dest
, ProgressContext
&prog_ctx
, size_t sparse_size
)
// Sample both sizes while holding the respective image_lock in shared mode.
1269 src
->image_lock
.lock_shared();
1270 uint64_t src_size
= src
->get_image_size(src
->snap_id
);
1271 src
->image_lock
.unlock_shared();
1273 dest
->image_lock
.lock_shared();
1274 uint64_t dest_size
= dest
->get_image_size(dest
->snap_id
);
1275 dest
->image_lock
.unlock_shared();
1277 CephContext
*cct
= src
->cct
;
// A destination smaller than the source is logged through lderr.
// NOTE(review): presumably the copy is rejected at this point -- confirm.
1278 if (dest_size
< src_size
) {
1279 lderr(cct
) << " src size " << src_size
<< " > dest size "
1280 << dest_size
<< dendl
;
1284 // ensure previous writes are visible to dest
1285 C_SaferCond flush_ctx
;
// Internal flush request against src, completed through flush_ctx.
1287 auto aio_comp
= io::AioCompletion::create_and_start(&flush_ctx
, src
,
1288 io::AIO_TYPE_FLUSH
);
1289 auto req
= io::ImageDispatchSpec::create_flush(
1290 *src
, io::IMAGE_DISPATCH_LAYER_INTERNAL_START
,
1291 aio_comp
, io::FLUSH_SOURCE_INTERNAL
, {});
// Wait on the flush context and keep its result code in r.
1294 int r
= flush_ctx
.wait();
1300 auto req
= deep_copy::MetadataCopyRequest
<>::create(
1306 lderr(cct
) << "failed to copy metadata: " << cpp_strerror(r
) << dendl
;
// The trace is initialised only when src->blkin_trace_all is set.
1310 ZTracer::Trace trace
;
1311 if (src
->blkin_trace_all
) {
1312 trace
.init("copy", &src
->trace_endpoint
);
1315 SimpleThrottle
throttle(src
->config
.get_val
<uint64_t>("rbd_concurrent_management_ops"), false);
1316 uint64_t period
= src
->get_stripe_period();
// Both fadvise flags are passed to every read created in the loop below.
1317 unsigned fadvise_flags
= LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
|
1318 LIBRADOS_OP_FLAG_FADVISE_NOCACHE
;
1319 uint64_t object_id
= 0;
// Walk the source image in steps of one stripe period.
1320 for (uint64_t offset
= 0; offset
< src_size
; offset
+= period
) {
// Stop issuing work as soon as the throttle reports a pending error.
1321 if (throttle
.pending_error()) {
1322 return throttle
.wait_for_ret();
// Consult the object map, when one is attached, under a shared image_lock.
// NOTE(review): presumably a period with no possibly-existing object is
// skipped -- confirm.
1326 std::shared_lock image_locker
{src
->image_lock
};
1327 if (src
->object_map
!= nullptr) {
1329 // each period is related to src->stripe_count objects, check them all
1330 for (uint64_t i
=0; i
< src
->stripe_count
; i
++) {
1331 if (object_id
< src
->object_map
->size() &&
1332 src
->object_map
->object_may_exist(object_id
)) {
1340 object_id
+= src
->stripe_count
;
// The last chunk may be shorter than a full period.
1344 uint64_t len
= std::min(period
, src_size
- offset
);
// bl is passed both to the C_CopyRead context and, through io::ReadResult,
// to the read request.
1345 bufferlist
*bl
= new bufferlist();
1346 auto ctx
= new C_CopyRead(&throttle
, dest
, offset
, bl
, sparse_size
);
1347 auto comp
= io::AioCompletion::create_and_start
<Context
>(
1348 ctx
, src
, io::AIO_TYPE_READ
);
1349 auto req
= io::ImageDispatchSpec::create_read(
1350 *src
, io::IMAGE_DISPATCH_LAYER_NONE
, comp
,
1351 {{offset
, len
}}, io::ReadResult
{bl
},
1352 src
->get_data_io_context(), fadvise_flags
, 0, trace
);
// Hand the context the same trace that was given to the read request.
1354 ctx
->read_trace
= trace
;
1357 prog_ctx
.update_progress(offset
, src_size
);
// Collect the overall result from the throttle.
1360 r
= throttle
.wait_for_ret();
1362 prog_ctx
.update_progress(src_size
, src_size
);
// Report the lock state cached on the image context.
//
// ictx    - image whose cached lock information is read
// lockers - output list; one locker_t is appended per entry in ictx->lockers
//
// The body also writes ictx->exclusive_locked through *exclusive and
// ictx->lock_tag through *tag. The image state is refreshed first and the
// cached fields are read while holding image_lock in shared mode.
1366 int list_lockers(ImageCtx
*ictx
,
1367 std::list
<locker_t
> *lockers
,
1371 ldout(ictx
->cct
, 20) << "list_locks on image " << ictx
<< dendl
;
1373 int r
= ictx
->state
->refresh_if_required();
1377 std::shared_lock locker
{ictx
->image_lock
};
1379 *exclusive
= ictx
->exclusive_locked
;
1381 *tag
= ictx
->lock_tag
;
// Convert each (locker_id_t, locker_info_t) pair into the public locker_t.
1384 map
<rados::cls::lock::locker_id_t
,
1385 rados::cls::lock::locker_info_t
>::const_iterator it
;
1386 for (it
= ictx
->lockers
.begin(); it
!= ictx
->lockers
.end(); ++it
) {
// client is the stringified entity name, address the legacy address string.
1388 locker
.client
= stringify(it
->first
.locker
);
1389 locker
.cookie
= it
->first
.cookie
;
1390 locker
.address
= it
->second
.addr
.get_legacy_str();
1391 lockers
->push_back(locker
);
// Take the advisory lock named RBD_LOCK_NAME on the image header object.
//
// ictx      - image to lock; the lock is placed on ictx->header_oid through
//             ictx->md_ctx
// exclusive - selects ClsLockType::EXCLUSIVE when true, ClsLockType::SHARED
//             otherwise
// cookie    - passed unchanged to rados::cls::lock::lock
//
// A tag value is logged and forwarded to rados::cls::lock::lock as well.
// The image state is refreshed first, the cls call is made while holding
// image_lock in shared mode, and ictx->notify_update() is called afterwards.
// NOTE(review): the handling of r between those steps is not documented here;
// confirm that failures are returned before notify_update().
1398 int lock(ImageCtx
*ictx
, bool exclusive
, const string
& cookie
,
1401 ldout(ictx
->cct
, 20) << "lock image " << ictx
<< " exclusive=" << exclusive
1402 << " cookie='" << cookie
<< "' tag='" << tag
<< "'"
1405 int r
= ictx
->state
->refresh_if_required();
1410 * If we wanted we could do something more intelligent, like local
1411 * checks that we think we will succeed. But for now, let's not
1412 * duplicate that code.
1415 std::shared_lock locker
{ictx
->image_lock
};
1416 r
= rados::cls::lock::lock(&ictx
->md_ctx
, ictx
->header_oid
, RBD_LOCK_NAME
,
1417 exclusive
? ClsLockType::EXCLUSIVE
: ClsLockType::SHARED
,
1418 cookie
, tag
, "", utime_t(), 0);
1424 ictx
->notify_update();
// Release the advisory lock named RBD_LOCK_NAME on the image header object.
//
// ictx   - image to unlock; the call targets ictx->header_oid via ictx->md_ctx
// cookie - cookie identifying the lock holder, passed unchanged to
//          rados::cls::lock::unlock
//
// The image state is refreshed first, the cls call is made while holding
// image_lock in shared mode, and ictx->notify_update() is called afterwards.
1428 int unlock(ImageCtx
*ictx
, const string
& cookie
)
1430 ldout(ictx
->cct
, 20) << "unlock image " << ictx
1431 << " cookie='" << cookie
<< "'" << dendl
;
1433 int r
= ictx
->state
->refresh_if_required();
1438 std::shared_lock locker
{ictx
->image_lock
};
1439 r
= rados::cls::lock::unlock(&ictx
->md_ctx
, ictx
->header_oid
,
1440 RBD_LOCK_NAME
, cookie
);
1446 ictx
->notify_update();
// Break the RBD_LOCK_NAME lock held by another client on the image header.
//
// ictx   - image whose header object carries the lock
// client - textual entity name of the lock holder; parsed into entity_name_t
// cookie - cookie of the lock to break
//
// When the rbd_blocklist_on_break_lock option is true, the current lockers are
// fetched with get_lock_info, the address of the locker equal to the parsed
// client is looked up, and rados.blocklist_add is called with the
// rbd_blocklist_expire_seconds option before the lock is broken.
// ictx->notify_update() is called after rados::cls::lock::break_lock.
1450 int break_lock(ImageCtx
*ictx
, const string
& client
,
1451 const string
& cookie
)
1453 ldout(ictx
->cct
, 20) << "break_lock image " << ictx
<< " client='" << client
1454 << "' cookie='" << cookie
<< "'" << dendl
;
1456 int r
= ictx
->state
->refresh_if_required();
// An unparsable client string is logged as an error.
1460 entity_name_t lock_client
;
1461 if (!lock_client
.parse(client
)) {
1462 lderr(ictx
->cct
) << "Unable to parse client '" << client
1467 if (ictx
->config
.get_val
<bool>("rbd_blocklist_on_break_lock")) {
1468 typedef std::map
<rados::cls::lock::locker_id_t
,
1469 rados::cls::lock::locker_info_t
> Lockers
;
1471 ClsLockType lock_type
;
1472 std::string lock_tag
;
// Fetch the current lockers straight from the header object.
1473 r
= rados::cls::lock::get_lock_info(&ictx
->md_ctx
, ictx
->header_oid
,
1474 RBD_LOCK_NAME
, &lockers
, &lock_type
,
1477 lderr(ictx
->cct
) << "unable to retrieve lock info: " << cpp_strerror(r
)
// Find the address registered for the locker that matches lock_client.
1482 std::string client_address
;
1483 for (Lockers::iterator it
= lockers
.begin();
1484 it
!= lockers
.end(); ++it
) {
1485 if (it
->first
.locker
== lock_client
) {
1486 client_address
= it
->second
.addr
.get_legacy_str();
// An empty address means no locker matched the requested client.
1490 if (client_address
.empty()) {
1494 librados::Rados
rados(ictx
->md_ctx
);
1495 r
= rados
.blocklist_add(
1497 ictx
->config
.get_val
<uint64_t>("rbd_blocklist_expire_seconds"));
1499 lderr(ictx
->cct
) << "unable to blocklist client: " << cpp_strerror(r
)
1505 r
= rados::cls::lock::break_lock(&ictx
->md_ctx
, ictx
->header_oid
,
1506 RBD_LOCK_NAME
, cookie
, lock_client
);
1509 ictx
->notify_update();
// Completion callback adapter between an io::AioCompletion and a Context.
//
// cb  - opaque completion handle; reinterpreted as io::AioCompletion*
// arg - opaque user pointer; reinterpreted as Context*
//
// The Context is completed with the return value of the AioCompletion.
1513 void rbd_ctx_cb(completion_t cb
, void *arg
)
1515 Context
*ctx
= reinterpret_cast<Context
*>(arg
);
1516 auto comp
= reinterpret_cast<io::AioCompletion
*>(cb
);
1517 ctx
->complete(comp
->get_return_value());
// Read the extent [off, off + len) of an image chunk by chunk and feed every
// chunk to a caller-supplied callback.
//
// ictx - image to read from
// off  - starting byte offset of the extent
// len  - requested length; a clipped copy (mylen) is what is actually read
// cb   - invoked as cb(total_read, ret, bl.c_str(), arg) for each chunk, where
//        ret is the result of waiting on that chunk read
//
// Each chunk ends at the next stripe-period boundary or at the end of the
// remaining extent, whichever comes first. Reads are issued one at a time and
// waited on before the callback runs. After the loop the read latency, read
// count and read byte perf counters are updated.
// NOTE(review): presumably the number of bytes read is returned -- confirm.
1521 int64_t read_iterate(ImageCtx
*ictx
, uint64_t off
, uint64_t len
,
1522 int (*cb
)(uint64_t, size_t, const char *, void *),
1525 coarse_mono_time start_time
;
1526 ceph::timespan elapsed
;
1528 ldout(ictx
->cct
, 20) << "read_iterate " << ictx
<< " off = " << off
1529 << " len = " << len
<< dendl
;
1531 int r
= ictx
->state
->refresh_if_required();
// Clip a copy of the requested length while holding image_lock shared.
1535 uint64_t mylen
= len
;
1536 ictx
->image_lock
.lock_shared();
1537 r
= clip_io(ictx
, off
, &mylen
);
1538 ictx
->image_lock
.unlock_shared();
1542 int64_t total_read
= 0;
1543 uint64_t period
= ictx
->get_stripe_period();
1544 uint64_t left
= mylen
;
// The trace is initialised only when ictx->blkin_trace_all is set.
1546 ZTracer::Trace trace
;
1547 if (ictx
->blkin_trace_all
) {
1548 trace
.init("read_iterate", &ictx
->trace_endpoint
);
1551 std::shared_lock owner_locker
{ictx
->owner_lock
};
1552 start_time
= coarse_mono_clock::now();
// period_off is the start of the stripe period that contains off; read_len
// stops at the end of that period or at the end of the remaining extent.
1554 uint64_t period_off
= off
- (off
% period
);
1555 uint64_t read_len
= std::min(period_off
+ period
- off
, left
);
1560 auto c
= io::AioCompletion::create_and_start(&ctx
, ictx
,
1562 auto req
= io::ImageDispatchSpec::create_read(
1563 *ictx
, io::IMAGE_DISPATCH_LAYER_NONE
, c
,
1564 {{off
, read_len
}}, io::ReadResult
{&bl
},
1565 ictx
->get_data_io_context(), 0, 0, trace
);
// Wait for this chunk before handing its data to the callback.
1568 int ret
= ctx
.wait();
1573 r
= cb(total_read
, ret
, bl
.c_str(), arg
);
// Account the whole iteration as one read of mylen bytes.
1583 elapsed
= coarse_mono_clock::now() - start_time
;
1584 ictx
->perfcounter
->tinc(l_librbd_rd_latency
, elapsed
);
1585 ictx
->perfcounter
->inc(l_librbd_rd
);
1586 ictx
->perfcounter
->inc(l_librbd_rd_bytes
, mylen
);
// Check an I/O extent against the effective image size and shorten it when it
// runs past the end of the image.
//
// ictx - image context; image_lock must already be held (asserted below)
// off  - starting byte offset of the extent
// len  - in/out: requested length, rewritten to image_size - off when
//        off + *len exceeds the effective image size
//
// The effective size is taken at ictx->snap_id. When a snapshot is selected
// (snap_id differs from CEPH_NOSNAP) its snap info must exist.
// NOTE(review): off + *len is computed in uint64_t and could wrap for very
// large inputs -- confirm callers bound the values.
// NOTE(review): the (size_t) cast narrows the 64-bit difference on platforms
// where size_t is smaller than 64 bits, although *len itself is uint64_t.
1590 // validate extent against image size; clip to image size if necessary
1591 int clip_io(ImageCtx
*ictx
, uint64_t off
, uint64_t *len
)
1593 ceph_assert(ceph_mutex_is_locked(ictx
->image_lock
));
1595 if (ictx
->snap_id
!= CEPH_NOSNAP
&&
1596 ictx
->get_snap_info(ictx
->snap_id
) == nullptr) {
1599 uint64_t image_size
= ictx
->get_effective_image_size(ictx
->snap_id
);
1601 // special-case "len == 0" requests: always valid
1605 // can't start past end
1606 if (off
>= image_size
)
1609 // clip requests that extend past end to just end
1610 if ((off
+ *len
) > image_size
)
1611 *len
= (size_t)(image_size
- off
);
// Invalidate the cache of an image through its image dispatcher.
//
// ictx - image whose cache is invalidated
//
// The image state is refreshed, then io_image_dispatcher->invalidate_cache is
// called with a completion context. A failure is logged at debug level 20.
// The l_librbd_invalidate_cache perf counter is incremented. Finally, when
// this client is not the exclusive lock owner (no exclusive_lock object, or
// is_lock_owner() is false) and RBD_FEATURE_DIRTY_CACHE is set on the image,
// plugin_registry->discard is called with a second context.
1616 int invalidate_cache(ImageCtx
*ictx
)
1618 CephContext
*cct
= ictx
->cct
;
1619 ldout(cct
, 20) << "invalidate_cache " << ictx
<< dendl
;
1621 int r
= ictx
->state
->refresh_if_required();
1628 ictx
->io_image_dispatcher
->invalidate_cache(&ctx
);
1633 ldout(cct
, 20) << "failed to invalidate image cache" << dendl
;
1637 ictx
->perfcounter
->inc(l_librbd_invalidate_cache
);
1639 // Delete writeback cache if it is not initialized
1640 if ((!ictx
->exclusive_lock
||
1641 !ictx
->exclusive_lock
->is_lock_owner()) &&
1642 ictx
->test_features(RBD_FEATURE_DIRTY_CACHE
)) {
1644 ictx
->plugin_registry
->discard(&ctx3
);
// Drain finished completions from the event socket queue of an image.
//
// ictx    - image whose event_socket_completions queue is polled
// comps   - output array; slot i is filled by pop(comps[i])
// numcomp - capacity of comps; at most this many entries are popped
//
// Popping stops when comps is full or when pop() reports nothing to take.
// NOTE(review): presumably the number of popped completions is returned --
// confirm.
1650 int poll_io_events(ImageCtx
*ictx
, io::AioCompletion
**comps
, int numcomp
)
1654 CephContext
*cct
= ictx
->cct
;
1655 ldout(cct
, 20) << __func__
<< " " << ictx
<< " numcomp = " << numcomp
1658 while (i
< numcomp
&& ictx
->event_socket_completions
.pop(comps
[i
])) {
// Fetch a single metadata value stored on the image header object.
//
// ictx  - image whose header object (ictx->header_oid) is queried
// key   - metadata key to look up
// value - output; filled in by cls_client::metadata_get
//
// The image state is refreshed first; the result of
// cls_client::metadata_get is returned as is.
1665 int metadata_get(ImageCtx
*ictx
, const string
&key
, string
*value
)
1667 CephContext
*cct
= ictx
->cct
;
1668 ldout(cct
, 20) << "metadata_get " << ictx
<< " key=" << key
<< dendl
;
1670 int r
= ictx
->state
->refresh_if_required();
1675 return cls_client::metadata_get(&ictx
->md_ctx
, ictx
->header_oid
, key
, value
);
// List metadata key/value pairs stored on the image header object.
//
// ictx  - image whose header object (ictx->header_oid) is queried
// start - forwarded to image::GetMetadataRequest as the start key
// max   - forwarded to image::GetMetadataRequest as the entry limit
// pairs - output map, filled in by the request
//
// The image state is refreshed first, then an image::GetMetadataRequest is
// created against ictx->md_ctx with a completion context.
1678 int metadata_list(ImageCtx
*ictx
, const string
&start
, uint64_t max
, map
<string
, bufferlist
> *pairs
)
1680 CephContext
*cct
= ictx
->cct
;
1681 ldout(cct
, 20) << "metadata_list " << ictx
<< dendl
;
1683 int r
= ictx
->state
->refresh_if_required();
1689 auto req
= image::GetMetadataRequest
<>::create(
1690 ictx
->md_ctx
, ictx
->header_oid
, false, "", start
, max
, pairs
, &ctx
);
// List the clients currently watching the image header object.
//
// ictx     - image whose header object is inspected
// watchers - output list; one librbd::image_watcher_t is appended per watcher
//            reported by md_ctx.list_watchers
//
// The header object name is derived locally: util::old_header_name(name) for
// old-format images, util::header_name(id) otherwise.
1696 int list_watchers(ImageCtx
*ictx
,
1697 std::list
<librbd::image_watcher_t
> &watchers
)
1700 std::string header_oid
;
1701 std::list
<obj_watch_t
> obj_watchers
;
1703 if (ictx
->old_format
) {
1704 header_oid
= util::old_header_name(ictx
->name
);
1706 header_oid
= util::header_name(ictx
->id
);
1709 r
= ictx
->md_ctx
.list_watchers(header_oid
, &obj_watchers
);
// Copy addr, watcher_id and cookie of every obj_watch_t into the public type.
1714 for (auto i
= obj_watchers
.begin(); i
!= obj_watchers
.end(); ++i
) {
1715 librbd::image_watcher_t watcher
;
1716 watcher
.addr
= i
->addr
;
1717 watcher
.id
= i
->watcher_id
;
1718 watcher
.cookie
= i
->cookie
;
1720 watchers
.push_back(watcher
);
1728 std::ostream
&operator<<(std::ostream
&os
, const librbd::ImageOptions
&opts
) {
1731 const char *delimiter
= "";
1732 for (auto &i
: librbd::IMAGE_OPTIONS_TYPE_MAPPING
) {
1733 if (i
.second
== librbd::STR
) {
1735 if (opts
.get(i
.first
, &val
) == 0) {
1736 os
<< delimiter
<< librbd::image_option_name(i
.first
) << "=" << val
;
1739 } else if (i
.second
== librbd::UINT64
) {
1741 if (opts
.get(i
.first
, &val
) == 0) {
1742 os
<< delimiter
<< librbd::image_option_name(i
.first
) << "=" << val
;