1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #include "include/int_types.h"
8 #include "include/types.h"
9 #include "include/uuid.h"
10 #include "common/ceph_context.h"
11 #include "common/dout.h"
12 #include "common/errno.h"
13 #include "common/Throttle.h"
14 #include "common/event_socket.h"
15 #include "cls/lock/cls_lock_client.h"
16 #include "include/stringify.h"
18 #include "cls/rbd/cls_rbd.h"
19 #include "cls/rbd/cls_rbd_types.h"
20 #include "cls/rbd/cls_rbd_client.h"
21 #include "cls/journal/cls_journal_types.h"
22 #include "cls/journal/cls_journal_client.h"
24 #include "librbd/ExclusiveLock.h"
25 #include "librbd/ImageCtx.h"
26 #include "librbd/ImageState.h"
27 #include "librbd/internal.h"
28 #include "librbd/Journal.h"
29 #include "librbd/ObjectMap.h"
30 #include "librbd/Operations.h"
31 #include "librbd/Types.h"
32 #include "librbd/Utils.h"
33 #include "librbd/api/Image.h"
34 #include "librbd/exclusive_lock/AutomaticPolicy.h"
35 #include "librbd/exclusive_lock/StandardPolicy.h"
36 #include "librbd/image/CloneRequest.h"
37 #include "librbd/image/CreateRequest.h"
38 #include "librbd/image/RemoveRequest.h"
39 #include "librbd/io/AioCompletion.h"
40 #include "librbd/io/ImageRequest.h"
41 #include "librbd/io/ImageRequestWQ.h"
42 #include "librbd/io/ObjectRequest.h"
43 #include "librbd/io/ReadResult.h"
44 #include "librbd/journal/Types.h"
45 #include "librbd/managed_lock/Types.h"
46 #include "librbd/mirror/EnableRequest.h"
47 #include "librbd/operation/TrimRequest.h"
49 #include "journal/Journaler.h"
51 #include <boost/scope_exit.hpp>
52 #include <boost/variant.hpp>
53 #include "include/assert.h"
55 #define dout_subsys ceph_subsys_rbd
57 #define dout_prefix *_dout << "librbd: "
59 #define rbd_howmany(x, y) (((x) + (y) - 1) / (y))
66 // list binds to list() here, so std::list is explicitly used below
68 using ceph::bufferlist
;
69 using librados::snap_t
;
70 using librados::IoCtx
;
71 using librados::Rados
;
77 int validate_pool(IoCtx
&io_ctx
, CephContext
*cct
) {
78 if (!cct
->_conf
->rbd_validate_pool
) {
82 int r
= io_ctx
.stat(RBD_DIRECTORY
, NULL
, NULL
);
85 } else if (r
< 0 && r
!= -ENOENT
) {
86 lderr(cct
) << "failed to stat RBD directory: " << cpp_strerror(r
) << dendl
;
90 // allocate a self-managed snapshot id if this is a new pool to force
91 // self-managed snapshot mode
93 r
= io_ctx
.selfmanaged_snap_create(&snap_id
);
95 lderr(cct
) << "pool not configured for self-managed RBD snapshot support"
99 lderr(cct
) << "failed to allocate self-managed snapshot: "
100 << cpp_strerror(r
) << dendl
;
104 r
= io_ctx
.selfmanaged_snap_remove(snap_id
);
106 lderr(cct
) << "failed to release self-managed snapshot " << snap_id
107 << ": " << cpp_strerror(r
) << dendl
;
113 } // anonymous namespace
115 int detect_format(IoCtx
&io_ctx
, const string
&name
,
116 bool *old_format
, uint64_t *size
)
118 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
121 int r
= io_ctx
.stat(util::old_header_name(name
), size
, NULL
);
125 r
= io_ctx
.stat(util::id_obj_name(name
), size
, NULL
);
132 ldout(cct
, 20) << "detect format of " << name
<< " : "
133 << (old_format
? (*old_format
? "old" : "new") :
134 "don't care") << dendl
;
/**
 * Decide whether an access at offset `off` is covered by a parent image.
 *
 * @param parent_pool_id pool id of the parent; the value -1 always yields false
 * @param off            offset being tested
 * @param overlap        parent overlap the offset is compared against
 * @return true when parent_pool_id is not -1 and off <= overlap. The
 *         comparison is inclusive, so off == overlap returns true.
 */
bool has_parent(int64_t parent_pool_id, uint64_t off, uint64_t overlap)
{
  return (parent_pool_id != -1 && off <= overlap);
}
143 void init_rbd_header(struct rbd_obj_header_ondisk
& ondisk
,
144 uint64_t size
, int order
, uint64_t bid
)
146 uint32_t hi
= bid
>> 32;
147 uint32_t lo
= bid
& 0xFFFFFFFF;
148 uint32_t extra
= rand() % 0xFFFFFFFF;
149 memset(&ondisk
, 0, sizeof(ondisk
));
151 memcpy(&ondisk
.text
, RBD_HEADER_TEXT
, sizeof(RBD_HEADER_TEXT
));
152 memcpy(&ondisk
.signature
, RBD_HEADER_SIGNATURE
,
153 sizeof(RBD_HEADER_SIGNATURE
));
154 memcpy(&ondisk
.version
, RBD_HEADER_VERSION
, sizeof(RBD_HEADER_VERSION
));
156 snprintf(ondisk
.block_name
, sizeof(ondisk
.block_name
), "rb.%x.%x.%x",
159 ondisk
.image_size
= size
;
160 ondisk
.options
.order
= order
;
161 ondisk
.options
.crypt_type
= RBD_CRYPT_NONE
;
162 ondisk
.options
.comp_type
= RBD_COMP_NONE
;
164 ondisk
.snap_count
= 0;
166 ondisk
.snap_names_len
= 0;
169 void image_info(ImageCtx
*ictx
, image_info_t
& info
, size_t infosize
)
171 int obj_order
= ictx
->order
;
172 ictx
->snap_lock
.get_read();
173 info
.size
= ictx
->get_image_size(ictx
->snap_id
);
174 ictx
->snap_lock
.put_read();
175 info
.obj_size
= 1ULL << obj_order
;
176 info
.num_objs
= Striper::get_num_objects(ictx
->layout
, info
.size
);
177 info
.order
= obj_order
;
178 strncpy(info
.block_name_prefix
, ictx
->object_prefix
.c_str(),
179 RBD_MAX_BLOCK_NAME_SIZE
);
180 info
.block_name_prefix
[RBD_MAX_BLOCK_NAME_SIZE
- 1] = '\0';
182 // clear deprecated fields
183 info
.parent_pool
= -1L;
184 info
.parent_name
[0] = '\0';
/**
 * Parse the object number that follows the prefix in an object name.
 *
 * The first object_prefix.length() + 1 characters of `oid` (the prefix plus
 * a one-character separator) are skipped and the remainder is read as a
 * hexadecimal integer.
 *
 * @param oid           full object name
 * @param object_prefix prefix whose length determines how much is skipped
 * @return the parsed value, or 0 when no hexadecimal digits can be read
 *         (for example when `oid` is not longer than prefix + separator)
 */
uint64_t oid_to_object_no(const std::string& oid,
                          const std::string& object_prefix)
{
  std::istringstream iss(oid);
  // skip object prefix and separator
  iss.ignore(object_prefix.length() + 1);
  // start from 0: if ignore() already hit end-of-input the extraction below
  // is never attempted and would otherwise leave the result indeterminate
  uint64_t num = 0;
  iss >> std::hex >> num;
  return num;
}
197 void trim_image(ImageCtx
*ictx
, uint64_t newsize
, ProgressContext
& prog_ctx
)
199 assert(ictx
->owner_lock
.is_locked());
200 assert(ictx
->exclusive_lock
== nullptr ||
201 ictx
->exclusive_lock
->is_lock_owner());
204 ictx
->snap_lock
.get_read();
205 operation::TrimRequest
<> *req
= operation::TrimRequest
<>::create(
206 *ictx
, &ctx
, ictx
->size
, newsize
, prog_ctx
);
207 ictx
->snap_lock
.put_read();
212 lderr(ictx
->cct
) << "warning: failed to remove some object(s): "
213 << cpp_strerror(r
) << dendl
;
217 int read_header_bl(IoCtx
& io_ctx
, const string
& header_oid
,
218 bufferlist
& header
, uint64_t *ver
)
222 #define READ_SIZE 4096
225 r
= io_ctx
.read(header_oid
, bl
, READ_SIZE
, off
);
228 header
.claim_append(bl
);
230 } while (r
== READ_SIZE
);
232 if (header
.length() < sizeof(RBD_HEADER_TEXT
) ||
233 memcmp(RBD_HEADER_TEXT
, header
.c_str(), sizeof(RBD_HEADER_TEXT
))) {
234 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
235 lderr(cct
) << "unrecognized header format" << dendl
;
240 *ver
= io_ctx
.get_last_version();
245 int read_header(IoCtx
& io_ctx
, const string
& header_oid
,
246 struct rbd_obj_header_ondisk
*header
, uint64_t *ver
)
248 bufferlist header_bl
;
249 int r
= read_header_bl(io_ctx
, header_oid
, header_bl
, ver
);
252 if (header_bl
.length() < (int)sizeof(*header
))
254 memcpy(header
, header_bl
.c_str(), sizeof(*header
));
259 int tmap_set(IoCtx
& io_ctx
, const string
& imgname
)
261 bufferlist cmdbl
, emptybl
;
262 __u8 c
= CEPH_OSD_TMAP_SET
;
264 ::encode(imgname
, cmdbl
);
265 ::encode(emptybl
, cmdbl
);
266 return io_ctx
.tmap_update(RBD_DIRECTORY
, cmdbl
);
269 int tmap_rm(IoCtx
& io_ctx
, const string
& imgname
)
272 __u8 c
= CEPH_OSD_TMAP_RM
;
274 ::encode(imgname
, cmdbl
);
275 return io_ctx
.tmap_update(RBD_DIRECTORY
, cmdbl
);
278 typedef boost::variant
<std::string
,uint64_t> image_option_value_t
;
279 typedef std::map
<int,image_option_value_t
> image_options_t
;
280 typedef std::shared_ptr
<image_options_t
> image_options_ref
;
282 enum image_option_type_t
{
287 const std::map
<int, image_option_type_t
> IMAGE_OPTIONS_TYPE_MAPPING
= {
288 {RBD_IMAGE_OPTION_FORMAT
, UINT64
},
289 {RBD_IMAGE_OPTION_FEATURES
, UINT64
},
290 {RBD_IMAGE_OPTION_ORDER
, UINT64
},
291 {RBD_IMAGE_OPTION_STRIPE_UNIT
, UINT64
},
292 {RBD_IMAGE_OPTION_STRIPE_COUNT
, UINT64
},
293 {RBD_IMAGE_OPTION_JOURNAL_ORDER
, UINT64
},
294 {RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH
, UINT64
},
295 {RBD_IMAGE_OPTION_JOURNAL_POOL
, STR
},
296 {RBD_IMAGE_OPTION_FEATURES_SET
, UINT64
},
297 {RBD_IMAGE_OPTION_FEATURES_CLEAR
, UINT64
},
298 {RBD_IMAGE_OPTION_DATA_POOL
, STR
},
301 std::string
image_option_name(int optname
) {
303 case RBD_IMAGE_OPTION_FORMAT
:
305 case RBD_IMAGE_OPTION_FEATURES
:
307 case RBD_IMAGE_OPTION_ORDER
:
309 case RBD_IMAGE_OPTION_STRIPE_UNIT
:
310 return "stripe_unit";
311 case RBD_IMAGE_OPTION_STRIPE_COUNT
:
312 return "stripe_count";
313 case RBD_IMAGE_OPTION_JOURNAL_ORDER
:
314 return "journal_order";
315 case RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH
:
316 return "journal_splay_width";
317 case RBD_IMAGE_OPTION_JOURNAL_POOL
:
318 return "journal_pool";
319 case RBD_IMAGE_OPTION_FEATURES_SET
:
320 return "features_set";
321 case RBD_IMAGE_OPTION_FEATURES_CLEAR
:
322 return "features_clear";
323 case RBD_IMAGE_OPTION_DATA_POOL
:
326 return "unknown (" + stringify(optname
) + ")";
330 std::ostream
&operator<<(std::ostream
&os
, const ImageOptions
&opts
) {
333 const char *delimiter
= "";
334 for (auto &i
: IMAGE_OPTIONS_TYPE_MAPPING
) {
335 if (i
.second
== STR
) {
337 if (opts
.get(i
.first
, &val
) == 0) {
338 os
<< delimiter
<< image_option_name(i
.first
) << "=" << val
;
341 } else if (i
.second
== UINT64
) {
343 if (opts
.get(i
.first
, &val
) == 0) {
344 os
<< delimiter
<< image_option_name(i
.first
) << "=" << val
;
355 void image_options_create(rbd_image_options_t
* opts
)
357 image_options_ref
* opts_
= new image_options_ref(new image_options_t());
359 *opts
= static_cast<rbd_image_options_t
>(opts_
);
362 void image_options_create_ref(rbd_image_options_t
* opts
,
363 rbd_image_options_t orig
)
365 image_options_ref
* orig_
= static_cast<image_options_ref
*>(orig
);
366 image_options_ref
* opts_
= new image_options_ref(*orig_
);
368 *opts
= static_cast<rbd_image_options_t
>(opts_
);
371 void image_options_copy(rbd_image_options_t
* opts
,
372 const ImageOptions
&orig
)
374 image_options_ref
* opts_
= new image_options_ref(new image_options_t());
376 *opts
= static_cast<rbd_image_options_t
>(opts_
);
380 for (auto &i
: IMAGE_OPTIONS_TYPE_MAPPING
) {
383 if (orig
.get(i
.first
, &str_val
) == 0) {
384 image_options_set(*opts
, i
.first
, str_val
);
388 if (orig
.get(i
.first
, &uint64_val
) == 0) {
389 image_options_set(*opts
, i
.first
, uint64_val
);
396 void image_options_destroy(rbd_image_options_t opts
)
398 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
403 int image_options_set(rbd_image_options_t opts
, int optname
,
404 const std::string
& optval
)
406 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
408 std::map
<int, image_option_type_t
>::const_iterator i
=
409 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
411 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= STR
) {
415 (*opts_
->get())[optname
] = optval
;
419 int image_options_set(rbd_image_options_t opts
, int optname
, uint64_t optval
)
421 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
423 std::map
<int, image_option_type_t
>::const_iterator i
=
424 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
426 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= UINT64
) {
430 (*opts_
->get())[optname
] = optval
;
434 int image_options_get(rbd_image_options_t opts
, int optname
,
437 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
439 std::map
<int, image_option_type_t
>::const_iterator i
=
440 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
442 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= STR
) {
446 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
448 if (j
== (*opts_
)->end()) {
452 *optval
= boost::get
<std::string
>(j
->second
);
456 int image_options_get(rbd_image_options_t opts
, int optname
, uint64_t* optval
)
458 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
460 std::map
<int, image_option_type_t
>::const_iterator i
=
461 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
463 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= UINT64
) {
467 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
469 if (j
== (*opts_
)->end()) {
473 *optval
= boost::get
<uint64_t>(j
->second
);
477 int image_options_is_set(rbd_image_options_t opts
, int optname
,
480 if (IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
) ==
481 IMAGE_OPTIONS_TYPE_MAPPING
.end()) {
485 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
486 *is_set
= ((*opts_
)->find(optname
) != (*opts_
)->end());
490 int image_options_unset(rbd_image_options_t opts
, int optname
)
492 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
494 std::map
<int, image_option_type_t
>::const_iterator i
=
495 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
497 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end()) {
498 assert((*opts_
)->find(optname
) == (*opts_
)->end());
502 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
504 if (j
== (*opts_
)->end()) {
512 void image_options_clear(rbd_image_options_t opts
)
514 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
519 bool image_options_is_empty(rbd_image_options_t opts
)
521 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
523 return (*opts_
)->empty();
526 int list(IoCtx
& io_ctx
, vector
<string
>& names
)
528 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
529 ldout(cct
, 20) << "list " << &io_ctx
<< dendl
;
532 int r
= io_ctx
.read(RBD_DIRECTORY
, bl
, 0, 0);
540 // old format images are in a tmap
542 bufferlist::iterator p
= bl
.begin();
544 map
<string
,bufferlist
> m
;
547 for (map
<string
,bufferlist
>::iterator q
= m
.begin(); q
!= m
.end(); ++q
) {
548 names
.push_back(q
->first
);
552 map
<string
, string
> images
;
553 r
= api::Image
<>::list_images(io_ctx
, &images
);
555 lderr(cct
) << "error listing v2 images: " << cpp_strerror(r
) << dendl
;
558 for (const auto& img_pair
: images
) {
559 names
.push_back(img_pair
.first
);
565 int flatten_children(ImageCtx
*ictx
, const char* snap_name
,
566 ProgressContext
& pctx
)
568 CephContext
*cct
= ictx
->cct
;
569 ldout(cct
, 20) << "children flatten " << ictx
->name
<< dendl
;
571 RWLock::RLocker
l(ictx
->snap_lock
);
572 snap_t snap_id
= ictx
->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name
);
573 ParentSpec
parent_spec(ictx
->md_ctx
.get_id(), ictx
->id
, snap_id
);
574 map
< pair
<int64_t, string
>, set
<string
> > image_info
;
576 int r
= api::Image
<>::list_children(ictx
, parent_spec
, &image_info
);
581 size_t size
= image_info
.size();
586 Rados
rados(ictx
->md_ctx
);
587 for ( auto &info
: image_info
){
588 string pool
= info
.first
.second
;
590 r
= rados
.ioctx_create2(info
.first
.first
, ioctx
);
592 lderr(cct
) << "Error accessing child image pool " << pool
597 for (auto &id_it
: info
.second
) {
598 ImageCtx
*imctx
= new ImageCtx("", id_it
, NULL
, ioctx
, false);
599 int r
= imctx
->state
->open(false);
601 lderr(cct
) << "error opening image: "
602 << cpp_strerror(r
) << dendl
;
606 librbd::NoOpProgressContext prog_ctx
;
607 r
= imctx
->operations
->flatten(prog_ctx
);
609 lderr(cct
) << "error flattening image: " << pool
<< "/" << id_it
610 << cpp_strerror(r
) << dendl
;
611 imctx
->state
->close();
615 if ((imctx
->features
& RBD_FEATURE_DEEP_FLATTEN
) == 0 &&
616 !imctx
->snaps
.empty()) {
617 imctx
->parent_lock
.get_read();
618 ParentInfo parent_info
= imctx
->parent_md
;
619 imctx
->parent_lock
.put_read();
621 r
= cls_client::remove_child(&imctx
->md_ctx
, RBD_CHILDREN
,
622 parent_info
.spec
, imctx
->id
);
623 if (r
< 0 && r
!= -ENOENT
) {
624 lderr(cct
) << "error removing child from children list" << dendl
;
625 imctx
->state
->close();
630 r
= imctx
->state
->close();
632 lderr(cct
) << "failed to close image: " << cpp_strerror(r
) << dendl
;
636 pctx
.update_progress(++i
, size
);
643 int list_children(ImageCtx
*ictx
, set
<pair
<string
, string
> >& names
)
645 CephContext
*cct
= ictx
->cct
;
646 ldout(cct
, 20) << "children list " << ictx
->name
<< dendl
;
648 RWLock::RLocker
l(ictx
->snap_lock
);
649 ParentSpec
parent_spec(ictx
->md_ctx
.get_id(), ictx
->id
, ictx
->snap_id
);
650 map
< pair
<int64_t, string
>, set
<string
> > image_info
;
652 int r
= api::Image
<>::list_children(ictx
, parent_spec
, &image_info
);
657 Rados
rados(ictx
->md_ctx
);
658 for ( auto &info
: image_info
){
660 r
= rados
.ioctx_create2(info
.first
.first
, ioctx
);
662 lderr(cct
) << "Error accessing child image pool " << info
.first
.second
667 for (auto &id_it
: info
.second
) {
669 r
= cls_client::dir_get_name(&ioctx
, RBD_DIRECTORY
, id_it
, &name
);
671 lderr(cct
) << "Error looking up name for image id " << id_it
672 << " in pool " << info
.first
.second
<< dendl
;
675 names
.insert(make_pair(info
.first
.second
, name
));
682 int get_snap_namespace(ImageCtx
*ictx
,
683 const char *snap_name
,
684 cls::rbd::SnapshotNamespace
*snap_namespace
) {
685 ldout(ictx
->cct
, 20) << "get_snap_namespace " << ictx
<< " " << snap_name
688 int r
= ictx
->state
->refresh_if_required();
691 RWLock::RLocker
l(ictx
->snap_lock
);
692 snap_t snap_id
= ictx
->get_snap_id(*snap_namespace
, snap_name
);
693 if (snap_id
== CEPH_NOSNAP
)
695 r
= ictx
->get_snap_namespace(snap_id
, snap_namespace
);
699 int snap_is_protected(ImageCtx
*ictx
, const char *snap_name
, bool *is_protected
)
701 ldout(ictx
->cct
, 20) << "snap_is_protected " << ictx
<< " " << snap_name
704 int r
= ictx
->state
->refresh_if_required();
708 RWLock::RLocker
l(ictx
->snap_lock
);
709 snap_t snap_id
= ictx
->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name
);
710 if (snap_id
== CEPH_NOSNAP
)
713 r
= ictx
->is_snap_unprotected(snap_id
, &is_unprotected
);
714 // consider both PROTECTED or UNPROTECTING to be 'protected',
715 // since in either state they can't be deleted
716 *is_protected
= !is_unprotected
;
720 int create_v1(IoCtx
& io_ctx
, const char *imgname
, uint64_t size
, int order
)
722 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
724 ldout(cct
, 20) << __func__
<< " " << &io_ctx
<< " name = " << imgname
725 << " size = " << size
<< " order = " << order
<< dendl
;
726 int r
= validate_pool(io_ctx
, cct
);
731 ldout(cct
, 2) << "adding rbd image to directory..." << dendl
;
732 r
= tmap_set(io_ctx
, imgname
);
734 lderr(cct
) << "error adding image to directory: " << cpp_strerror(r
)
740 uint64_t bid
= rados
.get_instance_id();
742 ldout(cct
, 2) << "creating rbd image..." << dendl
;
743 struct rbd_obj_header_ondisk header
;
744 init_rbd_header(header
, size
, order
, bid
);
747 bl
.append((const char *)&header
, sizeof(header
));
749 string header_oid
= util::old_header_name(imgname
);
750 r
= io_ctx
.write(header_oid
, bl
, bl
.length(), 0);
752 lderr(cct
) << "Error writing image header: " << cpp_strerror(r
)
754 int remove_r
= tmap_rm(io_ctx
, imgname
);
756 lderr(cct
) << "Could not remove image from directory after "
757 << "header creation failed: "
758 << cpp_strerror(remove_r
) << dendl
;
763 ldout(cct
, 2) << "done." << dendl
;
767 int create(librados::IoCtx
& io_ctx
, const char *imgname
, uint64_t size
,
770 uint64_t order_
= *order
;
773 int r
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order_
);
776 r
= create(io_ctx
, imgname
, "", size
, opts
, "", "", false);
778 int r1
= opts
.get(RBD_IMAGE_OPTION_ORDER
, &order_
);
785 int create(IoCtx
& io_ctx
, const char *imgname
, uint64_t size
,
786 bool old_format
, uint64_t features
, int *order
,
787 uint64_t stripe_unit
, uint64_t stripe_count
)
792 uint64_t order_
= *order
;
793 uint64_t format
= old_format
? 1 : 2;
797 r
= opts
.set(RBD_IMAGE_OPTION_FORMAT
, format
);
799 r
= opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
801 r
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order_
);
803 r
= opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
805 r
= opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
808 r
= create(io_ctx
, imgname
, "", size
, opts
, "", "", false);
810 int r1
= opts
.get(RBD_IMAGE_OPTION_ORDER
, &order_
);
817 int create(IoCtx
& io_ctx
, const std::string
&image_name
,
818 const std::string
&image_id
, uint64_t size
,
820 const std::string
&non_primary_global_image_id
,
821 const std::string
&primary_mirror_uuid
,
822 bool skip_mirror_enable
)
824 std::string
id(image_id
);
826 id
= util::generate_image_id(io_ctx
);
829 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
830 ldout(cct
, 10) << __func__
<< " name=" << image_name
<< ", "
831 << "id= " << id
<< ", "
832 << "size=" << size
<< ", opts=" << opts
<< dendl
;
835 if (opts
.get(RBD_IMAGE_OPTION_FORMAT
, &format
) != 0)
836 format
= cct
->_conf
->rbd_default_format
;
837 bool old_format
= format
== 1;
839 // make sure it doesn't already exist, in either format
840 int r
= detect_format(io_ctx
, image_name
, NULL
, NULL
);
843 lderr(cct
) << "Could not tell if " << image_name
<< " already exists"
847 lderr(cct
) << "rbd image " << image_name
<< " already exists" << dendl
;
852 if (opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
) != 0 || order
== 0) {
853 order
= cct
->_conf
->rbd_default_order
;
855 r
= image::CreateRequest
<>::validate_order(cct
, order
);
861 r
= create_v1(io_ctx
, image_name
.c_str(), size
, order
);
863 ThreadPool
*thread_pool
;
864 ContextWQ
*op_work_queue
;
865 ImageCtx::get_thread_pool_instance(cct
, &thread_pool
, &op_work_queue
);
868 image::CreateRequest
<> *req
= image::CreateRequest
<>::create(
869 io_ctx
, image_name
, id
, size
, opts
, non_primary_global_image_id
,
870 primary_mirror_uuid
, skip_mirror_enable
, op_work_queue
, &cond
);
876 int r1
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
883 * Parent may be in different pool, hence different IoCtx
885 int clone(IoCtx
& p_ioctx
, const char *p_name
, const char *p_snap_name
,
886 IoCtx
& c_ioctx
, const char *c_name
,
887 uint64_t features
, int *c_order
,
888 uint64_t stripe_unit
, int stripe_count
)
890 uint64_t order
= *c_order
;
893 opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
894 opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
895 opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
896 opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
898 int r
= clone(p_ioctx
, p_name
, p_snap_name
, c_ioctx
, c_name
, opts
);
899 opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
);
904 int clone(IoCtx
& p_ioctx
, const char *p_name
, const char *p_snap_name
,
905 IoCtx
& c_ioctx
, const char *c_name
, ImageOptions
& c_opts
)
907 CephContext
*cct
= (CephContext
*)p_ioctx
.cct();
908 if (p_snap_name
== NULL
) {
909 lderr(cct
) << "image to be cloned must be a snapshot" << dendl
;
913 // make sure parent snapshot exists
914 ImageCtx
*p_imctx
= new ImageCtx(p_name
, "", p_snap_name
, p_ioctx
, true);
915 int r
= p_imctx
->state
->open(false);
917 lderr(cct
) << "error opening parent image: "
918 << cpp_strerror(r
) << dendl
;
922 r
= clone(p_imctx
, c_ioctx
, c_name
, "", c_opts
, "", "");
924 int close_r
= p_imctx
->state
->close();
925 if (r
== 0 && close_r
< 0) {
935 int clone(ImageCtx
*p_imctx
, IoCtx
& c_ioctx
, const std::string
&c_name
,
936 const std::string
&c_id
, ImageOptions
& c_opts
,
937 const std::string
&non_primary_global_image_id
,
938 const std::string
&primary_mirror_uuid
)
940 std::string
id(c_id
);
942 id
= util::generate_image_id(c_ioctx
);
945 CephContext
*cct
= (CephContext
*)c_ioctx
.cct();
946 ldout(cct
, 10) << __func__
<< " "
947 << "c_name=" << c_name
<< ", "
948 << "c_id= " << c_id
<< ", "
949 << "c_opts=" << c_opts
<< dendl
;
951 ThreadPool
*thread_pool
;
952 ContextWQ
*op_work_queue
;
953 ImageCtx::get_thread_pool_instance(cct
, &thread_pool
, &op_work_queue
);
956 auto *req
= image::CloneRequest
<>::create(
957 p_imctx
, c_ioctx
, c_name
, id
, c_opts
,
958 non_primary_global_image_id
, primary_mirror_uuid
, op_work_queue
, &cond
);
964 int rename(IoCtx
& io_ctx
, const char *srcname
, const char *dstname
)
966 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
967 ldout(cct
, 20) << "rename " << &io_ctx
<< " " << srcname
<< " -> "
970 ImageCtx
*ictx
= new ImageCtx(srcname
, "", "", io_ctx
, false);
971 int r
= ictx
->state
->open(false);
973 lderr(ictx
->cct
) << "error opening source image: " << cpp_strerror(r
)
977 BOOST_SCOPE_EXIT((ictx
)) {
978 ictx
->state
->close();
979 } BOOST_SCOPE_EXIT_END
981 return ictx
->operations
->rename(dstname
);
984 int info(ImageCtx
*ictx
, image_info_t
& info
, size_t infosize
)
986 ldout(ictx
->cct
, 20) << "info " << ictx
<< dendl
;
988 int r
= ictx
->state
->refresh_if_required();
992 image_info(ictx
, info
, infosize
);
996 int get_old_format(ImageCtx
*ictx
, uint8_t *old
)
998 int r
= ictx
->state
->refresh_if_required();
1001 *old
= ictx
->old_format
;
1005 int get_size(ImageCtx
*ictx
, uint64_t *size
)
1007 int r
= ictx
->state
->refresh_if_required();
1010 RWLock::RLocker
l2(ictx
->snap_lock
);
1011 *size
= ictx
->get_image_size(ictx
->snap_id
);
1015 int get_features(ImageCtx
*ictx
, uint64_t *features
)
1017 int r
= ictx
->state
->refresh_if_required();
1020 RWLock::RLocker
l(ictx
->snap_lock
);
1021 *features
= ictx
->features
;
1025 int get_overlap(ImageCtx
*ictx
, uint64_t *overlap
)
1027 int r
= ictx
->state
->refresh_if_required();
1030 RWLock::RLocker
l(ictx
->snap_lock
);
1031 RWLock::RLocker
l2(ictx
->parent_lock
);
1032 return ictx
->get_parent_overlap(ictx
->snap_id
, overlap
);
1035 int get_parent_info(ImageCtx
*ictx
, string
*parent_pool_name
,
1036 string
*parent_name
, string
*parent_id
,
1037 string
*parent_snap_name
)
1039 int r
= ictx
->state
->refresh_if_required();
1043 RWLock::RLocker
l(ictx
->snap_lock
);
1044 RWLock::RLocker
l2(ictx
->parent_lock
);
1045 if (ictx
->parent
== NULL
) {
1049 ParentSpec parent_spec
;
1051 if (ictx
->snap_id
== CEPH_NOSNAP
) {
1052 parent_spec
= ictx
->parent_md
.spec
;
1054 r
= ictx
->get_parent_spec(ictx
->snap_id
, &parent_spec
);
1056 lderr(ictx
->cct
) << "Can't find snapshot id = " << ictx
->snap_id
1060 if (parent_spec
.pool_id
== -1)
1063 if (parent_pool_name
) {
1064 Rados
rados(ictx
->md_ctx
);
1065 r
= rados
.pool_reverse_lookup(parent_spec
.pool_id
,
1068 lderr(ictx
->cct
) << "error looking up pool name: " << cpp_strerror(r
)
1074 if (parent_snap_name
) {
1075 RWLock::RLocker
l(ictx
->parent
->snap_lock
);
1076 r
= ictx
->parent
->get_snap_name(parent_spec
.snap_id
,
1079 lderr(ictx
->cct
) << "error finding parent snap name: "
1080 << cpp_strerror(r
) << dendl
;
1086 RWLock::RLocker
snap_locker(ictx
->parent
->snap_lock
);
1087 *parent_name
= ictx
->parent
->name
;
1090 *parent_id
= ictx
->parent
->id
;
1096 int get_flags(ImageCtx
*ictx
, uint64_t *flags
)
1098 int r
= ictx
->state
->refresh_if_required();
1103 RWLock::RLocker
l2(ictx
->snap_lock
);
1104 return ictx
->get_flags(ictx
->snap_id
, flags
);
1107 int set_image_notification(ImageCtx
*ictx
, int fd
, int type
)
1109 CephContext
*cct
= ictx
->cct
;
1110 ldout(cct
, 20) << __func__
<< " " << ictx
<< " fd " << fd
<< " type" << type
<< dendl
;
1112 int r
= ictx
->state
->refresh_if_required();
1117 if (ictx
->event_socket
.is_valid())
1119 return ictx
->event_socket
.init(fd
, type
);
1122 int is_exclusive_lock_owner(ImageCtx
*ictx
, bool *is_owner
)
1126 RWLock::RLocker
owner_locker(ictx
->owner_lock
);
1127 if (ictx
->exclusive_lock
== nullptr ||
1128 !ictx
->exclusive_lock
->is_lock_owner()) {
1132 // might have been blacklisted by peer -- ensure we still own
1133 // the lock by pinging the OSD
1134 int r
= ictx
->exclusive_lock
->assert_header_locked();
1135 if (r
== -EBUSY
|| r
== -ENOENT
) {
1145 int lock_acquire(ImageCtx
*ictx
, rbd_lock_mode_t lock_mode
)
1147 CephContext
*cct
= ictx
->cct
;
1148 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< ", "
1149 << "lock_mode=" << lock_mode
<< dendl
;
1151 if (lock_mode
!= RBD_LOCK_MODE_EXCLUSIVE
) {
1155 C_SaferCond lock_ctx
;
1157 RWLock::WLocker
l(ictx
->owner_lock
);
1159 if (ictx
->exclusive_lock
== nullptr) {
1160 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1164 if (ictx
->get_exclusive_lock_policy()->may_auto_request_lock()) {
1165 ictx
->set_exclusive_lock_policy(
1166 new exclusive_lock::StandardPolicy(ictx
));
1169 if (ictx
->exclusive_lock
->is_lock_owner()) {
1173 ictx
->exclusive_lock
->acquire_lock(&lock_ctx
);
1176 int r
= lock_ctx
.wait();
1178 lderr(cct
) << "failed to request exclusive lock: " << cpp_strerror(r
)
1183 RWLock::RLocker
l(ictx
->owner_lock
);
1185 if (ictx
->exclusive_lock
== nullptr ||
1186 !ictx
->exclusive_lock
->is_lock_owner()) {
1187 lderr(cct
) << "failed to acquire exclusive lock" << dendl
;
1194 int lock_release(ImageCtx
*ictx
)
1196 CephContext
*cct
= ictx
->cct
;
1197 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< dendl
;
1199 C_SaferCond lock_ctx
;
1201 RWLock::WLocker
l(ictx
->owner_lock
);
1203 if (ictx
->exclusive_lock
== nullptr ||
1204 !ictx
->exclusive_lock
->is_lock_owner()) {
1205 lderr(cct
) << "not exclusive lock owner" << dendl
;
1209 ictx
->exclusive_lock
->release_lock(&lock_ctx
);
1212 int r
= lock_ctx
.wait();
1214 lderr(cct
) << "failed to release exclusive lock: " << cpp_strerror(r
)
1221 int lock_get_owners(ImageCtx
*ictx
, rbd_lock_mode_t
*lock_mode
,
1222 std::list
<std::string
> *lock_owners
)
1224 CephContext
*cct
= ictx
->cct
;
1225 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< dendl
;
1227 if (!ictx
->test_features(RBD_FEATURE_EXCLUSIVE_LOCK
)) {
1228 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1232 managed_lock::Locker locker
;
1233 C_SaferCond get_owner_ctx
;
1234 ExclusiveLock
<>(*ictx
).get_locker(&locker
, &get_owner_ctx
);
1235 int r
= get_owner_ctx
.wait();
1239 lderr(cct
) << "failed to determine current lock owner: "
1240 << cpp_strerror(r
) << dendl
;
1244 *lock_mode
= RBD_LOCK_MODE_EXCLUSIVE
;
1245 lock_owners
->clear();
1246 lock_owners
->emplace_back(locker
.address
);
1250 int lock_break(ImageCtx
*ictx
, rbd_lock_mode_t lock_mode
,
1251 const std::string
&lock_owner
)
1253 CephContext
*cct
= ictx
->cct
;
1254 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< ", "
1255 << "lock_mode=" << lock_mode
<< ", "
1256 << "lock_owner=" << lock_owner
<< dendl
;
1258 if (lock_mode
!= RBD_LOCK_MODE_EXCLUSIVE
) {
1262 if (ictx
->read_only
) {
1266 managed_lock::Locker locker
;
1267 C_SaferCond get_owner_ctx
;
1269 RWLock::RLocker
l(ictx
->owner_lock
);
1271 if (ictx
->exclusive_lock
== nullptr) {
1272 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1276 ictx
->exclusive_lock
->get_locker(&locker
, &get_owner_ctx
);
1278 int r
= get_owner_ctx
.wait();
1282 lderr(cct
) << "failed to determine current lock owner: "
1283 << cpp_strerror(r
) << dendl
;
1287 if (locker
.address
!= lock_owner
) {
1291 C_SaferCond break_ctx
;
1293 RWLock::RLocker
l(ictx
->owner_lock
);
1295 if (ictx
->exclusive_lock
== nullptr) {
1296 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1300 ictx
->exclusive_lock
->break_lock(locker
, true, &break_ctx
);
1302 r
= break_ctx
.wait();
1306 lderr(cct
) << "failed to break lock: " << cpp_strerror(r
) << dendl
;
1312 int remove(IoCtx
& io_ctx
, const std::string
&image_name
,
1313 const std::string
&image_id
, ProgressContext
& prog_ctx
,
1314 bool force
, bool from_trash_remove
)
1316 CephContext
*cct((CephContext
*)io_ctx
.cct());
1317 ldout(cct
, 20) << "remove " << &io_ctx
<< " "
1318 << (image_id
.empty() ? image_name
: image_id
) << dendl
;
1320 ThreadPool
*thread_pool
;
1321 ContextWQ
*op_work_queue
;
1322 ImageCtx::get_thread_pool_instance(cct
, &thread_pool
, &op_work_queue
);
1325 auto req
= librbd::image::RemoveRequest
<>::create(
1326 io_ctx
, image_name
, image_id
, force
, from_trash_remove
, prog_ctx
,
1327 op_work_queue
, &cond
);
// Move the image named `image_name` into the rbd trash.
//
// Visible steps:
//  1. open the image through a temporary ImageCtx; the image id is taken from
//     ictx->id, or looked up with cls_client::dir_get_id() on the fallback path
//  2. when the image has an exclusive lock, call prepare_image_update() and
//     give up if that fails or this client is not the lock owner
//  3. add a TrashImageSpec (source, name, now, now + delay) via trash_add();
//     -EEXIST is logged as a previous unfinished deferred remove and the
//     function carries on
//  4. remove the id object (the error branch excludes -ENOENT)
//  5. remove the image from the v2 directory
//
// `delay` is added to the current time as a double to form the deferment end
// time.
// NOTE(review): the return statements and several error checks are not among
// the visible statements; exact return codes should be confirmed.
1333 int trash_move(librados::IoCtx
&io_ctx
, rbd_trash_image_source_t source
,
1334 const std::string
&image_name
, uint64_t delay
) {
1335 CephContext
*cct((CephContext
*)io_ctx
.cct());
1336 ldout(cct
, 20) << "trash_move " << &io_ctx
<< " " << image_name
1339 std::string image_id
;
1340 ImageCtx
*ictx
= new ImageCtx(image_name
, "", nullptr, io_ctx
, false);
1341 int r
= ictx
->state
->open(true);
1346 ldout(cct
, 2) << "error opening image: " << cpp_strerror(-r
) << dendl
;
1350 // try to get image id from the directory
1351 r
= cls_client::dir_get_id(&io_ctx
, RBD_DIRECTORY
, image_name
, &image_id
);
1354 ldout(cct
, 2) << "error reading image id from dirctory: "
1355 << cpp_strerror(-r
) << dendl
;
// Old-format images are closed here; presumably the function then returns an
// error because they cannot be trashed -- TODO confirm.
1360 if (ictx
->old_format
) {
1361 ictx
->state
->close();
1365 image_id
= ictx
->id
;
// owner_lock is taken for read manually (not via RLocker); the visible
// release points are the put_read() calls below.
1366 ictx
->owner_lock
.get_read();
1367 if (ictx
->exclusive_lock
!= nullptr) {
1368 r
= ictx
->operations
->prepare_image_update();
1369 if (r
< 0 || (ictx
->exclusive_lock
!= nullptr &&
1370 !ictx
->exclusive_lock
->is_lock_owner())) {
1371 lderr(cct
) << "cannot obtain exclusive lock - not removing" << dendl
;
1372 ictx
->owner_lock
.put_read();
1373 ictx
->state
->close();
// Scope-exit handler: when ictx is set, it shuts down and deletes the
// exclusive lock (the is_locked case, it appears), releases owner_lock and
// closes the image.
1379 BOOST_SCOPE_EXIT_ALL(ictx
, cct
) {
1380 if (ictx
== nullptr)
1383 bool is_locked
= ictx
->exclusive_lock
!= nullptr &&
1384 ictx
->exclusive_lock
->is_lock_owner();
1387 auto exclusive_lock
= ictx
->exclusive_lock
;
1388 exclusive_lock
->shut_down(&ctx
);
1389 ictx
->owner_lock
.put_read();
1392 lderr(cct
) << "error shutting down exclusive lock" << dendl
;
1394 delete exclusive_lock
;
1396 ictx
->owner_lock
.put_read();
1398 ictx
->state
->close();
1401 ldout(cct
, 2) << "adding image entry to rbd_trash" << dendl
;
// Deletion timestamp is "now"; the deferment end is now + delay.
1402 utime_t ts
= ceph_clock_now();
1403 utime_t deferment_end_time
= ts
;
1404 deferment_end_time
+= (double)delay
;
1405 cls::rbd::TrashImageSource trash_source
=
1406 static_cast<cls::rbd::TrashImageSource
>(source
);
1407 cls::rbd::TrashImageSpec
trash_spec(trash_source
, image_name
, ts
,
1408 deferment_end_time
);
1409 r
= cls_client::trash_add(&io_ctx
, image_id
, trash_spec
);
1410 if (r
< 0 && r
!= -EEXIST
) {
1411 lderr(cct
) << "error adding image " << image_name
<< " to rbd_trash"
1414 } else if (r
== -EEXIST
) {
1415 ldout(cct
, 10) << "found previous unfinished deferred remove for image:"
1416 << image_id
<< dendl
;
1417 // continue with removing image from directory
1420 ldout(cct
, 2) << "removing id object..." << dendl
;
1421 r
= io_ctx
.remove(util::id_obj_name(image_name
));
1422 if (r
< 0 && r
!= -ENOENT
) {
1423 lderr(cct
) << "error removing id object: " << cpp_strerror(r
)
1428 ldout(cct
, 2) << "removing rbd image from v2 directory..." << dendl
;
1429 r
= cls_client::dir_remove_image(&io_ctx
, RBD_DIRECTORY
, image_name
,
1433 lderr(cct
) << "error removing image from v2 directory: "
1434 << cpp_strerror(-r
) << dendl
;
// Look up the trash entry for image `id` and fill `*info` from it.
//
// The entry is read with cls_client::trash_get(); `*info` is then assigned
// {id, spec.name, source, deletion time in seconds, deferment end time in
// seconds}. A failed lookup is logged ("error retrieving trash entry").
// NOTE(review): the expression converted to rbd_trash_image_source_t and the
// return statements are not among the visible statements.
1442 int trash_get(IoCtx
&io_ctx
, const std::string
&id
,
1443 trash_image_info_t
*info
) {
1444 CephContext
*cct((CephContext
*)io_ctx
.cct());
1445 ldout(cct
, 20) << __func__
<< " " << &io_ctx
<< dendl
;
1447 cls::rbd::TrashImageSpec spec
;
1448 int r
= cls_client::trash_get(&io_ctx
, id
, &spec
);
1452 lderr(cct
) << "error retrieving trash entry: " << cpp_strerror(r
)
1457 rbd_trash_image_source_t source
= static_cast<rbd_trash_image_source_t
>(
1459 *info
= trash_image_info_t
{id
, spec
.name
, source
, spec
.deletion_time
.sec(),
1460 spec
.deferment_end_time
.sec()};
// List the trash entries of the pool behind `io_ctx`.
//
// Entries come back from cls_client::trash_list() as a map of image id to
// TrashImageSpec. Each one is appended to `entries` as
// {id, name, source, deletion time (sec), deferment end time (sec)}.
// `entries` is only appended to in the visible code (it is not cleared here).
1464 int trash_list(IoCtx
&io_ctx
, vector
<trash_image_info_t
> &entries
) {
1465 CephContext
*cct((CephContext
*)io_ctx
.cct());
1466 ldout(cct
, 20) << "trash_list " << &io_ctx
<< dendl
;
1468 map
<string
, cls::rbd::TrashImageSpec
> trash_entries
;
1469 int r
= cls_client::trash_list(&io_ctx
, &trash_entries
);
1472 lderr(cct
) << "error listing rbd_trash entries: " << cpp_strerror(r
)
// Convert every cls-level spec into the public trash_image_info_t.
1480 for (const auto &entry
: trash_entries
) {
1481 rbd_trash_image_source_t source
=
1482 static_cast<rbd_trash_image_source_t
>(entry
.second
.source
);
1483 entries
.push_back({entry
.first
, entry
.second
.name
, source
,
1484 entry
.second
.deletion_time
.sec(),
1485 entry
.second
.deferment_end_time
.sec()});
// Permanently delete an image that is currently in the trash.
//
// Visible steps:
//  1. read the trash entry for `image_id`
//  2. refuse (error log) when the deferment end time is still in the future
//     and `force` is false
//  3. delete the image via remove() with an empty name, force=false and
//     from_trash_remove=true
//  4. delete the trash entry; the error branch excludes -ENOENT
// NOTE(review): return statements are not among the visible statements.
1490 int trash_remove(IoCtx
&io_ctx
, const std::string
&image_id
, bool force
,
1491 ProgressContext
& prog_ctx
) {
1492 CephContext
*cct((CephContext
*)io_ctx
.cct());
1493 ldout(cct
, 20) << "trash_remove " << &io_ctx
<< " " << image_id
1494 << " " << force
<< dendl
;
1496 cls::rbd::TrashImageSpec trash_spec
;
1497 int r
= cls_client::trash_get(&io_ctx
, image_id
, &trash_spec
);
1499 lderr(cct
) << "error getting image id " << image_id
1500 << " info from trash: " << cpp_strerror(r
) << dendl
;
// `force` overrides the deferment window.
1504 utime_t now
= ceph_clock_now();
1505 if (now
< trash_spec
.deferment_end_time
&& !force
) {
1506 lderr(cct
) << "error: deferment time has not expired." << dendl
;
1510 r
= remove(io_ctx
, "", image_id
, prog_ctx
, false, true);
1512 lderr(cct
) << "error removing image " << image_id
1513 << ", which is pending deletion" << dendl
;
1516 r
= cls_client::trash_remove(&io_ctx
, image_id
);
1517 if (r
< 0 && r
!= -ENOENT
) {
1518 lderr(cct
) << "error removing image " << image_id
1519 << " from rbd_trash object" << dendl
;
// Restore image `image_id` from the trash, optionally under a new name.
//
// Visible steps:
//  1. read the trash entry; when `image_new_name` is empty, the name stored in
//     the trash entry is used
//  2. look up the id object for the chosen name: an existing object with a
//     different id is reported as a name clash, one with the same id is treated
//     as an incomplete earlier restore (create_id_obj = false)
//  3. create the id object if needed
//  4. add the image to the v2 directory (the error branch excludes -EEXIST)
//  5. remove the trash entry (the error branch excludes -ENOENT)
// NOTE(review): return statements are not among the visible statements.
1525 int trash_restore(librados::IoCtx
&io_ctx
, const std::string
&image_id
,
1526 const std::string
&image_new_name
) {
1527 CephContext
*cct((CephContext
*)io_ctx
.cct());
1528 ldout(cct
, 20) << "trash_restore " << &io_ctx
<< " " << image_id
<< " "
1529 << image_new_name
<< dendl
;
1531 cls::rbd::TrashImageSpec trash_spec
;
1532 int r
= cls_client::trash_get(&io_ctx
, image_id
, &trash_spec
);
1534 lderr(cct
) << "error getting image id " << image_id
1535 << " info from trash: " << cpp_strerror(r
) << dendl
;
1539 std::string image_name
= image_new_name
;
1540 if (image_name
.empty()) {
1541 // if user didn't specify a new name, let's try using the old name
1542 image_name
= trash_spec
.name
;
1543 ldout(cct
, 20) << "restoring image id " << image_id
<< " with name "
1544 << image_name
<< dendl
;
1547 // check if no image exists with the same name
1548 bool create_id_obj
= true;
1549 std::string existing_id
;
1550 r
= cls_client::get_id(&io_ctx
, util::id_obj_name(image_name
), &existing_id
);
1551 if (r
< 0 && r
!= -ENOENT
) {
1552 lderr(cct
) << "error checking if image " << image_name
<< " exists: "
1553 << cpp_strerror(r
) << dendl
;
1555 } else if (r
!= -ENOENT
){
1556 // checking if we are recovering from an incomplete restore
1557 if (existing_id
!= image_id
) {
1558 ldout(cct
, 2) << "an image with the same name already exists" << dendl
;
1561 create_id_obj
= false;
1564 if (create_id_obj
) {
1565 ldout(cct
, 2) << "adding id object" << dendl
;
1566 librados::ObjectWriteOperation op
;
1568 cls_client::set_id(&op
, image_id
);
1569 r
= io_ctx
.operate(util::id_obj_name(image_name
), &op
);
1571 lderr(cct
) << "error adding id object for image " << image_name
1572 << ": " << cpp_strerror(r
) << dendl
;
1577 ldout(cct
, 2) << "adding rbd image from v2 directory..." << dendl
;
1578 r
= cls_client::dir_add_image(&io_ctx
, RBD_DIRECTORY
, image_name
,
1580 if (r
< 0 && r
!= -EEXIST
) {
1581 lderr(cct
) << "error adding image to v2 directory: "
1582 << cpp_strerror(r
) << dendl
;
1586 ldout(cct
, 2) << "removing image from trash..." << dendl
;
1587 r
= cls_client::trash_remove(&io_ctx
, image_id
);
1588 if (r
< 0 && r
!= -ENOENT
) {
1589 lderr(cct
) << "error removing image id " << image_id
<< " from trash: "
1590 << cpp_strerror(r
) << dendl
;
// Append one snap_info_t (name, id, size) to `snaps` for every entry in
// ictx->snap_info.
//
// The image is refreshed first if required; the snapshot table is walked while
// holding snap_lock for read.
// NOTE(review): the declaration of `info` and the return statements are not
// among the visible statements.
1597 int snap_list(ImageCtx
*ictx
, vector
<snap_info_t
>& snaps
)
1599 ldout(ictx
->cct
, 20) << "snap_list " << ictx
<< dendl
;
1601 int r
= ictx
->state
->refresh_if_required();
1605 RWLock::RLocker
l(ictx
->snap_lock
);
1606 for (map
<snap_t
, SnapInfo
>::iterator it
= ictx
->snap_info
.begin();
1607 it
!= ictx
->snap_info
.end(); ++it
) {
1609 info
.name
= it
->second
.name
;
1610 info
.id
= it
->first
;
1611 info
.size
= it
->second
.size
;
1612 snaps
.push_back(info
);
// Report through `*exists` whether a snapshot with the given namespace and
// name is known to the image.
//
// The image is refreshed first if required. Under snap_lock (read), `*exists`
// becomes true when get_snap_id() returns something other than CEPH_NOSNAP.
1618 int snap_exists(ImageCtx
*ictx
, const cls::rbd::SnapshotNamespace
& snap_namespace
,
1619 const char *snap_name
, bool *exists
)
1621 ldout(ictx
->cct
, 20) << "snap_exists " << ictx
<< " " << snap_name
<< dendl
;
1623 int r
= ictx
->state
->refresh_if_required();
1627 RWLock::RLocker
l(ictx
->snap_lock
);
1628 *exists
= ictx
->get_snap_id(snap_namespace
, snap_name
) != CEPH_NOSNAP
;
// Remove the user snapshot `snap_name`, with optional preparation selected by
// `flags`:
//  - RBD_SNAP_REMOVE_FLATTEN: flatten_children() is called first
//  - RBD_SNAP_REMOVE_UNPROTECT: a protected snapshot is unprotected, and its
//    protection status is checked again afterwards
// The removal itself goes through operations->snap_remove() in the
// UserSnapshotNamespace with a completion `&ctx`.
// NOTE(review): the declarations of `r`, `is_protected` and `ctx`, the error
// checks and the return statements are not among the visible statements.
1632 int snap_remove(ImageCtx
*ictx
, const char *snap_name
, uint32_t flags
,
1633 ProgressContext
& pctx
)
1635 ldout(ictx
->cct
, 20) << "snap_remove " << ictx
<< " " << snap_name
<< " flags: " << flags
<< dendl
;
1639 r
= ictx
->state
->refresh_if_required();
1643 if (flags
& RBD_SNAP_REMOVE_FLATTEN
) {
1644 r
= flatten_children(ictx
, snap_name
, pctx
);
1651 r
= snap_is_protected(ictx
, snap_name
, &is_protected
);
// `&` binds tighter than `&&`, so this reads
// is_protected && (flags & RBD_SNAP_REMOVE_UNPROTECT).
1656 if (is_protected
&& flags
& RBD_SNAP_REMOVE_UNPROTECT
) {
1657 r
= ictx
->operations
->snap_unprotect(cls::rbd::UserSnapshotNamespace(), snap_name
);
1659 lderr(ictx
->cct
) << "failed to unprotect snapshot: " << snap_name
<< dendl
;
// Re-read the protection status after the unprotect attempt.
1663 r
= snap_is_protected(ictx
, snap_name
, &is_protected
);
1668 lderr(ictx
->cct
) << "snapshot is still protected after unprotection" << dendl
;
1674 ictx
->operations
->snap_remove(cls::rbd::UserSnapshotNamespace(), snap_name
, &ctx
);
// Copy the timestamp of snapshot `snap_id` into `*timestamp`.
//
// The snapshot is looked up in ictx->snap_info; an unknown id trips the
// assert rather than producing an error code.
// NOTE(review): no lock acquisition is visible around the snap_info lookup --
// confirm whether the caller is expected to hold snap_lock.
1680 int snap_get_timestamp(ImageCtx
*ictx
, uint64_t snap_id
, struct timespec
*timestamp
)
1682 std::map
<librados::snap_t
, SnapInfo
>::iterator snap_it
= ictx
->snap_info
.find(snap_id
);
1683 assert(snap_it
!= ictx
->snap_info
.end());
1684 utime_t time
= snap_it
->second
.timestamp
;
1685 time
.to_timespec(timestamp
);
// Read the image's snapshot limit from its header object via
// cls_client::snapshot_get_limit().
//
// When the call returns -EOPNOTSUPP, `*limit` is set to UINT64_MAX.
// NOTE(review): the trailing arguments of the cls call and the return
// statements are not among the visible statements.
1689 int snap_get_limit(ImageCtx
*ictx
, uint64_t *limit
)
1691 int r
= cls_client::snapshot_get_limit(&ictx
->md_ctx
, ictx
->header_oid
,
1693 if (r
== -EOPNOTSUPP
) {
1694 *limit
= UINT64_MAX
;
// Set the image's snapshot limit: forwards `limit` to
// ictx->operations->snap_set_limit() and returns its result.
1700 int snap_set_limit(ImageCtx
*ictx
, uint64_t limit
)
1702 return ictx
->operations
->snap_set_limit(limit
);
// State bundle for a copy operation. The constructor stores a reference to the
// caller's ProgressContext and initialises destictx to NULL and src_size to 0.
// NOTE(review): the declarations of destictx and src_size are not among the
// visible statements.
1705 struct CopyProgressCtx
{
1706 explicit CopyProgressCtx(ProgressContext
&p
)
1707 : destictx(NULL
), src_size(0), prog_ctx(p
)
// Held by reference: the ProgressContext must outlive this object.
1712 ProgressContext
&prog_ctx
;
// Copy image `src` into a newly created image `destname` in `dest_md_ctx`.
//
// Visible steps:
//  1. snapshot the source's features and size under snap_lock
//  2. for format, stripe unit, stripe count, order and features: when
//     opts.get() returns non-zero for the option, store the source-derived
//     value into opts
//  3. log an error when the feature set contains bits outside RBD_FEATURES_ALL
//  4. create the destination with the source size, open it, run the
//     ImageCtx-to-ImageCtx copy() and close it
// A close failure is only examined when the copy itself returned 0.
// NOTE(review): return statements and some error checks are not among the
// visible statements.
1715 int copy(ImageCtx
*src
, IoCtx
& dest_md_ctx
, const char *destname
,
1716 ImageOptions
& opts
, ProgressContext
&prog_ctx
, size_t sparse_size
)
1718 CephContext
*cct
= (CephContext
*)dest_md_ctx
.cct();
1719 ldout(cct
, 20) << "copy " << src
->name
1720 << (src
->snap_name
.length() ? "@" + src
->snap_name
: "")
1721 << " -> " << destname
<< " opts = " << opts
<< dendl
;
1723 src
->snap_lock
.get_read();
1724 uint64_t features
= src
->features
;
1725 uint64_t src_size
= src
->get_image_size(src
->snap_id
);
1726 src
->snap_lock
.put_read();
// Format 1 for old-format sources, 2 otherwise, unless opts already has one.
1727 uint64_t format
= src
->old_format
? 1 : 2;
1728 if (opts
.get(RBD_IMAGE_OPTION_FORMAT
, &format
) != 0) {
1729 opts
.set(RBD_IMAGE_OPTION_FORMAT
, format
);
1731 uint64_t stripe_unit
= src
->stripe_unit
;
1732 if (opts
.get(RBD_IMAGE_OPTION_STRIPE_UNIT
, &stripe_unit
) != 0) {
1733 opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
1735 uint64_t stripe_count
= src
->stripe_count
;
1736 if (opts
.get(RBD_IMAGE_OPTION_STRIPE_COUNT
, &stripe_count
) != 0) {
1737 opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
1739 uint64_t order
= src
->order
;
1740 if (opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
) != 0) {
1741 opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
1743 if (opts
.get(RBD_IMAGE_OPTION_FEATURES
, &features
) != 0) {
1744 opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
1746 if (features
& ~RBD_FEATURES_ALL
) {
1747 lderr(cct
) << "librbd does not support requested features" << dendl
;
1751 int r
= create(dest_md_ctx
, destname
, "", src_size
, opts
, "", "", false);
1753 lderr(cct
) << "header creation failed" << dendl
;
// The order is written back into opts after creation.
1756 opts
.set(RBD_IMAGE_OPTION_ORDER
, static_cast<uint64_t>(order
));
1758 ImageCtx
*dest
= new librbd::ImageCtx(destname
, "", NULL
,
1759 dest_md_ctx
, false);
1760 r
= dest
->state
->open(false);
1762 lderr(cct
) << "failed to read newly created header" << dendl
;
1766 r
= copy(src
, dest
, prog_ctx
, sparse_size
);
1768 int close_r
= dest
->state
->close();
1769 if (r
== 0 && close_r
< 0) {
// Completion context for one write issued during an image copy. It stores the
// bufferlist pointer and the Context passed at construction.
// NOTE(review): the body of finish() is not among the visible statements --
// presumably it frees the buffer and completes m_ctx; confirm.
1775 class C_CopyWrite
: public Context
{
1777 C_CopyWrite(bufferlist
*bl
, Context
* ctx
)
1778 : m_bl(bl
), m_ctx(ctx
) {}
1779 void finish(int r
) override
{
// Completion context for one read issued during an image copy; it writes the
// data that was read into the destination image.
//
// Construction registers an operation with the throttle (start_op()).
// finish(r), as far as visible:
//  - logs a read error with the source offset and ends the throttle op with r
//  - asserts that the buffer length equals r
//  - ends the throttle op when the buffer is entirely zero
//  - defaults m_sparse_size to (1 << dest order) when it is 0
//  - walks the buffer with util::calc_sparse_extent() and issues one aio_write
//    per extent through the destination's io_work_queue, at
//    m_offset + write_offset, with LIBRADOS_OP_FLAG_FADVISE_DONTNEED
//  - uses a C_Gather so the throttle op ends once all sub-writes complete
// NOTE(review): several declarations (e.g. `offset`) and the branch structure
// are not among the visible statements.
1788 class C_CopyRead
: public Context
{
1790 C_CopyRead(SimpleThrottle
*throttle
, ImageCtx
*dest
, uint64_t offset
,
1791 bufferlist
*bl
, size_t sparse_size
)
1792 : m_throttle(throttle
), m_dest(dest
), m_offset(offset
), m_bl(bl
),
1793 m_sparse_size(sparse_size
) {
1794 m_throttle
->start_op();
1796 void finish(int r
) override
{
1798 lderr(m_dest
->cct
) << "error reading from source image at offset "
1799 << m_offset
<< ": " << cpp_strerror(r
) << dendl
;
1801 m_throttle
->end_op(r
);
1804 assert(m_bl
->length() == (size_t)r
);
1806 if (m_bl
->is_zero()) {
1808 m_throttle
->end_op(r
);
1812 if (!m_sparse_size
) {
1813 m_sparse_size
= (1 << m_dest
->order
);
// Capture the throttle pointer by value so the lambda does not depend on
// `this`.
1816 auto *throttle
= m_throttle
;
1817 auto *end_op_ctx
= new FunctionContext([throttle
](int r
) {
1818 throttle
->end_op(r
);
1820 auto gather_ctx
= new C_Gather(m_dest
->cct
, end_op_ctx
);
// Rebuild the bufferlist into a single bufferptr so extents can be addressed
// by offset.
1822 bufferptr
m_ptr(m_bl
->length());
1823 m_bl
->rebuild(m_ptr
);
1824 size_t write_offset
= 0;
1825 size_t write_length
= 0;
1827 size_t length
= m_bl
->length();
1828 while (offset
< length
) {
1829 if (util::calc_sparse_extent(m_ptr
,
1835 bufferptr
write_ptr(m_ptr
, write_offset
, write_length
);
1836 bufferlist
*write_bl
= new bufferlist();
1837 write_bl
->push_back(write_ptr
);
1838 Context
*ctx
= new C_CopyWrite(write_bl
, gather_ctx
->new_sub());
1839 auto comp
= io::AioCompletion::create(ctx
);
1841 // coordinate through AIO WQ to ensure lock is acquired if needed
1842 m_dest
->io_work_queue
->aio_write(comp
, m_offset
+ write_offset
,
1844 std::move(*write_bl
),
1845 LIBRADOS_OP_FLAG_FADVISE_DONTNEED
,
1846 std::move(read_trace
));
1847 write_offset
= offset
;
// At least one sub-write must have been created before activating the gather.
1852 assert(gather_ctx
->get_sub_created_count() > 0);
1853 gather_ctx
->activate();
// Assigned by the copy loop from the read request's trace before completion.
1856 ZTracer::Trace read_trace
;
1859 SimpleThrottle
*m_throttle
;
1863 size_t m_sparse_size
;
// Copy the contents and metadata of `src` into the already opened image
// `dest`.
//
// Visible steps:
//  1. read both image sizes under their snap_locks; log an error when the
//     destination is smaller than the source
//  2. list the source's metadata and, when the listing succeeded and is
//     non-empty, set it on the destination; listing errors other than
//     -EOPNOTSUPP and -EIO are logged
//  3. read the source in stripe-period chunks; each read completes into a
//     C_CopyRead that writes to `dest`, with in-flight operations bounded by a
//     SimpleThrottle sized from src->concurrent_management_ops
//  4. report progress per chunk and once more at src_size
// The loop returns early through throttle.wait_for_ret() when an error is
// pending. src->owner_lock is held for read while I/O is issued.
// NOTE(review): the declaration of `r` and the final return are not among the
// visible statements.
1866 int copy(ImageCtx
*src
, ImageCtx
*dest
, ProgressContext
&prog_ctx
, size_t sparse_size
)
1868 src
->snap_lock
.get_read();
1869 uint64_t src_size
= src
->get_image_size(src
->snap_id
);
1870 src
->snap_lock
.put_read();
1872 dest
->snap_lock
.get_read();
1873 uint64_t dest_size
= dest
->get_image_size(dest
->snap_id
);
1874 dest
->snap_lock
.put_read();
1876 CephContext
*cct
= src
->cct
;
1877 if (dest_size
< src_size
) {
1878 lderr(cct
) << " src size " << src_size
<< " > dest size "
1879 << dest_size
<< dendl
;
1883 map
<string
, bufferlist
> pairs
;
1885 r
= cls_client::metadata_list(&src
->md_ctx
, src
->header_oid
, "", 0, &pairs
);
1886 if (r
< 0 && r
!= -EOPNOTSUPP
&& r
!= -EIO
) {
1887 lderr(cct
) << "couldn't list metadata: " << cpp_strerror(r
) << dendl
;
1889 } else if (r
== 0 && !pairs
.empty()) {
1890 r
= cls_client::metadata_set(&dest
->md_ctx
, dest
->header_oid
, pairs
);
1892 lderr(cct
) << "couldn't set metadata: " << cpp_strerror(r
) << dendl
;
// Tracing is initialised only when rbd_blkin_trace_all is configured.
1897 ZTracer::Trace trace
;
1898 if (cct
->_conf
->rbd_blkin_trace_all
) {
1899 trace
.init("copy", &src
->trace_endpoint
);
1902 RWLock::RLocker
owner_lock(src
->owner_lock
);
1903 SimpleThrottle
throttle(src
->concurrent_management_ops
, false);
1904 uint64_t period
= src
->get_stripe_period();
1905 unsigned fadvise_flags
= LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
|
1906 LIBRADOS_OP_FLAG_FADVISE_NOCACHE
;
1907 for (uint64_t offset
= 0; offset
< src_size
; offset
+= period
) {
1908 if (throttle
.pending_error()) {
1909 return throttle
.wait_for_ret();
// The last chunk may be shorter than a full stripe period.
1912 uint64_t len
= min(period
, src_size
- offset
);
1913 bufferlist
*bl
= new bufferlist();
1914 auto ctx
= new C_CopyRead(&throttle
, dest
, offset
, bl
, sparse_size
);
1915 auto comp
= io::AioCompletion::create_and_start
<Context
>(
1916 ctx
, src
, io::AIO_TYPE_READ
);
1918 io::ImageReadRequest
<> req(*src
, comp
, {{offset
, len
}},
1919 io::ReadResult
{bl
}, fadvise_flags
,
1921 ctx
->read_trace
= req
.get_trace();
1924 prog_ctx
.update_progress(offset
, src_size
);
1927 r
= throttle
.wait_for_ret();
1929 prog_ctx
.update_progress(src_size
, src_size
);
// Switch the image context to snapshot `snap_name` in `snap_namespace`.
//
// A null `snap_name` is converted to the empty string; the error message
// words that case as "unset" rather than "set". The switch is requested
// through ictx->state->snap_set() with a completion `&ctx`.
// NOTE(review): the declaration of `ctx`, the wait on it and the return
// statements are not among the visible statements.
1933 int snap_set(ImageCtx
*ictx
, const cls::rbd::SnapshotNamespace
&snap_namespace
,
1934 const char *snap_name
)
1936 ldout(ictx
->cct
, 20) << "snap_set " << ictx
<< " snap = "
1937 << (snap_name
? snap_name
: "NULL") << dendl
;
1939 // ignore return value, since we may be set to a non-existent
1940 // snapshot and the user is trying to fix that
1941 ictx
->state
->refresh_if_required();
1944 std::string
name(snap_name
== nullptr ? "" : snap_name
);
1945 ictx
->state
->snap_set(snap_namespace
, name
, &ctx
);
1950 lderr(ictx
->cct
) << "failed to " << (name
.empty() ? "un" : "") << "set "
1951 << "snapshot: " << cpp_strerror(r
) << dendl
;
// Report the advisory lock state cached in the image context.
//
// After refreshing the image if required, and while holding md_lock for read,
// this copies ictx->exclusive_locked to `*exclusive`, ictx->lock_tag to
// `*tag`, and appends one locker_t per entry of ictx->lockers with the client,
// cookie and address rendered as strings.
// NOTE(review): the remaining parameters, any null checks on the output
// pointers and the declaration of the per-entry `locker` are not among the
// visible statements.
1959 int list_lockers(ImageCtx
*ictx
,
1960 std::list
<locker_t
> *lockers
,
1964 ldout(ictx
->cct
, 20) << "list_locks on image " << ictx
<< dendl
;
1966 int r
= ictx
->state
->refresh_if_required();
1970 RWLock::RLocker
locker(ictx
->md_lock
);
1972 *exclusive
= ictx
->exclusive_locked
;
1974 *tag
= ictx
->lock_tag
;
1977 map
<rados::cls::lock::locker_id_t
,
1978 rados::cls::lock::locker_info_t
>::const_iterator it
;
1979 for (it
= ictx
->lockers
.begin(); it
!= ictx
->lockers
.end(); ++it
) {
1981 locker
.client
= stringify(it
->first
.locker
);
1982 locker
.cookie
= it
->first
.cookie
;
1983 locker
.address
= stringify(it
->second
.addr
);
1984 lockers
->push_back(locker
);
// Take the advisory lock RBD_LOCK_NAME on the image's header object.
//
// `exclusive` selects LOCK_EXCLUSIVE or LOCK_SHARED; `cookie` and `tag` are
// passed through to rados::cls::lock::lock() with an empty description, a
// default utime_t duration and flags 0. The image is refreshed first if
// required, md_lock is held for read during the call, and notify_update() is
// invoked afterwards.
// NOTE(review): the `tag` parameter declaration, the error checks and the
// return statements are not among the visible statements.
1991 int lock(ImageCtx
*ictx
, bool exclusive
, const string
& cookie
,
1994 ldout(ictx
->cct
, 20) << "lock image " << ictx
<< " exclusive=" << exclusive
1995 << " cookie='" << cookie
<< "' tag='" << tag
<< "'"
1998 int r
= ictx
->state
->refresh_if_required();
2003 * If we wanted we could do something more intelligent, like local
2004 * checks that we think we will succeed. But for now, let's not
2005 * duplicate that code.
2008 RWLock::RLocker
locker(ictx
->md_lock
);
2009 r
= rados::cls::lock::lock(&ictx
->md_ctx
, ictx
->header_oid
, RBD_LOCK_NAME
,
2010 exclusive
? LOCK_EXCLUSIVE
: LOCK_SHARED
,
2011 cookie
, tag
, "", utime_t(), 0);
2017 ictx
->notify_update();
// Release the advisory lock RBD_LOCK_NAME held under `cookie` on the image's
// header object.
//
// The image is refreshed first if required, md_lock is held for read during
// the rados::cls::lock::unlock() call, and notify_update() is invoked
// afterwards.
// NOTE(review): error checks and return statements are not among the visible
// statements.
2021 int unlock(ImageCtx
*ictx
, const string
& cookie
)
2023 ldout(ictx
->cct
, 20) << "unlock image " << ictx
2024 << " cookie='" << cookie
<< "'" << dendl
;
2026 int r
= ictx
->state
->refresh_if_required();
2031 RWLock::RLocker
locker(ictx
->md_lock
);
2032 r
= rados::cls::lock::unlock(&ictx
->md_ctx
, ictx
->header_oid
,
2033 RBD_LOCK_NAME
, cookie
);
2039 ictx
->notify_update();
// Break the advisory lock RBD_LOCK_NAME held by `client` under `cookie`.
//
// Visible steps:
//  1. refresh the image if required and parse `client` into an entity_name_t
//     (a parse failure is logged)
//  2. when ictx->blacklist_on_break_lock is set: fetch the current lock info,
//     find the address of the locker whose entity matches `client`, and
//     blacklist that address for ictx->blacklist_expire_seconds
//  3. break the lock via rados::cls::lock::break_lock()
//  4. call notify_update()
// NOTE(review): the declaration of `lockers`, the handling of an empty
// client_address, the error checks and the return statements are not among the
// visible statements.
2043 int break_lock(ImageCtx
*ictx
, const string
& client
,
2044 const string
& cookie
)
2046 ldout(ictx
->cct
, 20) << "break_lock image " << ictx
<< " client='" << client
2047 << "' cookie='" << cookie
<< "'" << dendl
;
2049 int r
= ictx
->state
->refresh_if_required();
2053 entity_name_t lock_client
;
2054 if (!lock_client
.parse(client
)) {
2055 lderr(ictx
->cct
) << "Unable to parse client '" << client
2060 if (ictx
->blacklist_on_break_lock
) {
2061 typedef std::map
<rados::cls::lock::locker_id_t
,
2062 rados::cls::lock::locker_info_t
> Lockers
;
2064 ClsLockType lock_type
;
2065 std::string lock_tag
;
2066 r
= rados::cls::lock::get_lock_info(&ictx
->md_ctx
, ictx
->header_oid
,
2067 RBD_LOCK_NAME
, &lockers
, &lock_type
,
2070 lderr(ictx
->cct
) << "unable to retrieve lock info: " << cpp_strerror(r
)
// Find the network address of the locker that matches the requested client.
2075 std::string client_address
;
2076 for (Lockers::iterator it
= lockers
.begin();
2077 it
!= lockers
.end(); ++it
) {
2078 if (it
->first
.locker
== lock_client
) {
2079 client_address
= stringify(it
->second
.addr
);
2083 if (client_address
.empty()) {
2087 RWLock::RLocker
locker(ictx
->md_lock
);
2088 librados::Rados
rados(ictx
->md_ctx
);
2089 r
= rados
.blacklist_add(client_address
,
2090 ictx
->blacklist_expire_seconds
);
2092 lderr(ictx
->cct
) << "unable to blacklist client: " << cpp_strerror(r
)
2098 r
= rados::cls::lock::break_lock(&ictx
->md_ctx
, ictx
->header_oid
,
2099 RBD_LOCK_NAME
, cookie
, lock_client
);
2102 ictx
->notify_update();
// AIO completion callback adapter: `arg` is reinterpreted as a Context and
// `cb` as an io::AioCompletion, and the Context is completed with the
// completion's return value.
// NOTE(review): whether the completion is released afterwards is not visible
// among these statements -- confirm.
2106 void rbd_ctx_cb(completion_t cb
, void *arg
)
2108 Context
*ctx
= reinterpret_cast<Context
*>(arg
);
2109 auto comp
= reinterpret_cast<io::AioCompletion
*>(cb
);
2110 ctx
->complete(comp
->get_return_value());
// Read the range [off, off + len) of the image in chunks and hand each chunk
// to the callback `cb`.
//
// Visible steps:
//  1. refresh the image if required and clip the length with clip_io() under
//     snap_lock
//  2. read chunks that end on stripe-period boundaries, each through
//     io::ImageRequest<>::aio_read, waiting synchronously for every one
//  3. invoke cb(total_read, ret, bl.c_str(), arg) for the data read
//  4. update the read latency, read count and read bytes perf counters
// owner_lock is held for read while the reads are issued.
// NOTE(review): the loop header, the declarations of `ctx`, `bl` and `arg`,
// the advancing of off/left/total_read and the return statements are not among
// the visible statements.
2114 int64_t read_iterate(ImageCtx
*ictx
, uint64_t off
, uint64_t len
,
2115 int (*cb
)(uint64_t, size_t, const char *, void *),
2118 utime_t start_time
, elapsed
;
2120 ldout(ictx
->cct
, 20) << "read_iterate " << ictx
<< " off = " << off
2121 << " len = " << len
<< dendl
;
2123 int r
= ictx
->state
->refresh_if_required();
2127 uint64_t mylen
= len
;
2128 ictx
->snap_lock
.get_read();
2129 r
= clip_io(ictx
, off
, &mylen
);
2130 ictx
->snap_lock
.put_read();
2134 int64_t total_read
= 0;
2135 uint64_t period
= ictx
->get_stripe_period();
2136 uint64_t left
= mylen
;
2138 ZTracer::Trace trace
;
2139 if (ictx
->cct
->_conf
->rbd_blkin_trace_all
) {
2140 trace
.init("read_iterate", &ictx
->trace_endpoint
);
2143 RWLock::RLocker
owner_locker(ictx
->owner_lock
);
2144 start_time
= ceph_clock_now();
// Read up to the end of the stripe period containing `off`, or whatever is
// left if that is less.
2146 uint64_t period_off
= off
- (off
% period
);
2147 uint64_t read_len
= min(period_off
+ period
- off
, left
);
2152 auto c
= io::AioCompletion::create_and_start(&ctx
, ictx
,
2154 io::ImageRequest
<>::aio_read(ictx
, c
, {{off
, read_len
}},
2155 io::ReadResult
{&bl
}, 0, std::move(trace
));
2157 int ret
= ctx
.wait();
2162 r
= cb(total_read
, ret
, bl
.c_str(), arg
);
2172 elapsed
= ceph_clock_now() - start_time
;
2173 ictx
->perfcounter
->tinc(l_librbd_rd_latency
, elapsed
);
2174 ictx
->perfcounter
->inc(l_librbd_rd
);
2175 ictx
->perfcounter
->inc(l_librbd_rd_bytes
, mylen
);
// Precondition: the caller holds ictx->snap_lock (asserted below).
// `*len` is an in/out parameter: it is shortened when off + *len extends past
// the image size at the current snap_id.
// NOTE(review): the return codes for the snapshot-missing, zero-length and
// start-past-end cases are not among the visible statements.
// NOTE(review): the clipped length is cast through size_t before being stored
// in a uint64_t; on a platform with 32-bit size_t this could truncate --
// confirm whether that is intended.
2179 // validate extent against image size; clip to image size if necessary
2180 int clip_io(ImageCtx
*ictx
, uint64_t off
, uint64_t *len
)
2182 assert(ictx
->snap_lock
.is_locked());
2183 uint64_t image_size
= ictx
->get_image_size(ictx
->snap_id
);
2184 bool snap_exists
= ictx
->snap_exists
;
2189 // special-case "len == 0" requests: always valid
2193 // can't start past end
2194 if (off
>= image_size
)
2197 // clip requests that extend past end to just end
2198 if ((off
+ *len
) > image_size
)
2199 *len
= (size_t)(image_size
- off
);
// Flush the image.
//
// Visible steps: refresh the image if required, call ictx->user_flushed(),
// take owner_lock for read, and increment the l_librbd_flush perf counter.
// NOTE(review): the call that performs the flush itself and the return
// statements are not among the visible statements.
2204 int flush(ImageCtx
*ictx
)
2206 CephContext
*cct
= ictx
->cct
;
2207 ldout(cct
, 20) << "flush " << ictx
<< dendl
;
2209 int r
= ictx
->state
->refresh_if_required();
2214 ictx
->user_flushed();
2217 RWLock::RLocker
owner_locker(ictx
->owner_lock
);
2222 ictx
->perfcounter
->inc(l_librbd_flush
);
// Invalidate the image's cache.
//
// After refreshing the image if required, this takes owner_lock for read and
// md_lock for write, calls ictx->invalidate_cache(false) and increments the
// l_librbd_invalidate_cache perf counter.
// NOTE(review): the return statements are not among the visible statements.
2226 int invalidate_cache(ImageCtx
*ictx
)
2228 CephContext
*cct
= ictx
->cct
;
2229 ldout(cct
, 20) << "invalidate_cache " << ictx
<< dendl
;
2231 int r
= ictx
->state
->refresh_if_required();
2236 RWLock::RLocker
owner_locker(ictx
->owner_lock
);
2237 RWLock::WLocker
md_locker(ictx
->md_lock
);
2238 r
= ictx
->invalidate_cache(false);
2239 ictx
->perfcounter
->inc(l_librbd_invalidate_cache
);
// Move up to `numcomp` finished AIO completions from ictx->completed_reqs
// into the caller's `comps` array.
//
// Under completed_reqs_lock, completions are taken from the front of the queue
// and stored at increasing indices until `numcomp` have been collected or the
// queue is empty.
// NOTE(review): the validation of `numcomp`, the declaration of `i` and the
// return value (presumably the number collected) are not among the visible
// statements.
2243 int poll_io_events(ImageCtx
*ictx
, io::AioCompletion
**comps
, int numcomp
)
2247 CephContext
*cct
= ictx
->cct
;
2248 ldout(cct
, 20) << __func__
<< " " << ictx
<< " numcomp = " << numcomp
2251 Mutex::Locker
l(ictx
->completed_reqs_lock
);
2252 while (i
< numcomp
) {
2253 if (ictx
->completed_reqs
.empty())
2255 comps
[i
++] = ictx
->completed_reqs
.front();
2256 ictx
->completed_reqs
.pop_front();
// Fetch the metadata value stored under `key` on the image's header object.
//
// The image is refreshed first if required; the lookup result of
// cls_client::metadata_get() is returned directly, with the value written to
// `*value`.
2261 int metadata_get(ImageCtx
*ictx
, const string
&key
, string
*value
)
2263 CephContext
*cct
= ictx
->cct
;
2264 ldout(cct
, 20) << "metadata_get " << ictx
<< " key=" << key
<< dendl
;
2266 int r
= ictx
->state
->refresh_if_required();
2271 return cls_client::metadata_get(&ictx
->md_ctx
, ictx
->header_oid
, key
, value
);
// List metadata key/value pairs stored on the image's header object.
//
// `start` and `max` are forwarded unchanged to cls_client::metadata_list(),
// whose result is returned directly; the pairs are written to `*pairs`. The
// image is refreshed first if required.
2274 int metadata_list(ImageCtx
*ictx
, const string
&start
, uint64_t max
, map
<string
, bufferlist
> *pairs
)
2276 CephContext
*cct
= ictx
->cct
;
2277 ldout(cct
, 20) << "metadata_list " << ictx
<< dendl
;
2279 int r
= ictx
->state
->refresh_if_required();
2284 return cls_client::metadata_list(&ictx
->md_ctx
, ictx
->header_oid
, start
, max
, pairs
);
// Completion context for a single readahead object read. It records the image
// context plus the object id, offset and length of the request; finish() logs
// them and decrements the image's pending-readahead count. The read result `r`
// is not used in the visible statements.
2287 struct C_RBD_Readahead
: public Context
{
2292 C_RBD_Readahead(ImageCtx
*ictx
, object_t oid
, uint64_t offset
, uint64_t length
)
2293 : ictx(ictx
), oid(oid
), offset(offset
), length(length
) { }
2294 void finish(int r
) override
{
2295 ldout(ictx
->cct
, 20) << "C_RBD_Readahead on " << oid
<< ": " << offset
<< "+" << length
<< dendl
;
2296 ictx
->readahead
.dec_pending();
// Issue readahead for the image, driven by the extents that were just read.
//
// Visible steps:
//  1. sum the lengths of `image_extents`
//  2. under md_lock (write): compute `abort`, true when
//     readahead_disable_after_bytes is non-zero and total_bytes_read already
//     exceeds it; otherwise add the new bytes to total_bytes_read and read the
//     image size under snap_lock
//  3. ask ictx->readahead.update() for the next readahead extent
//  4. when its length is non-zero, map it to object extents with
//     Striper::file_to_extents() and issue one aio_read_from_cache() per
//     object extent, each completing into a C_RBD_Readahead
//  5. bump the readahead and readahead-bytes perf counters
// md_lock is taken and released manually; there are two put_write() calls,
// the first of which presumably belongs to the abort path -- TODO confirm.
// NOTE(review): the abort branch and the loop increment of the first loop are
// not among the visible statements.
2300 void readahead(ImageCtx
*ictx
,
2301 const vector
<pair
<uint64_t,uint64_t> >& image_extents
)
2303 uint64_t total_bytes
= 0;
2304 for (vector
<pair
<uint64_t,uint64_t> >::const_iterator p
= image_extents
.begin();
2305 p
!= image_extents
.end();
2307 total_bytes
+= p
->second
;
2310 ictx
->md_lock
.get_write();
2311 bool abort
= ictx
->readahead_disable_after_bytes
!= 0 &&
2312 ictx
->total_bytes_read
> ictx
->readahead_disable_after_bytes
;
2314 ictx
->md_lock
.put_write();
2317 ictx
->total_bytes_read
+= total_bytes
;
2318 ictx
->snap_lock
.get_read();
2319 uint64_t image_size
= ictx
->get_image_size(ictx
->snap_id
);
2320 ictx
->snap_lock
.put_read();
2321 ictx
->md_lock
.put_write();
2323 pair
<uint64_t, uint64_t> readahead_extent
= ictx
->readahead
.update(image_extents
, image_size
);
2324 uint64_t readahead_offset
= readahead_extent
.first
;
2325 uint64_t readahead_length
= readahead_extent
.second
;
2327 if (readahead_length
> 0) {
2328 ldout(ictx
->cct
, 20) << "(readahead logical) " << readahead_offset
<< "~" << readahead_length
<< dendl
;
2329 map
<object_t
,vector
<ObjectExtent
> > readahead_object_extents
;
2330 Striper::file_to_extents(ictx
->cct
, ictx
->format_string
, &ictx
->layout
,
2331 readahead_offset
, readahead_length
, 0, readahead_object_extents
);
2332 for (map
<object_t
,vector
<ObjectExtent
> >::iterator p
= readahead_object_extents
.begin(); p
!= readahead_object_extents
.end(); ++p
) {
2333 for (vector
<ObjectExtent
>::iterator q
= p
->second
.begin(); q
!= p
->second
.end(); ++q
) {
2334 ldout(ictx
->cct
, 20) << "(readahead) oid " << q
->oid
<< " " << q
->offset
<< "~" << q
->length
<< dendl
;
// The pending count is raised before the read is issued; C_RBD_Readahead
// lowers it again on completion. No destination buffer is passed (NULL).
2336 Context
*req_comp
= new C_RBD_Readahead(ictx
, q
->oid
, q
->offset
, q
->length
);
2337 ictx
->readahead
.inc_pending();
2338 ictx
->aio_read_from_cache(q
->oid
, q
->objectno
, NULL
,
2339 q
->length
, q
->offset
,
2340 req_comp
, 0, nullptr);
2343 ictx
->perfcounter
->inc(l_librbd_readahead
);
2344 ictx
->perfcounter
->inc(l_librbd_readahead_bytes
, readahead_length
);