1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #include "include/int_types.h"
8 #include "include/types.h"
9 #include "include/uuid.h"
10 #include "common/ceph_context.h"
11 #include "common/dout.h"
12 #include "common/errno.h"
13 #include "common/Throttle.h"
14 #include "common/event_socket.h"
15 #include "cls/lock/cls_lock_client.h"
16 #include "include/stringify.h"
18 #include "cls/rbd/cls_rbd.h"
19 #include "cls/rbd/cls_rbd_types.h"
20 #include "cls/rbd/cls_rbd_client.h"
21 #include "cls/journal/cls_journal_types.h"
22 #include "cls/journal/cls_journal_client.h"
24 #include "librbd/ExclusiveLock.h"
25 #include "librbd/ImageCtx.h"
26 #include "librbd/ImageState.h"
27 #include "librbd/internal.h"
28 #include "librbd/Journal.h"
29 #include "librbd/ObjectMap.h"
30 #include "librbd/Operations.h"
31 #include "librbd/Types.h"
32 #include "librbd/Utils.h"
33 #include "librbd/api/Image.h"
34 #include "librbd/exclusive_lock/AutomaticPolicy.h"
35 #include "librbd/exclusive_lock/StandardPolicy.h"
36 #include "librbd/image/CloneRequest.h"
37 #include "librbd/image/CreateRequest.h"
38 #include "librbd/image/RemoveRequest.h"
39 #include "librbd/io/AioCompletion.h"
40 #include "librbd/io/ImageRequest.h"
41 #include "librbd/io/ImageRequestWQ.h"
42 #include "librbd/io/ObjectRequest.h"
43 #include "librbd/io/ReadResult.h"
44 #include "librbd/journal/Types.h"
45 #include "librbd/managed_lock/Types.h"
46 #include "librbd/mirror/EnableRequest.h"
47 #include "librbd/operation/TrimRequest.h"
49 #include "journal/Journaler.h"
51 #include <boost/scope_exit.hpp>
52 #include <boost/variant.hpp>
53 #include "include/assert.h"
55 #define dout_subsys ceph_subsys_rbd
57 #define dout_prefix *_dout << "librbd: "
59 #define rbd_howmany(x, y) (((x) + (y) - 1) / (y))
66 // list binds to list() here, so std::list is explicitly used below
68 using ceph::bufferlist
;
69 using librados::snap_t
;
70 using librados::IoCtx
;
71 using librados::Rados
;
// Pool sanity check (called by create_v1): reads the "rbd_validate_pool"
// option, stats RBD_DIRECTORY, then allocates and releases a self-managed
// snapshot id on io_ctx, logging an error for each step that fails.
// NOTE(review): this listing omits some source lines (braces, returns and
// several conditions), so the exact branch structure is not visible here.
77 int validate_pool(IoCtx
&io_ctx
, CephContext
*cct
) {
78 if (!cct
->_conf
->get_val
<bool>("rbd_validate_pool")) {
82 int r
= io_ctx
.stat(RBD_DIRECTORY
, NULL
, NULL
);
// -ENOENT is excluded from this error branch.
85 } else if (r
< 0 && r
!= -ENOENT
) {
86 lderr(cct
) << "failed to stat RBD directory: " << cpp_strerror(r
) << dendl
;
90 // allocate a self-managed snapshot id if this is a new pool to force
91 // self-managed snapshot mode
93 r
= io_ctx
.selfmanaged_snap_create(&snap_id
);
95 lderr(cct
) << "pool not configured for self-managed RBD snapshot support"
99 lderr(cct
) << "failed to allocate self-managed snapshot: "
100 << cpp_strerror(r
) << dendl
;
// The snapshot id created above is released again.
104 r
= io_ctx
.selfmanaged_snap_remove(snap_id
);
106 lderr(cct
) << "failed to release self-managed snapshot " << snap_id
107 << ": " << cpp_strerror(r
) << dendl
;
113 } // anonymous namespace
// Probes for an image called `name` by stat'ing its old-format header object
// (util::old_header_name) and its id object (util::id_obj_name), then logs
// the detected format at debug level 20. `old_format` may be null, in which
// case the log prints "don't care"; `size` is passed through to stat().
// NOTE(review): this listing omits some source lines; the conditions between
// the two stat() calls and the return values are not visible here.
115 int detect_format(IoCtx
&io_ctx
, const string
&name
,
116 bool *old_format
, uint64_t *size
)
118 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
121 int r
= io_ctx
.stat(util::old_header_name(name
), size
, NULL
);
125 r
= io_ctx
.stat(util::id_obj_name(name
), size
, NULL
);
132 ldout(cct
, 20) << "detect format of " << name
<< " : "
133 << (old_format
? (*old_format
? "old" : "new") :
134 "don't care") << dendl
;
/**
 * Report whether an offset is backed by a parent image.
 *
 * @param parent_pool_id pool id of the parent; -1 means "no parent"
 * @param off            offset being tested
 * @param overlap        parent overlap to compare against
 * @return true when a parent pool is set and off does not exceed overlap
 *
 * NOTE(review): the comparison is inclusive, so off == overlap still counts
 * as having a parent -- confirm this boundary is intended.
 */
bool has_parent(int64_t parent_pool_id, uint64_t off, uint64_t overlap)
{
  return (parent_pool_id != -1 && off <= overlap);
}
// Initialises `ondisk` for a new image: zeroes the struct, copies the
// RBD_HEADER_TEXT / RBD_HEADER_SIGNATURE / RBD_HEADER_VERSION constants,
// formats block_name with "rb.%x.%x.%x", and stores size and order with no
// encryption, no compression and zeroed snapshot counters.
// NOTE(review): this listing omits some source lines; the snprintf argument
// list is not visible here (presumably hi, lo, extra -- confirm).
143 void init_rbd_header(struct rbd_obj_header_ondisk
& ondisk
,
144 uint64_t size
, int order
, uint64_t bid
)
// hi / lo are the upper and lower 32 bits of bid.
146 uint32_t hi
= bid
>> 32;
147 uint32_t lo
= bid
& 0xFFFFFFFF;
148 uint32_t extra
= rand() % 0xFFFFFFFF;
149 memset(&ondisk
, 0, sizeof(ondisk
));
151 memcpy(&ondisk
.text
, RBD_HEADER_TEXT
, sizeof(RBD_HEADER_TEXT
));
152 memcpy(&ondisk
.signature
, RBD_HEADER_SIGNATURE
,
153 sizeof(RBD_HEADER_SIGNATURE
));
154 memcpy(&ondisk
.version
, RBD_HEADER_VERSION
, sizeof(RBD_HEADER_VERSION
));
156 snprintf(ondisk
.block_name
, sizeof(ondisk
.block_name
), "rb.%x.%x.%x",
159 ondisk
.image_size
= size
;
160 ondisk
.options
.order
= order
;
161 ondisk
.options
.crypt_type
= RBD_CRYPT_NONE
;
162 ondisk
.options
.comp_type
= RBD_COMP_NONE
;
164 ondisk
.snap_count
= 0;
166 ondisk
.snap_names_len
= 0;
// Fills `info` from `ictx`: the image size is read while holding snap_lock
// for read, obj_size is 1 << order, num_objs comes from
// Striper::get_num_objects, and block_name_prefix is copied with strncpy and
// then explicitly NUL-terminated. The deprecated parent fields are cleared.
// `infosize` is not used by any of the lines visible here.
// NOTE(review): this listing omits some source lines.
169 void image_info(ImageCtx
*ictx
, image_info_t
& info
, size_t infosize
)
171 int obj_order
= ictx
->order
;
172 ictx
->snap_lock
.get_read();
173 info
.size
= ictx
->get_image_size(ictx
->snap_id
);
174 ictx
->snap_lock
.put_read();
175 info
.obj_size
= 1ULL << obj_order
;
176 info
.num_objs
= Striper::get_num_objects(ictx
->layout
, info
.size
);
177 info
.order
= obj_order
;
178 strncpy(info
.block_name_prefix
, ictx
->object_prefix
.c_str(),
179 RBD_MAX_BLOCK_NAME_SIZE
);
// strncpy does not terminate when the source fills the buffer.
180 info
.block_name_prefix
[RBD_MAX_BLOCK_NAME_SIZE
- 1] = '\0';
182 // clear deprecated fields
183 info
.parent_pool
= -1L;
184 info
.parent_name
[0] = '\0';
/**
 * Extract the object number from a data object name.
 *
 * The name is expected to be "<object_prefix><separator><hex number>"; the
 * prefix plus one separator character are skipped and the remainder is
 * parsed as hexadecimal.
 *
 * @param oid            full object name
 * @param object_prefix  prefix shared by all data objects of the image
 * @return the parsed object number, or 0 when nothing parseable follows the
 *         prefix (including when oid is no longer than the prefix)
 */
uint64_t oid_to_object_no(const std::string& oid,
			  const std::string& object_prefix)
{
  std::istringstream iss(oid);
  // skip object prefix and separator
  iss.ignore(object_prefix.length() + 1);
  // Zero-initialised: if ignore() already hit EOF the extraction below does
  // not write to num at all, so it must not be left indeterminate.
  uint64_t num = 0;
  iss >> std::hex >> num;
  return num;
}
// Trims the image from ictx->size down to `newsize` by building an
// operation::TrimRequest (created while holding snap_lock for read).
// Asserts that owner_lock is held and that the exclusive lock, when one
// exists, is owned by this client. A failure is logged as a warning.
// NOTE(review): this listing omits some source lines; the declaration of
// `ctx`, the submission of the request and the wait for its result are not
// visible here.
197 void trim_image(ImageCtx
*ictx
, uint64_t newsize
, ProgressContext
& prog_ctx
)
199 assert(ictx
->owner_lock
.is_locked());
200 assert(ictx
->exclusive_lock
== nullptr ||
201 ictx
->exclusive_lock
->is_lock_owner());
204 ictx
->snap_lock
.get_read();
205 operation::TrimRequest
<> *req
= operation::TrimRequest
<>::create(
206 *ictx
, &ctx
, ictx
->size
, newsize
, prog_ctx
);
207 ictx
->snap_lock
.put_read();
212 lderr(ictx
->cct
) << "warning: failed to remove some object(s): "
213 << cpp_strerror(r
) << dendl
;
// Reads the object `header_oid` into `header` in READ_SIZE (4096 byte)
// chunks, looping while a read returns exactly READ_SIZE bytes, then checks
// that the data starts with RBD_HEADER_TEXT and logs "unrecognized header
// format" otherwise. The object's last version is stored through `ver`.
// NOTE(review): this listing omits some source lines; error returns, the
// offset bookkeeping and any null check on `ver` are not visible here.
217 int read_header_bl(IoCtx
& io_ctx
, const string
& header_oid
,
218 bufferlist
& header
, uint64_t *ver
)
222 #define READ_SIZE 4096
225 r
= io_ctx
.read(header_oid
, bl
, READ_SIZE
, off
);
228 header
.claim_append(bl
);
// A short read ends the loop.
230 } while (r
== READ_SIZE
);
232 if (header
.length() < sizeof(RBD_HEADER_TEXT
) ||
233 memcmp(RBD_HEADER_TEXT
, header
.c_str(), sizeof(RBD_HEADER_TEXT
))) {
234 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
235 lderr(cct
) << "unrecognized header format" << dendl
;
240 *ver
= io_ctx
.get_last_version();
// Reads the header object via read_header_bl() and copies the first
// sizeof(*header) bytes into *header after comparing the buffer length
// against sizeof(*header).
// NOTE(review): this listing omits some source lines; the error returns are
// not visible here.
245 int read_header(IoCtx
& io_ctx
, const string
& header_oid
,
246 struct rbd_obj_header_ondisk
*header
, uint64_t *ver
)
248 bufferlist header_bl
;
249 int r
= read_header_bl(io_ctx
, header_oid
, header_bl
, ver
);
252 if (header_bl
.length() < (int)sizeof(*header
))
254 memcpy(header
, header_bl
.c_str(), sizeof(*header
));
// Adds `imgname` to the RBD_DIRECTORY tmap: builds a CEPH_OSD_TMAP_SET
// command carrying the image name and an empty value, and applies it with
// tmap_update(). Returns tmap_update()'s result.
// NOTE(review): this listing omits some source lines; the encode of the
// opcode `c` is not visible here.
259 int tmap_set(IoCtx
& io_ctx
, const string
& imgname
)
261 bufferlist cmdbl
, emptybl
;
262 __u8 c
= CEPH_OSD_TMAP_SET
;
264 ::encode(imgname
, cmdbl
);
265 ::encode(emptybl
, cmdbl
);
266 return io_ctx
.tmap_update(RBD_DIRECTORY
, cmdbl
);
// Removes `imgname` from the RBD_DIRECTORY tmap: builds a CEPH_OSD_TMAP_RM
// command carrying the image name and applies it with tmap_update().
// Returns tmap_update()'s result.
// NOTE(review): this listing omits some source lines; the declaration of
// `cmdbl` and the encode of the opcode `c` are not visible here.
269 int tmap_rm(IoCtx
& io_ctx
, const string
& imgname
)
272 __u8 c
= CEPH_OSD_TMAP_RM
;
274 ::encode(imgname
, cmdbl
);
275 return io_ctx
.tmap_update(RBD_DIRECTORY
, cmdbl
);
// Storage types behind the opaque rbd_image_options_t handle: an option
// value is either a string or a uint64_t, options are keyed by their integer
// id, and the map is held through a shared_ptr so handles can share it.
278 typedef boost::variant
<std::string
,uint64_t> image_option_value_t
;
279 typedef std::map
<int,image_option_value_t
> image_options_t
;
280 typedef std::shared_ptr
<image_options_t
> image_options_ref
;
// Value kind of an option (the enumerator lines are missing from this
// listing; STR and UINT64 are the values used below).
282 enum image_option_type_t
{
// Registry of every known option id and the value kind it accepts; the
// setters/getters below consult it to reject unknown ids or mismatched types.
287 const std::map
<int, image_option_type_t
> IMAGE_OPTIONS_TYPE_MAPPING
= {
288 {RBD_IMAGE_OPTION_FORMAT
, UINT64
},
289 {RBD_IMAGE_OPTION_FEATURES
, UINT64
},
290 {RBD_IMAGE_OPTION_ORDER
, UINT64
},
291 {RBD_IMAGE_OPTION_STRIPE_UNIT
, UINT64
},
292 {RBD_IMAGE_OPTION_STRIPE_COUNT
, UINT64
},
293 {RBD_IMAGE_OPTION_JOURNAL_ORDER
, UINT64
},
294 {RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH
, UINT64
},
295 {RBD_IMAGE_OPTION_JOURNAL_POOL
, STR
},
296 {RBD_IMAGE_OPTION_FEATURES_SET
, UINT64
},
297 {RBD_IMAGE_OPTION_FEATURES_CLEAR
, UINT64
},
298 {RBD_IMAGE_OPTION_DATA_POOL
, STR
},
// Maps an RBD_IMAGE_OPTION_* id to its printable name; ids without a case
// yield "unknown (<optname>)".
// NOTE(review): this listing omits some source lines; the switch header and
// the return statements for FORMAT, FEATURES, ORDER and DATA_POOL are not
// visible here.
301 std::string
image_option_name(int optname
) {
303 case RBD_IMAGE_OPTION_FORMAT
:
305 case RBD_IMAGE_OPTION_FEATURES
:
307 case RBD_IMAGE_OPTION_ORDER
:
309 case RBD_IMAGE_OPTION_STRIPE_UNIT
:
310 return "stripe_unit";
311 case RBD_IMAGE_OPTION_STRIPE_COUNT
:
312 return "stripe_count";
313 case RBD_IMAGE_OPTION_JOURNAL_ORDER
:
314 return "journal_order";
315 case RBD_IMAGE_OPTION_JOURNAL_SPLAY_WIDTH
:
316 return "journal_splay_width";
317 case RBD_IMAGE_OPTION_JOURNAL_POOL
:
318 return "journal_pool";
319 case RBD_IMAGE_OPTION_FEATURES_SET
:
320 return "features_set";
321 case RBD_IMAGE_OPTION_FEATURES_CLEAR
:
322 return "features_clear";
323 case RBD_IMAGE_OPTION_DATA_POOL
:
326 return "unknown (" + stringify(optname
) + ")";
// Streams the options that are set in `opts` as "name=value" entries,
// walking IMAGE_OPTIONS_TYPE_MAPPING and fetching each value with the getter
// matching its registered kind (STR or UINT64). `delimiter` starts empty and
// is written before every entry.
// NOTE(review): this listing omits some source lines; any surrounding
// brackets and the update of `delimiter` are not visible here.
330 std::ostream
&operator<<(std::ostream
&os
, const ImageOptions
&opts
) {
333 const char *delimiter
= "";
334 for (auto &i
: IMAGE_OPTIONS_TYPE_MAPPING
) {
335 if (i
.second
== STR
) {
337 if (opts
.get(i
.first
, &val
) == 0) {
338 os
<< delimiter
<< image_option_name(i
.first
) << "=" << val
;
341 } else if (i
.second
== UINT64
) {
343 if (opts
.get(i
.first
, &val
) == 0) {
344 os
<< delimiter
<< image_option_name(i
.first
) << "=" << val
;
// Allocates a new, empty option map wrapped in a heap-allocated
// image_options_ref and stores it in *opts as an opaque handle.
355 void image_options_create(rbd_image_options_t
* opts
)
357 image_options_ref
* opts_
= new image_options_ref(new image_options_t());
359 *opts
= static_cast<rbd_image_options_t
>(opts_
);
// Creates a new handle in *opts that refers to the same underlying option
// map as `orig`: the new image_options_ref (a shared_ptr) is copy-constructed
// from the one behind `orig`.
362 void image_options_create_ref(rbd_image_options_t
* opts
,
363 rbd_image_options_t orig
)
365 image_options_ref
* orig_
= static_cast<image_options_ref
*>(orig
);
366 image_options_ref
* opts_
= new image_options_ref(*orig_
);
368 *opts
= static_cast<rbd_image_options_t
>(opts_
);
// Allocates a fresh option map in *opts and copies into it every option that
// is set in `orig`, iterating over IMAGE_OPTIONS_TYPE_MAPPING and using
// image_options_set() for both string and uint64 values.
// NOTE(review): this listing omits some source lines; the per-type branch
// conditions and the local value declarations are not visible here.
371 void image_options_copy(rbd_image_options_t
* opts
,
372 const ImageOptions
&orig
)
374 image_options_ref
* opts_
= new image_options_ref(new image_options_t());
376 *opts
= static_cast<rbd_image_options_t
>(opts_
);
380 for (auto &i
: IMAGE_OPTIONS_TYPE_MAPPING
) {
383 if (orig
.get(i
.first
, &str_val
) == 0) {
384 image_options_set(*opts
, i
.first
, str_val
);
388 if (orig
.get(i
.first
, &uint64_val
) == 0) {
389 image_options_set(*opts
, i
.first
, uint64_val
);
// Destroys an options handle; the opaque handle is cast back to the
// image_options_ref it wraps.
// NOTE(review): this listing omits some source lines; the statement that
// releases `opts_` is not visible here.
396 void image_options_destroy(rbd_image_options_t opts
)
398 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
// Sets the string option `optname` to `optval` in the map behind `opts`.
// The id is looked up in IMAGE_OPTIONS_TYPE_MAPPING and handled specially
// when it is unknown or not registered as STR.
// NOTE(review): this listing omits some source lines; the return values
// (including the one for the rejected case) are not visible here.
403 int image_options_set(rbd_image_options_t opts
, int optname
,
404 const std::string
& optval
)
406 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
408 std::map
<int, image_option_type_t
>::const_iterator i
=
409 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
411 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= STR
) {
415 (*opts_
->get())[optname
] = optval
;
// Sets the uint64 option `optname` to `optval` in the map behind `opts`.
// The id is looked up in IMAGE_OPTIONS_TYPE_MAPPING and handled specially
// when it is unknown or not registered as UINT64.
// NOTE(review): this listing omits some source lines; the return values
// (including the one for the rejected case) are not visible here.
419 int image_options_set(rbd_image_options_t opts
, int optname
, uint64_t optval
)
421 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
423 std::map
<int, image_option_type_t
>::const_iterator i
=
424 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
426 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= UINT64
) {
430 (*opts_
->get())[optname
] = optval
;
// Fetches the string option `optname` into *optval. The id must be
// registered as STR in IMAGE_OPTIONS_TYPE_MAPPING, and the option must be
// present in the map for the value to be read via boost::get<std::string>.
// NOTE(review): this listing omits some source lines; the second line of the
// parameter list and the return values of each branch are not visible here.
434 int image_options_get(rbd_image_options_t opts
, int optname
,
437 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
439 std::map
<int, image_option_type_t
>::const_iterator i
=
440 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
442 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= STR
) {
446 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
// Option known but not set on this handle.
448 if (j
== (*opts_
)->end()) {
452 *optval
= boost::get
<std::string
>(j
->second
);
// Fetches the uint64 option `optname` into *optval. The id must be
// registered as UINT64 in IMAGE_OPTIONS_TYPE_MAPPING, and the option must be
// present in the map for the value to be read via boost::get<uint64_t>.
// NOTE(review): this listing omits some source lines; the return values of
// each branch are not visible here.
456 int image_options_get(rbd_image_options_t opts
, int optname
, uint64_t* optval
)
458 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
460 std::map
<int, image_option_type_t
>::const_iterator i
=
461 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
463 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end() || i
->second
!= UINT64
) {
467 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
// Option known but not set on this handle.
469 if (j
== (*opts_
)->end()) {
473 *optval
= boost::get
<uint64_t>(j
->second
);
// Reports through *is_set whether `optname` is present in the map behind
// `opts`. Ids absent from IMAGE_OPTIONS_TYPE_MAPPING take a separate branch.
// NOTE(review): this listing omits some source lines; the second line of the
// parameter list and the return values are not visible here.
477 int image_options_is_set(rbd_image_options_t opts
, int optname
,
480 if (IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
) ==
481 IMAGE_OPTIONS_TYPE_MAPPING
.end()) {
485 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
486 *is_set
= ((*opts_
)->find(optname
) != (*opts_
)->end());
// Removes `optname` from the map behind `opts`. For an id that is not in
// IMAGE_OPTIONS_TYPE_MAPPING the code asserts that the map does not contain
// it either; for a known id it looks the entry up and has a dedicated branch
// for "not currently set".
// NOTE(review): this listing omits some source lines; the erase call and the
// return values are not visible here.
490 int image_options_unset(rbd_image_options_t opts
, int optname
)
492 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
494 std::map
<int, image_option_type_t
>::const_iterator i
=
495 IMAGE_OPTIONS_TYPE_MAPPING
.find(optname
);
497 if (i
== IMAGE_OPTIONS_TYPE_MAPPING
.end()) {
498 assert((*opts_
)->find(optname
) == (*opts_
)->end());
502 image_options_t::const_iterator j
= (*opts_
)->find(optname
);
504 if (j
== (*opts_
)->end()) {
// Clears an options handle; the opaque handle is cast back to the
// image_options_ref it wraps.
// NOTE(review): this listing omits some source lines; the statement that
// empties the map is not visible here.
512 void image_options_clear(rbd_image_options_t opts
)
514 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
// Returns true when the option map behind `opts` contains no entries.
519 bool image_options_is_empty(rbd_image_options_t opts
)
521 image_options_ref
* opts_
= static_cast<image_options_ref
*>(opts
);
523 return (*opts_
)->empty();
// Appends the names of all images in the pool to `names`: old-format images
// are taken from the tmap stored in the RBD_DIRECTORY object, and v2 images
// from api::Image<>::list_images().
// NOTE(review): this listing omits some source lines; the decoding of the
// tmap into `m` and the error returns are not visible here.
526 int list(IoCtx
& io_ctx
, vector
<string
>& names
)
528 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
529 ldout(cct
, 20) << "list " << &io_ctx
<< dendl
;
532 int r
= io_ctx
.read(RBD_DIRECTORY
, bl
, 0, 0);
540 // old format images are in a tmap
542 bufferlist::iterator p
= bl
.begin();
544 map
<string
,bufferlist
> m
;
547 for (map
<string
,bufferlist
>::iterator q
= m
.begin(); q
!= m
.end(); ++q
) {
548 names
.push_back(q
->first
);
552 map
<string
, string
> images
;
553 r
= api::Image
<>::list_images(io_ctx
, &images
);
555 lderr(cct
) << "error listing v2 images: " << cpp_strerror(r
) << dendl
;
// Only the first element of each pair is reported.
558 for (const auto& img_pair
: images
) {
559 names
.push_back(img_pair
.first
);
// Flattens every child image cloned from snapshot `snap_name` of `ictx`.
// Children are discovered with api::Image<>::list_children() while holding
// snap_lock for read; each child is opened, flattened with a no-op progress
// context, and closed. For children without RBD_FEATURE_DEEP_FLATTEN that
// still have snapshots, the child entry is additionally removed from
// RBD_CHILDREN. `pctx` is updated once per entry of the children map.
// NOTE(review): this listing omits some source lines; error returns, the
// declarations of `ioctx` and `i`, and the cleanup on failed open are not
// visible here.
565 int flatten_children(ImageCtx
*ictx
, const char* snap_name
,
566 ProgressContext
& pctx
)
568 CephContext
*cct
= ictx
->cct
;
569 ldout(cct
, 20) << "children flatten " << ictx
->name
<< dendl
;
571 RWLock::RLocker
l(ictx
->snap_lock
);
572 snap_t snap_id
= ictx
->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name
);
573 ParentSpec
parent_spec(ictx
->md_ctx
.get_id(), ictx
->id
, snap_id
);
// Keyed by (pool id, pool name); the value is a set of child image ids.
574 map
< pair
<int64_t, string
>, set
<string
> > image_info
;
576 int r
= api::Image
<>::list_children(ictx
, parent_spec
, &image_info
);
581 size_t size
= image_info
.size();
586 Rados
rados(ictx
->md_ctx
);
587 for ( auto &info
: image_info
){
588 string pool
= info
.first
.second
;
590 r
= rados
.ioctx_create2(info
.first
.first
, ioctx
);
592 lderr(cct
) << "Error accessing child image pool " << pool
597 for (auto &id_it
: info
.second
) {
598 ImageCtx
*imctx
= new ImageCtx("", id_it
, NULL
, ioctx
, false);
599 int r
= imctx
->state
->open(false);
601 lderr(cct
) << "error opening image: "
602 << cpp_strerror(r
) << dendl
;
606 librbd::NoOpProgressContext prog_ctx
;
607 r
= imctx
->operations
->flatten(prog_ctx
);
609 lderr(cct
) << "error flattening image: " << pool
<< "/" << id_it
610 << cpp_strerror(r
) << dendl
;
611 imctx
->state
->close();
615 if ((imctx
->features
& RBD_FEATURE_DEEP_FLATTEN
) == 0 &&
616 !imctx
->snaps
.empty()) {
// Copy the parent metadata while holding parent_lock for read.
617 imctx
->parent_lock
.get_read();
618 ParentInfo parent_info
= imctx
->parent_md
;
619 imctx
->parent_lock
.put_read();
621 r
= cls_client::remove_child(&imctx
->md_ctx
, RBD_CHILDREN
,
622 parent_info
.spec
, imctx
->id
);
623 if (r
< 0 && r
!= -ENOENT
) {
624 lderr(cct
) << "error removing child from children list" << dendl
;
625 imctx
->state
->close();
630 r
= imctx
->state
->close();
632 lderr(cct
) << "failed to close image: " << cpp_strerror(r
) << dendl
;
636 pctx
.update_progress(++i
, size
);
// Collects (pool name, image name) pairs for all children of `ictx` at its
// current snap_id into `names`. Child ids come from
// api::Image<>::list_children() (called while holding snap_lock for read)
// and are resolved to names with cls_client::dir_get_name() on the child's
// pool.
// NOTE(review): this listing omits some source lines; error returns and the
// declarations of `ioctx` and `name` are not visible here.
643 int list_children(ImageCtx
*ictx
, set
<pair
<string
, string
> >& names
)
645 CephContext
*cct
= ictx
->cct
;
646 ldout(cct
, 20) << "children list " << ictx
->name
<< dendl
;
648 RWLock::RLocker
l(ictx
->snap_lock
);
649 ParentSpec
parent_spec(ictx
->md_ctx
.get_id(), ictx
->id
, ictx
->snap_id
);
// Keyed by (pool id, pool name); the value is a set of child image ids.
650 map
< pair
<int64_t, string
>, set
<string
> > image_info
;
652 int r
= api::Image
<>::list_children(ictx
, parent_spec
, &image_info
);
657 Rados
rados(ictx
->md_ctx
);
658 for ( auto &info
: image_info
){
660 r
= rados
.ioctx_create2(info
.first
.first
, ioctx
);
662 lderr(cct
) << "Error accessing child image pool " << info
.first
.second
667 for (auto &id_it
: info
.second
) {
669 r
= cls_client::dir_get_name(&ioctx
, RBD_DIRECTORY
, id_it
, &name
);
671 lderr(cct
) << "Error looking up name for image id " << id_it
672 << " in pool " << info
.first
.second
<< dendl
;
675 names
.insert(make_pair(info
.first
.second
, name
));
// Looks up the namespace of snapshot `snap_name`: refreshes the image if
// required, resolves the snapshot id under snap_lock (read) using the
// namespace currently stored in *snap_namespace, and then fills
// *snap_namespace via ictx->get_snap_namespace().
// NOTE(review): this listing omits some source lines; the returns for the
// refresh failure and for CEPH_NOSNAP are not visible here.
682 int get_snap_namespace(ImageCtx
*ictx
,
683 const char *snap_name
,
684 cls::rbd::SnapshotNamespace
*snap_namespace
) {
685 ldout(ictx
->cct
, 20) << "get_snap_namespace " << ictx
<< " " << snap_name
688 int r
= ictx
->state
->refresh_if_required();
691 RWLock::RLocker
l(ictx
->snap_lock
);
692 snap_t snap_id
= ictx
->get_snap_id(*snap_namespace
, snap_name
);
693 if (snap_id
== CEPH_NOSNAP
)
695 r
= ictx
->get_snap_namespace(snap_id
, snap_namespace
);
// Reports through *is_protected whether user snapshot `snap_name` is
// protected. Refreshes the image if required, resolves the snapshot id under
// snap_lock (read) and negates the result of ictx->is_snap_unprotected().
// NOTE(review): this listing omits some source lines; the error returns and
// the declaration of `is_unprotected` are not visible here.
699 int snap_is_protected(ImageCtx
*ictx
, const char *snap_name
, bool *is_protected
)
701 ldout(ictx
->cct
, 20) << "snap_is_protected " << ictx
<< " " << snap_name
704 int r
= ictx
->state
->refresh_if_required();
708 RWLock::RLocker
l(ictx
->snap_lock
);
709 snap_t snap_id
= ictx
->get_snap_id(cls::rbd::UserSnapshotNamespace(), snap_name
);
710 if (snap_id
== CEPH_NOSNAP
)
713 r
= ictx
->is_snap_unprotected(snap_id
, &is_unprotected
);
714 // consider both PROTECTED or UNPROTECTING to be 'protected',
715 // since in either state they can't be deleted
716 *is_protected
= !is_unprotected
;
// Creates an old-format (v1) image: validates the pool, registers `imgname`
// in the directory tmap, builds the on-disk header with init_rbd_header()
// (using the rados instance id as `bid`) and writes it to the old-format
// header object. tmap_rm() is called after the "Error writing image header"
// message to remove the directory entry again.
// NOTE(review): this listing omits some source lines; error returns and the
// declarations of `rados` and `bl` are not visible here.
720 int create_v1(IoCtx
& io_ctx
, const char *imgname
, uint64_t size
, int order
)
722 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
724 ldout(cct
, 20) << __func__
<< " " << &io_ctx
<< " name = " << imgname
725 << " size = " << size
<< " order = " << order
<< dendl
;
726 int r
= validate_pool(io_ctx
, cct
);
731 ldout(cct
, 2) << "adding rbd image to directory..." << dendl
;
732 r
= tmap_set(io_ctx
, imgname
);
734 lderr(cct
) << "error adding image to directory: " << cpp_strerror(r
)
740 uint64_t bid
= rados
.get_instance_id();
742 ldout(cct
, 2) << "creating rbd image..." << dendl
;
743 struct rbd_obj_header_ondisk header
;
744 init_rbd_header(header
, size
, order
, bid
);
747 bl
.append((const char *)&header
, sizeof(header
));
749 string header_oid
= util::old_header_name(imgname
);
750 r
= io_ctx
.write(header_oid
, bl
, bl
.length(), 0);
752 lderr(cct
) << "Error writing image header: " << cpp_strerror(r
)
// Roll back the directory entry added above.
754 int remove_r
= tmap_rm(io_ctx
, imgname
);
756 lderr(cct
) << "Could not remove image from directory after "
757 << "header creation failed: "
758 << cpp_strerror(remove_r
) << dendl
;
763 ldout(cct
, 2) << "done." << dendl
;
// Convenience overload: stores *order as RBD_IMAGE_OPTION_ORDER in an
// options object, delegates to the options-based create() with empty id and
// mirroring arguments, then reads the order option back.
// NOTE(review): this listing omits some source lines; the rest of the
// parameter list, the declaration of `opts` and the write-back to *order are
// not visible here.
767 int create(librados::IoCtx
& io_ctx
, const char *imgname
, uint64_t size
,
770 uint64_t order_
= *order
;
773 int r
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order_
);
776 r
= create(io_ctx
, imgname
, "", size
, opts
, "", "", false);
778 int r1
= opts
.get(RBD_IMAGE_OPTION_ORDER
, &order_
);
// Legacy overload taking discrete parameters: translates old_format
// (1 when true, 2 otherwise), features, order, stripe_unit and stripe_count
// into image options, delegates to the options-based create(), then reads
// the order option back.
// NOTE(review): this listing omits some source lines; the declaration of
// `opts`, the checks between the set() calls and the write-back to *order
// are not visible here.
785 int create(IoCtx
& io_ctx
, const char *imgname
, uint64_t size
,
786 bool old_format
, uint64_t features
, int *order
,
787 uint64_t stripe_unit
, uint64_t stripe_count
)
792 uint64_t order_
= *order
;
793 uint64_t format
= old_format
? 1 : 2;
797 r
= opts
.set(RBD_IMAGE_OPTION_FORMAT
, format
);
799 r
= opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
801 r
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order_
);
803 r
= opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
805 r
= opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
808 r
= create(io_ctx
, imgname
, "", size
, opts
, "", "", false);
810 int r1
= opts
.get(RBD_IMAGE_OPTION_ORDER
, &order_
);
// Main image-creation entry point. Starts from `image_id` and may replace it
// with util::generate_image_id() (presumably when the id is empty -- the
// condition is not visible). The format comes from the options or from
// "rbd_default_format". detect_format() is used to check whether the name is
// already taken. The order falls back to "rbd_default_order" when unset or
// zero and is validated. Format 1 goes through create_v1(); otherwise an
// image::CreateRequest is built on the shared op work queue. The resolved
// order is stored back into `opts`.
// NOTE(review): this listing omits some source lines; one parameter line,
// error returns, and the submission of and wait on the request are not
// visible here.
817 int create(IoCtx
& io_ctx
, const std::string
&image_name
,
818 const std::string
&image_id
, uint64_t size
,
820 const std::string
&non_primary_global_image_id
,
821 const std::string
&primary_mirror_uuid
,
822 bool skip_mirror_enable
)
824 std::string
id(image_id
);
826 id
= util::generate_image_id(io_ctx
);
829 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
830 ldout(cct
, 10) << __func__
<< " name=" << image_name
<< ", "
831 << "id= " << id
<< ", "
832 << "size=" << size
<< ", opts=" << opts
<< dendl
;
// Fall back to the configured default when the caller set no format.
835 if (opts
.get(RBD_IMAGE_OPTION_FORMAT
, &format
) != 0)
836 format
= cct
->_conf
->get_val
<int64_t>("rbd_default_format");
837 bool old_format
= format
== 1;
839 // make sure it doesn't already exist, in either format
840 int r
= detect_format(io_ctx
, image_name
, NULL
, NULL
);
843 lderr(cct
) << "Could not tell if " << image_name
<< " already exists"
847 lderr(cct
) << "rbd image " << image_name
<< " already exists" << dendl
;
// An order of 0 is treated the same as "not set".
852 if (opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
) != 0 || order
== 0) {
853 order
= cct
->_conf
->get_val
<int64_t>("rbd_default_order");
855 r
= image::CreateRequest
<>::validate_order(cct
, order
);
861 r
= create_v1(io_ctx
, image_name
.c_str(), size
, order
);
863 ThreadPool
*thread_pool
;
864 ContextWQ
*op_work_queue
;
865 ImageCtx::get_thread_pool_instance(cct
, &thread_pool
, &op_work_queue
);
868 image::CreateRequest
<> *req
= image::CreateRequest
<>::create(
869 io_ctx
, image_name
, id
, size
, opts
, non_primary_global_image_id
,
870 primary_mirror_uuid
, skip_mirror_enable
, op_work_queue
, &cond
);
876 int r1
= opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
883 * Parent may be in different pool, hence different IoCtx
// Legacy clone overload taking discrete parameters: packs features, order,
// stripe_unit and stripe_count into image options, delegates to the
// options-based clone(), then reads the order option back. The parent and
// child may use different IoCtx objects.
// NOTE(review): this listing omits some source lines; the declaration of
// `opts`, the write-back to *c_order and the return are not visible here.
885 int clone(IoCtx
& p_ioctx
, const char *p_name
, const char *p_snap_name
,
886 IoCtx
& c_ioctx
, const char *c_name
,
887 uint64_t features
, int *c_order
,
888 uint64_t stripe_unit
, int stripe_count
)
890 uint64_t order
= *c_order
;
893 opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
894 opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
895 opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
896 opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
898 int r
= clone(p_ioctx
, p_name
, p_snap_name
, c_ioctx
, c_name
, opts
);
899 opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
);
// Clones snapshot `p_snap_name` of parent image `p_name` into `c_name`.
// A null p_snap_name is reported as an error ("image to be cloned must be a
// snapshot"). The parent is opened at that snapshot (passing `true` as the
// last ImageCtx argument), the ImageCtx-based clone() does the work, and the
// parent is closed afterwards; the close result is examined only when the
// clone itself returned 0.
// NOTE(review): this listing omits some source lines; the error returns and
// the body of the final `if` are not visible here.
904 int clone(IoCtx
& p_ioctx
, const char *p_name
, const char *p_snap_name
,
905 IoCtx
& c_ioctx
, const char *c_name
, ImageOptions
& c_opts
)
907 CephContext
*cct
= (CephContext
*)p_ioctx
.cct();
908 if (p_snap_name
== NULL
) {
909 lderr(cct
) << "image to be cloned must be a snapshot" << dendl
;
913 // make sure parent snapshot exists
914 ImageCtx
*p_imctx
= new ImageCtx(p_name
, "", p_snap_name
, p_ioctx
, true);
915 int r
= p_imctx
->state
->open(false);
917 lderr(cct
) << "error opening parent image: "
918 << cpp_strerror(r
) << dendl
;
922 r
= clone(p_imctx
, c_ioctx
, c_name
, "", c_opts
, "", "");
924 int close_r
= p_imctx
->state
->close();
925 if (r
== 0 && close_r
< 0) {
// Clones from an already-open parent ImageCtx. Starts from `c_id` and may
// replace it with util::generate_image_id() (presumably when the id is
// empty -- the condition is not visible), then builds an image::CloneRequest
// on the shared op work queue.
// NOTE(review): this listing omits some source lines; the declaration of
// `cond`, the submission of and wait on the request, and the return are not
// visible here. The debug line prints `c_id`, not the local `id`.
935 int clone(ImageCtx
*p_imctx
, IoCtx
& c_ioctx
, const std::string
&c_name
,
936 const std::string
&c_id
, ImageOptions
& c_opts
,
937 const std::string
&non_primary_global_image_id
,
938 const std::string
&primary_mirror_uuid
)
940 std::string
id(c_id
);
942 id
= util::generate_image_id(c_ioctx
);
945 CephContext
*cct
= (CephContext
*)c_ioctx
.cct();
946 ldout(cct
, 10) << __func__
<< " "
947 << "c_name=" << c_name
<< ", "
948 << "c_id= " << c_id
<< ", "
949 << "c_opts=" << c_opts
<< dendl
;
951 ThreadPool
*thread_pool
;
952 ContextWQ
*op_work_queue
;
953 ImageCtx::get_thread_pool_instance(cct
, &thread_pool
, &op_work_queue
);
956 auto *req
= image::CloneRequest
<>::create(
957 p_imctx
, c_ioctx
, c_name
, id
, c_opts
,
958 non_primary_global_image_id
, primary_mirror_uuid
, op_work_queue
, &cond
);
// Renames image `srcname` to `dstname`: opens the source image, registers a
// BOOST_SCOPE_EXIT handler that closes it when the scope is left, and
// returns the result of operations->rename().
// NOTE(review): this listing omits some source lines; the return after a
// failed open is not visible here.
964 int rename(IoCtx
& io_ctx
, const char *srcname
, const char *dstname
)
966 CephContext
*cct
= (CephContext
*)io_ctx
.cct();
967 ldout(cct
, 20) << "rename " << &io_ctx
<< " " << srcname
<< " -> "
970 ImageCtx
*ictx
= new ImageCtx(srcname
, "", "", io_ctx
, false);
971 int r
= ictx
->state
->open(false);
973 lderr(cct
) << "error opening source image: " << cpp_strerror(r
) << dendl
;
// Close the image on every exit path from here on.
976 BOOST_SCOPE_EXIT((ictx
)) {
977 ictx
->state
->close();
978 } BOOST_SCOPE_EXIT_END
980 return ictx
->operations
->rename(dstname
);
// Public info query: refreshes the image if required and then fills `info`
// through image_info().
// NOTE(review): this listing omits some source lines; the error check after
// the refresh and the return are not visible here.
983 int info(ImageCtx
*ictx
, image_info_t
& info
, size_t infosize
)
985 ldout(ictx
->cct
, 20) << "info " << ictx
<< dendl
;
987 int r
= ictx
->state
->refresh_if_required();
991 image_info(ictx
, info
, infosize
);
// Stores ictx->old_format in *old after refreshing the image if required.
// NOTE(review): this listing omits some source lines; the error check after
// the refresh and the return are not visible here.
995 int get_old_format(ImageCtx
*ictx
, uint8_t *old
)
997 int r
= ictx
->state
->refresh_if_required();
1000 *old
= ictx
->old_format
;
// Stores the image size at the currently selected snap_id in *size, read
// under snap_lock (read) after refreshing the image if required.
// NOTE(review): this listing omits some source lines; the error check after
// the refresh and the return are not visible here.
1004 int get_size(ImageCtx
*ictx
, uint64_t *size
)
1006 int r
= ictx
->state
->refresh_if_required();
1009 RWLock::RLocker
l2(ictx
->snap_lock
);
1010 *size
= ictx
->get_image_size(ictx
->snap_id
);
// Stores ictx->features in *features, read under snap_lock (read) after
// refreshing the image if required.
// NOTE(review): this listing omits some source lines; the error check after
// the refresh and the return are not visible here.
1014 int get_features(ImageCtx
*ictx
, uint64_t *features
)
1016 int r
= ictx
->state
->refresh_if_required();
1019 RWLock::RLocker
l(ictx
->snap_lock
);
1020 *features
= ictx
->features
;
// Returns the result of ictx->get_parent_overlap() for the current snap_id,
// writing the overlap to *overlap. snap_lock is taken before parent_lock
// (both for read) after refreshing the image if required.
// NOTE(review): this listing omits some source lines; the error check after
// the refresh is not visible here.
1024 int get_overlap(ImageCtx
*ictx
, uint64_t *overlap
)
1026 int r
= ictx
->state
->refresh_if_required();
1029 RWLock::RLocker
l(ictx
->snap_lock
);
1030 RWLock::RLocker
l2(ictx
->parent_lock
);
1031 return ictx
->get_parent_overlap(ictx
->snap_id
, overlap
);
// Reports information about the parent of `ictx`. After a refresh, and with
// snap_lock and parent_lock held for read, the parent spec is taken from
// parent_md when the image head is selected (snap_id == CEPH_NOSNAP) or from
// get_parent_spec() for a snapshot. The pool name is resolved with
// pool_reverse_lookup() and the snapshot name with the parent's
// get_snap_name(); parent_pool_name and parent_snap_name are checked for
// null before use.
// NOTE(review): this listing omits some source lines; error returns, the
// output arguments of the lookup calls and any null checks guarding
// *parent_name / *parent_id are not visible here.
1034 int get_parent_info(ImageCtx
*ictx
, string
*parent_pool_name
,
1035 string
*parent_name
, string
*parent_id
,
1036 string
*parent_snap_name
)
1038 int r
= ictx
->state
->refresh_if_required();
1042 RWLock::RLocker
l(ictx
->snap_lock
);
1043 RWLock::RLocker
l2(ictx
->parent_lock
);
1044 if (ictx
->parent
== NULL
) {
1048 ParentSpec parent_spec
;
1050 if (ictx
->snap_id
== CEPH_NOSNAP
) {
1051 parent_spec
= ictx
->parent_md
.spec
;
1053 r
= ictx
->get_parent_spec(ictx
->snap_id
, &parent_spec
);
1055 lderr(ictx
->cct
) << "Can't find snapshot id = " << ictx
->snap_id
1059 if (parent_spec
.pool_id
== -1)
1062 if (parent_pool_name
) {
1063 Rados
rados(ictx
->md_ctx
);
1064 r
= rados
.pool_reverse_lookup(parent_spec
.pool_id
,
1067 lderr(ictx
->cct
) << "error looking up pool name: " << cpp_strerror(r
)
1073 if (parent_snap_name
) {
// The parent's own snap_lock guards its snapshot table.
1074 RWLock::RLocker
l(ictx
->parent
->snap_lock
);
1075 r
= ictx
->parent
->get_snap_name(parent_spec
.snap_id
,
1078 lderr(ictx
->cct
) << "error finding parent snap name: "
1079 << cpp_strerror(r
) << dendl
;
1085 RWLock::RLocker
snap_locker(ictx
->parent
->snap_lock
);
1086 *parent_name
= ictx
->parent
->name
;
1089 *parent_id
= ictx
->parent
->id
;
// Returns the result of ictx->get_flags() for the current snap_id, writing
// the flags to *flags, under snap_lock (read) after refreshing the image if
// required.
// NOTE(review): this listing omits some source lines; the error check after
// the refresh is not visible here.
1095 int get_flags(ImageCtx
*ictx
, uint64_t *flags
)
1097 int r
= ictx
->state
->refresh_if_required();
1102 RWLock::RLocker
l2(ictx
->snap_lock
);
1103 return ictx
->get_flags(ictx
->snap_id
, flags
);
// Initialises the image's event socket with file descriptor `fd` and
// notification `type` after refreshing the image if required. The socket's
// is_valid() state is checked before init() is called.
// NOTE(review): this listing omits some source lines; the returns after the
// refresh check and after the is_valid() check are not visible here.
1106 int set_image_notification(ImageCtx
*ictx
, int fd
, int type
)
1108 CephContext
*cct
= ictx
->cct
;
1109 ldout(cct
, 20) << __func__
<< " " << ictx
<< " fd " << fd
<< " type" << type
<< dendl
;
1111 int r
= ictx
->state
->refresh_if_required();
1116 if (ictx
->event_socket
.is_valid())
1118 return ictx
->event_socket
.init(fd
, type
);
// Determines whether this client owns the image's exclusive lock. Under
// owner_lock (read), an absent exclusive lock or a non-owner state takes one
// branch; otherwise ownership is double-checked with
// assert_header_locked(), with -EBUSY and -ENOENT handled together.
// NOTE(review): this listing omits some source lines; the assignments to
// *is_owner and the returns are not visible here.
1121 int is_exclusive_lock_owner(ImageCtx
*ictx
, bool *is_owner
)
1125 RWLock::RLocker
owner_locker(ictx
->owner_lock
);
1126 if (ictx
->exclusive_lock
== nullptr ||
1127 !ictx
->exclusive_lock
->is_lock_owner()) {
1131 // might have been blacklisted by peer -- ensure we still own
1132 // the lock by pinging the OSD
1133 int r
= ictx
->exclusive_lock
->assert_header_locked();
1134 if (r
== -EBUSY
|| r
== -ENOENT
) {
// Acquires the exclusive lock on behalf of the caller. Any lock_mode other
// than RBD_LOCK_MODE_EXCLUSIVE takes a separate branch. Under owner_lock
// (write) the code checks that the feature is enabled, switches to
// exclusive_lock::StandardPolicy when the current policy may auto-request
// the lock, and calls acquire_lock() with lock_ctx. After waiting on
// lock_ctx it re-checks ownership under owner_lock (read).
// NOTE(review): this listing omits some source lines; the scope braces that
// bound the two lockers, the body of the is_lock_owner() branch and the
// return values are not visible here.
1144 int lock_acquire(ImageCtx
*ictx
, rbd_lock_mode_t lock_mode
)
1146 CephContext
*cct
= ictx
->cct
;
1147 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< ", "
1148 << "lock_mode=" << lock_mode
<< dendl
;
1150 if (lock_mode
!= RBD_LOCK_MODE_EXCLUSIVE
) {
1154 C_SaferCond lock_ctx
;
1156 RWLock::WLocker
l(ictx
->owner_lock
);
1158 if (ictx
->exclusive_lock
== nullptr) {
1159 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1163 if (ictx
->get_exclusive_lock_policy()->may_auto_request_lock()) {
1164 ictx
->set_exclusive_lock_policy(
1165 new exclusive_lock::StandardPolicy(ictx
));
1168 if (ictx
->exclusive_lock
->is_lock_owner()) {
1172 ictx
->exclusive_lock
->acquire_lock(&lock_ctx
);
1175 int r
= lock_ctx
.wait();
1177 lderr(cct
) << "failed to request exclusive lock: " << cpp_strerror(r
)
1182 RWLock::RLocker
l(ictx
->owner_lock
);
1184 if (ictx
->exclusive_lock
== nullptr ||
1185 !ictx
->exclusive_lock
->is_lock_owner()) {
1186 lderr(cct
) << "failed to acquire exclusive lock" << dendl
;
// Releases the exclusive lock held by this client. Under owner_lock (write)
// it logs "not exclusive lock owner" when no lock exists or it is not owned,
// otherwise it calls release_lock() with lock_ctx, which is then waited on.
// NOTE(review): this listing omits some source lines; the scope braces that
// bound the locker and the return values are not visible here.
1193 int lock_release(ImageCtx
*ictx
)
1195 CephContext
*cct
= ictx
->cct
;
1196 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< dendl
;
1198 C_SaferCond lock_ctx
;
1200 RWLock::WLocker
l(ictx
->owner_lock
);
1202 if (ictx
->exclusive_lock
== nullptr ||
1203 !ictx
->exclusive_lock
->is_lock_owner()) {
1204 lderr(cct
) << "not exclusive lock owner" << dendl
;
1208 ictx
->exclusive_lock
->release_lock(&lock_ctx
);
1211 int r
= lock_ctx
.wait();
1213 lderr(cct
) << "failed to release exclusive lock: " << cpp_strerror(r
)
// Reports the current exclusive-lock owner. Logs an error when
// RBD_FEATURE_EXCLUSIVE_LOCK is not enabled; otherwise it queries the locker
// through a temporary ExclusiveLock<> object and, on the path shown, sets
// *lock_mode to RBD_LOCK_MODE_EXCLUSIVE and replaces the contents of
// *lock_owners with the locker's address.
// NOTE(review): this listing omits some source lines; the return values and
// the conditions around the error log are not visible here.
1220 int lock_get_owners(ImageCtx
*ictx
, rbd_lock_mode_t
*lock_mode
,
1221 std::list
<std::string
> *lock_owners
)
1223 CephContext
*cct
= ictx
->cct
;
1224 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< dendl
;
1226 if (!ictx
->test_features(RBD_FEATURE_EXCLUSIVE_LOCK
)) {
1227 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1231 managed_lock::Locker locker
;
1232 C_SaferCond get_owner_ctx
;
1233 ExclusiveLock
<>(*ictx
).get_locker(&locker
, &get_owner_ctx
);
1234 int r
= get_owner_ctx
.wait();
1238 lderr(cct
) << "failed to determine current lock owner: "
1239 << cpp_strerror(r
) << dendl
;
1243 *lock_mode
= RBD_LOCK_MODE_EXCLUSIVE
;
1244 lock_owners
->clear();
1245 lock_owners
->emplace_back(locker
.address
);
// Breaks the exclusive lock held by `lock_owner`. Non-exclusive lock modes
// and read-only images take separate branches. The current locker is fetched
// under owner_lock (read) and compared with `lock_owner`; the lock is then
// broken with break_lock(locker, true, ...) under a second read lock, and
// the result of break_ctx is waited on.
// NOTE(review): this listing omits some source lines; the scope braces that
// bound the two lockers and the return values are not visible here.
1249 int lock_break(ImageCtx
*ictx
, rbd_lock_mode_t lock_mode
,
1250 const std::string
&lock_owner
)
1252 CephContext
*cct
= ictx
->cct
;
1253 ldout(cct
, 20) << __func__
<< ": ictx=" << ictx
<< ", "
1254 << "lock_mode=" << lock_mode
<< ", "
1255 << "lock_owner=" << lock_owner
<< dendl
;
1257 if (lock_mode
!= RBD_LOCK_MODE_EXCLUSIVE
) {
1261 if (ictx
->read_only
) {
1265 managed_lock::Locker locker
;
1266 C_SaferCond get_owner_ctx
;
1268 RWLock::RLocker
l(ictx
->owner_lock
);
1270 if (ictx
->exclusive_lock
== nullptr) {
1271 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1275 ictx
->exclusive_lock
->get_locker(&locker
, &get_owner_ctx
);
1277 int r
= get_owner_ctx
.wait();
1281 lderr(cct
) << "failed to determine current lock owner: "
1282 << cpp_strerror(r
) << dendl
;
// The lock is currently held by someone other than the requested owner.
1286 if (locker
.address
!= lock_owner
) {
1290 C_SaferCond break_ctx
;
1292 RWLock::RLocker
l(ictx
->owner_lock
);
// Re-checked after the owner lookup above.
1294 if (ictx
->exclusive_lock
== nullptr) {
1295 lderr(cct
) << "exclusive-lock feature is not enabled" << dendl
;
1299 ictx
->exclusive_lock
->break_lock(locker
, true, &break_ctx
);
1301 r
= break_ctx
.wait();
1305 lderr(cct
) << "failed to break lock: " << cpp_strerror(r
) << dendl
;
// Removes an image identified by `image_name` or `image_id` (the debug line
// prints the name when the id is empty) by building a
// librbd::image::RemoveRequest on the shared op work queue.
// NOTE(review): this listing omits some source lines; the declaration of
// `cond`, the submission of and wait on the request, and the return are not
// visible here.
1311 int remove(IoCtx
& io_ctx
, const std::string
&image_name
,
1312 const std::string
&image_id
, ProgressContext
& prog_ctx
,
1313 bool force
, bool from_trash_remove
)
1315 CephContext
*cct((CephContext
*)io_ctx
.cct());
1316 ldout(cct
, 20) << "remove " << &io_ctx
<< " "
1317 << (image_id
.empty() ? image_name
: image_id
) << dendl
;
1319 ThreadPool
*thread_pool
;
1320 ContextWQ
*op_work_queue
;
1321 ImageCtx::get_thread_pool_instance(cct
, &thread_pool
, &op_work_queue
);
1324 auto req
= librbd::image::RemoveRequest
<>::create(
1325 io_ctx
, image_name
, image_id
, force
, from_trash_remove
, prog_ctx
,
1326 op_work_queue
, &cond
);
// trash_move(): moves the image `image_name` into the rbd_trash listing.
// Steps visible in this excerpt:
//   1. open the image; a directory lookup (dir_get_id) is also present as a
//      way to obtain the image id
//   2. close the image again when it is old-format (ictx->old_format)
//   3. when the exclusive-lock feature is active, call
//      prepare_image_update() and give up if the lock is not owned afterwards
//   4. add a TrashImageSpec whose deferment end time is now + `delay`
//   5. remove the id object and the v2 directory entry
// -EEXIST from trash_add is tolerated (an earlier move was interrupted) and
// -ENOENT from removing the id object is tolerated.
// NOTE(review): return statements and several `if (r < 0)` guards are not
// visible here, so the exact error codes returned cannot be stated.
1332 int trash_move(librados::IoCtx
&io_ctx
, rbd_trash_image_source_t source
,
1333 const std::string
&image_name
, uint64_t delay
) {
1334 CephContext
*cct((CephContext
*)io_ctx
.cct());
1335 ldout(cct
, 20) << "trash_move " << &io_ctx
<< " " << image_name
1338 std::string image_id
;
1339 ImageCtx
*ictx
= new ImageCtx(image_name
, "", nullptr, io_ctx
, false);
1340 int r
= ictx
->state
->open(true);
1345 ldout(cct
, 2) << "error opening image: " << cpp_strerror(-r
) << dendl
;
1349 // try to get image id from the directory
1350 r
= cls_client::dir_get_id(&io_ctx
, RBD_DIRECTORY
, image_name
, &image_id
);
// NOTE(review): the message below spells "dirctory"; it is runtime text and
// is deliberately left untouched by this documentation-only change.
1353 ldout(cct
, 2) << "error reading image id from dirctory: "
1354 << cpp_strerror(-r
) << dendl
;
1359 if (ictx
->old_format
) {
1360 ictx
->state
->close();
1364 image_id
= ictx
->id
;
// owner_lock is taken manually here (get_read); the matching put_read calls
// are on the failure path below and inside the scope-exit handler.
1365 ictx
->owner_lock
.get_read();
1366 if (ictx
->exclusive_lock
!= nullptr) {
1367 r
= ictx
->operations
->prepare_image_update();
1368 if (r
< 0 || (ictx
->exclusive_lock
!= nullptr &&
1369 !ictx
->exclusive_lock
->is_lock_owner())) {
1370 lderr(cct
) << "cannot obtain exclusive lock - not removing" << dendl
;
1371 ictx
->owner_lock
.put_read();
1372 ictx
->state
->close();
// Scope-exit handler: when ictx is still set it shuts down and deletes the
// exclusive lock if this client owns it, releases owner_lock and closes the
// image.
1378 BOOST_SCOPE_EXIT_ALL(ictx
, cct
) {
1379 if (ictx
== nullptr)
1382 bool is_locked
= ictx
->exclusive_lock
!= nullptr &&
1383 ictx
->exclusive_lock
->is_lock_owner();
1386 auto exclusive_lock
= ictx
->exclusive_lock
;
1387 exclusive_lock
->shut_down(&ctx
);
1388 ictx
->owner_lock
.put_read();
1391 lderr(cct
) << "error shutting down exclusive lock" << dendl
;
1393 delete exclusive_lock
;
1395 ictx
->owner_lock
.put_read();
1397 ictx
->state
->close();
1400 ldout(cct
, 2) << "adding image entry to rbd_trash" << dendl
;
// deferment_end_time = current time + delay (delay is added as a double).
1401 utime_t ts
= ceph_clock_now();
1402 utime_t deferment_end_time
= ts
;
1403 deferment_end_time
+= (double)delay
;
1404 cls::rbd::TrashImageSource trash_source
=
1405 static_cast<cls::rbd::TrashImageSource
>(source
);
1406 cls::rbd::TrashImageSpec
trash_spec(trash_source
, image_name
, ts
,
1407 deferment_end_time
);
1408 r
= cls_client::trash_add(&io_ctx
, image_id
, trash_spec
);
1409 if (r
< 0 && r
!= -EEXIST
) {
1410 lderr(cct
) << "error adding image " << image_name
<< " to rbd_trash"
1413 } else if (r
== -EEXIST
) {
1414 ldout(cct
, 10) << "found previous unfinished deferred remove for image:"
1415 << image_id
<< dendl
;
1416 // continue with removing image from directory
1419 ldout(cct
, 2) << "removing id object..." << dendl
;
1420 r
= io_ctx
.remove(util::id_obj_name(image_name
));
1421 if (r
< 0 && r
!= -ENOENT
) {
1422 lderr(cct
) << "error removing id object: " << cpp_strerror(r
)
1427 ldout(cct
, 2) << "removing rbd image from v2 directory..." << dendl
;
1428 r
= cls_client::dir_remove_image(&io_ctx
, RBD_DIRECTORY
, image_name
,
1432 lderr(cct
) << "error removing image from v2 directory: "
1433 << cpp_strerror(-r
) << dendl
;
// trash_get(): looks up the trash entry for image id `id` with
// cls_client::trash_get() and fills *info with, in order: the id, the stored
// image name, the trash source, and the deletion / deferment-end times
// converted to whole seconds via utime_t::sec().
// NOTE(review): the error guard and return statements are not visible here.
1441 int trash_get(IoCtx
&io_ctx
, const std::string
&id
,
1442 trash_image_info_t
*info
) {
1443 CephContext
*cct((CephContext
*)io_ctx
.cct());
1444 ldout(cct
, 20) << __func__
<< " " << &io_ctx
<< dendl
;
1446 cls::rbd::TrashImageSpec spec
;
1447 int r
= cls_client::trash_get(&io_ctx
, id
, &spec
);
1451 lderr(cct
) << "error retrieving trash entry: " << cpp_strerror(r
)
// The cls-level source value is converted to the public enum by static_cast.
1456 rbd_trash_image_source_t source
= static_cast<rbd_trash_image_source_t
>(
1458 *info
= trash_image_info_t
{id
, spec
.name
, source
, spec
.deletion_time
.sec(),
1459 spec
.deferment_end_time
.sec()};
// trash_list(): appends one trash_image_info_t to `entries` for every entry
// returned by cls_client::trash_list().
// The listing is paged: each call asks for up to max_read (1024) entries
// starting after `last_read`, the last key of a batch becomes the next
// cursor, and the loop repeats while a batch came back full
// (size() >= max_read).
// -ENOENT from the listing call is handled on a separate branch from other
// errors; the body of that branch is not visible in this excerpt.
1463 int trash_list(IoCtx
&io_ctx
, vector
<trash_image_info_t
> &entries
) {
1464 CephContext
*cct((CephContext
*)io_ctx
.cct());
1465 ldout(cct
, 20) << "trash_list " << &io_ctx
<< dendl
;
1468 uint32_t max_read
= 1024;
1469 std::string last_read
= "";
1471 map
<string
, cls::rbd::TrashImageSpec
> trash_entries
;
1472 int r
= cls_client::trash_list(&io_ctx
, last_read
, max_read
,
1474 if (r
< 0 && r
!= -ENOENT
) {
1475 lderr(cct
) << "error listing rbd trash entries: " << cpp_strerror(r
)
1478 } else if (r
== -ENOENT
) {
1482 if (trash_entries
.empty()) {
// Convert each cls-level spec into the public trash_image_info_t layout.
1486 for (const auto &entry
: trash_entries
) {
1487 rbd_trash_image_source_t source
=
1488 static_cast<rbd_trash_image_source_t
>(entry
.second
.source
);
1489 entries
.push_back({entry
.first
, entry
.second
.name
, source
,
1490 entry
.second
.deletion_time
.sec(),
1491 entry
.second
.deferment_end_time
.sec()});
// The largest key of this batch is the cursor for the next request.
1493 last_read
= trash_entries
.rbegin()->first
;
1494 more_entries
= (trash_entries
.size() >= max_read
);
1495 } while (more_entries
);
// trash_remove(): permanently removes the trashed image `image_id`.
// Visible flow:
//   1. read the trash entry (cls_client::trash_get)
//   2. reject the request when the deferment end time is still in the future
//      and `force` is false
//   3. delete the image through remove(io_ctx, "", image_id, prog_ctx,
//      false, true)
//   4. drop the trash entry itself; -ENOENT from that step is tolerated
// NOTE(review): return statements are not visible in this excerpt.
1500 int trash_remove(IoCtx
&io_ctx
, const std::string
&image_id
, bool force
,
1501 ProgressContext
& prog_ctx
) {
1502 CephContext
*cct((CephContext
*)io_ctx
.cct());
1503 ldout(cct
, 20) << "trash_remove " << &io_ctx
<< " " << image_id
1504 << " " << force
<< dendl
;
1506 cls::rbd::TrashImageSpec trash_spec
;
1507 int r
= cls_client::trash_get(&io_ctx
, image_id
, &trash_spec
);
1509 lderr(cct
) << "error getting image id " << image_id
1510 << " info from trash: " << cpp_strerror(r
) << dendl
;
// `force` bypasses the deferment window check.
1514 utime_t now
= ceph_clock_now();
1515 if (now
< trash_spec
.deferment_end_time
&& !force
) {
1516 lderr(cct
) << "error: deferment time has not expired." << dendl
;
// Empty image name, force=false, from_trash_remove=true.
1520 r
= remove(io_ctx
, "", image_id
, prog_ctx
, false, true);
1522 lderr(cct
) << "error removing image " << image_id
1523 << ", which is pending deletion" << dendl
;
1526 r
= cls_client::trash_remove(&io_ctx
, image_id
);
1527 if (r
< 0 && r
!= -ENOENT
) {
1528 lderr(cct
) << "error removing image " << image_id
1529 << " from rbd_trash object" << dendl
;
// trash_restore(): brings the trashed image `image_id` back into the image
// directory under `image_new_name`, or under the name stored in the trash
// entry when `image_new_name` is empty.
// Visible flow:
//   1. read the trash entry
//   2. look up the id object for the target name; when one exists and points
//      at a different image id the "already exists" message is logged, and
//      when it points at this image the id object is not recreated (resuming
//      an interrupted restore)
//   3. write the id object if needed, add the directory entry (-EEXIST
//      tolerated), then remove the trash entry (-ENOENT tolerated)
// NOTE(review): return statements are not visible in this excerpt.
1535 int trash_restore(librados::IoCtx
&io_ctx
, const std::string
&image_id
,
1536 const std::string
&image_new_name
) {
1537 CephContext
*cct((CephContext
*)io_ctx
.cct());
1538 ldout(cct
, 20) << "trash_restore " << &io_ctx
<< " " << image_id
<< " "
1539 << image_new_name
<< dendl
;
1541 cls::rbd::TrashImageSpec trash_spec
;
1542 int r
= cls_client::trash_get(&io_ctx
, image_id
, &trash_spec
);
1544 lderr(cct
) << "error getting image id " << image_id
1545 << " info from trash: " << cpp_strerror(r
) << dendl
;
1549 std::string image_name
= image_new_name
;
1550 if (image_name
.empty()) {
1551 // if user didn't specify a new name, let's try using the old name
1552 image_name
= trash_spec
.name
;
1553 ldout(cct
, 20) << "restoring image id " << image_id
<< " with name "
1554 << image_name
<< dendl
;
1557 // check if no image exists with the same name
1558 bool create_id_obj
= true;
1559 std::string existing_id
;
1560 r
= cls_client::get_id(&io_ctx
, util::id_obj_name(image_name
), &existing_id
);
1561 if (r
< 0 && r
!= -ENOENT
) {
1562 lderr(cct
) << "error checking if image " << image_name
<< " exists: "
1563 << cpp_strerror(r
) << dendl
;
1565 } else if (r
!= -ENOENT
){
1566 // checking if we are recovering from an incomplete restore
1567 if (existing_id
!= image_id
) {
1568 ldout(cct
, 2) << "an image with the same name already exists" << dendl
;
// The id object already maps this name to this image, so skip creating it.
1571 create_id_obj
= false;
1574 if (create_id_obj
) {
1575 ldout(cct
, 2) << "adding id object" << dendl
;
1576 librados::ObjectWriteOperation op
;
1578 cls_client::set_id(&op
, image_id
);
1579 r
= io_ctx
.operate(util::id_obj_name(image_name
), &op
);
1581 lderr(cct
) << "error adding id object for image " << image_name
1582 << ": " << cpp_strerror(r
) << dendl
;
1587 ldout(cct
, 2) << "adding rbd image from v2 directory..." << dendl
;
1588 r
= cls_client::dir_add_image(&io_ctx
, RBD_DIRECTORY
, image_name
,
1590 if (r
< 0 && r
!= -EEXIST
) {
1591 lderr(cct
) << "error adding image to v2 directory: "
1592 << cpp_strerror(r
) << dendl
;
1596 ldout(cct
, 2) << "removing image from trash..." << dendl
;
1597 r
= cls_client::trash_remove(&io_ctx
, image_id
);
1598 if (r
< 0 && r
!= -ENOENT
) {
1599 lderr(cct
) << "error removing image id " << image_id
<< " from trash: "
1600 << cpp_strerror(r
) << dendl
;
// snap_list(): appends one snap_info_t (name, id, size) to `snaps` for every
// entry in ictx->snap_info. The image state is refreshed first if required
// and the map is walked under a read lock on ictx->snap_lock.
// NOTE(review): the declaration of `info` and the return statements are not
// visible in this excerpt.
1607 int snap_list(ImageCtx
*ictx
, vector
<snap_info_t
>& snaps
)
1609 ldout(ictx
->cct
, 20) << "snap_list " << ictx
<< dendl
;
1611 int r
= ictx
->state
->refresh_if_required();
1615 RWLock::RLocker
l(ictx
->snap_lock
);
1616 for (map
<snap_t
, SnapInfo
>::iterator it
= ictx
->snap_info
.begin();
1617 it
!= ictx
->snap_info
.end(); ++it
) {
// The map key is the snapshot id; name and size come from the mapped value.
1619 info
.name
= it
->second
.name
;
1620 info
.id
= it
->first
;
1621 info
.size
= it
->second
.size
;
1622 snaps
.push_back(info
);
// snap_exists(): sets *exists to true when get_snap_id(snap_namespace,
// snap_name) yields something other than CEPH_NOSNAP. The image state is
// refreshed first if required and the lookup runs under a read lock on
// ictx->snap_lock.
// NOTE(review): return statements are not visible in this excerpt.
1628 int snap_exists(ImageCtx
*ictx
, const cls::rbd::SnapshotNamespace
& snap_namespace
,
1629 const char *snap_name
, bool *exists
)
1631 ldout(ictx
->cct
, 20) << "snap_exists " << ictx
<< " " << snap_name
<< dendl
;
1633 int r
= ictx
->state
->refresh_if_required();
1637 RWLock::RLocker
l(ictx
->snap_lock
);
1638 *exists
= ictx
->get_snap_id(snap_namespace
, snap_name
) != CEPH_NOSNAP
;
// snap_remove(): removes the user snapshot `snap_name`, with optional
// preparation selected by `flags`:
//   RBD_SNAP_REMOVE_FLATTEN   - flatten_children() is called first
//   RBD_SNAP_REMOVE_UNPROTECT - a protected snapshot is unprotected and its
//                               protection status is checked again
// The removal is issued through ictx->operations->snap_remove() with `&ctx`
// as completion.
// NOTE(review): declarations of `r`, `is_protected` and `ctx`, the error
// guards and the return statements are not visible in this excerpt.
1642 int snap_remove(ImageCtx
*ictx
, const char *snap_name
, uint32_t flags
,
1643 ProgressContext
& pctx
)
1645 ldout(ictx
->cct
, 20) << "snap_remove " << ictx
<< " " << snap_name
<< " flags: " << flags
<< dendl
;
1649 r
= ictx
->state
->refresh_if_required();
1653 if (flags
& RBD_SNAP_REMOVE_FLATTEN
) {
1654 r
= flatten_children(ictx
, snap_name
, pctx
);
1661 r
= snap_is_protected(ictx
, snap_name
, &is_protected
);
// NOTE(review): `&` binds tighter than `&&`, so this reads as
// is_protected && (flags & RBD_SNAP_REMOVE_UNPROTECT).
1666 if (is_protected
&& flags
& RBD_SNAP_REMOVE_UNPROTECT
) {
1667 r
= ictx
->operations
->snap_unprotect(cls::rbd::UserSnapshotNamespace(), snap_name
);
1669 lderr(ictx
->cct
) << "failed to unprotect snapshot: " << snap_name
<< dendl
;
// Query the protection status again after the unprotect request.
1673 r
= snap_is_protected(ictx
, snap_name
, &is_protected
);
1678 lderr(ictx
->cct
) << "snapshot is still protected after unprotection" << dendl
;
1684 ictx
->operations
->snap_remove(cls::rbd::UserSnapshotNamespace(), snap_name
, &ctx
);
// snap_get_timestamp(): copies the creation timestamp recorded for snapshot
// `snap_id` in ictx->snap_info into *timestamp.
// An unknown snap_id trips the assert below rather than producing an error.
// NOTE(review): no acquisition of ictx->snap_lock is visible in this
// excerpt, although snap_list() and snap_exists() take it before touching
// snapshot state; confirm whether callers are expected to hold it.
1690 int snap_get_timestamp(ImageCtx
*ictx
, uint64_t snap_id
, struct timespec
*timestamp
)
1692 std::map
<librados::snap_t
, SnapInfo
>::iterator snap_it
= ictx
->snap_info
.find(snap_id
);
1693 assert(snap_it
!= ictx
->snap_info
.end());
1694 utime_t time
= snap_it
->second
.timestamp
;
1695 time
.to_timespec(timestamp
);
// snap_get_limit(): reads the snapshot limit from the image header object
// with cls_client::snapshot_get_limit(). When that call reports -EOPNOTSUPP,
// *limit is set to UINT64_MAX.
// NOTE(review): the remaining call arguments and the return statements are
// not visible in this excerpt.
1699 int snap_get_limit(ImageCtx
*ictx
, uint64_t *limit
)
1701 int r
= cls_client::snapshot_get_limit(&ictx
->md_ctx
, ictx
->header_oid
,
1703 if (r
== -EOPNOTSUPP
) {
1704 *limit
= UINT64_MAX
;
// snap_set_limit(): thin wrapper that forwards `limit` to
// ictx->operations->snap_set_limit() and returns its result.
1710 int snap_set_limit(ImageCtx
*ictx
, uint64_t limit
)
1712 return ictx
->operations
->snap_set_limit(limit
);
// CopyProgressCtx: small state holder built around a ProgressContext
// reference. The constructor initialises destictx to NULL and src_size to 0
// and binds prog_ctx to the caller's progress callback.
// NOTE(review): the declarations of destictx and src_size are not visible in
// this excerpt.
1715 struct CopyProgressCtx
{
1716 explicit CopyProgressCtx(ProgressContext
&p
)
1717 : destictx(NULL
), src_size(0), prog_ctx(p
)
// Held by reference: the ProgressContext must outlive this object.
1722 ProgressContext
&prog_ctx
;
// copy() (IoCtx destination): creates a new image `destname` in
// `dest_md_ctx` and copies the contents of `src` into it.
// For format, stripe unit, stripe count, order and features, the value is
// read from `opts`; when opts.get() returns non-zero the source image's value
// is stored into `opts` instead. Features outside RBD_FEATURES_ALL are
// rejected with an error log.
// The destination is then created with the source size, opened, filled by
// the ImageCtx-to-ImageCtx copy() overload and closed; a close failure is
// only considered when the copy itself returned 0.
// NOTE(review): return statements and the `if (r < 0)` guards are not
// visible in this excerpt.
1725 int copy(ImageCtx
*src
, IoCtx
& dest_md_ctx
, const char *destname
,
1726 ImageOptions
& opts
, ProgressContext
&prog_ctx
, size_t sparse_size
)
1728 CephContext
*cct
= (CephContext
*)dest_md_ctx
.cct();
1729 ldout(cct
, 20) << "copy " << src
->name
1730 << (src
->snap_name
.length() ? "@" + src
->snap_name
: "")
1731 << " -> " << destname
<< " opts = " << opts
<< dendl
;
// Features and size are sampled together under snap_lock.
1733 src
->snap_lock
.get_read();
1734 uint64_t features
= src
->features
;
1735 uint64_t src_size
= src
->get_image_size(src
->snap_id
);
1736 src
->snap_lock
.put_read();
1737 uint64_t format
= src
->old_format
? 1 : 2;
1738 if (opts
.get(RBD_IMAGE_OPTION_FORMAT
, &format
) != 0) {
1739 opts
.set(RBD_IMAGE_OPTION_FORMAT
, format
);
1741 uint64_t stripe_unit
= src
->stripe_unit
;
1742 if (opts
.get(RBD_IMAGE_OPTION_STRIPE_UNIT
, &stripe_unit
) != 0) {
1743 opts
.set(RBD_IMAGE_OPTION_STRIPE_UNIT
, stripe_unit
);
1745 uint64_t stripe_count
= src
->stripe_count
;
1746 if (opts
.get(RBD_IMAGE_OPTION_STRIPE_COUNT
, &stripe_count
) != 0) {
1747 opts
.set(RBD_IMAGE_OPTION_STRIPE_COUNT
, stripe_count
);
1749 uint64_t order
= src
->order
;
1750 if (opts
.get(RBD_IMAGE_OPTION_ORDER
, &order
) != 0) {
1751 opts
.set(RBD_IMAGE_OPTION_ORDER
, order
);
1753 if (opts
.get(RBD_IMAGE_OPTION_FEATURES
, &features
) != 0) {
1754 opts
.set(RBD_IMAGE_OPTION_FEATURES
, features
);
1756 if (features
& ~RBD_FEATURES_ALL
) {
1757 lderr(cct
) << "librbd does not support requested features" << dendl
;
1761 int r
= create(dest_md_ctx
, destname
, "", src_size
, opts
, "", "", false);
1763 lderr(cct
) << "header creation failed" << dendl
;
// The order option is written back into opts after create().
1766 opts
.set(RBD_IMAGE_OPTION_ORDER
, static_cast<uint64_t>(order
));
1768 ImageCtx
*dest
= new librbd::ImageCtx(destname
, "", NULL
,
1769 dest_md_ctx
, false);
1770 r
= dest
->state
->open(false);
1772 lderr(cct
) << "failed to read newly created header" << dendl
;
1776 r
= copy(src
, dest
, prog_ctx
, sparse_size
);
1778 int close_r
= dest
->state
->close();
1779 if (r
== 0 && close_r
< 0) {
// C_CopyWrite: Context used as the completion of one destination write
// issued during an image copy. It stores the write's bufferlist pointer and
// the Context passed to the constructor.
// NOTE(review): the body of finish() and the member declarations are not
// visible in this excerpt, so what happens to m_bl and m_ctx on completion
// cannot be stated from here.
1785 class C_CopyWrite
: public Context
{
1787 C_CopyWrite(bufferlist
*bl
, Context
* ctx
)
1788 : m_bl(bl
), m_ctx(ctx
) {}
1789 void finish(int r
) override
{
// C_CopyRead: completion Context for one source read issued by copy().
// The constructor registers the operation with the SimpleThrottle
// (start_op). finish() then, as far as visible here:
//   - logs a read error and ends the throttled op with `r`
//   - ends the op without writing when the buffer is entirely zero
//   - otherwise flattens the buffer into a single bufferptr, walks it with
//     util::calc_sparse_extent() and issues one aio_write per extent through
//     the destination's io_work_queue, using a C_Gather whose finisher ends
//     the throttled op
// NOTE(review): several conditions and the declaration of `offset` are not
// visible in this excerpt.
1798 class C_CopyRead
: public Context
{
1800 C_CopyRead(SimpleThrottle
*throttle
, ImageCtx
*dest
, uint64_t offset
,
1801 bufferlist
*bl
, size_t sparse_size
)
1802 : m_throttle(throttle
), m_dest(dest
), m_offset(offset
), m_bl(bl
),
1803 m_sparse_size(sparse_size
) {
1804 m_throttle
->start_op();
1806 void finish(int r
) override
{
1808 lderr(m_dest
->cct
) << "error reading from source image at offset "
1809 << m_offset
<< ": " << cpp_strerror(r
) << dendl
;
1811 m_throttle
->end_op(r
);
// The number of bytes read must match the buffer length.
1814 assert(m_bl
->length() == (size_t)r
);
1816 if (m_bl
->is_zero()) {
1818 m_throttle
->end_op(r
);
// A sparse size of 0 falls back to the destination object size.
// NOTE(review): `1 << order` is evaluated as int; confirm order stays below
// 31 or the shift overflows.
1822 if (!m_sparse_size
) {
1823 m_sparse_size
= (1 << m_dest
->order
);
// The lambda captures a copy of the throttle pointer rather than `this`.
1826 auto *throttle
= m_throttle
;
1827 auto *end_op_ctx
= new FunctionContext([throttle
](int r
) {
1828 throttle
->end_op(r
);
1830 auto gather_ctx
= new C_Gather(m_dest
->cct
, end_op_ctx
);
1832 bufferptr
m_ptr(m_bl
->length());
1833 m_bl
->rebuild(m_ptr
);
1834 size_t write_offset
= 0;
1835 size_t write_length
= 0;
1837 size_t length
= m_bl
->length();
1838 while (offset
< length
) {
1839 if (util::calc_sparse_extent(m_ptr
,
1845 bufferptr
write_ptr(m_ptr
, write_offset
, write_length
);
1846 bufferlist
*write_bl
= new bufferlist();
1847 write_bl
->push_back(write_ptr
);
1848 Context
*ctx
= new C_CopyWrite(write_bl
, gather_ctx
->new_sub());
1849 auto comp
= io::AioCompletion::create(ctx
);
1851 // coordinate through AIO WQ to ensure lock is acquired if needed
1852 m_dest
->io_work_queue
->aio_write(comp
, m_offset
+ write_offset
,
1854 std::move(*write_bl
),
1855 LIBRADOS_OP_FLAG_FADVISE_DONTNEED
,
1856 std::move(read_trace
));
1857 write_offset
= offset
;
// At least one sub-write must have been created before activation.
1862 assert(gather_ctx
->get_sub_created_count() > 0);
1863 gather_ctx
->activate();
// Assigned by copy() from the read request's trace after construction.
1866 ZTracer::Trace read_trace
;
1869 SimpleThrottle
*m_throttle
;
1873 size_t m_sparse_size
;
// copy() (ImageCtx destination): copies the data and metadata of `src` into
// the already-open image `dest`.
// Visible flow:
//   1. sample both image sizes under their snap_lock; an error is logged
//      when dest is smaller than src
//   2. list the source metadata and, when non-empty, set it on the
//      destination; -EOPNOTSUPP and -EIO from the listing are tolerated
//   3. walk the source in stripe-period sized chunks, issuing one throttled
//      asynchronous read per chunk (C_CopyRead performs the write-back) and
//      reporting progress after each chunk
//   4. wait for the throttle to drain, then report 100% progress
// The loop stops early and returns throttle.wait_for_ret() as soon as the
// throttle reports a pending error.
// NOTE(review): the declaration of `r`, some guards and the final return are
// not visible in this excerpt.
1876 int copy(ImageCtx
*src
, ImageCtx
*dest
, ProgressContext
&prog_ctx
, size_t sparse_size
)
1878 src
->snap_lock
.get_read();
1879 uint64_t src_size
= src
->get_image_size(src
->snap_id
);
1880 src
->snap_lock
.put_read();
1882 dest
->snap_lock
.get_read();
1883 uint64_t dest_size
= dest
->get_image_size(dest
->snap_id
);
1884 dest
->snap_lock
.put_read();
1886 CephContext
*cct
= src
->cct
;
1887 if (dest_size
< src_size
) {
1888 lderr(cct
) << " src size " << src_size
<< " > dest size "
1889 << dest_size
<< dendl
;
1893 map
<string
, bufferlist
> pairs
;
1895 r
= cls_client::metadata_list(&src
->md_ctx
, src
->header_oid
, "", 0, &pairs
);
1896 if (r
< 0 && r
!= -EOPNOTSUPP
&& r
!= -EIO
) {
1897 lderr(cct
) << "couldn't list metadata: " << cpp_strerror(r
) << dendl
;
1899 } else if (r
== 0 && !pairs
.empty()) {
1900 r
= cls_client::metadata_set(&dest
->md_ctx
, dest
->header_oid
, pairs
);
1902 lderr(cct
) << "couldn't set metadata: " << cpp_strerror(r
) << dendl
;
// Tracing is only initialised when blkin_trace_all is set on the source.
1907 ZTracer::Trace trace
;
1908 if (src
->blkin_trace_all
) {
1909 trace
.init("copy", &src
->trace_endpoint
);
// The source owner_lock is held (read) while the reads are issued; the
// throttle is sized by src->concurrent_management_ops.
1912 RWLock::RLocker
owner_lock(src
->owner_lock
);
1913 SimpleThrottle
throttle(src
->concurrent_management_ops
, false);
1914 uint64_t period
= src
->get_stripe_period();
1915 unsigned fadvise_flags
= LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL
|
1916 LIBRADOS_OP_FLAG_FADVISE_NOCACHE
;
1917 for (uint64_t offset
= 0; offset
< src_size
; offset
+= period
) {
1918 if (throttle
.pending_error()) {
1919 return throttle
.wait_for_ret();
// The final chunk may be shorter than one stripe period.
1922 uint64_t len
= min(period
, src_size
- offset
);
1923 bufferlist
*bl
= new bufferlist();
1924 auto ctx
= new C_CopyRead(&throttle
, dest
, offset
, bl
, sparse_size
);
1925 auto comp
= io::AioCompletion::create_and_start
<Context
>(
1926 ctx
, src
, io::AIO_TYPE_READ
);
1928 io::ImageReadRequest
<> req(*src
, comp
, {{offset
, len
}},
1929 io::ReadResult
{bl
}, fadvise_flags
,
// Hand the read's trace to the completion so the write can reuse it.
1931 ctx
->read_trace
= req
.get_trace();
1934 prog_ctx
.update_progress(offset
, src_size
);
1937 r
= throttle
.wait_for_ret();
1939 prog_ctx
.update_progress(src_size
, src_size
);
// snap_set(): switches the image context to snapshot `snap_name` within
// `snap_namespace`. A null snap_name is converted to the empty string, which
// the error message below words as "unset".
// The refresh result is intentionally ignored (see the existing comment).
// NOTE(review): the declaration of `ctx`, the wait on it and the return
// statements are not visible in this excerpt.
1943 int snap_set(ImageCtx
*ictx
, const cls::rbd::SnapshotNamespace
&snap_namespace
,
1944 const char *snap_name
)
1946 ldout(ictx
->cct
, 20) << "snap_set " << ictx
<< " snap = "
1947 << (snap_name
? snap_name
: "NULL") << dendl
;
1949 // ignore return value, since we may be set to a non-existent
1950 // snapshot and the user is trying to fix that
1951 ictx
->state
->refresh_if_required();
1954 std::string
name(snap_name
== nullptr ? "" : snap_name
);
1955 ictx
->state
->snap_set(snap_namespace
, name
, &ctx
);
1960 lderr(ictx
->cct
) << "failed to " << (name
.empty() ? "un" : "") << "set "
1961 << "snapshot: " << cpp_strerror(r
) << dendl
;
// list_lockers(): reports the advisory lock state cached on the ImageCtx.
// After refreshing the image state if required, and under a read lock on
// ictx->md_lock, it copies ictx->exclusive_locked to *exclusive,
// ictx->lock_tag to *tag and appends one locker_t (client, cookie, address,
// the first and last produced with stringify()) per entry in ictx->lockers.
// NOTE(review): the remaining parameters, any null checks on the output
// pointers and the return statements are not visible in this excerpt. The
// name `locker` is used both for the RLocker guard and for the per-entry
// record, so the latter is presumably declared in an inner scope; confirm.
1969 int list_lockers(ImageCtx
*ictx
,
1970 std::list
<locker_t
> *lockers
,
1974 ldout(ictx
->cct
, 20) << "list_locks on image " << ictx
<< dendl
;
1976 int r
= ictx
->state
->refresh_if_required();
1980 RWLock::RLocker
locker(ictx
->md_lock
);
1982 *exclusive
= ictx
->exclusive_locked
;
1984 *tag
= ictx
->lock_tag
;
1987 map
<rados::cls::lock::locker_id_t
,
1988 rados::cls::lock::locker_info_t
>::const_iterator it
;
1989 for (it
= ictx
->lockers
.begin(); it
!= ictx
->lockers
.end(); ++it
) {
1991 locker
.client
= stringify(it
->first
.locker
);
1992 locker
.cookie
= it
->first
.cookie
;
1993 locker
.address
= stringify(it
->second
.addr
);
1994 lockers
->push_back(locker
);
// lock(): takes the advisory RBD_LOCK_NAME lock on the image header object
// through rados::cls::lock::lock(), as LOCK_EXCLUSIVE when `exclusive` is
// true and LOCK_SHARED otherwise, using the caller's cookie and tag, an
// empty description, a default-constructed utime_t duration and flags 0.
// The image state is refreshed first if required, the cls call runs under a
// read lock on ictx->md_lock, and ictx->notify_update() is called afterwards.
// NOTE(review): the `tag` parameter declaration, the error guards and the
// return statements are not visible in this excerpt.
2001 int lock(ImageCtx
*ictx
, bool exclusive
, const string
& cookie
,
2004 ldout(ictx
->cct
, 20) << "lock image " << ictx
<< " exclusive=" << exclusive
2005 << " cookie='" << cookie
<< "' tag='" << tag
<< "'"
2008 int r
= ictx
->state
->refresh_if_required();
2013 * If we wanted we could do something more intelligent, like local
2014 * checks that we think we will succeed. But for now, let's not
2015 * duplicate that code.
2018 RWLock::RLocker
locker(ictx
->md_lock
);
2019 r
= rados::cls::lock::lock(&ictx
->md_ctx
, ictx
->header_oid
, RBD_LOCK_NAME
,
2020 exclusive
? LOCK_EXCLUSIVE
: LOCK_SHARED
,
2021 cookie
, tag
, "", utime_t(), 0);
2027 ictx
->notify_update();
// unlock(): releases the advisory RBD_LOCK_NAME lock held under `cookie` on
// the image header object through rados::cls::lock::unlock().
// The image state is refreshed first if required, the cls call runs under a
// read lock on ictx->md_lock, and ictx->notify_update() is called afterwards.
// NOTE(review): error guards and return statements are not visible in this
// excerpt.
2031 int unlock(ImageCtx
*ictx
, const string
& cookie
)
2033 ldout(ictx
->cct
, 20) << "unlock image " << ictx
2034 << " cookie='" << cookie
<< "'" << dendl
;
2036 int r
= ictx
->state
->refresh_if_required();
2041 RWLock::RLocker
locker(ictx
->md_lock
);
2042 r
= rados::cls::lock::unlock(&ictx
->md_ctx
, ictx
->header_oid
,
2043 RBD_LOCK_NAME
, cookie
);
2049 ictx
->notify_update();
// break_lock(): forcibly removes the advisory lock held by `client` with
// `cookie` on the image header object.
// Visible flow:
//   1. refresh the image state if required and parse `client` into an
//      entity_name_t (a parse failure is logged)
//   2. when ictx->blacklist_on_break_lock is set, fetch the current lockers,
//      find the address registered for that client and blacklist it for
//      ictx->blacklist_expire_seconds
//   3. call rados::cls::lock::break_lock() and then ictx->notify_update()
// NOTE(review): the declaration of `lockers`, the handling of an empty
// client_address, the error guards and the return statements are not visible
// in this excerpt.
2053 int break_lock(ImageCtx
*ictx
, const string
& client
,
2054 const string
& cookie
)
2056 ldout(ictx
->cct
, 20) << "break_lock image " << ictx
<< " client='" << client
2057 << "' cookie='" << cookie
<< "'" << dendl
;
2059 int r
= ictx
->state
->refresh_if_required();
2063 entity_name_t lock_client
;
2064 if (!lock_client
.parse(client
)) {
2065 lderr(ictx
->cct
) << "Unable to parse client '" << client
2070 if (ictx
->blacklist_on_break_lock
) {
2071 typedef std::map
<rados::cls::lock::locker_id_t
,
2072 rados::cls::lock::locker_info_t
> Lockers
;
2074 ClsLockType lock_type
;
2075 std::string lock_tag
;
2076 r
= rados::cls::lock::get_lock_info(&ictx
->md_ctx
, ictx
->header_oid
,
2077 RBD_LOCK_NAME
, &lockers
, &lock_type
,
2080 lderr(ictx
->cct
) << "unable to retrieve lock info: " << cpp_strerror(r
)
// Search the current lockers for the client whose lock is being broken.
2085 std::string client_address
;
2086 for (Lockers::iterator it
= lockers
.begin();
2087 it
!= lockers
.end(); ++it
) {
2088 if (it
->first
.locker
== lock_client
) {
2089 client_address
= stringify(it
->second
.addr
);
2093 if (client_address
.empty()) {
2097 RWLock::RLocker
locker(ictx
->md_lock
);
2098 librados::Rados
rados(ictx
->md_ctx
);
2099 r
= rados
.blacklist_add(client_address
,
2100 ictx
->blacklist_expire_seconds
);
2102 lderr(ictx
->cct
) << "unable to blacklist client: " << cpp_strerror(r
)
2108 r
= rados::cls::lock::break_lock(&ictx
->md_ctx
, ictx
->header_oid
,
2109 RBD_LOCK_NAME
, cookie
, lock_client
);
2112 ictx
->notify_update();
// rbd_ctx_cb(): completion callback adapter. `arg` is reinterpreted as a
// Context* and `cb` as an io::AioCompletion*; the Context is completed with
// the AioCompletion's return value.
2116 void rbd_ctx_cb(completion_t cb
, void *arg
)
2118 Context
*ctx
= reinterpret_cast<Context
*>(arg
);
2119 auto comp
= reinterpret_cast<io::AioCompletion
*>(cb
);
2120 ctx
->complete(comp
->get_return_value());
// read_iterate(): reads the range [off, off + len) of the image and hands the
// data to the callback `cb` (offset so far, byte count, data pointer, user
// `arg`).
// Visible flow:
//   1. refresh the image state if required and clip the request to the image
//      size with clip_io() under snap_lock
//   2. read up to the end of the current stripe period at a time (read_len is
//      the smaller of the bytes left in the period and the bytes left
//      overall), waiting synchronously for each read
//   3. update the read latency, read count and read bytes perf counters
// NOTE(review): the loop structure, the `arg` parameter declaration and the
// return statements are not visible in this excerpt.
2124 int64_t read_iterate(ImageCtx
*ictx
, uint64_t off
, uint64_t len
,
2125 int (*cb
)(uint64_t, size_t, const char *, void *),
2128 utime_t start_time
, elapsed
;
2130 ldout(ictx
->cct
, 20) << "read_iterate " << ictx
<< " off = " << off
2131 << " len = " << len
<< dendl
;
2133 int r
= ictx
->state
->refresh_if_required();
// mylen is the clipped length; clip_io requires snap_lock to be held.
2137 uint64_t mylen
= len
;
2138 ictx
->snap_lock
.get_read();
2139 r
= clip_io(ictx
, off
, &mylen
);
2140 ictx
->snap_lock
.put_read();
2144 int64_t total_read
= 0;
2145 uint64_t period
= ictx
->get_stripe_period();
2146 uint64_t left
= mylen
;
2148 ZTracer::Trace trace
;
2149 if (ictx
->blkin_trace_all
) {
2150 trace
.init("read_iterate", &ictx
->trace_endpoint
);
2153 RWLock::RLocker
owner_locker(ictx
->owner_lock
);
2154 start_time
= ceph_clock_now();
// period_off is `off` rounded down to a stripe-period boundary.
2156 uint64_t period_off
= off
- (off
% period
);
2157 uint64_t read_len
= min(period_off
+ period
- off
, left
);
2162 auto c
= io::AioCompletion::create_and_start(&ctx
, ictx
,
// NOTE(review): `trace` is moved into the request here; if this call sits
// inside the read loop, later iterations would pass a moved-from trace.
// Confirm against the full file.
2164 io::ImageRequest
<>::aio_read(ictx
, c
, {{off
, read_len
}},
2165 io::ReadResult
{&bl
}, 0, std::move(trace
));
2167 int ret
= ctx
.wait();
2172 r
= cb(total_read
, ret
, bl
.c_str(), arg
);
2182 elapsed
= ceph_clock_now() - start_time
;
2183 ictx
->perfcounter
->tinc(l_librbd_rd_latency
, elapsed
);
2184 ictx
->perfcounter
->inc(l_librbd_rd
);
2185 ictx
->perfcounter
->inc(l_librbd_rd_bytes
, mylen
);
// clip_io(): checks the extent (off, *len) against the size of the image at
// its current snap_id and shortens *len when the extent runs past the end.
// The caller must hold ictx->snap_lock (asserted below).
// NOTE(review): the branches for a missing snapshot, a zero length and an
// out-of-range offset are only partly visible in this excerpt, so their
// return codes are not stated here.
2189 // validate extent against image size; clip to image size if necessary
2190 int clip_io(ImageCtx
*ictx
, uint64_t off
, uint64_t *len
)
2192 assert(ictx
->snap_lock
.is_locked());
2193 uint64_t image_size
= ictx
->get_image_size(ictx
->snap_id
);
2194 bool snap_exists
= ictx
->snap_exists
;
2199 // special-case "len == 0" requests: always valid
2203 // can't start past end
2204 if (off
>= image_size
)
2207 // clip requests that extend past end to just end
// NOTE(review): `off + *len` is unsigned 64-bit arithmetic and can wrap for
// very large lengths, in which case no clipping happens; the (size_t) cast
// below also narrows the result where size_t is 32 bits. Confirm whether
// callers bound these values.
2208 if ((off
+ *len
) > image_size
)
2209 *len
= (size_t)(image_size
- off
);
// flush(): user-facing flush of the image. Visible steps: refresh the image
// state if required, call ictx->user_flushed(), take a read lock on
// ictx->owner_lock and increment the l_librbd_flush perf counter.
// NOTE(review): the call that actually performs the flush and the return
// statements are not visible in this excerpt.
2214 int flush(ImageCtx
*ictx
)
2216 CephContext
*cct
= ictx
->cct
;
2217 ldout(cct
, 20) << "flush " << ictx
<< dendl
;
2219 int r
= ictx
->state
->refresh_if_required();
2224 ictx
->user_flushed();
2227 RWLock::RLocker
owner_locker(ictx
->owner_lock
);
2232 ictx
->perfcounter
->inc(l_librbd_flush
);
// invalidate_cache(): refreshes the image state if required, then calls
// ictx->invalidate_cache(false) while holding owner_lock (read) and md_lock
// (write), and increments the l_librbd_invalidate_cache perf counter.
// NOTE(review): the error guard after the refresh and the return statement
// are not visible in this excerpt.
2236 int invalidate_cache(ImageCtx
*ictx
)
2238 CephContext
*cct
= ictx
->cct
;
2239 ldout(cct
, 20) << "invalidate_cache " << ictx
<< dendl
;
2241 int r
= ictx
->state
->refresh_if_required();
// Lock order used here: owner_lock first, then md_lock.
2246 RWLock::RLocker
owner_locker(ictx
->owner_lock
);
2247 RWLock::WLocker
md_locker(ictx
->md_lock
);
2248 r
= ictx
->invalidate_cache(false);
2249 ictx
->perfcounter
->inc(l_librbd_invalidate_cache
);
// poll_io_events(): moves up to `numcomp` finished completions from the front
// of ictx->completed_reqs into the caller's `comps` array, under
// ictx->completed_reqs_lock.
// NOTE(review): the declaration of `i`, any validation of `numcomp`, the
// action taken when the queue is empty and the return value are not visible
// in this excerpt.
2253 int poll_io_events(ImageCtx
*ictx
, io::AioCompletion
**comps
, int numcomp
)
2257 CephContext
*cct
= ictx
->cct
;
2258 ldout(cct
, 20) << __func__
<< " " << ictx
<< " numcomp = " << numcomp
2261 Mutex::Locker
l(ictx
->completed_reqs_lock
);
2262 while (i
< numcomp
) {
2263 if (ictx
->completed_reqs
.empty())
// Take the oldest completion and remove it from the queue.
2265 comps
[i
++] = ictx
->completed_reqs
.front();
2266 ictx
->completed_reqs
.pop_front();
// metadata_get(): refreshes the image state if required, then returns the
// result of cls_client::metadata_get() for `key` on the image header object,
// which stores the value in *value.
// NOTE(review): the error guard after the refresh is not visible in this
// excerpt.
2271 int metadata_get(ImageCtx
*ictx
, const string
&key
, string
*value
)
2273 CephContext
*cct
= ictx
->cct
;
2274 ldout(cct
, 20) << "metadata_get " << ictx
<< " key=" << key
<< dendl
;
2276 int r
= ictx
->state
->refresh_if_required();
2281 return cls_client::metadata_get(&ictx
->md_ctx
, ictx
->header_oid
, key
, value
);
// metadata_list(): refreshes the image state if required, then returns the
// result of cls_client::metadata_list() on the image header object, passing
// `start` and `max` through unchanged and filling *pairs.
// NOTE(review): the error guard after the refresh is not visible in this
// excerpt.
2284 int metadata_list(ImageCtx
*ictx
, const string
&start
, uint64_t max
, map
<string
, bufferlist
> *pairs
)
2286 CephContext
*cct
= ictx
->cct
;
2287 ldout(cct
, 20) << "metadata_list " << ictx
<< dendl
;
2289 int r
= ictx
->state
->refresh_if_required();
2294 return cls_client::metadata_list(&ictx
->md_ctx
, ictx
->header_oid
, start
, max
, pairs
);
// C_RBD_Readahead: completion Context for one readahead object read. It
// records the image context and the object extent (oid, offset, length); on
// completion it logs the extent and decrements the image's pending-readahead
// count via ictx->readahead.dec_pending().
// NOTE(review): the member declarations are not visible in this excerpt.
2297 struct C_RBD_Readahead
: public Context
{
2302 C_RBD_Readahead(ImageCtx
*ictx
, object_t oid
, uint64_t offset
, uint64_t length
)
2303 : ictx(ictx
), oid(oid
), offset(offset
), length(length
) { }
2304 void finish(int r
) override
{
2305 ldout(ictx
->cct
, 20) << "C_RBD_Readahead on " << oid
<< ": " << offset
<< "+" << length
<< dendl
;
2306 ictx
->readahead
.dec_pending();
2310 void readahead(ImageCtx
*ictx
,
2311 const vector
<pair
<uint64_t,uint64_t> >& image_extents
)
2313 uint64_t total_bytes
= 0;
2314 for (vector
<pair
<uint64_t,uint64_t> >::const_iterator p
= image_extents
.begin();
2315 p
!= image_extents
.end();
2317 total_bytes
+= p
->second
;
2320 ictx
->md_lock
.get_write();
2321 bool abort
= ictx
->readahead_disable_after_bytes
!= 0 &&
2322 ictx
->total_bytes_read
> ictx
->readahead_disable_after_bytes
;
2324 ictx
->md_lock
.put_write();
2327 ictx
->total_bytes_read
+= total_bytes
;
2328 ictx
->snap_lock
.get_read();
2329 uint64_t image_size
= ictx
->get_image_size(ictx
->snap_id
);
2330 ictx
->snap_lock
.put_read();
2331 ictx
->md_lock
.put_write();
2333 pair
<uint64_t, uint64_t> readahead_extent
= ictx
->readahead
.update(image_extents
, image_size
);
2334 uint64_t readahead_offset
= readahead_extent
.first
;
2335 uint64_t readahead_length
= readahead_extent
.second
;
2337 if (readahead_length
> 0) {
2338 ldout(ictx
->cct
, 20) << "(readahead logical) " << readahead_offset
<< "~" << readahead_length
<< dendl
;
2339 map
<object_t
,vector
<ObjectExtent
> > readahead_object_extents
;
2340 Striper::file_to_extents(ictx
->cct
, ictx
->format_string
, &ictx
->layout
,
2341 readahead_offset
, readahead_length
, 0, readahead_object_extents
);
2342 for (map
<object_t
,vector
<ObjectExtent
> >::iterator p
= readahead_object_extents
.begin(); p
!= readahead_object_extents
.end(); ++p
) {
2343 for (vector
<ObjectExtent
>::iterator q
= p
->second
.begin(); q
!= p
->second
.end(); ++q
) {
2344 ldout(ictx
->cct
, 20) << "(readahead) oid " << q
->oid
<< " " << q
->offset
<< "~" << q
->length
<< dendl
;
2346 Context
*req_comp
= new C_RBD_Readahead(ictx
, q
->oid
, q
->offset
, q
->length
);
2347 ictx
->readahead
.inc_pending();
2348 ictx
->aio_read_from_cache(q
->oid
, q
->objectno
, NULL
,
2349 q
->length
, q
->offset
,
2350 req_comp
, 0, nullptr);
2353 ictx
->perfcounter
->inc(l_librbd_readahead
);
2354 ictx
->perfcounter
->inc(l_librbd_readahead_bytes
, readahead_length
);