1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include <boost/assign/list_of.hpp>
7 #include "include/neorados/RADOS.hpp"
9 #include "common/ceph_context.h"
10 #include "common/dout.h"
11 #include "common/errno.h"
12 #include "common/perf_counters.h"
13 #include "common/Timer.h"
15 #include "librbd/AsioEngine.h"
16 #include "librbd/AsyncRequest.h"
17 #include "librbd/ExclusiveLock.h"
18 #include "librbd/internal.h"
19 #include "librbd/ImageCtx.h"
20 #include "librbd/ImageState.h"
21 #include "librbd/ImageWatcher.h"
22 #include "librbd/Journal.h"
23 #include "librbd/LibrbdAdminSocketHook.h"
24 #include "librbd/ObjectMap.h"
25 #include "librbd/Operations.h"
26 #include "librbd/PluginRegistry.h"
27 #include "librbd/Types.h"
28 #include "librbd/Utils.h"
29 #include "librbd/asio/ContextWQ.h"
30 #include "librbd/exclusive_lock/AutomaticPolicy.h"
31 #include "librbd/exclusive_lock/StandardPolicy.h"
32 #include "librbd/crypto/EncryptionFormat.h"
33 #include "librbd/io/AioCompletion.h"
34 #include "librbd/io/AsyncOperation.h"
35 #include "librbd/io/ImageDispatcher.h"
36 #include "librbd/io/ObjectDispatcher.h"
37 #include "librbd/io/QosImageDispatch.h"
38 #include "librbd/io/IoOperations.h"
39 #include "librbd/io/Utils.h"
40 #include "librbd/journal/StandardPolicy.h"
41 #include "librbd/operation/ResizeRequest.h"
43 #include "osdc/Striper.h"
44 #include <boost/algorithm/string/predicate.hpp>
46 #define dout_subsys ceph_subsys_rbd
48 #define dout_prefix *_dout << "librbd::ImageCtx: "
56 using ceph::bufferlist
;
57 using librados::snap_t
;
58 using librados::IoCtx
;
64 class SafeTimerSingleton
: public CommonSafeTimer
<ceph::mutex
> {
66 ceph::mutex lock
= ceph::make_mutex("librbd::SafeTimerSingleton::lock");
68 explicit SafeTimerSingleton(CephContext
*cct
)
69 : SafeTimer(cct
, lock
, true) {
72 ~SafeTimerSingleton() {
73 std::lock_guard locker
{lock
};
78 librados::IoCtx
duplicate_io_ctx(librados::IoCtx
& io_ctx
) {
79 librados::IoCtx dup_io_ctx
;
80 dup_io_ctx
.dup(io_ctx
);
84 } // anonymous namespace
86 const string
ImageCtx::METADATA_CONF_PREFIX
= "conf_";
88 ImageCtx::ImageCtx(const string
&image_name
, const string
&image_id
,
89 const char *snap
, IoCtx
& p
, bool ro
)
90 : cct((CephContext
*)p
.cct()),
96 read_only_flags(ro
? IMAGE_READ_ONLY_FLAG_USER
: 0U),
97 exclusive_locked(false),
99 asio_engine(std::make_shared
<AsioEngine
>(p
)),
100 rados_api(asio_engine
->get_rados_api()),
101 data_ctx(duplicate_io_ctx(p
)),
102 md_ctx(duplicate_io_ctx(p
)),
105 owner_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::owner_lock", this))),
106 image_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::image_lock", this))),
107 timestamp_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::timestamp_lock", this))),
108 async_ops_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this))),
109 copyup_list_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this))),
112 order(0), size(0), features(0),
114 id(image_id
), parent(NULL
),
115 stripe_unit(0), stripe_count(0), flags(0),
118 state(new ImageState
<>(this)),
119 operations(new Operations
<>(*this)),
120 exclusive_lock(nullptr), object_map(nullptr),
121 op_work_queue(asio_engine
->get_work_queue()),
122 plugin_registry(new PluginRegistry
<ImageCtx
>(this)),
123 event_socket_completions(32),
125 trace_endpoint("librbd")
127 ldout(cct
, 10) << this << " " << __func__
<< ": "
128 << "image_name=" << image_name
<< ", "
129 << "image_id=" << image_id
<< dendl
;
134 rebuild_data_io_context();
136 // FIPS zeroization audit 20191117: this memset is not security related.
137 memset(&header
, 0, sizeof(header
));
139 io_image_dispatcher
= new io::ImageDispatcher
<ImageCtx
>(this);
140 io_object_dispatcher
= new io::ObjectDispatcher
<ImageCtx
>(this);
142 if (cct
->_conf
.get_val
<bool>("rbd_auto_exclusive_lock_until_manual_request")) {
143 exclusive_lock_policy
= new exclusive_lock::AutomaticPolicy(this);
145 exclusive_lock_policy
= new exclusive_lock::StandardPolicy(this);
147 journal_policy
= new journal::StandardPolicy(this);
150 ImageCtx::ImageCtx(const string
&image_name
, const string
&image_id
,
151 uint64_t snap_id
, IoCtx
& p
, bool ro
)
152 : ImageCtx(image_name
, image_id
, "", p
, ro
) {
153 open_snap_id
= snap_id
;
156 ImageCtx::~ImageCtx() {
157 ldout(cct
, 10) << this << " " << __func__
<< dendl
;
159 ceph_assert(config_watcher
== nullptr);
160 ceph_assert(image_watcher
== NULL
);
161 ceph_assert(exclusive_lock
== NULL
);
162 ceph_assert(object_map
== NULL
);
163 ceph_assert(journal
== NULL
);
164 ceph_assert(asok_hook
== NULL
);
169 delete[] format_string
;
172 if (data_ctx
.is_valid()) {
173 data_ctx
.aio_flush();
176 delete io_object_dispatcher
;
177 delete io_image_dispatcher
;
179 delete journal_policy
;
180 delete exclusive_lock_policy
;
184 delete plugin_registry
;
187 void ImageCtx::init() {
188 ceph_assert(!header_oid
.empty());
189 ceph_assert(old_format
|| !id
.empty());
191 asok_hook
= new LibrbdAdminSocketHook(this);
193 string pname
= string("librbd-") + id
+ string("-") +
194 md_ctx
.get_pool_name() + string("-") + name
;
195 if (!snap_name
.empty()) {
200 trace_endpoint
.copy_name(pname
);
203 ceph_assert(image_watcher
== NULL
);
204 image_watcher
= new ImageWatcher
<>(*this);
207 void ImageCtx::shutdown() {
208 delete image_watcher
;
209 image_watcher
= nullptr;
215 void ImageCtx::init_layout(int64_t pool_id
)
217 if (stripe_unit
== 0 || stripe_count
== 0) {
218 stripe_unit
= 1ull << order
;
222 vector
<uint64_t> alignments
;
223 alignments
.push_back(stripe_count
<< order
); // object set (in file striping terminology)
224 alignments
.push_back(stripe_unit
* stripe_count
); // stripe
225 alignments
.push_back(stripe_unit
); // stripe unit
226 readahead
.set_alignments(alignments
);
228 layout
= file_layout_t();
229 layout
.stripe_unit
= stripe_unit
;
230 layout
.stripe_count
= stripe_count
;
231 layout
.object_size
= 1ull << order
;
232 layout
.pool_id
= pool_id
; // FIXME: pool id overflow?
234 delete[] format_string
;
235 size_t len
= object_prefix
.length() + 16;
236 format_string
= new char[len
];
238 snprintf(format_string
, len
, "%s.%%012llx", object_prefix
.c_str());
240 snprintf(format_string
, len
, "%s.%%016llx", object_prefix
.c_str());
243 ldout(cct
, 10) << "init_layout stripe_unit " << stripe_unit
244 << " stripe_count " << stripe_count
245 << " object_size " << layout
.object_size
246 << " prefix " << object_prefix
247 << " format " << format_string
251 void ImageCtx::perf_start(string name
) {
252 auto perf_prio
= PerfCountersBuilder::PRIO_DEBUGONLY
;
253 if (child
== nullptr) {
254 // ensure top-level IO stats are exported for librbd daemons
255 perf_prio
= PerfCountersBuilder::PRIO_USEFUL
;
258 PerfCountersBuilder
plb(cct
, name
, l_librbd_first
, l_librbd_last
);
260 plb
.add_u64_counter(l_librbd_rd
, "rd", "Reads", "r", perf_prio
);
261 plb
.add_u64_counter(l_librbd_rd_bytes
, "rd_bytes", "Data size in reads",
262 "rb", perf_prio
, unit_t(UNIT_BYTES
));
263 plb
.add_time_avg(l_librbd_rd_latency
, "rd_latency", "Latency of reads",
265 plb
.add_u64_counter(l_librbd_wr
, "wr", "Writes", "w", perf_prio
);
266 plb
.add_u64_counter(l_librbd_wr_bytes
, "wr_bytes", "Written data",
267 "wb", perf_prio
, unit_t(UNIT_BYTES
));
268 plb
.add_time_avg(l_librbd_wr_latency
, "wr_latency", "Write latency",
270 plb
.add_u64_counter(l_librbd_discard
, "discard", "Discards");
271 plb
.add_u64_counter(l_librbd_discard_bytes
, "discard_bytes", "Discarded data", NULL
, 0, unit_t(UNIT_BYTES
));
272 plb
.add_time_avg(l_librbd_discard_latency
, "discard_latency", "Discard latency");
273 plb
.add_u64_counter(l_librbd_flush
, "flush", "Flushes");
274 plb
.add_time_avg(l_librbd_flush_latency
, "flush_latency", "Latency of flushes");
275 plb
.add_u64_counter(l_librbd_ws
, "ws", "WriteSames");
276 plb
.add_u64_counter(l_librbd_ws_bytes
, "ws_bytes", "WriteSame data", NULL
, 0, unit_t(UNIT_BYTES
));
277 plb
.add_time_avg(l_librbd_ws_latency
, "ws_latency", "WriteSame latency");
278 plb
.add_u64_counter(l_librbd_cmp
, "cmp", "CompareAndWrites");
279 plb
.add_u64_counter(l_librbd_cmp_bytes
, "cmp_bytes", "Data size in cmps", NULL
, 0, unit_t(UNIT_BYTES
));
280 plb
.add_time_avg(l_librbd_cmp_latency
, "cmp_latency", "Latency of cmps");
281 plb
.add_u64_counter(l_librbd_snap_create
, "snap_create", "Snap creations");
282 plb
.add_u64_counter(l_librbd_snap_remove
, "snap_remove", "Snap removals");
283 plb
.add_u64_counter(l_librbd_snap_rollback
, "snap_rollback", "Snap rollbacks");
284 plb
.add_u64_counter(l_librbd_snap_rename
, "snap_rename", "Snap rename");
285 plb
.add_u64_counter(l_librbd_notify
, "notify", "Updated header notifications");
286 plb
.add_u64_counter(l_librbd_resize
, "resize", "Resizes");
287 plb
.add_u64_counter(l_librbd_readahead
, "readahead", "Read ahead");
288 plb
.add_u64_counter(l_librbd_readahead_bytes
, "readahead_bytes", "Data size in read ahead", NULL
, 0, unit_t(UNIT_BYTES
));
289 plb
.add_u64_counter(l_librbd_invalidate_cache
, "invalidate_cache", "Cache invalidates");
291 plb
.add_time(l_librbd_opened_time
, "opened_time", "Opened time",
293 plb
.add_time(l_librbd_lock_acquired_time
, "lock_acquired_time",
294 "Lock acquired time", "lats", perf_prio
);
296 perfcounter
= plb
.create_perf_counters();
297 cct
->get_perfcounters_collection()->add(perfcounter
);
299 perfcounter
->tset(l_librbd_opened_time
, ceph_clock_now());
302 void ImageCtx::perf_stop() {
303 ceph_assert(perfcounter
);
304 cct
->get_perfcounters_collection()->remove(perfcounter
);
308 void ImageCtx::set_read_flag(unsigned flag
) {
309 extra_read_flags
|= flag
;
312 int ImageCtx::get_read_flags(snap_t snap_id
) {
313 int flags
= librados::OPERATION_NOFLAG
| read_flags
;
317 flags
= librados::OPERATION_NOFLAG
| extra_read_flags
;
318 if (snap_id
== LIBRADOS_SNAP_HEAD
)
321 if (config
.get_val
<bool>("rbd_balance_snap_reads"))
322 flags
|= librados::OPERATION_BALANCE_READS
;
323 else if (config
.get_val
<bool>("rbd_localize_snap_reads"))
324 flags
|= librados::OPERATION_LOCALIZE_READS
;
328 int ImageCtx::snap_set(uint64_t in_snap_id
) {
329 ceph_assert(ceph_mutex_is_wlocked(image_lock
));
330 auto it
= snap_info
.find(in_snap_id
);
331 if (in_snap_id
!= CEPH_NOSNAP
&& it
!= snap_info
.end()) {
332 snap_id
= in_snap_id
;
333 snap_namespace
= it
->second
.snap_namespace
;
334 snap_name
= it
->second
.name
;
336 if (data_ctx
.is_valid()) {
337 data_ctx
.snap_set_read(snap_id
);
338 rebuild_data_io_context();
345 void ImageCtx::snap_unset()
347 ceph_assert(ceph_mutex_is_wlocked(image_lock
));
348 snap_id
= CEPH_NOSNAP
;
352 if (data_ctx
.is_valid()) {
353 data_ctx
.snap_set_read(snap_id
);
354 rebuild_data_io_context();
358 snap_t
ImageCtx::get_snap_id(const cls::rbd::SnapshotNamespace
& in_snap_namespace
,
359 const string
& in_snap_name
) const
361 ceph_assert(ceph_mutex_is_locked(image_lock
));
362 auto it
= snap_ids
.find({in_snap_namespace
, in_snap_name
});
363 if (it
!= snap_ids
.end()) {
369 const SnapInfo
* ImageCtx::get_snap_info(snap_t in_snap_id
) const
371 ceph_assert(ceph_mutex_is_locked(image_lock
));
372 map
<snap_t
, SnapInfo
>::const_iterator it
=
373 snap_info
.find(in_snap_id
);
374 if (it
!= snap_info
.end())
379 int ImageCtx::get_snap_name(snap_t in_snap_id
,
380 string
*out_snap_name
) const
382 ceph_assert(ceph_mutex_is_locked(image_lock
));
383 const SnapInfo
*info
= get_snap_info(in_snap_id
);
385 *out_snap_name
= info
->name
;
391 int ImageCtx::get_snap_namespace(snap_t in_snap_id
,
392 cls::rbd::SnapshotNamespace
*out_snap_namespace
) const
394 ceph_assert(ceph_mutex_is_locked(image_lock
));
395 const SnapInfo
*info
= get_snap_info(in_snap_id
);
397 *out_snap_namespace
= info
->snap_namespace
;
403 int ImageCtx::get_parent_spec(snap_t in_snap_id
,
404 cls::rbd::ParentImageSpec
*out_pspec
) const
406 const SnapInfo
*info
= get_snap_info(in_snap_id
);
408 *out_pspec
= info
->parent
.spec
;
414 uint64_t ImageCtx::get_current_size() const
416 ceph_assert(ceph_mutex_is_locked(image_lock
));
420 uint64_t ImageCtx::get_object_size() const
422 return 1ull << order
;
425 string
ImageCtx::get_object_name(uint64_t num
) const {
426 return util::data_object_name(this, num
);
429 uint64_t ImageCtx::get_stripe_unit() const
434 uint64_t ImageCtx::get_stripe_count() const
439 uint64_t ImageCtx::get_stripe_period() const
441 return stripe_count
* (1ull << order
);
444 utime_t
ImageCtx::get_create_timestamp() const
446 return create_timestamp
;
449 utime_t
ImageCtx::get_access_timestamp() const
451 return access_timestamp
;
454 utime_t
ImageCtx::get_modify_timestamp() const
456 return modify_timestamp
;
459 void ImageCtx::set_access_timestamp(utime_t at
)
461 ceph_assert(ceph_mutex_is_wlocked(timestamp_lock
));
462 access_timestamp
= at
;
465 void ImageCtx::set_modify_timestamp(utime_t mt
)
467 ceph_assert(ceph_mutex_is_locked(timestamp_lock
));
468 modify_timestamp
= mt
;
471 int ImageCtx::is_snap_protected(snap_t in_snap_id
,
472 bool *is_protected
) const
474 ceph_assert(ceph_mutex_is_locked(image_lock
));
475 const SnapInfo
*info
= get_snap_info(in_snap_id
);
478 (info
->protection_status
== RBD_PROTECTION_STATUS_PROTECTED
);
484 int ImageCtx::is_snap_unprotected(snap_t in_snap_id
,
485 bool *is_unprotected
) const
487 ceph_assert(ceph_mutex_is_locked(image_lock
));
488 const SnapInfo
*info
= get_snap_info(in_snap_id
);
491 (info
->protection_status
== RBD_PROTECTION_STATUS_UNPROTECTED
);
497 void ImageCtx::add_snap(cls::rbd::SnapshotNamespace in_snap_namespace
,
499 snap_t id
, uint64_t in_size
,
500 const ParentImageInfo
&parent
,
501 uint8_t protection_status
, uint64_t flags
,
504 ceph_assert(ceph_mutex_is_wlocked(image_lock
));
506 SnapInfo
info(in_snap_name
, in_snap_namespace
,
507 in_size
, parent
, protection_status
, flags
, timestamp
);
508 snap_info
.insert({id
, info
});
509 snap_ids
.insert({{in_snap_namespace
, in_snap_name
}, id
});
512 void ImageCtx::rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace
,
516 ceph_assert(ceph_mutex_is_wlocked(image_lock
));
517 snaps
.erase(std::remove(snaps
.begin(), snaps
.end(), id
), snaps
.end());
519 snap_ids
.erase({in_snap_namespace
, in_snap_name
});
522 uint64_t ImageCtx::get_image_size(snap_t in_snap_id
) const
524 ceph_assert(ceph_mutex_is_locked(image_lock
));
525 if (in_snap_id
== CEPH_NOSNAP
) {
526 if (!resize_reqs
.empty() &&
527 resize_reqs
.front()->shrinking()) {
528 return resize_reqs
.front()->get_image_size();
533 const SnapInfo
*info
= get_snap_info(in_snap_id
);
540 uint64_t ImageCtx::get_area_size(io::ImageArea area
) const {
541 // image areas are defined only for the "opened at" snap_id
542 // (i.e. where encryption may be loaded)
543 uint64_t raw_size
= get_image_size(snap_id
);
548 auto size
= io::util::raw_to_area_offset(*this, raw_size
);
549 ceph_assert(size
.first
<= raw_size
&& size
.second
== io::ImageArea::DATA
);
552 case io::ImageArea::DATA
:
554 case io::ImageArea::CRYPTO_HEADER
:
555 // CRYPTO_HEADER area ends where DATA area begins
556 return raw_size
- size
.first
;
562 uint64_t ImageCtx::get_object_count(snap_t in_snap_id
) const {
563 ceph_assert(ceph_mutex_is_locked(image_lock
));
564 uint64_t image_size
= get_image_size(in_snap_id
);
565 return Striper::get_num_objects(layout
, image_size
);
568 bool ImageCtx::test_features(uint64_t features
) const
570 std::shared_lock l
{image_lock
};
571 return test_features(features
, image_lock
);
574 bool ImageCtx::test_features(uint64_t in_features
,
575 const ceph::shared_mutex
&in_image_lock
) const
577 ceph_assert(ceph_mutex_is_locked(image_lock
));
578 return ((features
& in_features
) == in_features
);
581 bool ImageCtx::test_op_features(uint64_t in_op_features
) const
583 std::shared_lock l
{image_lock
};
584 return test_op_features(in_op_features
, image_lock
);
587 bool ImageCtx::test_op_features(uint64_t in_op_features
,
588 const ceph::shared_mutex
&in_image_lock
) const
590 ceph_assert(ceph_mutex_is_locked(image_lock
));
591 return ((op_features
& in_op_features
) == in_op_features
);
594 int ImageCtx::get_flags(librados::snap_t _snap_id
, uint64_t *_flags
) const
596 ceph_assert(ceph_mutex_is_locked(image_lock
));
597 if (_snap_id
== CEPH_NOSNAP
) {
601 const SnapInfo
*info
= get_snap_info(_snap_id
);
603 *_flags
= info
->flags
;
609 int ImageCtx::test_flags(librados::snap_t in_snap_id
,
610 uint64_t flags
, bool *flags_set
) const
612 std::shared_lock l
{image_lock
};
613 return test_flags(in_snap_id
, flags
, image_lock
, flags_set
);
616 int ImageCtx::test_flags(librados::snap_t in_snap_id
,
618 const ceph::shared_mutex
&in_image_lock
,
619 bool *flags_set
) const
621 ceph_assert(ceph_mutex_is_locked(image_lock
));
623 int r
= get_flags(in_snap_id
, &snap_flags
);
627 *flags_set
= ((snap_flags
& flags
) == flags
);
631 int ImageCtx::update_flags(snap_t in_snap_id
, uint64_t flag
, bool enabled
)
633 ceph_assert(ceph_mutex_is_wlocked(image_lock
));
635 if (in_snap_id
== CEPH_NOSNAP
) {
638 map
<snap_t
, SnapInfo
>::iterator it
= snap_info
.find(in_snap_id
);
639 if (it
== snap_info
.end()) {
642 _flags
= &it
->second
.flags
;
653 const ParentImageInfo
* ImageCtx::get_parent_info(snap_t in_snap_id
) const
655 ceph_assert(ceph_mutex_is_locked(image_lock
));
656 if (in_snap_id
== CEPH_NOSNAP
)
658 const SnapInfo
*info
= get_snap_info(in_snap_id
);
660 return &info
->parent
;
664 int64_t ImageCtx::get_parent_pool_id(snap_t in_snap_id
) const
666 const auto info
= get_parent_info(in_snap_id
);
668 return info
->spec
.pool_id
;
672 string
ImageCtx::get_parent_image_id(snap_t in_snap_id
) const
674 const auto info
= get_parent_info(in_snap_id
);
676 return info
->spec
.image_id
;
680 uint64_t ImageCtx::get_parent_snap_id(snap_t in_snap_id
) const
682 const auto info
= get_parent_info(in_snap_id
);
684 return info
->spec
.snap_id
;
688 int ImageCtx::get_parent_overlap(snap_t in_snap_id
,
689 uint64_t* raw_overlap
) const {
690 const auto info
= get_parent_info(in_snap_id
);
692 *raw_overlap
= info
->overlap
;
698 std::pair
<uint64_t, io::ImageArea
> ImageCtx::reduce_parent_overlap(
699 uint64_t raw_overlap
, bool migration_write
) const {
700 ceph_assert(ceph_mutex_is_locked(image_lock
));
701 if (migration_write
) {
702 // don't reduce migration write overlap -- it may be larger as
703 // it's the largest overlap across snapshots by construction
704 return io::util::raw_to_area_offset(*this, raw_overlap
);
706 if (raw_overlap
== 0 || parent
== nullptr) {
707 // image opened with OPEN_FLAG_SKIP_OPEN_PARENT -> no overlap
708 return io::util::raw_to_area_offset(*this, 0);
710 // DATA area in the parent may be smaller than the part of DATA
711 // area in the clone that is still within the overlap (e.g. for
712 // LUKS2-encrypted parent + LUKS1-encrypted clone, due to LUKS2
713 // header usually being bigger than LUKS1 header)
714 auto overlap
= io::util::raw_to_area_offset(*this, raw_overlap
);
715 std::shared_lock
parent_image_locker(parent
->image_lock
);
716 overlap
.first
= std::min(overlap
.first
,
717 parent
->get_area_size(overlap
.second
));
721 uint64_t ImageCtx::prune_parent_extents(io::Extents
& image_extents
,
723 uint64_t raw_overlap
,
724 bool migration_write
) const {
725 ceph_assert(ceph_mutex_is_locked(image_lock
));
726 ldout(cct
, 10) << __func__
<< ": image_extents=" << image_extents
727 << " area=" << area
<< " raw_overlap=" << raw_overlap
728 << " migration_write=" << migration_write
<< dendl
;
729 if (raw_overlap
== 0) {
730 image_extents
.clear();
734 auto overlap
= reduce_parent_overlap(raw_overlap
, migration_write
);
735 if (area
== overlap
.second
) {
736 // drop extents completely beyond the overlap
737 while (!image_extents
.empty() &&
738 image_extents
.back().first
>= overlap
.first
) {
739 image_extents
.pop_back();
741 if (!image_extents
.empty()) {
742 // trim final overlapping extent
743 auto& last_extent
= image_extents
.back();
744 if (last_extent
.first
+ last_extent
.second
> overlap
.first
) {
745 last_extent
.second
= overlap
.first
- last_extent
.first
;
748 } else if (area
== io::ImageArea::DATA
&&
749 overlap
.second
== io::ImageArea::CRYPTO_HEADER
) {
750 // all extents completely beyond the overlap
751 image_extents
.clear();
753 // all extents completely within the overlap
754 ceph_assert(area
== io::ImageArea::CRYPTO_HEADER
&&
755 overlap
.second
== io::ImageArea::DATA
);
758 uint64_t overlap_bytes
= 0;
759 for (auto [_
, len
] : image_extents
) {
760 overlap_bytes
+= len
;
762 ldout(cct
, 10) << __func__
<< ": overlap=" << overlap
.first
763 << "/" << overlap
.second
764 << " got overlap_bytes=" << overlap_bytes
765 << " at " << image_extents
<< dendl
;
766 return overlap_bytes
;
769 void ImageCtx::register_watch(Context
*on_finish
) {
770 ceph_assert(image_watcher
!= NULL
);
771 image_watcher
->register_watch(on_finish
);
774 void ImageCtx::cancel_async_requests() {
776 cancel_async_requests(&ctx
);
780 void ImageCtx::cancel_async_requests(Context
*on_finish
) {
782 std::lock_guard async_ops_locker
{async_ops_lock
};
783 if (!async_requests
.empty()) {
784 ldout(cct
, 10) << "canceling async requests: count="
785 << async_requests
.size() << dendl
;
786 for (auto req
: async_requests
) {
787 ldout(cct
, 10) << "canceling async request: " << req
<< dendl
;
790 async_requests_waiters
.push_back(on_finish
);
795 on_finish
->complete(0);
798 void ImageCtx::apply_metadata(const std::map
<std::string
, bufferlist
> &meta
,
800 ldout(cct
, 20) << __func__
<< dendl
;
802 std::unique_lock
image_locker(image_lock
);
804 // reset settings back to global defaults
805 config_overrides
.clear();
806 config
.set_config_values(cct
->_conf
.get_config_values());
808 // extract config overrides
809 for (auto meta_pair
: meta
) {
810 if (!boost::starts_with(meta_pair
.first
, METADATA_CONF_PREFIX
)) {
814 std::string key
= meta_pair
.first
.substr(METADATA_CONF_PREFIX
.size());
815 if (!boost::starts_with(key
, "rbd_")) {
816 // ignore non-RBD configuration keys
817 // TODO use option schema to determine applicable subsystem
818 ldout(cct
, 0) << __func__
<< ": ignoring config " << key
<< dendl
;
822 if (config
.find_option(key
) != nullptr) {
823 std::string
val(meta_pair
.second
.c_str(), meta_pair
.second
.length());
824 int r
= config
.set_val(key
, val
);
826 ldout(cct
, 20) << __func__
<< ": " << key
<< "=" << val
<< dendl
;
827 config_overrides
.insert(key
);
829 lderr(cct
) << __func__
<< ": failed to set config " << key
<< " "
830 << "with value " << val
<< ": " << cpp_strerror(r
)
836 image_locker
.unlock();
838 #define ASSIGN_OPTION(param, type) \
839 param = config.get_val<type>("rbd_"#param)
841 bool skip_partial_discard
= true;
842 ASSIGN_OPTION(non_blocking_aio
, bool);
843 ASSIGN_OPTION(cache
, bool);
844 ASSIGN_OPTION(sparse_read_threshold_bytes
, Option::size_t);
845 ASSIGN_OPTION(clone_copy_on_read
, bool);
846 ASSIGN_OPTION(enable_alloc_hint
, bool);
847 ASSIGN_OPTION(mirroring_replay_delay
, uint64_t);
848 ASSIGN_OPTION(mtime_update_interval
, uint64_t);
849 ASSIGN_OPTION(atime_update_interval
, uint64_t);
850 ASSIGN_OPTION(skip_partial_discard
, bool);
851 ASSIGN_OPTION(discard_granularity_bytes
, uint64_t);
852 ASSIGN_OPTION(blkin_trace_all
, bool);
854 auto cache_policy
= config
.get_val
<std::string
>("rbd_cache_policy");
855 if (cache_policy
== "writethrough" || cache_policy
== "writeback") {
856 ASSIGN_OPTION(readahead_max_bytes
, Option::size_t);
857 ASSIGN_OPTION(readahead_disable_after_bytes
, Option::size_t);
862 if (sparse_read_threshold_bytes
== 0) {
863 sparse_read_threshold_bytes
= get_object_size();
866 bool dirty_cache
= test_features(RBD_FEATURE_DIRTY_CACHE
);
867 if (!skip_partial_discard
|| dirty_cache
) {
868 discard_granularity_bytes
= 0;
871 alloc_hint_flags
= 0;
872 auto compression_hint
= config
.get_val
<std::string
>("rbd_compression_hint");
873 if (compression_hint
== "compressible") {
874 alloc_hint_flags
|= librados::ALLOC_HINT_FLAG_COMPRESSIBLE
;
875 } else if (compression_hint
== "incompressible") {
876 alloc_hint_flags
|= librados::ALLOC_HINT_FLAG_INCOMPRESSIBLE
;
879 librados::Rados
rados(md_ctx
);
880 int8_t require_osd_release
;
881 int r
= rados
.get_min_compatible_osd(&require_osd_release
);
882 if (r
== 0 && require_osd_release
>= CEPH_RELEASE_OCTOPUS
) {
884 auto read_policy
= config
.get_val
<std::string
>("rbd_read_from_replica_policy");
885 if (read_policy
== "balance") {
886 read_flags
|= librados::OPERATION_BALANCE_READS
;
887 } else if (read_policy
== "localize") {
888 read_flags
|= librados::OPERATION_LOCALIZE_READS
;
892 io_image_dispatcher
->apply_qos_schedule_tick_min(
893 config
.get_val
<uint64_t>("rbd_qos_schedule_tick_min"));
895 io_image_dispatcher
->apply_qos_limit(
896 io::IMAGE_DISPATCH_FLAG_QOS_IOPS_THROTTLE
,
897 config
.get_val
<uint64_t>("rbd_qos_iops_limit"),
898 config
.get_val
<uint64_t>("rbd_qos_iops_burst"),
899 config
.get_val
<uint64_t>("rbd_qos_iops_burst_seconds"));
900 io_image_dispatcher
->apply_qos_limit(
901 io::IMAGE_DISPATCH_FLAG_QOS_BPS_THROTTLE
,
902 config
.get_val
<uint64_t>("rbd_qos_bps_limit"),
903 config
.get_val
<uint64_t>("rbd_qos_bps_burst"),
904 config
.get_val
<uint64_t>("rbd_qos_bps_burst_seconds"));
905 io_image_dispatcher
->apply_qos_limit(
906 io::IMAGE_DISPATCH_FLAG_QOS_READ_IOPS_THROTTLE
,
907 config
.get_val
<uint64_t>("rbd_qos_read_iops_limit"),
908 config
.get_val
<uint64_t>("rbd_qos_read_iops_burst"),
909 config
.get_val
<uint64_t>("rbd_qos_read_iops_burst_seconds"));
910 io_image_dispatcher
->apply_qos_limit(
911 io::IMAGE_DISPATCH_FLAG_QOS_WRITE_IOPS_THROTTLE
,
912 config
.get_val
<uint64_t>("rbd_qos_write_iops_limit"),
913 config
.get_val
<uint64_t>("rbd_qos_write_iops_burst"),
914 config
.get_val
<uint64_t>("rbd_qos_write_iops_burst_seconds"));
915 io_image_dispatcher
->apply_qos_limit(
916 io::IMAGE_DISPATCH_FLAG_QOS_READ_BPS_THROTTLE
,
917 config
.get_val
<uint64_t>("rbd_qos_read_bps_limit"),
918 config
.get_val
<uint64_t>("rbd_qos_read_bps_burst"),
919 config
.get_val
<uint64_t>("rbd_qos_read_bps_burst_seconds"));
920 io_image_dispatcher
->apply_qos_limit(
921 io::IMAGE_DISPATCH_FLAG_QOS_WRITE_BPS_THROTTLE
,
922 config
.get_val
<uint64_t>("rbd_qos_write_bps_limit"),
923 config
.get_val
<uint64_t>("rbd_qos_write_bps_burst"),
924 config
.get_val
<uint64_t>("rbd_qos_write_bps_burst_seconds"));
925 io_image_dispatcher
->apply_qos_exclude_ops(
926 librbd::io::rbd_io_operations_from_string(
927 config
.get_val
<std::string
>("rbd_qos_exclude_ops"), nullptr));
929 if (!disable_zero_copy
&&
930 config
.get_val
<bool>("rbd_disable_zero_copy_writes")) {
931 ldout(cct
, 5) << this << ": disabling zero-copy writes" << dendl
;
932 disable_zero_copy
= true;
936 ExclusiveLock
<ImageCtx
> *ImageCtx::create_exclusive_lock() {
937 return new ExclusiveLock
<ImageCtx
>(*this);
940 ObjectMap
<ImageCtx
> *ImageCtx::create_object_map(uint64_t snap_id
) {
941 return new ObjectMap
<ImageCtx
>(*this, snap_id
);
944 Journal
<ImageCtx
> *ImageCtx::create_journal() {
945 return new Journal
<ImageCtx
>(*this);
948 void ImageCtx::set_image_name(const std::string
&image_name
) {
949 // update the name so rename can be invoked repeatedly
950 std::shared_lock owner_locker
{owner_lock
};
951 std::unique_lock image_locker
{image_lock
};
954 header_oid
= util::old_header_name(image_name
);
958 void ImageCtx::notify_update() {
959 state
->handle_update_notification();
960 ImageWatcher
<>::notify_header_update(md_ctx
, header_oid
);
963 void ImageCtx::notify_update(Context
*on_finish
) {
964 state
->handle_update_notification();
965 image_watcher
->notify_header_update(on_finish
);
968 exclusive_lock::Policy
*ImageCtx::get_exclusive_lock_policy() const {
969 ceph_assert(ceph_mutex_is_locked(owner_lock
));
970 ceph_assert(exclusive_lock_policy
!= nullptr);
971 return exclusive_lock_policy
;
974 void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy
*policy
) {
975 ceph_assert(ceph_mutex_is_wlocked(owner_lock
));
976 ceph_assert(policy
!= nullptr);
977 delete exclusive_lock_policy
;
978 exclusive_lock_policy
= policy
;
981 journal::Policy
*ImageCtx::get_journal_policy() const {
982 ceph_assert(ceph_mutex_is_locked(image_lock
));
983 ceph_assert(journal_policy
!= nullptr);
984 return journal_policy
;
987 void ImageCtx::set_journal_policy(journal::Policy
*policy
) {
988 ceph_assert(ceph_mutex_is_wlocked(image_lock
));
989 ceph_assert(policy
!= nullptr);
990 delete journal_policy
;
991 journal_policy
= policy
;
994 void ImageCtx::rebuild_data_io_context() {
995 auto ctx
= std::make_shared
<neorados::IOContext
>(
996 data_ctx
.get_id(), data_ctx
.get_namespace());
997 if (snap_id
!= CEPH_NOSNAP
) {
998 ctx
->read_snap(snap_id
);
1000 if (!snapc
.snaps
.empty()) {
1001 ctx
->write_snap_context(
1002 {{snapc
.seq
, {snapc
.snaps
.begin(), snapc
.snaps
.end()}}});
1004 if (data_ctx
.get_pool_full_try()) {
1005 ctx
->full_try(true);
1008 // atomically reset the data IOContext to new version
1009 atomic_store(&data_io_context
, ctx
);
1012 IOContext
ImageCtx::get_data_io_context() const {
1013 return atomic_load(&data_io_context
);
1016 IOContext
ImageCtx::duplicate_data_io_context() const {
1017 auto ctx
= get_data_io_context();
1018 return std::make_shared
<neorados::IOContext
>(*ctx
);
1021 void ImageCtx::get_timer_instance(CephContext
*cct
, SafeTimer
**timer
,
1022 ceph::mutex
**timer_lock
) {
1023 auto safe_timer_singleton
=
1024 &cct
->lookup_or_create_singleton_object
<SafeTimerSingleton
>(
1025 "librbd::journal::safe_timer", false, cct
);
1026 *timer
= safe_timer_singleton
;
1027 *timer_lock
= &safe_timer_singleton
->lock
;