1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include <boost/assign/list_of.hpp>
7 #include "common/ceph_context.h"
8 #include "common/dout.h"
9 #include "common/errno.h"
10 #include "common/perf_counters.h"
11 #include "common/WorkQueue.h"
12 #include "common/Timer.h"
14 #include "librbd/AsyncRequest.h"
15 #include "librbd/ExclusiveLock.h"
16 #include "librbd/internal.h"
17 #include "librbd/ImageCtx.h"
18 #include "librbd/ImageState.h"
19 #include "librbd/ImageWatcher.h"
20 #include "librbd/Journal.h"
21 #include "librbd/LibrbdAdminSocketHook.h"
22 #include "librbd/ObjectMap.h"
23 #include "librbd/Operations.h"
24 #include "librbd/operation/ResizeRequest.h"
25 #include "librbd/Types.h"
26 #include "librbd/Utils.h"
27 #include "librbd/LibrbdWriteback.h"
28 #include "librbd/exclusive_lock/AutomaticPolicy.h"
29 #include "librbd/exclusive_lock/StandardPolicy.h"
30 #include "librbd/io/AioCompletion.h"
31 #include "librbd/io/AsyncOperation.h"
32 #include "librbd/io/ImageRequestWQ.h"
33 #include "librbd/journal/StandardPolicy.h"
35 #include "osdc/Striper.h"
36 #include <boost/bind.hpp>
38 #define dout_subsys ceph_subsys_rbd
40 #define dout_prefix *_dout << "librbd::ImageCtx: "
48 using ceph::bufferlist
;
49 using librados::snap_t
;
50 using librados::IoCtx
;
// ThreadPoolSingleton: process-wide librbd worker thread pool plus a
// ContextWQ for deferred ops; looked up via CephContext singleton machinery
// (see get_thread_pool_instance below).
// NOTE(review): partial extraction view — leading numerals are original line
// numbers and several physical lines (incl. closing braces) are missing.
56 class ThreadPoolSingleton
: public ThreadPool
{
58 ContextWQ
// Work queue owned by this singleton; drained before destruction.
*op_work_queue
;
60 explicit ThreadPoolSingleton(CephContext
*cct
)
61 : ThreadPool(cct
, "librbd::thread_pool", "tp_librbd", 1,
63 op_work_queue(new ContextWQ("librbd::op_work_queue",
64 cct
->_conf
->get_val
<int64_t>("rbd_op_thread_timeout"),
68 ~ThreadPoolSingleton() override
{
// Ensure every queued context has run before the pool is torn down.
69 op_work_queue
->drain();
// SafeTimerSingleton: process-wide SafeTimer guarded by its own Mutex;
// constructed with safe_callbacks=true.
// NOTE(review): partial extraction view — some physical lines are missing
// (e.g. the member declaration for `lock` and closing braces).
76 class SafeTimerSingleton
: public SafeTimer
{
80 explicit SafeTimerSingleton(CephContext
*cct
)
81 : SafeTimer(cct
, lock
, true),
82 lock("librbd::Journal::SafeTimerSingleton::lock") {
85 ~SafeTimerSingleton() {
// Destructor takes the timer lock; presumably shutdown() follows in an
// elided line — TODO confirm against the full source.
86 Mutex::Locker
locker(lock
);
// Completion contexts used by the ObjectCacher flush/shutdown paths.
// NOTE(review): partial extraction view — member declarations and closing
// braces for these structs are among the elided physical lines.

// C_FlushCache: when fired, flushes the image's object cache and chains to
// on_safe once the flush completes.
91 struct C_FlushCache
: public Context
{
95 C_FlushCache(ImageCtx
*_image_ctx
, Context
*_on_safe
)
96 : image_ctx(_image_ctx
), on_safe(_on_safe
) {
98 void finish(int r
) override
{
99 // successful cache flush indicates all IO is now safe
100 image_ctx
->flush_cache(on_safe
);

// C_ShutDownCache: stops the object cacher, then completes on_finish with
// the incoming result code.
104 struct C_ShutDownCache
: public Context
{
108 C_ShutDownCache(ImageCtx
*_image_ctx
, Context
*_on_finish
)
109 : image_ctx(_image_ctx
), on_finish(_on_finish
) {
111 void finish(int r
) override
{
112 image_ctx
->object_cacher
->stop();
113 on_finish
->complete(r
);

// C_InvalidateCache: invoked after a cache flush with cache_lock held;
// purges the cache on blacklisting (or on error when purge_on_error is set),
// releases cached objects, and completes on_finish either inline
// (reentrant_safe) or via the op work queue.
117 struct C_InvalidateCache
: public Context
{
123 C_InvalidateCache(ImageCtx
*_image_ctx
, bool _purge_on_error
,
124 bool _reentrant_safe
, Context
*_on_finish
)
125 : image_ctx(_image_ctx
), purge_on_error(_purge_on_error
),
126 reentrant_safe(_reentrant_safe
), on_finish(_on_finish
) {
128 void finish(int r
) override
{
129 assert(image_ctx
->cache_lock
.is_locked());
130 CephContext
*cct
= image_ctx
->cct
;
// Blacklisted client: the flush can never succeed, so drop dirty data.
132 if (r
== -EBLACKLISTED
) {
133 lderr(cct
) << "Blacklisted during flush! Purging cache..." << dendl
;
134 image_ctx
->object_cacher
->purge_set(image_ctx
->object_set
);
135 } else if (r
!= 0 && purge_on_error
) {
136 lderr(cct
) << "invalidate cache encountered error "
137 << cpp_strerror(r
) << " !Purging cache..." << dendl
;
138 image_ctx
->object_cacher
->purge_set(image_ctx
->object_set
);
140 lderr(cct
) << "flush_cache returned " << r
<< dendl
;
// Attempt to release everything; report the bytes that remain pinned.
143 loff_t unclean
= image_ctx
->object_cacher
->release_set(
144 image_ctx
->object_set
);
148 lderr(cct
) << "could not release all objects from cache: "
149 << unclean
<< " bytes remain" << dendl
;
// Completing inline is only safe when the caller tolerates reentrancy;
// otherwise defer through the work queue to drop held locks first.
155 if (reentrant_safe
) {
156 on_finish
->complete(r
);
158 image_ctx
->op_work_queue
->queue(on_finish
, r
);
164 } // anonymous namespace
// Prefix identifying per-image metadata keys that override config options
// (used with _filter_metadata_confs in apply_metadata below).
166 const string
ImageCtx::METADATA_CONF_PREFIX
= "conf_";
// ImageCtx constructor: captures identity (name/id/snap), wires up the
// shared thread pool + op work queue, builds the per-image IO request queue,
// and installs the exclusive-lock and journal policies.
// NOTE(review): partial extraction view — many initializer-list entries and
// several constructor-body lines are elided here.
168 ImageCtx::ImageCtx(const string
&image_name
, const string
&image_id
,
169 const char *snap
, IoCtx
& p
, bool ro
)
170 : cct((CephContext
*)p
.cct()),
172 snap_id(CEPH_NOSNAP
),
175 flush_encountered(false),
176 exclusive_locked(false),
// Per-image locks get unique names (address-suffixed) for lockdep.
180 owner_lock(util::unique_lock_name("librbd::ImageCtx::owner_lock", this)),
181 md_lock(util::unique_lock_name("librbd::ImageCtx::md_lock", this)),
182 cache_lock(util::unique_lock_name("librbd::ImageCtx::cache_lock", this)),
183 snap_lock(util::unique_lock_name("librbd::ImageCtx::snap_lock", this)),
184 parent_lock(util::unique_lock_name("librbd::ImageCtx::parent_lock", this)),
185 object_map_lock(util::unique_lock_name("librbd::ImageCtx::object_map_lock", this)),
186 async_ops_lock(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this)),
187 copyup_list_lock(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this)),
188 completed_reqs_lock(util::unique_lock_name("librbd::ImageCtx::completed_reqs_lock", this)),
191 order(0), size(0), features(0),
193 id(image_id
), parent(NULL
),
194 stripe_unit(0), stripe_count(0), flags(0),
195 object_cacher(NULL
), writeback_handler(NULL
), object_set(NULL
),
198 state(new ImageState
<>(this)),
199 operations(new Operations
<>(*this)),
200 exclusive_lock(nullptr), object_map(nullptr),
201 io_work_queue(nullptr), op_work_queue(nullptr),
203 trace_endpoint("librbd")
// Header buffer starts zeroed; it is populated later from the on-disk header.
210 memset(&header
, 0, sizeof(header
));
// Shared (process-wide) thread pool and op queue.
212 ThreadPool
*thread_pool
;
213 get_thread_pool_instance(cct
, &thread_pool
, &op_work_queue
);
214 io_work_queue
= new io::ImageRequestWQ
<>(
215 this, "librbd::io_work_queue",
216 cct
->_conf
->get_val
<int64_t>("rbd_op_thread_timeout"),
// Policy selection is config-driven; the automatic policy acquires the
// exclusive lock on demand until a manual request is made.
219 if (cct
->_conf
->get_val
<bool>("rbd_auto_exclusive_lock_until_manual_request")) {
220 exclusive_lock_policy
= new exclusive_lock::AutomaticPolicy(this);
222 exclusive_lock_policy
= new exclusive_lock::StandardPolicy(this);
224 journal_policy
= new journal::StandardPolicy
<ImageCtx
>(this);
// Destructor: callers must have already shut down the watcher, exclusive
// lock, object map, journal, and admin-socket hook (asserted below), then
// this releases the cache, writeback handler, queues, and policies.
227 ImageCtx::~ImageCtx() {
228 assert(image_watcher
== NULL
);
229 assert(exclusive_lock
== NULL
);
230 assert(object_map
== NULL
);
231 assert(journal
== NULL
);
232 assert(asok_hook
== NULL
);
238 delete object_cacher
;
239 object_cacher
= NULL
;
241 if (writeback_handler
) {
242 delete writeback_handler
;
243 writeback_handler
= NULL
;
// format_string was allocated with new[] in init_layout.
249 delete[] format_string
;
// Flush any in-flight librados AIO, then drain queued image requests
// before the queue itself is destroyed.
252 data_ctx
.aio_flush();
253 io_work_queue
->drain();
255 delete journal_policy
;
256 delete exclusive_lock_policy
;
257 delete io_work_queue
;
// init(): post-construction setup — admin socket hook, trace endpoint name,
// and (when caching is enabled) the writeback handler + ObjectCacher with
// size/dirty limits derived from the rbd_cache_* options.
// NOTE(review): partial extraction view — several physical lines elided.
262 void ImageCtx::init() {
263 assert(!header_oid
.empty());
264 assert(old_format
|| !id
.empty());
266 asok_hook
= new LibrbdAdminSocketHook(this);
// Perf/trace name: "librbd-<id>-<pool>-<image name>".
268 string pname
= string("librbd-") + id
+ string("-") +
269 data_ctx
.get_pool_name() + string("-") + name
;
270 if (!snap_name
.empty()) {
275 trace_endpoint
.copy_name(pname
);
279 Mutex::Locker
l(cache_lock
);
280 ldout(cct
, 20) << "enabling caching..." << dendl
;
281 writeback_handler
= new LibrbdWriteback(this, cache_lock
);
// Writethrough-until-flush forces max_dirty to 0 until the first user
// flush (presumably in an elided line — see user_flushed below).
283 uint64_t init_max_dirty
= cache_max_dirty
;
284 if (cache_writethrough_until_flush
)
286 ldout(cct
, 20) << "Initial cache settings:"
287 << " size=" << cache_size
288 << " num_objects=" << 10
289 << " max_dirty=" << init_max_dirty
290 << " target_dirty=" << cache_target_dirty
292 << cache_max_dirty_age
<< dendl
;
294 object_cacher
= new ObjectCacher(cct
, pname
, *writeback_handler
, cache_lock
,
297 10, /* reset this in init */
301 cache_block_writes_upfront
);
303 // size object cache appropriately
304 uint64_t obj
= cache_max_dirty_object
;
// Default object count heuristic: cache_size / 100 / sizeof(Object),
// clamped to [10, 2000].
306 obj
= MIN(2000, MAX(10, cache_size
/ 100 / sizeof(ObjectCacher::Object
)));
308 ldout(cct
, 10) << " cache bytes " << cache_size
309 << " -> about " << obj
<< " objects" << dendl
;
310 object_cacher
->set_max_objects(obj
);
312 object_set
= new ObjectCacher::ObjectSet(NULL
, data_ctx
.get_id(), 0);
313 object_set
->return_enoent
= true;
314 object_cacher
->start();
// Readahead thresholds come straight from the rbd_readahead_* options.
317 readahead
.set_trigger_requests(readahead_trigger_requests
);
318 readahead
.set_max_readahead_size(readahead_max_bytes
);
// shutdown(): tears down the image watcher (other teardown lines are elided
// from this view — TODO confirm against full source).
321 void ImageCtx::shutdown() {
322 delete image_watcher
;
323 image_watcher
= nullptr;
// init_layout(): derives the file_layout_t and object-name format string
// from order/striping parameters; defaults stripe_unit to the object size
// when striping is unset.
329 void ImageCtx::init_layout()
331 if (stripe_unit
== 0 || stripe_count
== 0) {
332 stripe_unit
= 1ull << order
;
// Readahead alignment hints, from coarsest (object set) to finest.
336 vector
<uint64_t> alignments
;
337 alignments
.push_back(stripe_count
<< order
); // object set (in file striping terminology)
338 alignments
.push_back(stripe_unit
* stripe_count
); // stripe
339 alignments
.push_back(stripe_unit
); // stripe unit
340 readahead
.set_alignments(alignments
);
342 layout
= file_layout_t();
343 layout
.stripe_unit
= stripe_unit
;
344 layout
.stripe_count
= stripe_count
;
345 layout
.object_size
= 1ull << order
;
346 layout
.pool_id
= data_ctx
.get_id(); // FIXME: pool id overflow?
// Rebuild the printf pattern used by get_object_name(); 12 hex digits
// for one format branch, 16 for the other (branch condition elided here).
348 delete[] format_string
;
349 size_t len
= object_prefix
.length() + 16;
350 format_string
= new char[len
];
352 snprintf(format_string
, len
, "%s.%%012llx", object_prefix
.c_str());
354 snprintf(format_string
, len
, "%s.%%016llx", object_prefix
.c_str());
357 ldout(cct
, 10) << "init_layout stripe_unit " << stripe_unit
358 << " stripe_count " << stripe_count
359 << " object_size " << layout
.object_size
360 << " prefix " << object_prefix
361 << " format " << format_string
// perf_start(): registers this image's perf counters with the context-wide
// collection. Counters get PRIO_USEFUL for top-level (non-child) images so
// daemons export them; otherwise PRIO_DEBUGONLY.
365 void ImageCtx::perf_start(string name
) {
366 auto perf_prio
= PerfCountersBuilder::PRIO_DEBUGONLY
;
367 if (child
== nullptr) {
368 // ensure top-level IO stats are exported for librbd daemons
369 perf_prio
= PerfCountersBuilder::PRIO_USEFUL
;
372 PerfCountersBuilder
plb(cct
, name
, l_librbd_first
, l_librbd_last
);
// Read / write / discard / flush counters and latencies.
374 plb
.add_u64_counter(l_librbd_rd
, "rd", "Reads", "r", perf_prio
);
375 plb
.add_u64_counter(l_librbd_rd_bytes
, "rd_bytes", "Data size in reads",
376 "rb", perf_prio
, unit_t(BYTES
));
377 plb
.add_time_avg(l_librbd_rd_latency
, "rd_latency", "Latency of reads",
379 plb
.add_u64_counter(l_librbd_wr
, "wr", "Writes", "w", perf_prio
);
380 plb
.add_u64_counter(l_librbd_wr_bytes
, "wr_bytes", "Written data",
381 "wb", perf_prio
, unit_t(BYTES
));
382 plb
.add_time_avg(l_librbd_wr_latency
, "wr_latency", "Write latency",
384 plb
.add_u64_counter(l_librbd_discard
, "discard", "Discards");
385 plb
.add_u64_counter(l_librbd_discard_bytes
, "discard_bytes", "Discarded data", NULL
, 0, unit_t(BYTES
));
386 plb
.add_time_avg(l_librbd_discard_latency
, "discard_latency", "Discard latency");
387 plb
.add_u64_counter(l_librbd_flush
, "flush", "Flushes");
388 plb
.add_u64_counter(l_librbd_aio_flush
, "aio_flush", "Async flushes");
389 plb
.add_time_avg(l_librbd_aio_flush_latency
, "aio_flush_latency", "Latency of async flushes");
// WriteSame and CompareAndWrite counters.
390 plb
.add_u64_counter(l_librbd_ws
, "ws", "WriteSames");
391 plb
.add_u64_counter(l_librbd_ws_bytes
, "ws_bytes", "WriteSame data", NULL
, 0, unit_t(BYTES
));
392 plb
.add_time_avg(l_librbd_ws_latency
, "ws_latency", "WriteSame latency");
393 plb
.add_u64_counter(l_librbd_cmp
, "cmp", "CompareAndWrites");
394 plb
.add_u64_counter(l_librbd_cmp_bytes
, "cmp_bytes", "Data size in cmps", NULL
, 0, unit_t(BYTES
));
395 plb
.add_time_avg(l_librbd_cmp_latency
, "cmp_latency", "Latency of cmps");
// Maintenance-operation counters.
396 plb
.add_u64_counter(l_librbd_snap_create
, "snap_create", "Snap creations");
397 plb
.add_u64_counter(l_librbd_snap_remove
, "snap_remove", "Snap removals");
398 plb
.add_u64_counter(l_librbd_snap_rollback
, "snap_rollback", "Snap rollbacks");
399 plb
.add_u64_counter(l_librbd_snap_rename
, "snap_rename", "Snap rename");
400 plb
.add_u64_counter(l_librbd_notify
, "notify", "Updated header notifications");
401 plb
.add_u64_counter(l_librbd_resize
, "resize", "Resizes");
402 plb
.add_u64_counter(l_librbd_readahead
, "readahead", "Read ahead");
403 plb
.add_u64_counter(l_librbd_readahead_bytes
, "readahead_bytes", "Data size in read ahead", NULL
, 0, unit_t(BYTES
));
404 plb
.add_u64_counter(l_librbd_invalidate_cache
, "invalidate_cache", "Cache invalidates");
406 plb
.add_time(l_librbd_opened_time
, "opened_time", "Opened time",
408 plb
.add_time(l_librbd_lock_acquired_time
, "lock_acquired_time",
409 "Lock acquired time", "lats", perf_prio
);
411 perfcounter
= plb
.create_perf_counters();
412 cct
->get_perfcounters_collection()->add(perfcounter
);
// Record when the image was opened.
414 perfcounter
->tset(l_librbd_opened_time
, ceph_clock_now());
// perf_stop(): unregisters this image's perf counters.
417 void ImageCtx::perf_stop() {
419 cct
->get_perfcounters_collection()->remove(perfcounter
);

// set_read_flag(): ORs an extra librados read flag into every future read.
423 void ImageCtx::set_read_flag(unsigned flag
) {
424 extra_read_flags
|= flag
;

// get_read_flags(): base flags plus, for snapshot reads (non-HEAD), the
// configured balance/localize read policy.
427 int ImageCtx::get_read_flags(snap_t snap_id
) {
428 int flags
= librados::OPERATION_NOFLAG
| extra_read_flags
;
429 if (snap_id
== LIBRADOS_SNAP_HEAD
)
432 if (balance_snap_reads
)
433 flags
|= librados::OPERATION_BALANCE_READS
;
434 else if (localize_snap_reads
)
435 flags
|= librados::OPERATION_LOCALIZE_READS
;
// snap_set(): switches this context to the named snapshot (requires
// snap_lock held for write); on success records id/namespace/name and
// points librados reads at that snapshot.
439 int ImageCtx::snap_set(cls::rbd::SnapshotNamespace in_snap_namespace
,
442 assert(snap_lock
.is_wlocked());
443 snap_t in_snap_id
= get_snap_id(in_snap_namespace
, in_snap_name
);
444 if (in_snap_id
!= CEPH_NOSNAP
) {
445 snap_id
= in_snap_id
;
446 snap_namespace
= in_snap_namespace
;
447 snap_name
= in_snap_name
;
449 data_ctx
.snap_set_read(snap_id
);

// snap_unset(): reverts to HEAD (CEPH_NOSNAP) and updates librados.
455 void ImageCtx::snap_unset()
457 assert(snap_lock
.is_wlocked());
458 snap_id
= CEPH_NOSNAP
;
462 data_ctx
.snap_set_read(snap_id
);

// get_snap_id(): looks up a snapshot id by (namespace, name); requires
// snap_lock held (read is sufficient).
465 snap_t
ImageCtx::get_snap_id(cls::rbd::SnapshotNamespace in_snap_namespace
,
466 string in_snap_name
) const
468 assert(snap_lock
.is_locked());
469 auto it
= snap_ids
.find({in_snap_namespace
, in_snap_name
});
470 if (it
!= snap_ids
.end())
// get_snap_info(): SnapInfo lookup by snap id; snap_lock must be held.
// Returns a pointer into snap_info (NULL on miss, per the elided tail).
475 const SnapInfo
* ImageCtx::get_snap_info(snap_t in_snap_id
) const
477 assert(snap_lock
.is_locked());
478 map
<snap_t
, SnapInfo
>::const_iterator it
=
479 snap_info
.find(in_snap_id
);
480 if (it
!= snap_info
.end())

// get_snap_name(): out-param variant returning the snapshot's name.
485 int ImageCtx::get_snap_name(snap_t in_snap_id
,
486 string
*out_snap_name
) const
488 assert(snap_lock
.is_locked());
489 const SnapInfo
*info
= get_snap_info(in_snap_id
);
491 *out_snap_name
= info
->name
;

// get_snap_namespace(): out-param variant returning the snap namespace.
497 int ImageCtx::get_snap_namespace(snap_t in_snap_id
,
498 cls::rbd::SnapshotNamespace
*out_snap_namespace
) const
500 assert(snap_lock
.is_locked());
501 const SnapInfo
*info
= get_snap_info(in_snap_id
);
503 *out_snap_namespace
= info
->snap_namespace
;

// get_parent_spec(): copies the snapshot's parent spec into out_pspec.
509 int ImageCtx::get_parent_spec(snap_t in_snap_id
,
510 ParentSpec
*out_pspec
) const
512 const SnapInfo
*info
= get_snap_info(in_snap_id
);
514 *out_pspec
= info
->parent
.spec
;
// get_current_size(): size of the currently selected snap/HEAD
// (return statement elided from this view); snap_lock required.
520 uint64_t ImageCtx::get_current_size() const
522 assert(snap_lock
.is_locked());

// get_object_size(): backing objects are 2^order bytes.
526 uint64_t ImageCtx::get_object_size() const
528 return 1ull << order
;

// get_object_name(): renders "<prefix>.<object number>" using the
// format_string built in init_layout.
531 string
ImageCtx::get_object_name(uint64_t num
) const {
532 char buf
[object_prefix
.length() + 32];
533 snprintf(buf
, sizeof(buf
), format_string
, num
);

// Striping accessors (bodies largely elided from this view).
537 uint64_t ImageCtx::get_stripe_unit() const
542 uint64_t ImageCtx::get_stripe_count() const
// Stripe period = stripe_count * object size.
547 uint64_t ImageCtx::get_stripe_period() const
549 return stripe_count
* (1ull << order
);

552 utime_t
ImageCtx::get_create_timestamp() const
554 return create_timestamp
;
// is_snap_protected(): reports whether the snapshot's protection status is
// PROTECTED; snap_lock must be held.
557 int ImageCtx::is_snap_protected(snap_t in_snap_id
,
558 bool *is_protected
) const
560 assert(snap_lock
.is_locked());
561 const SnapInfo
*info
= get_snap_info(in_snap_id
);
564 (info
->protection_status
== RBD_PROTECTION_STATUS_PROTECTED
);

// is_snap_unprotected(): mirror of the above for UNPROTECTED.
570 int ImageCtx::is_snap_unprotected(snap_t in_snap_id
,
571 bool *is_unprotected
) const
573 assert(snap_lock
.is_locked());
574 const SnapInfo
*info
= get_snap_info(in_snap_id
);
577 (info
->protection_status
== RBD_PROTECTION_STATUS_UNPROTECTED
);
// add_snap(): records a snapshot in both lookup maps (by id and by
// namespace+name); snap_lock must be held for write.
583 void ImageCtx::add_snap(cls::rbd::SnapshotNamespace in_snap_namespace
,
585 snap_t id
, uint64_t in_size
,
586 const ParentInfo
&parent
, uint8_t protection_status
,
587 uint64_t flags
, utime_t timestamp
)
589 assert(snap_lock
.is_wlocked());
591 SnapInfo
info(in_snap_name
, in_snap_namespace
,
592 in_size
, parent
, protection_status
, flags
, timestamp
);
593 snap_info
.insert({id
, info
});
594 snap_ids
.insert({{in_snap_namespace
, in_snap_name
}, id
});

// rm_snap(): erases the snapshot from the ordered id vector (erase-remove
// idiom) and from the name map; snap_lock must be held for write.
597 void ImageCtx::rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace
,
601 assert(snap_lock
.is_wlocked());
602 snaps
.erase(std::remove(snaps
.begin(), snaps
.end(), id
), snaps
.end());
604 snap_ids
.erase({in_snap_namespace
, in_snap_name
});
// get_image_size(): for HEAD, a pending shrink takes precedence (report the
// post-shrink size); for snapshots, the recorded SnapInfo size (tail elided).
607 uint64_t ImageCtx::get_image_size(snap_t in_snap_id
) const
609 assert(snap_lock
.is_locked());
610 if (in_snap_id
== CEPH_NOSNAP
) {
611 if (!resize_reqs
.empty() &&
612 resize_reqs
.front()->shrinking()) {
613 return resize_reqs
.front()->get_image_size();
618 const SnapInfo
*info
= get_snap_info(in_snap_id
);

// get_object_count(): number of backing objects for the given snap's size,
// computed from the file layout.
625 uint64_t ImageCtx::get_object_count(snap_t in_snap_id
) const {
626 assert(snap_lock
.is_locked());
627 uint64_t image_size
= get_image_size(in_snap_id
);
628 return Striper::get_num_objects(layout
, image_size
);
// test_features(): convenience overload that takes snap_lock itself, then
// delegates to the lock-asserting overload below.
631 bool ImageCtx::test_features(uint64_t features
) const
633 RWLock::RLocker
l(snap_lock
);
634 return test_features(features
, snap_lock
);

// test_features(): true iff every requested feature bit is enabled.
637 bool ImageCtx::test_features(uint64_t in_features
,
638 const RWLock
&in_snap_lock
) const
640 assert(snap_lock
.is_locked());
641 return ((features
& in_features
) == in_features
);
// get_flags(): returns the flags for HEAD (branch body elided) or for the
// given snapshot's SnapInfo; snap_lock must be held.
644 int ImageCtx::get_flags(librados::snap_t _snap_id
, uint64_t *_flags
) const
646 assert(snap_lock
.is_locked());
647 if (_snap_id
== CEPH_NOSNAP
) {
651 const SnapInfo
*info
= get_snap_info(_snap_id
);
653 *_flags
= info
->flags
;

// test_flags(): lock-taking convenience overload.
659 int ImageCtx::test_flags(librados::snap_t in_snap_id
,
660 uint64_t flags
, bool *flags_set
) const
662 RWLock::RLocker
l(snap_lock
);
663 return test_flags(in_snap_id
, flags
, snap_lock
, flags_set
);

// test_flags(): sets *flags_set iff all requested flag bits are present.
666 int ImageCtx::test_flags(librados::snap_t in_snap_id
,
667 uint64_t flags
, const RWLock
&in_snap_lock
,
668 bool *flags_set
) const
670 assert(snap_lock
.is_locked());
672 int r
= get_flags(in_snap_id
, &snap_flags
);
676 *flags_set
= ((snap_flags
& flags
) == flags
);

// update_flags(): toggles a flag on HEAD or on a specific snapshot
// (the actual set/clear of *_flags is in elided lines); requires
// snap_lock held for write.
680 int ImageCtx::update_flags(snap_t in_snap_id
, uint64_t flag
, bool enabled
)
682 assert(snap_lock
.is_wlocked());
684 if (in_snap_id
== CEPH_NOSNAP
) {
687 map
<snap_t
, SnapInfo
>::iterator it
= snap_info
.find(in_snap_id
);
688 if (it
== snap_info
.end()) {
691 _flags
= &it
->second
.flags
;
// get_parent_info(): parent metadata for HEAD (branch body elided) or for a
// snapshot; requires both snap_lock and parent_lock held.
702 const ParentInfo
* ImageCtx::get_parent_info(snap_t in_snap_id
) const
704 assert(snap_lock
.is_locked());
705 assert(parent_lock
.is_locked());
706 if (in_snap_id
== CEPH_NOSNAP
)
708 const SnapInfo
*info
= get_snap_info(in_snap_id
);
710 return &info
->parent
;

// Thin accessors over get_parent_info(); each returns one ParentSpec field
// (error/NULL handling lines are elided from this view).
714 int64_t ImageCtx::get_parent_pool_id(snap_t in_snap_id
) const
716 const ParentInfo
*info
= get_parent_info(in_snap_id
);
718 return info
->spec
.pool_id
;

722 string
ImageCtx::get_parent_image_id(snap_t in_snap_id
) const
724 const ParentInfo
*info
= get_parent_info(in_snap_id
);
726 return info
->spec
.image_id
;

730 uint64_t ImageCtx::get_parent_snap_id(snap_t in_snap_id
) const
732 const ParentInfo
*info
= get_parent_info(in_snap_id
);
734 return info
->spec
.snap_id
;

// get_parent_overlap(): out-param variant returning the parent overlap.
738 int ImageCtx::get_parent_overlap(snap_t in_snap_id
, uint64_t *overlap
) const
740 assert(snap_lock
.is_locked());
741 const ParentInfo
*info
= get_parent_info(in_snap_id
);
743 *overlap
= info
->overlap
;
// aio_read_from_cache(): builds a single-extent OSDRead against the current
// snap and submits it through the ObjectCacher; completes onfinish inline
// if readx returns synchronously.
749 void ImageCtx::aio_read_from_cache(object_t o
, uint64_t object_no
,
750 bufferlist
*bl
, size_t len
,
751 uint64_t off
, Context
*onfinish
,
752 int fadvise_flags
, ZTracer::Trace
*trace
) {
// snap_id is read under snap_lock only long enough to prepare the read.
753 snap_lock
.get_read();
754 ObjectCacher::OSDRead
*rd
= object_cacher
->prepare_read(snap_id
, bl
, fadvise_flags
);
755 snap_lock
.put_read();
756 ObjectExtent
extent(o
, object_no
, off
, len
, 0);
757 extent
.oloc
.pool
= data_ctx
.get_id();
758 extent
.buffer_extents
.push_back(make_pair(0, len
));
759 rd
->extents
.push_back(extent
);
761 int r
= object_cacher
->readx(rd
, object_set
, onfinish
, trace
);
764 onfinish
->complete(r
);

// write_to_cache(): builds a single-extent OSDWrite tagged with the current
// snap context and journal tid, then submits it under cache_lock.
767 void ImageCtx::write_to_cache(object_t o
, const bufferlist
& bl
, size_t len
,
768 uint64_t off
, Context
*onfinish
,
769 int fadvise_flags
, uint64_t journal_tid
,
770 ZTracer::Trace
*trace
) {
771 snap_lock
.get_read();
772 ObjectCacher::OSDWrite
*wr
= object_cacher
->prepare_write(
773 snapc
, bl
, ceph::real_time::min(), fadvise_flags
, journal_tid
);
774 snap_lock
.put_read();
775 ObjectExtent
extent(o
, 0, off
, len
, 0);
776 extent
.oloc
.pool
= data_ctx
.get_id();
777 // XXX: nspace is always default, io_ctx_impl field private
778 //extent.oloc.nspace = data_ctx.io_ctx_impl->oloc.nspace;
779 extent
.buffer_extents
.push_back(make_pair(0, len
));
780 wr
->extents
.push_back(extent
);
782 Mutex::Locker
l(cache_lock
);
783 object_cacher
->writex(wr
, object_set
, onfinish
, trace
);
// user_flushed(): on the first user-issued flush in writethrough-until-
// flush mode, switch the cache over to writeback by restoring max_dirty.
787 void ImageCtx::user_flushed() {
788 if (object_cacher
&& cache_writethrough_until_flush
) {
790 bool flushed_before
= flush_encountered
;
793 uint64_t max_dirty
= cache_max_dirty
;
794 if (!flushed_before
&& max_dirty
> 0) {
796 flush_encountered
= true;
799 ldout(cct
, 10) << "saw first user flush, enabling writeback" << dendl
;
800 Mutex::Locker
l(cache_lock
);
801 object_cacher
->set_max_dirty(max_dirty
);
// flush_cache(): kicks an ObjectCacher flush of this image's object set,
// invoking onfinish when complete.
806 void ImageCtx::flush_cache(Context
*onfinish
) {
808 object_cacher
->flush_set(object_set
, onfinish
);

// shut_down_cache(): no-op completion when caching is disabled; otherwise
// release clean objects, then flush -> invalidate (purge on error,
// non-reentrant) -> stop the cacher -> complete on_finish.
812 void ImageCtx::shut_down_cache(Context
*on_finish
) {
813 if (object_cacher
== NULL
) {
814 on_finish
->complete(0);
819 object_cacher
->release_set(object_set
);
822 C_ShutDownCache
*shut_down
= new C_ShutDownCache(this, on_finish
);
823 flush_cache(new C_InvalidateCache(this, true, false, shut_down
));
// invalidate_cache() [synchronous]: flushes async ops, then flushes and
// invalidates the cache, blocking on a local condition for the result
// (the C_SaferCond declaration is among the elided lines).
826 int ImageCtx::invalidate_cache(bool purge_on_error
) {
827 flush_async_operations();
828 if (object_cacher
== NULL
) {
833 object_cacher
->release_set(object_set
);
// reentrant_safe=true: safe to complete inline on the calling thread.
837 flush_cache(new C_InvalidateCache(this, purge_on_error
, true, &ctx
));
839 int result
= ctx
.wait();
843 void ImageCtx::invalidate_cache(bool purge_on_error
, Context
*on_finish
) {
// invalidate_cache() [async]: completes immediately via the work queue
// when caching is off; otherwise flush + invalidate, deferring completion
// through the work queue (reentrant_safe=false).
844 if (object_cacher
== NULL
) {
845 op_work_queue
->queue(on_finish
, 0);
850 object_cacher
->release_set(object_set
);
853 flush_cache(new C_InvalidateCache(this, purge_on_error
, false, on_finish
));
// clear_nonexistence_cache(): drops cached "object does not exist" state;
// caller must hold cache_lock.
856 void ImageCtx::clear_nonexistence_cache() {
857 assert(cache_lock
.is_locked());
860 object_cacher
->clear_nonexistence(object_set
);

// is_cache_empty(): true when no objects remain in this image's cache set.
863 bool ImageCtx::is_cache_empty() {
864 Mutex::Locker
locker(cache_lock
);
865 return object_cacher
->set_is_empty(object_set
);

// register_watch(): creates the image header watcher (must not already
// exist) and registers it asynchronously.
868 void ImageCtx::register_watch(Context
*on_finish
) {
869 assert(image_watcher
== NULL
);
870 image_watcher
= new ImageWatcher
<>(*this);
871 image_watcher
->register_watch(on_finish
);
// prune_parent_extents(): clips a sorted extent list to the parent overlap —
// drops extents wholly past it, trims the last straddling extent, then
// (in partially elided lines) sums the remaining length for the return.
874 uint64_t ImageCtx::prune_parent_extents(vector
<pair
<uint64_t,uint64_t> >& objectx
,
877 // drop extents completely beyond the overlap
878 while (!objectx
.empty() && objectx
.back().first
>= overlap
)
881 // trim final overlapping extent
882 if (!objectx
.empty() && objectx
.back().first
+ objectx
.back().second
> overlap
)
883 objectx
.back().second
= overlap
- objectx
.back().first
;
886 for (vector
<pair
<uint64_t,uint64_t> >::iterator p
= objectx
.begin();
890 ldout(cct
, 10) << "prune_parent_extents image overlap " << overlap
891 << ", object overlap " << len
892 << " from image extents " << objectx
<< dendl
;
// flush_async_operations() [blocking]: waits on a local condition (its
// declaration is elided) for the async variant below.
896 void ImageCtx::flush_async_operations() {
898 flush_async_operations(&ctx
);

// flush_async_operations() [async]: if ops are in flight, attach on_finish
// as a flush context to the oldest one; otherwise complete immediately
// (outside the lock, per the elided control flow).
902 void ImageCtx::flush_async_operations(Context
*on_finish
) {
904 Mutex::Locker
l(async_ops_lock
);
905 if (!async_ops
.empty()) {
906 ldout(cct
, 20) << "flush async operations: " << on_finish
<< " "
907 << "count=" << async_ops
.size() << dendl
;
908 async_ops
.front()->add_flush_context(on_finish
);
912 on_finish
->complete(0);

// flush() [blocking]: wraps the async flush with a C_SaferCond.
915 int ImageCtx::flush() {
916 C_SaferCond cond_ctx
;
918 return cond_ctx
.wait();

// flush() [async]: defers the callback off the current thread, flushes the
// cache (if enabled) after in-flight AIO, then flushes async operations.
921 void ImageCtx::flush(Context
*on_safe
) {
922 // ensure no locks are held when flush is complete
923 on_safe
= util::create_async_context_callback(*this, on_safe
);
925 if (object_cacher
!= NULL
) {
926 // flush cache after completing all in-flight AIO ops
927 on_safe
= new C_FlushCache(this, on_safe
);
929 flush_async_operations(on_safe
);
// cancel_async_requests() [blocking]: waits on a local condition (its
// declaration is elided) for the async variant below.
932 void ImageCtx::cancel_async_requests() {
934 cancel_async_requests(&ctx
);

// cancel_async_requests() [async]: cancels each outstanding async request
// and parks on_finish in the waiter list until they drain; completes
// immediately when none are pending.
938 void ImageCtx::cancel_async_requests(Context
*on_finish
) {
940 Mutex::Locker
async_ops_locker(async_ops_lock
);
941 if (!async_requests
.empty()) {
942 ldout(cct
, 10) << "canceling async requests: count="
943 << async_requests
.size() << dendl
;
944 for (auto req
: async_requests
) {
945 ldout(cct
, 10) << "canceling async request: " << req
<< dendl
;
948 async_requests_waiters
.push_back(on_finish
);
953 on_finish
->complete(0);

// clear_pending_completions(): drops queued AioCompletions under
// completed_reqs_lock.
956 void ImageCtx::clear_pending_completions() {
957 Mutex::Locker
l(completed_reqs_lock
);
958 ldout(cct
, 10) << "clear pending AioCompletion: count="
959 << completed_reqs
.size() << dendl
;
960 completed_reqs
.clear();
// _filter_metadata_confs(): scans metadata pairs for keys beginning with
// `prefix`, strips the prefix, and copies recognized config keys into *res.
// The early-termination comparison relies on `pairs` being sorted (std::map).
963 bool ImageCtx::_filter_metadata_confs(const string
&prefix
,
964 map
<string
, bool> &configs
,
965 const map
<string
, bufferlist
> &pairs
,
966 map
<string
, bufferlist
> *res
) {
967 size_t conf_prefix_len
= prefix
.size();
969 for (auto it
: pairs
) {
// Key sorts after the prefix range: nothing further can match.
970 if (it
.first
.compare(0, MIN(conf_prefix_len
, it
.first
.size()), prefix
) > 0)
// Key equal to the bare prefix carries no option name; skip.
973 if (it
.first
.size() <= conf_prefix_len
)
976 string key
= it
.first
.substr(conf_prefix_len
, it
.first
.size() - conf_prefix_len
);
977 auto cit
= configs
.find(key
);
978 if (cit
!= configs
.end()) {
980 res
->insert(make_pair(key
, it
.second
));
// apply_metadata(): overlays per-image metadata ("conf_*" keys) onto a
// scratch md_config_t, then assigns every supported rbd_* option into the
// corresponding ImageCtx member via ASSIGN_OPTION (image override first,
// falling back to the global config — branch condition lines elided).
986 void ImageCtx::apply_metadata(const std::map
<std::string
, bufferlist
> &meta
,
988 ldout(cct
, 20) << __func__
<< dendl
;
// Whitelist of option keys honored from image metadata.
989 std::map
<string
, bool> configs
= boost::assign::map_list_of(
990 "rbd_non_blocking_aio", false)(
992 "rbd_cache_writethrough_until_flush", false)(
993 "rbd_cache_size", false)(
994 "rbd_cache_max_dirty", false)(
995 "rbd_cache_target_dirty", false)(
996 "rbd_cache_max_dirty_age", false)(
997 "rbd_cache_max_dirty_object", false)(
998 "rbd_cache_block_writes_upfront", false)(
999 "rbd_concurrent_management_ops", false)(
1000 "rbd_balance_snap_reads", false)(
1001 "rbd_localize_snap_reads", false)(
1002 "rbd_balance_parent_reads", false)(
1003 "rbd_localize_parent_reads", false)(
1004 "rbd_sparse_read_threshold_bytes", false)(
1005 "rbd_readahead_trigger_requests", false)(
1006 "rbd_readahead_max_bytes", false)(
1007 "rbd_readahead_disable_after_bytes", false)(
1008 "rbd_clone_copy_on_read", false)(
1009 "rbd_blacklist_on_break_lock", false)(
1010 "rbd_blacklist_expire_seconds", false)(
1011 "rbd_request_timed_out_seconds", false)(
1012 "rbd_journal_order", false)(
1013 "rbd_journal_splay_width", false)(
1014 "rbd_journal_commit_age", false)(
1015 "rbd_journal_object_flush_interval", false)(
1016 "rbd_journal_object_flush_bytes", false)(
1017 "rbd_journal_object_flush_age", false)(
1018 "rbd_journal_pool", false)(
1019 "rbd_journal_max_payload_bytes", false)(
1020 "rbd_journal_max_concurrent_object_sets", false)(
1021 "rbd_mirroring_resync_after_disconnect", false)(
1022 "rbd_mirroring_replay_delay", false)(
1023 "rbd_skip_partial_discard", false);
// Scratch config receives the image-local overrides.
1025 md_config_t local_config_t
;
1026 std::map
<std::string
, bufferlist
> res
;
1028 _filter_metadata_confs(METADATA_CONF_PREFIX
, configs
, meta
, &res
);
1029 for (auto it
: res
) {
1030 std::string
val(it
.second
.c_str(), it
.second
.length());
1031 int j
= local_config_t
.set_val(it
.first
.c_str(), val
);
// Bad values are logged but do not abort the remaining assignments.
1033 lderr(cct
) << __func__
<< " failed to set config " << it
.first
1034 << " with value " << it
.second
.c_str() << ": " << j
1039 #define ASSIGN_OPTION(config, type) \
1041 string key = "rbd_"; \
1042 key = key + #config; \
1044 config = local_config_t.get_val<type>("rbd_"#config); \
1046 config = cct->_conf->get_val<type>("rbd_"#config); \
1049 ASSIGN_OPTION(non_blocking_aio
, bool);
1050 ASSIGN_OPTION(cache
, bool);
1051 ASSIGN_OPTION(cache_writethrough_until_flush
, bool);
1052 ASSIGN_OPTION(cache_size
, int64_t);
1053 ASSIGN_OPTION(cache_max_dirty
, int64_t);
1054 ASSIGN_OPTION(cache_target_dirty
, int64_t);
1055 ASSIGN_OPTION(cache_max_dirty_age
, double);
1056 ASSIGN_OPTION(cache_max_dirty_object
, int64_t);
1057 ASSIGN_OPTION(cache_block_writes_upfront
, bool);
1058 ASSIGN_OPTION(concurrent_management_ops
, int64_t);
1059 ASSIGN_OPTION(balance_snap_reads
, bool);
1060 ASSIGN_OPTION(localize_snap_reads
, bool);
1061 ASSIGN_OPTION(balance_parent_reads
, bool);
1062 ASSIGN_OPTION(localize_parent_reads
, bool);
1063 ASSIGN_OPTION(sparse_read_threshold_bytes
, uint64_t);
1064 ASSIGN_OPTION(readahead_trigger_requests
, int64_t);
1065 ASSIGN_OPTION(readahead_max_bytes
, int64_t);
1066 ASSIGN_OPTION(readahead_disable_after_bytes
, int64_t);
1067 ASSIGN_OPTION(clone_copy_on_read
, bool);
1068 ASSIGN_OPTION(blacklist_on_break_lock
, bool);
1069 ASSIGN_OPTION(blacklist_expire_seconds
, int64_t);
1070 ASSIGN_OPTION(request_timed_out_seconds
, int64_t);
1071 ASSIGN_OPTION(enable_alloc_hint
, bool);
1072 ASSIGN_OPTION(journal_order
, uint64_t);
1073 ASSIGN_OPTION(journal_splay_width
, uint64_t);
1074 ASSIGN_OPTION(journal_commit_age
, double);
1075 ASSIGN_OPTION(journal_object_flush_interval
, int64_t);
1076 ASSIGN_OPTION(journal_object_flush_bytes
, int64_t);
1077 ASSIGN_OPTION(journal_object_flush_age
, double);
1078 ASSIGN_OPTION(journal_max_payload_bytes
, uint64_t);
1079 ASSIGN_OPTION(journal_max_concurrent_object_sets
, int64_t);
1080 ASSIGN_OPTION(mirroring_resync_after_disconnect
, bool);
1081 ASSIGN_OPTION(mirroring_replay_delay
, int64_t);
1082 ASSIGN_OPTION(skip_partial_discard
, bool);
1083 ASSIGN_OPTION(blkin_trace_all
, bool);
1086 ASSIGN_OPTION(journal_pool
, std::string
);
// 0 means "use the object size" for the sparse-read threshold.
1089 if (sparse_read_threshold_bytes
== 0) {
1090 sparse_read_threshold_bytes
= get_object_size();
// Factory methods allowing tests to substitute mock objects.
1094 ExclusiveLock
<ImageCtx
> *ImageCtx::create_exclusive_lock() {
1095 return new ExclusiveLock
<ImageCtx
>(*this);

1098 ObjectMap
<ImageCtx
> *ImageCtx::create_object_map(uint64_t snap_id
) {
1099 return new ObjectMap
<ImageCtx
>(*this, snap_id
);

1102 Journal
<ImageCtx
> *ImageCtx::create_journal() {
1103 return new Journal
<ImageCtx
>(*this);
// set_image_name(): updates the cached image name (and old-format header
// oid) under owner/snap locks so rename can be repeated.
1106 void ImageCtx::set_image_name(const std::string
&image_name
) {
1107 // update the name so rename can be invoked repeatedly
1108 RWLock::RLocker
owner_locker(owner_lock
);
1109 RWLock::WLocker
snap_locker(snap_lock
);
1112 header_oid
= util::old_header_name(image_name
);

// notify_update() [fire-and-forget]: marks local state updated and
// broadcasts a header-update notification via the static watcher helper.
1116 void ImageCtx::notify_update() {
1117 state
->handle_update_notification();
1118 ImageWatcher
<>::notify_header_update(md_ctx
, header_oid
);

// notify_update() [async]: same, but through this image's watcher with a
// completion callback.
1121 void ImageCtx::notify_update(Context
*on_finish
) {
1122 state
->handle_update_notification();
1123 image_watcher
->notify_header_update(on_finish
);
// Exclusive-lock policy accessors: reads require owner_lock (any mode),
// replacement requires it held for write; policy pointers are owned here.
1126 exclusive_lock::Policy
*ImageCtx::get_exclusive_lock_policy() const {
1127 assert(owner_lock
.is_locked());
1128 assert(exclusive_lock_policy
!= nullptr);
1129 return exclusive_lock_policy
;

1132 void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy
*policy
) {
1133 assert(owner_lock
.is_wlocked());
1134 assert(policy
!= nullptr);
1135 delete exclusive_lock_policy
;
1136 exclusive_lock_policy
= policy
;

// Journal policy accessors: same pattern, guarded by snap_lock.
1139 journal::Policy
*ImageCtx::get_journal_policy() const {
1140 assert(snap_lock
.is_locked());
1141 assert(journal_policy
!= nullptr);
1142 return journal_policy
;

1145 void ImageCtx::set_journal_policy(journal::Policy
*policy
) {
1146 assert(snap_lock
.is_wlocked());
1147 assert(policy
!= nullptr);
1148 delete journal_policy
;
1149 journal_policy
= policy
;

// Writeback is effective only when caching is on and dirty data is allowed.
1152 bool ImageCtx::is_writeback_cache_enabled() const {
1153 return (cache
&& cache_max_dirty
> 0);
// get_thread_pool_instance(): fetches (or lazily creates) the process-wide
// ThreadPoolSingleton and hands back both the pool and its op work queue.
1156 void ImageCtx::get_thread_pool_instance(CephContext
*cct
,
1157 ThreadPool
**thread_pool
,
1158 ContextWQ
**op_work_queue
) {
1159 ThreadPoolSingleton
*thread_pool_singleton
;
1160 cct
->lookup_or_create_singleton_object
<ThreadPoolSingleton
>(
1161 thread_pool_singleton
, "librbd::thread_pool");
1162 *thread_pool
= thread_pool_singleton
;
1163 *op_work_queue
= thread_pool_singleton
->op_work_queue
;
1166 void ImageCtx::get_timer_instance(CephContext
*cct
, SafeTimer
**timer
,
1167 Mutex
**timer_lock
) {
1168 SafeTimerSingleton
*safe_timer_singleton
;
1169 cct
->lookup_or_create_singleton_object
<SafeTimerSingleton
>(
1170 safe_timer_singleton
, "librbd::journal::safe_timer");
1171 *timer
= safe_timer_singleton
;
1172 *timer_lock
= &safe_timer_singleton
->lock
;