]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/ImageCtx.cc
bump version to 12.0.3-pve3
[ceph.git] / ceph / src / librbd / ImageCtx.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#include <errno.h>
4#include <boost/assign/list_of.hpp>
5#include <stddef.h>
6
7#include "common/ceph_context.h"
8#include "common/dout.h"
9#include "common/errno.h"
10#include "common/perf_counters.h"
11#include "common/WorkQueue.h"
12#include "common/Timer.h"
13
14#include "librbd/AsyncOperation.h"
15#include "librbd/AsyncRequest.h"
16#include "librbd/ExclusiveLock.h"
17#include "librbd/internal.h"
18#include "librbd/ImageCtx.h"
19#include "librbd/ImageState.h"
20#include "librbd/ImageWatcher.h"
21#include "librbd/Journal.h"
22#include "librbd/LibrbdAdminSocketHook.h"
23#include "librbd/ObjectMap.h"
24#include "librbd/Operations.h"
25#include "librbd/operation/ResizeRequest.h"
26#include "librbd/Utils.h"
27#include "librbd/LibrbdWriteback.h"
28#include "librbd/exclusive_lock/AutomaticPolicy.h"
29#include "librbd/exclusive_lock/StandardPolicy.h"
30#include "librbd/io/AioCompletion.h"
31#include "librbd/io/ImageRequestWQ.h"
32#include "librbd/journal/StandardPolicy.h"
33
34#include "osdc/Striper.h"
35#include <boost/bind.hpp>
36
37#define dout_subsys ceph_subsys_rbd
38#undef dout_prefix
39#define dout_prefix *_dout << "librbd::ImageCtx: "
40
41using std::map;
42using std::pair;
43using std::set;
44using std::string;
45using std::vector;
46
47using ceph::bufferlist;
48using librados::snap_t;
49using librados::IoCtx;
50
51namespace librbd {
52
53namespace {
54
// Process-wide librbd worker thread pool, registered as a CephContext
// singleton (see ImageCtx::get_thread_pool_instance).  It also owns the
// shared op_work_queue that all ImageCtx instances use for deferred work.
class ThreadPoolSingleton : public ThreadPool {
public:
  // shared context work queue; owned by this singleton
  ContextWQ *op_work_queue;

  explicit ThreadPoolSingleton(CephContext *cct)
    : ThreadPool(cct, "librbd::thread_pool", "tp_librbd", 1,
                 "rbd_op_threads"),
      op_work_queue(new ContextWQ("librbd::op_work_queue",
                                  cct->_conf->rbd_op_thread_timeout,
                                  this)) {
    start();
  }
  ~ThreadPoolSingleton() override {
    // flush every queued context before tearing down the pool threads
    op_work_queue->drain();
    delete op_work_queue;

    stop();
  }
};
74
75class SafeTimerSingleton : public SafeTimer {
76public:
77 Mutex lock;
78
79 explicit SafeTimerSingleton(CephContext *cct)
80 : SafeTimer(cct, lock, true),
81 lock("librbd::Journal::SafeTimerSingleton::lock") {
82 init();
83 }
84 ~SafeTimerSingleton() {
85 Mutex::Locker locker(lock);
86 shutdown();
87 }
88};
89
// Completion that triggers a cache flush once all in-flight AIO has
// completed; used by ImageCtx::flush() to chain "wait for AIO" -> "flush
// cache" -> user callback.
struct C_FlushCache : public Context {
  ImageCtx *image_ctx;
  Context *on_safe;

  C_FlushCache(ImageCtx *_image_ctx, Context *_on_safe)
    : image_ctx(_image_ctx), on_safe(_on_safe) {
  }
  void finish(int r) override {
    // successful cache flush indicates all IO is now safe
    image_ctx->flush_cache(on_safe);
  }
};
102
// Completion that stops the object cacher after its contents have been
// flushed/invalidated, then forwards the result to the caller's context.
struct C_ShutDownCache : public Context {
  ImageCtx *image_ctx;
  Context *on_finish;

  C_ShutDownCache(ImageCtx *_image_ctx, Context *_on_finish)
    : image_ctx(_image_ctx), on_finish(_on_finish) {
  }
  void finish(int r) override {
    image_ctx->object_cacher->stop();
    on_finish->complete(r);
  }
};
115
// Completion run (with cache_lock held) after a cache flush attempt: it
// releases all clean objects from the cache and, on blacklisting or (when
// purge_on_error is set) on flush failure, purges dirty data as well.
// reentrant_safe controls whether on_finish may be invoked inline or must be
// bounced through the op work queue to avoid lock recursion.
struct C_InvalidateCache : public Context {
  ImageCtx *image_ctx;
  bool purge_on_error;
  bool reentrant_safe;
  Context *on_finish;

  C_InvalidateCache(ImageCtx *_image_ctx, bool _purge_on_error,
                    bool _reentrant_safe, Context *_on_finish)
    : image_ctx(_image_ctx), purge_on_error(_purge_on_error),
      reentrant_safe(_reentrant_safe), on_finish(_on_finish) {
  }
  void finish(int r) override {
    assert(image_ctx->cache_lock.is_locked());
    CephContext *cct = image_ctx->cct;

    if (r == -EBLACKLISTED) {
      // client was blacklisted: dirty data can never be written back
      lderr(cct) << "Blacklisted during flush! Purging cache..." << dendl;
      image_ctx->object_cacher->purge_set(image_ctx->object_set);
    } else if (r != 0 && purge_on_error) {
      lderr(cct) << "invalidate cache encountered error "
                 << cpp_strerror(r) << " !Purging cache..." << dendl;
      image_ctx->object_cacher->purge_set(image_ctx->object_set);
    } else if (r != 0) {
      lderr(cct) << "flush_cache returned " << r << dendl;
    }

    // drop whatever is now clean; anything left is stuck dirty
    loff_t unclean = image_ctx->object_cacher->release_set(
      image_ctx->object_set);
    if (unclean == 0) {
      r = 0;
    } else {
      lderr(cct) << "could not release all objects from cache: "
                 << unclean << " bytes remain" << dendl;
      if (r == 0) {
        r = -EBUSY;
      }
    }

    if (reentrant_safe) {
      on_finish->complete(r);
    } else {
      // defer to the work queue so on_finish doesn't run under cache_lock
      image_ctx->op_work_queue->queue(on_finish, r);
    }
  }

};
162
163} // anonymous namespace
164
// prefix identifying per-image metadata keys that override config options
// (see apply_metadata / _filter_metadata_confs)
const string ImageCtx::METADATA_CONF_PREFIX = "conf_";
166
// Construct an image context for the image named image_name / id image_id
// (id may be empty until the header is probed), optionally opened at
// snapshot 'snap', over the pool IoCtx 'p'.  'ro' marks the image read-only.
// Actual image state is loaded later via ImageState; this only wires up
// queues, locks and policies.
ImageCtx::ImageCtx(const string &image_name, const string &image_id,
                   const char *snap, IoCtx& p, bool ro)
  : cct((CephContext*)p.cct()),
    perfcounter(NULL),
    snap_id(CEPH_NOSNAP),
    snap_exists(true),
    read_only(ro),
    flush_encountered(false),
    exclusive_locked(false),
    name(image_name),
    image_watcher(NULL),
    journal(NULL),
    // every lock name is made unique per instance for lockdep
    owner_lock(util::unique_lock_name("librbd::ImageCtx::owner_lock", this)),
    md_lock(util::unique_lock_name("librbd::ImageCtx::md_lock", this)),
    cache_lock(util::unique_lock_name("librbd::ImageCtx::cache_lock", this)),
    snap_lock(util::unique_lock_name("librbd::ImageCtx::snap_lock", this)),
    parent_lock(util::unique_lock_name("librbd::ImageCtx::parent_lock", this)),
    object_map_lock(util::unique_lock_name("librbd::ImageCtx::object_map_lock", this)),
    async_ops_lock(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this)),
    copyup_list_lock(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this)),
    completed_reqs_lock(util::unique_lock_name("librbd::ImageCtx::completed_reqs_lock", this)),
    extra_read_flags(0),
    old_format(true),
    order(0), size(0), features(0),
    format_string(NULL),
    id(image_id), parent(NULL),
    stripe_unit(0), stripe_count(0), flags(0),
    object_cacher(NULL), writeback_handler(NULL), object_set(NULL),
    readahead(),
    total_bytes_read(0),
    state(new ImageState<>(this)),
    operations(new Operations<>(*this)),
    exclusive_lock(nullptr), object_map(nullptr),
    io_work_queue(nullptr), op_work_queue(nullptr),
    asok_hook(nullptr)
{
  // take independent references on the pool context for metadata and data
  md_ctx.dup(p);
  data_ctx.dup(p);
  if (snap)
    snap_name = snap;

  memset(&header, 0, sizeof(header));

  // op_work_queue is shared process-wide; io_work_queue is per-image
  ThreadPool *thread_pool;
  get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
  io_work_queue = new io::ImageRequestWQ(
    this, "librbd::io_work_queue", cct->_conf->rbd_op_thread_timeout,
    thread_pool);

  if (cct->_conf->rbd_auto_exclusive_lock_until_manual_request) {
    exclusive_lock_policy = new exclusive_lock::AutomaticPolicy(this);
  } else {
    exclusive_lock_policy = new exclusive_lock::StandardPolicy(this);
  }
  journal_policy = new journal::StandardPolicy<ImageCtx>(this);
}
223
// Tear down the context.  Callers must have already shut down the watcher,
// exclusive lock, object map, journal and admin-socket hook (asserted
// below); this releases the cache, flushes outstanding librados AIO and
// drains the per-image IO queue before deleting owned components.
ImageCtx::~ImageCtx() {
  assert(image_watcher == NULL);
  assert(exclusive_lock == NULL);
  assert(object_map == NULL);
  assert(journal == NULL);
  assert(asok_hook == NULL);

  if (perfcounter) {
    perf_stop();
  }
  if (object_cacher) {
    delete object_cacher;
    object_cacher = NULL;
  }
  if (writeback_handler) {
    delete writeback_handler;
    writeback_handler = NULL;
  }
  if (object_set) {
    delete object_set;
    object_set = NULL;
  }
  delete[] format_string;

  // wait for any librados callbacks still in flight before the queues die
  md_ctx.aio_flush();
  data_ctx.aio_flush();
  io_work_queue->drain();

  delete journal_policy;
  delete exclusive_lock_policy;
  delete io_work_queue;
  delete operations;
  delete state;
}
258
// Post-construction initialization once the header oid (and, for new-format
// images, the id) is known: registers the admin socket hook, starts perf
// counters, optionally brings up the writeback object cacher, and configures
// readahead.  Requires header_oid to be set.
void ImageCtx::init() {
  assert(!header_oid.empty());
  assert(old_format || !id.empty());

  asok_hook = new LibrbdAdminSocketHook(this);

  // perf counter / cache instance name: librbd-<id>-<pool>-<image>[-<snap>]
  string pname = string("librbd-") + id + string("-") +
    data_ctx.get_pool_name() + string("-") + name;
  if (!snap_name.empty()) {
    pname += "-";
    pname += snap_name;
  }

  perf_start(pname);

  if (cache) {
    Mutex::Locker l(cache_lock);
    ldout(cct, 20) << "enabling caching..." << dendl;
    writeback_handler = new LibrbdWriteback(this, cache_lock);

    // writethrough until first flush: allow no dirty data initially
    uint64_t init_max_dirty = cache_max_dirty;
    if (cache_writethrough_until_flush)
      init_max_dirty = 0;
    ldout(cct, 20) << "Initial cache settings:"
                   << " size=" << cache_size
                   << " num_objects=" << 10
                   << " max_dirty=" << init_max_dirty
                   << " target_dirty=" << cache_target_dirty
                   << " max_dirty_age="
                   << cache_max_dirty_age << dendl;

    object_cacher = new ObjectCacher(cct, pname, *writeback_handler, cache_lock,
                                     NULL, NULL,
                                     cache_size,
                                     10,  /* reset this in init */
                                     init_max_dirty,
                                     cache_target_dirty,
                                     cache_max_dirty_age,
                                     cache_block_writes_upfront);

    // size object cache appropriately: explicit config wins, else derive a
    // bounded estimate from the byte budget
    uint64_t obj = cache_max_dirty_object;
    if (!obj) {
      obj = MIN(2000, MAX(10, cache_size / 100 / sizeof(ObjectCacher::Object)));
    }
    ldout(cct, 10) << " cache bytes " << cache_size
                   << " -> about " << obj << " objects" << dendl;
    object_cacher->set_max_objects(obj);

    object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0);
    object_set->return_enoent = true;
    object_cacher->start();
  }

  readahead.set_trigger_requests(readahead_trigger_requests);
  readahead.set_max_readahead_size(readahead_max_bytes);
}
316
// Release the image watcher and admin socket hook; run during image close
// before the destructor's remaining teardown (which asserts both are gone).
void ImageCtx::shutdown() {
  delete image_watcher;
  image_watcher = nullptr;

  delete asok_hook;
  asok_hook = nullptr;
}
324
// Derive the file-striping layout, readahead alignments and the printf
// format string for object names from order / stripe_unit / stripe_count /
// object_prefix.  Called whenever those header fields are (re)loaded.
void ImageCtx::init_layout()
{
  if (stripe_unit == 0 || stripe_count == 0) {
    // no striping parameters: fall back to one full object per stripe
    stripe_unit = 1ull << order;
    stripe_count = 1;
  }

  vector<uint64_t> alignments;
  alignments.push_back(stripe_count << order); // object set (in file striping terminology)
  alignments.push_back(stripe_unit * stripe_count); // stripe
  alignments.push_back(stripe_unit); // stripe unit
  readahead.set_alignments(alignments);

  layout = file_layout_t();
  layout.stripe_unit = stripe_unit;
  layout.stripe_count = stripe_count;
  layout.object_size = 1ull << order;
  layout.pool_id = data_ctx.get_id();  // FIXME: pool id overflow?

  // object names: "<prefix>.<object number>" — 12 hex digits for v1 images,
  // 16 for v2
  delete[] format_string;
  size_t len = object_prefix.length() + 16;
  format_string = new char[len];
  if (old_format) {
    snprintf(format_string, len, "%s.%%012llx", object_prefix.c_str());
  } else {
    snprintf(format_string, len, "%s.%%016llx", object_prefix.c_str());
  }

  ldout(cct, 10) << "init_layout stripe_unit " << stripe_unit
                 << " stripe_count " << stripe_count
                 << " object_size " << layout.object_size
                 << " prefix " << object_prefix
                 << " format " << format_string
                 << dendl;
}
360
// Create and register this image's perf counters under the given instance
// name.  Paired with perf_stop().
void ImageCtx::perf_start(string name) {
  PerfCountersBuilder plb(cct, name, l_librbd_first, l_librbd_last);

  plb.add_u64_counter(l_librbd_rd, "rd", "Reads");
  plb.add_u64_counter(l_librbd_rd_bytes, "rd_bytes", "Data size in reads");
  plb.add_time_avg(l_librbd_rd_latency, "rd_latency", "Latency of reads");
  plb.add_u64_counter(l_librbd_wr, "wr", "Writes");
  plb.add_u64_counter(l_librbd_wr_bytes, "wr_bytes", "Written data");
  plb.add_time_avg(l_librbd_wr_latency, "wr_latency", "Write latency");
  plb.add_u64_counter(l_librbd_discard, "discard", "Discards");
  plb.add_u64_counter(l_librbd_discard_bytes, "discard_bytes", "Discarded data");
  plb.add_time_avg(l_librbd_discard_latency, "discard_latency", "Discard latency");
  plb.add_u64_counter(l_librbd_flush, "flush", "Flushes");
  plb.add_u64_counter(l_librbd_aio_flush, "aio_flush", "Async flushes");
  plb.add_time_avg(l_librbd_aio_flush_latency, "aio_flush_latency", "Latency of async flushes");
  plb.add_u64_counter(l_librbd_ws, "ws", "WriteSames");
  plb.add_u64_counter(l_librbd_ws_bytes, "ws_bytes", "WriteSame data");
  plb.add_time_avg(l_librbd_ws_latency, "ws_latency", "WriteSame latency");
  plb.add_u64_counter(l_librbd_snap_create, "snap_create", "Snap creations");
  plb.add_u64_counter(l_librbd_snap_remove, "snap_remove", "Snap removals");
  plb.add_u64_counter(l_librbd_snap_rollback, "snap_rollback", "Snap rollbacks");
  plb.add_u64_counter(l_librbd_snap_rename, "snap_rename", "Snap rename");
  plb.add_u64_counter(l_librbd_notify, "notify", "Updated header notifications");
  plb.add_u64_counter(l_librbd_resize, "resize", "Resizes");
  plb.add_u64_counter(l_librbd_readahead, "readahead", "Read ahead");
  plb.add_u64_counter(l_librbd_readahead_bytes, "readahead_bytes", "Data size in read ahead");
  plb.add_u64_counter(l_librbd_invalidate_cache, "invalidate_cache", "Cache invalidates");

  perfcounter = plb.create_perf_counters();
  cct->get_perfcounters_collection()->add(perfcounter);
}
392
393 void ImageCtx::perf_stop() {
394 assert(perfcounter);
395 cct->get_perfcounters_collection()->remove(perfcounter);
396 delete perfcounter;
397 }
398
// Add a librados operation flag applied to all subsequent reads
// (see get_read_flags).
void ImageCtx::set_read_flag(unsigned flag) {
  extra_read_flags |= flag;
}
402
403 int ImageCtx::get_read_flags(snap_t snap_id) {
404 int flags = librados::OPERATION_NOFLAG | extra_read_flags;
405 if (snap_id == LIBRADOS_SNAP_HEAD)
406 return flags;
407
408 if (balance_snap_reads)
409 flags |= librados::OPERATION_BALANCE_READS;
410 else if (localize_snap_reads)
411 flags |= librados::OPERATION_LOCALIZE_READS;
412 return flags;
413 }
414
// Switch this context to read from the named in-memory snapshot, updating
// the data IoCtx's read snapshot accordingly.  Requires snap_lock held for
// write.  Returns 0 on success, -ENOENT if the snapshot is unknown.
int ImageCtx::snap_set(cls::rbd::SnapshotNamespace in_snap_namespace,
                       string in_snap_name)
{
  assert(snap_lock.is_wlocked());
  snap_t in_snap_id = get_snap_id(in_snap_namespace, in_snap_name);
  if (in_snap_id != CEPH_NOSNAP) {
    snap_id = in_snap_id;
    snap_namespace = in_snap_namespace;
    snap_name = in_snap_name;
    snap_exists = true;
    data_ctx.snap_set_read(snap_id);
    return 0;
  }
  return -ENOENT;
}
430
// Switch back to reading the image HEAD.  Requires snap_lock held for
// write.  snap_exists stays true since the HEAD revision always exists.
void ImageCtx::snap_unset()
{
  assert(snap_lock.is_wlocked());
  snap_id = CEPH_NOSNAP;
  snap_namespace = {};
  snap_name = "";
  snap_exists = true;
  data_ctx.snap_set_read(snap_id);
}
440
441 snap_t ImageCtx::get_snap_id(cls::rbd::SnapshotNamespace in_snap_namespace,
442 string in_snap_name) const
443 {
444 assert(snap_lock.is_locked());
445 auto it = snap_ids.find({in_snap_namespace, in_snap_name});
446 if (it != snap_ids.end())
447 return it->second;
448 return CEPH_NOSNAP;
449 }
450
451 const SnapInfo* ImageCtx::get_snap_info(snap_t in_snap_id) const
452 {
453 assert(snap_lock.is_locked());
454 map<snap_t, SnapInfo>::const_iterator it =
455 snap_info.find(in_snap_id);
456 if (it != snap_info.end())
457 return &it->second;
458 return NULL;
459 }
460
461 int ImageCtx::get_snap_name(snap_t in_snap_id,
462 string *out_snap_name) const
463 {
464 assert(snap_lock.is_locked());
465 const SnapInfo *info = get_snap_info(in_snap_id);
466 if (info) {
467 *out_snap_name = info->name;
468 return 0;
469 }
470 return -ENOENT;
471 }
472
473 int ImageCtx::get_snap_namespace(snap_t in_snap_id,
474 cls::rbd::SnapshotNamespace *out_snap_namespace) const
475 {
476 assert(snap_lock.is_locked());
477 const SnapInfo *info = get_snap_info(in_snap_id);
478 if (info) {
479 *out_snap_namespace = info->snap_namespace;
480 return 0;
481 }
482 return -ENOENT;
483 }
484
485 int ImageCtx::get_parent_spec(snap_t in_snap_id,
486 ParentSpec *out_pspec) const
487 {
488 const SnapInfo *info = get_snap_info(in_snap_id);
489 if (info) {
490 *out_pspec = info->parent.spec;
491 return 0;
492 }
493 return -ENOENT;
494 }
495
// Current HEAD image size in bytes.  Requires snap_lock held.
uint64_t ImageCtx::get_current_size() const
{
  assert(snap_lock.is_locked());
  return size;
}
501
// Size of one backing RADOS object in bytes (2^order).
uint64_t ImageCtx::get_object_size() const
{
  return 1ull << order;
}
506
507 string ImageCtx::get_object_name(uint64_t num) const {
508 char buf[object_prefix.length() + 32];
509 snprintf(buf, sizeof(buf), format_string, num);
510 return string(buf);
511 }
512
// Stripe unit in bytes (see init_layout for defaulting).
uint64_t ImageCtx::get_stripe_unit() const
{
  return stripe_unit;
}
517
// Number of objects per stripe (see init_layout for defaulting).
uint64_t ImageCtx::get_stripe_count() const
{
  return stripe_count;
}
522
// Bytes covered by one full stripe pass over all objects in a set
// (stripe_count * object size).
uint64_t ImageCtx::get_stripe_period() const
{
  return stripe_count * (1ull << order);
}
527
528 int ImageCtx::is_snap_protected(snap_t in_snap_id,
529 bool *is_protected) const
530 {
531 assert(snap_lock.is_locked());
532 const SnapInfo *info = get_snap_info(in_snap_id);
533 if (info) {
534 *is_protected =
535 (info->protection_status == RBD_PROTECTION_STATUS_PROTECTED);
536 return 0;
537 }
538 return -ENOENT;
539 }
540
541 int ImageCtx::is_snap_unprotected(snap_t in_snap_id,
542 bool *is_unprotected) const
543 {
544 assert(snap_lock.is_locked());
545 const SnapInfo *info = get_snap_info(in_snap_id);
546 if (info) {
547 *is_unprotected =
548 (info->protection_status == RBD_PROTECTION_STATUS_UNPROTECTED);
549 return 0;
550 }
551 return -ENOENT;
552 }
553
// Record a snapshot in the in-memory state (snaps list, id->info map and
// (namespace, name)->id index).  Requires snap_lock held for write.
void ImageCtx::add_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
                        string in_snap_name,
                        snap_t id, uint64_t in_size,
                        const ParentInfo &parent, uint8_t protection_status,
                        uint64_t flags, utime_t timestamp)
{
  assert(snap_lock.is_wlocked());
  snaps.push_back(id);
  SnapInfo info(in_snap_name, in_snap_namespace,
                in_size, parent, protection_status, flags, timestamp);
  snap_info.insert({id, info});
  snap_ids.insert({{in_snap_namespace, in_snap_name}, id});
}
567
// Remove a snapshot from all three in-memory indexes (inverse of add_snap).
// Requires snap_lock held for write.
void ImageCtx::rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
                       string in_snap_name,
                       snap_t id)
{
  assert(snap_lock.is_wlocked());
  snaps.erase(std::remove(snaps.begin(), snaps.end(), id), snaps.end());
  snap_info.erase(id);
  snap_ids.erase({in_snap_namespace, in_snap_name});
}
577
// Image size in bytes at the given snapshot, or the HEAD size for
// CEPH_NOSNAP.  While a shrinking resize is in flight the pending (smaller)
// size is reported so new IO doesn't extend past the new boundary.
// Requires snap_lock held.  Returns 0 for an unknown snapshot id.
uint64_t ImageCtx::get_image_size(snap_t in_snap_id) const
{
  assert(snap_lock.is_locked());
  if (in_snap_id == CEPH_NOSNAP) {
    if (!resize_reqs.empty() &&
        resize_reqs.front()->shrinking()) {
      return resize_reqs.front()->get_image_size();
    }
    return size;
  }

  const SnapInfo *info = get_snap_info(in_snap_id);
  if (info) {
    return info->size;
  }
  return 0;
}
595
// Number of backing objects needed for the image at the given snapshot,
// derived from the striping layout.  Requires snap_lock held.
uint64_t ImageCtx::get_object_count(snap_t in_snap_id) const {
  assert(snap_lock.is_locked());
  uint64_t image_size = get_image_size(in_snap_id);
  return Striper::get_num_objects(layout, image_size);
}
601
// Convenience overload: take snap_lock for read, then test the feature bits.
bool ImageCtx::test_features(uint64_t features) const
{
  RWLock::RLocker l(snap_lock);
  return test_features(features, snap_lock);
}
607
// True when ALL requested feature bits are enabled on the image.  The lock
// parameter only documents/enforces that the caller already holds snap_lock.
bool ImageCtx::test_features(uint64_t in_features,
                             const RWLock &in_snap_lock) const
{
  assert(snap_lock.is_locked());
  return ((features & in_features) == in_features);
}
614
615 int ImageCtx::get_flags(librados::snap_t _snap_id, uint64_t *_flags) const
616 {
617 assert(snap_lock.is_locked());
618 if (_snap_id == CEPH_NOSNAP) {
619 *_flags = flags;
620 return 0;
621 }
622 const SnapInfo *info = get_snap_info(_snap_id);
623 if (info) {
624 *_flags = info->flags;
625 return 0;
626 }
627 return -ENOENT;
628 }
629
// Convenience overload: take snap_lock for read, then test the flag bits
// at the currently-set snapshot.
bool ImageCtx::test_flags(uint64_t flags) const
{
  RWLock::RLocker l(snap_lock);
  return test_flags(flags, snap_lock);
}
635
636 bool ImageCtx::test_flags(uint64_t flags, const RWLock &in_snap_lock) const
637 {
638 assert(snap_lock.is_locked());
639 uint64_t snap_flags;
640 get_flags(snap_id, &snap_flags);
641 return ((snap_flags & flags) == flags);
642 }
643
// Set or clear a flag bit on the HEAD (CEPH_NOSNAP) or on a specific
// snapshot's recorded flags.  Requires snap_lock held for write.
// Returns 0 on success, -ENOENT for an unknown snapshot.
int ImageCtx::update_flags(snap_t in_snap_id, uint64_t flag, bool enabled)
{
  assert(snap_lock.is_wlocked());
  uint64_t *_flags;
  if (in_snap_id == CEPH_NOSNAP) {
    _flags = &flags;
  } else {
    map<snap_t, SnapInfo>::iterator it = snap_info.find(in_snap_id);
    if (it == snap_info.end()) {
      return -ENOENT;
    }
    _flags = &it->second.flags;
  }

  if (enabled) {
    (*_flags) |= flag;
  } else {
    (*_flags) &= ~flag;
  }
  return 0;
}
665
// Parent (clone source) info for the HEAD or a specific snapshot; NULL for
// an unknown snapshot.  Requires both snap_lock and parent_lock held.
const ParentInfo* ImageCtx::get_parent_info(snap_t in_snap_id) const
{
  assert(snap_lock.is_locked());
  assert(parent_lock.is_locked());
  if (in_snap_id == CEPH_NOSNAP)
    return &parent_md;
  const SnapInfo *info = get_snap_info(in_snap_id);
  if (info)
    return &info->parent;
  return NULL;
}
677
678 int64_t ImageCtx::get_parent_pool_id(snap_t in_snap_id) const
679 {
680 const ParentInfo *info = get_parent_info(in_snap_id);
681 if (info)
682 return info->spec.pool_id;
683 return -1;
684 }
685
686 string ImageCtx::get_parent_image_id(snap_t in_snap_id) const
687 {
688 const ParentInfo *info = get_parent_info(in_snap_id);
689 if (info)
690 return info->spec.image_id;
691 return "";
692 }
693
694 uint64_t ImageCtx::get_parent_snap_id(snap_t in_snap_id) const
695 {
696 const ParentInfo *info = get_parent_info(in_snap_id);
697 if (info)
698 return info->spec.snap_id;
699 return CEPH_NOSNAP;
700 }
701
702 int ImageCtx::get_parent_overlap(snap_t in_snap_id, uint64_t *overlap) const
703 {
704 assert(snap_lock.is_locked());
705 const ParentInfo *info = get_parent_info(in_snap_id);
706 if (info) {
707 *overlap = info->overlap;
708 return 0;
709 }
710 return -ENOENT;
711 }
712
// Issue a read for one object extent through the object cacher.  onfinish
// is invoked by the cacher on completion; when readx() fails immediately
// (r != 0) it is completed inline with the error instead.
void ImageCtx::aio_read_from_cache(object_t o, uint64_t object_no,
                                   bufferlist *bl, size_t len,
                                   uint64_t off, Context *onfinish,
                                   int fadvise_flags) {
  // sample snap_id under snap_lock; the read targets that snapshot
  snap_lock.get_read();
  ObjectCacher::OSDRead *rd = object_cacher->prepare_read(snap_id, bl, fadvise_flags);
  snap_lock.put_read();
  ObjectExtent extent(o, object_no, off, len, 0);
  extent.oloc.pool = data_ctx.get_id();
  extent.buffer_extents.push_back(make_pair(0, len));
  rd->extents.push_back(extent);
  cache_lock.Lock();
  int r = object_cacher->readx(rd, object_set, onfinish);
  cache_lock.Unlock();
  if (r != 0)
    onfinish->complete(r);
}
730
// Queue a write for one object extent through the object cacher, tagged
// with the current snap context and an optional journal commit tid.
void ImageCtx::write_to_cache(object_t o, const bufferlist& bl, size_t len,
                              uint64_t off, Context *onfinish,
                              int fadvise_flags, uint64_t journal_tid) {
  // sample the snap context under snap_lock
  snap_lock.get_read();
  ObjectCacher::OSDWrite *wr = object_cacher->prepare_write(
    snapc, bl, ceph::real_time::min(), fadvise_flags, journal_tid);
  snap_lock.put_read();
  ObjectExtent extent(o, 0, off, len, 0);
  extent.oloc.pool = data_ctx.get_id();
  // XXX: nspace is always default, io_ctx_impl field private
  //extent.oloc.nspace = data_ctx.io_ctx_impl->oloc.nspace;
  extent.buffer_extents.push_back(make_pair(0, len));
  wr->extents.push_back(extent);
  {
    Mutex::Locker l(cache_lock);
    object_cacher->writex(wr, object_set, onfinish);
  }
}
749
// Called when the user issues a flush.  With
// rbd_cache_writethrough_until_flush enabled, the first user flush proves
// the client flushes correctly, so the cache is switched from writethrough
// (max_dirty = 0, set in init()) to the configured writeback limit.
void ImageCtx::user_flushed() {
  if (object_cacher && cache_writethrough_until_flush) {
    md_lock.get_read();
    bool flushed_before = flush_encountered;
    md_lock.put_read();

    uint64_t max_dirty = cache_max_dirty;
    if (!flushed_before && max_dirty > 0) {
      md_lock.get_write();
      flush_encountered = true;
      md_lock.put_write();

      ldout(cct, 10) << "saw first user flush, enabling writeback" << dendl;
      Mutex::Locker l(cache_lock);
      object_cacher->set_max_dirty(max_dirty);
    }
  }
}
768
// Flush all dirty cache data for this image; onfinish is invoked by the
// cacher when the flush completes.
void ImageCtx::flush_cache(Context *onfinish) {
  cache_lock.Lock();
  object_cacher->flush_set(object_set, onfinish);
  cache_lock.Unlock();
}
774
// Asynchronously flush, invalidate (purging on error) and stop the object
// cacher, then complete on_finish.  Completes immediately with 0 when
// caching is disabled.
void ImageCtx::shut_down_cache(Context *on_finish) {
  if (object_cacher == NULL) {
    on_finish->complete(0);
    return;
  }

  // drop clean objects up front so only dirty data needs flushing
  cache_lock.Lock();
  object_cacher->release_set(object_set);
  cache_lock.Unlock();

  // flush -> invalidate (purge on error) -> stop cacher -> on_finish
  C_ShutDownCache *shut_down = new C_ShutDownCache(this, on_finish);
  flush_cache(new C_InvalidateCache(this, true, false, shut_down));
}
788
// Synchronously drop all cached data after waiting for in-flight async
// operations.  Returns 0 on success (or when caching is disabled), -EBUSY
// if dirty data could not be released, or the flush error.
int ImageCtx::invalidate_cache(bool purge_on_error) {
  flush_async_operations();
  if (object_cacher == NULL) {
    return 0;
  }

  cache_lock.Lock();
  object_cacher->release_set(object_set);
  cache_lock.Unlock();

  // reentrant_safe=true: the condition may be signalled inline
  C_SaferCond ctx;
  flush_cache(new C_InvalidateCache(this, purge_on_error, true, &ctx));

  int result = ctx.wait();
  return result;
}
805
// Asynchronous variant of invalidate_cache(); on_finish is queued on the op
// work queue (with 0 when caching is disabled, else the invalidate result).
void ImageCtx::invalidate_cache(bool purge_on_error, Context *on_finish) {
  if (object_cacher == NULL) {
    op_work_queue->queue(on_finish, 0);
    return;
  }

  cache_lock.Lock();
  object_cacher->release_set(object_set);
  cache_lock.Unlock();

  // reentrant_safe=false: on_finish is bounced through the work queue
  flush_cache(new C_InvalidateCache(this, purge_on_error, false, on_finish));
}
818
// Forget cached "object does not exist" knowledge (e.g. after a snapshot
// rollback may have recreated objects).  Requires cache_lock held; no-op
// when caching is disabled.
void ImageCtx::clear_nonexistence_cache() {
  assert(cache_lock.is_locked());
  if (!object_cacher)
    return;
  object_cacher->clear_nonexistence(object_set);
}
825
// True when the object cacher holds no data for this image.
// NOTE(review): assumes object_cacher != NULL — callers appear responsible
// for only calling this with caching enabled.
bool ImageCtx::is_cache_empty() {
  Mutex::Locker locker(cache_lock);
  return object_cacher->set_is_empty(object_set);
}
830
// Create the image watcher and asynchronously register the header watch;
// on_finish is completed when registration finishes.  Must not already
// have a watcher (asserted).
void ImageCtx::register_watch(Context *on_finish) {
  assert(image_watcher == NULL);
  image_watcher = new ImageWatcher<>(*this);
  image_watcher->register_watch(on_finish);
}
836
837 uint64_t ImageCtx::prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
838 uint64_t overlap)
839 {
840 // drop extents completely beyond the overlap
841 while (!objectx.empty() && objectx.back().first >= overlap)
842 objectx.pop_back();
843
844 // trim final overlapping extent
845 if (!objectx.empty() && objectx.back().first + objectx.back().second > overlap)
846 objectx.back().second = overlap - objectx.back().first;
847
848 uint64_t len = 0;
849 for (vector<pair<uint64_t,uint64_t> >::iterator p = objectx.begin();
850 p != objectx.end();
851 ++p)
852 len += p->second;
853 ldout(cct, 10) << "prune_parent_extents image overlap " << overlap
854 << ", object overlap " << len
855 << " from image extents " << objectx << dendl;
856 return len;
857 }
858
// Synchronously wait for all in-flight async operations to complete.
void ImageCtx::flush_async_operations() {
  C_SaferCond ctx;
  flush_async_operations(&ctx);
  ctx.wait();
}
864
// Complete on_finish once all currently in-flight async operations have
// finished; completes immediately (with 0) when none are pending.
void ImageCtx::flush_async_operations(Context *on_finish) {
  {
    Mutex::Locker l(async_ops_lock);
    if (!async_ops.empty()) {
      ldout(cct, 20) << "flush async operations: " << on_finish << " "
                     << "count=" << async_ops.size() << dendl;
      // attach to the oldest op; it fires the context when the list drains
      async_ops.front()->add_flush_context(on_finish);
      return;
    }
  }
  on_finish->complete(0);
}
877
// Synchronous flush: wait for in-flight AIO and (if enabled) a cache flush;
// returns the flush result.
int ImageCtx::flush() {
  C_SaferCond cond_ctx;
  flush(&cond_ctx);
  return cond_ctx.wait();
}
883
// Asynchronous flush: once all in-flight async operations finish, flush the
// cache (when enabled) and then complete on_safe via the op work queue.
void ImageCtx::flush(Context *on_safe) {
  // ensure no locks are held when flush is complete
  on_safe = util::create_async_context_callback(*this, on_safe);

  if (object_cacher != NULL) {
    // flush cache after completing all in-flight AIO ops
    on_safe = new C_FlushCache(this, on_safe);
  }
  flush_async_operations(on_safe);
}
894
// Synchronously cancel all pending async requests and wait until they have
// drained.
void ImageCtx::cancel_async_requests() {
  C_SaferCond ctx;
  cancel_async_requests(&ctx);
  ctx.wait();
}
900
// Request cancellation of all pending async requests; on_finish is
// completed once the request list drains (immediately with 0 when empty).
void ImageCtx::cancel_async_requests(Context *on_finish) {
  {
    Mutex::Locker async_ops_locker(async_ops_lock);
    if (!async_requests.empty()) {
      ldout(cct, 10) << "canceling async requests: count="
                     << async_requests.size() << dendl;
      for (auto req : async_requests) {
        ldout(cct, 10) << "canceling async request: " << req << dendl;
        req->cancel();
      }
      // waiters are signalled when the last request is removed
      async_requests_waiters.push_back(on_finish);
      return;
    }
  }

  on_finish->complete(0);
}
918
// Drop all queued-but-unfired AioCompletions (they are not completed, just
// discarded).
void ImageCtx::clear_pending_completions() {
  Mutex::Locker l(completed_reqs_lock);
  ldout(cct, 10) << "clear pending AioCompletion: count="
                 << completed_reqs.size() << dendl;
  completed_reqs.clear();
}
925
// Scan a sorted metadata key/value map for keys of the form
// "<prefix><option>": matching options are flagged true in 'configs' and
// copied into 'res' keyed by the bare option name.  Returns false as soon as
// a key sorts past the prefix range (relying on ordered map iteration to
// stop early), true otherwise.
bool ImageCtx::_filter_metadata_confs(const string &prefix,
                                      map<string, bool> &configs,
                                      const map<string, bufferlist> &pairs,
                                      map<string, bufferlist> *res) {
  size_t conf_prefix_len = prefix.size();

  for (auto it : pairs) {
    // keys are sorted; once past the prefix, nothing further can match
    if (it.first.compare(0, MIN(conf_prefix_len, it.first.size()), prefix) > 0)
      return false;

    // the bare prefix (or shorter) carries no option name
    if (it.first.size() <= conf_prefix_len)
      continue;

    string key = it.first.substr(conf_prefix_len, it.first.size() - conf_prefix_len);
    auto cit = configs.find(key);
    if (cit != configs.end()) {
      cit->second = true;
      res->insert(make_pair(key, it.second));
    }
  }
  return true;
}
948
// Apply per-image config overrides stored as image metadata
// ("conf_<option>" keys).  Each option listed below is taken from the image
// metadata when present (parsed through a scratch md_config_t), otherwise
// from the global CephContext configuration.
void ImageCtx::apply_metadata(const std::map<std::string, bufferlist> &meta) {
  ldout(cct, 20) << __func__ << dendl;
  // overridable options; the bool is set to true when metadata supplies one
  std::map<string, bool> configs = boost::assign::map_list_of(
    "rbd_non_blocking_aio", false)(
    "rbd_cache", false)(
    "rbd_cache_writethrough_until_flush", false)(
    "rbd_cache_size", false)(
    "rbd_cache_max_dirty", false)(
    "rbd_cache_target_dirty", false)(
    "rbd_cache_max_dirty_age", false)(
    "rbd_cache_max_dirty_object", false)(
    "rbd_cache_block_writes_upfront", false)(
    "rbd_concurrent_management_ops", false)(
    "rbd_balance_snap_reads", false)(
    "rbd_localize_snap_reads", false)(
    "rbd_balance_parent_reads", false)(
    "rbd_localize_parent_reads", false)(
    "rbd_readahead_trigger_requests", false)(
    "rbd_readahead_max_bytes", false)(
    "rbd_readahead_disable_after_bytes", false)(
    "rbd_clone_copy_on_read", false)(
    "rbd_blacklist_on_break_lock", false)(
    "rbd_blacklist_expire_seconds", false)(
    "rbd_request_timed_out_seconds", false)(
    "rbd_journal_order", false)(
    "rbd_journal_splay_width", false)(
    "rbd_journal_commit_age", false)(
    "rbd_journal_object_flush_interval", false)(
    "rbd_journal_object_flush_bytes", false)(
    "rbd_journal_object_flush_age", false)(
    "rbd_journal_pool", false)(
    "rbd_journal_max_payload_bytes", false)(
    "rbd_journal_max_concurrent_object_sets", false)(
    "rbd_mirroring_resync_after_disconnect", false)(
    "rbd_mirroring_replay_delay", false)(
    "rbd_skip_partial_discard", false);

  // scratch config used only to parse/validate the metadata-supplied values
  md_config_t local_config_t;
  std::map<std::string, bufferlist> res;

  _filter_metadata_confs(METADATA_CONF_PREFIX, configs, meta, &res);
  for (auto it : res) {
    std::string val(it.second.c_str(), it.second.length());
    int j = local_config_t.set_val(it.first.c_str(), val);
    if (j < 0) {
      // invalid value: log and fall back to the global config below
      lderr(cct) << __func__ << " failed to set config " << it.first
                 << " with value " << it.second.c_str() << ": " << j
                 << dendl;
    }
  }

// Copy rbd_<config> into the like-named member, preferring the
// metadata-supplied value when one was present and parsed successfully.
#define ASSIGN_OPTION(config)                                                 \
  do {                                                                        \
    string key = "rbd_";                                                      \
    key = key + #config;                                                      \
    if (configs[key])                                                         \
      config = local_config_t.rbd_##config;                                   \
    else                                                                      \
      config = cct->_conf->rbd_##config;                                      \
  } while (0);

  ASSIGN_OPTION(non_blocking_aio);
  ASSIGN_OPTION(cache);
  ASSIGN_OPTION(cache_writethrough_until_flush);
  ASSIGN_OPTION(cache_size);
  ASSIGN_OPTION(cache_max_dirty);
  ASSIGN_OPTION(cache_target_dirty);
  ASSIGN_OPTION(cache_max_dirty_age);
  ASSIGN_OPTION(cache_max_dirty_object);
  ASSIGN_OPTION(cache_block_writes_upfront);
  ASSIGN_OPTION(concurrent_management_ops);
  ASSIGN_OPTION(balance_snap_reads);
  ASSIGN_OPTION(localize_snap_reads);
  ASSIGN_OPTION(balance_parent_reads);
  ASSIGN_OPTION(localize_parent_reads);
  ASSIGN_OPTION(readahead_trigger_requests);
  ASSIGN_OPTION(readahead_max_bytes);
  ASSIGN_OPTION(readahead_disable_after_bytes);
  ASSIGN_OPTION(clone_copy_on_read);
  ASSIGN_OPTION(blacklist_on_break_lock);
  ASSIGN_OPTION(blacklist_expire_seconds);
  ASSIGN_OPTION(request_timed_out_seconds);
  // NOTE(review): rbd_enable_alloc_hint is not in 'configs' above, so it is
  // always taken from the global config — confirm whether it should also be
  // overridable via image metadata.
  ASSIGN_OPTION(enable_alloc_hint);
  ASSIGN_OPTION(journal_order);
  ASSIGN_OPTION(journal_splay_width);
  ASSIGN_OPTION(journal_commit_age);
  ASSIGN_OPTION(journal_object_flush_interval);
  ASSIGN_OPTION(journal_object_flush_bytes);
  ASSIGN_OPTION(journal_object_flush_age);
  ASSIGN_OPTION(journal_pool);
  ASSIGN_OPTION(journal_max_payload_bytes);
  ASSIGN_OPTION(journal_max_concurrent_object_sets);
  ASSIGN_OPTION(mirroring_resync_after_disconnect);
  ASSIGN_OPTION(mirroring_replay_delay);
  ASSIGN_OPTION(skip_partial_discard);
}
1045
// Factory for the exclusive-lock state machine (ownership passes to caller).
ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
  return new ExclusiveLock<ImageCtx>(*this);
}
1049
// Factory for an object map bound to the given snapshot (ownership passes
// to caller).
ObjectMap<ImageCtx> *ImageCtx::create_object_map(uint64_t snap_id) {
  return new ObjectMap<ImageCtx>(*this, snap_id);
}
1053
// Factory for the image journal (ownership passes to caller).
Journal<ImageCtx> *ImageCtx::create_journal() {
  return new Journal<ImageCtx>(*this);
}
1057
void ImageCtx::set_image_name(const std::string &image_name) {
  // update the name so rename can be invoked repeatedly
  RWLock::RLocker owner_locker(owner_lock);
  RWLock::WLocker snap_locker(snap_lock);
  name = image_name;
  if (old_format) {
    // v1 images derive the header object name from the image name
    header_oid = util::old_header_name(image_name);
  }
}
1067
// Mark local state as needing refresh and broadcast a header-update notify
// to all watchers of this image (fire-and-forget).
void ImageCtx::notify_update() {
  state->handle_update_notification();
  ImageWatcher<>::notify_header_update(md_ctx, header_oid);
}
1072
// Async variant: broadcast a header-update notify through this image's
// watcher and complete on_finish when done.
void ImageCtx::notify_update(Context *on_finish) {
  state->handle_update_notification();
  image_watcher->notify_header_update(on_finish);
}
1077
// Current exclusive-lock policy (never NULL).  Requires owner_lock held.
exclusive_lock::Policy *ImageCtx::get_exclusive_lock_policy() const {
  assert(owner_lock.is_locked());
  assert(exclusive_lock_policy != nullptr);
  return exclusive_lock_policy;
}
1083
// Replace the exclusive-lock policy (takes ownership; the old policy is
// destroyed).  Requires owner_lock held for write.
void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy *policy) {
  assert(owner_lock.is_wlocked());
  assert(policy != nullptr);
  delete exclusive_lock_policy;
  exclusive_lock_policy = policy;
}
1090
// Current journal policy (never NULL).  Requires snap_lock held.
journal::Policy *ImageCtx::get_journal_policy() const {
  assert(snap_lock.is_locked());
  assert(journal_policy != nullptr);
  return journal_policy;
}
1096
// Replace the journal policy (takes ownership; the old policy is
// destroyed).  Requires snap_lock held for write.
void ImageCtx::set_journal_policy(journal::Policy *policy) {
  assert(snap_lock.is_wlocked());
  assert(policy != nullptr);
  delete journal_policy;
  journal_policy = policy;
}
1103
// Fetch (creating on first use) the process-wide librbd thread pool and its
// shared context work queue; both remain owned by the CephContext singleton.
void ImageCtx::get_thread_pool_instance(CephContext *cct,
                                        ThreadPool **thread_pool,
                                        ContextWQ **op_work_queue) {
  ThreadPoolSingleton *thread_pool_singleton;
  cct->lookup_or_create_singleton_object<ThreadPoolSingleton>(
    thread_pool_singleton, "librbd::thread_pool");
  *thread_pool = thread_pool_singleton;
  *op_work_queue = thread_pool_singleton->op_work_queue;
}
1113
// Fetch (creating on first use) the process-wide safe timer and the mutex
// guarding it; both remain owned by the CephContext singleton.
void ImageCtx::get_timer_instance(CephContext *cct, SafeTimer **timer,
                                  Mutex **timer_lock) {
  SafeTimerSingleton *safe_timer_singleton;
  cct->lookup_or_create_singleton_object<SafeTimerSingleton>(
    safe_timer_singleton, "librbd::journal::safe_timer");
  *timer = safe_timer_singleton;
  *timer_lock = &safe_timer_singleton->lock;
}
1122}