]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/ImageCtx.cc
update sources to 12.2.7
[ceph.git] / ceph / src / librbd / ImageCtx.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#include <errno.h>
4#include <boost/assign/list_of.hpp>
5#include <stddef.h>
6
7#include "common/ceph_context.h"
8#include "common/dout.h"
9#include "common/errno.h"
10#include "common/perf_counters.h"
11#include "common/WorkQueue.h"
12#include "common/Timer.h"
13
7c673cae
FG
14#include "librbd/AsyncRequest.h"
15#include "librbd/ExclusiveLock.h"
16#include "librbd/internal.h"
17#include "librbd/ImageCtx.h"
18#include "librbd/ImageState.h"
19#include "librbd/ImageWatcher.h"
20#include "librbd/Journal.h"
21#include "librbd/LibrbdAdminSocketHook.h"
22#include "librbd/ObjectMap.h"
23#include "librbd/Operations.h"
24#include "librbd/operation/ResizeRequest.h"
b32b8144 25#include "librbd/Types.h"
7c673cae
FG
26#include "librbd/Utils.h"
27#include "librbd/LibrbdWriteback.h"
28#include "librbd/exclusive_lock/AutomaticPolicy.h"
29#include "librbd/exclusive_lock/StandardPolicy.h"
30#include "librbd/io/AioCompletion.h"
31f18b77 31#include "librbd/io/AsyncOperation.h"
7c673cae
FG
32#include "librbd/io/ImageRequestWQ.h"
33#include "librbd/journal/StandardPolicy.h"
34
35#include "osdc/Striper.h"
36#include <boost/bind.hpp>
37
38#define dout_subsys ceph_subsys_rbd
39#undef dout_prefix
40#define dout_prefix *_dout << "librbd::ImageCtx: "
41
42using std::map;
43using std::pair;
44using std::set;
45using std::string;
46using std::vector;
47
48using ceph::bufferlist;
49using librados::snap_t;
50using librados::IoCtx;
51
52namespace librbd {
53
54namespace {
55
56class ThreadPoolSingleton : public ThreadPool {
57public:
58 ContextWQ *op_work_queue;
59
60 explicit ThreadPoolSingleton(CephContext *cct)
61 : ThreadPool(cct, "librbd::thread_pool", "tp_librbd", 1,
62 "rbd_op_threads"),
63 op_work_queue(new ContextWQ("librbd::op_work_queue",
181888fb 64 cct->_conf->get_val<int64_t>("rbd_op_thread_timeout"),
7c673cae
FG
65 this)) {
66 start();
67 }
68 ~ThreadPoolSingleton() override {
69 op_work_queue->drain();
70 delete op_work_queue;
71
72 stop();
73 }
74};
75
76class SafeTimerSingleton : public SafeTimer {
77public:
78 Mutex lock;
79
80 explicit SafeTimerSingleton(CephContext *cct)
81 : SafeTimer(cct, lock, true),
82 lock("librbd::Journal::SafeTimerSingleton::lock") {
83 init();
84 }
85 ~SafeTimerSingleton() {
86 Mutex::Locker locker(lock);
87 shutdown();
88 }
89};
90
91struct C_FlushCache : public Context {
92 ImageCtx *image_ctx;
93 Context *on_safe;
94
95 C_FlushCache(ImageCtx *_image_ctx, Context *_on_safe)
96 : image_ctx(_image_ctx), on_safe(_on_safe) {
97 }
98 void finish(int r) override {
99 // successful cache flush indicates all IO is now safe
100 image_ctx->flush_cache(on_safe);
101 }
102};
103
104struct C_ShutDownCache : public Context {
105 ImageCtx *image_ctx;
106 Context *on_finish;
107
108 C_ShutDownCache(ImageCtx *_image_ctx, Context *_on_finish)
109 : image_ctx(_image_ctx), on_finish(_on_finish) {
110 }
111 void finish(int r) override {
112 image_ctx->object_cacher->stop();
113 on_finish->complete(r);
114 }
115};
116
117struct C_InvalidateCache : public Context {
118 ImageCtx *image_ctx;
119 bool purge_on_error;
120 bool reentrant_safe;
121 Context *on_finish;
122
123 C_InvalidateCache(ImageCtx *_image_ctx, bool _purge_on_error,
124 bool _reentrant_safe, Context *_on_finish)
125 : image_ctx(_image_ctx), purge_on_error(_purge_on_error),
126 reentrant_safe(_reentrant_safe), on_finish(_on_finish) {
127 }
128 void finish(int r) override {
129 assert(image_ctx->cache_lock.is_locked());
130 CephContext *cct = image_ctx->cct;
131
132 if (r == -EBLACKLISTED) {
133 lderr(cct) << "Blacklisted during flush! Purging cache..." << dendl;
134 image_ctx->object_cacher->purge_set(image_ctx->object_set);
135 } else if (r != 0 && purge_on_error) {
136 lderr(cct) << "invalidate cache encountered error "
137 << cpp_strerror(r) << " !Purging cache..." << dendl;
138 image_ctx->object_cacher->purge_set(image_ctx->object_set);
139 } else if (r != 0) {
140 lderr(cct) << "flush_cache returned " << r << dendl;
141 }
142
143 loff_t unclean = image_ctx->object_cacher->release_set(
144 image_ctx->object_set);
145 if (unclean == 0) {
146 r = 0;
147 } else {
148 lderr(cct) << "could not release all objects from cache: "
149 << unclean << " bytes remain" << dendl;
150 if (r == 0) {
151 r = -EBUSY;
152 }
153 }
154
155 if (reentrant_safe) {
156 on_finish->complete(r);
157 } else {
158 image_ctx->op_work_queue->queue(on_finish, r);
159 }
160 }
161
162};
163
164} // anonymous namespace
165
166 const string ImageCtx::METADATA_CONF_PREFIX = "conf_";
167
168 ImageCtx::ImageCtx(const string &image_name, const string &image_id,
169 const char *snap, IoCtx& p, bool ro)
170 : cct((CephContext*)p.cct()),
171 perfcounter(NULL),
172 snap_id(CEPH_NOSNAP),
173 snap_exists(true),
174 read_only(ro),
175 flush_encountered(false),
176 exclusive_locked(false),
177 name(image_name),
178 image_watcher(NULL),
179 journal(NULL),
180 owner_lock(util::unique_lock_name("librbd::ImageCtx::owner_lock", this)),
181 md_lock(util::unique_lock_name("librbd::ImageCtx::md_lock", this)),
182 cache_lock(util::unique_lock_name("librbd::ImageCtx::cache_lock", this)),
183 snap_lock(util::unique_lock_name("librbd::ImageCtx::snap_lock", this)),
184 parent_lock(util::unique_lock_name("librbd::ImageCtx::parent_lock", this)),
185 object_map_lock(util::unique_lock_name("librbd::ImageCtx::object_map_lock", this)),
186 async_ops_lock(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this)),
187 copyup_list_lock(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this)),
188 completed_reqs_lock(util::unique_lock_name("librbd::ImageCtx::completed_reqs_lock", this)),
189 extra_read_flags(0),
190 old_format(true),
191 order(0), size(0), features(0),
192 format_string(NULL),
193 id(image_id), parent(NULL),
194 stripe_unit(0), stripe_count(0), flags(0),
195 object_cacher(NULL), writeback_handler(NULL), object_set(NULL),
196 readahead(),
197 total_bytes_read(0),
198 state(new ImageState<>(this)),
199 operations(new Operations<>(*this)),
200 exclusive_lock(nullptr), object_map(nullptr),
201 io_work_queue(nullptr), op_work_queue(nullptr),
31f18b77
FG
202 asok_hook(nullptr),
203 trace_endpoint("librbd")
7c673cae
FG
204 {
205 md_ctx.dup(p);
206 data_ctx.dup(p);
207 if (snap)
208 snap_name = snap;
209
210 memset(&header, 0, sizeof(header));
211
212 ThreadPool *thread_pool;
213 get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
224ce89b 214 io_work_queue = new io::ImageRequestWQ<>(
181888fb
FG
215 this, "librbd::io_work_queue",
216 cct->_conf->get_val<int64_t>("rbd_op_thread_timeout"),
7c673cae
FG
217 thread_pool);
218
181888fb 219 if (cct->_conf->get_val<bool>("rbd_auto_exclusive_lock_until_manual_request")) {
7c673cae
FG
220 exclusive_lock_policy = new exclusive_lock::AutomaticPolicy(this);
221 } else {
222 exclusive_lock_policy = new exclusive_lock::StandardPolicy(this);
223 }
224 journal_policy = new journal::StandardPolicy<ImageCtx>(this);
225 }
226
227 ImageCtx::~ImageCtx() {
228 assert(image_watcher == NULL);
229 assert(exclusive_lock == NULL);
230 assert(object_map == NULL);
231 assert(journal == NULL);
232 assert(asok_hook == NULL);
233
234 if (perfcounter) {
235 perf_stop();
236 }
237 if (object_cacher) {
238 delete object_cacher;
239 object_cacher = NULL;
240 }
241 if (writeback_handler) {
242 delete writeback_handler;
243 writeback_handler = NULL;
244 }
245 if (object_set) {
246 delete object_set;
247 object_set = NULL;
248 }
249 delete[] format_string;
250
251 md_ctx.aio_flush();
252 data_ctx.aio_flush();
253 io_work_queue->drain();
254
255 delete journal_policy;
256 delete exclusive_lock_policy;
257 delete io_work_queue;
258 delete operations;
259 delete state;
260 }
261
262 void ImageCtx::init() {
263 assert(!header_oid.empty());
264 assert(old_format || !id.empty());
265
266 asok_hook = new LibrbdAdminSocketHook(this);
267
268 string pname = string("librbd-") + id + string("-") +
269 data_ctx.get_pool_name() + string("-") + name;
270 if (!snap_name.empty()) {
271 pname += "-";
272 pname += snap_name;
273 }
274
31f18b77 275 trace_endpoint.copy_name(pname);
7c673cae
FG
276 perf_start(pname);
277
278 if (cache) {
279 Mutex::Locker l(cache_lock);
280 ldout(cct, 20) << "enabling caching..." << dendl;
281 writeback_handler = new LibrbdWriteback(this, cache_lock);
282
283 uint64_t init_max_dirty = cache_max_dirty;
284 if (cache_writethrough_until_flush)
285 init_max_dirty = 0;
286 ldout(cct, 20) << "Initial cache settings:"
287 << " size=" << cache_size
288 << " num_objects=" << 10
289 << " max_dirty=" << init_max_dirty
290 << " target_dirty=" << cache_target_dirty
291 << " max_dirty_age="
292 << cache_max_dirty_age << dendl;
293
294 object_cacher = new ObjectCacher(cct, pname, *writeback_handler, cache_lock,
295 NULL, NULL,
296 cache_size,
297 10, /* reset this in init */
298 init_max_dirty,
299 cache_target_dirty,
300 cache_max_dirty_age,
301 cache_block_writes_upfront);
302
303 // size object cache appropriately
304 uint64_t obj = cache_max_dirty_object;
305 if (!obj) {
306 obj = MIN(2000, MAX(10, cache_size / 100 / sizeof(ObjectCacher::Object)));
307 }
308 ldout(cct, 10) << " cache bytes " << cache_size
309 << " -> about " << obj << " objects" << dendl;
310 object_cacher->set_max_objects(obj);
311
312 object_set = new ObjectCacher::ObjectSet(NULL, data_ctx.get_id(), 0);
313 object_set->return_enoent = true;
314 object_cacher->start();
315 }
316
317 readahead.set_trigger_requests(readahead_trigger_requests);
318 readahead.set_max_readahead_size(readahead_max_bytes);
319 }
320
321 void ImageCtx::shutdown() {
322 delete image_watcher;
323 image_watcher = nullptr;
324
325 delete asok_hook;
326 asok_hook = nullptr;
327 }
328
329 void ImageCtx::init_layout()
330 {
331 if (stripe_unit == 0 || stripe_count == 0) {
332 stripe_unit = 1ull << order;
333 stripe_count = 1;
334 }
335
336 vector<uint64_t> alignments;
337 alignments.push_back(stripe_count << order); // object set (in file striping terminology)
338 alignments.push_back(stripe_unit * stripe_count); // stripe
339 alignments.push_back(stripe_unit); // stripe unit
340 readahead.set_alignments(alignments);
341
342 layout = file_layout_t();
343 layout.stripe_unit = stripe_unit;
344 layout.stripe_count = stripe_count;
345 layout.object_size = 1ull << order;
346 layout.pool_id = data_ctx.get_id(); // FIXME: pool id overflow?
347
348 delete[] format_string;
349 size_t len = object_prefix.length() + 16;
350 format_string = new char[len];
351 if (old_format) {
352 snprintf(format_string, len, "%s.%%012llx", object_prefix.c_str());
353 } else {
354 snprintf(format_string, len, "%s.%%016llx", object_prefix.c_str());
355 }
356
357 ldout(cct, 10) << "init_layout stripe_unit " << stripe_unit
358 << " stripe_count " << stripe_count
359 << " object_size " << layout.object_size
360 << " prefix " << object_prefix
361 << " format " << format_string
362 << dendl;
363 }
364
365 void ImageCtx::perf_start(string name) {
b32b8144
FG
366 auto perf_prio = PerfCountersBuilder::PRIO_DEBUGONLY;
367 if (child == nullptr) {
368 // ensure top-level IO stats are exported for librbd daemons
369 perf_prio = PerfCountersBuilder::PRIO_USEFUL;
370 }
371
7c673cae
FG
372 PerfCountersBuilder plb(cct, name, l_librbd_first, l_librbd_last);
373
b32b8144
FG
374 plb.add_u64_counter(l_librbd_rd, "rd", "Reads", "r", perf_prio);
375 plb.add_u64_counter(l_librbd_rd_bytes, "rd_bytes", "Data size in reads",
376 "rb", perf_prio);
377 plb.add_time_avg(l_librbd_rd_latency, "rd_latency", "Latency of reads",
378 "rl", perf_prio);
379 plb.add_u64_counter(l_librbd_wr, "wr", "Writes", "w", perf_prio);
380 plb.add_u64_counter(l_librbd_wr_bytes, "wr_bytes", "Written data",
381 "wb", perf_prio);
382 plb.add_time_avg(l_librbd_wr_latency, "wr_latency", "Write latency",
383 "wl", perf_prio);
7c673cae
FG
384 plb.add_u64_counter(l_librbd_discard, "discard", "Discards");
385 plb.add_u64_counter(l_librbd_discard_bytes, "discard_bytes", "Discarded data");
386 plb.add_time_avg(l_librbd_discard_latency, "discard_latency", "Discard latency");
387 plb.add_u64_counter(l_librbd_flush, "flush", "Flushes");
388 plb.add_u64_counter(l_librbd_aio_flush, "aio_flush", "Async flushes");
389 plb.add_time_avg(l_librbd_aio_flush_latency, "aio_flush_latency", "Latency of async flushes");
390 plb.add_u64_counter(l_librbd_ws, "ws", "WriteSames");
391 plb.add_u64_counter(l_librbd_ws_bytes, "ws_bytes", "WriteSame data");
392 plb.add_time_avg(l_librbd_ws_latency, "ws_latency", "WriteSame latency");
c07f9fc5
FG
393 plb.add_u64_counter(l_librbd_cmp, "cmp", "CompareAndWrites");
394 plb.add_u64_counter(l_librbd_cmp_bytes, "cmp_bytes", "Data size in cmps");
395 plb.add_time_avg(l_librbd_cmp_latency, "cmp_latency", "Latency of cmps");
7c673cae
FG
396 plb.add_u64_counter(l_librbd_snap_create, "snap_create", "Snap creations");
397 plb.add_u64_counter(l_librbd_snap_remove, "snap_remove", "Snap removals");
398 plb.add_u64_counter(l_librbd_snap_rollback, "snap_rollback", "Snap rollbacks");
399 plb.add_u64_counter(l_librbd_snap_rename, "snap_rename", "Snap rename");
400 plb.add_u64_counter(l_librbd_notify, "notify", "Updated header notifications");
401 plb.add_u64_counter(l_librbd_resize, "resize", "Resizes");
402 plb.add_u64_counter(l_librbd_readahead, "readahead", "Read ahead");
403 plb.add_u64_counter(l_librbd_readahead_bytes, "readahead_bytes", "Data size in read ahead");
404 plb.add_u64_counter(l_librbd_invalidate_cache, "invalidate_cache", "Cache invalidates");
405
b32b8144
FG
406 plb.add_time(l_librbd_opened_time, "opened_time", "Opened time",
407 "ots", perf_prio);
408 plb.add_time(l_librbd_lock_acquired_time, "lock_acquired_time",
409 "Lock acquired time", "lats", perf_prio);
410
7c673cae
FG
411 perfcounter = plb.create_perf_counters();
412 cct->get_perfcounters_collection()->add(perfcounter);
b32b8144
FG
413
414 perfcounter->tset(l_librbd_opened_time, ceph_clock_now());
7c673cae
FG
415 }
416
417 void ImageCtx::perf_stop() {
418 assert(perfcounter);
419 cct->get_perfcounters_collection()->remove(perfcounter);
420 delete perfcounter;
421 }
422
423 void ImageCtx::set_read_flag(unsigned flag) {
424 extra_read_flags |= flag;
425 }
426
427 int ImageCtx::get_read_flags(snap_t snap_id) {
428 int flags = librados::OPERATION_NOFLAG | extra_read_flags;
429 if (snap_id == LIBRADOS_SNAP_HEAD)
430 return flags;
431
432 if (balance_snap_reads)
433 flags |= librados::OPERATION_BALANCE_READS;
434 else if (localize_snap_reads)
435 flags |= librados::OPERATION_LOCALIZE_READS;
436 return flags;
437 }
438
439 int ImageCtx::snap_set(cls::rbd::SnapshotNamespace in_snap_namespace,
440 string in_snap_name)
441 {
442 assert(snap_lock.is_wlocked());
443 snap_t in_snap_id = get_snap_id(in_snap_namespace, in_snap_name);
444 if (in_snap_id != CEPH_NOSNAP) {
445 snap_id = in_snap_id;
446 snap_namespace = in_snap_namespace;
447 snap_name = in_snap_name;
448 snap_exists = true;
449 data_ctx.snap_set_read(snap_id);
450 return 0;
451 }
452 return -ENOENT;
453 }
454
455 void ImageCtx::snap_unset()
456 {
457 assert(snap_lock.is_wlocked());
458 snap_id = CEPH_NOSNAP;
459 snap_namespace = {};
460 snap_name = "";
461 snap_exists = true;
462 data_ctx.snap_set_read(snap_id);
463 }
464
465 snap_t ImageCtx::get_snap_id(cls::rbd::SnapshotNamespace in_snap_namespace,
466 string in_snap_name) const
467 {
468 assert(snap_lock.is_locked());
469 auto it = snap_ids.find({in_snap_namespace, in_snap_name});
470 if (it != snap_ids.end())
471 return it->second;
472 return CEPH_NOSNAP;
473 }
474
475 const SnapInfo* ImageCtx::get_snap_info(snap_t in_snap_id) const
476 {
477 assert(snap_lock.is_locked());
478 map<snap_t, SnapInfo>::const_iterator it =
479 snap_info.find(in_snap_id);
480 if (it != snap_info.end())
481 return &it->second;
482 return NULL;
483 }
484
485 int ImageCtx::get_snap_name(snap_t in_snap_id,
486 string *out_snap_name) const
487 {
488 assert(snap_lock.is_locked());
489 const SnapInfo *info = get_snap_info(in_snap_id);
490 if (info) {
491 *out_snap_name = info->name;
492 return 0;
493 }
494 return -ENOENT;
495 }
496
497 int ImageCtx::get_snap_namespace(snap_t in_snap_id,
498 cls::rbd::SnapshotNamespace *out_snap_namespace) const
499 {
500 assert(snap_lock.is_locked());
501 const SnapInfo *info = get_snap_info(in_snap_id);
502 if (info) {
503 *out_snap_namespace = info->snap_namespace;
504 return 0;
505 }
506 return -ENOENT;
507 }
508
509 int ImageCtx::get_parent_spec(snap_t in_snap_id,
510 ParentSpec *out_pspec) const
511 {
512 const SnapInfo *info = get_snap_info(in_snap_id);
513 if (info) {
514 *out_pspec = info->parent.spec;
515 return 0;
516 }
517 return -ENOENT;
518 }
519
520 uint64_t ImageCtx::get_current_size() const
521 {
522 assert(snap_lock.is_locked());
523 return size;
524 }
525
526 uint64_t ImageCtx::get_object_size() const
527 {
528 return 1ull << order;
529 }
530
531 string ImageCtx::get_object_name(uint64_t num) const {
532 char buf[object_prefix.length() + 32];
533 snprintf(buf, sizeof(buf), format_string, num);
534 return string(buf);
535 }
536
537 uint64_t ImageCtx::get_stripe_unit() const
538 {
539 return stripe_unit;
540 }
541
542 uint64_t ImageCtx::get_stripe_count() const
543 {
544 return stripe_count;
545 }
546
547 uint64_t ImageCtx::get_stripe_period() const
548 {
549 return stripe_count * (1ull << order);
550 }
551
31f18b77
FG
552 utime_t ImageCtx::get_create_timestamp() const
553 {
554 return create_timestamp;
555 }
556
7c673cae
FG
557 int ImageCtx::is_snap_protected(snap_t in_snap_id,
558 bool *is_protected) const
559 {
560 assert(snap_lock.is_locked());
561 const SnapInfo *info = get_snap_info(in_snap_id);
562 if (info) {
563 *is_protected =
564 (info->protection_status == RBD_PROTECTION_STATUS_PROTECTED);
565 return 0;
566 }
567 return -ENOENT;
568 }
569
570 int ImageCtx::is_snap_unprotected(snap_t in_snap_id,
571 bool *is_unprotected) const
572 {
573 assert(snap_lock.is_locked());
574 const SnapInfo *info = get_snap_info(in_snap_id);
575 if (info) {
576 *is_unprotected =
577 (info->protection_status == RBD_PROTECTION_STATUS_UNPROTECTED);
578 return 0;
579 }
580 return -ENOENT;
581 }
582
583 void ImageCtx::add_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
584 string in_snap_name,
585 snap_t id, uint64_t in_size,
586 const ParentInfo &parent, uint8_t protection_status,
587 uint64_t flags, utime_t timestamp)
588 {
589 assert(snap_lock.is_wlocked());
590 snaps.push_back(id);
591 SnapInfo info(in_snap_name, in_snap_namespace,
592 in_size, parent, protection_status, flags, timestamp);
593 snap_info.insert({id, info});
594 snap_ids.insert({{in_snap_namespace, in_snap_name}, id});
595 }
596
597 void ImageCtx::rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
598 string in_snap_name,
599 snap_t id)
600 {
601 assert(snap_lock.is_wlocked());
602 snaps.erase(std::remove(snaps.begin(), snaps.end(), id), snaps.end());
603 snap_info.erase(id);
604 snap_ids.erase({in_snap_namespace, in_snap_name});
605 }
606
607 uint64_t ImageCtx::get_image_size(snap_t in_snap_id) const
608 {
609 assert(snap_lock.is_locked());
610 if (in_snap_id == CEPH_NOSNAP) {
611 if (!resize_reqs.empty() &&
612 resize_reqs.front()->shrinking()) {
613 return resize_reqs.front()->get_image_size();
614 }
615 return size;
616 }
617
618 const SnapInfo *info = get_snap_info(in_snap_id);
619 if (info) {
620 return info->size;
621 }
622 return 0;
623 }
624
625 uint64_t ImageCtx::get_object_count(snap_t in_snap_id) const {
626 assert(snap_lock.is_locked());
627 uint64_t image_size = get_image_size(in_snap_id);
628 return Striper::get_num_objects(layout, image_size);
629 }
630
631 bool ImageCtx::test_features(uint64_t features) const
632 {
633 RWLock::RLocker l(snap_lock);
634 return test_features(features, snap_lock);
635 }
636
637 bool ImageCtx::test_features(uint64_t in_features,
638 const RWLock &in_snap_lock) const
639 {
640 assert(snap_lock.is_locked());
641 return ((features & in_features) == in_features);
642 }
643
644 int ImageCtx::get_flags(librados::snap_t _snap_id, uint64_t *_flags) const
645 {
646 assert(snap_lock.is_locked());
647 if (_snap_id == CEPH_NOSNAP) {
648 *_flags = flags;
649 return 0;
650 }
651 const SnapInfo *info = get_snap_info(_snap_id);
652 if (info) {
653 *_flags = info->flags;
654 return 0;
655 }
656 return -ENOENT;
657 }
658
31f18b77 659 int ImageCtx::test_flags(uint64_t flags, bool *flags_set) const
7c673cae
FG
660 {
661 RWLock::RLocker l(snap_lock);
31f18b77 662 return test_flags(flags, snap_lock, flags_set);
7c673cae
FG
663 }
664
31f18b77
FG
665 int ImageCtx::test_flags(uint64_t flags, const RWLock &in_snap_lock,
666 bool *flags_set) const
7c673cae
FG
667 {
668 assert(snap_lock.is_locked());
669 uint64_t snap_flags;
31f18b77
FG
670 int r = get_flags(snap_id, &snap_flags);
671 if (r < 0) {
672 return r;
673 }
674 *flags_set = ((snap_flags & flags) == flags);
675 return 0;
7c673cae
FG
676 }
677
678 int ImageCtx::update_flags(snap_t in_snap_id, uint64_t flag, bool enabled)
679 {
680 assert(snap_lock.is_wlocked());
681 uint64_t *_flags;
682 if (in_snap_id == CEPH_NOSNAP) {
683 _flags = &flags;
684 } else {
685 map<snap_t, SnapInfo>::iterator it = snap_info.find(in_snap_id);
686 if (it == snap_info.end()) {
687 return -ENOENT;
688 }
689 _flags = &it->second.flags;
690 }
691
692 if (enabled) {
693 (*_flags) |= flag;
694 } else {
695 (*_flags) &= ~flag;
696 }
697 return 0;
698 }
699
700 const ParentInfo* ImageCtx::get_parent_info(snap_t in_snap_id) const
701 {
702 assert(snap_lock.is_locked());
703 assert(parent_lock.is_locked());
704 if (in_snap_id == CEPH_NOSNAP)
705 return &parent_md;
706 const SnapInfo *info = get_snap_info(in_snap_id);
707 if (info)
708 return &info->parent;
709 return NULL;
710 }
711
712 int64_t ImageCtx::get_parent_pool_id(snap_t in_snap_id) const
713 {
714 const ParentInfo *info = get_parent_info(in_snap_id);
715 if (info)
716 return info->spec.pool_id;
717 return -1;
718 }
719
720 string ImageCtx::get_parent_image_id(snap_t in_snap_id) const
721 {
722 const ParentInfo *info = get_parent_info(in_snap_id);
723 if (info)
724 return info->spec.image_id;
725 return "";
726 }
727
728 uint64_t ImageCtx::get_parent_snap_id(snap_t in_snap_id) const
729 {
730 const ParentInfo *info = get_parent_info(in_snap_id);
731 if (info)
732 return info->spec.snap_id;
733 return CEPH_NOSNAP;
734 }
735
736 int ImageCtx::get_parent_overlap(snap_t in_snap_id, uint64_t *overlap) const
737 {
738 assert(snap_lock.is_locked());
739 const ParentInfo *info = get_parent_info(in_snap_id);
740 if (info) {
741 *overlap = info->overlap;
742 return 0;
743 }
744 return -ENOENT;
745 }
746
747 void ImageCtx::aio_read_from_cache(object_t o, uint64_t object_no,
748 bufferlist *bl, size_t len,
749 uint64_t off, Context *onfinish,
31f18b77 750 int fadvise_flags, ZTracer::Trace *trace) {
7c673cae
FG
751 snap_lock.get_read();
752 ObjectCacher::OSDRead *rd = object_cacher->prepare_read(snap_id, bl, fadvise_flags);
753 snap_lock.put_read();
754 ObjectExtent extent(o, object_no, off, len, 0);
755 extent.oloc.pool = data_ctx.get_id();
756 extent.buffer_extents.push_back(make_pair(0, len));
757 rd->extents.push_back(extent);
758 cache_lock.Lock();
31f18b77 759 int r = object_cacher->readx(rd, object_set, onfinish, trace);
7c673cae
FG
760 cache_lock.Unlock();
761 if (r != 0)
762 onfinish->complete(r);
763 }
764
765 void ImageCtx::write_to_cache(object_t o, const bufferlist& bl, size_t len,
766 uint64_t off, Context *onfinish,
31f18b77
FG
767 int fadvise_flags, uint64_t journal_tid,
768 ZTracer::Trace *trace) {
7c673cae
FG
769 snap_lock.get_read();
770 ObjectCacher::OSDWrite *wr = object_cacher->prepare_write(
771 snapc, bl, ceph::real_time::min(), fadvise_flags, journal_tid);
772 snap_lock.put_read();
773 ObjectExtent extent(o, 0, off, len, 0);
774 extent.oloc.pool = data_ctx.get_id();
775 // XXX: nspace is always default, io_ctx_impl field private
776 //extent.oloc.nspace = data_ctx.io_ctx_impl->oloc.nspace;
777 extent.buffer_extents.push_back(make_pair(0, len));
778 wr->extents.push_back(extent);
779 {
780 Mutex::Locker l(cache_lock);
31f18b77 781 object_cacher->writex(wr, object_set, onfinish, trace);
7c673cae
FG
782 }
783 }
784
785 void ImageCtx::user_flushed() {
786 if (object_cacher && cache_writethrough_until_flush) {
787 md_lock.get_read();
788 bool flushed_before = flush_encountered;
789 md_lock.put_read();
790
791 uint64_t max_dirty = cache_max_dirty;
792 if (!flushed_before && max_dirty > 0) {
793 md_lock.get_write();
794 flush_encountered = true;
795 md_lock.put_write();
796
797 ldout(cct, 10) << "saw first user flush, enabling writeback" << dendl;
798 Mutex::Locker l(cache_lock);
799 object_cacher->set_max_dirty(max_dirty);
800 }
801 }
802 }
803
804 void ImageCtx::flush_cache(Context *onfinish) {
805 cache_lock.Lock();
806 object_cacher->flush_set(object_set, onfinish);
807 cache_lock.Unlock();
808 }
809
810 void ImageCtx::shut_down_cache(Context *on_finish) {
811 if (object_cacher == NULL) {
812 on_finish->complete(0);
813 return;
814 }
815
816 cache_lock.Lock();
817 object_cacher->release_set(object_set);
818 cache_lock.Unlock();
819
820 C_ShutDownCache *shut_down = new C_ShutDownCache(this, on_finish);
821 flush_cache(new C_InvalidateCache(this, true, false, shut_down));
822 }
823
824 int ImageCtx::invalidate_cache(bool purge_on_error) {
825 flush_async_operations();
826 if (object_cacher == NULL) {
827 return 0;
828 }
829
830 cache_lock.Lock();
831 object_cacher->release_set(object_set);
832 cache_lock.Unlock();
833
834 C_SaferCond ctx;
835 flush_cache(new C_InvalidateCache(this, purge_on_error, true, &ctx));
836
837 int result = ctx.wait();
838 return result;
839 }
840
841 void ImageCtx::invalidate_cache(bool purge_on_error, Context *on_finish) {
842 if (object_cacher == NULL) {
843 op_work_queue->queue(on_finish, 0);
844 return;
845 }
846
847 cache_lock.Lock();
848 object_cacher->release_set(object_set);
849 cache_lock.Unlock();
850
851 flush_cache(new C_InvalidateCache(this, purge_on_error, false, on_finish));
852 }
853
854 void ImageCtx::clear_nonexistence_cache() {
855 assert(cache_lock.is_locked());
856 if (!object_cacher)
857 return;
858 object_cacher->clear_nonexistence(object_set);
859 }
860
861 bool ImageCtx::is_cache_empty() {
862 Mutex::Locker locker(cache_lock);
863 return object_cacher->set_is_empty(object_set);
864 }
865
866 void ImageCtx::register_watch(Context *on_finish) {
867 assert(image_watcher == NULL);
868 image_watcher = new ImageWatcher<>(*this);
869 image_watcher->register_watch(on_finish);
870 }
871
872 uint64_t ImageCtx::prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
873 uint64_t overlap)
874 {
875 // drop extents completely beyond the overlap
876 while (!objectx.empty() && objectx.back().first >= overlap)
877 objectx.pop_back();
878
879 // trim final overlapping extent
880 if (!objectx.empty() && objectx.back().first + objectx.back().second > overlap)
881 objectx.back().second = overlap - objectx.back().first;
882
883 uint64_t len = 0;
884 for (vector<pair<uint64_t,uint64_t> >::iterator p = objectx.begin();
885 p != objectx.end();
886 ++p)
887 len += p->second;
888 ldout(cct, 10) << "prune_parent_extents image overlap " << overlap
889 << ", object overlap " << len
890 << " from image extents " << objectx << dendl;
891 return len;
892 }
893
894 void ImageCtx::flush_async_operations() {
895 C_SaferCond ctx;
896 flush_async_operations(&ctx);
897 ctx.wait();
898 }
899
900 void ImageCtx::flush_async_operations(Context *on_finish) {
901 {
902 Mutex::Locker l(async_ops_lock);
903 if (!async_ops.empty()) {
904 ldout(cct, 20) << "flush async operations: " << on_finish << " "
905 << "count=" << async_ops.size() << dendl;
906 async_ops.front()->add_flush_context(on_finish);
907 return;
908 }
909 }
910 on_finish->complete(0);
911 }
912
913 int ImageCtx::flush() {
914 C_SaferCond cond_ctx;
915 flush(&cond_ctx);
916 return cond_ctx.wait();
917 }
918
919 void ImageCtx::flush(Context *on_safe) {
920 // ensure no locks are held when flush is complete
921 on_safe = util::create_async_context_callback(*this, on_safe);
922
923 if (object_cacher != NULL) {
924 // flush cache after completing all in-flight AIO ops
925 on_safe = new C_FlushCache(this, on_safe);
926 }
927 flush_async_operations(on_safe);
928 }
929
930 void ImageCtx::cancel_async_requests() {
931 C_SaferCond ctx;
932 cancel_async_requests(&ctx);
933 ctx.wait();
934 }
935
936 void ImageCtx::cancel_async_requests(Context *on_finish) {
937 {
938 Mutex::Locker async_ops_locker(async_ops_lock);
939 if (!async_requests.empty()) {
940 ldout(cct, 10) << "canceling async requests: count="
941 << async_requests.size() << dendl;
942 for (auto req : async_requests) {
943 ldout(cct, 10) << "canceling async request: " << req << dendl;
944 req->cancel();
945 }
946 async_requests_waiters.push_back(on_finish);
947 return;
948 }
949 }
950
951 on_finish->complete(0);
952 }
953
954 void ImageCtx::clear_pending_completions() {
955 Mutex::Locker l(completed_reqs_lock);
956 ldout(cct, 10) << "clear pending AioCompletion: count="
957 << completed_reqs.size() << dendl;
958 completed_reqs.clear();
959 }
960
961 bool ImageCtx::_filter_metadata_confs(const string &prefix,
962 map<string, bool> &configs,
963 const map<string, bufferlist> &pairs,
964 map<string, bufferlist> *res) {
965 size_t conf_prefix_len = prefix.size();
966
967 for (auto it : pairs) {
968 if (it.first.compare(0, MIN(conf_prefix_len, it.first.size()), prefix) > 0)
969 return false;
970
971 if (it.first.size() <= conf_prefix_len)
972 continue;
973
974 string key = it.first.substr(conf_prefix_len, it.first.size() - conf_prefix_len);
975 auto cit = configs.find(key);
976 if (cit != configs.end()) {
977 cit->second = true;
978 res->insert(make_pair(key, it.second));
979 }
980 }
981 return true;
982 }
983
b32b8144
FG
984 void ImageCtx::apply_metadata(const std::map<std::string, bufferlist> &meta,
985 bool thread_safe) {
7c673cae
FG
986 ldout(cct, 20) << __func__ << dendl;
987 std::map<string, bool> configs = boost::assign::map_list_of(
988 "rbd_non_blocking_aio", false)(
989 "rbd_cache", false)(
990 "rbd_cache_writethrough_until_flush", false)(
991 "rbd_cache_size", false)(
992 "rbd_cache_max_dirty", false)(
993 "rbd_cache_target_dirty", false)(
994 "rbd_cache_max_dirty_age", false)(
995 "rbd_cache_max_dirty_object", false)(
996 "rbd_cache_block_writes_upfront", false)(
997 "rbd_concurrent_management_ops", false)(
998 "rbd_balance_snap_reads", false)(
999 "rbd_localize_snap_reads", false)(
1000 "rbd_balance_parent_reads", false)(
1001 "rbd_localize_parent_reads", false)(
b32b8144 1002 "rbd_sparse_read_threshold_bytes", false)(
7c673cae
FG
1003 "rbd_readahead_trigger_requests", false)(
1004 "rbd_readahead_max_bytes", false)(
1005 "rbd_readahead_disable_after_bytes", false)(
1006 "rbd_clone_copy_on_read", false)(
1007 "rbd_blacklist_on_break_lock", false)(
1008 "rbd_blacklist_expire_seconds", false)(
1009 "rbd_request_timed_out_seconds", false)(
1010 "rbd_journal_order", false)(
1011 "rbd_journal_splay_width", false)(
1012 "rbd_journal_commit_age", false)(
1013 "rbd_journal_object_flush_interval", false)(
1014 "rbd_journal_object_flush_bytes", false)(
1015 "rbd_journal_object_flush_age", false)(
1016 "rbd_journal_pool", false)(
1017 "rbd_journal_max_payload_bytes", false)(
1018 "rbd_journal_max_concurrent_object_sets", false)(
1019 "rbd_mirroring_resync_after_disconnect", false)(
1020 "rbd_mirroring_replay_delay", false)(
1021 "rbd_skip_partial_discard", false);
1022
1023 md_config_t local_config_t;
1024 std::map<std::string, bufferlist> res;
1025
1026 _filter_metadata_confs(METADATA_CONF_PREFIX, configs, meta, &res);
1027 for (auto it : res) {
1028 std::string val(it.second.c_str(), it.second.length());
1029 int j = local_config_t.set_val(it.first.c_str(), val);
1030 if (j < 0) {
1031 lderr(cct) << __func__ << " failed to set config " << it.first
1032 << " with value " << it.second.c_str() << ": " << j
1033 << dendl;
1034 }
1035 }
1036
181888fb 1037#define ASSIGN_OPTION(config, type) \
7c673cae
FG
1038 do { \
1039 string key = "rbd_"; \
1040 key = key + #config; \
1041 if (configs[key]) \
181888fb 1042 config = local_config_t.get_val<type>("rbd_"#config); \
7c673cae 1043 else \
181888fb 1044 config = cct->_conf->get_val<type>("rbd_"#config); \
7c673cae
FG
1045 } while (0);
1046
181888fb
FG
1047 ASSIGN_OPTION(non_blocking_aio, bool);
1048 ASSIGN_OPTION(cache, bool);
1049 ASSIGN_OPTION(cache_writethrough_until_flush, bool);
1050 ASSIGN_OPTION(cache_size, int64_t);
1051 ASSIGN_OPTION(cache_max_dirty, int64_t);
1052 ASSIGN_OPTION(cache_target_dirty, int64_t);
1053 ASSIGN_OPTION(cache_max_dirty_age, double);
1054 ASSIGN_OPTION(cache_max_dirty_object, int64_t);
1055 ASSIGN_OPTION(cache_block_writes_upfront, bool);
1056 ASSIGN_OPTION(concurrent_management_ops, int64_t);
1057 ASSIGN_OPTION(balance_snap_reads, bool);
1058 ASSIGN_OPTION(localize_snap_reads, bool);
1059 ASSIGN_OPTION(balance_parent_reads, bool);
1060 ASSIGN_OPTION(localize_parent_reads, bool);
b32b8144 1061 ASSIGN_OPTION(sparse_read_threshold_bytes, uint64_t);
181888fb
FG
1062 ASSIGN_OPTION(readahead_trigger_requests, int64_t);
1063 ASSIGN_OPTION(readahead_max_bytes, int64_t);
1064 ASSIGN_OPTION(readahead_disable_after_bytes, int64_t);
1065 ASSIGN_OPTION(clone_copy_on_read, bool);
1066 ASSIGN_OPTION(blacklist_on_break_lock, bool);
1067 ASSIGN_OPTION(blacklist_expire_seconds, int64_t);
1068 ASSIGN_OPTION(request_timed_out_seconds, int64_t);
1069 ASSIGN_OPTION(enable_alloc_hint, bool);
1070 ASSIGN_OPTION(journal_order, uint64_t);
1071 ASSIGN_OPTION(journal_splay_width, uint64_t);
1072 ASSIGN_OPTION(journal_commit_age, double);
1073 ASSIGN_OPTION(journal_object_flush_interval, int64_t);
1074 ASSIGN_OPTION(journal_object_flush_bytes, int64_t);
1075 ASSIGN_OPTION(journal_object_flush_age, double);
181888fb
FG
1076 ASSIGN_OPTION(journal_max_payload_bytes, uint64_t);
1077 ASSIGN_OPTION(journal_max_concurrent_object_sets, int64_t);
1078 ASSIGN_OPTION(mirroring_resync_after_disconnect, bool);
1079 ASSIGN_OPTION(mirroring_replay_delay, int64_t);
1080 ASSIGN_OPTION(skip_partial_discard, bool);
1081 ASSIGN_OPTION(blkin_trace_all, bool);
b32b8144
FG
1082
1083 if (thread_safe) {
1084 ASSIGN_OPTION(journal_pool, std::string);
1085 }
1086
1087 if (sparse_read_threshold_bytes == 0) {
1088 sparse_read_threshold_bytes = get_object_size();
1089 }
7c673cae
FG
1090 }
1091
1092 ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
1093 return new ExclusiveLock<ImageCtx>(*this);
1094 }
1095
1096 ObjectMap<ImageCtx> *ImageCtx::create_object_map(uint64_t snap_id) {
1097 return new ObjectMap<ImageCtx>(*this, snap_id);
1098 }
1099
1100 Journal<ImageCtx> *ImageCtx::create_journal() {
1101 return new Journal<ImageCtx>(*this);
1102 }
1103
1104 void ImageCtx::set_image_name(const std::string &image_name) {
1105 // update the name so rename can be invoked repeatedly
1106 RWLock::RLocker owner_locker(owner_lock);
1107 RWLock::WLocker snap_locker(snap_lock);
1108 name = image_name;
1109 if (old_format) {
1110 header_oid = util::old_header_name(image_name);
1111 }
1112 }
1113
1114 void ImageCtx::notify_update() {
1115 state->handle_update_notification();
1116 ImageWatcher<>::notify_header_update(md_ctx, header_oid);
1117 }
1118
1119 void ImageCtx::notify_update(Context *on_finish) {
1120 state->handle_update_notification();
1121 image_watcher->notify_header_update(on_finish);
1122 }
1123
1124 exclusive_lock::Policy *ImageCtx::get_exclusive_lock_policy() const {
1125 assert(owner_lock.is_locked());
1126 assert(exclusive_lock_policy != nullptr);
1127 return exclusive_lock_policy;
1128 }
1129
1130 void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy *policy) {
1131 assert(owner_lock.is_wlocked());
1132 assert(policy != nullptr);
1133 delete exclusive_lock_policy;
1134 exclusive_lock_policy = policy;
1135 }
1136
1137 journal::Policy *ImageCtx::get_journal_policy() const {
1138 assert(snap_lock.is_locked());
1139 assert(journal_policy != nullptr);
1140 return journal_policy;
1141 }
1142
1143 void ImageCtx::set_journal_policy(journal::Policy *policy) {
1144 assert(snap_lock.is_wlocked());
1145 assert(policy != nullptr);
1146 delete journal_policy;
1147 journal_policy = policy;
1148 }
1149
28e407b8
AA
1150 bool ImageCtx::is_writeback_cache_enabled() const {
1151 return (cache && cache_max_dirty > 0);
1152 }
1153
7c673cae
FG
1154 void ImageCtx::get_thread_pool_instance(CephContext *cct,
1155 ThreadPool **thread_pool,
1156 ContextWQ **op_work_queue) {
1157 ThreadPoolSingleton *thread_pool_singleton;
1158 cct->lookup_or_create_singleton_object<ThreadPoolSingleton>(
1159 thread_pool_singleton, "librbd::thread_pool");
1160 *thread_pool = thread_pool_singleton;
1161 *op_work_queue = thread_pool_singleton->op_work_queue;
1162 }
1163
1164 void ImageCtx::get_timer_instance(CephContext *cct, SafeTimer **timer,
1165 Mutex **timer_lock) {
1166 SafeTimerSingleton *safe_timer_singleton;
1167 cct->lookup_or_create_singleton_object<SafeTimerSingleton>(
1168 safe_timer_singleton, "librbd::journal::safe_timer");
1169 *timer = safe_timer_singleton;
1170 *timer_lock = &safe_timer_singleton->lock;
1171 }
1172}