]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/ImageCtx.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / librbd / ImageCtx.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #include <errno.h>
4 #include <boost/assign/list_of.hpp>
5 #include <stddef.h>
6
7 #include "include/neorados/RADOS.hpp"
8
9 #include "common/ceph_context.h"
10 #include "common/dout.h"
11 #include "common/errno.h"
12 #include "common/perf_counters.h"
13 #include "common/Timer.h"
14
15 #include "librbd/AsioEngine.h"
16 #include "librbd/AsyncRequest.h"
17 #include "librbd/ExclusiveLock.h"
18 #include "librbd/internal.h"
19 #include "librbd/ImageCtx.h"
20 #include "librbd/ImageState.h"
21 #include "librbd/ImageWatcher.h"
22 #include "librbd/Journal.h"
23 #include "librbd/LibrbdAdminSocketHook.h"
24 #include "librbd/ObjectMap.h"
25 #include "librbd/Operations.h"
26 #include "librbd/PluginRegistry.h"
27 #include "librbd/Types.h"
28 #include "librbd/Utils.h"
29 #include "librbd/asio/ContextWQ.h"
30 #include "librbd/exclusive_lock/AutomaticPolicy.h"
31 #include "librbd/exclusive_lock/StandardPolicy.h"
32 #include "librbd/crypto/EncryptionFormat.h"
33 #include "librbd/io/AioCompletion.h"
34 #include "librbd/io/AsyncOperation.h"
35 #include "librbd/io/ImageDispatcher.h"
36 #include "librbd/io/ObjectDispatcher.h"
37 #include "librbd/io/QosImageDispatch.h"
38 #include "librbd/io/IoOperations.h"
39 #include "librbd/io/Utils.h"
40 #include "librbd/journal/StandardPolicy.h"
41 #include "librbd/operation/ResizeRequest.h"
42
43 #include "osdc/Striper.h"
44 #include <boost/algorithm/string/predicate.hpp>
45
46 #define dout_subsys ceph_subsys_rbd
47 #undef dout_prefix
48 #define dout_prefix *_dout << "librbd::ImageCtx: "
49
50 using std::map;
51 using std::pair;
52 using std::set;
53 using std::string;
54 using std::vector;
55
56 using ceph::bufferlist;
57 using librados::snap_t;
58 using librados::IoCtx;
59
60 namespace librbd {
61
62 namespace {
63
64 class SafeTimerSingleton : public CommonSafeTimer<ceph::mutex> {
65 public:
66 ceph::mutex lock = ceph::make_mutex("librbd::SafeTimerSingleton::lock");
67
68 explicit SafeTimerSingleton(CephContext *cct)
69 : SafeTimer(cct, lock, true) {
70 init();
71 }
72 ~SafeTimerSingleton() {
73 std::lock_guard locker{lock};
74 shutdown();
75 }
76 };
77
78 librados::IoCtx duplicate_io_ctx(librados::IoCtx& io_ctx) {
79 librados::IoCtx dup_io_ctx;
80 dup_io_ctx.dup(io_ctx);
81 return dup_io_ctx;
82 }
83
84 } // anonymous namespace
85
86 const string ImageCtx::METADATA_CONF_PREFIX = "conf_";
87
88 ImageCtx::ImageCtx(const string &image_name, const string &image_id,
89 const char *snap, IoCtx& p, bool ro)
90 : cct((CephContext*)p.cct()),
91 config(cct->_conf),
92 perfcounter(NULL),
93 snap_id(CEPH_NOSNAP),
94 snap_exists(true),
95 read_only(ro),
96 read_only_flags(ro ? IMAGE_READ_ONLY_FLAG_USER : 0U),
97 exclusive_locked(false),
98 name(image_name),
99 asio_engine(std::make_shared<AsioEngine>(p)),
100 rados_api(asio_engine->get_rados_api()),
101 data_ctx(duplicate_io_ctx(p)),
102 md_ctx(duplicate_io_ctx(p)),
103 image_watcher(NULL),
104 journal(NULL),
105 owner_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::owner_lock", this))),
106 image_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::image_lock", this))),
107 timestamp_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::timestamp_lock", this))),
108 async_ops_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this))),
109 copyup_list_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this))),
110 extra_read_flags(0),
111 old_format(false),
112 order(0), size(0), features(0),
113 format_string(NULL),
114 id(image_id), parent(NULL),
115 stripe_unit(0), stripe_count(0), flags(0),
116 readahead(),
117 total_bytes_read(0),
118 state(new ImageState<>(this)),
119 operations(new Operations<>(*this)),
120 exclusive_lock(nullptr), object_map(nullptr),
121 op_work_queue(asio_engine->get_work_queue()),
122 plugin_registry(new PluginRegistry<ImageCtx>(this)),
123 event_socket_completions(32),
124 asok_hook(nullptr),
125 trace_endpoint("librbd")
126 {
127 ldout(cct, 10) << this << " " << __func__ << ": "
128 << "image_name=" << image_name << ", "
129 << "image_id=" << image_id << dendl;
130
131 if (snap)
132 snap_name = snap;
133
134 rebuild_data_io_context();
135
136 // FIPS zeroization audit 20191117: this memset is not security related.
137 memset(&header, 0, sizeof(header));
138
139 io_image_dispatcher = new io::ImageDispatcher<ImageCtx>(this);
140 io_object_dispatcher = new io::ObjectDispatcher<ImageCtx>(this);
141
142 if (cct->_conf.get_val<bool>("rbd_auto_exclusive_lock_until_manual_request")) {
143 exclusive_lock_policy = new exclusive_lock::AutomaticPolicy(this);
144 } else {
145 exclusive_lock_policy = new exclusive_lock::StandardPolicy(this);
146 }
147 journal_policy = new journal::StandardPolicy(this);
148 }
149
150 ImageCtx::ImageCtx(const string &image_name, const string &image_id,
151 uint64_t snap_id, IoCtx& p, bool ro)
152 : ImageCtx(image_name, image_id, "", p, ro) {
153 open_snap_id = snap_id;
154 }
155
156 ImageCtx::~ImageCtx() {
157 ldout(cct, 10) << this << " " << __func__ << dendl;
158
159 ceph_assert(config_watcher == nullptr);
160 ceph_assert(image_watcher == NULL);
161 ceph_assert(exclusive_lock == NULL);
162 ceph_assert(object_map == NULL);
163 ceph_assert(journal == NULL);
164 ceph_assert(asok_hook == NULL);
165
166 if (perfcounter) {
167 perf_stop();
168 }
169 delete[] format_string;
170
171 md_ctx.aio_flush();
172 if (data_ctx.is_valid()) {
173 data_ctx.aio_flush();
174 }
175
176 delete io_object_dispatcher;
177 delete io_image_dispatcher;
178
179 delete journal_policy;
180 delete exclusive_lock_policy;
181 delete operations;
182 delete state;
183
184 delete plugin_registry;
185 }
186
187 void ImageCtx::init() {
188 ceph_assert(!header_oid.empty());
189 ceph_assert(old_format || !id.empty());
190
191 asok_hook = new LibrbdAdminSocketHook(this);
192
193 string pname = string("librbd-") + id + string("-") +
194 md_ctx.get_pool_name() + string("-") + name;
195 if (!snap_name.empty()) {
196 pname += "-";
197 pname += snap_name;
198 }
199
200 trace_endpoint.copy_name(pname);
201 perf_start(pname);
202
203 ceph_assert(image_watcher == NULL);
204 image_watcher = new ImageWatcher<>(*this);
205 }
206
207 void ImageCtx::shutdown() {
208 delete image_watcher;
209 image_watcher = nullptr;
210
211 delete asok_hook;
212 asok_hook = nullptr;
213 }
214
215 void ImageCtx::init_layout(int64_t pool_id)
216 {
217 if (stripe_unit == 0 || stripe_count == 0) {
218 stripe_unit = 1ull << order;
219 stripe_count = 1;
220 }
221
222 vector<uint64_t> alignments;
223 alignments.push_back(stripe_count << order); // object set (in file striping terminology)
224 alignments.push_back(stripe_unit * stripe_count); // stripe
225 alignments.push_back(stripe_unit); // stripe unit
226 readahead.set_alignments(alignments);
227
228 layout = file_layout_t();
229 layout.stripe_unit = stripe_unit;
230 layout.stripe_count = stripe_count;
231 layout.object_size = 1ull << order;
232 layout.pool_id = pool_id; // FIXME: pool id overflow?
233
234 delete[] format_string;
235 size_t len = object_prefix.length() + 16;
236 format_string = new char[len];
237 if (old_format) {
238 snprintf(format_string, len, "%s.%%012llx", object_prefix.c_str());
239 } else {
240 snprintf(format_string, len, "%s.%%016llx", object_prefix.c_str());
241 }
242
243 ldout(cct, 10) << "init_layout stripe_unit " << stripe_unit
244 << " stripe_count " << stripe_count
245 << " object_size " << layout.object_size
246 << " prefix " << object_prefix
247 << " format " << format_string
248 << dendl;
249 }
250
251 void ImageCtx::perf_start(string name) {
252 auto perf_prio = PerfCountersBuilder::PRIO_DEBUGONLY;
253 if (child == nullptr) {
254 // ensure top-level IO stats are exported for librbd daemons
255 perf_prio = PerfCountersBuilder::PRIO_USEFUL;
256 }
257
258 PerfCountersBuilder plb(cct, name, l_librbd_first, l_librbd_last);
259
260 plb.add_u64_counter(l_librbd_rd, "rd", "Reads", "r", perf_prio);
261 plb.add_u64_counter(l_librbd_rd_bytes, "rd_bytes", "Data size in reads",
262 "rb", perf_prio, unit_t(UNIT_BYTES));
263 plb.add_time_avg(l_librbd_rd_latency, "rd_latency", "Latency of reads",
264 "rl", perf_prio);
265 plb.add_u64_counter(l_librbd_wr, "wr", "Writes", "w", perf_prio);
266 plb.add_u64_counter(l_librbd_wr_bytes, "wr_bytes", "Written data",
267 "wb", perf_prio, unit_t(UNIT_BYTES));
268 plb.add_time_avg(l_librbd_wr_latency, "wr_latency", "Write latency",
269 "wl", perf_prio);
270 plb.add_u64_counter(l_librbd_discard, "discard", "Discards");
271 plb.add_u64_counter(l_librbd_discard_bytes, "discard_bytes", "Discarded data", NULL, 0, unit_t(UNIT_BYTES));
272 plb.add_time_avg(l_librbd_discard_latency, "discard_latency", "Discard latency");
273 plb.add_u64_counter(l_librbd_flush, "flush", "Flushes");
274 plb.add_time_avg(l_librbd_flush_latency, "flush_latency", "Latency of flushes");
275 plb.add_u64_counter(l_librbd_ws, "ws", "WriteSames");
276 plb.add_u64_counter(l_librbd_ws_bytes, "ws_bytes", "WriteSame data", NULL, 0, unit_t(UNIT_BYTES));
277 plb.add_time_avg(l_librbd_ws_latency, "ws_latency", "WriteSame latency");
278 plb.add_u64_counter(l_librbd_cmp, "cmp", "CompareAndWrites");
279 plb.add_u64_counter(l_librbd_cmp_bytes, "cmp_bytes", "Data size in cmps", NULL, 0, unit_t(UNIT_BYTES));
280 plb.add_time_avg(l_librbd_cmp_latency, "cmp_latency", "Latency of cmps");
281 plb.add_u64_counter(l_librbd_snap_create, "snap_create", "Snap creations");
282 plb.add_u64_counter(l_librbd_snap_remove, "snap_remove", "Snap removals");
283 plb.add_u64_counter(l_librbd_snap_rollback, "snap_rollback", "Snap rollbacks");
284 plb.add_u64_counter(l_librbd_snap_rename, "snap_rename", "Snap rename");
285 plb.add_u64_counter(l_librbd_notify, "notify", "Updated header notifications");
286 plb.add_u64_counter(l_librbd_resize, "resize", "Resizes");
287 plb.add_u64_counter(l_librbd_readahead, "readahead", "Read ahead");
288 plb.add_u64_counter(l_librbd_readahead_bytes, "readahead_bytes", "Data size in read ahead", NULL, 0, unit_t(UNIT_BYTES));
289 plb.add_u64_counter(l_librbd_invalidate_cache, "invalidate_cache", "Cache invalidates");
290
291 plb.add_time(l_librbd_opened_time, "opened_time", "Opened time",
292 "ots", perf_prio);
293 plb.add_time(l_librbd_lock_acquired_time, "lock_acquired_time",
294 "Lock acquired time", "lats", perf_prio);
295
296 perfcounter = plb.create_perf_counters();
297 cct->get_perfcounters_collection()->add(perfcounter);
298
299 perfcounter->tset(l_librbd_opened_time, ceph_clock_now());
300 }
301
302 void ImageCtx::perf_stop() {
303 ceph_assert(perfcounter);
304 cct->get_perfcounters_collection()->remove(perfcounter);
305 delete perfcounter;
306 }
307
308 void ImageCtx::set_read_flag(unsigned flag) {
309 extra_read_flags |= flag;
310 }
311
312 int ImageCtx::get_read_flags(snap_t snap_id) {
313 int flags = librados::OPERATION_NOFLAG | read_flags;
314 if (flags != 0)
315 return flags;
316
317 flags = librados::OPERATION_NOFLAG | extra_read_flags;
318 if (snap_id == LIBRADOS_SNAP_HEAD)
319 return flags;
320
321 if (config.get_val<bool>("rbd_balance_snap_reads"))
322 flags |= librados::OPERATION_BALANCE_READS;
323 else if (config.get_val<bool>("rbd_localize_snap_reads"))
324 flags |= librados::OPERATION_LOCALIZE_READS;
325 return flags;
326 }
327
328 int ImageCtx::snap_set(uint64_t in_snap_id) {
329 ceph_assert(ceph_mutex_is_wlocked(image_lock));
330 auto it = snap_info.find(in_snap_id);
331 if (in_snap_id != CEPH_NOSNAP && it != snap_info.end()) {
332 snap_id = in_snap_id;
333 snap_namespace = it->second.snap_namespace;
334 snap_name = it->second.name;
335 snap_exists = true;
336 if (data_ctx.is_valid()) {
337 data_ctx.snap_set_read(snap_id);
338 rebuild_data_io_context();
339 }
340 return 0;
341 }
342 return -ENOENT;
343 }
344
345 void ImageCtx::snap_unset()
346 {
347 ceph_assert(ceph_mutex_is_wlocked(image_lock));
348 snap_id = CEPH_NOSNAP;
349 snap_namespace = {};
350 snap_name = "";
351 snap_exists = true;
352 if (data_ctx.is_valid()) {
353 data_ctx.snap_set_read(snap_id);
354 rebuild_data_io_context();
355 }
356 }
357
358 snap_t ImageCtx::get_snap_id(const cls::rbd::SnapshotNamespace& in_snap_namespace,
359 const string& in_snap_name) const
360 {
361 ceph_assert(ceph_mutex_is_locked(image_lock));
362 auto it = snap_ids.find({in_snap_namespace, in_snap_name});
363 if (it != snap_ids.end()) {
364 return it->second;
365 }
366 return CEPH_NOSNAP;
367 }
368
369 const SnapInfo* ImageCtx::get_snap_info(snap_t in_snap_id) const
370 {
371 ceph_assert(ceph_mutex_is_locked(image_lock));
372 map<snap_t, SnapInfo>::const_iterator it =
373 snap_info.find(in_snap_id);
374 if (it != snap_info.end())
375 return &it->second;
376 return nullptr;
377 }
378
379 int ImageCtx::get_snap_name(snap_t in_snap_id,
380 string *out_snap_name) const
381 {
382 ceph_assert(ceph_mutex_is_locked(image_lock));
383 const SnapInfo *info = get_snap_info(in_snap_id);
384 if (info) {
385 *out_snap_name = info->name;
386 return 0;
387 }
388 return -ENOENT;
389 }
390
391 int ImageCtx::get_snap_namespace(snap_t in_snap_id,
392 cls::rbd::SnapshotNamespace *out_snap_namespace) const
393 {
394 ceph_assert(ceph_mutex_is_locked(image_lock));
395 const SnapInfo *info = get_snap_info(in_snap_id);
396 if (info) {
397 *out_snap_namespace = info->snap_namespace;
398 return 0;
399 }
400 return -ENOENT;
401 }
402
403 int ImageCtx::get_parent_spec(snap_t in_snap_id,
404 cls::rbd::ParentImageSpec *out_pspec) const
405 {
406 const SnapInfo *info = get_snap_info(in_snap_id);
407 if (info) {
408 *out_pspec = info->parent.spec;
409 return 0;
410 }
411 return -ENOENT;
412 }
413
414 uint64_t ImageCtx::get_current_size() const
415 {
416 ceph_assert(ceph_mutex_is_locked(image_lock));
417 return size;
418 }
419
420 uint64_t ImageCtx::get_object_size() const
421 {
422 return 1ull << order;
423 }
424
425 string ImageCtx::get_object_name(uint64_t num) const {
426 return util::data_object_name(this, num);
427 }
428
429 uint64_t ImageCtx::get_stripe_unit() const
430 {
431 return stripe_unit;
432 }
433
434 uint64_t ImageCtx::get_stripe_count() const
435 {
436 return stripe_count;
437 }
438
439 uint64_t ImageCtx::get_stripe_period() const
440 {
441 return stripe_count * (1ull << order);
442 }
443
444 utime_t ImageCtx::get_create_timestamp() const
445 {
446 return create_timestamp;
447 }
448
449 utime_t ImageCtx::get_access_timestamp() const
450 {
451 return access_timestamp;
452 }
453
454 utime_t ImageCtx::get_modify_timestamp() const
455 {
456 return modify_timestamp;
457 }
458
459 void ImageCtx::set_access_timestamp(utime_t at)
460 {
461 ceph_assert(ceph_mutex_is_wlocked(timestamp_lock));
462 access_timestamp = at;
463 }
464
465 void ImageCtx::set_modify_timestamp(utime_t mt)
466 {
467 ceph_assert(ceph_mutex_is_locked(timestamp_lock));
468 modify_timestamp = mt;
469 }
470
471 int ImageCtx::is_snap_protected(snap_t in_snap_id,
472 bool *is_protected) const
473 {
474 ceph_assert(ceph_mutex_is_locked(image_lock));
475 const SnapInfo *info = get_snap_info(in_snap_id);
476 if (info) {
477 *is_protected =
478 (info->protection_status == RBD_PROTECTION_STATUS_PROTECTED);
479 return 0;
480 }
481 return -ENOENT;
482 }
483
484 int ImageCtx::is_snap_unprotected(snap_t in_snap_id,
485 bool *is_unprotected) const
486 {
487 ceph_assert(ceph_mutex_is_locked(image_lock));
488 const SnapInfo *info = get_snap_info(in_snap_id);
489 if (info) {
490 *is_unprotected =
491 (info->protection_status == RBD_PROTECTION_STATUS_UNPROTECTED);
492 return 0;
493 }
494 return -ENOENT;
495 }
496
497 void ImageCtx::add_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
498 string in_snap_name,
499 snap_t id, uint64_t in_size,
500 const ParentImageInfo &parent,
501 uint8_t protection_status, uint64_t flags,
502 utime_t timestamp)
503 {
504 ceph_assert(ceph_mutex_is_wlocked(image_lock));
505 snaps.push_back(id);
506 SnapInfo info(in_snap_name, in_snap_namespace,
507 in_size, parent, protection_status, flags, timestamp);
508 snap_info.insert({id, info});
509 snap_ids.insert({{in_snap_namespace, in_snap_name}, id});
510 }
511
512 void ImageCtx::rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
513 string in_snap_name,
514 snap_t id)
515 {
516 ceph_assert(ceph_mutex_is_wlocked(image_lock));
517 snaps.erase(std::remove(snaps.begin(), snaps.end(), id), snaps.end());
518 snap_info.erase(id);
519 snap_ids.erase({in_snap_namespace, in_snap_name});
520 }
521
522 uint64_t ImageCtx::get_image_size(snap_t in_snap_id) const
523 {
524 ceph_assert(ceph_mutex_is_locked(image_lock));
525 if (in_snap_id == CEPH_NOSNAP) {
526 if (!resize_reqs.empty() &&
527 resize_reqs.front()->shrinking()) {
528 return resize_reqs.front()->get_image_size();
529 }
530 return size;
531 }
532
533 const SnapInfo *info = get_snap_info(in_snap_id);
534 if (info) {
535 return info->size;
536 }
537 return 0;
538 }
539
540 uint64_t ImageCtx::get_area_size(io::ImageArea area) const {
541 // image areas are defined only for the "opened at" snap_id
542 // (i.e. where encryption may be loaded)
543 uint64_t raw_size = get_image_size(snap_id);
544 if (raw_size == 0) {
545 return 0;
546 }
547
548 auto size = io::util::raw_to_area_offset(*this, raw_size);
549 ceph_assert(size.first <= raw_size && size.second == io::ImageArea::DATA);
550
551 switch (area) {
552 case io::ImageArea::DATA:
553 return size.first;
554 case io::ImageArea::CRYPTO_HEADER:
555 // CRYPTO_HEADER area ends where DATA area begins
556 return raw_size - size.first;
557 default:
558 ceph_abort();
559 }
560 }
561
562 uint64_t ImageCtx::get_object_count(snap_t in_snap_id) const {
563 ceph_assert(ceph_mutex_is_locked(image_lock));
564 uint64_t image_size = get_image_size(in_snap_id);
565 return Striper::get_num_objects(layout, image_size);
566 }
567
568 bool ImageCtx::test_features(uint64_t features) const
569 {
570 std::shared_lock l{image_lock};
571 return test_features(features, image_lock);
572 }
573
574 bool ImageCtx::test_features(uint64_t in_features,
575 const ceph::shared_mutex &in_image_lock) const
576 {
577 ceph_assert(ceph_mutex_is_locked(image_lock));
578 return ((features & in_features) == in_features);
579 }
580
581 bool ImageCtx::test_op_features(uint64_t in_op_features) const
582 {
583 std::shared_lock l{image_lock};
584 return test_op_features(in_op_features, image_lock);
585 }
586
587 bool ImageCtx::test_op_features(uint64_t in_op_features,
588 const ceph::shared_mutex &in_image_lock) const
589 {
590 ceph_assert(ceph_mutex_is_locked(image_lock));
591 return ((op_features & in_op_features) == in_op_features);
592 }
593
594 int ImageCtx::get_flags(librados::snap_t _snap_id, uint64_t *_flags) const
595 {
596 ceph_assert(ceph_mutex_is_locked(image_lock));
597 if (_snap_id == CEPH_NOSNAP) {
598 *_flags = flags;
599 return 0;
600 }
601 const SnapInfo *info = get_snap_info(_snap_id);
602 if (info) {
603 *_flags = info->flags;
604 return 0;
605 }
606 return -ENOENT;
607 }
608
609 int ImageCtx::test_flags(librados::snap_t in_snap_id,
610 uint64_t flags, bool *flags_set) const
611 {
612 std::shared_lock l{image_lock};
613 return test_flags(in_snap_id, flags, image_lock, flags_set);
614 }
615
616 int ImageCtx::test_flags(librados::snap_t in_snap_id,
617 uint64_t flags,
618 const ceph::shared_mutex &in_image_lock,
619 bool *flags_set) const
620 {
621 ceph_assert(ceph_mutex_is_locked(image_lock));
622 uint64_t snap_flags;
623 int r = get_flags(in_snap_id, &snap_flags);
624 if (r < 0) {
625 return r;
626 }
627 *flags_set = ((snap_flags & flags) == flags);
628 return 0;
629 }
630
631 int ImageCtx::update_flags(snap_t in_snap_id, uint64_t flag, bool enabled)
632 {
633 ceph_assert(ceph_mutex_is_wlocked(image_lock));
634 uint64_t *_flags;
635 if (in_snap_id == CEPH_NOSNAP) {
636 _flags = &flags;
637 } else {
638 map<snap_t, SnapInfo>::iterator it = snap_info.find(in_snap_id);
639 if (it == snap_info.end()) {
640 return -ENOENT;
641 }
642 _flags = &it->second.flags;
643 }
644
645 if (enabled) {
646 (*_flags) |= flag;
647 } else {
648 (*_flags) &= ~flag;
649 }
650 return 0;
651 }
652
653 const ParentImageInfo* ImageCtx::get_parent_info(snap_t in_snap_id) const
654 {
655 ceph_assert(ceph_mutex_is_locked(image_lock));
656 if (in_snap_id == CEPH_NOSNAP)
657 return &parent_md;
658 const SnapInfo *info = get_snap_info(in_snap_id);
659 if (info)
660 return &info->parent;
661 return NULL;
662 }
663
664 int64_t ImageCtx::get_parent_pool_id(snap_t in_snap_id) const
665 {
666 const auto info = get_parent_info(in_snap_id);
667 if (info)
668 return info->spec.pool_id;
669 return -1;
670 }
671
672 string ImageCtx::get_parent_image_id(snap_t in_snap_id) const
673 {
674 const auto info = get_parent_info(in_snap_id);
675 if (info)
676 return info->spec.image_id;
677 return "";
678 }
679
680 uint64_t ImageCtx::get_parent_snap_id(snap_t in_snap_id) const
681 {
682 const auto info = get_parent_info(in_snap_id);
683 if (info)
684 return info->spec.snap_id;
685 return CEPH_NOSNAP;
686 }
687
688 int ImageCtx::get_parent_overlap(snap_t in_snap_id,
689 uint64_t* raw_overlap) const {
690 const auto info = get_parent_info(in_snap_id);
691 if (info) {
692 *raw_overlap = info->overlap;
693 return 0;
694 }
695 return -ENOENT;
696 }
697
698 std::pair<uint64_t, io::ImageArea> ImageCtx::reduce_parent_overlap(
699 uint64_t raw_overlap, bool migration_write) const {
700 ceph_assert(ceph_mutex_is_locked(image_lock));
701 if (migration_write) {
702 // don't reduce migration write overlap -- it may be larger as
703 // it's the largest overlap across snapshots by construction
704 return io::util::raw_to_area_offset(*this, raw_overlap);
705 }
706 if (raw_overlap == 0 || parent == nullptr) {
707 // image opened with OPEN_FLAG_SKIP_OPEN_PARENT -> no overlap
708 return io::util::raw_to_area_offset(*this, 0);
709 }
710 // DATA area in the parent may be smaller than the part of DATA
711 // area in the clone that is still within the overlap (e.g. for
712 // LUKS2-encrypted parent + LUKS1-encrypted clone, due to LUKS2
713 // header usually being bigger than LUKS1 header)
714 auto overlap = io::util::raw_to_area_offset(*this, raw_overlap);
715 std::shared_lock parent_image_locker(parent->image_lock);
716 overlap.first = std::min(overlap.first,
717 parent->get_area_size(overlap.second));
718 return overlap;
719 }
720
721 uint64_t ImageCtx::prune_parent_extents(io::Extents& image_extents,
722 io::ImageArea area,
723 uint64_t raw_overlap,
724 bool migration_write) const {
725 ceph_assert(ceph_mutex_is_locked(image_lock));
726 ldout(cct, 10) << __func__ << ": image_extents=" << image_extents
727 << " area=" << area << " raw_overlap=" << raw_overlap
728 << " migration_write=" << migration_write << dendl;
729 if (raw_overlap == 0) {
730 image_extents.clear();
731 return 0;
732 }
733
734 auto overlap = reduce_parent_overlap(raw_overlap, migration_write);
735 if (area == overlap.second) {
736 // drop extents completely beyond the overlap
737 while (!image_extents.empty() &&
738 image_extents.back().first >= overlap.first) {
739 image_extents.pop_back();
740 }
741 if (!image_extents.empty()) {
742 // trim final overlapping extent
743 auto& last_extent = image_extents.back();
744 if (last_extent.first + last_extent.second > overlap.first) {
745 last_extent.second = overlap.first - last_extent.first;
746 }
747 }
748 } else if (area == io::ImageArea::DATA &&
749 overlap.second == io::ImageArea::CRYPTO_HEADER) {
750 // all extents completely beyond the overlap
751 image_extents.clear();
752 } else {
753 // all extents completely within the overlap
754 ceph_assert(area == io::ImageArea::CRYPTO_HEADER &&
755 overlap.second == io::ImageArea::DATA);
756 }
757
758 uint64_t overlap_bytes = 0;
759 for (auto [_, len] : image_extents) {
760 overlap_bytes += len;
761 }
762 ldout(cct, 10) << __func__ << ": overlap=" << overlap.first
763 << "/" << overlap.second
764 << " got overlap_bytes=" << overlap_bytes
765 << " at " << image_extents << dendl;
766 return overlap_bytes;
767 }
768
769 void ImageCtx::register_watch(Context *on_finish) {
770 ceph_assert(image_watcher != NULL);
771 image_watcher->register_watch(on_finish);
772 }
773
774 void ImageCtx::cancel_async_requests() {
775 C_SaferCond ctx;
776 cancel_async_requests(&ctx);
777 ctx.wait();
778 }
779
780 void ImageCtx::cancel_async_requests(Context *on_finish) {
781 {
782 std::lock_guard async_ops_locker{async_ops_lock};
783 if (!async_requests.empty()) {
784 ldout(cct, 10) << "canceling async requests: count="
785 << async_requests.size() << dendl;
786 for (auto req : async_requests) {
787 ldout(cct, 10) << "canceling async request: " << req << dendl;
788 req->cancel();
789 }
790 async_requests_waiters.push_back(on_finish);
791 return;
792 }
793 }
794
795 on_finish->complete(0);
796 }
797
798 void ImageCtx::apply_metadata(const std::map<std::string, bufferlist> &meta,
799 bool thread_safe) {
800 ldout(cct, 20) << __func__ << dendl;
801
802 std::unique_lock image_locker(image_lock);
803
804 // reset settings back to global defaults
805 config_overrides.clear();
806 config.set_config_values(cct->_conf.get_config_values());
807
808 // extract config overrides
809 for (auto meta_pair : meta) {
810 if (!boost::starts_with(meta_pair.first, METADATA_CONF_PREFIX)) {
811 continue;
812 }
813
814 std::string key = meta_pair.first.substr(METADATA_CONF_PREFIX.size());
815 if (!boost::starts_with(key, "rbd_")) {
816 // ignore non-RBD configuration keys
817 // TODO use option schema to determine applicable subsystem
818 ldout(cct, 0) << __func__ << ": ignoring config " << key << dendl;
819 continue;
820 }
821
822 if (config.find_option(key) != nullptr) {
823 std::string val(meta_pair.second.c_str(), meta_pair.second.length());
824 int r = config.set_val(key, val);
825 if (r >= 0) {
826 ldout(cct, 20) << __func__ << ": " << key << "=" << val << dendl;
827 config_overrides.insert(key);
828 } else {
829 lderr(cct) << __func__ << ": failed to set config " << key << " "
830 << "with value " << val << ": " << cpp_strerror(r)
831 << dendl;
832 }
833 }
834 }
835
836 image_locker.unlock();
837
838 #define ASSIGN_OPTION(param, type) \
839 param = config.get_val<type>("rbd_"#param)
840
841 bool skip_partial_discard = true;
842 ASSIGN_OPTION(non_blocking_aio, bool);
843 ASSIGN_OPTION(cache, bool);
844 ASSIGN_OPTION(sparse_read_threshold_bytes, Option::size_t);
845 ASSIGN_OPTION(clone_copy_on_read, bool);
846 ASSIGN_OPTION(enable_alloc_hint, bool);
847 ASSIGN_OPTION(mirroring_replay_delay, uint64_t);
848 ASSIGN_OPTION(mtime_update_interval, uint64_t);
849 ASSIGN_OPTION(atime_update_interval, uint64_t);
850 ASSIGN_OPTION(skip_partial_discard, bool);
851 ASSIGN_OPTION(discard_granularity_bytes, uint64_t);
852 ASSIGN_OPTION(blkin_trace_all, bool);
853
854 auto cache_policy = config.get_val<std::string>("rbd_cache_policy");
855 if (cache_policy == "writethrough" || cache_policy == "writeback") {
856 ASSIGN_OPTION(readahead_max_bytes, Option::size_t);
857 ASSIGN_OPTION(readahead_disable_after_bytes, Option::size_t);
858 }
859
860 #undef ASSIGN_OPTION
861
862 if (sparse_read_threshold_bytes == 0) {
863 sparse_read_threshold_bytes = get_object_size();
864 }
865
866 bool dirty_cache = test_features(RBD_FEATURE_DIRTY_CACHE);
867 if (!skip_partial_discard || dirty_cache) {
868 discard_granularity_bytes = 0;
869 }
870
871 alloc_hint_flags = 0;
872 auto compression_hint = config.get_val<std::string>("rbd_compression_hint");
873 if (compression_hint == "compressible") {
874 alloc_hint_flags |= librados::ALLOC_HINT_FLAG_COMPRESSIBLE;
875 } else if (compression_hint == "incompressible") {
876 alloc_hint_flags |= librados::ALLOC_HINT_FLAG_INCOMPRESSIBLE;
877 }
878
879 librados::Rados rados(md_ctx);
880 int8_t require_osd_release;
881 int r = rados.get_min_compatible_osd(&require_osd_release);
882 if (r == 0 && require_osd_release >= CEPH_RELEASE_OCTOPUS) {
883 read_flags = 0;
884 auto read_policy = config.get_val<std::string>("rbd_read_from_replica_policy");
885 if (read_policy == "balance") {
886 read_flags |= librados::OPERATION_BALANCE_READS;
887 } else if (read_policy == "localize") {
888 read_flags |= librados::OPERATION_LOCALIZE_READS;
889 }
890 }
891
892 io_image_dispatcher->apply_qos_schedule_tick_min(
893 config.get_val<uint64_t>("rbd_qos_schedule_tick_min"));
894
895 io_image_dispatcher->apply_qos_limit(
896 io::IMAGE_DISPATCH_FLAG_QOS_IOPS_THROTTLE,
897 config.get_val<uint64_t>("rbd_qos_iops_limit"),
898 config.get_val<uint64_t>("rbd_qos_iops_burst"),
899 config.get_val<uint64_t>("rbd_qos_iops_burst_seconds"));
900 io_image_dispatcher->apply_qos_limit(
901 io::IMAGE_DISPATCH_FLAG_QOS_BPS_THROTTLE,
902 config.get_val<uint64_t>("rbd_qos_bps_limit"),
903 config.get_val<uint64_t>("rbd_qos_bps_burst"),
904 config.get_val<uint64_t>("rbd_qos_bps_burst_seconds"));
905 io_image_dispatcher->apply_qos_limit(
906 io::IMAGE_DISPATCH_FLAG_QOS_READ_IOPS_THROTTLE,
907 config.get_val<uint64_t>("rbd_qos_read_iops_limit"),
908 config.get_val<uint64_t>("rbd_qos_read_iops_burst"),
909 config.get_val<uint64_t>("rbd_qos_read_iops_burst_seconds"));
910 io_image_dispatcher->apply_qos_limit(
911 io::IMAGE_DISPATCH_FLAG_QOS_WRITE_IOPS_THROTTLE,
912 config.get_val<uint64_t>("rbd_qos_write_iops_limit"),
913 config.get_val<uint64_t>("rbd_qos_write_iops_burst"),
914 config.get_val<uint64_t>("rbd_qos_write_iops_burst_seconds"));
915 io_image_dispatcher->apply_qos_limit(
916 io::IMAGE_DISPATCH_FLAG_QOS_READ_BPS_THROTTLE,
917 config.get_val<uint64_t>("rbd_qos_read_bps_limit"),
918 config.get_val<uint64_t>("rbd_qos_read_bps_burst"),
919 config.get_val<uint64_t>("rbd_qos_read_bps_burst_seconds"));
920 io_image_dispatcher->apply_qos_limit(
921 io::IMAGE_DISPATCH_FLAG_QOS_WRITE_BPS_THROTTLE,
922 config.get_val<uint64_t>("rbd_qos_write_bps_limit"),
923 config.get_val<uint64_t>("rbd_qos_write_bps_burst"),
924 config.get_val<uint64_t>("rbd_qos_write_bps_burst_seconds"));
925 io_image_dispatcher->apply_qos_exclude_ops(
926 librbd::io::rbd_io_operations_from_string(
927 config.get_val<std::string>("rbd_qos_exclude_ops"), nullptr));
928
929 if (!disable_zero_copy &&
930 config.get_val<bool>("rbd_disable_zero_copy_writes")) {
931 ldout(cct, 5) << this << ": disabling zero-copy writes" << dendl;
932 disable_zero_copy = true;
933 }
934 }
935
936 ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
937 return new ExclusiveLock<ImageCtx>(*this);
938 }
939
940 ObjectMap<ImageCtx> *ImageCtx::create_object_map(uint64_t snap_id) {
941 return new ObjectMap<ImageCtx>(*this, snap_id);
942 }
943
944 Journal<ImageCtx> *ImageCtx::create_journal() {
945 return new Journal<ImageCtx>(*this);
946 }
947
948 void ImageCtx::set_image_name(const std::string &image_name) {
949 // update the name so rename can be invoked repeatedly
950 std::shared_lock owner_locker{owner_lock};
951 std::unique_lock image_locker{image_lock};
952 name = image_name;
953 if (old_format) {
954 header_oid = util::old_header_name(image_name);
955 }
956 }
957
958 void ImageCtx::notify_update() {
959 state->handle_update_notification();
960 ImageWatcher<>::notify_header_update(md_ctx, header_oid);
961 }
962
963 void ImageCtx::notify_update(Context *on_finish) {
964 state->handle_update_notification();
965 image_watcher->notify_header_update(on_finish);
966 }
967
968 exclusive_lock::Policy *ImageCtx::get_exclusive_lock_policy() const {
969 ceph_assert(ceph_mutex_is_locked(owner_lock));
970 ceph_assert(exclusive_lock_policy != nullptr);
971 return exclusive_lock_policy;
972 }
973
974 void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy *policy) {
975 ceph_assert(ceph_mutex_is_wlocked(owner_lock));
976 ceph_assert(policy != nullptr);
977 delete exclusive_lock_policy;
978 exclusive_lock_policy = policy;
979 }
980
981 journal::Policy *ImageCtx::get_journal_policy() const {
982 ceph_assert(ceph_mutex_is_locked(image_lock));
983 ceph_assert(journal_policy != nullptr);
984 return journal_policy;
985 }
986
987 void ImageCtx::set_journal_policy(journal::Policy *policy) {
988 ceph_assert(ceph_mutex_is_wlocked(image_lock));
989 ceph_assert(policy != nullptr);
990 delete journal_policy;
991 journal_policy = policy;
992 }
993
994 void ImageCtx::rebuild_data_io_context() {
995 auto ctx = std::make_shared<neorados::IOContext>(
996 data_ctx.get_id(), data_ctx.get_namespace());
997 if (snap_id != CEPH_NOSNAP) {
998 ctx->read_snap(snap_id);
999 }
1000 if (!snapc.snaps.empty()) {
1001 ctx->write_snap_context(
1002 {{snapc.seq, {snapc.snaps.begin(), snapc.snaps.end()}}});
1003 }
1004 if (data_ctx.get_pool_full_try()) {
1005 ctx->full_try(true);
1006 }
1007
1008 // atomically reset the data IOContext to new version
1009 atomic_store(&data_io_context, ctx);
1010 }
1011
1012 IOContext ImageCtx::get_data_io_context() const {
1013 return atomic_load(&data_io_context);
1014 }
1015
1016 IOContext ImageCtx::duplicate_data_io_context() const {
1017 auto ctx = get_data_io_context();
1018 return std::make_shared<neorados::IOContext>(*ctx);
1019 }
1020
1021 void ImageCtx::get_timer_instance(CephContext *cct, SafeTimer **timer,
1022 ceph::mutex **timer_lock) {
1023 auto safe_timer_singleton =
1024 &cct->lookup_or_create_singleton_object<SafeTimerSingleton>(
1025 "librbd::journal::safe_timer", false, cct);
1026 *timer = safe_timer_singleton;
1027 *timer_lock = &safe_timer_singleton->lock;
1028 }
1029 }