]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/ImageCtx.cc
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / librbd / ImageCtx.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#include <errno.h>
4#include <boost/assign/list_of.hpp>
5#include <stddef.h>
6
7#include "common/ceph_context.h"
8#include "common/dout.h"
9#include "common/errno.h"
10#include "common/perf_counters.h"
11#include "common/WorkQueue.h"
12#include "common/Timer.h"
13
7c673cae
FG
14#include "librbd/AsyncRequest.h"
15#include "librbd/ExclusiveLock.h"
16#include "librbd/internal.h"
17#include "librbd/ImageCtx.h"
18#include "librbd/ImageState.h"
19#include "librbd/ImageWatcher.h"
20#include "librbd/Journal.h"
21#include "librbd/LibrbdAdminSocketHook.h"
22#include "librbd/ObjectMap.h"
23#include "librbd/Operations.h"
24#include "librbd/operation/ResizeRequest.h"
b32b8144 25#include "librbd/Types.h"
7c673cae
FG
26#include "librbd/Utils.h"
27#include "librbd/LibrbdWriteback.h"
28#include "librbd/exclusive_lock/AutomaticPolicy.h"
29#include "librbd/exclusive_lock/StandardPolicy.h"
30#include "librbd/io/AioCompletion.h"
31f18b77 31#include "librbd/io/AsyncOperation.h"
7c673cae 32#include "librbd/io/ImageRequestWQ.h"
11fdf7f2 33#include "librbd/io/ObjectDispatcher.h"
7c673cae
FG
34#include "librbd/journal/StandardPolicy.h"
35
36#include "osdc/Striper.h"
37#include <boost/bind.hpp>
11fdf7f2 38#include <boost/algorithm/string/predicate.hpp>
7c673cae
FG
39
40#define dout_subsys ceph_subsys_rbd
41#undef dout_prefix
42#define dout_prefix *_dout << "librbd::ImageCtx: "
43
44using std::map;
45using std::pair;
46using std::set;
47using std::string;
48using std::vector;
49
50using ceph::bufferlist;
51using librados::snap_t;
52using librados::IoCtx;
53
54namespace librbd {
55
56namespace {
57
58class ThreadPoolSingleton : public ThreadPool {
59public:
60 ContextWQ *op_work_queue;
61
62 explicit ThreadPoolSingleton(CephContext *cct)
63 : ThreadPool(cct, "librbd::thread_pool", "tp_librbd", 1,
64 "rbd_op_threads"),
65 op_work_queue(new ContextWQ("librbd::op_work_queue",
11fdf7f2 66 cct->_conf.get_val<uint64_t>("rbd_op_thread_timeout"),
7c673cae
FG
67 this)) {
68 start();
69 }
70 ~ThreadPoolSingleton() override {
71 op_work_queue->drain();
72 delete op_work_queue;
73
74 stop();
75 }
76};
77
78class SafeTimerSingleton : public SafeTimer {
79public:
80 Mutex lock;
81
82 explicit SafeTimerSingleton(CephContext *cct)
83 : SafeTimer(cct, lock, true),
84 lock("librbd::Journal::SafeTimerSingleton::lock") {
85 init();
86 }
87 ~SafeTimerSingleton() {
88 Mutex::Locker locker(lock);
89 shutdown();
90 }
91};
92
7c673cae
FG
93} // anonymous namespace
94
95 const string ImageCtx::METADATA_CONF_PREFIX = "conf_";
96
97 ImageCtx::ImageCtx(const string &image_name, const string &image_id,
98 const char *snap, IoCtx& p, bool ro)
99 : cct((CephContext*)p.cct()),
11fdf7f2 100 config(cct->_conf),
7c673cae
FG
101 perfcounter(NULL),
102 snap_id(CEPH_NOSNAP),
103 snap_exists(true),
104 read_only(ro),
7c673cae
FG
105 exclusive_locked(false),
106 name(image_name),
107 image_watcher(NULL),
108 journal(NULL),
109 owner_lock(util::unique_lock_name("librbd::ImageCtx::owner_lock", this)),
110 md_lock(util::unique_lock_name("librbd::ImageCtx::md_lock", this)),
7c673cae 111 snap_lock(util::unique_lock_name("librbd::ImageCtx::snap_lock", this)),
11fdf7f2 112 timestamp_lock(util::unique_lock_name("librbd::ImageCtx::timestamp_lock", this)),
7c673cae
FG
113 parent_lock(util::unique_lock_name("librbd::ImageCtx::parent_lock", this)),
114 object_map_lock(util::unique_lock_name("librbd::ImageCtx::object_map_lock", this)),
115 async_ops_lock(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this)),
116 copyup_list_lock(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this)),
117 completed_reqs_lock(util::unique_lock_name("librbd::ImageCtx::completed_reqs_lock", this)),
118 extra_read_flags(0),
11fdf7f2 119 old_format(false),
7c673cae
FG
120 order(0), size(0), features(0),
121 format_string(NULL),
122 id(image_id), parent(NULL),
123 stripe_unit(0), stripe_count(0), flags(0),
7c673cae
FG
124 readahead(),
125 total_bytes_read(0),
126 state(new ImageState<>(this)),
127 operations(new Operations<>(*this)),
128 exclusive_lock(nullptr), object_map(nullptr),
129 io_work_queue(nullptr), op_work_queue(nullptr),
31f18b77
FG
130 asok_hook(nullptr),
131 trace_endpoint("librbd")
7c673cae
FG
132 {
133 md_ctx.dup(p);
134 data_ctx.dup(p);
135 if (snap)
136 snap_name = snap;
137
92f5a8d4 138 // FIPS zeroization audit 20191117: this memset is not security related.
7c673cae
FG
139 memset(&header, 0, sizeof(header));
140
141 ThreadPool *thread_pool;
142 get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
224ce89b 143 io_work_queue = new io::ImageRequestWQ<>(
181888fb 144 this, "librbd::io_work_queue",
11fdf7f2 145 cct->_conf.get_val<uint64_t>("rbd_op_thread_timeout"),
7c673cae 146 thread_pool);
11fdf7f2 147 io_object_dispatcher = new io::ObjectDispatcher<>(this);
7c673cae 148
11fdf7f2 149 if (cct->_conf.get_val<bool>("rbd_auto_exclusive_lock_until_manual_request")) {
7c673cae
FG
150 exclusive_lock_policy = new exclusive_lock::AutomaticPolicy(this);
151 } else {
152 exclusive_lock_policy = new exclusive_lock::StandardPolicy(this);
153 }
154 journal_policy = new journal::StandardPolicy<ImageCtx>(this);
155 }
156
11fdf7f2
TL
157 ImageCtx::ImageCtx(const string &image_name, const string &image_id,
158 uint64_t snap_id, IoCtx& p, bool ro)
159 : ImageCtx(image_name, image_id, "", p, ro) {
160 open_snap_id = snap_id;
161 }
162
7c673cae 163 ImageCtx::~ImageCtx() {
11fdf7f2
TL
164 ceph_assert(image_watcher == NULL);
165 ceph_assert(exclusive_lock == NULL);
166 ceph_assert(object_map == NULL);
167 ceph_assert(journal == NULL);
168 ceph_assert(asok_hook == NULL);
7c673cae
FG
169
170 if (perfcounter) {
171 perf_stop();
172 }
7c673cae
FG
173 delete[] format_string;
174
175 md_ctx.aio_flush();
eafe8130
TL
176 if (data_ctx.is_valid()) {
177 data_ctx.aio_flush();
178 }
7c673cae
FG
179 io_work_queue->drain();
180
11fdf7f2
TL
181 delete io_object_dispatcher;
182
7c673cae
FG
183 delete journal_policy;
184 delete exclusive_lock_policy;
185 delete io_work_queue;
186 delete operations;
187 delete state;
188 }
189
190 void ImageCtx::init() {
11fdf7f2
TL
191 ceph_assert(!header_oid.empty());
192 ceph_assert(old_format || !id.empty());
7c673cae
FG
193
194 asok_hook = new LibrbdAdminSocketHook(this);
195
196 string pname = string("librbd-") + id + string("-") +
eafe8130 197 md_ctx.get_pool_name() + string("-") + name;
7c673cae
FG
198 if (!snap_name.empty()) {
199 pname += "-";
200 pname += snap_name;
201 }
202
31f18b77 203 trace_endpoint.copy_name(pname);
7c673cae
FG
204 perf_start(pname);
205
11fdf7f2
TL
206 ceph_assert(image_watcher == NULL);
207 image_watcher = new ImageWatcher<>(*this);
7c673cae
FG
208 }
209
210 void ImageCtx::shutdown() {
211 delete image_watcher;
212 image_watcher = nullptr;
213
214 delete asok_hook;
215 asok_hook = nullptr;
216 }
217
eafe8130 218 void ImageCtx::init_layout(int64_t pool_id)
7c673cae
FG
219 {
220 if (stripe_unit == 0 || stripe_count == 0) {
221 stripe_unit = 1ull << order;
222 stripe_count = 1;
223 }
224
225 vector<uint64_t> alignments;
226 alignments.push_back(stripe_count << order); // object set (in file striping terminology)
227 alignments.push_back(stripe_unit * stripe_count); // stripe
228 alignments.push_back(stripe_unit); // stripe unit
229 readahead.set_alignments(alignments);
230
231 layout = file_layout_t();
232 layout.stripe_unit = stripe_unit;
233 layout.stripe_count = stripe_count;
234 layout.object_size = 1ull << order;
eafe8130 235 layout.pool_id = pool_id; // FIXME: pool id overflow?
7c673cae
FG
236
237 delete[] format_string;
238 size_t len = object_prefix.length() + 16;
239 format_string = new char[len];
240 if (old_format) {
241 snprintf(format_string, len, "%s.%%012llx", object_prefix.c_str());
242 } else {
243 snprintf(format_string, len, "%s.%%016llx", object_prefix.c_str());
244 }
245
246 ldout(cct, 10) << "init_layout stripe_unit " << stripe_unit
247 << " stripe_count " << stripe_count
248 << " object_size " << layout.object_size
249 << " prefix " << object_prefix
250 << " format " << format_string
251 << dendl;
252 }
253
254 void ImageCtx::perf_start(string name) {
b32b8144
FG
255 auto perf_prio = PerfCountersBuilder::PRIO_DEBUGONLY;
256 if (child == nullptr) {
257 // ensure top-level IO stats are exported for librbd daemons
258 perf_prio = PerfCountersBuilder::PRIO_USEFUL;
259 }
260
7c673cae
FG
261 PerfCountersBuilder plb(cct, name, l_librbd_first, l_librbd_last);
262
b32b8144
FG
263 plb.add_u64_counter(l_librbd_rd, "rd", "Reads", "r", perf_prio);
264 plb.add_u64_counter(l_librbd_rd_bytes, "rd_bytes", "Data size in reads",
11fdf7f2 265 "rb", perf_prio, unit_t(UNIT_BYTES));
b32b8144
FG
266 plb.add_time_avg(l_librbd_rd_latency, "rd_latency", "Latency of reads",
267 "rl", perf_prio);
268 plb.add_u64_counter(l_librbd_wr, "wr", "Writes", "w", perf_prio);
269 plb.add_u64_counter(l_librbd_wr_bytes, "wr_bytes", "Written data",
11fdf7f2 270 "wb", perf_prio, unit_t(UNIT_BYTES));
b32b8144
FG
271 plb.add_time_avg(l_librbd_wr_latency, "wr_latency", "Write latency",
272 "wl", perf_prio);
7c673cae 273 plb.add_u64_counter(l_librbd_discard, "discard", "Discards");
11fdf7f2 274 plb.add_u64_counter(l_librbd_discard_bytes, "discard_bytes", "Discarded data", NULL, 0, unit_t(UNIT_BYTES));
7c673cae
FG
275 plb.add_time_avg(l_librbd_discard_latency, "discard_latency", "Discard latency");
276 plb.add_u64_counter(l_librbd_flush, "flush", "Flushes");
11fdf7f2 277 plb.add_time_avg(l_librbd_flush_latency, "flush_latency", "Latency of flushes");
7c673cae 278 plb.add_u64_counter(l_librbd_ws, "ws", "WriteSames");
11fdf7f2 279 plb.add_u64_counter(l_librbd_ws_bytes, "ws_bytes", "WriteSame data", NULL, 0, unit_t(UNIT_BYTES));
7c673cae 280 plb.add_time_avg(l_librbd_ws_latency, "ws_latency", "WriteSame latency");
c07f9fc5 281 plb.add_u64_counter(l_librbd_cmp, "cmp", "CompareAndWrites");
11fdf7f2 282 plb.add_u64_counter(l_librbd_cmp_bytes, "cmp_bytes", "Data size in cmps", NULL, 0, unit_t(UNIT_BYTES));
c07f9fc5 283 plb.add_time_avg(l_librbd_cmp_latency, "cmp_latency", "Latency of cmps");
7c673cae
FG
284 plb.add_u64_counter(l_librbd_snap_create, "snap_create", "Snap creations");
285 plb.add_u64_counter(l_librbd_snap_remove, "snap_remove", "Snap removals");
286 plb.add_u64_counter(l_librbd_snap_rollback, "snap_rollback", "Snap rollbacks");
287 plb.add_u64_counter(l_librbd_snap_rename, "snap_rename", "Snap rename");
288 plb.add_u64_counter(l_librbd_notify, "notify", "Updated header notifications");
289 plb.add_u64_counter(l_librbd_resize, "resize", "Resizes");
290 plb.add_u64_counter(l_librbd_readahead, "readahead", "Read ahead");
11fdf7f2 291 plb.add_u64_counter(l_librbd_readahead_bytes, "readahead_bytes", "Data size in read ahead", NULL, 0, unit_t(UNIT_BYTES));
7c673cae
FG
292 plb.add_u64_counter(l_librbd_invalidate_cache, "invalidate_cache", "Cache invalidates");
293
b32b8144
FG
294 plb.add_time(l_librbd_opened_time, "opened_time", "Opened time",
295 "ots", perf_prio);
296 plb.add_time(l_librbd_lock_acquired_time, "lock_acquired_time",
297 "Lock acquired time", "lats", perf_prio);
298
7c673cae
FG
299 perfcounter = plb.create_perf_counters();
300 cct->get_perfcounters_collection()->add(perfcounter);
b32b8144
FG
301
302 perfcounter->tset(l_librbd_opened_time, ceph_clock_now());
7c673cae
FG
303 }
304
305 void ImageCtx::perf_stop() {
11fdf7f2 306 ceph_assert(perfcounter);
7c673cae
FG
307 cct->get_perfcounters_collection()->remove(perfcounter);
308 delete perfcounter;
309 }
310
311 void ImageCtx::set_read_flag(unsigned flag) {
312 extra_read_flags |= flag;
313 }
314
315 int ImageCtx::get_read_flags(snap_t snap_id) {
316 int flags = librados::OPERATION_NOFLAG | extra_read_flags;
317 if (snap_id == LIBRADOS_SNAP_HEAD)
318 return flags;
319
11fdf7f2 320 if (config.get_val<bool>("rbd_balance_snap_reads"))
7c673cae 321 flags |= librados::OPERATION_BALANCE_READS;
11fdf7f2 322 else if (config.get_val<bool>("rbd_localize_snap_reads"))
7c673cae
FG
323 flags |= librados::OPERATION_LOCALIZE_READS;
324 return flags;
325 }
326
11fdf7f2
TL
327 int ImageCtx::snap_set(uint64_t in_snap_id) {
328 ceph_assert(snap_lock.is_wlocked());
329 auto it = snap_info.find(in_snap_id);
330 if (in_snap_id != CEPH_NOSNAP && it != snap_info.end()) {
7c673cae 331 snap_id = in_snap_id;
11fdf7f2
TL
332 snap_namespace = it->second.snap_namespace;
333 snap_name = it->second.name;
7c673cae 334 snap_exists = true;
eafe8130
TL
335 if (data_ctx.is_valid()) {
336 data_ctx.snap_set_read(snap_id);
337 }
7c673cae
FG
338 return 0;
339 }
340 return -ENOENT;
341 }
342
343 void ImageCtx::snap_unset()
344 {
11fdf7f2 345 ceph_assert(snap_lock.is_wlocked());
7c673cae
FG
346 snap_id = CEPH_NOSNAP;
347 snap_namespace = {};
348 snap_name = "";
349 snap_exists = true;
eafe8130
TL
350 if (data_ctx.is_valid()) {
351 data_ctx.snap_set_read(snap_id);
352 }
7c673cae
FG
353 }
354
11fdf7f2
TL
355 snap_t ImageCtx::get_snap_id(const cls::rbd::SnapshotNamespace& in_snap_namespace,
356 const string& in_snap_name) const
7c673cae 357 {
11fdf7f2 358 ceph_assert(snap_lock.is_locked());
7c673cae 359 auto it = snap_ids.find({in_snap_namespace, in_snap_name});
11fdf7f2 360 if (it != snap_ids.end()) {
7c673cae 361 return it->second;
11fdf7f2 362 }
7c673cae
FG
363 return CEPH_NOSNAP;
364 }
365
366 const SnapInfo* ImageCtx::get_snap_info(snap_t in_snap_id) const
367 {
11fdf7f2 368 ceph_assert(snap_lock.is_locked());
7c673cae
FG
369 map<snap_t, SnapInfo>::const_iterator it =
370 snap_info.find(in_snap_id);
371 if (it != snap_info.end())
372 return &it->second;
11fdf7f2 373 return nullptr;
7c673cae
FG
374 }
375
376 int ImageCtx::get_snap_name(snap_t in_snap_id,
377 string *out_snap_name) const
378 {
11fdf7f2 379 ceph_assert(snap_lock.is_locked());
7c673cae
FG
380 const SnapInfo *info = get_snap_info(in_snap_id);
381 if (info) {
382 *out_snap_name = info->name;
383 return 0;
384 }
385 return -ENOENT;
386 }
387
388 int ImageCtx::get_snap_namespace(snap_t in_snap_id,
389 cls::rbd::SnapshotNamespace *out_snap_namespace) const
390 {
11fdf7f2 391 ceph_assert(snap_lock.is_locked());
7c673cae
FG
392 const SnapInfo *info = get_snap_info(in_snap_id);
393 if (info) {
394 *out_snap_namespace = info->snap_namespace;
395 return 0;
396 }
397 return -ENOENT;
398 }
399
400 int ImageCtx::get_parent_spec(snap_t in_snap_id,
11fdf7f2 401 cls::rbd::ParentImageSpec *out_pspec) const
7c673cae
FG
402 {
403 const SnapInfo *info = get_snap_info(in_snap_id);
404 if (info) {
405 *out_pspec = info->parent.spec;
406 return 0;
407 }
408 return -ENOENT;
409 }
410
411 uint64_t ImageCtx::get_current_size() const
412 {
11fdf7f2 413 ceph_assert(snap_lock.is_locked());
7c673cae
FG
414 return size;
415 }
416
417 uint64_t ImageCtx::get_object_size() const
418 {
419 return 1ull << order;
420 }
421
422 string ImageCtx::get_object_name(uint64_t num) const {
423 char buf[object_prefix.length() + 32];
424 snprintf(buf, sizeof(buf), format_string, num);
425 return string(buf);
426 }
427
428 uint64_t ImageCtx::get_stripe_unit() const
429 {
430 return stripe_unit;
431 }
432
433 uint64_t ImageCtx::get_stripe_count() const
434 {
435 return stripe_count;
436 }
437
438 uint64_t ImageCtx::get_stripe_period() const
439 {
440 return stripe_count * (1ull << order);
441 }
442
31f18b77
FG
443 utime_t ImageCtx::get_create_timestamp() const
444 {
445 return create_timestamp;
446 }
447
11fdf7f2
TL
448 utime_t ImageCtx::get_access_timestamp() const
449 {
450 return access_timestamp;
451 }
452
453 utime_t ImageCtx::get_modify_timestamp() const
454 {
455 return modify_timestamp;
456 }
457
458 void ImageCtx::set_access_timestamp(utime_t at)
459 {
460 ceph_assert(timestamp_lock.is_wlocked());
461 access_timestamp = at;
462 }
463
464 void ImageCtx::set_modify_timestamp(utime_t mt)
465 {
466 ceph_assert(timestamp_lock.is_locked());
467 modify_timestamp = mt;
468 }
469
7c673cae
FG
470 int ImageCtx::is_snap_protected(snap_t in_snap_id,
471 bool *is_protected) const
472 {
11fdf7f2 473 ceph_assert(snap_lock.is_locked());
7c673cae
FG
474 const SnapInfo *info = get_snap_info(in_snap_id);
475 if (info) {
476 *is_protected =
477 (info->protection_status == RBD_PROTECTION_STATUS_PROTECTED);
478 return 0;
479 }
480 return -ENOENT;
481 }
482
483 int ImageCtx::is_snap_unprotected(snap_t in_snap_id,
484 bool *is_unprotected) const
485 {
11fdf7f2 486 ceph_assert(snap_lock.is_locked());
7c673cae
FG
487 const SnapInfo *info = get_snap_info(in_snap_id);
488 if (info) {
489 *is_unprotected =
490 (info->protection_status == RBD_PROTECTION_STATUS_UNPROTECTED);
491 return 0;
492 }
493 return -ENOENT;
494 }
495
496 void ImageCtx::add_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
497 string in_snap_name,
498 snap_t id, uint64_t in_size,
11fdf7f2
TL
499 const ParentImageInfo &parent,
500 uint8_t protection_status, uint64_t flags,
501 utime_t timestamp)
7c673cae 502 {
11fdf7f2 503 ceph_assert(snap_lock.is_wlocked());
7c673cae
FG
504 snaps.push_back(id);
505 SnapInfo info(in_snap_name, in_snap_namespace,
506 in_size, parent, protection_status, flags, timestamp);
507 snap_info.insert({id, info});
508 snap_ids.insert({{in_snap_namespace, in_snap_name}, id});
509 }
510
511 void ImageCtx::rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
512 string in_snap_name,
513 snap_t id)
514 {
11fdf7f2 515 ceph_assert(snap_lock.is_wlocked());
7c673cae
FG
516 snaps.erase(std::remove(snaps.begin(), snaps.end(), id), snaps.end());
517 snap_info.erase(id);
518 snap_ids.erase({in_snap_namespace, in_snap_name});
519 }
520
521 uint64_t ImageCtx::get_image_size(snap_t in_snap_id) const
522 {
11fdf7f2 523 ceph_assert(snap_lock.is_locked());
7c673cae
FG
524 if (in_snap_id == CEPH_NOSNAP) {
525 if (!resize_reqs.empty() &&
526 resize_reqs.front()->shrinking()) {
527 return resize_reqs.front()->get_image_size();
528 }
529 return size;
530 }
531
532 const SnapInfo *info = get_snap_info(in_snap_id);
533 if (info) {
534 return info->size;
535 }
536 return 0;
537 }
538
539 uint64_t ImageCtx::get_object_count(snap_t in_snap_id) const {
11fdf7f2 540 ceph_assert(snap_lock.is_locked());
7c673cae
FG
541 uint64_t image_size = get_image_size(in_snap_id);
542 return Striper::get_num_objects(layout, image_size);
543 }
544
545 bool ImageCtx::test_features(uint64_t features) const
546 {
547 RWLock::RLocker l(snap_lock);
548 return test_features(features, snap_lock);
549 }
550
551 bool ImageCtx::test_features(uint64_t in_features,
552 const RWLock &in_snap_lock) const
553 {
11fdf7f2 554 ceph_assert(snap_lock.is_locked());
7c673cae
FG
555 return ((features & in_features) == in_features);
556 }
557
11fdf7f2
TL
558 bool ImageCtx::test_op_features(uint64_t in_op_features) const
559 {
560 RWLock::RLocker snap_locker(snap_lock);
561 return test_op_features(in_op_features, snap_lock);
562 }
563
564 bool ImageCtx::test_op_features(uint64_t in_op_features,
565 const RWLock &in_snap_lock) const
566 {
567 ceph_assert(snap_lock.is_locked());
568 return ((op_features & in_op_features) == in_op_features);
569 }
570
7c673cae
FG
571 int ImageCtx::get_flags(librados::snap_t _snap_id, uint64_t *_flags) const
572 {
11fdf7f2 573 ceph_assert(snap_lock.is_locked());
7c673cae
FG
574 if (_snap_id == CEPH_NOSNAP) {
575 *_flags = flags;
576 return 0;
577 }
578 const SnapInfo *info = get_snap_info(_snap_id);
579 if (info) {
580 *_flags = info->flags;
581 return 0;
582 }
583 return -ENOENT;
584 }
585
91327a77
AA
586 int ImageCtx::test_flags(librados::snap_t in_snap_id,
587 uint64_t flags, bool *flags_set) const
7c673cae
FG
588 {
589 RWLock::RLocker l(snap_lock);
91327a77 590 return test_flags(in_snap_id, flags, snap_lock, flags_set);
7c673cae
FG
591 }
592
91327a77
AA
593 int ImageCtx::test_flags(librados::snap_t in_snap_id,
594 uint64_t flags, const RWLock &in_snap_lock,
31f18b77 595 bool *flags_set) const
7c673cae 596 {
11fdf7f2 597 ceph_assert(snap_lock.is_locked());
7c673cae 598 uint64_t snap_flags;
91327a77 599 int r = get_flags(in_snap_id, &snap_flags);
31f18b77
FG
600 if (r < 0) {
601 return r;
602 }
603 *flags_set = ((snap_flags & flags) == flags);
604 return 0;
7c673cae
FG
605 }
606
607 int ImageCtx::update_flags(snap_t in_snap_id, uint64_t flag, bool enabled)
608 {
11fdf7f2 609 ceph_assert(snap_lock.is_wlocked());
7c673cae
FG
610 uint64_t *_flags;
611 if (in_snap_id == CEPH_NOSNAP) {
612 _flags = &flags;
613 } else {
614 map<snap_t, SnapInfo>::iterator it = snap_info.find(in_snap_id);
615 if (it == snap_info.end()) {
616 return -ENOENT;
617 }
618 _flags = &it->second.flags;
619 }
620
621 if (enabled) {
622 (*_flags) |= flag;
623 } else {
624 (*_flags) &= ~flag;
625 }
626 return 0;
627 }
628
11fdf7f2 629 const ParentImageInfo* ImageCtx::get_parent_info(snap_t in_snap_id) const
7c673cae 630 {
11fdf7f2
TL
631 ceph_assert(snap_lock.is_locked());
632 ceph_assert(parent_lock.is_locked());
7c673cae
FG
633 if (in_snap_id == CEPH_NOSNAP)
634 return &parent_md;
635 const SnapInfo *info = get_snap_info(in_snap_id);
636 if (info)
637 return &info->parent;
638 return NULL;
639 }
640
641 int64_t ImageCtx::get_parent_pool_id(snap_t in_snap_id) const
642 {
11fdf7f2 643 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
644 if (info)
645 return info->spec.pool_id;
646 return -1;
647 }
648
649 string ImageCtx::get_parent_image_id(snap_t in_snap_id) const
650 {
11fdf7f2 651 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
652 if (info)
653 return info->spec.image_id;
654 return "";
655 }
656
657 uint64_t ImageCtx::get_parent_snap_id(snap_t in_snap_id) const
658 {
11fdf7f2 659 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
660 if (info)
661 return info->spec.snap_id;
662 return CEPH_NOSNAP;
663 }
664
665 int ImageCtx::get_parent_overlap(snap_t in_snap_id, uint64_t *overlap) const
666 {
11fdf7f2
TL
667 ceph_assert(snap_lock.is_locked());
668 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
669 if (info) {
670 *overlap = info->overlap;
671 return 0;
672 }
673 return -ENOENT;
674 }
675
7c673cae 676 void ImageCtx::register_watch(Context *on_finish) {
11fdf7f2 677 ceph_assert(image_watcher != NULL);
7c673cae
FG
678 image_watcher->register_watch(on_finish);
679 }
680
681 uint64_t ImageCtx::prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
682 uint64_t overlap)
683 {
684 // drop extents completely beyond the overlap
685 while (!objectx.empty() && objectx.back().first >= overlap)
686 objectx.pop_back();
687
688 // trim final overlapping extent
689 if (!objectx.empty() && objectx.back().first + objectx.back().second > overlap)
690 objectx.back().second = overlap - objectx.back().first;
691
692 uint64_t len = 0;
693 for (vector<pair<uint64_t,uint64_t> >::iterator p = objectx.begin();
694 p != objectx.end();
695 ++p)
696 len += p->second;
697 ldout(cct, 10) << "prune_parent_extents image overlap " << overlap
698 << ", object overlap " << len
699 << " from image extents " << objectx << dendl;
700 return len;
701 }
702
7c673cae
FG
703 void ImageCtx::cancel_async_requests() {
704 C_SaferCond ctx;
705 cancel_async_requests(&ctx);
706 ctx.wait();
707 }
708
709 void ImageCtx::cancel_async_requests(Context *on_finish) {
710 {
711 Mutex::Locker async_ops_locker(async_ops_lock);
712 if (!async_requests.empty()) {
713 ldout(cct, 10) << "canceling async requests: count="
714 << async_requests.size() << dendl;
715 for (auto req : async_requests) {
716 ldout(cct, 10) << "canceling async request: " << req << dendl;
717 req->cancel();
718 }
719 async_requests_waiters.push_back(on_finish);
720 return;
721 }
722 }
723
724 on_finish->complete(0);
725 }
726
727 void ImageCtx::clear_pending_completions() {
728 Mutex::Locker l(completed_reqs_lock);
729 ldout(cct, 10) << "clear pending AioCompletion: count="
730 << completed_reqs.size() << dendl;
731 completed_reqs.clear();
732 }
733
11fdf7f2
TL
734 void ImageCtx::apply_metadata(const std::map<std::string, bufferlist> &meta,
735 bool thread_safe) {
736 ldout(cct, 20) << __func__ << dendl;
7c673cae 737
11fdf7f2
TL
738 // reset settings back to global defaults
739 for (auto& key : config_overrides) {
740 std::string value;
741 int r = cct->_conf.get_val(key, &value);
742 ceph_assert(r == 0);
743
744 config.set_val(key, value);
745 }
746 config_overrides.clear();
7c673cae 747
11fdf7f2
TL
748 // extract config overrides
749 for (auto meta_pair : meta) {
750 if (!boost::starts_with(meta_pair.first, METADATA_CONF_PREFIX)) {
7c673cae 751 continue;
11fdf7f2 752 }
7c673cae 753
11fdf7f2
TL
754 std::string key = meta_pair.first.substr(METADATA_CONF_PREFIX.size());
755 if (!boost::starts_with(key, "rbd_")) {
756 // ignore non-RBD configuration keys
757 // TODO use option schema to determine applicable subsystem
758 ldout(cct, 0) << __func__ << ": ignoring config " << key << dendl;
759 continue;
7c673cae 760 }
7c673cae 761
11fdf7f2
TL
762 if (config.find_option(key) != nullptr) {
763 std::string val(meta_pair.second.c_str(), meta_pair.second.length());
764 int r = config.set_val(key, val);
765 if (r >= 0) {
766 ldout(cct, 20) << __func__ << ": " << key << "=" << val << dendl;
767 config_overrides.insert(key);
768 } else {
769 lderr(cct) << __func__ << ": failed to set config " << key << " "
770 << "with value " << val << ": " << cpp_strerror(r)
771 << dendl;
772 }
7c673cae
FG
773 }
774 }
775
11fdf7f2
TL
776#define ASSIGN_OPTION(param, type) \
777 param = config.get_val<type>("rbd_"#param)
7c673cae 778
11fdf7f2 779 bool skip_partial_discard = true;
181888fb
FG
780 ASSIGN_OPTION(non_blocking_aio, bool);
781 ASSIGN_OPTION(cache, bool);
782 ASSIGN_OPTION(cache_writethrough_until_flush, bool);
11fdf7f2
TL
783 ASSIGN_OPTION(cache_max_dirty, Option::size_t);
784 ASSIGN_OPTION(sparse_read_threshold_bytes, Option::size_t);
785 ASSIGN_OPTION(readahead_max_bytes, Option::size_t);
786 ASSIGN_OPTION(readahead_disable_after_bytes, Option::size_t);
181888fb 787 ASSIGN_OPTION(clone_copy_on_read, bool);
181888fb 788 ASSIGN_OPTION(enable_alloc_hint, bool);
11fdf7f2
TL
789 ASSIGN_OPTION(mirroring_replay_delay, uint64_t);
790 ASSIGN_OPTION(mtime_update_interval, uint64_t);
791 ASSIGN_OPTION(atime_update_interval, uint64_t);
181888fb 792 ASSIGN_OPTION(skip_partial_discard, bool);
11fdf7f2 793 ASSIGN_OPTION(discard_granularity_bytes, uint64_t);
181888fb 794 ASSIGN_OPTION(blkin_trace_all, bool);
b32b8144 795
11fdf7f2 796#undef ASSIGN_OPTION
b32b8144
FG
797
798 if (sparse_read_threshold_bytes == 0) {
799 sparse_read_threshold_bytes = get_object_size();
800 }
11fdf7f2
TL
801 if (!skip_partial_discard) {
802 discard_granularity_bytes = 0;
803 }
804
92f5a8d4
TL
805 alloc_hint_flags = 0;
806 auto compression_hint = config.get_val<std::string>("rbd_compression_hint");
807 if (compression_hint == "compressible") {
808 alloc_hint_flags |= librados::ALLOC_HINT_FLAG_COMPRESSIBLE;
809 } else if (compression_hint == "incompressible") {
810 alloc_hint_flags |= librados::ALLOC_HINT_FLAG_INCOMPRESSIBLE;
811 }
812
11fdf7f2
TL
813 io_work_queue->apply_qos_schedule_tick_min(
814 config.get_val<uint64_t>("rbd_qos_schedule_tick_min"));
815
816 io_work_queue->apply_qos_limit(
817 RBD_QOS_IOPS_THROTTLE,
818 config.get_val<uint64_t>("rbd_qos_iops_limit"),
819 config.get_val<uint64_t>("rbd_qos_iops_burst"));
820 io_work_queue->apply_qos_limit(
821 RBD_QOS_BPS_THROTTLE,
822 config.get_val<uint64_t>("rbd_qos_bps_limit"),
823 config.get_val<uint64_t>("rbd_qos_bps_burst"));
824 io_work_queue->apply_qos_limit(
825 RBD_QOS_READ_IOPS_THROTTLE,
826 config.get_val<uint64_t>("rbd_qos_read_iops_limit"),
827 config.get_val<uint64_t>("rbd_qos_read_iops_burst"));
828 io_work_queue->apply_qos_limit(
829 RBD_QOS_WRITE_IOPS_THROTTLE,
830 config.get_val<uint64_t>("rbd_qos_write_iops_limit"),
831 config.get_val<uint64_t>("rbd_qos_write_iops_burst"));
832 io_work_queue->apply_qos_limit(
833 RBD_QOS_READ_BPS_THROTTLE,
834 config.get_val<uint64_t>("rbd_qos_read_bps_limit"),
835 config.get_val<uint64_t>("rbd_qos_read_bps_burst"));
836 io_work_queue->apply_qos_limit(
837 RBD_QOS_WRITE_BPS_THROTTLE,
838 config.get_val<uint64_t>("rbd_qos_write_bps_limit"),
839 config.get_val<uint64_t>("rbd_qos_write_bps_burst"));
7c673cae
FG
840 }
841
842 ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
843 return new ExclusiveLock<ImageCtx>(*this);
844 }
845
846 ObjectMap<ImageCtx> *ImageCtx::create_object_map(uint64_t snap_id) {
847 return new ObjectMap<ImageCtx>(*this, snap_id);
848 }
849
850 Journal<ImageCtx> *ImageCtx::create_journal() {
851 return new Journal<ImageCtx>(*this);
852 }
853
854 void ImageCtx::set_image_name(const std::string &image_name) {
855 // update the name so rename can be invoked repeatedly
856 RWLock::RLocker owner_locker(owner_lock);
857 RWLock::WLocker snap_locker(snap_lock);
858 name = image_name;
859 if (old_format) {
860 header_oid = util::old_header_name(image_name);
861 }
862 }
863
864 void ImageCtx::notify_update() {
865 state->handle_update_notification();
866 ImageWatcher<>::notify_header_update(md_ctx, header_oid);
867 }
868
869 void ImageCtx::notify_update(Context *on_finish) {
870 state->handle_update_notification();
871 image_watcher->notify_header_update(on_finish);
872 }
873
874 exclusive_lock::Policy *ImageCtx::get_exclusive_lock_policy() const {
11fdf7f2
TL
875 ceph_assert(owner_lock.is_locked());
876 ceph_assert(exclusive_lock_policy != nullptr);
7c673cae
FG
877 return exclusive_lock_policy;
878 }
879
880 void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy *policy) {
11fdf7f2
TL
881 ceph_assert(owner_lock.is_wlocked());
882 ceph_assert(policy != nullptr);
7c673cae
FG
883 delete exclusive_lock_policy;
884 exclusive_lock_policy = policy;
885 }
886
887 journal::Policy *ImageCtx::get_journal_policy() const {
11fdf7f2
TL
888 ceph_assert(snap_lock.is_locked());
889 ceph_assert(journal_policy != nullptr);
7c673cae
FG
890 return journal_policy;
891 }
892
893 void ImageCtx::set_journal_policy(journal::Policy *policy) {
11fdf7f2
TL
894 ceph_assert(snap_lock.is_wlocked());
895 ceph_assert(policy != nullptr);
7c673cae
FG
896 delete journal_policy;
897 journal_policy = policy;
898 }
899
28e407b8
AA
900 bool ImageCtx::is_writeback_cache_enabled() const {
901 return (cache && cache_max_dirty > 0);
902 }
903
7c673cae
FG
904 void ImageCtx::get_thread_pool_instance(CephContext *cct,
905 ThreadPool **thread_pool,
906 ContextWQ **op_work_queue) {
11fdf7f2
TL
907 auto thread_pool_singleton =
908 &cct->lookup_or_create_singleton_object<ThreadPoolSingleton>(
909 "librbd::thread_pool", false, cct);
7c673cae
FG
910 *thread_pool = thread_pool_singleton;
911 *op_work_queue = thread_pool_singleton->op_work_queue;
912 }
913
914 void ImageCtx::get_timer_instance(CephContext *cct, SafeTimer **timer,
915 Mutex **timer_lock) {
11fdf7f2
TL
916 auto safe_timer_singleton =
917 &cct->lookup_or_create_singleton_object<SafeTimerSingleton>(
918 "librbd::journal::safe_timer", false, cct);
7c673cae
FG
919 *timer = safe_timer_singleton;
920 *timer_lock = &safe_timer_singleton->lock;
921 }
922}