]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/ImageCtx.cc
import 15.2.2 octopus source
[ceph.git] / ceph / src / librbd / ImageCtx.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#include <errno.h>
4#include <boost/assign/list_of.hpp>
5#include <stddef.h>
6
7#include "common/ceph_context.h"
8#include "common/dout.h"
9#include "common/errno.h"
10#include "common/perf_counters.h"
11#include "common/WorkQueue.h"
12#include "common/Timer.h"
13
7c673cae
FG
14#include "librbd/AsyncRequest.h"
15#include "librbd/ExclusiveLock.h"
16#include "librbd/internal.h"
17#include "librbd/ImageCtx.h"
18#include "librbd/ImageState.h"
19#include "librbd/ImageWatcher.h"
20#include "librbd/Journal.h"
21#include "librbd/LibrbdAdminSocketHook.h"
22#include "librbd/ObjectMap.h"
23#include "librbd/Operations.h"
24#include "librbd/operation/ResizeRequest.h"
b32b8144 25#include "librbd/Types.h"
7c673cae 26#include "librbd/Utils.h"
7c673cae
FG
27#include "librbd/exclusive_lock/AutomaticPolicy.h"
28#include "librbd/exclusive_lock/StandardPolicy.h"
29#include "librbd/io/AioCompletion.h"
31f18b77 30#include "librbd/io/AsyncOperation.h"
7c673cae 31#include "librbd/io/ImageRequestWQ.h"
11fdf7f2 32#include "librbd/io/ObjectDispatcher.h"
7c673cae
FG
33#include "librbd/journal/StandardPolicy.h"
34
35#include "osdc/Striper.h"
36#include <boost/bind.hpp>
11fdf7f2 37#include <boost/algorithm/string/predicate.hpp>
7c673cae
FG
38
39#define dout_subsys ceph_subsys_rbd
40#undef dout_prefix
41#define dout_prefix *_dout << "librbd::ImageCtx: "
42
43using std::map;
44using std::pair;
45using std::set;
46using std::string;
47using std::vector;
48
49using ceph::bufferlist;
50using librados::snap_t;
51using librados::IoCtx;
52
53namespace librbd {
54
55namespace {
56
57class ThreadPoolSingleton : public ThreadPool {
58public:
59 ContextWQ *op_work_queue;
60
61 explicit ThreadPoolSingleton(CephContext *cct)
62 : ThreadPool(cct, "librbd::thread_pool", "tp_librbd", 1,
63 "rbd_op_threads"),
64 op_work_queue(new ContextWQ("librbd::op_work_queue",
11fdf7f2 65 cct->_conf.get_val<uint64_t>("rbd_op_thread_timeout"),
7c673cae
FG
66 this)) {
67 start();
68 }
69 ~ThreadPoolSingleton() override {
70 op_work_queue->drain();
71 delete op_work_queue;
72
73 stop();
74 }
75};
76
77class SafeTimerSingleton : public SafeTimer {
78public:
9f95a23c 79 ceph::mutex lock = ceph::make_mutex("librbd::Journal::SafeTimerSingleton::lock");
7c673cae
FG
80
81 explicit SafeTimerSingleton(CephContext *cct)
9f95a23c 82 : SafeTimer(cct, lock, true) {
7c673cae
FG
83 init();
84 }
85 ~SafeTimerSingleton() {
9f95a23c 86 std::lock_guard locker{lock};
7c673cae
FG
87 shutdown();
88 }
89};
90
7c673cae
FG
91} // anonymous namespace
92
93 const string ImageCtx::METADATA_CONF_PREFIX = "conf_";
94
95 ImageCtx::ImageCtx(const string &image_name, const string &image_id,
96 const char *snap, IoCtx& p, bool ro)
97 : cct((CephContext*)p.cct()),
11fdf7f2 98 config(cct->_conf),
7c673cae
FG
99 perfcounter(NULL),
100 snap_id(CEPH_NOSNAP),
101 snap_exists(true),
102 read_only(ro),
9f95a23c 103 read_only_flags(ro ? IMAGE_READ_ONLY_FLAG_USER : 0U),
7c673cae
FG
104 exclusive_locked(false),
105 name(image_name),
106 image_watcher(NULL),
107 journal(NULL),
9f95a23c
TL
108 owner_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::owner_lock", this))),
109 image_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::image_lock", this))),
110 timestamp_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::timestamp_lock", this))),
111 async_ops_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this))),
112 copyup_list_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this))),
7c673cae 113 extra_read_flags(0),
11fdf7f2 114 old_format(false),
7c673cae
FG
115 order(0), size(0), features(0),
116 format_string(NULL),
117 id(image_id), parent(NULL),
118 stripe_unit(0), stripe_count(0), flags(0),
7c673cae
FG
119 readahead(),
120 total_bytes_read(0),
121 state(new ImageState<>(this)),
122 operations(new Operations<>(*this)),
123 exclusive_lock(nullptr), object_map(nullptr),
124 io_work_queue(nullptr), op_work_queue(nullptr),
9f95a23c
TL
125 external_callback_completions(32),
126 event_socket_completions(32),
31f18b77
FG
127 asok_hook(nullptr),
128 trace_endpoint("librbd")
7c673cae
FG
129 {
130 md_ctx.dup(p);
131 data_ctx.dup(p);
132 if (snap)
133 snap_name = snap;
134
92f5a8d4 135 // FIPS zeroization audit 20191117: this memset is not security related.
7c673cae
FG
136 memset(&header, 0, sizeof(header));
137
138 ThreadPool *thread_pool;
139 get_thread_pool_instance(cct, &thread_pool, &op_work_queue);
224ce89b 140 io_work_queue = new io::ImageRequestWQ<>(
181888fb 141 this, "librbd::io_work_queue",
11fdf7f2 142 cct->_conf.get_val<uint64_t>("rbd_op_thread_timeout"),
7c673cae 143 thread_pool);
11fdf7f2 144 io_object_dispatcher = new io::ObjectDispatcher<>(this);
7c673cae 145
11fdf7f2 146 if (cct->_conf.get_val<bool>("rbd_auto_exclusive_lock_until_manual_request")) {
7c673cae
FG
147 exclusive_lock_policy = new exclusive_lock::AutomaticPolicy(this);
148 } else {
149 exclusive_lock_policy = new exclusive_lock::StandardPolicy(this);
150 }
151 journal_policy = new journal::StandardPolicy<ImageCtx>(this);
152 }
153
11fdf7f2
TL
154 ImageCtx::ImageCtx(const string &image_name, const string &image_id,
155 uint64_t snap_id, IoCtx& p, bool ro)
156 : ImageCtx(image_name, image_id, "", p, ro) {
157 open_snap_id = snap_id;
158 }
159
7c673cae 160 ImageCtx::~ImageCtx() {
11fdf7f2
TL
161 ceph_assert(image_watcher == NULL);
162 ceph_assert(exclusive_lock == NULL);
163 ceph_assert(object_map == NULL);
164 ceph_assert(journal == NULL);
165 ceph_assert(asok_hook == NULL);
7c673cae
FG
166
167 if (perfcounter) {
168 perf_stop();
169 }
7c673cae
FG
170 delete[] format_string;
171
172 md_ctx.aio_flush();
eafe8130
TL
173 if (data_ctx.is_valid()) {
174 data_ctx.aio_flush();
175 }
7c673cae
FG
176 io_work_queue->drain();
177
11fdf7f2
TL
178 delete io_object_dispatcher;
179
7c673cae
FG
180 delete journal_policy;
181 delete exclusive_lock_policy;
182 delete io_work_queue;
183 delete operations;
184 delete state;
185 }
186
187 void ImageCtx::init() {
11fdf7f2
TL
188 ceph_assert(!header_oid.empty());
189 ceph_assert(old_format || !id.empty());
7c673cae
FG
190
191 asok_hook = new LibrbdAdminSocketHook(this);
192
193 string pname = string("librbd-") + id + string("-") +
eafe8130 194 md_ctx.get_pool_name() + string("-") + name;
7c673cae
FG
195 if (!snap_name.empty()) {
196 pname += "-";
197 pname += snap_name;
198 }
199
31f18b77 200 trace_endpoint.copy_name(pname);
7c673cae
FG
201 perf_start(pname);
202
11fdf7f2
TL
203 ceph_assert(image_watcher == NULL);
204 image_watcher = new ImageWatcher<>(*this);
7c673cae
FG
205 }
206
207 void ImageCtx::shutdown() {
208 delete image_watcher;
209 image_watcher = nullptr;
210
211 delete asok_hook;
212 asok_hook = nullptr;
213 }
214
eafe8130 215 void ImageCtx::init_layout(int64_t pool_id)
7c673cae
FG
216 {
217 if (stripe_unit == 0 || stripe_count == 0) {
218 stripe_unit = 1ull << order;
219 stripe_count = 1;
220 }
221
222 vector<uint64_t> alignments;
223 alignments.push_back(stripe_count << order); // object set (in file striping terminology)
224 alignments.push_back(stripe_unit * stripe_count); // stripe
225 alignments.push_back(stripe_unit); // stripe unit
226 readahead.set_alignments(alignments);
227
228 layout = file_layout_t();
229 layout.stripe_unit = stripe_unit;
230 layout.stripe_count = stripe_count;
231 layout.object_size = 1ull << order;
eafe8130 232 layout.pool_id = pool_id; // FIXME: pool id overflow?
7c673cae
FG
233
234 delete[] format_string;
235 size_t len = object_prefix.length() + 16;
236 format_string = new char[len];
237 if (old_format) {
238 snprintf(format_string, len, "%s.%%012llx", object_prefix.c_str());
239 } else {
240 snprintf(format_string, len, "%s.%%016llx", object_prefix.c_str());
241 }
242
243 ldout(cct, 10) << "init_layout stripe_unit " << stripe_unit
244 << " stripe_count " << stripe_count
245 << " object_size " << layout.object_size
246 << " prefix " << object_prefix
247 << " format " << format_string
248 << dendl;
249 }
250
251 void ImageCtx::perf_start(string name) {
b32b8144
FG
252 auto perf_prio = PerfCountersBuilder::PRIO_DEBUGONLY;
253 if (child == nullptr) {
254 // ensure top-level IO stats are exported for librbd daemons
255 perf_prio = PerfCountersBuilder::PRIO_USEFUL;
256 }
257
7c673cae
FG
258 PerfCountersBuilder plb(cct, name, l_librbd_first, l_librbd_last);
259
b32b8144
FG
260 plb.add_u64_counter(l_librbd_rd, "rd", "Reads", "r", perf_prio);
261 plb.add_u64_counter(l_librbd_rd_bytes, "rd_bytes", "Data size in reads",
11fdf7f2 262 "rb", perf_prio, unit_t(UNIT_BYTES));
b32b8144
FG
263 plb.add_time_avg(l_librbd_rd_latency, "rd_latency", "Latency of reads",
264 "rl", perf_prio);
265 plb.add_u64_counter(l_librbd_wr, "wr", "Writes", "w", perf_prio);
266 plb.add_u64_counter(l_librbd_wr_bytes, "wr_bytes", "Written data",
11fdf7f2 267 "wb", perf_prio, unit_t(UNIT_BYTES));
b32b8144
FG
268 plb.add_time_avg(l_librbd_wr_latency, "wr_latency", "Write latency",
269 "wl", perf_prio);
7c673cae 270 plb.add_u64_counter(l_librbd_discard, "discard", "Discards");
11fdf7f2 271 plb.add_u64_counter(l_librbd_discard_bytes, "discard_bytes", "Discarded data", NULL, 0, unit_t(UNIT_BYTES));
7c673cae
FG
272 plb.add_time_avg(l_librbd_discard_latency, "discard_latency", "Discard latency");
273 plb.add_u64_counter(l_librbd_flush, "flush", "Flushes");
11fdf7f2 274 plb.add_time_avg(l_librbd_flush_latency, "flush_latency", "Latency of flushes");
7c673cae 275 plb.add_u64_counter(l_librbd_ws, "ws", "WriteSames");
11fdf7f2 276 plb.add_u64_counter(l_librbd_ws_bytes, "ws_bytes", "WriteSame data", NULL, 0, unit_t(UNIT_BYTES));
7c673cae 277 plb.add_time_avg(l_librbd_ws_latency, "ws_latency", "WriteSame latency");
c07f9fc5 278 plb.add_u64_counter(l_librbd_cmp, "cmp", "CompareAndWrites");
11fdf7f2 279 plb.add_u64_counter(l_librbd_cmp_bytes, "cmp_bytes", "Data size in cmps", NULL, 0, unit_t(UNIT_BYTES));
c07f9fc5 280 plb.add_time_avg(l_librbd_cmp_latency, "cmp_latency", "Latency of cmps");
7c673cae
FG
281 plb.add_u64_counter(l_librbd_snap_create, "snap_create", "Snap creations");
282 plb.add_u64_counter(l_librbd_snap_remove, "snap_remove", "Snap removals");
283 plb.add_u64_counter(l_librbd_snap_rollback, "snap_rollback", "Snap rollbacks");
284 plb.add_u64_counter(l_librbd_snap_rename, "snap_rename", "Snap rename");
285 plb.add_u64_counter(l_librbd_notify, "notify", "Updated header notifications");
286 plb.add_u64_counter(l_librbd_resize, "resize", "Resizes");
287 plb.add_u64_counter(l_librbd_readahead, "readahead", "Read ahead");
11fdf7f2 288 plb.add_u64_counter(l_librbd_readahead_bytes, "readahead_bytes", "Data size in read ahead", NULL, 0, unit_t(UNIT_BYTES));
7c673cae
FG
289 plb.add_u64_counter(l_librbd_invalidate_cache, "invalidate_cache", "Cache invalidates");
290
b32b8144
FG
291 plb.add_time(l_librbd_opened_time, "opened_time", "Opened time",
292 "ots", perf_prio);
293 plb.add_time(l_librbd_lock_acquired_time, "lock_acquired_time",
294 "Lock acquired time", "lats", perf_prio);
295
7c673cae
FG
296 perfcounter = plb.create_perf_counters();
297 cct->get_perfcounters_collection()->add(perfcounter);
b32b8144
FG
298
299 perfcounter->tset(l_librbd_opened_time, ceph_clock_now());
7c673cae
FG
300 }
301
302 void ImageCtx::perf_stop() {
11fdf7f2 303 ceph_assert(perfcounter);
7c673cae
FG
304 cct->get_perfcounters_collection()->remove(perfcounter);
305 delete perfcounter;
306 }
307
308 void ImageCtx::set_read_flag(unsigned flag) {
309 extra_read_flags |= flag;
310 }
311
312 int ImageCtx::get_read_flags(snap_t snap_id) {
9f95a23c
TL
313 int flags = librados::OPERATION_NOFLAG | read_flags;
314 if (flags != 0)
315 return flags;
316
317 flags = librados::OPERATION_NOFLAG | extra_read_flags;
7c673cae
FG
318 if (snap_id == LIBRADOS_SNAP_HEAD)
319 return flags;
320
11fdf7f2 321 if (config.get_val<bool>("rbd_balance_snap_reads"))
7c673cae 322 flags |= librados::OPERATION_BALANCE_READS;
11fdf7f2 323 else if (config.get_val<bool>("rbd_localize_snap_reads"))
7c673cae
FG
324 flags |= librados::OPERATION_LOCALIZE_READS;
325 return flags;
326 }
327
11fdf7f2 328 int ImageCtx::snap_set(uint64_t in_snap_id) {
9f95a23c 329 ceph_assert(ceph_mutex_is_wlocked(image_lock));
11fdf7f2
TL
330 auto it = snap_info.find(in_snap_id);
331 if (in_snap_id != CEPH_NOSNAP && it != snap_info.end()) {
7c673cae 332 snap_id = in_snap_id;
11fdf7f2
TL
333 snap_namespace = it->second.snap_namespace;
334 snap_name = it->second.name;
7c673cae 335 snap_exists = true;
eafe8130
TL
336 if (data_ctx.is_valid()) {
337 data_ctx.snap_set_read(snap_id);
338 }
7c673cae
FG
339 return 0;
340 }
341 return -ENOENT;
342 }
343
344 void ImageCtx::snap_unset()
345 {
9f95a23c 346 ceph_assert(ceph_mutex_is_wlocked(image_lock));
7c673cae
FG
347 snap_id = CEPH_NOSNAP;
348 snap_namespace = {};
349 snap_name = "";
350 snap_exists = true;
eafe8130
TL
351 if (data_ctx.is_valid()) {
352 data_ctx.snap_set_read(snap_id);
353 }
7c673cae
FG
354 }
355
11fdf7f2
TL
356 snap_t ImageCtx::get_snap_id(const cls::rbd::SnapshotNamespace& in_snap_namespace,
357 const string& in_snap_name) const
7c673cae 358 {
9f95a23c 359 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae 360 auto it = snap_ids.find({in_snap_namespace, in_snap_name});
11fdf7f2 361 if (it != snap_ids.end()) {
7c673cae 362 return it->second;
11fdf7f2 363 }
7c673cae
FG
364 return CEPH_NOSNAP;
365 }
366
367 const SnapInfo* ImageCtx::get_snap_info(snap_t in_snap_id) const
368 {
9f95a23c 369 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
370 map<snap_t, SnapInfo>::const_iterator it =
371 snap_info.find(in_snap_id);
372 if (it != snap_info.end())
373 return &it->second;
11fdf7f2 374 return nullptr;
7c673cae
FG
375 }
376
377 int ImageCtx::get_snap_name(snap_t in_snap_id,
378 string *out_snap_name) const
379 {
9f95a23c 380 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
381 const SnapInfo *info = get_snap_info(in_snap_id);
382 if (info) {
383 *out_snap_name = info->name;
384 return 0;
385 }
386 return -ENOENT;
387 }
388
389 int ImageCtx::get_snap_namespace(snap_t in_snap_id,
390 cls::rbd::SnapshotNamespace *out_snap_namespace) const
391 {
9f95a23c 392 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
393 const SnapInfo *info = get_snap_info(in_snap_id);
394 if (info) {
395 *out_snap_namespace = info->snap_namespace;
396 return 0;
397 }
398 return -ENOENT;
399 }
400
401 int ImageCtx::get_parent_spec(snap_t in_snap_id,
11fdf7f2 402 cls::rbd::ParentImageSpec *out_pspec) const
7c673cae
FG
403 {
404 const SnapInfo *info = get_snap_info(in_snap_id);
405 if (info) {
406 *out_pspec = info->parent.spec;
407 return 0;
408 }
409 return -ENOENT;
410 }
411
412 uint64_t ImageCtx::get_current_size() const
413 {
9f95a23c 414 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
415 return size;
416 }
417
418 uint64_t ImageCtx::get_object_size() const
419 {
420 return 1ull << order;
421 }
422
423 string ImageCtx::get_object_name(uint64_t num) const {
9f95a23c 424 return util::data_object_name(this, num);
7c673cae
FG
425 }
426
427 uint64_t ImageCtx::get_stripe_unit() const
428 {
429 return stripe_unit;
430 }
431
432 uint64_t ImageCtx::get_stripe_count() const
433 {
434 return stripe_count;
435 }
436
437 uint64_t ImageCtx::get_stripe_period() const
438 {
439 return stripe_count * (1ull << order);
440 }
441
31f18b77
FG
442 utime_t ImageCtx::get_create_timestamp() const
443 {
444 return create_timestamp;
445 }
446
11fdf7f2
TL
447 utime_t ImageCtx::get_access_timestamp() const
448 {
449 return access_timestamp;
450 }
451
452 utime_t ImageCtx::get_modify_timestamp() const
453 {
454 return modify_timestamp;
455 }
456
457 void ImageCtx::set_access_timestamp(utime_t at)
458 {
9f95a23c 459 ceph_assert(ceph_mutex_is_wlocked(timestamp_lock));
11fdf7f2
TL
460 access_timestamp = at;
461 }
462
463 void ImageCtx::set_modify_timestamp(utime_t mt)
464 {
9f95a23c 465 ceph_assert(ceph_mutex_is_locked(timestamp_lock));
11fdf7f2
TL
466 modify_timestamp = mt;
467 }
468
7c673cae
FG
469 int ImageCtx::is_snap_protected(snap_t in_snap_id,
470 bool *is_protected) const
471 {
9f95a23c 472 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
473 const SnapInfo *info = get_snap_info(in_snap_id);
474 if (info) {
475 *is_protected =
476 (info->protection_status == RBD_PROTECTION_STATUS_PROTECTED);
477 return 0;
478 }
479 return -ENOENT;
480 }
481
482 int ImageCtx::is_snap_unprotected(snap_t in_snap_id,
483 bool *is_unprotected) const
484 {
9f95a23c 485 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
486 const SnapInfo *info = get_snap_info(in_snap_id);
487 if (info) {
488 *is_unprotected =
489 (info->protection_status == RBD_PROTECTION_STATUS_UNPROTECTED);
490 return 0;
491 }
492 return -ENOENT;
493 }
494
495 void ImageCtx::add_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
496 string in_snap_name,
497 snap_t id, uint64_t in_size,
11fdf7f2
TL
498 const ParentImageInfo &parent,
499 uint8_t protection_status, uint64_t flags,
500 utime_t timestamp)
7c673cae 501 {
9f95a23c 502 ceph_assert(ceph_mutex_is_wlocked(image_lock));
7c673cae
FG
503 snaps.push_back(id);
504 SnapInfo info(in_snap_name, in_snap_namespace,
505 in_size, parent, protection_status, flags, timestamp);
506 snap_info.insert({id, info});
507 snap_ids.insert({{in_snap_namespace, in_snap_name}, id});
508 }
509
510 void ImageCtx::rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
511 string in_snap_name,
512 snap_t id)
513 {
9f95a23c 514 ceph_assert(ceph_mutex_is_wlocked(image_lock));
7c673cae
FG
515 snaps.erase(std::remove(snaps.begin(), snaps.end(), id), snaps.end());
516 snap_info.erase(id);
517 snap_ids.erase({in_snap_namespace, in_snap_name});
518 }
519
520 uint64_t ImageCtx::get_image_size(snap_t in_snap_id) const
521 {
9f95a23c 522 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
523 if (in_snap_id == CEPH_NOSNAP) {
524 if (!resize_reqs.empty() &&
525 resize_reqs.front()->shrinking()) {
526 return resize_reqs.front()->get_image_size();
527 }
528 return size;
529 }
530
531 const SnapInfo *info = get_snap_info(in_snap_id);
532 if (info) {
533 return info->size;
534 }
535 return 0;
536 }
537
538 uint64_t ImageCtx::get_object_count(snap_t in_snap_id) const {
9f95a23c 539 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
540 uint64_t image_size = get_image_size(in_snap_id);
541 return Striper::get_num_objects(layout, image_size);
542 }
543
544 bool ImageCtx::test_features(uint64_t features) const
545 {
9f95a23c
TL
546 std::shared_lock l{image_lock};
547 return test_features(features, image_lock);
7c673cae
FG
548 }
549
550 bool ImageCtx::test_features(uint64_t in_features,
9f95a23c 551 const ceph::shared_mutex &in_image_lock) const
7c673cae 552 {
9f95a23c 553 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
554 return ((features & in_features) == in_features);
555 }
556
11fdf7f2
TL
557 bool ImageCtx::test_op_features(uint64_t in_op_features) const
558 {
9f95a23c
TL
559 std::shared_lock l{image_lock};
560 return test_op_features(in_op_features, image_lock);
11fdf7f2
TL
561 }
562
563 bool ImageCtx::test_op_features(uint64_t in_op_features,
9f95a23c 564 const ceph::shared_mutex &in_image_lock) const
11fdf7f2 565 {
9f95a23c 566 ceph_assert(ceph_mutex_is_locked(image_lock));
11fdf7f2
TL
567 return ((op_features & in_op_features) == in_op_features);
568 }
569
7c673cae
FG
570 int ImageCtx::get_flags(librados::snap_t _snap_id, uint64_t *_flags) const
571 {
9f95a23c 572 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
573 if (_snap_id == CEPH_NOSNAP) {
574 *_flags = flags;
575 return 0;
576 }
577 const SnapInfo *info = get_snap_info(_snap_id);
578 if (info) {
579 *_flags = info->flags;
580 return 0;
581 }
582 return -ENOENT;
583 }
584
91327a77
AA
585 int ImageCtx::test_flags(librados::snap_t in_snap_id,
586 uint64_t flags, bool *flags_set) const
7c673cae 587 {
9f95a23c
TL
588 std::shared_lock l{image_lock};
589 return test_flags(in_snap_id, flags, image_lock, flags_set);
7c673cae
FG
590 }
591
91327a77 592 int ImageCtx::test_flags(librados::snap_t in_snap_id,
9f95a23c
TL
593 uint64_t flags,
594 const ceph::shared_mutex &in_image_lock,
31f18b77 595 bool *flags_set) const
7c673cae 596 {
9f95a23c 597 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae 598 uint64_t snap_flags;
91327a77 599 int r = get_flags(in_snap_id, &snap_flags);
31f18b77
FG
600 if (r < 0) {
601 return r;
602 }
603 *flags_set = ((snap_flags & flags) == flags);
604 return 0;
7c673cae
FG
605 }
606
607 int ImageCtx::update_flags(snap_t in_snap_id, uint64_t flag, bool enabled)
608 {
9f95a23c 609 ceph_assert(ceph_mutex_is_wlocked(image_lock));
7c673cae
FG
610 uint64_t *_flags;
611 if (in_snap_id == CEPH_NOSNAP) {
612 _flags = &flags;
613 } else {
614 map<snap_t, SnapInfo>::iterator it = snap_info.find(in_snap_id);
615 if (it == snap_info.end()) {
616 return -ENOENT;
617 }
618 _flags = &it->second.flags;
619 }
620
621 if (enabled) {
622 (*_flags) |= flag;
623 } else {
624 (*_flags) &= ~flag;
625 }
626 return 0;
627 }
628
11fdf7f2 629 const ParentImageInfo* ImageCtx::get_parent_info(snap_t in_snap_id) const
7c673cae 630 {
9f95a23c 631 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
632 if (in_snap_id == CEPH_NOSNAP)
633 return &parent_md;
634 const SnapInfo *info = get_snap_info(in_snap_id);
635 if (info)
636 return &info->parent;
637 return NULL;
638 }
639
640 int64_t ImageCtx::get_parent_pool_id(snap_t in_snap_id) const
641 {
11fdf7f2 642 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
643 if (info)
644 return info->spec.pool_id;
645 return -1;
646 }
647
648 string ImageCtx::get_parent_image_id(snap_t in_snap_id) const
649 {
11fdf7f2 650 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
651 if (info)
652 return info->spec.image_id;
653 return "";
654 }
655
656 uint64_t ImageCtx::get_parent_snap_id(snap_t in_snap_id) const
657 {
11fdf7f2 658 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
659 if (info)
660 return info->spec.snap_id;
661 return CEPH_NOSNAP;
662 }
663
664 int ImageCtx::get_parent_overlap(snap_t in_snap_id, uint64_t *overlap) const
665 {
9f95a23c 666 ceph_assert(ceph_mutex_is_locked(image_lock));
11fdf7f2 667 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
668 if (info) {
669 *overlap = info->overlap;
670 return 0;
671 }
672 return -ENOENT;
673 }
674
7c673cae 675 void ImageCtx::register_watch(Context *on_finish) {
11fdf7f2 676 ceph_assert(image_watcher != NULL);
7c673cae
FG
677 image_watcher->register_watch(on_finish);
678 }
679
680 uint64_t ImageCtx::prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
681 uint64_t overlap)
682 {
683 // drop extents completely beyond the overlap
684 while (!objectx.empty() && objectx.back().first >= overlap)
685 objectx.pop_back();
686
687 // trim final overlapping extent
688 if (!objectx.empty() && objectx.back().first + objectx.back().second > overlap)
689 objectx.back().second = overlap - objectx.back().first;
690
691 uint64_t len = 0;
692 for (vector<pair<uint64_t,uint64_t> >::iterator p = objectx.begin();
693 p != objectx.end();
694 ++p)
695 len += p->second;
696 ldout(cct, 10) << "prune_parent_extents image overlap " << overlap
697 << ", object overlap " << len
698 << " from image extents " << objectx << dendl;
699 return len;
700 }
701
7c673cae
FG
702 void ImageCtx::cancel_async_requests() {
703 C_SaferCond ctx;
704 cancel_async_requests(&ctx);
705 ctx.wait();
706 }
707
708 void ImageCtx::cancel_async_requests(Context *on_finish) {
709 {
9f95a23c 710 std::lock_guard async_ops_locker{async_ops_lock};
7c673cae
FG
711 if (!async_requests.empty()) {
712 ldout(cct, 10) << "canceling async requests: count="
713 << async_requests.size() << dendl;
714 for (auto req : async_requests) {
715 ldout(cct, 10) << "canceling async request: " << req << dendl;
716 req->cancel();
717 }
718 async_requests_waiters.push_back(on_finish);
719 return;
720 }
721 }
722
723 on_finish->complete(0);
724 }
725
11fdf7f2
TL
726 void ImageCtx::apply_metadata(const std::map<std::string, bufferlist> &meta,
727 bool thread_safe) {
728 ldout(cct, 20) << __func__ << dendl;
7c673cae 729
11fdf7f2
TL
730 // reset settings back to global defaults
731 for (auto& key : config_overrides) {
732 std::string value;
733 int r = cct->_conf.get_val(key, &value);
734 ceph_assert(r == 0);
735
736 config.set_val(key, value);
737 }
738 config_overrides.clear();
7c673cae 739
11fdf7f2
TL
740 // extract config overrides
741 for (auto meta_pair : meta) {
742 if (!boost::starts_with(meta_pair.first, METADATA_CONF_PREFIX)) {
7c673cae 743 continue;
11fdf7f2 744 }
7c673cae 745
11fdf7f2
TL
746 std::string key = meta_pair.first.substr(METADATA_CONF_PREFIX.size());
747 if (!boost::starts_with(key, "rbd_")) {
748 // ignore non-RBD configuration keys
749 // TODO use option schema to determine applicable subsystem
750 ldout(cct, 0) << __func__ << ": ignoring config " << key << dendl;
751 continue;
7c673cae 752 }
7c673cae 753
11fdf7f2
TL
754 if (config.find_option(key) != nullptr) {
755 std::string val(meta_pair.second.c_str(), meta_pair.second.length());
756 int r = config.set_val(key, val);
757 if (r >= 0) {
758 ldout(cct, 20) << __func__ << ": " << key << "=" << val << dendl;
759 config_overrides.insert(key);
760 } else {
761 lderr(cct) << __func__ << ": failed to set config " << key << " "
762 << "with value " << val << ": " << cpp_strerror(r)
763 << dendl;
764 }
7c673cae
FG
765 }
766 }
767
11fdf7f2
TL
768#define ASSIGN_OPTION(param, type) \
769 param = config.get_val<type>("rbd_"#param)
7c673cae 770
11fdf7f2 771 bool skip_partial_discard = true;
181888fb
FG
772 ASSIGN_OPTION(non_blocking_aio, bool);
773 ASSIGN_OPTION(cache, bool);
11fdf7f2
TL
774 ASSIGN_OPTION(sparse_read_threshold_bytes, Option::size_t);
775 ASSIGN_OPTION(readahead_max_bytes, Option::size_t);
776 ASSIGN_OPTION(readahead_disable_after_bytes, Option::size_t);
181888fb 777 ASSIGN_OPTION(clone_copy_on_read, bool);
181888fb 778 ASSIGN_OPTION(enable_alloc_hint, bool);
11fdf7f2
TL
779 ASSIGN_OPTION(mirroring_replay_delay, uint64_t);
780 ASSIGN_OPTION(mtime_update_interval, uint64_t);
781 ASSIGN_OPTION(atime_update_interval, uint64_t);
181888fb 782 ASSIGN_OPTION(skip_partial_discard, bool);
11fdf7f2 783 ASSIGN_OPTION(discard_granularity_bytes, uint64_t);
181888fb 784 ASSIGN_OPTION(blkin_trace_all, bool);
b32b8144 785
11fdf7f2 786#undef ASSIGN_OPTION
b32b8144
FG
787
788 if (sparse_read_threshold_bytes == 0) {
789 sparse_read_threshold_bytes = get_object_size();
790 }
11fdf7f2
TL
791 if (!skip_partial_discard) {
792 discard_granularity_bytes = 0;
793 }
794
92f5a8d4
TL
795 alloc_hint_flags = 0;
796 auto compression_hint = config.get_val<std::string>("rbd_compression_hint");
797 if (compression_hint == "compressible") {
798 alloc_hint_flags |= librados::ALLOC_HINT_FLAG_COMPRESSIBLE;
799 } else if (compression_hint == "incompressible") {
800 alloc_hint_flags |= librados::ALLOC_HINT_FLAG_INCOMPRESSIBLE;
801 }
802
9f95a23c
TL
803 librados::Rados rados(md_ctx);
804 int8_t require_osd_release;
805 int r = rados.get_min_compatible_osd(&require_osd_release);
806 if (r == 0 && require_osd_release >= CEPH_RELEASE_OCTOPUS) {
807 read_flags = 0;
808 auto read_policy = config.get_val<std::string>("rbd_read_from_replica_policy");
809 if (read_policy == "balance") {
810 read_flags |= CEPH_OSD_FLAG_BALANCE_READS;
811 } else if (read_policy == "localize") {
812 read_flags |= CEPH_OSD_FLAG_LOCALIZE_READS;
813 }
814 }
815
11fdf7f2
TL
816 io_work_queue->apply_qos_schedule_tick_min(
817 config.get_val<uint64_t>("rbd_qos_schedule_tick_min"));
818
819 io_work_queue->apply_qos_limit(
820 RBD_QOS_IOPS_THROTTLE,
821 config.get_val<uint64_t>("rbd_qos_iops_limit"),
822 config.get_val<uint64_t>("rbd_qos_iops_burst"));
823 io_work_queue->apply_qos_limit(
824 RBD_QOS_BPS_THROTTLE,
825 config.get_val<uint64_t>("rbd_qos_bps_limit"),
826 config.get_val<uint64_t>("rbd_qos_bps_burst"));
827 io_work_queue->apply_qos_limit(
828 RBD_QOS_READ_IOPS_THROTTLE,
829 config.get_val<uint64_t>("rbd_qos_read_iops_limit"),
830 config.get_val<uint64_t>("rbd_qos_read_iops_burst"));
831 io_work_queue->apply_qos_limit(
832 RBD_QOS_WRITE_IOPS_THROTTLE,
833 config.get_val<uint64_t>("rbd_qos_write_iops_limit"),
834 config.get_val<uint64_t>("rbd_qos_write_iops_burst"));
835 io_work_queue->apply_qos_limit(
836 RBD_QOS_READ_BPS_THROTTLE,
837 config.get_val<uint64_t>("rbd_qos_read_bps_limit"),
838 config.get_val<uint64_t>("rbd_qos_read_bps_burst"));
839 io_work_queue->apply_qos_limit(
840 RBD_QOS_WRITE_BPS_THROTTLE,
841 config.get_val<uint64_t>("rbd_qos_write_bps_limit"),
842 config.get_val<uint64_t>("rbd_qos_write_bps_burst"));
9f95a23c
TL
843
844 if (!disable_zero_copy &&
845 config.get_val<bool>("rbd_disable_zero_copy_writes")) {
846 ldout(cct, 5) << this << ": disabling zero-copy writes" << dendl;
847 disable_zero_copy = true;
848 }
7c673cae
FG
849 }
850
851 ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
852 return new ExclusiveLock<ImageCtx>(*this);
853 }
854
855 ObjectMap<ImageCtx> *ImageCtx::create_object_map(uint64_t snap_id) {
856 return new ObjectMap<ImageCtx>(*this, snap_id);
857 }
858
859 Journal<ImageCtx> *ImageCtx::create_journal() {
860 return new Journal<ImageCtx>(*this);
861 }
862
863 void ImageCtx::set_image_name(const std::string &image_name) {
864 // update the name so rename can be invoked repeatedly
9f95a23c
TL
865 std::shared_lock owner_locker{owner_lock};
866 std::unique_lock image_locker{image_lock};
7c673cae
FG
867 name = image_name;
868 if (old_format) {
869 header_oid = util::old_header_name(image_name);
870 }
871 }
872
873 void ImageCtx::notify_update() {
874 state->handle_update_notification();
875 ImageWatcher<>::notify_header_update(md_ctx, header_oid);
876 }
877
878 void ImageCtx::notify_update(Context *on_finish) {
879 state->handle_update_notification();
880 image_watcher->notify_header_update(on_finish);
881 }
882
883 exclusive_lock::Policy *ImageCtx::get_exclusive_lock_policy() const {
9f95a23c 884 ceph_assert(ceph_mutex_is_locked(owner_lock));
11fdf7f2 885 ceph_assert(exclusive_lock_policy != nullptr);
7c673cae
FG
886 return exclusive_lock_policy;
887 }
888
889 void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy *policy) {
9f95a23c 890 ceph_assert(ceph_mutex_is_wlocked(owner_lock));
11fdf7f2 891 ceph_assert(policy != nullptr);
7c673cae
FG
892 delete exclusive_lock_policy;
893 exclusive_lock_policy = policy;
894 }
895
896 journal::Policy *ImageCtx::get_journal_policy() const {
9f95a23c 897 ceph_assert(ceph_mutex_is_locked(image_lock));
11fdf7f2 898 ceph_assert(journal_policy != nullptr);
7c673cae
FG
899 return journal_policy;
900 }
901
902 void ImageCtx::set_journal_policy(journal::Policy *policy) {
9f95a23c 903 ceph_assert(ceph_mutex_is_wlocked(image_lock));
11fdf7f2 904 ceph_assert(policy != nullptr);
7c673cae
FG
905 delete journal_policy;
906 journal_policy = policy;
907 }
908
909 void ImageCtx::get_thread_pool_instance(CephContext *cct,
910 ThreadPool **thread_pool,
911 ContextWQ **op_work_queue) {
11fdf7f2
TL
912 auto thread_pool_singleton =
913 &cct->lookup_or_create_singleton_object<ThreadPoolSingleton>(
914 "librbd::thread_pool", false, cct);
7c673cae
FG
915 *thread_pool = thread_pool_singleton;
916 *op_work_queue = thread_pool_singleton->op_work_queue;
917 }
918
919 void ImageCtx::get_timer_instance(CephContext *cct, SafeTimer **timer,
9f95a23c 920 ceph::mutex **timer_lock) {
11fdf7f2
TL
921 auto safe_timer_singleton =
922 &cct->lookup_or_create_singleton_object<SafeTimerSingleton>(
923 "librbd::journal::safe_timer", false, cct);
7c673cae
FG
924 *timer = safe_timer_singleton;
925 *timer_lock = &safe_timer_singleton->lock;
926 }
927}