]> git.proxmox.com Git - ceph.git/blame - ceph/src/librbd/ImageCtx.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / librbd / ImageCtx.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#include <errno.h>
4#include <boost/assign/list_of.hpp>
5#include <stddef.h>
6
f67539c2
TL
7#include "include/neorados/RADOS.hpp"
8
7c673cae
FG
9#include "common/ceph_context.h"
10#include "common/dout.h"
11#include "common/errno.h"
12#include "common/perf_counters.h"
7c673cae
FG
13#include "common/Timer.h"
14
f67539c2 15#include "librbd/AsioEngine.h"
7c673cae
FG
16#include "librbd/AsyncRequest.h"
17#include "librbd/ExclusiveLock.h"
18#include "librbd/internal.h"
19#include "librbd/ImageCtx.h"
20#include "librbd/ImageState.h"
21#include "librbd/ImageWatcher.h"
22#include "librbd/Journal.h"
23#include "librbd/LibrbdAdminSocketHook.h"
24#include "librbd/ObjectMap.h"
25#include "librbd/Operations.h"
f67539c2 26#include "librbd/PluginRegistry.h"
b32b8144 27#include "librbd/Types.h"
7c673cae 28#include "librbd/Utils.h"
f67539c2 29#include "librbd/asio/ContextWQ.h"
7c673cae
FG
30#include "librbd/exclusive_lock/AutomaticPolicy.h"
31#include "librbd/exclusive_lock/StandardPolicy.h"
32#include "librbd/io/AioCompletion.h"
31f18b77 33#include "librbd/io/AsyncOperation.h"
f67539c2 34#include "librbd/io/ImageDispatcher.h"
11fdf7f2 35#include "librbd/io/ObjectDispatcher.h"
f67539c2 36#include "librbd/io/QosImageDispatch.h"
20effc67 37#include "librbd/io/IoOperations.h"
7c673cae 38#include "librbd/journal/StandardPolicy.h"
f67539c2 39#include "librbd/operation/ResizeRequest.h"
7c673cae
FG
40
41#include "osdc/Striper.h"
11fdf7f2 42#include <boost/algorithm/string/predicate.hpp>
7c673cae
FG
43
44#define dout_subsys ceph_subsys_rbd
45#undef dout_prefix
46#define dout_prefix *_dout << "librbd::ImageCtx: "
47
48using std::map;
49using std::pair;
50using std::set;
51using std::string;
52using std::vector;
53
54using ceph::bufferlist;
55using librados::snap_t;
56using librados::IoCtx;
57
58namespace librbd {
59
60namespace {
61
a4b75251 62class SafeTimerSingleton : public CommonSafeTimer<ceph::mutex> {
7c673cae 63public:
f67539c2 64 ceph::mutex lock = ceph::make_mutex("librbd::SafeTimerSingleton::lock");
7c673cae
FG
65
66 explicit SafeTimerSingleton(CephContext *cct)
9f95a23c 67 : SafeTimer(cct, lock, true) {
7c673cae
FG
68 init();
69 }
70 ~SafeTimerSingleton() {
9f95a23c 71 std::lock_guard locker{lock};
7c673cae
FG
72 shutdown();
73 }
74};
75
f67539c2
TL
76librados::IoCtx duplicate_io_ctx(librados::IoCtx& io_ctx) {
77 librados::IoCtx dup_io_ctx;
78 dup_io_ctx.dup(io_ctx);
79 return dup_io_ctx;
80}
81
7c673cae
FG
82} // anonymous namespace
83
84 const string ImageCtx::METADATA_CONF_PREFIX = "conf_";
85
86 ImageCtx::ImageCtx(const string &image_name, const string &image_id,
87 const char *snap, IoCtx& p, bool ro)
88 : cct((CephContext*)p.cct()),
11fdf7f2 89 config(cct->_conf),
7c673cae
FG
90 perfcounter(NULL),
91 snap_id(CEPH_NOSNAP),
92 snap_exists(true),
93 read_only(ro),
9f95a23c 94 read_only_flags(ro ? IMAGE_READ_ONLY_FLAG_USER : 0U),
7c673cae
FG
95 exclusive_locked(false),
96 name(image_name),
f67539c2
TL
97 asio_engine(std::make_shared<AsioEngine>(p)),
98 rados_api(asio_engine->get_rados_api()),
99 data_ctx(duplicate_io_ctx(p)),
100 md_ctx(duplicate_io_ctx(p)),
7c673cae
FG
101 image_watcher(NULL),
102 journal(NULL),
9f95a23c
TL
103 owner_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::owner_lock", this))),
104 image_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::image_lock", this))),
105 timestamp_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::timestamp_lock", this))),
106 async_ops_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this))),
107 copyup_list_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this))),
7c673cae 108 extra_read_flags(0),
11fdf7f2 109 old_format(false),
7c673cae
FG
110 order(0), size(0), features(0),
111 format_string(NULL),
112 id(image_id), parent(NULL),
113 stripe_unit(0), stripe_count(0), flags(0),
7c673cae
FG
114 readahead(),
115 total_bytes_read(0),
116 state(new ImageState<>(this)),
117 operations(new Operations<>(*this)),
118 exclusive_lock(nullptr), object_map(nullptr),
f67539c2
TL
119 op_work_queue(asio_engine->get_work_queue()),
120 plugin_registry(new PluginRegistry<ImageCtx>(this)),
9f95a23c 121 event_socket_completions(32),
31f18b77
FG
122 asok_hook(nullptr),
123 trace_endpoint("librbd")
7c673cae 124 {
f67539c2
TL
125 ldout(cct, 10) << this << " " << __func__ << ": "
126 << "image_name=" << image_name << ", "
127 << "image_id=" << image_id << dendl;
128
7c673cae
FG
129 if (snap)
130 snap_name = snap;
131
f67539c2
TL
132 rebuild_data_io_context();
133
92f5a8d4 134 // FIPS zeroization audit 20191117: this memset is not security related.
7c673cae
FG
135 memset(&header, 0, sizeof(header));
136
f67539c2
TL
137 io_image_dispatcher = new io::ImageDispatcher<ImageCtx>(this);
138 io_object_dispatcher = new io::ObjectDispatcher<ImageCtx>(this);
7c673cae 139
11fdf7f2 140 if (cct->_conf.get_val<bool>("rbd_auto_exclusive_lock_until_manual_request")) {
7c673cae
FG
141 exclusive_lock_policy = new exclusive_lock::AutomaticPolicy(this);
142 } else {
143 exclusive_lock_policy = new exclusive_lock::StandardPolicy(this);
144 }
f67539c2 145 journal_policy = new journal::StandardPolicy(this);
7c673cae
FG
146 }
147
11fdf7f2
TL
148 ImageCtx::ImageCtx(const string &image_name, const string &image_id,
149 uint64_t snap_id, IoCtx& p, bool ro)
150 : ImageCtx(image_name, image_id, "", p, ro) {
151 open_snap_id = snap_id;
152 }
153
7c673cae 154 ImageCtx::~ImageCtx() {
f67539c2
TL
155 ldout(cct, 10) << this << " " << __func__ << dendl;
156
f6b5b4d7 157 ceph_assert(config_watcher == nullptr);
11fdf7f2
TL
158 ceph_assert(image_watcher == NULL);
159 ceph_assert(exclusive_lock == NULL);
160 ceph_assert(object_map == NULL);
161 ceph_assert(journal == NULL);
162 ceph_assert(asok_hook == NULL);
7c673cae
FG
163
164 if (perfcounter) {
165 perf_stop();
166 }
7c673cae
FG
167 delete[] format_string;
168
169 md_ctx.aio_flush();
eafe8130
TL
170 if (data_ctx.is_valid()) {
171 data_ctx.aio_flush();
172 }
7c673cae 173
11fdf7f2 174 delete io_object_dispatcher;
f67539c2 175 delete io_image_dispatcher;
11fdf7f2 176
7c673cae
FG
177 delete journal_policy;
178 delete exclusive_lock_policy;
7c673cae
FG
179 delete operations;
180 delete state;
f67539c2
TL
181
182 delete plugin_registry;
7c673cae
FG
183 }
184
185 void ImageCtx::init() {
11fdf7f2
TL
186 ceph_assert(!header_oid.empty());
187 ceph_assert(old_format || !id.empty());
7c673cae
FG
188
189 asok_hook = new LibrbdAdminSocketHook(this);
190
191 string pname = string("librbd-") + id + string("-") +
eafe8130 192 md_ctx.get_pool_name() + string("-") + name;
7c673cae
FG
193 if (!snap_name.empty()) {
194 pname += "-";
195 pname += snap_name;
196 }
197
31f18b77 198 trace_endpoint.copy_name(pname);
7c673cae
FG
199 perf_start(pname);
200
11fdf7f2
TL
201 ceph_assert(image_watcher == NULL);
202 image_watcher = new ImageWatcher<>(*this);
7c673cae
FG
203 }
204
205 void ImageCtx::shutdown() {
206 delete image_watcher;
207 image_watcher = nullptr;
208
209 delete asok_hook;
210 asok_hook = nullptr;
211 }
212
eafe8130 213 void ImageCtx::init_layout(int64_t pool_id)
7c673cae
FG
214 {
215 if (stripe_unit == 0 || stripe_count == 0) {
216 stripe_unit = 1ull << order;
217 stripe_count = 1;
218 }
219
220 vector<uint64_t> alignments;
221 alignments.push_back(stripe_count << order); // object set (in file striping terminology)
222 alignments.push_back(stripe_unit * stripe_count); // stripe
223 alignments.push_back(stripe_unit); // stripe unit
224 readahead.set_alignments(alignments);
225
226 layout = file_layout_t();
227 layout.stripe_unit = stripe_unit;
228 layout.stripe_count = stripe_count;
229 layout.object_size = 1ull << order;
eafe8130 230 layout.pool_id = pool_id; // FIXME: pool id overflow?
7c673cae
FG
231
232 delete[] format_string;
233 size_t len = object_prefix.length() + 16;
234 format_string = new char[len];
235 if (old_format) {
236 snprintf(format_string, len, "%s.%%012llx", object_prefix.c_str());
237 } else {
238 snprintf(format_string, len, "%s.%%016llx", object_prefix.c_str());
239 }
240
241 ldout(cct, 10) << "init_layout stripe_unit " << stripe_unit
242 << " stripe_count " << stripe_count
243 << " object_size " << layout.object_size
244 << " prefix " << object_prefix
245 << " format " << format_string
246 << dendl;
247 }
248
249 void ImageCtx::perf_start(string name) {
b32b8144
FG
250 auto perf_prio = PerfCountersBuilder::PRIO_DEBUGONLY;
251 if (child == nullptr) {
252 // ensure top-level IO stats are exported for librbd daemons
253 perf_prio = PerfCountersBuilder::PRIO_USEFUL;
254 }
255
7c673cae
FG
256 PerfCountersBuilder plb(cct, name, l_librbd_first, l_librbd_last);
257
b32b8144
FG
258 plb.add_u64_counter(l_librbd_rd, "rd", "Reads", "r", perf_prio);
259 plb.add_u64_counter(l_librbd_rd_bytes, "rd_bytes", "Data size in reads",
11fdf7f2 260 "rb", perf_prio, unit_t(UNIT_BYTES));
b32b8144
FG
261 plb.add_time_avg(l_librbd_rd_latency, "rd_latency", "Latency of reads",
262 "rl", perf_prio);
263 plb.add_u64_counter(l_librbd_wr, "wr", "Writes", "w", perf_prio);
264 plb.add_u64_counter(l_librbd_wr_bytes, "wr_bytes", "Written data",
11fdf7f2 265 "wb", perf_prio, unit_t(UNIT_BYTES));
b32b8144
FG
266 plb.add_time_avg(l_librbd_wr_latency, "wr_latency", "Write latency",
267 "wl", perf_prio);
7c673cae 268 plb.add_u64_counter(l_librbd_discard, "discard", "Discards");
11fdf7f2 269 plb.add_u64_counter(l_librbd_discard_bytes, "discard_bytes", "Discarded data", NULL, 0, unit_t(UNIT_BYTES));
7c673cae
FG
270 plb.add_time_avg(l_librbd_discard_latency, "discard_latency", "Discard latency");
271 plb.add_u64_counter(l_librbd_flush, "flush", "Flushes");
11fdf7f2 272 plb.add_time_avg(l_librbd_flush_latency, "flush_latency", "Latency of flushes");
7c673cae 273 plb.add_u64_counter(l_librbd_ws, "ws", "WriteSames");
11fdf7f2 274 plb.add_u64_counter(l_librbd_ws_bytes, "ws_bytes", "WriteSame data", NULL, 0, unit_t(UNIT_BYTES));
7c673cae 275 plb.add_time_avg(l_librbd_ws_latency, "ws_latency", "WriteSame latency");
c07f9fc5 276 plb.add_u64_counter(l_librbd_cmp, "cmp", "CompareAndWrites");
11fdf7f2 277 plb.add_u64_counter(l_librbd_cmp_bytes, "cmp_bytes", "Data size in cmps", NULL, 0, unit_t(UNIT_BYTES));
c07f9fc5 278 plb.add_time_avg(l_librbd_cmp_latency, "cmp_latency", "Latency of cmps");
7c673cae
FG
279 plb.add_u64_counter(l_librbd_snap_create, "snap_create", "Snap creations");
280 plb.add_u64_counter(l_librbd_snap_remove, "snap_remove", "Snap removals");
281 plb.add_u64_counter(l_librbd_snap_rollback, "snap_rollback", "Snap rollbacks");
282 plb.add_u64_counter(l_librbd_snap_rename, "snap_rename", "Snap rename");
283 plb.add_u64_counter(l_librbd_notify, "notify", "Updated header notifications");
284 plb.add_u64_counter(l_librbd_resize, "resize", "Resizes");
285 plb.add_u64_counter(l_librbd_readahead, "readahead", "Read ahead");
11fdf7f2 286 plb.add_u64_counter(l_librbd_readahead_bytes, "readahead_bytes", "Data size in read ahead", NULL, 0, unit_t(UNIT_BYTES));
7c673cae
FG
287 plb.add_u64_counter(l_librbd_invalidate_cache, "invalidate_cache", "Cache invalidates");
288
b32b8144
FG
289 plb.add_time(l_librbd_opened_time, "opened_time", "Opened time",
290 "ots", perf_prio);
291 plb.add_time(l_librbd_lock_acquired_time, "lock_acquired_time",
292 "Lock acquired time", "lats", perf_prio);
293
7c673cae
FG
294 perfcounter = plb.create_perf_counters();
295 cct->get_perfcounters_collection()->add(perfcounter);
b32b8144
FG
296
297 perfcounter->tset(l_librbd_opened_time, ceph_clock_now());
7c673cae
FG
298 }
299
300 void ImageCtx::perf_stop() {
11fdf7f2 301 ceph_assert(perfcounter);
7c673cae
FG
302 cct->get_perfcounters_collection()->remove(perfcounter);
303 delete perfcounter;
304 }
305
306 void ImageCtx::set_read_flag(unsigned flag) {
307 extra_read_flags |= flag;
308 }
309
310 int ImageCtx::get_read_flags(snap_t snap_id) {
9f95a23c
TL
311 int flags = librados::OPERATION_NOFLAG | read_flags;
312 if (flags != 0)
313 return flags;
314
315 flags = librados::OPERATION_NOFLAG | extra_read_flags;
7c673cae
FG
316 if (snap_id == LIBRADOS_SNAP_HEAD)
317 return flags;
318
11fdf7f2 319 if (config.get_val<bool>("rbd_balance_snap_reads"))
7c673cae 320 flags |= librados::OPERATION_BALANCE_READS;
11fdf7f2 321 else if (config.get_val<bool>("rbd_localize_snap_reads"))
7c673cae
FG
322 flags |= librados::OPERATION_LOCALIZE_READS;
323 return flags;
324 }
325
11fdf7f2 326 int ImageCtx::snap_set(uint64_t in_snap_id) {
9f95a23c 327 ceph_assert(ceph_mutex_is_wlocked(image_lock));
11fdf7f2
TL
328 auto it = snap_info.find(in_snap_id);
329 if (in_snap_id != CEPH_NOSNAP && it != snap_info.end()) {
7c673cae 330 snap_id = in_snap_id;
11fdf7f2
TL
331 snap_namespace = it->second.snap_namespace;
332 snap_name = it->second.name;
7c673cae 333 snap_exists = true;
eafe8130
TL
334 if (data_ctx.is_valid()) {
335 data_ctx.snap_set_read(snap_id);
f67539c2 336 rebuild_data_io_context();
eafe8130 337 }
7c673cae
FG
338 return 0;
339 }
340 return -ENOENT;
341 }
342
343 void ImageCtx::snap_unset()
344 {
9f95a23c 345 ceph_assert(ceph_mutex_is_wlocked(image_lock));
7c673cae
FG
346 snap_id = CEPH_NOSNAP;
347 snap_namespace = {};
348 snap_name = "";
349 snap_exists = true;
eafe8130
TL
350 if (data_ctx.is_valid()) {
351 data_ctx.snap_set_read(snap_id);
f67539c2 352 rebuild_data_io_context();
eafe8130 353 }
7c673cae
FG
354 }
355
11fdf7f2
TL
356 snap_t ImageCtx::get_snap_id(const cls::rbd::SnapshotNamespace& in_snap_namespace,
357 const string& in_snap_name) const
7c673cae 358 {
9f95a23c 359 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae 360 auto it = snap_ids.find({in_snap_namespace, in_snap_name});
11fdf7f2 361 if (it != snap_ids.end()) {
7c673cae 362 return it->second;
11fdf7f2 363 }
7c673cae
FG
364 return CEPH_NOSNAP;
365 }
366
367 const SnapInfo* ImageCtx::get_snap_info(snap_t in_snap_id) const
368 {
9f95a23c 369 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
370 map<snap_t, SnapInfo>::const_iterator it =
371 snap_info.find(in_snap_id);
372 if (it != snap_info.end())
373 return &it->second;
11fdf7f2 374 return nullptr;
7c673cae
FG
375 }
376
377 int ImageCtx::get_snap_name(snap_t in_snap_id,
378 string *out_snap_name) const
379 {
9f95a23c 380 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
381 const SnapInfo *info = get_snap_info(in_snap_id);
382 if (info) {
383 *out_snap_name = info->name;
384 return 0;
385 }
386 return -ENOENT;
387 }
388
389 int ImageCtx::get_snap_namespace(snap_t in_snap_id,
390 cls::rbd::SnapshotNamespace *out_snap_namespace) const
391 {
9f95a23c 392 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
393 const SnapInfo *info = get_snap_info(in_snap_id);
394 if (info) {
395 *out_snap_namespace = info->snap_namespace;
396 return 0;
397 }
398 return -ENOENT;
399 }
400
401 int ImageCtx::get_parent_spec(snap_t in_snap_id,
11fdf7f2 402 cls::rbd::ParentImageSpec *out_pspec) const
7c673cae
FG
403 {
404 const SnapInfo *info = get_snap_info(in_snap_id);
405 if (info) {
406 *out_pspec = info->parent.spec;
407 return 0;
408 }
409 return -ENOENT;
410 }
411
412 uint64_t ImageCtx::get_current_size() const
413 {
9f95a23c 414 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
415 return size;
416 }
417
418 uint64_t ImageCtx::get_object_size() const
419 {
420 return 1ull << order;
421 }
422
423 string ImageCtx::get_object_name(uint64_t num) const {
9f95a23c 424 return util::data_object_name(this, num);
7c673cae
FG
425 }
426
427 uint64_t ImageCtx::get_stripe_unit() const
428 {
429 return stripe_unit;
430 }
431
432 uint64_t ImageCtx::get_stripe_count() const
433 {
434 return stripe_count;
435 }
436
437 uint64_t ImageCtx::get_stripe_period() const
438 {
439 return stripe_count * (1ull << order);
440 }
441
31f18b77
FG
442 utime_t ImageCtx::get_create_timestamp() const
443 {
444 return create_timestamp;
445 }
446
11fdf7f2
TL
447 utime_t ImageCtx::get_access_timestamp() const
448 {
449 return access_timestamp;
450 }
451
452 utime_t ImageCtx::get_modify_timestamp() const
453 {
454 return modify_timestamp;
455 }
456
457 void ImageCtx::set_access_timestamp(utime_t at)
458 {
9f95a23c 459 ceph_assert(ceph_mutex_is_wlocked(timestamp_lock));
11fdf7f2
TL
460 access_timestamp = at;
461 }
462
463 void ImageCtx::set_modify_timestamp(utime_t mt)
464 {
9f95a23c 465 ceph_assert(ceph_mutex_is_locked(timestamp_lock));
11fdf7f2
TL
466 modify_timestamp = mt;
467 }
468
7c673cae
FG
469 int ImageCtx::is_snap_protected(snap_t in_snap_id,
470 bool *is_protected) const
471 {
9f95a23c 472 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
473 const SnapInfo *info = get_snap_info(in_snap_id);
474 if (info) {
475 *is_protected =
476 (info->protection_status == RBD_PROTECTION_STATUS_PROTECTED);
477 return 0;
478 }
479 return -ENOENT;
480 }
481
482 int ImageCtx::is_snap_unprotected(snap_t in_snap_id,
483 bool *is_unprotected) const
484 {
9f95a23c 485 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
486 const SnapInfo *info = get_snap_info(in_snap_id);
487 if (info) {
488 *is_unprotected =
489 (info->protection_status == RBD_PROTECTION_STATUS_UNPROTECTED);
490 return 0;
491 }
492 return -ENOENT;
493 }
494
495 void ImageCtx::add_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
496 string in_snap_name,
497 snap_t id, uint64_t in_size,
11fdf7f2
TL
498 const ParentImageInfo &parent,
499 uint8_t protection_status, uint64_t flags,
500 utime_t timestamp)
7c673cae 501 {
9f95a23c 502 ceph_assert(ceph_mutex_is_wlocked(image_lock));
7c673cae
FG
503 snaps.push_back(id);
504 SnapInfo info(in_snap_name, in_snap_namespace,
505 in_size, parent, protection_status, flags, timestamp);
506 snap_info.insert({id, info});
507 snap_ids.insert({{in_snap_namespace, in_snap_name}, id});
508 }
509
510 void ImageCtx::rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace,
511 string in_snap_name,
512 snap_t id)
513 {
9f95a23c 514 ceph_assert(ceph_mutex_is_wlocked(image_lock));
7c673cae
FG
515 snaps.erase(std::remove(snaps.begin(), snaps.end(), id), snaps.end());
516 snap_info.erase(id);
517 snap_ids.erase({in_snap_namespace, in_snap_name});
518 }
519
520 uint64_t ImageCtx::get_image_size(snap_t in_snap_id) const
521 {
9f95a23c 522 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
523 if (in_snap_id == CEPH_NOSNAP) {
524 if (!resize_reqs.empty() &&
525 resize_reqs.front()->shrinking()) {
526 return resize_reqs.front()->get_image_size();
527 }
528 return size;
529 }
530
531 const SnapInfo *info = get_snap_info(in_snap_id);
532 if (info) {
533 return info->size;
534 }
535 return 0;
536 }
537
f67539c2
TL
538 uint64_t ImageCtx::get_effective_image_size(snap_t in_snap_id) const {
539 auto raw_size = get_image_size(in_snap_id);
540 if (raw_size == 0) {
541 return 0;
542 }
543
544 io::Extents extents = {{raw_size, 0}};
545 io_image_dispatcher->remap_extents(
546 extents, io::IMAGE_EXTENTS_MAP_TYPE_PHYSICAL_TO_LOGICAL);
547 return extents.front().first;
548 }
549
7c673cae 550 uint64_t ImageCtx::get_object_count(snap_t in_snap_id) const {
9f95a23c 551 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
552 uint64_t image_size = get_image_size(in_snap_id);
553 return Striper::get_num_objects(layout, image_size);
554 }
555
556 bool ImageCtx::test_features(uint64_t features) const
557 {
9f95a23c
TL
558 std::shared_lock l{image_lock};
559 return test_features(features, image_lock);
7c673cae
FG
560 }
561
562 bool ImageCtx::test_features(uint64_t in_features,
9f95a23c 563 const ceph::shared_mutex &in_image_lock) const
7c673cae 564 {
9f95a23c 565 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
566 return ((features & in_features) == in_features);
567 }
568
11fdf7f2
TL
569 bool ImageCtx::test_op_features(uint64_t in_op_features) const
570 {
9f95a23c
TL
571 std::shared_lock l{image_lock};
572 return test_op_features(in_op_features, image_lock);
11fdf7f2
TL
573 }
574
575 bool ImageCtx::test_op_features(uint64_t in_op_features,
9f95a23c 576 const ceph::shared_mutex &in_image_lock) const
11fdf7f2 577 {
9f95a23c 578 ceph_assert(ceph_mutex_is_locked(image_lock));
11fdf7f2
TL
579 return ((op_features & in_op_features) == in_op_features);
580 }
581
7c673cae
FG
582 int ImageCtx::get_flags(librados::snap_t _snap_id, uint64_t *_flags) const
583 {
9f95a23c 584 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
585 if (_snap_id == CEPH_NOSNAP) {
586 *_flags = flags;
587 return 0;
588 }
589 const SnapInfo *info = get_snap_info(_snap_id);
590 if (info) {
591 *_flags = info->flags;
592 return 0;
593 }
594 return -ENOENT;
595 }
596
91327a77
AA
597 int ImageCtx::test_flags(librados::snap_t in_snap_id,
598 uint64_t flags, bool *flags_set) const
7c673cae 599 {
9f95a23c
TL
600 std::shared_lock l{image_lock};
601 return test_flags(in_snap_id, flags, image_lock, flags_set);
7c673cae
FG
602 }
603
91327a77 604 int ImageCtx::test_flags(librados::snap_t in_snap_id,
9f95a23c
TL
605 uint64_t flags,
606 const ceph::shared_mutex &in_image_lock,
31f18b77 607 bool *flags_set) const
7c673cae 608 {
9f95a23c 609 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae 610 uint64_t snap_flags;
91327a77 611 int r = get_flags(in_snap_id, &snap_flags);
31f18b77
FG
612 if (r < 0) {
613 return r;
614 }
615 *flags_set = ((snap_flags & flags) == flags);
616 return 0;
7c673cae
FG
617 }
618
619 int ImageCtx::update_flags(snap_t in_snap_id, uint64_t flag, bool enabled)
620 {
9f95a23c 621 ceph_assert(ceph_mutex_is_wlocked(image_lock));
7c673cae
FG
622 uint64_t *_flags;
623 if (in_snap_id == CEPH_NOSNAP) {
624 _flags = &flags;
625 } else {
626 map<snap_t, SnapInfo>::iterator it = snap_info.find(in_snap_id);
627 if (it == snap_info.end()) {
628 return -ENOENT;
629 }
630 _flags = &it->second.flags;
631 }
632
633 if (enabled) {
634 (*_flags) |= flag;
635 } else {
636 (*_flags) &= ~flag;
637 }
638 return 0;
639 }
640
11fdf7f2 641 const ParentImageInfo* ImageCtx::get_parent_info(snap_t in_snap_id) const
7c673cae 642 {
9f95a23c 643 ceph_assert(ceph_mutex_is_locked(image_lock));
7c673cae
FG
644 if (in_snap_id == CEPH_NOSNAP)
645 return &parent_md;
646 const SnapInfo *info = get_snap_info(in_snap_id);
647 if (info)
648 return &info->parent;
649 return NULL;
650 }
651
652 int64_t ImageCtx::get_parent_pool_id(snap_t in_snap_id) const
653 {
11fdf7f2 654 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
655 if (info)
656 return info->spec.pool_id;
657 return -1;
658 }
659
660 string ImageCtx::get_parent_image_id(snap_t in_snap_id) const
661 {
11fdf7f2 662 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
663 if (info)
664 return info->spec.image_id;
665 return "";
666 }
667
668 uint64_t ImageCtx::get_parent_snap_id(snap_t in_snap_id) const
669 {
11fdf7f2 670 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
671 if (info)
672 return info->spec.snap_id;
673 return CEPH_NOSNAP;
674 }
675
676 int ImageCtx::get_parent_overlap(snap_t in_snap_id, uint64_t *overlap) const
677 {
9f95a23c 678 ceph_assert(ceph_mutex_is_locked(image_lock));
11fdf7f2 679 const auto info = get_parent_info(in_snap_id);
7c673cae
FG
680 if (info) {
681 *overlap = info->overlap;
682 return 0;
683 }
684 return -ENOENT;
685 }
686
7c673cae 687 void ImageCtx::register_watch(Context *on_finish) {
11fdf7f2 688 ceph_assert(image_watcher != NULL);
7c673cae
FG
689 image_watcher->register_watch(on_finish);
690 }
691
692 uint64_t ImageCtx::prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
693 uint64_t overlap)
694 {
695 // drop extents completely beyond the overlap
696 while (!objectx.empty() && objectx.back().first >= overlap)
697 objectx.pop_back();
698
699 // trim final overlapping extent
700 if (!objectx.empty() && objectx.back().first + objectx.back().second > overlap)
701 objectx.back().second = overlap - objectx.back().first;
702
703 uint64_t len = 0;
704 for (vector<pair<uint64_t,uint64_t> >::iterator p = objectx.begin();
705 p != objectx.end();
706 ++p)
707 len += p->second;
708 ldout(cct, 10) << "prune_parent_extents image overlap " << overlap
709 << ", object overlap " << len
710 << " from image extents " << objectx << dendl;
711 return len;
712 }
713
7c673cae
FG
714 void ImageCtx::cancel_async_requests() {
715 C_SaferCond ctx;
716 cancel_async_requests(&ctx);
717 ctx.wait();
718 }
719
720 void ImageCtx::cancel_async_requests(Context *on_finish) {
721 {
9f95a23c 722 std::lock_guard async_ops_locker{async_ops_lock};
7c673cae
FG
723 if (!async_requests.empty()) {
724 ldout(cct, 10) << "canceling async requests: count="
725 << async_requests.size() << dendl;
726 for (auto req : async_requests) {
727 ldout(cct, 10) << "canceling async request: " << req << dendl;
728 req->cancel();
729 }
730 async_requests_waiters.push_back(on_finish);
731 return;
732 }
733 }
734
735 on_finish->complete(0);
736 }
737
11fdf7f2
TL
738 void ImageCtx::apply_metadata(const std::map<std::string, bufferlist> &meta,
739 bool thread_safe) {
740 ldout(cct, 20) << __func__ << dendl;
7c673cae 741
f6b5b4d7
TL
742 std::unique_lock image_locker(image_lock);
743
11fdf7f2 744 // reset settings back to global defaults
11fdf7f2 745 config_overrides.clear();
f67539c2 746 config.set_config_values(cct->_conf.get_config_values());
7c673cae 747
11fdf7f2
TL
748 // extract config overrides
749 for (auto meta_pair : meta) {
750 if (!boost::starts_with(meta_pair.first, METADATA_CONF_PREFIX)) {
7c673cae 751 continue;
11fdf7f2 752 }
7c673cae 753
11fdf7f2
TL
754 std::string key = meta_pair.first.substr(METADATA_CONF_PREFIX.size());
755 if (!boost::starts_with(key, "rbd_")) {
756 // ignore non-RBD configuration keys
757 // TODO use option schema to determine applicable subsystem
758 ldout(cct, 0) << __func__ << ": ignoring config " << key << dendl;
759 continue;
7c673cae 760 }
7c673cae 761
11fdf7f2
TL
762 if (config.find_option(key) != nullptr) {
763 std::string val(meta_pair.second.c_str(), meta_pair.second.length());
764 int r = config.set_val(key, val);
765 if (r >= 0) {
766 ldout(cct, 20) << __func__ << ": " << key << "=" << val << dendl;
767 config_overrides.insert(key);
768 } else {
769 lderr(cct) << __func__ << ": failed to set config " << key << " "
770 << "with value " << val << ": " << cpp_strerror(r)
771 << dendl;
772 }
7c673cae
FG
773 }
774 }
775
f6b5b4d7
TL
776 image_locker.unlock();
777
11fdf7f2
TL
778#define ASSIGN_OPTION(param, type) \
779 param = config.get_val<type>("rbd_"#param)
7c673cae 780
11fdf7f2 781 bool skip_partial_discard = true;
181888fb
FG
782 ASSIGN_OPTION(non_blocking_aio, bool);
783 ASSIGN_OPTION(cache, bool);
11fdf7f2 784 ASSIGN_OPTION(sparse_read_threshold_bytes, Option::size_t);
181888fb 785 ASSIGN_OPTION(clone_copy_on_read, bool);
181888fb 786 ASSIGN_OPTION(enable_alloc_hint, bool);
11fdf7f2
TL
787 ASSIGN_OPTION(mirroring_replay_delay, uint64_t);
788 ASSIGN_OPTION(mtime_update_interval, uint64_t);
789 ASSIGN_OPTION(atime_update_interval, uint64_t);
181888fb 790 ASSIGN_OPTION(skip_partial_discard, bool);
11fdf7f2 791 ASSIGN_OPTION(discard_granularity_bytes, uint64_t);
181888fb 792 ASSIGN_OPTION(blkin_trace_all, bool);
b32b8144 793
f67539c2
TL
794 auto cache_policy = config.get_val<std::string>("rbd_cache_policy");
795 if (cache_policy == "writethrough" || cache_policy == "writeback") {
796 ASSIGN_OPTION(readahead_max_bytes, Option::size_t);
797 ASSIGN_OPTION(readahead_disable_after_bytes, Option::size_t);
798 }
799
11fdf7f2 800#undef ASSIGN_OPTION
b32b8144
FG
801
802 if (sparse_read_threshold_bytes == 0) {
803 sparse_read_threshold_bytes = get_object_size();
804 }
20effc67
TL
805
806 bool dirty_cache = test_features(RBD_FEATURE_DIRTY_CACHE);
807 if (!skip_partial_discard || dirty_cache) {
11fdf7f2
TL
808 discard_granularity_bytes = 0;
809 }
810
92f5a8d4
TL
811 alloc_hint_flags = 0;
812 auto compression_hint = config.get_val<std::string>("rbd_compression_hint");
813 if (compression_hint == "compressible") {
814 alloc_hint_flags |= librados::ALLOC_HINT_FLAG_COMPRESSIBLE;
815 } else if (compression_hint == "incompressible") {
816 alloc_hint_flags |= librados::ALLOC_HINT_FLAG_INCOMPRESSIBLE;
817 }
818
9f95a23c
TL
819 librados::Rados rados(md_ctx);
820 int8_t require_osd_release;
821 int r = rados.get_min_compatible_osd(&require_osd_release);
822 if (r == 0 && require_osd_release >= CEPH_RELEASE_OCTOPUS) {
823 read_flags = 0;
824 auto read_policy = config.get_val<std::string>("rbd_read_from_replica_policy");
825 if (read_policy == "balance") {
e306af50 826 read_flags |= librados::OPERATION_BALANCE_READS;
9f95a23c 827 } else if (read_policy == "localize") {
e306af50 828 read_flags |= librados::OPERATION_LOCALIZE_READS;
9f95a23c
TL
829 }
830 }
831
f67539c2 832 io_image_dispatcher->apply_qos_schedule_tick_min(
11fdf7f2
TL
833 config.get_val<uint64_t>("rbd_qos_schedule_tick_min"));
834
f67539c2
TL
835 io_image_dispatcher->apply_qos_limit(
836 io::IMAGE_DISPATCH_FLAG_QOS_IOPS_THROTTLE,
11fdf7f2 837 config.get_val<uint64_t>("rbd_qos_iops_limit"),
f67539c2
TL
838 config.get_val<uint64_t>("rbd_qos_iops_burst"),
839 config.get_val<uint64_t>("rbd_qos_iops_burst_seconds"));
840 io_image_dispatcher->apply_qos_limit(
841 io::IMAGE_DISPATCH_FLAG_QOS_BPS_THROTTLE,
11fdf7f2 842 config.get_val<uint64_t>("rbd_qos_bps_limit"),
f67539c2
TL
843 config.get_val<uint64_t>("rbd_qos_bps_burst"),
844 config.get_val<uint64_t>("rbd_qos_bps_burst_seconds"));
845 io_image_dispatcher->apply_qos_limit(
846 io::IMAGE_DISPATCH_FLAG_QOS_READ_IOPS_THROTTLE,
11fdf7f2 847 config.get_val<uint64_t>("rbd_qos_read_iops_limit"),
f67539c2
TL
848 config.get_val<uint64_t>("rbd_qos_read_iops_burst"),
849 config.get_val<uint64_t>("rbd_qos_read_iops_burst_seconds"));
850 io_image_dispatcher->apply_qos_limit(
851 io::IMAGE_DISPATCH_FLAG_QOS_WRITE_IOPS_THROTTLE,
11fdf7f2 852 config.get_val<uint64_t>("rbd_qos_write_iops_limit"),
f67539c2
TL
853 config.get_val<uint64_t>("rbd_qos_write_iops_burst"),
854 config.get_val<uint64_t>("rbd_qos_write_iops_burst_seconds"));
855 io_image_dispatcher->apply_qos_limit(
856 io::IMAGE_DISPATCH_FLAG_QOS_READ_BPS_THROTTLE,
11fdf7f2 857 config.get_val<uint64_t>("rbd_qos_read_bps_limit"),
f67539c2
TL
858 config.get_val<uint64_t>("rbd_qos_read_bps_burst"),
859 config.get_val<uint64_t>("rbd_qos_read_bps_burst_seconds"));
860 io_image_dispatcher->apply_qos_limit(
861 io::IMAGE_DISPATCH_FLAG_QOS_WRITE_BPS_THROTTLE,
11fdf7f2 862 config.get_val<uint64_t>("rbd_qos_write_bps_limit"),
f67539c2
TL
863 config.get_val<uint64_t>("rbd_qos_write_bps_burst"),
864 config.get_val<uint64_t>("rbd_qos_write_bps_burst_seconds"));
20effc67
TL
865 io_image_dispatcher->apply_qos_exclude_ops(
866 librbd::io::rbd_io_operations_from_string(
867 config.get_val<std::string>("rbd_qos_exclude_ops"), nullptr));
9f95a23c
TL
868
869 if (!disable_zero_copy &&
870 config.get_val<bool>("rbd_disable_zero_copy_writes")) {
871 ldout(cct, 5) << this << ": disabling zero-copy writes" << dendl;
872 disable_zero_copy = true;
873 }
7c673cae
FG
874 }
875
876 ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() {
877 return new ExclusiveLock<ImageCtx>(*this);
878 }
879
880 ObjectMap<ImageCtx> *ImageCtx::create_object_map(uint64_t snap_id) {
881 return new ObjectMap<ImageCtx>(*this, snap_id);
882 }
883
884 Journal<ImageCtx> *ImageCtx::create_journal() {
885 return new Journal<ImageCtx>(*this);
886 }
887
888 void ImageCtx::set_image_name(const std::string &image_name) {
889 // update the name so rename can be invoked repeatedly
9f95a23c
TL
890 std::shared_lock owner_locker{owner_lock};
891 std::unique_lock image_locker{image_lock};
7c673cae
FG
892 name = image_name;
893 if (old_format) {
894 header_oid = util::old_header_name(image_name);
895 }
896 }
897
898 void ImageCtx::notify_update() {
899 state->handle_update_notification();
900 ImageWatcher<>::notify_header_update(md_ctx, header_oid);
901 }
902
903 void ImageCtx::notify_update(Context *on_finish) {
904 state->handle_update_notification();
905 image_watcher->notify_header_update(on_finish);
906 }
907
908 exclusive_lock::Policy *ImageCtx::get_exclusive_lock_policy() const {
9f95a23c 909 ceph_assert(ceph_mutex_is_locked(owner_lock));
11fdf7f2 910 ceph_assert(exclusive_lock_policy != nullptr);
7c673cae
FG
911 return exclusive_lock_policy;
912 }
913
914 void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy *policy) {
9f95a23c 915 ceph_assert(ceph_mutex_is_wlocked(owner_lock));
11fdf7f2 916 ceph_assert(policy != nullptr);
7c673cae
FG
917 delete exclusive_lock_policy;
918 exclusive_lock_policy = policy;
919 }
920
921 journal::Policy *ImageCtx::get_journal_policy() const {
9f95a23c 922 ceph_assert(ceph_mutex_is_locked(image_lock));
11fdf7f2 923 ceph_assert(journal_policy != nullptr);
7c673cae
FG
924 return journal_policy;
925 }
926
927 void ImageCtx::set_journal_policy(journal::Policy *policy) {
9f95a23c 928 ceph_assert(ceph_mutex_is_wlocked(image_lock));
11fdf7f2 929 ceph_assert(policy != nullptr);
7c673cae
FG
930 delete journal_policy;
931 journal_policy = policy;
932 }
933
f67539c2
TL
934 void ImageCtx::rebuild_data_io_context() {
935 auto ctx = std::make_shared<neorados::IOContext>(
936 data_ctx.get_id(), data_ctx.get_namespace());
937 if (snap_id != CEPH_NOSNAP) {
938 ctx->read_snap(snap_id);
939 }
940 if (!snapc.snaps.empty()) {
941 ctx->write_snap_context(
942 {{snapc.seq, {snapc.snaps.begin(), snapc.snaps.end()}}});
943 }
20effc67
TL
944 if (data_ctx.get_pool_full_try()) {
945 ctx->full_try(true);
946 }
f67539c2
TL
947
948 // atomically reset the data IOContext to new version
949 atomic_store(&data_io_context, ctx);
950 }
951
952 IOContext ImageCtx::get_data_io_context() const {
953 return atomic_load(&data_io_context);
954 }
955
956 IOContext ImageCtx::duplicate_data_io_context() const {
957 auto ctx = get_data_io_context();
958 return std::make_shared<neorados::IOContext>(*ctx);
7c673cae
FG
959 }
960
961 void ImageCtx::get_timer_instance(CephContext *cct, SafeTimer **timer,
9f95a23c 962 ceph::mutex **timer_lock) {
11fdf7f2
TL
963 auto safe_timer_singleton =
964 &cct->lookup_or_create_singleton_object<SafeTimerSingleton>(
965 "librbd::journal::safe_timer", false, cct);
7c673cae
FG
966 *timer = safe_timer_singleton;
967 *timer_lock = &safe_timer_singleton->lock;
968 }
969}