]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | #include <errno.h> | |
4 | #include <boost/assign/list_of.hpp> | |
5 | #include <stddef.h> | |
6 | ||
7 | #include "common/ceph_context.h" | |
8 | #include "common/dout.h" | |
9 | #include "common/errno.h" | |
10 | #include "common/perf_counters.h" | |
11 | #include "common/WorkQueue.h" | |
12 | #include "common/Timer.h" | |
13 | ||
7c673cae FG |
14 | #include "librbd/AsyncRequest.h" |
15 | #include "librbd/ExclusiveLock.h" | |
16 | #include "librbd/internal.h" | |
17 | #include "librbd/ImageCtx.h" | |
18 | #include "librbd/ImageState.h" | |
19 | #include "librbd/ImageWatcher.h" | |
20 | #include "librbd/Journal.h" | |
21 | #include "librbd/LibrbdAdminSocketHook.h" | |
22 | #include "librbd/ObjectMap.h" | |
23 | #include "librbd/Operations.h" | |
24 | #include "librbd/operation/ResizeRequest.h" | |
b32b8144 | 25 | #include "librbd/Types.h" |
7c673cae | 26 | #include "librbd/Utils.h" |
7c673cae FG |
27 | #include "librbd/exclusive_lock/AutomaticPolicy.h" |
28 | #include "librbd/exclusive_lock/StandardPolicy.h" | |
29 | #include "librbd/io/AioCompletion.h" | |
31f18b77 | 30 | #include "librbd/io/AsyncOperation.h" |
7c673cae | 31 | #include "librbd/io/ImageRequestWQ.h" |
11fdf7f2 | 32 | #include "librbd/io/ObjectDispatcher.h" |
7c673cae FG |
33 | #include "librbd/journal/StandardPolicy.h" |
34 | ||
35 | #include "osdc/Striper.h" | |
36 | #include <boost/bind.hpp> | |
11fdf7f2 | 37 | #include <boost/algorithm/string/predicate.hpp> |
7c673cae FG |
38 | |
39 | #define dout_subsys ceph_subsys_rbd | |
40 | #undef dout_prefix | |
41 | #define dout_prefix *_dout << "librbd::ImageCtx: " | |
42 | ||
43 | using std::map; | |
44 | using std::pair; | |
45 | using std::set; | |
46 | using std::string; | |
47 | using std::vector; | |
48 | ||
49 | using ceph::bufferlist; | |
50 | using librados::snap_t; | |
51 | using librados::IoCtx; | |
52 | ||
53 | namespace librbd { | |
54 | ||
55 | namespace { | |
56 | ||
57 | class ThreadPoolSingleton : public ThreadPool { | |
58 | public: | |
59 | ContextWQ *op_work_queue; | |
60 | ||
61 | explicit ThreadPoolSingleton(CephContext *cct) | |
62 | : ThreadPool(cct, "librbd::thread_pool", "tp_librbd", 1, | |
63 | "rbd_op_threads"), | |
64 | op_work_queue(new ContextWQ("librbd::op_work_queue", | |
11fdf7f2 | 65 | cct->_conf.get_val<uint64_t>("rbd_op_thread_timeout"), |
7c673cae FG |
66 | this)) { |
67 | start(); | |
68 | } | |
69 | ~ThreadPoolSingleton() override { | |
70 | op_work_queue->drain(); | |
71 | delete op_work_queue; | |
72 | ||
73 | stop(); | |
74 | } | |
75 | }; | |
76 | ||
77 | class SafeTimerSingleton : public SafeTimer { | |
78 | public: | |
9f95a23c | 79 | ceph::mutex lock = ceph::make_mutex("librbd::Journal::SafeTimerSingleton::lock"); |
7c673cae FG |
80 | |
81 | explicit SafeTimerSingleton(CephContext *cct) | |
9f95a23c | 82 | : SafeTimer(cct, lock, true) { |
7c673cae FG |
83 | init(); |
84 | } | |
85 | ~SafeTimerSingleton() { | |
9f95a23c | 86 | std::lock_guard locker{lock}; |
7c673cae FG |
87 | shutdown(); |
88 | } | |
89 | }; | |
90 | ||
7c673cae FG |
91 | } // anonymous namespace |
92 | ||
93 | const string ImageCtx::METADATA_CONF_PREFIX = "conf_"; | |
94 | ||
95 | ImageCtx::ImageCtx(const string &image_name, const string &image_id, | |
96 | const char *snap, IoCtx& p, bool ro) | |
97 | : cct((CephContext*)p.cct()), | |
11fdf7f2 | 98 | config(cct->_conf), |
7c673cae FG |
99 | perfcounter(NULL), |
100 | snap_id(CEPH_NOSNAP), | |
101 | snap_exists(true), | |
102 | read_only(ro), | |
9f95a23c | 103 | read_only_flags(ro ? IMAGE_READ_ONLY_FLAG_USER : 0U), |
7c673cae FG |
104 | exclusive_locked(false), |
105 | name(image_name), | |
106 | image_watcher(NULL), | |
107 | journal(NULL), | |
9f95a23c TL |
108 | owner_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::owner_lock", this))), |
109 | image_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::image_lock", this))), | |
110 | timestamp_lock(ceph::make_shared_mutex(util::unique_lock_name("librbd::ImageCtx::timestamp_lock", this))), | |
111 | async_ops_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::async_ops_lock", this))), | |
112 | copyup_list_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageCtx::copyup_list_lock", this))), | |
7c673cae | 113 | extra_read_flags(0), |
11fdf7f2 | 114 | old_format(false), |
7c673cae FG |
115 | order(0), size(0), features(0), |
116 | format_string(NULL), | |
117 | id(image_id), parent(NULL), | |
118 | stripe_unit(0), stripe_count(0), flags(0), | |
7c673cae FG |
119 | readahead(), |
120 | total_bytes_read(0), | |
121 | state(new ImageState<>(this)), | |
122 | operations(new Operations<>(*this)), | |
123 | exclusive_lock(nullptr), object_map(nullptr), | |
124 | io_work_queue(nullptr), op_work_queue(nullptr), | |
9f95a23c TL |
125 | external_callback_completions(32), |
126 | event_socket_completions(32), | |
31f18b77 FG |
127 | asok_hook(nullptr), |
128 | trace_endpoint("librbd") | |
7c673cae FG |
129 | { |
130 | md_ctx.dup(p); | |
131 | data_ctx.dup(p); | |
132 | if (snap) | |
133 | snap_name = snap; | |
134 | ||
92f5a8d4 | 135 | // FIPS zeroization audit 20191117: this memset is not security related. |
7c673cae FG |
136 | memset(&header, 0, sizeof(header)); |
137 | ||
138 | ThreadPool *thread_pool; | |
139 | get_thread_pool_instance(cct, &thread_pool, &op_work_queue); | |
224ce89b | 140 | io_work_queue = new io::ImageRequestWQ<>( |
181888fb | 141 | this, "librbd::io_work_queue", |
11fdf7f2 | 142 | cct->_conf.get_val<uint64_t>("rbd_op_thread_timeout"), |
7c673cae | 143 | thread_pool); |
11fdf7f2 | 144 | io_object_dispatcher = new io::ObjectDispatcher<>(this); |
7c673cae | 145 | |
11fdf7f2 | 146 | if (cct->_conf.get_val<bool>("rbd_auto_exclusive_lock_until_manual_request")) { |
7c673cae FG |
147 | exclusive_lock_policy = new exclusive_lock::AutomaticPolicy(this); |
148 | } else { | |
149 | exclusive_lock_policy = new exclusive_lock::StandardPolicy(this); | |
150 | } | |
151 | journal_policy = new journal::StandardPolicy<ImageCtx>(this); | |
152 | } | |
153 | ||
11fdf7f2 TL |
154 | ImageCtx::ImageCtx(const string &image_name, const string &image_id, |
155 | uint64_t snap_id, IoCtx& p, bool ro) | |
156 | : ImageCtx(image_name, image_id, "", p, ro) { | |
157 | open_snap_id = snap_id; | |
158 | } | |
159 | ||
7c673cae | 160 | ImageCtx::~ImageCtx() { |
11fdf7f2 TL |
161 | ceph_assert(image_watcher == NULL); |
162 | ceph_assert(exclusive_lock == NULL); | |
163 | ceph_assert(object_map == NULL); | |
164 | ceph_assert(journal == NULL); | |
165 | ceph_assert(asok_hook == NULL); | |
7c673cae FG |
166 | |
167 | if (perfcounter) { | |
168 | perf_stop(); | |
169 | } | |
7c673cae FG |
170 | delete[] format_string; |
171 | ||
172 | md_ctx.aio_flush(); | |
eafe8130 TL |
173 | if (data_ctx.is_valid()) { |
174 | data_ctx.aio_flush(); | |
175 | } | |
7c673cae FG |
176 | io_work_queue->drain(); |
177 | ||
11fdf7f2 TL |
178 | delete io_object_dispatcher; |
179 | ||
7c673cae FG |
180 | delete journal_policy; |
181 | delete exclusive_lock_policy; | |
182 | delete io_work_queue; | |
183 | delete operations; | |
184 | delete state; | |
185 | } | |
186 | ||
187 | void ImageCtx::init() { | |
11fdf7f2 TL |
188 | ceph_assert(!header_oid.empty()); |
189 | ceph_assert(old_format || !id.empty()); | |
7c673cae FG |
190 | |
191 | asok_hook = new LibrbdAdminSocketHook(this); | |
192 | ||
193 | string pname = string("librbd-") + id + string("-") + | |
eafe8130 | 194 | md_ctx.get_pool_name() + string("-") + name; |
7c673cae FG |
195 | if (!snap_name.empty()) { |
196 | pname += "-"; | |
197 | pname += snap_name; | |
198 | } | |
199 | ||
31f18b77 | 200 | trace_endpoint.copy_name(pname); |
7c673cae FG |
201 | perf_start(pname); |
202 | ||
11fdf7f2 TL |
203 | ceph_assert(image_watcher == NULL); |
204 | image_watcher = new ImageWatcher<>(*this); | |
7c673cae FG |
205 | } |
206 | ||
207 | void ImageCtx::shutdown() { | |
208 | delete image_watcher; | |
209 | image_watcher = nullptr; | |
210 | ||
211 | delete asok_hook; | |
212 | asok_hook = nullptr; | |
213 | } | |
214 | ||
eafe8130 | 215 | void ImageCtx::init_layout(int64_t pool_id) |
7c673cae FG |
216 | { |
217 | if (stripe_unit == 0 || stripe_count == 0) { | |
218 | stripe_unit = 1ull << order; | |
219 | stripe_count = 1; | |
220 | } | |
221 | ||
222 | vector<uint64_t> alignments; | |
223 | alignments.push_back(stripe_count << order); // object set (in file striping terminology) | |
224 | alignments.push_back(stripe_unit * stripe_count); // stripe | |
225 | alignments.push_back(stripe_unit); // stripe unit | |
226 | readahead.set_alignments(alignments); | |
227 | ||
228 | layout = file_layout_t(); | |
229 | layout.stripe_unit = stripe_unit; | |
230 | layout.stripe_count = stripe_count; | |
231 | layout.object_size = 1ull << order; | |
eafe8130 | 232 | layout.pool_id = pool_id; // FIXME: pool id overflow? |
7c673cae FG |
233 | |
234 | delete[] format_string; | |
235 | size_t len = object_prefix.length() + 16; | |
236 | format_string = new char[len]; | |
237 | if (old_format) { | |
238 | snprintf(format_string, len, "%s.%%012llx", object_prefix.c_str()); | |
239 | } else { | |
240 | snprintf(format_string, len, "%s.%%016llx", object_prefix.c_str()); | |
241 | } | |
242 | ||
243 | ldout(cct, 10) << "init_layout stripe_unit " << stripe_unit | |
244 | << " stripe_count " << stripe_count | |
245 | << " object_size " << layout.object_size | |
246 | << " prefix " << object_prefix | |
247 | << " format " << format_string | |
248 | << dendl; | |
249 | } | |
250 | ||
251 | void ImageCtx::perf_start(string name) { | |
b32b8144 FG |
252 | auto perf_prio = PerfCountersBuilder::PRIO_DEBUGONLY; |
253 | if (child == nullptr) { | |
254 | // ensure top-level IO stats are exported for librbd daemons | |
255 | perf_prio = PerfCountersBuilder::PRIO_USEFUL; | |
256 | } | |
257 | ||
7c673cae FG |
258 | PerfCountersBuilder plb(cct, name, l_librbd_first, l_librbd_last); |
259 | ||
b32b8144 FG |
260 | plb.add_u64_counter(l_librbd_rd, "rd", "Reads", "r", perf_prio); |
261 | plb.add_u64_counter(l_librbd_rd_bytes, "rd_bytes", "Data size in reads", | |
11fdf7f2 | 262 | "rb", perf_prio, unit_t(UNIT_BYTES)); |
b32b8144 FG |
263 | plb.add_time_avg(l_librbd_rd_latency, "rd_latency", "Latency of reads", |
264 | "rl", perf_prio); | |
265 | plb.add_u64_counter(l_librbd_wr, "wr", "Writes", "w", perf_prio); | |
266 | plb.add_u64_counter(l_librbd_wr_bytes, "wr_bytes", "Written data", | |
11fdf7f2 | 267 | "wb", perf_prio, unit_t(UNIT_BYTES)); |
b32b8144 FG |
268 | plb.add_time_avg(l_librbd_wr_latency, "wr_latency", "Write latency", |
269 | "wl", perf_prio); | |
7c673cae | 270 | plb.add_u64_counter(l_librbd_discard, "discard", "Discards"); |
11fdf7f2 | 271 | plb.add_u64_counter(l_librbd_discard_bytes, "discard_bytes", "Discarded data", NULL, 0, unit_t(UNIT_BYTES)); |
7c673cae FG |
272 | plb.add_time_avg(l_librbd_discard_latency, "discard_latency", "Discard latency"); |
273 | plb.add_u64_counter(l_librbd_flush, "flush", "Flushes"); | |
11fdf7f2 | 274 | plb.add_time_avg(l_librbd_flush_latency, "flush_latency", "Latency of flushes"); |
7c673cae | 275 | plb.add_u64_counter(l_librbd_ws, "ws", "WriteSames"); |
11fdf7f2 | 276 | plb.add_u64_counter(l_librbd_ws_bytes, "ws_bytes", "WriteSame data", NULL, 0, unit_t(UNIT_BYTES)); |
7c673cae | 277 | plb.add_time_avg(l_librbd_ws_latency, "ws_latency", "WriteSame latency"); |
c07f9fc5 | 278 | plb.add_u64_counter(l_librbd_cmp, "cmp", "CompareAndWrites"); |
11fdf7f2 | 279 | plb.add_u64_counter(l_librbd_cmp_bytes, "cmp_bytes", "Data size in cmps", NULL, 0, unit_t(UNIT_BYTES)); |
c07f9fc5 | 280 | plb.add_time_avg(l_librbd_cmp_latency, "cmp_latency", "Latency of cmps"); |
7c673cae FG |
281 | plb.add_u64_counter(l_librbd_snap_create, "snap_create", "Snap creations"); |
282 | plb.add_u64_counter(l_librbd_snap_remove, "snap_remove", "Snap removals"); | |
283 | plb.add_u64_counter(l_librbd_snap_rollback, "snap_rollback", "Snap rollbacks"); | |
284 | plb.add_u64_counter(l_librbd_snap_rename, "snap_rename", "Snap rename"); | |
285 | plb.add_u64_counter(l_librbd_notify, "notify", "Updated header notifications"); | |
286 | plb.add_u64_counter(l_librbd_resize, "resize", "Resizes"); | |
287 | plb.add_u64_counter(l_librbd_readahead, "readahead", "Read ahead"); | |
11fdf7f2 | 288 | plb.add_u64_counter(l_librbd_readahead_bytes, "readahead_bytes", "Data size in read ahead", NULL, 0, unit_t(UNIT_BYTES)); |
7c673cae FG |
289 | plb.add_u64_counter(l_librbd_invalidate_cache, "invalidate_cache", "Cache invalidates"); |
290 | ||
b32b8144 FG |
291 | plb.add_time(l_librbd_opened_time, "opened_time", "Opened time", |
292 | "ots", perf_prio); | |
293 | plb.add_time(l_librbd_lock_acquired_time, "lock_acquired_time", | |
294 | "Lock acquired time", "lats", perf_prio); | |
295 | ||
7c673cae FG |
296 | perfcounter = plb.create_perf_counters(); |
297 | cct->get_perfcounters_collection()->add(perfcounter); | |
b32b8144 FG |
298 | |
299 | perfcounter->tset(l_librbd_opened_time, ceph_clock_now()); | |
7c673cae FG |
300 | } |
301 | ||
302 | void ImageCtx::perf_stop() { | |
11fdf7f2 | 303 | ceph_assert(perfcounter); |
7c673cae FG |
304 | cct->get_perfcounters_collection()->remove(perfcounter); |
305 | delete perfcounter; | |
306 | } | |
307 | ||
308 | void ImageCtx::set_read_flag(unsigned flag) { | |
309 | extra_read_flags |= flag; | |
310 | } | |
311 | ||
312 | int ImageCtx::get_read_flags(snap_t snap_id) { | |
9f95a23c TL |
313 | int flags = librados::OPERATION_NOFLAG | read_flags; |
314 | if (flags != 0) | |
315 | return flags; | |
316 | ||
317 | flags = librados::OPERATION_NOFLAG | extra_read_flags; | |
7c673cae FG |
318 | if (snap_id == LIBRADOS_SNAP_HEAD) |
319 | return flags; | |
320 | ||
11fdf7f2 | 321 | if (config.get_val<bool>("rbd_balance_snap_reads")) |
7c673cae | 322 | flags |= librados::OPERATION_BALANCE_READS; |
11fdf7f2 | 323 | else if (config.get_val<bool>("rbd_localize_snap_reads")) |
7c673cae FG |
324 | flags |= librados::OPERATION_LOCALIZE_READS; |
325 | return flags; | |
326 | } | |
327 | ||
11fdf7f2 | 328 | int ImageCtx::snap_set(uint64_t in_snap_id) { |
9f95a23c | 329 | ceph_assert(ceph_mutex_is_wlocked(image_lock)); |
11fdf7f2 TL |
330 | auto it = snap_info.find(in_snap_id); |
331 | if (in_snap_id != CEPH_NOSNAP && it != snap_info.end()) { | |
7c673cae | 332 | snap_id = in_snap_id; |
11fdf7f2 TL |
333 | snap_namespace = it->second.snap_namespace; |
334 | snap_name = it->second.name; | |
7c673cae | 335 | snap_exists = true; |
eafe8130 TL |
336 | if (data_ctx.is_valid()) { |
337 | data_ctx.snap_set_read(snap_id); | |
338 | } | |
7c673cae FG |
339 | return 0; |
340 | } | |
341 | return -ENOENT; | |
342 | } | |
343 | ||
344 | void ImageCtx::snap_unset() | |
345 | { | |
9f95a23c | 346 | ceph_assert(ceph_mutex_is_wlocked(image_lock)); |
7c673cae FG |
347 | snap_id = CEPH_NOSNAP; |
348 | snap_namespace = {}; | |
349 | snap_name = ""; | |
350 | snap_exists = true; | |
eafe8130 TL |
351 | if (data_ctx.is_valid()) { |
352 | data_ctx.snap_set_read(snap_id); | |
353 | } | |
7c673cae FG |
354 | } |
355 | ||
11fdf7f2 TL |
356 | snap_t ImageCtx::get_snap_id(const cls::rbd::SnapshotNamespace& in_snap_namespace, |
357 | const string& in_snap_name) const | |
7c673cae | 358 | { |
9f95a23c | 359 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae | 360 | auto it = snap_ids.find({in_snap_namespace, in_snap_name}); |
11fdf7f2 | 361 | if (it != snap_ids.end()) { |
7c673cae | 362 | return it->second; |
11fdf7f2 | 363 | } |
7c673cae FG |
364 | return CEPH_NOSNAP; |
365 | } | |
366 | ||
367 | const SnapInfo* ImageCtx::get_snap_info(snap_t in_snap_id) const | |
368 | { | |
9f95a23c | 369 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
370 | map<snap_t, SnapInfo>::const_iterator it = |
371 | snap_info.find(in_snap_id); | |
372 | if (it != snap_info.end()) | |
373 | return &it->second; | |
11fdf7f2 | 374 | return nullptr; |
7c673cae FG |
375 | } |
376 | ||
377 | int ImageCtx::get_snap_name(snap_t in_snap_id, | |
378 | string *out_snap_name) const | |
379 | { | |
9f95a23c | 380 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
381 | const SnapInfo *info = get_snap_info(in_snap_id); |
382 | if (info) { | |
383 | *out_snap_name = info->name; | |
384 | return 0; | |
385 | } | |
386 | return -ENOENT; | |
387 | } | |
388 | ||
389 | int ImageCtx::get_snap_namespace(snap_t in_snap_id, | |
390 | cls::rbd::SnapshotNamespace *out_snap_namespace) const | |
391 | { | |
9f95a23c | 392 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
393 | const SnapInfo *info = get_snap_info(in_snap_id); |
394 | if (info) { | |
395 | *out_snap_namespace = info->snap_namespace; | |
396 | return 0; | |
397 | } | |
398 | return -ENOENT; | |
399 | } | |
400 | ||
401 | int ImageCtx::get_parent_spec(snap_t in_snap_id, | |
11fdf7f2 | 402 | cls::rbd::ParentImageSpec *out_pspec) const |
7c673cae FG |
403 | { |
404 | const SnapInfo *info = get_snap_info(in_snap_id); | |
405 | if (info) { | |
406 | *out_pspec = info->parent.spec; | |
407 | return 0; | |
408 | } | |
409 | return -ENOENT; | |
410 | } | |
411 | ||
412 | uint64_t ImageCtx::get_current_size() const | |
413 | { | |
9f95a23c | 414 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
415 | return size; |
416 | } | |
417 | ||
418 | uint64_t ImageCtx::get_object_size() const | |
419 | { | |
420 | return 1ull << order; | |
421 | } | |
422 | ||
423 | string ImageCtx::get_object_name(uint64_t num) const { | |
9f95a23c | 424 | return util::data_object_name(this, num); |
7c673cae FG |
425 | } |
426 | ||
427 | uint64_t ImageCtx::get_stripe_unit() const | |
428 | { | |
429 | return stripe_unit; | |
430 | } | |
431 | ||
432 | uint64_t ImageCtx::get_stripe_count() const | |
433 | { | |
434 | return stripe_count; | |
435 | } | |
436 | ||
437 | uint64_t ImageCtx::get_stripe_period() const | |
438 | { | |
439 | return stripe_count * (1ull << order); | |
440 | } | |
441 | ||
31f18b77 FG |
442 | utime_t ImageCtx::get_create_timestamp() const |
443 | { | |
444 | return create_timestamp; | |
445 | } | |
446 | ||
11fdf7f2 TL |
447 | utime_t ImageCtx::get_access_timestamp() const |
448 | { | |
449 | return access_timestamp; | |
450 | } | |
451 | ||
452 | utime_t ImageCtx::get_modify_timestamp() const | |
453 | { | |
454 | return modify_timestamp; | |
455 | } | |
456 | ||
457 | void ImageCtx::set_access_timestamp(utime_t at) | |
458 | { | |
9f95a23c | 459 | ceph_assert(ceph_mutex_is_wlocked(timestamp_lock)); |
11fdf7f2 TL |
460 | access_timestamp = at; |
461 | } | |
462 | ||
463 | void ImageCtx::set_modify_timestamp(utime_t mt) | |
464 | { | |
9f95a23c | 465 | ceph_assert(ceph_mutex_is_locked(timestamp_lock)); |
11fdf7f2 TL |
466 | modify_timestamp = mt; |
467 | } | |
468 | ||
7c673cae FG |
469 | int ImageCtx::is_snap_protected(snap_t in_snap_id, |
470 | bool *is_protected) const | |
471 | { | |
9f95a23c | 472 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
473 | const SnapInfo *info = get_snap_info(in_snap_id); |
474 | if (info) { | |
475 | *is_protected = | |
476 | (info->protection_status == RBD_PROTECTION_STATUS_PROTECTED); | |
477 | return 0; | |
478 | } | |
479 | return -ENOENT; | |
480 | } | |
481 | ||
482 | int ImageCtx::is_snap_unprotected(snap_t in_snap_id, | |
483 | bool *is_unprotected) const | |
484 | { | |
9f95a23c | 485 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
486 | const SnapInfo *info = get_snap_info(in_snap_id); |
487 | if (info) { | |
488 | *is_unprotected = | |
489 | (info->protection_status == RBD_PROTECTION_STATUS_UNPROTECTED); | |
490 | return 0; | |
491 | } | |
492 | return -ENOENT; | |
493 | } | |
494 | ||
495 | void ImageCtx::add_snap(cls::rbd::SnapshotNamespace in_snap_namespace, | |
496 | string in_snap_name, | |
497 | snap_t id, uint64_t in_size, | |
11fdf7f2 TL |
498 | const ParentImageInfo &parent, |
499 | uint8_t protection_status, uint64_t flags, | |
500 | utime_t timestamp) | |
7c673cae | 501 | { |
9f95a23c | 502 | ceph_assert(ceph_mutex_is_wlocked(image_lock)); |
7c673cae FG |
503 | snaps.push_back(id); |
504 | SnapInfo info(in_snap_name, in_snap_namespace, | |
505 | in_size, parent, protection_status, flags, timestamp); | |
506 | snap_info.insert({id, info}); | |
507 | snap_ids.insert({{in_snap_namespace, in_snap_name}, id}); | |
508 | } | |
509 | ||
510 | void ImageCtx::rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace, | |
511 | string in_snap_name, | |
512 | snap_t id) | |
513 | { | |
9f95a23c | 514 | ceph_assert(ceph_mutex_is_wlocked(image_lock)); |
7c673cae FG |
515 | snaps.erase(std::remove(snaps.begin(), snaps.end(), id), snaps.end()); |
516 | snap_info.erase(id); | |
517 | snap_ids.erase({in_snap_namespace, in_snap_name}); | |
518 | } | |
519 | ||
520 | uint64_t ImageCtx::get_image_size(snap_t in_snap_id) const | |
521 | { | |
9f95a23c | 522 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
523 | if (in_snap_id == CEPH_NOSNAP) { |
524 | if (!resize_reqs.empty() && | |
525 | resize_reqs.front()->shrinking()) { | |
526 | return resize_reqs.front()->get_image_size(); | |
527 | } | |
528 | return size; | |
529 | } | |
530 | ||
531 | const SnapInfo *info = get_snap_info(in_snap_id); | |
532 | if (info) { | |
533 | return info->size; | |
534 | } | |
535 | return 0; | |
536 | } | |
537 | ||
538 | uint64_t ImageCtx::get_object_count(snap_t in_snap_id) const { | |
9f95a23c | 539 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
540 | uint64_t image_size = get_image_size(in_snap_id); |
541 | return Striper::get_num_objects(layout, image_size); | |
542 | } | |
543 | ||
544 | bool ImageCtx::test_features(uint64_t features) const | |
545 | { | |
9f95a23c TL |
546 | std::shared_lock l{image_lock}; |
547 | return test_features(features, image_lock); | |
7c673cae FG |
548 | } |
549 | ||
550 | bool ImageCtx::test_features(uint64_t in_features, | |
9f95a23c | 551 | const ceph::shared_mutex &in_image_lock) const |
7c673cae | 552 | { |
9f95a23c | 553 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
554 | return ((features & in_features) == in_features); |
555 | } | |
556 | ||
11fdf7f2 TL |
557 | bool ImageCtx::test_op_features(uint64_t in_op_features) const |
558 | { | |
9f95a23c TL |
559 | std::shared_lock l{image_lock}; |
560 | return test_op_features(in_op_features, image_lock); | |
11fdf7f2 TL |
561 | } |
562 | ||
563 | bool ImageCtx::test_op_features(uint64_t in_op_features, | |
9f95a23c | 564 | const ceph::shared_mutex &in_image_lock) const |
11fdf7f2 | 565 | { |
9f95a23c | 566 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
11fdf7f2 TL |
567 | return ((op_features & in_op_features) == in_op_features); |
568 | } | |
569 | ||
7c673cae FG |
570 | int ImageCtx::get_flags(librados::snap_t _snap_id, uint64_t *_flags) const |
571 | { | |
9f95a23c | 572 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
573 | if (_snap_id == CEPH_NOSNAP) { |
574 | *_flags = flags; | |
575 | return 0; | |
576 | } | |
577 | const SnapInfo *info = get_snap_info(_snap_id); | |
578 | if (info) { | |
579 | *_flags = info->flags; | |
580 | return 0; | |
581 | } | |
582 | return -ENOENT; | |
583 | } | |
584 | ||
91327a77 AA |
585 | int ImageCtx::test_flags(librados::snap_t in_snap_id, |
586 | uint64_t flags, bool *flags_set) const | |
7c673cae | 587 | { |
9f95a23c TL |
588 | std::shared_lock l{image_lock}; |
589 | return test_flags(in_snap_id, flags, image_lock, flags_set); | |
7c673cae FG |
590 | } |
591 | ||
91327a77 | 592 | int ImageCtx::test_flags(librados::snap_t in_snap_id, |
9f95a23c TL |
593 | uint64_t flags, |
594 | const ceph::shared_mutex &in_image_lock, | |
31f18b77 | 595 | bool *flags_set) const |
7c673cae | 596 | { |
9f95a23c | 597 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae | 598 | uint64_t snap_flags; |
91327a77 | 599 | int r = get_flags(in_snap_id, &snap_flags); |
31f18b77 FG |
600 | if (r < 0) { |
601 | return r; | |
602 | } | |
603 | *flags_set = ((snap_flags & flags) == flags); | |
604 | return 0; | |
7c673cae FG |
605 | } |
606 | ||
607 | int ImageCtx::update_flags(snap_t in_snap_id, uint64_t flag, bool enabled) | |
608 | { | |
9f95a23c | 609 | ceph_assert(ceph_mutex_is_wlocked(image_lock)); |
7c673cae FG |
610 | uint64_t *_flags; |
611 | if (in_snap_id == CEPH_NOSNAP) { | |
612 | _flags = &flags; | |
613 | } else { | |
614 | map<snap_t, SnapInfo>::iterator it = snap_info.find(in_snap_id); | |
615 | if (it == snap_info.end()) { | |
616 | return -ENOENT; | |
617 | } | |
618 | _flags = &it->second.flags; | |
619 | } | |
620 | ||
621 | if (enabled) { | |
622 | (*_flags) |= flag; | |
623 | } else { | |
624 | (*_flags) &= ~flag; | |
625 | } | |
626 | return 0; | |
627 | } | |
628 | ||
11fdf7f2 | 629 | const ParentImageInfo* ImageCtx::get_parent_info(snap_t in_snap_id) const |
7c673cae | 630 | { |
9f95a23c | 631 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
7c673cae FG |
632 | if (in_snap_id == CEPH_NOSNAP) |
633 | return &parent_md; | |
634 | const SnapInfo *info = get_snap_info(in_snap_id); | |
635 | if (info) | |
636 | return &info->parent; | |
637 | return NULL; | |
638 | } | |
639 | ||
640 | int64_t ImageCtx::get_parent_pool_id(snap_t in_snap_id) const | |
641 | { | |
11fdf7f2 | 642 | const auto info = get_parent_info(in_snap_id); |
7c673cae FG |
643 | if (info) |
644 | return info->spec.pool_id; | |
645 | return -1; | |
646 | } | |
647 | ||
648 | string ImageCtx::get_parent_image_id(snap_t in_snap_id) const | |
649 | { | |
11fdf7f2 | 650 | const auto info = get_parent_info(in_snap_id); |
7c673cae FG |
651 | if (info) |
652 | return info->spec.image_id; | |
653 | return ""; | |
654 | } | |
655 | ||
656 | uint64_t ImageCtx::get_parent_snap_id(snap_t in_snap_id) const | |
657 | { | |
11fdf7f2 | 658 | const auto info = get_parent_info(in_snap_id); |
7c673cae FG |
659 | if (info) |
660 | return info->spec.snap_id; | |
661 | return CEPH_NOSNAP; | |
662 | } | |
663 | ||
664 | int ImageCtx::get_parent_overlap(snap_t in_snap_id, uint64_t *overlap) const | |
665 | { | |
9f95a23c | 666 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
11fdf7f2 | 667 | const auto info = get_parent_info(in_snap_id); |
7c673cae FG |
668 | if (info) { |
669 | *overlap = info->overlap; | |
670 | return 0; | |
671 | } | |
672 | return -ENOENT; | |
673 | } | |
674 | ||
7c673cae | 675 | void ImageCtx::register_watch(Context *on_finish) { |
11fdf7f2 | 676 | ceph_assert(image_watcher != NULL); |
7c673cae FG |
677 | image_watcher->register_watch(on_finish); |
678 | } | |
679 | ||
680 | uint64_t ImageCtx::prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx, | |
681 | uint64_t overlap) | |
682 | { | |
683 | // drop extents completely beyond the overlap | |
684 | while (!objectx.empty() && objectx.back().first >= overlap) | |
685 | objectx.pop_back(); | |
686 | ||
687 | // trim final overlapping extent | |
688 | if (!objectx.empty() && objectx.back().first + objectx.back().second > overlap) | |
689 | objectx.back().second = overlap - objectx.back().first; | |
690 | ||
691 | uint64_t len = 0; | |
692 | for (vector<pair<uint64_t,uint64_t> >::iterator p = objectx.begin(); | |
693 | p != objectx.end(); | |
694 | ++p) | |
695 | len += p->second; | |
696 | ldout(cct, 10) << "prune_parent_extents image overlap " << overlap | |
697 | << ", object overlap " << len | |
698 | << " from image extents " << objectx << dendl; | |
699 | return len; | |
700 | } | |
701 | ||
7c673cae FG |
702 | void ImageCtx::cancel_async_requests() { |
703 | C_SaferCond ctx; | |
704 | cancel_async_requests(&ctx); | |
705 | ctx.wait(); | |
706 | } | |
707 | ||
708 | void ImageCtx::cancel_async_requests(Context *on_finish) { | |
709 | { | |
9f95a23c | 710 | std::lock_guard async_ops_locker{async_ops_lock}; |
7c673cae FG |
711 | if (!async_requests.empty()) { |
712 | ldout(cct, 10) << "canceling async requests: count=" | |
713 | << async_requests.size() << dendl; | |
714 | for (auto req : async_requests) { | |
715 | ldout(cct, 10) << "canceling async request: " << req << dendl; | |
716 | req->cancel(); | |
717 | } | |
718 | async_requests_waiters.push_back(on_finish); | |
719 | return; | |
720 | } | |
721 | } | |
722 | ||
723 | on_finish->complete(0); | |
724 | } | |
725 | ||
11fdf7f2 TL |
726 | void ImageCtx::apply_metadata(const std::map<std::string, bufferlist> &meta, |
727 | bool thread_safe) { | |
728 | ldout(cct, 20) << __func__ << dendl; | |
7c673cae | 729 | |
11fdf7f2 TL |
730 | // reset settings back to global defaults |
731 | for (auto& key : config_overrides) { | |
732 | std::string value; | |
733 | int r = cct->_conf.get_val(key, &value); | |
734 | ceph_assert(r == 0); | |
735 | ||
736 | config.set_val(key, value); | |
737 | } | |
738 | config_overrides.clear(); | |
7c673cae | 739 | |
11fdf7f2 TL |
740 | // extract config overrides |
741 | for (auto meta_pair : meta) { | |
742 | if (!boost::starts_with(meta_pair.first, METADATA_CONF_PREFIX)) { | |
7c673cae | 743 | continue; |
11fdf7f2 | 744 | } |
7c673cae | 745 | |
11fdf7f2 TL |
746 | std::string key = meta_pair.first.substr(METADATA_CONF_PREFIX.size()); |
747 | if (!boost::starts_with(key, "rbd_")) { | |
748 | // ignore non-RBD configuration keys | |
749 | // TODO use option schema to determine applicable subsystem | |
750 | ldout(cct, 0) << __func__ << ": ignoring config " << key << dendl; | |
751 | continue; | |
7c673cae | 752 | } |
7c673cae | 753 | |
11fdf7f2 TL |
754 | if (config.find_option(key) != nullptr) { |
755 | std::string val(meta_pair.second.c_str(), meta_pair.second.length()); | |
756 | int r = config.set_val(key, val); | |
757 | if (r >= 0) { | |
758 | ldout(cct, 20) << __func__ << ": " << key << "=" << val << dendl; | |
759 | config_overrides.insert(key); | |
760 | } else { | |
761 | lderr(cct) << __func__ << ": failed to set config " << key << " " | |
762 | << "with value " << val << ": " << cpp_strerror(r) | |
763 | << dendl; | |
764 | } | |
7c673cae FG |
765 | } |
766 | } | |
767 | ||
11fdf7f2 TL |
768 | #define ASSIGN_OPTION(param, type) \ |
769 | param = config.get_val<type>("rbd_"#param) | |
7c673cae | 770 | |
11fdf7f2 | 771 | bool skip_partial_discard = true; |
181888fb FG |
772 | ASSIGN_OPTION(non_blocking_aio, bool); |
773 | ASSIGN_OPTION(cache, bool); | |
11fdf7f2 TL |
774 | ASSIGN_OPTION(sparse_read_threshold_bytes, Option::size_t); |
775 | ASSIGN_OPTION(readahead_max_bytes, Option::size_t); | |
776 | ASSIGN_OPTION(readahead_disable_after_bytes, Option::size_t); | |
181888fb | 777 | ASSIGN_OPTION(clone_copy_on_read, bool); |
181888fb | 778 | ASSIGN_OPTION(enable_alloc_hint, bool); |
11fdf7f2 TL |
779 | ASSIGN_OPTION(mirroring_replay_delay, uint64_t); |
780 | ASSIGN_OPTION(mtime_update_interval, uint64_t); | |
781 | ASSIGN_OPTION(atime_update_interval, uint64_t); | |
181888fb | 782 | ASSIGN_OPTION(skip_partial_discard, bool); |
11fdf7f2 | 783 | ASSIGN_OPTION(discard_granularity_bytes, uint64_t); |
181888fb | 784 | ASSIGN_OPTION(blkin_trace_all, bool); |
b32b8144 | 785 | |
11fdf7f2 | 786 | #undef ASSIGN_OPTION |
b32b8144 FG |
787 | |
788 | if (sparse_read_threshold_bytes == 0) { | |
789 | sparse_read_threshold_bytes = get_object_size(); | |
790 | } | |
11fdf7f2 TL |
791 | if (!skip_partial_discard) { |
792 | discard_granularity_bytes = 0; | |
793 | } | |
794 | ||
92f5a8d4 TL |
795 | alloc_hint_flags = 0; |
796 | auto compression_hint = config.get_val<std::string>("rbd_compression_hint"); | |
797 | if (compression_hint == "compressible") { | |
798 | alloc_hint_flags |= librados::ALLOC_HINT_FLAG_COMPRESSIBLE; | |
799 | } else if (compression_hint == "incompressible") { | |
800 | alloc_hint_flags |= librados::ALLOC_HINT_FLAG_INCOMPRESSIBLE; | |
801 | } | |
802 | ||
9f95a23c TL |
803 | librados::Rados rados(md_ctx); |
804 | int8_t require_osd_release; | |
805 | int r = rados.get_min_compatible_osd(&require_osd_release); | |
806 | if (r == 0 && require_osd_release >= CEPH_RELEASE_OCTOPUS) { | |
807 | read_flags = 0; | |
808 | auto read_policy = config.get_val<std::string>("rbd_read_from_replica_policy"); | |
809 | if (read_policy == "balance") { | |
810 | read_flags |= CEPH_OSD_FLAG_BALANCE_READS; | |
811 | } else if (read_policy == "localize") { | |
812 | read_flags |= CEPH_OSD_FLAG_LOCALIZE_READS; | |
813 | } | |
814 | } | |
815 | ||
11fdf7f2 TL |
816 | io_work_queue->apply_qos_schedule_tick_min( |
817 | config.get_val<uint64_t>("rbd_qos_schedule_tick_min")); | |
818 | ||
819 | io_work_queue->apply_qos_limit( | |
820 | RBD_QOS_IOPS_THROTTLE, | |
821 | config.get_val<uint64_t>("rbd_qos_iops_limit"), | |
822 | config.get_val<uint64_t>("rbd_qos_iops_burst")); | |
823 | io_work_queue->apply_qos_limit( | |
824 | RBD_QOS_BPS_THROTTLE, | |
825 | config.get_val<uint64_t>("rbd_qos_bps_limit"), | |
826 | config.get_val<uint64_t>("rbd_qos_bps_burst")); | |
827 | io_work_queue->apply_qos_limit( | |
828 | RBD_QOS_READ_IOPS_THROTTLE, | |
829 | config.get_val<uint64_t>("rbd_qos_read_iops_limit"), | |
830 | config.get_val<uint64_t>("rbd_qos_read_iops_burst")); | |
831 | io_work_queue->apply_qos_limit( | |
832 | RBD_QOS_WRITE_IOPS_THROTTLE, | |
833 | config.get_val<uint64_t>("rbd_qos_write_iops_limit"), | |
834 | config.get_val<uint64_t>("rbd_qos_write_iops_burst")); | |
835 | io_work_queue->apply_qos_limit( | |
836 | RBD_QOS_READ_BPS_THROTTLE, | |
837 | config.get_val<uint64_t>("rbd_qos_read_bps_limit"), | |
838 | config.get_val<uint64_t>("rbd_qos_read_bps_burst")); | |
839 | io_work_queue->apply_qos_limit( | |
840 | RBD_QOS_WRITE_BPS_THROTTLE, | |
841 | config.get_val<uint64_t>("rbd_qos_write_bps_limit"), | |
842 | config.get_val<uint64_t>("rbd_qos_write_bps_burst")); | |
9f95a23c TL |
843 | |
844 | if (!disable_zero_copy && | |
845 | config.get_val<bool>("rbd_disable_zero_copy_writes")) { | |
846 | ldout(cct, 5) << this << ": disabling zero-copy writes" << dendl; | |
847 | disable_zero_copy = true; | |
848 | } | |
7c673cae FG |
849 | } |
850 | ||
851 | ExclusiveLock<ImageCtx> *ImageCtx::create_exclusive_lock() { | |
852 | return new ExclusiveLock<ImageCtx>(*this); | |
853 | } | |
854 | ||
855 | ObjectMap<ImageCtx> *ImageCtx::create_object_map(uint64_t snap_id) { | |
856 | return new ObjectMap<ImageCtx>(*this, snap_id); | |
857 | } | |
858 | ||
859 | Journal<ImageCtx> *ImageCtx::create_journal() { | |
860 | return new Journal<ImageCtx>(*this); | |
861 | } | |
862 | ||
863 | void ImageCtx::set_image_name(const std::string &image_name) { | |
864 | // update the name so rename can be invoked repeatedly | |
9f95a23c TL |
865 | std::shared_lock owner_locker{owner_lock}; |
866 | std::unique_lock image_locker{image_lock}; | |
7c673cae FG |
867 | name = image_name; |
868 | if (old_format) { | |
869 | header_oid = util::old_header_name(image_name); | |
870 | } | |
871 | } | |
872 | ||
873 | void ImageCtx::notify_update() { | |
874 | state->handle_update_notification(); | |
875 | ImageWatcher<>::notify_header_update(md_ctx, header_oid); | |
876 | } | |
877 | ||
878 | void ImageCtx::notify_update(Context *on_finish) { | |
879 | state->handle_update_notification(); | |
880 | image_watcher->notify_header_update(on_finish); | |
881 | } | |
882 | ||
883 | exclusive_lock::Policy *ImageCtx::get_exclusive_lock_policy() const { | |
9f95a23c | 884 | ceph_assert(ceph_mutex_is_locked(owner_lock)); |
11fdf7f2 | 885 | ceph_assert(exclusive_lock_policy != nullptr); |
7c673cae FG |
886 | return exclusive_lock_policy; |
887 | } | |
888 | ||
889 | void ImageCtx::set_exclusive_lock_policy(exclusive_lock::Policy *policy) { | |
9f95a23c | 890 | ceph_assert(ceph_mutex_is_wlocked(owner_lock)); |
11fdf7f2 | 891 | ceph_assert(policy != nullptr); |
7c673cae FG |
892 | delete exclusive_lock_policy; |
893 | exclusive_lock_policy = policy; | |
894 | } | |
895 | ||
896 | journal::Policy *ImageCtx::get_journal_policy() const { | |
9f95a23c | 897 | ceph_assert(ceph_mutex_is_locked(image_lock)); |
11fdf7f2 | 898 | ceph_assert(journal_policy != nullptr); |
7c673cae FG |
899 | return journal_policy; |
900 | } | |
901 | ||
902 | void ImageCtx::set_journal_policy(journal::Policy *policy) { | |
9f95a23c | 903 | ceph_assert(ceph_mutex_is_wlocked(image_lock)); |
11fdf7f2 | 904 | ceph_assert(policy != nullptr); |
7c673cae FG |
905 | delete journal_policy; |
906 | journal_policy = policy; | |
907 | } | |
908 | ||
909 | void ImageCtx::get_thread_pool_instance(CephContext *cct, | |
910 | ThreadPool **thread_pool, | |
911 | ContextWQ **op_work_queue) { | |
11fdf7f2 TL |
912 | auto thread_pool_singleton = |
913 | &cct->lookup_or_create_singleton_object<ThreadPoolSingleton>( | |
914 | "librbd::thread_pool", false, cct); | |
7c673cae FG |
915 | *thread_pool = thread_pool_singleton; |
916 | *op_work_queue = thread_pool_singleton->op_work_queue; | |
917 | } | |
918 | ||
919 | void ImageCtx::get_timer_instance(CephContext *cct, SafeTimer **timer, | |
9f95a23c | 920 | ceph::mutex **timer_lock) { |
11fdf7f2 TL |
921 | auto safe_timer_singleton = |
922 | &cct->lookup_or_create_singleton_object<SafeTimerSingleton>( | |
923 | "librbd::journal::safe_timer", false, cct); | |
7c673cae FG |
924 | *timer = safe_timer_singleton; |
925 | *timer_lock = &safe_timer_singleton->lock; | |
926 | } | |
927 | } |