]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ObjectRequest.h" | |
5 | #include "common/ceph_context.h" | |
6 | #include "common/dout.h" | |
7 | #include "common/errno.h" | |
9f95a23c | 8 | #include "common/ceph_mutex.h" |
7c673cae | 9 | #include "include/Context.h" |
c07f9fc5 | 10 | #include "include/err.h" |
f67539c2 | 11 | #include "include/neorados/RADOS.hpp" |
11fdf7f2 | 12 | #include "osd/osd_types.h" |
f67539c2 TL |
13 | #include "librados/snap_set_diff.h" |
14 | #include "librbd/AsioEngine.h" | |
7c673cae FG |
15 | #include "librbd/ExclusiveLock.h" |
16 | #include "librbd/ImageCtx.h" | |
17 | #include "librbd/ObjectMap.h" | |
18 | #include "librbd/Utils.h" | |
f67539c2 | 19 | #include "librbd/asio/Utils.h" |
7c673cae FG |
20 | #include "librbd/io/AioCompletion.h" |
21 | #include "librbd/io/CopyupRequest.h" | |
f67539c2 | 22 | #include "librbd/io/ImageRequest.h" |
f91f0fd5 | 23 | #include "librbd/io/Utils.h" |
7c673cae | 24 | |
7c673cae FG |
25 | #include <boost/optional.hpp> |
26 | ||
// Log through the rbd subsystem. Every message emitted from this file is
// prefixed with the request pointer, the calling function and the name of the
// data object the request targets.
#define dout_subsys ceph_subsys_rbd
#undef dout_prefix
#define dout_prefix \
  *_dout << "librbd::io::ObjectRequest: " << this << " " << __func__ << ": " \
         << data_object_name(this->m_ictx, this->m_object_no) << " "
7c673cae FG |
33 | |
34 | namespace librbd { | |
35 | namespace io { | |
36 | ||
9f95a23c | 37 | using librbd::util::data_object_name; |
f91f0fd5 | 38 | using librbd::util::create_context_callback; |
f91f0fd5 | 39 | using librbd::util::create_trace; |
9f95a23c | 40 | |
b32b8144 | 41 | namespace { |
7c673cae FG |
42 | |
43 | template <typename I> | |
f67539c2 | 44 | inline bool is_copy_on_read(I *ictx, const IOContext& io_context) { |
9f95a23c | 45 | std::shared_lock image_locker{ictx->image_lock}; |
f67539c2 TL |
46 | return (ictx->clone_copy_on_read && !ictx->read_only && |
47 | io_context->read_snap().value_or(CEPH_NOSNAP) == CEPH_NOSNAP && | |
b32b8144 FG |
48 | (ictx->exclusive_lock == nullptr || |
49 | ictx->exclusive_lock->is_lock_owner())); | |
7c673cae FG |
50 | } |
51 | ||
// Copies a snap set from one representation to another with the same field
// names (seq, clones[].{cloneid, snaps, overlap, size}). Clones are appended
// to whatever dst_snap_set->clones already holds.
template <typename S, typename D>
void convert_snap_set(const S& src_snap_set,
                      D* dst_snap_set) {
  auto& dst_clones = dst_snap_set->clones;

  dst_snap_set->seq = src_snap_set.seq;
  dst_clones.reserve(src_snap_set.clones.size());
  for (const auto& src_clone : src_snap_set.clones) {
    auto& dst_clone = dst_clones.emplace_back();
    dst_clone.cloneid = src_clone.cloneid;
    dst_clone.snaps = src_clone.snaps;
    dst_clone.overlap = src_clone.overlap;
    dst_clone.size = src_clone.size;
  }
}
66 | ||
b32b8144 | 67 | } // anonymous namespace |
3efd9988 | 68 | |
7c673cae FG |
69 | template <typename I> |
70 | ObjectRequest<I>* | |
9f95a23c TL |
71 | ObjectRequest<I>::create_write( |
72 | I *ictx, uint64_t object_no, uint64_t object_off, ceph::bufferlist&& data, | |
f67539c2 TL |
73 | IOContext io_context, int op_flags, int write_flags, |
74 | std::optional<uint64_t> assert_version, | |
9f95a23c TL |
75 | const ZTracer::Trace &parent_trace, Context *completion) { |
76 | return new ObjectWriteRequest<I>(ictx, object_no, object_off, | |
f67539c2 TL |
77 | std::move(data), io_context, op_flags, |
78 | write_flags, assert_version, | |
11fdf7f2 | 79 | parent_trace, completion); |
7c673cae FG |
80 | } |
81 | ||
82 | template <typename I> | |
83 | ObjectRequest<I>* | |
9f95a23c TL |
84 | ObjectRequest<I>::create_discard( |
85 | I *ictx, uint64_t object_no, uint64_t object_off, uint64_t object_len, | |
f67539c2 | 86 | IOContext io_context, int discard_flags, |
9f95a23c TL |
87 | const ZTracer::Trace &parent_trace, Context *completion) { |
88 | return new ObjectDiscardRequest<I>(ictx, object_no, object_off, | |
f67539c2 | 89 | object_len, io_context, discard_flags, |
11fdf7f2 | 90 | parent_trace, completion); |
7c673cae FG |
91 | } |
92 | ||
93 | template <typename I> | |
94 | ObjectRequest<I>* | |
9f95a23c TL |
95 | ObjectRequest<I>::create_write_same( |
96 | I *ictx, uint64_t object_no, uint64_t object_off, uint64_t object_len, | |
f67539c2 | 97 | ceph::bufferlist&& data, IOContext io_context, int op_flags, |
9f95a23c TL |
98 | const ZTracer::Trace &parent_trace, Context *completion) { |
99 | return new ObjectWriteSameRequest<I>(ictx, object_no, object_off, | |
f67539c2 | 100 | object_len, std::move(data), io_context, |
11fdf7f2 | 101 | op_flags, parent_trace, completion); |
7c673cae FG |
102 | } |
103 | ||
c07f9fc5 FG |
104 | template <typename I> |
105 | ObjectRequest<I>* | |
9f95a23c TL |
106 | ObjectRequest<I>::create_compare_and_write( |
107 | I *ictx, uint64_t object_no, uint64_t object_off, | |
108 | ceph::bufferlist&& cmp_data, ceph::bufferlist&& write_data, | |
f67539c2 | 109 | IOContext io_context, uint64_t *mismatch_offset, int op_flags, |
9f95a23c TL |
110 | const ZTracer::Trace &parent_trace, Context *completion) { |
111 | return new ObjectCompareAndWriteRequest<I>(ictx, object_no, object_off, | |
11fdf7f2 | 112 | std::move(cmp_data), |
f67539c2 | 113 | std::move(write_data), io_context, |
b32b8144 FG |
114 | mismatch_offset, op_flags, |
115 | parent_trace, completion); | |
c07f9fc5 FG |
116 | } |
117 | ||
7c673cae | 118 | template <typename I> |
9f95a23c | 119 | ObjectRequest<I>::ObjectRequest( |
f67539c2 TL |
120 | I *ictx, uint64_t objectno, IOContext io_context, |
121 | const char *trace_name, const ZTracer::Trace &trace, Context *completion) | |
122 | : m_ictx(ictx), m_object_no(objectno), m_io_context(io_context), | |
123 | m_completion(completion), | |
f91f0fd5 | 124 | m_trace(create_trace(*ictx, "", trace)) { |
eafe8130 | 125 | ceph_assert(m_ictx->data_ctx.is_valid()); |
31f18b77 | 126 | if (m_trace.valid()) { |
9f95a23c TL |
127 | m_trace.copy_name(trace_name + std::string(" ") + |
128 | data_object_name(ictx, objectno)); | |
31f18b77 FG |
129 | m_trace.event("start"); |
130 | } | |
7c673cae FG |
131 | } |
132 | ||
133 | template <typename I> | |
f67539c2 TL |
134 | void ObjectRequest<I>::add_write_hint(I& image_ctx, neorados::WriteOp* wr) { |
135 | auto alloc_hint_flags = static_cast<neorados::alloc_hint::alloc_hint_t>( | |
136 | image_ctx.alloc_hint_flags); | |
b32b8144 | 137 | if (image_ctx.enable_alloc_hint) { |
f67539c2 TL |
138 | wr->set_alloc_hint(image_ctx.get_object_size(), |
139 | image_ctx.get_object_size(), | |
140 | alloc_hint_flags); | |
92f5a8d4 | 141 | } else if (image_ctx.alloc_hint_flags != 0U) { |
f67539c2 | 142 | wr->set_alloc_hint(0, 0, alloc_hint_flags); |
7c673cae FG |
143 | } |
144 | } | |
145 | ||
146 | template <typename I> | |
11fdf7f2 | 147 | bool ObjectRequest<I>::compute_parent_extents(Extents *parent_extents, |
1e59de90 | 148 | ImageArea *area, |
11fdf7f2 | 149 | bool read_request) { |
9f95a23c | 150 | ceph_assert(ceph_mutex_is_locked(m_ictx->image_lock)); |
7c673cae | 151 | |
b32b8144 FG |
152 | m_has_parent = false; |
153 | parent_extents->clear(); | |
1e59de90 | 154 | *area = ImageArea::DATA; |
b32b8144 | 155 | |
1e59de90 | 156 | uint64_t raw_overlap; |
f67539c2 | 157 | int r = m_ictx->get_parent_overlap( |
1e59de90 | 158 | m_io_context->read_snap().value_or(CEPH_NOSNAP), &raw_overlap); |
7c673cae FG |
159 | if (r < 0) { |
160 | // NOTE: it's possible for a snapshot to be deleted while we are | |
161 | // still reading from it | |
162 | lderr(m_ictx->cct) << "failed to retrieve parent overlap: " | |
b32b8144 FG |
163 | << cpp_strerror(r) << dendl; |
164 | return false; | |
11fdf7f2 | 165 | } |
1e59de90 TL |
166 | bool migration_write = !read_request && !m_ictx->migration_info.empty(); |
167 | if (migration_write) { | |
168 | raw_overlap = m_ictx->migration_info.overlap; | |
11fdf7f2 | 169 | } |
1e59de90 | 170 | if (raw_overlap == 0) { |
7c673cae FG |
171 | return false; |
172 | } | |
173 | ||
1e59de90 TL |
174 | std::tie(*parent_extents, *area) = io::util::object_to_area_extents( |
175 | m_ictx, m_object_no, {{0, m_ictx->layout.object_size}}); | |
176 | uint64_t object_overlap = m_ictx->prune_parent_extents( | |
177 | *parent_extents, *area, raw_overlap, migration_write); | |
7c673cae | 178 | if (object_overlap > 0) { |
1e59de90 | 179 | m_has_parent = true; |
7c673cae FG |
180 | return true; |
181 | } | |
182 | return false; | |
183 | } | |
184 | ||
b32b8144 FG |
185 | template <typename I> |
186 | void ObjectRequest<I>::async_finish(int r) { | |
187 | ldout(m_ictx->cct, 20) << "r=" << r << dendl; | |
f67539c2 | 188 | m_ictx->asio_engine->post([this, r]() { finish(r); }); |
b32b8144 FG |
189 | } |
190 | ||
191 | template <typename I> | |
192 | void ObjectRequest<I>::finish(int r) { | |
193 | ldout(m_ictx->cct, 20) << "r=" << r << dendl; | |
194 | m_completion->complete(r); | |
195 | delete this; | |
7c673cae FG |
196 | } |
197 | ||
198 | /** read **/ | |
199 | ||
200 | template <typename I> | |
9f95a23c | 201 | ObjectReadRequest<I>::ObjectReadRequest( |
f67539c2 TL |
202 | I *ictx, uint64_t objectno, ReadExtents* extents, |
203 | IOContext io_context, int op_flags, int read_flags, | |
204 | const ZTracer::Trace &parent_trace, uint64_t* version, | |
205 | Context *completion) | |
206 | : ObjectRequest<I>(ictx, objectno, io_context, "read", parent_trace, | |
207 | completion), | |
208 | m_extents(extents), m_op_flags(op_flags),m_read_flags(read_flags), | |
209 | m_version(version) { | |
7c673cae FG |
210 | } |
211 | ||
212 | template <typename I> | |
b32b8144 FG |
213 | void ObjectReadRequest<I>::send() { |
214 | I *image_ctx = this->m_ictx; | |
215 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 216 | |
11fdf7f2 | 217 | read_object(); |
b32b8144 FG |
218 | } |
219 | ||
220 | template <typename I> | |
221 | void ObjectReadRequest<I>::read_object() { | |
222 | I *image_ctx = this->m_ictx; | |
b32b8144 | 223 | |
f67539c2 TL |
224 | std::shared_lock image_locker{image_ctx->image_lock}; |
225 | auto read_snap_id = this->m_io_context->read_snap().value_or(CEPH_NOSNAP); | |
226 | if (read_snap_id == image_ctx->snap_id && | |
227 | image_ctx->object_map != nullptr && | |
228 | !image_ctx->object_map->object_may_exist(this->m_object_no)) { | |
229 | image_ctx->asio_engine->post([this]() { read_parent(); }); | |
230 | return; | |
7c673cae | 231 | } |
f67539c2 | 232 | image_locker.unlock(); |
7c673cae | 233 | |
f67539c2 | 234 | ldout(image_ctx->cct, 20) << "snap_id=" << read_snap_id << dendl; |
7c673cae | 235 | |
f67539c2 TL |
236 | neorados::ReadOp read_op; |
237 | for (auto& extent: *this->m_extents) { | |
238 | if (extent.length >= image_ctx->sparse_read_threshold_bytes) { | |
239 | read_op.sparse_read(extent.offset, extent.length, &extent.bl, | |
240 | &extent.extent_map); | |
241 | } else { | |
242 | read_op.read(extent.offset, extent.length, &extent.bl); | |
243 | } | |
244 | } | |
245 | util::apply_op_flags( | |
246 | m_op_flags, image_ctx->get_read_flags(read_snap_id), &read_op); | |
247 | ||
248 | image_ctx->rados_api.execute( | |
249 | {data_object_name(this->m_ictx, this->m_object_no)}, | |
250 | *this->m_io_context, std::move(read_op), nullptr, | |
251 | librbd::asio::util::get_callback_adapter( | |
252 | [this](int r) { handle_read_object(r); }), m_version, | |
253 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); | |
7c673cae FG |
254 | } |
255 | ||
256 | template <typename I> | |
b32b8144 FG |
257 | void ObjectReadRequest<I>::handle_read_object(int r) { |
258 | I *image_ctx = this->m_ictx; | |
259 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
f67539c2 TL |
260 | if (m_version != nullptr) { |
261 | ldout(image_ctx->cct, 20) << "version=" << *m_version << dendl; | |
262 | } | |
b32b8144 FG |
263 | |
264 | if (r == -ENOENT) { | |
265 | read_parent(); | |
266 | return; | |
267 | } else if (r < 0) { | |
268 | lderr(image_ctx->cct) << "failed to read from object: " | |
269 | << cpp_strerror(r) << dendl; | |
270 | this->finish(r); | |
271 | return; | |
272 | } | |
7c673cae | 273 | |
b32b8144 FG |
274 | this->finish(0); |
275 | } | |
276 | ||
277 | template <typename I> | |
278 | void ObjectReadRequest<I>::read_parent() { | |
f67539c2 TL |
279 | if ((m_read_flags & READ_FLAG_DISABLE_READ_FROM_PARENT) != 0) { |
280 | this->finish(-ENOENT); | |
281 | return; | |
282 | } | |
283 | ||
b32b8144 | 284 | I *image_ctx = this->m_ictx; |
b32b8144 FG |
285 | ldout(image_ctx->cct, 20) << dendl; |
286 | ||
f91f0fd5 TL |
287 | auto ctx = create_context_callback< |
288 | ObjectReadRequest<I>, &ObjectReadRequest<I>::handle_read_parent>(this); | |
289 | ||
f67539c2 TL |
290 | io::util::read_parent<I>( |
291 | image_ctx, this->m_object_no, this->m_extents, | |
292 | this->m_io_context->read_snap().value_or(CEPH_NOSNAP), this->m_trace, | |
293 | ctx); | |
b32b8144 FG |
294 | } |
295 | ||
296 | template <typename I> | |
297 | void ObjectReadRequest<I>::handle_read_parent(int r) { | |
298 | I *image_ctx = this->m_ictx; | |
299 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
300 | ||
301 | if (r == -ENOENT) { | |
302 | this->finish(r); | |
303 | return; | |
304 | } else if (r < 0) { | |
305 | lderr(image_ctx->cct) << "failed to read parent extents: " | |
306 | << cpp_strerror(r) << dendl; | |
307 | this->finish(r); | |
308 | return; | |
309 | } | |
310 | ||
311 | copyup(); | |
312 | } | |
313 | ||
314 | template <typename I> | |
315 | void ObjectReadRequest<I>::copyup() { | |
316 | I *image_ctx = this->m_ictx; | |
f67539c2 | 317 | if (!is_copy_on_read(image_ctx, this->m_io_context)) { |
b32b8144 FG |
318 | this->finish(0); |
319 | return; | |
320 | } | |
321 | ||
9f95a23c TL |
322 | image_ctx->owner_lock.lock_shared(); |
323 | image_ctx->image_lock.lock_shared(); | |
b32b8144 | 324 | Extents parent_extents; |
1e59de90 TL |
325 | ImageArea area; |
326 | if (!this->compute_parent_extents(&parent_extents, &area, true) || | |
b32b8144 FG |
327 | (image_ctx->exclusive_lock != nullptr && |
328 | !image_ctx->exclusive_lock->is_lock_owner())) { | |
9f95a23c TL |
329 | image_ctx->image_lock.unlock_shared(); |
330 | image_ctx->owner_lock.unlock_shared(); | |
b32b8144 FG |
331 | this->finish(0); |
332 | return; | |
333 | } | |
334 | ||
335 | ldout(image_ctx->cct, 20) << dendl; | |
336 | ||
9f95a23c | 337 | image_ctx->copyup_list_lock.lock(); |
b32b8144 | 338 | auto it = image_ctx->copyup_list.find(this->m_object_no); |
7c673cae FG |
339 | if (it == image_ctx->copyup_list.end()) { |
340 | // create and kick off a CopyupRequest | |
b32b8144 | 341 | auto new_req = CopyupRequest<I>::create( |
1e59de90 TL |
342 | image_ctx, this->m_object_no, std::move(parent_extents), area, |
343 | this->m_trace); | |
7c673cae FG |
344 | |
345 | image_ctx->copyup_list[this->m_object_no] = new_req; | |
9f95a23c TL |
346 | image_ctx->copyup_list_lock.unlock(); |
347 | image_ctx->image_lock.unlock_shared(); | |
7c673cae | 348 | new_req->send(); |
11fdf7f2 | 349 | } else { |
9f95a23c TL |
350 | image_ctx->copyup_list_lock.unlock(); |
351 | image_ctx->image_lock.unlock_shared(); | |
7c673cae | 352 | } |
7c673cae | 353 | |
9f95a23c | 354 | image_ctx->owner_lock.unlock_shared(); |
b32b8144 | 355 | this->finish(0); |
7c673cae FG |
356 | } |
357 | ||
358 | /** write **/ | |
359 | ||
b32b8144 FG |
360 | template <typename I> |
361 | AbstractObjectWriteRequest<I>::AbstractObjectWriteRequest( | |
9f95a23c | 362 | I *ictx, uint64_t object_no, uint64_t object_off, uint64_t len, |
f67539c2 | 363 | IOContext io_context, const char *trace_name, |
b32b8144 | 364 | const ZTracer::Trace &parent_trace, Context *completion) |
f67539c2 TL |
365 | : ObjectRequest<I>(ictx, object_no, io_context, trace_name, parent_trace, |
366 | completion), | |
367 | m_object_off(object_off), m_object_len(len) | |
7c673cae | 368 | { |
b32b8144 FG |
369 | if (this->m_object_off == 0 && |
370 | this->m_object_len == ictx->get_object_size()) { | |
371 | m_full_object = true; | |
372 | } | |
7c673cae | 373 | |
11fdf7f2 | 374 | compute_parent_info(); |
81eedcae | 375 | |
9f95a23c | 376 | ictx->image_lock.lock_shared(); |
81eedcae TL |
377 | if (!ictx->migration_info.empty()) { |
378 | m_guarding_migration_write = true; | |
379 | } | |
9f95a23c | 380 | ictx->image_lock.unlock_shared(); |
11fdf7f2 TL |
381 | } |
382 | ||
383 | template <typename I> | |
384 | void AbstractObjectWriteRequest<I>::compute_parent_info() { | |
385 | I *image_ctx = this->m_ictx; | |
9f95a23c | 386 | std::shared_lock image_locker{image_ctx->image_lock}; |
11fdf7f2 | 387 | |
1e59de90 | 388 | this->compute_parent_extents(&m_parent_extents, &m_image_area, false); |
11fdf7f2 | 389 | |
b32b8144 | 390 | if (!this->has_parent() || |
f67539c2 TL |
391 | (m_full_object && |
392 | !this->m_io_context->write_snap_context() && | |
393 | !is_post_copyup_write_required())) { | |
11fdf7f2 | 394 | m_copyup_enabled = false; |
7c673cae | 395 | } |
b32b8144 | 396 | } |
7c673cae | 397 | |
b32b8144 FG |
398 | template <typename I> |
399 | void AbstractObjectWriteRequest<I>::add_write_hint( | |
f67539c2 | 400 | neorados::WriteOp *wr) { |
b32b8144 | 401 | I *image_ctx = this->m_ictx; |
9f95a23c | 402 | std::shared_lock image_locker{image_ctx->image_lock}; |
f67539c2 TL |
403 | if (image_ctx->object_map == nullptr || !this->m_object_may_exist || |
404 | image_ctx->alloc_hint_flags != 0U) { | |
b32b8144 FG |
405 | ObjectRequest<I>::add_write_hint(*image_ctx, wr); |
406 | } | |
7c673cae FG |
407 | } |
408 | ||
b32b8144 FG |
409 | template <typename I> |
410 | void AbstractObjectWriteRequest<I>::send() { | |
411 | I *image_ctx = this->m_ictx; | |
9f95a23c | 412 | ldout(image_ctx->cct, 20) << this->get_op_type() << " " |
b32b8144 FG |
413 | << this->m_object_off << "~" << this->m_object_len |
414 | << dendl; | |
7c673cae | 415 | { |
9f95a23c | 416 | std::shared_lock image_lock{image_ctx->image_lock}; |
b32b8144 FG |
417 | if (image_ctx->object_map == nullptr) { |
418 | m_object_may_exist = true; | |
7c673cae FG |
419 | } else { |
420 | // should have been flushed prior to releasing lock | |
11fdf7f2 | 421 | ceph_assert(image_ctx->exclusive_lock->is_lock_owner()); |
b32b8144 FG |
422 | m_object_may_exist = image_ctx->object_map->object_may_exist( |
423 | this->m_object_no); | |
7c673cae FG |
424 | } |
425 | } | |
426 | ||
b32b8144 FG |
427 | if (!m_object_may_exist && is_no_op_for_nonexistent_object()) { |
428 | ldout(image_ctx->cct, 20) << "skipping no-op on nonexistent object" | |
429 | << dendl; | |
430 | this->async_finish(0); | |
431 | return; | |
7c673cae FG |
432 | } |
433 | ||
b32b8144 | 434 | pre_write_object_map_update(); |
7c673cae FG |
435 | } |
436 | ||
b32b8144 FG |
437 | template <typename I> |
438 | void AbstractObjectWriteRequest<I>::pre_write_object_map_update() { | |
439 | I *image_ctx = this->m_ictx; | |
7c673cae | 440 | |
9f95a23c | 441 | image_ctx->image_lock.lock_shared(); |
b32b8144 | 442 | if (image_ctx->object_map == nullptr || !is_object_map_update_enabled()) { |
9f95a23c | 443 | image_ctx->image_lock.unlock_shared(); |
b32b8144 FG |
444 | write_object(); |
445 | return; | |
7c673cae FG |
446 | } |
447 | ||
b32b8144 FG |
448 | if (!m_object_may_exist && m_copyup_enabled) { |
449 | // optimization: copyup required | |
9f95a23c | 450 | image_ctx->image_lock.unlock_shared(); |
b32b8144 FG |
451 | copyup(); |
452 | return; | |
453 | } | |
7c673cae | 454 | |
b32b8144 | 455 | uint8_t new_state = this->get_pre_write_object_map_state(); |
9f95a23c TL |
456 | ldout(image_ctx->cct, 20) << this->m_object_off << "~" << this->m_object_len |
457 | << dendl; | |
7c673cae | 458 | |
b32b8144 FG |
459 | if (image_ctx->object_map->template aio_update< |
460 | AbstractObjectWriteRequest<I>, | |
461 | &AbstractObjectWriteRequest<I>::handle_pre_write_object_map_update>( | |
91327a77 AA |
462 | CEPH_NOSNAP, this->m_object_no, new_state, {}, this->m_trace, false, |
463 | this)) { | |
9f95a23c | 464 | image_ctx->image_lock.unlock_shared(); |
b32b8144 | 465 | return; |
7c673cae FG |
466 | } |
467 | ||
9f95a23c | 468 | image_ctx->image_lock.unlock_shared(); |
b32b8144 | 469 | write_object(); |
7c673cae FG |
470 | } |
471 | ||
b32b8144 FG |
472 | template <typename I> |
473 | void AbstractObjectWriteRequest<I>::handle_pre_write_object_map_update(int r) { | |
474 | I *image_ctx = this->m_ictx; | |
475 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
11fdf7f2 TL |
476 | if (r < 0) { |
477 | lderr(image_ctx->cct) << "failed to update object map: " | |
478 | << cpp_strerror(r) << dendl; | |
479 | this->finish(r); | |
480 | return; | |
481 | } | |
7c673cae | 482 | |
b32b8144 | 483 | write_object(); |
7c673cae FG |
484 | } |
485 | ||
b32b8144 FG |
486 | template <typename I> |
487 | void AbstractObjectWriteRequest<I>::write_object() { | |
488 | I *image_ctx = this->m_ictx; | |
489 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 490 | |
f67539c2 | 491 | neorados::WriteOp write_op; |
b32b8144 | 492 | if (m_copyup_enabled) { |
81eedcae | 493 | if (m_guarding_migration_write) { |
f67539c2 TL |
494 | auto snap_seq = (this->m_io_context->write_snap_context() ? |
495 | this->m_io_context->write_snap_context()->first : 0); | |
496 | ldout(image_ctx->cct, 20) << "guarding write: snap_seq=" << snap_seq | |
497 | << dendl; | |
498 | ||
11fdf7f2 | 499 | cls_client::assert_snapc_seq( |
f67539c2 | 500 | &write_op, snap_seq, cls::rbd::ASSERT_SNAPC_SEQ_LE_SNAPSET_SEQ); |
11fdf7f2 | 501 | } else { |
f67539c2 TL |
502 | ldout(image_ctx->cct, 20) << "guarding write" << dendl; |
503 | write_op.assert_exists(); | |
11fdf7f2 | 504 | } |
7c673cae FG |
505 | } |
506 | ||
f67539c2 TL |
507 | add_write_hint(&write_op); |
508 | add_write_ops(&write_op); | |
509 | ceph_assert(write_op.size() != 0); | |
7c673cae | 510 | |
f67539c2 TL |
511 | image_ctx->rados_api.execute( |
512 | {data_object_name(this->m_ictx, this->m_object_no)}, | |
513 | *this->m_io_context, std::move(write_op), | |
514 | librbd::asio::util::get_callback_adapter( | |
515 | [this](int r) { handle_write_object(r); }), nullptr, | |
516 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); | |
7c673cae | 517 | } |
7c673cae | 518 | |
b32b8144 FG |
519 | template <typename I> |
520 | void AbstractObjectWriteRequest<I>::handle_write_object(int r) { | |
521 | I *image_ctx = this->m_ictx; | |
522 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
523 | ||
524 | r = filter_write_result(r); | |
525 | if (r == -ENOENT) { | |
526 | if (m_copyup_enabled) { | |
527 | copyup(); | |
528 | return; | |
529 | } | |
11fdf7f2 | 530 | } else if (r == -ERANGE && m_guarding_migration_write) { |
9f95a23c | 531 | image_ctx->image_lock.lock_shared(); |
81eedcae | 532 | m_guarding_migration_write = !image_ctx->migration_info.empty(); |
9f95a23c | 533 | image_ctx->image_lock.unlock_shared(); |
81eedcae TL |
534 | |
535 | if (m_guarding_migration_write) { | |
11fdf7f2 TL |
536 | copyup(); |
537 | } else { | |
538 | ldout(image_ctx->cct, 10) << "migration parent gone, restart io" << dendl; | |
11fdf7f2 TL |
539 | compute_parent_info(); |
540 | write_object(); | |
541 | } | |
542 | return; | |
b32b8144 FG |
543 | } else if (r == -EILSEQ) { |
544 | ldout(image_ctx->cct, 10) << "failed to write object" << dendl; | |
545 | this->finish(r); | |
546 | return; | |
547 | } else if (r < 0) { | |
548 | lderr(image_ctx->cct) << "failed to write object: " << cpp_strerror(r) | |
549 | << dendl; | |
550 | this->finish(r); | |
551 | return; | |
7c673cae FG |
552 | } |
553 | ||
b32b8144 | 554 | post_write_object_map_update(); |
7c673cae FG |
555 | } |
556 | ||
b32b8144 FG |
557 | template <typename I> |
558 | void AbstractObjectWriteRequest<I>::copyup() { | |
559 | I *image_ctx = this->m_ictx; | |
560 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 561 | |
11fdf7f2 | 562 | ceph_assert(!m_copyup_in_progress); |
b32b8144 | 563 | m_copyup_in_progress = true; |
7c673cae | 564 | |
9f95a23c | 565 | image_ctx->copyup_list_lock.lock(); |
b32b8144 FG |
566 | auto it = image_ctx->copyup_list.find(this->m_object_no); |
567 | if (it == image_ctx->copyup_list.end()) { | |
568 | auto new_req = CopyupRequest<I>::create( | |
1e59de90 TL |
569 | image_ctx, this->m_object_no, std::move(this->m_parent_extents), |
570 | m_image_area, this->m_trace); | |
b32b8144 FG |
571 | this->m_parent_extents.clear(); |
572 | ||
573 | // make sure to wait on this CopyupRequest | |
f67539c2 | 574 | new_req->append_request(this, std::move(get_copyup_overwrite_extents())); |
b32b8144 FG |
575 | image_ctx->copyup_list[this->m_object_no] = new_req; |
576 | ||
9f95a23c | 577 | image_ctx->copyup_list_lock.unlock(); |
b32b8144 | 578 | new_req->send(); |
31f18b77 | 579 | } else { |
f67539c2 | 580 | it->second->append_request(this, std::move(get_copyup_overwrite_extents())); |
9f95a23c | 581 | image_ctx->copyup_list_lock.unlock(); |
31f18b77 | 582 | } |
7c673cae FG |
583 | } |
584 | ||
b32b8144 FG |
585 | template <typename I> |
586 | void AbstractObjectWriteRequest<I>::handle_copyup(int r) { | |
587 | I *image_ctx = this->m_ictx; | |
588 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
589 | ||
11fdf7f2 | 590 | ceph_assert(m_copyup_in_progress); |
b32b8144 FG |
591 | m_copyup_in_progress = false; |
592 | ||
81eedcae | 593 | if (r < 0 && r != -ERESTART) { |
b32b8144 FG |
594 | lderr(image_ctx->cct) << "failed to copyup object: " << cpp_strerror(r) |
595 | << dendl; | |
596 | this->finish(r); | |
597 | return; | |
31f18b77 | 598 | } |
31f18b77 | 599 | |
81eedcae | 600 | if (r == -ERESTART || is_post_copyup_write_required()) { |
b32b8144 FG |
601 | write_object(); |
602 | return; | |
7c673cae | 603 | } |
b32b8144 FG |
604 | |
605 | post_write_object_map_update(); | |
7c673cae FG |
606 | } |
607 | ||
b32b8144 FG |
608 | template <typename I> |
609 | void AbstractObjectWriteRequest<I>::post_write_object_map_update() { | |
610 | I *image_ctx = this->m_ictx; | |
611 | ||
9f95a23c | 612 | image_ctx->image_lock.lock_shared(); |
b32b8144 FG |
613 | if (image_ctx->object_map == nullptr || !is_object_map_update_enabled() || |
614 | !is_non_existent_post_write_object_map_state()) { | |
9f95a23c | 615 | image_ctx->image_lock.unlock_shared(); |
b32b8144 FG |
616 | this->finish(0); |
617 | return; | |
7c673cae FG |
618 | } |
619 | ||
b32b8144 | 620 | ldout(image_ctx->cct, 20) << dendl; |
7c673cae | 621 | |
b32b8144 | 622 | // should have been flushed prior to releasing lock |
11fdf7f2 | 623 | ceph_assert(image_ctx->exclusive_lock->is_lock_owner()); |
b32b8144 FG |
624 | if (image_ctx->object_map->template aio_update< |
625 | AbstractObjectWriteRequest<I>, | |
626 | &AbstractObjectWriteRequest<I>::handle_post_write_object_map_update>( | |
627 | CEPH_NOSNAP, this->m_object_no, OBJECT_NONEXISTENT, OBJECT_PENDING, | |
91327a77 | 628 | this->m_trace, false, this)) { |
9f95a23c | 629 | image_ctx->image_lock.unlock_shared(); |
b32b8144 | 630 | return; |
7c673cae FG |
631 | } |
632 | ||
9f95a23c | 633 | image_ctx->image_lock.unlock_shared(); |
b32b8144 | 634 | this->finish(0); |
7c673cae FG |
635 | } |
636 | ||
b32b8144 FG |
637 | template <typename I> |
638 | void AbstractObjectWriteRequest<I>::handle_post_write_object_map_update(int r) { | |
639 | I *image_ctx = this->m_ictx; | |
640 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
11fdf7f2 TL |
641 | if (r < 0) { |
642 | lderr(image_ctx->cct) << "failed to update object map: " | |
643 | << cpp_strerror(r) << dendl; | |
644 | this->finish(r); | |
645 | return; | |
646 | } | |
c07f9fc5 | 647 | |
b32b8144 FG |
648 | this->finish(0); |
649 | } | |
c07f9fc5 | 650 | |
b32b8144 | 651 | template <typename I> |
f67539c2 TL |
652 | void ObjectWriteRequest<I>::add_write_hint(neorados::WriteOp* wr) { |
653 | if ((m_write_flags & OBJECT_WRITE_FLAG_CREATE_EXCLUSIVE) != 0) { | |
654 | wr->create(true); | |
655 | } else if (m_assert_version.has_value()) { | |
656 | wr->assert_version(m_assert_version.value()); | |
657 | } | |
658 | AbstractObjectWriteRequest<I>::add_write_hint(wr); | |
659 | } | |
660 | ||
661 | template <typename I> | |
662 | void ObjectWriteRequest<I>::add_write_ops(neorados::WriteOp* wr) { | |
b32b8144 | 663 | if (this->m_full_object) { |
f67539c2 | 664 | wr->write_full(bufferlist{m_write_data}); |
c07f9fc5 | 665 | } else { |
f67539c2 | 666 | wr->write(this->m_object_off, bufferlist{m_write_data}); |
c07f9fc5 | 667 | } |
f67539c2 | 668 | util::apply_op_flags(m_op_flags, 0U, wr); |
c07f9fc5 FG |
669 | } |
670 | ||
b32b8144 | 671 | template <typename I> |
f67539c2 TL |
672 | void ObjectDiscardRequest<I>::add_write_ops(neorados::WriteOp* wr) { |
673 | switch (m_discard_action) { | |
674 | case DISCARD_ACTION_REMOVE: | |
675 | wr->remove(); | |
676 | break; | |
677 | case DISCARD_ACTION_REMOVE_TRUNCATE: | |
678 | wr->create(false); | |
679 | // fall through | |
680 | case DISCARD_ACTION_TRUNCATE: | |
681 | wr->truncate(this->m_object_off); | |
682 | break; | |
683 | case DISCARD_ACTION_ZERO: | |
684 | wr->zero(this->m_object_off, this->m_object_len); | |
685 | break; | |
686 | default: | |
687 | ceph_abort(); | |
688 | break; | |
689 | } | |
c07f9fc5 FG |
690 | } |
691 | ||
b32b8144 | 692 | template <typename I> |
f67539c2 TL |
693 | void ObjectWriteSameRequest<I>::add_write_ops(neorados::WriteOp* wr) { |
694 | wr->writesame(this->m_object_off, this->m_object_len, | |
695 | bufferlist{m_write_data}); | |
696 | util::apply_op_flags(m_op_flags, 0U, wr); | |
697 | } | |
698 | ||
699 | template <typename I> | |
700 | void ObjectCompareAndWriteRequest<I>::add_write_ops(neorados::WriteOp* wr) { | |
701 | wr->cmpext(this->m_object_off, bufferlist{m_cmp_bl}, nullptr); | |
c07f9fc5 | 702 | |
b32b8144 | 703 | if (this->m_full_object) { |
f67539c2 | 704 | wr->write_full(bufferlist{m_write_bl}); |
b32b8144 | 705 | } else { |
f67539c2 | 706 | wr->write(this->m_object_off, bufferlist{m_write_bl}); |
b32b8144 | 707 | } |
f67539c2 | 708 | util::apply_op_flags(m_op_flags, 0U, wr); |
b32b8144 | 709 | } |
c07f9fc5 | 710 | |
b32b8144 FG |
711 | template <typename I> |
712 | int ObjectCompareAndWriteRequest<I>::filter_write_result(int r) const { | |
713 | if (r <= -MAX_ERRNO) { | |
714 | I *image_ctx = this->m_ictx; | |
b32b8144 FG |
715 | |
716 | // object extent compare mismatch | |
717 | uint64_t offset = -MAX_ERRNO - r; | |
1e59de90 TL |
718 | auto [image_extents, _] = io::util::object_to_area_extents( |
719 | image_ctx, this->m_object_no, {{offset, this->m_object_len}}); | |
11fdf7f2 | 720 | ceph_assert(image_extents.size() == 1); |
b32b8144 FG |
721 | |
722 | if (m_mismatch_offset) { | |
723 | *m_mismatch_offset = image_extents[0].first; | |
c07f9fc5 | 724 | } |
b32b8144 | 725 | r = -EILSEQ; |
c07f9fc5 | 726 | } |
b32b8144 | 727 | return r; |
c07f9fc5 FG |
728 | } |
729 | ||
f67539c2 TL |
730 | template <typename I> |
731 | ObjectListSnapsRequest<I>::ObjectListSnapsRequest( | |
732 | I *ictx, uint64_t objectno, Extents&& object_extents, SnapIds&& snap_ids, | |
733 | int list_snaps_flags, const ZTracer::Trace &parent_trace, | |
734 | SnapshotDelta* snapshot_delta, Context *completion) | |
735 | : ObjectRequest<I>( | |
736 | ictx, objectno, ictx->duplicate_data_io_context(), "snap_list", | |
737 | parent_trace, completion), | |
738 | m_object_extents(std::move(object_extents)), | |
739 | m_snap_ids(std::move(snap_ids)), m_list_snaps_flags(list_snaps_flags), | |
740 | m_snapshot_delta(snapshot_delta) { | |
741 | this->m_io_context->read_snap(CEPH_SNAPDIR); | |
742 | } | |
743 | ||
744 | template <typename I> | |
745 | void ObjectListSnapsRequest<I>::send() { | |
746 | I *image_ctx = this->m_ictx; | |
747 | ldout(image_ctx->cct, 20) << dendl; | |
748 | ||
749 | if (m_snap_ids.size() < 2) { | |
750 | lderr(image_ctx->cct) << "invalid snap ids: " << m_snap_ids << dendl; | |
751 | this->async_finish(-EINVAL); | |
752 | return; | |
753 | } | |
754 | ||
755 | list_snaps(); | |
756 | } | |
757 | ||
758 | template <typename I> | |
759 | void ObjectListSnapsRequest<I>::list_snaps() { | |
760 | I *image_ctx = this->m_ictx; | |
761 | ldout(image_ctx->cct, 20) << dendl; | |
762 | ||
763 | neorados::ReadOp read_op; | |
764 | read_op.list_snaps(&m_snap_set, &m_ec); | |
765 | ||
766 | image_ctx->rados_api.execute( | |
767 | {data_object_name(this->m_ictx, this->m_object_no)}, | |
768 | *this->m_io_context, std::move(read_op), nullptr, | |
769 | librbd::asio::util::get_callback_adapter( | |
770 | [this](int r) { handle_list_snaps(r); }), nullptr, | |
771 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); | |
772 | } | |
773 | ||
774 | template <typename I> | |
775 | void ObjectListSnapsRequest<I>::handle_list_snaps(int r) { | |
776 | I *image_ctx = this->m_ictx; | |
777 | auto cct = image_ctx->cct; | |
778 | ||
779 | if (r >= 0) { | |
780 | r = -m_ec.value(); | |
781 | } | |
782 | ||
783 | ldout(cct, 20) << "r=" << r << dendl; | |
784 | ||
785 | m_snapshot_delta->clear(); | |
786 | auto& snapshot_delta = *m_snapshot_delta; | |
787 | ||
788 | ceph_assert(!m_snap_ids.empty()); | |
789 | librados::snap_t start_snap_id = 0; | |
790 | librados::snap_t first_snap_id = *m_snap_ids.begin(); | |
791 | librados::snap_t last_snap_id = *m_snap_ids.rbegin(); | |
792 | ||
793 | if (r == -ENOENT) { | |
794 | // the object does not exist -- mark the missing extents | |
795 | zero_extent(first_snap_id, true); | |
796 | list_from_parent(); | |
797 | return; | |
798 | } else if (r < 0) { | |
799 | lderr(cct) << "failed to retrieve object snapshot list: " << cpp_strerror(r) | |
800 | << dendl; | |
801 | this->finish(r); | |
802 | return; | |
803 | } | |
804 | ||
805 | // helper function requires the librados legacy data structure | |
806 | librados::snap_set_t snap_set; | |
807 | convert_snap_set(m_snap_set, &snap_set); | |
808 | ||
809 | bool initial_extents_written = false; | |
810 | ||
811 | interval_set<uint64_t> object_interval; | |
812 | for (auto& object_extent : m_object_extents) { | |
813 | object_interval.insert(object_extent.first, object_extent.second); | |
814 | } | |
815 | ldout(cct, 20) << "object_interval=" << object_interval << dendl; | |
816 | ||
817 | // loop through all expected snapshots and build interval sets for | |
818 | // data and zeroed ranges for each snapshot | |
819 | uint64_t prev_end_size = 0; | |
820 | interval_set<uint64_t> initial_written_extents; | |
821 | for (auto end_snap_id : m_snap_ids) { | |
822 | if (start_snap_id == end_snap_id) { | |
823 | continue; | |
824 | } else if (end_snap_id > last_snap_id) { | |
825 | break; | |
826 | } | |
827 | ||
828 | interval_set<uint64_t> diff; | |
829 | uint64_t end_size; | |
830 | bool exists; | |
831 | librados::snap_t clone_end_snap_id; | |
832 | bool read_whole_object; | |
833 | calc_snap_set_diff(cct, snap_set, start_snap_id, | |
834 | end_snap_id, &diff, &end_size, &exists, | |
835 | &clone_end_snap_id, &read_whole_object); | |
836 | ||
837 | if (read_whole_object || | |
838 | (!diff.empty() && | |
839 | ((m_list_snaps_flags & LIST_SNAPS_FLAG_WHOLE_OBJECT) != 0))) { | |
840 | ldout(cct, 1) << "need to read full object" << dendl; | |
841 | diff.clear(); | |
842 | diff.insert(0, image_ctx->layout.object_size); | |
843 | end_size = image_ctx->layout.object_size; | |
844 | clone_end_snap_id = end_snap_id; | |
845 | } else if (!exists) { | |
846 | end_size = 0; | |
847 | } | |
848 | ||
849 | if (exists) { | |
850 | // reads should be issued against the newest (existing) snapshot within | |
851 | // the associated snapshot object clone. writes should be issued | |
852 | // against the oldest snapshot in the snap_map. | |
853 | ceph_assert(clone_end_snap_id >= end_snap_id); | |
854 | if (clone_end_snap_id > last_snap_id) { | |
855 | // do not read past the copy point snapshot | |
856 | clone_end_snap_id = last_snap_id; | |
857 | } | |
858 | } | |
859 | ||
860 | // clip diff to current object extent | |
861 | interval_set<uint64_t> diff_interval; | |
862 | diff_interval.intersection_of(object_interval, diff); | |
863 | ||
864 | // clip diff to size of object (in case it was truncated) | |
865 | interval_set<uint64_t> zero_interval; | |
866 | if (end_size < prev_end_size) { | |
867 | zero_interval.insert(end_size, prev_end_size - end_size); | |
868 | zero_interval.intersection_of(object_interval); | |
869 | ||
870 | interval_set<uint64_t> trunc_interval; | |
871 | trunc_interval.intersection_of(zero_interval, diff_interval); | |
872 | if (!trunc_interval.empty()) { | |
873 | diff_interval.subtract(trunc_interval); | |
874 | ldout(cct, 20) << "clearing truncate diff: " << trunc_interval << dendl; | |
875 | } | |
876 | } | |
877 | ||
878 | ldout(cct, 20) << "start_snap_id=" << start_snap_id << ", " | |
879 | << "end_snap_id=" << end_snap_id << ", " | |
880 | << "clone_end_snap_id=" << clone_end_snap_id << ", " | |
881 | << "diff=" << diff << ", " | |
882 | << "diff_interval=" << diff_interval<< ", " | |
883 | << "zero_interval=" << zero_interval<< ", " | |
884 | << "end_size=" << end_size << ", " | |
885 | << "prev_end_size=" << prev_end_size << ", " | |
886 | << "exists=" << exists << ", " | |
887 | << "whole_object=" << read_whole_object << dendl; | |
888 | ||
889 | // check if object exists prior to start of incremental snap delta so that | |
890 | // we don't DNE the object if no additional deltas exist | |
891 | if (exists && start_snap_id == 0 && | |
892 | (!diff_interval.empty() || !zero_interval.empty())) { | |
893 | ldout(cct, 20) << "object exists at snap id " << end_snap_id << dendl; | |
894 | initial_extents_written = true; | |
895 | } | |
896 | ||
897 | prev_end_size = end_size; | |
898 | start_snap_id = end_snap_id; | |
899 | ||
900 | if (end_snap_id <= first_snap_id) { | |
901 | // don't include deltas from the starting snapshots, but we iterate over | |
902 | // it to track its existence and size | |
903 | ldout(cct, 20) << "skipping prior snapshot " << dendl; | |
904 | continue; | |
905 | } | |
906 | ||
907 | if (exists) { | |
908 | for (auto& interval : diff_interval) { | |
909 | snapshot_delta[{end_snap_id, clone_end_snap_id}].insert( | |
910 | interval.first, interval.second, | |
911 | SparseExtent(SPARSE_EXTENT_STATE_DATA, interval.second)); | |
912 | } | |
913 | } else { | |
914 | zero_interval.union_of(diff_interval); | |
915 | } | |
916 | ||
917 | if ((m_list_snaps_flags & LIST_SNAPS_FLAG_IGNORE_ZEROED_EXTENTS) == 0) { | |
918 | for (auto& interval : zero_interval) { | |
919 | snapshot_delta[{end_snap_id, end_snap_id}].insert( | |
920 | interval.first, interval.second, | |
921 | SparseExtent(SPARSE_EXTENT_STATE_ZEROED, interval.second)); | |
922 | } | |
923 | } | |
924 | } | |
925 | ||
926 | bool snapshot_delta_empty = snapshot_delta.empty(); | |
927 | if (!initial_extents_written) { | |
928 | zero_extent(first_snap_id, first_snap_id > 0); | |
929 | } | |
930 | ldout(cct, 20) << "snapshot_delta=" << snapshot_delta << dendl; | |
931 | ||
932 | if (snapshot_delta_empty) { | |
933 | list_from_parent(); | |
934 | return; | |
935 | } | |
936 | ||
937 | this->finish(0); | |
938 | } | |
939 | ||
940 | template <typename I> | |
941 | void ObjectListSnapsRequest<I>::list_from_parent() { | |
942 | I *image_ctx = this->m_ictx; | |
943 | auto cct = image_ctx->cct; | |
944 | ||
945 | ceph_assert(!m_snap_ids.empty()); | |
946 | librados::snap_t snap_id_start = *m_snap_ids.begin(); | |
947 | librados::snap_t snap_id_end = *m_snap_ids.rbegin(); | |
948 | ||
949 | std::unique_lock image_locker{image_ctx->image_lock}; | |
950 | if ((snap_id_start > 0) || (image_ctx->parent == nullptr) || | |
951 | ((m_list_snaps_flags & LIST_SNAPS_FLAG_DISABLE_LIST_FROM_PARENT) != 0)) { | |
952 | image_locker.unlock(); | |
953 | ||
954 | this->finish(0); | |
955 | return; | |
956 | } | |
957 | ||
1e59de90 TL |
958 | Extents parent_extents; |
959 | uint64_t raw_overlap = 0; | |
f67539c2 | 960 | uint64_t object_overlap = 0; |
1e59de90 TL |
961 | image_ctx->get_parent_overlap(snap_id_end, &raw_overlap); |
962 | if (raw_overlap > 0) { | |
963 | // calculate reverse mapping onto the parent image | |
964 | std::tie(parent_extents, m_image_area) = io::util::object_to_area_extents( | |
965 | image_ctx, this->m_object_no, m_object_extents); | |
966 | object_overlap = image_ctx->prune_parent_extents( | |
967 | parent_extents, m_image_area, raw_overlap, false); | |
f67539c2 | 968 | } |
f67539c2 TL |
969 | if (object_overlap == 0) { |
970 | image_locker.unlock(); | |
971 | ||
972 | this->finish(0); | |
973 | return; | |
974 | } | |
975 | ||
976 | auto ctx = create_context_callback< | |
977 | ObjectListSnapsRequest<I>, | |
978 | &ObjectListSnapsRequest<I>::handle_list_from_parent>(this); | |
979 | auto aio_comp = AioCompletion::create_and_start( | |
980 | ctx, librbd::util::get_image_ctx(image_ctx->parent), AIO_TYPE_GENERIC); | |
1e59de90 TL |
981 | ldout(cct, 20) << "completion=" << aio_comp |
982 | << " parent_extents=" << parent_extents | |
983 | << " area=" << m_image_area << dendl; | |
f67539c2 TL |
984 | |
985 | auto list_snaps_flags = ( | |
986 | m_list_snaps_flags | LIST_SNAPS_FLAG_IGNORE_ZEROED_EXTENTS); | |
987 | ||
988 | ImageListSnapsRequest<I> req( | |
1e59de90 | 989 | *image_ctx->parent, aio_comp, std::move(parent_extents), m_image_area, |
f67539c2 TL |
990 | {0, image_ctx->parent->snap_id}, list_snaps_flags, &m_parent_snapshot_delta, |
991 | this->m_trace); | |
992 | req.send(); | |
993 | } | |
994 | ||
995 | template <typename I> | |
996 | void ObjectListSnapsRequest<I>::handle_list_from_parent(int r) { | |
997 | I *image_ctx = this->m_ictx; | |
998 | auto cct = image_ctx->cct; | |
999 | ||
1000 | ldout(cct, 20) << "r=" << r << ", " | |
1001 | << "parent_snapshot_delta=" << m_parent_snapshot_delta | |
1002 | << dendl; | |
1003 | ||
1004 | // ignore special-case of fully empty dataset (we ignore zeroes) | |
1005 | if (m_parent_snapshot_delta.empty()) { | |
1006 | this->finish(0); | |
1007 | return; | |
1008 | } | |
1009 | ||
1010 | // the write/read snapshot id key is not useful for parent images so | |
1011 | // map the the special-case INITIAL_WRITE_READ_SNAP_IDS key | |
1012 | *m_snapshot_delta = {}; | |
1013 | auto& intervals = (*m_snapshot_delta)[INITIAL_WRITE_READ_SNAP_IDS]; | |
1014 | for (auto& [key, image_extents] : m_parent_snapshot_delta) { | |
1015 | for (auto image_extent : image_extents) { | |
1016 | auto state = image_extent.get_val().state; | |
1017 | ||
1018 | // map image-extents back to this object | |
1019 | striper::LightweightObjectExtents object_extents; | |
1e59de90 TL |
1020 | io::util::area_to_object_extents(image_ctx, image_extent.get_off(), |
1021 | image_extent.get_len(), m_image_area, 0, | |
1022 | &object_extents); | |
f67539c2 TL |
1023 | for (auto& object_extent : object_extents) { |
1024 | ceph_assert(object_extent.object_no == this->m_object_no); | |
1025 | intervals.insert( | |
1026 | object_extent.offset, object_extent.length, | |
1027 | {state, object_extent.length}); | |
1028 | } | |
1029 | } | |
1030 | } | |
1031 | ||
1032 | ldout(cct, 20) << "snapshot_delta=" << *m_snapshot_delta << dendl; | |
1033 | this->finish(0); | |
1034 | } | |
1035 | ||
1036 | template <typename I> | |
1037 | void ObjectListSnapsRequest<I>::zero_extent(uint64_t snap_id, bool dne) { | |
1038 | I *image_ctx = this->m_ictx; | |
1039 | auto cct = image_ctx->cct; | |
1040 | ||
1041 | // the object does not exist or is (partially) under whiteout -- mark the | |
1042 | // missing extents which would be any portion of the object that does not | |
1043 | // have data in the initial snapshot set | |
1044 | if ((m_list_snaps_flags & LIST_SNAPS_FLAG_IGNORE_ZEROED_EXTENTS) == 0) { | |
1045 | interval_set<uint64_t> interval; | |
1046 | for (auto [object_offset, object_length] : m_object_extents) { | |
1047 | interval.insert(object_offset, object_length); | |
1048 | } | |
1049 | ||
1050 | for (auto [offset, length] : interval) { | |
1051 | ldout(cct, 20) << "snapshot " << snap_id << ": " | |
1052 | << (dne ? "DNE" : "zeroed") << " extent " | |
1053 | << offset << "~" << length << dendl; | |
1054 | (*m_snapshot_delta)[{snap_id, snap_id}].insert( | |
1055 | offset, length, | |
1056 | SparseExtent( | |
1057 | (dne ? SPARSE_EXTENT_STATE_DNE : SPARSE_EXTENT_STATE_ZEROED), | |
1058 | length)); | |
1059 | } | |
1060 | } | |
1061 | } | |
1062 | ||
7c673cae FG |
1063 | } // namespace io |
1064 | } // namespace librbd | |
1065 | ||
// Explicit instantiations of the object request class templates for
// librbd::ImageCtx.
1066 | template class librbd::io::ObjectRequest<librbd::ImageCtx>; | |
1067 | template class librbd::io::ObjectReadRequest<librbd::ImageCtx>; | |
b32b8144 FG |
1068 | template class librbd::io::AbstractObjectWriteRequest<librbd::ImageCtx>; |
1069 | template class librbd::io::ObjectWriteRequest<librbd::ImageCtx>; | |
1070 | template class librbd::io::ObjectDiscardRequest<librbd::ImageCtx>; | |
1071 | template class librbd::io::ObjectWriteSameRequest<librbd::ImageCtx>; | |
1072 | template class librbd::io::ObjectCompareAndWriteRequest<librbd::ImageCtx>; | |
f67539c2 | 1073 | template class librbd::io::ObjectListSnapsRequest<librbd::ImageCtx>; |