]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ObjectRequest.h" | |
5 | #include "common/ceph_context.h" | |
6 | #include "common/dout.h" | |
7 | #include "common/errno.h" | |
8 | #include "common/Mutex.h" | |
9 | #include "common/RWLock.h" | |
10 | #include "common/WorkQueue.h" | |
11 | #include "include/Context.h" | |
c07f9fc5 | 12 | #include "include/err.h" |
11fdf7f2 | 13 | #include "osd/osd_types.h" |
7c673cae FG |
14 | |
15 | #include "librbd/ExclusiveLock.h" | |
16 | #include "librbd/ImageCtx.h" | |
17 | #include "librbd/ObjectMap.h" | |
18 | #include "librbd/Utils.h" | |
19 | #include "librbd/io/AioCompletion.h" | |
20 | #include "librbd/io/CopyupRequest.h" | |
21 | #include "librbd/io/ImageRequest.h" | |
22 | #include "librbd/io/ReadResult.h" | |
23 | ||
24 | #include <boost/bind.hpp> | |
25 | #include <boost/optional.hpp> | |
26 | ||
27 | #define dout_subsys ceph_subsys_rbd | |
28 | #undef dout_prefix | |
29 | #define dout_prefix *_dout << "librbd::io::ObjectRequest: " << this \ | |
30 | << " " << __func__ << ": " | |
31 | ||
32 | namespace librbd { | |
33 | namespace io { | |
34 | ||
b32b8144 | 35 | namespace { |
7c673cae FG |
36 | |
37 | template <typename I> | |
b32b8144 FG |
38 | inline bool is_copy_on_read(I *ictx, librados::snap_t snap_id) { |
39 | RWLock::RLocker snap_locker(ictx->snap_lock); | |
40 | return (ictx->clone_copy_on_read && | |
41 | !ictx->read_only && snap_id == CEPH_NOSNAP && | |
42 | (ictx->exclusive_lock == nullptr || | |
43 | ictx->exclusive_lock->is_lock_owner())); | |
7c673cae FG |
44 | } |
45 | ||
b32b8144 | 46 | } // anonymous namespace |
3efd9988 | 47 | |
7c673cae FG |
48 | template <typename I> |
49 | ObjectRequest<I>* | |
50 | ObjectRequest<I>::create_write(I *ictx, const std::string &oid, | |
51 | uint64_t object_no, uint64_t object_off, | |
11fdf7f2 | 52 | ceph::bufferlist&& data, |
31f18b77 FG |
53 | const ::SnapContext &snapc, int op_flags, |
54 | const ZTracer::Trace &parent_trace, | |
55 | Context *completion) { | |
11fdf7f2 TL |
56 | return new ObjectWriteRequest<I>(ictx, oid, object_no, object_off, |
57 | std::move(data), snapc, op_flags, | |
58 | parent_trace, completion); | |
7c673cae FG |
59 | } |
60 | ||
61 | template <typename I> | |
62 | ObjectRequest<I>* | |
b32b8144 FG |
63 | ObjectRequest<I>::create_discard(I *ictx, const std::string &oid, |
64 | uint64_t object_no, uint64_t object_off, | |
65 | uint64_t object_len, | |
66 | const ::SnapContext &snapc, | |
11fdf7f2 | 67 | int discard_flags, |
b32b8144 FG |
68 | const ZTracer::Trace &parent_trace, |
69 | Context *completion) { | |
70 | return new ObjectDiscardRequest<I>(ictx, oid, object_no, object_off, | |
11fdf7f2 TL |
71 | object_len, snapc, discard_flags, |
72 | parent_trace, completion); | |
7c673cae FG |
73 | } |
74 | ||
75 | template <typename I> | |
76 | ObjectRequest<I>* | |
11fdf7f2 | 77 | ObjectRequest<I>::create_write_same(I *ictx, const std::string &oid, |
7c673cae FG |
78 | uint64_t object_no, uint64_t object_off, |
79 | uint64_t object_len, | |
11fdf7f2 | 80 | ceph::bufferlist&& data, |
31f18b77 FG |
81 | const ::SnapContext &snapc, int op_flags, |
82 | const ZTracer::Trace &parent_trace, | |
83 | Context *completion) { | |
b32b8144 | 84 | return new ObjectWriteSameRequest<I>(ictx, oid, object_no, object_off, |
11fdf7f2 TL |
85 | object_len, std::move(data), snapc, |
86 | op_flags, parent_trace, completion); | |
7c673cae FG |
87 | } |
88 | ||
c07f9fc5 FG |
89 | template <typename I> |
90 | ObjectRequest<I>* | |
91 | ObjectRequest<I>::create_compare_and_write(I *ictx, const std::string &oid, | |
b32b8144 FG |
92 | uint64_t object_no, |
93 | uint64_t object_off, | |
11fdf7f2 TL |
94 | ceph::bufferlist&& cmp_data, |
95 | ceph::bufferlist&& write_data, | |
c07f9fc5 FG |
96 | const ::SnapContext &snapc, |
97 | uint64_t *mismatch_offset, | |
98 | int op_flags, | |
99 | const ZTracer::Trace &parent_trace, | |
100 | Context *completion) { | |
b32b8144 | 101 | return new ObjectCompareAndWriteRequest<I>(ictx, oid, object_no, object_off, |
11fdf7f2 TL |
102 | std::move(cmp_data), |
103 | std::move(write_data), snapc, | |
b32b8144 FG |
104 | mismatch_offset, op_flags, |
105 | parent_trace, completion); | |
c07f9fc5 FG |
106 | } |
107 | ||
7c673cae | 108 | template <typename I> |
b32b8144 | 109 | ObjectRequest<I>::ObjectRequest(I *ictx, const std::string &oid, |
7c673cae FG |
110 | uint64_t objectno, uint64_t off, |
111 | uint64_t len, librados::snap_t snap_id, | |
b32b8144 FG |
112 | const char *trace_name, |
113 | const ZTracer::Trace &trace, | |
31f18b77 | 114 | Context *completion) |
7c673cae FG |
115 | : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), |
116 | m_object_len(len), m_snap_id(snap_id), m_completion(completion), | |
31f18b77 FG |
117 | m_trace(util::create_trace(*ictx, "", trace)) { |
118 | if (m_trace.valid()) { | |
119 | m_trace.copy_name(trace_name + std::string(" ") + oid); | |
120 | m_trace.event("start"); | |
121 | } | |
7c673cae FG |
122 | } |
123 | ||
124 | template <typename I> | |
b32b8144 FG |
125 | void ObjectRequest<I>::add_write_hint(I& image_ctx, |
126 | librados::ObjectWriteOperation *wr) { | |
127 | if (image_ctx.enable_alloc_hint) { | |
128 | wr->set_alloc_hint(image_ctx.get_object_size(), | |
129 | image_ctx.get_object_size()); | |
7c673cae FG |
130 | } |
131 | } | |
132 | ||
133 | template <typename I> | |
11fdf7f2 TL |
134 | bool ObjectRequest<I>::compute_parent_extents(Extents *parent_extents, |
135 | bool read_request) { | |
136 | ceph_assert(m_ictx->snap_lock.is_locked()); | |
137 | ceph_assert(m_ictx->parent_lock.is_locked()); | |
7c673cae | 138 | |
b32b8144 FG |
139 | m_has_parent = false; |
140 | parent_extents->clear(); | |
141 | ||
7c673cae FG |
142 | uint64_t parent_overlap; |
143 | int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); | |
144 | if (r < 0) { | |
145 | // NOTE: it's possible for a snapshot to be deleted while we are | |
146 | // still reading from it | |
147 | lderr(m_ictx->cct) << "failed to retrieve parent overlap: " | |
b32b8144 FG |
148 | << cpp_strerror(r) << dendl; |
149 | return false; | |
11fdf7f2 TL |
150 | } |
151 | ||
152 | if (!read_request && !m_ictx->migration_info.empty()) { | |
153 | parent_overlap = m_ictx->migration_info.overlap; | |
154 | } | |
155 | ||
156 | if (parent_overlap == 0) { | |
7c673cae FG |
157 | return false; |
158 | } | |
159 | ||
b32b8144 FG |
160 | Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, 0, |
161 | m_ictx->layout.object_size, *parent_extents); | |
162 | uint64_t object_overlap = m_ictx->prune_parent_extents(*parent_extents, | |
163 | parent_overlap); | |
7c673cae FG |
164 | if (object_overlap > 0) { |
165 | ldout(m_ictx->cct, 20) << "overlap " << parent_overlap << " " | |
b32b8144 FG |
166 | << "extents " << *parent_extents << dendl; |
167 | m_has_parent = !parent_extents->empty(); | |
7c673cae FG |
168 | return true; |
169 | } | |
170 | return false; | |
171 | } | |
172 | ||
b32b8144 FG |
173 | template <typename I> |
174 | void ObjectRequest<I>::async_finish(int r) { | |
175 | ldout(m_ictx->cct, 20) << "r=" << r << dendl; | |
176 | m_ictx->op_work_queue->queue(util::create_context_callback< | |
177 | ObjectRequest<I>, &ObjectRequest<I>::finish>(this), r); | |
178 | } | |
179 | ||
180 | template <typename I> | |
181 | void ObjectRequest<I>::finish(int r) { | |
182 | ldout(m_ictx->cct, 20) << "r=" << r << dendl; | |
183 | m_completion->complete(r); | |
184 | delete this; | |
7c673cae FG |
185 | } |
186 | ||
187 | /** read **/ | |
188 | ||
189 | template <typename I> | |
190 | ObjectReadRequest<I>::ObjectReadRequest(I *ictx, const std::string &oid, | |
191 | uint64_t objectno, uint64_t offset, | |
b32b8144 | 192 | uint64_t len, librados::snap_t snap_id, |
11fdf7f2 | 193 | int op_flags, |
b32b8144 | 194 | const ZTracer::Trace &parent_trace, |
11fdf7f2 TL |
195 | bufferlist* read_data, |
196 | ExtentMap* extent_map, | |
31f18b77 | 197 | Context *completion) |
b32b8144 FG |
198 | : ObjectRequest<I>(ictx, oid, objectno, offset, len, snap_id, "read", |
199 | parent_trace, completion), | |
11fdf7f2 | 200 | m_op_flags(op_flags), m_read_data(read_data), m_extent_map(extent_map) { |
7c673cae FG |
201 | } |
202 | ||
203 | template <typename I> | |
b32b8144 FG |
204 | void ObjectReadRequest<I>::send() { |
205 | I *image_ctx = this->m_ictx; | |
206 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 207 | |
11fdf7f2 | 208 | read_object(); |
b32b8144 FG |
209 | } |
210 | ||
211 | template <typename I> | |
212 | void ObjectReadRequest<I>::read_object() { | |
213 | I *image_ctx = this->m_ictx; | |
7c673cae FG |
214 | { |
215 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
7c673cae FG |
216 | if (image_ctx->object_map != nullptr && |
217 | !image_ctx->object_map->object_may_exist(this->m_object_no)) { | |
b32b8144 FG |
218 | image_ctx->op_work_queue->queue(new FunctionContext([this](int r) { |
219 | read_parent(); | |
220 | }), 0); | |
7c673cae FG |
221 | return; |
222 | } | |
223 | } | |
224 | ||
b32b8144 FG |
225 | ldout(image_ctx->cct, 20) << dendl; |
226 | ||
7c673cae | 227 | librados::ObjectReadOperation op; |
b32b8144 | 228 | if (this->m_object_len >= image_ctx->sparse_read_threshold_bytes) { |
11fdf7f2 TL |
229 | op.sparse_read(this->m_object_off, this->m_object_len, m_extent_map, |
230 | m_read_data, nullptr); | |
7c673cae | 231 | } else { |
11fdf7f2 | 232 | op.read(this->m_object_off, this->m_object_len, m_read_data, nullptr); |
7c673cae FG |
233 | } |
234 | op.set_op_flags2(m_op_flags); | |
235 | ||
b32b8144 FG |
236 | librados::AioCompletion *rados_completion = util::create_rados_callback< |
237 | ObjectReadRequest<I>, &ObjectReadRequest<I>::handle_read_object>(this); | |
238 | int flags = image_ctx->get_read_flags(this->m_snap_id); | |
31f18b77 FG |
239 | int r = image_ctx->data_ctx.aio_operate( |
240 | this->m_oid, rados_completion, &op, flags, nullptr, | |
241 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); | |
11fdf7f2 | 242 | ceph_assert(r == 0); |
7c673cae FG |
243 | |
244 | rados_completion->release(); | |
245 | } | |
246 | ||
247 | template <typename I> | |
b32b8144 FG |
248 | void ObjectReadRequest<I>::handle_read_object(int r) { |
249 | I *image_ctx = this->m_ictx; | |
250 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
251 | ||
252 | if (r == -ENOENT) { | |
253 | read_parent(); | |
254 | return; | |
255 | } else if (r < 0) { | |
256 | lderr(image_ctx->cct) << "failed to read from object: " | |
257 | << cpp_strerror(r) << dendl; | |
258 | this->finish(r); | |
259 | return; | |
260 | } | |
7c673cae | 261 | |
b32b8144 FG |
262 | this->finish(0); |
263 | } | |
264 | ||
265 | template <typename I> | |
266 | void ObjectReadRequest<I>::read_parent() { | |
267 | I *image_ctx = this->m_ictx; | |
b32b8144 FG |
268 | |
269 | uint64_t object_overlap = 0; | |
270 | Extents parent_extents; | |
7c673cae FG |
271 | { |
272 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
273 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
b32b8144 FG |
274 | |
275 | // calculate reverse mapping onto the image | |
276 | Striper::extent_to_file(image_ctx->cct, &image_ctx->layout, | |
277 | this->m_object_no, this->m_object_off, | |
278 | this->m_object_len, parent_extents); | |
279 | ||
280 | uint64_t parent_overlap = 0; | |
281 | int r = image_ctx->get_parent_overlap(this->m_snap_id, &parent_overlap); | |
282 | if (r == 0) { | |
283 | object_overlap = image_ctx->prune_parent_extents(parent_extents, | |
284 | parent_overlap); | |
7c673cae FG |
285 | } |
286 | } | |
287 | ||
b32b8144 FG |
288 | if (object_overlap == 0) { |
289 | this->finish(-ENOENT); | |
290 | return; | |
291 | } | |
292 | ||
293 | ldout(image_ctx->cct, 20) << dendl; | |
294 | ||
295 | AioCompletion *parent_completion = AioCompletion::create_and_start< | |
296 | ObjectReadRequest<I>, &ObjectReadRequest<I>::handle_read_parent>( | |
297 | this, util::get_image_ctx(image_ctx->parent), AIO_TYPE_READ); | |
298 | ImageRequest<I>::aio_read(image_ctx->parent, parent_completion, | |
11fdf7f2 | 299 | std::move(parent_extents), ReadResult{m_read_data}, |
b32b8144 FG |
300 | 0, this->m_trace); |
301 | } | |
302 | ||
303 | template <typename I> | |
304 | void ObjectReadRequest<I>::handle_read_parent(int r) { | |
305 | I *image_ctx = this->m_ictx; | |
306 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
307 | ||
308 | if (r == -ENOENT) { | |
309 | this->finish(r); | |
310 | return; | |
311 | } else if (r < 0) { | |
312 | lderr(image_ctx->cct) << "failed to read parent extents: " | |
313 | << cpp_strerror(r) << dendl; | |
314 | this->finish(r); | |
315 | return; | |
316 | } | |
317 | ||
318 | copyup(); | |
319 | } | |
320 | ||
321 | template <typename I> | |
322 | void ObjectReadRequest<I>::copyup() { | |
323 | I *image_ctx = this->m_ictx; | |
324 | if (!is_copy_on_read(image_ctx, this->m_snap_id)) { | |
325 | this->finish(0); | |
326 | return; | |
327 | } | |
328 | ||
329 | image_ctx->owner_lock.get_read(); | |
330 | image_ctx->snap_lock.get_read(); | |
331 | image_ctx->parent_lock.get_read(); | |
332 | Extents parent_extents; | |
11fdf7f2 | 333 | if (!this->compute_parent_extents(&parent_extents, true) || |
b32b8144 FG |
334 | (image_ctx->exclusive_lock != nullptr && |
335 | !image_ctx->exclusive_lock->is_lock_owner())) { | |
336 | image_ctx->parent_lock.put_read(); | |
337 | image_ctx->snap_lock.put_read(); | |
338 | image_ctx->owner_lock.put_read(); | |
339 | this->finish(0); | |
340 | return; | |
341 | } | |
342 | ||
343 | ldout(image_ctx->cct, 20) << dendl; | |
344 | ||
11fdf7f2 | 345 | image_ctx->copyup_list_lock.Lock(); |
b32b8144 | 346 | auto it = image_ctx->copyup_list.find(this->m_object_no); |
7c673cae FG |
347 | if (it == image_ctx->copyup_list.end()) { |
348 | // create and kick off a CopyupRequest | |
b32b8144 FG |
349 | auto new_req = CopyupRequest<I>::create( |
350 | image_ctx, this->m_oid, this->m_object_no, std::move(parent_extents), | |
351 | this->m_trace); | |
7c673cae FG |
352 | |
353 | image_ctx->copyup_list[this->m_object_no] = new_req; | |
11fdf7f2 TL |
354 | image_ctx->copyup_list_lock.Unlock(); |
355 | image_ctx->parent_lock.put_read(); | |
356 | image_ctx->snap_lock.put_read(); | |
7c673cae | 357 | new_req->send(); |
11fdf7f2 TL |
358 | } else { |
359 | image_ctx->copyup_list_lock.Unlock(); | |
360 | image_ctx->parent_lock.put_read(); | |
361 | image_ctx->snap_lock.put_read(); | |
7c673cae | 362 | } |
7c673cae | 363 | |
b32b8144 FG |
364 | image_ctx->owner_lock.put_read(); |
365 | this->finish(0); | |
7c673cae FG |
366 | } |
367 | ||
368 | /** write **/ | |
369 | ||
b32b8144 FG |
370 | template <typename I> |
371 | AbstractObjectWriteRequest<I>::AbstractObjectWriteRequest( | |
372 | I *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, | |
373 | uint64_t len, const ::SnapContext &snapc, const char *trace_name, | |
374 | const ZTracer::Trace &parent_trace, Context *completion) | |
375 | : ObjectRequest<I>(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, | |
376 | trace_name, parent_trace, completion), | |
377 | m_snap_seq(snapc.seq.val) | |
7c673cae FG |
378 | { |
379 | m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); | |
7c673cae | 380 | |
b32b8144 FG |
381 | if (this->m_object_off == 0 && |
382 | this->m_object_len == ictx->get_object_size()) { | |
383 | m_full_object = true; | |
384 | } | |
7c673cae | 385 | |
11fdf7f2 TL |
386 | compute_parent_info(); |
387 | } | |
388 | ||
389 | template <typename I> | |
390 | void AbstractObjectWriteRequest<I>::compute_parent_info() { | |
391 | I *image_ctx = this->m_ictx; | |
392 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
393 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
394 | ||
395 | this->compute_parent_extents(&m_parent_extents, false); | |
396 | ||
b32b8144 FG |
397 | if (!this->has_parent() || |
398 | (m_full_object && m_snaps.empty() && !is_post_copyup_write_required())) { | |
11fdf7f2 | 399 | m_copyup_enabled = false; |
7c673cae | 400 | } |
b32b8144 | 401 | } |
7c673cae | 402 | |
b32b8144 FG |
403 | template <typename I> |
404 | void AbstractObjectWriteRequest<I>::add_write_hint( | |
405 | librados::ObjectWriteOperation *wr) { | |
406 | I *image_ctx = this->m_ictx; | |
407 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
408 | if (image_ctx->object_map == nullptr || !this->m_object_may_exist) { | |
409 | ObjectRequest<I>::add_write_hint(*image_ctx, wr); | |
410 | } | |
7c673cae FG |
411 | } |
412 | ||
b32b8144 FG |
413 | template <typename I> |
414 | void AbstractObjectWriteRequest<I>::send() { | |
415 | I *image_ctx = this->m_ictx; | |
416 | ldout(image_ctx->cct, 20) << this->get_op_type() << " " << this->m_oid << " " | |
417 | << this->m_object_off << "~" << this->m_object_len | |
418 | << dendl; | |
7c673cae | 419 | { |
b32b8144 FG |
420 | RWLock::RLocker snap_lock(image_ctx->snap_lock); |
421 | if (image_ctx->object_map == nullptr) { | |
422 | m_object_may_exist = true; | |
7c673cae FG |
423 | } else { |
424 | // should have been flushed prior to releasing lock | |
11fdf7f2 | 425 | ceph_assert(image_ctx->exclusive_lock->is_lock_owner()); |
b32b8144 FG |
426 | m_object_may_exist = image_ctx->object_map->object_may_exist( |
427 | this->m_object_no); | |
7c673cae FG |
428 | } |
429 | } | |
430 | ||
b32b8144 FG |
431 | if (!m_object_may_exist && is_no_op_for_nonexistent_object()) { |
432 | ldout(image_ctx->cct, 20) << "skipping no-op on nonexistent object" | |
433 | << dendl; | |
434 | this->async_finish(0); | |
435 | return; | |
7c673cae FG |
436 | } |
437 | ||
b32b8144 | 438 | pre_write_object_map_update(); |
7c673cae FG |
439 | } |
440 | ||
b32b8144 FG |
441 | template <typename I> |
442 | void AbstractObjectWriteRequest<I>::pre_write_object_map_update() { | |
443 | I *image_ctx = this->m_ictx; | |
7c673cae | 444 | |
b32b8144 FG |
445 | image_ctx->snap_lock.get_read(); |
446 | if (image_ctx->object_map == nullptr || !is_object_map_update_enabled()) { | |
447 | image_ctx->snap_lock.put_read(); | |
448 | write_object(); | |
449 | return; | |
7c673cae FG |
450 | } |
451 | ||
b32b8144 FG |
452 | if (!m_object_may_exist && m_copyup_enabled) { |
453 | // optimization: copyup required | |
454 | image_ctx->snap_lock.put_read(); | |
455 | copyup(); | |
456 | return; | |
457 | } | |
7c673cae | 458 | |
b32b8144 FG |
459 | uint8_t new_state = this->get_pre_write_object_map_state(); |
460 | ldout(image_ctx->cct, 20) << this->m_oid << " " << this->m_object_off | |
461 | << "~" << this->m_object_len << dendl; | |
7c673cae | 462 | |
b32b8144 FG |
463 | image_ctx->object_map_lock.get_write(); |
464 | if (image_ctx->object_map->template aio_update< | |
465 | AbstractObjectWriteRequest<I>, | |
466 | &AbstractObjectWriteRequest<I>::handle_pre_write_object_map_update>( | |
91327a77 AA |
467 | CEPH_NOSNAP, this->m_object_no, new_state, {}, this->m_trace, false, |
468 | this)) { | |
b32b8144 FG |
469 | image_ctx->object_map_lock.put_write(); |
470 | image_ctx->snap_lock.put_read(); | |
471 | return; | |
7c673cae FG |
472 | } |
473 | ||
b32b8144 FG |
474 | image_ctx->object_map_lock.put_write(); |
475 | image_ctx->snap_lock.put_read(); | |
476 | write_object(); | |
7c673cae FG |
477 | } |
478 | ||
b32b8144 FG |
479 | template <typename I> |
480 | void AbstractObjectWriteRequest<I>::handle_pre_write_object_map_update(int r) { | |
481 | I *image_ctx = this->m_ictx; | |
482 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
11fdf7f2 TL |
483 | if (r < 0) { |
484 | lderr(image_ctx->cct) << "failed to update object map: " | |
485 | << cpp_strerror(r) << dendl; | |
486 | this->finish(r); | |
487 | return; | |
488 | } | |
7c673cae | 489 | |
b32b8144 | 490 | write_object(); |
7c673cae FG |
491 | } |
492 | ||
b32b8144 FG |
493 | template <typename I> |
494 | void AbstractObjectWriteRequest<I>::write_object() { | |
495 | I *image_ctx = this->m_ictx; | |
496 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 497 | |
b32b8144 FG |
498 | librados::ObjectWriteOperation write; |
499 | if (m_copyup_enabled) { | |
500 | ldout(image_ctx->cct, 20) << "guarding write" << dendl; | |
11fdf7f2 TL |
501 | if (!image_ctx->migration_info.empty()) { |
502 | m_guarding_migration_write = true; | |
503 | cls_client::assert_snapc_seq( | |
504 | &write, m_snap_seq, cls::rbd::ASSERT_SNAPC_SEQ_LE_SNAPSET_SEQ); | |
505 | } else { | |
506 | write.assert_exists(); | |
507 | } | |
7c673cae FG |
508 | } |
509 | ||
b32b8144 FG |
510 | add_write_hint(&write); |
511 | add_write_ops(&write); | |
11fdf7f2 | 512 | ceph_assert(write.size() != 0); |
7c673cae | 513 | |
b32b8144 FG |
514 | librados::AioCompletion *rados_completion = util::create_rados_callback< |
515 | AbstractObjectWriteRequest<I>, | |
516 | &AbstractObjectWriteRequest<I>::handle_write_object>(this); | |
517 | int r = image_ctx->data_ctx.aio_operate( | |
518 | this->m_oid, rados_completion, &write, m_snap_seq, m_snaps, | |
31f18b77 | 519 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); |
11fdf7f2 | 520 | ceph_assert(r == 0); |
7c673cae FG |
521 | rados_completion->release(); |
522 | } | |
7c673cae | 523 | |
b32b8144 FG |
524 | template <typename I> |
525 | void AbstractObjectWriteRequest<I>::handle_write_object(int r) { | |
526 | I *image_ctx = this->m_ictx; | |
527 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
528 | ||
529 | r = filter_write_result(r); | |
530 | if (r == -ENOENT) { | |
531 | if (m_copyup_enabled) { | |
532 | copyup(); | |
533 | return; | |
534 | } | |
11fdf7f2 TL |
535 | } else if (r == -ERANGE && m_guarding_migration_write) { |
536 | if (!image_ctx->migration_info.empty()) { | |
537 | copyup(); | |
538 | } else { | |
539 | ldout(image_ctx->cct, 10) << "migration parent gone, restart io" << dendl; | |
540 | m_guarding_migration_write = false; | |
541 | compute_parent_info(); | |
542 | write_object(); | |
543 | } | |
544 | return; | |
b32b8144 FG |
545 | } else if (r == -EILSEQ) { |
546 | ldout(image_ctx->cct, 10) << "failed to write object" << dendl; | |
547 | this->finish(r); | |
548 | return; | |
549 | } else if (r < 0) { | |
550 | lderr(image_ctx->cct) << "failed to write object: " << cpp_strerror(r) | |
551 | << dendl; | |
552 | this->finish(r); | |
553 | return; | |
7c673cae FG |
554 | } |
555 | ||
b32b8144 | 556 | post_write_object_map_update(); |
7c673cae FG |
557 | } |
558 | ||
b32b8144 FG |
559 | template <typename I> |
560 | void AbstractObjectWriteRequest<I>::copyup() { | |
561 | I *image_ctx = this->m_ictx; | |
562 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 563 | |
11fdf7f2 | 564 | ceph_assert(!m_copyup_in_progress); |
b32b8144 | 565 | m_copyup_in_progress = true; |
7c673cae | 566 | |
b32b8144 FG |
567 | image_ctx->copyup_list_lock.Lock(); |
568 | auto it = image_ctx->copyup_list.find(this->m_object_no); | |
569 | if (it == image_ctx->copyup_list.end()) { | |
570 | auto new_req = CopyupRequest<I>::create( | |
571 | image_ctx, this->m_oid, this->m_object_no, | |
572 | std::move(this->m_parent_extents), this->m_trace); | |
573 | this->m_parent_extents.clear(); | |
574 | ||
575 | // make sure to wait on this CopyupRequest | |
576 | new_req->append_request(this); | |
577 | image_ctx->copyup_list[this->m_object_no] = new_req; | |
578 | ||
579 | image_ctx->copyup_list_lock.Unlock(); | |
580 | new_req->send(); | |
31f18b77 | 581 | } else { |
b32b8144 FG |
582 | it->second->append_request(this); |
583 | image_ctx->copyup_list_lock.Unlock(); | |
31f18b77 | 584 | } |
7c673cae FG |
585 | } |
586 | ||
b32b8144 FG |
587 | template <typename I> |
588 | void AbstractObjectWriteRequest<I>::handle_copyup(int r) { | |
589 | I *image_ctx = this->m_ictx; | |
590 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
591 | ||
11fdf7f2 | 592 | ceph_assert(m_copyup_in_progress); |
b32b8144 FG |
593 | m_copyup_in_progress = false; |
594 | ||
595 | if (r < 0) { | |
596 | lderr(image_ctx->cct) << "failed to copyup object: " << cpp_strerror(r) | |
597 | << dendl; | |
598 | this->finish(r); | |
599 | return; | |
31f18b77 | 600 | } |
31f18b77 | 601 | |
b32b8144 FG |
602 | if (is_post_copyup_write_required()) { |
603 | write_object(); | |
604 | return; | |
7c673cae | 605 | } |
b32b8144 FG |
606 | |
607 | post_write_object_map_update(); | |
7c673cae FG |
608 | } |
609 | ||
b32b8144 FG |
610 | template <typename I> |
611 | void AbstractObjectWriteRequest<I>::post_write_object_map_update() { | |
612 | I *image_ctx = this->m_ictx; | |
613 | ||
614 | image_ctx->snap_lock.get_read(); | |
615 | if (image_ctx->object_map == nullptr || !is_object_map_update_enabled() || | |
616 | !is_non_existent_post_write_object_map_state()) { | |
617 | image_ctx->snap_lock.put_read(); | |
618 | this->finish(0); | |
619 | return; | |
7c673cae FG |
620 | } |
621 | ||
b32b8144 | 622 | ldout(image_ctx->cct, 20) << dendl; |
7c673cae | 623 | |
b32b8144 | 624 | // should have been flushed prior to releasing lock |
11fdf7f2 | 625 | ceph_assert(image_ctx->exclusive_lock->is_lock_owner()); |
b32b8144 FG |
626 | image_ctx->object_map_lock.get_write(); |
627 | if (image_ctx->object_map->template aio_update< | |
628 | AbstractObjectWriteRequest<I>, | |
629 | &AbstractObjectWriteRequest<I>::handle_post_write_object_map_update>( | |
630 | CEPH_NOSNAP, this->m_object_no, OBJECT_NONEXISTENT, OBJECT_PENDING, | |
91327a77 | 631 | this->m_trace, false, this)) { |
b32b8144 FG |
632 | image_ctx->object_map_lock.put_write(); |
633 | image_ctx->snap_lock.put_read(); | |
634 | return; | |
7c673cae FG |
635 | } |
636 | ||
b32b8144 FG |
637 | image_ctx->object_map_lock.put_write(); |
638 | image_ctx->snap_lock.put_read(); | |
639 | this->finish(0); | |
7c673cae FG |
640 | } |
641 | ||
b32b8144 FG |
642 | template <typename I> |
643 | void AbstractObjectWriteRequest<I>::handle_post_write_object_map_update(int r) { | |
644 | I *image_ctx = this->m_ictx; | |
645 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
11fdf7f2 TL |
646 | if (r < 0) { |
647 | lderr(image_ctx->cct) << "failed to update object map: " | |
648 | << cpp_strerror(r) << dendl; | |
649 | this->finish(r); | |
650 | return; | |
651 | } | |
c07f9fc5 | 652 | |
b32b8144 FG |
653 | this->finish(0); |
654 | } | |
c07f9fc5 | 655 | |
b32b8144 FG |
656 | template <typename I> |
657 | void ObjectWriteRequest<I>::add_write_ops(librados::ObjectWriteOperation *wr) { | |
658 | if (this->m_full_object) { | |
659 | wr->write_full(m_write_data); | |
c07f9fc5 | 660 | } else { |
b32b8144 | 661 | wr->write(this->m_object_off, m_write_data); |
c07f9fc5 FG |
662 | } |
663 | wr->set_op_flags2(m_op_flags); | |
664 | } | |
665 | ||
b32b8144 FG |
666 | template <typename I> |
667 | void ObjectWriteSameRequest<I>::add_write_ops( | |
668 | librados::ObjectWriteOperation *wr) { | |
669 | wr->writesame(this->m_object_off, this->m_object_len, m_write_data); | |
670 | wr->set_op_flags2(m_op_flags); | |
c07f9fc5 FG |
671 | } |
672 | ||
b32b8144 FG |
673 | template <typename I> |
674 | void ObjectCompareAndWriteRequest<I>::add_write_ops( | |
675 | librados::ObjectWriteOperation *wr) { | |
676 | wr->cmpext(this->m_object_off, m_cmp_bl, nullptr); | |
c07f9fc5 | 677 | |
b32b8144 FG |
678 | if (this->m_full_object) { |
679 | wr->write_full(m_write_bl); | |
680 | } else { | |
681 | wr->write(this->m_object_off, m_write_bl); | |
682 | } | |
683 | wr->set_op_flags2(m_op_flags); | |
684 | } | |
c07f9fc5 | 685 | |
b32b8144 FG |
686 | template <typename I> |
687 | int ObjectCompareAndWriteRequest<I>::filter_write_result(int r) const { | |
688 | if (r <= -MAX_ERRNO) { | |
689 | I *image_ctx = this->m_ictx; | |
690 | Extents image_extents; | |
691 | ||
692 | // object extent compare mismatch | |
693 | uint64_t offset = -MAX_ERRNO - r; | |
694 | Striper::extent_to_file(image_ctx->cct, &image_ctx->layout, | |
695 | this->m_object_no, offset, this->m_object_len, | |
696 | image_extents); | |
11fdf7f2 | 697 | ceph_assert(image_extents.size() == 1); |
b32b8144 FG |
698 | |
699 | if (m_mismatch_offset) { | |
700 | *m_mismatch_offset = image_extents[0].first; | |
c07f9fc5 | 701 | } |
b32b8144 | 702 | r = -EILSEQ; |
c07f9fc5 | 703 | } |
b32b8144 | 704 | return r; |
c07f9fc5 FG |
705 | } |
706 | ||
7c673cae FG |
707 | } // namespace io |
708 | } // namespace librbd | |
709 | ||
710 | template class librbd::io::ObjectRequest<librbd::ImageCtx>; | |
711 | template class librbd::io::ObjectReadRequest<librbd::ImageCtx>; | |
b32b8144 FG |
712 | template class librbd::io::AbstractObjectWriteRequest<librbd::ImageCtx>; |
713 | template class librbd::io::ObjectWriteRequest<librbd::ImageCtx>; | |
714 | template class librbd::io::ObjectDiscardRequest<librbd::ImageCtx>; | |
715 | template class librbd::io::ObjectWriteSameRequest<librbd::ImageCtx>; | |
716 | template class librbd::io::ObjectCompareAndWriteRequest<librbd::ImageCtx>; |