]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ObjectRequest.h" | |
5 | #include "common/ceph_context.h" | |
6 | #include "common/dout.h" | |
7 | #include "common/errno.h" | |
8 | #include "common/Mutex.h" | |
9 | #include "common/RWLock.h" | |
10 | #include "common/WorkQueue.h" | |
11 | #include "include/Context.h" | |
c07f9fc5 | 12 | #include "include/err.h" |
11fdf7f2 | 13 | #include "osd/osd_types.h" |
7c673cae FG |
14 | |
15 | #include "librbd/ExclusiveLock.h" | |
16 | #include "librbd/ImageCtx.h" | |
17 | #include "librbd/ObjectMap.h" | |
18 | #include "librbd/Utils.h" | |
19 | #include "librbd/io/AioCompletion.h" | |
20 | #include "librbd/io/CopyupRequest.h" | |
21 | #include "librbd/io/ImageRequest.h" | |
22 | #include "librbd/io/ReadResult.h" | |
23 | ||
24 | #include <boost/bind.hpp> | |
25 | #include <boost/optional.hpp> | |
26 | ||
27 | #define dout_subsys ceph_subsys_rbd | |
28 | #undef dout_prefix | |
29 | #define dout_prefix *_dout << "librbd::io::ObjectRequest: " << this \ | |
30 | << " " << __func__ << ": " | |
31 | ||
32 | namespace librbd { | |
33 | namespace io { | |
34 | ||
b32b8144 | 35 | namespace { |
7c673cae FG |
36 | |
37 | template <typename I> | |
b32b8144 FG |
38 | inline bool is_copy_on_read(I *ictx, librados::snap_t snap_id) { |
39 | RWLock::RLocker snap_locker(ictx->snap_lock); | |
40 | return (ictx->clone_copy_on_read && | |
41 | !ictx->read_only && snap_id == CEPH_NOSNAP && | |
42 | (ictx->exclusive_lock == nullptr || | |
43 | ictx->exclusive_lock->is_lock_owner())); | |
7c673cae FG |
44 | } |
45 | ||
b32b8144 | 46 | } // anonymous namespace |
3efd9988 | 47 | |
7c673cae FG |
48 | template <typename I> |
49 | ObjectRequest<I>* | |
50 | ObjectRequest<I>::create_write(I *ictx, const std::string &oid, | |
51 | uint64_t object_no, uint64_t object_off, | |
11fdf7f2 | 52 | ceph::bufferlist&& data, |
31f18b77 FG |
53 | const ::SnapContext &snapc, int op_flags, |
54 | const ZTracer::Trace &parent_trace, | |
55 | Context *completion) { | |
11fdf7f2 TL |
56 | return new ObjectWriteRequest<I>(ictx, oid, object_no, object_off, |
57 | std::move(data), snapc, op_flags, | |
58 | parent_trace, completion); | |
7c673cae FG |
59 | } |
60 | ||
61 | template <typename I> | |
62 | ObjectRequest<I>* | |
b32b8144 FG |
63 | ObjectRequest<I>::create_discard(I *ictx, const std::string &oid, |
64 | uint64_t object_no, uint64_t object_off, | |
65 | uint64_t object_len, | |
66 | const ::SnapContext &snapc, | |
11fdf7f2 | 67 | int discard_flags, |
b32b8144 FG |
68 | const ZTracer::Trace &parent_trace, |
69 | Context *completion) { | |
70 | return new ObjectDiscardRequest<I>(ictx, oid, object_no, object_off, | |
11fdf7f2 TL |
71 | object_len, snapc, discard_flags, |
72 | parent_trace, completion); | |
7c673cae FG |
73 | } |
74 | ||
75 | template <typename I> | |
76 | ObjectRequest<I>* | |
11fdf7f2 | 77 | ObjectRequest<I>::create_write_same(I *ictx, const std::string &oid, |
7c673cae FG |
78 | uint64_t object_no, uint64_t object_off, |
79 | uint64_t object_len, | |
11fdf7f2 | 80 | ceph::bufferlist&& data, |
31f18b77 FG |
81 | const ::SnapContext &snapc, int op_flags, |
82 | const ZTracer::Trace &parent_trace, | |
83 | Context *completion) { | |
b32b8144 | 84 | return new ObjectWriteSameRequest<I>(ictx, oid, object_no, object_off, |
11fdf7f2 TL |
85 | object_len, std::move(data), snapc, |
86 | op_flags, parent_trace, completion); | |
7c673cae FG |
87 | } |
88 | ||
c07f9fc5 FG |
89 | template <typename I> |
90 | ObjectRequest<I>* | |
91 | ObjectRequest<I>::create_compare_and_write(I *ictx, const std::string &oid, | |
b32b8144 FG |
92 | uint64_t object_no, |
93 | uint64_t object_off, | |
11fdf7f2 TL |
94 | ceph::bufferlist&& cmp_data, |
95 | ceph::bufferlist&& write_data, | |
c07f9fc5 FG |
96 | const ::SnapContext &snapc, |
97 | uint64_t *mismatch_offset, | |
98 | int op_flags, | |
99 | const ZTracer::Trace &parent_trace, | |
100 | Context *completion) { | |
b32b8144 | 101 | return new ObjectCompareAndWriteRequest<I>(ictx, oid, object_no, object_off, |
11fdf7f2 TL |
102 | std::move(cmp_data), |
103 | std::move(write_data), snapc, | |
b32b8144 FG |
104 | mismatch_offset, op_flags, |
105 | parent_trace, completion); | |
c07f9fc5 FG |
106 | } |
107 | ||
7c673cae | 108 | template <typename I> |
b32b8144 | 109 | ObjectRequest<I>::ObjectRequest(I *ictx, const std::string &oid, |
7c673cae FG |
110 | uint64_t objectno, uint64_t off, |
111 | uint64_t len, librados::snap_t snap_id, | |
b32b8144 FG |
112 | const char *trace_name, |
113 | const ZTracer::Trace &trace, | |
31f18b77 | 114 | Context *completion) |
7c673cae FG |
115 | : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), |
116 | m_object_len(len), m_snap_id(snap_id), m_completion(completion), | |
31f18b77 | 117 | m_trace(util::create_trace(*ictx, "", trace)) { |
eafe8130 | 118 | ceph_assert(m_ictx->data_ctx.is_valid()); |
31f18b77 FG |
119 | if (m_trace.valid()) { |
120 | m_trace.copy_name(trace_name + std::string(" ") + oid); | |
121 | m_trace.event("start"); | |
122 | } | |
7c673cae FG |
123 | } |
124 | ||
125 | template <typename I> | |
b32b8144 FG |
126 | void ObjectRequest<I>::add_write_hint(I& image_ctx, |
127 | librados::ObjectWriteOperation *wr) { | |
128 | if (image_ctx.enable_alloc_hint) { | |
92f5a8d4 TL |
129 | wr->set_alloc_hint2(image_ctx.get_object_size(), |
130 | image_ctx.get_object_size(), | |
131 | image_ctx.alloc_hint_flags); | |
132 | } else if (image_ctx.alloc_hint_flags != 0U) { | |
133 | wr->set_alloc_hint2(0, 0, image_ctx.alloc_hint_flags); | |
7c673cae FG |
134 | } |
135 | } | |
136 | ||
137 | template <typename I> | |
11fdf7f2 TL |
138 | bool ObjectRequest<I>::compute_parent_extents(Extents *parent_extents, |
139 | bool read_request) { | |
140 | ceph_assert(m_ictx->snap_lock.is_locked()); | |
141 | ceph_assert(m_ictx->parent_lock.is_locked()); | |
7c673cae | 142 | |
b32b8144 FG |
143 | m_has_parent = false; |
144 | parent_extents->clear(); | |
145 | ||
7c673cae FG |
146 | uint64_t parent_overlap; |
147 | int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); | |
148 | if (r < 0) { | |
149 | // NOTE: it's possible for a snapshot to be deleted while we are | |
150 | // still reading from it | |
151 | lderr(m_ictx->cct) << "failed to retrieve parent overlap: " | |
b32b8144 FG |
152 | << cpp_strerror(r) << dendl; |
153 | return false; | |
11fdf7f2 TL |
154 | } |
155 | ||
156 | if (!read_request && !m_ictx->migration_info.empty()) { | |
157 | parent_overlap = m_ictx->migration_info.overlap; | |
158 | } | |
159 | ||
160 | if (parent_overlap == 0) { | |
7c673cae FG |
161 | return false; |
162 | } | |
163 | ||
b32b8144 FG |
164 | Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, 0, |
165 | m_ictx->layout.object_size, *parent_extents); | |
166 | uint64_t object_overlap = m_ictx->prune_parent_extents(*parent_extents, | |
167 | parent_overlap); | |
7c673cae FG |
168 | if (object_overlap > 0) { |
169 | ldout(m_ictx->cct, 20) << "overlap " << parent_overlap << " " | |
b32b8144 FG |
170 | << "extents " << *parent_extents << dendl; |
171 | m_has_parent = !parent_extents->empty(); | |
7c673cae FG |
172 | return true; |
173 | } | |
174 | return false; | |
175 | } | |
176 | ||
b32b8144 FG |
177 | template <typename I> |
178 | void ObjectRequest<I>::async_finish(int r) { | |
179 | ldout(m_ictx->cct, 20) << "r=" << r << dendl; | |
180 | m_ictx->op_work_queue->queue(util::create_context_callback< | |
181 | ObjectRequest<I>, &ObjectRequest<I>::finish>(this), r); | |
182 | } | |
183 | ||
184 | template <typename I> | |
185 | void ObjectRequest<I>::finish(int r) { | |
186 | ldout(m_ictx->cct, 20) << "r=" << r << dendl; | |
187 | m_completion->complete(r); | |
188 | delete this; | |
7c673cae FG |
189 | } |
190 | ||
191 | /** read **/ | |
192 | ||
193 | template <typename I> | |
194 | ObjectReadRequest<I>::ObjectReadRequest(I *ictx, const std::string &oid, | |
195 | uint64_t objectno, uint64_t offset, | |
b32b8144 | 196 | uint64_t len, librados::snap_t snap_id, |
11fdf7f2 | 197 | int op_flags, |
b32b8144 | 198 | const ZTracer::Trace &parent_trace, |
11fdf7f2 TL |
199 | bufferlist* read_data, |
200 | ExtentMap* extent_map, | |
31f18b77 | 201 | Context *completion) |
b32b8144 FG |
202 | : ObjectRequest<I>(ictx, oid, objectno, offset, len, snap_id, "read", |
203 | parent_trace, completion), | |
11fdf7f2 | 204 | m_op_flags(op_flags), m_read_data(read_data), m_extent_map(extent_map) { |
7c673cae FG |
205 | } |
206 | ||
207 | template <typename I> | |
b32b8144 FG |
208 | void ObjectReadRequest<I>::send() { |
209 | I *image_ctx = this->m_ictx; | |
210 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 211 | |
11fdf7f2 | 212 | read_object(); |
b32b8144 FG |
213 | } |
214 | ||
215 | template <typename I> | |
216 | void ObjectReadRequest<I>::read_object() { | |
217 | I *image_ctx = this->m_ictx; | |
7c673cae FG |
218 | { |
219 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
7c673cae FG |
220 | if (image_ctx->object_map != nullptr && |
221 | !image_ctx->object_map->object_may_exist(this->m_object_no)) { | |
b32b8144 FG |
222 | image_ctx->op_work_queue->queue(new FunctionContext([this](int r) { |
223 | read_parent(); | |
224 | }), 0); | |
7c673cae FG |
225 | return; |
226 | } | |
227 | } | |
228 | ||
b32b8144 FG |
229 | ldout(image_ctx->cct, 20) << dendl; |
230 | ||
7c673cae | 231 | librados::ObjectReadOperation op; |
b32b8144 | 232 | if (this->m_object_len >= image_ctx->sparse_read_threshold_bytes) { |
11fdf7f2 TL |
233 | op.sparse_read(this->m_object_off, this->m_object_len, m_extent_map, |
234 | m_read_data, nullptr); | |
7c673cae | 235 | } else { |
11fdf7f2 | 236 | op.read(this->m_object_off, this->m_object_len, m_read_data, nullptr); |
7c673cae FG |
237 | } |
238 | op.set_op_flags2(m_op_flags); | |
239 | ||
b32b8144 FG |
240 | librados::AioCompletion *rados_completion = util::create_rados_callback< |
241 | ObjectReadRequest<I>, &ObjectReadRequest<I>::handle_read_object>(this); | |
242 | int flags = image_ctx->get_read_flags(this->m_snap_id); | |
31f18b77 FG |
243 | int r = image_ctx->data_ctx.aio_operate( |
244 | this->m_oid, rados_completion, &op, flags, nullptr, | |
245 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); | |
11fdf7f2 | 246 | ceph_assert(r == 0); |
7c673cae FG |
247 | |
248 | rados_completion->release(); | |
249 | } | |
250 | ||
251 | template <typename I> | |
b32b8144 FG |
252 | void ObjectReadRequest<I>::handle_read_object(int r) { |
253 | I *image_ctx = this->m_ictx; | |
254 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
255 | ||
256 | if (r == -ENOENT) { | |
257 | read_parent(); | |
258 | return; | |
259 | } else if (r < 0) { | |
260 | lderr(image_ctx->cct) << "failed to read from object: " | |
261 | << cpp_strerror(r) << dendl; | |
262 | this->finish(r); | |
263 | return; | |
264 | } | |
7c673cae | 265 | |
b32b8144 FG |
266 | this->finish(0); |
267 | } | |
268 | ||
269 | template <typename I> | |
270 | void ObjectReadRequest<I>::read_parent() { | |
271 | I *image_ctx = this->m_ictx; | |
b32b8144 | 272 | |
81eedcae TL |
273 | RWLock::RLocker snap_locker(image_ctx->snap_lock); |
274 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
275 | ||
276 | // calculate reverse mapping onto the image | |
b32b8144 | 277 | Extents parent_extents; |
81eedcae TL |
278 | Striper::extent_to_file(image_ctx->cct, &image_ctx->layout, |
279 | this->m_object_no, this->m_object_off, | |
280 | this->m_object_len, parent_extents); | |
281 | ||
282 | uint64_t parent_overlap = 0; | |
283 | uint64_t object_overlap = 0; | |
284 | int r = image_ctx->get_parent_overlap(this->m_snap_id, &parent_overlap); | |
285 | if (r == 0) { | |
286 | object_overlap = image_ctx->prune_parent_extents(parent_extents, | |
287 | parent_overlap); | |
7c673cae FG |
288 | } |
289 | ||
b32b8144 | 290 | if (object_overlap == 0) { |
81eedcae TL |
291 | parent_locker.unlock(); |
292 | snap_locker.unlock(); | |
293 | ||
b32b8144 FG |
294 | this->finish(-ENOENT); |
295 | return; | |
296 | } | |
297 | ||
298 | ldout(image_ctx->cct, 20) << dendl; | |
299 | ||
81eedcae | 300 | auto parent_completion = AioCompletion::create_and_start< |
b32b8144 FG |
301 | ObjectReadRequest<I>, &ObjectReadRequest<I>::handle_read_parent>( |
302 | this, util::get_image_ctx(image_ctx->parent), AIO_TYPE_READ); | |
303 | ImageRequest<I>::aio_read(image_ctx->parent, parent_completion, | |
11fdf7f2 | 304 | std::move(parent_extents), ReadResult{m_read_data}, |
b32b8144 FG |
305 | 0, this->m_trace); |
306 | } | |
307 | ||
308 | template <typename I> | |
309 | void ObjectReadRequest<I>::handle_read_parent(int r) { | |
310 | I *image_ctx = this->m_ictx; | |
311 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
312 | ||
313 | if (r == -ENOENT) { | |
314 | this->finish(r); | |
315 | return; | |
316 | } else if (r < 0) { | |
317 | lderr(image_ctx->cct) << "failed to read parent extents: " | |
318 | << cpp_strerror(r) << dendl; | |
319 | this->finish(r); | |
320 | return; | |
321 | } | |
322 | ||
323 | copyup(); | |
324 | } | |
325 | ||
326 | template <typename I> | |
327 | void ObjectReadRequest<I>::copyup() { | |
328 | I *image_ctx = this->m_ictx; | |
329 | if (!is_copy_on_read(image_ctx, this->m_snap_id)) { | |
330 | this->finish(0); | |
331 | return; | |
332 | } | |
333 | ||
334 | image_ctx->owner_lock.get_read(); | |
335 | image_ctx->snap_lock.get_read(); | |
336 | image_ctx->parent_lock.get_read(); | |
337 | Extents parent_extents; | |
11fdf7f2 | 338 | if (!this->compute_parent_extents(&parent_extents, true) || |
b32b8144 FG |
339 | (image_ctx->exclusive_lock != nullptr && |
340 | !image_ctx->exclusive_lock->is_lock_owner())) { | |
341 | image_ctx->parent_lock.put_read(); | |
342 | image_ctx->snap_lock.put_read(); | |
343 | image_ctx->owner_lock.put_read(); | |
344 | this->finish(0); | |
345 | return; | |
346 | } | |
347 | ||
348 | ldout(image_ctx->cct, 20) << dendl; | |
349 | ||
11fdf7f2 | 350 | image_ctx->copyup_list_lock.Lock(); |
b32b8144 | 351 | auto it = image_ctx->copyup_list.find(this->m_object_no); |
7c673cae FG |
352 | if (it == image_ctx->copyup_list.end()) { |
353 | // create and kick off a CopyupRequest | |
b32b8144 FG |
354 | auto new_req = CopyupRequest<I>::create( |
355 | image_ctx, this->m_oid, this->m_object_no, std::move(parent_extents), | |
356 | this->m_trace); | |
7c673cae FG |
357 | |
358 | image_ctx->copyup_list[this->m_object_no] = new_req; | |
11fdf7f2 TL |
359 | image_ctx->copyup_list_lock.Unlock(); |
360 | image_ctx->parent_lock.put_read(); | |
361 | image_ctx->snap_lock.put_read(); | |
7c673cae | 362 | new_req->send(); |
11fdf7f2 TL |
363 | } else { |
364 | image_ctx->copyup_list_lock.Unlock(); | |
365 | image_ctx->parent_lock.put_read(); | |
366 | image_ctx->snap_lock.put_read(); | |
7c673cae | 367 | } |
7c673cae | 368 | |
b32b8144 FG |
369 | image_ctx->owner_lock.put_read(); |
370 | this->finish(0); | |
7c673cae FG |
371 | } |
372 | ||
373 | /** write **/ | |
374 | ||
b32b8144 FG |
375 | template <typename I> |
376 | AbstractObjectWriteRequest<I>::AbstractObjectWriteRequest( | |
377 | I *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, | |
378 | uint64_t len, const ::SnapContext &snapc, const char *trace_name, | |
379 | const ZTracer::Trace &parent_trace, Context *completion) | |
380 | : ObjectRequest<I>(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, | |
381 | trace_name, parent_trace, completion), | |
382 | m_snap_seq(snapc.seq.val) | |
7c673cae FG |
383 | { |
384 | m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); | |
7c673cae | 385 | |
b32b8144 FG |
386 | if (this->m_object_off == 0 && |
387 | this->m_object_len == ictx->get_object_size()) { | |
388 | m_full_object = true; | |
389 | } | |
7c673cae | 390 | |
11fdf7f2 | 391 | compute_parent_info(); |
81eedcae TL |
392 | |
393 | ictx->snap_lock.get_read(); | |
394 | if (!ictx->migration_info.empty()) { | |
395 | m_guarding_migration_write = true; | |
396 | } | |
397 | ictx->snap_lock.put_read(); | |
11fdf7f2 TL |
398 | } |
399 | ||
400 | template <typename I> | |
401 | void AbstractObjectWriteRequest<I>::compute_parent_info() { | |
402 | I *image_ctx = this->m_ictx; | |
403 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
404 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
405 | ||
406 | this->compute_parent_extents(&m_parent_extents, false); | |
407 | ||
b32b8144 FG |
408 | if (!this->has_parent() || |
409 | (m_full_object && m_snaps.empty() && !is_post_copyup_write_required())) { | |
11fdf7f2 | 410 | m_copyup_enabled = false; |
7c673cae | 411 | } |
b32b8144 | 412 | } |
7c673cae | 413 | |
b32b8144 FG |
414 | template <typename I> |
415 | void AbstractObjectWriteRequest<I>::add_write_hint( | |
416 | librados::ObjectWriteOperation *wr) { | |
417 | I *image_ctx = this->m_ictx; | |
418 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
419 | if (image_ctx->object_map == nullptr || !this->m_object_may_exist) { | |
420 | ObjectRequest<I>::add_write_hint(*image_ctx, wr); | |
421 | } | |
7c673cae FG |
422 | } |
423 | ||
b32b8144 FG |
424 | template <typename I> |
425 | void AbstractObjectWriteRequest<I>::send() { | |
426 | I *image_ctx = this->m_ictx; | |
427 | ldout(image_ctx->cct, 20) << this->get_op_type() << " " << this->m_oid << " " | |
428 | << this->m_object_off << "~" << this->m_object_len | |
429 | << dendl; | |
7c673cae | 430 | { |
b32b8144 FG |
431 | RWLock::RLocker snap_lock(image_ctx->snap_lock); |
432 | if (image_ctx->object_map == nullptr) { | |
433 | m_object_may_exist = true; | |
7c673cae FG |
434 | } else { |
435 | // should have been flushed prior to releasing lock | |
11fdf7f2 | 436 | ceph_assert(image_ctx->exclusive_lock->is_lock_owner()); |
b32b8144 FG |
437 | m_object_may_exist = image_ctx->object_map->object_may_exist( |
438 | this->m_object_no); | |
7c673cae FG |
439 | } |
440 | } | |
441 | ||
b32b8144 FG |
442 | if (!m_object_may_exist && is_no_op_for_nonexistent_object()) { |
443 | ldout(image_ctx->cct, 20) << "skipping no-op on nonexistent object" | |
444 | << dendl; | |
445 | this->async_finish(0); | |
446 | return; | |
7c673cae FG |
447 | } |
448 | ||
b32b8144 | 449 | pre_write_object_map_update(); |
7c673cae FG |
450 | } |
451 | ||
b32b8144 FG |
452 | template <typename I> |
453 | void AbstractObjectWriteRequest<I>::pre_write_object_map_update() { | |
454 | I *image_ctx = this->m_ictx; | |
7c673cae | 455 | |
b32b8144 FG |
456 | image_ctx->snap_lock.get_read(); |
457 | if (image_ctx->object_map == nullptr || !is_object_map_update_enabled()) { | |
458 | image_ctx->snap_lock.put_read(); | |
459 | write_object(); | |
460 | return; | |
7c673cae FG |
461 | } |
462 | ||
b32b8144 FG |
463 | if (!m_object_may_exist && m_copyup_enabled) { |
464 | // optimization: copyup required | |
465 | image_ctx->snap_lock.put_read(); | |
466 | copyup(); | |
467 | return; | |
468 | } | |
7c673cae | 469 | |
b32b8144 FG |
470 | uint8_t new_state = this->get_pre_write_object_map_state(); |
471 | ldout(image_ctx->cct, 20) << this->m_oid << " " << this->m_object_off | |
472 | << "~" << this->m_object_len << dendl; | |
7c673cae | 473 | |
b32b8144 FG |
474 | image_ctx->object_map_lock.get_write(); |
475 | if (image_ctx->object_map->template aio_update< | |
476 | AbstractObjectWriteRequest<I>, | |
477 | &AbstractObjectWriteRequest<I>::handle_pre_write_object_map_update>( | |
91327a77 AA |
478 | CEPH_NOSNAP, this->m_object_no, new_state, {}, this->m_trace, false, |
479 | this)) { | |
b32b8144 FG |
480 | image_ctx->object_map_lock.put_write(); |
481 | image_ctx->snap_lock.put_read(); | |
482 | return; | |
7c673cae FG |
483 | } |
484 | ||
b32b8144 FG |
485 | image_ctx->object_map_lock.put_write(); |
486 | image_ctx->snap_lock.put_read(); | |
487 | write_object(); | |
7c673cae FG |
488 | } |
489 | ||
b32b8144 FG |
490 | template <typename I> |
491 | void AbstractObjectWriteRequest<I>::handle_pre_write_object_map_update(int r) { | |
492 | I *image_ctx = this->m_ictx; | |
493 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
11fdf7f2 TL |
494 | if (r < 0) { |
495 | lderr(image_ctx->cct) << "failed to update object map: " | |
496 | << cpp_strerror(r) << dendl; | |
497 | this->finish(r); | |
498 | return; | |
499 | } | |
7c673cae | 500 | |
b32b8144 | 501 | write_object(); |
7c673cae FG |
502 | } |
503 | ||
b32b8144 FG |
504 | template <typename I> |
505 | void AbstractObjectWriteRequest<I>::write_object() { | |
506 | I *image_ctx = this->m_ictx; | |
507 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 508 | |
b32b8144 FG |
509 | librados::ObjectWriteOperation write; |
510 | if (m_copyup_enabled) { | |
511 | ldout(image_ctx->cct, 20) << "guarding write" << dendl; | |
81eedcae | 512 | if (m_guarding_migration_write) { |
11fdf7f2 TL |
513 | cls_client::assert_snapc_seq( |
514 | &write, m_snap_seq, cls::rbd::ASSERT_SNAPC_SEQ_LE_SNAPSET_SEQ); | |
515 | } else { | |
516 | write.assert_exists(); | |
517 | } | |
7c673cae FG |
518 | } |
519 | ||
b32b8144 FG |
520 | add_write_hint(&write); |
521 | add_write_ops(&write); | |
11fdf7f2 | 522 | ceph_assert(write.size() != 0); |
7c673cae | 523 | |
b32b8144 FG |
524 | librados::AioCompletion *rados_completion = util::create_rados_callback< |
525 | AbstractObjectWriteRequest<I>, | |
526 | &AbstractObjectWriteRequest<I>::handle_write_object>(this); | |
527 | int r = image_ctx->data_ctx.aio_operate( | |
528 | this->m_oid, rados_completion, &write, m_snap_seq, m_snaps, | |
31f18b77 | 529 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); |
11fdf7f2 | 530 | ceph_assert(r == 0); |
7c673cae FG |
531 | rados_completion->release(); |
532 | } | |
7c673cae | 533 | |
b32b8144 FG |
534 | template <typename I> |
535 | void AbstractObjectWriteRequest<I>::handle_write_object(int r) { | |
536 | I *image_ctx = this->m_ictx; | |
537 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
538 | ||
539 | r = filter_write_result(r); | |
540 | if (r == -ENOENT) { | |
541 | if (m_copyup_enabled) { | |
542 | copyup(); | |
543 | return; | |
544 | } | |
11fdf7f2 | 545 | } else if (r == -ERANGE && m_guarding_migration_write) { |
81eedcae TL |
546 | image_ctx->snap_lock.get_read(); |
547 | m_guarding_migration_write = !image_ctx->migration_info.empty(); | |
548 | image_ctx->snap_lock.put_read(); | |
549 | ||
550 | if (m_guarding_migration_write) { | |
11fdf7f2 TL |
551 | copyup(); |
552 | } else { | |
553 | ldout(image_ctx->cct, 10) << "migration parent gone, restart io" << dendl; | |
11fdf7f2 TL |
554 | compute_parent_info(); |
555 | write_object(); | |
556 | } | |
557 | return; | |
b32b8144 FG |
558 | } else if (r == -EILSEQ) { |
559 | ldout(image_ctx->cct, 10) << "failed to write object" << dendl; | |
560 | this->finish(r); | |
561 | return; | |
562 | } else if (r < 0) { | |
563 | lderr(image_ctx->cct) << "failed to write object: " << cpp_strerror(r) | |
564 | << dendl; | |
565 | this->finish(r); | |
566 | return; | |
7c673cae FG |
567 | } |
568 | ||
b32b8144 | 569 | post_write_object_map_update(); |
7c673cae FG |
570 | } |
571 | ||
b32b8144 FG |
572 | template <typename I> |
573 | void AbstractObjectWriteRequest<I>::copyup() { | |
574 | I *image_ctx = this->m_ictx; | |
575 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 576 | |
11fdf7f2 | 577 | ceph_assert(!m_copyup_in_progress); |
b32b8144 | 578 | m_copyup_in_progress = true; |
7c673cae | 579 | |
b32b8144 FG |
580 | image_ctx->copyup_list_lock.Lock(); |
581 | auto it = image_ctx->copyup_list.find(this->m_object_no); | |
582 | if (it == image_ctx->copyup_list.end()) { | |
583 | auto new_req = CopyupRequest<I>::create( | |
584 | image_ctx, this->m_oid, this->m_object_no, | |
585 | std::move(this->m_parent_extents), this->m_trace); | |
586 | this->m_parent_extents.clear(); | |
587 | ||
588 | // make sure to wait on this CopyupRequest | |
589 | new_req->append_request(this); | |
590 | image_ctx->copyup_list[this->m_object_no] = new_req; | |
591 | ||
592 | image_ctx->copyup_list_lock.Unlock(); | |
593 | new_req->send(); | |
31f18b77 | 594 | } else { |
b32b8144 FG |
595 | it->second->append_request(this); |
596 | image_ctx->copyup_list_lock.Unlock(); | |
31f18b77 | 597 | } |
7c673cae FG |
598 | } |
599 | ||
b32b8144 FG |
600 | template <typename I> |
601 | void AbstractObjectWriteRequest<I>::handle_copyup(int r) { | |
602 | I *image_ctx = this->m_ictx; | |
603 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
604 | ||
11fdf7f2 | 605 | ceph_assert(m_copyup_in_progress); |
b32b8144 FG |
606 | m_copyup_in_progress = false; |
607 | ||
81eedcae | 608 | if (r < 0 && r != -ERESTART) { |
b32b8144 FG |
609 | lderr(image_ctx->cct) << "failed to copyup object: " << cpp_strerror(r) |
610 | << dendl; | |
611 | this->finish(r); | |
612 | return; | |
31f18b77 | 613 | } |
31f18b77 | 614 | |
81eedcae | 615 | if (r == -ERESTART || is_post_copyup_write_required()) { |
b32b8144 FG |
616 | write_object(); |
617 | return; | |
7c673cae | 618 | } |
b32b8144 FG |
619 | |
620 | post_write_object_map_update(); | |
7c673cae FG |
621 | } |
622 | ||
b32b8144 FG |
623 | template <typename I> |
624 | void AbstractObjectWriteRequest<I>::post_write_object_map_update() { | |
625 | I *image_ctx = this->m_ictx; | |
626 | ||
627 | image_ctx->snap_lock.get_read(); | |
628 | if (image_ctx->object_map == nullptr || !is_object_map_update_enabled() || | |
629 | !is_non_existent_post_write_object_map_state()) { | |
630 | image_ctx->snap_lock.put_read(); | |
631 | this->finish(0); | |
632 | return; | |
7c673cae FG |
633 | } |
634 | ||
b32b8144 | 635 | ldout(image_ctx->cct, 20) << dendl; |
7c673cae | 636 | |
b32b8144 | 637 | // should have been flushed prior to releasing lock |
11fdf7f2 | 638 | ceph_assert(image_ctx->exclusive_lock->is_lock_owner()); |
b32b8144 FG |
639 | image_ctx->object_map_lock.get_write(); |
640 | if (image_ctx->object_map->template aio_update< | |
641 | AbstractObjectWriteRequest<I>, | |
642 | &AbstractObjectWriteRequest<I>::handle_post_write_object_map_update>( | |
643 | CEPH_NOSNAP, this->m_object_no, OBJECT_NONEXISTENT, OBJECT_PENDING, | |
91327a77 | 644 | this->m_trace, false, this)) { |
b32b8144 FG |
645 | image_ctx->object_map_lock.put_write(); |
646 | image_ctx->snap_lock.put_read(); | |
647 | return; | |
7c673cae FG |
648 | } |
649 | ||
b32b8144 FG |
650 | image_ctx->object_map_lock.put_write(); |
651 | image_ctx->snap_lock.put_read(); | |
652 | this->finish(0); | |
7c673cae FG |
653 | } |
654 | ||
b32b8144 FG |
655 | template <typename I> |
656 | void AbstractObjectWriteRequest<I>::handle_post_write_object_map_update(int r) { | |
657 | I *image_ctx = this->m_ictx; | |
658 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
11fdf7f2 TL |
659 | if (r < 0) { |
660 | lderr(image_ctx->cct) << "failed to update object map: " | |
661 | << cpp_strerror(r) << dendl; | |
662 | this->finish(r); | |
663 | return; | |
664 | } | |
c07f9fc5 | 665 | |
b32b8144 FG |
666 | this->finish(0); |
667 | } | |
c07f9fc5 | 668 | |
b32b8144 FG |
669 | template <typename I> |
670 | void ObjectWriteRequest<I>::add_write_ops(librados::ObjectWriteOperation *wr) { | |
671 | if (this->m_full_object) { | |
672 | wr->write_full(m_write_data); | |
c07f9fc5 | 673 | } else { |
b32b8144 | 674 | wr->write(this->m_object_off, m_write_data); |
c07f9fc5 FG |
675 | } |
676 | wr->set_op_flags2(m_op_flags); | |
677 | } | |
678 | ||
b32b8144 FG |
679 | template <typename I> |
680 | void ObjectWriteSameRequest<I>::add_write_ops( | |
681 | librados::ObjectWriteOperation *wr) { | |
682 | wr->writesame(this->m_object_off, this->m_object_len, m_write_data); | |
683 | wr->set_op_flags2(m_op_flags); | |
c07f9fc5 FG |
684 | } |
685 | ||
b32b8144 FG |
686 | template <typename I> |
687 | void ObjectCompareAndWriteRequest<I>::add_write_ops( | |
688 | librados::ObjectWriteOperation *wr) { | |
689 | wr->cmpext(this->m_object_off, m_cmp_bl, nullptr); | |
c07f9fc5 | 690 | |
b32b8144 FG |
691 | if (this->m_full_object) { |
692 | wr->write_full(m_write_bl); | |
693 | } else { | |
694 | wr->write(this->m_object_off, m_write_bl); | |
695 | } | |
696 | wr->set_op_flags2(m_op_flags); | |
697 | } | |
c07f9fc5 | 698 | |
b32b8144 FG |
699 | template <typename I> |
700 | int ObjectCompareAndWriteRequest<I>::filter_write_result(int r) const { | |
701 | if (r <= -MAX_ERRNO) { | |
702 | I *image_ctx = this->m_ictx; | |
703 | Extents image_extents; | |
704 | ||
705 | // object extent compare mismatch | |
706 | uint64_t offset = -MAX_ERRNO - r; | |
707 | Striper::extent_to_file(image_ctx->cct, &image_ctx->layout, | |
708 | this->m_object_no, offset, this->m_object_len, | |
709 | image_extents); | |
11fdf7f2 | 710 | ceph_assert(image_extents.size() == 1); |
b32b8144 FG |
711 | |
712 | if (m_mismatch_offset) { | |
713 | *m_mismatch_offset = image_extents[0].first; | |
c07f9fc5 | 714 | } |
b32b8144 | 715 | r = -EILSEQ; |
c07f9fc5 | 716 | } |
b32b8144 | 717 | return r; |
c07f9fc5 FG |
718 | } |
719 | ||
7c673cae FG |
720 | } // namespace io |
721 | } // namespace librbd | |
722 | ||
723 | template class librbd::io::ObjectRequest<librbd::ImageCtx>; | |
724 | template class librbd::io::ObjectReadRequest<librbd::ImageCtx>; | |
b32b8144 FG |
725 | template class librbd::io::AbstractObjectWriteRequest<librbd::ImageCtx>; |
726 | template class librbd::io::ObjectWriteRequest<librbd::ImageCtx>; | |
727 | template class librbd::io::ObjectDiscardRequest<librbd::ImageCtx>; | |
728 | template class librbd::io::ObjectWriteSameRequest<librbd::ImageCtx>; | |
729 | template class librbd::io::ObjectCompareAndWriteRequest<librbd::ImageCtx>; |