]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ObjectRequest.h" | |
5 | #include "common/ceph_context.h" | |
6 | #include "common/dout.h" | |
7 | #include "common/errno.h" | |
8 | #include "common/Mutex.h" | |
9 | #include "common/RWLock.h" | |
10 | #include "common/WorkQueue.h" | |
11 | #include "include/Context.h" | |
c07f9fc5 | 12 | #include "include/err.h" |
7c673cae FG |
13 | |
14 | #include "librbd/ExclusiveLock.h" | |
15 | #include "librbd/ImageCtx.h" | |
16 | #include "librbd/ObjectMap.h" | |
17 | #include "librbd/Utils.h" | |
18 | #include "librbd/io/AioCompletion.h" | |
19 | #include "librbd/io/CopyupRequest.h" | |
20 | #include "librbd/io/ImageRequest.h" | |
21 | #include "librbd/io/ReadResult.h" | |
22 | ||
23 | #include <boost/bind.hpp> | |
24 | #include <boost/optional.hpp> | |
25 | ||
26 | #define dout_subsys ceph_subsys_rbd | |
27 | #undef dout_prefix | |
28 | #define dout_prefix *_dout << "librbd::io::ObjectRequest: " << this \ | |
29 | << " " << __func__ << ": " | |
30 | ||
31 | namespace librbd { | |
32 | namespace io { | |
33 | ||
b32b8144 | 34 | namespace { |
7c673cae FG |
35 | |
36 | template <typename I> | |
b32b8144 FG |
37 | inline bool is_copy_on_read(I *ictx, librados::snap_t snap_id) { |
38 | RWLock::RLocker snap_locker(ictx->snap_lock); | |
39 | return (ictx->clone_copy_on_read && | |
40 | !ictx->read_only && snap_id == CEPH_NOSNAP && | |
41 | (ictx->exclusive_lock == nullptr || | |
42 | ictx->exclusive_lock->is_lock_owner())); | |
7c673cae FG |
43 | } |
44 | ||
b32b8144 | 45 | } // anonymous namespace |
3efd9988 | 46 | |
7c673cae FG |
47 | template <typename I> |
48 | ObjectRequest<I>* | |
49 | ObjectRequest<I>::create_write(I *ictx, const std::string &oid, | |
50 | uint64_t object_no, uint64_t object_off, | |
51 | const ceph::bufferlist &data, | |
31f18b77 FG |
52 | const ::SnapContext &snapc, int op_flags, |
53 | const ZTracer::Trace &parent_trace, | |
54 | Context *completion) { | |
b32b8144 FG |
55 | return new ObjectWriteRequest<I>(ictx, oid, object_no, object_off, data, |
56 | snapc, op_flags, parent_trace, completion); | |
7c673cae FG |
57 | } |
58 | ||
59 | template <typename I> | |
60 | ObjectRequest<I>* | |
b32b8144 FG |
61 | ObjectRequest<I>::create_discard(I *ictx, const std::string &oid, |
62 | uint64_t object_no, uint64_t object_off, | |
63 | uint64_t object_len, | |
64 | const ::SnapContext &snapc, | |
65 | bool disable_clone_remove, | |
66 | bool update_object_map, | |
67 | const ZTracer::Trace &parent_trace, | |
68 | Context *completion) { | |
69 | return new ObjectDiscardRequest<I>(ictx, oid, object_no, object_off, | |
70 | object_len, snapc, disable_clone_remove, | |
71 | update_object_map, parent_trace, | |
72 | completion); | |
7c673cae FG |
73 | } |
74 | ||
75 | template <typename I> | |
76 | ObjectRequest<I>* | |
77 | ObjectRequest<I>::create_writesame(I *ictx, const std::string &oid, | |
78 | uint64_t object_no, uint64_t object_off, | |
79 | uint64_t object_len, | |
80 | const ceph::bufferlist &data, | |
31f18b77 FG |
81 | const ::SnapContext &snapc, int op_flags, |
82 | const ZTracer::Trace &parent_trace, | |
83 | Context *completion) { | |
b32b8144 FG |
84 | return new ObjectWriteSameRequest<I>(ictx, oid, object_no, object_off, |
85 | object_len, data, snapc, op_flags, | |
86 | parent_trace, completion); | |
7c673cae FG |
87 | } |
88 | ||
c07f9fc5 FG |
89 | template <typename I> |
90 | ObjectRequest<I>* | |
91 | ObjectRequest<I>::create_compare_and_write(I *ictx, const std::string &oid, | |
b32b8144 FG |
92 | uint64_t object_no, |
93 | uint64_t object_off, | |
c07f9fc5 FG |
94 | const ceph::bufferlist &cmp_data, |
95 | const ceph::bufferlist &write_data, | |
96 | const ::SnapContext &snapc, | |
97 | uint64_t *mismatch_offset, | |
98 | int op_flags, | |
99 | const ZTracer::Trace &parent_trace, | |
100 | Context *completion) { | |
b32b8144 FG |
101 | return new ObjectCompareAndWriteRequest<I>(ictx, oid, object_no, object_off, |
102 | cmp_data, write_data, snapc, | |
103 | mismatch_offset, op_flags, | |
104 | parent_trace, completion); | |
c07f9fc5 FG |
105 | } |
106 | ||
7c673cae | 107 | template <typename I> |
b32b8144 | 108 | ObjectRequest<I>::ObjectRequest(I *ictx, const std::string &oid, |
7c673cae FG |
109 | uint64_t objectno, uint64_t off, |
110 | uint64_t len, librados::snap_t snap_id, | |
b32b8144 FG |
111 | const char *trace_name, |
112 | const ZTracer::Trace &trace, | |
31f18b77 | 113 | Context *completion) |
7c673cae FG |
114 | : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), |
115 | m_object_len(len), m_snap_id(snap_id), m_completion(completion), | |
31f18b77 FG |
116 | m_trace(util::create_trace(*ictx, "", trace)) { |
117 | if (m_trace.valid()) { | |
118 | m_trace.copy_name(trace_name + std::string(" ") + oid); | |
119 | m_trace.event("start"); | |
120 | } | |
7c673cae FG |
121 | } |
122 | ||
123 | template <typename I> | |
b32b8144 FG |
124 | void ObjectRequest<I>::add_write_hint(I& image_ctx, |
125 | librados::ObjectWriteOperation *wr) { | |
126 | if (image_ctx.enable_alloc_hint) { | |
127 | wr->set_alloc_hint(image_ctx.get_object_size(), | |
128 | image_ctx.get_object_size()); | |
7c673cae FG |
129 | } |
130 | } | |
131 | ||
132 | template <typename I> | |
b32b8144 | 133 | bool ObjectRequest<I>::compute_parent_extents(Extents *parent_extents) { |
7c673cae FG |
134 | assert(m_ictx->snap_lock.is_locked()); |
135 | assert(m_ictx->parent_lock.is_locked()); | |
136 | ||
b32b8144 FG |
137 | m_has_parent = false; |
138 | parent_extents->clear(); | |
139 | ||
7c673cae FG |
140 | uint64_t parent_overlap; |
141 | int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); | |
142 | if (r < 0) { | |
143 | // NOTE: it's possible for a snapshot to be deleted while we are | |
144 | // still reading from it | |
145 | lderr(m_ictx->cct) << "failed to retrieve parent overlap: " | |
b32b8144 FG |
146 | << cpp_strerror(r) << dendl; |
147 | return false; | |
148 | } else if (parent_overlap == 0) { | |
7c673cae FG |
149 | return false; |
150 | } | |
151 | ||
b32b8144 FG |
152 | Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, 0, |
153 | m_ictx->layout.object_size, *parent_extents); | |
154 | uint64_t object_overlap = m_ictx->prune_parent_extents(*parent_extents, | |
155 | parent_overlap); | |
7c673cae FG |
156 | if (object_overlap > 0) { |
157 | ldout(m_ictx->cct, 20) << "overlap " << parent_overlap << " " | |
b32b8144 FG |
158 | << "extents " << *parent_extents << dendl; |
159 | m_has_parent = !parent_extents->empty(); | |
7c673cae FG |
160 | return true; |
161 | } | |
162 | return false; | |
163 | } | |
164 | ||
b32b8144 FG |
165 | template <typename I> |
166 | void ObjectRequest<I>::async_finish(int r) { | |
167 | ldout(m_ictx->cct, 20) << "r=" << r << dendl; | |
168 | m_ictx->op_work_queue->queue(util::create_context_callback< | |
169 | ObjectRequest<I>, &ObjectRequest<I>::finish>(this), r); | |
170 | } | |
171 | ||
172 | template <typename I> | |
173 | void ObjectRequest<I>::finish(int r) { | |
174 | ldout(m_ictx->cct, 20) << "r=" << r << dendl; | |
175 | m_completion->complete(r); | |
176 | delete this; | |
7c673cae FG |
177 | } |
178 | ||
179 | /** read **/ | |
180 | ||
181 | template <typename I> | |
182 | ObjectReadRequest<I>::ObjectReadRequest(I *ictx, const std::string &oid, | |
183 | uint64_t objectno, uint64_t offset, | |
b32b8144 FG |
184 | uint64_t len, librados::snap_t snap_id, |
185 | int op_flags, bool cache_initiated, | |
186 | const ZTracer::Trace &parent_trace, | |
31f18b77 | 187 | Context *completion) |
b32b8144 FG |
188 | : ObjectRequest<I>(ictx, oid, objectno, offset, len, snap_id, "read", |
189 | parent_trace, completion), | |
190 | m_op_flags(op_flags), m_cache_initiated(cache_initiated) { | |
7c673cae FG |
191 | } |
192 | ||
193 | template <typename I> | |
b32b8144 FG |
194 | void ObjectReadRequest<I>::send() { |
195 | I *image_ctx = this->m_ictx; | |
196 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 197 | |
b32b8144 FG |
198 | if (!m_cache_initiated && image_ctx->object_cacher != nullptr) { |
199 | read_cache(); | |
200 | } else { | |
201 | read_object(); | |
7c673cae FG |
202 | } |
203 | } | |
204 | ||
205 | template <typename I> | |
b32b8144 FG |
206 | void ObjectReadRequest<I>::read_cache() { |
207 | I *image_ctx = this->m_ictx; | |
208 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 209 | |
b32b8144 FG |
210 | // must use async callback to avoid cache_lock cycle |
211 | auto cache_ctx = util::create_async_context_callback( | |
212 | *image_ctx, util::create_context_callback< | |
213 | ObjectReadRequest<I>, &ObjectReadRequest<I>::handle_read_cache>(this)); | |
214 | image_ctx->aio_read_from_cache( | |
215 | this->m_oid, this->m_object_no, &m_read_data, this->m_object_len, | |
216 | this->m_object_off, cache_ctx, m_op_flags, | |
217 | (this->m_trace.valid() ? &this->m_trace : nullptr)); | |
7c673cae FG |
218 | } |
219 | ||
220 | template <typename I> | |
b32b8144 FG |
221 | void ObjectReadRequest<I>::handle_read_cache(int r) { |
222 | I *image_ctx = this->m_ictx; | |
223 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
224 | ||
225 | if (r == -ENOENT) { | |
226 | read_parent(); | |
227 | return; | |
228 | } else if (r < 0) { | |
229 | lderr(image_ctx->cct) << "failed to read from cache: " | |
230 | << cpp_strerror(r) << dendl; | |
231 | this->finish(r); | |
232 | return; | |
233 | } | |
7c673cae | 234 | |
b32b8144 FG |
235 | this->finish(0); |
236 | } | |
237 | ||
238 | template <typename I> | |
239 | void ObjectReadRequest<I>::read_object() { | |
240 | I *image_ctx = this->m_ictx; | |
7c673cae FG |
241 | { |
242 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
7c673cae FG |
243 | if (image_ctx->object_map != nullptr && |
244 | !image_ctx->object_map->object_may_exist(this->m_object_no)) { | |
b32b8144 FG |
245 | image_ctx->op_work_queue->queue(new FunctionContext([this](int r) { |
246 | read_parent(); | |
247 | }), 0); | |
7c673cae FG |
248 | return; |
249 | } | |
250 | } | |
251 | ||
b32b8144 FG |
252 | ldout(image_ctx->cct, 20) << dendl; |
253 | ||
7c673cae | 254 | librados::ObjectReadOperation op; |
b32b8144 | 255 | if (this->m_object_len >= image_ctx->sparse_read_threshold_bytes) { |
7c673cae FG |
256 | op.sparse_read(this->m_object_off, this->m_object_len, &m_ext_map, |
257 | &m_read_data, nullptr); | |
258 | } else { | |
259 | op.read(this->m_object_off, this->m_object_len, &m_read_data, nullptr); | |
260 | } | |
261 | op.set_op_flags2(m_op_flags); | |
262 | ||
b32b8144 FG |
263 | librados::AioCompletion *rados_completion = util::create_rados_callback< |
264 | ObjectReadRequest<I>, &ObjectReadRequest<I>::handle_read_object>(this); | |
265 | int flags = image_ctx->get_read_flags(this->m_snap_id); | |
31f18b77 FG |
266 | int r = image_ctx->data_ctx.aio_operate( |
267 | this->m_oid, rados_completion, &op, flags, nullptr, | |
268 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); | |
7c673cae FG |
269 | assert(r == 0); |
270 | ||
271 | rados_completion->release(); | |
272 | } | |
273 | ||
274 | template <typename I> | |
b32b8144 FG |
275 | void ObjectReadRequest<I>::handle_read_object(int r) { |
276 | I *image_ctx = this->m_ictx; | |
277 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
278 | ||
279 | if (r == -ENOENT) { | |
280 | read_parent(); | |
281 | return; | |
282 | } else if (r < 0) { | |
283 | lderr(image_ctx->cct) << "failed to read from object: " | |
284 | << cpp_strerror(r) << dendl; | |
285 | this->finish(r); | |
286 | return; | |
287 | } | |
7c673cae | 288 | |
b32b8144 FG |
289 | this->finish(0); |
290 | } | |
291 | ||
292 | template <typename I> | |
293 | void ObjectReadRequest<I>::read_parent() { | |
294 | I *image_ctx = this->m_ictx; | |
295 | if (m_cache_initiated) { | |
296 | this->finish(-ENOENT); | |
297 | return; | |
298 | } | |
299 | ||
300 | uint64_t object_overlap = 0; | |
301 | Extents parent_extents; | |
7c673cae FG |
302 | { |
303 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
304 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
b32b8144 FG |
305 | |
306 | // calculate reverse mapping onto the image | |
307 | Striper::extent_to_file(image_ctx->cct, &image_ctx->layout, | |
308 | this->m_object_no, this->m_object_off, | |
309 | this->m_object_len, parent_extents); | |
310 | ||
311 | uint64_t parent_overlap = 0; | |
312 | int r = image_ctx->get_parent_overlap(this->m_snap_id, &parent_overlap); | |
313 | if (r == 0) { | |
314 | object_overlap = image_ctx->prune_parent_extents(parent_extents, | |
315 | parent_overlap); | |
7c673cae FG |
316 | } |
317 | } | |
318 | ||
b32b8144 FG |
319 | if (object_overlap == 0) { |
320 | this->finish(-ENOENT); | |
321 | return; | |
322 | } | |
323 | ||
324 | ldout(image_ctx->cct, 20) << dendl; | |
325 | ||
326 | AioCompletion *parent_completion = AioCompletion::create_and_start< | |
327 | ObjectReadRequest<I>, &ObjectReadRequest<I>::handle_read_parent>( | |
328 | this, util::get_image_ctx(image_ctx->parent), AIO_TYPE_READ); | |
329 | ImageRequest<I>::aio_read(image_ctx->parent, parent_completion, | |
330 | std::move(parent_extents), ReadResult{&m_read_data}, | |
331 | 0, this->m_trace); | |
332 | } | |
333 | ||
334 | template <typename I> | |
335 | void ObjectReadRequest<I>::handle_read_parent(int r) { | |
336 | I *image_ctx = this->m_ictx; | |
337 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
338 | ||
339 | if (r == -ENOENT) { | |
340 | this->finish(r); | |
341 | return; | |
342 | } else if (r < 0) { | |
343 | lderr(image_ctx->cct) << "failed to read parent extents: " | |
344 | << cpp_strerror(r) << dendl; | |
345 | this->finish(r); | |
346 | return; | |
347 | } | |
348 | ||
349 | copyup(); | |
350 | } | |
351 | ||
352 | template <typename I> | |
353 | void ObjectReadRequest<I>::copyup() { | |
354 | I *image_ctx = this->m_ictx; | |
355 | if (!is_copy_on_read(image_ctx, this->m_snap_id)) { | |
356 | this->finish(0); | |
357 | return; | |
358 | } | |
359 | ||
360 | image_ctx->owner_lock.get_read(); | |
361 | image_ctx->snap_lock.get_read(); | |
362 | image_ctx->parent_lock.get_read(); | |
363 | Extents parent_extents; | |
364 | if (!this->compute_parent_extents(&parent_extents) || | |
365 | (image_ctx->exclusive_lock != nullptr && | |
366 | !image_ctx->exclusive_lock->is_lock_owner())) { | |
367 | image_ctx->parent_lock.put_read(); | |
368 | image_ctx->snap_lock.put_read(); | |
369 | image_ctx->owner_lock.put_read(); | |
370 | this->finish(0); | |
371 | return; | |
372 | } | |
373 | ||
374 | ldout(image_ctx->cct, 20) << dendl; | |
375 | ||
7c673cae | 376 | Mutex::Locker copyup_locker(image_ctx->copyup_list_lock); |
b32b8144 | 377 | auto it = image_ctx->copyup_list.find(this->m_object_no); |
7c673cae FG |
378 | if (it == image_ctx->copyup_list.end()) { |
379 | // create and kick off a CopyupRequest | |
b32b8144 FG |
380 | auto new_req = CopyupRequest<I>::create( |
381 | image_ctx, this->m_oid, this->m_object_no, std::move(parent_extents), | |
382 | this->m_trace); | |
7c673cae FG |
383 | |
384 | image_ctx->copyup_list[this->m_object_no] = new_req; | |
385 | new_req->send(); | |
386 | } | |
7c673cae | 387 | |
b32b8144 FG |
388 | image_ctx->parent_lock.put_read(); |
389 | image_ctx->snap_lock.put_read(); | |
390 | image_ctx->owner_lock.put_read(); | |
391 | this->finish(0); | |
7c673cae FG |
392 | } |
393 | ||
394 | /** write **/ | |
395 | ||
b32b8144 FG |
396 | template <typename I> |
397 | AbstractObjectWriteRequest<I>::AbstractObjectWriteRequest( | |
398 | I *ictx, const std::string &oid, uint64_t object_no, uint64_t object_off, | |
399 | uint64_t len, const ::SnapContext &snapc, const char *trace_name, | |
400 | const ZTracer::Trace &parent_trace, Context *completion) | |
401 | : ObjectRequest<I>(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, | |
402 | trace_name, parent_trace, completion), | |
403 | m_snap_seq(snapc.seq.val) | |
7c673cae FG |
404 | { |
405 | m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); | |
7c673cae | 406 | |
b32b8144 FG |
407 | { |
408 | RWLock::RLocker snap_locker(ictx->snap_lock); | |
409 | RWLock::RLocker parent_locker(ictx->parent_lock); | |
410 | this->compute_parent_extents(&m_parent_extents); | |
7c673cae | 411 | } |
7c673cae | 412 | |
b32b8144 FG |
413 | if (this->m_object_off == 0 && |
414 | this->m_object_len == ictx->get_object_size()) { | |
415 | m_full_object = true; | |
416 | } | |
7c673cae | 417 | |
b32b8144 FG |
418 | if (!this->has_parent() || |
419 | (m_full_object && m_snaps.empty() && !is_post_copyup_write_required())) { | |
420 | this->m_copyup_enabled = false; | |
7c673cae | 421 | } |
b32b8144 | 422 | } |
7c673cae | 423 | |
b32b8144 FG |
424 | template <typename I> |
425 | void AbstractObjectWriteRequest<I>::add_write_hint( | |
426 | librados::ObjectWriteOperation *wr) { | |
427 | I *image_ctx = this->m_ictx; | |
428 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
429 | if (image_ctx->object_map == nullptr || !this->m_object_may_exist) { | |
430 | ObjectRequest<I>::add_write_hint(*image_ctx, wr); | |
431 | } | |
7c673cae FG |
432 | } |
433 | ||
b32b8144 FG |
434 | template <typename I> |
435 | void AbstractObjectWriteRequest<I>::send() { | |
436 | I *image_ctx = this->m_ictx; | |
437 | ldout(image_ctx->cct, 20) << this->get_op_type() << " " << this->m_oid << " " | |
438 | << this->m_object_off << "~" << this->m_object_len | |
439 | << dendl; | |
7c673cae | 440 | { |
b32b8144 FG |
441 | RWLock::RLocker snap_lock(image_ctx->snap_lock); |
442 | if (image_ctx->object_map == nullptr) { | |
443 | m_object_may_exist = true; | |
7c673cae FG |
444 | } else { |
445 | // should have been flushed prior to releasing lock | |
b32b8144 FG |
446 | assert(image_ctx->exclusive_lock->is_lock_owner()); |
447 | m_object_may_exist = image_ctx->object_map->object_may_exist( | |
448 | this->m_object_no); | |
7c673cae FG |
449 | } |
450 | } | |
451 | ||
b32b8144 FG |
452 | if (!m_object_may_exist && is_no_op_for_nonexistent_object()) { |
453 | ldout(image_ctx->cct, 20) << "skipping no-op on nonexistent object" | |
454 | << dendl; | |
455 | this->async_finish(0); | |
456 | return; | |
7c673cae FG |
457 | } |
458 | ||
b32b8144 | 459 | pre_write_object_map_update(); |
7c673cae FG |
460 | } |
461 | ||
b32b8144 FG |
462 | template <typename I> |
463 | void AbstractObjectWriteRequest<I>::pre_write_object_map_update() { | |
464 | I *image_ctx = this->m_ictx; | |
7c673cae | 465 | |
b32b8144 FG |
466 | image_ctx->snap_lock.get_read(); |
467 | if (image_ctx->object_map == nullptr || !is_object_map_update_enabled()) { | |
468 | image_ctx->snap_lock.put_read(); | |
469 | write_object(); | |
470 | return; | |
7c673cae FG |
471 | } |
472 | ||
b32b8144 FG |
473 | if (!m_object_may_exist && m_copyup_enabled) { |
474 | // optimization: copyup required | |
475 | image_ctx->snap_lock.put_read(); | |
476 | copyup(); | |
477 | return; | |
478 | } | |
7c673cae | 479 | |
b32b8144 FG |
480 | uint8_t new_state = this->get_pre_write_object_map_state(); |
481 | ldout(image_ctx->cct, 20) << this->m_oid << " " << this->m_object_off | |
482 | << "~" << this->m_object_len << dendl; | |
7c673cae | 483 | |
b32b8144 FG |
484 | image_ctx->object_map_lock.get_write(); |
485 | if (image_ctx->object_map->template aio_update< | |
486 | AbstractObjectWriteRequest<I>, | |
487 | &AbstractObjectWriteRequest<I>::handle_pre_write_object_map_update>( | |
488 | CEPH_NOSNAP, this->m_object_no, new_state, {}, this->m_trace, this)) { | |
489 | image_ctx->object_map_lock.put_write(); | |
490 | image_ctx->snap_lock.put_read(); | |
491 | return; | |
7c673cae FG |
492 | } |
493 | ||
b32b8144 FG |
494 | image_ctx->object_map_lock.put_write(); |
495 | image_ctx->snap_lock.put_read(); | |
496 | write_object(); | |
7c673cae FG |
497 | } |
498 | ||
b32b8144 FG |
499 | template <typename I> |
500 | void AbstractObjectWriteRequest<I>::handle_pre_write_object_map_update(int r) { | |
501 | I *image_ctx = this->m_ictx; | |
502 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
7c673cae | 503 | |
b32b8144 FG |
504 | assert(r == 0); |
505 | write_object(); | |
7c673cae FG |
506 | } |
507 | ||
b32b8144 FG |
508 | template <typename I> |
509 | void AbstractObjectWriteRequest<I>::write_object() { | |
510 | I *image_ctx = this->m_ictx; | |
511 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 512 | |
b32b8144 FG |
513 | librados::ObjectWriteOperation write; |
514 | if (m_copyup_enabled) { | |
515 | ldout(image_ctx->cct, 20) << "guarding write" << dendl; | |
516 | write.assert_exists(); | |
7c673cae FG |
517 | } |
518 | ||
b32b8144 FG |
519 | add_write_hint(&write); |
520 | add_write_ops(&write); | |
521 | assert(write.size() != 0); | |
7c673cae | 522 | |
b32b8144 FG |
523 | librados::AioCompletion *rados_completion = util::create_rados_callback< |
524 | AbstractObjectWriteRequest<I>, | |
525 | &AbstractObjectWriteRequest<I>::handle_write_object>(this); | |
526 | int r = image_ctx->data_ctx.aio_operate( | |
527 | this->m_oid, rados_completion, &write, m_snap_seq, m_snaps, | |
31f18b77 | 528 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); |
7c673cae FG |
529 | assert(r == 0); |
530 | rados_completion->release(); | |
531 | } | |
7c673cae | 532 | |
b32b8144 FG |
533 | template <typename I> |
534 | void AbstractObjectWriteRequest<I>::handle_write_object(int r) { | |
535 | I *image_ctx = this->m_ictx; | |
536 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
537 | ||
538 | r = filter_write_result(r); | |
539 | if (r == -ENOENT) { | |
540 | if (m_copyup_enabled) { | |
541 | copyup(); | |
542 | return; | |
543 | } | |
544 | } else if (r == -EILSEQ) { | |
545 | ldout(image_ctx->cct, 10) << "failed to write object" << dendl; | |
546 | this->finish(r); | |
547 | return; | |
548 | } else if (r < 0) { | |
549 | lderr(image_ctx->cct) << "failed to write object: " << cpp_strerror(r) | |
550 | << dendl; | |
551 | this->finish(r); | |
552 | return; | |
7c673cae FG |
553 | } |
554 | ||
b32b8144 | 555 | post_write_object_map_update(); |
7c673cae FG |
556 | } |
557 | ||
b32b8144 FG |
558 | template <typename I> |
559 | void AbstractObjectWriteRequest<I>::copyup() { | |
560 | I *image_ctx = this->m_ictx; | |
561 | ldout(image_ctx->cct, 20) << dendl; | |
7c673cae | 562 | |
b32b8144 FG |
563 | assert(!m_copyup_in_progress); |
564 | m_copyup_in_progress = true; | |
7c673cae | 565 | |
b32b8144 FG |
566 | image_ctx->copyup_list_lock.Lock(); |
567 | auto it = image_ctx->copyup_list.find(this->m_object_no); | |
568 | if (it == image_ctx->copyup_list.end()) { | |
569 | auto new_req = CopyupRequest<I>::create( | |
570 | image_ctx, this->m_oid, this->m_object_no, | |
571 | std::move(this->m_parent_extents), this->m_trace); | |
572 | this->m_parent_extents.clear(); | |
573 | ||
574 | // make sure to wait on this CopyupRequest | |
575 | new_req->append_request(this); | |
576 | image_ctx->copyup_list[this->m_object_no] = new_req; | |
577 | ||
578 | image_ctx->copyup_list_lock.Unlock(); | |
579 | new_req->send(); | |
31f18b77 | 580 | } else { |
b32b8144 FG |
581 | it->second->append_request(this); |
582 | image_ctx->copyup_list_lock.Unlock(); | |
31f18b77 | 583 | } |
7c673cae FG |
584 | } |
585 | ||
b32b8144 FG |
586 | template <typename I> |
587 | void AbstractObjectWriteRequest<I>::handle_copyup(int r) { | |
588 | I *image_ctx = this->m_ictx; | |
589 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
590 | ||
591 | assert(m_copyup_in_progress); | |
592 | m_copyup_in_progress = false; | |
593 | ||
594 | if (r < 0) { | |
595 | lderr(image_ctx->cct) << "failed to copyup object: " << cpp_strerror(r) | |
596 | << dendl; | |
597 | this->finish(r); | |
598 | return; | |
31f18b77 | 599 | } |
31f18b77 | 600 | |
b32b8144 FG |
601 | if (is_post_copyup_write_required()) { |
602 | write_object(); | |
603 | return; | |
7c673cae | 604 | } |
b32b8144 FG |
605 | |
606 | post_write_object_map_update(); | |
7c673cae FG |
607 | } |
608 | ||
b32b8144 FG |
609 | template <typename I> |
610 | void AbstractObjectWriteRequest<I>::post_write_object_map_update() { | |
611 | I *image_ctx = this->m_ictx; | |
612 | ||
613 | image_ctx->snap_lock.get_read(); | |
614 | if (image_ctx->object_map == nullptr || !is_object_map_update_enabled() || | |
615 | !is_non_existent_post_write_object_map_state()) { | |
616 | image_ctx->snap_lock.put_read(); | |
617 | this->finish(0); | |
618 | return; | |
7c673cae FG |
619 | } |
620 | ||
b32b8144 | 621 | ldout(image_ctx->cct, 20) << dendl; |
7c673cae | 622 | |
b32b8144 FG |
623 | // should have been flushed prior to releasing lock |
624 | assert(image_ctx->exclusive_lock->is_lock_owner()); | |
625 | image_ctx->object_map_lock.get_write(); | |
626 | if (image_ctx->object_map->template aio_update< | |
627 | AbstractObjectWriteRequest<I>, | |
628 | &AbstractObjectWriteRequest<I>::handle_post_write_object_map_update>( | |
629 | CEPH_NOSNAP, this->m_object_no, OBJECT_NONEXISTENT, OBJECT_PENDING, | |
630 | this->m_trace, this)) { | |
631 | image_ctx->object_map_lock.put_write(); | |
632 | image_ctx->snap_lock.put_read(); | |
633 | return; | |
7c673cae FG |
634 | } |
635 | ||
b32b8144 FG |
636 | image_ctx->object_map_lock.put_write(); |
637 | image_ctx->snap_lock.put_read(); | |
638 | this->finish(0); | |
7c673cae FG |
639 | } |
640 | ||
b32b8144 FG |
641 | template <typename I> |
642 | void AbstractObjectWriteRequest<I>::handle_post_write_object_map_update(int r) { | |
643 | I *image_ctx = this->m_ictx; | |
644 | ldout(image_ctx->cct, 20) << "r=" << r << dendl; | |
c07f9fc5 | 645 | |
b32b8144 FG |
646 | assert(r == 0); |
647 | this->finish(0); | |
648 | } | |
c07f9fc5 | 649 | |
b32b8144 FG |
650 | template <typename I> |
651 | void ObjectWriteRequest<I>::add_write_ops(librados::ObjectWriteOperation *wr) { | |
652 | if (this->m_full_object) { | |
653 | wr->write_full(m_write_data); | |
c07f9fc5 | 654 | } else { |
b32b8144 | 655 | wr->write(this->m_object_off, m_write_data); |
c07f9fc5 FG |
656 | } |
657 | wr->set_op_flags2(m_op_flags); | |
658 | } | |
659 | ||
b32b8144 FG |
660 | template <typename I> |
661 | void ObjectWriteSameRequest<I>::add_write_ops( | |
662 | librados::ObjectWriteOperation *wr) { | |
663 | wr->writesame(this->m_object_off, this->m_object_len, m_write_data); | |
664 | wr->set_op_flags2(m_op_flags); | |
c07f9fc5 FG |
665 | } |
666 | ||
b32b8144 FG |
667 | template <typename I> |
668 | void ObjectCompareAndWriteRequest<I>::add_write_ops( | |
669 | librados::ObjectWriteOperation *wr) { | |
670 | wr->cmpext(this->m_object_off, m_cmp_bl, nullptr); | |
c07f9fc5 | 671 | |
b32b8144 FG |
672 | if (this->m_full_object) { |
673 | wr->write_full(m_write_bl); | |
674 | } else { | |
675 | wr->write(this->m_object_off, m_write_bl); | |
676 | } | |
677 | wr->set_op_flags2(m_op_flags); | |
678 | } | |
c07f9fc5 | 679 | |
b32b8144 FG |
680 | template <typename I> |
681 | int ObjectCompareAndWriteRequest<I>::filter_write_result(int r) const { | |
682 | if (r <= -MAX_ERRNO) { | |
683 | I *image_ctx = this->m_ictx; | |
684 | Extents image_extents; | |
685 | ||
686 | // object extent compare mismatch | |
687 | uint64_t offset = -MAX_ERRNO - r; | |
688 | Striper::extent_to_file(image_ctx->cct, &image_ctx->layout, | |
689 | this->m_object_no, offset, this->m_object_len, | |
690 | image_extents); | |
691 | assert(image_extents.size() == 1); | |
692 | ||
693 | if (m_mismatch_offset) { | |
694 | *m_mismatch_offset = image_extents[0].first; | |
c07f9fc5 | 695 | } |
b32b8144 | 696 | r = -EILSEQ; |
c07f9fc5 | 697 | } |
b32b8144 | 698 | return r; |
c07f9fc5 FG |
699 | } |
700 | ||
7c673cae FG |
701 | } // namespace io |
702 | } // namespace librbd | |
703 | ||
704 | template class librbd::io::ObjectRequest<librbd::ImageCtx>; | |
705 | template class librbd::io::ObjectReadRequest<librbd::ImageCtx>; | |
b32b8144 FG |
706 | template class librbd::io::AbstractObjectWriteRequest<librbd::ImageCtx>; |
707 | template class librbd::io::ObjectWriteRequest<librbd::ImageCtx>; | |
708 | template class librbd::io::ObjectDiscardRequest<librbd::ImageCtx>; | |
709 | template class librbd::io::ObjectWriteSameRequest<librbd::ImageCtx>; | |
710 | template class librbd::io::ObjectCompareAndWriteRequest<librbd::ImageCtx>; |