]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ObjectRequest.h" | |
5 | #include "common/ceph_context.h" | |
6 | #include "common/dout.h" | |
7 | #include "common/errno.h" | |
8 | #include "common/Mutex.h" | |
9 | #include "common/RWLock.h" | |
10 | #include "common/WorkQueue.h" | |
11 | #include "include/Context.h" | |
c07f9fc5 | 12 | #include "include/err.h" |
7c673cae FG |
13 | |
14 | #include "librbd/ExclusiveLock.h" | |
15 | #include "librbd/ImageCtx.h" | |
16 | #include "librbd/ObjectMap.h" | |
17 | #include "librbd/Utils.h" | |
18 | #include "librbd/io/AioCompletion.h" | |
19 | #include "librbd/io/CopyupRequest.h" | |
20 | #include "librbd/io/ImageRequest.h" | |
21 | #include "librbd/io/ReadResult.h" | |
22 | ||
23 | #include <boost/bind.hpp> | |
24 | #include <boost/optional.hpp> | |
25 | ||
26 | #define dout_subsys ceph_subsys_rbd | |
27 | #undef dout_prefix | |
28 | #define dout_prefix *_dout << "librbd::io::ObjectRequest: " << this \ | |
29 | << " " << __func__ << ": " | |
30 | ||
31 | namespace librbd { | |
32 | namespace io { | |
33 | ||
34 | template <typename I> | |
35 | ObjectRequest<I>* | |
36 | ObjectRequest<I>::create_remove(I *ictx, const std::string &oid, | |
37 | uint64_t object_no, | |
38 | const ::SnapContext &snapc, | |
31f18b77 | 39 | const ZTracer::Trace &parent_trace, |
7c673cae FG |
40 | Context *completion) { |
41 | return new ObjectRemoveRequest(util::get_image_ctx(ictx), oid, object_no, | |
31f18b77 | 42 | snapc, parent_trace, completion); |
7c673cae FG |
43 | } |
44 | ||
45 | template <typename I> | |
46 | ObjectRequest<I>* | |
47 | ObjectRequest<I>::create_truncate(I *ictx, const std::string &oid, | |
48 | uint64_t object_no, uint64_t object_off, | |
49 | const ::SnapContext &snapc, | |
31f18b77 FG |
50 | const ZTracer::Trace &parent_trace, |
51 | Context *completion) { | |
7c673cae | 52 | return new ObjectTruncateRequest(util::get_image_ctx(ictx), oid, object_no, |
31f18b77 | 53 | object_off, snapc, parent_trace, completion); |
7c673cae FG |
54 | } |
55 | ||
56 | template <typename I> | |
57 | ObjectRequest<I>* | |
58 | ObjectRequest<I>::create_write(I *ictx, const std::string &oid, | |
59 | uint64_t object_no, uint64_t object_off, | |
60 | const ceph::bufferlist &data, | |
31f18b77 FG |
61 | const ::SnapContext &snapc, int op_flags, |
62 | const ZTracer::Trace &parent_trace, | |
63 | Context *completion) { | |
7c673cae | 64 | return new ObjectWriteRequest(util::get_image_ctx(ictx), oid, object_no, |
31f18b77 FG |
65 | object_off, data, snapc, op_flags, parent_trace, |
66 | completion); | |
7c673cae FG |
67 | } |
68 | ||
69 | template <typename I> | |
70 | ObjectRequest<I>* | |
71 | ObjectRequest<I>::create_zero(I *ictx, const std::string &oid, | |
72 | uint64_t object_no, uint64_t object_off, | |
73 | uint64_t object_len, | |
74 | const ::SnapContext &snapc, | |
31f18b77 | 75 | const ZTracer::Trace &parent_trace, |
7c673cae FG |
76 | Context *completion) { |
77 | return new ObjectZeroRequest(util::get_image_ctx(ictx), oid, object_no, | |
31f18b77 FG |
78 | object_off, object_len, snapc, parent_trace, |
79 | completion); | |
7c673cae FG |
80 | } |
81 | ||
82 | template <typename I> | |
83 | ObjectRequest<I>* | |
84 | ObjectRequest<I>::create_writesame(I *ictx, const std::string &oid, | |
85 | uint64_t object_no, uint64_t object_off, | |
86 | uint64_t object_len, | |
87 | const ceph::bufferlist &data, | |
31f18b77 FG |
88 | const ::SnapContext &snapc, int op_flags, |
89 | const ZTracer::Trace &parent_trace, | |
90 | Context *completion) { | |
7c673cae FG |
91 | return new ObjectWriteSameRequest(util::get_image_ctx(ictx), oid, object_no, |
92 | object_off, object_len, data, snapc, | |
31f18b77 | 93 | op_flags, parent_trace, completion); |
7c673cae FG |
94 | } |
95 | ||
c07f9fc5 FG |
96 | template <typename I> |
97 | ObjectRequest<I>* | |
98 | ObjectRequest<I>::create_compare_and_write(I *ictx, const std::string &oid, | |
99 | uint64_t object_no, uint64_t object_off, | |
100 | const ceph::bufferlist &cmp_data, | |
101 | const ceph::bufferlist &write_data, | |
102 | const ::SnapContext &snapc, | |
103 | uint64_t *mismatch_offset, | |
104 | int op_flags, | |
105 | const ZTracer::Trace &parent_trace, | |
106 | Context *completion) { | |
107 | return new ObjectCompareAndWriteRequest(util::get_image_ctx(ictx), oid, | |
108 | object_no, object_off, cmp_data, | |
109 | write_data, snapc, mismatch_offset, | |
110 | op_flags, parent_trace, completion); | |
111 | } | |
112 | ||
7c673cae FG |
113 | template <typename I> |
114 | ObjectRequest<I>::ObjectRequest(ImageCtx *ictx, const std::string &oid, | |
115 | uint64_t objectno, uint64_t off, | |
116 | uint64_t len, librados::snap_t snap_id, | |
31f18b77 FG |
117 | bool hide_enoent, const char *trace_name, |
118 | const ZTracer::Trace &trace, | |
119 | Context *completion) | |
7c673cae FG |
120 | : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), |
121 | m_object_len(len), m_snap_id(snap_id), m_completion(completion), | |
31f18b77 FG |
122 | m_hide_enoent(hide_enoent), |
123 | m_trace(util::create_trace(*ictx, "", trace)) { | |
124 | if (m_trace.valid()) { | |
125 | m_trace.copy_name(trace_name + std::string(" ") + oid); | |
126 | m_trace.event("start"); | |
127 | } | |
7c673cae FG |
128 | |
129 | Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, | |
130 | 0, m_ictx->layout.object_size, m_parent_extents); | |
131 | ||
132 | RWLock::RLocker snap_locker(m_ictx->snap_lock); | |
133 | RWLock::RLocker parent_locker(m_ictx->parent_lock); | |
134 | compute_parent_extents(); | |
135 | } | |
136 | ||
137 | template <typename I> | |
138 | void ObjectRequest<I>::complete(int r) | |
139 | { | |
140 | if (should_complete(r)) { | |
141 | ldout(m_ictx->cct, 20) << dendl; | |
142 | if (m_hide_enoent && r == -ENOENT) { | |
143 | r = 0; | |
144 | } | |
145 | m_completion->complete(r); | |
146 | delete this; | |
147 | } | |
148 | } | |
149 | ||
150 | template <typename I> | |
151 | bool ObjectRequest<I>::compute_parent_extents() { | |
152 | assert(m_ictx->snap_lock.is_locked()); | |
153 | assert(m_ictx->parent_lock.is_locked()); | |
154 | ||
155 | uint64_t parent_overlap; | |
156 | int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); | |
157 | if (r < 0) { | |
158 | // NOTE: it's possible for a snapshot to be deleted while we are | |
159 | // still reading from it | |
160 | lderr(m_ictx->cct) << "failed to retrieve parent overlap: " | |
161 | << cpp_strerror(r) | |
162 | << dendl; | |
163 | m_has_parent = false; | |
164 | m_parent_extents.clear(); | |
165 | return false; | |
166 | } | |
167 | ||
168 | uint64_t object_overlap = m_ictx->prune_parent_extents( | |
169 | m_parent_extents, parent_overlap); | |
170 | if (object_overlap > 0) { | |
171 | ldout(m_ictx->cct, 20) << "overlap " << parent_overlap << " " | |
172 | << "extents " << m_parent_extents << dendl; | |
173 | m_has_parent = !m_parent_extents.empty(); | |
174 | return true; | |
175 | } | |
176 | return false; | |
177 | } | |
178 | ||
179 | static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) { | |
180 | assert(ictx->snap_lock.is_locked()); | |
181 | return (ictx->clone_copy_on_read && | |
182 | !ictx->read_only && snap_id == CEPH_NOSNAP && | |
183 | (ictx->exclusive_lock == nullptr || | |
184 | ictx->exclusive_lock->is_lock_owner())); | |
185 | } | |
186 | ||
187 | /** read **/ | |
188 | ||
189 | template <typename I> | |
190 | ObjectReadRequest<I>::ObjectReadRequest(I *ictx, const std::string &oid, | |
191 | uint64_t objectno, uint64_t offset, | |
192 | uint64_t len, Extents& be, | |
193 | librados::snap_t snap_id, bool sparse, | |
31f18b77 FG |
194 | int op_flags, |
195 | const ZTracer::Trace &parent_trace, | |
196 | Context *completion) | |
7c673cae | 197 | : ObjectRequest<I>(util::get_image_ctx(ictx), oid, objectno, offset, len, |
31f18b77 | 198 | snap_id, false, "read", parent_trace, completion), |
7c673cae FG |
199 | m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse), |
200 | m_op_flags(op_flags), m_state(LIBRBD_AIO_READ_FLAT) { | |
201 | guard_read(); | |
202 | } | |
203 | ||
204 | template <typename I> | |
205 | void ObjectReadRequest<I>::guard_read() | |
206 | { | |
207 | ImageCtx *image_ctx = this->m_ictx; | |
208 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
209 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
210 | ||
211 | if (this->has_parent()) { | |
212 | ldout(image_ctx->cct, 20) << "guarding read" << dendl; | |
213 | m_state = LIBRBD_AIO_READ_GUARD; | |
214 | } | |
215 | } | |
216 | ||
217 | template <typename I> | |
218 | bool ObjectReadRequest<I>::should_complete(int r) | |
219 | { | |
220 | ImageCtx *image_ctx = this->m_ictx; | |
221 | ldout(image_ctx->cct, 20) << this->m_oid << " " | |
222 | << this->m_object_off << "~" << this->m_object_len | |
223 | << " r = " << r << dendl; | |
224 | ||
225 | bool finished = true; | |
226 | ||
227 | switch (m_state) { | |
228 | case LIBRBD_AIO_READ_GUARD: | |
229 | ldout(image_ctx->cct, 20) << "READ_CHECK_GUARD" << dendl; | |
230 | ||
231 | // This is the step to read from parent | |
232 | if (!m_tried_parent && r == -ENOENT) { | |
233 | { | |
234 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
235 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
236 | if (image_ctx->parent == NULL) { | |
237 | ldout(image_ctx->cct, 20) << "parent is gone; do nothing" << dendl; | |
7c673cae FG |
238 | break; |
239 | } | |
240 | ||
241 | // calculate reverse mapping onto the image | |
242 | vector<pair<uint64_t,uint64_t> > parent_extents; | |
243 | Striper::extent_to_file(image_ctx->cct, &image_ctx->layout, | |
244 | this->m_object_no, this->m_object_off, | |
245 | this->m_object_len, parent_extents); | |
246 | ||
247 | uint64_t parent_overlap = 0; | |
248 | uint64_t object_overlap = 0; | |
249 | r = image_ctx->get_parent_overlap(this->m_snap_id, &parent_overlap); | |
250 | if (r == 0) { | |
251 | object_overlap = image_ctx->prune_parent_extents(parent_extents, | |
252 | parent_overlap); | |
253 | } | |
254 | ||
255 | if (object_overlap > 0) { | |
256 | m_tried_parent = true; | |
257 | if (is_copy_on_read(image_ctx, this->m_snap_id)) { | |
258 | m_state = LIBRBD_AIO_READ_COPYUP; | |
259 | } | |
260 | ||
261 | read_from_parent(std::move(parent_extents)); | |
262 | finished = false; | |
263 | } | |
264 | } | |
265 | } | |
266 | break; | |
267 | case LIBRBD_AIO_READ_COPYUP: | |
268 | ldout(image_ctx->cct, 20) << "READ_COPYUP" << dendl; | |
269 | // This is the extra step for copy-on-read: kick off an asynchronous copyup. | |
270 | // It is different from copy-on-write as asynchronous copyup will finish | |
271 | // by itself so state won't go back to LIBRBD_AIO_READ_GUARD. | |
272 | ||
273 | assert(m_tried_parent); | |
274 | if (r > 0) { | |
275 | // If read entire object from parent success and CoR is possible, kick | |
276 | // off a asynchronous copyup. This approach minimizes the latency | |
277 | // impact. | |
278 | send_copyup(); | |
279 | } | |
280 | break; | |
281 | case LIBRBD_AIO_READ_FLAT: | |
282 | ldout(image_ctx->cct, 20) << "READ_FLAT" << dendl; | |
283 | // The read content should be deposit in m_read_data | |
284 | break; | |
285 | default: | |
286 | lderr(image_ctx->cct) << "invalid request state: " << m_state << dendl; | |
287 | ceph_abort(); | |
288 | } | |
289 | ||
290 | return finished; | |
291 | } | |
292 | ||
293 | template <typename I> | |
294 | void ObjectReadRequest<I>::send() { | |
295 | ImageCtx *image_ctx = this->m_ictx; | |
296 | ldout(image_ctx->cct, 20) << this->m_oid << " " << this->m_object_off | |
297 | << "~" << this->m_object_len | |
298 | << dendl; | |
299 | ||
300 | { | |
301 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
302 | ||
303 | // send read request to parent if the object doesn't exist locally | |
304 | if (image_ctx->object_map != nullptr && | |
305 | !image_ctx->object_map->object_may_exist(this->m_object_no)) { | |
306 | image_ctx->op_work_queue->queue(util::create_context_callback< | |
307 | ObjectRequest<I> >(this), -ENOENT); | |
308 | return; | |
309 | } | |
310 | } | |
311 | ||
312 | librados::ObjectReadOperation op; | |
313 | int flags = image_ctx->get_read_flags(this->m_snap_id); | |
314 | if (m_sparse) { | |
315 | op.sparse_read(this->m_object_off, this->m_object_len, &m_ext_map, | |
316 | &m_read_data, nullptr); | |
317 | } else { | |
318 | op.read(this->m_object_off, this->m_object_len, &m_read_data, nullptr); | |
319 | } | |
320 | op.set_op_flags2(m_op_flags); | |
321 | ||
322 | librados::AioCompletion *rados_completion = | |
323 | util::create_rados_callback(this); | |
31f18b77 FG |
324 | int r = image_ctx->data_ctx.aio_operate( |
325 | this->m_oid, rados_completion, &op, flags, nullptr, | |
326 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); | |
7c673cae FG |
327 | assert(r == 0); |
328 | ||
329 | rados_completion->release(); | |
330 | } | |
331 | ||
332 | template <typename I> | |
333 | void ObjectReadRequest<I>::send_copyup() | |
334 | { | |
335 | ImageCtx *image_ctx = this->m_ictx; | |
336 | ldout(image_ctx->cct, 20) << this->m_oid << " " << this->m_object_off | |
337 | << "~" << this->m_object_len << dendl; | |
338 | ||
339 | { | |
340 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
341 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
342 | if (!this->compute_parent_extents() || | |
343 | (image_ctx->exclusive_lock != nullptr && | |
344 | !image_ctx->exclusive_lock->is_lock_owner())) { | |
345 | return; | |
346 | } | |
347 | } | |
348 | ||
349 | Mutex::Locker copyup_locker(image_ctx->copyup_list_lock); | |
350 | map<uint64_t, CopyupRequest*>::iterator it = | |
351 | image_ctx->copyup_list.find(this->m_object_no); | |
352 | if (it == image_ctx->copyup_list.end()) { | |
353 | // create and kick off a CopyupRequest | |
354 | CopyupRequest *new_req = new CopyupRequest( | |
355 | image_ctx, this->m_oid, this->m_object_no, | |
31f18b77 | 356 | std::move(this->m_parent_extents), this->m_trace); |
7c673cae FG |
357 | this->m_parent_extents.clear(); |
358 | ||
359 | image_ctx->copyup_list[this->m_object_no] = new_req; | |
360 | new_req->send(); | |
361 | } | |
362 | } | |
363 | ||
364 | template <typename I> | |
365 | void ObjectReadRequest<I>::read_from_parent(Extents&& parent_extents) | |
366 | { | |
367 | ImageCtx *image_ctx = this->m_ictx; | |
368 | AioCompletion *parent_completion = AioCompletion::create_and_start< | |
369 | ObjectRequest<I> >(this, image_ctx, AIO_TYPE_READ); | |
370 | ||
371 | ldout(image_ctx->cct, 20) << "parent completion " << parent_completion | |
372 | << " extents " << parent_extents << dendl; | |
373 | ImageRequest<>::aio_read(image_ctx->parent, parent_completion, | |
374 | std::move(parent_extents), | |
31f18b77 | 375 | ReadResult{&m_read_data}, 0, this->m_trace); |
7c673cae FG |
376 | } |
377 | ||
378 | /** write **/ | |
379 | ||
380 | AbstractObjectWriteRequest::AbstractObjectWriteRequest(ImageCtx *ictx, | |
381 | const std::string &oid, | |
382 | uint64_t object_no, | |
383 | uint64_t object_off, | |
384 | uint64_t len, | |
385 | const ::SnapContext &snapc, | |
31f18b77 FG |
386 | bool hide_enoent, |
387 | const char *trace_name, | |
388 | const ZTracer::Trace &parent_trace, | |
389 | Context *completion) | |
7c673cae | 390 | : ObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, |
31f18b77 | 391 | hide_enoent, trace_name, parent_trace, completion), |
7c673cae FG |
392 | m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val) |
393 | { | |
394 | m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); | |
395 | } | |
396 | ||
397 | void AbstractObjectWriteRequest::guard_write() | |
398 | { | |
399 | if (has_parent()) { | |
400 | m_state = LIBRBD_AIO_WRITE_GUARD; | |
401 | m_write.assert_exists(); | |
402 | ldout(m_ictx->cct, 20) << "guarding write" << dendl; | |
403 | } | |
404 | } | |
405 | ||
406 | bool AbstractObjectWriteRequest::should_complete(int r) | |
407 | { | |
408 | ldout(m_ictx->cct, 20) << get_op_type() << m_oid << " " | |
409 | << m_object_off << "~" << m_object_len | |
410 | << " r = " << r << dendl; | |
411 | ||
412 | bool finished = true; | |
413 | switch (m_state) { | |
414 | case LIBRBD_AIO_WRITE_PRE: | |
415 | ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl; | |
416 | if (r < 0) { | |
417 | return true; | |
418 | } | |
419 | ||
420 | send_write_op(); | |
421 | finished = false; | |
422 | break; | |
423 | ||
424 | case LIBRBD_AIO_WRITE_POST: | |
425 | ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl; | |
426 | finished = true; | |
427 | break; | |
428 | ||
429 | case LIBRBD_AIO_WRITE_GUARD: | |
430 | ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl; | |
431 | ||
432 | if (r == -ENOENT) { | |
433 | handle_write_guard(); | |
434 | finished = false; | |
435 | break; | |
436 | } else if (r < 0) { | |
437 | // pass the error code to the finish context | |
438 | m_state = LIBRBD_AIO_WRITE_ERROR; | |
439 | complete(r); | |
440 | finished = false; | |
441 | break; | |
442 | } | |
443 | ||
444 | finished = send_post_object_map_update(); | |
445 | break; | |
446 | ||
447 | case LIBRBD_AIO_WRITE_COPYUP: | |
448 | ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl; | |
449 | if (r < 0) { | |
450 | m_state = LIBRBD_AIO_WRITE_ERROR; | |
451 | complete(r); | |
452 | finished = false; | |
453 | } else { | |
454 | finished = send_post_object_map_update(); | |
455 | } | |
456 | break; | |
457 | ||
458 | case LIBRBD_AIO_WRITE_FLAT: | |
459 | ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl; | |
460 | ||
461 | finished = send_post_object_map_update(); | |
462 | break; | |
463 | ||
464 | case LIBRBD_AIO_WRITE_ERROR: | |
465 | assert(r < 0); | |
466 | lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r) << dendl; | |
467 | break; | |
468 | ||
469 | default: | |
470 | lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; | |
471 | ceph_abort(); | |
472 | } | |
473 | ||
474 | return finished; | |
475 | } | |
476 | ||
477 | void AbstractObjectWriteRequest::send() { | |
478 | ldout(m_ictx->cct, 20) << get_op_type() << " " << m_oid << " " | |
479 | << m_object_off << "~" << m_object_len << dendl; | |
480 | { | |
481 | RWLock::RLocker snap_lock(m_ictx->snap_lock); | |
482 | if (m_ictx->object_map == nullptr) { | |
483 | m_object_exist = true; | |
484 | } else { | |
485 | // should have been flushed prior to releasing lock | |
486 | assert(m_ictx->exclusive_lock->is_lock_owner()); | |
487 | m_object_exist = m_ictx->object_map->object_may_exist(m_object_no); | |
488 | } | |
489 | } | |
490 | ||
491 | send_write(); | |
492 | } | |
493 | ||
494 | void AbstractObjectWriteRequest::send_pre_object_map_update() { | |
495 | ldout(m_ictx->cct, 20) << dendl; | |
496 | ||
497 | { | |
498 | RWLock::RLocker snap_lock(m_ictx->snap_lock); | |
499 | if (m_ictx->object_map != nullptr) { | |
500 | uint8_t new_state; | |
501 | pre_object_map_update(&new_state); | |
502 | RWLock::WLocker object_map_locker(m_ictx->object_map_lock); | |
503 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off | |
504 | << "~" << m_object_len << dendl; | |
505 | m_state = LIBRBD_AIO_WRITE_PRE; | |
506 | ||
507 | if (m_ictx->object_map->aio_update<ObjectRequest>( | |
31f18b77 | 508 | CEPH_NOSNAP, m_object_no, new_state, {}, this->m_trace, this)) { |
7c673cae FG |
509 | return; |
510 | } | |
511 | } | |
512 | } | |
513 | ||
514 | send_write_op(); | |
515 | } | |
516 | ||
517 | bool AbstractObjectWriteRequest::send_post_object_map_update() { | |
518 | ldout(m_ictx->cct, 20) << dendl; | |
519 | ||
520 | RWLock::RLocker snap_locker(m_ictx->snap_lock); | |
521 | if (m_ictx->object_map == nullptr || !post_object_map_update()) { | |
522 | return true; | |
523 | } | |
524 | ||
525 | // should have been flushed prior to releasing lock | |
526 | assert(m_ictx->exclusive_lock->is_lock_owner()); | |
527 | ||
528 | RWLock::WLocker object_map_locker(m_ictx->object_map_lock); | |
529 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off | |
530 | << "~" << m_object_len << dendl; | |
531 | m_state = LIBRBD_AIO_WRITE_POST; | |
532 | ||
533 | if (m_ictx->object_map->aio_update<ObjectRequest>( | |
31f18b77 FG |
534 | CEPH_NOSNAP, m_object_no, OBJECT_NONEXISTENT, OBJECT_PENDING, |
535 | this->m_trace, this)) { | |
7c673cae FG |
536 | return false; |
537 | } | |
538 | ||
539 | return true; | |
540 | } | |
541 | ||
542 | void AbstractObjectWriteRequest::send_write() { | |
543 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off << "~" << m_object_len | |
544 | << " object exist " << m_object_exist << dendl; | |
545 | ||
546 | if (!m_object_exist && has_parent()) { | |
547 | m_state = LIBRBD_AIO_WRITE_GUARD; | |
548 | handle_write_guard(); | |
549 | } else { | |
550 | send_pre_object_map_update(); | |
551 | } | |
552 | } | |
553 | ||
554 | void AbstractObjectWriteRequest::send_copyup() | |
555 | { | |
556 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off | |
557 | << "~" << m_object_len << dendl; | |
558 | m_state = LIBRBD_AIO_WRITE_COPYUP; | |
559 | ||
560 | m_ictx->copyup_list_lock.Lock(); | |
561 | map<uint64_t, CopyupRequest*>::iterator it = | |
562 | m_ictx->copyup_list.find(m_object_no); | |
563 | if (it == m_ictx->copyup_list.end()) { | |
564 | CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, | |
565 | m_object_no, | |
31f18b77 FG |
566 | std::move(m_parent_extents), |
567 | this->m_trace); | |
7c673cae FG |
568 | m_parent_extents.clear(); |
569 | ||
570 | // make sure to wait on this CopyupRequest | |
571 | new_req->append_request(this); | |
572 | m_ictx->copyup_list[m_object_no] = new_req; | |
573 | ||
574 | m_ictx->copyup_list_lock.Unlock(); | |
575 | new_req->send(); | |
576 | } else { | |
577 | it->second->append_request(this); | |
578 | m_ictx->copyup_list_lock.Unlock(); | |
579 | } | |
580 | } | |
581 | void AbstractObjectWriteRequest::send_write_op() | |
582 | { | |
583 | m_state = LIBRBD_AIO_WRITE_FLAT; | |
584 | if (m_guard) { | |
585 | guard_write(); | |
586 | } | |
587 | ||
31f18b77 | 588 | add_write_ops(&m_write, true); |
7c673cae FG |
589 | assert(m_write.size() != 0); |
590 | ||
591 | librados::AioCompletion *rados_completion = | |
592 | util::create_rados_callback(this); | |
31f18b77 FG |
593 | int r = m_ictx->data_ctx.aio_operate( |
594 | m_oid, rados_completion, &m_write, m_snap_seq, m_snaps, | |
595 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); | |
7c673cae FG |
596 | assert(r == 0); |
597 | rados_completion->release(); | |
598 | } | |
599 | void AbstractObjectWriteRequest::handle_write_guard() | |
600 | { | |
601 | bool has_parent; | |
602 | { | |
603 | RWLock::RLocker snap_locker(m_ictx->snap_lock); | |
604 | RWLock::RLocker parent_locker(m_ictx->parent_lock); | |
605 | has_parent = compute_parent_extents(); | |
606 | } | |
607 | // If parent still exists, overlap might also have changed. | |
608 | if (has_parent) { | |
609 | send_copyup(); | |
610 | } else { | |
611 | // parent may have disappeared -- send original write again | |
612 | ldout(m_ictx->cct, 20) << "should_complete(" << this | |
613 | << "): parent overlap now 0" << dendl; | |
614 | send_write(); | |
615 | } | |
616 | } | |
617 | ||
31f18b77 FG |
618 | void ObjectWriteRequest::add_write_ops(librados::ObjectWriteOperation *wr, |
619 | bool set_hints) { | |
7c673cae | 620 | RWLock::RLocker snap_locker(m_ictx->snap_lock); |
31f18b77 | 621 | if (set_hints && m_ictx->enable_alloc_hint && |
7c673cae FG |
622 | (m_ictx->object_map == nullptr || !m_object_exist)) { |
623 | wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); | |
624 | } | |
625 | ||
626 | if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) { | |
627 | wr->write_full(m_write_data); | |
628 | } else { | |
629 | wr->write(m_object_off, m_write_data); | |
630 | } | |
631 | wr->set_op_flags2(m_op_flags); | |
632 | } | |
633 | ||
634 | void ObjectWriteRequest::send_write() { | |
635 | bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size()); | |
636 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off << "~" << m_object_len | |
637 | << " object exist " << m_object_exist | |
638 | << " write_full " << write_full << dendl; | |
639 | if (write_full && !has_parent()) { | |
640 | m_guard = false; | |
641 | } | |
642 | ||
643 | AbstractObjectWriteRequest::send_write(); | |
644 | } | |
645 | ||
646 | void ObjectRemoveRequest::guard_write() { | |
647 | // do nothing to disable write guard only if deep-copyup not required | |
648 | RWLock::RLocker snap_locker(m_ictx->snap_lock); | |
649 | if (!m_ictx->snaps.empty()) { | |
650 | AbstractObjectWriteRequest::guard_write(); | |
651 | } | |
652 | } | |
653 | void ObjectRemoveRequest::send_write() { | |
31f18b77 FG |
654 | ldout(m_ictx->cct, 20) << m_oid << " remove " << " object exist " |
655 | << m_object_exist << dendl; | |
656 | if (!m_object_exist && !has_parent()) { | |
657 | m_state = LIBRBD_AIO_WRITE_FLAT; | |
658 | Context *ctx = util::create_context_callback<ObjectRequest>(this); | |
659 | m_ictx->op_work_queue->queue(ctx, 0); | |
660 | } else { | |
661 | send_pre_object_map_update(); | |
662 | } | |
7c673cae FG |
663 | } |
664 | ||
665 | void ObjectTruncateRequest::send_write() { | |
31f18b77 FG |
666 | ldout(m_ictx->cct, 20) << m_oid << " truncate " << m_object_off |
667 | << " object exist " << m_object_exist << dendl; | |
668 | if (!m_object_exist && !has_parent()) { | |
669 | m_state = LIBRBD_AIO_WRITE_FLAT; | |
670 | Context *ctx = util::create_context_callback<ObjectRequest>(this); | |
671 | m_ictx->op_work_queue->queue(ctx, 0); | |
672 | } else { | |
673 | AbstractObjectWriteRequest::send_write(); | |
674 | } | |
675 | } | |
676 | ||
677 | void ObjectZeroRequest::send_write() { | |
678 | ldout(m_ictx->cct, 20) << m_oid << " zero " << m_object_off << "~" | |
679 | << m_object_len << " object exist " << m_object_exist | |
680 | << dendl; | |
681 | if (!m_object_exist && !has_parent()) { | |
7c673cae FG |
682 | m_state = LIBRBD_AIO_WRITE_FLAT; |
683 | Context *ctx = util::create_context_callback<ObjectRequest>(this); | |
684 | m_ictx->op_work_queue->queue(ctx, 0); | |
685 | } else { | |
686 | AbstractObjectWriteRequest::send_write(); | |
687 | } | |
688 | } | |
689 | ||
31f18b77 FG |
690 | void ObjectWriteSameRequest::add_write_ops(librados::ObjectWriteOperation *wr, |
691 | bool set_hints) { | |
7c673cae | 692 | RWLock::RLocker snap_locker(m_ictx->snap_lock); |
31f18b77 | 693 | if (set_hints && m_ictx->enable_alloc_hint && |
7c673cae FG |
694 | (m_ictx->object_map == nullptr || !m_object_exist)) { |
695 | wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); | |
696 | } | |
697 | ||
698 | wr->writesame(m_object_off, m_object_len, m_write_data); | |
699 | wr->set_op_flags2(m_op_flags); | |
700 | } | |
701 | ||
702 | void ObjectWriteSameRequest::send_write() { | |
703 | bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size()); | |
704 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off << "~" << m_object_len | |
705 | << " write_full " << write_full << dendl; | |
706 | if (write_full && !has_parent()) { | |
707 | m_guard = false; | |
708 | } | |
709 | ||
710 | AbstractObjectWriteRequest::send_write(); | |
711 | } | |
712 | ||
c07f9fc5 FG |
713 | void ObjectCompareAndWriteRequest::add_write_ops(librados::ObjectWriteOperation *wr, |
714 | bool set_hints) { | |
715 | RWLock::RLocker snap_locker(m_ictx->snap_lock); | |
716 | ||
717 | if (set_hints && m_ictx->enable_alloc_hint && | |
718 | (m_ictx->object_map == nullptr || !m_object_exist)) { | |
719 | wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); | |
720 | } | |
721 | ||
722 | // add cmpext ops | |
723 | wr->cmpext(m_object_off, m_cmp_bl, nullptr); | |
724 | ||
725 | if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) { | |
726 | wr->write_full(m_write_bl); | |
727 | } else { | |
728 | wr->write(m_object_off, m_write_bl); | |
729 | } | |
730 | wr->set_op_flags2(m_op_flags); | |
731 | } | |
732 | ||
733 | void ObjectCompareAndWriteRequest::send_write() { | |
734 | bool write_full = (m_object_off == 0 && | |
735 | m_object_len == m_ictx->get_object_size()); | |
736 | ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " " | |
737 | << m_object_off << "~" << m_object_len | |
738 | << " object exist " << m_object_exist | |
739 | << " write_full " << write_full << dendl; | |
740 | if (write_full && !has_parent()) { | |
741 | m_guard = false; | |
742 | } | |
743 | ||
744 | AbstractObjectWriteRequest::send_write(); | |
745 | } | |
746 | ||
747 | void ObjectCompareAndWriteRequest::complete(int r) | |
748 | { | |
749 | if (should_complete(r)) { | |
750 | ImageCtx *image_ctx = this->m_ictx; | |
751 | ldout(m_ictx->cct, 20) << "complete " << this << dendl; | |
752 | ||
753 | if (this->m_hide_enoent && r == -ENOENT) { | |
754 | r = 0; | |
755 | } | |
756 | ||
757 | vector<pair<uint64_t,uint64_t> > file_extents; | |
758 | if (r <= -MAX_ERRNO) { | |
759 | // object extent compare mismatch | |
760 | uint64_t offset = -MAX_ERRNO - r; | |
761 | Striper::extent_to_file(image_ctx->cct, &image_ctx->layout, | |
762 | this->m_object_no, offset, this->m_object_len, | |
763 | file_extents); | |
764 | ||
765 | assert(file_extents.size() == 1); | |
766 | ||
767 | uint64_t mismatch_offset = file_extents[0].first; | |
768 | if (this->m_mismatch_offset) | |
769 | *this->m_mismatch_offset = mismatch_offset; | |
770 | r = -EILSEQ; | |
771 | } | |
772 | ||
773 | //compare and write object extent error | |
774 | m_completion->complete(r); | |
775 | delete this; | |
776 | } | |
777 | } | |
778 | ||
7c673cae FG |
779 | } // namespace io |
780 | } // namespace librbd | |
781 | ||
782 | template class librbd::io::ObjectRequest<librbd::ImageCtx>; | |
783 | template class librbd::io::ObjectReadRequest<librbd::ImageCtx>; |