]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/ObjectRequest.h" | |
5 | #include "common/ceph_context.h" | |
6 | #include "common/dout.h" | |
7 | #include "common/errno.h" | |
8 | #include "common/Mutex.h" | |
9 | #include "common/RWLock.h" | |
10 | #include "common/WorkQueue.h" | |
11 | #include "include/Context.h" | |
12 | ||
13 | #include "librbd/ExclusiveLock.h" | |
14 | #include "librbd/ImageCtx.h" | |
15 | #include "librbd/ObjectMap.h" | |
16 | #include "librbd/Utils.h" | |
17 | #include "librbd/io/AioCompletion.h" | |
18 | #include "librbd/io/CopyupRequest.h" | |
19 | #include "librbd/io/ImageRequest.h" | |
20 | #include "librbd/io/ReadResult.h" | |
21 | ||
22 | #include <boost/bind.hpp> | |
23 | #include <boost/optional.hpp> | |
24 | ||
25 | #define dout_subsys ceph_subsys_rbd | |
26 | #undef dout_prefix | |
27 | #define dout_prefix *_dout << "librbd::io::ObjectRequest: " << this \ | |
28 | << " " << __func__ << ": " | |
29 | ||
30 | namespace librbd { | |
31 | namespace io { | |
32 | ||
33 | template <typename I> | |
34 | ObjectRequest<I>* | |
35 | ObjectRequest<I>::create_remove(I *ictx, const std::string &oid, | |
36 | uint64_t object_no, | |
37 | const ::SnapContext &snapc, | |
31f18b77 | 38 | const ZTracer::Trace &parent_trace, |
7c673cae FG |
39 | Context *completion) { |
40 | return new ObjectRemoveRequest(util::get_image_ctx(ictx), oid, object_no, | |
31f18b77 | 41 | snapc, parent_trace, completion); |
7c673cae FG |
42 | } |
43 | ||
44 | template <typename I> | |
45 | ObjectRequest<I>* | |
46 | ObjectRequest<I>::create_truncate(I *ictx, const std::string &oid, | |
47 | uint64_t object_no, uint64_t object_off, | |
48 | const ::SnapContext &snapc, | |
31f18b77 FG |
49 | const ZTracer::Trace &parent_trace, |
50 | Context *completion) { | |
7c673cae | 51 | return new ObjectTruncateRequest(util::get_image_ctx(ictx), oid, object_no, |
31f18b77 | 52 | object_off, snapc, parent_trace, completion); |
7c673cae FG |
53 | } |
54 | ||
55 | template <typename I> | |
56 | ObjectRequest<I>* | |
57 | ObjectRequest<I>::create_write(I *ictx, const std::string &oid, | |
58 | uint64_t object_no, uint64_t object_off, | |
59 | const ceph::bufferlist &data, | |
31f18b77 FG |
60 | const ::SnapContext &snapc, int op_flags, |
61 | const ZTracer::Trace &parent_trace, | |
62 | Context *completion) { | |
7c673cae | 63 | return new ObjectWriteRequest(util::get_image_ctx(ictx), oid, object_no, |
31f18b77 FG |
64 | object_off, data, snapc, op_flags, parent_trace, |
65 | completion); | |
7c673cae FG |
66 | } |
67 | ||
68 | template <typename I> | |
69 | ObjectRequest<I>* | |
70 | ObjectRequest<I>::create_zero(I *ictx, const std::string &oid, | |
71 | uint64_t object_no, uint64_t object_off, | |
72 | uint64_t object_len, | |
73 | const ::SnapContext &snapc, | |
31f18b77 | 74 | const ZTracer::Trace &parent_trace, |
7c673cae FG |
75 | Context *completion) { |
76 | return new ObjectZeroRequest(util::get_image_ctx(ictx), oid, object_no, | |
31f18b77 FG |
77 | object_off, object_len, snapc, parent_trace, |
78 | completion); | |
7c673cae FG |
79 | } |
80 | ||
81 | template <typename I> | |
82 | ObjectRequest<I>* | |
83 | ObjectRequest<I>::create_writesame(I *ictx, const std::string &oid, | |
84 | uint64_t object_no, uint64_t object_off, | |
85 | uint64_t object_len, | |
86 | const ceph::bufferlist &data, | |
31f18b77 FG |
87 | const ::SnapContext &snapc, int op_flags, |
88 | const ZTracer::Trace &parent_trace, | |
89 | Context *completion) { | |
7c673cae FG |
90 | return new ObjectWriteSameRequest(util::get_image_ctx(ictx), oid, object_no, |
91 | object_off, object_len, data, snapc, | |
31f18b77 | 92 | op_flags, parent_trace, completion); |
7c673cae FG |
93 | } |
94 | ||
95 | template <typename I> | |
96 | ObjectRequest<I>::ObjectRequest(ImageCtx *ictx, const std::string &oid, | |
97 | uint64_t objectno, uint64_t off, | |
98 | uint64_t len, librados::snap_t snap_id, | |
31f18b77 FG |
99 | bool hide_enoent, const char *trace_name, |
100 | const ZTracer::Trace &trace, | |
101 | Context *completion) | |
7c673cae FG |
102 | : m_ictx(ictx), m_oid(oid), m_object_no(objectno), m_object_off(off), |
103 | m_object_len(len), m_snap_id(snap_id), m_completion(completion), | |
31f18b77 FG |
104 | m_hide_enoent(hide_enoent), |
105 | m_trace(util::create_trace(*ictx, "", trace)) { | |
106 | if (m_trace.valid()) { | |
107 | m_trace.copy_name(trace_name + std::string(" ") + oid); | |
108 | m_trace.event("start"); | |
109 | } | |
7c673cae FG |
110 | |
111 | Striper::extent_to_file(m_ictx->cct, &m_ictx->layout, m_object_no, | |
112 | 0, m_ictx->layout.object_size, m_parent_extents); | |
113 | ||
114 | RWLock::RLocker snap_locker(m_ictx->snap_lock); | |
115 | RWLock::RLocker parent_locker(m_ictx->parent_lock); | |
116 | compute_parent_extents(); | |
117 | } | |
118 | ||
119 | template <typename I> | |
120 | void ObjectRequest<I>::complete(int r) | |
121 | { | |
122 | if (should_complete(r)) { | |
123 | ldout(m_ictx->cct, 20) << dendl; | |
124 | if (m_hide_enoent && r == -ENOENT) { | |
125 | r = 0; | |
126 | } | |
127 | m_completion->complete(r); | |
128 | delete this; | |
129 | } | |
130 | } | |
131 | ||
132 | template <typename I> | |
133 | bool ObjectRequest<I>::compute_parent_extents() { | |
134 | assert(m_ictx->snap_lock.is_locked()); | |
135 | assert(m_ictx->parent_lock.is_locked()); | |
136 | ||
137 | uint64_t parent_overlap; | |
138 | int r = m_ictx->get_parent_overlap(m_snap_id, &parent_overlap); | |
139 | if (r < 0) { | |
140 | // NOTE: it's possible for a snapshot to be deleted while we are | |
141 | // still reading from it | |
142 | lderr(m_ictx->cct) << "failed to retrieve parent overlap: " | |
143 | << cpp_strerror(r) | |
144 | << dendl; | |
145 | m_has_parent = false; | |
146 | m_parent_extents.clear(); | |
147 | return false; | |
148 | } | |
149 | ||
150 | uint64_t object_overlap = m_ictx->prune_parent_extents( | |
151 | m_parent_extents, parent_overlap); | |
152 | if (object_overlap > 0) { | |
153 | ldout(m_ictx->cct, 20) << "overlap " << parent_overlap << " " | |
154 | << "extents " << m_parent_extents << dendl; | |
155 | m_has_parent = !m_parent_extents.empty(); | |
156 | return true; | |
157 | } | |
158 | return false; | |
159 | } | |
160 | ||
161 | static inline bool is_copy_on_read(ImageCtx *ictx, librados::snap_t snap_id) { | |
162 | assert(ictx->snap_lock.is_locked()); | |
163 | return (ictx->clone_copy_on_read && | |
164 | !ictx->read_only && snap_id == CEPH_NOSNAP && | |
165 | (ictx->exclusive_lock == nullptr || | |
166 | ictx->exclusive_lock->is_lock_owner())); | |
167 | } | |
168 | ||
169 | /** read **/ | |
170 | ||
171 | template <typename I> | |
172 | ObjectReadRequest<I>::ObjectReadRequest(I *ictx, const std::string &oid, | |
173 | uint64_t objectno, uint64_t offset, | |
174 | uint64_t len, Extents& be, | |
175 | librados::snap_t snap_id, bool sparse, | |
31f18b77 FG |
176 | int op_flags, |
177 | const ZTracer::Trace &parent_trace, | |
178 | Context *completion) | |
7c673cae | 179 | : ObjectRequest<I>(util::get_image_ctx(ictx), oid, objectno, offset, len, |
31f18b77 | 180 | snap_id, false, "read", parent_trace, completion), |
7c673cae FG |
181 | m_buffer_extents(be), m_tried_parent(false), m_sparse(sparse), |
182 | m_op_flags(op_flags), m_state(LIBRBD_AIO_READ_FLAT) { | |
183 | guard_read(); | |
184 | } | |
185 | ||
186 | template <typename I> | |
187 | void ObjectReadRequest<I>::guard_read() | |
188 | { | |
189 | ImageCtx *image_ctx = this->m_ictx; | |
190 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
191 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
192 | ||
193 | if (this->has_parent()) { | |
194 | ldout(image_ctx->cct, 20) << "guarding read" << dendl; | |
195 | m_state = LIBRBD_AIO_READ_GUARD; | |
196 | } | |
197 | } | |
198 | ||
199 | template <typename I> | |
200 | bool ObjectReadRequest<I>::should_complete(int r) | |
201 | { | |
202 | ImageCtx *image_ctx = this->m_ictx; | |
203 | ldout(image_ctx->cct, 20) << this->m_oid << " " | |
204 | << this->m_object_off << "~" << this->m_object_len | |
205 | << " r = " << r << dendl; | |
206 | ||
207 | bool finished = true; | |
208 | ||
209 | switch (m_state) { | |
210 | case LIBRBD_AIO_READ_GUARD: | |
211 | ldout(image_ctx->cct, 20) << "READ_CHECK_GUARD" << dendl; | |
212 | ||
213 | // This is the step to read from parent | |
214 | if (!m_tried_parent && r == -ENOENT) { | |
215 | { | |
216 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
217 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
218 | if (image_ctx->parent == NULL) { | |
219 | ldout(image_ctx->cct, 20) << "parent is gone; do nothing" << dendl; | |
7c673cae FG |
220 | break; |
221 | } | |
222 | ||
223 | // calculate reverse mapping onto the image | |
224 | vector<pair<uint64_t,uint64_t> > parent_extents; | |
225 | Striper::extent_to_file(image_ctx->cct, &image_ctx->layout, | |
226 | this->m_object_no, this->m_object_off, | |
227 | this->m_object_len, parent_extents); | |
228 | ||
229 | uint64_t parent_overlap = 0; | |
230 | uint64_t object_overlap = 0; | |
231 | r = image_ctx->get_parent_overlap(this->m_snap_id, &parent_overlap); | |
232 | if (r == 0) { | |
233 | object_overlap = image_ctx->prune_parent_extents(parent_extents, | |
234 | parent_overlap); | |
235 | } | |
236 | ||
237 | if (object_overlap > 0) { | |
238 | m_tried_parent = true; | |
239 | if (is_copy_on_read(image_ctx, this->m_snap_id)) { | |
240 | m_state = LIBRBD_AIO_READ_COPYUP; | |
241 | } | |
242 | ||
243 | read_from_parent(std::move(parent_extents)); | |
244 | finished = false; | |
245 | } | |
246 | } | |
247 | } | |
248 | break; | |
249 | case LIBRBD_AIO_READ_COPYUP: | |
250 | ldout(image_ctx->cct, 20) << "READ_COPYUP" << dendl; | |
251 | // This is the extra step for copy-on-read: kick off an asynchronous copyup. | |
252 | // It is different from copy-on-write as asynchronous copyup will finish | |
253 | // by itself so state won't go back to LIBRBD_AIO_READ_GUARD. | |
254 | ||
255 | assert(m_tried_parent); | |
256 | if (r > 0) { | |
257 | // If read entire object from parent success and CoR is possible, kick | |
258 | // off a asynchronous copyup. This approach minimizes the latency | |
259 | // impact. | |
260 | send_copyup(); | |
261 | } | |
262 | break; | |
263 | case LIBRBD_AIO_READ_FLAT: | |
264 | ldout(image_ctx->cct, 20) << "READ_FLAT" << dendl; | |
265 | // The read content should be deposit in m_read_data | |
266 | break; | |
267 | default: | |
268 | lderr(image_ctx->cct) << "invalid request state: " << m_state << dendl; | |
269 | ceph_abort(); | |
270 | } | |
271 | ||
272 | return finished; | |
273 | } | |
274 | ||
275 | template <typename I> | |
276 | void ObjectReadRequest<I>::send() { | |
277 | ImageCtx *image_ctx = this->m_ictx; | |
278 | ldout(image_ctx->cct, 20) << this->m_oid << " " << this->m_object_off | |
279 | << "~" << this->m_object_len | |
280 | << dendl; | |
281 | ||
282 | { | |
283 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
284 | ||
285 | // send read request to parent if the object doesn't exist locally | |
286 | if (image_ctx->object_map != nullptr && | |
287 | !image_ctx->object_map->object_may_exist(this->m_object_no)) { | |
288 | image_ctx->op_work_queue->queue(util::create_context_callback< | |
289 | ObjectRequest<I> >(this), -ENOENT); | |
290 | return; | |
291 | } | |
292 | } | |
293 | ||
294 | librados::ObjectReadOperation op; | |
295 | int flags = image_ctx->get_read_flags(this->m_snap_id); | |
296 | if (m_sparse) { | |
297 | op.sparse_read(this->m_object_off, this->m_object_len, &m_ext_map, | |
298 | &m_read_data, nullptr); | |
299 | } else { | |
300 | op.read(this->m_object_off, this->m_object_len, &m_read_data, nullptr); | |
301 | } | |
302 | op.set_op_flags2(m_op_flags); | |
303 | ||
304 | librados::AioCompletion *rados_completion = | |
305 | util::create_rados_callback(this); | |
31f18b77 FG |
306 | int r = image_ctx->data_ctx.aio_operate( |
307 | this->m_oid, rados_completion, &op, flags, nullptr, | |
308 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); | |
7c673cae FG |
309 | assert(r == 0); |
310 | ||
311 | rados_completion->release(); | |
312 | } | |
313 | ||
314 | template <typename I> | |
315 | void ObjectReadRequest<I>::send_copyup() | |
316 | { | |
317 | ImageCtx *image_ctx = this->m_ictx; | |
318 | ldout(image_ctx->cct, 20) << this->m_oid << " " << this->m_object_off | |
319 | << "~" << this->m_object_len << dendl; | |
320 | ||
321 | { | |
322 | RWLock::RLocker snap_locker(image_ctx->snap_lock); | |
323 | RWLock::RLocker parent_locker(image_ctx->parent_lock); | |
324 | if (!this->compute_parent_extents() || | |
325 | (image_ctx->exclusive_lock != nullptr && | |
326 | !image_ctx->exclusive_lock->is_lock_owner())) { | |
327 | return; | |
328 | } | |
329 | } | |
330 | ||
331 | Mutex::Locker copyup_locker(image_ctx->copyup_list_lock); | |
332 | map<uint64_t, CopyupRequest*>::iterator it = | |
333 | image_ctx->copyup_list.find(this->m_object_no); | |
334 | if (it == image_ctx->copyup_list.end()) { | |
335 | // create and kick off a CopyupRequest | |
336 | CopyupRequest *new_req = new CopyupRequest( | |
337 | image_ctx, this->m_oid, this->m_object_no, | |
31f18b77 | 338 | std::move(this->m_parent_extents), this->m_trace); |
7c673cae FG |
339 | this->m_parent_extents.clear(); |
340 | ||
341 | image_ctx->copyup_list[this->m_object_no] = new_req; | |
342 | new_req->send(); | |
343 | } | |
344 | } | |
345 | ||
346 | template <typename I> | |
347 | void ObjectReadRequest<I>::read_from_parent(Extents&& parent_extents) | |
348 | { | |
349 | ImageCtx *image_ctx = this->m_ictx; | |
350 | AioCompletion *parent_completion = AioCompletion::create_and_start< | |
351 | ObjectRequest<I> >(this, image_ctx, AIO_TYPE_READ); | |
352 | ||
353 | ldout(image_ctx->cct, 20) << "parent completion " << parent_completion | |
354 | << " extents " << parent_extents << dendl; | |
355 | ImageRequest<>::aio_read(image_ctx->parent, parent_completion, | |
356 | std::move(parent_extents), | |
31f18b77 | 357 | ReadResult{&m_read_data}, 0, this->m_trace); |
7c673cae FG |
358 | } |
359 | ||
360 | /** write **/ | |
361 | ||
362 | AbstractObjectWriteRequest::AbstractObjectWriteRequest(ImageCtx *ictx, | |
363 | const std::string &oid, | |
364 | uint64_t object_no, | |
365 | uint64_t object_off, | |
366 | uint64_t len, | |
367 | const ::SnapContext &snapc, | |
31f18b77 FG |
368 | bool hide_enoent, |
369 | const char *trace_name, | |
370 | const ZTracer::Trace &parent_trace, | |
371 | Context *completion) | |
7c673cae | 372 | : ObjectRequest(ictx, oid, object_no, object_off, len, CEPH_NOSNAP, |
31f18b77 | 373 | hide_enoent, trace_name, parent_trace, completion), |
7c673cae FG |
374 | m_state(LIBRBD_AIO_WRITE_FLAT), m_snap_seq(snapc.seq.val) |
375 | { | |
376 | m_snaps.insert(m_snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); | |
377 | } | |
378 | ||
379 | void AbstractObjectWriteRequest::guard_write() | |
380 | { | |
381 | if (has_parent()) { | |
382 | m_state = LIBRBD_AIO_WRITE_GUARD; | |
383 | m_write.assert_exists(); | |
384 | ldout(m_ictx->cct, 20) << "guarding write" << dendl; | |
385 | } | |
386 | } | |
387 | ||
388 | bool AbstractObjectWriteRequest::should_complete(int r) | |
389 | { | |
390 | ldout(m_ictx->cct, 20) << get_op_type() << m_oid << " " | |
391 | << m_object_off << "~" << m_object_len | |
392 | << " r = " << r << dendl; | |
393 | ||
394 | bool finished = true; | |
395 | switch (m_state) { | |
396 | case LIBRBD_AIO_WRITE_PRE: | |
397 | ldout(m_ictx->cct, 20) << "WRITE_PRE" << dendl; | |
398 | if (r < 0) { | |
399 | return true; | |
400 | } | |
401 | ||
402 | send_write_op(); | |
403 | finished = false; | |
404 | break; | |
405 | ||
406 | case LIBRBD_AIO_WRITE_POST: | |
407 | ldout(m_ictx->cct, 20) << "WRITE_POST" << dendl; | |
408 | finished = true; | |
409 | break; | |
410 | ||
411 | case LIBRBD_AIO_WRITE_GUARD: | |
412 | ldout(m_ictx->cct, 20) << "WRITE_CHECK_GUARD" << dendl; | |
413 | ||
414 | if (r == -ENOENT) { | |
415 | handle_write_guard(); | |
416 | finished = false; | |
417 | break; | |
418 | } else if (r < 0) { | |
419 | // pass the error code to the finish context | |
420 | m_state = LIBRBD_AIO_WRITE_ERROR; | |
421 | complete(r); | |
422 | finished = false; | |
423 | break; | |
424 | } | |
425 | ||
426 | finished = send_post_object_map_update(); | |
427 | break; | |
428 | ||
429 | case LIBRBD_AIO_WRITE_COPYUP: | |
430 | ldout(m_ictx->cct, 20) << "WRITE_COPYUP" << dendl; | |
431 | if (r < 0) { | |
432 | m_state = LIBRBD_AIO_WRITE_ERROR; | |
433 | complete(r); | |
434 | finished = false; | |
435 | } else { | |
436 | finished = send_post_object_map_update(); | |
437 | } | |
438 | break; | |
439 | ||
440 | case LIBRBD_AIO_WRITE_FLAT: | |
441 | ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl; | |
442 | ||
443 | finished = send_post_object_map_update(); | |
444 | break; | |
445 | ||
446 | case LIBRBD_AIO_WRITE_ERROR: | |
447 | assert(r < 0); | |
448 | lderr(m_ictx->cct) << "WRITE_ERROR: " << cpp_strerror(r) << dendl; | |
449 | break; | |
450 | ||
451 | default: | |
452 | lderr(m_ictx->cct) << "invalid request state: " << m_state << dendl; | |
453 | ceph_abort(); | |
454 | } | |
455 | ||
456 | return finished; | |
457 | } | |
458 | ||
459 | void AbstractObjectWriteRequest::send() { | |
460 | ldout(m_ictx->cct, 20) << get_op_type() << " " << m_oid << " " | |
461 | << m_object_off << "~" << m_object_len << dendl; | |
462 | { | |
463 | RWLock::RLocker snap_lock(m_ictx->snap_lock); | |
464 | if (m_ictx->object_map == nullptr) { | |
465 | m_object_exist = true; | |
466 | } else { | |
467 | // should have been flushed prior to releasing lock | |
468 | assert(m_ictx->exclusive_lock->is_lock_owner()); | |
469 | m_object_exist = m_ictx->object_map->object_may_exist(m_object_no); | |
470 | } | |
471 | } | |
472 | ||
473 | send_write(); | |
474 | } | |
475 | ||
476 | void AbstractObjectWriteRequest::send_pre_object_map_update() { | |
477 | ldout(m_ictx->cct, 20) << dendl; | |
478 | ||
479 | { | |
480 | RWLock::RLocker snap_lock(m_ictx->snap_lock); | |
481 | if (m_ictx->object_map != nullptr) { | |
482 | uint8_t new_state; | |
483 | pre_object_map_update(&new_state); | |
484 | RWLock::WLocker object_map_locker(m_ictx->object_map_lock); | |
485 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off | |
486 | << "~" << m_object_len << dendl; | |
487 | m_state = LIBRBD_AIO_WRITE_PRE; | |
488 | ||
489 | if (m_ictx->object_map->aio_update<ObjectRequest>( | |
31f18b77 | 490 | CEPH_NOSNAP, m_object_no, new_state, {}, this->m_trace, this)) { |
7c673cae FG |
491 | return; |
492 | } | |
493 | } | |
494 | } | |
495 | ||
496 | send_write_op(); | |
497 | } | |
498 | ||
499 | bool AbstractObjectWriteRequest::send_post_object_map_update() { | |
500 | ldout(m_ictx->cct, 20) << dendl; | |
501 | ||
502 | RWLock::RLocker snap_locker(m_ictx->snap_lock); | |
503 | if (m_ictx->object_map == nullptr || !post_object_map_update()) { | |
504 | return true; | |
505 | } | |
506 | ||
507 | // should have been flushed prior to releasing lock | |
508 | assert(m_ictx->exclusive_lock->is_lock_owner()); | |
509 | ||
510 | RWLock::WLocker object_map_locker(m_ictx->object_map_lock); | |
511 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off | |
512 | << "~" << m_object_len << dendl; | |
513 | m_state = LIBRBD_AIO_WRITE_POST; | |
514 | ||
515 | if (m_ictx->object_map->aio_update<ObjectRequest>( | |
31f18b77 FG |
516 | CEPH_NOSNAP, m_object_no, OBJECT_NONEXISTENT, OBJECT_PENDING, |
517 | this->m_trace, this)) { | |
7c673cae FG |
518 | return false; |
519 | } | |
520 | ||
521 | return true; | |
522 | } | |
523 | ||
524 | void AbstractObjectWriteRequest::send_write() { | |
525 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off << "~" << m_object_len | |
526 | << " object exist " << m_object_exist << dendl; | |
527 | ||
528 | if (!m_object_exist && has_parent()) { | |
529 | m_state = LIBRBD_AIO_WRITE_GUARD; | |
530 | handle_write_guard(); | |
531 | } else { | |
532 | send_pre_object_map_update(); | |
533 | } | |
534 | } | |
535 | ||
536 | void AbstractObjectWriteRequest::send_copyup() | |
537 | { | |
538 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off | |
539 | << "~" << m_object_len << dendl; | |
540 | m_state = LIBRBD_AIO_WRITE_COPYUP; | |
541 | ||
542 | m_ictx->copyup_list_lock.Lock(); | |
543 | map<uint64_t, CopyupRequest*>::iterator it = | |
544 | m_ictx->copyup_list.find(m_object_no); | |
545 | if (it == m_ictx->copyup_list.end()) { | |
546 | CopyupRequest *new_req = new CopyupRequest(m_ictx, m_oid, | |
547 | m_object_no, | |
31f18b77 FG |
548 | std::move(m_parent_extents), |
549 | this->m_trace); | |
7c673cae FG |
550 | m_parent_extents.clear(); |
551 | ||
552 | // make sure to wait on this CopyupRequest | |
553 | new_req->append_request(this); | |
554 | m_ictx->copyup_list[m_object_no] = new_req; | |
555 | ||
556 | m_ictx->copyup_list_lock.Unlock(); | |
557 | new_req->send(); | |
558 | } else { | |
559 | it->second->append_request(this); | |
560 | m_ictx->copyup_list_lock.Unlock(); | |
561 | } | |
562 | } | |
563 | void AbstractObjectWriteRequest::send_write_op() | |
564 | { | |
565 | m_state = LIBRBD_AIO_WRITE_FLAT; | |
566 | if (m_guard) { | |
567 | guard_write(); | |
568 | } | |
569 | ||
31f18b77 | 570 | add_write_ops(&m_write, true); |
7c673cae FG |
571 | assert(m_write.size() != 0); |
572 | ||
573 | librados::AioCompletion *rados_completion = | |
574 | util::create_rados_callback(this); | |
31f18b77 FG |
575 | int r = m_ictx->data_ctx.aio_operate( |
576 | m_oid, rados_completion, &m_write, m_snap_seq, m_snaps, | |
577 | (this->m_trace.valid() ? this->m_trace.get_info() : nullptr)); | |
7c673cae FG |
578 | assert(r == 0); |
579 | rados_completion->release(); | |
580 | } | |
581 | void AbstractObjectWriteRequest::handle_write_guard() | |
582 | { | |
583 | bool has_parent; | |
584 | { | |
585 | RWLock::RLocker snap_locker(m_ictx->snap_lock); | |
586 | RWLock::RLocker parent_locker(m_ictx->parent_lock); | |
587 | has_parent = compute_parent_extents(); | |
588 | } | |
589 | // If parent still exists, overlap might also have changed. | |
590 | if (has_parent) { | |
591 | send_copyup(); | |
592 | } else { | |
593 | // parent may have disappeared -- send original write again | |
594 | ldout(m_ictx->cct, 20) << "should_complete(" << this | |
595 | << "): parent overlap now 0" << dendl; | |
596 | send_write(); | |
597 | } | |
598 | } | |
599 | ||
31f18b77 FG |
600 | void ObjectWriteRequest::add_write_ops(librados::ObjectWriteOperation *wr, |
601 | bool set_hints) { | |
7c673cae | 602 | RWLock::RLocker snap_locker(m_ictx->snap_lock); |
31f18b77 | 603 | if (set_hints && m_ictx->enable_alloc_hint && |
7c673cae FG |
604 | (m_ictx->object_map == nullptr || !m_object_exist)) { |
605 | wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); | |
606 | } | |
607 | ||
608 | if (m_object_off == 0 && m_object_len == m_ictx->get_object_size()) { | |
609 | wr->write_full(m_write_data); | |
610 | } else { | |
611 | wr->write(m_object_off, m_write_data); | |
612 | } | |
613 | wr->set_op_flags2(m_op_flags); | |
614 | } | |
615 | ||
616 | void ObjectWriteRequest::send_write() { | |
617 | bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size()); | |
618 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off << "~" << m_object_len | |
619 | << " object exist " << m_object_exist | |
620 | << " write_full " << write_full << dendl; | |
621 | if (write_full && !has_parent()) { | |
622 | m_guard = false; | |
623 | } | |
624 | ||
625 | AbstractObjectWriteRequest::send_write(); | |
626 | } | |
627 | ||
628 | void ObjectRemoveRequest::guard_write() { | |
629 | // do nothing to disable write guard only if deep-copyup not required | |
630 | RWLock::RLocker snap_locker(m_ictx->snap_lock); | |
631 | if (!m_ictx->snaps.empty()) { | |
632 | AbstractObjectWriteRequest::guard_write(); | |
633 | } | |
634 | } | |
635 | void ObjectRemoveRequest::send_write() { | |
31f18b77 FG |
636 | ldout(m_ictx->cct, 20) << m_oid << " remove " << " object exist " |
637 | << m_object_exist << dendl; | |
638 | if (!m_object_exist && !has_parent()) { | |
639 | m_state = LIBRBD_AIO_WRITE_FLAT; | |
640 | Context *ctx = util::create_context_callback<ObjectRequest>(this); | |
641 | m_ictx->op_work_queue->queue(ctx, 0); | |
642 | } else { | |
643 | send_pre_object_map_update(); | |
644 | } | |
7c673cae FG |
645 | } |
646 | ||
647 | void ObjectTruncateRequest::send_write() { | |
31f18b77 FG |
648 | ldout(m_ictx->cct, 20) << m_oid << " truncate " << m_object_off |
649 | << " object exist " << m_object_exist << dendl; | |
650 | if (!m_object_exist && !has_parent()) { | |
651 | m_state = LIBRBD_AIO_WRITE_FLAT; | |
652 | Context *ctx = util::create_context_callback<ObjectRequest>(this); | |
653 | m_ictx->op_work_queue->queue(ctx, 0); | |
654 | } else { | |
655 | AbstractObjectWriteRequest::send_write(); | |
656 | } | |
657 | } | |
658 | ||
659 | void ObjectZeroRequest::send_write() { | |
660 | ldout(m_ictx->cct, 20) << m_oid << " zero " << m_object_off << "~" | |
661 | << m_object_len << " object exist " << m_object_exist | |
662 | << dendl; | |
663 | if (!m_object_exist && !has_parent()) { | |
7c673cae FG |
664 | m_state = LIBRBD_AIO_WRITE_FLAT; |
665 | Context *ctx = util::create_context_callback<ObjectRequest>(this); | |
666 | m_ictx->op_work_queue->queue(ctx, 0); | |
667 | } else { | |
668 | AbstractObjectWriteRequest::send_write(); | |
669 | } | |
670 | } | |
671 | ||
31f18b77 FG |
672 | void ObjectWriteSameRequest::add_write_ops(librados::ObjectWriteOperation *wr, |
673 | bool set_hints) { | |
7c673cae | 674 | RWLock::RLocker snap_locker(m_ictx->snap_lock); |
31f18b77 | 675 | if (set_hints && m_ictx->enable_alloc_hint && |
7c673cae FG |
676 | (m_ictx->object_map == nullptr || !m_object_exist)) { |
677 | wr->set_alloc_hint(m_ictx->get_object_size(), m_ictx->get_object_size()); | |
678 | } | |
679 | ||
680 | wr->writesame(m_object_off, m_object_len, m_write_data); | |
681 | wr->set_op_flags2(m_op_flags); | |
682 | } | |
683 | ||
684 | void ObjectWriteSameRequest::send_write() { | |
685 | bool write_full = (m_object_off == 0 && m_object_len == m_ictx->get_object_size()); | |
686 | ldout(m_ictx->cct, 20) << m_oid << " " << m_object_off << "~" << m_object_len | |
687 | << " write_full " << write_full << dendl; | |
688 | if (write_full && !has_parent()) { | |
689 | m_guard = false; | |
690 | } | |
691 | ||
692 | AbstractObjectWriteRequest::send_write(); | |
693 | } | |
694 | ||
695 | } // namespace io | |
696 | } // namespace librbd | |
697 | ||
698 | template class librbd::io::ObjectRequest<librbd::ImageCtx>; | |
699 | template class librbd::io::ObjectReadRequest<librbd::ImageCtx>; |