]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #ifndef CEPH_LIBRBD_IO_OBJECT_REQUEST_H | |
5 | #define CEPH_LIBRBD_IO_OBJECT_REQUEST_H | |
6 | ||
7 | #include "include/int_types.h" | |
7c673cae FG |
8 | #include "include/buffer.h" |
9 | #include "include/rados/librados.hpp" | |
31f18b77 FG |
10 | #include "common/snap_types.h" |
11 | #include "common/zipkin_trace.h" | |
7c673cae | 12 | #include "librbd/ObjectMap.h" |
b32b8144 | 13 | #include "librbd/io/Types.h" |
31f18b77 | 14 | #include <map> |
7c673cae FG |
15 | |
16 | class Context; | |
17 | ||
18 | namespace librbd { | |
19 | ||
20 | struct ImageCtx; | |
21 | ||
22 | namespace io { | |
23 | ||
24 | struct AioCompletion; | |
b32b8144 | 25 | template <typename> class CopyupRequest; |
7c673cae FG |
26 | |
/**
 * Non-template abstract base for object requests.
 * ObjectRequest<ImageCtxT> derives from it, so a request can be driven
 * through this interface regardless of its template argument.
 */
struct ObjectRequestHandle {
  virtual ~ObjectRequestHandle() = default;

  // hand the result code r to the request (see implementers for semantics)
  virtual void fail(int r) = 0;
  // start the request
  virtual void send() = 0;
};
34 | ||
35 | /** | |
36 | * This class represents an I/O operation to a single RBD data object. | |
37 | * Its subclasses encapsulate logic for dealing with special cases | |
38 | * for I/O due to layering. | |
39 | */ | |
40 | template <typename ImageCtxT = ImageCtx> | |
41 | class ObjectRequest : public ObjectRequestHandle { | |
42 | public: | |
7c673cae FG |
43 | static ObjectRequest* create_write(ImageCtxT *ictx, const std::string &oid, |
44 | uint64_t object_no, | |
45 | uint64_t object_off, | |
46 | const ceph::bufferlist &data, | |
31f18b77 FG |
47 | const ::SnapContext &snapc, int op_flags, |
48 | const ZTracer::Trace &parent_trace, | |
49 | Context *completion); | |
b32b8144 FG |
50 | static ObjectRequest* create_discard(ImageCtxT *ictx, const std::string &oid, |
51 | uint64_t object_no, uint64_t object_off, | |
52 | uint64_t object_len, | |
53 | const ::SnapContext &snapc, | |
54 | bool disable_clone_remove, | |
55 | bool update_object_map, | |
56 | const ZTracer::Trace &parent_trace, | |
57 | Context *completion); | |
7c673cae FG |
58 | static ObjectRequest* create_writesame(ImageCtxT *ictx, |
59 | const std::string &oid, | |
60 | uint64_t object_no, | |
61 | uint64_t object_off, | |
62 | uint64_t object_len, | |
63 | const ceph::bufferlist &data, | |
64 | const ::SnapContext &snapc, | |
31f18b77 FG |
65 | int op_flags, |
66 | const ZTracer::Trace &parent_trace, | |
67 | Context *completion); | |
c07f9fc5 FG |
68 | static ObjectRequest* create_compare_and_write(ImageCtxT *ictx, |
69 | const std::string &oid, | |
70 | uint64_t object_no, | |
71 | uint64_t object_off, | |
72 | const ceph::bufferlist &cmp_data, | |
73 | const ceph::bufferlist &write_data, | |
74 | const ::SnapContext &snapc, | |
75 | uint64_t *mismatch_offset, int op_flags, | |
76 | const ZTracer::Trace &parent_trace, | |
77 | Context *completion); | |
7c673cae | 78 | |
b32b8144 | 79 | ObjectRequest(ImageCtxT *ictx, const std::string &oid, |
7c673cae | 80 | uint64_t objectno, uint64_t off, uint64_t len, |
b32b8144 FG |
81 | librados::snap_t snap_id, const char *trace_name, |
82 | const ZTracer::Trace &parent_trace, Context *completion); | |
31f18b77 FG |
83 | ~ObjectRequest() override { |
84 | m_trace.event("finish"); | |
85 | } | |
7c673cae | 86 | |
b32b8144 FG |
87 | static void add_write_hint(ImageCtxT& image_ctx, |
88 | librados::ObjectWriteOperation *wr); | |
7c673cae | 89 | |
b32b8144 FG |
90 | void fail(int r) { |
91 | finish(r); | |
92 | } | |
7c673cae | 93 | |
7c673cae FG |
94 | void send() override = 0; |
95 | ||
96 | bool has_parent() const { | |
97 | return m_has_parent; | |
98 | } | |
99 | ||
7c673cae | 100 | virtual const char *get_op_type() const = 0; |
7c673cae FG |
101 | |
102 | protected: | |
b32b8144 | 103 | bool compute_parent_extents(Extents *parent_extents); |
7c673cae | 104 | |
b32b8144 | 105 | ImageCtxT *m_ictx; |
7c673cae FG |
106 | std::string m_oid; |
107 | uint64_t m_object_no, m_object_off, m_object_len; | |
108 | librados::snap_t m_snap_id; | |
109 | Context *m_completion; | |
31f18b77 | 110 | ZTracer::Trace m_trace; |
7c673cae | 111 | |
b32b8144 FG |
112 | void async_finish(int r); |
113 | void finish(int r); | |
114 | ||
7c673cae FG |
115 | private: |
116 | bool m_has_parent = false; | |
117 | }; | |
118 | ||
119 | template <typename ImageCtxT = ImageCtx> | |
120 | class ObjectReadRequest : public ObjectRequest<ImageCtxT> { | |
121 | public: | |
7c673cae FG |
122 | typedef std::map<uint64_t, uint64_t> ExtentMap; |
123 | ||
124 | static ObjectReadRequest* create(ImageCtxT *ictx, const std::string &oid, | |
125 | uint64_t objectno, uint64_t offset, | |
b32b8144 FG |
126 | uint64_t len, librados::snap_t snap_id, |
127 | int op_flags, bool cache_initiated, | |
128 | const ZTracer::Trace &parent_trace, | |
31f18b77 | 129 | Context *completion) { |
7c673cae | 130 | return new ObjectReadRequest(ictx, oid, objectno, offset, len, |
b32b8144 FG |
131 | snap_id, op_flags, cache_initiated, |
132 | parent_trace, completion); | |
7c673cae FG |
133 | } |
134 | ||
135 | ObjectReadRequest(ImageCtxT *ictx, const std::string &oid, | |
136 | uint64_t objectno, uint64_t offset, uint64_t len, | |
b32b8144 FG |
137 | librados::snap_t snap_id, int op_flags, |
138 | bool cache_initiated, const ZTracer::Trace &parent_trace, | |
139 | Context *completion); | |
7c673cae | 140 | |
7c673cae | 141 | void send() override; |
7c673cae FG |
142 | |
143 | inline uint64_t get_offset() const { | |
144 | return this->m_object_off; | |
145 | } | |
146 | inline uint64_t get_length() const { | |
147 | return this->m_object_len; | |
148 | } | |
149 | ceph::bufferlist &data() { | |
150 | return m_read_data; | |
151 | } | |
7c673cae FG |
152 | ExtentMap &get_extent_map() { |
153 | return m_ext_map; | |
154 | } | |
155 | ||
156 | const char *get_op_type() const override { | |
157 | return "read"; | |
158 | } | |
159 | ||
7c673cae | 160 | private: |
7c673cae | 161 | /** |
b32b8144 | 162 | * @verbatim |
7c673cae | 163 | * |
b32b8144 FG |
164 | * <start> |
165 | * | | |
166 | * | | |
167 | * /--------/ \--------\ | |
168 | * | | | |
169 | * | (cache | (cache | |
170 | * v disabled) v enabled) | |
171 | * READ_OBJECT READ_CACHE | |
172 | * | | | |
173 | * |/------------------/ | |
174 | * | | |
175 | * v (skip if not needed) | |
176 | * READ_PARENT | |
177 | * | | |
178 | * v (skip if not needed) | |
179 | * COPYUP | |
180 | * | | |
181 | * v | |
182 | * <finish> | |
7c673cae | 183 | * |
b32b8144 | 184 | * @endverbatim |
7c673cae | 185 | */ |
7c673cae | 186 | |
b32b8144 FG |
187 | int m_op_flags; |
188 | bool m_cache_initiated; | |
189 | ||
190 | ceph::bufferlist m_read_data; | |
191 | ExtentMap m_ext_map; | |
7c673cae | 192 | |
b32b8144 FG |
193 | void read_cache(); |
194 | void handle_read_cache(int r); | |
7c673cae | 195 | |
b32b8144 FG |
196 | void read_object(); |
197 | void handle_read_object(int r); | |
198 | ||
199 | void read_parent(); | |
200 | void handle_read_parent(int r); | |
201 | ||
202 | void copyup(); | |
7c673cae FG |
203 | }; |
204 | ||
b32b8144 FG |
205 | template <typename ImageCtxT = ImageCtx> |
206 | class AbstractObjectWriteRequest : public ObjectRequest<ImageCtxT> { | |
7c673cae | 207 | public: |
b32b8144 | 208 | AbstractObjectWriteRequest(ImageCtxT *ictx, const std::string &oid, |
7c673cae FG |
209 | uint64_t object_no, uint64_t object_off, |
210 | uint64_t len, const ::SnapContext &snapc, | |
b32b8144 | 211 | const char *trace_name, |
31f18b77 FG |
212 | const ZTracer::Trace &parent_trace, |
213 | Context *completion); | |
7c673cae | 214 | |
b32b8144 FG |
215 | virtual bool is_empty_write_op() const { |
216 | return false; | |
7c673cae FG |
217 | } |
218 | ||
b32b8144 FG |
219 | virtual uint8_t get_pre_write_object_map_state() const { |
220 | return OBJECT_EXISTS; | |
221 | } | |
222 | ||
223 | virtual void add_copyup_ops(librados::ObjectWriteOperation *wr) { | |
224 | add_write_ops(wr); | |
225 | } | |
226 | ||
227 | void handle_copyup(int r); | |
228 | ||
7c673cae FG |
229 | void send() override; |
230 | ||
b32b8144 FG |
231 | protected: |
232 | bool m_full_object = false; | |
233 | ||
234 | virtual bool is_no_op_for_nonexistent_object() const { | |
235 | return false; | |
236 | } | |
237 | virtual bool is_object_map_update_enabled() const { | |
238 | return true; | |
239 | } | |
240 | virtual bool is_post_copyup_write_required() const { | |
241 | return false; | |
242 | } | |
243 | virtual bool is_non_existent_post_write_object_map_state() const { | |
244 | return false; | |
245 | } | |
246 | ||
247 | virtual void add_write_hint(librados::ObjectWriteOperation *wr); | |
248 | virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0; | |
249 | ||
250 | virtual int filter_write_result(int r) const { | |
251 | return r; | |
252 | } | |
253 | ||
254 | private: | |
7c673cae | 255 | /** |
b32b8144 | 256 | * @verbatim |
7c673cae | 257 | * |
b32b8144 FG |
258 | * <start> |
259 | * | | |
260 | * v (no-op write request) | |
261 | * DETECT_NO_OP . . . . . . . . . . . . . . . . . . . | |
262 | * | . | |
263 | * v (skip if not required/disabled) . | |
264 | * PRE_UPDATE_OBJECT_MAP . | |
265 | * | . . | |
266 | * | . (child dne) . | |
267 | * | . . . . . . . . . . | |
268 | * | . . | |
269 | * | (post-copyup write) . . | |
270 | * | . . . . . . . . . . . . . . | |
271 | * | . . . . | |
272 | * v v . v . | |
273 | * WRITE . . . . . . . . > COPYUP (if required) . | |
274 | * | | . | |
275 | * |/----------------------/ . | |
276 | * | . | |
277 | * v (skip if not required/disabled) . | |
278 | * POST_UPDATE_OBJECT_MAP . | |
279 | * | . | |
280 | * v . | |
281 | * <finish> < . . . . . . . . . . . . . . . . . . . . | |
7c673cae | 282 | * |
b32b8144 | 283 | * @endverbatim |
7c673cae | 284 | */ |
7c673cae | 285 | |
7c673cae FG |
286 | uint64_t m_snap_seq; |
287 | std::vector<librados::snap_t> m_snaps; | |
7c673cae | 288 | |
b32b8144 FG |
289 | Extents m_parent_extents; |
290 | bool m_object_may_exist = false; | |
291 | bool m_copyup_enabled = true; | |
292 | bool m_copyup_in_progress = false; | |
7c673cae | 293 | |
b32b8144 FG |
294 | void pre_write_object_map_update(); |
295 | void handle_pre_write_object_map_update(int r); | |
296 | ||
297 | void write_object(); | |
298 | void handle_write_object(int r); | |
299 | ||
300 | void copyup(); | |
301 | ||
302 | void post_write_object_map_update(); | |
303 | void handle_post_write_object_map_update(int r); | |
7c673cae | 304 | |
7c673cae FG |
305 | }; |
306 | ||
b32b8144 FG |
307 | template <typename ImageCtxT = ImageCtx> |
308 | class ObjectWriteRequest : public AbstractObjectWriteRequest<ImageCtxT> { | |
7c673cae | 309 | public: |
b32b8144 FG |
310 | ObjectWriteRequest(ImageCtxT *ictx, const std::string &oid, |
311 | uint64_t object_no, uint64_t object_off, | |
312 | const ceph::bufferlist &data, const ::SnapContext &snapc, | |
313 | int op_flags, const ZTracer::Trace &parent_trace, | |
314 | Context *completion) | |
315 | : AbstractObjectWriteRequest<ImageCtxT>(ictx, oid, object_no, object_off, | |
316 | data.length(), snapc, "write", | |
317 | parent_trace, completion), | |
7c673cae FG |
318 | m_write_data(data), m_op_flags(op_flags) { |
319 | } | |
320 | ||
b32b8144 | 321 | bool is_empty_write_op() const override { |
7c673cae FG |
322 | return (m_write_data.length() == 0); |
323 | } | |
324 | ||
325 | const char *get_op_type() const override { | |
326 | return "write"; | |
327 | } | |
328 | ||
7c673cae | 329 | protected: |
b32b8144 | 330 | void add_write_ops(librados::ObjectWriteOperation *wr) override; |
7c673cae FG |
331 | |
332 | private: | |
333 | ceph::bufferlist m_write_data; | |
334 | int m_op_flags; | |
335 | }; | |
336 | ||
b32b8144 FG |
337 | template <typename ImageCtxT = ImageCtx> |
338 | class ObjectDiscardRequest : public AbstractObjectWriteRequest<ImageCtxT> { | |
7c673cae | 339 | public: |
b32b8144 FG |
340 | ObjectDiscardRequest(ImageCtxT *ictx, const std::string &oid, |
341 | uint64_t object_no, uint64_t object_off, | |
342 | uint64_t object_len, const ::SnapContext &snapc, | |
343 | bool disable_clone_remove, bool update_object_map, | |
344 | const ZTracer::Trace &parent_trace, Context *completion) | |
345 | : AbstractObjectWriteRequest<ImageCtxT>(ictx, oid, object_no, object_off, | |
346 | object_len, snapc, "discard", | |
347 | parent_trace, completion), | |
348 | m_update_object_map(update_object_map) { | |
349 | if (this->m_full_object) { | |
350 | if (disable_clone_remove && this->has_parent()) { | |
351 | // need to hide the parent object instead of child object | |
352 | m_discard_action = DISCARD_ACTION_REMOVE_TRUNCATE; | |
353 | this->m_object_len = 0; | |
354 | } else { | |
355 | m_discard_action = DISCARD_ACTION_REMOVE; | |
356 | } | |
357 | } else if (object_off + object_len == ictx->layout.object_size) { | |
358 | m_discard_action = DISCARD_ACTION_TRUNCATE; | |
7c673cae | 359 | } else { |
b32b8144 | 360 | m_discard_action = DISCARD_ACTION_ZERO; |
7c673cae | 361 | } |
7c673cae FG |
362 | } |
363 | ||
b32b8144 FG |
364 | const char* get_op_type() const override { |
365 | switch (m_discard_action) { | |
366 | case DISCARD_ACTION_REMOVE: | |
367 | return "remove"; | |
368 | case DISCARD_ACTION_REMOVE_TRUNCATE: | |
369 | return "remove (truncate)"; | |
370 | case DISCARD_ACTION_TRUNCATE: | |
371 | return "truncate"; | |
372 | case DISCARD_ACTION_ZERO: | |
373 | return "zero"; | |
7c673cae | 374 | } |
b32b8144 FG |
375 | assert(false); |
376 | return nullptr; | |
7c673cae FG |
377 | } |
378 | ||
b32b8144 FG |
379 | uint8_t get_pre_write_object_map_state() const override { |
380 | if (m_discard_action == DISCARD_ACTION_REMOVE) { | |
381 | return OBJECT_PENDING; | |
7c673cae | 382 | } |
b32b8144 | 383 | return OBJECT_EXISTS; |
7c673cae FG |
384 | } |
385 | ||
386 | protected: | |
b32b8144 FG |
387 | bool is_no_op_for_nonexistent_object() const override { |
388 | return (!this->has_parent()); | |
7c673cae | 389 | } |
b32b8144 FG |
390 | bool is_object_map_update_enabled() const override { |
391 | return m_update_object_map; | |
7c673cae | 392 | } |
b32b8144 FG |
393 | bool is_non_existent_post_write_object_map_state() const override { |
394 | return (m_discard_action == DISCARD_ACTION_REMOVE); | |
7c673cae FG |
395 | } |
396 | ||
b32b8144 FG |
397 | void add_write_hint(librados::ObjectWriteOperation *wr) override { |
398 | // no hint for discard | |
7c673cae | 399 | } |
7c673cae | 400 | |
b32b8144 FG |
401 | void add_write_ops(librados::ObjectWriteOperation *wr) override { |
402 | switch (m_discard_action) { | |
403 | case DISCARD_ACTION_REMOVE: | |
404 | wr->remove(); | |
405 | break; | |
406 | case DISCARD_ACTION_REMOVE_TRUNCATE: | |
407 | case DISCARD_ACTION_TRUNCATE: | |
408 | wr->truncate(this->m_object_off); | |
409 | break; | |
410 | case DISCARD_ACTION_ZERO: | |
411 | wr->zero(this->m_object_off, this->m_object_len); | |
412 | break; | |
413 | default: | |
414 | assert(false); | |
415 | break; | |
416 | } | |
7c673cae FG |
417 | } |
418 | ||
b32b8144 FG |
419 | private: |
420 | enum DiscardAction { | |
421 | DISCARD_ACTION_REMOVE, | |
422 | DISCARD_ACTION_REMOVE_TRUNCATE, | |
423 | DISCARD_ACTION_TRUNCATE, | |
424 | DISCARD_ACTION_ZERO | |
425 | }; | |
7c673cae | 426 | |
b32b8144 FG |
427 | DiscardAction m_discard_action; |
428 | bool m_update_object_map; | |
31f18b77 | 429 | |
7c673cae FG |
430 | }; |
431 | ||
b32b8144 FG |
432 | template <typename ImageCtxT = ImageCtx> |
433 | class ObjectWriteSameRequest : public AbstractObjectWriteRequest<ImageCtxT> { | |
7c673cae | 434 | public: |
b32b8144 | 435 | ObjectWriteSameRequest(ImageCtxT *ictx, const std::string &oid, |
31f18b77 FG |
436 | uint64_t object_no, uint64_t object_off, |
437 | uint64_t object_len, const ceph::bufferlist &data, | |
438 | const ::SnapContext &snapc, int op_flags, | |
439 | const ZTracer::Trace &parent_trace, | |
440 | Context *completion) | |
b32b8144 FG |
441 | : AbstractObjectWriteRequest<ImageCtxT>(ictx, oid, object_no, object_off, |
442 | object_len, snapc, "writesame", | |
443 | parent_trace, completion), | |
7c673cae FG |
444 | m_write_data(data), m_op_flags(op_flags) { |
445 | } | |
446 | ||
447 | const char *get_op_type() const override { | |
448 | return "writesame"; | |
449 | } | |
450 | ||
7c673cae | 451 | protected: |
b32b8144 | 452 | void add_write_ops(librados::ObjectWriteOperation *wr) override; |
7c673cae FG |
453 | |
454 | private: | |
455 | ceph::bufferlist m_write_data; | |
456 | int m_op_flags; | |
457 | }; | |
458 | ||
b32b8144 FG |
459 | template <typename ImageCtxT = ImageCtx> |
460 | class ObjectCompareAndWriteRequest : public AbstractObjectWriteRequest<ImageCtxT> { | |
c07f9fc5 | 461 | public: |
b32b8144 | 462 | ObjectCompareAndWriteRequest(ImageCtxT *ictx, const std::string &oid, |
c07f9fc5 FG |
463 | uint64_t object_no, uint64_t object_off, |
464 | const ceph::bufferlist &cmp_bl, | |
465 | const ceph::bufferlist &write_bl, | |
466 | const ::SnapContext &snapc, | |
467 | uint64_t *mismatch_offset, int op_flags, | |
468 | const ZTracer::Trace &parent_trace, | |
469 | Context *completion) | |
b32b8144 FG |
470 | : AbstractObjectWriteRequest<ImageCtxT>(ictx, oid, object_no, object_off, |
471 | cmp_bl.length(), snapc, | |
472 | "compare_and_write", parent_trace, | |
473 | completion), | |
c07f9fc5 FG |
474 | m_cmp_bl(cmp_bl), m_write_bl(write_bl), |
475 | m_mismatch_offset(mismatch_offset), m_op_flags(op_flags) { | |
476 | } | |
477 | ||
478 | const char *get_op_type() const override { | |
479 | return "compare_and_write"; | |
480 | } | |
481 | ||
b32b8144 FG |
482 | void add_copyup_ops(librados::ObjectWriteOperation *wr) override { |
483 | // no-op on copyup | |
c07f9fc5 FG |
484 | } |
485 | ||
c07f9fc5 | 486 | protected: |
b32b8144 FG |
487 | virtual bool is_post_copyup_write_required() const { |
488 | return true; | |
489 | } | |
490 | ||
491 | void add_write_ops(librados::ObjectWriteOperation *wr) override; | |
c07f9fc5 | 492 | |
b32b8144 | 493 | int filter_write_result(int r) const override; |
c07f9fc5 FG |
494 | |
495 | private: | |
496 | ceph::bufferlist m_cmp_bl; | |
497 | ceph::bufferlist m_write_bl; | |
498 | uint64_t *m_mismatch_offset; | |
499 | int m_op_flags; | |
500 | }; | |
501 | ||
7c673cae FG |
502 | } // namespace io |
503 | } // namespace librbd | |
504 | ||
505 | extern template class librbd::io::ObjectRequest<librbd::ImageCtx>; | |
506 | extern template class librbd::io::ObjectReadRequest<librbd::ImageCtx>; | |
b32b8144 FG |
507 | extern template class librbd::io::AbstractObjectWriteRequest<librbd::ImageCtx>; |
508 | extern template class librbd::io::ObjectWriteRequest<librbd::ImageCtx>; | |
509 | extern template class librbd::io::ObjectDiscardRequest<librbd::ImageCtx>; | |
510 | extern template class librbd::io::ObjectWriteSameRequest<librbd::ImageCtx>; | |
511 | extern template class librbd::io::ObjectCompareAndWriteRequest<librbd::ImageCtx>; | |
7c673cae FG |
512 | |
513 | #endif // CEPH_LIBRBD_IO_OBJECT_REQUEST_H |