]>
Commit | Line | Data |
---|---|---|
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- | |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #ifndef CEPH_LIBRBD_IO_OBJECT_REQUEST_H | |
5 | #define CEPH_LIBRBD_IO_OBJECT_REQUEST_H | |
6 | ||
7 | #include "include/int_types.h" | |
8 | #include "include/buffer.h" | |
9 | #include "include/rados/librados.hpp" | |
10 | #include "common/snap_types.h" | |
11 | #include "common/zipkin_trace.h" | |
12 | #include "librbd/ObjectMap.h" | |
13 | #include <map> | |
14 | ||
15 | class Context; | |
16 | ||
17 | namespace librbd { | |
18 | ||
19 | struct ImageCtx; | |
20 | ||
21 | namespace io { | |
22 | ||
23 | struct AioCompletion; | |
24 | class CopyupRequest; | |
25 | class ObjectRemoveRequest; | |
26 | class ObjectTruncateRequest; | |
27 | class ObjectWriteRequest; | |
28 | class ObjectZeroRequest; | |
29 | ||
/**
 * Abstract handle exposing the two entry points shared by every object
 * request: send() and complete(int).
 */
struct ObjectRequestHandle {
  virtual ~ObjectRequestHandle() = default;

  virtual void complete(int r) = 0;
  virtual void send() = 0;
};
37 | ||
38 | /** | |
39 | * This class represents an I/O operation to a single RBD data object. | |
40 | * Its subclasses encapsulate logic for dealing with special cases | |
41 | * for I/O due to layering. | |
42 | */ | |
43 | template <typename ImageCtxT = ImageCtx> | |
44 | class ObjectRequest : public ObjectRequestHandle { | |
45 | public: | |
46 | typedef std::vector<std::pair<uint64_t, uint64_t> > Extents; | |
47 | ||
48 | static ObjectRequest* create_remove(ImageCtxT *ictx, | |
49 | const std::string &oid, | |
50 | uint64_t object_no, | |
51 | const ::SnapContext &snapc, | |
52 | const ZTracer::Trace &parent_trace, | |
53 | Context *completion); | |
54 | static ObjectRequest* create_truncate(ImageCtxT *ictx, | |
55 | const std::string &oid, | |
56 | uint64_t object_no, | |
57 | uint64_t object_off, | |
58 | const ::SnapContext &snapc, | |
59 | const ZTracer::Trace &parent_trace, | |
60 | Context *completion); | |
61 | static ObjectRequest* create_trim(ImageCtxT *ictx, const std::string &oid, | |
62 | uint64_t object_no, | |
63 | const ::SnapContext &snapc, | |
64 | bool post_object_map_update, | |
65 | Context *completion); | |
66 | static ObjectRequest* create_write(ImageCtxT *ictx, const std::string &oid, | |
67 | uint64_t object_no, | |
68 | uint64_t object_off, | |
69 | const ceph::bufferlist &data, | |
70 | const ::SnapContext &snapc, int op_flags, | |
71 | const ZTracer::Trace &parent_trace, | |
72 | Context *completion); | |
73 | static ObjectRequest* create_zero(ImageCtxT *ictx, const std::string &oid, | |
74 | uint64_t object_no, uint64_t object_off, | |
75 | uint64_t object_len, | |
76 | const ::SnapContext &snapc, | |
77 | const ZTracer::Trace &parent_trace, | |
78 | Context *completion); | |
79 | static ObjectRequest* create_writesame(ImageCtxT *ictx, | |
80 | const std::string &oid, | |
81 | uint64_t object_no, | |
82 | uint64_t object_off, | |
83 | uint64_t object_len, | |
84 | const ceph::bufferlist &data, | |
85 | const ::SnapContext &snapc, | |
86 | int op_flags, | |
87 | const ZTracer::Trace &parent_trace, | |
88 | Context *completion); | |
89 | static ObjectRequest* create_compare_and_write(ImageCtxT *ictx, | |
90 | const std::string &oid, | |
91 | uint64_t object_no, | |
92 | uint64_t object_off, | |
93 | const ceph::bufferlist &cmp_data, | |
94 | const ceph::bufferlist &write_data, | |
95 | const ::SnapContext &snapc, | |
96 | uint64_t *mismatch_offset, int op_flags, | |
97 | const ZTracer::Trace &parent_trace, | |
98 | Context *completion); | |
99 | ||
100 | ObjectRequest(ImageCtx *ictx, const std::string &oid, | |
101 | uint64_t objectno, uint64_t off, uint64_t len, | |
102 | librados::snap_t snap_id, bool hide_enoent, | |
103 | const char *trace_name, const ZTracer::Trace &parent_trace, | |
104 | Context *completion); | |
105 | ~ObjectRequest() override { | |
106 | m_trace.event("finish"); | |
107 | } | |
108 | ||
109 | virtual void add_copyup_ops(librados::ObjectWriteOperation *wr, | |
110 | bool set_hints) { | |
111 | }; | |
112 | ||
113 | virtual void complete(int r); | |
114 | ||
115 | virtual bool should_complete(int r) = 0; | |
116 | void send() override = 0; | |
117 | ||
118 | bool has_parent() const { | |
119 | return m_has_parent; | |
120 | } | |
121 | ||
122 | virtual bool is_op_payload_empty() const { | |
123 | return false; | |
124 | } | |
125 | ||
126 | virtual const char *get_op_type() const = 0; | |
127 | virtual bool pre_object_map_update(uint8_t *new_state) = 0; | |
128 | ||
129 | protected: | |
130 | bool compute_parent_extents(); | |
131 | ||
132 | ImageCtx *m_ictx; | |
133 | std::string m_oid; | |
134 | uint64_t m_object_no, m_object_off, m_object_len; | |
135 | librados::snap_t m_snap_id; | |
136 | Context *m_completion; | |
137 | Extents m_parent_extents; | |
138 | bool m_hide_enoent; | |
139 | ZTracer::Trace m_trace; | |
140 | ||
141 | private: | |
142 | bool m_has_parent = false; | |
143 | }; | |
144 | ||
145 | template <typename ImageCtxT = ImageCtx> | |
146 | class ObjectReadRequest : public ObjectRequest<ImageCtxT> { | |
147 | public: | |
148 | typedef std::vector<std::pair<uint64_t, uint64_t> > Extents; | |
149 | typedef std::map<uint64_t, uint64_t> ExtentMap; | |
150 | ||
151 | static ObjectReadRequest* create(ImageCtxT *ictx, const std::string &oid, | |
152 | uint64_t objectno, uint64_t offset, | |
153 | uint64_t len, Extents &buffer_extents, | |
154 | librados::snap_t snap_id, bool sparse, | |
155 | int op_flags, | |
156 | const ZTracer::Trace &parent_trace, | |
157 | Context *completion) { | |
158 | return new ObjectReadRequest(ictx, oid, objectno, offset, len, | |
159 | buffer_extents, snap_id, sparse, op_flags, | |
160 | parent_trace, completion); | |
161 | } | |
162 | ||
163 | ObjectReadRequest(ImageCtxT *ictx, const std::string &oid, | |
164 | uint64_t objectno, uint64_t offset, uint64_t len, | |
165 | Extents& buffer_extents, librados::snap_t snap_id, | |
166 | bool sparse, int op_flags, | |
167 | const ZTracer::Trace &parent_trace, Context *completion); | |
168 | ||
169 | bool should_complete(int r) override; | |
170 | void send() override; | |
171 | void guard_read(); | |
172 | ||
173 | inline uint64_t get_offset() const { | |
174 | return this->m_object_off; | |
175 | } | |
176 | inline uint64_t get_length() const { | |
177 | return this->m_object_len; | |
178 | } | |
179 | ceph::bufferlist &data() { | |
180 | return m_read_data; | |
181 | } | |
182 | const Extents &get_buffer_extents() const { | |
183 | return m_buffer_extents; | |
184 | } | |
185 | ExtentMap &get_extent_map() { | |
186 | return m_ext_map; | |
187 | } | |
188 | ||
189 | const char *get_op_type() const override { | |
190 | return "read"; | |
191 | } | |
192 | ||
193 | bool pre_object_map_update(uint8_t *new_state) override { | |
194 | return false; | |
195 | } | |
196 | ||
197 | private: | |
198 | Extents m_buffer_extents; | |
199 | bool m_tried_parent; | |
200 | bool m_sparse; | |
201 | int m_op_flags; | |
202 | ceph::bufferlist m_read_data; | |
203 | ExtentMap m_ext_map; | |
204 | ||
205 | /** | |
206 | * Reads go through the following state machine to deal with | |
207 | * layering: | |
208 | * | |
209 | * need copyup | |
210 | * LIBRBD_AIO_READ_GUARD ---------------> LIBRBD_AIO_READ_COPYUP | |
211 | * | | | |
212 | * v | | |
213 | * done <------------------------------------/ | |
214 | * ^ | |
215 | * | | |
216 | * LIBRBD_AIO_READ_FLAT | |
217 | * | |
218 | * Reads start in LIBRBD_AIO_READ_GUARD or _FLAT, depending on | |
219 | * whether there is a parent or not. | |
220 | */ | |
221 | enum read_state_d { | |
222 | LIBRBD_AIO_READ_GUARD, | |
223 | LIBRBD_AIO_READ_COPYUP, | |
224 | LIBRBD_AIO_READ_FLAT | |
225 | }; | |
226 | ||
227 | read_state_d m_state; | |
228 | ||
229 | void send_copyup(); | |
230 | ||
231 | void read_from_parent(Extents&& image_extents); | |
232 | }; | |
233 | ||
234 | class AbstractObjectWriteRequest : public ObjectRequest<> { | |
235 | public: | |
236 | AbstractObjectWriteRequest(ImageCtx *ictx, const std::string &oid, | |
237 | uint64_t object_no, uint64_t object_off, | |
238 | uint64_t len, const ::SnapContext &snapc, | |
239 | bool hide_enoent, const char *trace_name, | |
240 | const ZTracer::Trace &parent_trace, | |
241 | Context *completion); | |
242 | ||
243 | void add_copyup_ops(librados::ObjectWriteOperation *wr, | |
244 | bool set_hints) override | |
245 | { | |
246 | add_write_ops(wr, set_hints); | |
247 | } | |
248 | ||
249 | bool should_complete(int r) override; | |
250 | void send() override; | |
251 | ||
252 | /** | |
253 | * Writes go through the following state machine to deal with | |
254 | * layering and the object map: | |
255 | * | |
256 | * <start> | |
257 | * | | |
258 | * |\ | |
259 | * | \ -or- | |
260 | * | ---------------------------------> LIBRBD_AIO_WRITE_PRE | |
261 | * | . | | |
262 | * | . | | |
263 | * | . v | |
264 | * | . . . . > LIBRBD_AIO_WRITE_FLAT. . . | |
265 | * | | . | |
266 | * | | . | |
267 | * | | . | |
268 | * v need copyup (copyup performs pre) | . | |
269 | * LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | . | |
270 | * . | | . | . | |
271 | * . | | . | . | |
272 | * . | /-----/ . | . | |
273 | * . | | . | . | |
274 | * . \-------------------\ | /-------------------/ . | |
275 | * . | | | . . | |
276 | * . v v v . . | |
277 | * . LIBRBD_AIO_WRITE_POST . . | |
278 | * . | . . | |
279 | * . | . . . . . . . . . | |
280 | * . | . . | |
281 | * . v v . | |
282 | * . . . . . . . . . . . . . . > <finish> < . . . . . . . . . . . . . . | |
283 | * | |
284 | * The _PRE/_POST states are skipped if the object map is disabled. | |
285 | * The write starts in _WRITE_GUARD or _FLAT depending on whether or not | |
286 | * there is a parent overlap. | |
287 | */ | |
288 | protected: | |
289 | enum write_state_d { | |
290 | LIBRBD_AIO_WRITE_GUARD, | |
291 | LIBRBD_AIO_WRITE_COPYUP, | |
292 | LIBRBD_AIO_WRITE_FLAT, | |
293 | LIBRBD_AIO_WRITE_PRE, | |
294 | LIBRBD_AIO_WRITE_POST, | |
295 | LIBRBD_AIO_WRITE_ERROR | |
296 | }; | |
297 | ||
298 | write_state_d m_state; | |
299 | librados::ObjectWriteOperation m_write; | |
300 | uint64_t m_snap_seq; | |
301 | std::vector<librados::snap_t> m_snaps; | |
302 | bool m_object_exist; | |
303 | bool m_guard = true; | |
304 | ||
305 | virtual void add_write_ops(librados::ObjectWriteOperation *wr, | |
306 | bool set_hints) = 0; | |
307 | virtual void guard_write(); | |
308 | virtual bool post_object_map_update() { | |
309 | return false; | |
310 | } | |
311 | virtual void send_write(); | |
312 | virtual void send_write_op(); | |
313 | virtual void handle_write_guard(); | |
314 | ||
315 | void send_pre_object_map_update(); | |
316 | ||
317 | private: | |
318 | bool send_post_object_map_update(); | |
319 | void send_copyup(); | |
320 | }; | |
321 | ||
322 | class ObjectWriteRequest : public AbstractObjectWriteRequest { | |
323 | public: | |
324 | ObjectWriteRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, | |
325 | uint64_t object_off, const ceph::bufferlist &data, | |
326 | const ::SnapContext &snapc, int op_flags, | |
327 | const ZTracer::Trace &parent_trace, Context *completion) | |
328 | : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, | |
329 | data.length(), snapc, false, "write", | |
330 | parent_trace, completion), | |
331 | m_write_data(data), m_op_flags(op_flags) { | |
332 | } | |
333 | ||
334 | bool is_op_payload_empty() const override { | |
335 | return (m_write_data.length() == 0); | |
336 | } | |
337 | ||
338 | const char *get_op_type() const override { | |
339 | return "write"; | |
340 | } | |
341 | ||
342 | bool pre_object_map_update(uint8_t *new_state) override { | |
343 | *new_state = OBJECT_EXISTS; | |
344 | return true; | |
345 | } | |
346 | ||
347 | protected: | |
348 | void add_write_ops(librados::ObjectWriteOperation *wr, | |
349 | bool set_hints) override; | |
350 | ||
351 | void send_write() override; | |
352 | ||
353 | private: | |
354 | ceph::bufferlist m_write_data; | |
355 | int m_op_flags; | |
356 | }; | |
357 | ||
358 | class ObjectRemoveRequest : public AbstractObjectWriteRequest { | |
359 | public: | |
360 | ObjectRemoveRequest(ImageCtx *ictx, const std::string &oid, | |
361 | uint64_t object_no, const ::SnapContext &snapc, | |
362 | const ZTracer::Trace &parent_trace, Context *completion) | |
363 | : AbstractObjectWriteRequest(ictx, oid, object_no, 0, 0, snapc, true, | |
364 | "remote", parent_trace, completion), | |
365 | m_object_state(OBJECT_NONEXISTENT) { | |
366 | } | |
367 | ||
368 | const char* get_op_type() const override { | |
369 | if (has_parent()) { | |
370 | return "remove (trunc)"; | |
371 | } | |
372 | return "remove"; | |
373 | } | |
374 | ||
375 | bool pre_object_map_update(uint8_t *new_state) override { | |
376 | if (has_parent()) { | |
377 | m_object_state = OBJECT_EXISTS; | |
378 | } else { | |
379 | m_object_state = OBJECT_PENDING; | |
380 | } | |
381 | *new_state = m_object_state; | |
382 | return true; | |
383 | } | |
384 | ||
385 | bool post_object_map_update() override { | |
386 | if (m_object_state == OBJECT_EXISTS) { | |
387 | return false; | |
388 | } | |
389 | return true; | |
390 | } | |
391 | ||
392 | void guard_write() override; | |
393 | void send_write() override; | |
394 | ||
395 | protected: | |
396 | void add_write_ops(librados::ObjectWriteOperation *wr, | |
397 | bool set_hints) override { | |
398 | if (has_parent()) { | |
399 | wr->truncate(0); | |
400 | } else { | |
401 | wr->remove(); | |
402 | } | |
403 | } | |
404 | ||
405 | private: | |
406 | uint8_t m_object_state; | |
407 | }; | |
408 | ||
409 | class ObjectTrimRequest : public AbstractObjectWriteRequest { | |
410 | public: | |
411 | // we'd need to only conditionally specify if a post object map | |
412 | // update is needed. pre update is decided as usual (by checking | |
413 | // the state of the object in the map). | |
414 | ObjectTrimRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, | |
415 | const ::SnapContext &snapc, bool post_object_map_update, | |
416 | Context *completion) | |
417 | : AbstractObjectWriteRequest(ictx, oid, object_no, 0, 0, snapc, true, | |
418 | "trim", {}, completion), | |
419 | m_post_object_map_update(post_object_map_update) { | |
420 | } | |
421 | ||
422 | const char* get_op_type() const override { | |
423 | return "remove (trim)"; | |
424 | } | |
425 | ||
426 | bool pre_object_map_update(uint8_t *new_state) override { | |
427 | *new_state = OBJECT_PENDING; | |
428 | return true; | |
429 | } | |
430 | ||
431 | bool post_object_map_update() override { | |
432 | return m_post_object_map_update; | |
433 | } | |
434 | ||
435 | protected: | |
436 | void add_write_ops(librados::ObjectWriteOperation *wr, | |
437 | bool set_hints) override { | |
438 | wr->remove(); | |
439 | } | |
440 | ||
441 | private: | |
442 | bool m_post_object_map_update; | |
443 | }; | |
444 | ||
445 | class ObjectTruncateRequest : public AbstractObjectWriteRequest { | |
446 | public: | |
447 | ObjectTruncateRequest(ImageCtx *ictx, const std::string &oid, | |
448 | uint64_t object_no, uint64_t object_off, | |
449 | const ::SnapContext &snapc, | |
450 | const ZTracer::Trace &parent_trace, Context *completion) | |
451 | : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, 0, snapc, | |
452 | true, "truncate", parent_trace, completion) { | |
453 | } | |
454 | ||
455 | const char* get_op_type() const override { | |
456 | return "truncate"; | |
457 | } | |
458 | ||
459 | bool pre_object_map_update(uint8_t *new_state) override { | |
460 | if (!m_object_exist && !has_parent()) | |
461 | *new_state = OBJECT_NONEXISTENT; | |
462 | else | |
463 | *new_state = OBJECT_EXISTS; | |
464 | return true; | |
465 | } | |
466 | ||
467 | void send_write() override; | |
468 | ||
469 | protected: | |
470 | void add_write_ops(librados::ObjectWriteOperation *wr, | |
471 | bool set_hints) override { | |
472 | wr->truncate(m_object_off); | |
473 | } | |
474 | }; | |
475 | ||
476 | class ObjectZeroRequest : public AbstractObjectWriteRequest { | |
477 | public: | |
478 | ObjectZeroRequest(ImageCtx *ictx, const std::string &oid, uint64_t object_no, | |
479 | uint64_t object_off, uint64_t object_len, | |
480 | const ::SnapContext &snapc, | |
481 | const ZTracer::Trace &parent_trace, Context *completion) | |
482 | : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, object_len, | |
483 | snapc, true, "zero", parent_trace, | |
484 | completion) { | |
485 | } | |
486 | ||
487 | const char* get_op_type() const override { | |
488 | return "zero"; | |
489 | } | |
490 | ||
491 | bool pre_object_map_update(uint8_t *new_state) override { | |
492 | *new_state = OBJECT_EXISTS; | |
493 | return true; | |
494 | } | |
495 | ||
496 | void send_write() override; | |
497 | ||
498 | protected: | |
499 | void add_write_ops(librados::ObjectWriteOperation *wr, | |
500 | bool set_hints) override { | |
501 | wr->zero(m_object_off, m_object_len); | |
502 | } | |
503 | }; | |
504 | ||
505 | class ObjectWriteSameRequest : public AbstractObjectWriteRequest { | |
506 | public: | |
507 | ObjectWriteSameRequest(ImageCtx *ictx, const std::string &oid, | |
508 | uint64_t object_no, uint64_t object_off, | |
509 | uint64_t object_len, const ceph::bufferlist &data, | |
510 | const ::SnapContext &snapc, int op_flags, | |
511 | const ZTracer::Trace &parent_trace, | |
512 | Context *completion) | |
513 | : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, | |
514 | object_len, snapc, false, "writesame", | |
515 | parent_trace, completion), | |
516 | m_write_data(data), m_op_flags(op_flags) { | |
517 | } | |
518 | ||
519 | const char *get_op_type() const override { | |
520 | return "writesame"; | |
521 | } | |
522 | ||
523 | bool pre_object_map_update(uint8_t *new_state) override { | |
524 | *new_state = OBJECT_EXISTS; | |
525 | return true; | |
526 | } | |
527 | ||
528 | protected: | |
529 | void add_write_ops(librados::ObjectWriteOperation *wr, | |
530 | bool set_hints) override; | |
531 | ||
532 | void send_write() override; | |
533 | ||
534 | private: | |
535 | ceph::bufferlist m_write_data; | |
536 | int m_op_flags; | |
537 | }; | |
538 | ||
539 | class ObjectCompareAndWriteRequest : public AbstractObjectWriteRequest { | |
540 | public: | |
541 | typedef std::vector<std::pair<uint64_t, uint64_t> > Extents; | |
542 | ||
543 | ObjectCompareAndWriteRequest(ImageCtx *ictx, const std::string &oid, | |
544 | uint64_t object_no, uint64_t object_off, | |
545 | const ceph::bufferlist &cmp_bl, | |
546 | const ceph::bufferlist &write_bl, | |
547 | const ::SnapContext &snapc, | |
548 | uint64_t *mismatch_offset, int op_flags, | |
549 | const ZTracer::Trace &parent_trace, | |
550 | Context *completion) | |
551 | : AbstractObjectWriteRequest(ictx, oid, object_no, object_off, | |
552 | cmp_bl.length(), snapc, false, "compare_and_write", | |
553 | parent_trace, completion), | |
554 | m_cmp_bl(cmp_bl), m_write_bl(write_bl), | |
555 | m_mismatch_offset(mismatch_offset), m_op_flags(op_flags) { | |
556 | } | |
557 | ||
558 | const char *get_op_type() const override { | |
559 | return "compare_and_write"; | |
560 | } | |
561 | ||
562 | bool pre_object_map_update(uint8_t *new_state) override { | |
563 | *new_state = OBJECT_EXISTS; | |
564 | return true; | |
565 | } | |
566 | ||
567 | void complete(int r) override; | |
568 | protected: | |
569 | void add_write_ops(librados::ObjectWriteOperation *wr, | |
570 | bool set_hints) override; | |
571 | ||
572 | void send_write() override; | |
573 | ||
574 | private: | |
575 | ceph::bufferlist m_cmp_bl; | |
576 | ceph::bufferlist m_write_bl; | |
577 | uint64_t *m_mismatch_offset; | |
578 | int m_op_flags; | |
579 | }; | |
580 | ||
581 | } // namespace io | |
582 | } // namespace librbd | |
583 | ||
584 | extern template class librbd::io::ObjectRequest<librbd::ImageCtx>; | |
585 | extern template class librbd::io::ObjectReadRequest<librbd::ImageCtx>; | |
586 | ||
587 | #endif // CEPH_LIBRBD_IO_OBJECT_REQUEST_H |