1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "librbd/migration/QCOWFormat.h"
5 #include "common/Clock.h"
6 #include "common/dout.h"
7 #include "common/errno.h"
8 #include "include/intarith.h"
9 #include "librbd/AsioEngine.h"
10 #include "librbd/ImageCtx.h"
11 #include "librbd/ImageState.h"
12 #include "librbd/Utils.h"
13 #include "librbd/io/AioCompletion.h"
14 #include "librbd/io/ReadResult.h"
15 #include "librbd/migration/SnapshotInterface.h"
16 #include "librbd/migration/SourceSpecBuilder.h"
17 #include "librbd/migration/StreamInterface.h"
18 #include "librbd/migration/Utils.h"
19 #include <boost/asio/dispatch.hpp>
20 #include <boost/asio/post.hpp>
21 #include <boost/endian/conversion.hpp>
22 #include <deque>
23 #include <tuple>
24 #include <unordered_map>
25 #include <vector>
26
27 #define dout_subsys ceph_subsys_rbd
28
29 namespace librbd {
30 namespace migration {
31
32 #undef dout_prefix
33 #define dout_prefix *_dout << "librbd::migration::QCOWFormat: " \
34 << __func__ << ": "
35
36 using boost::endian::big_to_native;
37
38 namespace qcow_format {
39
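// Describes the portion of a single image extent that falls within one
// cluster: cluster_offset is filled in later from the L2 lookup,
// intra_cluster_offset/cluster_length locate the data inside that cluster,
// image_offset is the original image position and buffer_offset is the
// position within the caller's read buffer.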
40 struct ClusterExtent {
41 uint64_t cluster_offset;
42 uint64_t cluster_length;
43 uint64_t intra_cluster_offset;
44 uint64_t image_offset;
45 uint64_t buffer_offset;
46
47 ClusterExtent(uint64_t cluster_offset, uint64_t cluster_length,
48 uint64_t intra_cluster_offset, uint64_t image_offset,
49 uint64_t buffer_offset)
50 : cluster_offset(cluster_offset), cluster_length(cluster_length),
51 intra_cluster_offset(intra_cluster_offset), image_offset(image_offset),
52 buffer_offset(buffer_offset) {
53 }
54 };
55
56 typedef std::vector<ClusterExtent> ClusterExtents;
57
58 void LookupTable::init() {
59 if (cluster_offsets == nullptr) {
60 cluster_offsets = reinterpret_cast<uint64_t*>(bl.c_str());
61 }
62 }
63
64 void LookupTable::decode() {
65 init();
66
67 // L2 tables are selectively byte-swapped on demand if only requesting a
68 // single cluster offset
69 if (decoded) {
70 return;
71 }
72
73 // translate the lookup table (big-endian -> CPU endianness)
74 for (auto idx = 0UL; idx < size; ++idx) {
75 cluster_offsets[idx] = big_to_native(cluster_offsets[idx]);
76 }
77
78 decoded = true;
79 }
80
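// Splits the requested image extents at cluster boundaries (cluster_size is
// assumed to be a power of two, so "offset & (cluster_size - 1)" yields the
// intra-cluster offset). Illustrative example with cluster_size=65536: an
// extent {offset=65000, length=2000} becomes two ClusterExtents --
// {intra=65000, length=536, buffer_offset=0} and
// {intra=0, length=1464, buffer_offset=536}.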
81 void populate_cluster_extents(CephContext* cct, uint64_t cluster_size,
82 const io::Extents& image_extents,
83 ClusterExtents* cluster_extents) {
84 uint64_t buffer_offset = 0;
85 for (auto [image_offset, image_length] : image_extents) {
86 while (image_length > 0) {
87 auto intra_cluster_offset = image_offset & (cluster_size - 1);
88 auto intra_cluster_length = cluster_size - intra_cluster_offset;
89 auto cluster_length = std::min(image_length, intra_cluster_length);
90
91 ldout(cct, 20) << "image_offset=" << image_offset << ", "
92 << "image_length=" << image_length << ", "
93 << "cluster_length=" << cluster_length << dendl;
94
95
96 cluster_extents->emplace_back(0, cluster_length, intra_cluster_offset,
97 image_offset, buffer_offset);
98
99 image_offset += cluster_length;
100 image_length -= cluster_length;
101 buffer_offset += cluster_length;
102 }
103 }
104 }
105
106 } // namespace qcow_format
107
108 using namespace qcow_format;
109
110 template <typename I>
111 struct QCOWFormat<I>::Cluster {
112 const uint64_t cluster_offset;
113 bufferlist cluster_data_bl;
114
115 Cluster(uint64_t cluster_offset) : cluster_offset(cluster_offset) {
116 }
117 };
118
119 #undef dout_prefix
120 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::ClusterCache: " \
121 << this << " " << __func__ << ": "
122
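// Caches the most-recently read cluster and de-duplicates concurrent reads
// of the same cluster: callers for an in-flight cluster are queued in
// cluster_completions and completed together once the single stream read
// finishes. All cache state is mutated on m_strand.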
123 template <typename I>
124 class QCOWFormat<I>::ClusterCache {
125 public:
126 ClusterCache(QCOWFormat* qcow_format)
127 : qcow_format(qcow_format),
128 m_strand(*qcow_format->m_image_ctx->asio_engine) {
129 }
130
131 void get_cluster(uint64_t cluster_offset, uint64_t cluster_length,
132 uint64_t intra_cluster_offset, bufferlist* bl,
133 Context* on_finish) {
134 auto cct = qcow_format->m_image_ctx->cct;
135 ldout(cct, 20) << "cluster_offset=" << cluster_offset << dendl;
136
137 // cache state machine runs in a single strand thread
138 boost::asio::dispatch(
139 m_strand,
140 [this, cluster_offset, cluster_length, intra_cluster_offset, bl,
141 on_finish]() {
142 execute_get_cluster(cluster_offset, cluster_length,
143 intra_cluster_offset, bl, on_finish);
144 });
145 }
146
147 private:
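// Completion := (intra-cluster offset, length, destination bufferlist,
// completion context) for one pending get_cluster() call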
148 typedef std::tuple<uint64_t, uint64_t, bufferlist*, Context*> Completion;
149 typedef std::list<Completion> Completions;
150
151 QCOWFormat* qcow_format;
152 boost::asio::io_context::strand m_strand;
153
154 std::shared_ptr<Cluster> cluster;
155 std::unordered_map<uint64_t, Completions> cluster_completions;
156
157 void execute_get_cluster(uint64_t cluster_offset, uint64_t cluster_length,
158 uint64_t intra_cluster_offset, bufferlist* bl,
159 Context* on_finish) {
160 auto cct = qcow_format->m_image_ctx->cct;
161 ldout(cct, 20) << "cluster_offset=" << cluster_offset << dendl;
162
163 if (cluster && cluster->cluster_offset == cluster_offset) {
164 // most-recent cluster matches
165 bl->substr_of(cluster->cluster_data_bl, intra_cluster_offset,
166 cluster_length);
167 boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
168 [on_finish]() { on_finish->complete(0); });
169 return;
170 }
171
172 // record callback for cluster
173 bool new_request = (cluster_completions.count(cluster_offset) == 0);
174 cluster_completions[cluster_offset].emplace_back(
175 intra_cluster_offset, cluster_length, bl, on_finish);
176 if (new_request) {
177 // start the new read request
178 read_cluster(std::make_shared<Cluster>(cluster_offset));
179 }
180 }
181
182 void read_cluster(std::shared_ptr<Cluster> cluster) {
183 auto cct = qcow_format->m_image_ctx->cct;
184
185 uint64_t stream_offset = cluster->cluster_offset;
186 uint64_t stream_length = qcow_format->m_cluster_size;
187 if ((cluster->cluster_offset & QCOW_OFLAG_COMPRESSED) != 0) {
188 // compressed clusters pack the byte offset in the low bits and the compressed length in the bits above it
189 stream_offset = cluster->cluster_offset &
190 qcow_format->m_cluster_offset_mask;
191 stream_length = (cluster->cluster_offset >>
192 (63 - qcow_format->m_cluster_bits)) &
193 (qcow_format->m_cluster_size - 1);
194 }
195
196 ldout(cct, 20) << "cluster_offset=" << cluster->cluster_offset << ", "
197 << "stream_offset=" << stream_offset << ", "
198 << "stream_length=" << stream_length << dendl;
199
200 // read the cluster into the cache entry
201 auto ctx = new LambdaContext([this, cluster](int r) {
202 boost::asio::post(m_strand, [this, cluster, r]() {
203 handle_read_cluster(r, cluster); }); });
204 qcow_format->m_stream->read({{stream_offset, stream_length}},
205 &cluster->cluster_data_bl, ctx);
206 }
207
208 void handle_read_cluster(int r, std::shared_ptr<Cluster> cluster) {
209 auto cct = qcow_format->m_image_ctx->cct;
210 ldout(cct, 20) << "r=" << r << ", "
211 << "cluster_offset=" << cluster->cluster_offset << dendl;
212
213 auto completions = std::move(cluster_completions[cluster->cluster_offset]);
214 cluster_completions.erase(cluster->cluster_offset);
215
216 if (r < 0) {
217 lderr(cct) << "failed to read cluster offset " << cluster->cluster_offset
218 << ": " << cpp_strerror(r) << dendl;
219 } else {
220 if ((cluster->cluster_offset & QCOW_OFLAG_COMPRESSED) != 0) {
221 bufferlist compressed_bl{std::move(cluster->cluster_data_bl)};
222 cluster->cluster_data_bl.clear();
223
224 // TODO
225 lderr(cct) << "support for compressed clusters is not available"
226 << dendl;
227 r = -EINVAL;
228 } else {
229 // cache the MRU cluster in case of sequential IO
230 this->cluster = cluster;
231 }
232 }
233
234 // complete the IO back to caller
235 boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
236 [r, cluster, completions=std::move(completions)]() {
237 for (auto completion : completions) {
238 if (r >= 0) {
239 std::get<2>(completion)->substr_of(
240 cluster->cluster_data_bl,
241 std::get<0>(completion),
242 std::get<1>(completion));
243 }
244 std::get<3>(completion)->complete(r);
245 }
246 });
247 }
248 };
249
250 #undef dout_prefix
251 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::L2TableCache: " \
252 << this << " " << __func__ << ": "
253
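// Small fixed-size cache (QCOW_L2_CACHE_SIZE entries) of L2 lookup tables.
// Requests are queued and executed one at a time on m_strand; a cache miss
// starts an asynchronous table read and the pending request is re-dispatched
// when the read completes. Eviction picks the entry with the lowest use
// count (counts are aged on every miss scan), breaking ties by oldest
// timestamp.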
254 template <typename I>
255 class QCOWFormat<I>::L2TableCache {
256 public:
257 L2TableCache(QCOWFormat* qcow_format)
258 : qcow_format(qcow_format),
259 m_strand(*qcow_format->m_image_ctx->asio_engine),
260 l2_cache_entries(QCOW_L2_CACHE_SIZE) {
261 }
262
263 void get_l2_table(const LookupTable* l1_table, uint64_t l2_table_offset,
264 std::shared_ptr<const LookupTable>* l2_table,
265 Context* on_finish) {
266 auto cct = qcow_format->m_image_ctx->cct;
267 ldout(cct, 20) << "l2_table_offset=" << l2_table_offset << dendl;
268
269 // cache state machine runs in a single strand thread
270 Request request{l1_table, l2_table_offset, l2_table, on_finish};
271 boost::asio::dispatch(
272 m_strand, [this, request=std::move(request)]() {
273 requests.push_back(std::move(request));
274 });
275 dispatch_request();
276 }
277
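// Translates an image offset into a cluster offset via the two-level L1/L2
// lookup. Illustrative example with cluster_bits=16 and l2_bits=13:
// image_offset 0x23456789 -> l1_table_index = offset >> 29 = 1,
// l2_table_index = (offset >> 16) & 8191 = 837.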
278 void get_cluster_offset(const LookupTable* l1_table,
279 uint64_t image_offset, uint64_t* cluster_offset,
280 Context* on_finish) {
281 auto cct = qcow_format->m_image_ctx->cct;
282 uint32_t l1_table_index = image_offset >> qcow_format->m_l1_shift;
283 uint64_t l2_table_offset = l1_table->cluster_offsets[std::min<uint32_t>(
284 l1_table_index, l1_table->size - 1)] &
285 qcow_format->m_cluster_mask;
286 uint32_t l2_table_index = (image_offset >> qcow_format->m_cluster_bits) &
287 (qcow_format->m_l2_size - 1);
288 ldout(cct, 20) << "image_offset=" << image_offset << ", "
289 << "l1_table_index=" << l1_table_index << ", "
290 << "l2_table_offset=" << l2_table_offset << ", "
291 << "l2_table_index=" << l2_table_index << dendl;
292
293 if (l1_table_index >= l1_table->size) {
294 lderr(cct) << "L1 index " << l1_table_index << " out-of-bounds" << dendl;
295 on_finish->complete(-ERANGE);
296 return;
297 } else if (l2_table_offset == 0) {
298 // L2 table has not been allocated for specified offset
299 ldout(cct, 20) << "image_offset=" << image_offset << ", "
300 << "cluster_offset=DNE" << dendl;
301 *cluster_offset = 0;
302 on_finish->complete(-ENOENT);
303 return;
304 }
305
306 // cache state machine runs in a single strand thread
307 Request request{l1_table, l2_table_offset, l2_table_index, cluster_offset,
308 on_finish};
309 boost::asio::dispatch(
310 m_strand, [this, request=std::move(request)]() {
311 requests.push_back(std::move(request));
312 });
313 dispatch_request();
314 }
315
316 private:
317 QCOWFormat* qcow_format;
318
319 boost::asio::io_context::strand m_strand;
320
321 struct Request {
322 const LookupTable* l1_table;
323
324 uint64_t l2_table_offset;
325
326 // get_cluster_offset request
327 uint32_t l2_table_index;
328 uint64_t* cluster_offset = nullptr;
329
330 // get_l2_table request
331 std::shared_ptr<const LookupTable>* l2_table;
332
333 Context* on_finish;
334
335 Request(const LookupTable* l1_table, uint64_t l2_table_offset,
336 uint32_t l2_table_index, uint64_t* cluster_offset,
337 Context* on_finish)
338 : l1_table(l1_table), l2_table_offset(l2_table_offset),
339 l2_table_index(l2_table_index), cluster_offset(cluster_offset),
340 on_finish(on_finish) {
341 }
342 Request(const LookupTable* l1_table, uint64_t l2_table_offset,
343 std::shared_ptr<const LookupTable>* l2_table, Context* on_finish)
344 : l1_table(l1_table), l2_table_offset(l2_table_offset),
345 l2_table(l2_table), on_finish(on_finish) {
346 }
347 };
348
349 typedef std::deque<Request> Requests;
350
351 struct L2Cache {
352 uint64_t l2_offset = 0;
353 std::shared_ptr<LookupTable> l2_table;
354
355 utime_t timestamp;
356 uint32_t count = 0;
357 bool in_flight = false;
358
359 int ret_val = 0;
360 };
361 std::vector<L2Cache> l2_cache_entries;
362
363 Requests requests;
364
365 void dispatch_request() {
366 boost::asio::dispatch(m_strand, [this]() { execute_request(); });
367 }
368
369 void execute_request() {
370 auto cct = qcow_format->m_image_ctx->cct;
371 if (requests.empty()) {
372 return;
373 }
374
375 auto request = requests.front();
376 ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << dendl;
377
378 std::shared_ptr<LookupTable> l2_table;
379 int r = l2_table_lookup(request.l2_table_offset, &l2_table);
380 if (r < 0) {
381 lderr(cct) << "failed to load L2 table: l2_table_offset="
382 << request.l2_table_offset << ": "
383 << cpp_strerror(r) << dendl;
384 } else if (l2_table == nullptr) {
385 // table not in cache -- will restart once it's loaded
386 return;
387 } else if (request.cluster_offset != nullptr) {
388 auto cluster_offset = l2_table->cluster_offsets[request.l2_table_index];
389 if (!l2_table->decoded) {
390 // table hasn't been byte-swapped
391 cluster_offset = big_to_native(cluster_offset);
392 }
393
394 *request.cluster_offset = cluster_offset & qcow_format->m_cluster_mask;
395 if (*request.cluster_offset == QCOW_OFLAG_ZERO) {
396 ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << ", "
397 << "l2_table_index=" << request.l2_table_index << ", "
398 << "cluster_offset=zeroed" << dendl;
399 } else {
400 ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << ", "
401 << "l2_table_index=" << request.l2_table_index << ", "
402 << "cluster_offset=" << *request.cluster_offset
403 << dendl;
404 }
405 } else if (request.l2_table != nullptr) {
406 // ensure it's in the correct byte-order
407 l2_table->decode();
408 *request.l2_table = l2_table;
409 } else {
410 ceph_assert(false);
411 }
412
413 // complete the L2 cache request
414 boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
415 [r, ctx=request.on_finish]() { ctx->complete(r); });
416 requests.pop_front();
417
418 // process next request (if any)
419 dispatch_request();
420 }
421
422 int l2_table_lookup(uint64_t l2_offset,
423 std::shared_ptr<LookupTable>* l2_table) {
424 auto cct = qcow_format->m_image_ctx->cct;
425
426 l2_table->reset();
427
428 // find a match in the existing cache
429 for (auto idx = 0U; idx < l2_cache_entries.size(); ++idx) {
430 auto& l2_cache = l2_cache_entries[idx];
431 if (l2_cache.l2_offset == l2_offset) {
432 if (l2_cache.in_flight) {
433 ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
434 << "index=" << idx << " (in-flight)" << dendl;
435 return 0;
436 }
437
438 if (l2_cache.ret_val < 0) {
439 ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
440 << "index=" << idx << " (error): "
441 << cpp_strerror(l2_cache.ret_val) << dendl;
442 int r = l2_cache.ret_val;
443 l2_cache = L2Cache{};
444
445 return r;
446 }
447
448 ++l2_cache.count;
449 if (l2_cache.count == std::numeric_limits<uint32_t>::max()) {
450 for (auto& entry : l2_cache_entries) {
451 entry.count >>= 1;
452 }
453 }
454
455 ldout(cct, 20) << "l2_offset=" << l2_offset << ", " << "index=" << idx
456 << dendl;
457 *l2_table = l2_cache.l2_table;
458 return 0;
459 }
460 }
461
462 // find the least used entry
463 int32_t min_idx = -1;
464 uint32_t min_count = std::numeric_limits<uint32_t>::max();
465 utime_t min_timestamp;
466 for (uint32_t idx = 0U; idx < l2_cache_entries.size(); ++idx) {
467 auto& l2_cache = l2_cache_entries[idx];
468 if (l2_cache.in_flight) {
469 continue;
470 }
471
472 if (l2_cache.count > 0) {
473 --l2_cache.count;
474 }
475
476 if (l2_cache.count <= min_count) {
477 if (min_idx == -1 || l2_cache.timestamp < min_timestamp) {
478 min_timestamp = l2_cache.timestamp;
479 min_count = l2_cache.count;
480 min_idx = idx;
481 }
482 }
483 }
484
485 if (min_idx == -1) {
486 // no space in the cache due to in-flight requests
487 ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
488 << "index=DNE (cache busy)" << dendl;
489 return 0;
490 }
491
492 ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
493 << "index=" << min_idx << " (loading)" << dendl;
494 auto& l2_cache = l2_cache_entries[min_idx];
495 l2_cache.l2_table = std::make_shared<LookupTable>(qcow_format->m_l2_size);
496 l2_cache.l2_offset = l2_offset;
497 l2_cache.timestamp = ceph_clock_now();
498 l2_cache.count = 1;
499 l2_cache.in_flight = true;
500
501 // read the L2 table into the L2 cache entry
502 auto ctx = new LambdaContext([this, index=min_idx, l2_offset](int r) {
503 boost::asio::post(m_strand, [this, index, l2_offset, r]() {
504 handle_l2_table_lookup(r, index, l2_offset); }); });
505 qcow_format->m_stream->read(
506 {{l2_offset, qcow_format->m_l2_size * sizeof(uint64_t)}},
507 &l2_cache.l2_table->bl, ctx);
508 return 0;
509 }
510
511 void handle_l2_table_lookup(int r, uint32_t index, uint64_t l2_offset) {
512 auto cct = qcow_format->m_image_ctx->cct;
513 ldout(cct, 20) << "r=" << r << ", "
514 << "l2_offset=" << l2_offset << ", "
515 << "index=" << index << dendl;
516
517 auto& l2_cache = l2_cache_entries[index];
518 ceph_assert(l2_cache.in_flight);
519 l2_cache.in_flight = false;
520
521 if (r < 0) {
522 lderr(cct) << "failed to load L2 table: "
523 << "l2_offset=" << l2_cache.l2_offset << ": "
524 << cpp_strerror(r) << dendl;
525 l2_cache.ret_val = r;
526 } else {
527 // keep the L2 table in big-endian byte-order until the full table
528 // is requested
529 l2_cache.l2_table->init();
530 }
531
532 // restart the state machine
533 dispatch_request();
534 }
535
536 };
537
538 #undef dout_prefix
539 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::ReadRequest: " \
540 << this << " " << __func__ << ": "
541
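// Services a single image read: the image extents are split into per-cluster
// extents, each extent's cluster offset is resolved through the L2 table
// cache, and the data is then fetched through the cluster cache (or
// zero-filled for unallocated/zeroed clusters).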
542 template <typename I>
543 class QCOWFormat<I>::ReadRequest {
544 public:
545 ReadRequest(QCOWFormat* qcow_format, io::AioCompletion* aio_comp,
546 const LookupTable* l1_table, io::Extents&& image_extents)
547 : qcow_format(qcow_format), aio_comp(aio_comp), l1_table(l1_table),
548 image_extents(std::move(image_extents)) {
549 }
550
551 void send() {
552 get_cluster_offsets();
553 }
554
555 private:
556 QCOWFormat* qcow_format;
557 io::AioCompletion* aio_comp;
558
559 const LookupTable* l1_table;
560 io::Extents image_extents;
561
562 size_t image_extents_idx = 0;
563 uint32_t image_extent_offset = 0;
564
565 ClusterExtents cluster_extents;
566
567 void get_cluster_offsets() {
568 auto cct = qcow_format->m_image_ctx->cct;
569 populate_cluster_extents(cct, qcow_format->m_cluster_size, image_extents,
570 &cluster_extents);
571
572 ldout(cct, 20) << dendl;
573 auto ctx = new LambdaContext([this](int r) {
574 handle_get_cluster_offsets(r); });
575 auto gather_ctx = new C_Gather(cct, ctx);
576
577 for (auto& cluster_extent : cluster_extents) {
578 auto sub_ctx = new LambdaContext(
579 [this, &cluster_extent, on_finish=gather_ctx->new_sub()](int r) {
580 handle_get_cluster_offset(r, cluster_extent, on_finish); });
581 qcow_format->m_l2_table_cache->get_cluster_offset(
582 l1_table, cluster_extent.image_offset,
583 &cluster_extent.cluster_offset, sub_ctx);
584 }
585
586 gather_ctx->activate();
587 }
588
589 void handle_get_cluster_offset(int r, const ClusterExtent& cluster_extent,
590 Context* on_finish) {
591 auto cct = qcow_format->m_image_ctx->cct;
592 ldout(cct, 20) << "r=" << r << ", "
593 << "image_offset=" << cluster_extent.image_offset << ", "
594 << "cluster_offset=" << cluster_extent.cluster_offset
595 << dendl;
596
597 if (r == -ENOENT) {
598 ldout(cct, 20) << "image offset DNE in QCOW image" << dendl;
599 r = 0;
600 } else if (r < 0) {
601 lderr(cct) << "failed to map image offset " << cluster_extent.image_offset
602 << ": " << cpp_strerror(r) << dendl;
603 }
604
605 on_finish->complete(r);
606 }
607
608 void handle_get_cluster_offsets(int r) {
609 auto cct = qcow_format->m_image_ctx->cct;
610 ldout(cct, 20) << "r=" << r << dendl;
611
612 if (r < 0) {
613 lderr(cct) << "failed to retrieve cluster extents: " << cpp_strerror(r)
614 << dendl;
615 aio_comp->fail(r);
616 delete this;
617 return;
618 }
619
620 read_clusters();
621 }
622
623 void read_clusters() {
624 auto cct = qcow_format->m_image_ctx->cct;
625 ldout(cct, 20) << dendl;
626
627 aio_comp->set_request_count(cluster_extents.size());
628 for (auto& cluster_extent : cluster_extents) {
629 auto read_ctx = new io::ReadResult::C_ImageReadRequest(
630 aio_comp, cluster_extent.buffer_offset,
631 {{cluster_extent.image_offset, cluster_extent.cluster_length}});
632 read_ctx->ignore_enoent = true;
633
634 auto log_ctx = new LambdaContext(
635 [this, cct=qcow_format->m_image_ctx->cct,
636 image_offset=cluster_extent.image_offset,
637 image_length=cluster_extent.cluster_length, ctx=read_ctx](int r) {
638 handle_read_cluster(cct, r, image_offset, image_length, ctx);
639 });
640
641 if (cluster_extent.cluster_offset == 0) {
642 // QCOW header is at offset 0, implies cluster DNE
643 log_ctx->complete(-ENOENT);
644 } else if (cluster_extent.cluster_offset == QCOW_OFLAG_ZERO) {
645 // explicitly zeroed section
646 read_ctx->bl.append_zero(cluster_extent.cluster_length);
647 log_ctx->complete(0);
648 } else {
649 // request the (sub)cluster from the cluster cache
650 qcow_format->m_cluster_cache->get_cluster(
651 cluster_extent.cluster_offset, cluster_extent.cluster_length,
652 cluster_extent.intra_cluster_offset, &read_ctx->bl, log_ctx);
653 }
654 }
655
656 delete this;
657 }
658
659 void handle_read_cluster(CephContext* cct, int r, uint64_t image_offset,
660 uint64_t image_length, Context* on_finish) const {
661 // NOTE: treat as a static function -- the object may already have been deleted
662
663 ldout(cct, 20) << "r=" << r << ", "
664 << "image_offset=" << image_offset << ", "
665 << "image_length=" << image_length << dendl;
666
667 if (r != -ENOENT && r < 0) {
668 lderr(cct) << "failed to read image extent " << image_offset << "~"
669 << image_length << ": " << cpp_strerror(r) << dendl;
670 }
671
672 on_finish->complete(r);
673 }
674 };
675
676 #undef dout_prefix
677 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::" \
678 << "ListSnapsRequest: " << this << " " \
679 << __func__ << ": "
680
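// Computes the snapshot delta for all cluster extents that share a single L1
// table index: the L2 table for that index is loaded for each snapshot in
// turn (oldest to newest, ending with the HEAD revision) and consecutive
// cluster offsets are compared to classify each extent as unchanged, data,
// or zeroed.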
681 template <typename I>
682 class QCOWFormat<I>::ListSnapsRequest {
683 public:
684 ListSnapsRequest(
685 QCOWFormat* qcow_format, uint32_t l1_table_index,
686 ClusterExtents&& cluster_extents,
687 const std::map<uint64_t, const LookupTable*>& snap_id_to_l1_table,
688 io::SnapshotDelta* snapshot_delta, Context* on_finish)
689 : qcow_format(qcow_format), l1_table_index(l1_table_index),
690 cluster_extents(std::move(cluster_extents)),
691 snap_id_to_l1_table(snap_id_to_l1_table), snapshot_delta(snapshot_delta),
692 on_finish(on_finish) {
693 }
694
695 void send() {
696 get_l2_table();
697 }
698
699 private:
700 QCOWFormat* qcow_format;
701 uint32_t l1_table_index;
702 ClusterExtents cluster_extents;
703 std::map<uint64_t, const LookupTable*> snap_id_to_l1_table;
704 io::SnapshotDelta* snapshot_delta;
705 Context* on_finish;
706
707 std::shared_ptr<const LookupTable> previous_l2_table;
708 std::shared_ptr<const LookupTable> l2_table;
709
710 void get_l2_table() {
711 auto cct = qcow_format->m_image_ctx->cct;
712 if (snap_id_to_l1_table.empty()) {
713 finish(0);
714 return;
715 }
716
717 auto it = snap_id_to_l1_table.begin();
718 auto [snap_id, l1_table] = *it;
719 snap_id_to_l1_table.erase(it);
720
721 previous_l2_table = l2_table;
722 l2_table.reset();
723
724 auto ctx = new LambdaContext([this, snap_id = snap_id](int r) {
725 boost::asio::post(qcow_format->m_strand, [this, snap_id, r]() {
726 handle_get_l2_table(r, snap_id);
727 });
728 });
729
730 if (l1_table_index >= l1_table->size ||
731 l1_table->cluster_offsets[l1_table_index] == 0) {
732 ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
733 << "snap_id=" << snap_id << ": DNE" << dendl;
734 ctx->complete(-ENOENT);
735 return;
736 }
737
738 uint64_t l2_table_offset = l1_table->cluster_offsets[l1_table_index] &
739 qcow_format->m_cluster_mask;
740
741 ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
742 << "snap_id=" << snap_id << ", "
743 << "l2_table_offset=" << l2_table_offset << dendl;
744 qcow_format->m_l2_table_cache->get_l2_table(l1_table, l2_table_offset,
745 &l2_table, ctx);
746 }
747
748 void handle_get_l2_table(int r, uint64_t snap_id) {
749 ceph_assert(qcow_format->m_strand.running_in_this_thread());
750
751 auto cct = qcow_format->m_image_ctx->cct;
752 ldout(cct, 20) << "r=" << r << ", "
753 << "snap_id=" << snap_id << dendl;
754
755 if (r == -ENOENT) {
756 l2_table.reset();
757 } else if (r < 0) {
758 lderr(cct) << "failed to retrieve L2 table for snapshot " << snap_id
759 << ": " << cpp_strerror(r) << dendl;
760 finish(r);
761 return;
762 }
763
764 // compare the cluster offsets at each requested L2 offset between
765 // the previous snapshot's L2 table and the current L2 table.
766 auto& sparse_extents = (*snapshot_delta)[{snap_id, snap_id}];
767 for (auto& cluster_extent : cluster_extents) {
768 uint32_t l2_table_index =
769 (cluster_extent.image_offset >> qcow_format->m_cluster_bits) &
770 (qcow_format->m_l2_size - 1);
771
772 std::optional<uint64_t> cluster_offset;
773 if (l2_table && l2_table_index < l2_table->size) {
774 cluster_offset = l2_table->cluster_offsets[l2_table_index] &
775 qcow_format->m_cluster_offset_mask;
776 }
777
778 std::optional<uint64_t> prev_cluster_offset;
779 if (previous_l2_table && l2_table_index < previous_l2_table->size) {
780 prev_cluster_offset =
781 previous_l2_table->cluster_offsets[l2_table_index] &
782 qcow_format->m_cluster_offset_mask;
783 }
784
785 ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
786 << "snap_id=" << snap_id << ", "
787 << "image_offset=" << cluster_extent.image_offset << ", "
788 << "l2_table_index=" << l2_table_index << ", "
789 << "cluster_offset=" << cluster_offset << ", "
790 << "prev_cluster_offset=" << prev_cluster_offset << dendl;
791
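// classify the delta: identical offsets produce no record; a cluster that
// was deallocated since the previous snapshot or is explicitly marked zero
// is reported as ZEROED, anything else as DATA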
792 auto state = io::SPARSE_EXTENT_STATE_DATA;
793 if (cluster_offset == prev_cluster_offset) {
794 continue;
795 } else if ((prev_cluster_offset && !cluster_offset) ||
796 *cluster_offset == QCOW_OFLAG_ZERO) {
797 // explicitly zeroed or deallocated
798 state = io::SPARSE_EXTENT_STATE_ZEROED;
799 }
800
801 sparse_extents.insert(
802 cluster_extent.image_offset, cluster_extent.cluster_length,
803 {state, cluster_extent.cluster_length});
804 }
805
806 ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
807 << "snap_id=" << snap_id << ", "
808 << "sparse_extents=" << sparse_extents << dendl;
809
810 // continue processing the L2 table at this index for all snapshots
811 boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
812 [this]() { get_l2_table(); });
813 }
814
815
816 void finish(int r) {
817 auto cct = qcow_format->m_image_ctx->cct;
818 ldout(cct, 20) << "r=" << r << dendl;
819
820 on_finish->complete(r);
821 delete this;
822 }
823 };
824
825 #undef dout_prefix
826 #define dout_prefix *_dout << "librbd::migration::QCOWFormat: " << this \
827 << " " << __func__ << ": "
828
829 template <typename I>
830 QCOWFormat<I>::QCOWFormat(
831 I* image_ctx, const json_spirit::mObject& json_object,
832 const SourceSpecBuilder<I>* source_spec_builder)
833 : m_image_ctx(image_ctx), m_json_object(json_object),
834 m_source_spec_builder(source_spec_builder),
835 m_strand(*image_ctx->asio_engine) {
836 }
837
838 template <typename I>
839 void QCOWFormat<I>::open(Context* on_finish) {
840 auto cct = m_image_ctx->cct;
841 ldout(cct, 10) << dendl;
842
843 int r = m_source_spec_builder->build_stream(m_json_object, &m_stream);
844 if (r < 0) {
845 lderr(cct) << "failed to build migration stream handler" << cpp_strerror(r)
846 << dendl;
847 on_finish->complete(r);
848 return;
849 }
850
851 auto ctx = new LambdaContext([this, on_finish](int r) {
852 handle_open(r, on_finish); });
853 m_stream->open(ctx);
854 }
855
856 template <typename I>
857 void QCOWFormat<I>::handle_open(int r, Context* on_finish) {
858 auto cct = m_image_ctx->cct;
859 ldout(cct, 10) << "r=" << r << dendl;
860
861 if (r < 0) {
862 lderr(cct) << "failed to open QCOW image: " << cpp_strerror(r)
863 << dendl;
864 on_finish->complete(r);
865 return;
866 }
867
868 probe(on_finish);
869 }
870
871 template <typename I>
872 void QCOWFormat<I>::probe(Context* on_finish) {
873 auto cct = m_image_ctx->cct;
874 ldout(cct, 10) << dendl;
875
876 auto ctx = new LambdaContext([this, on_finish](int r) {
877 handle_probe(r, on_finish); });
878 m_bl.clear();
879 m_stream->read({{0, 8}}, &m_bl, ctx);
880 }
881
882 template <typename I>
883 void QCOWFormat<I>::handle_probe(int r, Context* on_finish) {
884 auto cct = m_image_ctx->cct;
885 ldout(cct, 10) << "r=" << r << dendl;
886
887 if (r < 0) {
888 lderr(cct) << "failed to probe QCOW image: " << cpp_strerror(r)
889 << dendl;
890 on_finish->complete(r);
891 return;
892 }
893
894 auto header_probe = *reinterpret_cast<QCowHeaderProbe*>(
895 m_bl.c_str());
896 header_probe.magic = big_to_native(header_probe.magic);
897 header_probe.version = big_to_native(header_probe.version);
898
899 if (header_probe.magic != QCOW_MAGIC) {
900 lderr(cct) << "invalid QCOW header magic" << dendl;
901 on_finish->complete(-EINVAL);
902 return;
903 }
904
905 m_bl.clear();
906 if (header_probe.version == 1) {
907 #ifdef WITH_RBD_MIGRATION_FORMAT_QCOW_V1
908 read_v1_header(on_finish);
909 #else // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
910 lderr(cct) << "QCOW is not supported" << dendl;
911 on_finish->complete(-ENOTSUP);
912 #endif // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
913 return;
914 } else if (header_probe.version >= 2 && header_probe.version <= 3) {
915 read_v2_header(on_finish);
916 return;
917 } else {
918 lderr(cct) << "invalid QCOW header version " << header_probe.version
919 << dendl;
920 on_finish->complete(-EINVAL);
921 return;
922 }
923 }
924
925 #ifdef WITH_RBD_MIGRATION_FORMAT_QCOW_V1
926
927 template <typename I>
928 void QCOWFormat<I>::read_v1_header(Context* on_finish) {
929 auto cct = m_image_ctx->cct;
930 ldout(cct, 10) << dendl;
931
932 auto ctx = new LambdaContext([this, on_finish](int r) {
933 handle_read_v1_header(r, on_finish); });
934 m_bl.clear();
935 m_stream->read({{0, sizeof(QCowHeaderV1)}}, &m_bl, ctx);
936 }
937
938 template <typename I>
939 void QCOWFormat<I>::handle_read_v1_header(int r, Context* on_finish) {
940 auto cct = m_image_ctx->cct;
941 ldout(cct, 10) << "r=" << r << dendl;
942
943 if (r < 0) {
944 lderr(cct) << "failed to read QCOW header: " << cpp_strerror(r) << dendl;
945 on_finish->complete(r);
946 return;
947 }
948
949 auto header = *reinterpret_cast<QCowHeaderV1*>(m_bl.c_str());
950
951 // byte-swap important fields
952 header.magic = big_to_native(header.magic);
953 header.version = big_to_native(header.version);
954 header.backing_file_offset = big_to_native(header.backing_file_offset);
955 header.backing_file_size = big_to_native(header.backing_file_size);
956 header.size = big_to_native(header.size);
957 header.crypt_method = big_to_native(header.crypt_method);
958 header.l1_table_offset = big_to_native(header.l1_table_offset);
959
960 if (header.magic != QCOW_MAGIC || header.version != 1) {
961 // honestly shouldn't happen since we've already validated it
962 lderr(cct) << "header is not QCOW" << dendl;
963 on_finish->complete(-EINVAL);
964 return;
965 }
966
967 if (header.cluster_bits < QCOW_MIN_CLUSTER_BITS ||
968 header.cluster_bits > QCOW_MAX_CLUSTER_BITS) {
969 lderr(cct) << "invalid cluster bits: " << header.cluster_bits << dendl;
970 on_finish->complete(-EINVAL);
971 return;
972 }
973
974 if (header.l2_bits < (QCOW_MIN_CLUSTER_BITS - 3) ||
975 header.l2_bits > (QCOW_MAX_CLUSTER_BITS - 3)) {
976 lderr(cct) << "invalid L2 bits: " << header.l2_bits << dendl;
977 on_finish->complete(-EINVAL);
978 return;
979 }
980
981 if (header.crypt_method != QCOW_CRYPT_NONE) {
982 lderr(cct) << "invalid or unsupported encryption method" << dendl;
983 on_finish->complete(-EINVAL);
984 return;
985 }
986
987 m_size = header.size;
988 if (p2roundup(m_size, static_cast<uint64_t>(512)) != m_size) {
989 lderr(cct) << "image size is not a multiple of block size" << dendl;
990 on_finish->complete(-EINVAL);
991 return;
992 }
993
994 m_backing_file_offset = header.backing_file_offset;
995 m_backing_file_size = header.backing_file_size;
996
997 m_cluster_bits = header.cluster_bits;
998 m_cluster_size = 1UL << header.cluster_bits;
999 m_cluster_offset_mask = (1ULL << (63 - header.cluster_bits)) - 1;
1000 m_cluster_mask = ~QCOW_OFLAG_COMPRESSED;
1001
1002 m_l2_bits = header.l2_bits;
1003 m_l2_size = (1UL << m_l2_bits);
1004
1005 m_l1_shift = m_cluster_bits + m_l2_bits;
1006 m_l1_table.size = (m_size + (1LL << m_l1_shift) - 1) >> m_l1_shift;
1007 m_l1_table_offset = header.l1_table_offset;
1008 if (m_size > (std::numeric_limits<uint64_t>::max() - (1ULL << m_l1_shift)) ||
1009 m_l1_table.size >
1010 (std::numeric_limits<int32_t>::max() / sizeof(uint64_t))) {
1011 lderr(cct) << "image size too big: " << m_size << dendl;
1012 on_finish->complete(-EINVAL);
1013 return;
1014 }
1015
1016 ldout(cct, 15) << "size=" << m_size << ", "
1017 << "cluster_bits=" << m_cluster_bits << ", "
1018 << "l2_bits=" << m_l2_bits << dendl;
1019
1020 // allocate memory for L1 table and L2 + cluster caches
1021 m_l2_table_cache = std::make_unique<L2TableCache>(this);
1022 m_cluster_cache = std::make_unique<ClusterCache>(this);
1023
1024 read_l1_table(on_finish);
1025 }
1026
1027 #endif // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
1028
1029 template <typename I>
1030 void QCOWFormat<I>::read_v2_header(Context* on_finish) {
1031 auto cct = m_image_ctx->cct;
1032 ldout(cct, 10) << dendl;
1033
1034 auto ctx = new LambdaContext([this, on_finish](int r) {
1035 handle_read_v2_header(r, on_finish); });
1036 m_bl.clear();
1037 m_stream->read({{0, sizeof(QCowHeader)}}, &m_bl, ctx);
1038 }
1039
1040 template <typename I>
1041 void QCOWFormat<I>::handle_read_v2_header(int r, Context* on_finish) {
1042 auto cct = m_image_ctx->cct;
1043 ldout(cct, 10) << "r=" << r << dendl;
1044
1045 if (r < 0) {
1046 lderr(cct) << "failed to read QCOW2 header: " << cpp_strerror(r) << dendl;
1047 on_finish->complete(r);
1048 return;
1049 }
1050
1051 auto header = *reinterpret_cast<QCowHeader*>(m_bl.c_str());
1052
1053 // byte-swap important fields
1054 header.magic = big_to_native(header.magic);
1055 header.version = big_to_native(header.version);
1056 header.backing_file_offset = big_to_native(header.backing_file_offset);
1057 header.backing_file_size = big_to_native(header.backing_file_size);
1058 header.cluster_bits = big_to_native(header.cluster_bits);
1059 header.size = big_to_native(header.size);
1060 header.crypt_method = big_to_native(header.crypt_method);
1061 header.l1_size = big_to_native(header.l1_size);
1062 header.l1_table_offset = big_to_native(header.l1_table_offset);
1063 header.nb_snapshots = big_to_native(header.nb_snapshots);
1064 header.snapshots_offset = big_to_native(header.snapshots_offset);
1065
1066 if (header.version == 2) {
1067 // valid only for version >= 3
1068 header.incompatible_features = 0;
1069 header.compatible_features = 0;
1070 header.autoclear_features = 0;
1071 header.header_length = 72;
1072 header.compression_type = 0;
1073 } else {
1074 header.incompatible_features = big_to_native(header.incompatible_features);
1075 header.compatible_features = big_to_native(header.compatible_features);
1076 header.autoclear_features = big_to_native(header.autoclear_features);
1077 header.header_length = big_to_native(header.header_length);
1078 }
1079
1080 if (header.magic != QCOW_MAGIC || header.version < 2 || header.version > 3) {
1081 // honestly shouldn't happen since we've already validated it
1082 lderr(cct) << "header is not QCOW2" << dendl;
1083 on_finish->complete(-EINVAL);
1084 return;
1085 }
1086
1087 if (header.cluster_bits < QCOW_MIN_CLUSTER_BITS ||
1088 header.cluster_bits > QCOW_MAX_CLUSTER_BITS) {
1089 lderr(cct) << "invalid cluster bits: " << header.cluster_bits << dendl;
1090 on_finish->complete(-EINVAL);
1091 return;
1092 }
1093
1094 if (header.crypt_method != QCOW_CRYPT_NONE) {
1095 lderr(cct) << "invalid or unsupported encryption method" << dendl;
1096 on_finish->complete(-EINVAL);
1097 return;
1098 }
1099
1100 m_size = header.size;
1101 if (p2roundup(m_size, static_cast<uint64_t>(512)) != m_size) {
1102 lderr(cct) << "image size is not a multiple of block size" << dendl;
1103 on_finish->complete(-EINVAL);
1104 return;
1105 }
1106
1107 if (header.header_length <= offsetof(QCowHeader, compression_type)) {
1108 header.compression_type = 0;
1109 }
1110
1111 if ((header.compression_type != 0) ||
1112 ((header.incompatible_features & QCOW2_INCOMPAT_COMPRESSION) != 0)) {
1113 lderr(cct) << "invalid or unsupported compression type" << dendl;
1114 on_finish->complete(-EINVAL);
1115 return;
1116 }
1117
1118 if ((header.incompatible_features & QCOW2_INCOMPAT_DATA_FILE) != 0) {
1119 lderr(cct) << "external data file feature not supported" << dendl;
1120 on_finish->complete(-ENOTSUP);
return;
1121 }
1122
1123 if ((header.incompatible_features & QCOW2_INCOMPAT_EXTL2) != 0) {
1124 lderr(cct) << "extended L2 table feature not supported" << dendl;
1125 on_finish->complete(-ENOTSUP);
1126 return;
1127 }
1128
1129 header.incompatible_features &= ~QCOW2_INCOMPAT_MASK;
1130 if (header.incompatible_features != 0) {
1131 lderr(cct) << "unknown incompatible feature enabled" << dendl;
1132 on_finish->complete(-EINVAL);
1133 return;
1134 }
1135
1136 m_backing_file_offset = header.backing_file_offset;
1137 m_backing_file_size = header.backing_file_size;
1138
1139 m_cluster_bits = header.cluster_bits;
1140 m_cluster_size = 1UL << header.cluster_bits;
1141 m_cluster_offset_mask = (1ULL << (63 - header.cluster_bits)) - 1;
1142 m_cluster_mask = ~(QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_COPIED);
1143
1144 // the L2 table is fixed at a single (1) cluster block holding 8-byte (2^3) offsets
1145 m_l2_bits = m_cluster_bits - 3;
1146 m_l2_size = (1UL << m_l2_bits);
1147
1148 m_l1_shift = m_cluster_bits + m_l2_bits;
1149 m_l1_table.size = (m_size + (1LL << m_l1_shift) - 1) >> m_l1_shift;
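// illustrative geometry: with cluster_bits=16 (64 KiB clusters) each L2
// table holds 2^13 = 8192 entries and maps 512 MiB, so m_l1_shift is 29 and
// a 10 GiB image needs a 20-entry L1 table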
1150 m_l1_table_offset = header.l1_table_offset;
1151 if (m_size > (std::numeric_limits<uint64_t>::max() - (1ULL << m_l1_shift)) ||
1152 m_l1_table.size >
1153 (std::numeric_limits<int32_t>::max() / sizeof(uint64_t))) {
1154 lderr(cct) << "image size too big: " << m_size << dendl;
1155 on_finish->complete(-EINVAL);
1156 return;
1157 } else if (m_l1_table.size > header.l1_size) {
1158 lderr(cct) << "invalid L1 table size in header (" << header.l1_size
1159 << " < " << m_l1_table.size << ")" << dendl;
1160 on_finish->complete(-EINVAL);
1161 return;
1162 }
1163
1164 m_snapshot_count = header.nb_snapshots;
1165 m_snapshots_offset = header.snapshots_offset;
1166
1167 ldout(cct, 15) << "size=" << m_size << ", "
1168 << "cluster_bits=" << m_cluster_bits << ", "
1169 << "l1_table_offset=" << m_l1_table_offset << ", "
1170 << "snapshot_count=" << m_snapshot_count << ", "
1171 << "snapshots_offset=" << m_snapshots_offset << dendl;
1172
1173 // allocate memory for L1 table and L2 + cluster caches
1174 m_l2_table_cache = std::make_unique<L2TableCache>(this);
1175 m_cluster_cache = std::make_unique<ClusterCache>(this);
1176
1177 read_snapshot(on_finish);
1178 }
1179
1180 template <typename I>
1181 void QCOWFormat<I>::read_snapshot(Context* on_finish) {
1182 if (m_snapshots_offset == 0 || m_snapshots.size() == m_snapshot_count) {
1183 read_l1_table(on_finish);
1184 return;
1185 }
1186
1187 // snapshot headers are always aligned on an 8-byte boundary
1188 m_snapshots_offset = p2roundup(m_snapshots_offset, static_cast<uint64_t>(8));
1189
1190 auto cct = m_image_ctx->cct;
1191 ldout(cct, 10) << "snap_id=" << (m_snapshots.size() + 1) << ", "
1192 << "offset=" << m_snapshots_offset << dendl;
1193
1194 auto ctx = new LambdaContext([this, on_finish](int r) {
1195 handle_read_snapshot(r, on_finish); });
1196 m_bl.clear();
1197 m_stream->read({{m_snapshots_offset, sizeof(QCowSnapshotHeader)}}, &m_bl,
1198 ctx);
1199 }
1200
1201 template <typename I>
1202 void QCOWFormat<I>::handle_read_snapshot(int r, Context* on_finish) {
1203 auto cct = m_image_ctx->cct;
1204 ldout(cct, 10) << "r=" << r << ", "
1205 << "index=" << m_snapshots.size() << dendl;
1206
1207 if (r < 0) {
1208 lderr(cct) << "failed to read QCOW2 snapshot header: " << cpp_strerror(r)
1209 << dendl;
1210 on_finish->complete(r);
1211 return;
1212 }
1213
1214 m_snapshots_offset += m_bl.length();
1215 auto header = *reinterpret_cast<QCowSnapshotHeader*>(m_bl.c_str());
1216
1217 auto& snapshot = m_snapshots[m_snapshots.size() + 1];
1218 snapshot.id.resize(big_to_native(header.id_str_size));
1219 snapshot.name.resize(big_to_native(header.name_size));
1220 snapshot.l1_table_offset = big_to_native(header.l1_table_offset);
1221 snapshot.l1_table.size = big_to_native(header.l1_size);
1222 snapshot.timestamp.sec_ref() = big_to_native(header.date_sec);
1223 snapshot.timestamp.nsec_ref() = big_to_native(header.date_nsec);
1224 snapshot.extra_data_size = big_to_native(header.extra_data_size);
1225
1226 ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
1227 << "id_str_len=" << snapshot.id.size() << ", "
1228 << "name_str_len=" << snapshot.name.size() << ", "
1229 << "l1_table_offset=" << snapshot.l1_table_offset << ", "
1230 << "l1_size=" << snapshot.l1_table.size << ", "
1231 << "extra_data_size=" << snapshot.extra_data_size << dendl;
1232
1233 read_snapshot_extra(on_finish);
1234 }
1235
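// Each on-disk snapshot record is a QCowSnapshotHeader followed by
// extra_data_size bytes of extra data, the id string and the name string
// (neither string is NUL-terminated); the next record starts at the next
// 8-byte boundary.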
1236 template <typename I>
1237 void QCOWFormat<I>::read_snapshot_extra(Context* on_finish) {
1238 ceph_assert(!m_snapshots.empty());
1239 auto& snapshot = m_snapshots.rbegin()->second;
1240
1241 uint32_t length = snapshot.extra_data_size +
1242 snapshot.id.size() +
1243 snapshot.name.size();
1244 if (length == 0) {
1245 uuid_d uuid_gen;
1246 uuid_gen.generate_random();
1247 snapshot.name = uuid_gen.to_string();
1248
1249 read_snapshot(on_finish);
1250 return;
1251 }
1252
1253 auto cct = m_image_ctx->cct;
1254 ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
1255 << "offset=" << m_snapshots_offset << ", "
1256 << "length=" << length << dendl;
1257
1258 auto offset = m_snapshots_offset;
1259 m_snapshots_offset += length;
1260
1261 auto ctx = new LambdaContext([this, on_finish](int r) {
1262 handle_read_snapshot_extra(r, on_finish); });
1263 m_bl.clear();
1264 m_stream->read({{offset, length}}, &m_bl, ctx);
1265 }
1266
1267 template <typename I>
1268 void QCOWFormat<I>::handle_read_snapshot_extra(int r, Context* on_finish) {
1269 ceph_assert(!m_snapshots.empty());
1270 auto& snapshot = m_snapshots.rbegin()->second;
1271
1272 auto cct = m_image_ctx->cct;
1273 ldout(cct, 10) << "r=" << r << ", "
1274 << "snap_id=" << m_snapshots.size() << dendl;
1275
1276 if (r < 0) {
1277 lderr(cct) << "failed to read QCOW2 snapshot header extra: "
1278 << cpp_strerror(r) << dendl;
1279 on_finish->complete(r);
1280 return;
1281 }
1282
1283 if (snapshot.extra_data_size >=
1284 offsetof(QCowSnapshotExtraData, disk_size) + sizeof(uint64_t)) {
1285 auto extra = reinterpret_cast<const QCowSnapshotExtraData*>(m_bl.c_str());
1286 snapshot.size = big_to_native(extra->disk_size);
1287 } else {
1288 snapshot.size = m_size;
1289 }
1290
1291 auto data = reinterpret_cast<const char*>(m_bl.c_str());
1292 data += snapshot.extra_data_size;
1293
1294 if (!snapshot.id.empty()) {
1295 snapshot.id = std::string(data, snapshot.id.size());
1296 data += snapshot.id.size();
1297 }
1298
1299 if (!snapshot.name.empty()) {
1300 snapshot.name = std::string(data, snapshot.name.size());
1301 data += snapshot.name.size();
1302 } else {
1303 uuid_d uuid_gen;
1304 uuid_gen.generate_random();
1305 snapshot.name = uuid_gen.to_string();
1306 }
1307
1308 ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
1309 << "name=" << snapshot.name << ", "
1310 << "size=" << snapshot.size << dendl;
1311 read_snapshot_l1_table(on_finish);
1312 }
1313
1314 template <typename I>
1315 void QCOWFormat<I>::read_snapshot_l1_table(Context* on_finish) {
1316 ceph_assert(!m_snapshots.empty());
1317 auto& snapshot = m_snapshots.rbegin()->second;
1318
1319 auto cct = m_image_ctx->cct;
1320 ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
1321 << "l1_table_offset=" << snapshot.l1_table_offset
1322 << dendl;
1323
1324 auto ctx = new LambdaContext([this, on_finish](int r) {
1325 handle_read_snapshot_l1_table(r, on_finish); });
1326 m_stream->read({{snapshot.l1_table_offset,
1327 snapshot.l1_table.size * sizeof(uint64_t)}},
1328 &snapshot.l1_table.bl, ctx);
1329 }
1330
1331 template <typename I>
1332 void QCOWFormat<I>::handle_read_snapshot_l1_table(int r, Context* on_finish) {
1333 ceph_assert(!m_snapshots.empty());
1334 auto& snapshot = m_snapshots.rbegin()->second;
1335
1336 auto cct = m_image_ctx->cct;
1337 ldout(cct, 10) << "r=" << r << ", "
1338 << "snap_id=" << m_snapshots.size() << dendl;
1339
1340 if (r < 0) {
1341 lderr(cct) << "failed to read snapshot L1 table: " << cpp_strerror(r)
1342 << dendl;
1343 on_finish->complete(r);
1344 return;
1345 }
1346
1347 snapshot.l1_table.decode();
1348 read_snapshot(on_finish);
1349 }
1350
1351 template <typename I>
1352 void QCOWFormat<I>::read_l1_table(Context* on_finish) {
1353 auto cct = m_image_ctx->cct;
1354 ldout(cct, 10) << dendl;
1355
1356 auto ctx = new LambdaContext([this, on_finish](int r) {
1357 handle_read_l1_table(r, on_finish); });
1358 m_stream->read({{m_l1_table_offset,
1359 m_l1_table.size * sizeof(uint64_t)}},
1360 &m_l1_table.bl, ctx);
1361 }
1362
1363 template <typename I>
1364 void QCOWFormat<I>::handle_read_l1_table(int r, Context* on_finish) {
1365 auto cct = m_image_ctx->cct;
1366 ldout(cct, 10) << "r=" << r << dendl;
1367
1368 if (r < 0) {
1369 lderr(cct) << "failed to read L1 table: " << cpp_strerror(r) << dendl;
1370 on_finish->complete(r);
1371 return;
1372 }
1373
1374 m_l1_table.decode();
1375 read_backing_file(on_finish);
1376 }
1377
1378 template <typename I>
1379 void QCOWFormat<I>::read_backing_file(Context* on_finish) {
1380 if (m_backing_file_offset == 0 || m_backing_file_size == 0) {
1381 // all data is within the specified file
1382 on_finish->complete(0);
1383 return;
1384 }
1385
1386 auto cct = m_image_ctx->cct;
1387 ldout(cct, 10) << dendl;
1388
1389 // TODO add support for backing files
1390 on_finish->complete(-ENOTSUP);
1391 }
1392
1393 template <typename I>
1394 void QCOWFormat<I>::close(Context* on_finish) {
1395 auto cct = m_image_ctx->cct;
1396 ldout(cct, 10) << dendl;
1397
1398 m_stream->close(on_finish);
1399 }
1400
1401 template <typename I>
1402 void QCOWFormat<I>::get_snapshots(SnapInfos* snap_infos, Context* on_finish) {
1403 auto cct = m_image_ctx->cct;
1404 ldout(cct, 10) << dendl;
1405
1406 snap_infos->clear();
1407 for (auto& [snap_id, snapshot] : m_snapshots) {
1408 SnapInfo snap_info(snapshot.name, cls::rbd::UserSnapshotNamespace{},
1409 snapshot.size, {}, 0, 0, snapshot.timestamp);
1410 snap_infos->emplace(snap_id, snap_info);
1411 }
1412
1413 on_finish->complete(0);
1414 }
1415
1416 template <typename I>
1417 void QCOWFormat<I>::get_image_size(uint64_t snap_id, uint64_t* size,
1418 Context* on_finish) {
1419 auto cct = m_image_ctx->cct;
1420 ldout(cct, 10) << "snap_id=" << snap_id << dendl;
1421
1422 if (snap_id == CEPH_NOSNAP) {
1423 *size = m_size;
1424 } else {
1425 auto snapshot_it = m_snapshots.find(snap_id);
1426 if (snapshot_it == m_snapshots.end()) {
1427 on_finish->complete(-ENOENT);
1428 return;
1429 }
1430
1431 auto& snapshot = snapshot_it->second;
1432 *size = snapshot.size;
1433 }
1434
1435 on_finish->complete(0);
1436 }
1437
1438 template <typename I>
1439 bool QCOWFormat<I>::read(
1440 io::AioCompletion* aio_comp, uint64_t snap_id, io::Extents&& image_extents,
1441 io::ReadResult&& read_result, int op_flags, int read_flags,
1442 const ZTracer::Trace &parent_trace) {
1443 auto cct = m_image_ctx->cct;
1444 ldout(cct, 20) << "snap_id=" << snap_id << ", "
1445 << "image_extents=" << image_extents << dendl;
1446
1447 const LookupTable* l1_table = nullptr;
1448 if (snap_id == CEPH_NOSNAP) {
1449 l1_table = &m_l1_table;
1450 } else {
1451 auto snapshot_it = m_snapshots.find(snap_id);
1452 if (snapshot_it == m_snapshots.end()) {
1453 aio_comp->fail(-ENOENT);
1454 return true;
1455 }
1456
1457 auto& snapshot = snapshot_it->second;
1458 l1_table = &snapshot.l1_table;
1459 }
1460
1461 aio_comp->read_result = std::move(read_result);
1462 aio_comp->read_result.set_image_extents(image_extents);
1463
1464 auto read_request = new ReadRequest(this, aio_comp, l1_table,
1465 std::move(image_extents));
1466 read_request->send();
1467
1468 return true;
1469 }
1470
1471 template <typename I>
1472 void QCOWFormat<I>::list_snaps(io::Extents&& image_extents,
1473 io::SnapIds&& snap_ids, int list_snaps_flags,
1474 io::SnapshotDelta* snapshot_delta,
1475 const ZTracer::Trace &parent_trace,
1476 Context* on_finish) {
1477 auto cct = m_image_ctx->cct;
1478 ldout(cct, 20) << "image_extents=" << image_extents << dendl;
1479
1480 ClusterExtents cluster_extents;
1481 populate_cluster_extents(cct, m_cluster_size, image_extents,
1482 &cluster_extents);
1483
1484 // map L1 table indexes to cluster extents
1485 std::map<uint64_t, ClusterExtents> l1_cluster_extents;
1486 for (auto& cluster_extent : cluster_extents) {
1487 uint32_t l1_table_index = cluster_extent.image_offset >> m_l1_shift;
1488 auto& l1_cluster_extent = l1_cluster_extents[l1_table_index];
1489 l1_cluster_extent.reserve(cluster_extents.size());
1490 l1_cluster_extent.push_back(cluster_extent);
1491 }
1492
1493 std::map<uint64_t, const LookupTable*> snap_id_to_l1_table;
1494 for (auto& [snap_id, snapshot] : m_snapshots) {
1495 snap_id_to_l1_table[snap_id] = &snapshot.l1_table;
1496 }
1497 snap_id_to_l1_table[CEPH_NOSNAP] = &m_l1_table;
1498
1499 on_finish = new LambdaContext([this, image_extents,
1500 snap_ids=std::move(snap_ids),
1501 snapshot_delta, on_finish](int r) mutable {
1502 handle_list_snaps(r, std::move(image_extents), std::move(snap_ids),
1503 snapshot_delta, on_finish);
1504 });
1505
1506 auto gather_ctx = new C_Gather(cct, on_finish);
1507
1508 for (auto& [l1_table_index, cluster_extents] : l1_cluster_extents) {
1509 auto list_snaps_request = new ListSnapsRequest(
1510 this, l1_table_index, std::move(cluster_extents), snap_id_to_l1_table,
1511 snapshot_delta, gather_ctx->new_sub());
1512 list_snaps_request->send();
1513 }
1514
1515 gather_ctx->activate();
1516 }
1517
1518 template <typename I>
1519 void QCOWFormat<I>::handle_list_snaps(int r, io::Extents&& image_extents,
1520 io::SnapIds&& snap_ids,
1521 io::SnapshotDelta* snapshot_delta,
1522 Context* on_finish) {
1523 auto cct = m_image_ctx->cct;
1524 ldout(cct, 20) << "r=" << r << ", "
1525 << "snapshot_delta=" << *snapshot_delta << dendl;
1526
1527 std::optional<uint64_t> previous_size = std::nullopt;
1528 for (auto& [snap_id, snapshot] : m_snapshots) {
1529 auto sparse_extents = &(*snapshot_delta)[{snap_id, snap_id}];
1530 util::zero_shrunk_snapshot(cct, image_extents, snap_id, snapshot.size,
1531 &previous_size, sparse_extents);
1532 }
1533
1534 auto sparse_extents = &(*snapshot_delta)[{CEPH_NOSNAP, CEPH_NOSNAP}];
1535 util::zero_shrunk_snapshot(cct, image_extents, CEPH_NOSNAP, m_size,
1536 &previous_size, sparse_extents);
1537
1538 util::merge_snapshot_delta(snap_ids, snapshot_delta);
1539 on_finish->complete(r);
1540 }
1541
1542 } // namespace migration
1543 } // namespace librbd
1544
1545 template class librbd::migration::QCOWFormat<librbd::ImageCtx>;