]> git.proxmox.com Git - ceph.git/blob - ceph/src/librbd/migration/QCOWFormat.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / librbd / migration / QCOWFormat.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "librbd/migration/QCOWFormat.h"
5 #include "common/Clock.h"
6 #include "common/dout.h"
7 #include "common/errno.h"
8 #include "include/intarith.h"
9 #include "librbd/AsioEngine.h"
10 #include "librbd/ImageCtx.h"
11 #include "librbd/ImageState.h"
12 #include "librbd/Utils.h"
13 #include "librbd/io/AioCompletion.h"
14 #include "librbd/io/ReadResult.h"
15 #include "librbd/migration/SnapshotInterface.h"
16 #include "librbd/migration/SourceSpecBuilder.h"
17 #include "librbd/migration/StreamInterface.h"
18 #include "librbd/migration/Utils.h"
19 #include <boost/asio/dispatch.hpp>
20 #include <boost/asio/post.hpp>
21 #include <deque>
22 #include <tuple>
23 #include <unordered_map>
24 #include <vector>
25
26 #define dout_subsys ceph_subsys_rbd
27
28 namespace librbd {
29 namespace migration {
30
31 #undef dout_prefix
32 #define dout_prefix *_dout << "librbd::migration::QCOWFormat: " \
33 << __func__ << ": "
34
35 namespace qcow_format {
36
// Describes one (sub)cluster-sized piece of an image I/O request:
// * cluster_offset/intra_cluster_offset locate the data within the QCOW file
//   (cluster_offset starts at 0 and is filled in later via an L2 lookup)
// * image_offset locates the data within the logical image
// * buffer_offset locates the data within the caller's result buffer
struct ClusterExtent {
  uint64_t cluster_offset;        // byte offset of the cluster in the QCOW file
  uint64_t cluster_length;        // length of this piece (<= cluster size)
  uint64_t intra_cluster_offset;  // starting offset within the cluster
  uint64_t image_offset;          // corresponding logical image offset
  uint64_t buffer_offset;         // offset into the caller's buffer

  ClusterExtent(uint64_t cluster_offset, uint64_t cluster_length,
                uint64_t intra_cluster_offset, uint64_t image_offset,
                uint64_t buffer_offset)
    : cluster_offset(cluster_offset), cluster_length(cluster_length),
      intra_cluster_offset(intra_cluster_offset), image_offset(image_offset),
      buffer_offset(buffer_offset) {
  }
};

// ordered collection of per-cluster pieces for a single request
typedef std::vector<ClusterExtent> ClusterExtents;
54
55 void LookupTable::init() {
56 if (cluster_offsets == nullptr) {
57 cluster_offsets = reinterpret_cast<uint64_t*>(bl.c_str());
58 }
59 }
60
61 void LookupTable::decode() {
62 init();
63
64 // L2 tables are selectively byte-swapped on demand if only requesting a
65 // single cluster offset
66 if (decoded) {
67 return;
68 }
69
70 // translate the lookup table (big-endian -> CPU endianess)
71 for (auto idx = 0UL; idx < size; ++idx) {
72 cluster_offsets[idx] = be64toh(cluster_offsets[idx]);
73 }
74
75 decoded = true;
76 }
77
78 void populate_cluster_extents(CephContext* cct, uint64_t cluster_size,
79 const io::Extents& image_extents,
80 ClusterExtents* cluster_extents) {
81 uint64_t buffer_offset = 0;
82 for (auto [image_offset, image_length] : image_extents) {
83 while (image_length > 0) {
84 auto intra_cluster_offset = image_offset & (cluster_size - 1);
85 auto intra_cluster_length = cluster_size - intra_cluster_offset;
86 auto cluster_length = std::min(image_length, intra_cluster_length);
87
88 ldout(cct, 20) << "image_offset=" << image_offset << ", "
89 << "image_length=" << image_length << ", "
90 << "cluster_length=" << cluster_length << dendl;
91
92
93 cluster_extents->emplace_back(0, cluster_length, intra_cluster_offset,
94 image_offset, buffer_offset);
95
96 image_offset += cluster_length;
97 image_length -= cluster_length;
98 buffer_offset += cluster_length;
99 }
100 }
101 }
102
103 } // namespace qcow_format
104
105 using namespace qcow_format;
106
// A single cluster's worth of data read from the source stream.  The offset
// is immutable for the lifetime of the object; the bufferlist is filled in
// by ClusterCache::read_cluster().
template <typename I>
struct QCOWFormat<I>::Cluster {
  // byte offset of the cluster within the QCOW file (may still carry flag
  // bits such as QCOW_OFLAG_COMPRESSED -- see ClusterCache::read_cluster)
  const uint64_t cluster_offset;

  // raw cluster data read from the stream
  bufferlist cluster_data_bl;

  Cluster(uint64_t cluster_offset) : cluster_offset(cluster_offset) {
  }
};
115
116 #undef dout_prefix
117 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::ClusterCache: " \
118 << this << " " << __func__ << ": "
119
// Single-entry (most-recently-used) cluster cache.  Concurrent requests for
// the same cluster are coalesced: only one stream read is issued per cluster
// and all pending callers are completed from its result.  All internal state
// is mutated only from handlers dispatched through m_strand.
template <typename I>
class QCOWFormat<I>::ClusterCache {
public:
  ClusterCache(QCOWFormat* qcow_format)
    : qcow_format(qcow_format),
      m_strand(*qcow_format->m_image_ctx->asio_engine) {
  }

  // Asynchronously copy 'cluster_length' bytes starting at
  // 'intra_cluster_offset' within the cluster at 'cluster_offset' into *bl,
  // then complete on_finish (0 on success, negative errno on failure).
  void get_cluster(uint64_t cluster_offset, uint64_t cluster_length,
                   uint64_t intra_cluster_offset, bufferlist* bl,
                   Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "cluster_offset=" << cluster_offset << dendl;

    // cache state machine runs in a single strand thread
    boost::asio::dispatch(
      m_strand,
      [this, cluster_offset, cluster_length, intra_cluster_offset, bl,
       on_finish]() {
        execute_get_cluster(cluster_offset, cluster_length,
                            intra_cluster_offset, bl, on_finish);
      });
  }

private:
  // (intra_cluster_offset, cluster_length, out-bl, completion context)
  typedef std::tuple<uint64_t, uint64_t, bufferlist*, Context*> Completion;
  typedef std::list<Completion> Completions;

  QCOWFormat* qcow_format;
  boost::asio::io_context::strand m_strand;

  // most-recently read cluster (only updated for uncompressed clusters)
  std::shared_ptr<Cluster> cluster;
  // per-cluster-offset list of callers waiting on an in-flight read
  std::unordered_map<uint64_t, Completions> cluster_completions;

  // strand-only: satisfy from the MRU cluster or register the caller and
  // kick off a read for the first request against a given cluster
  void execute_get_cluster(uint64_t cluster_offset, uint64_t cluster_length,
                           uint64_t intra_cluster_offset, bufferlist* bl,
                           Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "cluster_offset=" << cluster_offset << dendl;

    if (cluster && cluster->cluster_offset == cluster_offset) {
      // most-recent cluster matches
      bl->substr_of(cluster->cluster_data_bl, intra_cluster_offset,
                    cluster_length);
      // complete off-strand on the general asio engine
      boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
                        [on_finish]() { on_finish->complete(0); });
      return;
    }

    // record callback for cluster
    bool new_request = (cluster_completions.count(cluster_offset) == 0);
    cluster_completions[cluster_offset].emplace_back(
      intra_cluster_offset, cluster_length, bl, on_finish);
    if (new_request) {
      // start the new read request
      read_cluster(std::make_shared<Cluster>(cluster_offset));
    }
  }

  // Issue the stream read for the given cluster.  For compressed clusters
  // the actual file offset/length are encoded within the cluster offset
  // itself and are unpacked here.
  void read_cluster(std::shared_ptr<Cluster> cluster) {
    auto cct = qcow_format->m_image_ctx->cct;

    uint64_t stream_offset = cluster->cluster_offset;
    uint64_t stream_length = qcow_format->m_cluster_size;
    if ((cluster->cluster_offset & QCOW_OFLAG_COMPRESSED) != 0) {
      // compressed clusters encode the compressed length in the lower bits
      stream_offset = cluster->cluster_offset &
                      qcow_format->m_cluster_offset_mask;
      stream_length = (cluster->cluster_offset >>
                       (63 - qcow_format->m_cluster_bits)) &
                      (qcow_format->m_cluster_size - 1);
    }

    ldout(cct, 20) << "cluster_offset=" << cluster->cluster_offset << ", "
                   << "stream_offset=" << stream_offset << ", "
                   << "stream_length=" << stream_length << dendl;

    // read the cluster into the cache entry; re-enter the strand on completion
    auto ctx = new LambdaContext([this, cluster](int r) {
      boost::asio::post(m_strand, [this, cluster, r]() {
        handle_read_cluster(r, cluster); }); });
    qcow_format->m_stream->read({{stream_offset, stream_length}},
                                &cluster->cluster_data_bl, ctx);
  }

  // strand-only: drain all waiters recorded for this cluster, copying out
  // their requested sub-ranges (on success) and completing their contexts
  void handle_read_cluster(int r, std::shared_ptr<Cluster> cluster) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << ", "
                   << "cluster_offset=" << cluster->cluster_offset << dendl;

    // claim (and clear) the waiter list before completing anything
    auto completions = std::move(cluster_completions[cluster->cluster_offset]);
    cluster_completions.erase(cluster->cluster_offset);

    if (r < 0) {
      lderr(cct) << "failed to read cluster offset " << cluster->cluster_offset
                 << ": " << cpp_strerror(r) << dendl;
    } else {
      if ((cluster->cluster_offset & QCOW_OFLAG_COMPRESSED) != 0) {
        bufferlist compressed_bl{std::move(cluster->cluster_data_bl)};
        cluster->cluster_data_bl.clear();

        // TODO
        lderr(cct) << "support for compressed clusters is not available"
                   << dendl;
        r = -EINVAL;
      } else {
        // cache the MRU cluster in case of sequential IO
        this->cluster = cluster;
      }
    }

    // complete the IO back to caller
    boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
                      [r, cluster, completions=std::move(completions)]() {
                        for (auto completion : completions) {
                          if (r >= 0) {
                            // copy the requested sub-range into the caller's
                            // bufferlist
                            std::get<2>(completion)->substr_of(
                              cluster->cluster_data_bl,
                              std::get<0>(completion),
                              std::get<1>(completion));
                          }
                          std::get<3>(completion)->complete(r);
                        }
                      });
  }
};
246
247 #undef dout_prefix
248 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::L2TableCache: " \
249 << this << " " << __func__ << ": "
250
// Fixed-size cache of L2 lookup tables, indexed by their file offset.
// Requests are queued and processed one at a time on m_strand; eviction
// picks the least-used (then oldest) entry that is not currently loading.
// Callers may request either a full (decoded) L2 table or a single cluster
// offset out of an L2 table (which avoids decoding the whole table).
template <typename I>
class QCOWFormat<I>::L2TableCache {
public:
  L2TableCache(QCOWFormat* qcow_format)
    : qcow_format(qcow_format),
      m_strand(*qcow_format->m_image_ctx->asio_engine),
      l2_cache_entries(QCOW_L2_CACHE_SIZE) {
  }

  // Asynchronously fetch the full, byte-swapped L2 table located at
  // l2_table_offset; *l2_table is populated before on_finish completes.
  void get_l2_table(const LookupTable* l1_table, uint64_t l2_table_offset,
                    std::shared_ptr<const LookupTable>* l2_table,
                    Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "l2_table_offset=" << l2_table_offset << dendl;

    // cache state machine runs in a single strand thread
    Request request{l1_table, l2_table_offset, l2_table, on_finish};
    boost::asio::dispatch(
      m_strand, [this, request=std::move(request)]() {
        requests.push_back(std::move(request));
      });
    dispatch_request();
  }

  // Asynchronously resolve the cluster offset backing image_offset via the
  // L1/L2 tables.  Completes with -ERANGE for an out-of-bounds L1 index and
  // -ENOENT (with *cluster_offset = 0) when no L2 table is allocated.
  void get_cluster_offset(const LookupTable* l1_table,
                          uint64_t image_offset, uint64_t* cluster_offset,
                          Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    uint32_t l1_table_index = image_offset >> qcow_format->m_l1_shift;
    // clamp the index so the speculative read below stays in-bounds; the
    // real bounds check (and error) follows
    uint64_t l2_table_offset = l1_table->cluster_offsets[std::min<uint32_t>(
                                 l1_table_index, l1_table->size - 1)] &
                               qcow_format->m_cluster_mask;
    uint32_t l2_table_index = (image_offset >> qcow_format->m_cluster_bits) &
                              (qcow_format->m_l2_size - 1);
    ldout(cct, 20) << "image_offset=" << image_offset << ", "
                   << "l1_table_index=" << l1_table_index << ", "
                   << "l2_table_offset=" << l2_table_offset << ", "
                   << "l2_table_index=" << l2_table_index << dendl;

    if (l1_table_index >= l1_table->size) {
      lderr(cct) << "L1 index " << l1_table_index << " out-of-bounds" << dendl;
      on_finish->complete(-ERANGE);
      return;
    } else if (l2_table_offset == 0) {
      // L2 table has not been allocated for specified offset
      ldout(cct, 20) << "image_offset=" << image_offset << ", "
                     << "cluster_offset=DNE" << dendl;
      *cluster_offset = 0;
      on_finish->complete(-ENOENT);
      return;
    }

    // cache state machine runs in a single strand thread
    Request request{l1_table, l2_table_offset, l2_table_index, cluster_offset,
                    on_finish};
    boost::asio::dispatch(
      m_strand, [this, request=std::move(request)]() {
        requests.push_back(std::move(request));
      });
    dispatch_request();
  }

private:
  QCOWFormat* qcow_format;

  boost::asio::io_context::strand m_strand;

  // A queued cache lookup -- exactly one of cluster_offset (single-entry
  // lookup) or l2_table (full-table fetch) is non-null, selected by which
  // constructor was used.
  struct Request {
    const LookupTable* l1_table;

    uint64_t l2_table_offset;

    // get_cluster_offset request
    uint32_t l2_table_index;
    uint64_t* cluster_offset = nullptr;

    // get_l2_table request
    std::shared_ptr<const LookupTable>* l2_table;

    Context* on_finish;

    Request(const LookupTable* l1_table, uint64_t l2_table_offset,
            uint32_t l2_table_index, uint64_t* cluster_offset,
            Context* on_finish)
      : l1_table(l1_table), l2_table_offset(l2_table_offset),
        l2_table_index(l2_table_index), cluster_offset(cluster_offset),
        on_finish(on_finish) {
    }
    Request(const LookupTable* l1_table, uint64_t l2_table_offset,
            std::shared_ptr<const LookupTable>* l2_table, Context* on_finish)
      : l1_table(l1_table), l2_table_offset(l2_table_offset),
        l2_table(l2_table), on_finish(on_finish) {
    }
  };

  typedef std::deque<Request> Requests;

  // One cache slot: the cached table plus bookkeeping for eviction
  // (usage count + timestamp), load state, and any deferred load error.
  struct L2Cache {
    uint64_t l2_offset = 0;                 // file offset of the cached table
    std::shared_ptr<LookupTable> l2_table;  // table data (big-endian until decoded)

    utime_t timestamp;                      // when the slot was (re)loaded
    uint32_t count = 0;                     // usage counter for eviction
    bool in_flight = false;                 // stream read outstanding

    int ret_val = 0;                        // deferred error from the load
  };
  std::vector<L2Cache> l2_cache_entries;

  // pending lookups, serviced FIFO
  Requests requests;

  // re-enter the state machine on the strand
  void dispatch_request() {
    boost::asio::dispatch(m_strand, [this]() { execute_request(); });
  }

  // strand-only: service the front request if its L2 table is available;
  // otherwise leave it queued until the in-flight load completes
  void execute_request() {
    auto cct = qcow_format->m_image_ctx->cct;
    if (requests.empty()) {
      return;
    }

    auto request = requests.front();
    ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << dendl;

    std::shared_ptr<LookupTable> l2_table;
    int r = l2_table_lookup(request.l2_table_offset, &l2_table);
    if (r < 0) {
      lderr(cct) << "failed to load L2 table: l2_table_offset="
                 << request.l2_table_offset << ": "
                 << cpp_strerror(r) << dendl;
    } else if (l2_table == nullptr) {
      // table not in cache -- will restart once its loaded
      return;
    } else if (request.cluster_offset != nullptr) {
      // single-entry lookup: swap just this entry if the table as a whole
      // hasn't been decoded yet
      auto cluster_offset = l2_table->cluster_offsets[request.l2_table_index];
      if (!l2_table->decoded) {
        // table hasn't been byte-swapped
        cluster_offset = be64toh(cluster_offset);
      }

      *request.cluster_offset = cluster_offset & qcow_format->m_cluster_mask;
      if (*request.cluster_offset == QCOW_OFLAG_ZERO) {
        ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << ", "
                       << "l2_table_index=" << request.l2_table_index << ", "
                       << "cluster_offset=zeroed" << dendl;
      } else {
        ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << ", "
                       << "l2_table_index=" << request.l2_table_index << ", "
                       << "cluster_offset=" << *request.cluster_offset
                       << dendl;
      }
    } else if (request.l2_table != nullptr) {
      // ensure it's in the correct byte-order
      l2_table->decode();
      *request.l2_table = l2_table;
    } else {
      ceph_assert(false);
    }

    // complete the L2 cache request
    boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
                      [r, ctx=request.on_finish]() { ctx->complete(r); });
    requests.pop_front();

    // process next request (if any)
    dispatch_request();
  }

  // strand-only: find (or start loading) the L2 table at l2_offset.
  // Returns 0 with *l2_table unset when the table is still loading or the
  // cache is busy; returns a negative errno if a prior load failed.
  int l2_table_lookup(uint64_t l2_offset,
                      std::shared_ptr<LookupTable>* l2_table) {
    auto cct = qcow_format->m_image_ctx->cct;

    l2_table->reset();

    // find a match in the existing cache
    for (auto idx = 0U; idx < l2_cache_entries.size(); ++idx) {
      auto& l2_cache = l2_cache_entries[idx];
      if (l2_cache.l2_offset == l2_offset) {
        if (l2_cache.in_flight) {
          ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
                         << "index=" << idx << " (in-flight)" << dendl;
          return 0;
        }

        if (l2_cache.ret_val < 0) {
          ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
                         << "index=" << idx << " (error): "
                         << cpp_strerror(l2_cache.ret_val) << dendl;
          int r = l2_cache.ret_val;
          // surface the error once, then free the slot for a retry
          l2_cache = L2Cache{};

          return r;
        }

        ++l2_cache.count;
        if (l2_cache.count == std::numeric_limits<uint32_t>::max()) {
          // avoid counter saturation by halving everyone's usage count
          for (auto& entry : l2_cache_entries) {
            entry.count >>= 1;
          }
        }

        ldout(cct, 20) << "l2_offset=" << l2_offset << ", " << "index=" << idx
                       << dendl;
        *l2_table = l2_cache.l2_table;
        return 0;
      }
    }

    // find the least used entry
    int32_t min_idx = -1;
    uint32_t min_count = std::numeric_limits<uint32_t>::max();
    utime_t min_timestamp;
    for (uint32_t idx = 0U; idx < l2_cache_entries.size(); ++idx) {
      auto& l2_cache = l2_cache_entries[idx];
      if (l2_cache.in_flight) {
        continue;
      }

      // age all candidate entries as part of the scan
      if (l2_cache.count > 0) {
        --l2_cache.count;
      }

      if (l2_cache.count <= min_count) {
        // ties broken by oldest timestamp
        if (min_idx == -1 || l2_cache.timestamp < min_timestamp) {
          min_timestamp = l2_cache.timestamp;
          min_count = l2_cache.count;
          min_idx = idx;
        }
      }
    }

    if (min_idx == -1) {
      // no space in the cache due to in-flight requests
      ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
                     << "index=DNE (cache busy)" << dendl;
      return 0;
    }

    ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
                   << "index=" << min_idx << " (loading)" << dendl;
    auto& l2_cache = l2_cache_entries[min_idx];
    l2_cache.l2_table = std::make_shared<LookupTable>(qcow_format->m_l2_size);
    l2_cache.l2_offset = l2_offset;
    l2_cache.timestamp = ceph_clock_now();
    l2_cache.count = 1;
    l2_cache.in_flight = true;

    // read the L2 table into the L2 cache entry; re-enter the strand on
    // completion
    auto ctx = new LambdaContext([this, index=min_idx, l2_offset](int r) {
      boost::asio::post(m_strand, [this, index, l2_offset, r]() {
        handle_l2_table_lookup(r, index, l2_offset); }); });
    qcow_format->m_stream->read(
      {{l2_offset, qcow_format->m_l2_size * sizeof(uint64_t)}},
      &l2_cache.l2_table->bl, ctx);
    return 0;
  }

  // strand-only: finalize a slot load (recording any error) and restart the
  // request state machine so waiting lookups can proceed
  void handle_l2_table_lookup(int r, uint32_t index, uint64_t l2_offset) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << ", "
                   << "l2_offset=" << l2_offset << ", "
                   << "index=" << index << dendl;

    auto& l2_cache = l2_cache_entries[index];
    ceph_assert(l2_cache.in_flight);
    l2_cache.in_flight = false;

    if (r < 0) {
      lderr(cct) << "failed to load L2 table: "
                 << "l2_offset=" << l2_cache.l2_offset << ": "
                 << cpp_strerror(r) << dendl;
      l2_cache.ret_val = r;
    } else {
      // keep the L2 table in big-endian byte-order until the full table
      // is requested
      l2_cache.l2_table->init();
    }

    // restart the state machine
    dispatch_request();
  }

};
534
535 #undef dout_prefix
536 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::ReadRequest: " \
537 << this << " " << __func__ << ": "
538
// Self-deleting state machine servicing one image read: resolve each
// per-cluster extent's file offset via the L2 cache, then fetch the data
// (or synthesize zeroes / report holes) through the cluster cache.
// NOTE: the object deletes itself in read_clusters()/on failure, so no
// member access is allowed after the per-cluster callbacks are armed.
template <typename I>
class QCOWFormat<I>::ReadRequest {
public:
  ReadRequest(QCOWFormat* qcow_format, io::AioCompletion* aio_comp,
              const LookupTable* l1_table, io::Extents&& image_extents)
    : qcow_format(qcow_format), aio_comp(aio_comp), l1_table(l1_table),
      image_extents(std::move(image_extents)) {
  }

  void send() {
    get_cluster_offsets();
  }

private:
  QCOWFormat* qcow_format;
  io::AioCompletion* aio_comp;

  const LookupTable* l1_table;
  io::Extents image_extents;

  size_t image_extents_idx = 0;
  uint32_t image_extent_offset = 0;

  // per-cluster pieces of the request; cluster_offset fields are filled in
  // by the L2 lookups below
  ClusterExtents cluster_extents;

  // Fan out one L2 cluster-offset lookup per cluster extent, gathering the
  // results into handle_get_cluster_offsets().
  void get_cluster_offsets() {
    auto cct = qcow_format->m_image_ctx->cct;
    populate_cluster_extents(cct, qcow_format->m_cluster_size, image_extents,
                             &cluster_extents);

    ldout(cct, 20) << dendl;
    auto ctx = new LambdaContext([this](int r) {
      handle_get_cluster_offsets(r); });
    auto gather_ctx = new C_Gather(cct, ctx);

    for (auto& cluster_extent : cluster_extents) {
      // capturing cluster_extent by reference is safe: cluster_extents is
      // not resized after this point and outlives the sub-contexts
      auto sub_ctx = new LambdaContext(
        [this, &cluster_extent, on_finish=gather_ctx->new_sub()](int r) {
          handle_get_cluster_offset(r, cluster_extent, on_finish); });
      qcow_format->m_l2_table_cache->get_cluster_offset(
        l1_table, cluster_extent.image_offset,
        &cluster_extent.cluster_offset, sub_ctx);
    }

    gather_ctx->activate();
  }

  // Per-extent lookup callback: a missing mapping (-ENOENT) is not an error
  // for reads -- it is handled as a hole later.
  void handle_get_cluster_offset(int r, const ClusterExtent& cluster_extent,
                                 Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << ", "
                   << "image_offset=" << cluster_extent.image_offset << ", "
                   << "cluster_offset=" << cluster_extent.cluster_offset
                   << dendl;

    if (r == -ENOENT) {
      ldout(cct, 20) << "image offset DNE in QCOW image" << dendl;
      r = 0;
    } else if (r < 0) {
      lderr(cct) << "failed to map image offset " << cluster_extent.image_offset
                 << ": " << cpp_strerror(r) << dendl;
    }

    on_finish->complete(r);
  }

  // All lookups complete: fail the AIO on error, otherwise issue the reads.
  void handle_get_cluster_offsets(int r) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << dendl;

    if (r < 0) {
      lderr(cct) << "failed to retrieve cluster extents: " << cpp_strerror(r)
                 << dendl;
      aio_comp->fail(r);
      delete this;
      return;
    }

    read_clusters();
  }

  // Dispatch one sub-read per cluster extent, then delete this object --
  // the remaining callbacks capture everything they need by value.
  void read_clusters() {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << dendl;

    aio_comp->set_request_count(cluster_extents.size());
    for (auto& cluster_extent : cluster_extents) {
      auto read_ctx = new io::ReadResult::C_ImageReadRequest(
        aio_comp, cluster_extent.buffer_offset,
        {{cluster_extent.image_offset, cluster_extent.cluster_length}});
      // holes are expected; don't treat -ENOENT as a request failure
      read_ctx->ignore_enoent = true;

      auto log_ctx = new LambdaContext(
        [this, cct=qcow_format->m_image_ctx->cct,
         image_offset=cluster_extent.image_offset,
         image_length=cluster_extent.cluster_length, ctx=read_ctx](int r) {
          handle_read_cluster(cct, r, image_offset, image_length, ctx);
        });

      if (cluster_extent.cluster_offset == 0) {
        // QCOW header is at offset 0, implies cluster DNE
        log_ctx->complete(-ENOENT);
      } else if (cluster_extent.cluster_offset == QCOW_OFLAG_ZERO) {
        // explicitly zeroed section
        read_ctx->bl.append_zero(cluster_extent.cluster_length);
        log_ctx->complete(0);
      } else {
        // request the (sub)cluster from the cluster cache
        qcow_format->m_cluster_cache->get_cluster(
          cluster_extent.cluster_offset, cluster_extent.cluster_length,
          cluster_extent.intra_cluster_offset, &read_ctx->bl, log_ctx);
      }
    }

    delete this;
  }

  // Logging shim invoked after this object may already be destroyed; it
  // must not touch any member state.
  void handle_read_cluster(CephContext* cct, int r, uint64_t image_offset,
                           uint64_t image_length, Context* on_finish) const {
    // NOTE: treat as static function, expect object has been deleted

    ldout(cct, 20) << "r=" << r << ", "
                   << "image_offset=" << image_offset << ", "
                   << "image_length=" << image_length << dendl;

    if (r != -ENOENT && r < 0) {
      lderr(cct) << "failed to read image extent " << image_offset << "~"
                 << image_length << ": " << cpp_strerror(r) << dendl;
    }

    on_finish->complete(r);
  }
};
672
673 #undef dout_prefix
674 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::" \
675 << "ListSnapsRequest: " << this << " " \
676 << __func__ << ": "
677
// Self-deleting state machine that computes, for one L1 slot, the sparse
// delta between successive snapshots: it walks snap_id_to_l1_table in
// order, loads each snapshot's L2 table for l1_table_index, and records
// cluster extents whose mapping changed relative to the previous snapshot.
template <typename I>
class QCOWFormat<I>::ListSnapsRequest {
public:
  ListSnapsRequest(
    QCOWFormat* qcow_format, uint32_t l1_table_index,
    ClusterExtents&& cluster_extents,
    const std::map<uint64_t, const LookupTable*>& snap_id_to_l1_table,
    io::SnapshotDelta* snapshot_delta, Context* on_finish)
    : qcow_format(qcow_format), l1_table_index(l1_table_index),
      cluster_extents(std::move(cluster_extents)),
      snap_id_to_l1_table(snap_id_to_l1_table), snapshot_delta(snapshot_delta),
      on_finish(on_finish) {
  }

  void send() {
    get_l2_table();
  }

private:
  QCOWFormat* qcow_format;
  uint32_t l1_table_index;
  ClusterExtents cluster_extents;
  // remaining snapshots to process; entries are consumed front-to-back
  std::map<uint64_t, const LookupTable*> snap_id_to_l1_table;
  io::SnapshotDelta* snapshot_delta;
  Context* on_finish;

  // L2 tables for the previous and current snapshot being compared
  std::shared_ptr<const LookupTable> previous_l2_table;
  std::shared_ptr<const LookupTable> l2_table;

  // Pop the next snapshot and request its L2 table for l1_table_index;
  // finishes the request once all snapshots are processed.
  void get_l2_table() {
    auto cct = qcow_format->m_image_ctx->cct;
    if (snap_id_to_l1_table.empty()) {
      finish(0);
      return;
    }

    auto it = snap_id_to_l1_table.begin();
    auto [snap_id, l1_table] = *it;
    snap_id_to_l1_table.erase(it);

    // current table becomes the comparison baseline for the next snapshot
    previous_l2_table = l2_table;
    l2_table.reset();

    auto ctx = new LambdaContext([this, snap_id = snap_id](int r) {
      boost::asio::post(qcow_format->m_strand, [this, snap_id, r]() {
        handle_get_l2_table(r, snap_id);
      });
    });

    if (l1_table_index >= l1_table->size ||
        l1_table->cluster_offsets[l1_table_index] == 0) {
      // no L2 table for this slot in this snapshot -- treat as deallocated
      ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
                     << "snap_id=" << snap_id << ": DNE" << dendl;
      ctx->complete(-ENOENT);
      return;
    }

    uint64_t l2_table_offset = l1_table->cluster_offsets[l1_table_index] &
                               qcow_format->m_cluster_mask;

    ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
                   << "snap_id=" << snap_id << ", "
                   << "l2_table_offset=" << l2_table_offset << dendl;
    qcow_format->m_l2_table_cache->get_l2_table(l1_table, l2_table_offset,
                                                &l2_table, ctx);
  }

  // Compare the current snapshot's L2 mappings against the previous
  // snapshot's and record the changed extents.  Runs on the strand.
  void handle_get_l2_table(int r, uint64_t snap_id) {
    ceph_assert(qcow_format->m_strand.running_in_this_thread());

    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << ", "
                   << "snap_id=" << snap_id << dendl;

    if (r == -ENOENT) {
      // DNE is not fatal: an empty table means everything is deallocated
      l2_table.reset();
    } else if (r < 0) {
      lderr(cct) << "failed to retrieve L2 table for snapshot " << snap_id
                 << ": " << cpp_strerror(r) << dendl;
      finish(r);
      return;
    }

    // compare the cluster offsets at each requested L2 offset between
    // the previous snapshot's L2 table and the current L2 table.
    auto& sparse_extents = (*snapshot_delta)[{snap_id, snap_id}];
    for (auto& cluster_extent : cluster_extents) {
      uint32_t l2_table_index =
        (cluster_extent.image_offset >> qcow_format->m_cluster_bits) &
        (qcow_format->m_l2_size - 1);

      // nullopt indicates "no mapping" (table absent or index out of range)
      std::optional<uint64_t> cluster_offset;
      if (l2_table && l2_table_index < l2_table->size) {
        cluster_offset = l2_table->cluster_offsets[l2_table_index] &
                         qcow_format->m_cluster_offset_mask;
      }

      std::optional<uint64_t> prev_cluster_offset;
      if (previous_l2_table && l2_table_index < previous_l2_table->size) {
        prev_cluster_offset =
          previous_l2_table->cluster_offsets[l2_table_index] &
          qcow_format->m_cluster_offset_mask;
      }

      ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
                     << "snap_id=" << snap_id << ", "
                     << "image_offset=" << cluster_extent.image_offset << ", "
                     << "l2_table_index=" << l2_table_index << ", "
                     << "cluster_offset=" << cluster_offset << ", "
                     << "prev_cluster_offset=" << prev_cluster_offset << dendl;

      auto state = io::SPARSE_EXTENT_STATE_DATA;
      if (cluster_offset == prev_cluster_offset) {
        // unchanged between snapshots -- not part of the delta
        continue;
      } else if ((prev_cluster_offset && !cluster_offset) ||
                 *cluster_offset == QCOW_OFLAG_ZERO) {
        // explicitly zeroed or deallocated
        state = io::SPARSE_EXTENT_STATE_ZEROED;
      }

      sparse_extents.insert(
        cluster_extent.image_offset, cluster_extent.cluster_length,
        {state, cluster_extent.cluster_length});
    }

    ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
                   << "snap_id=" << snap_id << ", "
                   << "sparse_extents=" << sparse_extents << dendl;

    // continue processing the L2 table at this index for all snapshots
    boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
                      [this]() { get_l2_table(); });
  }


  void finish(int r) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << dendl;

    on_finish->complete(r);
    delete this;
  }
};
821
822 #undef dout_prefix
823 #define dout_prefix *_dout << "librbd::migration::QCOWFormat: " << this \
824 << " " << __func__ << ": "
825
// Construct the format handler; members are wired up here but no I/O is
// performed until open() is invoked.
template <typename I>
QCOWFormat<I>::QCOWFormat(
    I* image_ctx, const json_spirit::mObject& json_object,
    const SourceSpecBuilder<I>* source_spec_builder)
  : m_image_ctx(image_ctx), m_json_object(json_object),
    m_source_spec_builder(source_spec_builder),
    m_strand(*image_ctx->asio_engine) {
}
834
835 template <typename I>
836 void QCOWFormat<I>::open(Context* on_finish) {
837 auto cct = m_image_ctx->cct;
838 ldout(cct, 10) << dendl;
839
840 int r = m_source_spec_builder->build_stream(m_json_object, &m_stream);
841 if (r < 0) {
842 lderr(cct) << "failed to build migration stream handler" << cpp_strerror(r)
843 << dendl;
844 on_finish->complete(r);
845 return;
846 }
847
848 auto ctx = new LambdaContext([this, on_finish](int r) {
849 handle_open(r, on_finish); });
850 m_stream->open(ctx);
851 }
852
853 template <typename I>
854 void QCOWFormat<I>::handle_open(int r, Context* on_finish) {
855 auto cct = m_image_ctx->cct;
856 ldout(cct, 10) << "r=" << r << dendl;
857
858 if (r < 0) {
859 lderr(cct) << "failed to open QCOW image: " << cpp_strerror(r)
860 << dendl;
861 on_finish->complete(r);
862 return;
863 }
864
865 probe(on_finish);
866 }
867
868 template <typename I>
869 void QCOWFormat<I>::probe(Context* on_finish) {
870 auto cct = m_image_ctx->cct;
871 ldout(cct, 10) << dendl;
872
873 auto ctx = new LambdaContext([this, on_finish](int r) {
874 handle_probe(r, on_finish); });
875 m_bl.clear();
876 m_stream->read({{0, 8}}, &m_bl, ctx);
877 }
878
879 template <typename I>
880 void QCOWFormat<I>::handle_probe(int r, Context* on_finish) {
881 auto cct = m_image_ctx->cct;
882 ldout(cct, 10) << "r=" << r << dendl;
883
884 if (r < 0) {
885 lderr(cct) << "failed to probe QCOW image: " << cpp_strerror(r)
886 << dendl;
887 on_finish->complete(r);
888 return;
889 }
890
891 auto header_probe = *reinterpret_cast<QCowHeaderProbe*>(
892 m_bl.c_str());
893 header_probe.magic = be32toh(header_probe.magic);
894 header_probe.version = be32toh(header_probe.version);
895
896 if (header_probe.magic != QCOW_MAGIC) {
897 lderr(cct) << "invalid QCOW header magic" << dendl;
898 on_finish->complete(-EINVAL);
899 return;
900 }
901
902 m_bl.clear();
903 if (header_probe.version == 1) {
904 #ifdef WITH_RBD_MIGRATION_FORMAT_QCOW_V1
905 read_v1_header(on_finish);
906 #else // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
907 lderr(cct) << "QCOW is not supported" << dendl;
908 on_finish->complete(-ENOTSUP);
909 #endif // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
910 return;
911 } else if (header_probe.version >= 2 && header_probe.version <= 3) {
912 read_v2_header(on_finish);
913 return;
914 } else {
915 lderr(cct) << "invalid QCOW header version " << header_probe.version
916 << dendl;
917 on_finish->complete(-EINVAL);
918 return;
919 }
920 }
921
922 #ifdef WITH_RBD_MIGRATION_FORMAT_QCOW_V1
923
924 template <typename I>
925 void QCOWFormat<I>::read_v1_header(Context* on_finish) {
926 auto cct = m_image_ctx->cct;
927 ldout(cct, 10) << dendl;
928
929 auto ctx = new LambdaContext([this, on_finish](int r) {
930 handle_read_v1_header(r, on_finish); });
931 m_bl.clear();
932 m_stream->read({{0, sizeof(QCowHeaderV1)}}, &m_bl, ctx);
933 }
934
// Parse and validate the QCOW v1 header, derive the cluster/L1/L2 geometry
// fields and allocate the cluster + L2 caches before loading the L1 table.
// Any validation failure completes on_finish with -EINVAL.
template <typename I>
void QCOWFormat<I>::handle_read_v1_header(int r, Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << dendl;

  if (r < 0) {
    lderr(cct) << "failed to read QCOW header: " << cpp_strerror(r) << dendl;
    on_finish->complete(r);
    return;
  }

  auto header = *reinterpret_cast<QCowHeaderV1*>(m_bl.c_str());

  // byte-swap important fields
  header.magic = be32toh(header.magic);
  header.version = be32toh(header.version);
  header.backing_file_offset = be64toh(header.backing_file_offset);
  header.backing_file_size = be32toh(header.backing_file_size);
  header.size = be64toh(header.size);
  header.crypt_method = be32toh(header.crypt_method);
  header.l1_table_offset = be64toh(header.l1_table_offset);

  if (header.magic != QCOW_MAGIC || header.version != 1) {
    // honestly shouldn't happen since we've already validated it
    lderr(cct) << "header is not QCOW" << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  if (header.cluster_bits < QCOW_MIN_CLUSTER_BITS ||
      header.cluster_bits > QCOW_MAX_CLUSTER_BITS) {
    lderr(cct) << "invalid cluster bits: " << header.cluster_bits << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  if (header.l2_bits < (QCOW_MIN_CLUSTER_BITS - 3) ||
      header.l2_bits > (QCOW_MAX_CLUSTER_BITS - 3)) {
    lderr(cct) << "invalid L2 bits: " << header.l2_bits << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  if (header.crypt_method != QCOW_CRYPT_NONE) {
    lderr(cct) << "invalid or unsupported encryption method" << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  m_size = header.size;
  // image size must be sector (512-byte) aligned
  if (p2roundup(m_size, static_cast<uint64_t>(512)) != m_size) {
    lderr(cct) << "image size is not a multiple of block size" << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  m_backing_file_offset = header.backing_file_offset;
  m_backing_file_size = header.backing_file_size;

  // derived geometry: cluster size/masks from cluster_bits
  m_cluster_bits = header.cluster_bits;
  m_cluster_size = 1UL << header.cluster_bits;
  m_cluster_offset_mask = (1ULL << (63 - header.cluster_bits)) - 1;
  m_cluster_mask = ~QCOW_OFLAG_COMPRESSED;

  m_l2_bits = header.l2_bits;
  m_l2_size = (1UL << m_l2_bits);

  // each L1 entry covers 2^l1_shift bytes of the image
  m_l1_shift = m_cluster_bits + m_l2_bits;
  m_l1_table.size = (m_size + (1LL << m_l1_shift) - 1) >> m_l1_shift;
  m_l1_table_offset = header.l1_table_offset;
  // reject sizes that would overflow the L1 math or the table allocation
  if (m_size > (std::numeric_limits<uint64_t>::max() - (1ULL << m_l1_shift)) ||
      m_l1_table.size >
        (std::numeric_limits<int32_t>::max() / sizeof(uint64_t))) {
    lderr(cct) << "image size too big: " << m_size << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  ldout(cct, 15) << "size=" << m_size << ", "
                 << "cluster_bits=" << m_cluster_bits << ", "
                 << "l2_bits=" << m_l2_bits << dendl;

  // allocate memory for L1 table and L2 + cluster caches
  m_l2_table_cache = std::make_unique<L2TableCache>(this);
  m_cluster_cache = std::make_unique<ClusterCache>(this);

  read_l1_table(on_finish);
}
1023
1024 #endif // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
1025
1026 template <typename I>
1027 void QCOWFormat<I>::read_v2_header(Context* on_finish) {
1028 auto cct = m_image_ctx->cct;
1029 ldout(cct, 10) << dendl;
1030
1031 auto ctx = new LambdaContext([this, on_finish](int r) {
1032 handle_read_v2_header(r, on_finish); });
1033 m_bl.clear();
1034 m_stream->read({{0, sizeof(QCowHeader)}}, &m_bl, ctx);
1035 }
1036
1037 template <typename I>
1038 void QCOWFormat<I>::handle_read_v2_header(int r, Context* on_finish) {
1039 auto cct = m_image_ctx->cct;
1040 ldout(cct, 10) << "r=" << r << dendl;
1041
1042 if (r < 0) {
1043 lderr(cct) << "failed to read QCOW2 header: " << cpp_strerror(r) << dendl;
1044 on_finish->complete(r);
1045 return;
1046 }
1047
1048 auto header = *reinterpret_cast<QCowHeader*>(m_bl.c_str());
1049
1050 // byte-swap important fields
1051 header.magic = be32toh(header.magic);
1052 header.version = be32toh(header.version);
1053 header.backing_file_offset = be64toh(header.backing_file_offset);
1054 header.backing_file_size = be32toh(header.backing_file_size);
1055 header.cluster_bits = be32toh(header.cluster_bits);
1056 header.size = be64toh(header.size);
1057 header.crypt_method = be32toh(header.crypt_method);
1058 header.l1_size = be32toh(header.l1_size);
1059 header.l1_table_offset = be64toh(header.l1_table_offset);
1060 header.nb_snapshots = be32toh(header.nb_snapshots);
1061 header.snapshots_offset = be64toh(header.snapshots_offset);
1062
1063 if (header.version == 2) {
1064 // valid only for version >= 3
1065 header.incompatible_features = 0;
1066 header.compatible_features = 0;
1067 header.autoclear_features = 0;
1068 header.header_length = 72;
1069 header.compression_type = 0;
1070 } else {
1071 header.incompatible_features = be64toh(header.incompatible_features);
1072 header.compatible_features = be64toh(header.compatible_features);
1073 header.autoclear_features = be64toh(header.autoclear_features);
1074 header.header_length = be32toh(header.header_length);
1075 }
1076
1077 if (header.magic != QCOW_MAGIC || header.version < 2 || header.version > 3) {
1078 // honestly shouldn't happen since we've already validated it
1079 lderr(cct) << "header is not QCOW2" << dendl;
1080 on_finish->complete(-EINVAL);
1081 return;
1082 }
1083
1084 if (header.cluster_bits < QCOW_MIN_CLUSTER_BITS ||
1085 header.cluster_bits > QCOW_MAX_CLUSTER_BITS) {
1086 lderr(cct) << "invalid cluster bits: " << header.cluster_bits << dendl;
1087 on_finish->complete(-EINVAL);
1088 return;
1089 }
1090
1091 if (header.crypt_method != QCOW_CRYPT_NONE) {
1092 lderr(cct) << "invalid or unsupported encryption method" << dendl;
1093 on_finish->complete(-EINVAL);
1094 return;
1095 }
1096
1097 m_size = header.size;
1098 if (p2roundup(m_size, static_cast<uint64_t>(512)) != m_size) {
1099 lderr(cct) << "image size is not a multiple of block size" << dendl;
1100 on_finish->complete(-EINVAL);
1101 return;
1102 }
1103
1104 if (header.header_length <= offsetof(QCowHeader, compression_type)) {
1105 header.compression_type = 0;
1106 }
1107
1108 if ((header.compression_type != 0) ||
1109 ((header.incompatible_features & QCOW2_INCOMPAT_COMPRESSION) != 0)) {
1110 lderr(cct) << "invalid or unsupported compression type" << dendl;
1111 on_finish->complete(-EINVAL);
1112 return;
1113 }
1114
1115 if ((header.incompatible_features & QCOW2_INCOMPAT_DATA_FILE) != 0) {
1116 lderr(cct) << "external data file feature not supported" << dendl;
1117 on_finish->complete(-ENOTSUP);
1118 }
1119
1120 if ((header.incompatible_features & QCOW2_INCOMPAT_EXTL2) != 0) {
1121 lderr(cct) << "extended L2 table feature not supported" << dendl;
1122 on_finish->complete(-ENOTSUP);
1123 return;
1124 }
1125
1126 header.incompatible_features &= ~QCOW2_INCOMPAT_MASK;
1127 if (header.incompatible_features != 0) {
1128 lderr(cct) << "unknown incompatible feature enabled" << dendl;
1129 on_finish->complete(-EINVAL);
1130 return;
1131 }
1132
1133 m_backing_file_offset = header.backing_file_offset;
1134 m_backing_file_size = header.backing_file_size;
1135
1136 m_cluster_bits = header.cluster_bits;
1137 m_cluster_size = 1UL << header.cluster_bits;
1138 m_cluster_offset_mask = (1ULL << (63 - header.cluster_bits)) - 1;
1139 m_cluster_mask = ~(QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_COPIED);
1140
1141 // L2 table is fixed a (1) cluster block to hold 8-byte (3 bit) offsets
1142 m_l2_bits = m_cluster_bits - 3;
1143 m_l2_size = (1UL << m_l2_bits);
1144
1145 m_l1_shift = m_cluster_bits + m_l2_bits;
1146 m_l1_table.size = (m_size + (1LL << m_l1_shift) - 1) >> m_l1_shift;
1147 m_l1_table_offset = header.l1_table_offset;
1148 if (m_size > (std::numeric_limits<uint64_t>::max() - (1ULL << m_l1_shift)) ||
1149 m_l1_table.size >
1150 (std::numeric_limits<int32_t>::max() / sizeof(uint64_t))) {
1151 lderr(cct) << "image size too big: " << m_size << dendl;
1152 on_finish->complete(-EINVAL);
1153 return;
1154 } else if (m_l1_table.size > header.l1_size) {
1155 lderr(cct) << "invalid L1 table size in header (" << header.l1_size
1156 << " < " << m_l1_table.size << ")" << dendl;
1157 on_finish->complete(-EINVAL);
1158 return;
1159 }
1160
1161 m_snapshot_count = header.nb_snapshots;
1162 m_snapshots_offset = header.snapshots_offset;
1163
1164 ldout(cct, 15) << "size=" << m_size << ", "
1165 << "cluster_bits=" << m_cluster_bits << ", "
1166 << "l1_table_offset=" << m_l1_table_offset << ", "
1167 << "snapshot_count=" << m_snapshot_count << ", "
1168 << "snapshots_offset=" << m_snapshots_offset << dendl;
1169
1170 // allocate memory for L1 table and L2 + cluster caches
1171 m_l2_table_cache = std::make_unique<L2TableCache>(this);
1172 m_cluster_cache = std::make_unique<ClusterCache>(this);
1173
1174 read_snapshot(on_finish);
1175 }
1176
1177 template <typename I>
1178 void QCOWFormat<I>::read_snapshot(Context* on_finish) {
1179 if (m_snapshots_offset == 0 || m_snapshots.size() == m_snapshot_count) {
1180 read_l1_table(on_finish);
1181 return;
1182 }
1183
1184 // header is always aligned on 8 byte boundary
1185 m_snapshots_offset = p2roundup(m_snapshots_offset, static_cast<uint64_t>(8));
1186
1187 auto cct = m_image_ctx->cct;
1188 ldout(cct, 10) << "snap_id=" << (m_snapshots.size() + 1) << ", "
1189 << "offset=" << m_snapshots_offset << dendl;
1190
1191 auto ctx = new LambdaContext([this, on_finish](int r) {
1192 handle_read_snapshot(r, on_finish); });
1193 m_bl.clear();
1194 m_stream->read({{m_snapshots_offset, sizeof(QCowSnapshotHeader)}}, &m_bl,
1195 ctx);
1196 }
1197
// Parse one on-disk snapshot header, record its metadata, and chain into
// reading the variable-length extra data that follows the fixed header.
template <typename I>
void QCOWFormat<I>::handle_read_snapshot(int r, Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << ", "
                 << "index=" << m_snapshots.size() << dendl;

  if (r < 0) {
    lderr(cct) << "failed to read QCOW2 snapshot header: " << cpp_strerror(r)
               << dendl;
    on_finish->complete(r);
    return;
  }

  // advance past the fixed-size header; the id/name/extra payload follows
  m_snapshots_offset += m_bl.length();
  auto header = *reinterpret_cast<QCowSnapshotHeader*>(m_bl.c_str());

  // snapshot ids handed to librbd are 1-based ordinals (size()+1 inserts
  // the next id into the map)
  auto& snapshot = m_snapshots[m_snapshots.size() + 1];
  // resize() records the on-disk string lengths; the actual id/name bytes
  // are filled in by handle_read_snapshot_extra()
  snapshot.id.resize(be16toh(header.id_str_size));
  snapshot.name.resize(be16toh(header.name_size));
  snapshot.l1_table_offset = be64toh(header.l1_table_offset);
  snapshot.l1_table.size = be32toh(header.l1_size);
  snapshot.timestamp.sec_ref() = be32toh(header.date_sec);
  snapshot.timestamp.nsec_ref() = be32toh(header.date_nsec);
  snapshot.extra_data_size = be32toh(header.extra_data_size);

  ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
                 << "id_str_len=" << snapshot.id.size() << ", "
                 << "name_str_len=" << snapshot.name.size() << ", "
                 << "l1_table_offset=" << snapshot.l1_table_offset << ", "
                 << "l1_size=" << snapshot.l1_table.size << ", "
                 << "extra_data_size=" << snapshot.extra_data_size << dendl;

  read_snapshot_extra(on_finish);
}
1232
1233 template <typename I>
1234 void QCOWFormat<I>::read_snapshot_extra(Context* on_finish) {
1235 ceph_assert(!m_snapshots.empty());
1236 auto& snapshot = m_snapshots.rbegin()->second;
1237
1238 uint32_t length = snapshot.extra_data_size +
1239 snapshot.id.size() +
1240 snapshot.name.size();
1241 if (length == 0) {
1242 uuid_d uuid_gen;
1243 uuid_gen.generate_random();
1244 snapshot.name = uuid_gen.to_string();
1245
1246 read_snapshot(on_finish);
1247 return;
1248 }
1249
1250 auto cct = m_image_ctx->cct;
1251 ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
1252 << "offset=" << m_snapshots_offset << ", "
1253 << "length=" << length << dendl;
1254
1255 auto offset = m_snapshots_offset;
1256 m_snapshots_offset += length;
1257
1258 auto ctx = new LambdaContext([this, on_finish](int r) {
1259 handle_read_snapshot_extra(r, on_finish); });
1260 m_bl.clear();
1261 m_stream->read({{offset, length}}, &m_bl, ctx);
1262 }
1263
// Decode the variable-length payload that trails a snapshot header:
// [extra data][id string][name string]. Populates the snapshot's size,
// id, and name, then loops back to read_snapshot() for the next entry.
template <typename I>
void QCOWFormat<I>::handle_read_snapshot_extra(int r, Context* on_finish) {
  ceph_assert(!m_snapshots.empty());
  auto& snapshot = m_snapshots.rbegin()->second;

  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << ", "
                 << "snap_id=" << m_snapshots.size() << dendl;

  if (r < 0) {
    lderr(cct) << "failed to read QCOW2 snapshot header extra: "
               << cpp_strerror(r) << dendl;
    on_finish->complete(r);
    return;
  }

  // only trust the extra data's disk_size field if the on-disk blob is
  // large enough to actually contain it; otherwise the snapshot inherits
  // the current image size
  if (snapshot.extra_data_size >=
        offsetof(QCowSnapshotExtraData, disk_size) + sizeof(uint64_t)) {
    auto extra = reinterpret_cast<const QCowSnapshotExtraData*>(m_bl.c_str());
    snapshot.size = be64toh(extra->disk_size);
  } else {
    snapshot.size = m_size;
  }

  // walk past the extra blob to the id/name strings (lengths were recorded
  // via resize() when the fixed header was parsed)
  auto data = reinterpret_cast<const char*>(m_bl.c_str());
  data += snapshot.extra_data_size;

  if (!snapshot.id.empty()) {
    snapshot.id = std::string(data, snapshot.id.size());
    data += snapshot.id.size();
  }

  if (!snapshot.name.empty()) {
    snapshot.name = std::string(data, snapshot.name.size());
    data += snapshot.name.size();
  } else {
    // unnamed snapshot: synthesize a unique name so it can be referenced
    uuid_d uuid_gen;
    uuid_gen.generate_random();
    snapshot.name = uuid_gen.to_string();
  }

  ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
                 << "name=" << snapshot.name << ", "
                 << "size=" << snapshot.size << dendl;
  read_snapshot_l1_table(on_finish);
}
1310
1311 template <typename I>
1312 void QCOWFormat<I>::read_snapshot_l1_table(Context* on_finish) {
1313 ceph_assert(!m_snapshots.empty());
1314 auto& snapshot = m_snapshots.rbegin()->second;
1315
1316 auto cct = m_image_ctx->cct;
1317 ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
1318 << "l1_table_offset=" << snapshot.l1_table_offset
1319 << dendl;
1320
1321 auto ctx = new LambdaContext([this, on_finish](int r) {
1322 handle_read_snapshot_l1_table(r, on_finish); });
1323 m_stream->read({{snapshot.l1_table_offset,
1324 snapshot.l1_table.size * sizeof(uint64_t)}},
1325 &snapshot.l1_table.bl, ctx);
1326 }
1327
1328 template <typename I>
1329 void QCOWFormat<I>::handle_read_snapshot_l1_table(int r, Context* on_finish) {
1330 ceph_assert(!m_snapshots.empty());
1331 auto& snapshot = m_snapshots.rbegin()->second;
1332
1333 auto cct = m_image_ctx->cct;
1334 ldout(cct, 10) << "r=" << r << ", "
1335 << "snap_id=" << m_snapshots.size() << dendl;
1336
1337 if (r < 0) {
1338 lderr(cct) << "failed to read snapshot L1 table: " << cpp_strerror(r)
1339 << dendl;
1340 on_finish->complete(r);
1341 return;
1342 }
1343
1344 snapshot.l1_table.decode();
1345 read_snapshot(on_finish);
1346 }
1347
1348 template <typename I>
1349 void QCOWFormat<I>::read_l1_table(Context* on_finish) {
1350 auto cct = m_image_ctx->cct;
1351 ldout(cct, 10) << dendl;
1352
1353 auto ctx = new LambdaContext([this, on_finish](int r) {
1354 handle_read_l1_table(r, on_finish); });
1355 m_stream->read({{m_l1_table_offset,
1356 m_l1_table.size * sizeof(uint64_t)}},
1357 &m_l1_table.bl, ctx);
1358 }
1359
1360 template <typename I>
1361 void QCOWFormat<I>::handle_read_l1_table(int r, Context* on_finish) {
1362 auto cct = m_image_ctx->cct;
1363 ldout(cct, 10) << "r=" << r << dendl;
1364
1365 if (r < 0) {
1366 lderr(cct) << "failed to read L1 table: " << cpp_strerror(r) << dendl;
1367 on_finish->complete(r);
1368 return;
1369 }
1370
1371 m_l1_table.decode();
1372 read_backing_file(on_finish);
1373 }
1374
1375 template <typename I>
1376 void QCOWFormat<I>::read_backing_file(Context* on_finish) {
1377 if (m_backing_file_offset == 0 || m_backing_file_size == 0) {
1378 // all data is within the specified file
1379 on_finish->complete(0);
1380 return;
1381 }
1382
1383 auto cct = m_image_ctx->cct;
1384 ldout(cct, 10) << dendl;
1385
1386 // TODO add support for backing files
1387 on_finish->complete(-ENOTSUP);
1388 }
1389
1390 template <typename I>
1391 void QCOWFormat<I>::close(Context* on_finish) {
1392 auto cct = m_image_ctx->cct;
1393 ldout(cct, 10) << dendl;
1394
1395 m_stream->close(on_finish);
1396 }
1397
1398 template <typename I>
1399 void QCOWFormat<I>::get_snapshots(SnapInfos* snap_infos, Context* on_finish) {
1400 auto cct = m_image_ctx->cct;
1401 ldout(cct, 10) << dendl;
1402
1403 snap_infos->clear();
1404 for (auto& [snap_id, snapshot] : m_snapshots) {
1405 SnapInfo snap_info(snapshot.name, cls::rbd::UserSnapshotNamespace{},
1406 snapshot.size, {}, 0, 0, snapshot.timestamp);
1407 snap_infos->emplace(snap_id, snap_info);
1408 }
1409
1410 on_finish->complete(0);
1411 }
1412
1413 template <typename I>
1414 void QCOWFormat<I>::get_image_size(uint64_t snap_id, uint64_t* size,
1415 Context* on_finish) {
1416 auto cct = m_image_ctx->cct;
1417 ldout(cct, 10) << "snap_id=" << snap_id << dendl;
1418
1419 if (snap_id == CEPH_NOSNAP) {
1420 *size = m_size;
1421 } else {
1422 auto snapshot_it = m_snapshots.find(snap_id);
1423 if (snapshot_it == m_snapshots.end()) {
1424 on_finish->complete(-ENOENT);
1425 return;
1426 }
1427
1428 auto& snapshot = snapshot_it->second;
1429 *size = snapshot.size;
1430 }
1431
1432 on_finish->complete(0);
1433 }
1434
1435 template <typename I>
1436 bool QCOWFormat<I>::read(
1437 io::AioCompletion* aio_comp, uint64_t snap_id, io::Extents&& image_extents,
1438 io::ReadResult&& read_result, int op_flags, int read_flags,
1439 const ZTracer::Trace &parent_trace) {
1440 auto cct = m_image_ctx->cct;
1441 ldout(cct, 20) << "snap_id=" << snap_id << ", "
1442 << "image_extents=" << image_extents << dendl;
1443
1444 const LookupTable* l1_table = nullptr;
1445 if (snap_id == CEPH_NOSNAP) {
1446 l1_table = &m_l1_table;
1447 } else {
1448 auto snapshot_it = m_snapshots.find(snap_id);
1449 if (snapshot_it == m_snapshots.end()) {
1450 aio_comp->fail(-ENOENT);
1451 return true;
1452 }
1453
1454 auto& snapshot = snapshot_it->second;
1455 l1_table = &snapshot.l1_table;
1456 }
1457
1458 aio_comp->read_result = std::move(read_result);
1459 aio_comp->read_result.set_image_extents(image_extents);
1460
1461 auto read_request = new ReadRequest(this, aio_comp, l1_table,
1462 std::move(image_extents));
1463 read_request->send();
1464
1465 return true;
1466 }
1467
// Compute per-snapshot sparse deltas for the given image extents: split the
// request into cluster-aligned pieces, group them by L1 table index, and fan
// out one ListSnapsRequest per L1 index across all snapshot L1 tables plus
// the HEAD table. Results are merged in handle_list_snaps().
template <typename I>
void QCOWFormat<I>::list_snaps(io::Extents&& image_extents,
                               io::SnapIds&& snap_ids, int list_snaps_flags,
                               io::SnapshotDelta* snapshot_delta,
                               const ZTracer::Trace &parent_trace,
                               Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "image_extents=" << image_extents << dendl;

  ClusterExtents cluster_extents;
  populate_cluster_extents(cct, m_cluster_size, image_extents,
                           &cluster_extents);

  // map L1 table indexes to cluster extents
  std::map<uint64_t, ClusterExtents> l1_cluster_extents;
  for (auto& cluster_extent : cluster_extents) {
    uint32_t l1_table_index = cluster_extent.image_offset >> m_l1_shift;
    auto& l1_cluster_extent = l1_cluster_extents[l1_table_index];
    // over-reserve (total extent count) to avoid reallocation while appending
    l1_cluster_extent.reserve(cluster_extents.size());
    l1_cluster_extent.push_back(cluster_extent);
  }

  // every revision participates: each snapshot's L1 table plus HEAD
  std::map<uint64_t, const LookupTable*> snap_id_to_l1_table;
  for (auto& [snap_id, snapshot] : m_snapshots) {
    snap_id_to_l1_table[snap_id] = &snapshot.l1_table;
  }
  snap_id_to_l1_table[CEPH_NOSNAP] = &m_l1_table;

  // wrap the caller's context so post-processing (zeroing shrunk snapshots,
  // merging deltas) runs after all sub-requests finish; image_extents is
  // captured by copy since the sub-requests consume the grouped copies
  on_finish = new LambdaContext([this, image_extents,
                                 snap_ids=std::move(snap_ids),
                                 snapshot_delta, on_finish](int r) mutable {
    handle_list_snaps(r, std::move(image_extents), std::move(snap_ids),
                      snapshot_delta, on_finish);
  });

  auto gather_ctx = new C_Gather(cct, on_finish);

  // one self-owning request per L1 table index; each reports via a sub-context
  for (auto& [l1_table_index, cluster_extents] : l1_cluster_extents) {
    auto list_snaps_request = new ListSnapsRequest(
      this, l1_table_index, std::move(cluster_extents), snap_id_to_l1_table,
      snapshot_delta, gather_ctx->new_sub());
    list_snaps_request->send();
  }

  gather_ctx->activate();
}
1514
1515 template <typename I>
1516 void QCOWFormat<I>::handle_list_snaps(int r, io::Extents&& image_extents,
1517 io::SnapIds&& snap_ids,
1518 io::SnapshotDelta* snapshot_delta,
1519 Context* on_finish) {
1520 auto cct = m_image_ctx->cct;
1521 ldout(cct, 20) << "r=" << r << ", "
1522 << "snapshot_delta=" << *snapshot_delta << dendl;
1523
1524 std::optional<uint64_t> previous_size = std::nullopt;
1525 for (auto& [snap_id, snapshot] : m_snapshots) {
1526 auto sparse_extents = &(*snapshot_delta)[{snap_id, snap_id}];
1527 util::zero_shrunk_snapshot(cct, image_extents, snap_id, snapshot.size,
1528 &previous_size, sparse_extents);
1529 }
1530
1531 auto sparse_extents = &(*snapshot_delta)[{CEPH_NOSNAP, CEPH_NOSNAP}];
1532 util::zero_shrunk_snapshot(cct, image_extents, CEPH_NOSNAP, m_size,
1533 &previous_size, sparse_extents);
1534
1535 util::merge_snapshot_delta(snap_ids, snapshot_delta);
1536 on_finish->complete(r);
1537 }
1538
1539 } // namespace migration
1540 } // namespace librbd
1541
1542 template class librbd::migration::QCOWFormat<librbd::ImageCtx>;