1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/migration/QCOWFormat.h"
5 #include "common/Clock.h"
6 #include "common/dout.h"
7 #include "common/errno.h"
8 #include "include/intarith.h"
9 #include "librbd/AsioEngine.h"
10 #include "librbd/ImageCtx.h"
11 #include "librbd/ImageState.h"
12 #include "librbd/Utils.h"
13 #include "librbd/io/AioCompletion.h"
14 #include "librbd/io/ReadResult.h"
15 #include "librbd/migration/SnapshotInterface.h"
16 #include "librbd/migration/SourceSpecBuilder.h"
17 #include "librbd/migration/StreamInterface.h"
18 #include "librbd/migration/Utils.h"
19 #include <boost/asio/dispatch.hpp>
20 #include <boost/asio/post.hpp>
21 #include <boost/endian/conversion.hpp>
24 #include <unordered_map>
27 #define dout_subsys ceph_subsys_rbd
33 #define dout_prefix *_dout << "librbd::migration::QCOWFormat: " \
36 using boost::endian::big_to_native
;
38 namespace qcow_format
{
// A single request extent clipped to QCOW cluster boundaries, together with
// the offsets needed to map it back to the image and the caller's buffer.
struct ClusterExtent {
  uint64_t cluster_offset;        // cluster's offset within the QCOW file
  uint64_t cluster_length;        // bytes of this extent within the cluster
  uint64_t intra_cluster_offset;  // starting byte within the cluster
  uint64_t image_offset;          // logical image offset of this extent
  uint64_t buffer_offset;         // offset into the caller-provided buffer

  ClusterExtent(uint64_t cluster_offset, uint64_t cluster_length,
                uint64_t intra_cluster_offset, uint64_t image_offset,
                uint64_t buffer_offset)
    : cluster_offset(cluster_offset), cluster_length(cluster_length),
      intra_cluster_offset(intra_cluster_offset), image_offset(image_offset),
      buffer_offset(buffer_offset) {
  }
};

typedef std::vector<ClusterExtent> ClusterExtents;
58 void LookupTable::init() {
59 if (cluster_offsets
== nullptr) {
60 cluster_offsets
= reinterpret_cast<uint64_t*>(bl
.c_str());
64 void LookupTable::decode() {
67 // L2 tables are selectively byte-swapped on demand if only requesting a
68 // single cluster offset
73 // translate the lookup table (big-endian -> CPU endianess)
74 for (auto idx
= 0UL; idx
< size
; ++idx
) {
75 cluster_offsets
[idx
] = big_to_native(cluster_offsets
[idx
]);
81 void populate_cluster_extents(CephContext
* cct
, uint64_t cluster_size
,
82 const io::Extents
& image_extents
,
83 ClusterExtents
* cluster_extents
) {
84 uint64_t buffer_offset
= 0;
85 for (auto [image_offset
, image_length
] : image_extents
) {
86 while (image_length
> 0) {
87 auto intra_cluster_offset
= image_offset
& (cluster_size
- 1);
88 auto intra_cluster_length
= cluster_size
- intra_cluster_offset
;
89 auto cluster_length
= std::min(image_length
, intra_cluster_length
);
91 ldout(cct
, 20) << "image_offset=" << image_offset
<< ", "
92 << "image_length=" << image_length
<< ", "
93 << "cluster_length=" << cluster_length
<< dendl
;
96 cluster_extents
->emplace_back(0, cluster_length
, intra_cluster_offset
,
97 image_offset
, buffer_offset
);
99 image_offset
+= cluster_length
;
100 image_length
-= cluster_length
;
101 buffer_offset
+= cluster_length
;
106 } // namespace qcow_format
108 using namespace qcow_format
;
110 template <typename I
>
111 struct QCOWFormat
<I
>::Cluster
{
112 const uint64_t cluster_offset
;
113 bufferlist cluster_data_bl
;
115 Cluster(uint64_t cluster_offset
) : cluster_offset(cluster_offset
) {
120 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::ClusterCache: " \
121 << this << " " << __func__ << ": "
123 template <typename I
>
124 class QCOWFormat
<I
>::ClusterCache
{
126 ClusterCache(QCOWFormat
* qcow_format
)
127 : qcow_format(qcow_format
),
128 m_strand(*qcow_format
->m_image_ctx
->asio_engine
) {
131 void get_cluster(uint64_t cluster_offset
, uint64_t cluster_length
,
132 uint64_t intra_cluster_offset
, bufferlist
* bl
,
133 Context
* on_finish
) {
134 auto cct
= qcow_format
->m_image_ctx
->cct
;
135 ldout(cct
, 20) << "cluster_offset=" << cluster_offset
<< dendl
;
137 // cache state machine runs in a single strand thread
138 boost::asio::dispatch(
140 [this, cluster_offset
, cluster_length
, intra_cluster_offset
, bl
,
142 execute_get_cluster(cluster_offset
, cluster_length
,
143 intra_cluster_offset
, bl
, on_finish
);
148 typedef std::tuple
<uint64_t, uint64_t, bufferlist
*, Context
*> Completion
;
149 typedef std::list
<Completion
> Completions
;
151 QCOWFormat
* qcow_format
;
152 boost::asio::io_context::strand m_strand
;
154 std::shared_ptr
<Cluster
> cluster
;
155 std::unordered_map
<uint64_t, Completions
> cluster_completions
;
157 void execute_get_cluster(uint64_t cluster_offset
, uint64_t cluster_length
,
158 uint64_t intra_cluster_offset
, bufferlist
* bl
,
159 Context
* on_finish
) {
160 auto cct
= qcow_format
->m_image_ctx
->cct
;
161 ldout(cct
, 20) << "cluster_offset=" << cluster_offset
<< dendl
;
163 if (cluster
&& cluster
->cluster_offset
== cluster_offset
) {
164 // most-recent cluster matches
165 bl
->substr_of(cluster
->cluster_data_bl
, intra_cluster_offset
,
167 boost::asio::post(*qcow_format
->m_image_ctx
->asio_engine
,
168 [on_finish
]() { on_finish
->complete(0); });
172 // record callback for cluster
173 bool new_request
= (cluster_completions
.count(cluster_offset
) == 0);
174 cluster_completions
[cluster_offset
].emplace_back(
175 intra_cluster_offset
, cluster_length
, bl
, on_finish
);
177 // start the new read request
178 read_cluster(std::make_shared
<Cluster
>(cluster_offset
));
182 void read_cluster(std::shared_ptr
<Cluster
> cluster
) {
183 auto cct
= qcow_format
->m_image_ctx
->cct
;
185 uint64_t stream_offset
= cluster
->cluster_offset
;
186 uint64_t stream_length
= qcow_format
->m_cluster_size
;
187 if ((cluster
->cluster_offset
& QCOW_OFLAG_COMPRESSED
) != 0) {
188 // compressed clusters encode the compressed length in the lower bits
189 stream_offset
= cluster
->cluster_offset
&
190 qcow_format
->m_cluster_offset_mask
;
191 stream_length
= (cluster
->cluster_offset
>>
192 (63 - qcow_format
->m_cluster_bits
)) &
193 (qcow_format
->m_cluster_size
- 1);
196 ldout(cct
, 20) << "cluster_offset=" << cluster
->cluster_offset
<< ", "
197 << "stream_offset=" << stream_offset
<< ", "
198 << "stream_length=" << stream_length
<< dendl
;
200 // read the cluster into the cache entry
201 auto ctx
= new LambdaContext([this, cluster
](int r
) {
202 boost::asio::post(m_strand
, [this, cluster
, r
]() {
203 handle_read_cluster(r
, cluster
); }); });
204 qcow_format
->m_stream
->read({{stream_offset
, stream_length
}},
205 &cluster
->cluster_data_bl
, ctx
);
208 void handle_read_cluster(int r
, std::shared_ptr
<Cluster
> cluster
) {
209 auto cct
= qcow_format
->m_image_ctx
->cct
;
210 ldout(cct
, 20) << "r=" << r
<< ", "
211 << "cluster_offset=" << cluster
->cluster_offset
<< dendl
;
213 auto completions
= std::move(cluster_completions
[cluster
->cluster_offset
]);
214 cluster_completions
.erase(cluster
->cluster_offset
);
217 lderr(cct
) << "failed to read cluster offset " << cluster
->cluster_offset
218 << ": " << cpp_strerror(r
) << dendl
;
220 if ((cluster
->cluster_offset
& QCOW_OFLAG_COMPRESSED
) != 0) {
221 bufferlist compressed_bl
{std::move(cluster
->cluster_data_bl
)};
222 cluster
->cluster_data_bl
.clear();
225 lderr(cct
) << "support for compressed clusters is not available"
229 // cache the MRU cluster in case of sequential IO
230 this->cluster
= cluster
;
234 // complete the IO back to caller
235 boost::asio::post(*qcow_format
->m_image_ctx
->asio_engine
,
236 [r
, cluster
, completions
=std::move(completions
)]() {
237 for (auto completion
: completions
) {
239 std::get
<2>(completion
)->substr_of(
240 cluster
->cluster_data_bl
,
241 std::get
<0>(completion
),
242 std::get
<1>(completion
));
244 std::get
<3>(completion
)->complete(r
);
251 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::L2TableCache: " \
252 << this << " " << __func__ << ": "
254 template <typename I
>
255 class QCOWFormat
<I
>::L2TableCache
{
257 L2TableCache(QCOWFormat
* qcow_format
)
258 : qcow_format(qcow_format
),
259 m_strand(*qcow_format
->m_image_ctx
->asio_engine
),
260 l2_cache_entries(QCOW_L2_CACHE_SIZE
) {
263 void get_l2_table(const LookupTable
* l1_table
, uint64_t l2_table_offset
,
264 std::shared_ptr
<const LookupTable
>* l2_table
,
265 Context
* on_finish
) {
266 auto cct
= qcow_format
->m_image_ctx
->cct
;
267 ldout(cct
, 20) << "l2_table_offset=" << l2_table_offset
<< dendl
;
269 // cache state machine runs in a single strand thread
270 Request request
{l1_table
, l2_table_offset
, l2_table
, on_finish
};
271 boost::asio::dispatch(
272 m_strand
, [this, request
=std::move(request
)]() {
273 requests
.push_back(std::move(request
));
278 void get_cluster_offset(const LookupTable
* l1_table
,
279 uint64_t image_offset
, uint64_t* cluster_offset
,
280 Context
* on_finish
) {
281 auto cct
= qcow_format
->m_image_ctx
->cct
;
282 uint32_t l1_table_index
= image_offset
>> qcow_format
->m_l1_shift
;
283 uint64_t l2_table_offset
= l1_table
->cluster_offsets
[std::min
<uint32_t>(
284 l1_table_index
, l1_table
->size
- 1)] &
285 qcow_format
->m_cluster_mask
;
286 uint32_t l2_table_index
= (image_offset
>> qcow_format
->m_cluster_bits
) &
287 (qcow_format
->m_l2_size
- 1);
288 ldout(cct
, 20) << "image_offset=" << image_offset
<< ", "
289 << "l1_table_index=" << l1_table_index
<< ", "
290 << "l2_table_offset=" << l2_table_offset
<< ", "
291 << "l2_table_index=" << l2_table_index
<< dendl
;
293 if (l1_table_index
>= l1_table
->size
) {
294 lderr(cct
) << "L1 index " << l1_table_index
<< " out-of-bounds" << dendl
;
295 on_finish
->complete(-ERANGE
);
297 } else if (l2_table_offset
== 0) {
298 // L2 table has not been allocated for specified offset
299 ldout(cct
, 20) << "image_offset=" << image_offset
<< ", "
300 << "cluster_offset=DNE" << dendl
;
302 on_finish
->complete(-ENOENT
);
306 // cache state machine runs in a single strand thread
307 Request request
{l1_table
, l2_table_offset
, l2_table_index
, cluster_offset
,
309 boost::asio::dispatch(
310 m_strand
, [this, request
=std::move(request
)]() {
311 requests
.push_back(std::move(request
));
317 QCOWFormat
* qcow_format
;
319 boost::asio::io_context::strand m_strand
;
322 const LookupTable
* l1_table
;
324 uint64_t l2_table_offset
;
326 // get_cluster_offset request
327 uint32_t l2_table_index
;
328 uint64_t* cluster_offset
= nullptr;
330 // get_l2_table request
331 std::shared_ptr
<const LookupTable
>* l2_table
;
335 Request(const LookupTable
* l1_table
, uint64_t l2_table_offset
,
336 uint32_t l2_table_index
, uint64_t* cluster_offset
,
338 : l1_table(l1_table
), l2_table_offset(l2_table_offset
),
339 l2_table_index(l2_table_index
), cluster_offset(cluster_offset
),
340 on_finish(on_finish
) {
342 Request(const LookupTable
* l1_table
, uint64_t l2_table_offset
,
343 std::shared_ptr
<const LookupTable
>* l2_table
, Context
* on_finish
)
344 : l1_table(l1_table
), l2_table_offset(l2_table_offset
),
345 l2_table(l2_table
), on_finish(on_finish
) {
349 typedef std::deque
<Request
> Requests
;
352 uint64_t l2_offset
= 0;
353 std::shared_ptr
<LookupTable
> l2_table
;
357 bool in_flight
= false;
361 std::vector
<L2Cache
> l2_cache_entries
;
365 void dispatch_request() {
366 boost::asio::dispatch(m_strand
, [this]() { execute_request(); });
369 void execute_request() {
370 auto cct
= qcow_format
->m_image_ctx
->cct
;
371 if (requests
.empty()) {
375 auto request
= requests
.front();
376 ldout(cct
, 20) << "l2_table_offset=" << request
.l2_table_offset
<< dendl
;
378 std::shared_ptr
<LookupTable
> l2_table
;
379 int r
= l2_table_lookup(request
.l2_table_offset
, &l2_table
);
381 lderr(cct
) << "failed to load L2 table: l2_table_offset="
382 << request
.l2_table_offset
<< ": "
383 << cpp_strerror(r
) << dendl
;
384 } else if (l2_table
== nullptr) {
385 // table not in cache -- will restart once its loaded
387 } else if (request
.cluster_offset
!= nullptr) {
388 auto cluster_offset
= l2_table
->cluster_offsets
[request
.l2_table_index
];
389 if (!l2_table
->decoded
) {
390 // table hasn't been byte-swapped
391 cluster_offset
= big_to_native(cluster_offset
);
394 *request
.cluster_offset
= cluster_offset
& qcow_format
->m_cluster_mask
;
395 if (*request
.cluster_offset
== QCOW_OFLAG_ZERO
) {
396 ldout(cct
, 20) << "l2_table_offset=" << request
.l2_table_offset
<< ", "
397 << "l2_table_index=" << request
.l2_table_index
<< ", "
398 << "cluster_offset=zeroed" << dendl
;
400 ldout(cct
, 20) << "l2_table_offset=" << request
.l2_table_offset
<< ", "
401 << "l2_table_index=" << request
.l2_table_index
<< ", "
402 << "cluster_offset=" << *request
.cluster_offset
405 } else if (request
.l2_table
!= nullptr) {
406 // ensure it's in the correct byte-order
408 *request
.l2_table
= l2_table
;
413 // complete the L2 cache request
414 boost::asio::post(*qcow_format
->m_image_ctx
->asio_engine
,
415 [r
, ctx
=request
.on_finish
]() { ctx
->complete(r
); });
416 requests
.pop_front();
418 // process next request (if any)
422 int l2_table_lookup(uint64_t l2_offset
,
423 std::shared_ptr
<LookupTable
>* l2_table
) {
424 auto cct
= qcow_format
->m_image_ctx
->cct
;
428 // find a match in the existing cache
429 for (auto idx
= 0U; idx
< l2_cache_entries
.size(); ++idx
) {
430 auto& l2_cache
= l2_cache_entries
[idx
];
431 if (l2_cache
.l2_offset
== l2_offset
) {
432 if (l2_cache
.in_flight
) {
433 ldout(cct
, 20) << "l2_offset=" << l2_offset
<< ", "
434 << "index=" << idx
<< " (in-flight)" << dendl
;
438 if (l2_cache
.ret_val
< 0) {
439 ldout(cct
, 20) << "l2_offset=" << l2_offset
<< ", "
440 << "index=" << idx
<< " (error): "
441 << cpp_strerror(l2_cache
.ret_val
) << dendl
;
442 int r
= l2_cache
.ret_val
;
443 l2_cache
= L2Cache
{};
449 if (l2_cache
.count
== std::numeric_limits
<uint32_t>::max()) {
450 for (auto& entry
: l2_cache_entries
) {
455 ldout(cct
, 20) << "l2_offset=" << l2_offset
<< ", " << "index=" << idx
457 *l2_table
= l2_cache
.l2_table
;
462 // find the least used entry
463 int32_t min_idx
= -1;
464 uint32_t min_count
= std::numeric_limits
<uint32_t>::max();
465 utime_t min_timestamp
;
466 for (uint32_t idx
= 0U; idx
< l2_cache_entries
.size(); ++idx
) {
467 auto& l2_cache
= l2_cache_entries
[idx
];
468 if (l2_cache
.in_flight
) {
472 if (l2_cache
.count
> 0) {
476 if (l2_cache
.count
<= min_count
) {
477 if (min_idx
== -1 || l2_cache
.timestamp
< min_timestamp
) {
478 min_timestamp
= l2_cache
.timestamp
;
479 min_count
= l2_cache
.count
;
486 // no space in the cache due to in-flight requests
487 ldout(cct
, 20) << "l2_offset=" << l2_offset
<< ", "
488 << "index=DNE (cache busy)" << dendl
;
492 ldout(cct
, 20) << "l2_offset=" << l2_offset
<< ", "
493 << "index=" << min_idx
<< " (loading)" << dendl
;
494 auto& l2_cache
= l2_cache_entries
[min_idx
];
495 l2_cache
.l2_table
= std::make_shared
<LookupTable
>(qcow_format
->m_l2_size
);
496 l2_cache
.l2_offset
= l2_offset
;
497 l2_cache
.timestamp
= ceph_clock_now();
499 l2_cache
.in_flight
= true;
501 // read the L2 table into the L2 cache entry
502 auto ctx
= new LambdaContext([this, index
=min_idx
, l2_offset
](int r
) {
503 boost::asio::post(m_strand
, [this, index
, l2_offset
, r
]() {
504 handle_l2_table_lookup(r
, index
, l2_offset
); }); });
505 qcow_format
->m_stream
->read(
506 {{l2_offset
, qcow_format
->m_l2_size
* sizeof(uint64_t)}},
507 &l2_cache
.l2_table
->bl
, ctx
);
511 void handle_l2_table_lookup(int r
, uint32_t index
, uint64_t l2_offset
) {
512 auto cct
= qcow_format
->m_image_ctx
->cct
;
513 ldout(cct
, 20) << "r=" << r
<< ", "
514 << "l2_offset=" << l2_offset
<< ", "
515 << "index=" << index
<< dendl
;
517 auto& l2_cache
= l2_cache_entries
[index
];
518 ceph_assert(l2_cache
.in_flight
);
519 l2_cache
.in_flight
= false;
522 lderr(cct
) << "failed to load L2 table: "
523 << "l2_offset=" << l2_cache
.l2_offset
<< ": "
524 << cpp_strerror(r
) << dendl
;
525 l2_cache
.ret_val
= r
;
527 // keep the L2 table in big-endian byte-order until the full table
529 l2_cache
.l2_table
->init();
532 // restart the state machine
539 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::ReadRequest: " \
540 << this << " " << __func__ << ": "
542 template <typename I
>
543 class QCOWFormat
<I
>::ReadRequest
{
545 ReadRequest(QCOWFormat
* qcow_format
, io::AioCompletion
* aio_comp
,
546 const LookupTable
* l1_table
, io::Extents
&& image_extents
)
547 : qcow_format(qcow_format
), aio_comp(aio_comp
), l1_table(l1_table
),
548 image_extents(std::move(image_extents
)) {
552 get_cluster_offsets();
556 QCOWFormat
* qcow_format
;
557 io::AioCompletion
* aio_comp
;
559 const LookupTable
* l1_table
;
560 io::Extents image_extents
;
562 size_t image_extents_idx
= 0;
563 uint32_t image_extent_offset
= 0;
565 ClusterExtents cluster_extents
;
567 void get_cluster_offsets() {
568 auto cct
= qcow_format
->m_image_ctx
->cct
;
569 populate_cluster_extents(cct
, qcow_format
->m_cluster_size
, image_extents
,
572 ldout(cct
, 20) << dendl
;
573 auto ctx
= new LambdaContext([this](int r
) {
574 handle_get_cluster_offsets(r
); });
575 auto gather_ctx
= new C_Gather(cct
, ctx
);
577 for (auto& cluster_extent
: cluster_extents
) {
578 auto sub_ctx
= new LambdaContext(
579 [this, &cluster_extent
, on_finish
=gather_ctx
->new_sub()](int r
) {
580 handle_get_cluster_offset(r
, cluster_extent
, on_finish
); });
581 qcow_format
->m_l2_table_cache
->get_cluster_offset(
582 l1_table
, cluster_extent
.image_offset
,
583 &cluster_extent
.cluster_offset
, sub_ctx
);
586 gather_ctx
->activate();
589 void handle_get_cluster_offset(int r
, const ClusterExtent
& cluster_extent
,
590 Context
* on_finish
) {
591 auto cct
= qcow_format
->m_image_ctx
->cct
;
592 ldout(cct
, 20) << "r=" << r
<< ", "
593 << "image_offset=" << cluster_extent
.image_offset
<< ", "
594 << "cluster_offset=" << cluster_extent
.cluster_offset
598 ldout(cct
, 20) << "image offset DNE in QCOW image" << dendl
;
601 lderr(cct
) << "failed to map image offset " << cluster_extent
.image_offset
602 << ": " << cpp_strerror(r
) << dendl
;
605 on_finish
->complete(r
);
608 void handle_get_cluster_offsets(int r
) {
609 auto cct
= qcow_format
->m_image_ctx
->cct
;
610 ldout(cct
, 20) << "r=" << r
<< dendl
;
613 lderr(cct
) << "failed to retrieve cluster extents: " << cpp_strerror(r
)
623 void read_clusters() {
624 auto cct
= qcow_format
->m_image_ctx
->cct
;
625 ldout(cct
, 20) << dendl
;
627 aio_comp
->set_request_count(cluster_extents
.size());
628 for (auto& cluster_extent
: cluster_extents
) {
629 auto read_ctx
= new io::ReadResult::C_ImageReadRequest(
630 aio_comp
, cluster_extent
.buffer_offset
,
631 {{cluster_extent
.image_offset
, cluster_extent
.cluster_length
}});
632 read_ctx
->ignore_enoent
= true;
634 auto log_ctx
= new LambdaContext(
635 [this, cct
=qcow_format
->m_image_ctx
->cct
,
636 image_offset
=cluster_extent
.image_offset
,
637 image_length
=cluster_extent
.cluster_length
, ctx
=read_ctx
](int r
) {
638 handle_read_cluster(cct
, r
, image_offset
, image_length
, ctx
);
641 if (cluster_extent
.cluster_offset
== 0) {
642 // QCOW header is at offset 0, implies cluster DNE
643 log_ctx
->complete(-ENOENT
);
644 } else if (cluster_extent
.cluster_offset
== QCOW_OFLAG_ZERO
) {
645 // explicitly zeroed section
646 read_ctx
->bl
.append_zero(cluster_extent
.cluster_length
);
647 log_ctx
->complete(0);
649 // request the (sub)cluster from the cluster cache
650 qcow_format
->m_cluster_cache
->get_cluster(
651 cluster_extent
.cluster_offset
, cluster_extent
.cluster_length
,
652 cluster_extent
.intra_cluster_offset
, &read_ctx
->bl
, log_ctx
);
659 void handle_read_cluster(CephContext
* cct
, int r
, uint64_t image_offset
,
660 uint64_t image_length
, Context
* on_finish
) const {
661 // NOTE: treat as static function, expect object has been deleted
663 ldout(cct
, 20) << "r=" << r
<< ", "
664 << "image_offset=" << image_offset
<< ", "
665 << "image_length=" << image_length
<< dendl
;
667 if (r
!= -ENOENT
&& r
< 0) {
668 lderr(cct
) << "failed to read image extent " << image_offset
<< "~"
669 << image_length
<< ": " << cpp_strerror(r
) << dendl
;
672 on_finish
->complete(r
);
677 #define dout_prefix *_dout << "librbd::migration::QCOWFormat::" \
678 << "ListSnapsRequest: " << this << " " \
681 template <typename I
>
682 class QCOWFormat
<I
>::ListSnapsRequest
{
685 QCOWFormat
* qcow_format
, uint32_t l1_table_index
,
686 ClusterExtents
&& cluster_extents
,
687 const std::map
<uint64_t, const LookupTable
*>& snap_id_to_l1_table
,
688 io::SnapshotDelta
* snapshot_delta
, Context
* on_finish
)
689 : qcow_format(qcow_format
), l1_table_index(l1_table_index
),
690 cluster_extents(std::move(cluster_extents
)),
691 snap_id_to_l1_table(snap_id_to_l1_table
), snapshot_delta(snapshot_delta
),
692 on_finish(on_finish
) {
700 QCOWFormat
* qcow_format
;
701 uint32_t l1_table_index
;
702 ClusterExtents cluster_extents
;
703 std::map
<uint64_t, const LookupTable
*> snap_id_to_l1_table
;
704 io::SnapshotDelta
* snapshot_delta
;
707 std::shared_ptr
<const LookupTable
> previous_l2_table
;
708 std::shared_ptr
<const LookupTable
> l2_table
;
710 void get_l2_table() {
711 auto cct
= qcow_format
->m_image_ctx
->cct
;
712 if (snap_id_to_l1_table
.empty()) {
717 auto it
= snap_id_to_l1_table
.begin();
718 auto [snap_id
, l1_table
] = *it
;
719 snap_id_to_l1_table
.erase(it
);
721 previous_l2_table
= l2_table
;
724 auto ctx
= new LambdaContext([this, snap_id
= snap_id
](int r
) {
725 boost::asio::post(qcow_format
->m_strand
, [this, snap_id
, r
]() {
726 handle_get_l2_table(r
, snap_id
);
730 if (l1_table_index
>= l1_table
->size
||
731 l1_table
->cluster_offsets
[l1_table_index
] == 0) {
732 ldout(cct
, 20) << "l1_table_index=" << l1_table_index
<< ", "
733 << "snap_id=" << snap_id
<< ": DNE" << dendl
;
734 ctx
->complete(-ENOENT
);
738 uint64_t l2_table_offset
= l1_table
->cluster_offsets
[l1_table_index
] &
739 qcow_format
->m_cluster_mask
;
741 ldout(cct
, 20) << "l1_table_index=" << l1_table_index
<< ", "
742 << "snap_id=" << snap_id
<< ", "
743 << "l2_table_offset=" << l2_table_offset
<< dendl
;
744 qcow_format
->m_l2_table_cache
->get_l2_table(l1_table
, l2_table_offset
,
748 void handle_get_l2_table(int r
, uint64_t snap_id
) {
749 ceph_assert(qcow_format
->m_strand
.running_in_this_thread());
751 auto cct
= qcow_format
->m_image_ctx
->cct
;
752 ldout(cct
, 20) << "r=" << r
<< ", "
753 << "snap_id=" << snap_id
<< dendl
;
758 lderr(cct
) << "failed to retrieve L2 table for snapshot " << snap_id
759 << ": " << cpp_strerror(r
) << dendl
;
764 // compare the cluster offsets at each requested L2 offset between
765 // the previous snapshot's L2 table and the current L2 table.
766 auto& sparse_extents
= (*snapshot_delta
)[{snap_id
, snap_id
}];
767 for (auto& cluster_extent
: cluster_extents
) {
768 uint32_t l2_table_index
=
769 (cluster_extent
.image_offset
>> qcow_format
->m_cluster_bits
) &
770 (qcow_format
->m_l2_size
- 1);
772 std::optional
<uint64_t> cluster_offset
;
773 if (l2_table
&& l2_table_index
< l2_table
->size
) {
774 cluster_offset
= l2_table
->cluster_offsets
[l2_table_index
] &
775 qcow_format
->m_cluster_offset_mask
;
778 std::optional
<uint64_t> prev_cluster_offset
;
779 if (previous_l2_table
&& l2_table_index
< previous_l2_table
->size
) {
780 prev_cluster_offset
=
781 previous_l2_table
->cluster_offsets
[l2_table_index
] &
782 qcow_format
->m_cluster_offset_mask
;
785 ldout(cct
, 20) << "l1_table_index=" << l1_table_index
<< ", "
786 << "snap_id=" << snap_id
<< ", "
787 << "image_offset=" << cluster_extent
.image_offset
<< ", "
788 << "l2_table_index=" << l2_table_index
<< ", "
789 << "cluster_offset=" << cluster_offset
<< ", "
790 << "prev_cluster_offset=" << prev_cluster_offset
<< dendl
;
792 auto state
= io::SPARSE_EXTENT_STATE_DATA
;
793 if (cluster_offset
== prev_cluster_offset
) {
795 } else if ((prev_cluster_offset
&& !cluster_offset
) ||
796 *cluster_offset
== QCOW_OFLAG_ZERO
) {
797 // explicitly zeroed or deallocated
798 state
= io::SPARSE_EXTENT_STATE_ZEROED
;
801 sparse_extents
.insert(
802 cluster_extent
.image_offset
, cluster_extent
.cluster_length
,
803 {state
, cluster_extent
.cluster_length
});
806 ldout(cct
, 20) << "l1_table_index=" << l1_table_index
<< ", "
807 << "snap_id=" << snap_id
<< ", "
808 << "sparse_extents=" << sparse_extents
<< dendl
;
810 // continue processing the L2 table at this index for all snapshots
811 boost::asio::post(*qcow_format
->m_image_ctx
->asio_engine
,
812 [this]() { get_l2_table(); });
817 auto cct
= qcow_format
->m_image_ctx
->cct
;
818 ldout(cct
, 20) << "r=" << r
<< dendl
;
820 on_finish
->complete(r
);
826 #define dout_prefix *_dout << "librbd::migration::QCOWFormat: " << this \
827 << " " << __func__ << ": "
829 template <typename I
>
830 QCOWFormat
<I
>::QCOWFormat(
831 I
* image_ctx
, const json_spirit::mObject
& json_object
,
832 const SourceSpecBuilder
<I
>* source_spec_builder
)
833 : m_image_ctx(image_ctx
), m_json_object(json_object
),
834 m_source_spec_builder(source_spec_builder
),
835 m_strand(*image_ctx
->asio_engine
) {
838 template <typename I
>
839 void QCOWFormat
<I
>::open(Context
* on_finish
) {
840 auto cct
= m_image_ctx
->cct
;
841 ldout(cct
, 10) << dendl
;
843 int r
= m_source_spec_builder
->build_stream(m_json_object
, &m_stream
);
845 lderr(cct
) << "failed to build migration stream handler" << cpp_strerror(r
)
847 on_finish
->complete(r
);
851 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
852 handle_open(r
, on_finish
); });
856 template <typename I
>
857 void QCOWFormat
<I
>::handle_open(int r
, Context
* on_finish
) {
858 auto cct
= m_image_ctx
->cct
;
859 ldout(cct
, 10) << "r=" << r
<< dendl
;
862 lderr(cct
) << "failed to open QCOW image: " << cpp_strerror(r
)
864 on_finish
->complete(r
);
871 template <typename I
>
872 void QCOWFormat
<I
>::probe(Context
* on_finish
) {
873 auto cct
= m_image_ctx
->cct
;
874 ldout(cct
, 10) << dendl
;
876 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
877 handle_probe(r
, on_finish
); });
879 m_stream
->read({{0, 8}}, &m_bl
, ctx
);
882 template <typename I
>
883 void QCOWFormat
<I
>::handle_probe(int r
, Context
* on_finish
) {
884 auto cct
= m_image_ctx
->cct
;
885 ldout(cct
, 10) << "r=" << r
<< dendl
;
888 lderr(cct
) << "failed to probe QCOW image: " << cpp_strerror(r
)
890 on_finish
->complete(r
);
894 auto header_probe
= *reinterpret_cast<QCowHeaderProbe
*>(
896 header_probe
.magic
= big_to_native(header_probe
.magic
);
897 header_probe
.version
= big_to_native(header_probe
.version
);
899 if (header_probe
.magic
!= QCOW_MAGIC
) {
900 lderr(cct
) << "invalid QCOW header magic" << dendl
;
901 on_finish
->complete(-EINVAL
);
906 if (header_probe
.version
== 1) {
907 #ifdef WITH_RBD_MIGRATION_FORMAT_QCOW_V1
908 read_v1_header(on_finish
);
909 #else // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
910 lderr(cct
) << "QCOW is not supported" << dendl
;
911 on_finish
->complete(-ENOTSUP
);
912 #endif // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
914 } else if (header_probe
.version
>= 2 && header_probe
.version
<= 3) {
915 read_v2_header(on_finish
);
918 lderr(cct
) << "invalid QCOW header version " << header_probe
.version
920 on_finish
->complete(-EINVAL
);
925 #ifdef WITH_RBD_MIGRATION_FORMAT_QCOW_V1
927 template <typename I
>
928 void QCOWFormat
<I
>::read_v1_header(Context
* on_finish
) {
929 auto cct
= m_image_ctx
->cct
;
930 ldout(cct
, 10) << dendl
;
932 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
933 handle_read_v1_header(r
, on_finish
); });
935 m_stream
->read({{0, sizeof(QCowHeaderV1
)}}, &m_bl
, ctx
);
938 template <typename I
>
939 void QCOWFormat
<I
>::handle_read_v1_header(int r
, Context
* on_finish
) {
940 auto cct
= m_image_ctx
->cct
;
941 ldout(cct
, 10) << "r=" << r
<< dendl
;
944 lderr(cct
) << "failed to read QCOW header: " << cpp_strerror(r
) << dendl
;
945 on_finish
->complete(r
);
949 auto header
= *reinterpret_cast<QCowHeaderV1
*>(m_bl
.c_str());
951 // byte-swap important fields
952 header
.magic
= big_to_native(header
.magic
);
953 header
.version
= big_to_native(header
.version
);
954 header
.backing_file_offset
= big_to_native(header
.backing_file_offset
);
955 header
.backing_file_size
= big_to_native(header
.backing_file_size
);
956 header
.size
= big_to_native(header
.size
);
957 header
.crypt_method
= big_to_native(header
.crypt_method
);
958 header
.l1_table_offset
= big_to_native(header
.l1_table_offset
);
960 if (header
.magic
!= QCOW_MAGIC
|| header
.version
!= 1) {
961 // honestly shouldn't happen since we've already validated it
962 lderr(cct
) << "header is not QCOW" << dendl
;
963 on_finish
->complete(-EINVAL
);
967 if (header
.cluster_bits
< QCOW_MIN_CLUSTER_BITS
||
968 header
.cluster_bits
> QCOW_MAX_CLUSTER_BITS
) {
969 lderr(cct
) << "invalid cluster bits: " << header
.cluster_bits
<< dendl
;
970 on_finish
->complete(-EINVAL
);
974 if (header
.l2_bits
< (QCOW_MIN_CLUSTER_BITS
- 3) ||
975 header
.l2_bits
> (QCOW_MAX_CLUSTER_BITS
- 3)) {
976 lderr(cct
) << "invalid L2 bits: " << header
.l2_bits
<< dendl
;
977 on_finish
->complete(-EINVAL
);
981 if (header
.crypt_method
!= QCOW_CRYPT_NONE
) {
982 lderr(cct
) << "invalid or unsupported encryption method" << dendl
;
983 on_finish
->complete(-EINVAL
);
987 m_size
= header
.size
;
988 if (p2roundup(m_size
, static_cast<uint64_t>(512)) != m_size
) {
989 lderr(cct
) << "image size is not a multiple of block size" << dendl
;
990 on_finish
->complete(-EINVAL
);
994 m_backing_file_offset
= header
.backing_file_offset
;
995 m_backing_file_size
= header
.backing_file_size
;
997 m_cluster_bits
= header
.cluster_bits
;
998 m_cluster_size
= 1UL << header
.cluster_bits
;
999 m_cluster_offset_mask
= (1ULL << (63 - header
.cluster_bits
)) - 1;
1000 m_cluster_mask
= ~QCOW_OFLAG_COMPRESSED
;
1002 m_l2_bits
= header
.l2_bits
;
1003 m_l2_size
= (1UL << m_l2_bits
);
1005 m_l1_shift
= m_cluster_bits
+ m_l2_bits
;
1006 m_l1_table
.size
= (m_size
+ (1LL << m_l1_shift
) - 1) >> m_l1_shift
;
1007 m_l1_table_offset
= header
.l1_table_offset
;
1008 if (m_size
> (std::numeric_limits
<uint64_t>::max() - (1ULL << m_l1_shift
)) ||
1010 (std::numeric_limits
<int32_t>::max() / sizeof(uint64_t))) {
1011 lderr(cct
) << "image size too big: " << m_size
<< dendl
;
1012 on_finish
->complete(-EINVAL
);
1016 ldout(cct
, 15) << "size=" << m_size
<< ", "
1017 << "cluster_bits=" << m_cluster_bits
<< ", "
1018 << "l2_bits=" << m_l2_bits
<< dendl
;
1020 // allocate memory for L1 table and L2 + cluster caches
1021 m_l2_table_cache
= std::make_unique
<L2TableCache
>(this);
1022 m_cluster_cache
= std::make_unique
<ClusterCache
>(this);
1024 read_l1_table(on_finish
);
1027 #endif // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
1029 template <typename I
>
1030 void QCOWFormat
<I
>::read_v2_header(Context
* on_finish
) {
1031 auto cct
= m_image_ctx
->cct
;
1032 ldout(cct
, 10) << dendl
;
1034 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
1035 handle_read_v2_header(r
, on_finish
); });
1037 m_stream
->read({{0, sizeof(QCowHeader
)}}, &m_bl
, ctx
);
1040 template <typename I
>
1041 void QCOWFormat
<I
>::handle_read_v2_header(int r
, Context
* on_finish
) {
1042 auto cct
= m_image_ctx
->cct
;
1043 ldout(cct
, 10) << "r=" << r
<< dendl
;
1046 lderr(cct
) << "failed to read QCOW2 header: " << cpp_strerror(r
) << dendl
;
1047 on_finish
->complete(r
);
1051 auto header
= *reinterpret_cast<QCowHeader
*>(m_bl
.c_str());
1053 // byte-swap important fields
1054 header
.magic
= big_to_native(header
.magic
);
1055 header
.version
= big_to_native(header
.version
);
1056 header
.backing_file_offset
= big_to_native(header
.backing_file_offset
);
1057 header
.backing_file_size
= big_to_native(header
.backing_file_size
);
1058 header
.cluster_bits
= big_to_native(header
.cluster_bits
);
1059 header
.size
= big_to_native(header
.size
);
1060 header
.crypt_method
= big_to_native(header
.crypt_method
);
1061 header
.l1_size
= big_to_native(header
.l1_size
);
1062 header
.l1_table_offset
= big_to_native(header
.l1_table_offset
);
1063 header
.nb_snapshots
= big_to_native(header
.nb_snapshots
);
1064 header
.snapshots_offset
= big_to_native(header
.snapshots_offset
);
1066 if (header
.version
== 2) {
1067 // valid only for version >= 3
1068 header
.incompatible_features
= 0;
1069 header
.compatible_features
= 0;
1070 header
.autoclear_features
= 0;
1071 header
.header_length
= 72;
1072 header
.compression_type
= 0;
1074 header
.incompatible_features
= big_to_native(header
.incompatible_features
);
1075 header
.compatible_features
= big_to_native(header
.compatible_features
);
1076 header
.autoclear_features
= big_to_native(header
.autoclear_features
);
1077 header
.header_length
= big_to_native(header
.header_length
);
1080 if (header
.magic
!= QCOW_MAGIC
|| header
.version
< 2 || header
.version
> 3) {
1081 // honestly shouldn't happen since we've already validated it
1082 lderr(cct
) << "header is not QCOW2" << dendl
;
1083 on_finish
->complete(-EINVAL
);
1087 if (header
.cluster_bits
< QCOW_MIN_CLUSTER_BITS
||
1088 header
.cluster_bits
> QCOW_MAX_CLUSTER_BITS
) {
1089 lderr(cct
) << "invalid cluster bits: " << header
.cluster_bits
<< dendl
;
1090 on_finish
->complete(-EINVAL
);
1094 if (header
.crypt_method
!= QCOW_CRYPT_NONE
) {
1095 lderr(cct
) << "invalid or unsupported encryption method" << dendl
;
1096 on_finish
->complete(-EINVAL
);
1100 m_size
= header
.size
;
1101 if (p2roundup(m_size
, static_cast<uint64_t>(512)) != m_size
) {
1102 lderr(cct
) << "image size is not a multiple of block size" << dendl
;
1103 on_finish
->complete(-EINVAL
);
1107 if (header
.header_length
<= offsetof(QCowHeader
, compression_type
)) {
1108 header
.compression_type
= 0;
1111 if ((header
.compression_type
!= 0) ||
1112 ((header
.incompatible_features
& QCOW2_INCOMPAT_COMPRESSION
) != 0)) {
1113 lderr(cct
) << "invalid or unsupported compression type" << dendl
;
1114 on_finish
->complete(-EINVAL
);
1118 if ((header
.incompatible_features
& QCOW2_INCOMPAT_DATA_FILE
) != 0) {
1119 lderr(cct
) << "external data file feature not supported" << dendl
;
1120 on_finish
->complete(-ENOTSUP
);
1123 if ((header
.incompatible_features
& QCOW2_INCOMPAT_EXTL2
) != 0) {
1124 lderr(cct
) << "extended L2 table feature not supported" << dendl
;
1125 on_finish
->complete(-ENOTSUP
);
1129 header
.incompatible_features
&= ~QCOW2_INCOMPAT_MASK
;
1130 if (header
.incompatible_features
!= 0) {
1131 lderr(cct
) << "unknown incompatible feature enabled" << dendl
;
1132 on_finish
->complete(-EINVAL
);
1136 m_backing_file_offset
= header
.backing_file_offset
;
1137 m_backing_file_size
= header
.backing_file_size
;
1139 m_cluster_bits
= header
.cluster_bits
;
1140 m_cluster_size
= 1UL << header
.cluster_bits
;
1141 m_cluster_offset_mask
= (1ULL << (63 - header
.cluster_bits
)) - 1;
1142 m_cluster_mask
= ~(QCOW_OFLAG_COMPRESSED
| QCOW_OFLAG_COPIED
);
1144 // L2 table is fixed a (1) cluster block to hold 8-byte (3 bit) offsets
1145 m_l2_bits
= m_cluster_bits
- 3;
1146 m_l2_size
= (1UL << m_l2_bits
);
1148 m_l1_shift
= m_cluster_bits
+ m_l2_bits
;
1149 m_l1_table
.size
= (m_size
+ (1LL << m_l1_shift
) - 1) >> m_l1_shift
;
1150 m_l1_table_offset
= header
.l1_table_offset
;
1151 if (m_size
> (std::numeric_limits
<uint64_t>::max() - (1ULL << m_l1_shift
)) ||
1153 (std::numeric_limits
<int32_t>::max() / sizeof(uint64_t))) {
1154 lderr(cct
) << "image size too big: " << m_size
<< dendl
;
1155 on_finish
->complete(-EINVAL
);
1157 } else if (m_l1_table
.size
> header
.l1_size
) {
1158 lderr(cct
) << "invalid L1 table size in header (" << header
.l1_size
1159 << " < " << m_l1_table
.size
<< ")" << dendl
;
1160 on_finish
->complete(-EINVAL
);
1164 m_snapshot_count
= header
.nb_snapshots
;
1165 m_snapshots_offset
= header
.snapshots_offset
;
1167 ldout(cct
, 15) << "size=" << m_size
<< ", "
1168 << "cluster_bits=" << m_cluster_bits
<< ", "
1169 << "l1_table_offset=" << m_l1_table_offset
<< ", "
1170 << "snapshot_count=" << m_snapshot_count
<< ", "
1171 << "snapshots_offset=" << m_snapshots_offset
<< dendl
;
1173 // allocate memory for L1 table and L2 + cluster caches
1174 m_l2_table_cache
= std::make_unique
<L2TableCache
>(this);
1175 m_cluster_cache
= std::make_unique
<ClusterCache
>(this);
1177 read_snapshot(on_finish
);
1180 template <typename I
>
1181 void QCOWFormat
<I
>::read_snapshot(Context
* on_finish
) {
1182 if (m_snapshots_offset
== 0 || m_snapshots
.size() == m_snapshot_count
) {
1183 read_l1_table(on_finish
);
1187 // header is always aligned on 8 byte boundary
1188 m_snapshots_offset
= p2roundup(m_snapshots_offset
, static_cast<uint64_t>(8));
1190 auto cct
= m_image_ctx
->cct
;
1191 ldout(cct
, 10) << "snap_id=" << (m_snapshots
.size() + 1) << ", "
1192 << "offset=" << m_snapshots_offset
<< dendl
;
1194 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
1195 handle_read_snapshot(r
, on_finish
); });
1197 m_stream
->read({{m_snapshots_offset
, sizeof(QCowSnapshotHeader
)}}, &m_bl
,
1201 template <typename I
>
1202 void QCOWFormat
<I
>::handle_read_snapshot(int r
, Context
* on_finish
) {
1203 auto cct
= m_image_ctx
->cct
;
1204 ldout(cct
, 10) << "r=" << r
<< ", "
1205 << "index=" << m_snapshots
.size() << dendl
;
1208 lderr(cct
) << "failed to read QCOW2 snapshot header: " << cpp_strerror(r
)
1210 on_finish
->complete(r
);
1214 m_snapshots_offset
+= m_bl
.length();
1215 auto header
= *reinterpret_cast<QCowSnapshotHeader
*>(m_bl
.c_str());
1217 auto& snapshot
= m_snapshots
[m_snapshots
.size() + 1];
1218 snapshot
.id
.resize(big_to_native(header
.id_str_size
));
1219 snapshot
.name
.resize(big_to_native(header
.name_size
));
1220 snapshot
.l1_table_offset
= big_to_native(header
.l1_table_offset
);
1221 snapshot
.l1_table
.size
= big_to_native(header
.l1_size
);
1222 snapshot
.timestamp
.sec_ref() = big_to_native(header
.date_sec
);
1223 snapshot
.timestamp
.nsec_ref() = big_to_native(header
.date_nsec
);
1224 snapshot
.extra_data_size
= big_to_native(header
.extra_data_size
);
1226 ldout(cct
, 10) << "snap_id=" << m_snapshots
.size() << ", "
1227 << "id_str_len=" << snapshot
.id
.size() << ", "
1228 << "name_str_len=" << snapshot
.name
.size() << ", "
1229 << "l1_table_offset=" << snapshot
.l1_table_offset
<< ", "
1230 << "l1_size=" << snapshot
.l1_table
.size
<< ", "
1231 << "extra_data_size=" << snapshot
.extra_data_size
<< dendl
;
1233 read_snapshot_extra(on_finish
);
1236 template <typename I
>
1237 void QCOWFormat
<I
>::read_snapshot_extra(Context
* on_finish
) {
1238 ceph_assert(!m_snapshots
.empty());
1239 auto& snapshot
= m_snapshots
.rbegin()->second
;
1241 uint32_t length
= snapshot
.extra_data_size
+
1242 snapshot
.id
.size() +
1243 snapshot
.name
.size();
1246 uuid_gen
.generate_random();
1247 snapshot
.name
= uuid_gen
.to_string();
1249 read_snapshot(on_finish
);
1253 auto cct
= m_image_ctx
->cct
;
1254 ldout(cct
, 10) << "snap_id=" << m_snapshots
.size() << ", "
1255 << "offset=" << m_snapshots_offset
<< ", "
1256 << "length=" << length
<< dendl
;
1258 auto offset
= m_snapshots_offset
;
1259 m_snapshots_offset
+= length
;
1261 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
1262 handle_read_snapshot_extra(r
, on_finish
); });
1264 m_stream
->read({{offset
, length
}}, &m_bl
, ctx
);
1267 template <typename I
>
1268 void QCOWFormat
<I
>::handle_read_snapshot_extra(int r
, Context
* on_finish
) {
1269 ceph_assert(!m_snapshots
.empty());
1270 auto& snapshot
= m_snapshots
.rbegin()->second
;
1272 auto cct
= m_image_ctx
->cct
;
1273 ldout(cct
, 10) << "r=" << r
<< ", "
1274 << "snap_id=" << m_snapshots
.size() << dendl
;
1277 lderr(cct
) << "failed to read QCOW2 snapshot header extra: "
1278 << cpp_strerror(r
) << dendl
;
1279 on_finish
->complete(r
);
1283 if (snapshot
.extra_data_size
>=
1284 offsetof(QCowSnapshotExtraData
, disk_size
) + sizeof(uint64_t)) {
1285 auto extra
= reinterpret_cast<const QCowSnapshotExtraData
*>(m_bl
.c_str());
1286 snapshot
.size
= big_to_native(extra
->disk_size
);
1288 snapshot
.size
= m_size
;
1291 auto data
= reinterpret_cast<const char*>(m_bl
.c_str());
1292 data
+= snapshot
.extra_data_size
;
1294 if (!snapshot
.id
.empty()) {
1295 snapshot
.id
= std::string(data
, snapshot
.id
.size());
1296 data
+= snapshot
.id
.size();
1299 if (!snapshot
.name
.empty()) {
1300 snapshot
.name
= std::string(data
, snapshot
.name
.size());
1301 data
+= snapshot
.name
.size();
1304 uuid_gen
.generate_random();
1305 snapshot
.name
= uuid_gen
.to_string();
1308 ldout(cct
, 10) << "snap_id=" << m_snapshots
.size() << ", "
1309 << "name=" << snapshot
.name
<< ", "
1310 << "size=" << snapshot
.size
<< dendl
;
1311 read_snapshot_l1_table(on_finish
);
1314 template <typename I
>
1315 void QCOWFormat
<I
>::read_snapshot_l1_table(Context
* on_finish
) {
1316 ceph_assert(!m_snapshots
.empty());
1317 auto& snapshot
= m_snapshots
.rbegin()->second
;
1319 auto cct
= m_image_ctx
->cct
;
1320 ldout(cct
, 10) << "snap_id=" << m_snapshots
.size() << ", "
1321 << "l1_table_offset=" << snapshot
.l1_table_offset
1324 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
1325 handle_read_snapshot_l1_table(r
, on_finish
); });
1326 m_stream
->read({{snapshot
.l1_table_offset
,
1327 snapshot
.l1_table
.size
* sizeof(uint64_t)}},
1328 &snapshot
.l1_table
.bl
, ctx
);
1331 template <typename I
>
1332 void QCOWFormat
<I
>::handle_read_snapshot_l1_table(int r
, Context
* on_finish
) {
1333 ceph_assert(!m_snapshots
.empty());
1334 auto& snapshot
= m_snapshots
.rbegin()->second
;
1336 auto cct
= m_image_ctx
->cct
;
1337 ldout(cct
, 10) << "r=" << r
<< ", "
1338 << "snap_id=" << m_snapshots
.size() << dendl
;
1341 lderr(cct
) << "failed to read snapshot L1 table: " << cpp_strerror(r
)
1343 on_finish
->complete(r
);
1347 snapshot
.l1_table
.decode();
1348 read_snapshot(on_finish
);
1351 template <typename I
>
1352 void QCOWFormat
<I
>::read_l1_table(Context
* on_finish
) {
1353 auto cct
= m_image_ctx
->cct
;
1354 ldout(cct
, 10) << dendl
;
1356 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
1357 handle_read_l1_table(r
, on_finish
); });
1358 m_stream
->read({{m_l1_table_offset
,
1359 m_l1_table
.size
* sizeof(uint64_t)}},
1360 &m_l1_table
.bl
, ctx
);
1363 template <typename I
>
1364 void QCOWFormat
<I
>::handle_read_l1_table(int r
, Context
* on_finish
) {
1365 auto cct
= m_image_ctx
->cct
;
1366 ldout(cct
, 10) << "r=" << r
<< dendl
;
1369 lderr(cct
) << "failed to read L1 table: " << cpp_strerror(r
) << dendl
;
1370 on_finish
->complete(r
);
1374 m_l1_table
.decode();
1375 read_backing_file(on_finish
);
1378 template <typename I
>
1379 void QCOWFormat
<I
>::read_backing_file(Context
* on_finish
) {
1380 if (m_backing_file_offset
== 0 || m_backing_file_size
== 0) {
1381 // all data is within the specified file
1382 on_finish
->complete(0);
1386 auto cct
= m_image_ctx
->cct
;
1387 ldout(cct
, 10) << dendl
;
1389 // TODO add support for backing files
1390 on_finish
->complete(-ENOTSUP
);
1393 template <typename I
>
1394 void QCOWFormat
<I
>::close(Context
* on_finish
) {
1395 auto cct
= m_image_ctx
->cct
;
1396 ldout(cct
, 10) << dendl
;
1398 m_stream
->close(on_finish
);
1401 template <typename I
>
1402 void QCOWFormat
<I
>::get_snapshots(SnapInfos
* snap_infos
, Context
* on_finish
) {
1403 auto cct
= m_image_ctx
->cct
;
1404 ldout(cct
, 10) << dendl
;
1406 snap_infos
->clear();
1407 for (auto& [snap_id
, snapshot
] : m_snapshots
) {
1408 SnapInfo
snap_info(snapshot
.name
, cls::rbd::UserSnapshotNamespace
{},
1409 snapshot
.size
, {}, 0, 0, snapshot
.timestamp
);
1410 snap_infos
->emplace(snap_id
, snap_info
);
1413 on_finish
->complete(0);
1416 template <typename I
>
1417 void QCOWFormat
<I
>::get_image_size(uint64_t snap_id
, uint64_t* size
,
1418 Context
* on_finish
) {
1419 auto cct
= m_image_ctx
->cct
;
1420 ldout(cct
, 10) << "snap_id=" << snap_id
<< dendl
;
1422 if (snap_id
== CEPH_NOSNAP
) {
1425 auto snapshot_it
= m_snapshots
.find(snap_id
);
1426 if (snapshot_it
== m_snapshots
.end()) {
1427 on_finish
->complete(-ENOENT
);
1431 auto& snapshot
= snapshot_it
->second
;
1432 *size
= snapshot
.size
;
1435 on_finish
->complete(0);
1438 template <typename I
>
1439 bool QCOWFormat
<I
>::read(
1440 io::AioCompletion
* aio_comp
, uint64_t snap_id
, io::Extents
&& image_extents
,
1441 io::ReadResult
&& read_result
, int op_flags
, int read_flags
,
1442 const ZTracer::Trace
&parent_trace
) {
1443 auto cct
= m_image_ctx
->cct
;
1444 ldout(cct
, 20) << "snap_id=" << snap_id
<< ", "
1445 << "image_extents=" << image_extents
<< dendl
;
1447 const LookupTable
* l1_table
= nullptr;
1448 if (snap_id
== CEPH_NOSNAP
) {
1449 l1_table
= &m_l1_table
;
1451 auto snapshot_it
= m_snapshots
.find(snap_id
);
1452 if (snapshot_it
== m_snapshots
.end()) {
1453 aio_comp
->fail(-ENOENT
);
1457 auto& snapshot
= snapshot_it
->second
;
1458 l1_table
= &snapshot
.l1_table
;
1461 aio_comp
->read_result
= std::move(read_result
);
1462 aio_comp
->read_result
.set_image_extents(image_extents
);
1464 auto read_request
= new ReadRequest(this, aio_comp
, l1_table
,
1465 std::move(image_extents
));
1466 read_request
->send();
1471 template <typename I
>
1472 void QCOWFormat
<I
>::list_snaps(io::Extents
&& image_extents
,
1473 io::SnapIds
&& snap_ids
, int list_snaps_flags
,
1474 io::SnapshotDelta
* snapshot_delta
,
1475 const ZTracer::Trace
&parent_trace
,
1476 Context
* on_finish
) {
1477 auto cct
= m_image_ctx
->cct
;
1478 ldout(cct
, 20) << "image_extents=" << image_extents
<< dendl
;
1480 ClusterExtents cluster_extents
;
1481 populate_cluster_extents(cct
, m_cluster_size
, image_extents
,
1484 // map L1 table indexes to cluster extents
1485 std::map
<uint64_t, ClusterExtents
> l1_cluster_extents
;
1486 for (auto& cluster_extent
: cluster_extents
) {
1487 uint32_t l1_table_index
= cluster_extent
.image_offset
>> m_l1_shift
;
1488 auto& l1_cluster_extent
= l1_cluster_extents
[l1_table_index
];
1489 l1_cluster_extent
.reserve(cluster_extents
.size());
1490 l1_cluster_extent
.push_back(cluster_extent
);
1493 std::map
<uint64_t, const LookupTable
*> snap_id_to_l1_table
;
1494 for (auto& [snap_id
, snapshot
] : m_snapshots
) {
1495 snap_id_to_l1_table
[snap_id
] = &snapshot
.l1_table
;
1497 snap_id_to_l1_table
[CEPH_NOSNAP
] = &m_l1_table
;
1499 on_finish
= new LambdaContext([this, image_extents
,
1500 snap_ids
=std::move(snap_ids
),
1501 snapshot_delta
, on_finish
](int r
) mutable {
1502 handle_list_snaps(r
, std::move(image_extents
), std::move(snap_ids
),
1503 snapshot_delta
, on_finish
);
1506 auto gather_ctx
= new C_Gather(cct
, on_finish
);
1508 for (auto& [l1_table_index
, cluster_extents
] : l1_cluster_extents
) {
1509 auto list_snaps_request
= new ListSnapsRequest(
1510 this, l1_table_index
, std::move(cluster_extents
), snap_id_to_l1_table
,
1511 snapshot_delta
, gather_ctx
->new_sub());
1512 list_snaps_request
->send();
1515 gather_ctx
->activate();
1518 template <typename I
>
1519 void QCOWFormat
<I
>::handle_list_snaps(int r
, io::Extents
&& image_extents
,
1520 io::SnapIds
&& snap_ids
,
1521 io::SnapshotDelta
* snapshot_delta
,
1522 Context
* on_finish
) {
1523 auto cct
= m_image_ctx
->cct
;
1524 ldout(cct
, 20) << "r=" << r
<< ", "
1525 << "snapshot_delta=" << *snapshot_delta
<< dendl
;
1527 std::optional
<uint64_t> previous_size
= std::nullopt
;
1528 for (auto& [snap_id
, snapshot
] : m_snapshots
) {
1529 auto sparse_extents
= &(*snapshot_delta
)[{snap_id
, snap_id
}];
1530 util::zero_shrunk_snapshot(cct
, image_extents
, snap_id
, snapshot
.size
,
1531 &previous_size
, sparse_extents
);
1534 auto sparse_extents
= &(*snapshot_delta
)[{CEPH_NOSNAP
, CEPH_NOSNAP
}];
1535 util::zero_shrunk_snapshot(cct
, image_extents
, CEPH_NOSNAP
, m_size
,
1536 &previous_size
, sparse_extents
);
1538 util::merge_snapshot_delta(snap_ids
, snapshot_delta
);
1539 on_finish
->complete(r
);
1542 } // namespace migration
1543 } // namespace librbd
1545 template class librbd::migration::QCOWFormat
<librbd::ImageCtx
>;