1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "librbd/migration/QCOWFormat.h"
5 #include "common/Clock.h"
6 #include "common/dout.h"
7 #include "common/errno.h"
8 #include "include/intarith.h"
9 #include "librbd/AsioEngine.h"
10 #include "librbd/ImageCtx.h"
11 #include "librbd/ImageState.h"
12 #include "librbd/Utils.h"
13 #include "librbd/io/AioCompletion.h"
14 #include "librbd/io/ReadResult.h"
15 #include "librbd/migration/SnapshotInterface.h"
16 #include "librbd/migration/SourceSpecBuilder.h"
17 #include "librbd/migration/StreamInterface.h"
18 #include "librbd/migration/Utils.h"
19 #include <boost/asio/dispatch.hpp>
20 #include <boost/asio/post.hpp>
23 #include <unordered_map>
26 #define dout_subsys ceph_subsys_rbd
32 #define dout_prefix *_dout << "librbd::migration::QCOWFormat: " \
35 namespace qcow_format
{
// A chunk of an image-extent read mapped onto a single QCOW cluster.
struct ClusterExtent {
  uint64_t cluster_offset;        // cluster position within the QCOW file
  uint64_t cluster_length;        // bytes covered within the cluster
  uint64_t intra_cluster_offset;  // starting offset inside the cluster
  uint64_t image_offset;          // offset within the logical image
  uint64_t buffer_offset;         // offset within the caller's read buffer

  ClusterExtent(uint64_t cluster_off, uint64_t len, uint64_t intra_off,
                uint64_t img_off, uint64_t buf_off)
    : cluster_offset(cluster_off), cluster_length(len),
      intra_cluster_offset(intra_off), image_offset(img_off),
      buffer_offset(buf_off) {
  }
};

typedef std::vector<ClusterExtent> ClusterExtents;
55 void LookupTable::init() {
56 if (cluster_offsets
== nullptr) {
57 cluster_offsets
= reinterpret_cast<uint64_t*>(bl
.c_str());
// Byte-swap the entire table from its on-disk big-endian layout into CPU
// byte-order.
// NOTE(review): this chunk of the source is missing lines here (the
// early-return guard and closing braces are elided) -- only the visible
// tokens are preserved.
void LookupTable::decode() {
  // L2 tables are selectively byte-swapped on demand if only requesting a
  // single cluster offset

  // translate the lookup table (big-endian -> CPU endianness)
  for (auto idx = 0UL; idx < size; ++idx) {
    cluster_offsets[idx] = be64toh(cluster_offsets[idx]);
78 void populate_cluster_extents(CephContext
* cct
, uint64_t cluster_size
,
79 const io::Extents
& image_extents
,
80 ClusterExtents
* cluster_extents
) {
81 uint64_t buffer_offset
= 0;
82 for (auto [image_offset
, image_length
] : image_extents
) {
83 while (image_length
> 0) {
84 auto intra_cluster_offset
= image_offset
& (cluster_size
- 1);
85 auto intra_cluster_length
= cluster_size
- intra_cluster_offset
;
86 auto cluster_length
= std::min(image_length
, intra_cluster_length
);
88 ldout(cct
, 20) << "image_offset=" << image_offset
<< ", "
89 << "image_length=" << image_length
<< ", "
90 << "cluster_length=" << cluster_length
<< dendl
;
93 cluster_extents
->emplace_back(0, cluster_length
, intra_cluster_offset
,
94 image_offset
, buffer_offset
);
96 image_offset
+= cluster_length
;
97 image_length
-= cluster_length
;
98 buffer_offset
+= cluster_length
;
103 } // namespace qcow_format
105 using namespace qcow_format
;
107 template <typename I
>
108 struct QCOWFormat
<I
>::Cluster
{
109 const uint64_t cluster_offset
;
110 bufferlist cluster_data_bl
;
112 Cluster(uint64_t cluster_offset
) : cluster_offset(cluster_offset
) {
#define dout_prefix *_dout << "librbd::migration::QCOWFormat::ClusterCache: " \
                           << this << " " << __func__ << ": "

// Single-entry (most-recently-used) cache of QCOW clusters.  All cache
// state is mutated only from m_strand, so no locking is required;
// completions are posted back to the general asio engine.
// NOTE(review): this chunk of the source is missing lines (visibility
// labels, closing braces and parts of the error paths) -- only the visible
// tokens are preserved below.
template <typename I>
class QCOWFormat<I>::ClusterCache {
  // qcow_format: back-pointer to the owning format handler (not owned)
  ClusterCache(QCOWFormat* qcow_format)
    : qcow_format(qcow_format),
      m_strand(*qcow_format->m_image_ctx->asio_engine) {

  // Read cluster_length bytes starting at intra_cluster_offset within the
  // cluster at cluster_offset into *bl; fires on_finish when complete.
  void get_cluster(uint64_t cluster_offset, uint64_t cluster_length,
                   uint64_t intra_cluster_offset, bufferlist* bl,
                   Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "cluster_offset=" << cluster_offset << dendl;

    // cache state machine runs in a single strand thread
    boost::asio::dispatch(
      [this, cluster_offset, cluster_length, intra_cluster_offset, bl,
        execute_get_cluster(cluster_offset, cluster_length,
                            intra_cluster_offset, bl, on_finish);

  // queued sub-read: intra-cluster offset, length, destination, completion
  typedef std::tuple<uint64_t, uint64_t, bufferlist*, Context*> Completion;
  typedef std::list<Completion> Completions;

  QCOWFormat* qcow_format;
  boost::asio::io_context::strand m_strand;

  // most-recently-used cluster (the single cache entry)
  std::shared_ptr<Cluster> cluster;
  // pending sub-reads keyed by cluster offset (one read per cluster)
  std::unordered_map<uint64_t, Completions> cluster_completions;

  // Runs in m_strand: satisfy the request from the MRU cluster, or queue
  // it behind an (existing or new) cluster read.
  void execute_get_cluster(uint64_t cluster_offset, uint64_t cluster_length,
                           uint64_t intra_cluster_offset, bufferlist* bl,
                           Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "cluster_offset=" << cluster_offset << dendl;

    if (cluster && cluster->cluster_offset == cluster_offset) {
      // most-recent cluster matches
      bl->substr_of(cluster->cluster_data_bl, intra_cluster_offset,
      boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
                        [on_finish]() { on_finish->complete(0); });

    // record callback for cluster
    bool new_request = (cluster_completions.count(cluster_offset) == 0);
    cluster_completions[cluster_offset].emplace_back(
      intra_cluster_offset, cluster_length, bl, on_finish);

    // start the new read request
    read_cluster(std::make_shared<Cluster>(cluster_offset));

  // Issue the stream read for a (possibly compressed) cluster.
  void read_cluster(std::shared_ptr<Cluster> cluster) {
    auto cct = qcow_format->m_image_ctx->cct;

    uint64_t stream_offset = cluster->cluster_offset;
    uint64_t stream_length = qcow_format->m_cluster_size;
    if ((cluster->cluster_offset & QCOW_OFLAG_COMPRESSED) != 0) {
      // compressed clusters encode the compressed length in the lower bits
      stream_offset = cluster->cluster_offset &
                      qcow_format->m_cluster_offset_mask;
      stream_length = (cluster->cluster_offset >>
                       (63 - qcow_format->m_cluster_bits)) &
                      (qcow_format->m_cluster_size - 1);

    ldout(cct, 20) << "cluster_offset=" << cluster->cluster_offset << ", "
                   << "stream_offset=" << stream_offset << ", "
                   << "stream_length=" << stream_length << dendl;

    // read the cluster into the cache entry
    auto ctx = new LambdaContext([this, cluster](int r) {
      boost::asio::post(m_strand, [this, cluster, r]() {
        handle_read_cluster(r, cluster); }); });
    qcow_format->m_stream->read({{stream_offset, stream_length}},
                                &cluster->cluster_data_bl, ctx);

  // Runs in m_strand: fan the cluster data out to every queued completion
  // and remember the cluster as the new MRU entry.
  void handle_read_cluster(int r, std::shared_ptr<Cluster> cluster) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << ", "
                   << "cluster_offset=" << cluster->cluster_offset << dendl;

    auto completions = std::move(cluster_completions[cluster->cluster_offset]);
    cluster_completions.erase(cluster->cluster_offset);

      lderr(cct) << "failed to read cluster offset " << cluster->cluster_offset
                 << ": " << cpp_strerror(r) << dendl;

    if ((cluster->cluster_offset & QCOW_OFLAG_COMPRESSED) != 0) {
      bufferlist compressed_bl{std::move(cluster->cluster_data_bl)};
      cluster->cluster_data_bl.clear();

        // NOTE(review): the surrounding decompression-availability check is
        // elided from this chunk of the source
        lderr(cct) << "support for compressed clusters is not available"

      // cache the MRU cluster in case of sequential IO
      this->cluster = cluster;

    // complete the IO back to caller
    boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
                      [r, cluster, completions=std::move(completions)]() {
        for (auto completion : completions) {
            std::get<2>(completion)->substr_of(
              cluster->cluster_data_bl,
              std::get<0>(completion),
              std::get<1>(completion));
          std::get<3>(completion)->complete(r);
#define dout_prefix *_dout << "librbd::migration::QCOWFormat::L2TableCache: " \
                           << this << " " << __func__ << ": "

// Fixed-size cache of QCOW L2 lookup tables.  All cache state is confined
// to m_strand; callers enqueue Requests which are serviced in FIFO order by
// execute_request(), and completions are posted back to the asio engine.
// NOTE(review): this chunk of the source is missing lines (struct headers,
// closing braces and parts of the error/early-return paths) -- only the
// visible tokens are preserved below.
template <typename I>
class QCOWFormat<I>::L2TableCache {
  L2TableCache(QCOWFormat* qcow_format)
    : qcow_format(qcow_format),
      m_strand(*qcow_format->m_image_ctx->asio_engine),
      l2_cache_entries(QCOW_L2_CACHE_SIZE) {

  // Asynchronously fetch (and decode) the L2 table located at
  // l2_table_offset; *l2_table is populated before on_finish fires.
  void get_l2_table(const LookupTable* l1_table, uint64_t l2_table_offset,
                    std::shared_ptr<const LookupTable>* l2_table,
                    Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "l2_table_offset=" << l2_table_offset << dendl;

    // cache state machine runs in a single strand thread
    Request request{l1_table, l2_table_offset, l2_table, on_finish};
    boost::asio::dispatch(
      m_strand, [this, request=std::move(request)]() {
        requests.push_back(std::move(request));

  // Map image_offset to its raw cluster offset via the L1/L2 tables.
  // Fails with -ERANGE for an out-of-bounds L1 index and -ENOENT when no
  // L2 table has been allocated for the offset.
  void get_cluster_offset(const LookupTable* l1_table,
                          uint64_t image_offset, uint64_t* cluster_offset,
                          Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    uint32_t l1_table_index = image_offset >> qcow_format->m_l1_shift;
    uint64_t l2_table_offset = l1_table->cluster_offsets[std::min<uint32_t>(
                                 l1_table_index, l1_table->size - 1)] &
                               qcow_format->m_cluster_mask;
    uint32_t l2_table_index = (image_offset >> qcow_format->m_cluster_bits) &
                              (qcow_format->m_l2_size - 1);
    ldout(cct, 20) << "image_offset=" << image_offset << ", "
                   << "l1_table_index=" << l1_table_index << ", "
                   << "l2_table_offset=" << l2_table_offset << ", "
                   << "l2_table_index=" << l2_table_index << dendl;

    if (l1_table_index >= l1_table->size) {
      lderr(cct) << "L1 index " << l1_table_index << " out-of-bounds" << dendl;
      on_finish->complete(-ERANGE);
    } else if (l2_table_offset == 0) {
      // L2 table has not been allocated for specified offset
      ldout(cct, 20) << "image_offset=" << image_offset << ", "
                     << "cluster_offset=DNE" << dendl;
      on_finish->complete(-ENOENT);

    // cache state machine runs in a single strand thread
    Request request{l1_table, l2_table_offset, l2_table_index, cluster_offset,
    boost::asio::dispatch(
      m_strand, [this, request=std::move(request)]() {
        requests.push_back(std::move(request));

  QCOWFormat* qcow_format;
  boost::asio::io_context::strand m_strand;

  // queued lookup request (struct header elided in this chunk)
    const LookupTable* l1_table;
    uint64_t l2_table_offset;

    // get_cluster_offset request
    uint32_t l2_table_index;
    uint64_t* cluster_offset = nullptr;

    // get_l2_table request
    std::shared_ptr<const LookupTable>* l2_table;

    // ctor for get_cluster_offset-style requests
    Request(const LookupTable* l1_table, uint64_t l2_table_offset,
            uint32_t l2_table_index, uint64_t* cluster_offset,
      : l1_table(l1_table), l2_table_offset(l2_table_offset),
        l2_table_index(l2_table_index), cluster_offset(cluster_offset),
        on_finish(on_finish) {

    // ctor for get_l2_table-style requests
    Request(const LookupTable* l1_table, uint64_t l2_table_offset,
            std::shared_ptr<const LookupTable>* l2_table, Context* on_finish)
      : l1_table(l1_table), l2_table_offset(l2_table_offset),
        l2_table(l2_table), on_finish(on_finish) {

  typedef std::deque<Request> Requests;

  // per-slot cache state (struct header elided in this chunk)
    uint64_t l2_offset = 0;
    std::shared_ptr<LookupTable> l2_table;

    bool in_flight = false;

  std::vector<L2Cache> l2_cache_entries;

  // Re-enter the strand to service the next queued request.
  void dispatch_request() {
    boost::asio::dispatch(m_strand, [this]() { execute_request(); });

  // Runs in m_strand: service the request at the head of the queue.
  void execute_request() {
    auto cct = qcow_format->m_image_ctx->cct;
    if (requests.empty()) {

    auto request = requests.front();
    ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << dendl;

    std::shared_ptr<LookupTable> l2_table;
    int r = l2_table_lookup(request.l2_table_offset, &l2_table);
      lderr(cct) << "failed to load L2 table: l2_table_offset="
                 << request.l2_table_offset << ": "
                 << cpp_strerror(r) << dendl;
    } else if (l2_table == nullptr) {
      // table not in cache -- will restart once its loaded
    } else if (request.cluster_offset != nullptr) {
      auto cluster_offset = l2_table->cluster_offsets[request.l2_table_index];
      if (!l2_table->decoded) {
        // table hasn't been byte-swapped
        cluster_offset = be64toh(cluster_offset);

      *request.cluster_offset = cluster_offset & qcow_format->m_cluster_mask;
      if (*request.cluster_offset == QCOW_OFLAG_ZERO) {
        ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << ", "
                       << "l2_table_index=" << request.l2_table_index << ", "
                       << "cluster_offset=zeroed" << dendl;
        ldout(cct, 20) << "l2_table_offset=" << request.l2_table_offset << ", "
                       << "l2_table_index=" << request.l2_table_index << ", "
                       << "cluster_offset=" << *request.cluster_offset
    } else if (request.l2_table != nullptr) {
      // ensure it's in the correct byte-order
      *request.l2_table = l2_table;

    // complete the L2 cache request
    boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
                      [r, ctx=request.on_finish]() { ctx->complete(r); });
    requests.pop_front();

    // process next request (if any)

  // Runs in m_strand: return the cached table for l2_offset, or evict the
  // least-used idle slot and start an async load into it.  *l2_table is
  // left null while a load is in flight.
  int l2_table_lookup(uint64_t l2_offset,
                      std::shared_ptr<LookupTable>* l2_table) {
    auto cct = qcow_format->m_image_ctx->cct;

    // find a match in the existing cache
    for (auto idx = 0U; idx < l2_cache_entries.size(); ++idx) {
      auto& l2_cache = l2_cache_entries[idx];
      if (l2_cache.l2_offset == l2_offset) {
        if (l2_cache.in_flight) {
          ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
                         << "index=" << idx << " (in-flight)" << dendl;

        if (l2_cache.ret_val < 0) {
          ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
                         << "index=" << idx << " (error): "
                         << cpp_strerror(l2_cache.ret_val) << dendl;
          int r = l2_cache.ret_val;
          l2_cache = L2Cache{};

        // NOTE(review): the count-saturation adjustment body is elided from
        // this chunk of the source
        if (l2_cache.count == std::numeric_limits<uint32_t>::max()) {
          for (auto& entry : l2_cache_entries) {

        ldout(cct, 20) << "l2_offset=" << l2_offset << ", " << "index=" << idx
        *l2_table = l2_cache.l2_table;

    // find the least used entry
    int32_t min_idx = -1;
    uint32_t min_count = std::numeric_limits<uint32_t>::max();
    utime_t min_timestamp;
    for (uint32_t idx = 0U; idx < l2_cache_entries.size(); ++idx) {
      auto& l2_cache = l2_cache_entries[idx];
      if (l2_cache.in_flight) {
      if (l2_cache.count > 0) {
      if (l2_cache.count <= min_count) {
        // break count ties using the oldest timestamp
        if (min_idx == -1 || l2_cache.timestamp < min_timestamp) {
          min_timestamp = l2_cache.timestamp;
          min_count = l2_cache.count;

      // no space in the cache due to in-flight requests
      ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
                     << "index=DNE (cache busy)" << dendl;

    ldout(cct, 20) << "l2_offset=" << l2_offset << ", "
                   << "index=" << min_idx << " (loading)" << dendl;
    auto& l2_cache = l2_cache_entries[min_idx];
    l2_cache.l2_table = std::make_shared<LookupTable>(qcow_format->m_l2_size);
    l2_cache.l2_offset = l2_offset;
    l2_cache.timestamp = ceph_clock_now();
    l2_cache.in_flight = true;

    // read the L2 table into the L2 cache entry
    auto ctx = new LambdaContext([this, index=min_idx, l2_offset](int r) {
      boost::asio::post(m_strand, [this, index, l2_offset, r]() {
        handle_l2_table_lookup(r, index, l2_offset); }); });
    qcow_format->m_stream->read(
      {{l2_offset, qcow_format->m_l2_size * sizeof(uint64_t)}},
      &l2_cache.l2_table->bl, ctx);

  // Runs in m_strand: record the read result for the cache slot and
  // re-drive the request queue.
  void handle_l2_table_lookup(int r, uint32_t index, uint64_t l2_offset) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << ", "
                   << "l2_offset=" << l2_offset << ", "
                   << "index=" << index << dendl;

    auto& l2_cache = l2_cache_entries[index];
    ceph_assert(l2_cache.in_flight);
    l2_cache.in_flight = false;

      lderr(cct) << "failed to load L2 table: "
                 << "l2_offset=" << l2_cache.l2_offset << ": "
                 << cpp_strerror(r) << dendl;
      l2_cache.ret_val = r;

    // keep the L2 table in big-endian byte-order until the full table
    l2_cache.l2_table->init();

    // restart the state machine
#define dout_prefix *_dout << "librbd::migration::QCOWFormat::ReadRequest: " \
                           << this << " " << __func__ << ": "

// State machine for a single image read: split the request into
// cluster-sized chunks, resolve each chunk through the L1/L2 tables, then
// read the data via the cluster cache.
// NOTE(review): this chunk of the source is missing lines (the send()
// entry point, closing braces and parts of the error paths) -- only the
// visible tokens are preserved below.
template <typename I>
class QCOWFormat<I>::ReadRequest {
  ReadRequest(QCOWFormat* qcow_format, io::AioCompletion* aio_comp,
              const LookupTable* l1_table, io::Extents&& image_extents)
    : qcow_format(qcow_format), aio_comp(aio_comp), l1_table(l1_table),
      image_extents(std::move(image_extents)) {

    get_cluster_offsets();

  QCOWFormat* qcow_format;
  io::AioCompletion* aio_comp;

  const LookupTable* l1_table;
  io::Extents image_extents;

  size_t image_extents_idx = 0;
  uint32_t image_extent_offset = 0;

  ClusterExtents cluster_extents;

  // Resolve the cluster offset of every chunk via the L2 table cache; a
  // C_Gather collects all per-chunk completions.
  void get_cluster_offsets() {
    auto cct = qcow_format->m_image_ctx->cct;
    populate_cluster_extents(cct, qcow_format->m_cluster_size, image_extents,

    ldout(cct, 20) << dendl;
    auto ctx = new LambdaContext([this](int r) {
      handle_get_cluster_offsets(r); });
    auto gather_ctx = new C_Gather(cct, ctx);

    for (auto& cluster_extent : cluster_extents) {
      auto sub_ctx = new LambdaContext(
        [this, &cluster_extent, on_finish=gather_ctx->new_sub()](int r) {
          handle_get_cluster_offset(r, cluster_extent, on_finish); });
      qcow_format->m_l2_table_cache->get_cluster_offset(
        l1_table, cluster_extent.image_offset,
        &cluster_extent.cluster_offset, sub_ctx);

    gather_ctx->activate();

  // Per-chunk lookup completion; an unallocated chunk logs at debug level
  // while a real error is reported.
  void handle_get_cluster_offset(int r, const ClusterExtent& cluster_extent,
                                 Context* on_finish) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << ", "
                   << "image_offset=" << cluster_extent.image_offset << ", "
                   << "cluster_offset=" << cluster_extent.cluster_offset

      ldout(cct, 20) << "image offset DNE in QCOW image" << dendl;

      lderr(cct) << "failed to map image offset " << cluster_extent.image_offset
                 << ": " << cpp_strerror(r) << dendl;

    on_finish->complete(r);

  // All L2 lookups finished; on error log it, otherwise the (elided)
  // continuation proceeds to the data reads.
  void handle_get_cluster_offsets(int r) {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << dendl;

      lderr(cct) << "failed to retrieve cluster extents: " << cpp_strerror(r)

  // Issue the per-chunk data reads; missing and explicitly-zeroed chunks
  // are completed without touching the stream.
  void read_clusters() {
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << dendl;

    aio_comp->set_request_count(cluster_extents.size());
    for (auto& cluster_extent : cluster_extents) {
      auto read_ctx = new io::ReadResult::C_ImageReadRequest(
        aio_comp, cluster_extent.buffer_offset,
        {{cluster_extent.image_offset, cluster_extent.cluster_length}});
      read_ctx->ignore_enoent = true;

      auto log_ctx = new LambdaContext(
        [this, cct=qcow_format->m_image_ctx->cct,
         image_offset=cluster_extent.image_offset,
         image_length=cluster_extent.cluster_length, ctx=read_ctx](int r) {
          handle_read_cluster(cct, r, image_offset, image_length, ctx);

      if (cluster_extent.cluster_offset == 0) {
        // QCOW header is at offset 0, implies cluster DNE
        log_ctx->complete(-ENOENT);
      } else if (cluster_extent.cluster_offset == QCOW_OFLAG_ZERO) {
        // explicitly zeroed section
        read_ctx->bl.append_zero(cluster_extent.cluster_length);
        log_ctx->complete(0);
        // request the (sub)cluster from the cluster cache
        qcow_format->m_cluster_cache->get_cluster(
          cluster_extent.cluster_offset, cluster_extent.cluster_length,
          cluster_extent.intra_cluster_offset, &read_ctx->bl, log_ctx);

  // Logging shim for a chunk-read completion.
  void handle_read_cluster(CephContext* cct, int r, uint64_t image_offset,
                           uint64_t image_length, Context* on_finish) const {
    // NOTE: treat as static function, expect object has been deleted

    ldout(cct, 20) << "r=" << r << ", "
                   << "image_offset=" << image_offset << ", "
                   << "image_length=" << image_length << dendl;

    if (r != -ENOENT && r < 0) {
      lderr(cct) << "failed to read image extent " << image_offset << "~"
                 << image_length << ": " << cpp_strerror(r) << dendl;

    on_finish->complete(r);
#define dout_prefix *_dout << "librbd::migration::QCOWFormat::" \
                           << "ListSnapsRequest: " << this << " " \

// State machine that walks one L1 index across every snapshot's L2 table
// (oldest first) and computes the per-snapshot sparse-extent delta for the
// requested cluster extents.  The L2 comparisons run inside
// qcow_format->m_strand.
// NOTE(review): this chunk of the source is missing lines (the constructor
// name line, closing braces, the finish() header and parts of the error
// paths) -- only the visible tokens are preserved below.
template <typename I>
class QCOWFormat<I>::ListSnapsRequest {
    QCOWFormat* qcow_format, uint32_t l1_table_index,
    ClusterExtents&& cluster_extents,
    const std::map<uint64_t, const LookupTable*>& snap_id_to_l1_table,
    io::SnapshotDelta* snapshot_delta, Context* on_finish)
    : qcow_format(qcow_format), l1_table_index(l1_table_index),
      cluster_extents(std::move(cluster_extents)),
      snap_id_to_l1_table(snap_id_to_l1_table), snapshot_delta(snapshot_delta),
      on_finish(on_finish) {

  QCOWFormat* qcow_format;
  uint32_t l1_table_index;
  ClusterExtents cluster_extents;
  std::map<uint64_t, const LookupTable*> snap_id_to_l1_table;
  io::SnapshotDelta* snapshot_delta;

  // L2 tables of the previously-visited and current snapshot
  std::shared_ptr<const LookupTable> previous_l2_table;
  std::shared_ptr<const LookupTable> l2_table;

  // Pop the next snapshot's L1 table and fetch the L2 table for this
  // L1 index; -ENOENT is used when the L2 table does not exist.
  void get_l2_table() {
    auto cct = qcow_format->m_image_ctx->cct;
    if (snap_id_to_l1_table.empty()) {

    auto it = snap_id_to_l1_table.begin();
    auto [snap_id, l1_table] = *it;
    snap_id_to_l1_table.erase(it);

    previous_l2_table = l2_table;

    auto ctx = new LambdaContext([this, snap_id = snap_id](int r) {
      boost::asio::post(qcow_format->m_strand, [this, snap_id, r]() {
        handle_get_l2_table(r, snap_id);

    if (l1_table_index >= l1_table->size ||
        l1_table->cluster_offsets[l1_table_index] == 0) {
      ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
                     << "snap_id=" << snap_id << ": DNE" << dendl;
      ctx->complete(-ENOENT);

    uint64_t l2_table_offset = l1_table->cluster_offsets[l1_table_index] &
                               qcow_format->m_cluster_mask;

    ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
                   << "snap_id=" << snap_id << ", "
                   << "l2_table_offset=" << l2_table_offset << dendl;
    qcow_format->m_l2_table_cache->get_l2_table(l1_table, l2_table_offset,

  // Runs in m_strand: diff this snapshot's L2 entries against the previous
  // snapshot's to classify each extent as DATA or ZEROED.
  void handle_get_l2_table(int r, uint64_t snap_id) {
    ceph_assert(qcow_format->m_strand.running_in_this_thread());

    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << ", "
                   << "snap_id=" << snap_id << dendl;

      lderr(cct) << "failed to retrieve L2 table for snapshot " << snap_id
                 << ": " << cpp_strerror(r) << dendl;

    // compare the cluster offsets at each requested L2 offset between
    // the previous snapshot's L2 table and the current L2 table.
    auto& sparse_extents = (*snapshot_delta)[{snap_id, snap_id}];
    for (auto& cluster_extent : cluster_extents) {
      uint32_t l2_table_index =
        (cluster_extent.image_offset >> qcow_format->m_cluster_bits) &
        (qcow_format->m_l2_size - 1);

      std::optional<uint64_t> cluster_offset;
      if (l2_table && l2_table_index < l2_table->size) {
        cluster_offset = l2_table->cluster_offsets[l2_table_index] &
                         qcow_format->m_cluster_offset_mask;

      std::optional<uint64_t> prev_cluster_offset;
      if (previous_l2_table && l2_table_index < previous_l2_table->size) {
        prev_cluster_offset =
          previous_l2_table->cluster_offsets[l2_table_index] &
          qcow_format->m_cluster_offset_mask;

      ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
                     << "snap_id=" << snap_id << ", "
                     << "image_offset=" << cluster_extent.image_offset << ", "
                     << "l2_table_index=" << l2_table_index << ", "
                     << "cluster_offset=" << cluster_offset << ", "
                     << "prev_cluster_offset=" << prev_cluster_offset << dendl;

      auto state = io::SPARSE_EXTENT_STATE_DATA;
      if (cluster_offset == prev_cluster_offset) {
      } else if ((prev_cluster_offset && !cluster_offset) ||
                 *cluster_offset == QCOW_OFLAG_ZERO) {
        // explicitly zeroed or deallocated
        state = io::SPARSE_EXTENT_STATE_ZEROED;

      sparse_extents.insert(
        cluster_extent.image_offset, cluster_extent.cluster_length,
        {state, cluster_extent.cluster_length});

    ldout(cct, 20) << "l1_table_index=" << l1_table_index << ", "
                   << "snap_id=" << snap_id << ", "
                   << "sparse_extents=" << sparse_extents << dendl;

    // continue processing the L2 table at this index for all snapshots
    boost::asio::post(*qcow_format->m_image_ctx->asio_engine,
                      [this]() { get_l2_table(); });

  // Completion (header elided in this chunk): report the result to the
  // caller.
    auto cct = qcow_format->m_image_ctx->cct;
    ldout(cct, 20) << "r=" << r << dendl;

    on_finish->complete(r);
#define dout_prefix *_dout << "librbd::migration::QCOWFormat: " << this \
                           << " " << __func__ << ": "

// Construct a QCOW format handler for image_ctx from the parsed
// source-spec JSON; heavy-weight initialization is deferred to open().
template <typename I>
QCOWFormat<I>::QCOWFormat(
    I* image_ctx, const json_spirit::mObject& json_object,
    const SourceSpecBuilder<I>* source_spec_builder)
  : m_image_ctx(image_ctx), m_json_object(json_object),
    m_source_spec_builder(source_spec_builder),
    m_strand(*image_ctx->asio_engine) {
// Build the migration stream handler from the source-spec JSON and open
// it; continues in handle_open().
// NOTE(review): the error-path braces and the m_stream->open(ctx) call are
// elided from this chunk of the source.
template <typename I>
void QCOWFormat<I>::open(Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << dendl;

  int r = m_source_spec_builder->build_stream(m_json_object, &m_stream);
    lderr(cct) << "failed to build migration stream handler" << cpp_strerror(r)
    on_finish->complete(r);

  auto ctx = new LambdaContext([this, on_finish](int r) {
    handle_open(r, on_finish); });
// Stream-open completion: on error report the failure to the caller; the
// success continuation (probing the image) is elided from this chunk.
template <typename I>
void QCOWFormat<I>::handle_open(int r, Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << dendl;

    lderr(cct) << "failed to open QCOW image: " << cpp_strerror(r)
    on_finish->complete(r);
868 template <typename I
>
869 void QCOWFormat
<I
>::probe(Context
* on_finish
) {
870 auto cct
= m_image_ctx
->cct
;
871 ldout(cct
, 10) << dendl
;
873 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
874 handle_probe(r
, on_finish
); });
876 m_stream
->read({{0, 8}}, &m_bl
, ctx
);
// Probe completion: validate the magic/version prefix and dispatch to the
// matching header reader (v1 vs v2/v3).
// NOTE(review): some braces and early returns are elided from this chunk
// of the source; only the visible tokens are preserved.
template <typename I>
void QCOWFormat<I>::handle_probe(int r, Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << dendl;

    lderr(cct) << "failed to probe QCOW image: " << cpp_strerror(r)
    on_finish->complete(r);

  auto header_probe = *reinterpret_cast<QCowHeaderProbe*>(
  // on-disk header fields are big-endian
  header_probe.magic = be32toh(header_probe.magic);
  header_probe.version = be32toh(header_probe.version);

  if (header_probe.magic != QCOW_MAGIC) {
    lderr(cct) << "invalid QCOW header magic" << dendl;
    on_finish->complete(-EINVAL);

  if (header_probe.version == 1) {
#ifdef WITH_RBD_MIGRATION_FORMAT_QCOW_V1
    read_v1_header(on_finish);
#else // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
    lderr(cct) << "QCOW is not supported" << dendl;
    on_finish->complete(-ENOTSUP);
#endif // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
  } else if (header_probe.version >= 2 && header_probe.version <= 3) {
    read_v2_header(on_finish);
    lderr(cct) << "invalid QCOW header version " << header_probe.version
    on_finish->complete(-EINVAL);
922 #ifdef WITH_RBD_MIGRATION_FORMAT_QCOW_V1
924 template <typename I
>
925 void QCOWFormat
<I
>::read_v1_header(Context
* on_finish
) {
926 auto cct
= m_image_ctx
->cct
;
927 ldout(cct
, 10) << dendl
;
929 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
930 handle_read_v1_header(r
, on_finish
); });
932 m_stream
->read({{0, sizeof(QCowHeaderV1
)}}, &m_bl
, ctx
);
// v1 header completion: byte-swap and validate the header, capture the
// image geometry (cluster/L2/L1 sizing), allocate the caches and kick off
// the L1 table read.
// NOTE(review): closing braces and early returns are elided from this
// chunk of the source; only the visible tokens are preserved.
template <typename I>
void QCOWFormat<I>::handle_read_v1_header(int r, Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << dendl;

    lderr(cct) << "failed to read QCOW header: " << cpp_strerror(r) << dendl;
    on_finish->complete(r);

  auto header = *reinterpret_cast<QCowHeaderV1*>(m_bl.c_str());

  // byte-swap important fields
  header.magic = be32toh(header.magic);
  header.version = be32toh(header.version);
  header.backing_file_offset = be64toh(header.backing_file_offset);
  header.backing_file_size = be32toh(header.backing_file_size);
  header.size = be64toh(header.size);
  header.crypt_method = be32toh(header.crypt_method);
  header.l1_table_offset = be64toh(header.l1_table_offset);

  if (header.magic != QCOW_MAGIC || header.version != 1) {
    // honestly shouldn't happen since we've already validated it
    lderr(cct) << "header is not QCOW" << dendl;
    on_finish->complete(-EINVAL);

  if (header.cluster_bits < QCOW_MIN_CLUSTER_BITS ||
      header.cluster_bits > QCOW_MAX_CLUSTER_BITS) {
    lderr(cct) << "invalid cluster bits: " << header.cluster_bits << dendl;
    on_finish->complete(-EINVAL);

  if (header.l2_bits < (QCOW_MIN_CLUSTER_BITS - 3) ||
      header.l2_bits > (QCOW_MAX_CLUSTER_BITS - 3)) {
    lderr(cct) << "invalid L2 bits: " << header.l2_bits << dendl;
    on_finish->complete(-EINVAL);

  if (header.crypt_method != QCOW_CRYPT_NONE) {
    lderr(cct) << "invalid or unsupported encryption method" << dendl;
    on_finish->complete(-EINVAL);

  m_size = header.size;
  if (p2roundup(m_size, static_cast<uint64_t>(512)) != m_size) {
    lderr(cct) << "image size is not a multiple of block size" << dendl;
    on_finish->complete(-EINVAL);

  m_backing_file_offset = header.backing_file_offset;
  m_backing_file_size = header.backing_file_size;

  m_cluster_bits = header.cluster_bits;
  m_cluster_size = 1UL << header.cluster_bits;
  m_cluster_offset_mask = (1ULL << (63 - header.cluster_bits)) - 1;
  m_cluster_mask = ~QCOW_OFLAG_COMPRESSED;

  m_l2_bits = header.l2_bits;
  m_l2_size = (1UL << m_l2_bits);

  m_l1_shift = m_cluster_bits + m_l2_bits;
  m_l1_table.size = (m_size + (1LL << m_l1_shift) - 1) >> m_l1_shift;
  m_l1_table_offset = header.l1_table_offset;
  // NOTE(review): the left-hand operand of the second comparison below is
  // elided from this chunk of the source
  if (m_size > (std::numeric_limits<uint64_t>::max() - (1ULL << m_l1_shift)) ||
      (std::numeric_limits<int32_t>::max() / sizeof(uint64_t))) {
    lderr(cct) << "image size too big: " << m_size << dendl;
    on_finish->complete(-EINVAL);

  ldout(cct, 15) << "size=" << m_size << ", "
                 << "cluster_bits=" << m_cluster_bits << ", "
                 << "l2_bits=" << m_l2_bits << dendl;

  // allocate memory for L1 table and L2 + cluster caches
  m_l2_table_cache = std::make_unique<L2TableCache>(this);
  m_cluster_cache = std::make_unique<ClusterCache>(this);

  read_l1_table(on_finish);
1024 #endif // WITH_RBD_MIGRATION_FORMAT_QCOW_V1
1026 template <typename I
>
1027 void QCOWFormat
<I
>::read_v2_header(Context
* on_finish
) {
1028 auto cct
= m_image_ctx
->cct
;
1029 ldout(cct
, 10) << dendl
;
1031 auto ctx
= new LambdaContext([this, on_finish
](int r
) {
1032 handle_read_v2_header(r
, on_finish
); });
1034 m_stream
->read({{0, sizeof(QCowHeader
)}}, &m_bl
, ctx
);
// Decode and validate the QCOW2 (version 2/3) header read by
// read_v2_header(), populate the format parameters (cluster geometry,
// L1 table shape, snapshot directory location) and continue the open
// sequence by reading the snapshot table.
template <typename I>
void QCOWFormat<I>::handle_read_v2_header(int r, Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << dendl;

  if (r < 0) {
    lderr(cct) << "failed to read QCOW2 header: " << cpp_strerror(r) << dendl;
    on_finish->complete(r);
    return;
  }

  // copy out of m_bl so the buffer can be reused for subsequent reads
  auto header = *reinterpret_cast<QCowHeader*>(m_bl.c_str());

  // byte-swap important fields (on-disk format is big-endian)
  header.magic = be32toh(header.magic);
  header.version = be32toh(header.version);
  header.backing_file_offset = be64toh(header.backing_file_offset);
  header.backing_file_size = be32toh(header.backing_file_size);
  header.cluster_bits = be32toh(header.cluster_bits);
  header.size = be64toh(header.size);
  header.crypt_method = be32toh(header.crypt_method);
  header.l1_size = be32toh(header.l1_size);
  header.l1_table_offset = be64toh(header.l1_table_offset);
  header.nb_snapshots = be32toh(header.nb_snapshots);
  header.snapshots_offset = be64toh(header.snapshots_offset);

  if (header.version == 2) {
    // valid only for version >= 3 -- synthesize v2 defaults so the
    // remainder of the validation logic is version-agnostic
    header.incompatible_features = 0;
    header.compatible_features = 0;
    header.autoclear_features = 0;
    header.header_length = 72;
    header.compression_type = 0;
  } else {
    header.incompatible_features = be64toh(header.incompatible_features);
    header.compatible_features = be64toh(header.compatible_features);
    header.autoclear_features = be64toh(header.autoclear_features);
    header.header_length = be32toh(header.header_length);
  }

  if (header.magic != QCOW_MAGIC || header.version < 2 || header.version > 3) {
    // honestly shouldn't happen since we've already validated it
    lderr(cct) << "header is not QCOW2" << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  if (header.cluster_bits < QCOW_MIN_CLUSTER_BITS ||
      header.cluster_bits > QCOW_MAX_CLUSTER_BITS) {
    lderr(cct) << "invalid cluster bits: " << header.cluster_bits << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  if (header.crypt_method != QCOW_CRYPT_NONE) {
    lderr(cct) << "invalid or unsupported encryption method" << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  m_size = header.size;
  if (p2roundup(m_size, static_cast<uint64_t>(512)) != m_size) {
    lderr(cct) << "image size is not a multiple of block size" << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  // compression_type was only appended to the header in later revisions;
  // a short header implies no compression
  if (header.header_length <= offsetof(QCowHeader, compression_type)) {
    header.compression_type = 0;
  }

  if ((header.compression_type != 0) ||
      ((header.incompatible_features & QCOW2_INCOMPAT_COMPRESSION) != 0)) {
    lderr(cct) << "invalid or unsupported compression type" << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  if ((header.incompatible_features & QCOW2_INCOMPAT_DATA_FILE) != 0) {
    lderr(cct) << "external data file feature not supported" << dendl;
    on_finish->complete(-ENOTSUP);
    return;
  }

  if ((header.incompatible_features & QCOW2_INCOMPAT_EXTL2) != 0) {
    lderr(cct) << "extended L2 table feature not supported" << dendl;
    on_finish->complete(-ENOTSUP);
    return;
  }

  // any remaining incompatible-feature bit is unknown to us and, by the
  // qcow2 spec, mandates refusing to open the image
  header.incompatible_features &= ~QCOW2_INCOMPAT_MASK;
  if (header.incompatible_features != 0) {
    lderr(cct) << "unknown incompatible feature enabled" << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  m_backing_file_offset = header.backing_file_offset;
  m_backing_file_size = header.backing_file_size;

  m_cluster_bits = header.cluster_bits;
  m_cluster_size = 1UL << header.cluster_bits;
  m_cluster_offset_mask = (1ULL << (63 - header.cluster_bits)) - 1;
  m_cluster_mask = ~(QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_COPIED);

  // L2 table is fixed at one (1) cluster block holding 8-byte (3-bit) offsets
  m_l2_bits = m_cluster_bits - 3;
  m_l2_size = (1UL << m_l2_bits);

  // each L1 entry covers (cluster_size * entries-per-L2-table) bytes
  m_l1_shift = m_cluster_bits + m_l2_bits;
  m_l1_table.size = (m_size + (1LL << m_l1_shift) - 1) >> m_l1_shift;
  m_l1_table_offset = header.l1_table_offset;
  if (m_size > (std::numeric_limits<uint64_t>::max() - (1ULL << m_l1_shift)) ||
      (m_l1_table.size >
        (std::numeric_limits<int32_t>::max() / sizeof(uint64_t)))) {
    lderr(cct) << "image size too big: " << m_size << dendl;
    on_finish->complete(-EINVAL);
    return;
  } else if (m_l1_table.size > header.l1_size) {
    lderr(cct) << "invalid L1 table size in header (" << header.l1_size
               << " < " << m_l1_table.size << ")" << dendl;
    on_finish->complete(-EINVAL);
    return;
  }

  m_snapshot_count = header.nb_snapshots;
  m_snapshots_offset = header.snapshots_offset;

  ldout(cct, 15) << "size=" << m_size << ", "
                 << "cluster_bits=" << m_cluster_bits << ", "
                 << "l1_table_offset=" << m_l1_table_offset << ", "
                 << "snapshot_count=" << m_snapshot_count << ", "
                 << "snapshots_offset=" << m_snapshots_offset << dendl;

  // allocate memory for L1 table and L2 + cluster caches
  m_l2_table_cache = std::make_unique<L2TableCache>(this);
  m_cluster_cache = std::make_unique<ClusterCache>(this);

  read_snapshot(on_finish);
}
// Read the next on-disk snapshot header; once all m_snapshot_count
// snapshots have been loaded (or there are none), fall through to
// reading the primary L1 table.
template <typename I>
void QCOWFormat<I>::read_snapshot(Context* on_finish) {
  if (m_snapshots_offset == 0 || m_snapshots.size() == m_snapshot_count) {
    read_l1_table(on_finish);
    return;
  }

  // header is always aligned on 8 byte boundary
  m_snapshots_offset = p2roundup(m_snapshots_offset, static_cast<uint64_t>(8));

  auto cct = m_image_ctx->cct;
  // snap ids are 1-based: the next snapshot gets id m_snapshots.size() + 1
  ldout(cct, 10) << "snap_id=" << (m_snapshots.size() + 1) << ", "
                 << "offset=" << m_snapshots_offset << dendl;

  auto ctx = new LambdaContext([this, on_finish](int r) {
    handle_read_snapshot(r, on_finish); });

  m_stream->read({{m_snapshots_offset, sizeof(QCowSnapshotHeader)}}, &m_bl,
                 ctx);
}
// Decode a fixed-size snapshot header (big-endian on disk) into a new
// Snapshot entry, then chain to reading its variable-length extra data.
template <typename I>
void QCOWFormat<I>::handle_read_snapshot(int r, Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << ", "
                 << "index=" << m_snapshots.size() << dendl;

  if (r < 0) {
    lderr(cct) << "failed to read QCOW2 snapshot header: " << cpp_strerror(r)
               << dendl;
    on_finish->complete(r);
    return;
  }

  // advance past the fixed header; the extra data + id/name strings follow
  m_snapshots_offset += m_bl.length();
  auto header = *reinterpret_cast<QCowSnapshotHeader*>(m_bl.c_str());

  // snap ids are 1-based; operator[] creates the new entry
  auto& snapshot = m_snapshots[m_snapshots.size() + 1];
  // id/name are resized now and filled in from the extra-data read later
  snapshot.id.resize(be16toh(header.id_str_size));
  snapshot.name.resize(be16toh(header.name_size));
  snapshot.l1_table_offset = be64toh(header.l1_table_offset);
  snapshot.l1_table.size = be32toh(header.l1_size);
  snapshot.timestamp.sec_ref() = be32toh(header.date_sec);
  snapshot.timestamp.nsec_ref() = be32toh(header.date_nsec);
  snapshot.extra_data_size = be32toh(header.extra_data_size);

  ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
                 << "id_str_len=" << snapshot.id.size() << ", "
                 << "name_str_len=" << snapshot.name.size() << ", "
                 << "l1_table_offset=" << snapshot.l1_table_offset << ", "
                 << "l1_size=" << snapshot.l1_table.size << ", "
                 << "extra_data_size=" << snapshot.extra_data_size << dendl;

  read_snapshot_extra(on_finish);
}
// Read the variable-length portion of the most recently decoded snapshot
// header: optional extra data followed by the id and name strings.
template <typename I>
void QCOWFormat<I>::read_snapshot_extra(Context* on_finish) {
  ceph_assert(!m_snapshots.empty());
  auto& snapshot = m_snapshots.rbegin()->second;

  uint32_t length = snapshot.extra_data_size +
                    snapshot.id.size() +
                    snapshot.name.size();
  if (length == 0) {
    // nothing to read; synthesize a unique name for the anonymous snapshot
    uuid_d uuid_gen;
    uuid_gen.generate_random();
    snapshot.name = uuid_gen.to_string();

    read_snapshot(on_finish);
    return;
  }

  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
                 << "offset=" << m_snapshots_offset << ", "
                 << "length=" << length << dendl;

  // consume the extra bytes so the next snapshot header read starts
  // past this snapshot's record
  auto offset = m_snapshots_offset;
  m_snapshots_offset += length;

  auto ctx = new LambdaContext([this, on_finish](int r) {
    handle_read_snapshot_extra(r, on_finish); });

  m_stream->read({{offset, length}}, &m_bl, ctx);
}
// Decode a snapshot's extra data (virtual disk size, if recorded) and its
// trailing id/name strings, then chain to reading the snapshot's L1 table.
template <typename I>
void QCOWFormat<I>::handle_read_snapshot_extra(int r, Context* on_finish) {
  ceph_assert(!m_snapshots.empty());
  auto& snapshot = m_snapshots.rbegin()->second;

  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << ", "
                 << "snap_id=" << m_snapshots.size() << dendl;

  if (r < 0) {
    lderr(cct) << "failed to read QCOW2 snapshot header extra: "
               << cpp_strerror(r) << dendl;
    on_finish->complete(r);
    return;
  }

  // extra data optionally records the snapshot's virtual disk size; if it's
  // too short to contain disk_size, fall back to the current image size
  if (snapshot.extra_data_size >=
        offsetof(QCowSnapshotExtraData, disk_size) + sizeof(uint64_t)) {
    auto extra = reinterpret_cast<const QCowSnapshotExtraData*>(m_bl.c_str());
    snapshot.size = be64toh(extra->disk_size);
  } else {
    snapshot.size = m_size;
  }

  // id and name strings immediately follow the extra data blob
  auto data = reinterpret_cast<const char*>(m_bl.c_str());
  data += snapshot.extra_data_size;

  if (!snapshot.id.empty()) {
    snapshot.id = std::string(data, snapshot.id.size());
    data += snapshot.id.size();
  }

  if (!snapshot.name.empty()) {
    snapshot.name = std::string(data, snapshot.name.size());
    data += snapshot.name.size();
  } else {
    // unnamed snapshot: synthesize a unique name
    uuid_d uuid_gen;
    uuid_gen.generate_random();
    snapshot.name = uuid_gen.to_string();
  }

  ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
                 << "name=" << snapshot.name << ", "
                 << "size=" << snapshot.size << dendl;
  read_snapshot_l1_table(on_finish);
}
// Read the most recently decoded snapshot's L1 table (an array of
// big-endian 8-byte cluster offsets) directly into its LookupTable buffer.
template <typename I>
void QCOWFormat<I>::read_snapshot_l1_table(Context* on_finish) {
  ceph_assert(!m_snapshots.empty());
  auto& snapshot = m_snapshots.rbegin()->second;

  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "snap_id=" << m_snapshots.size() << ", "
                 << "l1_table_offset=" << snapshot.l1_table_offset
                 << dendl;

  auto ctx = new LambdaContext([this, on_finish](int r) {
    handle_read_snapshot_l1_table(r, on_finish); });
  m_stream->read({{snapshot.l1_table_offset,
                   snapshot.l1_table.size * sizeof(uint64_t)}},
                 &snapshot.l1_table.bl, ctx);
}
// Byte-swap the snapshot L1 table just read and loop back to process the
// next snapshot (read_snapshot() terminates when all are loaded).
template <typename I>
void QCOWFormat<I>::handle_read_snapshot_l1_table(int r, Context* on_finish) {
  ceph_assert(!m_snapshots.empty());
  auto& snapshot = m_snapshots.rbegin()->second;

  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << ", "
                 << "snap_id=" << m_snapshots.size() << dendl;

  if (r < 0) {
    lderr(cct) << "failed to read snapshot L1 table: " << cpp_strerror(r)
               << dendl;
    on_finish->complete(r);
    return;
  }

  // big-endian -> CPU endianness conversion
  snapshot.l1_table.decode();
  read_snapshot(on_finish);
}
// Read the primary (HEAD) L1 table into m_l1_table's buffer using the
// size/offset computed when the header was validated.
template <typename I>
void QCOWFormat<I>::read_l1_table(Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << dendl;

  auto ctx = new LambdaContext([this, on_finish](int r) {
    handle_read_l1_table(r, on_finish); });
  m_stream->read({{m_l1_table_offset,
                   m_l1_table.size * sizeof(uint64_t)}},
                 &m_l1_table.bl, ctx);
}
// Byte-swap the primary L1 table and continue to the backing-file check,
// the final step of the open sequence.
template <typename I>
void QCOWFormat<I>::handle_read_l1_table(int r, Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "r=" << r << dendl;

  if (r < 0) {
    lderr(cct) << "failed to read L1 table: " << cpp_strerror(r) << dendl;
    on_finish->complete(r);
    return;
  }

  // big-endian -> CPU endianness conversion
  m_l1_table.decode();
  read_backing_file(on_finish);
}
1375 template <typename I
>
1376 void QCOWFormat
<I
>::read_backing_file(Context
* on_finish
) {
1377 if (m_backing_file_offset
== 0 || m_backing_file_size
== 0) {
1378 // all data is within the specified file
1379 on_finish
->complete(0);
1383 auto cct
= m_image_ctx
->cct
;
1384 ldout(cct
, 10) << dendl
;
1386 // TODO add support for backing files
1387 on_finish
->complete(-ENOTSUP
);
// Close the underlying stream; on_finish is completed by the stream once
// its shutdown finishes.
template <typename I>
void QCOWFormat<I>::close(Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << dendl;

  m_stream->close(on_finish);
}
// Export the parsed QCOW2 snapshots as librbd SnapInfo records, keyed by
// the synthetic 1-based snap ids assigned during header parsing.
template <typename I>
void QCOWFormat<I>::get_snapshots(SnapInfos* snap_infos, Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << dendl;

  snap_infos->clear();
  for (auto& [snap_id, snapshot] : m_snapshots) {
    // QCOW2 snapshots carry no namespace/features; map them to plain
    // user snapshots with the recorded size and timestamp
    SnapInfo snap_info(snapshot.name, cls::rbd::UserSnapshotNamespace{},
                       snapshot.size, {}, 0, 0, snapshot.timestamp);
    snap_infos->emplace(snap_id, snap_info);
  }

  on_finish->complete(0);
}
// Report the virtual image size for the HEAD revision (CEPH_NOSNAP) or a
// specific snapshot; fails with -ENOENT for an unknown snap id.
template <typename I>
void QCOWFormat<I>::get_image_size(uint64_t snap_id, uint64_t* size,
                                   Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 10) << "snap_id=" << snap_id << dendl;

  if (snap_id == CEPH_NOSNAP) {
    *size = m_size;
  } else {
    auto snapshot_it = m_snapshots.find(snap_id);
    if (snapshot_it == m_snapshots.end()) {
      on_finish->complete(-ENOENT);
      return;
    }

    auto& snapshot = snapshot_it->second;
    *size = snapshot.size;
  }

  on_finish->complete(0);
}
// Issue an async read against the HEAD or a snapshot revision by walking
// the appropriate L1 lookup table. Returns true to indicate the request
// was accepted (completion is delivered via aio_comp).
template <typename I>
bool QCOWFormat<I>::read(
    io::AioCompletion* aio_comp, uint64_t snap_id, io::Extents&& image_extents,
    io::ReadResult&& read_result, int op_flags, int read_flags,
    const ZTracer::Trace &parent_trace) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "snap_id=" << snap_id << ", "
                 << "image_extents=" << image_extents << dendl;

  // select the L1 table for the requested revision
  const LookupTable* l1_table = nullptr;
  if (snap_id == CEPH_NOSNAP) {
    l1_table = &m_l1_table;
  } else {
    auto snapshot_it = m_snapshots.find(snap_id);
    if (snapshot_it == m_snapshots.end()) {
      aio_comp->fail(-ENOENT);
      return true;
    }

    auto& snapshot = snapshot_it->second;
    l1_table = &snapshot.l1_table;
  }

  aio_comp->read_result = std::move(read_result);
  aio_comp->read_result.set_image_extents(image_extents);

  // ReadRequest is self-deleting and completes aio_comp when done
  auto read_request = new ReadRequest(this, aio_comp, l1_table,
                                      std::move(image_extents));
  read_request->send();

  return true;
}
// Compute the per-snapshot change delta for the requested extents by
// issuing one ListSnapsRequest per touched L1 table index and gathering
// the results before post-processing in handle_list_snaps().
template <typename I>
void QCOWFormat<I>::list_snaps(io::Extents&& image_extents,
                               io::SnapIds&& snap_ids, int list_snaps_flags,
                               io::SnapshotDelta* snapshot_delta,
                               const ZTracer::Trace &parent_trace,
                               Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "image_extents=" << image_extents << dendl;

  // break the image extents into cluster-aligned pieces
  ClusterExtents cluster_extents;
  populate_cluster_extents(cct, m_cluster_size, image_extents,
                           &cluster_extents);

  // map L1 table indexes to cluster extents
  std::map<uint64_t, ClusterExtents> l1_cluster_extents;
  for (auto& cluster_extent : cluster_extents) {
    uint32_t l1_table_index = cluster_extent.image_offset >> m_l1_shift;
    auto& l1_cluster_extent = l1_cluster_extents[l1_table_index];
    l1_cluster_extent.reserve(cluster_extents.size());
    l1_cluster_extent.push_back(cluster_extent);
  }

  // every revision (all snapshots + HEAD) participates in the delta
  std::map<uint64_t, const LookupTable*> snap_id_to_l1_table;
  for (auto& [snap_id, snapshot] : m_snapshots) {
    snap_id_to_l1_table[snap_id] = &snapshot.l1_table;
  }
  snap_id_to_l1_table[CEPH_NOSNAP] = &m_l1_table;

  // post-process the gathered delta before completing the caller
  on_finish = new LambdaContext([this, image_extents,
                                 snap_ids=std::move(snap_ids),
                                 snapshot_delta, on_finish](int r) mutable {
    handle_list_snaps(r, std::move(image_extents), std::move(snap_ids),
                      snapshot_delta, on_finish);
    });

  auto gather_ctx = new C_Gather(cct, on_finish);

  for (auto& [l1_table_index, cluster_extents] : l1_cluster_extents) {
    // each request is self-deleting and signals its gather sub-context
    auto list_snaps_request = new ListSnapsRequest(
      this, l1_table_index, std::move(cluster_extents), snap_id_to_l1_table,
      snapshot_delta, gather_ctx->new_sub());
    list_snaps_request->send();
  }

  gather_ctx->activate();
}
// Post-process the gathered snapshot delta: inject zero extents for
// regions beyond each (possibly shrunken) revision's size, then merge the
// per-revision deltas into the caller's requested snap id ranges.
template <typename I>
void QCOWFormat<I>::handle_list_snaps(int r, io::Extents&& image_extents,
                                      io::SnapIds&& snap_ids,
                                      io::SnapshotDelta* snapshot_delta,
                                      Context* on_finish) {
  auto cct = m_image_ctx->cct;
  ldout(cct, 20) << "r=" << r << ", "
                 << "snapshot_delta=" << *snapshot_delta << dendl;

  // m_snapshots iterates in ascending snap id (chronological) order, so
  // previous_size tracks the prior revision's size across iterations
  std::optional<uint64_t> previous_size = std::nullopt;
  for (auto& [snap_id, snapshot] : m_snapshots) {
    auto sparse_extents = &(*snapshot_delta)[{snap_id, snap_id}];
    util::zero_shrunk_snapshot(cct, image_extents, snap_id, snapshot.size,
                               &previous_size, sparse_extents);
  }

  // HEAD revision is handled last, against the current image size
  auto sparse_extents = &(*snapshot_delta)[{CEPH_NOSNAP, CEPH_NOSNAP}];
  util::zero_shrunk_snapshot(cct, image_extents, CEPH_NOSNAP, m_size,
                             &previous_size, sparse_extents);

  util::merge_snapshot_delta(snap_ids, snapshot_delta);
  on_finish->complete(r);
}
} // namespace migration
} // namespace librbd

// explicit instantiation for the production ImageCtx type
template class librbd::migration::QCOWFormat<librbd::ImageCtx>;