1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "crimson/os/seastore/cache.h"
5 #include "crimson/common/log.h"
7 // included for get_extent_by_type
8 #include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h"
9 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h"
10 #include "crimson/os/seastore/onode_manager/simple-fltree/onode_block.h"
11 #include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h"
12 #include "test/crimson/seastore/test_block.h"
15 seastar::logger
& logger() {
16 return crimson::get_logger(ceph_subsys_filestore
);
20 namespace crimson::os::seastore
{
22 Cache::Cache(SegmentManager
&segment_manager
) :
23 segment_manager(segment_manager
) {}
27 for (auto &i
: extents
) {
28 logger().error("~Cache: extent {} still alive", i
);
30 ceph_assert(extents
.empty());
33 Cache::retire_extent_ret
Cache::retire_extent_if_cached(
34 Transaction
&t
, paddr_t addr
)
36 if (auto ext
= t
.write_set
.find_offset(addr
); ext
!= t
.write_set
.end()) {
37 logger().debug("{}: found {} in t.write_set", __func__
, addr
);
38 t
.add_to_retired_set(CachedExtentRef(&*ext
));
39 return retire_extent_ertr::now();
40 } else if (auto iter
= extents
.find_offset(addr
);
41 iter
!= extents
.end()) {
42 auto ret
= CachedExtentRef(&*iter
);
43 return ret
->wait_io().then([&t
, ret
=std::move(ret
)]() mutable {
44 t
.add_to_retired_set(ret
);
45 return retire_extent_ertr::now();
48 return retire_extent_ertr::now();
52 void Cache::add_extent(CachedExtentRef ref
)
54 assert(ref
->is_valid());
57 if (ref
->is_dirty()) {
60 ceph_assert(!ref
->primary_ref_list_hook
.is_linked());
62 logger().debug("add_extent: {}", *ref
);
65 void Cache::mark_dirty(CachedExtentRef ref
)
67 if (ref
->is_dirty()) {
68 assert(ref
->primary_ref_list_hook
.is_linked());
73 ref
->state
= CachedExtent::extent_state_t::DIRTY
;
75 logger().debug("mark_dirty: {}", *ref
);
78 void Cache::add_to_dirty(CachedExtentRef ref
)
80 assert(ref
->is_valid());
81 assert(!ref
->primary_ref_list_hook
.is_linked());
82 intrusive_ptr_add_ref(&*ref
);
83 dirty
.push_back(*ref
);
86 void Cache::remove_extent(CachedExtentRef ref
)
88 logger().debug("remove_extent: {}", *ref
);
89 assert(ref
->is_valid());
92 if (ref
->is_dirty()) {
93 ceph_assert(ref
->primary_ref_list_hook
.is_linked());
94 dirty
.erase(dirty
.s_iterator_to(*ref
));
95 intrusive_ptr_release(&*ref
);
97 ceph_assert(!ref
->primary_ref_list_hook
.is_linked());
101 void Cache::replace_extent(CachedExtentRef next
, CachedExtentRef prev
)
103 assert(next
->get_paddr() == prev
->get_paddr());
104 assert(next
->version
== prev
->version
+ 1);
105 extents
.replace(*next
, *prev
);
107 if (prev
->is_dirty()) {
108 ceph_assert(prev
->primary_ref_list_hook
.is_linked());
109 auto prev_it
= dirty
.iterator_to(*prev
);
110 dirty
.insert(prev_it
, *next
);
111 dirty
.erase(prev_it
);
112 intrusive_ptr_release(&*prev
);
113 intrusive_ptr_add_ref(&*next
);
119 CachedExtentRef
Cache::alloc_new_extent_by_type(
120 Transaction
&t
, ///< [in, out] current transaction
121 extent_types_t type
, ///< [in] type tag
122 segment_off_t length
///< [in] length
126 case extent_types_t::ROOT
:
127 assert(0 == "ROOT is never directly alloc'd");
128 return CachedExtentRef();
129 case extent_types_t::LADDR_INTERNAL
:
130 return alloc_new_extent
<lba_manager::btree::LBAInternalNode
>(t
, length
);
131 case extent_types_t::LADDR_LEAF
:
132 return alloc_new_extent
<lba_manager::btree::LBALeafNode
>(t
, length
);
133 case extent_types_t::ONODE_BLOCK
:
134 return alloc_new_extent
<OnodeBlock
>(t
, length
);
135 case extent_types_t::EXTMAP_INNER
:
136 return alloc_new_extent
<extentmap_manager::ExtMapInnerNode
>(t
, length
);
137 case extent_types_t::EXTMAP_LEAF
:
138 return alloc_new_extent
<extentmap_manager::ExtMapLeafNode
>(t
, length
);
139 case extent_types_t::TEST_BLOCK
:
140 return alloc_new_extent
<TestBlock
>(t
, length
);
141 case extent_types_t::TEST_BLOCK_PHYSICAL
:
142 return alloc_new_extent
<TestBlockPhysical
>(t
, length
);
143 case extent_types_t::NONE
: {
144 ceph_assert(0 == "NONE is an invalid extent type");
145 return CachedExtentRef();
148 ceph_assert(0 == "impossible");
149 return CachedExtentRef();
// Cache::duplicate_for_write: make a transaction-private mutable copy of
// extent i so t can mutate it without touching the committed version.
// NOTE(review): this listing is garbled — embedded source line numbers
// and dropped lines. Missing per the numbering gaps: the parameter list
// (orig. 154-158), the ROOT-branch assert/else split (162, 164, 168-170)
// and the trailing version bump/return (173-175). Recover from VCS.
153 CachedExtentRef
Cache::duplicate_for_write(
159 auto ret
= i
->duplicate_for_write();
// ROOT copies are tracked on the transaction itself (t.root) rather than
// via the mutated-extent path below.
160 if (ret
->get_type() == extent_types_t::ROOT
) {
161 // root must be loaded before mutate
163 t
.root
= ret
->cast
<RootBlock
>();
// Non-ROOT: carry the committed CRC forward, remember the prior
// instance for delta generation, and register the copy with t.
165 ret
->last_committed_crc
= i
->last_committed_crc
;
166 ret
->prior_instance
= i
;
167 t
.add_mutated_extent(ret
);
171 ret
->state
= CachedExtent::extent_state_t::MUTATION_PENDING
;
172 logger().debug("Cache::duplicate_for_write: {} -> {}", *i
, *ret
);
// Cache::try_construct_record: validate t's read set, then fold t's
// mutated, retired and fresh extents into a record_t (deltas + extent
// payloads) and flip in-memory cache state for the commit. Presumably
// returns std::nullopt when a read-set extent has gone INVALID (conflict)
// — TODO confirm, the failure path is missing from this listing.
// NOTE(review): garbled listing — embedded source line numbers and many
// dropped lines (orig. 177, 181-187, 193-194, 196, 199-202, 206-217
// delta fields, 219-238 root-delta body, 244-247, 251-252, 256-257,
// 260-267). Recover the full body from VCS before editing.
176 std::optional
<record_t
> Cache::try_construct_record(Transaction
&t
)
178 // First, validate read set
179 for (auto &i
: t
.read_set
) {
180 if (i
->state
== CachedExtent::extent_state_t::INVALID
)
188 // Add new copy of mutated blocks, set_io_wait to block until written
189 record
.deltas
.reserve(t
.mutated_block_list
.size());
190 for (auto &i
: t
.mutated_block_list
) {
191 if (!i
->is_valid()) {
192 logger().debug("try_construct_record: ignoring invalid {}", *i
);
195 logger().debug("try_construct_record: mutating {}", *i
);
// Swap the prior committed instance for the mutated copy in the cache.
197 assert(i
->prior_instance
);
198 replace_extent(i
, i
->prior_instance
);
203 assert(i
->get_version() > 0);
204 auto final_crc
= i
->get_crc32c();
205 record
.deltas
.push_back(
210 ? i
->cast
<LogicalCachedExtent
>()->get_laddr()
212 i
->last_committed_crc
,
214 (segment_off_t
)i
->get_length(),
215 i
->get_version() - 1,
218 i
->last_committed_crc
= final_crc
;
223 "{}: writing out root delta for {}",
226 record
.deltas
.push_back(
228 extent_types_t::ROOT
,
234 t
.root
->get_version() - 1,
239 // Transaction is now a go, set up in-memory cache state
240 // invalidate now invalid blocks
241 for (auto &i
: t
.retired_set
) {
242 logger().debug("try_construct_record: retiring {}", *i
);
243 ceph_assert(i
->is_valid());
245 i
->state
= CachedExtent::extent_state_t::INVALID
;
// Fresh blocks are serialized whole into the record's extent payloads.
248 record
.extents
.reserve(t
.fresh_block_list
.size());
249 for (auto &i
: t
.fresh_block_list
) {
250 logger().debug("try_construct_record: fresh block {}", *i
);
253 bl
.append(i
->get_bptr());
254 if (i
->get_type() == extent_types_t::ROOT
) {
255 assert(0 == "ROOT never gets written as a fresh block");
258 assert(bl
.length() == i
->get_length());
259 record
.extents
.push_back(extent_t
{
262 ? i
->cast
<LogicalCachedExtent
>()->get_laddr()
268 return std::make_optional
<record_t
>(std::move(record
));
// Cache::complete_commit: runs after the journal/segment write for t has
// landed at final_block_start; fixes up paddrs and CRCs on fresh extents,
// applies delta-write hooks on mutated extents, and informs the
// SegmentCleaner (if provided) of space used/freed.
// NOTE(review): garbled listing — dropped lines include parts of the
// signature (orig. 272, 274), the root-update guard (276-279, 283-286),
// loop interiors (291, 294-296, 299-306), the dirty-list handling for
// mutated extents (315-316, 319-323) and the retire/IO-completion tail
// (326-337). Recover from VCS before editing.
271 void Cache::complete_commit(
273 paddr_t final_block_start
,
275 SegmentCleaner
*cleaner
)
280 root
->state
= CachedExtent::extent_state_t::DIRTY
;
281 root
->on_delta_write(final_block_start
);
282 root
->dirty_from
= seq
;
284 logger().debug("complete_commit: new root {}", *t
.root
);
// Fresh blocks: relative paddrs recorded at alloc time are rebased onto
// the final write position, then committed CRCs are captured.
287 for (auto &i
: t
.fresh_block_list
) {
288 i
->set_paddr(final_block_start
.add_relative(i
->get_paddr()));
289 i
->last_committed_crc
= i
->get_crc32c();
290 i
->on_initial_write();
292 if (!i
->is_valid()) {
293 logger().debug("complete_commit: invalid {}", *i
);
297 i
->state
= CachedExtent::extent_state_t::CLEAN
;
298 logger().debug("complete_commit: fresh {}", *i
);
301 cleaner
->mark_space_used(
307 // Add new copy of mutated blocks, set_io_wait to block until written
308 for (auto &i
: t
.mutated_block_list
) {
309 logger().debug("complete_commit: mutated {}", *i
);
310 assert(i
->prior_instance
);
311 i
->on_delta_write(final_block_start
);
// Drop the back-pointer now that the delta has been durably written.
312 i
->prior_instance
= CachedExtentRef();
313 if (!i
->is_valid()) {
314 logger().debug("complete_commit: not dirtying invalid {}", *i
);
317 i
->state
= CachedExtent::extent_state_t::DIRTY
;
318 if (i
->version
== 1) {
324 for (auto &i
: t
.retired_set
) {
325 cleaner
->mark_space_free(
331 for (auto &i
: t
.mutated_block_list
) {
// NOTE(review): fragment of Cache::init() — the function header (orig.
// 339-341), any removal of a pre-existing root, and the trailing
// registration of the new root (orig. 344-346) were dropped from this
// listing. What remains: allocate a fresh in-memory RootBlock and mark
// it DIRTY. Recover the full function from VCS.
338 // initial creation will do mkfs followed by mount each of which calls init
342 root
= new RootBlock();
343 root
->state
= CachedExtent::extent_state_t::DIRTY
;
347 Cache::mkfs_ertr::future
<> Cache::mkfs(Transaction
&t
)
349 return get_root(t
).safe_then([this, &t
](auto croot
) {
350 duplicate_for_write(t
, croot
);
351 return mkfs_ertr::now();
355 Cache::close_ertr::future
<> Cache::close()
358 for (auto i
= dirty
.begin(); i
!= dirty
.end(); ) {
361 intrusive_ptr_release(ptr
);
363 return close_ertr::now();
// Cache::replay_delta: apply one journaled delta during mount/replay.
// ROOT deltas are applied directly to the cached root; otherwise the
// target extent is obtained (cache-only lookup when pversion > 0 — an
// absent extent means the delta is obsolete) and the delta is applied
// with before/after CRC cross-checks.
// NOTE(review): garbled listing — dropped lines include parts of the
// signature (orig. 367, 369), closing braces, the pversion==0 read path
// (385-394) and the dirty-list/extent-registration tail (417-423).
// Recover from VCS before editing.
366 Cache::replay_delta_ret
368 journal_seq_t journal_seq
,
370 const delta_info_t
&delta
)
372 if (delta
.type
== extent_types_t::ROOT
) {
373 logger().debug("replay_delta: found root delta");
374 root
->apply_delta_and_adjust_crc(record_base
, delta
.bl
);
375 root
->dirty_from
= journal_seq
;
376 return replay_delta_ertr::now();
// Helper: cache-only lookup returning a null ref on miss.
378 auto get_extent_if_cached
= [this](paddr_t addr
)
379 -> replay_delta_ertr::future
<CachedExtentRef
> {
380 auto retiter
= extents
.find_offset(addr
);
381 if (retiter
!= extents
.end()) {
382 return replay_delta_ertr::make_ready_future
<CachedExtentRef
>(&*retiter
);
384 return replay_delta_ertr::make_ready_future
<CachedExtentRef
>();
387 auto extent_fut
= delta
.pversion
== 0 ?
393 get_extent_if_cached(
395 return extent_fut
.safe_then([=, &delta
](auto extent
) {
397 assert(delta
.pversion
> 0);
399 "replay_delta: replaying {}, extent not present so delta is obsolete",
405 "replay_delta: replaying {} on {}",
// The delta only applies to the exact version it was generated against.
409 assert(extent
->version
== delta
.pversion
);
411 assert(extent
->last_committed_crc
== delta
.prev_crc
);
412 extent
->apply_delta_and_adjust_crc(record_base
, delta
.bl
);
413 assert(extent
->last_committed_crc
== delta
.final_crc
);
415 if (extent
->version
== 0) {
416 extent
->dirty_from
= journal_seq
;
// Cache::get_next_dirty_extents: collect extents whose dirty_from is
// older than seq (the dirty list is maintained in dirty_from order —
// see the ordering assert), wait out any in-flight IO on each, then
// return the vector.
// NOTE(review): garbled listing — the candidate-selection body (orig.
// 432-436) and most of the do_with/do_for_each plumbing (438-443, 445,
// 449-453) were dropped. Recover from VCS before editing.
424 Cache::get_next_dirty_extents_ret
Cache::get_next_dirty_extents(
427 std::vector
<CachedExtentRef
> ret
;
428 for (auto i
= dirty
.begin(); i
!= dirty
.end(); ++i
) {
429 CachedExtentRef cand
;
430 if (i
->dirty_from
< seq
) {
431 assert(ret
.empty() || ret
.back()->dirty_from
<= i
->dirty_from
);
437 return seastar::do_with(
440 return seastar::do_for_each(
444 "get_next_dirty_extents: waiting on {}",
446 return ext
->wait_io();
447 }).then([&ret
]() mutable {
448 return seastar::make_ready_future
<std::vector
<CachedExtentRef
>>(
// Cache::get_root: presumably returns t's root if the transaction has
// already pinned one, otherwise the cached root after waiting out any
// in-flight IO — TODO confirm, most of the body (orig. 455-457, 459-461,
// 463-464, 466-470) is missing from this garbled listing. Recover from
// VCS before editing.
454 Cache::get_root_ret
Cache::get_root(Transaction
&t
)
458 get_root_ertr::ready_future_marker
{},
462 return ret
->wait_io().then([ret
, &t
] {
465 get_root_ertr::ready_future_marker
{},
// Alias for the staged-fltree onode extent type used in the dispatch
// below.
471 using StagedOnodeBlock
= crimson::os::seastore::onode::SeastoreNodeExtent
;
// Cache::get_extent_by_type: runtime-typed read — dispatch on the type
// tag to the statically-typed get_extent<T>(offset, length) and, for
// logical extents, stamp laddr onto the result before returning it.
// NOTE(review): garbled listing, and the function continues past the end
// of this chunk — the parameter list (orig. 474-480: presumably
// extent_types_t type, paddr_t offset, laddr_t laddr per the uses below
// — TODO confirm), the opening switch, per-case closing braces and the
// final closers after orig. 537 are all missing. Recover from VCS.
473 Cache::get_extent_ertr::future
<CachedExtentRef
> Cache::get_extent_by_type(
477 segment_off_t length
)
481 case extent_types_t::ROOT
:
482 assert(0 == "ROOT is never directly read");
483 return get_extent_ertr::make_ready_future
<CachedExtentRef
>();
484 case extent_types_t::LADDR_INTERNAL
:
485 return get_extent
<lba_manager::btree::LBAInternalNode
>(offset
, length
486 ).safe_then([](auto extent
) {
487 return CachedExtentRef(extent
.detach(), false /* add_ref */);
489 case extent_types_t::LADDR_LEAF
:
490 return get_extent
<lba_manager::btree::LBALeafNode
>(offset
, length
491 ).safe_then([](auto extent
) {
492 return CachedExtentRef(extent
.detach(), false /* add_ref */);
494 case extent_types_t::EXTMAP_INNER
:
495 return get_extent
<extentmap_manager::ExtMapInnerNode
>(offset
, length
496 ).safe_then([](auto extent
) {
497 return CachedExtentRef(extent
.detach(), false /* add_ref */);
499 case extent_types_t::EXTMAP_LEAF
:
500 return get_extent
<extentmap_manager::ExtMapLeafNode
>(offset
, length
501 ).safe_then([](auto extent
) {
502 return CachedExtentRef(extent
.detach(), false /* add_ref */);
504 case extent_types_t::ONODE_BLOCK
:
505 return get_extent
<OnodeBlock
>(offset
, length
506 ).safe_then([](auto extent
) {
507 return CachedExtentRef(extent
.detach(), false /* add_ref */);
509 case extent_types_t::ONODE_BLOCK_STAGED
:
510 return get_extent
<StagedOnodeBlock
>(offset
, length
511 ).safe_then([](auto extent
) {
512 return CachedExtentRef(extent
.detach(), false /* add_ref */);
514 case extent_types_t::TEST_BLOCK
:
515 return get_extent
<TestBlock
>(offset
, length
516 ).safe_then([](auto extent
) {
517 return CachedExtentRef(extent
.detach(), false /* add_ref */);
519 case extent_types_t::TEST_BLOCK_PHYSICAL
:
520 return get_extent
<TestBlockPhysical
>(offset
, length
521 ).safe_then([](auto extent
) {
522 return CachedExtentRef(extent
.detach(), false /* add_ref */);
524 case extent_types_t::NONE
: {
525 ceph_assert(0 == "NONE is an invalid extent type");
526 return get_extent_ertr::make_ready_future
<CachedExtentRef
>();
529 ceph_assert(0 == "impossible");
530 return get_extent_ertr::make_ready_future
<CachedExtentRef
>();
// Post-dispatch: logical extents carry an laddr; physical ones must not.
532 }().safe_then([laddr
](CachedExtentRef e
) {
533 assert(e
->is_logical() == (laddr
!= L_ADDR_NULL
));
534 if (e
->is_logical()) {
535 e
->cast
<LogicalCachedExtent
>()->set_laddr(laddr
);
537 return get_extent_ertr::make_ready_future
<CachedExtentRef
>(e
);