1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
8 #include <boost/algorithm/string/trim.hpp>
9 #include <fmt/format.h>
10 #include <fmt/ostream.h>
12 #include <seastar/core/file.hh>
13 #include <seastar/core/fstream.hh>
14 #include <seastar/core/shared_mutex.hh>
16 #include "common/safe_io.h"
17 #include "include/stringify.h"
18 #include "os/Transaction.h"
20 #include "crimson/common/buffer_io.h"
22 #include "crimson/os/futurized_collection.h"
24 #include "crimson/os/seastore/segment_cleaner.h"
25 #include "crimson/os/seastore/segment_manager.h"
26 #include "crimson/os/seastore/segment_manager/block.h"
27 #include "crimson/os/seastore/collection_manager/flat_collection_manager.h"
28 #include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h"
29 #include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h"
30 #include "crimson/os/seastore/segment_manager/ephemeral.h"
31 #include "crimson/os/seastore/onode_manager.h"
32 #include "crimson/os/seastore/object_data_handler.h"
36 using crimson::common::local_conf
;
38 template <> struct fmt::formatter
<crimson::os::seastore::SeaStore::op_type_t
>
39 : fmt::formatter
<std::string_view
> {
40 using op_type_t
= crimson::os::seastore::SeaStore::op_type_t
;
41 // parse is inherited from formatter<string_view>.
42 template <typename FormatContext
>
43 auto format(op_type_t op
, FormatContext
& ctx
) {
44 std::string_view name
= "unknown";
46 case op_type_t::TRANSACTION
:
52 case op_type_t::WRITE
:
55 case op_type_t::GET_ATTR
:
58 case op_type_t::GET_ATTRS
:
64 case op_type_t::OMAP_GET_VALUES
:
65 name
= "omap_get_values";
67 case op_type_t::OMAP_LIST
:
74 return formatter
<string_view
>::format(name
, ctx
);
80 namespace crimson::os::seastore
{
82 class FileMDStore final
: public SeaStore::MDStore
{
85 FileMDStore(const std::string
& root
) : root(root
) {}
87 write_meta_ret
write_meta(
88 const std::string
& key
, const std::string
& value
) final
{
89 std::string path
= fmt::format("{}/{}", root
, key
);
91 bl
.append(value
+ "\n");
92 return crimson::write_file(std::move(bl
), path
);
95 read_meta_ret
read_meta(const std::string
& key
) final
{
96 std::string path
= fmt::format("{}/{}", root
, key
);
97 return seastar::file_exists(
99 ).then([path
] (bool exist
) {
101 return crimson::read_file(path
)
102 .then([] (auto tmp_buf
) {
103 std::string v
= {tmp_buf
.get(), tmp_buf
.size()};
104 std::size_t pos
= v
.find("\n");
105 std::string str
= v
.substr(0, pos
);
106 return seastar::make_ready_future
<std::optional
<std::string
>>(str
);
109 return seastar::make_ready_future
<std::optional
<std::string
>>(std::nullopt
);
115 using crimson::common::get_conf
;
118 const std::string
& root
,
120 SegmentManagerRef sm
,
121 TransactionManagerRef tm
,
122 CollectionManagerRef cm
,
125 mdstore(std::move(mdstore
)),
126 segment_manager(std::move(sm
)),
127 transaction_manager(std::move(tm
)),
128 collection_manager(std::move(cm
)),
129 onode_manager(std::move(om
)),
131 get_conf
<uint64_t>("seastore_default_max_object_size"))
137 const std::string
& root
,
138 SegmentManagerRef sm
,
139 TransactionManagerRef tm
,
140 CollectionManagerRef cm
,
144 std::make_unique
<FileMDStore
>(root
),
145 std::move(sm
), std::move(tm
), std::move(cm
), std::move(om
)) {}
147 SeaStore::~SeaStore() = default;
149 void SeaStore::register_metrics()
151 namespace sm
= seastar::metrics
;
152 using op_type_t
= SeaStore::op_type_t
;
153 auto lat_label
= sm::label("latency");
154 std::pair
<op_type_t
, sm::label_instance
> labels_by_op_type
[] = {
155 {op_type_t::TRANSACTION
, lat_label("TRANSACTION")},
156 {op_type_t::READ
, lat_label("READ")},
157 {op_type_t::WRITE
, lat_label("WRITE")},
158 {op_type_t::GET_ATTR
, lat_label("GET_ATTR")},
159 {op_type_t::GET_ATTRS
, lat_label("GET_ATTRS")},
160 {op_type_t::STAT
, lat_label("STAT")},
161 {op_type_t::OMAP_GET_VALUES
, lat_label("OMAP_GET_VALUES")},
162 {op_type_t::OMAP_LIST
, lat_label("OMAP_LIST")},
165 for (auto& [op_type
, label
] : labels_by_op_type
) {
166 auto desc
= fmt::format("latency of seastore operation (optype={})",
172 "op_lat", [this, op_type
=op_type
] {
173 return get_latency(op_type
);
175 sm::description(desc
),
183 seastar::future
<> SeaStore::stop()
185 return seastar::now();
188 SeaStore::mount_ertr::future
<> SeaStore::mount()
190 return segment_manager
->mount(
192 transaction_manager
->add_segment_manager(segment_manager
.get());
193 auto sec_devices
= segment_manager
->get_secondary_devices();
194 return crimson::do_for_each(sec_devices
, [this](auto& device_entry
) {
195 device_id_t id
= device_entry
.first
;
196 magic_t magic
= device_entry
.second
.magic
;
197 device_type_t dtype
= device_entry
.second
.dtype
;
198 std::ostringstream oss
;
199 oss
<< root
<< "/block." << dtype
<< "." << std::to_string(id
);
200 auto sm
= std::make_unique
<
201 segment_manager::block::BlockSegmentManager
>(oss
.str());
202 return sm
->mount().safe_then([this, sm
=std::move(sm
), magic
]() mutable {
203 assert(sm
->get_magic() == magic
);
204 transaction_manager
->add_segment_manager(sm
.get());
205 secondaries
.emplace_back(std::move(sm
));
206 return seastar::now();
209 }).safe_then([this] {
210 return transaction_manager
->mount();
212 crimson::ct_error::assert_all
{
213 "Invalid error in SeaStore::mount"
218 seastar::future
<> SeaStore::umount()
220 return transaction_manager
->close(
222 return crimson::do_for_each(
224 [](auto& sm
) -> SegmentManager::close_ertr::future
<> {
227 }).safe_then([this] {
228 return segment_manager
->close();
230 crimson::ct_error::assert_all
{
231 "Invalid error in SeaStore::umount"
236 seastar::future
<> SeaStore::write_fsid(uuid_d new_osd_fsid
)
238 LOG_PREFIX(SeaStore::write_fsid
);
239 return read_meta("fsid").then([this, FNAME
, new_osd_fsid
] (auto tuple
) {
240 auto [ret
, fsid
] = tuple
;
241 std::string str_fsid
= stringify(new_osd_fsid
);
243 return write_meta("fsid", stringify(new_osd_fsid
));
244 } else if (ret
== 0 && fsid
!= str_fsid
) {
245 ERROR("on-disk fsid {} != provided {}",
246 fsid
, stringify(new_osd_fsid
));
247 throw std::runtime_error("store fsid error");
249 return seastar::now();
254 SeaStore::mkfs_ertr::future
<> SeaStore::mkfs(uuid_d new_osd_fsid
)
256 return read_meta("mkfs_done").then([this, new_osd_fsid
] (auto tuple
) {
257 auto [done
, value
] = tuple
;
259 return seastar::now();
261 return seastar::do_with(
262 secondary_device_set_t(),
263 [this, new_osd_fsid
](auto& sds
) {
264 auto fut
= seastar::now();
265 LOG_PREFIX(SeaStore::mkfs
);
266 DEBUG("root: {}", root
);
268 fut
= seastar::open_directory(root
).then(
269 [this, &sds
, new_osd_fsid
](seastar::file rdir
) mutable {
270 std::unique_ptr
<seastar::file
> root_f
=
271 std::make_unique
<seastar::file
>(std::move(rdir
));
272 auto sub
= root_f
->list_directory(
273 [this, &sds
, new_osd_fsid
](auto de
) mutable
274 -> seastar::future
<> {
275 LOG_PREFIX(SeaStore::mkfs
);
276 DEBUG("found file: {}", de
.name
);
277 if (de
.name
.find("block.") == 0
278 && de
.name
.length() > 6 /* 6 for "block." */) {
279 std::string entry_name
= de
.name
;
280 auto dtype_end
= entry_name
.find_first_of('.', 6);
281 device_type_t dtype
=
282 string_to_device_type(
283 entry_name
.substr(6, dtype_end
- 6));
284 if (dtype
== device_type_t::NONE
) {
285 // invalid device type
286 return seastar::now();
288 auto id
= std::stoi(entry_name
.substr(dtype_end
+ 1));
289 auto sm
= std::make_unique
<
290 segment_manager::block::BlockSegmentManager
291 >(root
+ "/" + entry_name
);
292 magic_t magic
= (magic_t
)std::rand();
300 segment_manager_config_t
{
305 seastore_meta_t
{new_osd_fsid
},
306 secondary_device_set_t()}
307 ).safe_then([this, sm
=std::move(sm
), id
]() mutable {
308 LOG_PREFIX(SeaStore::mkfs
);
309 DEBUG("mkfs: finished for segment manager {}", id
);
310 secondaries
.emplace_back(std::move(sm
));
311 return seastar::now();
312 }).handle_error(crimson::ct_error::assert_all
{"not possible"});
314 return seastar::now();
316 return sub
.done().then(
317 [root_f
=std::move(root_f
)] {
318 return seastar::now();
322 return fut
.then([this, &sds
, new_osd_fsid
] {
323 return segment_manager
->mkfs(
324 segment_manager_config_t
{
326 (magic_t
)std::rand(),
327 device_type_t::SEGMENTED
,
329 seastore_meta_t
{new_osd_fsid
},
332 }).safe_then([this] {
333 return crimson::do_for_each(secondaries
, [this](auto& sec_sm
) {
334 return sec_sm
->mount().safe_then([this, &sec_sm
] {
335 transaction_manager
->add_segment_manager(sec_sm
.get());
336 return seastar::now();
340 }).safe_then([this] {
341 return segment_manager
->mount();
342 }).safe_then([this] {
343 transaction_manager
->add_segment_manager(segment_manager
.get());
344 return transaction_manager
->mkfs();
345 }).safe_then([this] {
346 return transaction_manager
->mount();
347 }).safe_then([this] {
348 return repeat_eagain([this] {
349 return transaction_manager
->with_transaction_intr(
350 Transaction::src_t::MUTATE
,
354 return onode_manager
->mkfs(t
355 ).si_then([this, &t
] {
356 return collection_manager
->mkfs(t
);
357 }).si_then([this, &t
](auto coll_root
) {
358 transaction_manager
->write_collection_root(
360 return transaction_manager
->submit_transaction(t
);
364 }).safe_then([this, new_osd_fsid
] {
365 return write_fsid(new_osd_fsid
);
366 }).safe_then([this] {
367 return read_meta("type").then([this] (auto tuple
) {
368 auto [ret
, type
] = tuple
;
369 if (ret
== 0 && type
== "seastore") {
370 return seastar::now();
371 } else if (ret
== 0 && type
!= "seastore") {
372 LOG_PREFIX(SeaStore::mkfs
);
373 ERROR("expected seastore, but type is {}", type
);
374 throw std::runtime_error("store type error");
376 return write_meta("type", "seastore");
379 }).safe_then([this] {
380 return write_meta("mkfs_done", "yes");
381 }).safe_then([this] {
384 crimson::ct_error::assert_all
{
385 "Invalid error in SeaStore::mkfs"
392 seastar::future
<store_statfs_t
> SeaStore::stat() const
394 LOG_PREFIX(SeaStore::stat
);
396 return seastar::make_ready_future
<store_statfs_t
>(
397 transaction_manager
->store_stat()
401 seastar::future
<std::tuple
<std::vector
<ghobject_t
>, ghobject_t
>>
402 SeaStore::list_objects(CollectionRef ch
,
403 const ghobject_t
& start
,
404 const ghobject_t
& end
,
405 uint64_t limit
) const
407 using RetType
= typename
OnodeManager::list_onodes_bare_ret
;
408 return seastar::do_with(
410 [this, start
, end
, limit
] (auto& ret
) {
411 return repeat_eagain([this, start
, end
, limit
, &ret
] {
412 return transaction_manager
->with_transaction_intr(
413 Transaction::src_t::READ
,
415 [this, start
, end
, limit
](auto &t
)
417 return onode_manager
->list_onodes(t
, start
, end
, limit
);
418 }).safe_then([&ret
](auto&& _ret
) {
419 ret
= std::move(_ret
);
421 }).safe_then([&ret
] {
422 return std::move(ret
);
424 crimson::ct_error::assert_all
{
425 "Invalid error in SeaStore::list_objects"
431 seastar::future
<CollectionRef
> SeaStore::create_new_collection(const coll_t
& cid
)
433 LOG_PREFIX(SeaStore::create_new_collection
);
435 return seastar::make_ready_future
<CollectionRef
>(_get_collection(cid
));
438 seastar::future
<CollectionRef
> SeaStore::open_collection(const coll_t
& cid
)
440 LOG_PREFIX(SeaStore::open_collection
);
442 return list_collections().then([cid
, this] (auto colls
) {
443 if (auto found
= std::find(colls
.begin(), colls
.end(), cid
);
444 found
!= colls
.end()) {
445 return seastar::make_ready_future
<CollectionRef
>(_get_collection(cid
));
447 return seastar::make_ready_future
<CollectionRef
>();
452 seastar::future
<std::vector
<coll_t
>> SeaStore::list_collections()
454 return seastar::do_with(
455 std::vector
<coll_t
>(),
457 return repeat_eagain([this, &ret
] {
458 return transaction_manager
->with_transaction_intr(
459 Transaction::src_t::READ
,
461 [this, &ret
](auto& t
)
463 return transaction_manager
->read_collection_root(t
464 ).si_then([this, &t
](auto coll_root
) {
465 return collection_manager
->list(coll_root
, t
);
466 }).si_then([&ret
](auto colls
) {
467 ret
.resize(colls
.size());
469 colls
.begin(), colls
.end(), ret
.begin(),
470 [](auto p
) { return p
.first
; });
473 }).safe_then([&ret
] {
474 return seastar::make_ready_future
<std::vector
<coll_t
>>(ret
);
478 crimson::ct_error::assert_all
{
479 "Invalid error in SeaStore::list_collections"
484 SeaStore::read_errorator::future
<ceph::bufferlist
> SeaStore::read(
486 const ghobject_t
& oid
,
491 LOG_PREFIX(SeaStore::read
);
492 DEBUG("oid {} offset {} len {}", oid
, offset
, len
);
493 return repeat_with_onode
<ceph::bufferlist
>(
496 Transaction::src_t::READ
,
499 [=](auto &t
, auto &onode
) -> ObjectDataHandler::read_ret
{
500 size_t size
= onode
.get_layout().size
;
502 if (offset
>= size
) {
503 return seastar::make_ready_future
<ceph::bufferlist
>();
506 size_t corrected_len
= (len
== 0) ?
508 std::min(size
- offset
, len
);
510 return ObjectDataHandler(max_object_size
).read(
511 ObjectDataHandler::context_t
{
512 *transaction_manager
,
521 SeaStore::read_errorator::future
<ceph::bufferlist
> SeaStore::readv(
523 const ghobject_t
& oid
,
524 interval_set
<uint64_t>& m
,
527 return read_errorator::make_ready_future
<ceph::bufferlist
>();
530 using crimson::os::seastore::omap_manager::BtreeOMapManager
;
532 SeaStore::get_attr_errorator::future
<ceph::bufferlist
> SeaStore::get_attr(
534 const ghobject_t
& oid
,
535 std::string_view name
) const
537 auto c
= static_cast<SeastoreCollection
*>(ch
.get());
538 LOG_PREFIX(SeaStore::get_attr
);
539 DEBUG("{} {}", c
->get_cid(), oid
);
540 return repeat_with_onode
<ceph::bufferlist
>(
543 Transaction::src_t::READ
,
546 [=](auto &t
, auto& onode
) -> _omap_get_value_ret
{
547 auto& layout
= onode
.get_layout();
548 if (name
== OI_ATTR
&& layout
.oi_size
) {
550 bl
.append(ceph::bufferptr(&layout
.oi
[0], layout
.oi_size
));
551 return seastar::make_ready_future
<ceph::bufferlist
>(std::move(bl
));
553 if (name
== SS_ATTR
&& layout
.ss_size
) {
555 bl
.append(ceph::bufferptr(&layout
.ss
[0], layout
.ss_size
));
556 return seastar::make_ready_future
<ceph::bufferlist
>(std::move(bl
));
558 return _omap_get_value(
560 layout
.xattr_root
.get(onode
.get_metadata_hint()),
563 ).handle_error(crimson::ct_error::input_output_error::handle([FNAME
] {
564 ERROR("EIO when getting attrs");
566 }), crimson::ct_error::pass_further_all
{});
569 SeaStore::get_attrs_ertr::future
<SeaStore::attrs_t
> SeaStore::get_attrs(
571 const ghobject_t
& oid
)
573 LOG_PREFIX(SeaStore::get_attrs
);
574 auto c
= static_cast<SeastoreCollection
*>(ch
.get());
575 DEBUG("{} {}", c
->get_cid(), oid
);
576 return repeat_with_onode
<attrs_t
>(
579 Transaction::src_t::READ
,
581 op_type_t::GET_ATTRS
,
582 [=](auto &t
, auto& onode
) {
583 auto& layout
= onode
.get_layout();
584 return _omap_list(onode
, layout
.xattr_root
, t
, std::nullopt
,
585 OMapManager::omap_list_config_t::with_inclusive(false)
586 ).si_then([&layout
](auto p
) {
587 auto& attrs
= std::get
<1>(p
);
589 if (layout
.oi_size
) {
590 bl
.append(ceph::bufferptr(&layout
.oi
[0], layout
.oi_size
));
591 attrs
.emplace(OI_ATTR
, std::move(bl
));
593 if (layout
.ss_size
) {
595 bl
.append(ceph::bufferptr(&layout
.ss
[0], layout
.ss_size
));
596 attrs
.emplace(SS_ATTR
, std::move(bl
));
598 return seastar::make_ready_future
<omap_values_t
>(std::move(attrs
));
601 ).handle_error(crimson::ct_error::input_output_error::handle([FNAME
] {
602 ERROR("EIO when getting attrs");
604 }), crimson::ct_error::pass_further_all
{});
607 seastar::future
<struct stat
> SeaStore::stat(
609 const ghobject_t
& oid
)
611 LOG_PREFIX(SeaStore::stat
);
612 return repeat_with_onode
<struct stat
>(
615 Transaction::src_t::READ
,
618 [=, &oid
](auto &t
, auto &onode
) {
620 auto &olayout
= onode
.get_layout();
621 st
.st_size
= olayout
.size
;
622 st
.st_blksize
= transaction_manager
->get_block_size();
623 st
.st_blocks
= (st
.st_size
+ st
.st_blksize
- 1) / st
.st_blksize
;
625 DEBUGT("cid {}, oid {}, return size {}", t
, c
->get_cid(), oid
, st
.st_size
);
626 return seastar::make_ready_future
<struct stat
>(st
);
629 crimson::ct_error::assert_all
{
630 "Invalid error in SeaStore::stat"
636 SeaStore::omap_get_header(
638 const ghobject_t
& oid
)
639 -> read_errorator::future
<bufferlist
>
641 return seastar::make_ready_future
<bufferlist
>();
644 SeaStore::read_errorator::future
<SeaStore::omap_values_t
>
645 SeaStore::omap_get_values(
647 const ghobject_t
&oid
,
648 const omap_keys_t
&keys
)
650 auto c
= static_cast<SeastoreCollection
*>(ch
.get());
651 return repeat_with_onode
<omap_values_t
>(
654 Transaction::src_t::READ
,
656 op_type_t::OMAP_GET_VALUES
,
657 [this, keys
](auto &t
, auto &onode
) {
658 omap_root_t omap_root
= onode
.get_layout().omap_root
.get(
659 onode
.get_metadata_hint());
660 return _omap_get_values(
662 std::move(omap_root
),
667 SeaStore::_omap_get_value_ret
SeaStore::_omap_get_value(
670 std::string_view key
) const
672 return seastar::do_with(
673 BtreeOMapManager(*transaction_manager
),
676 [&t
](auto &manager
, auto& root
, auto& key
) -> _omap_get_value_ret
{
677 if (root
.is_null()) {
678 return crimson::ct_error::enodata::make();
680 return manager
.omap_get_value(root
, t
, key
681 ).si_then([](auto opt
) -> _omap_get_value_ret
{
683 return crimson::ct_error::enodata::make();
685 return seastar::make_ready_future
<ceph::bufferlist
>(std::move(*opt
));
691 SeaStore::_omap_get_values_ret
SeaStore::_omap_get_values(
693 omap_root_t
&&omap_root
,
694 const omap_keys_t
&keys
) const
696 if (omap_root
.is_null()) {
697 return seastar::make_ready_future
<omap_values_t
>();
699 return seastar::do_with(
700 BtreeOMapManager(*transaction_manager
),
701 std::move(omap_root
),
703 [&](auto &manager
, auto &root
, auto &ret
) {
704 return trans_intr::do_for_each(
708 return manager
.omap_get_value(
712 ).si_then([&ret
, &key
](auto &&p
) {
720 return seastar::now();
724 return std::move(ret
);
730 SeaStore::_omap_list_ret
SeaStore::_omap_list(
732 const omap_root_le_t
& omap_root
,
734 const std::optional
<std::string
>& start
,
735 OMapManager::omap_list_config_t config
) const
737 auto root
= omap_root
.get(onode
.get_metadata_hint());
738 if (root
.is_null()) {
739 return seastar::make_ready_future
<_omap_list_bare_ret
>(
740 true, omap_values_t
{}
743 return seastar::do_with(
744 BtreeOMapManager(*transaction_manager
),
747 [&t
, config
](auto &manager
, auto& root
, auto& start
) {
748 return manager
.omap_list(root
, t
, start
, config
);
752 SeaStore::omap_get_values_ret_t
SeaStore::omap_list(
754 const ghobject_t
&oid
,
755 const std::optional
<string
> &start
,
756 OMapManager::omap_list_config_t config
)
758 auto c
= static_cast<SeastoreCollection
*>(ch
.get());
759 LOG_PREFIX(SeaStore::omap_list
);
760 DEBUG("{} {}", c
->get_cid(), oid
);
761 using ret_bare_t
= std::tuple
<bool, SeaStore::omap_values_t
>;
762 return repeat_with_onode
<ret_bare_t
>(
765 Transaction::src_t::READ
,
767 op_type_t::OMAP_LIST
,
768 [this, config
, &start
](auto &t
, auto &onode
) {
771 onode
.get_layout().omap_root
,
777 SeaStore::omap_get_values_ret_t
SeaStore::omap_get_values(
779 const ghobject_t
&oid
,
780 const std::optional
<string
> &start
)
782 return seastar::do_with(oid
, start
,
783 [this, ch
=std::move(ch
)](auto& oid
, auto& start
) {
786 OMapManager::omap_list_config_t::with_inclusive(false));
790 class SeaStoreOmapIterator
: public FuturizedStore::OmapIterator
{
791 using omap_values_t
= FuturizedStore::omap_values_t
;
795 const ghobject_t oid
;
797 omap_values_t current
;
798 omap_values_t::iterator iter
;
800 seastar::future
<> repopulate_from(
801 std::optional
<std::string
> from
,
803 return seastar::do_with(
805 [this, inclusive
](auto &from
) {
806 return seastore
.omap_list(
810 OMapManager::omap_list_config_t::with_inclusive(inclusive
)
811 ).safe_then([this](auto p
) {
812 auto &[complete
, values
] = p
;
813 current
.swap(values
);
814 if (current
.empty()) {
817 iter
= current
.begin();
820 crimson::ct_error::assert_all
{
821 "Invalid error in SeaStoreOmapIterator::repopulate_from"
826 SeaStoreOmapIterator(
829 const ghobject_t
&oid
) :
833 iter(current
.begin())
836 seastar::future
<> seek_to_first() final
{
837 return repopulate_from(
841 seastar::future
<> upper_bound(const std::string
&after
) final
{
842 return repopulate_from(
846 seastar::future
<> lower_bound(const std::string
&from
) final
{
847 return repopulate_from(
852 return iter
!= current
.end();
854 seastar::future
<> next() final
{
857 if (iter
== current
.end()) {
858 return repopulate_from(
862 return seastar::now();
868 ceph::buffer::list
value() {
874 ~SeaStoreOmapIterator() {}
877 seastar::future
<FuturizedStore::OmapIteratorRef
> SeaStore::get_omap_iterator(
879 const ghobject_t
& oid
)
881 LOG_PREFIX(SeaStore::get_omap_iterator
);
882 DEBUG("oid: {}", oid
);
883 auto ret
= FuturizedStore::OmapIteratorRef(
884 new SeaStoreOmapIterator(
888 return ret
->seek_to_first(
889 ).then([ret
]() mutable {
890 return std::move(ret
);
894 seastar::future
<std::map
<uint64_t, uint64_t>> SeaStore::fiemap(
896 const ghobject_t
& oid
,
900 return seastar::make_ready_future
<std::map
<uint64_t, uint64_t>>();
903 void SeaStore::on_error(ceph::os::Transaction
&t
) {
904 LOG_PREFIX(SeaStore::on_error
);
905 ERROR(" transaction dump:\n");
906 JSONFormatter
f(true);
907 f
.open_object_section("transaction");
910 std::stringstream str
;
912 ERROR("{}", str
.str());
916 seastar::future
<> SeaStore::do_transaction(
918 ceph::os::Transaction
&& _t
)
920 // repeat_with_internal_context ensures ordering via collection lock
921 return repeat_with_internal_context(
924 Transaction::src_t::MUTATE
,
926 op_type_t::TRANSACTION
,
928 return with_trans_intr(*ctx
.transaction
, [&, this](auto &t
) {
929 return onode_manager
->get_or_create_onodes(
930 *ctx
.transaction
, ctx
.iter
.get_objects()
931 ).si_then([this, &ctx
](auto &&onodes
) {
932 return seastar::do_with(std::move(onodes
), [this, &ctx
](auto& onodes
) {
933 return trans_intr::repeat(
934 [this, &ctx
, &onodes
]() -> tm_iertr::future
<seastar::stop_iteration
>
936 if (ctx
.iter
.have_op()) {
937 return _do_transaction_step(
938 ctx
, ctx
.ch
, onodes
, ctx
.iter
940 return seastar::make_ready_future
<seastar::stop_iteration
>(
941 seastar::stop_iteration::no
);
944 return seastar::make_ready_future
<seastar::stop_iteration
>(
945 seastar::stop_iteration::yes
);
947 }).si_then([this, &ctx
, &onodes
] {
948 return onode_manager
->write_dirty(*ctx
.transaction
, onodes
);
951 }).si_then([this, &ctx
] {
952 return transaction_manager
->submit_transaction(*ctx
.transaction
);
954 }).safe_then([&ctx
]() {
956 ctx
.ext_transaction
.get_on_applied(),
957 ctx
.ext_transaction
.get_on_commit(),
958 ctx
.ext_transaction
.get_on_applied_sync()}) {
963 return seastar::now();
968 SeaStore::tm_ret
SeaStore::_do_transaction_step(
969 internal_context_t
&ctx
,
971 std::vector
<OnodeRef
> &onodes
,
972 ceph::os::Transaction::iterator
&i
)
974 LOG_PREFIX(SeaStore::_do_transaction_step
);
975 auto get_onode
= [&onodes
](size_t i
) -> OnodeRef
& {
976 ceph_assert(i
< onodes
.size());
980 using ceph::os::Transaction
;
982 switch (auto op
= i
.decode_op(); op
->op
) {
983 case Transaction::OP_NOP
:
984 return tm_iertr::now();
985 case Transaction::OP_REMOVE
:
987 return _remove(ctx
, get_onode(op
->oid
));
990 case Transaction::OP_TOUCH
:
992 return _touch(ctx
, get_onode(op
->oid
));
995 case Transaction::OP_WRITE
:
997 uint64_t off
= op
->off
;
998 uint64_t len
= op
->len
;
999 uint32_t fadvise_flags
= i
.get_fadvise_flags();
1000 ceph::bufferlist bl
;
1003 ctx
, get_onode(op
->oid
), off
, len
, std::move(bl
),
1007 case Transaction::OP_TRUNCATE
:
1009 uint64_t off
= op
->off
;
1010 return _truncate(ctx
, get_onode(op
->oid
), off
);
1013 case Transaction::OP_SETATTR
:
1015 std::string name
= i
.decode_string();
1016 std::map
<std::string
, bufferlist
> to_set
;
1017 ceph::bufferlist
& bl
= to_set
[name
];
1019 return _setattrs(ctx
, get_onode(op
->oid
), std::move(to_set
));
1022 case Transaction::OP_MKCOLL
:
1024 coll_t cid
= i
.get_cid(op
->cid
);
1025 return _create_collection(ctx
, cid
, op
->split_bits
);
1028 case Transaction::OP_RMCOLL
:
1030 coll_t cid
= i
.get_cid(op
->cid
);
1031 return _remove_collection(ctx
, cid
);
1034 case Transaction::OP_OMAP_SETKEYS
:
1036 std::map
<std::string
, ceph::bufferlist
> aset
;
1037 i
.decode_attrset(aset
);
1038 return _omap_set_values(ctx
, get_onode(op
->oid
), std::move(aset
));
1041 case Transaction::OP_OMAP_SETHEADER
:
1043 ceph::bufferlist bl
;
1045 return _omap_set_header(ctx
, get_onode(op
->oid
), std::move(bl
));
1048 case Transaction::OP_OMAP_RMKEYS
:
1051 i
.decode_keyset(keys
);
1052 return _omap_rmkeys(ctx
, get_onode(op
->oid
), std::move(keys
));
1055 case Transaction::OP_OMAP_RMKEYRANGE
:
1058 first
= i
.decode_string();
1059 last
= i
.decode_string();
1060 return _omap_rmkeyrange(
1061 ctx
, get_onode(op
->oid
),
1062 std::move(first
), std::move(last
));
1065 case Transaction::OP_COLL_HINT
:
1067 ceph::bufferlist hint
;
1069 return tm_iertr::now();
1072 ERROR("bad op {}", static_cast<unsigned>(op
->op
));
1073 return crimson::ct_error::input_output_error::make();
1075 } catch (std::exception
&e
) {
1076 ERROR("got exception {}", e
);
1077 return crimson::ct_error::input_output_error::make();
1081 SeaStore::tm_ret
SeaStore::_remove(
1082 internal_context_t
&ctx
,
1085 LOG_PREFIX(SeaStore::_remove
);
1086 DEBUGT("onode={}", *ctx
.transaction
, *onode
);
1087 return onode_manager
->erase_onode(*ctx
.transaction
, onode
);
1090 SeaStore::tm_ret
SeaStore::_touch(
1091 internal_context_t
&ctx
,
1094 LOG_PREFIX(SeaStore::_touch
);
1095 DEBUGT("onode={}", *ctx
.transaction
, *onode
);
1096 return tm_iertr::now();
1099 SeaStore::tm_ret
SeaStore::_write(
1100 internal_context_t
&ctx
,
1102 uint64_t offset
, size_t len
,
1103 ceph::bufferlist
&&_bl
,
1104 uint32_t fadvise_flags
)
1106 LOG_PREFIX(SeaStore::_write
);
1107 DEBUGT("onode={} {}~{}", *ctx
.transaction
, *onode
, offset
, len
);
1109 auto &object_size
= onode
->get_mutable_layout(*ctx
.transaction
).size
;
1110 object_size
= std::max
<uint64_t>(
1114 return seastar::do_with(
1116 [=, &ctx
, &onode
](auto &bl
) {
1117 return ObjectDataHandler(max_object_size
).write(
1118 ObjectDataHandler::context_t
{
1119 *transaction_manager
,
1128 SeaStore::omap_set_kvs_ret
1129 SeaStore::_omap_set_kvs(
1131 const omap_root_le_t
& omap_root
,
1133 omap_root_le_t
& mutable_omap_root
,
1134 std::map
<std::string
, ceph::bufferlist
>&& kvs
)
1136 return seastar::do_with(
1137 BtreeOMapManager(*transaction_manager
),
1138 omap_root
.get(onode
->get_metadata_hint()),
1139 [&, keys
=std::move(kvs
)](auto &omap_manager
, auto &root
) {
1140 tm_iertr::future
<> maybe_create_root
=
1143 omap_manager
.initialize_omap(
1144 t
, onode
->get_metadata_hint()
1145 ).si_then([&root
](auto new_root
) {
1148 return maybe_create_root
.si_then(
1149 [&, keys
=std::move(keys
)]() mutable {
1150 return omap_manager
.omap_set_keys(root
, t
, std::move(keys
));
1152 return tm_iertr::make_ready_future
<omap_root_t
>(std::move(root
));
1153 }).si_then([&mutable_omap_root
](auto root
) {
1154 if (root
.must_update()) {
1155 mutable_omap_root
.update(root
);
1162 SeaStore::tm_ret
SeaStore::_omap_set_values(
1163 internal_context_t
&ctx
,
1165 std::map
<std::string
, ceph::bufferlist
> &&aset
)
1167 LOG_PREFIX(SeaStore::_omap_set_values
);
1168 DEBUGT("{} {} keys", *ctx
.transaction
, *onode
, aset
.size());
1169 return _omap_set_kvs(
1171 onode
->get_layout().omap_root
,
1173 onode
->get_mutable_layout(*ctx
.transaction
).omap_root
,
1177 SeaStore::tm_ret
SeaStore::_omap_set_header(
1178 internal_context_t
&ctx
,
1180 ceph::bufferlist
&&header
)
1182 LOG_PREFIX(SeaStore::_omap_set_header
);
1183 DEBUGT("{} {} bytes", *ctx
.transaction
, *onode
, header
.length());
1184 assert(0 == "not supported yet");
1185 return tm_iertr::now();
1188 SeaStore::tm_ret
SeaStore::_omap_rmkeys(
1189 internal_context_t
&ctx
,
1193 LOG_PREFIX(SeaStore::_omap_rmkeys
);
1194 DEBUGT("{} {} keys", *ctx
.transaction
, *onode
, keys
.size());
1195 auto omap_root
= onode
->get_layout().omap_root
.get(onode
->get_metadata_hint());
1196 if (omap_root
.is_null()) {
1197 return seastar::now();
1199 return seastar::do_with(
1200 BtreeOMapManager(*transaction_manager
),
1201 onode
->get_layout().omap_root
.get(onode
->get_metadata_hint()),
1207 return trans_intr::do_for_each(
1211 return omap_manager
.omap_rm_key(
1217 if (omap_root
.must_update()) {
1218 onode
->get_mutable_layout(*ctx
.transaction
1219 ).omap_root
.update(omap_root
);
1227 SeaStore::tm_ret
SeaStore::_omap_rmkeyrange(
1228 internal_context_t
&ctx
,
1233 LOG_PREFIX(SeaStore::_omap_rmkeyrange
);
1234 DEBUGT("{} first={} last={}", *ctx
.transaction
, *onode
, first
, last
);
1235 assert(0 == "not supported yet");
1236 return tm_iertr::now();
1239 SeaStore::tm_ret
SeaStore::_truncate(
1240 internal_context_t
&ctx
,
1244 LOG_PREFIX(SeaStore::_truncate
);
1245 DEBUGT("onode={} size={}", *ctx
.transaction
, *onode
, size
);
1246 onode
->get_mutable_layout(*ctx
.transaction
).size
= size
;
1247 return ObjectDataHandler(max_object_size
).truncate(
1248 ObjectDataHandler::context_t
{
1249 *transaction_manager
,
1256 SeaStore::tm_ret
SeaStore::_setattrs(
1257 internal_context_t
&ctx
,
1259 std::map
<std::string
, bufferlist
>&& aset
)
1261 LOG_PREFIX(SeaStore::_setattrs
);
1262 DEBUGT("onode={}", *ctx
.transaction
, *onode
);
1263 auto& layout
= onode
->get_mutable_layout(*ctx
.transaction
);
1264 if (auto it
= aset
.find(OI_ATTR
); it
!= aset
.end()) {
1265 auto& val
= it
->second
;
1266 if (likely(val
.length() <= onode_layout_t::MAX_OI_LENGTH
)) {
1267 layout
.oi_size
= val
.length();
1268 maybe_inline_memcpy(
1272 onode_layout_t::MAX_OI_LENGTH
);
1279 if (auto it
= aset
.find(SS_ATTR
); it
!= aset
.end()) {
1280 auto& val
= it
->second
;
1281 if (likely(val
.length() <= onode_layout_t::MAX_SS_LENGTH
)) {
1282 layout
.ss_size
= val
.length();
1283 maybe_inline_memcpy(
1287 onode_layout_t::MAX_SS_LENGTH
);
1288 it
= aset
.erase(it
);
1295 return tm_iertr::now();
1298 return _omap_set_kvs(
1300 onode
->get_layout().xattr_root
,
1306 SeaStore::tm_ret
SeaStore::_create_collection(
1307 internal_context_t
&ctx
,
1308 const coll_t
& cid
, int bits
)
1310 return transaction_manager
->read_collection_root(
1312 ).si_then([=, &ctx
](auto _cmroot
) {
1313 return seastar::do_with(
1315 [=, &ctx
](auto &cmroot
) {
1316 return collection_manager
->create(
1321 ).si_then([=, &ctx
, &cmroot
] {
1322 if (cmroot
.must_update()) {
1323 transaction_manager
->write_collection_root(
1330 }).handle_error_interruptible(
1331 tm_iertr::pass_further
{},
1332 crimson::ct_error::assert_all
{
1333 "Invalid error in SeaStore::_create_collection"
1338 SeaStore::tm_ret
SeaStore::_remove_collection(
1339 internal_context_t
&ctx
,
1342 return transaction_manager
->read_collection_root(
1344 ).si_then([=, &ctx
](auto _cmroot
) {
1345 return seastar::do_with(
1347 [=, &ctx
](auto &cmroot
) {
1348 return collection_manager
->remove(
1352 ).si_then([=, &ctx
, &cmroot
] {
1353 // param here denotes whether it already existed, probably error
1354 if (cmroot
.must_update()) {
1355 transaction_manager
->write_collection_root(
1361 }).handle_error_interruptible(
1362 tm_iertr::pass_further
{},
1363 crimson::ct_error::assert_all
{
1364 "Invalid error in SeaStore::_create_collection"
1369 boost::intrusive_ptr
<SeastoreCollection
> SeaStore::_get_collection(const coll_t
& cid
)
1371 return new SeastoreCollection
{cid
};
1374 seastar::future
<> SeaStore::write_meta(const std::string
& key
,
1375 const std::string
& value
)
1377 LOG_PREFIX(SeaStore::write_meta
);
1378 DEBUG("key: {}; value: {}", key
, value
);
1379 return seastar::do_with(
1381 [this, FNAME
](auto& key
, auto& value
) {
1382 return repeat_eagain([this, FNAME
, &key
, &value
] {
1383 return transaction_manager
->with_transaction_intr(
1384 Transaction::src_t::MUTATE
,
1386 [this, FNAME
, &key
, &value
](auto& t
)
1388 DEBUGT("Have transaction, key: {}; value: {}", t
, key
, value
);
1389 return transaction_manager
->update_root_meta(
1391 ).si_then([this, &t
] {
1392 return transaction_manager
->submit_transaction(t
);
1395 }).safe_then([this, &key
, &value
] {
1396 return mdstore
->write_meta(key
, value
);
1399 crimson::ct_error::assert_all
{"Invalid error in SeaStore::write_meta"}
1403 seastar::future
<std::tuple
<int, std::string
>> SeaStore::read_meta(const std::string
& key
)
1405 LOG_PREFIX(SeaStore::read_meta
);
1406 DEBUG("key: {}", key
);
1407 return mdstore
->read_meta(key
).safe_then([](auto v
) {
1409 return std::make_tuple(0, std::move(*v
));
1411 return std::make_tuple(-1, std::string(""));
1414 crimson::ct_error::assert_all
{
1415 "Invalid error in SeaStore::read_meta"
1420 uuid_d
SeaStore::get_fsid() const
1422 return segment_manager
->get_meta().seastore_id
;
1425 seastar::future
<std::unique_ptr
<SeaStore
>> make_seastore(
1426 const std::string
&device
,
1427 const ConfigValues
&config
)
1429 return SegmentManager::get_segment_manager(
1431 ).then([&device
](auto sm
) {
1432 auto scanner
= std::make_unique
<ExtentReader
>();
1433 auto& scanner_ref
= *scanner
.get();
1434 auto segment_cleaner
= std::make_unique
<SegmentCleaner
>(
1435 SegmentCleaner::config_t::get_default(),
1437 false /* detailed */);
1439 auto journal
= std::make_unique
<Journal
>(*sm
, scanner_ref
);
1440 auto cache
= std::make_unique
<Cache
>(scanner_ref
);
1441 auto lba_manager
= lba_manager::create_lba_manager(*sm
, *cache
);
1443 auto epm
= std::make_unique
<ExtentPlacementManager
>(*cache
, *lba_manager
);
1445 journal
->set_segment_provider(&*segment_cleaner
);
1447 auto tm
= std::make_unique
<TransactionManager
>(
1449 std::move(segment_cleaner
),
1452 std::move(lba_manager
),
1456 auto cm
= std::make_unique
<collection_manager::FlatCollectionManager
>(*tm
);
1457 return std::make_unique
<SeaStore
>(
1462 std::make_unique
<crimson::os::seastore::onode::FLTreeOnodeManager
>(*tm
));