1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
7 #include <unordered_map>
13 #include <seastar/core/future.hh>
15 #include "include/uuid.h"
17 #include "os/Transaction.h"
18 #include "crimson/os/futurized_collection.h"
19 #include "crimson/os/futurized_store.h"
21 #include "crimson/os/seastore/transaction.h"
22 #include "crimson/os/seastore/onode_manager.h"
23 #include "crimson/os/seastore/omap_manager.h"
24 #include "crimson/os/seastore/collection_manager.h"
26 namespace crimson::os::seastore
{
29 using OnodeRef
= boost::intrusive_ptr
<Onode
>;
30 class TransactionManager
;
32 class SeastoreCollection final
: public FuturizedCollection
{
34 template <typename
... T
>
35 SeastoreCollection(T
&&... args
) :
36 FuturizedCollection(std::forward
<T
>(args
)...) {}
38 seastar::shared_mutex ordering_lock
;
41 class SeaStore final
: public FuturizedStore
{
45 using base_iertr
= crimson::errorator
<
46 crimson::ct_error::input_output_error
49 using write_meta_ertr
= base_iertr
;
50 using write_meta_ret
= write_meta_ertr::future
<>;
51 virtual write_meta_ret
write_meta(
52 const std::string
&key
,
53 const std::string
&val
56 using read_meta_ertr
= base_iertr
;
57 using read_meta_ret
= write_meta_ertr::future
<std::optional
<std::string
>>;
58 virtual read_meta_ret
read_meta(const std::string
&key
) = 0;
62 using MDStoreRef
= std::unique_ptr
<MDStore
>;
65 const std::string
& root
,
68 TransactionManagerRef tm
,
69 CollectionManagerRef cm
,
72 const std::string
& root
,
74 TransactionManagerRef tm
,
75 CollectionManagerRef cm
,
79 seastar::future
<> stop() final
;
80 mount_ertr::future
<> mount() final
;
81 seastar::future
<> umount() final
;
83 mkfs_ertr::future
<> mkfs(uuid_d new_osd_fsid
) final
;
84 seastar::future
<store_statfs_t
> stat() const final
;
86 read_errorator::future
<ceph::bufferlist
> read(
88 const ghobject_t
& oid
,
91 uint32_t op_flags
= 0) final
;
92 read_errorator::future
<ceph::bufferlist
> readv(
94 const ghobject_t
& oid
,
95 interval_set
<uint64_t>& m
,
96 uint32_t op_flags
= 0) final
;
97 get_attr_errorator::future
<ceph::bufferlist
> get_attr(
99 const ghobject_t
& oid
,
100 std::string_view name
) const final
;
101 get_attrs_ertr::future
<attrs_t
> get_attrs(
103 const ghobject_t
& oid
) final
;
105 seastar::future
<struct stat
> stat(
107 const ghobject_t
& oid
) final
;
109 read_errorator::future
<omap_values_t
> omap_get_values(
111 const ghobject_t
& oid
,
112 const omap_keys_t
& keys
) final
;
114 /// Retrieves paged set of values > start (if present)
115 using omap_get_values_ret_bare_t
= std::tuple
<bool, omap_values_t
>;
116 using omap_get_values_ret_t
= read_errorator::future
<
117 omap_get_values_ret_bare_t
>;
118 omap_get_values_ret_t
omap_get_values(
119 CollectionRef c
, ///< [in] collection
120 const ghobject_t
&oid
, ///< [in] oid
121 const std::optional
<std::string
> &start
///< [in] start, empty for begin
122 ) final
; ///< @return <done, values> values.empty() iff done
124 read_errorator::future
<bufferlist
> omap_get_header(
126 const ghobject_t
& oid
) final
;
128 seastar::future
<std::tuple
<std::vector
<ghobject_t
>, ghobject_t
>> list_objects(
130 const ghobject_t
& start
,
131 const ghobject_t
& end
,
132 uint64_t limit
) const final
;
134 seastar::future
<CollectionRef
> create_new_collection(const coll_t
& cid
) final
;
135 seastar::future
<CollectionRef
> open_collection(const coll_t
& cid
) final
;
136 seastar::future
<std::vector
<coll_t
>> list_collections() final
;
138 seastar::future
<> do_transaction(
140 ceph::os::Transaction
&& txn
) final
;
142 seastar::future
<OmapIteratorRef
> get_omap_iterator(
144 const ghobject_t
& oid
) final
;
145 seastar::future
<std::map
<uint64_t, uint64_t>> fiemap(
147 const ghobject_t
& oid
,
151 seastar::future
<> write_meta(const std::string
& key
,
152 const std::string
& value
) final
;
153 seastar::future
<std::tuple
<int, std::string
>> read_meta(const std::string
& key
) final
;
154 uuid_d
get_fsid() const final
;
156 unsigned get_max_attr_name_length() const final
{
159 enum class op_type_t
: uint8_t {
172 struct internal_context_t
{
174 ceph::os::Transaction ext_transaction
;
178 ceph::os::Transaction
&&_ext_transaction
,
179 TransactionRef
&&transaction
)
180 : ch(ch
), ext_transaction(std::move(_ext_transaction
)),
181 transaction(std::move(transaction
)),
182 iter(ext_transaction
.begin()) {}
184 TransactionRef transaction
;
186 ceph::os::Transaction::iterator iter
;
187 std::chrono::steady_clock::time_point begin_timestamp
= std::chrono::steady_clock::now();
189 void reset_preserve_handle(TransactionManager
&tm
) {
190 tm
.reset_transaction_preserve_handle(*transaction
);
191 iter
= ext_transaction
.begin();
195 static void on_error(ceph::os::Transaction
&t
);
197 template <typename F
>
198 auto repeat_with_internal_context(
200 ceph::os::Transaction
&&t
,
201 Transaction::src_t src
,
205 return seastar::do_with(
208 transaction_manager
->create_transaction(src
, tname
)),
210 [this, op_type
](auto &ctx
, auto &f
) {
211 return ctx
.transaction
->get_handle().take_collection_lock(
212 static_cast<SeastoreCollection
&>(*(ctx
.ch
)).ordering_lock
214 return repeat_eagain([&, this] {
215 ctx
.reset_preserve_handle(*transaction_manager
);
216 return std::invoke(f
, ctx
);
218 crimson::ct_error::eagain::pass_further
{},
219 crimson::ct_error::all_same_way([&ctx
](auto e
) {
220 on_error(ctx
.ext_transaction
);
223 }).then([this, op_type
, &ctx
] {
224 add_latency_sample(op_type
,
225 std::chrono::steady_clock::now() - ctx
.begin_timestamp
);
231 template <typename Ret
, typename F
>
232 auto repeat_with_onode(
234 const ghobject_t
&oid
,
235 Transaction::src_t src
,
239 auto begin_time
= std::chrono::steady_clock::now();
240 return seastar::do_with(
241 oid
, Ret
{}, std::forward
<F
>(f
),
242 [this, src
, op_type
, begin_time
, tname
243 ](auto &oid
, auto &ret
, auto &f
)
245 return repeat_eagain([&, this, src
, tname
] {
246 return transaction_manager
->with_transaction_intr(
251 return onode_manager
->get_onode(t
, oid
252 ).si_then([&](auto onode
) {
253 return seastar::do_with(std::move(onode
), [&](auto& onode
) {
256 }).si_then([&ret
](auto _ret
) {
260 }).safe_then([&ret
, op_type
, begin_time
, this] {
261 const_cast<SeaStore
*>(this)->add_latency_sample(op_type
,
262 std::chrono::steady_clock::now() - begin_time
);
263 return seastar::make_ready_future
<Ret
>(ret
);
268 using _omap_get_value_iertr
= OMapManager::base_iertr::extend
<
269 crimson::ct_error::enodata
271 using _omap_get_value_ret
= _omap_get_value_iertr::future
<ceph::bufferlist
>;
272 _omap_get_value_ret
_omap_get_value(
275 std::string_view key
) const;
277 using _omap_get_values_iertr
= OMapManager::base_iertr
;
278 using _omap_get_values_ret
= _omap_get_values_iertr::future
<omap_values_t
>;
279 _omap_get_values_ret
_omap_get_values(
282 const omap_keys_t
&keys
) const;
284 using _omap_list_bare_ret
= OMapManager::omap_list_bare_ret
;
285 using _omap_list_ret
= OMapManager::omap_list_ret
;
286 _omap_list_ret
_omap_list(
288 const omap_root_le_t
& omap_root
,
290 const std::optional
<std::string
>& start
,
291 OMapManager::omap_list_config_t config
) const;
293 friend class SeaStoreOmapIterator
;
294 omap_get_values_ret_t
omap_list(
296 const ghobject_t
&oid
,
297 const std::optional
<std::string
> &_start
,
298 OMapManager::omap_list_config_t config
);
302 SegmentManagerRef segment_manager
;
303 std::vector
<SegmentManagerRef
> secondaries
;
304 TransactionManagerRef transaction_manager
;
305 CollectionManagerRef collection_manager
;
306 OnodeManagerRef onode_manager
;
307 const uint32_t max_object_size
= 0;
309 using tm_iertr
= TransactionManager::base_iertr
;
310 using tm_ret
= tm_iertr::future
<>;
311 tm_ret
_do_transaction_step(
312 internal_context_t
&ctx
,
314 std::vector
<OnodeRef
> &onodes
,
315 ceph::os::Transaction::iterator
&i
);
318 internal_context_t
&ctx
,
321 internal_context_t
&ctx
,
324 internal_context_t
&ctx
,
326 uint64_t offset
, size_t len
,
327 ceph::bufferlist
&&bl
,
328 uint32_t fadvise_flags
);
329 tm_ret
_omap_set_values(
330 internal_context_t
&ctx
,
332 std::map
<std::string
, ceph::bufferlist
> &&aset
);
333 tm_ret
_omap_set_header(
334 internal_context_t
&ctx
,
336 ceph::bufferlist
&&header
);
338 internal_context_t
&ctx
,
341 tm_ret
_omap_rmkeyrange(
342 internal_context_t
&ctx
,
347 internal_context_t
&ctx
,
348 OnodeRef
&onode
, uint64_t size
);
350 internal_context_t
&ctx
,
352 std::map
<std::string
,bufferlist
>&& aset
);
353 tm_ret
_create_collection(
354 internal_context_t
&ctx
,
355 const coll_t
& cid
, int bits
);
356 tm_ret
_remove_collection(
357 internal_context_t
&ctx
,
359 using omap_set_kvs_ret
= tm_iertr::future
<>;
360 omap_set_kvs_ret
_omap_set_kvs(
362 const omap_root_le_t
& omap_root
,
364 omap_root_le_t
& mutable_omap_root
,
365 std::map
<std::string
, ceph::bufferlist
>&& kvs
);
367 boost::intrusive_ptr
<SeastoreCollection
> _get_collection(const coll_t
& cid
);
369 static constexpr auto LAT_MAX
= static_cast<std::size_t>(op_type_t::MAX
);
371 std::array
<seastar::metrics::histogram
, LAT_MAX
> op_lat
;
374 seastar::metrics::histogram
& get_latency(
376 assert(static_cast<std::size_t>(op_type
) < stats
.op_lat
.size());
377 return stats
.op_lat
[static_cast<std::size_t>(op_type
)];
380 void add_latency_sample(op_type_t op_type
,
381 std::chrono::steady_clock::duration dur
) {
382 seastar::metrics::histogram
& lat
= get_latency(op_type
);
384 lat
.sample_sum
+= std::chrono::duration_cast
<std::chrono::milliseconds
>(dur
).count();
386 seastar::metrics::metric_group metrics
;
387 void register_metrics();
388 seastar::future
<> write_fsid(uuid_d new_osd_fsid
);
391 seastar::future
<std::unique_ptr
<SeaStore
>> make_seastore(
392 const std::string
&device
,
393 const ConfigValues
&config
);