1 //-*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
6 #include "crimson/os/seastore/seastore_types.h"
7 #include "crimson/os/seastore/random_block_manager.h"
8 #include "crimson/os/seastore/device.h"
16 namespace crimson::os::seastore::random_block_device
{
18 // from blk/BlockDevice.h
// Write-lifetime hint constants (originally from blk/BlockDevice.h).
//
// On Linux the values mirror the kernel's fcntl.h write-life hints, and
// F_SET_FILE_RW_HINT is supplied here when the libc headers do not provide it.
// Elsewhere every hint collapses to 0 and WRITE_LIFE_MAX is 1.
#if defined(__linux__)
#if !defined(F_SET_FILE_RW_HINT)
#define F_LINUX_SPECIFIC_BASE 1024
#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
#endif
// These values match the Linux definition:
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fcntl.h#n56
#define WRITE_LIFE_NOT_SET 0 // No hint information set
#define WRITE_LIFE_NONE 1 // No hints about write life time
#define WRITE_LIFE_SHORT 2 // Data written has a short life time
#define WRITE_LIFE_MEDIUM 3 // Data written has a medium life time
#define WRITE_LIFE_LONG 4 // Data written has a long life time
#define WRITE_LIFE_EXTREME 5 // Data written has an extremely long life time
#define WRITE_LIFE_MAX 6
#else
// On systems that don't have WRITE_LIFE_*, only one FD is used
// and all files are created equal.
#define WRITE_LIFE_NOT_SET 0 // No hint information set
#define WRITE_LIFE_NONE 0 // No hints about write life time
#define WRITE_LIFE_SHORT 0 // Data written has a short life time
#define WRITE_LIFE_MEDIUM 0 // Data written has a medium life time
#define WRITE_LIFE_LONG 0 // Data written has a long life time
#define WRITE_LIFE_EXTREME 0 // Data written has an extremely long life time
#define WRITE_LIFE_MAX 1
#endif
45 using read_ertr
= crimson::errorator
<
46 crimson::ct_error::input_output_error
,
47 crimson::ct_error::invarg
,
48 crimson::ct_error::enoent
,
49 crimson::ct_error::erange
>;
51 using write_ertr
= crimson::errorator
<
52 crimson::ct_error::input_output_error
,
53 crimson::ct_error::invarg
,
54 crimson::ct_error::ebadf
,
55 crimson::ct_error::enospc
>;
57 using open_ertr
= crimson::errorator
<
58 crimson::ct_error::input_output_error
,
59 crimson::ct_error::invarg
,
60 crimson::ct_error::enoent
>;
62 using nvme_command_ertr
= crimson::errorator
<
63 crimson::ct_error::input_output_error
>;
65 using discard_ertr
= crimson::errorator
<
66 crimson::ct_error::input_output_error
>;
// Size in bytes of the RBM superblock; also the value returned by
// RBMDevice::get_shard_reserved_size().
constexpr uint32_t RBM_SUPERBLOCK_SIZE = 4096;
// RBM feature constants.
// NOTE(review): the enclosing declaration was missing from this listing; an
// unscoped anonymous enum is assumed from the enumerator syntax — confirm
// against the original header.
enum {
  // TODO: This allows the device to manage crc on a block by itself
  RBM_NVME_END_TO_END_PROTECTION = 1,
  RBM_BITMAP_BLOCK_CRC = 2,
};
// RBMDevice: Device subclass whose accessors are backed by an
// rbm_metadata_header_t (`super`) and an rbm_shard_info_t (`shard_info`).
// NOTE(review): this listing appears to have lost lines (closing braces,
// access specifiers, template header, some parameters) — compare with the
// original header before relying on exact signatures.
75 class RBMDevice
: public Device
{
// Device-interface read: converts `addr` with convert_paddr_to_abs_addr()
// and forwards to the read(rbm_addr, out) overload.
78 read_ertr::future
<> read (
81 ceph::bufferptr
&out
) final
{
82 uint64_t rbm_addr
= convert_paddr_to_abs_addr(addr
);
83 return read(rbm_addr
, out
);
// Header state that the getters below read from.
86 rbm_metadata_header_t super
;
// Per-shard layout info (start_offset / size are read below).
87 rbm_shard_info_t shard_info
;
90 virtual ~RBMDevice() = default;
// Factory: returns std::make_unique<T>() for a default-constructed T.
93 static std::unique_ptr
<T
> create() {
94 return std::make_unique
<T
>();
// Returns super.config.spec.id.
97 device_id_t
get_device_id() const {
98 return super
.config
.spec
.id
;
// Returns super.config.spec.magic.
101 magic_t
get_magic() const final
{
102 return super
.config
.spec
.magic
;
// Always reports device_type_t::RANDOM_BLOCK_SSD.
105 device_type_t
get_device_type() const final
{
106 return device_type_t::RANDOM_BLOCK_SSD
;
// Always reports backend_type_t::RANDOM_BLOCK.
109 backend_type_t
get_backend_type() const final
{
110 return backend_type_t::RANDOM_BLOCK
;
// Returns a const reference to super.config.meta.
113 const seastore_meta_t
&get_meta() const final
{
114 return super
.config
.meta
;
// Returns a mutable reference to super.config.secondary_devices.
117 secondary_device_set_t
& get_secondary_devices() final
{
118 return super
.config
.secondary_devices
;
// Returns super.size.
120 std::size_t get_available_size() const { return super
.size
; }
// Returns super.block_size.
121 extent_len_t
get_block_size() const { return super
.block_size
; }
// Pure-virtual read into `bptr`; the leading parameter(s) are not visible
// in this listing.
123 virtual read_ertr::future
<> read(
125 bufferptr
&bptr
) = 0;
130 * Give hint to device about classification of data whose life time is similar
131 * with each other. Data with same stream value will be managed together in
132 * SSD for better write performance.
// Pure-virtual write; `stream` defaults to 0 (see the stream note above).
134 virtual write_ertr::future
<> write(
137 uint16_t stream
= 0) = 0;
// Default discard does nothing and returns seastar::now().
139 virtual discard_ertr::future
<> discard(
141 uint64_t len
) { return seastar::now(); }
// Pure-virtual open taking a path and seastar::open_flags.
143 virtual open_ertr::future
<> open(
144 const std::string
& path
,
145 seastar::open_flags mode
) = 0;
// Pure-virtual writev; `stream` defaults to 0.
147 virtual write_ertr::future
<> writev(
150 uint16_t stream
= 0) = 0;
// Always returns false in this base class.
152 bool is_data_protection_enabled() const { return false; }
// mkfs/mount helpers: declared here, defined elsewhere.
154 mkfs_ret
do_mkfs(device_config_t
);
157 mkfs_ret
do_primary_mkfs(device_config_t
, int shard_num
, size_t journal_size
);
159 mount_ret
do_mount();
161 mount_ret
do_shard_mount();
// Header I/O helpers: declared here, defined elsewhere.
163 write_ertr::future
<> write_rbm_header();
165 read_ertr::future
<rbm_metadata_header_t
> read_rbm_header(rbm_abs_addr addr
);
// stat_device() yields a seastar::stat_data through read_ertr.
167 using stat_device_ret
=
168 read_ertr::future
<seastar::stat_data
>;
169 virtual stat_device_ret
stat_device() = 0;
171 virtual std::string
get_device_path() const = 0;
// Returns super.journal_size.
173 uint64_t get_journal_size() const {
174 return super
.journal_size
;
// Reserved bytes per shard: RBM_SUPERBLOCK_SIZE.
177 static rbm_abs_addr
get_shard_reserved_size() {
178 return RBM_SUPERBLOCK_SIZE
;
// Journal start = shard_info.start_offset + get_shard_reserved_size().
// NOTE(review): not const-qualified although it only reads members.
181 rbm_abs_addr
get_shard_journal_start() {
182 return shard_info
.start_offset
+ get_shard_reserved_size();
// Returns shard_info.start_offset.
185 uint64_t get_shard_start() const {
186 return shard_info
.start_offset
;
// Shard end = shard_info.start_offset + shard_info.size.
189 uint64_t get_shard_end() const {
190 return shard_info
.start_offset
+ shard_info
.size
;
193 using RBMDeviceRef
= std::unique_ptr
<RBMDevice
>;
// Default size (2^26 bytes = 64 MiB) used for both the journal_size and
// data_size defaults of create_test_ephemeral(). The shift is done in
// uint64_t so the expression already has the declared type.
constexpr uint64_t DEFAULT_TEST_CBJOURNAL_SIZE = uint64_t{1} << 26;
// EphemeralRBMDevice: RBMDevice subclass that reports its own `size` and
// `block_size` rather than the values in the base-class header.
// NOTE(review): this listing appears to have lost lines (the `size` / `buf`
// member declarations, function bodies, closing braces) — compare with the
// original header.
197 class EphemeralRBMDevice
: public RBMDevice
{
// Block size reported by get_block_size() and stat_device(); defaults to 0.
200 uint64_t block_size
= 0;
201 constexpr static uint32_t TEST_BLOCK_SIZE
= 4096;
// Stores `size` and `block_size`; `buf` starts out as nullptr.
203 EphemeralRBMDevice(size_t size
, uint64_t block_size
) :
204 size(size
), block_size(block_size
), buf(nullptr) {
// Destructor body is not visible in this listing.
206 ~EphemeralRBMDevice() {
// Returns the `size` given at construction.
213 std::size_t get_available_size() const final
{ return size
; }
// Returns `block_size`.
// NOTE(review): block_size is uint64_t but is returned as extent_len_t —
// confirm no truncation is possible.
214 extent_len_t
get_block_size() const final
{ return block_size
; }
// The following overrides are declared here and defined elsewhere.
216 mount_ret
mount() final
;
217 mkfs_ret
mkfs(device_config_t config
) final
;
219 open_ertr::future
<> open(
220 const std::string
&in_path
,
221 seastar::open_flags mode
) override
;
223 write_ertr::future
<> write(
226 uint16_t stream
= 0) override
;
// Keep the base-class read overloads visible alongside the override below.
228 using RBMDevice::read
;
229 read_ertr::future
<> read(
231 bufferptr
&bptr
) override
;
233 close_ertr::future
<> close() override
;
235 write_ertr::future
<> writev(
238 uint16_t stream
= 0) final
;
// Builds a seastar::stat_data with block_size filled in and returns it as a
// ready future; the rest of the body is not visible in this listing.
240 stat_device_ret
stat_device() final
{
241 seastar::stat_data stat
;
242 stat
.block_size
= block_size
;
244 return stat_device_ret(
245 read_ertr::ready_future_marker
{},
// Body is not visible in this listing.
250 std::string
get_device_path() const final
{
256 using EphemeralRBMDeviceRef
= std::unique_ptr
<EphemeralRBMDevice
>;
257 EphemeralRBMDeviceRef
create_test_ephemeral(
258 uint64_t journal_size
= DEFAULT_TEST_CBJOURNAL_SIZE
,
259 uint64_t data_size
= DEFAULT_TEST_CBJOURNAL_SIZE
);