1 //-*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
6 #include "crimson/os/seastore/seastore_types.h"
7 #include "crimson/os/seastore/random_block_manager.h"
8 #include "crimson/os/seastore/device.h"
16 namespace crimson::os::seastore::random_block_device
{
18 // from blk/BlockDevice.h
19 #if defined(__linux__)
20 #if !defined(F_SET_FILE_RW_HINT)
21 #define F_LINUX_SPECIFIC_BASE 1024
22 #define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
24 // These values match Linux definition
25 // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fcntl.h#n56
26 #define WRITE_LIFE_NOT_SET 0 // No hint information set
27 #define WRITE_LIFE_NONE 1 // No hints about write life time
28 #define WRITE_LIFE_SHORT 2 // Data written has a short life time
29 #define WRITE_LIFE_MEDIUM 3 // Data written has a medium life time
30 #define WRITE_LIFE_LONG 4 // Data written has a long life time
31 #define WRITE_LIFE_EXTREME 5 // Data written has an extremely long life time
32 #define WRITE_LIFE_MAX 6
// On systems that don't have WRITE_LIFE_*, only one FD is used
// and all files are created equal
36 #define WRITE_LIFE_NOT_SET 0 // No hint information set
37 #define WRITE_LIFE_NONE 0 // No hints about write life time
38 #define WRITE_LIFE_SHORT 0 // Data written has a short life time
39 #define WRITE_LIFE_MEDIUM 0 // Data written has a medium life time
40 #define WRITE_LIFE_LONG 0 // Data written has a long life time
41 #define WRITE_LIFE_EXTREME 0 // Data written has an extremely long life time
42 #define WRITE_LIFE_MAX 1
45 using read_ertr
= crimson::errorator
<
46 crimson::ct_error::input_output_error
,
47 crimson::ct_error::invarg
,
48 crimson::ct_error::enoent
,
49 crimson::ct_error::erange
>;
51 using write_ertr
= crimson::errorator
<
52 crimson::ct_error::input_output_error
,
53 crimson::ct_error::invarg
,
54 crimson::ct_error::ebadf
,
55 crimson::ct_error::enospc
>;
57 using open_ertr
= crimson::errorator
<
58 crimson::ct_error::input_output_error
,
59 crimson::ct_error::invarg
,
60 crimson::ct_error::enoent
>;
62 using nvme_command_ertr
= crimson::errorator
<
63 crimson::ct_error::input_output_error
>;
65 using discard_ertr
= crimson::errorator
<
66 crimson::ct_error::input_output_error
>;
// Size of the RBM superblock, in bytes.
constexpr uint32_t RBM_SUPERBLOCK_SIZE{4096};
70 // TODO: This allows the device to manage crc on a block by itself
71 RBM_NVME_END_TO_END_PROTECTION
= 1,
72 RBM_BITMAP_BLOCK_CRC
= 2,
75 class RBMDevice
: public Device
{
// Converts `addr` with convert_paddr_to_abs_addr() and forwards to the
// read() overload that takes the resulting absolute address plus `out`.
78 read_ertr::future
<> read (
81 ceph::bufferptr
&out
) final
{
82 uint64_t rbm_addr
= convert_paddr_to_abs_addr(addr
);
83 return read(rbm_addr
, out
);
86 rbm_metadata_header_t super
;
89 virtual ~RBMDevice() = default;
// Default-constructs a T via std::make_unique<T>() and returns the owning
// pointer.
92 static std::unique_ptr
<T
> create() {
93 return std::make_unique
<T
>();
// Returns the device id stored in super.config.spec.
96 device_id_t
get_device_id() const {
97 return super
.config
.spec
.id
;
// Returns the magic value stored in super.config.spec.
100 magic_t
get_magic() const final
{
101 return super
.config
.spec
.magic
;
// Always reports device_type_t::RANDOM_BLOCK_SSD.
104 device_type_t
get_device_type() const final
{
105 return device_type_t::RANDOM_BLOCK_SSD
;
// Always reports backend_type_t::RANDOM_BLOCK.
108 backend_type_t
get_backend_type() const final
{
109 return backend_type_t::RANDOM_BLOCK
;
// Returns a const reference to the seastore metadata in super.config.
112 const seastore_meta_t
&get_meta() const final
{
113 return super
.config
.meta
;
// Returns a mutable reference to the secondary device set in super.config.
116 secondary_device_set_t
& get_secondary_devices() final
{
117 return super
.config
.secondary_devices
;
119 std::size_t get_available_size() const { return super
.size
; }
120 extent_len_t
get_block_size() const { return super
.block_size
; }
122 virtual read_ertr::future
<> read(
124 bufferptr
&bptr
) = 0;
 * Gives the device a hint about how data is classified: data with similar
 * lifetimes shares a stream value, and data with the same stream value is
 * managed together in the SSD for better write performance.
133 virtual write_ertr::future
<> write(
136 uint16_t stream
= 0) = 0;
// Default implementation performs no work and returns seastar::now();
// being virtual, derived devices may override it.
138 virtual discard_ertr::future
<> discard(
140 uint64_t len
) { return seastar::now(); }
142 virtual open_ertr::future
<> open(
143 const std::string
& path
,
144 seastar::open_flags mode
) = 0;
146 virtual write_ertr::future
<> writev(
149 uint16_t stream
= 0) = 0;
151 bool is_data_protection_enabled() const { return false; }
153 mkfs_ret
do_mkfs(device_config_t
);
155 mount_ret
do_mount();
157 write_ertr::future
<> write_rbm_header();
159 read_ertr::future
<rbm_metadata_header_t
> read_rbm_header(rbm_abs_addr addr
);
161 using stat_device_ret
=
162 read_ertr::future
<seastar::stat_data
>;
163 virtual stat_device_ret
stat_device() = 0;
165 virtual std::string
get_device_path() const = 0;
// Returns the journal_size field of the metadata header held in `super`.
167 uint64_t get_journal_size() const {
168 return super
.journal_size
;
// Returns RBM_SUPERBLOCK_SIZE as the journal's start address.
171 static rbm_abs_addr
get_journal_start() {
172 return RBM_SUPERBLOCK_SIZE
;
175 using RBMDeviceRef
= std::unique_ptr
<RBMDevice
>;
// Default journal size used by the test device: 2^26 bytes (64 MiB).
constexpr uint64_t DEFAULT_TEST_CBJOURNAL_SIZE = uint64_t{1} << 26;
179 class EphemeralRBMDevice
: public RBMDevice
{
// Block size reported by get_block_size(); zero until the constructor sets it.
uint64_t block_size{0};
// Fixed 4096-byte block size constant for tests.
static constexpr uint32_t TEST_BLOCK_SIZE{4096};
185 EphemeralRBMDevice(size_t size
, uint64_t block_size
) :
186 size(size
), block_size(block_size
), buf(nullptr) {
188 ~EphemeralRBMDevice() {
195 std::size_t get_available_size() const final
{ return size
; }
196 extent_len_t
get_block_size() const final
{ return block_size
; }
198 mount_ret
mount() final
{
// Sets super.journal_size to DEFAULT_TEST_CBJOURNAL_SIZE, then delegates
// to do_mkfs(config).
202 mkfs_ret
mkfs(device_config_t config
) final
{
203 super
.journal_size
= DEFAULT_TEST_CBJOURNAL_SIZE
;
204 return do_mkfs(config
);
207 open_ertr::future
<> open(
208 const std::string
&in_path
,
209 seastar::open_flags mode
) override
;
211 write_ertr::future
<> write(
214 uint16_t stream
= 0) override
;
216 using RBMDevice::read
;
217 read_ertr::future
<> read(
219 bufferptr
&bptr
) override
;
221 close_ertr::future
<> close() override
;
223 write_ertr::future
<> writev(
226 uint16_t stream
= 0) final
;
228 stat_device_ret
stat_device() final
{
229 seastar::stat_data stat
;
230 stat
.block_size
= block_size
;
232 return stat_device_ret(
233 read_ertr::ready_future_marker
{},
238 std::string
get_device_path() const final
{
244 using EphemeralRBMDeviceRef
= std::unique_ptr
<EphemeralRBMDevice
>;
245 EphemeralRBMDeviceRef
create_test_ephemeral(
246 uint64_t journal_size
= DEFAULT_TEST_CBJOURNAL_SIZE
,
247 uint64_t data_size
= DEFAULT_TEST_CBJOURNAL_SIZE
);