]> git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/random_block_manager/rbm_device.h
7f30b197f3a5362f12eefd6d3f0536f4dd09d36e
[ceph.git] / ceph / src / crimson / os / seastore / random_block_manager / rbm_device.h
1 //-*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #pragma once
5
6 #include "crimson/os/seastore/seastore_types.h"
7 #include "crimson/os/seastore/random_block_manager.h"
8 #include "crimson/os/seastore/device.h"
9
// Forward declaration only; nothing in this header needs the full type.
// NOTE(review): the classes below refer to `ceph::bufferptr` / `bufferptr`,
// not `ceph::buffer::bufferptr` -- confirm this declaration is still needed.
namespace ceph::buffer {
class bufferptr;
} // namespace ceph::buffer
15
16 namespace crimson::os::seastore::random_block_device {
17
// Write-lifetime hint constants, taken from blk/BlockDevice.h.
#ifdef __linux__
// Supply the fcntl command ourselves when the system headers do not.
#ifndef F_SET_FILE_RW_HINT
#define F_LINUX_SPECIFIC_BASE 1024
#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
#endif
// The values below mirror the Linux definitions, see
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fcntl.h#n56
#define WRITE_LIFE_NOT_SET 0 // no hint information has been set
#define WRITE_LIFE_NONE 1 // no hint about the write life time
#define WRITE_LIFE_SHORT 2 // written data has a short life time
#define WRITE_LIFE_MEDIUM 3 // written data has a medium life time
#define WRITE_LIFE_LONG 4 // written data has a long life time
#define WRITE_LIFE_EXTREME 5 // written data has an extremely long life time
#define WRITE_LIFE_MAX 6
#else
// Systems without WRITE_LIFE_* support use a single FD, so every hint
// collapses to the same value and all files are treated alike.
#define WRITE_LIFE_NOT_SET 0 // no hint information has been set
#define WRITE_LIFE_NONE 0 // no hint about the write life time
#define WRITE_LIFE_SHORT 0 // written data has a short life time
#define WRITE_LIFE_MEDIUM 0 // written data has a medium life time
#define WRITE_LIFE_LONG 0 // written data has a long life time
#define WRITE_LIFE_EXTREME 0 // written data has an extremely long life time
#define WRITE_LIFE_MAX 1
#endif
44
45 using read_ertr = crimson::errorator<
46 crimson::ct_error::input_output_error,
47 crimson::ct_error::invarg,
48 crimson::ct_error::enoent,
49 crimson::ct_error::erange>;
50
51 using write_ertr = crimson::errorator<
52 crimson::ct_error::input_output_error,
53 crimson::ct_error::invarg,
54 crimson::ct_error::ebadf,
55 crimson::ct_error::enospc>;
56
57 using open_ertr = crimson::errorator<
58 crimson::ct_error::input_output_error,
59 crimson::ct_error::invarg,
60 crimson::ct_error::enoent>;
61
62 using nvme_command_ertr = crimson::errorator<
63 crimson::ct_error::input_output_error>;
64
65 using discard_ertr = crimson::errorator<
66 crimson::ct_error::input_output_error>;
67
/// Size in bytes of the RBM superblock; get_journal_start() returns this.
constexpr uint32_t RBM_SUPERBLOCK_SIZE = 4096;

/// Feature flags; each enumerator occupies its own bit.
enum {
  // TODO: This allows the device to manage crc on a block by itself
  RBM_NVME_END_TO_END_PROTECTION = 1 << 0,
  RBM_BITMAP_BLOCK_CRC = 1 << 1,
};
74
75 class RBMDevice : public Device {
76 public:
77 using Device::read;
78 read_ertr::future<> read (
79 paddr_t addr,
80 size_t len,
81 ceph::bufferptr &out) final {
82 uint64_t rbm_addr = convert_paddr_to_abs_addr(addr);
83 return read(rbm_addr, out);
84 }
85 protected:
86 rbm_metadata_header_t super;
87 public:
88 RBMDevice() {}
89 virtual ~RBMDevice() = default;
90
91 template <typename T>
92 static std::unique_ptr<T> create() {
93 return std::make_unique<T>();
94 }
95
96 device_id_t get_device_id() const {
97 return super.config.spec.id;
98 }
99
100 magic_t get_magic() const final {
101 return super.config.spec.magic;
102 }
103
104 device_type_t get_device_type() const final {
105 return device_type_t::RANDOM_BLOCK_SSD;
106 }
107
108 backend_type_t get_backend_type() const final {
109 return backend_type_t::RANDOM_BLOCK;
110 }
111
112 const seastore_meta_t &get_meta() const final {
113 return super.config.meta;
114 }
115
116 secondary_device_set_t& get_secondary_devices() final {
117 return super.config.secondary_devices;
118 }
119 std::size_t get_available_size() const { return super.size; }
120 extent_len_t get_block_size() const { return super.block_size; }
121
122 virtual read_ertr::future<> read(
123 uint64_t offset,
124 bufferptr &bptr) = 0;
125
126 /*
127 * Multi-stream write
128 *
129 * Give hint to device about classification of data whose life time is similar
130 * with each other. Data with same stream value will be managed together in
131 * SSD for better write performance.
132 */
133 virtual write_ertr::future<> write(
134 uint64_t offset,
135 bufferptr &&bptr,
136 uint16_t stream = 0) = 0;
137
138 virtual discard_ertr::future<> discard(
139 uint64_t offset,
140 uint64_t len) { return seastar::now(); }
141
142 virtual open_ertr::future<> open(
143 const std::string& path,
144 seastar::open_flags mode) = 0;
145
146 virtual write_ertr::future<> writev(
147 uint64_t offset,
148 ceph::bufferlist bl,
149 uint16_t stream = 0) = 0;
150
151 bool is_data_protection_enabled() const { return false; }
152
153 mkfs_ret do_mkfs(device_config_t);
154
155 mount_ret do_mount();
156
157 write_ertr::future<> write_rbm_header();
158
159 read_ertr::future<rbm_metadata_header_t> read_rbm_header(rbm_abs_addr addr);
160
161 using stat_device_ret =
162 read_ertr::future<seastar::stat_data>;
163 virtual stat_device_ret stat_device() = 0;
164
165 virtual std::string get_device_path() const = 0;
166
167 uint64_t get_journal_size() const {
168 return super.journal_size;
169 }
170
171 static rbm_abs_addr get_journal_start() {
172 return RBM_SUPERBLOCK_SIZE;
173 }
174 };
175 using RBMDeviceRef = std::unique_ptr<RBMDevice>;
176
/// Default size used for test devices: 2^26 bytes (64 MiB).
constexpr uint64_t DEFAULT_TEST_CBJOURNAL_SIZE = uint64_t{1} << 26;
178
179 class EphemeralRBMDevice : public RBMDevice {
180 public:
181 uint64_t size = 0;
182 uint64_t block_size = 0;
183 constexpr static uint32_t TEST_BLOCK_SIZE = 4096;
184
185 EphemeralRBMDevice(size_t size, uint64_t block_size) :
186 size(size), block_size(block_size), buf(nullptr) {
187 }
188 ~EphemeralRBMDevice() {
189 if (buf) {
190 ::munmap(buf, size);
191 buf = nullptr;
192 }
193 }
194
195 std::size_t get_available_size() const final { return size; }
196 extent_len_t get_block_size() const final { return block_size; }
197
198 mount_ret mount() final {
199 return do_mount();
200 }
201
202 mkfs_ret mkfs(device_config_t config) final {
203 super.journal_size = DEFAULT_TEST_CBJOURNAL_SIZE;
204 return do_mkfs(config);
205 }
206
207 open_ertr::future<> open(
208 const std::string &in_path,
209 seastar::open_flags mode) override;
210
211 write_ertr::future<> write(
212 uint64_t offset,
213 bufferptr &&bptr,
214 uint16_t stream = 0) override;
215
216 using RBMDevice::read;
217 read_ertr::future<> read(
218 uint64_t offset,
219 bufferptr &bptr) override;
220
221 close_ertr::future<> close() override;
222
223 write_ertr::future<> writev(
224 uint64_t offset,
225 ceph::bufferlist bl,
226 uint16_t stream = 0) final;
227
228 stat_device_ret stat_device() final {
229 seastar::stat_data stat;
230 stat.block_size = block_size;
231 stat.size = size;
232 return stat_device_ret(
233 read_ertr::ready_future_marker{},
234 stat
235 );
236 }
237
238 std::string get_device_path() const final {
239 return "";
240 }
241
242 char *buf;
243 };
244 using EphemeralRBMDeviceRef = std::unique_ptr<EphemeralRBMDevice>;
245 EphemeralRBMDeviceRef create_test_ephemeral(
246 uint64_t journal_size = DEFAULT_TEST_CBJOURNAL_SIZE,
247 uint64_t data_size = DEFAULT_TEST_CBJOURNAL_SIZE);
248
249 }