]> git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/random_block_manager/rbm_device.h
update ceph source to reef 18.2.1
[ceph.git] / ceph / src / crimson / os / seastore / random_block_manager / rbm_device.h
1 //-*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #pragma once
5
6 #include "crimson/os/seastore/seastore_types.h"
7 #include "crimson/os/seastore/random_block_manager.h"
8 #include "crimson/os/seastore/device.h"
9
// Forward declaration of ceph::buffer::bufferptr.
namespace ceph::buffer {
class bufferptr;
} // namespace ceph::buffer
15
16 namespace crimson::os::seastore::random_block_device {
17
// Write-lifetime hint constants, taken from blk/BlockDevice.h.
//
// NOTE: these are preprocessor macros, so they are visible globally even
// though they are written inside a namespace block.
#ifdef __linux__

// Supply the fcntl command number when the system headers do not provide it.
#ifndef F_SET_FILE_RW_HINT
#define F_LINUX_SPECIFIC_BASE 1024
#define F_SET_FILE_RW_HINT    (F_LINUX_SPECIFIC_BASE + 14)
#endif

// These values match the Linux definitions:
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/fcntl.h#n56
#define WRITE_LIFE_NOT_SET 0  // No hint information set
#define WRITE_LIFE_NONE    1  // No hints about write life time
#define WRITE_LIFE_SHORT   2  // Data written has a short life time
#define WRITE_LIFE_MEDIUM  3  // Data written has a medium life time
#define WRITE_LIFE_LONG    4  // Data written has a long life time
#define WRITE_LIFE_EXTREME 5  // Data written has an extremely long life time
#define WRITE_LIFE_MAX     6  // One past the largest hint value

#else // !__linux__

// On systems that don't have WRITE_LIFE_* hints, only one FD is used
// and all files are created equal: every hint collapses to 0.
#define WRITE_LIFE_NOT_SET 0  // No hint information set
#define WRITE_LIFE_NONE    0  // No hints about write life time
#define WRITE_LIFE_SHORT   0  // Data written has a short life time
#define WRITE_LIFE_MEDIUM  0  // Data written has a medium life time
#define WRITE_LIFE_LONG    0  // Data written has a long life time
#define WRITE_LIFE_EXTREME 0  // Data written has an extremely long life time
#define WRITE_LIFE_MAX     1  // One past the largest hint value

#endif // __linux__
44
45 using read_ertr = crimson::errorator<
46 crimson::ct_error::input_output_error,
47 crimson::ct_error::invarg,
48 crimson::ct_error::enoent,
49 crimson::ct_error::erange>;
50
51 using write_ertr = crimson::errorator<
52 crimson::ct_error::input_output_error,
53 crimson::ct_error::invarg,
54 crimson::ct_error::ebadf,
55 crimson::ct_error::enospc>;
56
57 using open_ertr = crimson::errorator<
58 crimson::ct_error::input_output_error,
59 crimson::ct_error::invarg,
60 crimson::ct_error::enoent>;
61
62 using nvme_command_ertr = crimson::errorator<
63 crimson::ct_error::input_output_error>;
64
65 using discard_ertr = crimson::errorator<
66 crimson::ct_error::input_output_error>;
67
// Size set aside for the RBM superblock.
constexpr uint32_t RBM_SUPERBLOCK_SIZE{4096};

// RBM option constants (unnamed enumeration, so the enumerators convert
// implicitly to integers).
enum {
  // TODO: This allows the device to manage crc on a block by itself
  RBM_NVME_END_TO_END_PROTECTION = 1,
  RBM_BITMAP_BLOCK_CRC = 2,
};
74
75 class RBMDevice : public Device {
76 public:
77 using Device::read;
78 read_ertr::future<> read (
79 paddr_t addr,
80 size_t len,
81 ceph::bufferptr &out) final {
82 uint64_t rbm_addr = convert_paddr_to_abs_addr(addr);
83 return read(rbm_addr, out);
84 }
85 protected:
86 rbm_metadata_header_t super;
87 rbm_shard_info_t shard_info;
88 public:
89 RBMDevice() {}
90 virtual ~RBMDevice() = default;
91
92 template <typename T>
93 static std::unique_ptr<T> create() {
94 return std::make_unique<T>();
95 }
96
97 device_id_t get_device_id() const {
98 return super.config.spec.id;
99 }
100
101 magic_t get_magic() const final {
102 return super.config.spec.magic;
103 }
104
105 device_type_t get_device_type() const final {
106 return device_type_t::RANDOM_BLOCK_SSD;
107 }
108
109 backend_type_t get_backend_type() const final {
110 return backend_type_t::RANDOM_BLOCK;
111 }
112
113 const seastore_meta_t &get_meta() const final {
114 return super.config.meta;
115 }
116
117 secondary_device_set_t& get_secondary_devices() final {
118 return super.config.secondary_devices;
119 }
120 std::size_t get_available_size() const { return super.size; }
121 extent_len_t get_block_size() const { return super.block_size; }
122
123 virtual read_ertr::future<> read(
124 uint64_t offset,
125 bufferptr &bptr) = 0;
126
127 /*
128 * Multi-stream write
129 *
130 * Give hint to device about classification of data whose life time is similar
131 * with each other. Data with same stream value will be managed together in
132 * SSD for better write performance.
133 */
134 virtual write_ertr::future<> write(
135 uint64_t offset,
136 bufferptr &&bptr,
137 uint16_t stream = 0) = 0;
138
139 virtual discard_ertr::future<> discard(
140 uint64_t offset,
141 uint64_t len) { return seastar::now(); }
142
143 virtual open_ertr::future<> open(
144 const std::string& path,
145 seastar::open_flags mode) = 0;
146
147 virtual write_ertr::future<> writev(
148 uint64_t offset,
149 ceph::bufferlist bl,
150 uint16_t stream = 0) = 0;
151
152 bool is_data_protection_enabled() const { return false; }
153
154 mkfs_ret do_mkfs(device_config_t);
155
156 // shard 0 mkfs
157 mkfs_ret do_primary_mkfs(device_config_t, int shard_num, size_t journal_size);
158
159 mount_ret do_mount();
160
161 mount_ret do_shard_mount();
162
163 write_ertr::future<> write_rbm_header();
164
165 read_ertr::future<rbm_metadata_header_t> read_rbm_header(rbm_abs_addr addr);
166
167 using stat_device_ret =
168 read_ertr::future<seastar::stat_data>;
169 virtual stat_device_ret stat_device() = 0;
170
171 virtual std::string get_device_path() const = 0;
172
173 uint64_t get_journal_size() const {
174 return super.journal_size;
175 }
176
177 static rbm_abs_addr get_shard_reserved_size() {
178 return RBM_SUPERBLOCK_SIZE;
179 }
180
181 rbm_abs_addr get_shard_journal_start() {
182 return shard_info.start_offset + get_shard_reserved_size();
183 }
184
185 uint64_t get_shard_start() const {
186 return shard_info.start_offset;
187 }
188
189 uint64_t get_shard_end() const {
190 return shard_info.start_offset + shard_info.size;
191 }
192 };
193 using RBMDeviceRef = std::unique_ptr<RBMDevice>;
194
// 2^26 (67108864); default for both size arguments of
// create_test_ephemeral().
constexpr uint64_t DEFAULT_TEST_CBJOURNAL_SIZE = uint64_t{1} << 26;
196
197 class EphemeralRBMDevice : public RBMDevice {
198 public:
199 uint64_t size = 0;
200 uint64_t block_size = 0;
201 constexpr static uint32_t TEST_BLOCK_SIZE = 4096;
202
203 EphemeralRBMDevice(size_t size, uint64_t block_size) :
204 size(size), block_size(block_size), buf(nullptr) {
205 }
206 ~EphemeralRBMDevice() {
207 if (buf) {
208 ::munmap(buf, size);
209 buf = nullptr;
210 }
211 }
212
213 std::size_t get_available_size() const final { return size; }
214 extent_len_t get_block_size() const final { return block_size; }
215
216 mount_ret mount() final;
217 mkfs_ret mkfs(device_config_t config) final;
218
219 open_ertr::future<> open(
220 const std::string &in_path,
221 seastar::open_flags mode) override;
222
223 write_ertr::future<> write(
224 uint64_t offset,
225 bufferptr &&bptr,
226 uint16_t stream = 0) override;
227
228 using RBMDevice::read;
229 read_ertr::future<> read(
230 uint64_t offset,
231 bufferptr &bptr) override;
232
233 close_ertr::future<> close() override;
234
235 write_ertr::future<> writev(
236 uint64_t offset,
237 ceph::bufferlist bl,
238 uint16_t stream = 0) final;
239
240 stat_device_ret stat_device() final {
241 seastar::stat_data stat;
242 stat.block_size = block_size;
243 stat.size = size;
244 return stat_device_ret(
245 read_ertr::ready_future_marker{},
246 stat
247 );
248 }
249
250 std::string get_device_path() const final {
251 return "";
252 }
253
254 char *buf;
255 };
256 using EphemeralRBMDeviceRef = std::unique_ptr<EphemeralRBMDevice>;
257 EphemeralRBMDeviceRef create_test_ephemeral(
258 uint64_t journal_size = DEFAULT_TEST_CBJOURNAL_SIZE,
259 uint64_t data_size = DEFAULT_TEST_CBJOURNAL_SIZE);
260
261 }