]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/bluestore/NVMEDevice.h
update sources to v12.1.1
[ceph.git] / ceph / src / os / bluestore / NVMEDevice.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2015 XSky <haomai@xsky.com>
7 *
8 * Author: Haomai Wang <haomaiwang@gmail.com>
9 *
10 * This is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License version 2.1, as published by the Free Software
13 * Foundation. See file COPYING.
14 *
15 */
16
17 #ifndef CEPH_OS_BLUESTORE_NVMEDEVICE
18 #define CEPH_OS_BLUESTORE_NVMEDEVICE
19
20 #include <queue>
21 #include <map>
22 #include <limits>
23
// _Static_assert is the C11 spelling of a static assertion; C++ spells it
// static_assert, so alias the C11 name to the C++ keyword.
25 #define _Static_assert static_assert
26
27
28 #include "include/interval_set.h"
29 #include "common/ceph_time.h"
30 #include "common/Mutex.h"
31 #include "common/Cond.h"
32 #include "BlockDevice.h"
33
// The three I/O command kinds named in this header: read, write and flush.
// Enumerator values are spelled out; they match the implicit numbering.
enum class IOCommand {
  READ_COMMAND = 0,
  WRITE_COMMAND = 1,
  FLUSH_COMMAND = 2,
};
39
40 class Task;
41 class PerfCounters;
42 class SharedDriverData;
43
44 class NVMEDevice : public BlockDevice {
45 /**
46 * points to pinned, physically contiguous memory region;
47 * contains 4KB IDENTIFY structure for controller which is
48 * target for CONTROLLER IDENTIFY command during initialization
49 */
50 SharedDriverData *driver;
51 string name;
52
53 uint64_t size;
54 uint64_t block_size;
55
56 bool aio_stop;
57
/**
 * Index of buffered byte ranges keyed by their start offset.
 *
 * Every entry points into a buffer handed to insert(); only the pointer is
 * stored, the bytes are not copied, and nothing here frees that buffer.
 * insert() keeps the entries non-overlapping by trimming, splitting or
 * dropping whatever the new range covers.  read_overlap() copies the part
 * of a requested range that is present into a destination buffer.
 *
 * NOTE(review): `off + len` is computed without an overflow check, so
 * callers are assumed to pass ranges that do not wrap around uint64_t.
 */
struct BufferedExtents {
  struct Extent {
    uint64_t x_len;     // number of bytes this entry currently covers
    uint64_t x_off;     // offset inside `data` of the entry's first byte
    const char *data;   // start of the buffer that was passed to insert()
    uint64_t data_len;  // full length of that buffer (the `len` of insert())
  };
  using Offset = uint64_t;
  std::map<Offset, Extent> buffered_extents;
  // Lowest start / highest end inserted since the last clear().  They only
  // ever widen, so they are a conservative bound for the quick miss test in
  // read_overlap().
  uint64_t left_edge = std::numeric_limits<uint64_t>::max();
  uint64_t right_edge = 0;

  /**
   * Debug helper: asserts that no two entries overlap.
   *
   * The map is ordered by start offset, so it is enough to check that each
   * entry starts at or after the end of the one before it.
   */
  void verify() {
    uint64_t prev_end = 0;
    for (const auto &entry : buffered_extents) {
      assert(entry.first >= prev_end);
      prev_end = entry.first + entry.second.x_len;
    }
    (void)prev_end;  // keeps NDEBUG builds free of "set but unused" warnings
  }

  /**
   * Record that [off, off + len) is backed by `data`.
   *
   * Existing entries that intersect the new range are adjusted so that the
   * new range wins: a part lying left of `off` is kept (trimmed), a part
   * lying right of `off + len` is re-inserted as its own entry, and
   * anything fully covered is removed.
   *
   * @param off   start offset of the new range
   * @param len   length of the new range; a zero length is a no-op
   * @param data  buffer holding the `len` bytes (pointer is stored)
   */
  void insert(uint64_t off, uint64_t len, const char *data) {
    // An empty range must not touch the index: without this guard an
    // existing entry spanning `off` would be split and the re-inserted tail
    // then overwritten by the empty entry, dropping buffered bytes.
    if (len == 0)
      return;

    // First entry that can intersect: the one at/after `off`, or its
    // predecessor when that predecessor still reaches past `off`.
    auto it = buffered_extents.lower_bound(off);
    if (it != buffered_extents.begin()) {
      --it;
      if (it->first + it->second.x_len <= off)
        ++it;
    }

    const uint64_t end = off + len;
    if (off < left_edge)
      left_edge = off;
    if (end > right_edge)
      right_edge = end;

    while (it != buffered_extents.end() && it->first < end) {
      const uint64_t extent_it_end = it->first + it->second.x_len;
      assert(extent_it_end >= off);
      if (it->first <= off) {
        // `it` starts at or before the new range: keep only [first, off).
        const uint64_t kept = off - it->first;
        it->second.x_len = kept;
        if (extent_it_end > end) {
          // `it` also sticks out on the right: keep [end, extent_it_end)
          // as a separate entry pointing further into the same buffer.
          buffered_extents[end] = Extent{
            extent_it_end - end, it->second.x_off + kept + len,
            it->second.data, it->second.data_len};
        }
        ++it;
      } else {
        // `it` starts inside the new range: its head is always covered.
        if (extent_it_end > end) {
          // Only the part right of `end` survives, under a new key.
          const uint64_t overlap = end - it->first;
          buffered_extents[end] = Extent{
            it->second.x_len - overlap, it->second.x_off + overlap,
            it->second.data, it->second.data_len};
        }
        it = buffered_extents.erase(it);
      }
    }
    // Replaces a now-empty entry at `off` if the loop left one behind.
    buffered_extents[off] = Extent{len, 0, data, len};

    if (0)
      verify();
  }

  /**
   * Copy `copylen` bytes from the entry `it` into `dst`.
   *
   * @param dst          destination buffer
   * @param dst_raw_len  size of `dst` (only used by the disabled checks)
   * @param dst_off      offset in `dst` to write at
   * @param it           entry to read from
   * @param src_off      offset relative to the entry's first byte
   * @param copylen      number of bytes to copy
   */
  void memcpy_check(char *dst, uint64_t dst_raw_len, uint64_t dst_off,
                    std::map<Offset, Extent>::iterator &it, uint64_t src_off,
                    uint64_t copylen) {
    // Bounds checks are compiled but switched off; flip to 1 when debugging.
    if (0) {
      assert(dst_off + copylen <= dst_raw_len);
      assert(it->second.x_off + src_off + copylen <= it->second.data_len);
    }
    memcpy(dst + dst_off, it->second.data + it->second.x_off + src_off, copylen);
  }

  /**
   * Copy every buffered byte that falls inside [off, off + len) into `buf`
   * at its position relative to `off`.  Bytes of the range that are not
   * buffered leave `buf` untouched.
   *
   * @param off  start offset of the requested range
   * @param len  length of the requested range
   * @param buf  destination, at least `len` bytes
   * @return     total number of bytes copied
   */
  uint64_t read_overlap(uint64_t off, uint64_t len, char *buf) {
    const uint64_t end = off + len;
    if (len == 0 || end <= left_edge || off >= right_edge)
      return 0;

    // Same starting-point search as insert().
    auto it = buffered_extents.lower_bound(off);
    if (it != buffered_extents.begin()) {
      --it;
      if (it->first + it->second.x_len <= off)
        ++it;
    }

    uint64_t copied = 0;
    for (; it != buffered_extents.end() && it->first < end; ++it) {
      const uint64_t extent_it_end = it->first + it->second.x_len;
      assert(extent_it_end >= off);
      // Intersection of the entry with the requested range.
      const uint64_t from = it->first > off ? it->first : off;
      const uint64_t to = extent_it_end < end ? extent_it_end : end;
      const uint64_t copy_len = to - from;
      memcpy_check(buf, len, from - off, it, from - it->first, copy_len);
      copied += copy_len;
    }
    return copied;
  }

  /** Drop every entry and reset the edge bounds to their initial values. */
  void clear() {
    buffered_extents.clear();
    left_edge = std::numeric_limits<uint64_t>::max();
    right_edge = 0;
  }
};
196 Mutex buffer_lock;
197 BufferedExtents buffered_extents;
198 Task *buffered_task_head = nullptr;
199
200 static void init();
201 public:
202 SharedDriverData *get_driver() { return driver; }
203
204 public:
205 aio_callback_t aio_callback;
206 void *aio_callback_priv;
207
208 NVMEDevice(CephContext* cct, aio_callback_t cb, void *cbpriv);
209
210 bool supported_bdev_label() override { return false; }
211
212 void aio_submit(IOContext *ioc) override;
213
214 uint64_t get_size() const override {
215 return size;
216 }
217 uint64_t get_block_size() const override {
218 return block_size;
219 }
220
221 int read(uint64_t off, uint64_t len, bufferlist *pbl,
222 IOContext *ioc,
223 bool buffered) override;
224 int aio_read(
225 uint64_t off,
226 uint64_t len,
227 bufferlist *pbl,
228 IOContext *ioc) override;
229 int aio_write(uint64_t off, bufferlist& bl,
230 IOContext *ioc,
231 bool buffered) override;
232 int write(uint64_t off, bufferlist& bl, bool buffered) override;
233 int flush() override;
234 int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override;
235
236 // for managing buffered readers/writers
237 int invalidate_cache(uint64_t off, uint64_t len) override;
238 int open(const string& path) override;
239 void close() override;
240 int collect_metadata(string prefix, map<string,string> *pm) const override;
241 };
242
243 #endif