]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/bluestore/NVMEDevice.h
a44b1e1ecc9e0d6e5afeaa6d93342657278c848f
[ceph.git] / ceph / src / os / bluestore / NVMEDevice.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2015 XSky <haomai@xsky.com>
7 *
8 * Author: Haomai Wang <haomaiwang@gmail.com>
9 *
10 * This is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License version 2.1, as published by the Free Software
13 * Foundation. See file COPYING.
14 *
15 */
16
17 #ifndef CEPH_OS_BLUESTORE_NVMEDEVICE
18 #define CEPH_OS_BLUESTORE_NVMEDEVICE
19
20 #include <queue>
21 #include <map>
22 #include <limits>
23
24 // since _Static_assert introduced in c11
25 #define _Static_assert static_assert
26
27
28 #include "include/interval_set.h"
29 #include "common/ceph_time.h"
30 #include "common/Mutex.h"
31 #include "BlockDevice.h"
32
/// Kind of I/O request. Enumerators keep their implicit values (0, 1, 2).
enum class IOCommand {
  READ_COMMAND,   ///< read request
  WRITE_COMMAND,  ///< write request
  FLUSH_COMMAND   ///< flush request
};
38
// Forward declarations: this header only needs these as incomplete types.
class Task;              // NVMEDevice keeps a Task* (buffered_task_head)
class PerfCounters;      // not referenced by the declarations in this header
class SharedDriverData;  // NVMEDevice keeps a SharedDriverData* (driver)
42
43 class NVMEDevice : public BlockDevice {
44 /**
45 * points to pinned, physically contiguous memory region;
46 * contains 4KB IDENTIFY structure for controller which is
47 * target for CONTROLLER IDENTIFY command during initialization
48 */
49 SharedDriverData *driver;
50 string name;
51
52 uint64_t size;
53 uint64_t block_size;
54
55 bool aio_stop;
56
57 struct BufferedExtents {
58 struct Extent {
59 uint64_t x_len;
60 uint64_t x_off;
61 const char *data;
62 uint64_t data_len;
63 };
64 using Offset = uint64_t;
65 map<Offset, Extent> buffered_extents;
66 uint64_t left_edge = std::numeric_limits<uint64_t>::max();
67 uint64_t right_edge = 0;
68
69 void verify() {
70 interval_set<uint64_t> m;
71 for (auto && it : buffered_extents) {
72 assert(!m.intersects(it.first, it.second.x_len));
73 m.insert(it.first, it.second.x_len);
74 }
75 }
76
77 void insert(uint64_t off, uint64_t len, const char *data) {
78 auto it = buffered_extents.lower_bound(off);
79 if (it != buffered_extents.begin()) {
80 --it;
81 if (it->first + it->second.x_len <= off)
82 ++it;
83 }
84 uint64_t end = off + len;
85 if (off < left_edge)
86 left_edge = off;
87 if (end > right_edge)
88 right_edge = end;
89 while (it != buffered_extents.end()) {
90 if (it->first >= end)
91 break;
92 uint64_t extent_it_end = it->first + it->second.x_len;
93 assert(extent_it_end >= off);
94 if (it->first <= off) {
95 if (extent_it_end > end) {
96 // <- data ->
97 // <- it ->
98 it->second.x_len -= (extent_it_end - off);
99 buffered_extents[end] = Extent{
100 extent_it_end - end, it->second.x_off + it->second.x_len + len, it->second.data, it->second.data_len};
101 } else {
102 // <- data ->
103 // <- it ->
104 assert(extent_it_end <= end);
105 it->second.x_len -= (extent_it_end - off);
106 }
107 ++it;
108 } else {
109 assert(it->first > off);
110 if (extent_it_end > end) {
111 // <- data ->
112 // <- it ->
113 uint64_t overlap = end - it->first;
114 buffered_extents[end] = Extent{
115 it->second.x_len - overlap, it->second.x_off + overlap, it->second.data, it->second.data_len};
116 } else {
117 // <- data ->
118 // <- it ->
119 }
120 buffered_extents.erase(it++);
121 }
122 }
123 buffered_extents[off] = Extent{
124 len, 0, data, len};
125
126 if (0)
127 verify();
128 }
129
130 void memcpy_check(char *dst, uint64_t dst_raw_len, uint64_t dst_off,
131 map<Offset, Extent>::iterator &it, uint64_t src_off, uint64_t copylen) {
132 if (0) {
133 assert(dst_off + copylen <= dst_raw_len);
134 assert(it->second.x_off + src_off + copylen <= it->second.data_len);
135 }
136 memcpy(dst + dst_off, it->second.data + it->second.x_off + src_off, copylen);
137 }
138
139 uint64_t read_overlap(uint64_t off, uint64_t len, char *buf) {
140 uint64_t end = off + len;
141 if (end <= left_edge || off >= right_edge)
142 return 0;
143
144 uint64_t copied = 0;
145 auto it = buffered_extents.lower_bound(off);
146 if (it != buffered_extents.begin()) {
147 --it;
148 if (it->first + it->second.x_len <= off)
149 ++it;
150 }
151 uint64_t copy_len;
152 while (it != buffered_extents.end()) {
153 if (it->first >= end)
154 break;
155 uint64_t extent_it_end = it->first + it->second.x_len;
156 assert(extent_it_end >= off);
157 if (it->first >= off) {
158 if (extent_it_end > end) {
159 // <- data ->
160 // <- it ->
161 copy_len = len - (it->first - off);
162 memcpy_check(buf, len, it->first - off, it, 0, copy_len);
163 } else {
164 // <- data ->
165 // <- it ->
166 copy_len = it->second.x_len;
167 memcpy_check(buf, len, it->first - off, it, 0, copy_len);
168 }
169 } else {
170 if (extent_it_end > end) {
171 // <- data ->
172 // <- it ->
173 copy_len = len;
174 memcpy_check(buf, len, 0, it, off - it->first, copy_len);
175 } else {
176 // <- data ->
177 // <- it ->
178 assert(extent_it_end <= end);
179 copy_len = it->first + it->second.x_len - off;
180 memcpy_check(buf, len, 0, it, off - it->first, copy_len);
181 }
182 }
183 copied += copy_len;
184 ++it;
185 }
186 return copied;
187 }
188
189 void clear() {
190 buffered_extents.clear();
191 left_edge = std::numeric_limits<uint64_t>::max();
192 right_edge = 0;
193 }
194 };
195 Mutex buffer_lock;
196 BufferedExtents buffered_extents;
197 Task *buffered_task_head = nullptr;
198
199 static void init();
200 public:
201 SharedDriverData *get_driver() { return driver; }
202
203 public:
204 aio_callback_t aio_callback;
205 void *aio_callback_priv;
206
207 NVMEDevice(CephContext* cct, aio_callback_t cb, void *cbpriv);
208
209 bool supported_bdev_label() override { return false; }
210
211 void aio_submit(IOContext *ioc) override;
212
213 uint64_t get_size() const override {
214 return size;
215 }
216 uint64_t get_block_size() const override {
217 return block_size;
218 }
219
220 int read(uint64_t off, uint64_t len, bufferlist *pbl,
221 IOContext *ioc,
222 bool buffered) override;
223 int aio_read(
224 uint64_t off,
225 uint64_t len,
226 bufferlist *pbl,
227 IOContext *ioc) override;
228 int aio_write(uint64_t off, bufferlist& bl,
229 IOContext *ioc,
230 bool buffered) override;
231 int write(uint64_t off, bufferlist& bl, bool buffered) override;
232 int flush() override;
233 int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override;
234
235 // for managing buffered readers/writers
236 int invalidate_cache(uint64_t off, uint64_t len) override;
237 int open(const string& path) override;
238 void close() override;
239 int collect_metadata(string prefix, map<string,string> *pm) const override;
240 };
241
242 #endif