]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 XSky <haomai@xsky.com> | |
7 | * | |
8 | * Author: Haomai Wang <haomaiwang@gmail.com> | |
9 | * | |
10 | * This is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License version 2.1, as published by the Free Software | |
13 | * Foundation. See file COPYING. | |
14 | * | |
15 | */ | |
16 | ||
17 | #ifndef CEPH_OS_BLUESTORE_NVMEDEVICE | |
18 | #define CEPH_OS_BLUESTORE_NVMEDEVICE | |
19 | ||
20 | #include <queue> | |
21 | #include <map> | |
22 | #include <limits> | |
23 | ||
24 | // _Static_assert is the C11 spelling; C++ only provides static_assert, so alias one to the other | |
25 | #define _Static_assert static_assert | |
26 | ||
27 | ||
7c673cae FG |
28 | #include "include/interval_set.h" |
29 | #include "common/ceph_time.h" | |
30 | #include "common/Mutex.h" | |
31 | #include "BlockDevice.h" | |
32 | ||
/// The three I/O command kinds named in this header: read, write and flush.
/// Enumerators carry no explicit values, so they number 0, 1, 2 in
/// declaration order.
enum class IOCommand {
  READ_COMMAND,
  WRITE_COMMAND,
  FLUSH_COMMAND,
};
38 | ||
// Forward declarations. Within this header, Task and SharedDriverData are
// only ever used through pointers, so their full definitions are not needed
// here; PerfCounters is declared but not otherwise referenced in this header.
class Task;
class PerfCounters;
class SharedDriverData;
42 | ||
43 | class NVMEDevice : public BlockDevice { | |
44 | /** | |
45 | * points to pinned, physically contiguous memory region; | |
46 | * contains 4KB IDENTIFY structure for controller which is | |
47 | * target for CONTROLLER IDENTIFY command during initialization | |
48 | */ | |
49 | SharedDriverData *driver; | |
50 | string name; | |
51 | ||
52 | uint64_t size; | |
53 | uint64_t block_size; | |
54 | ||
55 | bool aio_stop; | |
56 | ||
57 | struct BufferedExtents { | |
58 | struct Extent { | |
59 | uint64_t x_len; | |
60 | uint64_t x_off; | |
61 | const char *data; | |
62 | uint64_t data_len; | |
63 | }; | |
64 | using Offset = uint64_t; | |
65 | map<Offset, Extent> buffered_extents; | |
66 | uint64_t left_edge = std::numeric_limits<uint64_t>::max(); | |
67 | uint64_t right_edge = 0; | |
68 | ||
69 | void verify() { | |
70 | interval_set<uint64_t> m; | |
71 | for (auto && it : buffered_extents) { | |
72 | assert(!m.intersects(it.first, it.second.x_len)); | |
73 | m.insert(it.first, it.second.x_len); | |
74 | } | |
75 | } | |
76 | ||
77 | void insert(uint64_t off, uint64_t len, const char *data) { | |
78 | auto it = buffered_extents.lower_bound(off); | |
79 | if (it != buffered_extents.begin()) { | |
80 | --it; | |
81 | if (it->first + it->second.x_len <= off) | |
82 | ++it; | |
83 | } | |
84 | uint64_t end = off + len; | |
85 | if (off < left_edge) | |
86 | left_edge = off; | |
87 | if (end > right_edge) | |
88 | right_edge = end; | |
89 | while (it != buffered_extents.end()) { | |
90 | if (it->first >= end) | |
91 | break; | |
92 | uint64_t extent_it_end = it->first + it->second.x_len; | |
93 | assert(extent_it_end >= off); | |
94 | if (it->first <= off) { | |
95 | if (extent_it_end > end) { | |
96 | // <- data -> | |
97 | // <- it -> | |
98 | it->second.x_len -= (extent_it_end - off); | |
99 | buffered_extents[end] = Extent{ | |
100 | extent_it_end - end, it->second.x_off + it->second.x_len + len, it->second.data, it->second.data_len}; | |
101 | } else { | |
102 | // <- data -> | |
103 | // <- it -> | |
104 | assert(extent_it_end <= end); | |
105 | it->second.x_len -= (extent_it_end - off); | |
106 | } | |
107 | ++it; | |
108 | } else { | |
109 | assert(it->first > off); | |
110 | if (extent_it_end > end) { | |
111 | // <- data -> | |
112 | // <- it -> | |
113 | uint64_t overlap = end - it->first; | |
114 | buffered_extents[end] = Extent{ | |
115 | it->second.x_len - overlap, it->second.x_off + overlap, it->second.data, it->second.data_len}; | |
116 | } else { | |
117 | // <- data -> | |
118 | // <- it -> | |
119 | } | |
120 | buffered_extents.erase(it++); | |
121 | } | |
122 | } | |
123 | buffered_extents[off] = Extent{ | |
124 | len, 0, data, len}; | |
125 | ||
126 | if (0) | |
127 | verify(); | |
128 | } | |
129 | ||
130 | void memcpy_check(char *dst, uint64_t dst_raw_len, uint64_t dst_off, | |
131 | map<Offset, Extent>::iterator &it, uint64_t src_off, uint64_t copylen) { | |
132 | if (0) { | |
133 | assert(dst_off + copylen <= dst_raw_len); | |
134 | assert(it->second.x_off + src_off + copylen <= it->second.data_len); | |
135 | } | |
136 | memcpy(dst + dst_off, it->second.data + it->second.x_off + src_off, copylen); | |
137 | } | |
138 | ||
139 | uint64_t read_overlap(uint64_t off, uint64_t len, char *buf) { | |
140 | uint64_t end = off + len; | |
141 | if (end <= left_edge || off >= right_edge) | |
142 | return 0; | |
143 | ||
144 | uint64_t copied = 0; | |
145 | auto it = buffered_extents.lower_bound(off); | |
146 | if (it != buffered_extents.begin()) { | |
147 | --it; | |
148 | if (it->first + it->second.x_len <= off) | |
149 | ++it; | |
150 | } | |
151 | uint64_t copy_len; | |
152 | while (it != buffered_extents.end()) { | |
153 | if (it->first >= end) | |
154 | break; | |
155 | uint64_t extent_it_end = it->first + it->second.x_len; | |
156 | assert(extent_it_end >= off); | |
157 | if (it->first >= off) { | |
158 | if (extent_it_end > end) { | |
159 | // <- data -> | |
160 | // <- it -> | |
161 | copy_len = len - (it->first - off); | |
162 | memcpy_check(buf, len, it->first - off, it, 0, copy_len); | |
163 | } else { | |
164 | // <- data -> | |
165 | // <- it -> | |
166 | copy_len = it->second.x_len; | |
167 | memcpy_check(buf, len, it->first - off, it, 0, copy_len); | |
168 | } | |
169 | } else { | |
170 | if (extent_it_end > end) { | |
171 | // <- data -> | |
172 | // <- it -> | |
173 | copy_len = len; | |
174 | memcpy_check(buf, len, 0, it, off - it->first, copy_len); | |
175 | } else { | |
176 | // <- data -> | |
177 | // <- it -> | |
178 | assert(extent_it_end <= end); | |
179 | copy_len = it->first + it->second.x_len - off; | |
180 | memcpy_check(buf, len, 0, it, off - it->first, copy_len); | |
181 | } | |
182 | } | |
183 | copied += copy_len; | |
184 | ++it; | |
185 | } | |
186 | return copied; | |
187 | } | |
188 | ||
189 | void clear() { | |
190 | buffered_extents.clear(); | |
191 | left_edge = std::numeric_limits<uint64_t>::max(); | |
192 | right_edge = 0; | |
193 | } | |
194 | }; | |
195 | Mutex buffer_lock; | |
196 | BufferedExtents buffered_extents; | |
197 | Task *buffered_task_head = nullptr; | |
198 | ||
199 | static void init(); | |
200 | public: | |
201 | SharedDriverData *get_driver() { return driver; } | |
202 | ||
203 | public: | |
204 | aio_callback_t aio_callback; | |
205 | void *aio_callback_priv; | |
206 | ||
207 | NVMEDevice(CephContext* cct, aio_callback_t cb, void *cbpriv); | |
208 | ||
209 | bool supported_bdev_label() override { return false; } | |
210 | ||
211 | void aio_submit(IOContext *ioc) override; | |
212 | ||
213 | uint64_t get_size() const override { | |
214 | return size; | |
215 | } | |
216 | uint64_t get_block_size() const override { | |
217 | return block_size; | |
218 | } | |
219 | ||
220 | int read(uint64_t off, uint64_t len, bufferlist *pbl, | |
221 | IOContext *ioc, | |
222 | bool buffered) override; | |
223 | int aio_read( | |
224 | uint64_t off, | |
225 | uint64_t len, | |
226 | bufferlist *pbl, | |
227 | IOContext *ioc) override; | |
228 | int aio_write(uint64_t off, bufferlist& bl, | |
229 | IOContext *ioc, | |
230 | bool buffered) override; | |
231 | int write(uint64_t off, bufferlist& bl, bool buffered) override; | |
232 | int flush() override; | |
233 | int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override; | |
234 | ||
235 | // for managing buffered readers/writers | |
236 | int invalidate_cache(uint64_t off, uint64_t len) override; | |
237 | int open(const string& path) override; | |
238 | void close() override; | |
239 | int collect_metadata(string prefix, map<string,string> *pm) const override; | |
240 | }; | |
241 | ||
242 | #endif |