]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 XSky <haomai@xsky.com> | |
7 | * | |
8 | * Author: Haomai Wang <haomaiwang@gmail.com> | |
9 | * | |
10 | * This is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License version 2.1, as published by the Free Software | |
13 | * Foundation. See file COPYING. | |
14 | * | |
15 | */ | |
16 | ||
17 | #ifndef CEPH_OS_BLUESTORE_NVMEDEVICE | |
18 | #define CEPH_OS_BLUESTORE_NVMEDEVICE | |
19 | ||
20 | #include <queue> | |
21 | #include <map> | |
22 | #include <limits> | |
23 | ||
24 | // _Static_assert is a C11 keyword that C++ does not provide; alias it to static_assert | |
25 | #define _Static_assert static_assert | |
26 | ||
27 | ||
7c673cae FG |
28 | #include "include/interval_set.h" |
29 | #include "common/ceph_time.h" | |
30 | #include "common/Mutex.h" | |
224ce89b | 31 | #include "common/Cond.h" |
7c673cae FG |
32 | #include "BlockDevice.h" |
33 | ||
/// Discriminates the three kinds of I/O command named below: read, write
/// and flush. The numeric values are the ones the compiler would assign
/// implicitly; they are only spelled out for readability.
enum class IOCommand {
  READ_COMMAND = 0,
  WRITE_COMMAND = 1,
  FLUSH_COMMAND = 2,
};
39 | ||
40 | class Task; | |
41 | class PerfCounters; | |
42 | class SharedDriverData; | |
43 | ||
44 | class NVMEDevice : public BlockDevice { | |
45 | /** | |
46 | * points to pinned, physically contiguous memory region; | |
47 | * contains 4KB IDENTIFY structure for controller which is | |
48 | * target for CONTROLLER IDENTIFY command during initialization | |
49 | */ | |
50 | SharedDriverData *driver; | |
51 | string name; | |
52 | ||
53 | uint64_t size; | |
54 | uint64_t block_size; | |
55 | ||
56 | bool aio_stop; | |
57 | ||
/**
 * Index of buffered byte ranges, keyed by their starting offset.
 *
 * Every entry maps an offset to an Extent, i.e. a window into a buffer that
 * was handed to insert(). Only the pointer is stored -- the bytes are never
 * copied or freed here -- so the memory behind it has to stay valid for as
 * long as any entry still refers to it (at the latest until clear()).
 *
 * insert() keeps the entries non-overlapping: a newer range replaces the
 * overlapped part of older ones, and read_overlap() copies whatever part of
 * a requested range is currently indexed.
 */
struct BufferedExtents {
  struct Extent {
    uint64_t x_len;     // number of bytes covered by this entry
    uint64_t x_off;     // offset of the first covered byte inside `data`
    const char *data;   // start of the buffer passed to insert() (not owned)
    uint64_t data_len;  // full length of that buffer
  };
  using Offset = uint64_t;

  // Switch for the consistency checks in insert() and memcpy_check();
  // disabled, exactly like the `if (0)` guards it replaces.
  static constexpr bool debug_checks = false;

  std::map<Offset, Extent> buffered_extents;
  // [left_edge, right_edge) encloses every range inserted since the last
  // clear(); read_overlap() uses it to reject unrelated reads without
  // touching the map.
  uint64_t left_edge = std::numeric_limits<uint64_t>::max();
  uint64_t right_edge = 0;

  /**
   * Assert that no two entries overlap.
   *
   * The map is ordered by start offset, so it is sufficient to check that
   * each entry begins at or after the end of the previous one.
   */
  void verify() const {
    uint64_t prev_end = 0;
    for (const auto &entry : buffered_extents) {
      assert(prev_end <= entry.first);
      prev_end = entry.first + entry.second.x_len;
    }
    (void)prev_end;  // silence "set but unused" when assert() is compiled out
  }

  /**
   * Record that [off, off + len) is now backed by `data`.
   *
   * Older entries that intersect the new range are trimmed, split or
   * removed so that the new range wins wherever they overlap.
   *
   * @param off   start offset of the new range
   * @param len   length of the new range; an empty range is ignored
   * @param data  buffer holding `len` bytes; only the pointer is kept
   */
  void insert(uint64_t off, uint64_t len, const char *data) {
    // An empty range carries no bytes. Without this guard the tail of an
    // entry straddling `off` would be re-keyed at `off` and then overwritten
    // by the zero-length entry stored at the bottom, losing buffered data.
    if (len == 0)
      return;

    const uint64_t end = off + len;
    if (off < left_edge)
      left_edge = off;
    if (end > right_edge)
      right_edge = end;

    // Start from the first entry that can intersect the new range: the one
    // preceding `off` if it reaches past `off`, otherwise the first at/after.
    auto it = buffered_extents.lower_bound(off);
    if (it != buffered_extents.begin()) {
      --it;
      if (it->first + it->second.x_len <= off)
        ++it;
    }

    while (it != buffered_extents.end() && it->first < end) {
      const uint64_t extent_it_end = it->first + it->second.x_len;
      assert(extent_it_end >= off);

      if (extent_it_end > end) {
        // The old entry sticks out to the right: keep that tail as a
        // separate entry starting at `end`.
        const uint64_t skipped = end - it->first;
        const Extent tail{extent_it_end - end, it->second.x_off + skipped,
                          it->second.data, it->second.data_len};
        buffered_extents[end] = tail;
      }

      if (it->first < off) {
        // The old entry sticks out to the left: keep only that head.
        it->second.x_len = off - it->first;
        ++it;
      } else {
        // Everything that remains of the old entry is covered by new data.
        it = buffered_extents.erase(it);
      }
    }

    buffered_extents[off] = Extent{len, 0, data, len};

    if (debug_checks)
      verify();
  }

  /**
   * Copy `copylen` bytes of the entry at `it` into `dst`.
   *
   * @param dst          destination buffer
   * @param dst_raw_len  size of `dst`, used only by the debug bounds check
   * @param dst_off      offset inside `dst` to write to
   * @param it           entry to read from
   * @param src_off      offset relative to the entry's first covered byte
   * @param copylen      number of bytes to copy
   */
  void memcpy_check(char *dst, uint64_t dst_raw_len, uint64_t dst_off,
                    std::map<Offset, Extent>::iterator &it, uint64_t src_off,
                    uint64_t copylen) {
    if (debug_checks) {
      assert(dst_off + copylen <= dst_raw_len);
      assert(it->second.x_off + src_off + copylen <= it->second.data_len);
    }
    memcpy(dst + dst_off, it->second.data + it->second.x_off + src_off, copylen);
  }

  /**
   * Copy the indexed parts of [off, off + len) into `buf`.
   *
   * Byte `off + i` of the range lands at `buf[i]`. Positions of `buf` that
   * correspond to offsets not covered by any entry are left untouched.
   *
   * @return the total number of bytes copied
   */
  uint64_t read_overlap(uint64_t off, uint64_t len, char *buf) {
    const uint64_t end = off + len;
    if (end <= left_edge || off >= right_edge)
      return 0;

    auto it = buffered_extents.lower_bound(off);
    if (it != buffered_extents.begin()) {
      --it;
      if (it->first + it->second.x_len <= off)
        ++it;
    }

    uint64_t copied = 0;
    for (; it != buffered_extents.end() && it->first < end; ++it) {
      const uint64_t extent_it_end = it->first + it->second.x_len;
      assert(extent_it_end >= off);

      // Intersection of the requested range with this entry.
      const uint64_t from = it->first > off ? it->first : off;
      const uint64_t to = extent_it_end < end ? extent_it_end : end;
      const uint64_t copy_len = to - from;

      memcpy_check(buf, len, from - off, it, from - it->first, copy_len);
      copied += copy_len;
    }
    return copied;
  }

  /// Drop every entry and reset the bounding edges to their empty state.
  void clear() {
    buffered_extents.clear();
    left_edge = std::numeric_limits<uint64_t>::max();
    right_edge = 0;
  }
};
196 | Mutex buffer_lock; | |
197 | BufferedExtents buffered_extents; | |
198 | Task *buffered_task_head = nullptr; | |
199 | ||
200 | static void init(); | |
201 | public: | |
202 | SharedDriverData *get_driver() { return driver; } | |
203 | ||
204 | public: | |
205 | aio_callback_t aio_callback; | |
206 | void *aio_callback_priv; | |
207 | ||
208 | NVMEDevice(CephContext* cct, aio_callback_t cb, void *cbpriv); | |
209 | ||
210 | bool supported_bdev_label() override { return false; } | |
211 | ||
212 | void aio_submit(IOContext *ioc) override; | |
213 | ||
214 | uint64_t get_size() const override { | |
215 | return size; | |
216 | } | |
217 | uint64_t get_block_size() const override { | |
218 | return block_size; | |
219 | } | |
220 | ||
221 | int read(uint64_t off, uint64_t len, bufferlist *pbl, | |
222 | IOContext *ioc, | |
223 | bool buffered) override; | |
224 | int aio_read( | |
225 | uint64_t off, | |
226 | uint64_t len, | |
227 | bufferlist *pbl, | |
228 | IOContext *ioc) override; | |
229 | int aio_write(uint64_t off, bufferlist& bl, | |
230 | IOContext *ioc, | |
231 | bool buffered) override; | |
232 | int write(uint64_t off, bufferlist& bl, bool buffered) override; | |
233 | int flush() override; | |
234 | int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override; | |
235 | ||
236 | // for managing buffered readers/writers | |
237 | int invalidate_cache(uint64_t off, uint64_t len) override; | |
238 | int open(const string& path) override; | |
239 | void close() override; | |
240 | int collect_metadata(string prefix, map<string,string> *pm) const override; | |
241 | }; | |
242 | ||
243 | #endif |