1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2015 XSky <haomai@xsky.com>
8 * Author: Haomai Wang <haomaiwang@gmail.com>
10 * This is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License version 2.1, as published by the Free Software
13 * Foundation. See file COPYING.
17 #ifndef CEPH_OS_BLUESTORE_NVMEDEVICE
18 #define CEPH_OS_BLUESTORE_NVMEDEVICE
24 // _Static_assert is a C11 keyword that C++ lacks, so alias it to static_assert
25 #define _Static_assert static_assert
28 #include "include/interval_set.h"
29 #include "common/ceph_time.h"
30 #include "common/Mutex.h"
31 #include "common/Cond.h"
32 #include "BlockDevice.h"
34 enum class IOCommand
{
42 class SharedDriverData
;
44 class NVMEDevice
: public BlockDevice
{
46 * points to pinned, physically contiguous memory region;
47 * contains 4KB IDENTIFY structure for controller which is
48 * target for CONTROLLER IDENTIFY command during initialization
50 SharedDriverData
*driver
;
58 struct BufferedExtents
{
65 using Offset
= uint64_t;
66 map
<Offset
, Extent
> buffered_extents
;
67 uint64_t left_edge
= std::numeric_limits
<uint64_t>::max();
68 uint64_t right_edge
= 0;
71 interval_set
<uint64_t> m
;
72 for (auto && it
: buffered_extents
) {
73 assert(!m
.intersects(it
.first
, it
.second
.x_len
));
74 m
.insert(it
.first
, it
.second
.x_len
);
78 void insert(uint64_t off
, uint64_t len
, const char *data
) {
79 auto it
= buffered_extents
.lower_bound(off
);
80 if (it
!= buffered_extents
.begin()) {
82 if (it
->first
+ it
->second
.x_len
<= off
)
85 uint64_t end
= off
+ len
;
90 while (it
!= buffered_extents
.end()) {
93 uint64_t extent_it_end
= it
->first
+ it
->second
.x_len
;
94 assert(extent_it_end
>= off
);
95 if (it
->first
<= off
) {
96 if (extent_it_end
> end
) {
99 it
->second
.x_len
-= (extent_it_end
- off
);
100 buffered_extents
[end
] = Extent
{
101 extent_it_end
- end
, it
->second
.x_off
+ it
->second
.x_len
+ len
, it
->second
.data
, it
->second
.data_len
};
105 assert(extent_it_end
<= end
);
106 it
->second
.x_len
-= (extent_it_end
- off
);
110 assert(it
->first
> off
);
111 if (extent_it_end
> end
) {
114 uint64_t overlap
= end
- it
->first
;
115 buffered_extents
[end
] = Extent
{
116 it
->second
.x_len
- overlap
, it
->second
.x_off
+ overlap
, it
->second
.data
, it
->second
.data_len
};
121 buffered_extents
.erase(it
++);
124 buffered_extents
[off
] = Extent
{
131 void memcpy_check(char *dst
, uint64_t dst_raw_len
, uint64_t dst_off
,
132 map
<Offset
, Extent
>::iterator
&it
, uint64_t src_off
, uint64_t copylen
) {
134 assert(dst_off
+ copylen
<= dst_raw_len
);
135 assert(it
->second
.x_off
+ src_off
+ copylen
<= it
->second
.data_len
);
137 memcpy(dst
+ dst_off
, it
->second
.data
+ it
->second
.x_off
+ src_off
, copylen
);
140 uint64_t read_overlap(uint64_t off
, uint64_t len
, char *buf
) {
141 uint64_t end
= off
+ len
;
142 if (end
<= left_edge
|| off
>= right_edge
)
146 auto it
= buffered_extents
.lower_bound(off
);
147 if (it
!= buffered_extents
.begin()) {
149 if (it
->first
+ it
->second
.x_len
<= off
)
153 while (it
!= buffered_extents
.end()) {
154 if (it
->first
>= end
)
156 uint64_t extent_it_end
= it
->first
+ it
->second
.x_len
;
157 assert(extent_it_end
>= off
);
158 if (it
->first
>= off
) {
159 if (extent_it_end
> end
) {
162 copy_len
= len
- (it
->first
- off
);
163 memcpy_check(buf
, len
, it
->first
- off
, it
, 0, copy_len
);
167 copy_len
= it
->second
.x_len
;
168 memcpy_check(buf
, len
, it
->first
- off
, it
, 0, copy_len
);
171 if (extent_it_end
> end
) {
175 memcpy_check(buf
, len
, 0, it
, off
- it
->first
, copy_len
);
179 assert(extent_it_end
<= end
);
180 copy_len
= it
->first
+ it
->second
.x_len
- off
;
181 memcpy_check(buf
, len
, 0, it
, off
- it
->first
, copy_len
);
191 buffered_extents
.clear();
192 left_edge
= std::numeric_limits
<uint64_t>::max();
197 BufferedExtents buffered_extents
;
198 Task
*buffered_task_head
= nullptr;
202 SharedDriverData
*get_driver() { return driver
; }
205 aio_callback_t aio_callback
;
206 void *aio_callback_priv
;
208 NVMEDevice(CephContext
* cct
, aio_callback_t cb
, void *cbpriv
);
210 bool supported_bdev_label() override
{ return false; }
212 void aio_submit(IOContext
*ioc
) override
;
214 uint64_t get_size() const override
{
217 uint64_t get_block_size() const override
{
221 int read(uint64_t off
, uint64_t len
, bufferlist
*pbl
,
223 bool buffered
) override
;
228 IOContext
*ioc
) override
;
229 int aio_write(uint64_t off
, bufferlist
& bl
,
231 bool buffered
) override
;
232 int write(uint64_t off
, bufferlist
& bl
, bool buffered
) override
;
233 int flush() override
;
234 int read_random(uint64_t off
, uint64_t len
, char *buf
, bool buffered
) override
;
236 // for managing buffered readers/writers
237 int invalidate_cache(uint64_t off
, uint64_t len
) override
;
238 int open(const string
& path
) override
;
239 void close() override
;
240 int collect_metadata(string prefix
, map
<string
,string
> *pm
) const override
;