1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2015 XSky <haomai@xsky.com>
8 * Author: Haomai Wang <haomaiwang@gmail.com>
10 * This is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License version 2.1, as published by the Free Software
13 * Foundation. See file COPYING.
17 #ifndef CEPH_OS_BLUESTORE_NVMEDEVICE
18 #define CEPH_OS_BLUESTORE_NVMEDEVICE
24 // _Static_assert is the C11 spelling; alias it to C++'s static_assert
25 #define _Static_assert static_assert
28 #include "include/interval_set.h"
29 #include "common/ceph_time.h"
30 #include "common/Mutex.h"
31 #include "BlockDevice.h"
33 enum class IOCommand
{
// Forward declaration only: this header refers to SharedDriverData through a
// pointer (the `driver` member and get_driver()), so the full definition is
// not required here.
41 class SharedDriverData
;
43 class NVMEDevice
: public BlockDevice
{
45 * points to pinned, physically contiguous memory region;
46 * contains 4KB IDENTIFY structure for controller which is
47 * target for CONTROLLER IDENTIFY command during initialization
49 SharedDriverData
*driver
;
57 struct BufferedExtents
{
64 using Offset
= uint64_t;
65 map
<Offset
, Extent
> buffered_extents
;
66 uint64_t left_edge
= std::numeric_limits
<uint64_t>::max();
67 uint64_t right_edge
= 0;
70 interval_set
<uint64_t> m
;
71 for (auto && it
: buffered_extents
) {
72 assert(!m
.intersects(it
.first
, it
.second
.x_len
));
73 m
.insert(it
.first
, it
.second
.x_len
);
77 void insert(uint64_t off
, uint64_t len
, const char *data
) {
78 auto it
= buffered_extents
.lower_bound(off
);
79 if (it
!= buffered_extents
.begin()) {
81 if (it
->first
+ it
->second
.x_len
<= off
)
84 uint64_t end
= off
+ len
;
89 while (it
!= buffered_extents
.end()) {
92 uint64_t extent_it_end
= it
->first
+ it
->second
.x_len
;
93 assert(extent_it_end
>= off
);
94 if (it
->first
<= off
) {
95 if (extent_it_end
> end
) {
98 it
->second
.x_len
-= (extent_it_end
- off
);
99 buffered_extents
[end
] = Extent
{
100 extent_it_end
- end
, it
->second
.x_off
+ it
->second
.x_len
+ len
, it
->second
.data
, it
->second
.data_len
};
104 assert(extent_it_end
<= end
);
105 it
->second
.x_len
-= (extent_it_end
- off
);
109 assert(it
->first
> off
);
110 if (extent_it_end
> end
) {
113 uint64_t overlap
= end
- it
->first
;
114 buffered_extents
[end
] = Extent
{
115 it
->second
.x_len
- overlap
, it
->second
.x_off
+ overlap
, it
->second
.data
, it
->second
.data_len
};
120 buffered_extents
.erase(it
++);
123 buffered_extents
[off
] = Extent
{
130 void memcpy_check(char *dst
, uint64_t dst_raw_len
, uint64_t dst_off
,
131 map
<Offset
, Extent
>::iterator
&it
, uint64_t src_off
, uint64_t copylen
) {
133 assert(dst_off
+ copylen
<= dst_raw_len
);
134 assert(it
->second
.x_off
+ src_off
+ copylen
<= it
->second
.data_len
);
136 memcpy(dst
+ dst_off
, it
->second
.data
+ it
->second
.x_off
+ src_off
, copylen
);
139 uint64_t read_overlap(uint64_t off
, uint64_t len
, char *buf
) {
140 uint64_t end
= off
+ len
;
141 if (end
<= left_edge
|| off
>= right_edge
)
145 auto it
= buffered_extents
.lower_bound(off
);
146 if (it
!= buffered_extents
.begin()) {
148 if (it
->first
+ it
->second
.x_len
<= off
)
152 while (it
!= buffered_extents
.end()) {
153 if (it
->first
>= end
)
155 uint64_t extent_it_end
= it
->first
+ it
->second
.x_len
;
156 assert(extent_it_end
>= off
);
157 if (it
->first
>= off
) {
158 if (extent_it_end
> end
) {
161 copy_len
= len
- (it
->first
- off
);
162 memcpy_check(buf
, len
, it
->first
- off
, it
, 0, copy_len
);
166 copy_len
= it
->second
.x_len
;
167 memcpy_check(buf
, len
, it
->first
- off
, it
, 0, copy_len
);
170 if (extent_it_end
> end
) {
174 memcpy_check(buf
, len
, 0, it
, off
- it
->first
, copy_len
);
178 assert(extent_it_end
<= end
);
179 copy_len
= it
->first
+ it
->second
.x_len
- off
;
180 memcpy_check(buf
, len
, 0, it
, off
- it
->first
, copy_len
);
190 buffered_extents
.clear();
191 left_edge
= std::numeric_limits
<uint64_t>::max();
196 BufferedExtents buffered_extents
;
197 Task
*buffered_task_head
= nullptr;
// Accessor for the `driver` member: returns the stored pointer as-is.
// No null check is performed here.
201 SharedDriverData
*get_driver() { return driver
; }
204 aio_callback_t aio_callback
;
205 void *aio_callback_priv
;
// Constructor (declaration only; defined elsewhere).
//   cct    - Ceph context pointer.
//   cb     - aio completion callback.
//   cbpriv - opaque pointer paired with `cb`.
// NOTE(review): cb/cbpriv presumably initialise the aio_callback and
// aio_callback_priv members declared above - confirm in the definition.
207 NVMEDevice(CephContext
* cct
, aio_callback_t cb
, void *cbpriv
);
// Overrides the inherited virtual and unconditionally returns false.
// NOTE(review): presumably this tells callers the NVMe backend does not
// support bdev labels - confirm against the BlockDevice contract.
209 bool supported_bdev_label() override
{ return false; }
// Override of the inherited aio_submit (declaration only; defined elsewhere).
//   ioc - the IOContext to submit.
// NOTE(review): presumably queues the I/O accumulated on `ioc` to the
// driver - confirm in the definition.
211 void aio_submit(IOContext
*ioc
) override
;
213 uint64_t get_size() const override
{
216 uint64_t get_block_size() const override
{
220 int read(uint64_t off
, uint64_t len
, bufferlist
*pbl
,
222 bool buffered
) override
;
227 IOContext
*ioc
) override
;
228 int aio_write(uint64_t off
, bufferlist
& bl
,
230 bool buffered
) override
;
// Override of the inherited write (declaration only; defined elsewhere).
//   off      - device offset (presumably in bytes - TODO confirm).
//   bl       - bufferlist holding the data; taken by non-const reference.
//   buffered - buffered-I/O flag; semantics are set by the definition.
// Returns an int status (presumably 0 on success, negative errno on
// failure - TODO confirm).
231 int write(uint64_t off
, bufferlist
& bl
, bool buffered
) override
;
// Override of the inherited flush (declaration only; defined elsewhere).
// Returns an int status; the exact convention is set by the definition.
232 int flush() override
;
// Override of the inherited read_random (declaration only; defined elsewhere).
//   off      - device offset to read from.
//   len      - number of bytes requested.
//   buf      - caller-supplied destination; presumably must hold at least
//              `len` bytes - TODO confirm.
//   buffered - buffered-I/O flag; semantics are set by the definition.
// Returns an int status.
233 int read_random(uint64_t off
, uint64_t len
, char *buf
, bool buffered
) override
;
235 // for managing buffered readers/writers
// Override of the inherited invalidate_cache (declaration only).
//   off, len - the range the call applies to.
// Returns an int status. What is actually invalidated is decided by the
// definition, which is not visible here.
236 int invalidate_cache(uint64_t off
, uint64_t len
) override
;
// Override of the inherited open (declaration only; defined elsewhere).
//   path - string identifying what to open; how it is interpreted is decided
//          by the definition.
// Returns an int status.
237 int open(const string
& path
) override
;
// Override of the inherited close (declaration only; defined elsewhere).
// NOTE(review): presumably the counterpart of open() - confirm.
238 void close() override
;
// Override of the inherited collect_metadata (declaration only).
//   prefix - string taken by value; presumably prepended to the emitted
//            keys - TODO confirm.
//   pm     - output map of string -> string to be filled in.
// Const member: does not modify the device object. Returns an int status.
239 int collect_metadata(string prefix
, map
<string
,string
> *pm
) const override
;