]>
git.proxmox.com Git - ceph.git/blob - ceph/src/os/bluestore/PMEMDevice.cc
262eeb1c3bb0e49c958032b881fc515fd306ef58
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2015 Intel <jianpeng.ma@intel.com>
8 * Author: Jianpeng Ma <jianpeng.ma@intel.com>
10 * This is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License version 2.1, as published by the Free Software
13 * Foundation. See file COPYING.
19 #include <sys/types.h>
23 #include "PMEMDevice.h"
24 #include "include/types.h"
25 #include "include/compat.h"
26 #include "include/stringify.h"
27 #include "common/errno.h"
28 #include "common/debug.h"
29 #include "common/blkdev.h"
31 #define dout_context cct
32 #define dout_subsys ceph_subsys_bdev
34 #define dout_prefix *_dout << "bdev-PMEM(" << path << ") "
36 PMEMDevice::PMEMDevice(CephContext
*cct
, aio_callback_t cb
, void *cbpriv
)
39 size(0), block_size(0),
40 debug_lock("PMEMDevice::debug_lock"),
45 int PMEMDevice::_lock()
48 memset(&l
, 0, sizeof(l
));
50 l
.l_whence
= SEEK_SET
;
53 int r
= ::fcntl(fd
, F_SETLK
, &l
);
59 int PMEMDevice::open(const string
& p
)
63 dout(1) << __func__
<< " path " << path
<< dendl
;
65 fd
= ::open(path
.c_str(), O_RDWR
);
68 derr
<< __func__
<< " open got: " << cpp_strerror(r
) << dendl
;
74 derr
<< __func__
<< " failed to lock " << path
<< ": " << cpp_strerror(r
)
83 derr
<< __func__
<< " fstat got " << cpp_strerror(r
) << dendl
;
86 if (S_ISBLK(st
.st_mode
)) {
88 r
= get_block_device_size(fd
, &s
);
98 addr
= (char *)pmem_map_file(path
.c_str(), size
, PMEM_FILE_EXCL
, O_RDWR
, &map_len
, NULL
);
100 derr
<< __func__
<< " pmem_map_file error" << dendl
;
105 // Operate as though the block size is 4 KB. The backing file
106 // blksize doesn't strictly matter except that some file systems may
107 // require a read/modify/write if we write something smaller than it.
109 block_size
= g_conf
->bdev_block_size
;
110 if (block_size
!= (unsigned)st
.st_blksize
) {
111 dout(1) << __func__
<< " backing device/file reports st_blksize "
112 << st
.st_blksize
<< ", using bdev_block_size "
113 << block_size
<< " anyway" << dendl
;
118 << " (" << pretty_si_t(size
) << "B)"
119 << " block_size " << block_size
120 << " (" << pretty_si_t(block_size
) << "B)"
125 VOID_TEMP_FAILURE_RETRY(::close(fd
));
130 void PMEMDevice::close()
132 dout(1) << __func__
<< dendl
;
134 assert(addr
!= NULL
);
135 pmem_unmap(addr
, size
);
137 VOID_TEMP_FAILURE_RETRY(::close(fd
));
143 static string
get_dev_property(const char *dev
, const char *property
)
145 char val
[1024] = {0};
146 get_block_device_string_property(dev
, property
, val
, sizeof(val
));
150 int PMEMDevice::collect_metadata(string prefix
, map
<string
,string
> *pm
) const
152 (*pm
)[prefix
+ "rotational"] = stringify((int)(bool)rotational
);
153 (*pm
)[prefix
+ "size"] = stringify(get_size());
154 (*pm
)[prefix
+ "block_size"] = stringify(get_block_size());
155 (*pm
)[prefix
+ "driver"] = "PMEMDevice";
156 (*pm
)[prefix
+ "type"] = "ssd";
159 int r
= ::fstat(fd
, &st
);
162 if (S_ISBLK(st
.st_mode
)) {
163 (*pm
)[prefix
+ "access_mode"] = "blk";
164 char partition_path
[PATH_MAX
];
165 char dev_node
[PATH_MAX
];
166 int rc
= get_device_by_fd(fd
, partition_path
, dev_node
, PATH_MAX
);
170 (*pm
)[prefix
+ "partition_path"] = "unknown";
171 (*pm
)[prefix
+ "dev_node"] = "unknown";
174 (*pm
)[prefix
+ "partition_path"] = string(partition_path
);
175 (*pm
)[prefix
+ "dev_node"] = "unknown";
179 (*pm
)[prefix
+ "partition_path"] = string(partition_path
);
180 (*pm
)[prefix
+ "dev_node"] = string(dev_node
);
181 (*pm
)[prefix
+ "model"] = get_dev_property(dev_node
, "device/model");
182 (*pm
)[prefix
+ "dev"] = get_dev_property(dev_node
, "dev");
184 // nvme exposes a serial number
185 string serial
= get_dev_property(dev_node
, "device/serial");
186 if (serial
.length()) {
187 (*pm
)[prefix
+ "serial"] = serial
;
190 // NVMe devices expose a device/device/* structure; infer the type from
191 // that. There is probably a better way.
192 string nvme_vendor
= get_dev_property(dev_node
, "device/device/vendor");
193 if (nvme_vendor
.length()) {
194 (*pm
)[prefix
+ "type"] = "nvme";
199 (*pm
)[prefix
+ "access_mode"] = "file";
200 (*pm
)[prefix
+ "path"] = path
;
205 int PMEMDevice::flush()
207 // Every write is persisted as it is made, so no explicit flush is needed.
212 void PMEMDevice::aio_submit(IOContext
*ioc
)
217 int PMEMDevice::write(uint64_t off
, bufferlist
& bl
, bool buffered
)
219 uint64_t len
= bl
.length();
220 dout(20) << __func__
<< " " << off
<< "~" << len
<< dendl
;
223 assert(off
+ len
<= size
);
225 dout(40) << "data: ";
229 if (g_conf
->bdev_inject_crash
&&
230 rand() % g_conf
->bdev_inject_crash
== 0) {
231 derr
<< __func__
<< " bdev_inject_crash: dropping io " << off
<< "~" << len
237 bufferlist::iterator p
= bl
.begin();
241 uint32_t l
= p
.get_ptr_and_advance(len
, &data
);
242 pmem_memcpy_persist(addr
+ off1
, data
, l
);
250 int PMEMDevice::aio_write(
256 return write(off
, bl
, buffered
);
260 int PMEMDevice::read(uint64_t off
, uint64_t len
, bufferlist
*pbl
,
264 dout(5) << __func__
<< " " << off
<< "~" << len
<< dendl
;
267 assert(off
+ len
<= size
);
269 bufferptr p
= buffer::create_page_aligned(len
);
270 memcpy(p
.c_str(), addr
+ off
, len
);
273 pbl
->push_back(std::move(p
));
275 dout(40) << "data: ";
276 pbl
->hexdump(*_dout
);
282 int PMEMDevice::aio_read(uint64_t off
, uint64_t len
, bufferlist
*pbl
,
285 return read(off
, len
, pbl
, ioc
, false);
288 int PMEMDevice::read_random(uint64_t off
, uint64_t len
, char *buf
, bool buffered
)
292 assert(off
+ len
<= size
);
294 memcpy(buf
, addr
+ off
, len
);
299 int PMEMDevice::invalidate_cache(uint64_t off
, uint64_t len
)
301 dout(5) << __func__
<< " " << off
<< "~" << len
<< dendl
;