]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/bluestore/PMEMDevice.cc
bump version to 15.2.11-pve1
[ceph.git] / ceph / src / os / bluestore / PMEMDevice.cc
CommitLineData
31f18b77
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2015 Intel <jianpeng.ma@intel.com>
7 *
8 * Author: Jianpeng Ma <jianpeng.ma@intel.com>
9 *
10 * This is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License version 2.1, as published by the Free Software
13 * Foundation. See file COPYING.
14 *
15 */
16
17#include <unistd.h>
18#include <stdlib.h>
19#include <sys/types.h>
20#include <sys/stat.h>
31f18b77
FG
21
22#include "PMEMDevice.h"
11fdf7f2 23#include "libpmem.h"
31f18b77
FG
24#include "include/types.h"
25#include "include/compat.h"
26#include "include/stringify.h"
27#include "common/errno.h"
28#include "common/debug.h"
29#include "common/blkdev.h"
30
// Logging configuration macros for this translation unit.
// NOTE(review): presumably consumed by the dout()/derr macros pulled in via
// common/debug.h -- confirm against that header.
31#define dout_context cct
32#define dout_subsys ceph_subsys_bdev
// Drop any previously defined prefix before installing the one below.
33#undef dout_prefix
// Streams "bdev-PMEM(<path>) " into *_dout, where path is whatever `path`
// names at the point of expansion.
34#define dout_prefix *_dout << "bdev-PMEM(" << path << ") "
35
36PMEMDevice::PMEMDevice(CephContext *cct, aio_callback_t cb, void *cbpriv)
11fdf7f2 37 : BlockDevice(cct, cb, cbpriv),
31f18b77 38 fd(-1), addr(0),
31f18b77
FG
39 injecting_crash(0)
40{
41}
42
43int PMEMDevice::_lock()
44{
45 struct flock l;
46 memset(&l, 0, sizeof(l));
47 l.l_type = F_WRLCK;
48 l.l_whence = SEEK_SET;
49 l.l_start = 0;
50 l.l_len = 0;
51 int r = ::fcntl(fd, F_SETLK, &l);
52 if (r < 0)
53 return -errno;
54 return 0;
55}
56
57int PMEMDevice::open(const string& p)
58{
59 path = p;
60 int r = 0;
61 dout(1) << __func__ << " path " << path << dendl;
62
91327a77 63 fd = ::open(path.c_str(), O_RDWR | O_CLOEXEC);
31f18b77
FG
64 if (fd < 0) {
65 r = -errno;
66 derr << __func__ << " open got: " << cpp_strerror(r) << dendl;
67 return r;
68 }
69
70 r = _lock();
71 if (r < 0) {
72 derr << __func__ << " failed to lock " << path << ": " << cpp_strerror(r)
73 << dendl;
74 goto out_fail;
75 }
76
77 struct stat st;
78 r = ::fstat(fd, &st);
79 if (r < 0) {
80 r = -errno;
81 derr << __func__ << " fstat got " << cpp_strerror(r) << dendl;
82 goto out_fail;
83 }
31f18b77
FG
84
85 size_t map_len;
11fdf7f2 86 addr = (char *)pmem_map_file(path.c_str(), 0, PMEM_FILE_EXCL, O_RDWR, &map_len, NULL);
31f18b77 87 if (addr == NULL) {
11fdf7f2 88 derr << __func__ << " pmem_map_file failed: " << pmem_errormsg() << dendl;
31f18b77
FG
89 goto out_fail;
90 }
91 size = map_len;
92
93 // Operate as though the block size is 4 KB. The backing file
94 // blksize doesn't strictly matter except that some file systems may
95 // require a read/modify/write if we write something smaller than
96 // it.
11fdf7f2 97 block_size = g_conf()->bdev_block_size;
31f18b77
FG
98 if (block_size != (unsigned)st.st_blksize) {
99 dout(1) << __func__ << " backing device/file reports st_blksize "
100 << st.st_blksize << ", using bdev_block_size "
101 << block_size << " anyway" << dendl;
102 }
103
104 dout(1) << __func__
105 << " size " << size
1adf2230 106 << " (" << byte_u_t(size) << ")"
31f18b77 107 << " block_size " << block_size
1adf2230 108 << " (" << byte_u_t(block_size) << ")"
31f18b77
FG
109 << dendl;
110 return 0;
111
112 out_fail:
113 VOID_TEMP_FAILURE_RETRY(::close(fd));
114 fd = -1;
115 return r;
116}
117
118void PMEMDevice::close()
119{
120 dout(1) << __func__ << dendl;
121
11fdf7f2 122 ceph_assert(addr != NULL);
31f18b77 123 pmem_unmap(addr, size);
11fdf7f2 124 ceph_assert(fd >= 0);
31f18b77
FG
125 VOID_TEMP_FAILURE_RETRY(::close(fd));
126 fd = -1;
127
128 path.clear();
129}
130
11fdf7f2 131int PMEMDevice::collect_metadata(const string& prefix, map<string,string> *pm) const
31f18b77
FG
132{
133 (*pm)[prefix + "rotational"] = stringify((int)(bool)rotational);
134 (*pm)[prefix + "size"] = stringify(get_size());
135 (*pm)[prefix + "block_size"] = stringify(get_block_size());
136 (*pm)[prefix + "driver"] = "PMEMDevice";
137 (*pm)[prefix + "type"] = "ssd";
138
139 struct stat st;
140 int r = ::fstat(fd, &st);
141 if (r < 0)
142 return -errno;
143 if (S_ISBLK(st.st_mode)) {
144 (*pm)[prefix + "access_mode"] = "blk";
11fdf7f2 145 char buffer[1024] = {0};
9f95a23c 146 BlkDev blkdev(fd);
11fdf7f2
TL
147
148 blkdev.model(buffer, sizeof(buffer));
149 (*pm)[prefix + "model"] = buffer;
150
151 buffer[0] = '\0';
152 blkdev.dev(buffer, sizeof(buffer));
153 (*pm)[prefix + "dev"] = buffer;
154
155 // nvme exposes a serial number
156 buffer[0] = '\0';
157 blkdev.serial(buffer, sizeof(buffer));
158 (*pm)[prefix + "serial"] = buffer;
159
31f18b77
FG
160 } else {
161 (*pm)[prefix + "access_mode"] = "file";
162 (*pm)[prefix + "path"] = path;
163 }
164 return 0;
165}
166
167int PMEMDevice::flush()
168{
169 //Because all write is persist. So no need
170 return 0;
171}
172
173
174void PMEMDevice::aio_submit(IOContext *ioc)
175{
11fdf7f2
TL
176 if (ioc->priv) {
177 ceph_assert(ioc->num_running == 0);
178 aio_callback(aio_callback_priv, ioc->priv);
179 } else {
180 ioc->try_aio_wake();
181 }
31f18b77
FG
182 return;
183}
184
9f95a23c 185int PMEMDevice::write(uint64_t off, bufferlist& bl, bool buffered, int write_hint)
31f18b77
FG
186{
187 uint64_t len = bl.length();
188 dout(20) << __func__ << " " << off << "~" << len << dendl;
11fdf7f2 189 ceph_assert(is_valid_io(off, len));
31f18b77
FG
190
191 dout(40) << "data: ";
192 bl.hexdump(*_dout);
193 *_dout << dendl;
194
11fdf7f2
TL
195 if (g_conf()->bdev_inject_crash &&
196 rand() % g_conf()->bdev_inject_crash == 0) {
31f18b77
FG
197 derr << __func__ << " bdev_inject_crash: dropping io " << off << "~" << len
198 << dendl;
199 ++injecting_crash;
200 return 0;
201 }
202
203 bufferlist::iterator p = bl.begin();
204 uint32_t off1 = off;
205 while (len) {
206 const char *data;
207 uint32_t l = p.get_ptr_and_advance(len, &data);
208 pmem_memcpy_persist(addr + off1, data, l);
209 len -= l;
210 off1 += l;
211 }
31f18b77
FG
212 return 0;
213}
214
215int PMEMDevice::aio_write(
216 uint64_t off,
217 bufferlist &bl,
218 IOContext *ioc,
11fdf7f2 219 bool buffered,
9f95a23c 220 int write_hint)
31f18b77
FG
221{
222 return write(off, bl, buffered);
223}
224
225
226int PMEMDevice::read(uint64_t off, uint64_t len, bufferlist *pbl,
227 IOContext *ioc,
228 bool buffered)
229{
230 dout(5) << __func__ << " " << off << "~" << len << dendl;
11fdf7f2 231 ceph_assert(is_valid_io(off, len));
31f18b77 232
11fdf7f2 233 bufferptr p = buffer::create_small_page_aligned(len);
31f18b77
FG
234 memcpy(p.c_str(), addr + off, len);
235
236 pbl->clear();
237 pbl->push_back(std::move(p));
238
239 dout(40) << "data: ";
240 pbl->hexdump(*_dout);
241 *_dout << dendl;
242
243 return 0;
244}
245
246int PMEMDevice::aio_read(uint64_t off, uint64_t len, bufferlist *pbl,
247 IOContext *ioc)
248{
249 return read(off, len, pbl, ioc, false);
250}
251
252int PMEMDevice::read_random(uint64_t off, uint64_t len, char *buf, bool buffered)
253{
11fdf7f2
TL
254 dout(5) << __func__ << " " << off << "~" << len << dendl;
255 ceph_assert(is_valid_io(off, len));
31f18b77
FG
256
257 memcpy(buf, addr + off, len);
258 return 0;
259}
260
261
262int PMEMDevice::invalidate_cache(uint64_t off, uint64_t len)
263{
264 dout(5) << __func__ << " " << off << "~" << len << dendl;
265 return 0;
266}
267
268