// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2014 Red Hat
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 */
15 #ifndef CEPH_OS_BLUESTORE_KERNELDEVICE_H
16 #define CEPH_OS_BLUESTORE_KERNELDEVICE_H
20 #include "include/types.h"
21 #include "include/interval_set.h"
22 #include "common/Thread.h"
23 #include "include/utime.h"
26 #include "BlockDevice.h"
28 #define RW_IO_MAX (INT_MAX & CEPH_PAGE_MASK)
31 class KernelDevice
: public BlockDevice
{
// File descriptors for the device.
// NOTE(review): presumably one direct-I/O fd and one buffered fd per write
// hint, picked through choose_fd(buffered, write_hint) — confirm against the
// implementation.
std::vector<int> fd_directs;
std::vector<int> fd_buffereds;
// NOTE(review): looks like a switch for write-hint support — confirm.
bool enable_wrt = true;
int vdo_fd = -1;      ///< fd for vdo sysfs directory
std::string devname;  ///< kernel dev name (/sys/block/$devname), if any
42 ceph::mutex debug_lock
= ceph::make_mutex("KernelDevice::debug_lock");
43 interval_set
<uint64_t> debug_inflight
;
45 std::atomic
<bool> io_since_flush
= {false};
46 ceph::mutex flush_mutex
= ceph::make_mutex("KernelDevice::flush_mutex");
48 aio_queue_t aio_queue
;
49 aio_callback_t discard_callback
;
50 void *discard_callback_priv
;
55 ceph::mutex discard_lock
= ceph::make_mutex("KernelDevice::discard_lock");
56 ceph::condition_variable discard_cond
;
57 bool discard_running
= false;
58 interval_set
<uint64_t> discard_queued
;
59 interval_set
<uint64_t> discard_finishing
;
61 struct AioCompletionThread
: public Thread
{
63 explicit AioCompletionThread(KernelDevice
*b
) : bdev(b
) {}
64 void *entry() override
{
70 struct DiscardThread
: public Thread
{
72 explicit DiscardThread(KernelDevice
*b
) : bdev(b
) {}
73 void *entry() override
{
74 bdev
->_discard_thread();
// Atomic counter with no in-class initializer.
// NOTE(review): presumably a fault-injection knob initialized by the
// constructor — confirm.
std::atomic_int injecting_crash;
82 void _discard_thread();
83 int queue_discard(interval_set
<uint64_t> &to_release
) override
;
91 void _aio_log_start(IOContext
*ioc
, uint64_t offset
, uint64_t length
);
92 void _aio_log_finish(IOContext
*ioc
, uint64_t offset
, uint64_t length
);
94 int _sync_write(uint64_t off
, bufferlist
& bl
, bool buffered
, int write_hint
);
/// Read @p len bytes starting at @p off into @p buf.
/// NOTE(review): going by the name, this handles direct-I/O reads whose
/// offset/length are not block-aligned — confirm against the implementation.
/// @return int status code.
int direct_read_unaligned(uint64_t off, uint64_t len, char *buf);
100 // stalled aio debugging
101 aio_list_t debug_queue
;
102 ceph::mutex debug_queue_lock
= ceph::make_mutex("KernelDevice::debug_queue_lock");
103 aio_t
*debug_oldest
= nullptr;
104 utime_t debug_stall_since
;
105 void debug_aio_link(aio_t
& aio
);
106 void debug_aio_unlink(aio_t
& aio
);
109 int choose_fd(bool buffered
, int write_hint
) const;
112 KernelDevice(CephContext
* cct
, aio_callback_t cb
, void *cbpriv
, aio_callback_t d_cb
, void *d_cbpriv
);
114 void aio_submit(IOContext
*ioc
) override
;
115 void discard_drain() override
;
117 int collect_metadata(const std::string
& prefix
, map
<std::string
,std::string
> *pm
) const override
;
118 int get_devname(std::string
*s
) override
{
119 if (devname
.empty()) {
125 int get_devices(std::set
<std::string
> *ls
) override
;
127 bool get_thin_utilization(uint64_t *total
, uint64_t *avail
) const override
;
129 int read(uint64_t off
, uint64_t len
, bufferlist
*pbl
,
131 bool buffered
) override
;
132 int aio_read(uint64_t off
, uint64_t len
, bufferlist
*pbl
,
133 IOContext
*ioc
) override
;
134 int read_random(uint64_t off
, uint64_t len
, char *buf
, bool buffered
) override
;
136 int write(uint64_t off
, bufferlist
& bl
, bool buffered
, int write_hint
= WRITE_LIFE_NOT_SET
) override
;
137 int aio_write(uint64_t off
, bufferlist
& bl
,
140 int write_hint
= WRITE_LIFE_NOT_SET
) override
;
141 int flush() override
;
142 int discard(uint64_t offset
, uint64_t len
) override
;
144 // for managing buffered readers/writers
145 int invalidate_cache(uint64_t off
, uint64_t len
) override
;
146 int open(const std::string
& path
) override
;
147 void close() override
;