git.proxmox.com Git - ceph.git/blame - ceph/src/blk/kernel/KernelDevice.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / blk / kernel / KernelDevice.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2014 Red Hat
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
f67539c2
TL
15#ifndef CEPH_BLK_KERNELDEVICE_H
16#define CEPH_BLK_KERNELDEVICE_H
7c673cae
FG
17
18#include <atomic>
19
11fdf7f2 20#include "include/types.h"
7c673cae 21#include "include/interval_set.h"
11fdf7f2
TL
22#include "common/Thread.h"
23#include "include/utime.h"
7c673cae 24
f67539c2 25#include "aio/aio.h"
7c673cae 26#include "BlockDevice.h"
1e59de90 27#include "extblkdev/ExtBlkDevPlugin.h"
7c673cae 28
// RW_IO_MAX: INT_MAX combined with CEPH_PAGE_MASK by bitwise AND.
// NOTE(review): presumably the largest page-aligned byte count that still fits in an int,
// used as a per-call I/O size cap -- confirm against CEPH_PAGE_MASK's definition and the
// code that uses this macro (neither is visible in this header).
// NOTE(review): INT_MAX is provided by <climits>, which this header does not include
// directly; it is assumed to arrive through one of the project includes.
eafe8130 29#define RW_IO_MAX (INT_MAX & CEPH_PAGE_MASK)
494da23a 30
// KernelDevice: a BlockDevice subclass. Only the declarations (plus the small
// inline bodies of get_devname() and the two thread structs) are visible here;
// the remaining member functions are defined elsewhere, so the notes below are
// limited to what the declarations themselves establish.
7c673cae 31class KernelDevice : public BlockDevice {
20effc67
TL
32protected:
// Protected, so derived classes can access it.
// NOTE(review): presumably the path handed to open() -- confirm in the definition.
33 std::string path;
34private:
11fdf7f2
TL
// Two vectors of ints. NOTE(review): the names suggest file descriptors opened
// for direct and for buffered I/O, selected via choose_fd() -- confirm.
35 std::vector<int> fd_directs, fd_buffereds;
// Defaults to true. NOTE(review): meaning not shown in this header.
36 bool enable_wrt = true;
7c673cae
FG
// No in-class initializer: the constructor (defined elsewhere) must set these.
37 bool aio, dio;
38
1e59de90 39 ExtBlkDevInterfaceRef ebd_impl; // structure for retrieving compression state from extended block device
11fdf7f2
TL
40
41 std::string devname; ///< kernel dev name (/sys/block/$devname), if any
42
// NOTE(review): debug_lock presumably guards debug_inflight -- confirm.
43 ceph::mutex debug_lock = ceph::make_mutex("KernelDevice::debug_lock");
7c673cae
FG
44 interval_set<uint64_t> debug_inflight;
45
// Atomic flag, initialized to false.
46 std::atomic<bool> io_since_flush = {false};
11fdf7f2 47 ceph::mutex flush_mutex = ceph::make_mutex("KernelDevice::flush_mutex");
7c673cae 48
// Owned through std::unique_ptr.
9f95a23c 49 std::unique_ptr<io_queue_t> io_queue;
11fdf7f2
TL
// NOTE(review): presumably populated from the constructor's d_cb / d_cbpriv
// arguments -- confirm in the constructor definition.
50 aio_callback_t discard_callback;
51 void *discard_callback_priv;
// The next three flags have no in-class initializer; the constructor must set them.
7c673cae 52 bool aio_stop;
11fdf7f2
TL
53 bool discard_started;
54 bool discard_stop;
55
// Discard bookkeeping: a mutex, a condition variable, a running flag
// (initialized to false) and two interval sets of uint64_t.
// NOTE(review): presumably discard_lock protects the members below -- confirm.
56 ceph::mutex discard_lock = ceph::make_mutex("KernelDevice::discard_lock");
57 ceph::condition_variable discard_cond;
58 bool discard_running = false;
59 interval_set<uint64_t> discard_queued;
60 interval_set<uint64_t> discard_finishing;
7c673cae
FG
61
// Thread subclass whose entry() calls _aio_thread() on the KernelDevice it was
// constructed with and then returns NULL. It stores a raw pointer to that device.
62 struct AioCompletionThread : public Thread {
63 KernelDevice *bdev;
64 explicit AioCompletionThread(KernelDevice *b) : bdev(b) {}
65 void *entry() override {
66 bdev->_aio_thread();
67 return NULL;
68 }
69 } aio_thread;
70
11fdf7f2
TL
// Same shape as AioCompletionThread, but entry() calls _discard_thread().
71 struct DiscardThread : public Thread {
72 KernelDevice *bdev;
73 explicit DiscardThread(KernelDevice *b) : bdev(b) {}
74 void *entry() override {
75 bdev->_discard_thread();
76 return NULL;
77 }
78 } discard_thread;
79
7c673cae
FG
// No in-class initializer; the constructor must set it.
80 std::atomic_int injecting_crash;
81
20effc67
TL
// Default implementations: _post_open() returns 0 and _pre_close() does nothing.
// They sit in the private section; a derived class can still override a private virtual.
82 virtual int _post_open() { return 0; } // hook for child implementations
83 virtual void _pre_close() { } // hook for child implementations
84
// Bodies run by aio_thread and discard_thread respectively (see entry() above).
7c673cae 85 void _aio_thread();
11fdf7f2 86 void _discard_thread();
1e59de90
TL
87 int _queue_discard(interval_set<uint64_t> &to_release);
// Override with a default argument (async = true). Default arguments are chosen
// from the static type at the call site, so a call through a base-class
// reference uses the base declaration's default, not this one.
88 bool try_discard(interval_set<uint64_t> &to_release, bool async = true) override;
11fdf7f2 89
7c673cae
FG
90 int _aio_start();
91 void _aio_stop();
92
1e59de90 93 void _discard_start();
11fdf7f2
TL
94 void _discard_stop();
95
7c673cae
FG
96 void _aio_log_start(IOContext *ioc, uint64_t offset, uint64_t length);
97 void _aio_log_finish(IOContext *ioc, uint64_t offset, uint64_t length);
98
f67539c2 99 int _sync_write(uint64_t off, ceph::buffer::list& bl, bool buffered, int write_hint);
7c673cae
FG
100
101 int _lock();
102
103 int direct_read_unaligned(uint64_t off, uint64_t len, char *buf);
104
105 // stalled aio debugging
106 aio_list_t debug_queue;
11fdf7f2 107 ceph::mutex debug_queue_lock = ceph::make_mutex("KernelDevice::debug_queue_lock");
7c673cae
FG
// Initialized to nullptr.
108 aio_t *debug_oldest = nullptr;
109 utime_t debug_stall_since;
110 void debug_aio_link(aio_t& aio);
111 void debug_aio_unlink(aio_t& aio);
112
11fdf7f2
TL
// const member: does not modify the device object.
// NOTE(review): presumably picks an entry from fd_directs / fd_buffereds -- confirm.
113 int choose_fd(bool buffered, int write_hint) const;
114
20effc67
TL
115 ceph::unique_leakable_ptr<buffer::raw> create_custom_aligned(size_t len, IOContext* ioc) const;
116
7c673cae 117public:
// Takes two callback/private-pointer pairs: (cb, cbpriv) and (d_cb, d_cbpriv).
// NOTE(review): the d_ pair presumably feeds discard_callback /
// discard_callback_priv -- the constructor body is not visible here.
11fdf7f2 118 KernelDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, void *d_cbpriv);
7c673cae
FG
119
120 void aio_submit(IOContext *ioc) override;
11fdf7f2 121 void discard_drain() override;
7c673cae 122
f67539c2 123 int collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm) const override;
// Copies devname into *s and returns 0, or returns -ENOENT (leaving *s untouched)
// when devname is empty. s is dereferenced without a null check.
9f95a23c 124 int get_devname(std::string *s) const override {
11fdf7f2
TL
125 if (devname.empty()) {
126 return -ENOENT;
127 }
128 *s = devname;
129 return 0;
7c673cae 130 }
9f95a23c 131 int get_devices(std::set<std::string> *ls) const override;
7c673cae 132
1e59de90 133 int get_ebd_state(ExtBlkDevState &state) const override;
7c673cae 134
f67539c2 135 int read(uint64_t off, uint64_t len, ceph::buffer::list *pbl,
7c673cae
FG
136 IOContext *ioc,
137 bool buffered) override;
f67539c2 138 int aio_read(uint64_t off, uint64_t len, ceph::buffer::list *pbl,
7c673cae
FG
139 IOContext *ioc) override;
140 int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override;
141
f67539c2
TL
// write() and aio_write() both default write_hint to WRITE_LIFE_NOT_SET.
142 int write(uint64_t off, ceph::buffer::list& bl, bool buffered, int write_hint = WRITE_LIFE_NOT_SET) override;
143 int aio_write(uint64_t off, ceph::buffer::list& bl,
7c673cae 144 IOContext *ioc,
11fdf7f2
TL
145 bool buffered,
146 int write_hint = WRITE_LIFE_NOT_SET) override;
7c673cae 147 int flush() override;
// Public and not marked override, despite the leading underscore in its name.
1e59de90 148 int _discard(uint64_t offset, uint64_t len);
7c673cae
FG
149
150 // for managing buffered readers/writers
151 int invalidate_cache(uint64_t off, uint64_t len) override;
152 int open(const std::string& path) override;
153 void close() override;
154};
155
156#endif