]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2014 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
f67539c2 TL |
15 | #ifndef CEPH_BLK_KERNELDEVICE_H |
16 | #define CEPH_BLK_KERNELDEVICE_H | |
7c673cae FG |
17 | |
18 | #include <atomic> | |
19 | ||
11fdf7f2 | 20 | #include "include/types.h" |
7c673cae | 21 | #include "include/interval_set.h" |
11fdf7f2 TL |
22 | #include "common/Thread.h" |
23 | #include "include/utime.h" | |
7c673cae | 24 | |
f67539c2 | 25 | #include "aio/aio.h" |
7c673cae | 26 | #include "BlockDevice.h" |
1e59de90 | 27 | #include "extblkdev/ExtBlkDevPlugin.h" |
7c673cae | 28 | |
// RW_IO_MAX: INT_MAX masked with CEPH_PAGE_MASK. NOTE(review): presumably the largest page-aligned size allowed for a single read/write call; CEPH_PAGE_MASK is defined outside this header — confirm.
eafe8130 | 29 | #define RW_IO_MAX (INT_MAX & CEPH_PAGE_MASK) |
494da23a | 30 | |
7c673cae | 31 | class KernelDevice : public BlockDevice { |
20effc67 TL |
32 | protected: |
33 | std::string path; | |
34 | private: | |
11fdf7f2 TL |
35 | std::vector<int> fd_directs, fd_buffereds; |
36 | bool enable_wrt = true; | |
7c673cae FG |
37 | bool aio, dio; |
38 | ||
1e59de90 | 39 | ExtBlkDevInterfaceRef ebd_impl; // structure for retrieving compression state from extended block device |
11fdf7f2 TL |
40 | |
41 | std::string devname; ///< kernel dev name (/sys/block/$devname), if any | |
42 | ||
43 | ceph::mutex debug_lock = ceph::make_mutex("KernelDevice::debug_lock"); | |
7c673cae FG |
44 | interval_set<uint64_t> debug_inflight; |
45 | ||
46 | std::atomic<bool> io_since_flush = {false}; | |
11fdf7f2 | 47 | ceph::mutex flush_mutex = ceph::make_mutex("KernelDevice::flush_mutex"); |
7c673cae | 48 | |
9f95a23c | 49 | std::unique_ptr<io_queue_t> io_queue; |
11fdf7f2 TL |
50 | aio_callback_t discard_callback; |
51 | void *discard_callback_priv; | |
7c673cae | 52 | bool aio_stop; |
11fdf7f2 TL |
53 | bool discard_started; |
54 | bool discard_stop; | |
55 | ||
56 | ceph::mutex discard_lock = ceph::make_mutex("KernelDevice::discard_lock"); | |
57 | ceph::condition_variable discard_cond; | |
58 | bool discard_running = false; | |
59 | interval_set<uint64_t> discard_queued; | |
60 | interval_set<uint64_t> discard_finishing; | |
7c673cae FG |
61 | |
62 | struct AioCompletionThread : public Thread { | |
63 | KernelDevice *bdev; | |
64 | explicit AioCompletionThread(KernelDevice *b) : bdev(b) {} | |
65 | void *entry() override { | |
66 | bdev->_aio_thread(); | |
67 | return NULL; | |
68 | } | |
69 | } aio_thread; | |
70 | ||
11fdf7f2 TL |
71 | struct DiscardThread : public Thread { |
72 | KernelDevice *bdev; | |
73 | explicit DiscardThread(KernelDevice *b) : bdev(b) {} | |
74 | void *entry() override { | |
75 | bdev->_discard_thread(); | |
76 | return NULL; | |
77 | } | |
78 | } discard_thread; | |
79 | ||
7c673cae FG |
80 | std::atomic_int injecting_crash; |
81 | ||
20effc67 TL |
82 | virtual int _post_open() { return 0; } // hook for child implementations |
83 | virtual void _pre_close() { } // hook for child implementations | |
84 | ||
7c673cae | 85 | void _aio_thread(); |
11fdf7f2 | 86 | void _discard_thread(); |
1e59de90 TL |
87 | int _queue_discard(interval_set<uint64_t> &to_release); |
88 | bool try_discard(interval_set<uint64_t> &to_release, bool async = true) override; | |
11fdf7f2 | 89 | |
7c673cae FG |
90 | int _aio_start(); |
91 | void _aio_stop(); | |
92 | ||
1e59de90 | 93 | void _discard_start(); |
11fdf7f2 TL |
94 | void _discard_stop(); |
95 | ||
7c673cae FG |
96 | void _aio_log_start(IOContext *ioc, uint64_t offset, uint64_t length); |
97 | void _aio_log_finish(IOContext *ioc, uint64_t offset, uint64_t length); | |
98 | ||
f67539c2 | 99 | int _sync_write(uint64_t off, ceph::buffer::list& bl, bool buffered, int write_hint); |
7c673cae FG |
100 | |
101 | int _lock(); | |
102 | ||
103 | int direct_read_unaligned(uint64_t off, uint64_t len, char *buf); | |
104 | ||
105 | // stalled aio debugging | |
106 | aio_list_t debug_queue; | |
11fdf7f2 | 107 | ceph::mutex debug_queue_lock = ceph::make_mutex("KernelDevice::debug_queue_lock"); |
7c673cae FG |
108 | aio_t *debug_oldest = nullptr; |
109 | utime_t debug_stall_since; | |
110 | void debug_aio_link(aio_t& aio); | |
111 | void debug_aio_unlink(aio_t& aio); | |
112 | ||
11fdf7f2 TL |
113 | int choose_fd(bool buffered, int write_hint) const; |
114 | ||
20effc67 TL |
115 | ceph::unique_leakable_ptr<buffer::raw> create_custom_aligned(size_t len, IOContext* ioc) const; |
116 | ||
7c673cae | 117 | public: |
11fdf7f2 | 118 | KernelDevice(CephContext* cct, aio_callback_t cb, void *cbpriv, aio_callback_t d_cb, void *d_cbpriv); |
7c673cae FG |
119 | |
120 | void aio_submit(IOContext *ioc) override; | |
11fdf7f2 | 121 | void discard_drain() override; |
7c673cae | 122 | |
f67539c2 | 123 | int collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm) const override; |
9f95a23c | 124 | int get_devname(std::string *s) const override { |
11fdf7f2 TL |
125 | if (devname.empty()) { |
126 | return -ENOENT; | |
127 | } | |
128 | *s = devname; | |
129 | return 0; | |
7c673cae | 130 | } |
9f95a23c | 131 | int get_devices(std::set<std::string> *ls) const override; |
7c673cae | 132 | |
1e59de90 | 133 | int get_ebd_state(ExtBlkDevState &state) const override; |
7c673cae | 134 | |
f67539c2 | 135 | int read(uint64_t off, uint64_t len, ceph::buffer::list *pbl, |
7c673cae FG |
136 | IOContext *ioc, |
137 | bool buffered) override; | |
f67539c2 | 138 | int aio_read(uint64_t off, uint64_t len, ceph::buffer::list *pbl, |
7c673cae FG |
139 | IOContext *ioc) override; |
140 | int read_random(uint64_t off, uint64_t len, char *buf, bool buffered) override; | |
141 | ||
f67539c2 TL |
142 | int write(uint64_t off, ceph::buffer::list& bl, bool buffered, int write_hint = WRITE_LIFE_NOT_SET) override; |
143 | int aio_write(uint64_t off, ceph::buffer::list& bl, | |
7c673cae | 144 | IOContext *ioc, |
11fdf7f2 TL |
145 | bool buffered, |
146 | int write_hint = WRITE_LIFE_NOT_SET) override; | |
7c673cae | 147 | int flush() override; |
1e59de90 | 148 | int _discard(uint64_t offset, uint64_t len); |
7c673cae FG |
149 | |
150 | // for managing buffered readers/writers | |
151 | int invalidate_cache(uint64_t off, uint64_t len) override; | |
152 | int open(const std::string& path) override; | |
153 | void close() override; | |
154 | }; | |
155 | ||
156 | #endif |