]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2013 Inktank Storage, Inc.
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */
14 | ||
15 | #ifndef WBTHROTTLE_H | |
16 | #define WBTHROTTLE_H | |
17 | ||
#include <list>
#include <set>
#include <string>
#include <utility>

#include <boost/tuple/tuple.hpp>

#include "include/unordered_map.h"
#include "common/Formatter.h"
#include "common/hobject.h"
#include "include/interval_set.h"
#include "FDCache.h"
#include "common/Thread.h"
#include "common/ceph_context.h"
26 | ||
class PerfCounters;

/**
 * Identifiers for the WBThrottle counters (presumably indices into a
 * PerfCounters instance -- confirm against the implementation file).
 *
 * Only l_wbthrottle_first has an explicit value; every following enumerator
 * takes the next consecutive integer, so l_wbthrottle_last ends up at
 * l_wbthrottle_first + 7.  New entries must be added between the _first and
 * _last markers to keep that range contiguous.
 */
enum {
  l_wbthrottle_first = 999090,
  l_wbthrottle_bytes_dirtied,
  l_wbthrottle_bytes_wb,
  l_wbthrottle_ios_dirtied,
  l_wbthrottle_ios_wb,
  l_wbthrottle_inodes_dirtied,
  l_wbthrottle_inodes_wb,
  l_wbthrottle_last
};
38 | ||
39 | /** | |
40 | * WBThrottle | |
41 | * | |
42 | * Tracks, throttles, and flushes outstanding IO | |
43 | */ | |
44 | class WBThrottle : Thread, public md_config_obs_t { | |
45 | ghobject_t clearing; | |
46 | /* *_limits.first is the start_flusher limit and | |
47 | * *_limits.second is the hard limit | |
48 | */ | |
49 | ||
50 | /// Limits on unflushed bytes | |
51 | pair<uint64_t, uint64_t> size_limits; | |
52 | ||
53 | /// Limits on unflushed ios | |
54 | pair<uint64_t, uint64_t> io_limits; | |
55 | ||
56 | /// Limits on unflushed objects | |
57 | pair<uint64_t, uint64_t> fd_limits; | |
58 | ||
59 | uint64_t cur_ios; /// Currently unflushed IOs | |
60 | uint64_t cur_size; /// Currently unflushed bytes | |
61 | ||
62 | /** | |
63 | * PendingWB tracks the ios pending on an object. | |
64 | */ | |
65 | class PendingWB { | |
66 | public: | |
67 | bool nocache; | |
68 | uint64_t size; | |
69 | uint64_t ios; | |
70 | PendingWB() : nocache(true), size(0), ios(0) {} | |
71 | void add(bool _nocache, uint64_t _size, uint64_t _ios) { | |
72 | if (!_nocache) | |
73 | nocache = false; // only nocache if all writes are nocache | |
74 | size += _size; | |
75 | ios += _ios; | |
76 | } | |
77 | }; | |
78 | ||
79 | CephContext *cct; | |
80 | PerfCounters *logger; | |
81 | bool stopping; | |
82 | Mutex lock; | |
83 | Cond cond; | |
84 | ||
85 | ||
86 | /** | |
87 | * Flush objects in lru order | |
88 | */ | |
89 | list<ghobject_t> lru; | |
90 | ceph::unordered_map<ghobject_t, list<ghobject_t>::iterator> rev_lru; | |
91 | void remove_object(const ghobject_t &oid) { | |
11fdf7f2 | 92 | ceph_assert(lock.is_locked()); |
7c673cae FG |
93 | ceph::unordered_map<ghobject_t, list<ghobject_t>::iterator>::iterator iter = |
94 | rev_lru.find(oid); | |
95 | if (iter == rev_lru.end()) | |
96 | return; | |
97 | ||
98 | lru.erase(iter->second); | |
99 | rev_lru.erase(iter); | |
100 | } | |
101 | ghobject_t pop_object() { | |
11fdf7f2 | 102 | ceph_assert(!lru.empty()); |
7c673cae FG |
103 | ghobject_t oid(lru.front()); |
104 | lru.pop_front(); | |
105 | rev_lru.erase(oid); | |
106 | return oid; | |
107 | } | |
108 | void insert_object(const ghobject_t &oid) { | |
11fdf7f2 | 109 | ceph_assert(rev_lru.find(oid) == rev_lru.end()); |
7c673cae FG |
110 | lru.push_back(oid); |
111 | rev_lru.insert(make_pair(oid, --lru.end())); | |
112 | } | |
113 | ||
114 | ceph::unordered_map<ghobject_t, pair<PendingWB, FDRef> > pending_wbs; | |
115 | ||
116 | /// get next flush to perform | |
117 | bool get_next_should_flush( | |
118 | boost::tuple<ghobject_t, FDRef, PendingWB> *next ///< [out] next to flush | |
119 | ); ///< @return false if we are shutting down | |
120 | public: | |
121 | enum FS { | |
122 | BTRFS, | |
123 | XFS | |
124 | }; | |
125 | ||
126 | private: | |
127 | FS fs; | |
128 | ||
129 | void set_from_conf(); | |
130 | bool beyond_limit() const { | |
131 | if (cur_ios < io_limits.first && | |
132 | pending_wbs.size() < fd_limits.first && | |
133 | cur_size < size_limits.first) | |
134 | return false; | |
135 | else | |
136 | return true; | |
137 | } | |
138 | bool need_flush() const { | |
139 | if (cur_ios < io_limits.second && | |
140 | pending_wbs.size() < fd_limits.second && | |
141 | cur_size < size_limits.second) | |
142 | return false; | |
143 | else | |
144 | return true; | |
145 | } | |
146 | ||
147 | public: | |
148 | explicit WBThrottle(CephContext *cct); | |
149 | ~WBThrottle() override; | |
150 | ||
151 | void start(); | |
152 | void stop(); | |
153 | /// Set fs as XFS or BTRFS | |
154 | void set_fs(FS new_fs) { | |
155 | Mutex::Locker l(lock); | |
156 | fs = new_fs; | |
157 | set_from_conf(); | |
158 | } | |
159 | ||
160 | /// Queue wb on oid, fd taking throttle (does not block) | |
161 | void queue_wb( | |
162 | FDRef fd, ///< [in] FDRef to oid | |
163 | const ghobject_t &oid, ///< [in] object | |
164 | uint64_t offset, ///< [in] offset written | |
165 | uint64_t len, ///< [in] length written | |
166 | bool nocache ///< [in] try to clear out of cache after write | |
167 | ); | |
168 | ||
169 | /// Clear all wb (probably due to sync) | |
170 | void clear(); | |
171 | ||
172 | /// Clear object | |
173 | void clear_object(const ghobject_t &oid); | |
174 | ||
175 | /// Block until there is throttle available | |
176 | void throttle(); | |
177 | ||
178 | /// md_config_obs_t | |
179 | const char** get_tracked_conf_keys() const override; | |
11fdf7f2 | 180 | void handle_conf_change(const ConfigProxy& conf, |
7c673cae FG |
181 | const std::set<std::string> &changed) override; |
182 | ||
183 | /// Thread | |
184 | void *entry() override; | |
185 | }; | |
186 | ||
187 | #endif |