// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */
#ifndef CEPH_MDLOG_H
#define CEPH_MDLOG_H

#include "include/common_fwd.h"

// Counter identifiers used by MDLog.
//
// l_mdl_first (5000) and l_mdl_last are range sentinels; every enumerator in
// between takes the next consecutive value, so the declaration order below
// is significant and must not be changed.
// NOTE(review): presumably these index the PerfCounters object that
// MDLog::create_logger() sets up -- confirm against MDLog.cc.
enum {
  l_mdl_first = 5000,
  l_mdl_evadd,
  l_mdl_evex,
  l_mdl_evtrm,
  l_mdl_ev,
  l_mdl_evexg,
  l_mdl_evexd,
  l_mdl_segadd,
  l_mdl_segex,
  l_mdl_segtrm,
  l_mdl_seg,
  l_mdl_segexg,
  l_mdl_segexd,
  l_mdl_expos,
  l_mdl_wrpos,
  l_mdl_rdpos,
  l_mdl_jlat,
  l_mdl_replayed,
  l_mdl_last,
};

#include "include/types.h"
#include "include/Context.h"

#include "MDSContext.h"
#include "common/Cond.h"
#include "common/Finisher.h"
#include "common/Thread.h"

#include "LogSegment.h"

#include <list>
#include <map>
#include <set>

// Forward declarations for types that MDLog only refers to by pointer or
// reference in this header.
class Journaler;
class JournalPointer;
class LogEvent;
class MDSRank;
class LogSegment;
class ESubtreeMap;

61 | class MDLog { |
62 | public: | |
9f95a23c TL |
63 | explicit MDLog(MDSRank *m) : mds(m), |
64 | replay_thread(this), | |
65 | recovery_thread(this), | |
66 | submit_thread(this) {} | |
67 | ~MDLog(); | |
7c673cae | 68 | |
7c673cae FG |
69 | const std::set<LogSegment*> &get_expiring_segments() const |
70 | { | |
71 | return expiring_segments; | |
72 | } | |
7c673cae | 73 | |
7c673cae | 74 | void create_logger(); |
7c673cae FG |
75 | void set_write_iohint(unsigned iohint_flags); |
76 | ||
7c673cae | 77 | void start_new_segment() { |
11fdf7f2 | 78 | std::lock_guard l(submit_mutex); |
7c673cae FG |
79 | _start_new_segment(); |
80 | } | |
81 | void prepare_new_segment() { | |
11fdf7f2 | 82 | std::lock_guard l(submit_mutex); |
7c673cae FG |
83 | _prepare_new_segment(); |
84 | } | |
11fdf7f2 | 85 | void journal_segment_subtree_map(MDSContext *onsync=NULL) { |
9f95a23c TL |
86 | { |
87 | std::lock_guard l{submit_mutex}; | |
88 | _journal_segment_subtree_map(onsync); | |
89 | } | |
7c673cae FG |
90 | if (onsync) |
91 | flush(); | |
92 | } | |
93 | ||
94 | LogSegment *peek_current_segment() { | |
95 | return segments.empty() ? NULL : segments.rbegin()->second; | |
96 | } | |
97 | ||
98 | LogSegment *get_current_segment() { | |
11fdf7f2 | 99 | ceph_assert(!segments.empty()); |
7c673cae FG |
100 | return segments.rbegin()->second; |
101 | } | |
102 | ||
9f95a23c | 103 | LogSegment *get_segment(LogSegment::seq_t seq) { |
7c673cae FG |
104 | if (segments.count(seq)) |
105 | return segments[seq]; | |
106 | return NULL; | |
107 | } | |
108 | ||
109 | bool have_any_segments() const { | |
110 | return !segments.empty(); | |
111 | } | |
112 | ||
113 | void flush_logger(); | |
114 | ||
115 | size_t get_num_events() const { return num_events; } | |
116 | size_t get_num_segments() const { return segments.size(); } | |
117 | ||
118 | uint64_t get_read_pos() const; | |
119 | uint64_t get_write_pos() const; | |
120 | uint64_t get_safe_pos() const; | |
121 | Journaler *get_journaler() { return journaler; } | |
122 | bool empty() const { return segments.empty(); } | |
123 | ||
2a845540 | 124 | bool is_capped() const { return mds_is_shutting_down; } |
7c673cae FG |
125 | void cap(); |
126 | ||
127 | void kick_submitter(); | |
128 | void shutdown(); | |
129 | ||
7c673cae FG |
130 | void _start_entry(LogEvent *e); |
131 | void start_entry(LogEvent *e) { | |
11fdf7f2 | 132 | std::lock_guard l(submit_mutex); |
7c673cae FG |
133 | _start_entry(e); |
134 | } | |
135 | void cancel_entry(LogEvent *e); | |
136 | void _submit_entry(LogEvent *e, MDSLogContextBase *c); | |
137 | void submit_entry(LogEvent *e, MDSLogContextBase *c = 0) { | |
11fdf7f2 | 138 | std::lock_guard l(submit_mutex); |
7c673cae | 139 | _submit_entry(e, c); |
9f95a23c | 140 | submit_cond.notify_all(); |
7c673cae FG |
141 | } |
142 | void start_submit_entry(LogEvent *e, MDSLogContextBase *c = 0) { | |
11fdf7f2 | 143 | std::lock_guard l(submit_mutex); |
7c673cae FG |
144 | _start_entry(e); |
145 | _submit_entry(e, c); | |
9f95a23c | 146 | submit_cond.notify_all(); |
7c673cae FG |
147 | } |
148 | bool entry_is_open() const { return cur_event != NULL; } | |
149 | ||
11fdf7f2 | 150 | void wait_for_safe( MDSContext *c ); |
7c673cae FG |
151 | void flush(); |
152 | bool is_flushed() const { | |
153 | return unflushed == 0; | |
154 | } | |
155 | ||
7c673cae FG |
156 | void trim_expired_segments(); |
157 | void trim(int max=-1); | |
158 | int trim_all(); | |
159 | bool expiry_done() const | |
160 | { | |
161 | return expiring_segments.empty() && expired_segments.empty(); | |
162 | }; | |
163 | ||
11fdf7f2 TL |
164 | void create(MDSContext *onfinish); // fresh, empty log! |
165 | void open(MDSContext *onopen); // append() or replay() to follow! | |
166 | void reopen(MDSContext *onopen); | |
7c673cae | 167 | void append(); |
11fdf7f2 | 168 | void replay(MDSContext *onfinish); |
7c673cae FG |
169 | |
170 | void standby_trim_segments(); | |
171 | ||
172 | void dump_replay_status(Formatter *f) const; | |
7c673cae | 173 | |
9f95a23c TL |
174 | MDSRank *mds; |
175 | // replay state | |
20effc67 | 176 | std::map<inodeno_t, std::set<inodeno_t>> pending_exports; |
9f95a23c TL |
177 | |
178 | protected: | |
179 | struct PendingEvent { | |
180 | PendingEvent(LogEvent *e, MDSContext *c, bool f=false) : le(e), fin(c), flush(f) {} | |
181 | LogEvent *le; | |
182 | MDSContext *fin; | |
183 | bool flush; | |
184 | }; | |
185 | ||
186 | // -- replay -- | |
187 | class ReplayThread : public Thread { | |
188 | public: | |
189 | explicit ReplayThread(MDLog *l) : log(l) {} | |
190 | void* entry() override { | |
191 | log->_replay_thread(); | |
192 | return 0; | |
193 | } | |
194 | private: | |
195 | MDLog *log; | |
196 | } replay_thread; | |
197 | ||
198 | // Journal recovery/rewrite logic | |
199 | class RecoveryThread : public Thread { | |
200 | public: | |
201 | explicit RecoveryThread(MDLog *l) : log(l) {} | |
202 | void set_completion(MDSContext *c) {completion = c;} | |
203 | void* entry() override { | |
204 | log->_recovery_thread(completion); | |
205 | return 0; | |
206 | } | |
207 | private: | |
208 | MDLog *log; | |
209 | MDSContext *completion = nullptr; | |
210 | } recovery_thread; | |
211 | ||
212 | class SubmitThread : public Thread { | |
213 | public: | |
214 | explicit SubmitThread(MDLog *l) : log(l) {} | |
215 | void* entry() override { | |
216 | log->_submit_thread(); | |
217 | return 0; | |
218 | } | |
219 | private: | |
220 | MDLog *log; | |
221 | } submit_thread; | |
222 | ||
223 | friend class ReplayThread; | |
224 | friend class C_MDL_Replay; | |
225 | friend class MDSLogContextBase; | |
226 | friend class SubmitThread; | |
227 | // -- subtreemaps -- | |
228 | friend class ESubtreeMap; | |
229 | friend class MDCache; | |
230 | ||
231 | void _replay(); // old way | |
232 | void _replay_thread(); // new way | |
233 | ||
234 | void _recovery_thread(MDSContext *completion); | |
235 | void _reformat_journal(JournalPointer const &jp, Journaler *old_journal, MDSContext *completion); | |
236 | ||
237 | void set_safe_pos(uint64_t pos) | |
238 | { | |
239 | std::lock_guard l(submit_mutex); | |
240 | ceph_assert(pos >= safe_pos); | |
241 | safe_pos = pos; | |
242 | } | |
243 | ||
244 | void _submit_thread(); | |
245 | ||
246 | uint64_t get_last_segment_seq() const { | |
247 | ceph_assert(!segments.empty()); | |
248 | return segments.rbegin()->first; | |
249 | } | |
250 | LogSegment *get_oldest_segment() { | |
251 | return segments.begin()->second; | |
252 | } | |
253 | void remove_oldest_segment() { | |
254 | std::map<uint64_t, LogSegment*>::iterator p = segments.begin(); | |
255 | delete p->second; | |
256 | segments.erase(p); | |
257 | } | |
258 | ||
259 | int num_events = 0; // in events | |
260 | int unflushed = 0; | |
2a845540 | 261 | bool mds_is_shutting_down = false; |
9f95a23c TL |
262 | |
263 | // Log position which is persistent *and* for which | |
264 | // submit_entry wait_for_safe callbacks have already | |
265 | // been called. | |
266 | uint64_t safe_pos = 0; | |
267 | ||
268 | inodeno_t ino; | |
269 | Journaler *journaler = nullptr; | |
270 | ||
271 | PerfCounters *logger = nullptr; | |
272 | ||
273 | bool already_replayed = false; | |
274 | ||
275 | MDSContext::vec waitfor_replay; | |
276 | ||
277 | // -- segments -- | |
278 | std::map<uint64_t,LogSegment*> segments; | |
20effc67 TL |
279 | std::set<LogSegment*> expiring_segments; |
280 | std::set<LogSegment*> expired_segments; | |
9f95a23c TL |
281 | std::size_t pre_segments_size = 0; // the num of segments when the mds finished replay-journal, to calc the num of segments growing |
282 | uint64_t event_seq = 0; | |
283 | int expiring_events = 0; | |
284 | int expired_events = 0; | |
285 | ||
286 | int64_t mdsmap_up_features = 0; | |
20effc67 | 287 | std::map<uint64_t,std::list<PendingEvent> > pending_events; // log segment -> event list |
9f95a23c TL |
288 | ceph::mutex submit_mutex = ceph::make_mutex("MDLog::submit_mutex"); |
289 | ceph::condition_variable submit_cond; | |
290 | ||
291 | private: | |
292 | friend class C_MaybeExpiredSegment; | |
293 | friend class C_MDL_Flushed; | |
294 | friend class C_OFT_Committed; | |
295 | ||
296 | // -- segments -- | |
297 | void _start_new_segment(); | |
298 | void _prepare_new_segment(); | |
299 | void _journal_segment_subtree_map(MDSContext *onsync); | |
300 | ||
2a845540 TL |
301 | void try_to_commit_open_file_table(uint64_t last_seq); |
302 | ||
9f95a23c TL |
303 | void try_expire(LogSegment *ls, int op_prio); |
304 | void _maybe_expired(LogSegment *ls, int op_prio); | |
305 | void _expired(LogSegment *ls); | |
306 | void _trim_expired_segments(); | |
307 | void write_head(MDSContext *onfinish); | |
308 | ||
309 | // -- events -- | |
310 | LogEvent *cur_event = nullptr; | |
311 | }; | |
#endif