]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #ifndef CEPH_MDLOG_H | |
17 | #define CEPH_MDLOG_H | |
18 | ||
// Perf-counter indices for the MDS log.  The range starts at l_mdl_first
// and is terminated by l_mdl_last; the enumerators in between take
// consecutive values, so their order must not change.
// NOTE(review): the per-counter meanings below are read off the names only;
// the authoritative descriptions are wherever these ids are registered
// (presumably MDLog::create_logger()) -- confirm there.
enum {
  l_mdl_first = 5000,

  // event counters
  l_mdl_evadd,
  l_mdl_evex,
  l_mdl_evtrm,
  l_mdl_ev,
  l_mdl_evexg,
  l_mdl_evexd,

  // segment counters (same suffix scheme as the event counters)
  l_mdl_segadd,
  l_mdl_segex,
  l_mdl_segtrm,
  l_mdl_seg,
  l_mdl_segexg,
  l_mdl_segexd,

  // journal positions
  l_mdl_expos,
  l_mdl_wrpos,
  l_mdl_rdpos,

  // miscellaneous
  l_mdl_jlat,
  l_mdl_replayed,

  l_mdl_last,
};
40 | ||
41 | #include "include/types.h" | |
42 | #include "include/Context.h" | |
43 | ||
11fdf7f2 | 44 | #include "MDSContext.h" |
7c673cae FG |
45 | #include "common/Thread.h" |
46 | #include "common/Cond.h" | |
47 | ||
48 | #include "LogSegment.h" | |
49 | ||
50 | #include <list> | |
51 | ||
52 | class Journaler; | |
53 | class JournalPointer; | |
54 | class LogEvent; | |
55 | class MDSRank; | |
56 | class LogSegment; | |
57 | class ESubtreeMap; | |
58 | ||
59 | class PerfCounters; | |
60 | ||
61 | #include <map> | |
62 | using std::map; | |
63 | ||
64 | #include "common/Finisher.h" | |
65 | ||
66 | ||
67 | class MDLog { | |
68 | public: | |
69 | MDSRank *mds; | |
70 | protected: | |
71 | int num_events; // in events | |
72 | ||
73 | int unflushed; | |
74 | ||
75 | bool capped; | |
76 | ||
77 | // Log position which is persistent *and* for which | |
78 | // submit_entry wait_for_safe callbacks have already | |
79 | // been called. | |
80 | uint64_t safe_pos; | |
81 | ||
82 | inodeno_t ino; | |
83 | Journaler *journaler; | |
84 | ||
85 | PerfCounters *logger; | |
86 | ||
87 | ||
88 | // -- replay -- | |
89 | class ReplayThread : public Thread { | |
90 | MDLog *log; | |
91 | public: | |
92 | explicit ReplayThread(MDLog *l) : log(l) {} | |
93 | void* entry() override { | |
94 | log->_replay_thread(); | |
95 | return 0; | |
96 | } | |
97 | } replay_thread; | |
98 | bool already_replayed; | |
99 | ||
100 | friend class ReplayThread; | |
101 | friend class C_MDL_Replay; | |
102 | ||
11fdf7f2 | 103 | MDSContext::vec waitfor_replay; |
7c673cae FG |
104 | |
105 | void _replay(); // old way | |
106 | void _replay_thread(); // new way | |
107 | ||
108 | // Journal recovery/rewrite logic | |
109 | class RecoveryThread : public Thread { | |
110 | MDLog *log; | |
11fdf7f2 | 111 | MDSContext *completion; |
7c673cae | 112 | public: |
11fdf7f2 | 113 | void set_completion(MDSContext *c) {completion = c;} |
7c673cae FG |
114 | explicit RecoveryThread(MDLog *l) : log(l), completion(NULL) {} |
115 | void* entry() override { | |
116 | log->_recovery_thread(completion); | |
117 | return 0; | |
118 | } | |
119 | } recovery_thread; | |
11fdf7f2 TL |
120 | void _recovery_thread(MDSContext *completion); |
121 | void _reformat_journal(JournalPointer const &jp, Journaler *old_journal, MDSContext *completion); | |
7c673cae FG |
122 | |
123 | // -- segments -- | |
124 | map<uint64_t,LogSegment*> segments; | |
125 | set<LogSegment*> expiring_segments; | |
126 | set<LogSegment*> expired_segments; | |
81eedcae | 127 | std::size_t pre_segments_size = 0; // the num of segments when the mds finished replay-journal, to calc the num of segments growing |
7c673cae FG |
128 | uint64_t event_seq; |
129 | int expiring_events; | |
130 | int expired_events; | |
131 | ||
132 | struct PendingEvent { | |
133 | LogEvent *le; | |
134 | MDSContext *fin; | |
135 | bool flush; | |
136 | PendingEvent(LogEvent *e, MDSContext *c, bool f=false) : le(e), fin(c), flush(f) {} | |
137 | }; | |
138 | ||
139 | int64_t mdsmap_up_features; | |
140 | map<uint64_t,list<PendingEvent> > pending_events; // log segment -> event list | |
141 | Mutex submit_mutex; | |
142 | Cond submit_cond; | |
143 | ||
144 | void set_safe_pos(uint64_t pos) | |
145 | { | |
11fdf7f2 TL |
146 | std::lock_guard l(submit_mutex); |
147 | ceph_assert(pos >= safe_pos); | |
7c673cae FG |
148 | safe_pos = pos; |
149 | } | |
150 | friend class MDSLogContextBase; | |
151 | ||
152 | void _submit_thread(); | |
153 | class SubmitThread : public Thread { | |
154 | MDLog *log; | |
155 | public: | |
156 | explicit SubmitThread(MDLog *l) : log(l) {} | |
157 | void* entry() override { | |
158 | log->_submit_thread(); | |
159 | return 0; | |
160 | } | |
161 | } submit_thread; | |
162 | friend class SubmitThread; | |
163 | ||
164 | public: | |
165 | const std::set<LogSegment*> &get_expiring_segments() const | |
166 | { | |
167 | return expiring_segments; | |
168 | } | |
169 | protected: | |
170 | ||
171 | // -- subtreemaps -- | |
172 | friend class ESubtreeMap; | |
173 | friend class MDCache; | |
174 | ||
175 | uint64_t get_last_segment_seq() const { | |
11fdf7f2 | 176 | ceph_assert(!segments.empty()); |
7c673cae FG |
177 | return segments.rbegin()->first; |
178 | } | |
179 | LogSegment *get_oldest_segment() { | |
180 | return segments.begin()->second; | |
181 | } | |
182 | void remove_oldest_segment() { | |
183 | map<uint64_t, LogSegment*>::iterator p = segments.begin(); | |
184 | delete p->second; | |
185 | segments.erase(p); | |
186 | } | |
187 | ||
188 | public: | |
189 | void create_logger(); | |
190 | ||
191 | // replay state | |
192 | map<inodeno_t, set<inodeno_t> > pending_exports; | |
193 | ||
194 | void set_write_iohint(unsigned iohint_flags); | |
195 | ||
196 | public: | |
197 | explicit MDLog(MDSRank *m) : mds(m), | |
198 | num_events(0), | |
199 | unflushed(0), | |
200 | capped(false), | |
201 | safe_pos(0), | |
202 | journaler(0), | |
203 | logger(0), | |
204 | replay_thread(this), | |
205 | already_replayed(false), | |
206 | recovery_thread(this), | |
207 | event_seq(0), expiring_events(0), expired_events(0), | |
208 | mdsmap_up_features(0), | |
209 | submit_mutex("MDLog::submit_mutex"), | |
210 | submit_thread(this), | |
211 | cur_event(NULL) { } | |
212 | ~MDLog(); | |
213 | ||
214 | ||
215 | private: | |
216 | // -- segments -- | |
217 | void _start_new_segment(); | |
218 | void _prepare_new_segment(); | |
11fdf7f2 | 219 | void _journal_segment_subtree_map(MDSContext *onsync); |
7c673cae FG |
220 | public: |
221 | void start_new_segment() { | |
11fdf7f2 | 222 | std::lock_guard l(submit_mutex); |
7c673cae FG |
223 | _start_new_segment(); |
224 | } | |
225 | void prepare_new_segment() { | |
11fdf7f2 | 226 | std::lock_guard l(submit_mutex); |
7c673cae FG |
227 | _prepare_new_segment(); |
228 | } | |
11fdf7f2 | 229 | void journal_segment_subtree_map(MDSContext *onsync=NULL) { |
7c673cae FG |
230 | submit_mutex.Lock(); |
231 | _journal_segment_subtree_map(onsync); | |
232 | submit_mutex.Unlock(); | |
233 | if (onsync) | |
234 | flush(); | |
235 | } | |
236 | ||
237 | LogSegment *peek_current_segment() { | |
238 | return segments.empty() ? NULL : segments.rbegin()->second; | |
239 | } | |
240 | ||
241 | LogSegment *get_current_segment() { | |
11fdf7f2 | 242 | ceph_assert(!segments.empty()); |
7c673cae FG |
243 | return segments.rbegin()->second; |
244 | } | |
245 | ||
246 | LogSegment *get_segment(log_segment_seq_t seq) { | |
247 | if (segments.count(seq)) | |
248 | return segments[seq]; | |
249 | return NULL; | |
250 | } | |
251 | ||
252 | bool have_any_segments() const { | |
253 | return !segments.empty(); | |
254 | } | |
255 | ||
256 | void flush_logger(); | |
257 | ||
258 | size_t get_num_events() const { return num_events; } | |
259 | size_t get_num_segments() const { return segments.size(); } | |
260 | ||
261 | uint64_t get_read_pos() const; | |
262 | uint64_t get_write_pos() const; | |
263 | uint64_t get_safe_pos() const; | |
264 | Journaler *get_journaler() { return journaler; } | |
265 | bool empty() const { return segments.empty(); } | |
266 | ||
267 | bool is_capped() const { return capped; } | |
268 | void cap(); | |
269 | ||
270 | void kick_submitter(); | |
271 | void shutdown(); | |
272 | ||
273 | // -- events -- | |
274 | private: | |
275 | LogEvent *cur_event; | |
276 | public: | |
277 | void _start_entry(LogEvent *e); | |
278 | void start_entry(LogEvent *e) { | |
11fdf7f2 | 279 | std::lock_guard l(submit_mutex); |
7c673cae FG |
280 | _start_entry(e); |
281 | } | |
282 | void cancel_entry(LogEvent *e); | |
283 | void _submit_entry(LogEvent *e, MDSLogContextBase *c); | |
284 | void submit_entry(LogEvent *e, MDSLogContextBase *c = 0) { | |
11fdf7f2 | 285 | std::lock_guard l(submit_mutex); |
7c673cae FG |
286 | _submit_entry(e, c); |
287 | submit_cond.Signal(); | |
288 | } | |
289 | void start_submit_entry(LogEvent *e, MDSLogContextBase *c = 0) { | |
11fdf7f2 | 290 | std::lock_guard l(submit_mutex); |
7c673cae FG |
291 | _start_entry(e); |
292 | _submit_entry(e, c); | |
293 | submit_cond.Signal(); | |
294 | } | |
295 | bool entry_is_open() const { return cur_event != NULL; } | |
296 | ||
11fdf7f2 | 297 | void wait_for_safe( MDSContext *c ); |
7c673cae FG |
298 | void flush(); |
299 | bool is_flushed() const { | |
300 | return unflushed == 0; | |
301 | } | |
302 | ||
303 | private: | |
304 | void try_expire(LogSegment *ls, int op_prio); | |
305 | void _maybe_expired(LogSegment *ls, int op_prio); | |
306 | void _expired(LogSegment *ls); | |
307 | void _trim_expired_segments(); | |
308 | ||
309 | friend class C_MaybeExpiredSegment; | |
310 | friend class C_MDL_Flushed; | |
11fdf7f2 | 311 | friend class C_OFT_Committed; |
7c673cae FG |
312 | |
313 | public: | |
314 | void trim_expired_segments(); | |
315 | void trim(int max=-1); | |
316 | int trim_all(); | |
317 | bool expiry_done() const | |
318 | { | |
319 | return expiring_segments.empty() && expired_segments.empty(); | |
320 | }; | |
321 | ||
322 | private: | |
11fdf7f2 | 323 | void write_head(MDSContext *onfinish); |
7c673cae FG |
324 | |
325 | public: | |
11fdf7f2 TL |
326 | void create(MDSContext *onfinish); // fresh, empty log! |
327 | void open(MDSContext *onopen); // append() or replay() to follow! | |
328 | void reopen(MDSContext *onopen); | |
7c673cae | 329 | void append(); |
11fdf7f2 | 330 | void replay(MDSContext *onfinish); |
7c673cae FG |
331 | |
332 | void standby_trim_segments(); | |
333 | ||
334 | void dump_replay_status(Formatter *f) const; | |
335 | }; | |
336 | ||
337 | #endif |