]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/MDLog.h
6111f85a97f97bea5a221ca429954cd23f5a4e46
[ceph.git] / ceph / src / mds / MDLog.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14 #ifndef CEPH_MDLOG_H
15 #define CEPH_MDLOG_H
16
17 #include "include/common_fwd.h"
18
// Counter indices for MDLog.  Values run consecutively upward from
// l_mdl_first (5000); l_mdl_first and l_mdl_last only bracket the range.
// NOTE(review): presumably these are registered with the PerfCounters
// instance built by MDLog::create_logger() -- confirm in MDLog.cc.
// Do not reorder: every enumerator's value depends on its position.
enum {
  l_mdl_first = 5000,

  // ev* entries
  l_mdl_evadd, l_mdl_evex, l_mdl_evtrm,
  l_mdl_ev, l_mdl_evexg, l_mdl_evexd,

  // seg* entries
  l_mdl_segadd, l_mdl_segex, l_mdl_segtrm,
  l_mdl_seg, l_mdl_segexg, l_mdl_segexd,

  // *pos entries
  l_mdl_expos, l_mdl_wrpos, l_mdl_rdpos,

  l_mdl_jlat,
  l_mdl_replayed,

  l_mdl_last,
};
40
41 #include "include/types.h"
42 #include "include/Context.h"
43
44 #include "MDSContext.h"
45 #include "common/Cond.h"
46 #include "common/Finisher.h"
47 #include "common/Thread.h"
48
49 #include "LogSegment.h"
50
#include <list>
#include <map>
#include <mutex>
#include <set>
53
54 class Journaler;
55 class JournalPointer;
56 class LogEvent;
57 class MDSRank;
58 class LogSegment;
59 class ESubtreeMap;
60
61 class MDLog {
62 public:
63 explicit MDLog(MDSRank *m) : mds(m),
64 replay_thread(this),
65 recovery_thread(this),
66 submit_thread(this) {}
67 ~MDLog();
68
69 const std::set<LogSegment*> &get_expiring_segments() const
70 {
71 return expiring_segments;
72 }
73
74 void create_logger();
75 void set_write_iohint(unsigned iohint_flags);
76
77 void start_new_segment() {
78 std::lock_guard l(submit_mutex);
79 _start_new_segment();
80 }
81 void prepare_new_segment() {
82 std::lock_guard l(submit_mutex);
83 _prepare_new_segment();
84 }
85 void journal_segment_subtree_map(MDSContext *onsync=NULL) {
86 {
87 std::lock_guard l{submit_mutex};
88 _journal_segment_subtree_map(onsync);
89 }
90 if (onsync)
91 flush();
92 }
93
94 LogSegment *peek_current_segment() {
95 return segments.empty() ? NULL : segments.rbegin()->second;
96 }
97
98 LogSegment *get_current_segment() {
99 ceph_assert(!segments.empty());
100 return segments.rbegin()->second;
101 }
102
103 LogSegment *get_segment(LogSegment::seq_t seq) {
104 if (segments.count(seq))
105 return segments[seq];
106 return NULL;
107 }
108
109 bool have_any_segments() const {
110 return !segments.empty();
111 }
112
113 void flush_logger();
114
115 size_t get_num_events() const { return num_events; }
116 size_t get_num_segments() const { return segments.size(); }
117
118 uint64_t get_read_pos() const;
119 uint64_t get_write_pos() const;
120 uint64_t get_safe_pos() const;
121 Journaler *get_journaler() { return journaler; }
122 bool empty() const { return segments.empty(); }
123
124 bool is_capped() const { return mds_is_shutting_down; }
125 void cap();
126
127 void kick_submitter();
128 void shutdown();
129
130 void _start_entry(LogEvent *e);
131 void start_entry(LogEvent *e) {
132 std::lock_guard l(submit_mutex);
133 _start_entry(e);
134 }
135 void cancel_entry(LogEvent *e);
136 void _submit_entry(LogEvent *e, MDSLogContextBase *c);
137 void submit_entry(LogEvent *e, MDSLogContextBase *c = 0) {
138 std::lock_guard l(submit_mutex);
139 _submit_entry(e, c);
140 submit_cond.notify_all();
141 }
142 void start_submit_entry(LogEvent *e, MDSLogContextBase *c = 0) {
143 std::lock_guard l(submit_mutex);
144 _start_entry(e);
145 _submit_entry(e, c);
146 submit_cond.notify_all();
147 }
148 bool entry_is_open() const { return cur_event != NULL; }
149
150 void wait_for_safe( MDSContext *c );
151 void flush();
152 bool is_flushed() const {
153 return unflushed == 0;
154 }
155
156 void trim_expired_segments();
157 void trim(int max=-1);
158 int trim_all();
159 bool expiry_done() const
160 {
161 return expiring_segments.empty() && expired_segments.empty();
162 };
163
164 void create(MDSContext *onfinish); // fresh, empty log!
165 void open(MDSContext *onopen); // append() or replay() to follow!
166 void reopen(MDSContext *onopen);
167 void append();
168 void replay(MDSContext *onfinish);
169
170 void standby_trim_segments();
171
172 void dump_replay_status(Formatter *f) const;
173
174 MDSRank *mds;
175 // replay state
176 std::map<inodeno_t, std::set<inodeno_t>> pending_exports;
177
178 protected:
179 struct PendingEvent {
180 PendingEvent(LogEvent *e, MDSContext *c, bool f=false) : le(e), fin(c), flush(f) {}
181 LogEvent *le;
182 MDSContext *fin;
183 bool flush;
184 };
185
186 // -- replay --
187 class ReplayThread : public Thread {
188 public:
189 explicit ReplayThread(MDLog *l) : log(l) {}
190 void* entry() override {
191 log->_replay_thread();
192 return 0;
193 }
194 private:
195 MDLog *log;
196 } replay_thread;
197
198 // Journal recovery/rewrite logic
199 class RecoveryThread : public Thread {
200 public:
201 explicit RecoveryThread(MDLog *l) : log(l) {}
202 void set_completion(MDSContext *c) {completion = c;}
203 void* entry() override {
204 log->_recovery_thread(completion);
205 return 0;
206 }
207 private:
208 MDLog *log;
209 MDSContext *completion = nullptr;
210 } recovery_thread;
211
212 class SubmitThread : public Thread {
213 public:
214 explicit SubmitThread(MDLog *l) : log(l) {}
215 void* entry() override {
216 log->_submit_thread();
217 return 0;
218 }
219 private:
220 MDLog *log;
221 } submit_thread;
222
223 friend class ReplayThread;
224 friend class C_MDL_Replay;
225 friend class MDSLogContextBase;
226 friend class SubmitThread;
227 // -- subtreemaps --
228 friend class ESubtreeMap;
229 friend class MDCache;
230
231 void _replay(); // old way
232 void _replay_thread(); // new way
233
234 void _recovery_thread(MDSContext *completion);
235 void _reformat_journal(JournalPointer const &jp, Journaler *old_journal, MDSContext *completion);
236
237 void set_safe_pos(uint64_t pos)
238 {
239 std::lock_guard l(submit_mutex);
240 ceph_assert(pos >= safe_pos);
241 safe_pos = pos;
242 }
243
244 void _submit_thread();
245
246 uint64_t get_last_segment_seq() const {
247 ceph_assert(!segments.empty());
248 return segments.rbegin()->first;
249 }
250 LogSegment *get_oldest_segment() {
251 return segments.begin()->second;
252 }
253 void remove_oldest_segment() {
254 std::map<uint64_t, LogSegment*>::iterator p = segments.begin();
255 delete p->second;
256 segments.erase(p);
257 }
258
259 int num_events = 0; // in events
260 int unflushed = 0;
261 bool mds_is_shutting_down = false;
262
263 // Log position which is persistent *and* for which
264 // submit_entry wait_for_safe callbacks have already
265 // been called.
266 uint64_t safe_pos = 0;
267
268 inodeno_t ino;
269 Journaler *journaler = nullptr;
270
271 PerfCounters *logger = nullptr;
272
273 bool already_replayed = false;
274
275 MDSContext::vec waitfor_replay;
276
277 // -- segments --
278 std::map<uint64_t,LogSegment*> segments;
279 std::set<LogSegment*> expiring_segments;
280 std::set<LogSegment*> expired_segments;
281 std::size_t pre_segments_size = 0; // the num of segments when the mds finished replay-journal, to calc the num of segments growing
282 uint64_t event_seq = 0;
283 int expiring_events = 0;
284 int expired_events = 0;
285
286 int64_t mdsmap_up_features = 0;
287 std::map<uint64_t,std::list<PendingEvent> > pending_events; // log segment -> event list
288 ceph::mutex submit_mutex = ceph::make_mutex("MDLog::submit_mutex");
289 ceph::condition_variable submit_cond;
290
291 private:
292 friend class C_MaybeExpiredSegment;
293 friend class C_MDL_Flushed;
294 friend class C_OFT_Committed;
295
296 // -- segments --
297 void _start_new_segment();
298 void _prepare_new_segment();
299 void _journal_segment_subtree_map(MDSContext *onsync);
300
301 void try_to_commit_open_file_table(uint64_t last_seq);
302
303 void try_expire(LogSegment *ls, int op_prio);
304 void _maybe_expired(LogSegment *ls, int op_prio);
305 void _expired(LogSegment *ls);
306 void _trim_expired_segments();
307 void write_head(MDSContext *onfinish);
308
309 // -- events --
310 LogEvent *cur_event = nullptr;
311 };
312 #endif