]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/MDLog.h
import ceph nautilus 14.2.2
[ceph.git] / ceph / src / mds / MDLog.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16#ifndef CEPH_MDLOG_H
17#define CEPH_MDLOG_H
18
// Index constants for the MDLog counters, occupying the contiguous range
// (l_mdl_first, l_mdl_last).  l_mdl_first and l_mdl_last are the exclusive
// lower/upper sentinels of that range; every enumerator in between takes the
// next consecutive value, so the order below must not be changed.
// NOTE(review): presumably these are the PerfCounters indices registered by
// MDLog::create_logger() -- that definition is not visible here; confirm.
enum {
  l_mdl_first = 5000,
  l_mdl_evadd,
  l_mdl_evex,
  l_mdl_evtrm,
  l_mdl_ev,
  l_mdl_evexg,
  l_mdl_evexd,
  l_mdl_segadd,
  l_mdl_segex,
  l_mdl_segtrm,
  l_mdl_seg,
  l_mdl_segexg,
  l_mdl_segexd,
  l_mdl_expos,
  l_mdl_wrpos,
  l_mdl_rdpos,
  l_mdl_jlat,
  l_mdl_replayed,
  l_mdl_last,
};
40
41#include "include/types.h"
42#include "include/Context.h"
43
11fdf7f2 44#include "MDSContext.h"
7c673cae
FG
45#include "common/Thread.h"
46#include "common/Cond.h"
47
48#include "LogSegment.h"
49
50#include <list>
51
52class Journaler;
53class JournalPointer;
54class LogEvent;
55class MDSRank;
56class LogSegment;
57class ESubtreeMap;
58
59class PerfCounters;
60
61#include <map>
62using std::map;
63
64#include "common/Finisher.h"
65
66
67class MDLog {
68public:
69 MDSRank *mds;
70protected:
71 int num_events; // in events
72
73 int unflushed;
74
75 bool capped;
76
77 // Log position which is persistent *and* for which
78 // submit_entry wait_for_safe callbacks have already
79 // been called.
80 uint64_t safe_pos;
81
82 inodeno_t ino;
83 Journaler *journaler;
84
85 PerfCounters *logger;
86
87
88 // -- replay --
89 class ReplayThread : public Thread {
90 MDLog *log;
91 public:
92 explicit ReplayThread(MDLog *l) : log(l) {}
93 void* entry() override {
94 log->_replay_thread();
95 return 0;
96 }
97 } replay_thread;
98 bool already_replayed;
99
100 friend class ReplayThread;
101 friend class C_MDL_Replay;
102
11fdf7f2 103 MDSContext::vec waitfor_replay;
7c673cae
FG
104
105 void _replay(); // old way
106 void _replay_thread(); // new way
107
108 // Journal recovery/rewrite logic
109 class RecoveryThread : public Thread {
110 MDLog *log;
11fdf7f2 111 MDSContext *completion;
7c673cae 112 public:
11fdf7f2 113 void set_completion(MDSContext *c) {completion = c;}
7c673cae
FG
114 explicit RecoveryThread(MDLog *l) : log(l), completion(NULL) {}
115 void* entry() override {
116 log->_recovery_thread(completion);
117 return 0;
118 }
119 } recovery_thread;
11fdf7f2
TL
120 void _recovery_thread(MDSContext *completion);
121 void _reformat_journal(JournalPointer const &jp, Journaler *old_journal, MDSContext *completion);
7c673cae
FG
122
123 // -- segments --
124 map<uint64_t,LogSegment*> segments;
125 set<LogSegment*> expiring_segments;
126 set<LogSegment*> expired_segments;
81eedcae 127 std::size_t pre_segments_size = 0; // the num of segments when the mds finished replay-journal, to calc the num of segments growing
7c673cae
FG
128 uint64_t event_seq;
129 int expiring_events;
130 int expired_events;
131
132 struct PendingEvent {
133 LogEvent *le;
134 MDSContext *fin;
135 bool flush;
136 PendingEvent(LogEvent *e, MDSContext *c, bool f=false) : le(e), fin(c), flush(f) {}
137 };
138
139 int64_t mdsmap_up_features;
140 map<uint64_t,list<PendingEvent> > pending_events; // log segment -> event list
141 Mutex submit_mutex;
142 Cond submit_cond;
143
144 void set_safe_pos(uint64_t pos)
145 {
11fdf7f2
TL
146 std::lock_guard l(submit_mutex);
147 ceph_assert(pos >= safe_pos);
7c673cae
FG
148 safe_pos = pos;
149 }
150 friend class MDSLogContextBase;
151
152 void _submit_thread();
153 class SubmitThread : public Thread {
154 MDLog *log;
155 public:
156 explicit SubmitThread(MDLog *l) : log(l) {}
157 void* entry() override {
158 log->_submit_thread();
159 return 0;
160 }
161 } submit_thread;
162 friend class SubmitThread;
163
164public:
165 const std::set<LogSegment*> &get_expiring_segments() const
166 {
167 return expiring_segments;
168 }
169protected:
170
171 // -- subtreemaps --
172 friend class ESubtreeMap;
173 friend class MDCache;
174
175 uint64_t get_last_segment_seq() const {
11fdf7f2 176 ceph_assert(!segments.empty());
7c673cae
FG
177 return segments.rbegin()->first;
178 }
179 LogSegment *get_oldest_segment() {
180 return segments.begin()->second;
181 }
182 void remove_oldest_segment() {
183 map<uint64_t, LogSegment*>::iterator p = segments.begin();
184 delete p->second;
185 segments.erase(p);
186 }
187
188public:
189 void create_logger();
190
191 // replay state
192 map<inodeno_t, set<inodeno_t> > pending_exports;
193
194 void set_write_iohint(unsigned iohint_flags);
195
196public:
197 explicit MDLog(MDSRank *m) : mds(m),
198 num_events(0),
199 unflushed(0),
200 capped(false),
201 safe_pos(0),
202 journaler(0),
203 logger(0),
204 replay_thread(this),
205 already_replayed(false),
206 recovery_thread(this),
207 event_seq(0), expiring_events(0), expired_events(0),
208 mdsmap_up_features(0),
209 submit_mutex("MDLog::submit_mutex"),
210 submit_thread(this),
211 cur_event(NULL) { }
212 ~MDLog();
213
214
215private:
216 // -- segments --
217 void _start_new_segment();
218 void _prepare_new_segment();
11fdf7f2 219 void _journal_segment_subtree_map(MDSContext *onsync);
7c673cae
FG
220public:
221 void start_new_segment() {
11fdf7f2 222 std::lock_guard l(submit_mutex);
7c673cae
FG
223 _start_new_segment();
224 }
225 void prepare_new_segment() {
11fdf7f2 226 std::lock_guard l(submit_mutex);
7c673cae
FG
227 _prepare_new_segment();
228 }
11fdf7f2 229 void journal_segment_subtree_map(MDSContext *onsync=NULL) {
7c673cae
FG
230 submit_mutex.Lock();
231 _journal_segment_subtree_map(onsync);
232 submit_mutex.Unlock();
233 if (onsync)
234 flush();
235 }
236
237 LogSegment *peek_current_segment() {
238 return segments.empty() ? NULL : segments.rbegin()->second;
239 }
240
241 LogSegment *get_current_segment() {
11fdf7f2 242 ceph_assert(!segments.empty());
7c673cae
FG
243 return segments.rbegin()->second;
244 }
245
246 LogSegment *get_segment(log_segment_seq_t seq) {
247 if (segments.count(seq))
248 return segments[seq];
249 return NULL;
250 }
251
252 bool have_any_segments() const {
253 return !segments.empty();
254 }
255
256 void flush_logger();
257
258 size_t get_num_events() const { return num_events; }
259 size_t get_num_segments() const { return segments.size(); }
260
261 uint64_t get_read_pos() const;
262 uint64_t get_write_pos() const;
263 uint64_t get_safe_pos() const;
264 Journaler *get_journaler() { return journaler; }
265 bool empty() const { return segments.empty(); }
266
267 bool is_capped() const { return capped; }
268 void cap();
269
270 void kick_submitter();
271 void shutdown();
272
273 // -- events --
274private:
275 LogEvent *cur_event;
276public:
277 void _start_entry(LogEvent *e);
278 void start_entry(LogEvent *e) {
11fdf7f2 279 std::lock_guard l(submit_mutex);
7c673cae
FG
280 _start_entry(e);
281 }
282 void cancel_entry(LogEvent *e);
283 void _submit_entry(LogEvent *e, MDSLogContextBase *c);
284 void submit_entry(LogEvent *e, MDSLogContextBase *c = 0) {
11fdf7f2 285 std::lock_guard l(submit_mutex);
7c673cae
FG
286 _submit_entry(e, c);
287 submit_cond.Signal();
288 }
289 void start_submit_entry(LogEvent *e, MDSLogContextBase *c = 0) {
11fdf7f2 290 std::lock_guard l(submit_mutex);
7c673cae
FG
291 _start_entry(e);
292 _submit_entry(e, c);
293 submit_cond.Signal();
294 }
295 bool entry_is_open() const { return cur_event != NULL; }
296
11fdf7f2 297 void wait_for_safe( MDSContext *c );
7c673cae
FG
298 void flush();
299 bool is_flushed() const {
300 return unflushed == 0;
301 }
302
303private:
304 void try_expire(LogSegment *ls, int op_prio);
305 void _maybe_expired(LogSegment *ls, int op_prio);
306 void _expired(LogSegment *ls);
307 void _trim_expired_segments();
308
309 friend class C_MaybeExpiredSegment;
310 friend class C_MDL_Flushed;
11fdf7f2 311 friend class C_OFT_Committed;
7c673cae
FG
312
313public:
314 void trim_expired_segments();
315 void trim(int max=-1);
316 int trim_all();
317 bool expiry_done() const
318 {
319 return expiring_segments.empty() && expired_segments.empty();
320 };
321
322private:
11fdf7f2 323 void write_head(MDSContext *onfinish);
7c673cae
FG
324
325public:
11fdf7f2
TL
326 void create(MDSContext *onfinish); // fresh, empty log!
327 void open(MDSContext *onopen); // append() or replay() to follow!
328 void reopen(MDSContext *onopen);
7c673cae 329 void append();
11fdf7f2 330 void replay(MDSContext *onfinish);
7c673cae
FG
331
332 void standby_trim_segments();
333
334 void dump_replay_status(Formatter *f) const;
335};
336
337#endif