]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/MDLog.h
import ceph quincy 17.2.4
[ceph.git] / ceph / src / mds / MDLog.h
CommitLineData
7c673cae
FG
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */
7c673cae
FG
14#ifndef CEPH_MDLOG_H
15#define CEPH_MDLOG_H
16
9f95a23c
TL
17#include "include/common_fwd.h"
18
7c673cae
FG
// Numeric identifiers in the l_mdl_* family.  Values are consecutive:
// l_mdl_first is pinned to 5000, every following enumerator is one larger
// than its predecessor, and l_mdl_last closes the range.
// NOTE(review): presumably these are the perf-counter indices used by
// MDLog::create_logger()/flush_logger() -- confirm against MDLog.cc.
enum {
  l_mdl_first = 5000,
  l_mdl_evadd,
  l_mdl_evex,
  l_mdl_evtrm,
  l_mdl_ev,
  l_mdl_evexg,
  l_mdl_evexd,
  l_mdl_segadd,
  l_mdl_segex,
  l_mdl_segtrm,
  l_mdl_seg,
  l_mdl_segexg,
  l_mdl_segexd,
  l_mdl_expos,
  l_mdl_wrpos,
  l_mdl_rdpos,
  l_mdl_jlat,
  l_mdl_replayed,
  l_mdl_last,
};
40
41#include "include/types.h"
42#include "include/Context.h"
43
11fdf7f2 44#include "MDSContext.h"
7c673cae 45#include "common/Cond.h"
9f95a23c
TL
46#include "common/Finisher.h"
47#include "common/Thread.h"
7c673cae
FG
48
49#include "LogSegment.h"
50
51#include <list>
9f95a23c 52#include <map>
7c673cae
FG
53
54class Journaler;
55class JournalPointer;
56class LogEvent;
57class MDSRank;
58class LogSegment;
59class ESubtreeMap;
60
7c673cae
FG
61class MDLog {
62public:
9f95a23c
TL
63 explicit MDLog(MDSRank *m) : mds(m),
64 replay_thread(this),
65 recovery_thread(this),
66 submit_thread(this) {}
67 ~MDLog();
7c673cae 68
7c673cae
FG
69 const std::set<LogSegment*> &get_expiring_segments() const
70 {
71 return expiring_segments;
72 }
7c673cae 73
7c673cae 74 void create_logger();
7c673cae
FG
75 void set_write_iohint(unsigned iohint_flags);
76
7c673cae 77 void start_new_segment() {
11fdf7f2 78 std::lock_guard l(submit_mutex);
7c673cae
FG
79 _start_new_segment();
80 }
81 void prepare_new_segment() {
11fdf7f2 82 std::lock_guard l(submit_mutex);
7c673cae
FG
83 _prepare_new_segment();
84 }
11fdf7f2 85 void journal_segment_subtree_map(MDSContext *onsync=NULL) {
9f95a23c
TL
86 {
87 std::lock_guard l{submit_mutex};
88 _journal_segment_subtree_map(onsync);
89 }
7c673cae
FG
90 if (onsync)
91 flush();
92 }
93
94 LogSegment *peek_current_segment() {
95 return segments.empty() ? NULL : segments.rbegin()->second;
96 }
97
98 LogSegment *get_current_segment() {
11fdf7f2 99 ceph_assert(!segments.empty());
7c673cae
FG
100 return segments.rbegin()->second;
101 }
102
9f95a23c 103 LogSegment *get_segment(LogSegment::seq_t seq) {
7c673cae
FG
104 if (segments.count(seq))
105 return segments[seq];
106 return NULL;
107 }
108
109 bool have_any_segments() const {
110 return !segments.empty();
111 }
112
113 void flush_logger();
114
115 size_t get_num_events() const { return num_events; }
116 size_t get_num_segments() const { return segments.size(); }
117
118 uint64_t get_read_pos() const;
119 uint64_t get_write_pos() const;
120 uint64_t get_safe_pos() const;
121 Journaler *get_journaler() { return journaler; }
122 bool empty() const { return segments.empty(); }
123
2a845540 124 bool is_capped() const { return mds_is_shutting_down; }
7c673cae
FG
125 void cap();
126
127 void kick_submitter();
128 void shutdown();
129
7c673cae
FG
130 void _start_entry(LogEvent *e);
131 void start_entry(LogEvent *e) {
11fdf7f2 132 std::lock_guard l(submit_mutex);
7c673cae
FG
133 _start_entry(e);
134 }
135 void cancel_entry(LogEvent *e);
136 void _submit_entry(LogEvent *e, MDSLogContextBase *c);
137 void submit_entry(LogEvent *e, MDSLogContextBase *c = 0) {
11fdf7f2 138 std::lock_guard l(submit_mutex);
7c673cae 139 _submit_entry(e, c);
9f95a23c 140 submit_cond.notify_all();
7c673cae
FG
141 }
142 void start_submit_entry(LogEvent *e, MDSLogContextBase *c = 0) {
11fdf7f2 143 std::lock_guard l(submit_mutex);
7c673cae
FG
144 _start_entry(e);
145 _submit_entry(e, c);
9f95a23c 146 submit_cond.notify_all();
7c673cae
FG
147 }
148 bool entry_is_open() const { return cur_event != NULL; }
149
11fdf7f2 150 void wait_for_safe( MDSContext *c );
7c673cae
FG
151 void flush();
152 bool is_flushed() const {
153 return unflushed == 0;
154 }
155
7c673cae
FG
156 void trim_expired_segments();
157 void trim(int max=-1);
158 int trim_all();
159 bool expiry_done() const
160 {
161 return expiring_segments.empty() && expired_segments.empty();
162 };
163
11fdf7f2
TL
164 void create(MDSContext *onfinish); // fresh, empty log!
165 void open(MDSContext *onopen); // append() or replay() to follow!
166 void reopen(MDSContext *onopen);
7c673cae 167 void append();
11fdf7f2 168 void replay(MDSContext *onfinish);
7c673cae
FG
169
170 void standby_trim_segments();
171
172 void dump_replay_status(Formatter *f) const;
7c673cae 173
9f95a23c
TL
174 MDSRank *mds;
175 // replay state
20effc67 176 std::map<inodeno_t, std::set<inodeno_t>> pending_exports;
9f95a23c
TL
177
178protected:
179 struct PendingEvent {
180 PendingEvent(LogEvent *e, MDSContext *c, bool f=false) : le(e), fin(c), flush(f) {}
181 LogEvent *le;
182 MDSContext *fin;
183 bool flush;
184 };
185
186 // -- replay --
187 class ReplayThread : public Thread {
188 public:
189 explicit ReplayThread(MDLog *l) : log(l) {}
190 void* entry() override {
191 log->_replay_thread();
192 return 0;
193 }
194 private:
195 MDLog *log;
196 } replay_thread;
197
198 // Journal recovery/rewrite logic
199 class RecoveryThread : public Thread {
200 public:
201 explicit RecoveryThread(MDLog *l) : log(l) {}
202 void set_completion(MDSContext *c) {completion = c;}
203 void* entry() override {
204 log->_recovery_thread(completion);
205 return 0;
206 }
207 private:
208 MDLog *log;
209 MDSContext *completion = nullptr;
210 } recovery_thread;
211
212 class SubmitThread : public Thread {
213 public:
214 explicit SubmitThread(MDLog *l) : log(l) {}
215 void* entry() override {
216 log->_submit_thread();
217 return 0;
218 }
219 private:
220 MDLog *log;
221 } submit_thread;
222
223 friend class ReplayThread;
224 friend class C_MDL_Replay;
225 friend class MDSLogContextBase;
226 friend class SubmitThread;
227 // -- subtreemaps --
228 friend class ESubtreeMap;
229 friend class MDCache;
230
231 void _replay(); // old way
232 void _replay_thread(); // new way
233
234 void _recovery_thread(MDSContext *completion);
235 void _reformat_journal(JournalPointer const &jp, Journaler *old_journal, MDSContext *completion);
236
237 void set_safe_pos(uint64_t pos)
238 {
239 std::lock_guard l(submit_mutex);
240 ceph_assert(pos >= safe_pos);
241 safe_pos = pos;
242 }
243
244 void _submit_thread();
245
246 uint64_t get_last_segment_seq() const {
247 ceph_assert(!segments.empty());
248 return segments.rbegin()->first;
249 }
250 LogSegment *get_oldest_segment() {
251 return segments.begin()->second;
252 }
253 void remove_oldest_segment() {
254 std::map<uint64_t, LogSegment*>::iterator p = segments.begin();
255 delete p->second;
256 segments.erase(p);
257 }
258
259 int num_events = 0; // in events
260 int unflushed = 0;
2a845540 261 bool mds_is_shutting_down = false;
9f95a23c
TL
262
263 // Log position which is persistent *and* for which
264 // submit_entry wait_for_safe callbacks have already
265 // been called.
266 uint64_t safe_pos = 0;
267
268 inodeno_t ino;
269 Journaler *journaler = nullptr;
270
271 PerfCounters *logger = nullptr;
272
273 bool already_replayed = false;
274
275 MDSContext::vec waitfor_replay;
276
277 // -- segments --
278 std::map<uint64_t,LogSegment*> segments;
20effc67
TL
279 std::set<LogSegment*> expiring_segments;
280 std::set<LogSegment*> expired_segments;
9f95a23c
TL
281 std::size_t pre_segments_size = 0; // the num of segments when the mds finished replay-journal, to calc the num of segments growing
282 uint64_t event_seq = 0;
283 int expiring_events = 0;
284 int expired_events = 0;
285
286 int64_t mdsmap_up_features = 0;
20effc67 287 std::map<uint64_t,std::list<PendingEvent> > pending_events; // log segment -> event list
9f95a23c
TL
288 ceph::mutex submit_mutex = ceph::make_mutex("MDLog::submit_mutex");
289 ceph::condition_variable submit_cond;
290
291private:
292 friend class C_MaybeExpiredSegment;
293 friend class C_MDL_Flushed;
294 friend class C_OFT_Committed;
295
296 // -- segments --
297 void _start_new_segment();
298 void _prepare_new_segment();
299 void _journal_segment_subtree_map(MDSContext *onsync);
300
2a845540
TL
301 void try_to_commit_open_file_table(uint64_t last_seq);
302
9f95a23c
TL
303 void try_expire(LogSegment *ls, int op_prio);
304 void _maybe_expired(LogSegment *ls, int op_prio);
305 void _expired(LogSegment *ls);
306 void _trim_expired_segments();
307 void write_head(MDSContext *onfinish);
308
309 // -- events --
310 LogEvent *cur_event = nullptr;
311};
7c673cae 312#endif