1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
17 #include "include/common_fwd.h"
41 #include "include/types.h"
42 #include "include/Context.h"
44 #include "MDSContext.h"
45 #include "common/Cond.h"
46 #include "common/Finisher.h"
47 #include "common/Thread.h"
49 #include "LogSegment.h"
// Construct the log for MDS rank `m`: stores the pointer in `mds` and passes
// `this` to the recovery_thread and submit_thread members.
// NOTE(review): the listing numbering skips a line here, so another member
// initializer may exist that is not visible in this excerpt -- confirm.
63 explicit MDLog(MDSRank
*m
) : mds(m
),
65 recovery_thread(this),
66 submit_thread(this) {}
// Read-only accessor: returns a const reference to the expiring_segments set.
// No lock is taken in the visible code.
69 const std::set
<LogSegment
*> &get_expiring_segments() const
71 return expiring_segments
;
// Declaration only; the definition is not in this excerpt.
// NOTE(review): presumably forwards `iohint_flags` to the journaler's write
// path -- confirm against the implementation.
75 void set_write_iohint(unsigned iohint_flags
);
// Locking wrapper: acquires submit_mutex via std::lock_guard.
// The remainder of the body is not visible in this excerpt; presumably it
// calls _start_new_segment() under the lock -- TODO confirm.
77 void start_new_segment() {
78 std::lock_guard
l(submit_mutex
);
// Locking wrapper: takes submit_mutex, then calls _prepare_new_segment()
// while the lock is held.
81 void prepare_new_segment() {
82 std::lock_guard
l(submit_mutex
);
83 _prepare_new_segment();
// Locking wrapper: takes submit_mutex, then forwards `onsync` to
// _journal_segment_subtree_map(). `onsync` defaults to NULL, so callers may
// omit the completion context.
85 void journal_segment_subtree_map(MDSContext
*onsync
=NULL
) {
87 std::lock_guard l
{submit_mutex
};
88 _journal_segment_subtree_map(onsync
);
// Returns the segment stored under the largest key in `segments`
// (segments.rbegin()->second), or NULL when the map is empty.
// Unlike get_current_segment(), an empty map is tolerated here.
94 LogSegment
*peek_current_segment() {
95 return segments
.empty() ? NULL
: segments
.rbegin()->second
;
// Returns the segment stored under the largest key in `segments`.
// Asserts (ceph_assert) that the map is non-empty before dereferencing.
98 LogSegment
*get_current_segment() {
99 ceph_assert(!segments
.empty());
100 return segments
.rbegin()->second
;
// Looks up the segment keyed by `seq`: if the key is present, returns the
// mapped pointer. The count() guard keeps operator[] from inserting a new
// entry for a missing key.
// The not-found path is not visible in this excerpt (presumably returns
// NULL -- TODO confirm).
// NOTE(review): count() followed by operator[] performs two lookups; a single
// find() would do the same work once.
103 LogSegment
*get_segment(LogSegment::seq_t seq
) {
104 if (segments
.count(seq
))
105 return segments
[seq
];
// True when the `segments` map holds at least one entry.
109 bool have_any_segments() const {
110 return !segments
.empty();
115 size_t get_num_events() const { return num_events
; }
116 size_t get_num_segments() const { return segments
.size(); }
// Journal position accessors; declarations only, defined outside this excerpt.
// NOTE(review): presumably these report the journaler's read/write/safe
// offsets -- confirm against the implementation.
118 uint64_t get_read_pos() const;
119 uint64_t get_write_pos() const;
120 uint64_t get_safe_pos() const;
121 Journaler
*get_journaler() { return journaler
; }
122 bool empty() const { return segments
.empty(); }
124 bool is_capped() const { return mds_is_shutting_down
; }
// Declaration only; defined outside this excerpt.
// NOTE(review): presumably wakes the submit thread -- confirm.
127 void kick_submitter();
// Unlocked variant of start_entry(): start_entry() takes submit_mutex itself,
// so this form is presumably meant for callers already holding it -- confirm.
130 void _start_entry(LogEvent
*e
);
// Locking wrapper: acquires submit_mutex via std::lock_guard.
// The remainder of the body is not visible in this excerpt; presumably it
// calls _start_entry(e) under the lock -- TODO confirm.
131 void start_entry(LogEvent
*e
) {
132 std::lock_guard
l(submit_mutex
);
// Declaration only; defined outside this excerpt.
135 void cancel_entry(LogEvent
*e
);
// Unlocked counterpart of submit_entry(); declaration only. `c` is the
// completion context associated with the event.
// NOTE(review): presumably requires submit_mutex to be held -- confirm.
136 void _submit_entry(LogEvent
*e
, MDSLogContextBase
*c
);
// Locking wrapper for submitting a log event: acquires submit_mutex, and
// finishes by waking every waiter on submit_cond (notify_all).
// `c` defaults to 0 (no completion context).
// The statement between the lock and the notify is not visible in this
// excerpt; presumably it calls _submit_entry(e, c) -- TODO confirm.
137 void submit_entry(LogEvent
*e
, MDSLogContextBase
*c
= 0) {
138 std::lock_guard
l(submit_mutex
);
140 submit_cond
.notify_all();
// Combined start + submit under a single acquisition of submit_mutex; ends by
// waking every waiter on submit_cond (notify_all).
// `c` defaults to 0 (no completion context).
// The statements between the lock and the notify are not visible in this
// excerpt; presumably they call _start_entry(e) then _submit_entry(e, c)
// -- TODO confirm.
142 void start_submit_entry(LogEvent
*e
, MDSLogContextBase
*c
= 0) {
143 std::lock_guard
l(submit_mutex
);
146 submit_cond
.notify_all();
148 bool entry_is_open() const { return cur_event
!= NULL
; }
// Declaration only; defined outside this excerpt.
// NOTE(review): presumably queues `c` to be completed once the journal is
// safe on disk -- confirm against the implementation.
150 void wait_for_safe( MDSContext
*c
);
// True when the `unflushed` counter is zero.
152 bool is_flushed() const {
153 return unflushed
== 0;
// Trimming entry points; declarations only, defined outside this excerpt.
156 void trim_expired_segments();
// `max` defaults to -1.
// NOTE(review): -1 presumably means "no explicit limit" -- confirm.
157 void trim(int max
=-1);
// True only when both expiring_segments and expired_segments are empty.
159 bool expiry_done() const
161 return expiring_segments
.empty() && expired_segments
.empty();
// Log lifecycle entry points; declarations only, defined outside this
// excerpt. Each takes an MDSContext completion.
// NOTE(review): presumably the context is completed when the operation
// finishes -- confirm against the implementation.
164 void create(MDSContext
*onfinish
); // fresh, empty log!
165 void open(MDSContext
*onopen
); // append() or replay() to follow!
166 void reopen(MDSContext
*onopen
);
168 void replay(MDSContext
*onfinish
);
170 void standby_trim_segments();
// Writes replay status through the supplied Formatter; const, so it does not
// modify the log object's (non-mutable) state.
172 void dump_replay_status(Formatter
*f
) const;
// Map from an inode number to a set of inode numbers.
// NOTE(review): the meaning of key vs. set members is not established by
// this excerpt -- confirm against users of pending_exports.
176 std::map
<inodeno_t
, std::set
<inodeno_t
>> pending_exports
;
// Record of one queued event: the constructor stores the event in `le`, its
// completion context in `fin`, and a flush flag in `flush` (default false).
// The member declarations and closing brace are not visible in this excerpt.
179 struct PendingEvent
{
180 PendingEvent(LogEvent
*e
, MDSContext
*c
, bool f
=false) : le(e
), fin(c
), flush(f
) {}
// Thread subclass whose entry() runs MDLog::_replay_thread() on the owning
// log. The constructor only records the MDLog pointer in `log`.
// The return statement, member declarations and closing braces are not
// visible in this excerpt.
187 class ReplayThread
: public Thread
{
189 explicit ReplayThread(MDLog
*l
) : log(l
) {}
190 void* entry() override
{
191 log
->_replay_thread();
198 // Journal recovery/rewrite logic
// Thread subclass whose entry() runs MDLog::_recovery_thread(), passing the
// context previously stored via set_completion(). `completion` starts as
// nullptr, so entry() forwards nullptr if set_completion() was never called.
// The return statement and closing braces are not visible in this excerpt.
199 class RecoveryThread
: public Thread
{
201 explicit RecoveryThread(MDLog
*l
) : log(l
) {}
202 void set_completion(MDSContext
*c
) {completion
= c
;}
203 void* entry() override
{
204 log
->_recovery_thread(completion
);
209 MDSContext
*completion
= nullptr;
// Thread subclass whose entry() runs MDLog::_submit_thread() on the owning
// log. The constructor only records the MDLog pointer in `log`.
// The return statement, member declarations and closing braces are not
// visible in this excerpt.
212 class SubmitThread
: public Thread
{
214 explicit SubmitThread(MDLog
*l
) : log(l
) {}
215 void* entry() override
{
216 log
->_submit_thread();
// Classes granted access to MDLog's non-public members. The thread helpers
// need it because their entry() functions call the _replay_thread() /
// _submit_thread() helpers on the log.
223 friend class ReplayThread
;
224 friend class C_MDL_Replay
;
225 friend class MDSLogContextBase
;
226 friend class SubmitThread
;
228 friend class ESubtreeMap
;
229 friend class MDCache
;
// Replay/recovery internals; declarations only, defined outside this excerpt.
231 void _replay(); // old way
// Invoked from ReplayThread::entry().
232 void _replay_thread(); // new way
// Invoked from RecoveryThread::entry() with the stored completion context
// (which may be nullptr).
234 void _recovery_thread(MDSContext
*completion
);
// NOTE(review): presumably rewrites the journal referenced by `jp`, reading
// from `old_journal` -- confirm against the implementation.
235 void _reformat_journal(JournalPointer
const &jp
, Journaler
*old_journal
, MDSContext
*completion
);
// Takes submit_mutex and asserts that `pos` is not behind the current
// safe_pos, i.e. the safe position may never move backwards.
// The remainder of the body is not visible in this excerpt; presumably it
// assigns safe_pos = pos -- TODO confirm.
237 void set_safe_pos(uint64_t pos
)
239 std::lock_guard
l(submit_mutex
);
240 ceph_assert(pos
>= safe_pos
);
// Body of the submit thread; invoked from SubmitThread::entry().
// Declaration only; defined outside this excerpt.
244 void _submit_thread();
// Returns the largest key in `segments` (segments.rbegin()->first).
// Asserts (ceph_assert) that the map is non-empty first.
246 uint64_t get_last_segment_seq() const {
247 ceph_assert(!segments
.empty());
248 return segments
.rbegin()->first
;
// Returns the segment stored under the smallest key in `segments`.
// NOTE(review): no emptiness check is visible here, unlike
// get_current_segment(); dereferencing begin() of an empty map is undefined
// behaviour, so callers must ensure the map is non-empty -- confirm.
250 LogSegment
*get_oldest_segment() {
251 return segments
.begin()->second
;
// Obtains an iterator to the first (smallest-key) entry of `segments`.
// The remainder of the body is not visible in this excerpt; presumably it
// releases that segment and erases the entry -- TODO confirm.
253 void remove_oldest_segment() {
254 std::map
<uint64_t, LogSegment
*>::iterator p
= segments
.begin();
// Event counter; exposed through get_num_events().
259 int num_events
= 0; // in events
// Shutdown flag; exposed through is_capped().
261 bool mds_is_shutting_down
= false;
263 // Log position which is persistent *and* for which
264 // submit_entry wait_for_safe callbacks have already
266 uint64_t safe_pos
= 0;
// Journaler backing this log; null until assigned. Returned as-is by
// get_journaler().
269 Journaler
*journaler
= nullptr;
// Perf counter sink; null until assigned.
271 PerfCounters
*logger
= nullptr;
273 bool already_replayed
= false;
// Contexts waiting on replay.
// NOTE(review): presumably completed once replay finishes -- confirm.
275 MDSContext::vec waitfor_replay
;
// Segments keyed by a uint64_t sequence number (get_segment() looks up by
// LogSegment::seq_t). The map is ordered, so begin() is the oldest (lowest
// seq) entry and rbegin() the current (highest seq) one.
278 std::map
<uint64_t,LogSegment
*> segments
;
// Sets of segments in the two expiry stages; expiry_done() is true only when
// both are empty.
279 std::set
<LogSegment
*> expiring_segments
;
280 std::set
<LogSegment
*> expired_segments
;
281 std::size_t pre_segments_size
= 0; // the num of segments when the mds finished replay-journal, to calc the num of segments growing
282 uint64_t event_seq
= 0;
283 int expiring_events
= 0;
284 int expired_events
= 0;
286 int64_t mdsmap_up_features
= 0;
287 std::map
<uint64_t,std::list
<PendingEvent
> > pending_events
; // log segment -> event list
// Mutex taken by the public locking wrappers (start_entry, submit_entry,
// prepare_new_segment, set_safe_pos, ...) before they touch submission state.
288 ceph::mutex submit_mutex
= ceph::make_mutex("MDLog::submit_mutex");
// Notified (notify_all) by submit_entry() / start_submit_entry().
289 ceph::condition_variable submit_cond
;
// Completion-context classes granted access to MDLog's non-public members.
292 friend class C_MaybeExpiredSegment
;
293 friend class C_MDL_Flushed
;
294 friend class C_OFT_Committed
;
// Unlocked implementations behind the public locking wrappers; declarations
// only. prepare_new_segment() and journal_segment_subtree_map() call their
// underscore counterparts with submit_mutex held.
297 void _start_new_segment();
298 void _prepare_new_segment();
299 void _journal_segment_subtree_map(MDSContext
*onsync
);
301 void try_to_commit_open_file_table(uint64_t last_seq
);
// Segment expiry helpers; declarations only, defined outside this excerpt.
// NOTE(review): `op_prio` is presumably an I/O operation priority -- confirm.
303 void try_expire(LogSegment
*ls
, int op_prio
);
304 void _maybe_expired(LogSegment
*ls
, int op_prio
);
305 void _expired(LogSegment
*ls
);
306 void _trim_expired_segments();
307 void write_head(MDSContext
*onfinish
);
// Currently open log event, or nullptr when none is open; entry_is_open()
// tests this pointer.
310 LogEvent
*cur_event
= nullptr;