1 // -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * ceph - scalable distributed file system
6 * copyright (c) 2014 john spray <john.spray@inktank.com>
8 * this is free software; you can redistribute it and/or
9 * modify it under the terms of the gnu lesser general public
10 * license version 2.1, as published by the free software
11 * foundation. see file copying.
15 #include "include/rados/librados.hpp"
16 #include "mds/JournalPointer.h"
18 #include "mds/events/ESubtreeMap.h"
20 #include "JournalScanner.h"
22 #define dout_context g_ceph_context
23 #define dout_subsys ceph_subsys_mds
26 * Read journal header, followed by sequential scan through journal space.
28 * Return 0 on success, else error code. Note that success has the special meaning
29 * that we were able to apply our checks; it does *not* mean that the journal is healthy.
// Entry point of the scan. The two visible conditions gate later stages:
// the first requires `pointer_present`, the second requires both the
// caller's `full` flag and `header_present`.
// NOTE(review): the statements inside and between these conditions are not
// visible in this view -- presumably the calls to scan_pointer(),
// scan_header() and scan_events() plus their error checks; confirm against
// the complete file.
32 int JournalScanner::scan(bool const full
)
// Stage gated on the pointer object having been read (scan_pointer() sets
// pointer_present after a successful read).
41 if (pointer_present
) {
// Stage gated on a full scan having been requested AND the header object
// having been read (scan_header() sets header_present).
48 if (full
&& header_present
) {
// Read the journal pointer object and record whether it is present.
// The visible lines log one of four outcomes (absent, unreadable, readable,
// corrupt) and set `pointer_present` once the read has succeeded.
// NOTE(review): the branch conditions on `r`, the decode call inside the
// try block and the return statements are not visible in this view.
59 int JournalScanner::scan_pointer()
// The pointer object's name is derived from MDS_INO_LOG_POINTER_OFFSET + rank,
// with object offset 0.
62 std::string
const pointer_oid
= obj_name(MDS_INO_LOG_POINTER_OFFSET
+ rank
, 0);
63 bufferlist pointer_bl
;
// Read up to INT_MAX bytes starting at offset 0 of the pointer object.
64 int r
= io
.read(pointer_oid
, pointer_bl
, INT_MAX
, 0);
66 // 'Successfully' discovered the pointer is missing.
67 derr
<< "Pointer " << pointer_oid
<< " is absent" << dendl
;
70 // Error preventing us interrogating pointer
71 derr
<< "Pointer " << pointer_oid
<< " is unreadable" << dendl
;
74 dout(4) << "Pointer " << pointer_oid
<< " is readable" << dendl
;
// From here on the pointer object is known to exist.
75 pointer_present
= true;
// Iterator over the bytes that were read; presumably handed to the pointer
// decoder on a line not visible here -- TODO confirm.
79 bufferlist::iterator q
= pointer_bl
.begin();
// A buffer::error raised while decoding is reported as corruption.
81 } catch(buffer::error
&e
) {
82 derr
<< "Pointer " << pointer_oid
<< " is corrupt: " << e
.what() << dendl
;
// Read and sanity-check the journal header object (object offset 0).
// Every visible failure path returns 0: as the inline comments say, finding
// a problem counts as a successful scan. `header_present` is set once the
// read has succeeded; the later checks cover decoding, the magic string and
// the ordering of the three header offsets.
// NOTE(review): the declarations of `r` and `header_bl`, the condition on the
// read result, the `try` keyword and the final statements are not visible in
// this view.
93 int JournalScanner::scan_header()
98 std::string header_name
= obj_name(0);
99 dout(4) << "JournalScanner::scan: reading header object '" << header_name
<< "'" << dendl
;
// Read up to INT_MAX bytes starting at offset 0 of the header object.
100 r
= io
.read(header_name
, header_bl
, INT_MAX
, 0);
102 derr
<< "Header " << header_name
<< " is unreadable" << dendl
;
103 return 0; // "Successfully" found an error
105 header_present
= true;
108 bufferlist::iterator header_bl_i
= header_bl
.begin();
// Raw allocation stored in the `header` member.
// NOTE(review): its release is not visible in this view -- confirm that the
// destructor (or the error paths below) delete it.
109 header
= new Journaler::Header();
112 header
->decode(header_bl_i
);
// A buffer::error raised by decode() is reported as a corrupt header.
114 catch (buffer::error
&e
)
116 derr
<< "Header is corrupt (" << e
.what() << ")" << dendl
;
119 return 0; // "Successfully" found an error
// The decoded magic must equal CEPH_FS_ONDISK_MAGIC.
122 if (header
->magic
!= std::string(CEPH_FS_ONDISK_MAGIC
)) {
123 derr
<< "Header is corrupt (bad magic)" << dendl
;
124 return 0; // "Successfully" found an error
// Required ordering: trimmed_pos <= expire_pos <= write_pos.
126 if (!((header
->trimmed_pos
<= header
->expire_pos
) && (header
->expire_pos
<= header
->write_pos
))) {
127 derr
<< "Header is invalid (inconsistent offsets)" << dendl
;
128 return 0; // "Successfully" found an error
// Walk the journal objects in order, starting with the object that contains
// header->expire_pos, and classify what is found:
//   - objects reported as missing are appended to objects_missing,
//   - objects that were read are appended to objects_valid,
//   - byte ranges without a decodable entry are appended to ranges_invalid,
//   - offsets of decoded entries are appended to events_valid,
//   - entries accepted by `filter` are stored in `events`, keyed by offset.
// NOTE(review): many statements of this function (branch conditions, loop
// exits, the declarations of `gap` and `need`, the return value) are not
// visible in this view; the comments below describe only what the visible
// lines establish.
136 int JournalScanner::scan_events()
// Object size comes from configuration, falling back to the default file
// layout's object size when the configured value is 0.
138 uint64_t object_size
= g_conf
->mds_log_segment_size
;
139 if (object_size
== 0) {
140 // Default layout object size
141 object_size
= file_layout_t::get_default().object_size
;
// Scanning begins at the header's expire position.
144 uint64_t read_offset
= header
->expire_pos
;
145 dout(10) << std::hex
<< "Header 0x"
146 << header
->trimmed_pos
<< " 0x"
147 << header
->expire_pos
<< " 0x"
148 << header
->write_pos
<< std::dec
<< dendl
;
149 dout(10) << "Starting journal scan from offset 0x" << std::hex
<< read_offset
<< std::dec
<< dendl
;
151 // TODO also check for extraneous objects before the trimmed pos or after the write pos,
152 // which would indicate a bogus header.
// -1 converts to the largest uint64_t value; presumably a placeholder until a
// real gap start is recorded -- TODO confirm.
156 uint64_t gap_start
= -1;
// The for-header has no termination condition, so the loop must be left from
// inside the body (exit statements not visible here).
157 for (uint64_t obj_offset
= (read_offset
/ object_size
); ; obj_offset
++) {
158 uint64_t offset_in_obj
= 0;
159 if (obj_offset
* object_size
< header
->expire_pos
) {
160 // Skip up to expire_pos from start of the object
161 // (happens for the first object we read)
162 offset_in_obj
= header
->expire_pos
- obj_offset
* object_size
;
165 // Read this journal segment
166 bufferlist this_object
;
167 std::string
const oid
= obj_name(obj_offset
);
// Read up to INT_MAX bytes of this object, starting at offset_in_obj.
168 int r
= io
.read(oid
, this_object
, INT_MAX
, offset_in_obj
);
170 // Handle absent journal segments
// An object index beyond the one containing write_pos is treated as the end
// of the journal; the other visible branch reports the object as missing.
172 if (obj_offset
> (header
->write_pos
/ object_size
)) {
173 dout(4) << "Reached end of journal objects" << dendl
;
176 derr
<< "Missing object " << oid
<< dendl
;
179 objects_missing
.push_back(obj_offset
);
// A missing object starts a gap at the current read position.
181 gap_start
= read_offset
;
// NOTE(review): `gap` is logged here but its declaration and updates are not
// visible in this view.
184 dout(4) << "Read 0x" << std::hex
<< this_object
.length() << std::dec
185 << " bytes from " << oid
<< " gap=" << gap
<< dendl
;
186 objects_valid
.push_back(oid
);
// Copy the object's whole payload into read_buf, the buffer that is parsed
// below.
187 this_object
.copy(0, this_object
.length(), read_buf
);
191 // No valid data at the current read offset, scan forward until we find something valid looking
192 // or have to drop out to load another object.
193 dout(4) << "Searching for sentinel from 0x" << std::hex
<< read_offset
194 << ", 0x" << read_buf
.length() << std::dec
<< " bytes available" << dendl
;
// Decode a uint64_t from the front of read_buf and compare it with
// JournalStream::sentinel.
197 bufferlist::iterator p
= read_buf
.begin();
198 uint64_t candidate_sentinel
;
199 ::decode(candidate_sentinel
, p
);
201 dout(4) << "Data at 0x" << std::hex
<< read_offset
<< " = 0x" << candidate_sentinel
<< std::dec
<< dendl
;
203 if (candidate_sentinel
== JournalStream::sentinel
) {
204 dout(4) << "Found sentinel at 0x" << std::hex
<< read_offset
<< std::dec
<< dendl
;
// The gap ends where the sentinel was found; record it as an invalid range.
205 ranges_invalid
.push_back(Range(gap_start
, read_offset
));
209 // No sentinel, discard this byte
210 read_buf
.splice(0, 1);
// The search continues while read_buf still holds at least as many bytes as
// the sentinel occupies.
213 } while (read_buf
.length() >= sizeof(JournalStream::sentinel
));
214 dout(4) << "read_buf size is " << read_buf
.length() << dendl
;
216 dout(10) << "Parsing data, 0x" << std::hex
<< read_buf
.length() << std::dec
<< " bytes available" << dendl
;
218 // TODO: detect and handle legacy format journals: can do many things
219 // on them but on read errors have to give up instead of searching
221 JournalStream
journal_stream(JOURNAL_FORMAT_RESILIENT
);
222 bool readable
= false;
// Ask the stream whether read_buf holds a complete entry.
// NOTE(review): `need` is declared on a line not visible in this view.
225 readable
= journal_stream
.readable(read_buf
, &need
);
226 } catch (buffer::error
&e
) {
228 dout(4) << "Invalid container encoding at 0x" << std::hex
<< read_offset
<< std::dec
<< dendl
;
// A bad container encoding starts a gap here; one byte is dropped from the
// front of read_buf.
230 gap_start
= read_offset
;
231 read_buf
.splice(0, 1);
237 // Out of data, continue to read next object
241 bufferlist le_bl
; //< Serialized LogEvent blob
242 dout(10) << "Attempting decode at 0x" << std::hex
<< read_offset
<< std::dec
<< dendl
;
243 // This cannot fail to decode because we pre-checked that a serialized entry
244 // blob would be readable.
245 uint64_t start_ptr
= 0;
// read() fills le_bl and start_ptr; its return value is logged below as the
// number of bytes consumed.
246 uint64_t consumed
= journal_stream
.read(read_buf
, &le_bl
, &start_ptr
);
247 dout(10) << "Consumed 0x" << std::hex
<< consumed
<< std::dec
<< " bytes" << dendl
;
// The start pointer read from the entry must match the offset being scanned.
248 if (start_ptr
!= read_offset
) {
249 derr
<< "Bad entry start ptr (0x" << std::hex
<< start_ptr
<< ") at 0x"
250 << read_offset
<< std::dec
<< dendl
;
252 gap_start
= read_offset
;
253 // FIXME: given that entry was invalid, should we be skipping over it?
254 // maybe push bytes back onto start of read_buf and just advance one byte
255 // to start scanning instead. e.g. if a bogus size value is found it can
256 // cause us to consume and thus skip a bunch of following valid events.
257 read_offset
+= consumed
;
261 LogEvent
*le
= LogEvent::decode(le_bl
);
264 dout(10) << "Valid entry at 0x" << std::hex
<< read_offset
<< std::dec
<< dendl
;
// Extra check for subtree map events: an expire_pos beyond the event's own
// offset is recorded in `errors`.
266 if (le
->get_type() == EVENT_SUBTREEMAP
267 || le
->get_type() == EVENT_SUBTREEMAP_TEST
) {
// NOTE(review): the dynamic_cast result is dereferenced on the next visible
// line without a visible null check.
268 ESubtreeMap
*sle
= dynamic_cast<ESubtreeMap
*>(le
);
269 if (sle
->expire_pos
> read_offset
) {
270 errors
.insert(std::make_pair(
271 read_offset
, EventError(
273 "ESubtreeMap has expire_pos ahead of its own position")));
// Only events accepted by the filter are kept in `events`.
// NOTE(review): what happens to `le` when the filter rejects it is not
// visible in this view.
277 if (filter
.apply(read_offset
, *le
)) {
278 events
[read_offset
] = EventRecord(le
, consumed
);
282 events_valid
.push_back(read_offset
);
283 read_offset
+= consumed
;
285 dout(10) << "Invalid entry at 0x" << std::hex
<< read_offset
<< std::dec
<< dendl
;
// An undecodable entry starts a gap and is skipped over in full.
287 gap_start
= read_offset
;
288 read_offset
+= consumed
;
295 // Ended on a gap, assume it ran to end
296 ranges_invalid
.push_back(Range(gap_start
, -1));
// Summary of the scan results at debug level 4.
299 dout(4) << "Scanned objects, " << objects_missing
.size() << " missing, " << objects_valid
.size() << " valid" << dendl
;
300 dout(4) << "Events scanned, " << ranges_invalid
.size() << " gaps" << dendl
;
301 dout(4) << "Found " << events_valid
.size() << " valid events" << dendl
;
// NOTE(review): the message below contains the word "events" twice.
302 dout(4) << "Selected " << events
.size() << " events events for processing" << dendl
;
// Destructor: logs how many events are held, then deletes the LogEvent owned
// by each record in `events`.
// NOTE(review): any statements before the log line or after the loop (for
// example releasing `header` or clearing the map) are not visible in this
// view.
308 JournalScanner::~JournalScanner()
314 dout(4) << events
.size() << " events" << dendl
;
// Each record holds a raw LogEvent pointer in `log_event`; free it here.
315 for (EventMap::iterator i
= events
.begin(); i
!= events
.end(); ++i
) {
316 delete i
->second
.log_event
;
323 * Whether the journal data looks valid and replayable
325 bool JournalScanner::is_healthy() const
327 return (pointer_present
&& pointer_valid
328 && header_present
&& header_valid
329 && ranges_invalid
.empty()
330 && objects_missing
.empty());
335 * Whether the journal data can be read from RADOS
337 bool JournalScanner::is_readable() const
339 return (header_present
&& header_valid
&& objects_missing
.empty());
344 * Calculate the object name for a given offset
// Build the object name for inode `ino` at object index `offset`.
// The name is "<ino in hex>.<offset in hex, zero-padded to at least 8
// digits>", as given by the "%llx.%08llx" format string.
// NOTE(review): the declaration of the `name` buffer is not visible in this
// view. snprintf is bounded by sizeof(name), so output is truncated if the
// buffer is smaller than the formatted text.
346 std::string
JournalScanner::obj_name(inodeno_t ino
, uint64_t offset
) const
// Both arguments are cast to unsigned long long to match the %llx
// conversions.
349 snprintf(name
, sizeof(name
), "%llx.%08llx",
350 (unsigned long long)(ino
),
351 (unsigned long long)offset
);
352 return std::string(name
);
356 std::string
JournalScanner::obj_name(uint64_t offset
) const
358 return obj_name(ino
, offset
);
363 * Write a human readable summary of the journal health
// Write the scan results to `out` as text: an overall OK/DAMAGED line based
// on is_healthy(), the pointer and header status, then the missing objects
// and the corrupt regions, all printed in hexadecimal.
// NOTE(review): the closing braces of the blocks below are not visible in
// this view.
365 void JournalScanner::report(std::ostream
&out
) const
367 out
<< "Overall journal integrity: " << (is_healthy() ? "OK" : "DAMAGED") << std::endl
;
// Pointer status: "not found" takes precedence over "could not be decoded".
369 if (!pointer_present
) {
370 out
<< "Pointer not found" << std::endl
;
371 } else if (!pointer_valid
) {
372 out
<< "Pointer could not be decoded" << std::endl
;
// Header status, reported in the same way as the pointer.
375 if (!header_present
) {
376 out
<< "Header not found" << std::endl
;
377 } else if (!header_valid
) {
378 out
<< "Header could not be decoded" << std::endl
;
// One line per entry of objects_missing (the object indices pushed by
// scan_events()).
381 if (objects_missing
.size()) {
382 out
<< "Objects missing:" << std::endl
;
383 for (std::vector
<uint64_t>::const_iterator om
= objects_missing
.begin();
384 om
!= objects_missing
.end(); ++om
) {
385 out
<< " 0x" << std::hex
<< *om
<< std::dec
<< std::endl
;
// One line per invalid range, printed as "0x<first>-<second>"; only the
// first value gets a "0x" prefix, both are written while std::hex is set.
389 if (ranges_invalid
.size()) {
390 out
<< "Corrupt regions:" << std::endl
;
391 for (std::vector
<Range
>::const_iterator r
= ranges_invalid
.begin();
392 r
!= ranges_invalid
.end(); ++r
) {
393 out
<< " 0x" << std::hex
<< r
->first
<< "-" << r
->second
<< std::dec
<< std::endl
;