]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/cephfs/JournalScanner.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / tools / cephfs / JournalScanner.cc
1 // -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2014 John Spray <john.spray@inktank.com>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 */
13
14
15 #include "include/rados/librados.hpp"
16 #include "mds/JournalPointer.h"
17
18 #include "mds/events/ESubtreeMap.h"
19
20 #include "JournalScanner.h"
21
22 #define dout_context g_ceph_context
23 #define dout_subsys ceph_subsys_mds
24
25 /**
26 * Read journal header, followed by sequential scan through journal space.
27 *
28 * Return 0 on success, else error code. Note that success has the special meaning
29 * that we were able to apply our checks, it does *not* mean that the journal is
30 * healthy.
31 */
32 int JournalScanner::scan(bool const full)
33 {
34 int r = 0;
35
36 r = scan_pointer();
37 if (r < 0) {
38 return r;
39 }
40
41 if (pointer_present) {
42 r = scan_header();
43 if (r < 0) {
44 return r;
45 }
46 }
47
48 if (full && header_present) {
49 r = scan_events();
50 if (r < 0) {
51 return r;
52 }
53 }
54
55 return 0;
56 }
57
58
59 int JournalScanner::scan_pointer()
60 {
61 // Issue read
62 std::string const pointer_oid = obj_name(MDS_INO_LOG_POINTER_OFFSET + rank, 0);
63 bufferlist pointer_bl;
64 int r = io.read(pointer_oid, pointer_bl, INT_MAX, 0);
65 if (r == -ENOENT) {
66 // 'Successfully' discovered the pointer is missing.
67 derr << "Pointer " << pointer_oid << " is absent" << dendl;
68 return 0;
69 } else if (r < 0) {
70 // Error preventing us interrogating pointer
71 derr << "Pointer " << pointer_oid << " is unreadable" << dendl;
72 return r;
73 } else {
74 dout(4) << "Pointer " << pointer_oid << " is readable" << dendl;
75 pointer_present = true;
76
77 JournalPointer jp;
78 try {
79 bufferlist::iterator q = pointer_bl.begin();
80 jp.decode(q);
81 } catch(buffer::error &e) {
82 derr << "Pointer " << pointer_oid << " is corrupt: " << e.what() << dendl;
83 return 0;
84 }
85
86 pointer_valid = true;
87 ino = jp.front;
88 return 0;
89 }
90 }
91
92
93 int JournalScanner::scan_header()
94 {
95 int r;
96
97 bufferlist header_bl;
98 std::string header_name = obj_name(0);
99 dout(4) << "JournalScanner::scan: reading header object '" << header_name << "'" << dendl;
100 r = io.read(header_name, header_bl, INT_MAX, 0);
101 if (r < 0) {
102 derr << "Header " << header_name << " is unreadable" << dendl;
103 return 0; // "Successfully" found an error
104 } else {
105 header_present = true;
106 }
107
108 bufferlist::iterator header_bl_i = header_bl.begin();
109 header = new Journaler::Header();
110 try
111 {
112 header->decode(header_bl_i);
113 }
114 catch (buffer::error &e)
115 {
116 derr << "Header is corrupt (" << e.what() << ")" << dendl;
117 delete header;
118 header = NULL;
119 return 0; // "Successfully" found an error
120 }
121
122 if (header->magic != std::string(CEPH_FS_ONDISK_MAGIC)) {
123 derr << "Header is corrupt (bad magic)" << dendl;
124 return 0; // "Successfully" found an error
125 }
126 if (!((header->trimmed_pos <= header->expire_pos) && (header->expire_pos <= header->write_pos))) {
127 derr << "Header is invalid (inconsistent offsets)" << dendl;
128 return 0; // "Successfully" found an error
129 }
130 header_valid = true;
131
132 return 0;
133 }
134
135
136 int JournalScanner::scan_events()
137 {
138 uint64_t object_size = g_conf->mds_log_segment_size;
139 if (object_size == 0) {
140 // Default layout object size
141 object_size = file_layout_t::get_default().object_size;
142 }
143
144 uint64_t read_offset = header->expire_pos;
145 dout(10) << std::hex << "Header 0x"
146 << header->trimmed_pos << " 0x"
147 << header->expire_pos << " 0x"
148 << header->write_pos << std::dec << dendl;
149 dout(10) << "Starting journal scan from offset 0x" << std::hex << read_offset << std::dec << dendl;
150
151 // TODO also check for extraneous objects before the trimmed pos or after the write pos,
152 // which would indicate a bogus header.
153
154 bufferlist read_buf;
155 bool gap = false;
156 uint64_t gap_start = -1;
157 for (uint64_t obj_offset = (read_offset / object_size); ; obj_offset++) {
158 uint64_t offset_in_obj = 0;
159 if (obj_offset * object_size < header->expire_pos) {
160 // Skip up to expire_pos from start of the object
161 // (happens for the first object we read)
162 offset_in_obj = header->expire_pos - obj_offset * object_size;
163 }
164
165 // Read this journal segment
166 bufferlist this_object;
167 std::string const oid = obj_name(obj_offset);
168 int r = io.read(oid, this_object, INT_MAX, offset_in_obj);
169
170 // Handle absent journal segments
171 if (r < 0) {
172 if (obj_offset > (header->write_pos / object_size)) {
173 dout(4) << "Reached end of journal objects" << dendl;
174 break;
175 } else {
176 derr << "Missing object " << oid << dendl;
177 }
178
179 objects_missing.push_back(obj_offset);
180 gap = true;
181 gap_start = read_offset;
182 continue;
183 } else {
184 dout(4) << "Read 0x" << std::hex << this_object.length() << std::dec
185 << " bytes from " << oid << " gap=" << gap << dendl;
186 objects_valid.push_back(oid);
187 this_object.copy(0, this_object.length(), read_buf);
188 }
189
190 if (gap) {
191 // No valid data at the current read offset, scan forward until we find something valid looking
192 // or have to drop out to load another object.
193 dout(4) << "Searching for sentinel from 0x" << std::hex << read_offset
194 << ", 0x" << read_buf.length() << std::dec << " bytes available" << dendl;
195
196 do {
197 bufferlist::iterator p = read_buf.begin();
198 uint64_t candidate_sentinel;
199 ::decode(candidate_sentinel, p);
200
201 dout(4) << "Data at 0x" << std::hex << read_offset << " = 0x" << candidate_sentinel << std::dec << dendl;
202
203 if (candidate_sentinel == JournalStream::sentinel) {
204 dout(4) << "Found sentinel at 0x" << std::hex << read_offset << std::dec << dendl;
205 ranges_invalid.push_back(Range(gap_start, read_offset));
206 gap = false;
207 break;
208 } else {
209 // No sentinel, discard this byte
210 read_buf.splice(0, 1);
211 read_offset += 1;
212 }
213 } while (read_buf.length() >= sizeof(JournalStream::sentinel));
214 dout(4) << "read_buf size is " << read_buf.length() << dendl;
215 } else {
216 dout(10) << "Parsing data, 0x" << std::hex << read_buf.length() << std::dec << " bytes available" << dendl;
217 while(true) {
218 // TODO: detect and handle legacy format journals: can do many things
219 // on them but on read errors have to give up instead of searching
220 // for sentinels.
221 JournalStream journal_stream(JOURNAL_FORMAT_RESILIENT);
222 bool readable = false;
223 try {
224 uint64_t need;
225 readable = journal_stream.readable(read_buf, &need);
226 } catch (buffer::error &e) {
227 readable = false;
228 dout(4) << "Invalid container encoding at 0x" << std::hex << read_offset << std::dec << dendl;
229 gap = true;
230 gap_start = read_offset;
231 read_buf.splice(0, 1);
232 read_offset += 1;
233 break;
234 }
235
236 if (!readable) {
237 // Out of data, continue to read next object
238 break;
239 }
240
241 bufferlist le_bl; //< Serialized LogEvent blob
242 dout(10) << "Attempting decode at 0x" << std::hex << read_offset << std::dec << dendl;
243 // This cannot fail to decode because we pre-checked that a serialized entry
244 // blob would be readable.
245 uint64_t start_ptr = 0;
246 uint64_t consumed = journal_stream.read(read_buf, &le_bl, &start_ptr);
247 dout(10) << "Consumed 0x" << std::hex << consumed << std::dec << " bytes" << dendl;
248 if (start_ptr != read_offset) {
249 derr << "Bad entry start ptr (0x" << std::hex << start_ptr << ") at 0x"
250 << read_offset << std::dec << dendl;
251 gap = true;
252 gap_start = read_offset;
253 // FIXME: given that entry was invalid, should we be skipping over it?
254 // maybe push bytes back onto start of read_buf and just advance one byte
255 // to start scanning instead. e.g. if a bogus size value is found it can
256 // cause us to consume and thus skip a bunch of following valid events.
257 read_offset += consumed;
258 break;
259 }
260
261 LogEvent *le = LogEvent::decode(le_bl);
262
263 if (le) {
264 dout(10) << "Valid entry at 0x" << std::hex << read_offset << std::dec << dendl;
265
266 if (le->get_type() == EVENT_SUBTREEMAP
267 || le->get_type() == EVENT_SUBTREEMAP_TEST) {
268 ESubtreeMap *sle = dynamic_cast<ESubtreeMap*>(le);
269 if (sle->expire_pos > read_offset) {
270 errors.insert(std::make_pair(
271 read_offset, EventError(
272 -ERANGE,
273 "ESubtreeMap has expire_pos ahead of its own position")));
274 }
275 }
276
277 if (filter.apply(read_offset, *le)) {
278 events[read_offset] = EventRecord(le, consumed);
279 } else {
280 delete le;
281 }
282 events_valid.push_back(read_offset);
283 read_offset += consumed;
284 } else {
285 dout(10) << "Invalid entry at 0x" << std::hex << read_offset << std::dec << dendl;
286 gap = true;
287 gap_start = read_offset;
288 read_offset += consumed;
289 }
290 }
291 }
292 }
293
294 if (gap) {
295 // Ended on a gap, assume it ran to end
296 ranges_invalid.push_back(Range(gap_start, -1));
297 }
298
299 dout(4) << "Scanned objects, " << objects_missing.size() << " missing, " << objects_valid.size() << " valid" << dendl;
300 dout(4) << "Events scanned, " << ranges_invalid.size() << " gaps" << dendl;
301 dout(4) << "Found " << events_valid.size() << " valid events" << dendl;
302 dout(4) << "Selected " << events.size() << " events events for processing" << dendl;
303
304 return 0;
305 }
306
307
308 JournalScanner::~JournalScanner()
309 {
310 if (header) {
311 delete header;
312 header = NULL;
313 }
314 dout(4) << events.size() << " events" << dendl;
315 for (EventMap::iterator i = events.begin(); i != events.end(); ++i) {
316 delete i->second.log_event;
317 }
318 events.clear();
319 }
320
321
322 /**
323 * Whether the journal data looks valid and replayable
324 */
325 bool JournalScanner::is_healthy() const
326 {
327 return (pointer_present && pointer_valid
328 && header_present && header_valid
329 && ranges_invalid.empty()
330 && objects_missing.empty());
331 }
332
333
334 /**
335 * Whether the journal data can be read from RADOS
336 */
337 bool JournalScanner::is_readable() const
338 {
339 return (header_present && header_valid && objects_missing.empty());
340 }
341
342
343 /**
344 * Calculate the object name for a given offset
345 */
346 std::string JournalScanner::obj_name(inodeno_t ino, uint64_t offset) const
347 {
348 char name[60];
349 snprintf(name, sizeof(name), "%llx.%08llx",
350 (unsigned long long)(ino),
351 (unsigned long long)offset);
352 return std::string(name);
353 }
354
355
356 std::string JournalScanner::obj_name(uint64_t offset) const
357 {
358 return obj_name(ino, offset);
359 }
360
361
362 /*
363 * Write a human readable summary of the journal health
364 */
365 void JournalScanner::report(std::ostream &out) const
366 {
367 out << "Overall journal integrity: " << (is_healthy() ? "OK" : "DAMAGED") << std::endl;
368
369 if (!pointer_present) {
370 out << "Pointer not found" << std::endl;
371 } else if (!pointer_valid) {
372 out << "Pointer could not be decoded" << std::endl;
373 }
374
375 if (!header_present) {
376 out << "Header not found" << std::endl;
377 } else if (!header_valid) {
378 out << "Header could not be decoded" << std::endl;
379 }
380
381 if (objects_missing.size()) {
382 out << "Objects missing:" << std::endl;
383 for (std::vector<uint64_t>::const_iterator om = objects_missing.begin();
384 om != objects_missing.end(); ++om) {
385 out << " 0x" << std::hex << *om << std::dec << std::endl;
386 }
387 }
388
389 if (ranges_invalid.size()) {
390 out << "Corrupt regions:" << std::endl;
391 for (std::vector<Range>::const_iterator r = ranges_invalid.begin();
392 r != ranges_invalid.end(); ++r) {
393 out << " 0x" << std::hex << r->first << "-" << r->second << std::dec << std::endl;
394 }
395 }
396 }
397