]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/cephfs/JournalScanner.cc
Import ceph 15.2.8
[ceph.git] / ceph / src / tools / cephfs / JournalScanner.cc
CommitLineData
7c673cae
FG
1// -*- mode:c++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * ceph - scalable distributed file system
5 *
6 * copyright (c) 2014 john spray <john.spray@inktank.com>
7 *
8 * this is free software; you can redistribute it and/or
9 * modify it under the terms of the gnu lesser general public
10 * license version 2.1, as published by the free software
11 * foundation. see file copying.
12 */
13
14
15#include "include/rados/librados.hpp"
16#include "mds/JournalPointer.h"
17
18#include "mds/events/ESubtreeMap.h"
11fdf7f2 19#include "mds/PurgeQueue.h"
7c673cae
FG
20
21#include "JournalScanner.h"
22
23#define dout_context g_ceph_context
24#define dout_subsys ceph_subsys_mds
25
26/**
27 * Read journal header, followed by sequential scan through journal space.
28 *
29 * Return 0 on success, else error code. Note that success has the special meaning
30 * that we were able to apply our checks, it does *not* mean that the journal is
31 * healthy.
32 */
33int JournalScanner::scan(bool const full)
34{
35 int r = 0;
36
11fdf7f2 37 r = set_journal_ino();
7c673cae
FG
38 if (r < 0) {
39 return r;
40 }
41
11fdf7f2 42 if (!is_mdlog || pointer_present) {
7c673cae
FG
43 r = scan_header();
44 if (r < 0) {
45 return r;
46 }
47 }
48
49 if (full && header_present) {
50 r = scan_events();
51 if (r < 0) {
52 return r;
53 }
54 }
55
56 return 0;
57}
58
59
11fdf7f2
TL
60int JournalScanner::set_journal_ino()
61{
62 int r = 0;
63 if (type == "purge_queue") {
64 ino = MDS_INO_PURGE_QUEUE + rank;
65 }
66 else if (type == "mdlog"){
67 r = scan_pointer();
68 is_mdlog = true;
69 }
70 else {
71 ceph_abort(); // should not get here
72 }
73 return r;
74}
75
7c673cae
FG
76int JournalScanner::scan_pointer()
77{
78 // Issue read
79 std::string const pointer_oid = obj_name(MDS_INO_LOG_POINTER_OFFSET + rank, 0);
80 bufferlist pointer_bl;
81 int r = io.read(pointer_oid, pointer_bl, INT_MAX, 0);
82 if (r == -ENOENT) {
83 // 'Successfully' discovered the pointer is missing.
84 derr << "Pointer " << pointer_oid << " is absent" << dendl;
85 return 0;
86 } else if (r < 0) {
87 // Error preventing us interrogating pointer
88 derr << "Pointer " << pointer_oid << " is unreadable" << dendl;
89 return r;
90 } else {
91 dout(4) << "Pointer " << pointer_oid << " is readable" << dendl;
92 pointer_present = true;
93
94 JournalPointer jp;
95 try {
11fdf7f2 96 auto q = pointer_bl.cbegin();
7c673cae
FG
97 jp.decode(q);
98 } catch(buffer::error &e) {
99 derr << "Pointer " << pointer_oid << " is corrupt: " << e.what() << dendl;
100 return 0;
101 }
102
103 pointer_valid = true;
104 ino = jp.front;
105 return 0;
106 }
107}
108
109
110int JournalScanner::scan_header()
111{
112 int r;
113
114 bufferlist header_bl;
115 std::string header_name = obj_name(0);
116 dout(4) << "JournalScanner::scan: reading header object '" << header_name << "'" << dendl;
117 r = io.read(header_name, header_bl, INT_MAX, 0);
118 if (r < 0) {
119 derr << "Header " << header_name << " is unreadable" << dendl;
120 return 0; // "Successfully" found an error
121 } else {
122 header_present = true;
123 }
124
11fdf7f2 125 auto header_bl_i = header_bl.cbegin();
7c673cae
FG
126 header = new Journaler::Header();
127 try
128 {
129 header->decode(header_bl_i);
130 }
131 catch (buffer::error &e)
132 {
133 derr << "Header is corrupt (" << e.what() << ")" << dendl;
134 delete header;
135 header = NULL;
136 return 0; // "Successfully" found an error
137 }
138
139 if (header->magic != std::string(CEPH_FS_ONDISK_MAGIC)) {
140 derr << "Header is corrupt (bad magic)" << dendl;
141 return 0; // "Successfully" found an error
142 }
143 if (!((header->trimmed_pos <= header->expire_pos) && (header->expire_pos <= header->write_pos))) {
144 derr << "Header is invalid (inconsistent offsets)" << dendl;
145 return 0; // "Successfully" found an error
146 }
147 header_valid = true;
148
149 return 0;
150}
151
152
153int JournalScanner::scan_events()
154{
11fdf7f2 155 uint64_t object_size = g_conf()->mds_log_segment_size;
7c673cae
FG
156 if (object_size == 0) {
157 // Default layout object size
158 object_size = file_layout_t::get_default().object_size;
159 }
160
161 uint64_t read_offset = header->expire_pos;
162 dout(10) << std::hex << "Header 0x"
163 << header->trimmed_pos << " 0x"
164 << header->expire_pos << " 0x"
165 << header->write_pos << std::dec << dendl;
166 dout(10) << "Starting journal scan from offset 0x" << std::hex << read_offset << std::dec << dendl;
167
168 // TODO also check for extraneous objects before the trimmed pos or after the write pos,
169 // which would indicate a bogus header.
170
171 bufferlist read_buf;
172 bool gap = false;
173 uint64_t gap_start = -1;
174 for (uint64_t obj_offset = (read_offset / object_size); ; obj_offset++) {
175 uint64_t offset_in_obj = 0;
176 if (obj_offset * object_size < header->expire_pos) {
177 // Skip up to expire_pos from start of the object
178 // (happens for the first object we read)
179 offset_in_obj = header->expire_pos - obj_offset * object_size;
180 }
181
182 // Read this journal segment
183 bufferlist this_object;
184 std::string const oid = obj_name(obj_offset);
185 int r = io.read(oid, this_object, INT_MAX, offset_in_obj);
186
187 // Handle absent journal segments
188 if (r < 0) {
189 if (obj_offset > (header->write_pos / object_size)) {
190 dout(4) << "Reached end of journal objects" << dendl;
191 break;
192 } else {
193 derr << "Missing object " << oid << dendl;
194 }
195
196 objects_missing.push_back(obj_offset);
f91f0fd5
TL
197 if (!gap) {
198 gap_start = read_offset;
199 gap = true;
200 }
201 if (read_buf.length() > 0) {
202 read_offset += read_buf.length();
203 read_buf.clear();
204 }
205 read_offset += object_size - offset_in_obj;
7c673cae
FG
206 continue;
207 } else {
208 dout(4) << "Read 0x" << std::hex << this_object.length() << std::dec
209 << " bytes from " << oid << " gap=" << gap << dendl;
210 objects_valid.push_back(oid);
9f95a23c 211 this_object.begin().copy(this_object.length(), read_buf);
7c673cae
FG
212 }
213
214 if (gap) {
215 // No valid data at the current read offset, scan forward until we find something valid looking
216 // or have to drop out to load another object.
217 dout(4) << "Searching for sentinel from 0x" << std::hex << read_offset
218 << ", 0x" << read_buf.length() << std::dec << " bytes available" << dendl;
219
220 do {
11fdf7f2 221 auto p = read_buf.cbegin();
7c673cae 222 uint64_t candidate_sentinel;
11fdf7f2 223 decode(candidate_sentinel, p);
7c673cae
FG
224
225 dout(4) << "Data at 0x" << std::hex << read_offset << " = 0x" << candidate_sentinel << std::dec << dendl;
226
227 if (candidate_sentinel == JournalStream::sentinel) {
228 dout(4) << "Found sentinel at 0x" << std::hex << read_offset << std::dec << dendl;
229 ranges_invalid.push_back(Range(gap_start, read_offset));
230 gap = false;
231 break;
232 } else {
233 // No sentinel, discard this byte
234 read_buf.splice(0, 1);
235 read_offset += 1;
236 }
237 } while (read_buf.length() >= sizeof(JournalStream::sentinel));
238 dout(4) << "read_buf size is " << read_buf.length() << dendl;
f91f0fd5
TL
239 }
240 {
7c673cae
FG
241 dout(10) << "Parsing data, 0x" << std::hex << read_buf.length() << std::dec << " bytes available" << dendl;
242 while(true) {
243 // TODO: detect and handle legacy format journals: can do many things
244 // on them but on read errors have to give up instead of searching
245 // for sentinels.
246 JournalStream journal_stream(JOURNAL_FORMAT_RESILIENT);
247 bool readable = false;
248 try {
249 uint64_t need;
250 readable = journal_stream.readable(read_buf, &need);
251 } catch (buffer::error &e) {
252 readable = false;
253 dout(4) << "Invalid container encoding at 0x" << std::hex << read_offset << std::dec << dendl;
254 gap = true;
255 gap_start = read_offset;
256 read_buf.splice(0, 1);
257 read_offset += 1;
258 break;
259 }
260
261 if (!readable) {
262 // Out of data, continue to read next object
263 break;
264 }
265
266 bufferlist le_bl; //< Serialized LogEvent blob
267 dout(10) << "Attempting decode at 0x" << std::hex << read_offset << std::dec << dendl;
268 // This cannot fail to decode because we pre-checked that a serialized entry
269 // blob would be readable.
270 uint64_t start_ptr = 0;
271 uint64_t consumed = journal_stream.read(read_buf, &le_bl, &start_ptr);
272 dout(10) << "Consumed 0x" << std::hex << consumed << std::dec << " bytes" << dendl;
273 if (start_ptr != read_offset) {
274 derr << "Bad entry start ptr (0x" << std::hex << start_ptr << ") at 0x"
275 << read_offset << std::dec << dendl;
276 gap = true;
277 gap_start = read_offset;
278 // FIXME: given that entry was invalid, should we be skipping over it?
279 // maybe push bytes back onto start of read_buf and just advance one byte
280 // to start scanning instead. e.g. if a bogus size value is found it can
281 // cause us to consume and thus skip a bunch of following valid events.
282 read_offset += consumed;
283 break;
284 }
11fdf7f2
TL
285 bool valid_entry = true;
286 if (is_mdlog) {
287 auto le = LogEvent::decode_event(le_bl.cbegin());
288
289 if (le) {
290 dout(10) << "Valid entry at 0x" << std::hex << read_offset << std::dec << dendl;
291
292 if (le->get_type() == EVENT_SUBTREEMAP
293 || le->get_type() == EVENT_SUBTREEMAP_TEST) {
294 auto&& sle = dynamic_cast<ESubtreeMap&>(*le);
295 if (sle.expire_pos > read_offset) {
296 errors.insert(std::make_pair(
297 read_offset, EventError(
298 -ERANGE,
299 "ESubtreeMap has expire_pos ahead of its own position")));
300 }
7c673cae 301 }
7c673cae 302
11fdf7f2
TL
303 if (filter.apply(read_offset, *le)) {
304 events.insert_or_assign(read_offset, EventRecord(std::move(le), consumed));
305 }
7c673cae 306 } else {
11fdf7f2 307 valid_entry = false;
7c673cae 308 }
11fdf7f2
TL
309 } else if (type == "purge_queue"){
310 auto pi = std::make_unique<PurgeItem>();
311 try {
312 auto q = le_bl.cbegin();
313 pi->decode(q);
314 if (filter.apply(read_offset, *pi)) {
315 events.insert_or_assign(read_offset, EventRecord(std::move(pi), consumed));
316 }
317 } catch (const buffer::error &err) {
318 valid_entry = false;
319 }
7c673cae 320 } else {
11fdf7f2
TL
321 ceph_abort(); // should not get here
322 }
323 if (!valid_entry) {
7c673cae
FG
324 dout(10) << "Invalid entry at 0x" << std::hex << read_offset << std::dec << dendl;
325 gap = true;
326 gap_start = read_offset;
327 read_offset += consumed;
b32b8144 328 break;
11fdf7f2
TL
329 } else {
330 events_valid.push_back(read_offset);
331 read_offset += consumed;
7c673cae
FG
332 }
333 }
334 }
335 }
336
337 if (gap) {
338 // Ended on a gap, assume it ran to end
339 ranges_invalid.push_back(Range(gap_start, -1));
340 }
341
342 dout(4) << "Scanned objects, " << objects_missing.size() << " missing, " << objects_valid.size() << " valid" << dendl;
343 dout(4) << "Events scanned, " << ranges_invalid.size() << " gaps" << dendl;
344 dout(4) << "Found " << events_valid.size() << " valid events" << dendl;
345 dout(4) << "Selected " << events.size() << " events events for processing" << dendl;
346
347 return 0;
348}
349
350
351JournalScanner::~JournalScanner()
352{
353 if (header) {
354 delete header;
355 header = NULL;
356 }
357 dout(4) << events.size() << " events" << dendl;
7c673cae
FG
358 events.clear();
359}
360
361
362/**
363 * Whether the journal data looks valid and replayable
364 */
365bool JournalScanner::is_healthy() const
366{
11fdf7f2 367 return ((!is_mdlog || (pointer_present && pointer_valid))
7c673cae
FG
368 && header_present && header_valid
369 && ranges_invalid.empty()
370 && objects_missing.empty());
371}
372
373
374/**
375 * Whether the journal data can be read from RADOS
376 */
377bool JournalScanner::is_readable() const
378{
379 return (header_present && header_valid && objects_missing.empty());
380}
381
382
383/**
384 * Calculate the object name for a given offset
385 */
386std::string JournalScanner::obj_name(inodeno_t ino, uint64_t offset) const
387{
388 char name[60];
389 snprintf(name, sizeof(name), "%llx.%08llx",
390 (unsigned long long)(ino),
391 (unsigned long long)offset);
392 return std::string(name);
393}
394
395
396std::string JournalScanner::obj_name(uint64_t offset) const
397{
398 return obj_name(ino, offset);
399}
400
401
402/*
403 * Write a human readable summary of the journal health
404 */
405void JournalScanner::report(std::ostream &out) const
406{
407 out << "Overall journal integrity: " << (is_healthy() ? "OK" : "DAMAGED") << std::endl;
408
11fdf7f2
TL
409 if (is_mdlog) {
410 if (!pointer_present) {
411 out << "Pointer not found" << std::endl;
412 } else if (!pointer_valid) {
413 out << "Pointer could not be decoded" << std::endl;
414 }
7c673cae 415 }
7c673cae
FG
416 if (!header_present) {
417 out << "Header not found" << std::endl;
418 } else if (!header_valid) {
419 out << "Header could not be decoded" << std::endl;
420 }
421
422 if (objects_missing.size()) {
423 out << "Objects missing:" << std::endl;
424 for (std::vector<uint64_t>::const_iterator om = objects_missing.begin();
425 om != objects_missing.end(); ++om) {
426 out << " 0x" << std::hex << *om << std::dec << std::endl;
427 }
428 }
429
430 if (ranges_invalid.size()) {
431 out << "Corrupt regions:" << std::endl;
432 for (std::vector<Range>::const_iterator r = ranges_invalid.begin();
433 r != ranges_invalid.end(); ++r) {
434 out << " 0x" << std::hex << r->first << "-" << r->second << std::dec << std::endl;
435 }
436 }
437}
438