]>
git.proxmox.com Git - ceph.git/blob - ceph/src/tools/cephfs/Dumper.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2010 Greg Farnum <gregf@hq.newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef _BACKWARD_BACKWARD_WARNING_H
16 #define _BACKWARD_BACKWARD_WARNING_H // make gcc 4.3 shut up about hash_*
19 #include "include/compat.h"
20 #include "include/fs_types.h"
21 #include "common/entity_name.h"
22 #include "common/errno.h"
23 #include "common/safe_io.h"
24 #include "mds/mdstypes.h"
25 #include "mds/LogEvent.h"
26 #include "mds/JournalPointer.h"
27 #include "osdc/Journaler.h"
28 #include "mon/MonClient.h"
32 #define dout_context g_ceph_context
33 #define dout_subsys ceph_subsys_mds
35 #define HEADER_LEN 4096
// Dumper::init: choose the journal inode (`ino`) for the requested journal.
//
// Parameters:
//   role_ - MDS role to operate on. The body reads `role.fscid` and
//           `role.rank`; the copy from role_ into `role` is not visible in
//           this excerpt (presumably on a dropped line -- TODO confirm).
//   type  - journal selector, compared against "mdlog" and "purge_queue".
//
// Returns: the non-zero jp.load() result when the journal pointer cannot be
// loaded.
// NOTE(review): this excerpt has lost lines (braces, the check of `r`, the
// success return), so the remaining return paths cannot be confirmed here.
39 int Dumper::init(mds_role_t role_
, const std::string
&type
)
43 int r
= MDSUtility::init();
// Look up the filesystem for this role; it is asserted to exist.
48 auto fs
= fsmap
->get_filesystem(role
.fscid
);
49 ceph_assert(fs
!= nullptr);
51 if (type
== "mdlog") {
// mdlog: build a JournalPointer from the rank and the metadata pool id and
// load it through the objecter; a non-zero load result is reported on
// std::cerr and returned to the caller.
52 JournalPointer
jp(role
.rank
, fs
->mds_map
.get_metadata_pool());
53 int jp_load_result
= jp
.load(objecter
);
54 if (jp_load_result
!= 0) {
55 std::cerr
<< "Error loading journal: " << cpp_strerror(jp_load_result
) << std::endl
;
56 return jp_load_result
;
60 } else if (type
== "purge_queue") {
// purge_queue: the inode is the MDS_INO_PURGE_QUEUE base plus the rank.
61 ino
= MDS_INO_PURGE_QUEUE
+ role
.rank
;
// Presumably the fall-through branch for an unrecognised `type` (the
// enclosing else is not visible here -- TODO confirm).
63 ceph_abort(); // should not get here
// Dumper::recover_journal: run recovery on `journaler` and wait for it.
//
// Parameters:
//   journaler - journal to recover; recover() is invoked on it with `cond`
//               as the completion.
//
// The result of cond.wait() is stored in `r`. One path logs it through derr
// as a recovery error, another logs completion at debug level 10.
// NOTE(review): the declaration of `cond`, the test on `r` and the return
// statements are on lines missing from this excerpt; presumably `r` is
// returned on failure and 0 on success -- TODO confirm.
69 int Dumper::recover_journal(Journaler
*journaler
)
73 journaler
->recover(&cond
);
// Block until recovery has finished and pick up its result code.
75 const int r
= cond
.wait();
78 derr
<< "error on recovery: " << cpp_strerror(r
) << dendl
;
81 dout(10) << "completed journal recovery" << dendl
;
// Dumper::dump: copy the journal identified by `ino` into a local file.
//
// Parameters:
//   dump_file - path of the output file; opened with
//               O_WRONLY|O_CREAT|O_TRUNC|O_BINARY and mode 0644.
//
// Visible steps, in order:
//   1. Recover the journal and take [read_pos, write_pos) as the range.
//   2. Write a HEADER_LEN-byte, zero-padded text header describing the
//      journal (offsets, format, layout, fsid).
//   3. Seek the file to `start` and copy the range in 32MB chunks, reading
//      through Filer.
//   4. Close the file and print a summary.
//
// NOTE(review): this excerpt has lost lines (braces, error checks, returns,
// several declarations), so the exact return values cannot be confirmed
// here; the derr messages below mark the failure points that are visible.
87 int Dumper::dump(const char *dump_file
)
91 auto fs
= fsmap
->get_filesystem(role
.fscid
);
92 ceph_assert(fs
!= nullptr);
// Recover the journal first so that its read/write positions and its
// last committed header are available below.
94 Journaler
journaler("dumper", ino
, fs
->mds_map
.get_metadata_pool(),
95 CEPH_FS_ONDISK_MAGIC
, objecter
, 0, 0,
97 r
= recover_journal(&journaler
);
// The dumped byte range is [read_pos, write_pos).
101 uint64_t start
= journaler
.get_read_pos();
102 uint64_t end
= journaler
.get_write_pos();
103 uint64_t len
= end
-start
;
105 Filer
filer(objecter
, &finisher
);
107 cout
<< "journal is " << start
<< "~" << len
<< std::endl
;
109 int fd
= ::open(dump_file
, O_WRONLY
|O_CREAT
|O_TRUNC
|O_BINARY
, 0644);
111 // include an informative header
// The header is a fixed HEADER_LEN-byte text block, zero-filled before
// formatting, and written with a single safe_write of the whole buffer.
112 uuid_d fsid
= monc
->get_fsid();
114 fsid
.print(fsid_str
);
115 char buf
[HEADER_LEN
];
116 memset(buf
, 0, sizeof(buf
));
117 snprintf(buf
, HEADER_LEN
, "Ceph mds%d journal dump\n start offset %llu (0x%llx)\n\
118 length %llu (0x%llx)\n write_pos %llu (0x%llx)\n format %llu\n\
119 trimmed_pos %llu (0x%llx)\n stripe_unit %lu (0x%lx)\n stripe_count %lu (0x%lx)\n\
120 object_size %lu (0x%lx)\n fsid %s\n%c",
122 (unsigned long long)start
, (unsigned long long)start
,
123 (unsigned long long)len
, (unsigned long long)len
,
124 (unsigned long long)journaler
.last_committed
.write_pos
, (unsigned long long)journaler
.last_committed
.write_pos
,
125 (unsigned long long)journaler
.last_committed
.stream_format
,
126 (unsigned long long)journaler
.last_committed
.trimmed_pos
, (unsigned long long)journaler
.last_committed
.trimmed_pos
,
127 (unsigned long)journaler
.last_committed
.layout
.stripe_unit
, (unsigned long)journaler
.last_committed
.layout
.stripe_unit
,
128 (unsigned long)journaler
.last_committed
.layout
.stripe_count
, (unsigned long)journaler
.last_committed
.layout
.stripe_count
,
129 (unsigned long)journaler
.last_committed
.layout
.object_size
, (unsigned long)journaler
.last_committed
.layout
.object_size
,
132 r
= safe_write(fd
, buf
, sizeof(buf
));
134 derr
<< "Error " << r
<< " (" << cpp_strerror(r
) << ") writing journal file header" << dendl
;
// Position the file at `start` so that journal bytes land at their journal
// offset; presumably the resulting gap is why the closing message calls
// the file sparse.
140 off64_t seeked
= ::lseek64(fd
, start
, SEEK_SET
);
141 if (seeked
== (off64_t
)-1) {
143 derr
<< "Error " << r
<< " (" << cpp_strerror(r
) << ") seeking to 0x" << std::hex
<< start
<< std::dec
<< dendl
;
149 // Read and write 32MB chunks. Slower than it could be because we're not
150 // streaming, but that's okay because this is just a debug/disaster tool.
151 const uint32_t chunk_size
= 32 * 1024 * 1024;
153 for (uint64_t pos
= start
; pos
< start
+ len
; pos
+= chunk_size
) {
155 dout(10) << "Reading at pos=0x" << std::hex
<< pos
<< std::dec
<< dendl
;
// Clamp the final chunk so that the read stops at `end`.
157 const uint32_t read_size
= std::min
<uint64_t>(chunk_size
, end
- pos
);
161 filer
.read(ino
, &journaler
.get_layout(), CEPH_NOSNAP
,
162 pos
, read_size
, &bl
, 0, &cond
);
166 derr
<< "Error " << r
<< " (" << cpp_strerror(r
) << ") reading "
167 "journal at offset 0x" << std::hex
<< pos
<< std::dec
<< dendl
;
171 dout(10) << "Got 0x" << std::hex
<< bl
.length() << std::dec
172 << " bytes" << dendl
;
176 derr
<< "Error " << r
<< " (" << cpp_strerror(r
) << ") writing journal file" << dendl
;
185 derr
<< "Error " << r
<< " (" << cpp_strerror(r
) << ") closing journal file" << dendl
;
189 cout
<< "wrote " << len
<< " bytes at offset " << start
<< " to " << dump_file
<< "\n"
190 << "NOTE: this is a _sparse_ file; you can\n"
191 << "\t$ tar cSzf " << dump_file
<< ".tgz " << dump_file
<< "\n"
192 << " to efficiently compress it while preserving sparseness." << std::endl
;
196 derr
<< "unable to open " << dump_file
<< ": " << cpp_strerror(err
) << dendl
;
// Dumper::undump: write a previously dumped journal back into the cluster.
//
// Parameters:
//   dump_file - path of the dump to import; opened O_RDONLY|O_BINARY.
//   force     - going by the --force hint in the fsid mismatch message, this
//               presumably bypasses the fsid comparison; the test on `force`
//               itself is on a line missing from this excerpt -- TODO confirm.
//
// Visible steps, in order:
//   1. Try recover_journal() to obtain the layout from the cluster.
//   2. Read the HEADER_LEN-byte text header from the dump and parse it.
//   3. Compare the embedded fsid with monc->get_fsid().
//   4. Validate trimmed_pos <= start <= write_pos.
//   5. Write a new journal header object, purge or zero the region at
//      write_pos (see the in-code comments), then stream the journal data
//      from the file into the cluster through Filer.
//
// NOTE(review): this excerpt has lost lines (braces, error checks, returns,
// several declarations, and the closing of two block comments), so return
// values and some branch conditions cannot be confirmed here.
201 int Dumper::undump(const char *dump_file
, bool force
)
203 cout
<< "undump " << dump_file
<< std::endl
;
205 auto fs
= fsmap
->get_filesystem(role
.fscid
);
206 ceph_assert(fs
!= nullptr);
209 // try to get layout info from the cluster
210 Journaler
journaler("umdumper", ino
, fs
->mds_map
.get_metadata_pool(),
211 CEPH_FS_ONDISK_MAGIC
, objecter
, 0, 0,
// A failed recovery is only logged here; `recovered` is consulted again
// further down to decide where the layout comes from.
213 int recovered
= recover_journal(&journaler
);
214 if (recovered
!= 0) {
215 derr
<< "recover_journal failed, try to get header from dump file " << dendl
;
// Open the dump file read-only; its first HEADER_LEN bytes hold the header.
218 int fd
= ::open(dump_file
, O_RDONLY
|O_BINARY
);
221 derr
<< "couldn't open " << dump_file
<< ": " << cpp_strerror(r
) << dendl
;
225 // Ceph mds0 journal dump
226 // start offset 232401996 (0xdda2c4c)
227 // length 1097504 (0x10bf20)
229 char buf
[HEADER_LEN
];
230 r
= safe_read(fd
, buf
, sizeof(buf
));
232 VOID_TEMP_FAILURE_RETRY(::close(fd
));
236 long long unsigned start
, len
, write_pos
, format
, trimmed_pos
;
237 long unsigned stripe_unit
, stripe_count
, object_size
;
// NOTE(review): these four required fields pass the strstr() result
// straight to sscanf() with no NULL check and no check of the sscanf()
// return value, unlike the optional fields below that test strstr() first.
// A header lacking one of these keys would hand sscanf() a null pointer.
238 sscanf(strstr(buf
, "start offset"), "start offset %llu", &start
);
239 sscanf(strstr(buf
, "length"), "length %llu", &len
);
240 sscanf(strstr(buf
, "write_pos"), "write_pos %llu", &write_pos
);
241 sscanf(strstr(buf
, "format"), "format %llu", &format
);
244 // need to check whether the fsid matches the online cluster fsid
245 if (strstr(buf
, "fsid")) {
248 sscanf(strstr(buf
, "fsid"), "fsid %39s", fsid_str
);
249 r
= fsid
.parse(fsid_str
);
251 derr
<< "Invalid fsid" << dendl
;
256 if (fsid
!= monc
->get_fsid()) {
257 derr
<< "Imported journal fsid does not match online cluster fsid" << dendl
;
258 derr
<< "Use --force to skip fsid check" << dendl
;
263 derr
<< "Invalid header, no fsid embeded" << dendl
;
// A successful recovery supplies the layout from the journal header that
// is already committed in the cluster.
269 if (recovered
== 0) {
270 stripe_unit
= journaler
.last_committed
.layout
.stripe_unit
;
271 stripe_count
= journaler
.last_committed
.layout
.stripe_count
;
272 object_size
= journaler
.last_committed
.layout
.object_size
;
274 // try to get layout from dump file header, if failed set layout to default
275 if (strstr(buf
, "stripe_unit")) {
276 sscanf(strstr(buf
, "stripe_unit"), "stripe_unit %lu", &stripe_unit
);
278 stripe_unit
= file_layout_t::get_default().stripe_unit
;
280 if (strstr(buf
, "stripe_count")) {
281 sscanf(strstr(buf
, "stripe_count"), "stripe_count %lu", &stripe_count
);
283 stripe_count
= file_layout_t::get_default().stripe_count
;
285 if (strstr(buf
, "object_size")) {
286 sscanf(strstr(buf
, "object_size"), "object_size %lu", &object_size
);
288 object_size
= file_layout_t::get_default().object_size
;
292 if (strstr(buf
, "trimmed_pos")) {
293 sscanf(strstr(buf
, "trimmed_pos"), "trimmed_pos %llu", &trimmed_pos
);
295 // Old format dump, any untrimmed objects before expire_pos will
296 // be discarded as trash.
// Without a trimmed_pos in the header, round `start` down to a multiple
// of object_size.
297 trimmed_pos
= start
- (start
% object_size
);
// Sanity checks on the parsed header: trimmed_pos <= start <= write_pos.
300 if (trimmed_pos
> start
) {
301 derr
<< std::hex
<< "Invalid header (trimmed 0x" << trimmed_pos
302 << " > expire 0x" << start
<< std::dec
<< dendl
;
307 if (start
> write_pos
) {
308 derr
<< std::hex
<< "Invalid header (expire 0x" << start
309 << " > write 0x" << write_pos
<< std::dec
<< dendl
;
314 cout
<< "start " << start
<<
316 " write_pos " << write_pos
<<
317 " format " << format
<<
318 " trimmed_pos " << trimmed_pos
<<
319 " stripe_unit " << stripe_unit
<<
320 " stripe_count " << stripe_count
<<
321 " object_size " << object_size
<< std::endl
;
// Fill in the new journal header `h` from the parsed values; `start`
// becomes the expire position.
324 h
.trimmed_pos
= trimmed_pos
;
325 h
.expire_pos
= start
;
326 h
.write_pos
= write_pos
;
327 h
.stream_format
= format
;
328 h
.magic
= CEPH_FS_ONDISK_MAGIC
;
330 h
.layout
.stripe_unit
= stripe_unit
;
331 h
.layout
.stripe_count
= stripe_count
;
332 h
.layout
.object_size
= object_size
;
333 h
.layout
.pool_id
= fs
->mds_map
.get_metadata_pool();
// The header goes to object 0 of the journal inode in the metadata pool,
// written as a full-object write and waited on before continuing.
338 object_t oid
= file_object_t(ino
, 0);
339 object_locator_t
oloc(fs
->mds_map
.get_metadata_pool());
342 cout
<< "writing header " << oid
<< std::endl
;
343 C_SaferCond header_cond
;
345 objecter
->write_full(oid
, oloc
, snapc
, hbl
,
346 ceph::real_clock::now(), 0,
350 r
= header_cond
.wait();
352 derr
<< "Failed to write header: " << cpp_strerror(r
) << dendl
;
357 Filer
filer(objecter
, &finisher
);
359 /* Erase any objects at the end of the region to which we shall write
360 * the new log data. This is to avoid leaving trailing junk after
361 * the newly written data. Any junk more than one object ahead
362 * will be taken care of during normal operation by Journaler's
363 * prezeroing behaviour */
365 uint32_t const object_size
= h
.layout
.object_size
;
366 ceph_assert(object_size
> 0);
367 uint64_t last_obj
= h
.write_pos
/ object_size
;
368 uint64_t purge_count
= 2;
369 /* When the length is zero, the last_obj should be zeroed
370 * from the offset determined by the new write_pos instead of being purged.
376 C_SaferCond purge_cond
;
377 cout
<< "Purging " << purge_count
<< " objects from " << last_obj
<< std::endl
;
379 filer
.purge_range(ino
, &h
.layout
, snapc
, last_obj
, purge_count
,
380 ceph::real_clock::now(), 0, &purge_cond
);
384 /* When the length is zero, zero the last object
385 * from the offset determined by the new write_pos.
388 uint64_t offset_in_obj
= h
.write_pos
% h
.layout
.object_size
;
389 uint64_t len
= h
.layout
.object_size
- offset_in_obj
;
390 C_SaferCond zero_cond
;
391 cout
<< "Zeroing " << len
<< " bytes in the last object." << std::endl
;
394 filer
.zero(ino
, &h
.layout
, snapc
, h
.write_pos
, len
, ceph::real_clock::now(), 0, &zero_cond
);
399 // Stream from `fd` to `filer`
400 uint64_t pos
= start
;
405 lseek64(fd
, pos
, SEEK_SET
);
406 uint64_t l
= std::min
<uint64_t>(left
, 1024*1024);
410 cout
<< " writing " << pos
<< "~" << l
<< std::endl
;
411 C_SaferCond write_cond
;
413 filer
.write(ino
, &h
.layout
, snapc
, pos
, l
, j
,
414 ceph::real_clock::now(), 0, &write_cond
);
417 r
= write_cond
.wait();
// NOTE(review): this message says header, but the wait above follows the
// journal data write issued through filer.write; it looks copied from the
// header write earlier in this function.
419 derr
<< "Failed to write header: " << cpp_strerror(r
) << dendl
;
429 VOID_TEMP_FAILURE_RETRY(::close(fd
));
430 cout
<< "done." << std::endl
;