]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/cephfs/Dumper.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / tools / cephfs / Dumper.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2010 Greg Farnum <gregf@hq.newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef _BACKWARD_BACKWARD_WARNING_H
16#define _BACKWARD_BACKWARD_WARNING_H // make gcc 4.3 shut up about hash_*
17#endif
18
19#include "include/compat.h"
20#include "include/fs_types.h"
21#include "common/entity_name.h"
22#include "common/errno.h"
23#include "common/safe_io.h"
24#include "mds/mdstypes.h"
25#include "mds/LogEvent.h"
26#include "mds/JournalPointer.h"
27#include "osdc/Journaler.h"
91327a77 28#include "mon/MonClient.h"
7c673cae
FG
29
30#include "Dumper.h"
31
32#define dout_context g_ceph_context
33#define dout_subsys ceph_subsys_mds
34
35#define HEADER_LEN 4096
36
20effc67
TL
37using namespace std;
38
11fdf7f2 39int Dumper::init(mds_role_t role_, const std::string &type)
7c673cae
FG
40{
41 role = role_;
42
43 int r = MDSUtility::init();
44 if (r < 0) {
45 return r;
46 }
47
48 auto fs = fsmap->get_filesystem(role.fscid);
11fdf7f2
TL
49 ceph_assert(fs != nullptr);
50
51 if (type == "mdlog") {
52 JournalPointer jp(role.rank, fs->mds_map.get_metadata_pool());
53 int jp_load_result = jp.load(objecter);
54 if (jp_load_result != 0) {
55 std::cerr << "Error loading journal: " << cpp_strerror(jp_load_result) << std::endl;
56 return jp_load_result;
57 } else {
58 ino = jp.front;
59 }
60 } else if (type == "purge_queue") {
61 ino = MDS_INO_PURGE_QUEUE + role.rank;
7c673cae 62 } else {
11fdf7f2 63 ceph_abort(); // should not get here
7c673cae 64 }
11fdf7f2 65 return 0;
7c673cae
FG
66}
67
68
69int Dumper::recover_journal(Journaler *journaler)
70{
71 C_SaferCond cond;
9f95a23c 72 lock.lock();
7c673cae 73 journaler->recover(&cond);
9f95a23c 74 lock.unlock();
d2e6a577 75 const int r = cond.wait();
7c673cae
FG
76
77 if (r < 0) { // Error
78 derr << "error on recovery: " << cpp_strerror(r) << dendl;
79 return r;
80 } else {
81 dout(10) << "completed journal recovery" << dendl;
82 return 0;
83 }
84}
85
86
87int Dumper::dump(const char *dump_file)
88{
89 int r = 0;
90
91 auto fs = fsmap->get_filesystem(role.fscid);
11fdf7f2 92 ceph_assert(fs != nullptr);
7c673cae
FG
93
94 Journaler journaler("dumper", ino, fs->mds_map.get_metadata_pool(),
95 CEPH_FS_ONDISK_MAGIC, objecter, 0, 0,
96 &finisher);
97 r = recover_journal(&journaler);
98 if (r) {
99 return r;
100 }
101 uint64_t start = journaler.get_read_pos();
102 uint64_t end = journaler.get_write_pos();
103 uint64_t len = end-start;
104
105 Filer filer(objecter, &finisher);
106
107 cout << "journal is " << start << "~" << len << std::endl;
108
f67539c2 109 int fd = ::open(dump_file, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644);
7c673cae
FG
110 if (fd >= 0) {
111 // include an informative header
91327a77
AA
112 uuid_d fsid = monc->get_fsid();
113 char fsid_str[40];
114 fsid.print(fsid_str);
7c673cae
FG
115 char buf[HEADER_LEN];
116 memset(buf, 0, sizeof(buf));
91327a77
AA
117 snprintf(buf, HEADER_LEN, "Ceph mds%d journal dump\n start offset %llu (0x%llx)\n\
118 length %llu (0x%llx)\n write_pos %llu (0x%llx)\n format %llu\n\
119 trimmed_pos %llu (0x%llx)\n stripe_unit %lu (0x%lx)\n stripe_count %lu (0x%lx)\n\
120 object_size %lu (0x%lx)\n fsid %s\n%c",
7c673cae
FG
121 role.rank,
122 (unsigned long long)start, (unsigned long long)start,
123 (unsigned long long)len, (unsigned long long)len,
124 (unsigned long long)journaler.last_committed.write_pos, (unsigned long long)journaler.last_committed.write_pos,
125 (unsigned long long)journaler.last_committed.stream_format,
126 (unsigned long long)journaler.last_committed.trimmed_pos, (unsigned long long)journaler.last_committed.trimmed_pos,
91327a77
AA
127 (unsigned long)journaler.last_committed.layout.stripe_unit, (unsigned long)journaler.last_committed.layout.stripe_unit,
128 (unsigned long)journaler.last_committed.layout.stripe_count, (unsigned long)journaler.last_committed.layout.stripe_count,
129 (unsigned long)journaler.last_committed.layout.object_size, (unsigned long)journaler.last_committed.layout.object_size,
130 fsid_str,
7c673cae
FG
131 4);
132 r = safe_write(fd, buf, sizeof(buf));
133 if (r) {
134 derr << "Error " << r << " (" << cpp_strerror(r) << ") writing journal file header" << dendl;
135 ::close(fd);
136 return r;
137 }
138
139 // write the data
140 off64_t seeked = ::lseek64(fd, start, SEEK_SET);
141 if (seeked == (off64_t)-1) {
142 r = errno;
143 derr << "Error " << r << " (" << cpp_strerror(r) << ") seeking to 0x" << std::hex << start << std::dec << dendl;
144 ::close(fd);
145 return r;
146 }
147
148
149 // Read and write 32MB chunks. Slower than it could be because we're not
150 // streaming, but that's okay because this is just a debug/disaster tool.
151 const uint32_t chunk_size = 32 * 1024 * 1024;
152
153 for (uint64_t pos = start; pos < start + len; pos += chunk_size) {
154 bufferlist bl;
155 dout(10) << "Reading at pos=0x" << std::hex << pos << std::dec << dendl;
156
11fdf7f2 157 const uint32_t read_size = std::min<uint64_t>(chunk_size, end - pos);
7c673cae
FG
158
159 C_SaferCond cond;
9f95a23c 160 lock.lock();
7c673cae
FG
161 filer.read(ino, &journaler.get_layout(), CEPH_NOSNAP,
162 pos, read_size, &bl, 0, &cond);
9f95a23c 163 lock.unlock();
7c673cae
FG
164 r = cond.wait();
165 if (r < 0) {
166 derr << "Error " << r << " (" << cpp_strerror(r) << ") reading "
167 "journal at offset 0x" << std::hex << pos << std::dec << dendl;
168 ::close(fd);
169 return r;
170 }
171 dout(10) << "Got 0x" << std::hex << bl.length() << std::dec
172 << " bytes" << dendl;
173
174 r = bl.write_fd(fd);
175 if (r) {
176 derr << "Error " << r << " (" << cpp_strerror(r) << ") writing journal file" << dendl;
177 ::close(fd);
178 return r;
179 }
180 }
181
182 r = ::close(fd);
183 if (r) {
184 r = errno;
185 derr << "Error " << r << " (" << cpp_strerror(r) << ") closing journal file" << dendl;
186 return r;
187 }
188
189 cout << "wrote " << len << " bytes at offset " << start << " to " << dump_file << "\n"
190 << "NOTE: this is a _sparse_ file; you can\n"
191 << "\t$ tar cSzf " << dump_file << ".tgz " << dump_file << "\n"
192 << " to efficiently compress it while preserving sparseness." << std::endl;
193 return 0;
194 } else {
195 int err = errno;
196 derr << "unable to open " << dump_file << ": " << cpp_strerror(err) << dendl;
197 return err;
198 }
199}
200
91327a77 201int Dumper::undump(const char *dump_file, bool force)
7c673cae
FG
202{
203 cout << "undump " << dump_file << std::endl;
204
205 auto fs = fsmap->get_filesystem(role.fscid);
11fdf7f2 206 ceph_assert(fs != nullptr);
7c673cae
FG
207
208 int r = 0;
91327a77
AA
209 // try get layout info from cluster
210 Journaler journaler("umdumper", ino, fs->mds_map.get_metadata_pool(),
211 CEPH_FS_ONDISK_MAGIC, objecter, 0, 0,
212 &finisher);
213 int recovered = recover_journal(&journaler);
214 if (recovered != 0) {
215 derr << "recover_journal failed, try to get header from dump file " << dendl;
216 }
217
f67539c2 218 int fd = ::open(dump_file, O_RDONLY|O_BINARY);
7c673cae
FG
219 if (fd < 0) {
220 r = errno;
221 derr << "couldn't open " << dump_file << ": " << cpp_strerror(r) << dendl;
222 return r;
223 }
224
225 // Ceph mds0 journal dump
226 // start offset 232401996 (0xdda2c4c)
227 // length 1097504 (0x10bf20)
228
229 char buf[HEADER_LEN];
230 r = safe_read(fd, buf, sizeof(buf));
231 if (r < 0) {
232 VOID_TEMP_FAILURE_RETRY(::close(fd));
233 return r;
234 }
235
236 long long unsigned start, len, write_pos, format, trimmed_pos;
91327a77 237 long unsigned stripe_unit, stripe_count, object_size;
7c673cae
FG
238 sscanf(strstr(buf, "start offset"), "start offset %llu", &start);
239 sscanf(strstr(buf, "length"), "length %llu", &len);
240 sscanf(strstr(buf, "write_pos"), "write_pos %llu", &write_pos);
241 sscanf(strstr(buf, "format"), "format %llu", &format);
91327a77
AA
242
243 if (!force) {
244 // need to check if fsid match onlien cluster fsid
245 if (strstr(buf, "fsid")) {
246 uuid_d fsid;
247 char fsid_str[40];
92f5a8d4 248 sscanf(strstr(buf, "fsid"), "fsid %39s", fsid_str);
91327a77
AA
249 r = fsid.parse(fsid_str);
250 if (!r) {
251 derr << "Invalid fsid" << dendl;
252 ::close(fd);
253 return -EINVAL;
254 }
255
256 if (fsid != monc->get_fsid()) {
257 derr << "Imported journal fsid does not match online cluster fsid" << dendl;
258 derr << "Use --force to skip fsid check" << dendl;
259 ::close(fd);
260 return -EINVAL;
261 }
262 } else {
263 derr << "Invalid header, no fsid embeded" << dendl;
264 ::close(fd);
265 return -EINVAL;
266 }
267 }
268
269 if (recovered == 0) {
270 stripe_unit = journaler.last_committed.layout.stripe_unit;
271 stripe_count = journaler.last_committed.layout.stripe_count;
272 object_size = journaler.last_committed.layout.object_size;
273 } else {
274 // try to get layout from dump file header, if failed set layout to default
275 if (strstr(buf, "stripe_unit")) {
276 sscanf(strstr(buf, "stripe_unit"), "stripe_unit %lu", &stripe_unit);
277 } else {
278 stripe_unit = file_layout_t::get_default().stripe_unit;
279 }
280 if (strstr(buf, "stripe_count")) {
281 sscanf(strstr(buf, "stripe_count"), "stripe_count %lu", &stripe_count);
282 } else {
283 stripe_count = file_layout_t::get_default().stripe_count;
284 }
285 if (strstr(buf, "object_size")) {
286 sscanf(strstr(buf, "object_size"), "object_size %lu", &object_size);
287 } else {
288 object_size = file_layout_t::get_default().object_size;
289 }
290 }
291
7c673cae
FG
292 if (strstr(buf, "trimmed_pos")) {
293 sscanf(strstr(buf, "trimmed_pos"), "trimmed_pos %llu", &trimmed_pos);
294 } else {
295 // Old format dump, any untrimmed objects before expire_pos will
296 // be discarded as trash.
91327a77 297 trimmed_pos = start - (start % object_size);
7c673cae
FG
298 }
299
300 if (trimmed_pos > start) {
301 derr << std::hex << "Invalid header (trimmed 0x" << trimmed_pos
302 << " > expire 0x" << start << std::dec << dendl;
303 ::close(fd);
304 return -EINVAL;
305 }
306
307 if (start > write_pos) {
308 derr << std::hex << "Invalid header (expire 0x" << start
309 << " > write 0x" << write_pos << std::dec << dendl;
310 ::close(fd);
311 return -EINVAL;
312 }
313
314 cout << "start " << start <<
315 " len " << len <<
316 " write_pos " << write_pos <<
317 " format " << format <<
91327a77
AA
318 " trimmed_pos " << trimmed_pos <<
319 " stripe_unit " << stripe_unit <<
320 " stripe_count " << stripe_count <<
321 " object_size " << object_size << std::endl;
7c673cae
FG
322
323 Journaler::Header h;
324 h.trimmed_pos = trimmed_pos;
325 h.expire_pos = start;
326 h.write_pos = write_pos;
327 h.stream_format = format;
328 h.magic = CEPH_FS_ONDISK_MAGIC;
329
91327a77
AA
330 h.layout.stripe_unit = stripe_unit;
331 h.layout.stripe_count = stripe_count;
332 h.layout.object_size = object_size;
7c673cae
FG
333 h.layout.pool_id = fs->mds_map.get_metadata_pool();
334
335 bufferlist hbl;
11fdf7f2 336 encode(h, hbl);
7c673cae
FG
337
338 object_t oid = file_object_t(ino, 0);
339 object_locator_t oloc(fs->mds_map.get_metadata_pool());
340 SnapContext snapc;
341
342 cout << "writing header " << oid << std::endl;
343 C_SaferCond header_cond;
9f95a23c 344 lock.lock();
7c673cae
FG
345 objecter->write_full(oid, oloc, snapc, hbl,
346 ceph::real_clock::now(), 0,
347 &header_cond);
9f95a23c 348 lock.unlock();
7c673cae
FG
349
350 r = header_cond.wait();
351 if (r != 0) {
352 derr << "Failed to write header: " << cpp_strerror(r) << dendl;
353 ::close(fd);
354 return r;
355 }
356
357 Filer filer(objecter, &finisher);
358
359 /* Erase any objects at the end of the region to which we shall write
360 * the new log data. This is to avoid leaving trailing junk after
361 * the newly written data. Any junk more than one object ahead
362 * will be taken care of during normal operation by Journaler's
363 * prezeroing behaviour */
364 {
365 uint32_t const object_size = h.layout.object_size;
11fdf7f2 366 ceph_assert(object_size > 0);
1adf2230
AA
367 uint64_t last_obj = h.write_pos / object_size;
368 uint64_t purge_count = 2;
369 /* When the length is zero, the last_obj should be zeroed
370 * from the offset determined by the new write_pos instead of being purged.
371 */
372 if (!len) {
373 purge_count = 1;
374 ++last_obj;
375 }
7c673cae
FG
376 C_SaferCond purge_cond;
377 cout << "Purging " << purge_count << " objects from " << last_obj << std::endl;
9f95a23c 378 lock.lock();
7c673cae
FG
379 filer.purge_range(ino, &h.layout, snapc, last_obj, purge_count,
380 ceph::real_clock::now(), 0, &purge_cond);
9f95a23c 381 lock.unlock();
7c673cae
FG
382 purge_cond.wait();
383 }
1adf2230
AA
384 /* When the length is zero, zero the last object
385 * from the offset determined by the new write_pos.
386 */
387 if (!len) {
388 uint64_t offset_in_obj = h.write_pos % h.layout.object_size;
389 uint64_t len = h.layout.object_size - offset_in_obj;
390 C_SaferCond zero_cond;
391 cout << "Zeroing " << len << " bytes in the last object." << std::endl;
392
9f95a23c 393 lock.lock();
1adf2230 394 filer.zero(ino, &h.layout, snapc, h.write_pos, len, ceph::real_clock::now(), 0, &zero_cond);
9f95a23c 395 lock.unlock();
1adf2230
AA
396 zero_cond.wait();
397 }
7c673cae
FG
398
399 // Stream from `fd` to `filer`
400 uint64_t pos = start;
401 uint64_t left = len;
402 while (left > 0) {
403 // Read
404 bufferlist j;
405 lseek64(fd, pos, SEEK_SET);
11fdf7f2 406 uint64_t l = std::min<uint64_t>(left, 1024*1024);
7c673cae
FG
407 j.read_fd(fd, l);
408
409 // Write
410 cout << " writing " << pos << "~" << l << std::endl;
411 C_SaferCond write_cond;
9f95a23c 412 lock.lock();
7c673cae
FG
413 filer.write(ino, &h.layout, snapc, pos, l, j,
414 ceph::real_clock::now(), 0, &write_cond);
9f95a23c 415 lock.unlock();
7c673cae
FG
416
417 r = write_cond.wait();
418 if (r != 0) {
419 derr << "Failed to write header: " << cpp_strerror(r) << dendl;
420 ::close(fd);
421 return r;
422 }
423
424 // Advance
425 pos += l;
426 left -= l;
427 }
428
429 VOID_TEMP_FAILURE_RETRY(::close(fd));
430 cout << "done." << std::endl;
431 return 0;
432}
433