]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/cephfs/Dumper.cc
update sources to 12.2.10
[ceph.git] / ceph / src / tools / cephfs / Dumper.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2010 Greg Farnum <gregf@hq.newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef _BACKWARD_BACKWARD_WARNING_H
16#define _BACKWARD_BACKWARD_WARNING_H // make gcc 4.3 shut up about hash_*
17#endif
18
19#include "include/compat.h"
20#include "include/fs_types.h"
21#include "common/entity_name.h"
22#include "common/errno.h"
23#include "common/safe_io.h"
24#include "mds/mdstypes.h"
25#include "mds/LogEvent.h"
26#include "mds/JournalPointer.h"
27#include "osdc/Journaler.h"
91327a77 28#include "mon/MonClient.h"
7c673cae
FG
29
30#include "Dumper.h"
31
32#define dout_context g_ceph_context
33#define dout_subsys ceph_subsys_mds
34
35#define HEADER_LEN 4096
36
37int Dumper::init(mds_role_t role_)
38{
39 role = role_;
40
41 int r = MDSUtility::init();
42 if (r < 0) {
43 return r;
44 }
45
46 auto fs = fsmap->get_filesystem(role.fscid);
47 assert(fs != nullptr);
48
49 JournalPointer jp(role.rank, fs->mds_map.get_metadata_pool());
50 int jp_load_result = jp.load(objecter);
51 if (jp_load_result != 0) {
52 std::cerr << "Error loading journal: " << cpp_strerror(jp_load_result) << std::endl;
53 return jp_load_result;
54 } else {
55 ino = jp.front;
56 return 0;
57 }
58}
59
60
61int Dumper::recover_journal(Journaler *journaler)
62{
63 C_SaferCond cond;
64 lock.Lock();
65 journaler->recover(&cond);
66 lock.Unlock();
d2e6a577 67 const int r = cond.wait();
7c673cae
FG
68
69 if (r < 0) { // Error
70 derr << "error on recovery: " << cpp_strerror(r) << dendl;
71 return r;
72 } else {
73 dout(10) << "completed journal recovery" << dendl;
74 return 0;
75 }
76}
77
78
79int Dumper::dump(const char *dump_file)
80{
81 int r = 0;
82
83 auto fs = fsmap->get_filesystem(role.fscid);
84 assert(fs != nullptr);
85
86 Journaler journaler("dumper", ino, fs->mds_map.get_metadata_pool(),
87 CEPH_FS_ONDISK_MAGIC, objecter, 0, 0,
88 &finisher);
89 r = recover_journal(&journaler);
90 if (r) {
91 return r;
92 }
93 uint64_t start = journaler.get_read_pos();
94 uint64_t end = journaler.get_write_pos();
95 uint64_t len = end-start;
96
97 Filer filer(objecter, &finisher);
98
99 cout << "journal is " << start << "~" << len << std::endl;
100
101 int fd = ::open(dump_file, O_WRONLY|O_CREAT|O_TRUNC, 0644);
102 if (fd >= 0) {
103 // include an informative header
91327a77
AA
104 uuid_d fsid = monc->get_fsid();
105 char fsid_str[40];
106 fsid.print(fsid_str);
7c673cae
FG
107 char buf[HEADER_LEN];
108 memset(buf, 0, sizeof(buf));
91327a77
AA
109 snprintf(buf, HEADER_LEN, "Ceph mds%d journal dump\n start offset %llu (0x%llx)\n\
110 length %llu (0x%llx)\n write_pos %llu (0x%llx)\n format %llu\n\
111 trimmed_pos %llu (0x%llx)\n stripe_unit %lu (0x%lx)\n stripe_count %lu (0x%lx)\n\
112 object_size %lu (0x%lx)\n fsid %s\n%c",
7c673cae
FG
113 role.rank,
114 (unsigned long long)start, (unsigned long long)start,
115 (unsigned long long)len, (unsigned long long)len,
116 (unsigned long long)journaler.last_committed.write_pos, (unsigned long long)journaler.last_committed.write_pos,
117 (unsigned long long)journaler.last_committed.stream_format,
118 (unsigned long long)journaler.last_committed.trimmed_pos, (unsigned long long)journaler.last_committed.trimmed_pos,
91327a77
AA
119 (unsigned long)journaler.last_committed.layout.stripe_unit, (unsigned long)journaler.last_committed.layout.stripe_unit,
120 (unsigned long)journaler.last_committed.layout.stripe_count, (unsigned long)journaler.last_committed.layout.stripe_count,
121 (unsigned long)journaler.last_committed.layout.object_size, (unsigned long)journaler.last_committed.layout.object_size,
122 fsid_str,
7c673cae
FG
123 4);
124 r = safe_write(fd, buf, sizeof(buf));
125 if (r) {
126 derr << "Error " << r << " (" << cpp_strerror(r) << ") writing journal file header" << dendl;
127 ::close(fd);
128 return r;
129 }
130
131 // write the data
132 off64_t seeked = ::lseek64(fd, start, SEEK_SET);
133 if (seeked == (off64_t)-1) {
134 r = errno;
135 derr << "Error " << r << " (" << cpp_strerror(r) << ") seeking to 0x" << std::hex << start << std::dec << dendl;
136 ::close(fd);
137 return r;
138 }
139
140
141 // Read and write 32MB chunks. Slower than it could be because we're not
142 // streaming, but that's okay because this is just a debug/disaster tool.
143 const uint32_t chunk_size = 32 * 1024 * 1024;
144
145 for (uint64_t pos = start; pos < start + len; pos += chunk_size) {
146 bufferlist bl;
147 dout(10) << "Reading at pos=0x" << std::hex << pos << std::dec << dendl;
148
149 const uint32_t read_size = MIN(chunk_size, end - pos);
150
151 C_SaferCond cond;
152 lock.Lock();
153 filer.read(ino, &journaler.get_layout(), CEPH_NOSNAP,
154 pos, read_size, &bl, 0, &cond);
155 lock.Unlock();
156 r = cond.wait();
157 if (r < 0) {
158 derr << "Error " << r << " (" << cpp_strerror(r) << ") reading "
159 "journal at offset 0x" << std::hex << pos << std::dec << dendl;
160 ::close(fd);
161 return r;
162 }
163 dout(10) << "Got 0x" << std::hex << bl.length() << std::dec
164 << " bytes" << dendl;
165
166 r = bl.write_fd(fd);
167 if (r) {
168 derr << "Error " << r << " (" << cpp_strerror(r) << ") writing journal file" << dendl;
169 ::close(fd);
170 return r;
171 }
172 }
173
174 r = ::close(fd);
175 if (r) {
176 r = errno;
177 derr << "Error " << r << " (" << cpp_strerror(r) << ") closing journal file" << dendl;
178 return r;
179 }
180
181 cout << "wrote " << len << " bytes at offset " << start << " to " << dump_file << "\n"
182 << "NOTE: this is a _sparse_ file; you can\n"
183 << "\t$ tar cSzf " << dump_file << ".tgz " << dump_file << "\n"
184 << " to efficiently compress it while preserving sparseness." << std::endl;
185 return 0;
186 } else {
187 int err = errno;
188 derr << "unable to open " << dump_file << ": " << cpp_strerror(err) << dendl;
189 return err;
190 }
191}
192
91327a77 193int Dumper::undump(const char *dump_file, bool force)
7c673cae
FG
194{
195 cout << "undump " << dump_file << std::endl;
196
197 auto fs = fsmap->get_filesystem(role.fscid);
198 assert(fs != nullptr);
199
200 int r = 0;
91327a77
AA
201 // try get layout info from cluster
202 Journaler journaler("umdumper", ino, fs->mds_map.get_metadata_pool(),
203 CEPH_FS_ONDISK_MAGIC, objecter, 0, 0,
204 &finisher);
205 int recovered = recover_journal(&journaler);
206 if (recovered != 0) {
207 derr << "recover_journal failed, try to get header from dump file " << dendl;
208 }
209
7c673cae
FG
210 int fd = ::open(dump_file, O_RDONLY);
211 if (fd < 0) {
212 r = errno;
213 derr << "couldn't open " << dump_file << ": " << cpp_strerror(r) << dendl;
214 return r;
215 }
216
217 // Ceph mds0 journal dump
218 // start offset 232401996 (0xdda2c4c)
219 // length 1097504 (0x10bf20)
220
221 char buf[HEADER_LEN];
222 r = safe_read(fd, buf, sizeof(buf));
223 if (r < 0) {
224 VOID_TEMP_FAILURE_RETRY(::close(fd));
225 return r;
226 }
227
228 long long unsigned start, len, write_pos, format, trimmed_pos;
91327a77 229 long unsigned stripe_unit, stripe_count, object_size;
7c673cae
FG
230 sscanf(strstr(buf, "start offset"), "start offset %llu", &start);
231 sscanf(strstr(buf, "length"), "length %llu", &len);
232 sscanf(strstr(buf, "write_pos"), "write_pos %llu", &write_pos);
233 sscanf(strstr(buf, "format"), "format %llu", &format);
91327a77
AA
234
235 if (!force) {
236 // need to check if fsid match onlien cluster fsid
237 if (strstr(buf, "fsid")) {
238 uuid_d fsid;
239 char fsid_str[40];
240 sscanf(strstr(buf, "fsid"), "fsid %s", fsid_str);
241 r = fsid.parse(fsid_str);
242 if (!r) {
243 derr << "Invalid fsid" << dendl;
244 ::close(fd);
245 return -EINVAL;
246 }
247
248 if (fsid != monc->get_fsid()) {
249 derr << "Imported journal fsid does not match online cluster fsid" << dendl;
250 derr << "Use --force to skip fsid check" << dendl;
251 ::close(fd);
252 return -EINVAL;
253 }
254 } else {
255 derr << "Invalid header, no fsid embeded" << dendl;
256 ::close(fd);
257 return -EINVAL;
258 }
259 }
260
261 if (recovered == 0) {
262 stripe_unit = journaler.last_committed.layout.stripe_unit;
263 stripe_count = journaler.last_committed.layout.stripe_count;
264 object_size = journaler.last_committed.layout.object_size;
265 } else {
266 // try to get layout from dump file header, if failed set layout to default
267 if (strstr(buf, "stripe_unit")) {
268 sscanf(strstr(buf, "stripe_unit"), "stripe_unit %lu", &stripe_unit);
269 } else {
270 stripe_unit = file_layout_t::get_default().stripe_unit;
271 }
272 if (strstr(buf, "stripe_count")) {
273 sscanf(strstr(buf, "stripe_count"), "stripe_count %lu", &stripe_count);
274 } else {
275 stripe_count = file_layout_t::get_default().stripe_count;
276 }
277 if (strstr(buf, "object_size")) {
278 sscanf(strstr(buf, "object_size"), "object_size %lu", &object_size);
279 } else {
280 object_size = file_layout_t::get_default().object_size;
281 }
282 }
283
7c673cae
FG
284 if (strstr(buf, "trimmed_pos")) {
285 sscanf(strstr(buf, "trimmed_pos"), "trimmed_pos %llu", &trimmed_pos);
286 } else {
287 // Old format dump, any untrimmed objects before expire_pos will
288 // be discarded as trash.
91327a77 289 trimmed_pos = start - (start % object_size);
7c673cae
FG
290 }
291
292 if (trimmed_pos > start) {
293 derr << std::hex << "Invalid header (trimmed 0x" << trimmed_pos
294 << " > expire 0x" << start << std::dec << dendl;
295 ::close(fd);
296 return -EINVAL;
297 }
298
299 if (start > write_pos) {
300 derr << std::hex << "Invalid header (expire 0x" << start
301 << " > write 0x" << write_pos << std::dec << dendl;
302 ::close(fd);
303 return -EINVAL;
304 }
305
306 cout << "start " << start <<
307 " len " << len <<
308 " write_pos " << write_pos <<
309 " format " << format <<
91327a77
AA
310 " trimmed_pos " << trimmed_pos <<
311 " stripe_unit " << stripe_unit <<
312 " stripe_count " << stripe_count <<
313 " object_size " << object_size << std::endl;
7c673cae
FG
314
315 Journaler::Header h;
316 h.trimmed_pos = trimmed_pos;
317 h.expire_pos = start;
318 h.write_pos = write_pos;
319 h.stream_format = format;
320 h.magic = CEPH_FS_ONDISK_MAGIC;
321
91327a77
AA
322 h.layout.stripe_unit = stripe_unit;
323 h.layout.stripe_count = stripe_count;
324 h.layout.object_size = object_size;
7c673cae
FG
325 h.layout.pool_id = fs->mds_map.get_metadata_pool();
326
327 bufferlist hbl;
328 ::encode(h, hbl);
329
330 object_t oid = file_object_t(ino, 0);
331 object_locator_t oloc(fs->mds_map.get_metadata_pool());
332 SnapContext snapc;
333
334 cout << "writing header " << oid << std::endl;
335 C_SaferCond header_cond;
336 lock.Lock();
337 objecter->write_full(oid, oloc, snapc, hbl,
338 ceph::real_clock::now(), 0,
339 &header_cond);
340 lock.Unlock();
341
342 r = header_cond.wait();
343 if (r != 0) {
344 derr << "Failed to write header: " << cpp_strerror(r) << dendl;
345 ::close(fd);
346 return r;
347 }
348
349 Filer filer(objecter, &finisher);
350
351 /* Erase any objects at the end of the region to which we shall write
352 * the new log data. This is to avoid leaving trailing junk after
353 * the newly written data. Any junk more than one object ahead
354 * will be taken care of during normal operation by Journaler's
355 * prezeroing behaviour */
356 {
357 uint32_t const object_size = h.layout.object_size;
358 assert(object_size > 0);
1adf2230
AA
359 uint64_t last_obj = h.write_pos / object_size;
360 uint64_t purge_count = 2;
361 /* When the length is zero, the last_obj should be zeroed
362 * from the offset determined by the new write_pos instead of being purged.
363 */
364 if (!len) {
365 purge_count = 1;
366 ++last_obj;
367 }
7c673cae
FG
368 C_SaferCond purge_cond;
369 cout << "Purging " << purge_count << " objects from " << last_obj << std::endl;
370 lock.Lock();
371 filer.purge_range(ino, &h.layout, snapc, last_obj, purge_count,
372 ceph::real_clock::now(), 0, &purge_cond);
373 lock.Unlock();
374 purge_cond.wait();
375 }
1adf2230
AA
376 /* When the length is zero, zero the last object
377 * from the offset determined by the new write_pos.
378 */
379 if (!len) {
380 uint64_t offset_in_obj = h.write_pos % h.layout.object_size;
381 uint64_t len = h.layout.object_size - offset_in_obj;
382 C_SaferCond zero_cond;
383 cout << "Zeroing " << len << " bytes in the last object." << std::endl;
384
385 lock.Lock();
386 filer.zero(ino, &h.layout, snapc, h.write_pos, len, ceph::real_clock::now(), 0, &zero_cond);
387 lock.Unlock();
388 zero_cond.wait();
389 }
7c673cae
FG
390
391 // Stream from `fd` to `filer`
392 uint64_t pos = start;
393 uint64_t left = len;
394 while (left > 0) {
395 // Read
396 bufferlist j;
397 lseek64(fd, pos, SEEK_SET);
398 uint64_t l = MIN(left, 1024*1024);
399 j.read_fd(fd, l);
400
401 // Write
402 cout << " writing " << pos << "~" << l << std::endl;
403 C_SaferCond write_cond;
404 lock.Lock();
405 filer.write(ino, &h.layout, snapc, pos, l, j,
406 ceph::real_clock::now(), 0, &write_cond);
407 lock.Unlock();
408
409 r = write_cond.wait();
410 if (r != 0) {
411 derr << "Failed to write header: " << cpp_strerror(r) << dendl;
412 ::close(fd);
413 return r;
414 }
415
416 // Advance
417 pos += l;
418 left -= l;
419 }
420
421 VOID_TEMP_FAILURE_RETRY(::close(fd));
422 cout << "done." << std::endl;
423 return 0;
424}
425