]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef RADOS_DUMP_H_ | |
16 | #define RADOS_DUMP_H_ | |
17 | ||
18 | #include <stdint.h> | |
19 | ||
20 | #include "include/buffer.h" | |
21 | #include "include/encoding.h" | |
22 | ||
23 | #include "osd/osd_types.h" | |
24 | #include "osd/OSDMap.h" | |
25 | ||
// Scalar types used by the framing of the export stream.
typedef uint8_t sectiontype_t;	// section tag: one of the TYPE_* values below
typedef uint32_t mymagic_t;	// type of the footer magic word
typedef int64_t mysize_t;	// type of header::size

// Section tags.  header::encode() writes the numeric value into the stream,
// so existing enumerators must keep their positions.
enum {
  TYPE_NONE = 0,
  TYPE_PG_BEGIN,
  TYPE_PG_END,
  TYPE_OBJECT_BEGIN,
  TYPE_OBJECT_END,
  TYPE_DATA,
  TYPE_ATTRS,
  TYPE_OMAP_HDR,
  TYPE_OMAP,
  TYPE_PG_METADATA,
  TYPE_POOL_BEGIN,
  TYPE_POOL_END,
  END_OF_TYPES,	//Keep at the end
};

const uint16_t shortmagic = 0xffce;  //goes into stream as "ceff"
//endmagic goes into stream as "ceff ffec"
// The upper half is shifted as an unsigned 32-bit value: a plain
// (0xecff << 16) is computed in int and does not fit in 32 signed bits.
const mymagic_t endmagic = (static_cast<mymagic_t>(0xecff) << 16) | shortmagic;
49 | ||
50 | //The first FIXED_LENGTH bytes are a fixed | |
51 | //portion of the export output. This includes the overall | |
52 | //version number, and size of header and footer. | |
53 | //THIS STRUCTURE CAN ONLY BE APPENDED TO. If it needs to expand, | |
54 | //the version can be bumped and then anything | |
55 | //can be added to the export format. | |
56 | struct super_header { | |
57 | static const uint32_t super_magic = (shortmagic << 16) | shortmagic; | |
58 | // ver = 1, Initial version | |
59 | // ver = 2, Add OSDSuperblock to pg_begin | |
60 | static const uint32_t super_ver = 2; | |
61 | static const uint32_t FIXED_LENGTH = 16; | |
62 | uint32_t magic; | |
63 | uint32_t version; | |
64 | uint32_t header_size; | |
65 | uint32_t footer_size; | |
66 | ||
67 | super_header() : magic(0), version(0), header_size(0), footer_size(0) { } | |
68 | ||
69 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
70 | using ceph::encode; |
71 | encode(magic, bl); | |
72 | encode(version, bl); | |
73 | encode(header_size, bl); | |
74 | encode(footer_size, bl); | |
7c673cae | 75 | } |
11fdf7f2 TL |
76 | void decode(bufferlist::const_iterator& bl) { |
77 | using ceph::decode; | |
78 | decode(magic, bl); | |
79 | decode(version, bl); | |
80 | decode(header_size, bl); | |
81 | decode(footer_size, bl); | |
7c673cae FG |
82 | } |
83 | }; | |
84 | ||
85 | struct header { | |
86 | sectiontype_t type; | |
87 | mysize_t size; | |
88 | header(sectiontype_t type, mysize_t size) : | |
89 | type(type), size(size) { } | |
90 | header(): type(0), size(0) { } | |
91 | ||
92 | void encode(bufferlist& bl) const { | |
93 | uint32_t debug_type = (type << 24) | (type << 16) | shortmagic; | |
94 | ENCODE_START(1, 1, bl); | |
11fdf7f2 TL |
95 | encode(debug_type, bl); |
96 | encode(size, bl); | |
7c673cae FG |
97 | ENCODE_FINISH(bl); |
98 | } | |
11fdf7f2 | 99 | void decode(bufferlist::const_iterator& bl) { |
7c673cae FG |
100 | uint32_t debug_type; |
101 | DECODE_START(1, bl); | |
11fdf7f2 | 102 | decode(debug_type, bl); |
7c673cae | 103 | type = debug_type >> 24; |
11fdf7f2 | 104 | decode(size, bl); |
7c673cae FG |
105 | DECODE_FINISH(bl); |
106 | } | |
107 | }; | |
108 | ||
109 | struct footer { | |
110 | mymagic_t magic; | |
111 | footer() : magic(endmagic) { } | |
112 | ||
113 | void encode(bufferlist& bl) const { | |
114 | ENCODE_START(1, 1, bl); | |
11fdf7f2 | 115 | encode(magic, bl); |
7c673cae FG |
116 | ENCODE_FINISH(bl); |
117 | } | |
11fdf7f2 | 118 | void decode(bufferlist::const_iterator& bl) { |
7c673cae | 119 | DECODE_START(1, bl); |
11fdf7f2 | 120 | decode(magic, bl); |
7c673cae FG |
121 | DECODE_FINISH(bl); |
122 | } | |
123 | }; | |
124 | ||
125 | struct pg_begin { | |
126 | spg_t pgid; | |
127 | OSDSuperblock superblock; | |
128 | ||
129 | pg_begin(spg_t pg, const OSDSuperblock& sb): | |
130 | pgid(pg), superblock(sb) { } | |
131 | pg_begin() { } | |
132 | ||
133 | void encode(bufferlist& bl) const { | |
134 | // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then | |
135 | // shard will be NO_SHARD for a replicated pool. This means | |
136 | // that we allow the decode by struct_v 2. | |
137 | ENCODE_START(3, 2, bl); | |
11fdf7f2 TL |
138 | encode(pgid.pgid, bl); |
139 | encode(superblock, bl); | |
140 | encode(pgid.shard, bl); | |
7c673cae FG |
141 | ENCODE_FINISH(bl); |
142 | } | |
143 | // NOTE: New super_ver prevents decode from ver 1 | |
11fdf7f2 | 144 | void decode(bufferlist::const_iterator& bl) { |
7c673cae | 145 | DECODE_START(3, bl); |
11fdf7f2 | 146 | decode(pgid.pgid, bl); |
7c673cae | 147 | if (struct_v > 1) { |
11fdf7f2 | 148 | decode(superblock, bl); |
7c673cae FG |
149 | } |
150 | if (struct_v > 2) { | |
11fdf7f2 | 151 | decode(pgid.shard, bl); |
7c673cae FG |
152 | } else { |
153 | pgid.shard = shard_id_t::NO_SHARD; | |
154 | } | |
155 | DECODE_FINISH(bl); | |
156 | } | |
157 | }; | |
158 | ||
159 | struct object_begin { | |
160 | ghobject_t hoid; | |
161 | ||
162 | // Duplicate what is in the OI_ATTR so we have it at the start | |
163 | // of object processing. | |
164 | object_info_t oi; | |
165 | ||
166 | explicit object_begin(const ghobject_t &hoid): hoid(hoid) { } | |
167 | object_begin() { } | |
168 | ||
169 | // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then | |
170 | // generation will be NO_GEN, shard_id will be NO_SHARD for a replicated | |
171 | // pool. This means we will allow the decode by struct_v 1. | |
172 | void encode(bufferlist& bl) const { | |
173 | ENCODE_START(3, 1, bl); | |
11fdf7f2 TL |
174 | encode(hoid.hobj, bl); |
175 | encode(hoid.generation, bl); | |
176 | encode(hoid.shard_id, bl); | |
177 | encode(oi, bl, -1); /* FIXME: we always encode with full features */ | |
7c673cae FG |
178 | ENCODE_FINISH(bl); |
179 | } | |
11fdf7f2 | 180 | void decode(bufferlist::const_iterator& bl) { |
7c673cae | 181 | DECODE_START(3, bl); |
11fdf7f2 | 182 | decode(hoid.hobj, bl); |
7c673cae | 183 | if (struct_v > 1) { |
11fdf7f2 TL |
184 | decode(hoid.generation, bl); |
185 | decode(hoid.shard_id, bl); | |
7c673cae FG |
186 | } else { |
187 | hoid.generation = ghobject_t::NO_GEN; | |
188 | hoid.shard_id = shard_id_t::NO_SHARD; | |
189 | } | |
190 | if (struct_v > 2) { | |
11fdf7f2 | 191 | decode(oi, bl); |
7c673cae FG |
192 | } |
193 | DECODE_FINISH(bl); | |
194 | } | |
195 | }; | |
196 | ||
197 | struct data_section { | |
198 | uint64_t offset; | |
199 | uint64_t len; | |
200 | bufferlist databl; | |
201 | data_section(uint64_t offset, uint64_t len, bufferlist bl): | |
202 | offset(offset), len(len), databl(bl) { } | |
203 | data_section(): offset(0), len(0) { } | |
204 | ||
205 | void encode(bufferlist& bl) const { | |
206 | ENCODE_START(1, 1, bl); | |
11fdf7f2 TL |
207 | encode(offset, bl); |
208 | encode(len, bl); | |
209 | encode(databl, bl); | |
7c673cae FG |
210 | ENCODE_FINISH(bl); |
211 | } | |
11fdf7f2 | 212 | void decode(bufferlist::const_iterator& bl) { |
7c673cae | 213 | DECODE_START(1, bl); |
11fdf7f2 TL |
214 | decode(offset, bl); |
215 | decode(len, bl); | |
216 | decode(databl, bl); | |
7c673cae FG |
217 | DECODE_FINISH(bl); |
218 | } | |
219 | }; | |
220 | ||
221 | struct attr_section { | |
222 | map<string,bufferlist> data; | |
223 | explicit attr_section(const map<string,bufferlist> &data) : data(data) { } | |
224 | explicit attr_section(map<string, bufferptr> &data_) | |
225 | { | |
226 | for (std::map<std::string, bufferptr>::iterator i = data_.begin(); | |
227 | i != data_.end(); ++i) { | |
228 | bufferlist bl; | |
11fdf7f2 | 229 | bl.push_back(i->second); |
7c673cae FG |
230 | data[i->first] = bl; |
231 | } | |
232 | } | |
233 | ||
234 | attr_section() { } | |
235 | ||
236 | void encode(bufferlist& bl) const { | |
237 | ENCODE_START(1, 1, bl); | |
11fdf7f2 | 238 | encode(data, bl); |
7c673cae FG |
239 | ENCODE_FINISH(bl); |
240 | } | |
11fdf7f2 | 241 | void decode(bufferlist::const_iterator& bl) { |
7c673cae | 242 | DECODE_START(1, bl); |
11fdf7f2 | 243 | decode(data, bl); |
7c673cae FG |
244 | DECODE_FINISH(bl); |
245 | } | |
246 | }; | |
247 | ||
248 | struct omap_hdr_section { | |
249 | bufferlist hdr; | |
250 | explicit omap_hdr_section(bufferlist hdr) : hdr(hdr) { } | |
251 | omap_hdr_section() { } | |
252 | ||
253 | void encode(bufferlist& bl) const { | |
254 | ENCODE_START(1, 1, bl); | |
11fdf7f2 | 255 | encode(hdr, bl); |
7c673cae FG |
256 | ENCODE_FINISH(bl); |
257 | } | |
11fdf7f2 | 258 | void decode(bufferlist::const_iterator& bl) { |
7c673cae | 259 | DECODE_START(1, bl); |
11fdf7f2 | 260 | decode(hdr, bl); |
7c673cae FG |
261 | DECODE_FINISH(bl); |
262 | } | |
263 | }; | |
264 | ||
265 | struct omap_section { | |
266 | map<string, bufferlist> omap; | |
267 | explicit omap_section(const map<string, bufferlist> &omap) : | |
268 | omap(omap) { } | |
269 | omap_section() { } | |
270 | ||
271 | void encode(bufferlist& bl) const { | |
272 | ENCODE_START(1, 1, bl); | |
11fdf7f2 | 273 | encode(omap, bl); |
7c673cae FG |
274 | ENCODE_FINISH(bl); |
275 | } | |
11fdf7f2 | 276 | void decode(bufferlist::const_iterator& bl) { |
7c673cae | 277 | DECODE_START(1, bl); |
11fdf7f2 | 278 | decode(omap, bl); |
7c673cae FG |
279 | DECODE_FINISH(bl); |
280 | } | |
281 | }; | |
282 | ||
283 | struct metadata_section { | |
284 | // struct_ver is the on-disk version of original pg | |
285 | __u8 struct_ver; // for reference | |
286 | epoch_t map_epoch; | |
287 | pg_info_t info; | |
288 | pg_log_t log; | |
289 | PastIntervals past_intervals; | |
290 | OSDMap osdmap; | |
291 | bufferlist osdmap_bl; // Used in lieu of encoding osdmap due to crc checking | |
292 | map<eversion_t, hobject_t> divergent_priors; | |
293 | pg_missing_t missing; | |
294 | ||
295 | metadata_section( | |
296 | __u8 struct_ver, | |
297 | epoch_t map_epoch, | |
298 | const pg_info_t &info, | |
299 | const pg_log_t &log, | |
300 | const PastIntervals &past_intervals, | |
301 | const pg_missing_t &missing) | |
302 | : struct_ver(struct_ver), | |
303 | map_epoch(map_epoch), | |
304 | info(info), | |
305 | log(log), | |
306 | past_intervals(past_intervals), | |
307 | missing(missing) {} | |
308 | metadata_section() | |
309 | : struct_ver(0), | |
310 | map_epoch(0) { } | |
311 | ||
312 | void encode(bufferlist& bl) const { | |
313 | ENCODE_START(6, 6, bl); | |
11fdf7f2 TL |
314 | encode(struct_ver, bl); |
315 | encode(map_epoch, bl); | |
316 | encode(info, bl); | |
317 | encode(log, bl); | |
318 | encode(past_intervals, bl); | |
7c673cae FG |
319 | // Equivalent to osdmap.encode(bl, features); but |
320 | // preserving exact layout for CRC checking. | |
321 | bl.append(osdmap_bl); | |
11fdf7f2 TL |
322 | encode(divergent_priors, bl); |
323 | encode(missing, bl); | |
7c673cae FG |
324 | ENCODE_FINISH(bl); |
325 | } | |
11fdf7f2 | 326 | void decode(bufferlist::const_iterator& bl) { |
7c673cae | 327 | DECODE_START(6, bl); |
11fdf7f2 TL |
328 | decode(struct_ver, bl); |
329 | decode(map_epoch, bl); | |
330 | decode(info, bl); | |
331 | decode(log, bl); | |
7c673cae | 332 | if (struct_v >= 6) { |
11fdf7f2 | 333 | decode(past_intervals, bl); |
7c673cae | 334 | } else if (struct_v > 1) { |
11fdf7f2 | 335 | cout << "NOTICE: Older export with classic past_intervals" << std::endl; |
7c673cae FG |
336 | } else { |
337 | cout << "NOTICE: Older export without past_intervals" << std::endl; | |
338 | } | |
339 | if (struct_v > 2) { | |
340 | osdmap.decode(bl); | |
341 | } else { | |
342 | cout << "WARNING: Older export without OSDMap information" << std::endl; | |
343 | } | |
344 | if (struct_v > 3) { | |
11fdf7f2 | 345 | decode(divergent_priors, bl); |
7c673cae FG |
346 | } |
347 | if (struct_v > 4) { | |
11fdf7f2 | 348 | decode(missing, bl); |
7c673cae FG |
349 | } |
350 | DECODE_FINISH(bl); | |
351 | } | |
352 | }; | |
353 | ||
354 | /** | |
355 | * Superclass for classes that will need to handle a serialized RADOS | |
356 | * dump. Requires that the serialized dump be opened with a known FD. | |
357 | */ | |
358 | class RadosDump | |
359 | { | |
360 | protected: | |
361 | int file_fd; | |
362 | super_header sh; | |
363 | bool dry_run; | |
364 | ||
365 | public: | |
366 | RadosDump(int file_fd_, bool dry_run_) | |
367 | : file_fd(file_fd_), dry_run(dry_run_) | |
368 | {} | |
369 | ||
370 | int read_super(); | |
371 | int get_header(header *h); | |
372 | int get_footer(footer *f); | |
373 | int read_section(sectiontype_t *type, bufferlist *bl); | |
374 | int skip_object(bufferlist &bl); | |
375 | void write_super(); | |
376 | ||
377 | // Define this in .h because it's templated | |
378 | template <typename T> | |
379 | int write_section(sectiontype_t type, const T& obj, int fd) { | |
380 | if (dry_run) | |
381 | return 0; | |
382 | bufferlist blhdr, bl, blftr; | |
383 | obj.encode(bl); | |
384 | header hdr(type, bl.length()); | |
385 | hdr.encode(blhdr); | |
386 | footer ft; | |
387 | ft.encode(blftr); | |
388 | ||
389 | int ret = blhdr.write_fd(fd); | |
390 | if (ret) return ret; | |
391 | ret = bl.write_fd(fd); | |
392 | if (ret) return ret; | |
393 | ret = blftr.write_fd(fd); | |
394 | return ret; | |
395 | } | |
396 | ||
397 | int write_simple(sectiontype_t type, int fd) | |
398 | { | |
399 | if (dry_run) | |
400 | return 0; | |
401 | bufferlist hbl; | |
402 | ||
403 | header hdr(type, 0); | |
404 | hdr.encode(hbl); | |
405 | return hbl.write_fd(fd); | |
406 | } | |
407 | }; | |
408 | ||
409 | #endif |