]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2015 Red Hat | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef RADOS_DUMP_H_ | |
16 | #define RADOS_DUMP_H_ | |
17 | ||
18 | #include <stdint.h> | |
19 | ||
20 | #include "include/buffer.h" | |
21 | #include "include/encoding.h" | |
22 | ||
23 | #include "osd/osd_types.h" | |
24 | #include "osd/OSDMap.h" | |
25 | ||
// Fixed-width aliases for the fields that frame each section of an export.
using sectiontype_t = uint8_t;   // section type tag carried in `header`
using mymagic_t     = uint32_t;  // magic marker carried in `footer`
using mysize_t      = int64_t;   // section size carried in `header`
29 | ||
// Section type tags.  A tag is written into every section header, so the
// numeric value of an existing entry must never change; add new entries
// immediately before END_OF_TYPES.
enum {
  TYPE_NONE         = 0,
  TYPE_PG_BEGIN     = 1,
  TYPE_PG_END       = 2,
  TYPE_OBJECT_BEGIN = 3,
  TYPE_OBJECT_END   = 4,
  TYPE_DATA         = 5,
  TYPE_ATTRS        = 6,
  TYPE_OMAP_HDR     = 7,
  TYPE_OMAP         = 8,
  TYPE_PG_METADATA  = 9,
  TYPE_POOL_BEGIN   = 10,
  TYPE_POOL_END     = 11,
  END_OF_TYPES,  // Keep at the end
};
45 | ||
46 | const uint16_t shortmagic = 0xffce; //goes into stream as "ceff" | |
47 | //endmagic goes into stream as "ceff ffec" | |
48 | const mymagic_t endmagic = (0xecff << 16) | shortmagic; | |
49 | ||
50 | //The first FIXED_LENGTH bytes are a fixed | |
51 | //portion of the export output. This includes the overall | |
52 | //version number, and size of header and footer. | |
53 | //THIS STRUCTURE CAN ONLY BE APPENDED TO. If it needs to expand, | |
54 | //the version can be bumped and then anything | |
55 | //can be added to the export format. | |
56 | struct super_header { | |
57 | static const uint32_t super_magic = (shortmagic << 16) | shortmagic; | |
58 | // ver = 1, Initial version | |
59 | // ver = 2, Add OSDSuperblock to pg_begin | |
60 | static const uint32_t super_ver = 2; | |
61 | static const uint32_t FIXED_LENGTH = 16; | |
62 | uint32_t magic; | |
63 | uint32_t version; | |
64 | uint32_t header_size; | |
65 | uint32_t footer_size; | |
66 | ||
67 | super_header() : magic(0), version(0), header_size(0), footer_size(0) { } | |
68 | ||
69 | void encode(bufferlist& bl) const { | |
70 | ::encode(magic, bl); | |
71 | ::encode(version, bl); | |
72 | ::encode(header_size, bl); | |
73 | ::encode(footer_size, bl); | |
74 | } | |
75 | void decode(bufferlist::iterator& bl) { | |
76 | ::decode(magic, bl); | |
77 | ::decode(version, bl); | |
78 | ::decode(header_size, bl); | |
79 | ::decode(footer_size, bl); | |
80 | } | |
81 | }; | |
82 | ||
83 | struct header { | |
84 | sectiontype_t type; | |
85 | mysize_t size; | |
86 | header(sectiontype_t type, mysize_t size) : | |
87 | type(type), size(size) { } | |
88 | header(): type(0), size(0) { } | |
89 | ||
90 | void encode(bufferlist& bl) const { | |
91 | uint32_t debug_type = (type << 24) | (type << 16) | shortmagic; | |
92 | ENCODE_START(1, 1, bl); | |
93 | ::encode(debug_type, bl); | |
94 | ::encode(size, bl); | |
95 | ENCODE_FINISH(bl); | |
96 | } | |
97 | void decode(bufferlist::iterator& bl) { | |
98 | uint32_t debug_type; | |
99 | DECODE_START(1, bl); | |
100 | ::decode(debug_type, bl); | |
101 | type = debug_type >> 24; | |
102 | ::decode(size, bl); | |
103 | DECODE_FINISH(bl); | |
104 | } | |
105 | }; | |
106 | ||
107 | struct footer { | |
108 | mymagic_t magic; | |
109 | footer() : magic(endmagic) { } | |
110 | ||
111 | void encode(bufferlist& bl) const { | |
112 | ENCODE_START(1, 1, bl); | |
113 | ::encode(magic, bl); | |
114 | ENCODE_FINISH(bl); | |
115 | } | |
116 | void decode(bufferlist::iterator& bl) { | |
117 | DECODE_START(1, bl); | |
118 | ::decode(magic, bl); | |
119 | DECODE_FINISH(bl); | |
120 | } | |
121 | }; | |
122 | ||
123 | struct pg_begin { | |
124 | spg_t pgid; | |
125 | OSDSuperblock superblock; | |
126 | ||
127 | pg_begin(spg_t pg, const OSDSuperblock& sb): | |
128 | pgid(pg), superblock(sb) { } | |
129 | pg_begin() { } | |
130 | ||
131 | void encode(bufferlist& bl) const { | |
132 | // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then | |
133 | // shard will be NO_SHARD for a replicated pool. This means | |
134 | // that we allow the decode by struct_v 2. | |
135 | ENCODE_START(3, 2, bl); | |
136 | ::encode(pgid.pgid, bl); | |
137 | ::encode(superblock, bl); | |
138 | ::encode(pgid.shard, bl); | |
139 | ENCODE_FINISH(bl); | |
140 | } | |
141 | // NOTE: New super_ver prevents decode from ver 1 | |
142 | void decode(bufferlist::iterator& bl) { | |
143 | DECODE_START(3, bl); | |
144 | ::decode(pgid.pgid, bl); | |
145 | if (struct_v > 1) { | |
146 | ::decode(superblock, bl); | |
147 | } | |
148 | if (struct_v > 2) { | |
149 | ::decode(pgid.shard, bl); | |
150 | } else { | |
151 | pgid.shard = shard_id_t::NO_SHARD; | |
152 | } | |
153 | DECODE_FINISH(bl); | |
154 | } | |
155 | }; | |
156 | ||
157 | struct object_begin { | |
158 | ghobject_t hoid; | |
159 | ||
160 | // Duplicate what is in the OI_ATTR so we have it at the start | |
161 | // of object processing. | |
162 | object_info_t oi; | |
163 | ||
164 | explicit object_begin(const ghobject_t &hoid): hoid(hoid) { } | |
165 | object_begin() { } | |
166 | ||
167 | // If superblock doesn't include CEPH_FS_FEATURE_INCOMPAT_SHARDS then | |
168 | // generation will be NO_GEN, shard_id will be NO_SHARD for a replicated | |
169 | // pool. This means we will allow the decode by struct_v 1. | |
170 | void encode(bufferlist& bl) const { | |
171 | ENCODE_START(3, 1, bl); | |
172 | ::encode(hoid.hobj, bl); | |
173 | ::encode(hoid.generation, bl); | |
174 | ::encode(hoid.shard_id, bl); | |
175 | ::encode(oi, bl, -1); /* FIXME: we always encode with full features */ | |
176 | ENCODE_FINISH(bl); | |
177 | } | |
178 | void decode(bufferlist::iterator& bl) { | |
179 | DECODE_START(3, bl); | |
180 | ::decode(hoid.hobj, bl); | |
181 | if (struct_v > 1) { | |
182 | ::decode(hoid.generation, bl); | |
183 | ::decode(hoid.shard_id, bl); | |
184 | } else { | |
185 | hoid.generation = ghobject_t::NO_GEN; | |
186 | hoid.shard_id = shard_id_t::NO_SHARD; | |
187 | } | |
188 | if (struct_v > 2) { | |
189 | ::decode(oi, bl); | |
190 | } | |
191 | DECODE_FINISH(bl); | |
192 | } | |
193 | }; | |
194 | ||
195 | struct data_section { | |
196 | uint64_t offset; | |
197 | uint64_t len; | |
198 | bufferlist databl; | |
199 | data_section(uint64_t offset, uint64_t len, bufferlist bl): | |
200 | offset(offset), len(len), databl(bl) { } | |
201 | data_section(): offset(0), len(0) { } | |
202 | ||
203 | void encode(bufferlist& bl) const { | |
204 | ENCODE_START(1, 1, bl); | |
205 | ::encode(offset, bl); | |
206 | ::encode(len, bl); | |
207 | ::encode(databl, bl); | |
208 | ENCODE_FINISH(bl); | |
209 | } | |
210 | void decode(bufferlist::iterator& bl) { | |
211 | DECODE_START(1, bl); | |
212 | ::decode(offset, bl); | |
213 | ::decode(len, bl); | |
214 | ::decode(databl, bl); | |
215 | DECODE_FINISH(bl); | |
216 | } | |
217 | }; | |
218 | ||
219 | struct attr_section { | |
220 | map<string,bufferlist> data; | |
221 | explicit attr_section(const map<string,bufferlist> &data) : data(data) { } | |
222 | explicit attr_section(map<string, bufferptr> &data_) | |
223 | { | |
224 | for (std::map<std::string, bufferptr>::iterator i = data_.begin(); | |
225 | i != data_.end(); ++i) { | |
226 | bufferlist bl; | |
227 | bl.push_front(i->second); | |
228 | data[i->first] = bl; | |
229 | } | |
230 | } | |
231 | ||
232 | attr_section() { } | |
233 | ||
234 | void encode(bufferlist& bl) const { | |
235 | ENCODE_START(1, 1, bl); | |
236 | ::encode(data, bl); | |
237 | ENCODE_FINISH(bl); | |
238 | } | |
239 | void decode(bufferlist::iterator& bl) { | |
240 | DECODE_START(1, bl); | |
241 | ::decode(data, bl); | |
242 | DECODE_FINISH(bl); | |
243 | } | |
244 | }; | |
245 | ||
246 | struct omap_hdr_section { | |
247 | bufferlist hdr; | |
248 | explicit omap_hdr_section(bufferlist hdr) : hdr(hdr) { } | |
249 | omap_hdr_section() { } | |
250 | ||
251 | void encode(bufferlist& bl) const { | |
252 | ENCODE_START(1, 1, bl); | |
253 | ::encode(hdr, bl); | |
254 | ENCODE_FINISH(bl); | |
255 | } | |
256 | void decode(bufferlist::iterator& bl) { | |
257 | DECODE_START(1, bl); | |
258 | ::decode(hdr, bl); | |
259 | DECODE_FINISH(bl); | |
260 | } | |
261 | }; | |
262 | ||
263 | struct omap_section { | |
264 | map<string, bufferlist> omap; | |
265 | explicit omap_section(const map<string, bufferlist> &omap) : | |
266 | omap(omap) { } | |
267 | omap_section() { } | |
268 | ||
269 | void encode(bufferlist& bl) const { | |
270 | ENCODE_START(1, 1, bl); | |
271 | ::encode(omap, bl); | |
272 | ENCODE_FINISH(bl); | |
273 | } | |
274 | void decode(bufferlist::iterator& bl) { | |
275 | DECODE_START(1, bl); | |
276 | ::decode(omap, bl); | |
277 | DECODE_FINISH(bl); | |
278 | } | |
279 | }; | |
280 | ||
281 | struct metadata_section { | |
282 | // struct_ver is the on-disk version of original pg | |
283 | __u8 struct_ver; // for reference | |
284 | epoch_t map_epoch; | |
285 | pg_info_t info; | |
286 | pg_log_t log; | |
287 | PastIntervals past_intervals; | |
288 | OSDMap osdmap; | |
289 | bufferlist osdmap_bl; // Used in lieu of encoding osdmap due to crc checking | |
290 | map<eversion_t, hobject_t> divergent_priors; | |
291 | pg_missing_t missing; | |
292 | ||
293 | metadata_section( | |
294 | __u8 struct_ver, | |
295 | epoch_t map_epoch, | |
296 | const pg_info_t &info, | |
297 | const pg_log_t &log, | |
298 | const PastIntervals &past_intervals, | |
299 | const pg_missing_t &missing) | |
300 | : struct_ver(struct_ver), | |
301 | map_epoch(map_epoch), | |
302 | info(info), | |
303 | log(log), | |
304 | past_intervals(past_intervals), | |
305 | missing(missing) {} | |
306 | metadata_section() | |
307 | : struct_ver(0), | |
308 | map_epoch(0) { } | |
309 | ||
310 | void encode(bufferlist& bl) const { | |
311 | ENCODE_START(6, 6, bl); | |
312 | ::encode(struct_ver, bl); | |
313 | ::encode(map_epoch, bl); | |
314 | ::encode(info, bl); | |
315 | ::encode(log, bl); | |
316 | ::encode(past_intervals, bl); | |
317 | // Equivalent to osdmap.encode(bl, features); but | |
318 | // preserving exact layout for CRC checking. | |
319 | bl.append(osdmap_bl); | |
320 | ::encode(divergent_priors, bl); | |
321 | ::encode(missing, bl); | |
322 | ENCODE_FINISH(bl); | |
323 | } | |
324 | void decode(bufferlist::iterator& bl) { | |
325 | DECODE_START(6, bl); | |
326 | ::decode(struct_ver, bl); | |
327 | ::decode(map_epoch, bl); | |
328 | ::decode(info, bl); | |
329 | ::decode(log, bl); | |
330 | if (struct_v >= 6) { | |
331 | ::decode(past_intervals, bl); | |
332 | } else if (struct_v > 1) { | |
333 | past_intervals.decode_classic(bl); | |
334 | } else { | |
335 | cout << "NOTICE: Older export without past_intervals" << std::endl; | |
336 | } | |
337 | if (struct_v > 2) { | |
338 | osdmap.decode(bl); | |
339 | } else { | |
340 | cout << "WARNING: Older export without OSDMap information" << std::endl; | |
341 | } | |
342 | if (struct_v > 3) { | |
343 | ::decode(divergent_priors, bl); | |
344 | } | |
345 | if (struct_v > 4) { | |
346 | ::decode(missing, bl); | |
347 | } | |
348 | DECODE_FINISH(bl); | |
349 | } | |
350 | }; | |
351 | ||
352 | /** | |
353 | * Superclass for classes that will need to handle a serialized RADOS | |
354 | * dump. Requires that the serialized dump be opened with a known FD. | |
355 | */ | |
356 | class RadosDump | |
357 | { | |
358 | protected: | |
359 | int file_fd; | |
360 | super_header sh; | |
361 | bool dry_run; | |
362 | ||
363 | public: | |
364 | RadosDump(int file_fd_, bool dry_run_) | |
365 | : file_fd(file_fd_), dry_run(dry_run_) | |
366 | {} | |
367 | ||
368 | int read_super(); | |
369 | int get_header(header *h); | |
370 | int get_footer(footer *f); | |
371 | int read_section(sectiontype_t *type, bufferlist *bl); | |
372 | int skip_object(bufferlist &bl); | |
373 | void write_super(); | |
374 | ||
375 | // Define this in .h because it's templated | |
376 | template <typename T> | |
377 | int write_section(sectiontype_t type, const T& obj, int fd) { | |
378 | if (dry_run) | |
379 | return 0; | |
380 | bufferlist blhdr, bl, blftr; | |
381 | obj.encode(bl); | |
382 | header hdr(type, bl.length()); | |
383 | hdr.encode(blhdr); | |
384 | footer ft; | |
385 | ft.encode(blftr); | |
386 | ||
387 | int ret = blhdr.write_fd(fd); | |
388 | if (ret) return ret; | |
389 | ret = bl.write_fd(fd); | |
390 | if (ret) return ret; | |
391 | ret = blftr.write_fd(fd); | |
392 | return ret; | |
393 | } | |
394 | ||
395 | int write_simple(sectiontype_t type, int fd) | |
396 | { | |
397 | if (dry_run) | |
398 | return 0; | |
399 | bufferlist hbl; | |
400 | ||
401 | header hdr(type, 0); | |
402 | hdr.encode(hbl); | |
403 | return hbl.write_fd(fd); | |
404 | } | |
405 | }; | |
406 | ||
407 | #endif |