]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | #ifndef CEPH_MDSTYPES_H | |
4 | #define CEPH_MDSTYPES_H | |
5 | ||
6 | #include "include/int_types.h" | |
7 | ||
8 | #include <math.h> | |
9 | #include <ostream> | |
10 | #include <set> | |
11 | #include <map> | |
11fdf7f2 | 12 | #include <string_view> |
7c673cae FG |
13 | |
14 | #include "common/config.h" | |
15 | #include "common/Clock.h" | |
16 | #include "common/DecayCounter.h" | |
17 | #include "common/entity_name.h" | |
18 | ||
19 | #include "include/Context.h" | |
20 | #include "include/frag.h" | |
21 | #include "include/xlist.h" | |
22 | #include "include/interval_set.h" | |
23 | #include "include/compact_map.h" | |
24 | #include "include/compact_set.h" | |
25 | #include "include/fs_types.h" | |
26 | ||
27 | #include "inode_backtrace.h" | |
28 | ||
29 | #include <boost/spirit/include/qi.hpp> | |
30 | #include <boost/pool/pool.hpp> | |
11fdf7f2 | 31 | #include "include/ceph_assert.h" |
7c673cae FG |
32 | #include <boost/serialization/strong_typedef.hpp> |
33 | ||
// Magic string for the on-disk filesystem format.
#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011"

#define MDS_PORT_CACHE    0x200
#define MDS_PORT_LOCKER   0x300
#define MDS_PORT_MIGRATOR 0x400

// Sizing constants used by all of the inode-number ranges below.
#define MAX_MDS   0x100
#define NUM_STRAY 10

#define MDS_INO_ROOT 1

// No longer created but recognised in existing filesystems
// so that we don't try to fragment it.
#define MDS_INO_CEPH 2

#define MDS_INO_GLOBAL_SNAPREALM 3

// Each reserved range below starts at a multiple of MAX_MDS.
#define MDS_INO_MDSDIR_OFFSET (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET  (6*MAX_MDS)

// Locations for journal data
#define MDS_INO_LOG_OFFSET         (2*MAX_MDS)
#define MDS_INO_LOG_BACKUP_OFFSET  (3*MAX_MDS)
#define MDS_INO_LOG_POINTER_OFFSET (4*MAX_MDS)
#define MDS_INO_PURGE_QUEUE        (5*MAX_MDS)

// First value past the stray range (stray offset + MAX_MDS*NUM_STRAY).
#define MDS_INO_SYSTEM_BASE ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))

// MDS_INO_STRAY(x,i)  = stray offset + x*NUM_STRAY + i
// MDS_INO_MDSDIR(x)   = mdsdir offset + x
// Every macro argument is parenthesised before it is cast, so that a
// compound expression such as `a / b + c` is converted as a whole rather
// than having the cast bind to its first operand only.
#define MDS_INO_STRAY(x,i) (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
#define MDS_INO_MDSDIR(x)  (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

// Range predicates and the inverse mappings of the two macros above.
#define MDS_INO_IS_STRAY(i)     ((i) >= MDS_INO_STRAY_OFFSET && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i)    ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
#define MDS_INO_IS_BASE(i)      ((i) == MDS_INO_ROOT || (i) == MDS_INO_GLOBAL_SNAPREALM || MDS_INO_IS_MDSDIR(i))
#define MDS_INO_STRAY_OWNER(i)  (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i)  (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)
71 | ||
7c673cae | 72 | typedef int32_t mds_rank_t; |
11fdf7f2 | 73 | constexpr mds_rank_t MDS_RANK_NONE = -1; |
7c673cae FG |
74 | |
75 | BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t) | |
76 | extern const mds_gid_t MDS_GID_NONE; | |
11fdf7f2 TL |
77 | |
78 | typedef int32_t fs_cluster_id_t; | |
79 | constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = -1; | |
7c673cae | 80 | // The namespace ID of the anonymous default filesystem from legacy systems |
11fdf7f2 | 81 | constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS = 0; |
7c673cae | 82 | |
9f95a23c TL |
83 | class mds_role_t { |
84 | public: | |
7c673cae FG |
85 | mds_role_t(fs_cluster_id_t fscid_, mds_rank_t rank_) |
86 | : fscid(fscid_), rank(rank_) | |
87 | {} | |
9f95a23c TL |
88 | mds_role_t() {} |
89 | ||
90 | bool operator<(mds_role_t const &rhs) const { | |
7c673cae FG |
91 | if (fscid < rhs.fscid) { |
92 | return true; | |
93 | } else if (fscid == rhs.fscid) { | |
94 | return rank < rhs.rank; | |
95 | } else { | |
96 | return false; | |
97 | } | |
98 | } | |
99 | ||
9f95a23c | 100 | bool is_none() const { |
7c673cae FG |
101 | return (rank == MDS_RANK_NONE); |
102 | } | |
7c673cae | 103 | |
9f95a23c TL |
104 | fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE; |
105 | mds_rank_t rank = MDS_RANK_NONE; | |
106 | }; | |
107 | inline std::ostream& operator<<(std::ostream& out, const mds_role_t& role) { | |
108 | return out << role.fscid << ":" << role.rank; | |
109 | } | |
7c673cae FG |
110 | |
111 | // CAPS | |
7c673cae FG |
112 | inline string gcap_string(int cap) |
113 | { | |
114 | string s; | |
115 | if (cap & CEPH_CAP_GSHARED) s += "s"; | |
116 | if (cap & CEPH_CAP_GEXCL) s += "x"; | |
117 | if (cap & CEPH_CAP_GCACHE) s += "c"; | |
118 | if (cap & CEPH_CAP_GRD) s += "r"; | |
119 | if (cap & CEPH_CAP_GWR) s += "w"; | |
120 | if (cap & CEPH_CAP_GBUFFER) s += "b"; | |
121 | if (cap & CEPH_CAP_GWREXTEND) s += "a"; | |
122 | if (cap & CEPH_CAP_GLAZYIO) s += "l"; | |
123 | return s; | |
124 | } | |
125 | inline string ccap_string(int cap) | |
126 | { | |
127 | string s; | |
128 | if (cap & CEPH_CAP_PIN) s += "p"; | |
129 | ||
130 | int a = (cap >> CEPH_CAP_SAUTH) & 3; | |
131 | if (a) s += 'A' + gcap_string(a); | |
132 | ||
133 | a = (cap >> CEPH_CAP_SLINK) & 3; | |
134 | if (a) s += 'L' + gcap_string(a); | |
135 | ||
136 | a = (cap >> CEPH_CAP_SXATTR) & 3; | |
137 | if (a) s += 'X' + gcap_string(a); | |
138 | ||
139 | a = cap >> CEPH_CAP_SFILE; | |
140 | if (a) s += 'F' + gcap_string(a); | |
141 | ||
142 | if (s.length() == 0) | |
143 | s = "-"; | |
144 | return s; | |
145 | } | |
146 | ||
7c673cae | 147 | struct scatter_info_t { |
94b18763 | 148 | version_t version = 0; |
7c673cae FG |
149 | }; |
150 | ||
151 | struct frag_info_t : public scatter_info_t { | |
7c673cae FG |
152 | int64_t size() const { return nfiles + nsubdirs; } |
153 | ||
154 | void zero() { | |
155 | *this = frag_info_t(); | |
156 | } | |
157 | ||
158 | // *this += cur - acc; | |
159 | void add_delta(const frag_info_t &cur, const frag_info_t &acc, bool *touched_mtime=0, bool *touched_chattr=0) { | |
160 | if (cur.mtime > mtime) { | |
161 | mtime = cur.mtime; | |
162 | if (touched_mtime) | |
163 | *touched_mtime = true; | |
164 | } | |
165 | if (cur.change_attr > change_attr) { | |
166 | change_attr = cur.change_attr; | |
167 | if (touched_chattr) | |
168 | *touched_chattr = true; | |
169 | } | |
170 | nfiles += cur.nfiles - acc.nfiles; | |
171 | nsubdirs += cur.nsubdirs - acc.nsubdirs; | |
172 | } | |
173 | ||
174 | void add(const frag_info_t& other) { | |
175 | if (other.mtime > mtime) | |
176 | mtime = other.mtime; | |
177 | if (other.change_attr > change_attr) | |
178 | change_attr = other.change_attr; | |
179 | nfiles += other.nfiles; | |
180 | nsubdirs += other.nsubdirs; | |
181 | } | |
182 | ||
183 | bool same_sums(const frag_info_t &o) const { | |
184 | return mtime <= o.mtime && | |
185 | nfiles == o.nfiles && | |
186 | nsubdirs == o.nsubdirs; | |
187 | } | |
188 | ||
189 | void encode(bufferlist &bl) const; | |
11fdf7f2 | 190 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 191 | void dump(Formatter *f) const; |
9f95a23c TL |
192 | static void generate_test_instances(std::list<frag_info_t*>& ls); |
193 | ||
194 | // this frag | |
195 | utime_t mtime; | |
196 | uint64_t change_attr = 0; | |
197 | int64_t nfiles = 0; // files | |
198 | int64_t nsubdirs = 0; // subdirs | |
7c673cae FG |
199 | }; |
200 | WRITE_CLASS_ENCODER(frag_info_t) | |
201 | ||
202 | inline bool operator==(const frag_info_t &l, const frag_info_t &r) { | |
203 | return memcmp(&l, &r, sizeof(l)) == 0; | |
204 | } | |
205 | inline bool operator!=(const frag_info_t &l, const frag_info_t &r) { | |
206 | return !(l == r); | |
207 | } | |
208 | ||
209 | std::ostream& operator<<(std::ostream &out, const frag_info_t &f); | |
210 | ||
211 | ||
212 | struct nest_info_t : public scatter_info_t { | |
7c673cae FG |
213 | int64_t rsize() const { return rfiles + rsubdirs; } |
214 | ||
7c673cae FG |
215 | void zero() { |
216 | *this = nest_info_t(); | |
217 | } | |
218 | ||
219 | void sub(const nest_info_t &other) { | |
220 | add(other, -1); | |
221 | } | |
222 | void add(const nest_info_t &other, int fac=1) { | |
223 | if (other.rctime > rctime) | |
224 | rctime = other.rctime; | |
225 | rbytes += fac*other.rbytes; | |
226 | rfiles += fac*other.rfiles; | |
227 | rsubdirs += fac*other.rsubdirs; | |
11fdf7f2 | 228 | rsnaps += fac*other.rsnaps; |
7c673cae FG |
229 | } |
230 | ||
231 | // *this += cur - acc; | |
232 | void add_delta(const nest_info_t &cur, const nest_info_t &acc) { | |
233 | if (cur.rctime > rctime) | |
234 | rctime = cur.rctime; | |
235 | rbytes += cur.rbytes - acc.rbytes; | |
236 | rfiles += cur.rfiles - acc.rfiles; | |
237 | rsubdirs += cur.rsubdirs - acc.rsubdirs; | |
11fdf7f2 | 238 | rsnaps += cur.rsnaps - acc.rsnaps; |
7c673cae FG |
239 | } |
240 | ||
241 | bool same_sums(const nest_info_t &o) const { | |
242 | return rctime <= o.rctime && | |
243 | rbytes == o.rbytes && | |
244 | rfiles == o.rfiles && | |
245 | rsubdirs == o.rsubdirs && | |
11fdf7f2 | 246 | rsnaps == o.rsnaps; |
7c673cae FG |
247 | } |
248 | ||
249 | void encode(bufferlist &bl) const; | |
11fdf7f2 | 250 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 251 | void dump(Formatter *f) const; |
9f95a23c TL |
252 | static void generate_test_instances(std::list<nest_info_t*>& ls); |
253 | ||
254 | // this frag + children | |
255 | utime_t rctime; | |
256 | int64_t rbytes = 0; | |
257 | int64_t rfiles = 0; | |
258 | int64_t rsubdirs = 0; | |
259 | int64_t rsnaps = 0; | |
7c673cae FG |
260 | }; |
261 | WRITE_CLASS_ENCODER(nest_info_t) | |
262 | ||
263 | inline bool operator==(const nest_info_t &l, const nest_info_t &r) { | |
264 | return memcmp(&l, &r, sizeof(l)) == 0; | |
265 | } | |
266 | inline bool operator!=(const nest_info_t &l, const nest_info_t &r) { | |
267 | return !(l == r); | |
268 | } | |
269 | ||
270 | std::ostream& operator<<(std::ostream &out, const nest_info_t &n); | |
271 | ||
7c673cae | 272 | struct vinodeno_t { |
7c673cae FG |
273 | vinodeno_t() {} |
274 | vinodeno_t(inodeno_t i, snapid_t s) : ino(i), snapid(s) {} | |
275 | ||
276 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
277 | using ceph::encode; |
278 | encode(ino, bl); | |
279 | encode(snapid, bl); | |
7c673cae | 280 | } |
11fdf7f2 TL |
281 | void decode(bufferlist::const_iterator& p) { |
282 | using ceph::decode; | |
283 | decode(ino, p); | |
284 | decode(snapid, p); | |
7c673cae | 285 | } |
9f95a23c TL |
286 | |
287 | inodeno_t ino; | |
288 | snapid_t snapid; | |
7c673cae FG |
289 | }; |
290 | WRITE_CLASS_ENCODER(vinodeno_t) | |
291 | ||
292 | inline bool operator==(const vinodeno_t &l, const vinodeno_t &r) { | |
293 | return l.ino == r.ino && l.snapid == r.snapid; | |
294 | } | |
295 | inline bool operator!=(const vinodeno_t &l, const vinodeno_t &r) { | |
296 | return !(l == r); | |
297 | } | |
298 | inline bool operator<(const vinodeno_t &l, const vinodeno_t &r) { | |
299 | return | |
300 | l.ino < r.ino || | |
301 | (l.ino == r.ino && l.snapid < r.snapid); | |
302 | } | |
303 | ||
304 | struct quota_info_t | |
305 | { | |
7c673cae FG |
306 | void encode(bufferlist& bl) const { |
307 | ENCODE_START(1, 1, bl); | |
11fdf7f2 TL |
308 | encode(max_bytes, bl); |
309 | encode(max_files, bl); | |
7c673cae FG |
310 | ENCODE_FINISH(bl); |
311 | } | |
11fdf7f2 | 312 | void decode(bufferlist::const_iterator& p) { |
7c673cae | 313 | DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p); |
11fdf7f2 TL |
314 | decode(max_bytes, p); |
315 | decode(max_files, p); | |
7c673cae FG |
316 | DECODE_FINISH(p); |
317 | } | |
318 | ||
319 | void dump(Formatter *f) const; | |
9f95a23c | 320 | static void generate_test_instances(std::list<quota_info_t *>& ls); |
7c673cae FG |
321 | |
322 | bool is_valid() const { | |
323 | return max_bytes >=0 && max_files >=0; | |
324 | } | |
325 | bool is_enable() const { | |
326 | return max_bytes || max_files; | |
327 | } | |
9f95a23c TL |
328 | |
329 | int64_t max_bytes = 0; | |
330 | int64_t max_files = 0; | |
7c673cae FG |
331 | }; |
332 | WRITE_CLASS_ENCODER(quota_info_t) | |
333 | ||
334 | inline bool operator==(const quota_info_t &l, const quota_info_t &r) { | |
335 | return memcmp(&l, &r, sizeof(l)) == 0; | |
336 | } | |
337 | ||
338 | ostream& operator<<(ostream &out, const quota_info_t &n); | |
339 | ||
340 | namespace std { | |
341 | template<> struct hash<vinodeno_t> { | |
342 | size_t operator()(const vinodeno_t &vino) const { | |
343 | hash<inodeno_t> H; | |
344 | hash<uint64_t> I; | |
345 | return H(vino.ino) ^ I(vino.snapid); | |
346 | } | |
347 | }; | |
9f95a23c | 348 | } |
7c673cae FG |
349 | |
350 | inline std::ostream& operator<<(std::ostream &out, const vinodeno_t &vino) { | |
351 | out << vino.ino; | |
352 | if (vino.snapid == CEPH_NOSNAP) | |
353 | out << ".head"; | |
354 | else if (vino.snapid) | |
355 | out << '.' << vino.snapid; | |
356 | return out; | |
357 | } | |
358 | ||
7c673cae FG |
359 | struct client_writeable_range_t { |
360 | struct byte_range_t { | |
94b18763 | 361 | uint64_t first = 0, last = 0; // interval client can write to |
7c673cae FG |
362 | }; |
363 | ||
7c673cae | 364 | void encode(bufferlist &bl) const; |
11fdf7f2 | 365 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 366 | void dump(Formatter *f) const; |
94b18763 | 367 | static void generate_test_instances(std::list<client_writeable_range_t*>& ls); |
9f95a23c TL |
368 | |
369 | byte_range_t range; | |
370 | snapid_t follows = 0; // aka "data+metadata flushed thru" | |
7c673cae FG |
371 | }; |
372 | ||
11fdf7f2 TL |
373 | inline void decode(client_writeable_range_t::byte_range_t& range, bufferlist::const_iterator& bl) { |
374 | decode(range.first, bl); | |
375 | decode(range.last, bl); | |
7c673cae FG |
376 | } |
377 | ||
378 | WRITE_CLASS_ENCODER(client_writeable_range_t) | |
379 | ||
380 | std::ostream& operator<<(std::ostream& out, const client_writeable_range_t& r); | |
381 | ||
382 | inline bool operator==(const client_writeable_range_t& l, | |
383 | const client_writeable_range_t& r) { | |
384 | return l.range.first == r.range.first && l.range.last == r.range.last && | |
385 | l.follows == r.follows; | |
386 | } | |
387 | ||
388 | struct inline_data_t { | |
7c673cae | 389 | public: |
94b18763 | 390 | inline_data_t() {} |
7c673cae FG |
391 | inline_data_t(const inline_data_t& o) : version(o.version) { |
392 | if (o.blp) | |
393 | get_data() = *o.blp; | |
394 | } | |
395 | inline_data_t& operator=(const inline_data_t& o) { | |
396 | version = o.version; | |
397 | if (o.blp) | |
398 | get_data() = *o.blp; | |
399 | else | |
400 | free_data(); | |
401 | return *this; | |
402 | } | |
9f95a23c TL |
403 | |
404 | void free_data() { | |
405 | blp.reset(); | |
406 | } | |
407 | bufferlist& get_data() { | |
408 | if (!blp) | |
409 | blp.reset(new bufferlist); | |
410 | return *blp; | |
411 | } | |
412 | size_t length() const { return blp ? blp->length() : 0; } | |
413 | ||
7c673cae FG |
414 | bool operator==(const inline_data_t& o) const { |
415 | return length() == o.length() && | |
416 | (length() == 0 || | |
417 | (*const_cast<bufferlist*>(blp.get()) == *const_cast<bufferlist*>(o.blp.get()))); | |
418 | } | |
419 | bool operator!=(const inline_data_t& o) const { | |
420 | return !(*this == o); | |
421 | } | |
422 | void encode(bufferlist &bl) const; | |
11fdf7f2 | 423 | void decode(bufferlist::const_iterator& bl); |
9f95a23c TL |
424 | |
425 | version_t version = 1; | |
426 | ||
427 | private: | |
428 | std::unique_ptr<bufferlist> blp; | |
7c673cae FG |
429 | }; |
430 | WRITE_CLASS_ENCODER(inline_data_t) | |
431 | ||
// Damage categories, numbered consecutively from zero.
enum {
  DAMAGE_STATS = 0,     // statistics (dirstat, size, etc)
  DAMAGE_RSTATS = 1,    // recursive statistics (rstat, accounted_rstat)
  DAMAGE_FRAGTREE = 2   // fragtree -- repair by searching
};
using damage_flags_t = uint32_t;
438 | ||
94b18763 | 439 | template<template<typename> class Allocator = std::allocator> |
7c673cae FG |
440 | struct inode_t { |
441 | /** | |
442 | * *************** | |
443 | * Do not forget to add any new fields to the compare() function. | |
444 | * *************** | |
445 | */ | |
94b18763 | 446 | using client_range_map = std::map<client_t,client_writeable_range_t,std::less<client_t>,Allocator<std::pair<const client_t,client_writeable_range_t>>>; |
94b18763 FG |
447 | |
448 | inode_t() | |
449 | { | |
7c673cae | 450 | clear_layout(); |
7c673cae FG |
451 | } |
452 | ||
453 | // file type | |
454 | bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; } | |
455 | bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; } | |
456 | bool is_file() const { return (mode & S_IFMT) == S_IFREG; } | |
457 | ||
458 | bool is_truncating() const { return (truncate_pending > 0); } | |
459 | void truncate(uint64_t old_size, uint64_t new_size) { | |
11fdf7f2 | 460 | ceph_assert(new_size < old_size); |
7c673cae FG |
461 | if (old_size > max_size_ever) |
462 | max_size_ever = old_size; | |
463 | truncate_from = old_size; | |
464 | size = new_size; | |
465 | rstat.rbytes = new_size; | |
466 | truncate_size = size; | |
467 | truncate_seq++; | |
468 | truncate_pending++; | |
469 | } | |
470 | ||
471 | bool has_layout() const { | |
472 | return layout != file_layout_t(); | |
473 | } | |
474 | ||
475 | void clear_layout() { | |
476 | layout = file_layout_t(); | |
477 | } | |
478 | ||
479 | uint64_t get_layout_size_increment() const { | |
480 | return layout.get_period(); | |
481 | } | |
482 | ||
483 | bool is_dirty_rstat() const { return !(rstat == accounted_rstat); } | |
484 | ||
485 | uint64_t get_max_size() const { | |
486 | uint64_t max = 0; | |
487 | for (std::map<client_t,client_writeable_range_t>::const_iterator p = client_ranges.begin(); | |
488 | p != client_ranges.end(); | |
489 | ++p) | |
490 | if (p->second.range.last > max) | |
491 | max = p->second.range.last; | |
492 | return max; | |
493 | } | |
494 | void set_max_size(uint64_t new_max) { | |
495 | if (new_max == 0) { | |
496 | client_ranges.clear(); | |
497 | } else { | |
498 | for (std::map<client_t,client_writeable_range_t>::iterator p = client_ranges.begin(); | |
499 | p != client_ranges.end(); | |
500 | ++p) | |
501 | p->second.range.last = new_max; | |
502 | } | |
503 | } | |
504 | ||
505 | void trim_client_ranges(snapid_t last) { | |
506 | std::map<client_t, client_writeable_range_t>::iterator p = client_ranges.begin(); | |
507 | while (p != client_ranges.end()) { | |
508 | if (p->second.follows >= last) | |
509 | client_ranges.erase(p++); | |
510 | else | |
511 | ++p; | |
512 | } | |
513 | } | |
514 | ||
515 | bool is_backtrace_updated() const { | |
516 | return backtrace_version == version; | |
517 | } | |
518 | void update_backtrace(version_t pv=0) { | |
519 | backtrace_version = pv ? pv : version; | |
520 | } | |
521 | ||
522 | void add_old_pool(int64_t l) { | |
523 | backtrace_version = version; | |
524 | old_pools.insert(l); | |
525 | } | |
526 | ||
527 | void encode(bufferlist &bl, uint64_t features) const; | |
11fdf7f2 | 528 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 529 | void dump(Formatter *f) const; |
94b18763 | 530 | static void generate_test_instances(std::list<inode_t*>& ls); |
7c673cae FG |
531 | /** |
532 | * Compare this inode_t with another that represent *the same inode* | |
533 | * at different points in time. | |
534 | * @pre The inodes are the same ino | |
535 | * | |
536 | * @param other The inode_t to compare ourselves with | |
537 | * @param divergent A bool pointer which will be set to true | |
538 | * if the values are different in a way that can't be explained | |
539 | * by one being a newer version than the other. | |
540 | * | |
541 | * @returns 1 if we are newer than the other, 0 if equal, -1 if older. | |
542 | */ | |
543 | int compare(const inode_t &other, bool *divergent) const; | |
9f95a23c TL |
544 | |
545 | // base (immutable) | |
546 | inodeno_t ino = 0; | |
547 | uint32_t rdev = 0; // if special file | |
548 | ||
549 | // affected by any inode change... | |
550 | utime_t ctime; // inode change time | |
551 | utime_t btime; // birth time | |
552 | ||
553 | // perm (namespace permissions) | |
554 | uint32_t mode = 0; | |
555 | uid_t uid = 0; | |
556 | gid_t gid = 0; | |
557 | ||
558 | // nlink | |
559 | int32_t nlink = 0; | |
560 | ||
561 | // file (data access) | |
562 | ceph_dir_layout dir_layout = {}; // [dir only] | |
563 | file_layout_t layout; | |
564 | compact_set<int64_t, std::less<int64_t>, Allocator<int64_t>> old_pools; | |
565 | uint64_t size = 0; // on directory, # dentries | |
566 | uint64_t max_size_ever = 0; // max size the file has ever been | |
567 | uint32_t truncate_seq = 0; | |
568 | uint64_t truncate_size = 0, truncate_from = 0; | |
569 | uint32_t truncate_pending = 0; | |
570 | utime_t mtime; // file data modify time. | |
571 | utime_t atime; // file data access time. | |
572 | uint32_t time_warp_seq = 0; // count of (potential) mtime/atime timewarps (i.e., utimes()) | |
573 | inline_data_t inline_data; // FIXME check | |
574 | ||
575 | // change attribute | |
576 | uint64_t change_attr = 0; | |
577 | ||
578 | client_range_map client_ranges; // client(s) can write to these ranges | |
579 | ||
580 | // dirfrag, recursive accountin | |
581 | frag_info_t dirstat; // protected by my filelock | |
582 | nest_info_t rstat; // protected by my nestlock | |
583 | nest_info_t accounted_rstat; // protected by parent's nestlock | |
584 | ||
585 | quota_info_t quota; | |
586 | ||
587 | mds_rank_t export_pin = MDS_RANK_NONE; | |
588 | ||
f6b5b4d7 TL |
589 | double export_ephemeral_random_pin = 0; |
590 | bool export_ephemeral_distributed_pin = false; | |
591 | ||
9f95a23c TL |
592 | // special stuff |
593 | version_t version = 0; // auth only | |
594 | version_t file_data_version = 0; // auth only | |
595 | version_t xattr_version = 0; | |
596 | ||
597 | utime_t last_scrub_stamp; // start time of last complete scrub | |
598 | version_t last_scrub_version = 0;// (parent) start version of last complete scrub | |
599 | ||
600 | version_t backtrace_version = 0; | |
601 | ||
602 | snapid_t oldest_snap; | |
603 | ||
604 | std::basic_string<char,std::char_traits<char>,Allocator<char>> stray_prior_path; //stores path before unlink | |
605 | ||
7c673cae FG |
606 | private: |
607 | bool older_is_consistent(const inode_t &other) const; | |
608 | }; | |
7c673cae | 609 | |
94b18763 FG |
610 | // These methods may be moved back to mdstypes.cc when we have pmr |
611 | template<template<typename> class Allocator> | |
612 | void inode_t<Allocator>::encode(bufferlist &bl, uint64_t features) const | |
613 | { | |
f6b5b4d7 | 614 | ENCODE_START(16, 6, bl); |
94b18763 | 615 | |
11fdf7f2 TL |
616 | encode(ino, bl); |
617 | encode(rdev, bl); | |
618 | encode(ctime, bl); | |
94b18763 | 619 | |
11fdf7f2 TL |
620 | encode(mode, bl); |
621 | encode(uid, bl); | |
622 | encode(gid, bl); | |
94b18763 | 623 | |
11fdf7f2 | 624 | encode(nlink, bl); |
94b18763 FG |
625 | { |
626 | // removed field | |
627 | bool anchored = 0; | |
11fdf7f2 | 628 | encode(anchored, bl); |
94b18763 FG |
629 | } |
630 | ||
11fdf7f2 TL |
631 | encode(dir_layout, bl); |
632 | encode(layout, bl, features); | |
633 | encode(size, bl); | |
634 | encode(truncate_seq, bl); | |
635 | encode(truncate_size, bl); | |
636 | encode(truncate_from, bl); | |
637 | encode(truncate_pending, bl); | |
638 | encode(mtime, bl); | |
639 | encode(atime, bl); | |
640 | encode(time_warp_seq, bl); | |
641 | encode(client_ranges, bl); | |
94b18763 | 642 | |
11fdf7f2 TL |
643 | encode(dirstat, bl); |
644 | encode(rstat, bl); | |
645 | encode(accounted_rstat, bl); | |
94b18763 | 646 | |
11fdf7f2 TL |
647 | encode(version, bl); |
648 | encode(file_data_version, bl); | |
649 | encode(xattr_version, bl); | |
650 | encode(backtrace_version, bl); | |
651 | encode(old_pools, bl); | |
652 | encode(max_size_ever, bl); | |
653 | encode(inline_data, bl); | |
654 | encode(quota, bl); | |
94b18763 | 655 | |
11fdf7f2 | 656 | encode(stray_prior_path, bl); |
94b18763 | 657 | |
11fdf7f2 TL |
658 | encode(last_scrub_version, bl); |
659 | encode(last_scrub_stamp, bl); | |
94b18763 | 660 | |
11fdf7f2 TL |
661 | encode(btime, bl); |
662 | encode(change_attr, bl); | |
94b18763 | 663 | |
11fdf7f2 | 664 | encode(export_pin, bl); |
94b18763 | 665 | |
f6b5b4d7 TL |
666 | encode(export_ephemeral_random_pin, bl); |
667 | encode(export_ephemeral_distributed_pin, bl); | |
668 | ||
94b18763 FG |
669 | ENCODE_FINISH(bl); |
670 | } | |
671 | ||
672 | template<template<typename> class Allocator> | |
11fdf7f2 | 673 | void inode_t<Allocator>::decode(bufferlist::const_iterator &p) |
94b18763 | 674 | { |
f6b5b4d7 | 675 | DECODE_START_LEGACY_COMPAT_LEN(16, 6, 6, p); |
94b18763 | 676 | |
11fdf7f2 TL |
677 | decode(ino, p); |
678 | decode(rdev, p); | |
679 | decode(ctime, p); | |
94b18763 | 680 | |
11fdf7f2 TL |
681 | decode(mode, p); |
682 | decode(uid, p); | |
683 | decode(gid, p); | |
94b18763 | 684 | |
11fdf7f2 | 685 | decode(nlink, p); |
94b18763 FG |
686 | { |
687 | bool anchored; | |
11fdf7f2 | 688 | decode(anchored, p); |
94b18763 FG |
689 | } |
690 | ||
691 | if (struct_v >= 4) | |
11fdf7f2 | 692 | decode(dir_layout, p); |
92f5a8d4 TL |
693 | else { |
694 | // FIPS zeroization audit 20191117: this memset is not security related. | |
94b18763 | 695 | memset(&dir_layout, 0, sizeof(dir_layout)); |
92f5a8d4 | 696 | } |
11fdf7f2 TL |
697 | decode(layout, p); |
698 | decode(size, p); | |
699 | decode(truncate_seq, p); | |
700 | decode(truncate_size, p); | |
701 | decode(truncate_from, p); | |
94b18763 | 702 | if (struct_v >= 5) |
11fdf7f2 | 703 | decode(truncate_pending, p); |
94b18763 FG |
704 | else |
705 | truncate_pending = 0; | |
11fdf7f2 TL |
706 | decode(mtime, p); |
707 | decode(atime, p); | |
708 | decode(time_warp_seq, p); | |
94b18763 | 709 | if (struct_v >= 3) { |
11fdf7f2 | 710 | decode(client_ranges, p); |
94b18763 FG |
711 | } else { |
712 | map<client_t, client_writeable_range_t::byte_range_t> m; | |
11fdf7f2 | 713 | decode(m, p); |
94b18763 FG |
714 | for (map<client_t, client_writeable_range_t::byte_range_t>::iterator |
715 | q = m.begin(); q != m.end(); ++q) | |
716 | client_ranges[q->first].range = q->second; | |
717 | } | |
718 | ||
11fdf7f2 TL |
719 | decode(dirstat, p); |
720 | decode(rstat, p); | |
721 | decode(accounted_rstat, p); | |
94b18763 | 722 | |
11fdf7f2 TL |
723 | decode(version, p); |
724 | decode(file_data_version, p); | |
725 | decode(xattr_version, p); | |
94b18763 | 726 | if (struct_v >= 2) |
11fdf7f2 | 727 | decode(backtrace_version, p); |
94b18763 | 728 | if (struct_v >= 7) |
11fdf7f2 | 729 | decode(old_pools, p); |
94b18763 | 730 | if (struct_v >= 8) |
11fdf7f2 | 731 | decode(max_size_ever, p); |
94b18763 | 732 | if (struct_v >= 9) { |
11fdf7f2 | 733 | decode(inline_data, p); |
94b18763 FG |
734 | } else { |
735 | inline_data.version = CEPH_INLINE_NONE; | |
736 | } | |
737 | if (struct_v < 10) | |
738 | backtrace_version = 0; // force update backtrace | |
739 | if (struct_v >= 11) | |
11fdf7f2 | 740 | decode(quota, p); |
94b18763 FG |
741 | |
742 | if (struct_v >= 12) { | |
743 | std::string tmp; | |
11fdf7f2 TL |
744 | decode(tmp, p); |
745 | stray_prior_path = std::string_view(tmp); | |
94b18763 FG |
746 | } |
747 | ||
748 | if (struct_v >= 13) { | |
11fdf7f2 TL |
749 | decode(last_scrub_version, p); |
750 | decode(last_scrub_stamp, p); | |
94b18763 FG |
751 | } |
752 | if (struct_v >= 14) { | |
11fdf7f2 TL |
753 | decode(btime, p); |
754 | decode(change_attr, p); | |
94b18763 FG |
755 | } else { |
756 | btime = utime_t(); | |
757 | change_attr = 0; | |
758 | } | |
759 | ||
760 | if (struct_v >= 15) { | |
11fdf7f2 | 761 | decode(export_pin, p); |
94b18763 FG |
762 | } else { |
763 | export_pin = MDS_RANK_NONE; | |
764 | } | |
765 | ||
f6b5b4d7 TL |
766 | if (struct_v >= 16) { |
767 | decode(export_ephemeral_random_pin, p); | |
768 | decode(export_ephemeral_distributed_pin, p); | |
769 | } else { | |
770 | export_ephemeral_random_pin = 0; | |
771 | export_ephemeral_distributed_pin = false; | |
772 | } | |
773 | ||
94b18763 FG |
774 | DECODE_FINISH(p); |
775 | } | |
776 | ||
777 | template<template<typename> class Allocator> | |
778 | void inode_t<Allocator>::dump(Formatter *f) const | |
779 | { | |
780 | f->dump_unsigned("ino", ino); | |
781 | f->dump_unsigned("rdev", rdev); | |
782 | f->dump_stream("ctime") << ctime; | |
783 | f->dump_stream("btime") << btime; | |
784 | f->dump_unsigned("mode", mode); | |
785 | f->dump_unsigned("uid", uid); | |
786 | f->dump_unsigned("gid", gid); | |
787 | f->dump_unsigned("nlink", nlink); | |
788 | ||
789 | f->open_object_section("dir_layout"); | |
790 | ::dump(dir_layout, f); | |
791 | f->close_section(); | |
792 | ||
793 | f->dump_object("layout", layout); | |
794 | ||
795 | f->open_array_section("old_pools"); | |
796 | for (const auto &p : old_pools) { | |
797 | f->dump_int("pool", p); | |
798 | } | |
799 | f->close_section(); | |
800 | ||
801 | f->dump_unsigned("size", size); | |
802 | f->dump_unsigned("truncate_seq", truncate_seq); | |
803 | f->dump_unsigned("truncate_size", truncate_size); | |
804 | f->dump_unsigned("truncate_from", truncate_from); | |
805 | f->dump_unsigned("truncate_pending", truncate_pending); | |
806 | f->dump_stream("mtime") << mtime; | |
807 | f->dump_stream("atime") << atime; | |
808 | f->dump_unsigned("time_warp_seq", time_warp_seq); | |
809 | f->dump_unsigned("change_attr", change_attr); | |
810 | f->dump_int("export_pin", export_pin); | |
f6b5b4d7 TL |
811 | f->dump_int("export_ephemeral_random_pin", export_ephemeral_random_pin); |
812 | f->dump_bool("export_ephemeral_distributed_pin", export_ephemeral_distributed_pin); | |
94b18763 FG |
813 | |
814 | f->open_array_section("client_ranges"); | |
815 | for (const auto &p : client_ranges) { | |
816 | f->open_object_section("client"); | |
817 | f->dump_unsigned("client", p.first.v); | |
818 | p.second.dump(f); | |
819 | f->close_section(); | |
820 | } | |
821 | f->close_section(); | |
822 | ||
823 | f->open_object_section("dirstat"); | |
824 | dirstat.dump(f); | |
825 | f->close_section(); | |
826 | ||
827 | f->open_object_section("rstat"); | |
828 | rstat.dump(f); | |
829 | f->close_section(); | |
830 | ||
831 | f->open_object_section("accounted_rstat"); | |
832 | accounted_rstat.dump(f); | |
833 | f->close_section(); | |
834 | ||
835 | f->dump_unsigned("version", version); | |
836 | f->dump_unsigned("file_data_version", file_data_version); | |
837 | f->dump_unsigned("xattr_version", xattr_version); | |
838 | f->dump_unsigned("backtrace_version", backtrace_version); | |
839 | ||
840 | f->dump_string("stray_prior_path", stray_prior_path); | |
9f95a23c TL |
841 | f->dump_unsigned("max_size_ever", max_size_ever); |
842 | ||
843 | f->open_object_section("quota"); | |
844 | quota.dump(f); | |
845 | f->close_section(); | |
846 | ||
847 | f->dump_stream("last_scrub_stamp") << last_scrub_stamp; | |
848 | f->dump_unsigned("last_scrub_version", last_scrub_version); | |
94b18763 FG |
849 | } |
850 | ||
851 | template<template<typename> class Allocator> | |
9f95a23c | 852 | void inode_t<Allocator>::generate_test_instances(std::list<inode_t*>& ls) |
94b18763 FG |
853 | { |
854 | ls.push_back(new inode_t<Allocator>); | |
855 | ls.push_back(new inode_t<Allocator>); | |
856 | ls.back()->ino = 1; | |
857 | // i am lazy. | |
858 | } | |
859 | ||
860 | template<template<typename> class Allocator> | |
861 | int inode_t<Allocator>::compare(const inode_t<Allocator> &other, bool *divergent) const | |
862 | { | |
11fdf7f2 | 863 | ceph_assert(ino == other.ino); |
94b18763 FG |
864 | *divergent = false; |
865 | if (version == other.version) { | |
866 | if (rdev != other.rdev || | |
867 | ctime != other.ctime || | |
868 | btime != other.btime || | |
869 | mode != other.mode || | |
870 | uid != other.uid || | |
871 | gid != other.gid || | |
872 | nlink != other.nlink || | |
873 | memcmp(&dir_layout, &other.dir_layout, sizeof(dir_layout)) || | |
874 | layout != other.layout || | |
875 | old_pools != other.old_pools || | |
876 | size != other.size || | |
877 | max_size_ever != other.max_size_ever || | |
878 | truncate_seq != other.truncate_seq || | |
879 | truncate_size != other.truncate_size || | |
880 | truncate_from != other.truncate_from || | |
881 | truncate_pending != other.truncate_pending || | |
882 | change_attr != other.change_attr || | |
883 | mtime != other.mtime || | |
884 | atime != other.atime || | |
885 | time_warp_seq != other.time_warp_seq || | |
886 | inline_data != other.inline_data || | |
887 | client_ranges != other.client_ranges || | |
888 | !(dirstat == other.dirstat) || | |
889 | !(rstat == other.rstat) || | |
890 | !(accounted_rstat == other.accounted_rstat) || | |
891 | file_data_version != other.file_data_version || | |
892 | xattr_version != other.xattr_version || | |
893 | backtrace_version != other.backtrace_version) { | |
894 | *divergent = true; | |
895 | } | |
896 | return 0; | |
897 | } else if (version > other.version) { | |
898 | *divergent = !older_is_consistent(other); | |
899 | return 1; | |
900 | } else { | |
11fdf7f2 | 901 | ceph_assert(version < other.version); |
94b18763 FG |
902 | *divergent = !other.older_is_consistent(*this); |
903 | return -1; | |
904 | } | |
905 | } | |
906 | ||
907 | template<template<typename> class Allocator> | |
908 | bool inode_t<Allocator>::older_is_consistent(const inode_t<Allocator> &other) const | |
909 | { | |
910 | if (max_size_ever < other.max_size_ever || | |
911 | truncate_seq < other.truncate_seq || | |
912 | time_warp_seq < other.time_warp_seq || | |
913 | inline_data.version < other.inline_data.version || | |
914 | dirstat.version < other.dirstat.version || | |
915 | rstat.version < other.rstat.version || | |
916 | accounted_rstat.version < other.accounted_rstat.version || | |
917 | file_data_version < other.file_data_version || | |
918 | xattr_version < other.xattr_version || | |
919 | backtrace_version < other.backtrace_version) { | |
920 | return false; | |
921 | } | |
922 | return true; | |
923 | } | |
924 | ||
// Free-function encode() overload for inode_t<Allocator>.  Forwards to the
// member c.encode(bl, features); the call is bracketed by the
// ENCODE_DUMP_PRE()/ENCODE_DUMP_POST(cl) macros, which are defined outside
// this chunk.
template<template<typename> class Allocator>
inline void encode(const inode_t<Allocator> &c, ::ceph::bufferlist &bl, uint64_t features)
{
  ENCODE_DUMP_PRE();
  c.encode(bl, features);
  ENCODE_DUMP_POST(cl);
}
// Free-function decode() overload for inode_t<Allocator>; forwards to the
// member c.decode(p).
template<template<typename> class Allocator>
inline void decode(inode_t<Allocator> &c, ::ceph::bufferlist::const_iterator &p)
{
  c.decode(p);
}
937 | ||
// std::basic_string<char> whose storage is obtained through Allocator<char>.
template<template<typename> class Allocator>
using alloc_string = std::basic_string<char,std::char_traits<char>,Allocator<char>>;
940 | ||
// Extended-attribute table: a compact_map from alloc_string<Allocator> names
// to bufferptr values, ordered by std::less on the name, with the map's
// pairs allocated through Allocator.
template<template<typename> class Allocator>
using xattr_map = compact_map<alloc_string<Allocator>, bufferptr, std::less<alloc_string<Allocator>>, Allocator<std::pair<const alloc_string<Allocator>, bufferptr>>>; // FIXME bufferptr not in mempool
7c673cae | 943 | |
e306af50 TL |
944 | template<template<typename> class Allocator> |
945 | inline void decode_noshare(xattr_map<Allocator>& xattrs, ceph::buffer::list::const_iterator &p) | |
946 | { | |
947 | __u32 n; | |
948 | decode(n, p); | |
949 | while (n-- > 0) { | |
950 | alloc_string<Allocator> key; | |
951 | decode(key, p); | |
952 | __u32 len; | |
953 | decode(len, p); | |
954 | p.copy_deep(len, xattrs[key]); | |
955 | } | |
956 | } | |
957 | ||
/*
 * old_inode_t - an inode bundled with its xattrs and a `first` snapid.
 * NOTE(review): presumably a preserved past state of an inode that is valid
 * starting at snapshot `first` -- confirm against the users of this type.
 */
template<template<typename> class Allocator = std::allocator>
struct old_inode_t {
  snapid_t first;
  inode_t<Allocator> inode;
  xattr_map<Allocator> xattrs;

  void encode(bufferlist &bl, uint64_t features) const;
  void decode(bufferlist::const_iterator& bl);
  void dump(Formatter *f) const;
  // appends heap-allocated sample instances to ls
  static void generate_test_instances(std::list<old_inode_t*>& ls);
};
94b18763 FG |
969 | |
970 | // These methods may be moved back to mdstypes.cc when we have pmr | |
// Serialize first, inode and xattrs, in that order, inside an
// ENCODE_START(2, 2, bl) / ENCODE_FINISH(bl) envelope.  Only the inode
// encoding receives `features`.
template<template<typename> class Allocator>
void old_inode_t<Allocator>::encode(bufferlist& bl, uint64_t features) const
{
  ENCODE_START(2, 2, bl);
  encode(first, bl);
  encode(inode, bl, features);
  encode(xattrs, bl);
  ENCODE_FINISH(bl);
}
980 | ||
// Inverse of encode(): reads first, inode and xattrs in that order inside a
// DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl) / DECODE_FINISH(bl) envelope.
// The xattrs are read through decode_noshare() rather than the generic
// decode().
template<template<typename> class Allocator>
void old_inode_t<Allocator>::decode(bufferlist::const_iterator& bl)
{
  DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
  decode(first, bl);
  decode(inode, bl);
  decode_noshare<Allocator>(xattrs, bl);
  DECODE_FINISH(bl);
}
990 | ||
991 | template<template<typename> class Allocator> | |
992 | void old_inode_t<Allocator>::dump(Formatter *f) const | |
993 | { | |
994 | f->dump_unsigned("first", first); | |
995 | inode.dump(f); | |
996 | f->open_object_section("xattrs"); | |
997 | for (const auto &p : xattrs) { | |
998 | std::string v(p.second.c_str(), p.second.length()); | |
999 | f->dump_string(p.first.c_str(), v); | |
1000 | } | |
1001 | f->close_section(); | |
1002 | } | |
1003 | ||
1004 | template<template<typename> class Allocator> | |
1005 | void old_inode_t<Allocator>::generate_test_instances(std::list<old_inode_t<Allocator>*>& ls) | |
1006 | { | |
1007 | ls.push_back(new old_inode_t<Allocator>); | |
1008 | ls.push_back(new old_inode_t<Allocator>); | |
1009 | ls.back()->first = 2; | |
1010 | std::list<inode_t<Allocator>*> ils; | |
1011 | inode_t<Allocator>::generate_test_instances(ils); | |
1012 | ls.back()->inode = *ils.back(); | |
1013 | ls.back()->xattrs["user.foo"] = buffer::copy("asdf", 4); | |
1014 | ls.back()->xattrs["user.unprintable"] = buffer::copy("\000\001\002", 3); | |
1015 | } | |
1016 | ||
// Free-function encode() overload for old_inode_t<Allocator>.  Forwards to
// the member c.encode(bl, features); the call is bracketed by the
// ENCODE_DUMP_PRE()/ENCODE_DUMP_POST(cl) macros, which are defined outside
// this chunk.
template<template<typename> class Allocator>
inline void encode(const old_inode_t<Allocator> &c, ::ceph::bufferlist &bl, uint64_t features)
{
  ENCODE_DUMP_PRE();
  c.encode(bl, features);
  ENCODE_DUMP_POST(cl);
}
// Free-function decode() overload for old_inode_t<Allocator>; forwards to
// the member c.decode(p).
template<template<typename> class Allocator>
inline void decode(old_inode_t<Allocator> &c, ::ceph::bufferlist::const_iterator &p)
{
  c.decode(p);
}
7c673cae | 1029 | |
7c673cae FG |
/*
 * like an inode, but for a dir frag
 *
 * Holds the versioned statistics and scrub bookkeeping of one directory
 * fragment.  encode/decode/dump are defined outside this header chunk.
 */
struct fnode_t {
  void encode(bufferlist &bl) const;
  void decode(bufferlist::const_iterator& bl);
  void dump(Formatter *f) const;
  // appends heap-allocated sample instances to ls
  static void generate_test_instances(std::list<fnode_t*>& ls);

  version_t version = 0;
  snapid_t snap_purged_thru;   // the max_last_destroy snapid we've been purged thru
  // each statistic comes as a current value plus an "accounted" counterpart
  // NOTE(review): presumably "accounted" is the portion already propagated
  // to the parent -- confirm
  frag_info_t fragstat, accounted_fragstat;
  nest_info_t rstat, accounted_rstat;
  damage_flags_t damage_flags = 0;

  // we know we and all our descendants have been scrubbed since this version
  version_t recursive_scrub_version = 0;
  utime_t recursive_scrub_stamp;
  // version at which we last scrubbed our personal data structures
  version_t localized_scrub_version = 0;
  utime_t localized_scrub_stamp;
};
WRITE_CLASS_ENCODER(fnode_t)
1053 | ||
1054 | ||
// A pair of nest_info_t values (rstat and accounted_rstat) tagged with a
// `first` snapid.
// NOTE(review): presumably a preserved past rstat valid from snapshot
// `first` -- confirm against the users of this type.
struct old_rstat_t {
  void encode(bufferlist& bl) const;
  void decode(bufferlist::const_iterator& p);
  void dump(Formatter *f) const;
  // appends heap-allocated sample instances to ls
  static void generate_test_instances(std::list<old_rstat_t*>& ls);

  snapid_t first;
  nest_info_t rstat, accounted_rstat;
};
WRITE_CLASS_ENCODER(old_rstat_t)
1065 | ||
1066 | inline std::ostream& operator<<(std::ostream& out, const old_rstat_t& o) { | |
1067 | return out << "old_rstat(first " << o.first << " " << o.rstat << " " << o.accounted_rstat << ")"; | |
1068 | } | |
1069 | ||
11fdf7f2 TL |
1070 | class feature_bitset_t { |
1071 | public: | |
1072 | typedef uint64_t block_type; | |
1073 | static const size_t bits_per_block = sizeof(block_type) * 8; | |
1074 | ||
1075 | feature_bitset_t(const feature_bitset_t& other) : _vec(other._vec) {} | |
1076 | feature_bitset_t(feature_bitset_t&& other) : _vec(std::move(other._vec)) {} | |
1077 | feature_bitset_t(unsigned long value = 0); | |
1078 | feature_bitset_t(const vector<size_t>& array); | |
1079 | feature_bitset_t& operator=(const feature_bitset_t& other) { | |
1080 | _vec = other._vec; | |
1081 | return *this; | |
1082 | } | |
1083 | feature_bitset_t& operator=(feature_bitset_t&& other) { | |
1084 | _vec = std::move(other._vec); | |
1085 | return *this; | |
1086 | } | |
9f95a23c | 1087 | feature_bitset_t& operator-=(const feature_bitset_t& other); |
11fdf7f2 | 1088 | bool empty() const { |
9f95a23c | 1089 | //block_type is a uint64_t. If the vector is only composed of 0s, then it's still "empty" |
11fdf7f2 TL |
1090 | for (auto& v : _vec) { |
1091 | if (v) | |
1092 | return false; | |
1093 | } | |
1094 | return true; | |
1095 | } | |
1096 | bool test(size_t bit) const { | |
1097 | if (bit >= bits_per_block * _vec.size()) | |
1098 | return false; | |
1099 | return _vec[bit / bits_per_block] & ((block_type)1 << (bit % bits_per_block)); | |
1100 | } | |
1101 | void clear() { | |
1102 | _vec.clear(); | |
1103 | } | |
11fdf7f2 TL |
1104 | void encode(bufferlist& bl) const; |
1105 | void decode(bufferlist::const_iterator &p); | |
9f95a23c | 1106 | void dump(Formatter *f) const; |
11fdf7f2 TL |
1107 | void print(ostream& out) const; |
1108 | private: | |
1109 | vector<block_type> _vec; | |
1110 | }; | |
1111 | WRITE_CLASS_ENCODER(feature_bitset_t) | |
1112 | ||
// Stream insertion for feature_bitset_t; the formatting is done by
// feature_bitset_t::print().
inline std::ostream& operator<<(std::ostream& out, const feature_bitset_t& s) {
  s.print(out);
  return out;
}
1117 | ||
9f95a23c TL |
1118 | struct metric_spec_t { |
1119 | metric_spec_t() {} | |
1120 | metric_spec_t(const metric_spec_t& other) : | |
1121 | metric_flags(other.metric_flags) {} | |
1122 | metric_spec_t(metric_spec_t&& other) : | |
1123 | metric_flags(std::move(other.metric_flags)) {} | |
1124 | metric_spec_t(const feature_bitset_t& mf) : | |
1125 | metric_flags(mf) {} | |
1126 | metric_spec_t(feature_bitset_t&& mf) : | |
1127 | metric_flags(std::move(mf)) {} | |
1128 | ||
1129 | metric_spec_t& operator=(const metric_spec_t& other) { | |
1130 | metric_flags = other.metric_flags; | |
1131 | return *this; | |
1132 | } | |
1133 | metric_spec_t& operator=(metric_spec_t&& other) { | |
1134 | metric_flags = std::move(other.metric_flags); | |
1135 | return *this; | |
1136 | } | |
1137 | ||
1138 | bool empty() const { | |
1139 | return metric_flags.empty(); | |
1140 | } | |
1141 | ||
1142 | void clear() { | |
1143 | metric_flags.clear(); | |
1144 | } | |
1145 | ||
1146 | void encode(bufferlist& bl) const; | |
1147 | void decode(bufferlist::const_iterator& p); | |
1148 | void dump(Formatter *f) const; | |
1149 | void print(ostream& out) const; | |
1150 | ||
1151 | // set of metrics that a client is capable of forwarding | |
1152 | feature_bitset_t metric_flags; | |
1153 | }; | |
1154 | WRITE_CLASS_ENCODER(metric_spec_t) | |
1155 | ||
// Stream insertion for metric_spec_t; the formatting is done by
// metric_spec_t::print().
inline std::ostream& operator<<(std::ostream& out, const metric_spec_t& mst) {
  mst.print(out);
  return out;
}
1160 | ||
11fdf7f2 TL |
/*
 * client_metadata_t
 *
 * A string->string key/value map plus a feature bitset and a metric spec.
 * find/begin/end/erase/operator[] act on the key/value map only.
 */
struct client_metadata_t {
  using kv_map_t = std::map<std::string,std::string>;
  using iterator = kv_map_t::const_iterator;

  client_metadata_t() {}
  client_metadata_t(const kv_map_t& kv, const feature_bitset_t &f, const metric_spec_t &mst) :
    kv_map(kv),
    features(f),
    metric_spec(mst) {}
  client_metadata_t& operator=(const client_metadata_t& other) {
    kv_map = other.kv_map;
    features = other.features;
    metric_spec = other.metric_spec;
    return *this;
  }

  // True only when the map, the feature set and the metric spec are all empty.
  bool empty() const { return kv_map.empty() && features.empty() && metric_spec.empty(); }
  iterator find(const std::string& key) const { return kv_map.find(key); }
  iterator begin() const { return kv_map.begin(); }
  iterator end() const { return kv_map.end(); }
  void erase(iterator it) { kv_map.erase(it); }
  // Like std::map::operator[]: inserts an empty value if key is absent.
  std::string& operator[](const std::string& key) { return kv_map[key]; }
  // Merge `other` into this object.  Key/value pairs are added with
  // std::map::insert, so keys already present here keep their current
  // value; features and metric_spec are replaced by other's.
  void merge(const client_metadata_t& other) {
    kv_map.insert(other.kv_map.begin(), other.kv_map.end());
    features = other.features;
    metric_spec = other.metric_spec;
  }
  void clear() {
    kv_map.clear();
    features.clear();
    metric_spec.clear();
  }

  void encode(bufferlist& bl) const;
  void decode(bufferlist::const_iterator& p);
  void dump(Formatter *f) const;

  kv_map_t kv_map;
  feature_bitset_t features;
  metric_spec_t metric_spec;
};
WRITE_CLASS_ENCODER(client_metadata_t)
7c673cae FG |
1206 | |
/*
 * session_info_t - durable part of a Session
 */
struct session_info_t {
  // client id, built from the numeric part of the peer's entity name
  client_t get_client() const { return client_t(inst.name.num()); }
  // whether feature bit `bit` is set in the client's metadata
  bool has_feature(size_t bit) const { return client_metadata.features.test(bit); }
  const entity_name_t& get_source() const { return inst.name; }

  // Reset the per-session bookkeeping; inst and auth_name are left as-is.
  void clear_meta() {
    prealloc_inos.clear();
    used_inos.clear();
    completed_requests.clear();
    completed_flushes.clear();
    client_metadata.clear();
  }

  void encode(bufferlist& bl, uint64_t features) const;
  void decode(bufferlist::const_iterator& p);
  void dump(Formatter *f) const;
  // appends heap-allocated sample instances to ls
  static void generate_test_instances(std::list<session_info_t*>& ls);

  entity_inst_t inst;
  std::map<ceph_tid_t,inodeno_t> completed_requests;
  interval_set<inodeno_t> prealloc_inos;   // preallocated, ready to use.
  interval_set<inodeno_t> used_inos;       // journaling use
  client_metadata_t client_metadata;
  std::set<ceph_tid_t> completed_flushes;
  EntityName auth_name;
};
WRITE_CLASS_ENCODER_FEATURES(session_info_t)
1237 | ||
7c673cae | 1238 | // dentries |
7c673cae | 1239 | struct dentry_key_t { |
94b18763 | 1240 | dentry_key_t() {} |
11fdf7f2 | 1241 | dentry_key_t(snapid_t s, std::string_view n, __u32 h=0) : |
7c673cae FG |
1242 | snapid(s), name(n), hash(h) {} |
1243 | ||
94b18763 | 1244 | bool is_valid() { return name.length() || snapid; } |
7c673cae FG |
1245 | |
1246 | // encode into something that can be decoded as a string. | |
1247 | // name_ (head) or name_%x (!head) | |
1248 | void encode(bufferlist& bl) const { | |
1249 | string key; | |
1250 | encode(key); | |
11fdf7f2 TL |
1251 | using ceph::encode; |
1252 | encode(key, bl); | |
7c673cae FG |
1253 | } |
1254 | void encode(string& key) const { | |
1255 | char b[20]; | |
1256 | if (snapid != CEPH_NOSNAP) { | |
1257 | uint64_t val(snapid); | |
1258 | snprintf(b, sizeof(b), "%" PRIx64, val); | |
1259 | } else { | |
1260 | snprintf(b, sizeof(b), "%s", "head"); | |
1261 | } | |
1262 | ostringstream oss; | |
1263 | oss << name << "_" << b; | |
1264 | key = oss.str(); | |
1265 | } | |
11fdf7f2 | 1266 | static void decode_helper(bufferlist::const_iterator& bl, string& nm, snapid_t& sn) { |
7c673cae | 1267 | string key; |
11fdf7f2 | 1268 | decode(key, bl); |
7c673cae FG |
1269 | decode_helper(key, nm, sn); |
1270 | } | |
11fdf7f2 | 1271 | static void decode_helper(std::string_view key, string& nm, snapid_t& sn) { |
7c673cae | 1272 | size_t i = key.find_last_of('_'); |
11fdf7f2 TL |
1273 | ceph_assert(i != string::npos); |
1274 | if (key.compare(i+1, std::string_view::npos, "head") == 0) { | |
7c673cae FG |
1275 | // name_head |
1276 | sn = CEPH_NOSNAP; | |
1277 | } else { | |
1278 | // name_%x | |
1279 | long long unsigned x = 0; | |
94b18763 FG |
1280 | std::string x_str(key.substr(i+1)); |
1281 | sscanf(x_str.c_str(), "%llx", &x); | |
7c673cae | 1282 | sn = x; |
9f95a23c | 1283 | } |
11fdf7f2 | 1284 | nm = key.substr(0, i); |
7c673cae | 1285 | } |
9f95a23c TL |
1286 | |
1287 | snapid_t snapid = 0; | |
1288 | std::string_view name; | |
1289 | __u32 hash = 0; | |
7c673cae FG |
1290 | }; |
1291 | ||
1292 | inline std::ostream& operator<<(std::ostream& out, const dentry_key_t &k) | |
1293 | { | |
1294 | return out << "(" << k.name << "," << k.snapid << ")"; | |
1295 | } | |
1296 | ||
1297 | inline bool operator<(const dentry_key_t& k1, const dentry_key_t& k2) | |
1298 | { | |
1299 | /* | |
1300 | * order by hash, name, snap | |
1301 | */ | |
1302 | int c = ceph_frag_value(k1.hash) - ceph_frag_value(k2.hash); | |
1303 | if (c) | |
1304 | return c < 0; | |
94b18763 | 1305 | c = k1.name.compare(k2.name); |
7c673cae FG |
1306 | if (c) |
1307 | return c < 0; | |
1308 | return k1.snapid < k2.snapid; | |
1309 | } | |
1310 | ||
7c673cae FG |
/*
 * string_snap_t is a simple (string, snapid_t) pair
 *
 * The name is stored as an owning string (copied from the string_view
 * passed to the constructor).
 */
struct string_snap_t {
  string_snap_t() {}
  string_snap_t(std::string_view n, snapid_t s) : name(n), snapid(s) {}

  void encode(bufferlist& bl) const;
  void decode(bufferlist::const_iterator& p);
  void dump(Formatter *f) const;
  // appends heap-allocated sample instances to ls
  static void generate_test_instances(std::list<string_snap_t*>& ls);

  string name;
  snapid_t snapid;
};
WRITE_CLASS_ENCODER(string_snap_t)
1327 | ||
1328 | inline bool operator<(const string_snap_t& l, const string_snap_t& r) { | |
94b18763 | 1329 | int c = l.name.compare(r.name); |
7c673cae FG |
1330 | return c < 0 || (c == 0 && l.snapid < r.snapid); |
1331 | } | |
1332 | ||
1333 | inline std::ostream& operator<<(std::ostream& out, const string_snap_t &k) | |
1334 | { | |
1335 | return out << "(" << k.name << "," << k.snapid << ")"; | |
1336 | } | |
1337 | ||
/*
 * mds_table_pending_t
 *
 * For mds's requesting any pending ops, child needs to encode the corresponding
 * pending mutation state in the table.
 *
 * Plain record of three values: a request id, an mds number and a tid.
 */
struct mds_table_pending_t {
  void encode(bufferlist& bl) const;
  void decode(bufferlist::const_iterator& bl);
  void dump(Formatter *f) const;
  // appends heap-allocated sample instances to ls
  static void generate_test_instances(std::list<mds_table_pending_t*>& ls);

  uint64_t reqid = 0;
  __s32 mds = 0;        // NOTE(review): presumably the rank of the requesting mds -- confirm
  version_t tid = 0;
};
WRITE_CLASS_ENCODER(mds_table_pending_t)
1355 | ||
7c673cae | 1356 | // requests |
// Request identifier: the entity name of the requester plus a tid.
// Encoded as name followed by tid.
struct metareqid_t {
  metareqid_t() {}
  metareqid_t(entity_name_t n, ceph_tid_t t) : name(n), tid(t) {}
  void encode(bufferlist& bl) const {
    using ceph::encode;
    encode(name, bl);
    encode(tid, bl);
  }
  // Reads the fields back in the order encode() wrote them.
  void decode(bufferlist::const_iterator &p) {
    using ceph::decode;
    decode(name, p);
    decode(tid, p);
  }

  entity_name_t name;
  uint64_t tid = 0;
};
WRITE_CLASS_ENCODER(metareqid_t)
1375 | ||
1376 | inline std::ostream& operator<<(std::ostream& out, const metareqid_t& r) { | |
1377 | return out << r.name << ":" << r.tid; | |
1378 | } | |
1379 | ||
1380 | inline bool operator==(const metareqid_t& l, const metareqid_t& r) { | |
1381 | return (l.name == r.name) && (l.tid == r.tid); | |
1382 | } | |
1383 | inline bool operator!=(const metareqid_t& l, const metareqid_t& r) { | |
1384 | return (l.name != r.name) || (l.tid != r.tid); | |
1385 | } | |
1386 | inline bool operator<(const metareqid_t& l, const metareqid_t& r) { | |
1387 | return (l.name < r.name) || | |
1388 | (l.name == r.name && l.tid < r.tid); | |
1389 | } | |
1390 | inline bool operator<=(const metareqid_t& l, const metareqid_t& r) { | |
1391 | return (l.name < r.name) || | |
1392 | (l.name == r.name && l.tid <= r.tid); | |
1393 | } | |
1394 | inline bool operator>(const metareqid_t& l, const metareqid_t& r) { return !(l <= r); } | |
1395 | inline bool operator>=(const metareqid_t& l, const metareqid_t& r) { return !(l < r); } | |
1396 | ||
1397 | namespace std { | |
1398 | template<> struct hash<metareqid_t> { | |
1399 | size_t operator()(const metareqid_t &r) const { | |
1400 | hash<uint64_t> H; | |
1401 | return H(r.name.num()) ^ H(r.name.type()) ^ H(r.tid); | |
1402 | } | |
1403 | }; | |
1404 | } // namespace std | |
1405 | ||
7c673cae FG |
// cap info for client reconnect
//
// Carries a path, a ceph_mds_cap_reconnect record, a snap_follows snapid
// and a buffer of file-lock data.
struct cap_reconnect_t {
  cap_reconnect_t() {}
  // Fills capinfo from the arguments (pino goes to pathbase, sr to
  // snaprealm, w/i to wanted/issued), sets flock_len to 0 and takes the
  // contents of lb via flockbl.claim(lb).
  // NOTE(review): claim() presumably leaves lb empty -- confirm against
  // bufferlist.
  cap_reconnect_t(uint64_t cap_id, inodeno_t pino, std::string_view p, int w, int i,
		  inodeno_t sr, snapid_t sf, bufferlist& lb) :
    path(p) {
    capinfo.cap_id = cap_id;
    capinfo.wanted = w;
    capinfo.issued = i;
    capinfo.snaprealm = sr;
    capinfo.pathbase = pino;
    capinfo.flock_len = 0;
    snap_follows = sf;
    flockbl.claim(lb);
  }
  void encode(bufferlist& bl) const;
  void decode(bufferlist::const_iterator& bl);
  // the *_old variants are declared alongside the current ones
  // NOTE(review): presumably the legacy wire format -- confirm in the .cc
  void encode_old(bufferlist& bl) const;
  void decode_old(bufferlist::const_iterator& bl);

  void dump(Formatter *f) const;
  // appends heap-allocated sample instances to ls
  static void generate_test_instances(std::list<cap_reconnect_t*>& ls);

  string path;
  // mutable: can be modified from const member functions
  mutable ceph_mds_cap_reconnect capinfo = {};
  snapid_t snap_follows = 0;
  bufferlist flockbl;
};
WRITE_CLASS_ENCODER(cap_reconnect_t)
1435 | ||
// Wrapper around a ceph_mds_snaprealm_reconnect record (ino, seq, parent).
struct snaprealm_reconnect_t {
  snaprealm_reconnect_t() {}
  snaprealm_reconnect_t(inodeno_t ino, snapid_t seq, inodeno_t parent) {
    realm.ino = ino;
    realm.seq = seq;
    realm.parent = parent;
  }
  void encode(bufferlist& bl) const;
  void decode(bufferlist::const_iterator& bl);
  // the *_old variants are declared alongside the current ones
  // NOTE(review): presumably the legacy wire format -- confirm in the .cc
  void encode_old(bufferlist& bl) const;
  void decode_old(bufferlist::const_iterator& bl);

  void dump(Formatter *f) const;
  // appends heap-allocated sample instances to ls
  static void generate_test_instances(std::list<snaprealm_reconnect_t*>& ls);

  // zero-initialized; mutable so const member functions may modify it
  mutable ceph_mds_snaprealm_reconnect realm = {};
};
WRITE_CLASS_ENCODER(snaprealm_reconnect_t)
7c673cae FG |
1454 | |
// compat for pre-FLOCK feature
//
// Packed record with fixed-width little-endian fields; it is serialized
// through WRITE_RAW_ENCODER, so the field order and packing below define
// the encoded form.
struct old_ceph_mds_cap_reconnect {
	ceph_le64 cap_id;
	ceph_le32 wanted;
	ceph_le32 issued;
	ceph_le64 old_size;
	struct ceph_timespec old_mtime, old_atime;
	ceph_le64 snaprealm;
	ceph_le64 pathbase;        /* base ino for our path to this ino */
} __attribute__ ((packed));
WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect)
1466 | ||
1467 | struct old_cap_reconnect_t { | |
7c673cae FG |
1468 | const old_cap_reconnect_t& operator=(const cap_reconnect_t& n) { |
1469 | path = n.path; | |
1470 | capinfo.cap_id = n.capinfo.cap_id; | |
1471 | capinfo.wanted = n.capinfo.wanted; | |
1472 | capinfo.issued = n.capinfo.issued; | |
1473 | capinfo.snaprealm = n.capinfo.snaprealm; | |
1474 | capinfo.pathbase = n.capinfo.pathbase; | |
1475 | return *this; | |
1476 | } | |
1477 | operator cap_reconnect_t() { | |
1478 | cap_reconnect_t n; | |
1479 | n.path = path; | |
1480 | n.capinfo.cap_id = capinfo.cap_id; | |
1481 | n.capinfo.wanted = capinfo.wanted; | |
1482 | n.capinfo.issued = capinfo.issued; | |
1483 | n.capinfo.snaprealm = capinfo.snaprealm; | |
1484 | n.capinfo.pathbase = capinfo.pathbase; | |
1485 | return n; | |
1486 | } | |
1487 | ||
1488 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
1489 | using ceph::encode; |
1490 | encode(path, bl); | |
1491 | encode(capinfo, bl); | |
7c673cae | 1492 | } |
11fdf7f2 TL |
1493 | void decode(bufferlist::const_iterator& bl) { |
1494 | using ceph::decode; | |
1495 | decode(path, bl); | |
1496 | decode(capinfo, bl); | |
7c673cae | 1497 | } |
9f95a23c TL |
1498 | |
1499 | string path; | |
1500 | old_ceph_mds_cap_reconnect capinfo; | |
7c673cae FG |
1501 | }; |
1502 | WRITE_CLASS_ENCODER(old_cap_reconnect_t) | |
1503 | ||
7c673cae | 1504 | // dir frag |
// Identifies a directory fragment: the directory's inode number plus a
// frag_t.  Encoded as ino followed by frag.
struct dirfrag_t {
  dirfrag_t() {}
  dirfrag_t(inodeno_t i, frag_t f) : ino(i), frag(f) { }

  void encode(bufferlist& bl) const {
    using ceph::encode;
    encode(ino, bl);
    encode(frag, bl);
  }
  // Reads the fields back in the order encode() wrote them.
  void decode(bufferlist::const_iterator& bl) {
    using ceph::decode;
    decode(ino, bl);
    decode(frag, bl);
  }

  inodeno_t ino = 0;
  frag_t frag;
};
WRITE_CLASS_ENCODER(dirfrag_t)
1524 | ||
7c673cae FG |
1525 | inline std::ostream& operator<<(std::ostream& out, const dirfrag_t &df) { |
1526 | out << df.ino; | |
1527 | if (!df.frag.is_root()) out << "." << df.frag; | |
1528 | return out; | |
1529 | } | |
1530 | inline bool operator<(dirfrag_t l, dirfrag_t r) { | |
1531 | if (l.ino < r.ino) return true; | |
1532 | if (l.ino == r.ino && l.frag < r.frag) return true; | |
1533 | return false; | |
1534 | } | |
1535 | inline bool operator==(dirfrag_t l, dirfrag_t r) { | |
1536 | return l.ino == r.ino && l.frag == r.frag; | |
1537 | } | |
1538 | ||
namespace std {
  // Hash of a dirfrag_t: XOR of rjhash<uint64_t> applied to the inode
  // number and rjhash<uint32_t> applied to the frag.  The two hasher
  // objects are function-local statics shared by all calls.
  template<> struct hash<dirfrag_t> {
    size_t operator()(const dirfrag_t &df) const {
      static rjhash<uint64_t> H;
      static rjhash<uint32_t> I;
      return H(df.ino) ^ I(df.frag);
    }
  };
} // namespace std
1548 | ||
7c673cae | 1549 | // ================================================================ |
7c673cae FG |
// Indices of the individual popularity counters; used to index
// dirfrag_load_vec_t::vec (see dirfrag_load_vec_t::meta_load()).
// META_NPOP is the number of counters.
#define META_POP_IRD     0
#define META_POP_IWR     1
#define META_POP_READDIR 2
#define META_POP_FETCH   3
#define META_POP_STORE   4
#define META_NPOP        5
1556 | ||
// Fixed-size vector of NUM (2) DecayCounters holding per-inode load.
class inode_load_vec_t {
public:
  using time = DecayCounter::time;
  using clock = DecayCounter::clock;
  static const size_t NUM = 2;

  // Every counter is built from a default DecayRate, or from `rate`.
  inode_load_vec_t() : vec{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {}
  inode_load_vec_t(const DecayRate &rate) : vec{DecayCounter(rate), DecayCounter(rate)} {}

  // Counter at index t; t is not range-checked.
  DecayCounter &get(int t) {
    return vec[t];
  }
  // Reset every counter.
  void zero() {
    for (auto &d : vec) {
      d.reset();
    }
  }
  void encode(bufferlist &bl) const;
  void decode(bufferlist::const_iterator& p);
  void dump(Formatter *f) const;
  // appends heap-allocated sample instances to ls
  static void generate_test_instances(std::list<inode_load_vec_t*>& ls);

private:
  std::array<DecayCounter, NUM> vec;
};
11fdf7f2 TL |
// Free-function encode() overload for inode_load_vec_t; forwards to the
// member c.encode(bl).
inline void encode(const inode_load_vec_t &c, bufferlist &bl) {
  c.encode(bl);
}
// Free-function decode() overload for inode_load_vec_t; forwards to the
// member c.decode(p).
inline void decode(inode_load_vec_t & c, bufferlist::const_iterator &p) {
  c.decode(p);
}
1588 | ||
1589 | class dirfrag_load_vec_t { | |
1590 | public: | |
11fdf7f2 TL |
1591 | using time = DecayCounter::time; |
1592 | using clock = DecayCounter::clock; | |
1593 | static const size_t NUM = 5; | |
1594 | ||
1595 | dirfrag_load_vec_t() : | |
1596 | vec{DecayCounter(DecayRate()), | |
1597 | DecayCounter(DecayRate()), | |
1598 | DecayCounter(DecayRate()), | |
1599 | DecayCounter(DecayRate()), | |
1600 | DecayCounter(DecayRate()) | |
1601 | } | |
7c673cae | 1602 | {} |
11fdf7f2 TL |
1603 | dirfrag_load_vec_t(const DecayRate &rate) : |
1604 | vec{DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate)} | |
1605 | {} | |
1606 | ||
7c673cae FG |
1607 | void encode(bufferlist &bl) const { |
1608 | ENCODE_START(2, 2, bl); | |
94b18763 | 1609 | for (const auto &i : vec) { |
11fdf7f2 | 1610 | encode(i, bl); |
94b18763 | 1611 | } |
7c673cae FG |
1612 | ENCODE_FINISH(bl); |
1613 | } | |
11fdf7f2 | 1614 | void decode(bufferlist::const_iterator &p) { |
7c673cae | 1615 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p); |
94b18763 | 1616 | for (auto &i : vec) { |
11fdf7f2 | 1617 | decode(i, p); |
94b18763 | 1618 | } |
7c673cae FG |
1619 | DECODE_FINISH(p); |
1620 | } | |
7c673cae | 1621 | void dump(Formatter *f) const; |
11fdf7f2 TL |
1622 | void dump(Formatter *f, const DecayRate& rate) const; |
1623 | static void generate_test_instances(std::list<dirfrag_load_vec_t*>& ls); | |
7c673cae | 1624 | |
11fdf7f2 TL |
1625 | const DecayCounter &get(int t) const { |
1626 | return vec[t]; | |
7c673cae | 1627 | } |
11fdf7f2 TL |
1628 | DecayCounter &get(int t) { |
1629 | return vec[t]; | |
1630 | } | |
1631 | void adjust(double d) { | |
94b18763 | 1632 | for (auto &i : vec) { |
11fdf7f2 | 1633 | i.adjust(d); |
94b18763 | 1634 | } |
7c673cae | 1635 | } |
11fdf7f2 | 1636 | void zero() { |
94b18763 | 1637 | for (auto &i : vec) { |
11fdf7f2 | 1638 | i.reset(); |
94b18763 | 1639 | } |
7c673cae | 1640 | } |
28e407b8 | 1641 | double meta_load() const { |
7c673cae | 1642 | return |
11fdf7f2 TL |
1643 | 1*vec[META_POP_IRD].get() + |
1644 | 2*vec[META_POP_IWR].get() + | |
1645 | 1*vec[META_POP_READDIR].get() + | |
1646 | 2*vec[META_POP_FETCH].get() + | |
1647 | 4*vec[META_POP_STORE].get(); | |
7c673cae FG |
1648 | } |
1649 | ||
11fdf7f2 TL |
1650 | void add(dirfrag_load_vec_t& r) { |
1651 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1652 | vec[i].adjust(r.vec[i].get()); | |
7c673cae | 1653 | } |
11fdf7f2 TL |
1654 | void sub(dirfrag_load_vec_t& r) { |
1655 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1656 | vec[i].adjust(-r.vec[i].get()); | |
7c673cae FG |
1657 | } |
1658 | void scale(double f) { | |
11fdf7f2 | 1659 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) |
7c673cae FG |
1660 | vec[i].scale(f); |
1661 | } | |
11fdf7f2 TL |
1662 | |
1663 | private: | |
1664 | friend inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl); | |
1665 | std::array<DecayCounter, NUM> vec; | |
7c673cae FG |
1666 | }; |
1667 | ||
11fdf7f2 TL |
// Free-function encode() overload for dirfrag_load_vec_t; forwards to the
// member c.encode(bl).
inline void encode(const dirfrag_load_vec_t &c, bufferlist &bl) {
  c.encode(bl);
}
// Free-function decode() overload for dirfrag_load_vec_t; forwards to the
// member c.decode(p).
inline void decode(dirfrag_load_vec_t& c, bufferlist::const_iterator &p) {
  c.decode(p);
}
1674 | ||
28e407b8 | 1675 | inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl) |
7c673cae | 1676 | { |
11fdf7f2 TL |
1677 | std::ostringstream ss; |
1678 | ss << std::setprecision(1) << std::fixed | |
1679 | << "[pop" | |
1680 | " IRD:" << dl.vec[0] | |
1681 | << " IWR:" << dl.vec[1] | |
1682 | << " RDR:" << dl.vec[2] | |
1683 | << " FET:" << dl.vec[3] | |
1684 | << " STR:" << dl.vec[4] | |
1685 | << " *LOAD:" << dl.meta_load() << "]"; | |
1686 | return out << ss.str() << std::endl; | |
7c673cae FG |
1687 | } |
1688 | ||
7c673cae | 1689 | struct mds_load_t { |
11fdf7f2 TL |
1690 | using clock = dirfrag_load_vec_t::clock; |
1691 | using time = dirfrag_load_vec_t::time; | |
1692 | ||
7c673cae FG |
1693 | dirfrag_load_vec_t auth; |
1694 | dirfrag_load_vec_t all; | |
1695 | ||
11fdf7f2 TL |
1696 | mds_load_t() : auth(DecayRate()), all(DecayRate()) {} |
1697 | mds_load_t(const DecayRate &rate) : auth(rate), all(rate) {} | |
1698 | ||
94b18763 FG |
1699 | double req_rate = 0.0; |
1700 | double cache_hit_rate = 0.0; | |
1701 | double queue_len = 0.0; | |
7c673cae | 1702 | |
94b18763 | 1703 | double cpu_load_avg = 0.0; |
7c673cae | 1704 | |
11fdf7f2 | 1705 | double mds_load() const; // defiend in MDBalancer.cc |
7c673cae | 1706 | void encode(bufferlist& bl) const; |
11fdf7f2 | 1707 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 1708 | void dump(Formatter *f) const; |
11fdf7f2 | 1709 | static void generate_test_instances(std::list<mds_load_t*>& ls); |
7c673cae | 1710 | }; |
11fdf7f2 TL |
1711 | inline void encode(const mds_load_t &c, bufferlist &bl) { |
1712 | c.encode(bl); | |
7c673cae | 1713 | } |
11fdf7f2 TL |
1714 | inline void decode(mds_load_t &c, bufferlist::const_iterator &p) { |
1715 | c.decode(p); | |
7c673cae FG |
1716 | } |
1717 | ||
28e407b8 | 1718 | inline std::ostream& operator<<(std::ostream& out, const mds_load_t& load) |
7c673cae FG |
1719 | { |
1720 | return out << "mdsload<" << load.auth << "/" << load.all | |
1721 | << ", req " << load.req_rate | |
1722 | << ", hr " << load.cache_hit_rate | |
1723 | << ", qlen " << load.queue_len | |
1724 | << ", cpu " << load.cpu_load_avg | |
1725 | << ">"; | |
1726 | } | |
1727 | ||
1728 | class load_spread_t { | |
1729 | public: | |
11fdf7f2 TL |
1730 | using time = DecayCounter::time; |
1731 | using clock = DecayCounter::clock; | |
7c673cae | 1732 | static const int MAX = 4; |
7c673cae | 1733 | |
11fdf7f2 | 1734 | load_spread_t(const DecayRate &rate) : count(rate) |
9f95a23c TL |
1735 | {} |
1736 | ||
1737 | load_spread_t() = delete; | |
7c673cae | 1738 | |
11fdf7f2 | 1739 | double hit(int who) { |
7c673cae FG |
1740 | for (int i=0; i<n; i++) |
1741 | if (last[i] == who) | |
1742 | return count.get_last(); | |
1743 | ||
1744 | // we're new(ish) | |
1745 | last[p++] = who; | |
1746 | if (n < MAX) n++; | |
1747 | if (n == 1) return 0.0; | |
1748 | ||
1749 | if (p == MAX) p = 0; | |
1750 | ||
11fdf7f2 | 1751 | return count.hit(); |
7c673cae | 1752 | } |
11fdf7f2 TL |
1753 | double get() const { |
1754 | return count.get(); | |
7c673cae | 1755 | } |
7c673cae | 1756 | |
9f95a23c TL |
1757 | std::array<int, MAX> last = {-1, -1, -1, -1}; |
1758 | int p = 0, n = 0; | |
1759 | DecayCounter count; | |
1760 | }; | |
7c673cae FG |
1761 | |
1762 | // ================================================================ | |
1763 | typedef std::pair<mds_rank_t, mds_rank_t> mds_authority_t; | |
1764 | ||
1765 | // -- authority delegation -- | |
1766 | // directory authority types | |
1767 | // >= 0 is the auth mds | |
1768 | #define CDIR_AUTH_PARENT mds_rank_t(-1) // default | |
1769 | #define CDIR_AUTH_UNKNOWN mds_rank_t(-2) | |
1770 | #define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN) | |
1771 | #define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN) | |
1772 | //#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2) | |
1773 | ||
1774 | class MDSCacheObjectInfo { | |
1775 | public: | |
9f95a23c TL |
1776 | void encode(bufferlist& bl) const; |
1777 | void decode(bufferlist::const_iterator& bl); | |
1778 | void dump(Formatter *f) const; | |
1779 | static void generate_test_instances(std::list<MDSCacheObjectInfo*>& ls); | |
1780 | ||
94b18763 | 1781 | inodeno_t ino = 0; |
7c673cae FG |
1782 | dirfrag_t dirfrag; |
1783 | string dname; | |
1784 | snapid_t snapid; | |
7c673cae FG |
1785 | }; |
1786 | ||
1787 | inline std::ostream& operator<<(std::ostream& out, const MDSCacheObjectInfo &info) { | |
1788 | if (info.ino) return out << info.ino << "." << info.snapid; | |
1789 | if (info.dname.length()) return out << info.dirfrag << "/" << info.dname | |
1790 | << " snap " << info.snapid; | |
1791 | return out << info.dirfrag; | |
1792 | } | |
1793 | ||
1794 | inline bool operator==(const MDSCacheObjectInfo& l, const MDSCacheObjectInfo& r) { | |
1795 | if (l.ino || r.ino) | |
1796 | return l.ino == r.ino && l.snapid == r.snapid; | |
1797 | else | |
1798 | return l.dirfrag == r.dirfrag && l.dname == r.dname; | |
1799 | } | |
1800 | WRITE_CLASS_ENCODER(MDSCacheObjectInfo) | |
1801 | ||
7c673cae FG |
1802 | // parse a map of keys/values. |
1803 | namespace qi = boost::spirit::qi; | |
1804 | ||
1805 | template <typename Iterator> | |
1806 | struct keys_and_values | |
1807 | : qi::grammar<Iterator, std::map<string, string>()> | |
1808 | { | |
1809 | keys_and_values() | |
1810 | : keys_and_values::base_type(query) | |
1811 | { | |
1812 | query = pair >> *(qi::lit(' ') >> pair); | |
1813 | pair = key >> '=' >> value; | |
1814 | key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9"); | |
e306af50 | 1815 | value = +qi::char_("a-zA-Z0-9-_."); |
7c673cae FG |
1816 | } |
1817 | qi::rule<Iterator, std::map<string, string>()> query; | |
1818 | qi::rule<Iterator, std::pair<string, string>()> pair; | |
1819 | qi::rule<Iterator, string()> key, value; | |
1820 | }; | |
1821 | ||
1822 | #endif |