]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | #ifndef CEPH_MDSTYPES_H | |
4 | #define CEPH_MDSTYPES_H | |
5 | ||
6 | #include "include/int_types.h" | |
7 | ||
8 | #include <math.h> | |
9 | #include <ostream> | |
10 | #include <set> | |
11 | #include <map> | |
11fdf7f2 | 12 | #include <string_view> |
7c673cae FG |
13 | |
14 | #include "common/config.h" | |
15 | #include "common/Clock.h" | |
16 | #include "common/DecayCounter.h" | |
17 | #include "common/entity_name.h" | |
18 | ||
19 | #include "include/Context.h" | |
20 | #include "include/frag.h" | |
21 | #include "include/xlist.h" | |
22 | #include "include/interval_set.h" | |
23 | #include "include/compact_map.h" | |
24 | #include "include/compact_set.h" | |
25 | #include "include/fs_types.h" | |
26 | ||
27 | #include "inode_backtrace.h" | |
28 | ||
29 | #include <boost/spirit/include/qi.hpp> | |
30 | #include <boost/pool/pool.hpp> | |
11fdf7f2 | 31 | #include "include/ceph_assert.h" |
7c673cae FG |
32 | #include <boost/serialization/strong_typedef.hpp> |
33 | ||
// On-disk magic string identifying the filesystem format.
#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011"

#define MDS_PORT_CACHE   0x200
#define MDS_PORT_LOCKER  0x300
#define MDS_PORT_MIGRATOR 0x400

// Size of each per-MDS inode-number range below, and number of stray
// directories reserved per MDS.
#define MAX_MDS                   0x100
#define NUM_STRAY                 10

#define MDS_INO_ROOT              1

// No longer created but recognised in existing filesystems
// so that we don't try to fragment it.
#define MDS_INO_CEPH              2

#define MDS_INO_GLOBAL_SNAPREALM  3

#define MDS_INO_MDSDIR_OFFSET     (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET      (6*MAX_MDS)

// Locations for journal data
#define MDS_INO_LOG_OFFSET        (2*MAX_MDS)
#define MDS_INO_LOG_BACKUP_OFFSET (3*MAX_MDS)
#define MDS_INO_LOG_POINTER_OFFSET    (4*MAX_MDS)
#define MDS_INO_PURGE_QUEUE       (5*MAX_MDS)

// First inode number past the stray range (stray offset + MAX_MDS*NUM_STRAY).
#define MDS_INO_SYSTEM_BASE       ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))

// Inode number of stray directory `i` of MDS `x`.
#define MDS_INO_STRAY(x,i)  (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
// Inode number of the mdsdir of MDS `x`.  The argument is parenthesised
// before the cast so that expression arguments (e.g. `a / 4`) are converted
// as a whole rather than having the cast bind to their first operand.
#define MDS_INO_MDSDIR(x) (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

// Range tests and inverse mappings for the two macros above.
#define MDS_INO_IS_STRAY(i)  ((i) >= MDS_INO_STRAY_OFFSET  && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i) ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
#define MDS_INO_IS_BASE(i)   ((i) == MDS_INO_ROOT || (i) == MDS_INO_GLOBAL_SNAPREALM || MDS_INO_IS_MDSDIR(i))
#define MDS_INO_STRAY_OWNER(i) (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i) (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)
71 | ||
7c673cae | 72 | typedef int32_t mds_rank_t; |
11fdf7f2 | 73 | constexpr mds_rank_t MDS_RANK_NONE = -1; |
7c673cae FG |
74 | |
75 | BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t) | |
76 | extern const mds_gid_t MDS_GID_NONE; | |
11fdf7f2 TL |
77 | |
78 | typedef int32_t fs_cluster_id_t; | |
79 | constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = -1; | |
7c673cae | 80 | // The namespace ID of the anonymous default filesystem from legacy systems |
11fdf7f2 | 81 | constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS = 0; |
7c673cae | 82 | |
9f95a23c TL |
83 | class mds_role_t { |
84 | public: | |
7c673cae FG |
85 | mds_role_t(fs_cluster_id_t fscid_, mds_rank_t rank_) |
86 | : fscid(fscid_), rank(rank_) | |
87 | {} | |
9f95a23c TL |
88 | mds_role_t() {} |
89 | ||
90 | bool operator<(mds_role_t const &rhs) const { | |
7c673cae FG |
91 | if (fscid < rhs.fscid) { |
92 | return true; | |
93 | } else if (fscid == rhs.fscid) { | |
94 | return rank < rhs.rank; | |
95 | } else { | |
96 | return false; | |
97 | } | |
98 | } | |
99 | ||
9f95a23c | 100 | bool is_none() const { |
7c673cae FG |
101 | return (rank == MDS_RANK_NONE); |
102 | } | |
7c673cae | 103 | |
9f95a23c TL |
104 | fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE; |
105 | mds_rank_t rank = MDS_RANK_NONE; | |
106 | }; | |
107 | inline std::ostream& operator<<(std::ostream& out, const mds_role_t& role) { | |
108 | return out << role.fscid << ":" << role.rank; | |
109 | } | |
7c673cae FG |
110 | |
111 | // CAPS | |
7c673cae FG |
112 | inline string gcap_string(int cap) |
113 | { | |
114 | string s; | |
115 | if (cap & CEPH_CAP_GSHARED) s += "s"; | |
116 | if (cap & CEPH_CAP_GEXCL) s += "x"; | |
117 | if (cap & CEPH_CAP_GCACHE) s += "c"; | |
118 | if (cap & CEPH_CAP_GRD) s += "r"; | |
119 | if (cap & CEPH_CAP_GWR) s += "w"; | |
120 | if (cap & CEPH_CAP_GBUFFER) s += "b"; | |
121 | if (cap & CEPH_CAP_GWREXTEND) s += "a"; | |
122 | if (cap & CEPH_CAP_GLAZYIO) s += "l"; | |
123 | return s; | |
124 | } | |
125 | inline string ccap_string(int cap) | |
126 | { | |
127 | string s; | |
128 | if (cap & CEPH_CAP_PIN) s += "p"; | |
129 | ||
130 | int a = (cap >> CEPH_CAP_SAUTH) & 3; | |
131 | if (a) s += 'A' + gcap_string(a); | |
132 | ||
133 | a = (cap >> CEPH_CAP_SLINK) & 3; | |
134 | if (a) s += 'L' + gcap_string(a); | |
135 | ||
136 | a = (cap >> CEPH_CAP_SXATTR) & 3; | |
137 | if (a) s += 'X' + gcap_string(a); | |
138 | ||
139 | a = cap >> CEPH_CAP_SFILE; | |
140 | if (a) s += 'F' + gcap_string(a); | |
141 | ||
142 | if (s.length() == 0) | |
143 | s = "-"; | |
144 | return s; | |
145 | } | |
146 | ||
7c673cae | 147 | struct scatter_info_t { |
94b18763 | 148 | version_t version = 0; |
7c673cae FG |
149 | }; |
150 | ||
151 | struct frag_info_t : public scatter_info_t { | |
7c673cae FG |
152 | int64_t size() const { return nfiles + nsubdirs; } |
153 | ||
154 | void zero() { | |
155 | *this = frag_info_t(); | |
156 | } | |
157 | ||
158 | // *this += cur - acc; | |
159 | void add_delta(const frag_info_t &cur, const frag_info_t &acc, bool *touched_mtime=0, bool *touched_chattr=0) { | |
160 | if (cur.mtime > mtime) { | |
161 | mtime = cur.mtime; | |
162 | if (touched_mtime) | |
163 | *touched_mtime = true; | |
164 | } | |
165 | if (cur.change_attr > change_attr) { | |
166 | change_attr = cur.change_attr; | |
167 | if (touched_chattr) | |
168 | *touched_chattr = true; | |
169 | } | |
170 | nfiles += cur.nfiles - acc.nfiles; | |
171 | nsubdirs += cur.nsubdirs - acc.nsubdirs; | |
172 | } | |
173 | ||
174 | void add(const frag_info_t& other) { | |
175 | if (other.mtime > mtime) | |
176 | mtime = other.mtime; | |
177 | if (other.change_attr > change_attr) | |
178 | change_attr = other.change_attr; | |
179 | nfiles += other.nfiles; | |
180 | nsubdirs += other.nsubdirs; | |
181 | } | |
182 | ||
183 | bool same_sums(const frag_info_t &o) const { | |
184 | return mtime <= o.mtime && | |
185 | nfiles == o.nfiles && | |
186 | nsubdirs == o.nsubdirs; | |
187 | } | |
188 | ||
189 | void encode(bufferlist &bl) const; | |
11fdf7f2 | 190 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 191 | void dump(Formatter *f) const; |
9f95a23c TL |
192 | static void generate_test_instances(std::list<frag_info_t*>& ls); |
193 | ||
194 | // this frag | |
195 | utime_t mtime; | |
196 | uint64_t change_attr = 0; | |
197 | int64_t nfiles = 0; // files | |
198 | int64_t nsubdirs = 0; // subdirs | |
7c673cae FG |
199 | }; |
200 | WRITE_CLASS_ENCODER(frag_info_t) | |
201 | ||
202 | inline bool operator==(const frag_info_t &l, const frag_info_t &r) { | |
203 | return memcmp(&l, &r, sizeof(l)) == 0; | |
204 | } | |
205 | inline bool operator!=(const frag_info_t &l, const frag_info_t &r) { | |
206 | return !(l == r); | |
207 | } | |
208 | ||
209 | std::ostream& operator<<(std::ostream &out, const frag_info_t &f); | |
210 | ||
211 | ||
212 | struct nest_info_t : public scatter_info_t { | |
7c673cae FG |
213 | int64_t rsize() const { return rfiles + rsubdirs; } |
214 | ||
7c673cae FG |
215 | void zero() { |
216 | *this = nest_info_t(); | |
217 | } | |
218 | ||
219 | void sub(const nest_info_t &other) { | |
220 | add(other, -1); | |
221 | } | |
222 | void add(const nest_info_t &other, int fac=1) { | |
223 | if (other.rctime > rctime) | |
224 | rctime = other.rctime; | |
225 | rbytes += fac*other.rbytes; | |
226 | rfiles += fac*other.rfiles; | |
227 | rsubdirs += fac*other.rsubdirs; | |
11fdf7f2 | 228 | rsnaps += fac*other.rsnaps; |
7c673cae FG |
229 | } |
230 | ||
231 | // *this += cur - acc; | |
232 | void add_delta(const nest_info_t &cur, const nest_info_t &acc) { | |
233 | if (cur.rctime > rctime) | |
234 | rctime = cur.rctime; | |
235 | rbytes += cur.rbytes - acc.rbytes; | |
236 | rfiles += cur.rfiles - acc.rfiles; | |
237 | rsubdirs += cur.rsubdirs - acc.rsubdirs; | |
11fdf7f2 | 238 | rsnaps += cur.rsnaps - acc.rsnaps; |
7c673cae FG |
239 | } |
240 | ||
241 | bool same_sums(const nest_info_t &o) const { | |
242 | return rctime <= o.rctime && | |
243 | rbytes == o.rbytes && | |
244 | rfiles == o.rfiles && | |
245 | rsubdirs == o.rsubdirs && | |
11fdf7f2 | 246 | rsnaps == o.rsnaps; |
7c673cae FG |
247 | } |
248 | ||
249 | void encode(bufferlist &bl) const; | |
11fdf7f2 | 250 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 251 | void dump(Formatter *f) const; |
9f95a23c TL |
252 | static void generate_test_instances(std::list<nest_info_t*>& ls); |
253 | ||
254 | // this frag + children | |
255 | utime_t rctime; | |
256 | int64_t rbytes = 0; | |
257 | int64_t rfiles = 0; | |
258 | int64_t rsubdirs = 0; | |
259 | int64_t rsnaps = 0; | |
7c673cae FG |
260 | }; |
261 | WRITE_CLASS_ENCODER(nest_info_t) | |
262 | ||
263 | inline bool operator==(const nest_info_t &l, const nest_info_t &r) { | |
264 | return memcmp(&l, &r, sizeof(l)) == 0; | |
265 | } | |
266 | inline bool operator!=(const nest_info_t &l, const nest_info_t &r) { | |
267 | return !(l == r); | |
268 | } | |
269 | ||
270 | std::ostream& operator<<(std::ostream &out, const nest_info_t &n); | |
271 | ||
7c673cae | 272 | struct vinodeno_t { |
7c673cae FG |
273 | vinodeno_t() {} |
274 | vinodeno_t(inodeno_t i, snapid_t s) : ino(i), snapid(s) {} | |
275 | ||
276 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
277 | using ceph::encode; |
278 | encode(ino, bl); | |
279 | encode(snapid, bl); | |
7c673cae | 280 | } |
11fdf7f2 TL |
281 | void decode(bufferlist::const_iterator& p) { |
282 | using ceph::decode; | |
283 | decode(ino, p); | |
284 | decode(snapid, p); | |
7c673cae | 285 | } |
9f95a23c TL |
286 | |
287 | inodeno_t ino; | |
288 | snapid_t snapid; | |
7c673cae FG |
289 | }; |
290 | WRITE_CLASS_ENCODER(vinodeno_t) | |
291 | ||
292 | inline bool operator==(const vinodeno_t &l, const vinodeno_t &r) { | |
293 | return l.ino == r.ino && l.snapid == r.snapid; | |
294 | } | |
295 | inline bool operator!=(const vinodeno_t &l, const vinodeno_t &r) { | |
296 | return !(l == r); | |
297 | } | |
298 | inline bool operator<(const vinodeno_t &l, const vinodeno_t &r) { | |
299 | return | |
300 | l.ino < r.ino || | |
301 | (l.ino == r.ino && l.snapid < r.snapid); | |
302 | } | |
303 | ||
304 | struct quota_info_t | |
305 | { | |
7c673cae FG |
306 | void encode(bufferlist& bl) const { |
307 | ENCODE_START(1, 1, bl); | |
11fdf7f2 TL |
308 | encode(max_bytes, bl); |
309 | encode(max_files, bl); | |
7c673cae FG |
310 | ENCODE_FINISH(bl); |
311 | } | |
11fdf7f2 | 312 | void decode(bufferlist::const_iterator& p) { |
7c673cae | 313 | DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p); |
11fdf7f2 TL |
314 | decode(max_bytes, p); |
315 | decode(max_files, p); | |
7c673cae FG |
316 | DECODE_FINISH(p); |
317 | } | |
318 | ||
319 | void dump(Formatter *f) const; | |
9f95a23c | 320 | static void generate_test_instances(std::list<quota_info_t *>& ls); |
7c673cae FG |
321 | |
322 | bool is_valid() const { | |
323 | return max_bytes >=0 && max_files >=0; | |
324 | } | |
325 | bool is_enable() const { | |
326 | return max_bytes || max_files; | |
327 | } | |
9f95a23c TL |
328 | |
329 | int64_t max_bytes = 0; | |
330 | int64_t max_files = 0; | |
7c673cae FG |
331 | }; |
332 | WRITE_CLASS_ENCODER(quota_info_t) | |
333 | ||
334 | inline bool operator==(const quota_info_t &l, const quota_info_t &r) { | |
335 | return memcmp(&l, &r, sizeof(l)) == 0; | |
336 | } | |
337 | ||
338 | ostream& operator<<(ostream &out, const quota_info_t &n); | |
339 | ||
340 | namespace std { | |
341 | template<> struct hash<vinodeno_t> { | |
342 | size_t operator()(const vinodeno_t &vino) const { | |
343 | hash<inodeno_t> H; | |
344 | hash<uint64_t> I; | |
345 | return H(vino.ino) ^ I(vino.snapid); | |
346 | } | |
347 | }; | |
9f95a23c | 348 | } |
7c673cae FG |
349 | |
350 | inline std::ostream& operator<<(std::ostream &out, const vinodeno_t &vino) { | |
351 | out << vino.ino; | |
352 | if (vino.snapid == CEPH_NOSNAP) | |
353 | out << ".head"; | |
354 | else if (vino.snapid) | |
355 | out << '.' << vino.snapid; | |
356 | return out; | |
357 | } | |
358 | ||
7c673cae FG |
359 | struct client_writeable_range_t { |
360 | struct byte_range_t { | |
94b18763 | 361 | uint64_t first = 0, last = 0; // interval client can write to |
7c673cae FG |
362 | }; |
363 | ||
7c673cae | 364 | void encode(bufferlist &bl) const; |
11fdf7f2 | 365 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 366 | void dump(Formatter *f) const; |
94b18763 | 367 | static void generate_test_instances(std::list<client_writeable_range_t*>& ls); |
9f95a23c TL |
368 | |
369 | byte_range_t range; | |
370 | snapid_t follows = 0; // aka "data+metadata flushed thru" | |
7c673cae FG |
371 | }; |
372 | ||
11fdf7f2 TL |
373 | inline void decode(client_writeable_range_t::byte_range_t& range, bufferlist::const_iterator& bl) { |
374 | decode(range.first, bl); | |
375 | decode(range.last, bl); | |
7c673cae FG |
376 | } |
377 | ||
378 | WRITE_CLASS_ENCODER(client_writeable_range_t) | |
379 | ||
380 | std::ostream& operator<<(std::ostream& out, const client_writeable_range_t& r); | |
381 | ||
382 | inline bool operator==(const client_writeable_range_t& l, | |
383 | const client_writeable_range_t& r) { | |
384 | return l.range.first == r.range.first && l.range.last == r.range.last && | |
385 | l.follows == r.follows; | |
386 | } | |
387 | ||
388 | struct inline_data_t { | |
7c673cae | 389 | public: |
94b18763 | 390 | inline_data_t() {} |
7c673cae FG |
391 | inline_data_t(const inline_data_t& o) : version(o.version) { |
392 | if (o.blp) | |
393 | get_data() = *o.blp; | |
394 | } | |
395 | inline_data_t& operator=(const inline_data_t& o) { | |
396 | version = o.version; | |
397 | if (o.blp) | |
398 | get_data() = *o.blp; | |
399 | else | |
400 | free_data(); | |
401 | return *this; | |
402 | } | |
9f95a23c TL |
403 | |
404 | void free_data() { | |
405 | blp.reset(); | |
406 | } | |
407 | bufferlist& get_data() { | |
408 | if (!blp) | |
409 | blp.reset(new bufferlist); | |
410 | return *blp; | |
411 | } | |
412 | size_t length() const { return blp ? blp->length() : 0; } | |
413 | ||
7c673cae FG |
414 | bool operator==(const inline_data_t& o) const { |
415 | return length() == o.length() && | |
416 | (length() == 0 || | |
417 | (*const_cast<bufferlist*>(blp.get()) == *const_cast<bufferlist*>(o.blp.get()))); | |
418 | } | |
419 | bool operator!=(const inline_data_t& o) const { | |
420 | return !(*this == o); | |
421 | } | |
422 | void encode(bufferlist &bl) const; | |
11fdf7f2 | 423 | void decode(bufferlist::const_iterator& bl); |
9f95a23c TL |
424 | |
425 | version_t version = 1; | |
426 | ||
427 | private: | |
428 | std::unique_ptr<bufferlist> blp; | |
7c673cae FG |
429 | }; |
430 | WRITE_CLASS_ENCODER(inline_data_t) | |
431 | ||
// Kinds of damage; the enumerators take consecutive values starting at 0.
enum {
  DAMAGE_STATS = 0,     // statistics (dirstat, size, etc)
  DAMAGE_RSTATS = 1,    // recursive statistics (rstat, accounted_rstat)
  DAMAGE_FRAGTREE = 2   // fragtree -- repair by searching
};
using damage_flags_t = uint32_t;
438 | ||
94b18763 | 439 | template<template<typename> class Allocator = std::allocator> |
7c673cae FG |
440 | struct inode_t { |
441 | /** | |
442 | * *************** | |
443 | * Do not forget to add any new fields to the compare() function. | |
444 | * *************** | |
445 | */ | |
94b18763 | 446 | using client_range_map = std::map<client_t,client_writeable_range_t,std::less<client_t>,Allocator<std::pair<const client_t,client_writeable_range_t>>>; |
94b18763 FG |
447 | |
448 | inode_t() | |
449 | { | |
7c673cae | 450 | clear_layout(); |
7c673cae FG |
451 | } |
452 | ||
453 | // file type | |
454 | bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; } | |
455 | bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; } | |
456 | bool is_file() const { return (mode & S_IFMT) == S_IFREG; } | |
457 | ||
458 | bool is_truncating() const { return (truncate_pending > 0); } | |
459 | void truncate(uint64_t old_size, uint64_t new_size) { | |
11fdf7f2 | 460 | ceph_assert(new_size < old_size); |
7c673cae FG |
461 | if (old_size > max_size_ever) |
462 | max_size_ever = old_size; | |
463 | truncate_from = old_size; | |
464 | size = new_size; | |
465 | rstat.rbytes = new_size; | |
466 | truncate_size = size; | |
467 | truncate_seq++; | |
468 | truncate_pending++; | |
469 | } | |
470 | ||
471 | bool has_layout() const { | |
472 | return layout != file_layout_t(); | |
473 | } | |
474 | ||
475 | void clear_layout() { | |
476 | layout = file_layout_t(); | |
477 | } | |
478 | ||
479 | uint64_t get_layout_size_increment() const { | |
480 | return layout.get_period(); | |
481 | } | |
482 | ||
483 | bool is_dirty_rstat() const { return !(rstat == accounted_rstat); } | |
484 | ||
f91f0fd5 TL |
485 | uint64_t get_client_range(client_t client) const { |
486 | auto it = client_ranges.find(client); | |
487 | return it != client_ranges.end() ? it->second.range.last : 0; | |
488 | } | |
489 | ||
7c673cae FG |
490 | uint64_t get_max_size() const { |
491 | uint64_t max = 0; | |
492 | for (std::map<client_t,client_writeable_range_t>::const_iterator p = client_ranges.begin(); | |
493 | p != client_ranges.end(); | |
494 | ++p) | |
495 | if (p->second.range.last > max) | |
496 | max = p->second.range.last; | |
497 | return max; | |
498 | } | |
499 | void set_max_size(uint64_t new_max) { | |
500 | if (new_max == 0) { | |
501 | client_ranges.clear(); | |
502 | } else { | |
503 | for (std::map<client_t,client_writeable_range_t>::iterator p = client_ranges.begin(); | |
504 | p != client_ranges.end(); | |
505 | ++p) | |
506 | p->second.range.last = new_max; | |
507 | } | |
508 | } | |
509 | ||
510 | void trim_client_ranges(snapid_t last) { | |
511 | std::map<client_t, client_writeable_range_t>::iterator p = client_ranges.begin(); | |
512 | while (p != client_ranges.end()) { | |
513 | if (p->second.follows >= last) | |
514 | client_ranges.erase(p++); | |
515 | else | |
516 | ++p; | |
517 | } | |
518 | } | |
519 | ||
520 | bool is_backtrace_updated() const { | |
521 | return backtrace_version == version; | |
522 | } | |
523 | void update_backtrace(version_t pv=0) { | |
524 | backtrace_version = pv ? pv : version; | |
525 | } | |
526 | ||
527 | void add_old_pool(int64_t l) { | |
528 | backtrace_version = version; | |
529 | old_pools.insert(l); | |
530 | } | |
531 | ||
532 | void encode(bufferlist &bl, uint64_t features) const; | |
11fdf7f2 | 533 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 534 | void dump(Formatter *f) const; |
94b18763 | 535 | static void generate_test_instances(std::list<inode_t*>& ls); |
7c673cae FG |
536 | /** |
537 | * Compare this inode_t with another that represent *the same inode* | |
538 | * at different points in time. | |
539 | * @pre The inodes are the same ino | |
540 | * | |
541 | * @param other The inode_t to compare ourselves with | |
542 | * @param divergent A bool pointer which will be set to true | |
543 | * if the values are different in a way that can't be explained | |
544 | * by one being a newer version than the other. | |
545 | * | |
546 | * @returns 1 if we are newer than the other, 0 if equal, -1 if older. | |
547 | */ | |
548 | int compare(const inode_t &other, bool *divergent) const; | |
9f95a23c TL |
549 | |
550 | // base (immutable) | |
551 | inodeno_t ino = 0; | |
552 | uint32_t rdev = 0; // if special file | |
553 | ||
554 | // affected by any inode change... | |
555 | utime_t ctime; // inode change time | |
556 | utime_t btime; // birth time | |
557 | ||
558 | // perm (namespace permissions) | |
559 | uint32_t mode = 0; | |
560 | uid_t uid = 0; | |
561 | gid_t gid = 0; | |
562 | ||
563 | // nlink | |
564 | int32_t nlink = 0; | |
565 | ||
566 | // file (data access) | |
567 | ceph_dir_layout dir_layout = {}; // [dir only] | |
568 | file_layout_t layout; | |
569 | compact_set<int64_t, std::less<int64_t>, Allocator<int64_t>> old_pools; | |
570 | uint64_t size = 0; // on directory, # dentries | |
571 | uint64_t max_size_ever = 0; // max size the file has ever been | |
572 | uint32_t truncate_seq = 0; | |
573 | uint64_t truncate_size = 0, truncate_from = 0; | |
574 | uint32_t truncate_pending = 0; | |
575 | utime_t mtime; // file data modify time. | |
576 | utime_t atime; // file data access time. | |
577 | uint32_t time_warp_seq = 0; // count of (potential) mtime/atime timewarps (i.e., utimes()) | |
578 | inline_data_t inline_data; // FIXME check | |
579 | ||
580 | // change attribute | |
581 | uint64_t change_attr = 0; | |
582 | ||
583 | client_range_map client_ranges; // client(s) can write to these ranges | |
584 | ||
585 | // dirfrag, recursive accountin | |
586 | frag_info_t dirstat; // protected by my filelock | |
587 | nest_info_t rstat; // protected by my nestlock | |
588 | nest_info_t accounted_rstat; // protected by parent's nestlock | |
589 | ||
590 | quota_info_t quota; | |
591 | ||
592 | mds_rank_t export_pin = MDS_RANK_NONE; | |
593 | ||
f6b5b4d7 TL |
594 | double export_ephemeral_random_pin = 0; |
595 | bool export_ephemeral_distributed_pin = false; | |
596 | ||
9f95a23c TL |
597 | // special stuff |
598 | version_t version = 0; // auth only | |
599 | version_t file_data_version = 0; // auth only | |
600 | version_t xattr_version = 0; | |
601 | ||
602 | utime_t last_scrub_stamp; // start time of last complete scrub | |
603 | version_t last_scrub_version = 0;// (parent) start version of last complete scrub | |
604 | ||
605 | version_t backtrace_version = 0; | |
606 | ||
607 | snapid_t oldest_snap; | |
608 | ||
609 | std::basic_string<char,std::char_traits<char>,Allocator<char>> stray_prior_path; //stores path before unlink | |
610 | ||
7c673cae FG |
611 | private: |
612 | bool older_is_consistent(const inode_t &other) const; | |
613 | }; | |
7c673cae | 614 | |
94b18763 FG |
615 | // These methods may be moved back to mdstypes.cc when we have pmr |
616 | template<template<typename> class Allocator> | |
617 | void inode_t<Allocator>::encode(bufferlist &bl, uint64_t features) const | |
618 | { | |
f6b5b4d7 | 619 | ENCODE_START(16, 6, bl); |
94b18763 | 620 | |
11fdf7f2 TL |
621 | encode(ino, bl); |
622 | encode(rdev, bl); | |
623 | encode(ctime, bl); | |
94b18763 | 624 | |
11fdf7f2 TL |
625 | encode(mode, bl); |
626 | encode(uid, bl); | |
627 | encode(gid, bl); | |
94b18763 | 628 | |
11fdf7f2 | 629 | encode(nlink, bl); |
94b18763 FG |
630 | { |
631 | // removed field | |
632 | bool anchored = 0; | |
11fdf7f2 | 633 | encode(anchored, bl); |
94b18763 FG |
634 | } |
635 | ||
11fdf7f2 TL |
636 | encode(dir_layout, bl); |
637 | encode(layout, bl, features); | |
638 | encode(size, bl); | |
639 | encode(truncate_seq, bl); | |
640 | encode(truncate_size, bl); | |
641 | encode(truncate_from, bl); | |
642 | encode(truncate_pending, bl); | |
643 | encode(mtime, bl); | |
644 | encode(atime, bl); | |
645 | encode(time_warp_seq, bl); | |
646 | encode(client_ranges, bl); | |
94b18763 | 647 | |
11fdf7f2 TL |
648 | encode(dirstat, bl); |
649 | encode(rstat, bl); | |
650 | encode(accounted_rstat, bl); | |
94b18763 | 651 | |
11fdf7f2 TL |
652 | encode(version, bl); |
653 | encode(file_data_version, bl); | |
654 | encode(xattr_version, bl); | |
655 | encode(backtrace_version, bl); | |
656 | encode(old_pools, bl); | |
657 | encode(max_size_ever, bl); | |
658 | encode(inline_data, bl); | |
659 | encode(quota, bl); | |
94b18763 | 660 | |
11fdf7f2 | 661 | encode(stray_prior_path, bl); |
94b18763 | 662 | |
11fdf7f2 TL |
663 | encode(last_scrub_version, bl); |
664 | encode(last_scrub_stamp, bl); | |
94b18763 | 665 | |
11fdf7f2 TL |
666 | encode(btime, bl); |
667 | encode(change_attr, bl); | |
94b18763 | 668 | |
11fdf7f2 | 669 | encode(export_pin, bl); |
94b18763 | 670 | |
f6b5b4d7 TL |
671 | encode(export_ephemeral_random_pin, bl); |
672 | encode(export_ephemeral_distributed_pin, bl); | |
673 | ||
94b18763 FG |
674 | ENCODE_FINISH(bl); |
675 | } | |
676 | ||
677 | template<template<typename> class Allocator> | |
11fdf7f2 | 678 | void inode_t<Allocator>::decode(bufferlist::const_iterator &p) |
94b18763 | 679 | { |
f6b5b4d7 | 680 | DECODE_START_LEGACY_COMPAT_LEN(16, 6, 6, p); |
94b18763 | 681 | |
11fdf7f2 TL |
682 | decode(ino, p); |
683 | decode(rdev, p); | |
684 | decode(ctime, p); | |
94b18763 | 685 | |
11fdf7f2 TL |
686 | decode(mode, p); |
687 | decode(uid, p); | |
688 | decode(gid, p); | |
94b18763 | 689 | |
11fdf7f2 | 690 | decode(nlink, p); |
94b18763 FG |
691 | { |
692 | bool anchored; | |
11fdf7f2 | 693 | decode(anchored, p); |
94b18763 FG |
694 | } |
695 | ||
696 | if (struct_v >= 4) | |
11fdf7f2 | 697 | decode(dir_layout, p); |
92f5a8d4 TL |
698 | else { |
699 | // FIPS zeroization audit 20191117: this memset is not security related. | |
94b18763 | 700 | memset(&dir_layout, 0, sizeof(dir_layout)); |
92f5a8d4 | 701 | } |
11fdf7f2 TL |
702 | decode(layout, p); |
703 | decode(size, p); | |
704 | decode(truncate_seq, p); | |
705 | decode(truncate_size, p); | |
706 | decode(truncate_from, p); | |
94b18763 | 707 | if (struct_v >= 5) |
11fdf7f2 | 708 | decode(truncate_pending, p); |
94b18763 FG |
709 | else |
710 | truncate_pending = 0; | |
11fdf7f2 TL |
711 | decode(mtime, p); |
712 | decode(atime, p); | |
713 | decode(time_warp_seq, p); | |
94b18763 | 714 | if (struct_v >= 3) { |
11fdf7f2 | 715 | decode(client_ranges, p); |
94b18763 FG |
716 | } else { |
717 | map<client_t, client_writeable_range_t::byte_range_t> m; | |
11fdf7f2 | 718 | decode(m, p); |
94b18763 FG |
719 | for (map<client_t, client_writeable_range_t::byte_range_t>::iterator |
720 | q = m.begin(); q != m.end(); ++q) | |
721 | client_ranges[q->first].range = q->second; | |
722 | } | |
723 | ||
11fdf7f2 TL |
724 | decode(dirstat, p); |
725 | decode(rstat, p); | |
726 | decode(accounted_rstat, p); | |
94b18763 | 727 | |
11fdf7f2 TL |
728 | decode(version, p); |
729 | decode(file_data_version, p); | |
730 | decode(xattr_version, p); | |
94b18763 | 731 | if (struct_v >= 2) |
11fdf7f2 | 732 | decode(backtrace_version, p); |
94b18763 | 733 | if (struct_v >= 7) |
11fdf7f2 | 734 | decode(old_pools, p); |
94b18763 | 735 | if (struct_v >= 8) |
11fdf7f2 | 736 | decode(max_size_ever, p); |
94b18763 | 737 | if (struct_v >= 9) { |
11fdf7f2 | 738 | decode(inline_data, p); |
94b18763 FG |
739 | } else { |
740 | inline_data.version = CEPH_INLINE_NONE; | |
741 | } | |
742 | if (struct_v < 10) | |
743 | backtrace_version = 0; // force update backtrace | |
744 | if (struct_v >= 11) | |
11fdf7f2 | 745 | decode(quota, p); |
94b18763 FG |
746 | |
747 | if (struct_v >= 12) { | |
748 | std::string tmp; | |
11fdf7f2 TL |
749 | decode(tmp, p); |
750 | stray_prior_path = std::string_view(tmp); | |
94b18763 FG |
751 | } |
752 | ||
753 | if (struct_v >= 13) { | |
11fdf7f2 TL |
754 | decode(last_scrub_version, p); |
755 | decode(last_scrub_stamp, p); | |
94b18763 FG |
756 | } |
757 | if (struct_v >= 14) { | |
11fdf7f2 TL |
758 | decode(btime, p); |
759 | decode(change_attr, p); | |
94b18763 FG |
760 | } else { |
761 | btime = utime_t(); | |
762 | change_attr = 0; | |
763 | } | |
764 | ||
765 | if (struct_v >= 15) { | |
11fdf7f2 | 766 | decode(export_pin, p); |
94b18763 FG |
767 | } else { |
768 | export_pin = MDS_RANK_NONE; | |
769 | } | |
770 | ||
f6b5b4d7 TL |
771 | if (struct_v >= 16) { |
772 | decode(export_ephemeral_random_pin, p); | |
773 | decode(export_ephemeral_distributed_pin, p); | |
774 | } else { | |
775 | export_ephemeral_random_pin = 0; | |
776 | export_ephemeral_distributed_pin = false; | |
777 | } | |
778 | ||
94b18763 FG |
779 | DECODE_FINISH(p); |
780 | } | |
781 | ||
782 | template<template<typename> class Allocator> | |
783 | void inode_t<Allocator>::dump(Formatter *f) const | |
784 | { | |
785 | f->dump_unsigned("ino", ino); | |
786 | f->dump_unsigned("rdev", rdev); | |
787 | f->dump_stream("ctime") << ctime; | |
788 | f->dump_stream("btime") << btime; | |
789 | f->dump_unsigned("mode", mode); | |
790 | f->dump_unsigned("uid", uid); | |
791 | f->dump_unsigned("gid", gid); | |
792 | f->dump_unsigned("nlink", nlink); | |
793 | ||
794 | f->open_object_section("dir_layout"); | |
795 | ::dump(dir_layout, f); | |
796 | f->close_section(); | |
797 | ||
798 | f->dump_object("layout", layout); | |
799 | ||
800 | f->open_array_section("old_pools"); | |
801 | for (const auto &p : old_pools) { | |
802 | f->dump_int("pool", p); | |
803 | } | |
804 | f->close_section(); | |
805 | ||
806 | f->dump_unsigned("size", size); | |
807 | f->dump_unsigned("truncate_seq", truncate_seq); | |
808 | f->dump_unsigned("truncate_size", truncate_size); | |
809 | f->dump_unsigned("truncate_from", truncate_from); | |
810 | f->dump_unsigned("truncate_pending", truncate_pending); | |
811 | f->dump_stream("mtime") << mtime; | |
812 | f->dump_stream("atime") << atime; | |
813 | f->dump_unsigned("time_warp_seq", time_warp_seq); | |
814 | f->dump_unsigned("change_attr", change_attr); | |
815 | f->dump_int("export_pin", export_pin); | |
f6b5b4d7 TL |
816 | f->dump_int("export_ephemeral_random_pin", export_ephemeral_random_pin); |
817 | f->dump_bool("export_ephemeral_distributed_pin", export_ephemeral_distributed_pin); | |
94b18763 FG |
818 | |
819 | f->open_array_section("client_ranges"); | |
820 | for (const auto &p : client_ranges) { | |
821 | f->open_object_section("client"); | |
822 | f->dump_unsigned("client", p.first.v); | |
823 | p.second.dump(f); | |
824 | f->close_section(); | |
825 | } | |
826 | f->close_section(); | |
827 | ||
828 | f->open_object_section("dirstat"); | |
829 | dirstat.dump(f); | |
830 | f->close_section(); | |
831 | ||
832 | f->open_object_section("rstat"); | |
833 | rstat.dump(f); | |
834 | f->close_section(); | |
835 | ||
836 | f->open_object_section("accounted_rstat"); | |
837 | accounted_rstat.dump(f); | |
838 | f->close_section(); | |
839 | ||
840 | f->dump_unsigned("version", version); | |
841 | f->dump_unsigned("file_data_version", file_data_version); | |
842 | f->dump_unsigned("xattr_version", xattr_version); | |
843 | f->dump_unsigned("backtrace_version", backtrace_version); | |
844 | ||
845 | f->dump_string("stray_prior_path", stray_prior_path); | |
9f95a23c TL |
846 | f->dump_unsigned("max_size_ever", max_size_ever); |
847 | ||
848 | f->open_object_section("quota"); | |
849 | quota.dump(f); | |
850 | f->close_section(); | |
851 | ||
852 | f->dump_stream("last_scrub_stamp") << last_scrub_stamp; | |
853 | f->dump_unsigned("last_scrub_version", last_scrub_version); | |
94b18763 FG |
854 | } |
855 | ||
856 | template<template<typename> class Allocator> | |
9f95a23c | 857 | void inode_t<Allocator>::generate_test_instances(std::list<inode_t*>& ls) |
94b18763 FG |
858 | { |
859 | ls.push_back(new inode_t<Allocator>); | |
860 | ls.push_back(new inode_t<Allocator>); | |
861 | ls.back()->ino = 1; | |
862 | // i am lazy. | |
863 | } | |
864 | ||
865 | template<template<typename> class Allocator> | |
866 | int inode_t<Allocator>::compare(const inode_t<Allocator> &other, bool *divergent) const | |
867 | { | |
11fdf7f2 | 868 | ceph_assert(ino == other.ino); |
94b18763 FG |
869 | *divergent = false; |
870 | if (version == other.version) { | |
871 | if (rdev != other.rdev || | |
872 | ctime != other.ctime || | |
873 | btime != other.btime || | |
874 | mode != other.mode || | |
875 | uid != other.uid || | |
876 | gid != other.gid || | |
877 | nlink != other.nlink || | |
878 | memcmp(&dir_layout, &other.dir_layout, sizeof(dir_layout)) || | |
879 | layout != other.layout || | |
880 | old_pools != other.old_pools || | |
881 | size != other.size || | |
882 | max_size_ever != other.max_size_ever || | |
883 | truncate_seq != other.truncate_seq || | |
884 | truncate_size != other.truncate_size || | |
885 | truncate_from != other.truncate_from || | |
886 | truncate_pending != other.truncate_pending || | |
887 | change_attr != other.change_attr || | |
888 | mtime != other.mtime || | |
889 | atime != other.atime || | |
890 | time_warp_seq != other.time_warp_seq || | |
891 | inline_data != other.inline_data || | |
892 | client_ranges != other.client_ranges || | |
893 | !(dirstat == other.dirstat) || | |
894 | !(rstat == other.rstat) || | |
895 | !(accounted_rstat == other.accounted_rstat) || | |
896 | file_data_version != other.file_data_version || | |
897 | xattr_version != other.xattr_version || | |
898 | backtrace_version != other.backtrace_version) { | |
899 | *divergent = true; | |
900 | } | |
901 | return 0; | |
902 | } else if (version > other.version) { | |
903 | *divergent = !older_is_consistent(other); | |
904 | return 1; | |
905 | } else { | |
11fdf7f2 | 906 | ceph_assert(version < other.version); |
94b18763 FG |
907 | *divergent = !other.older_is_consistent(*this); |
908 | return -1; | |
909 | } | |
910 | } | |
911 | ||
912 | template<template<typename> class Allocator> | |
913 | bool inode_t<Allocator>::older_is_consistent(const inode_t<Allocator> &other) const | |
914 | { | |
915 | if (max_size_ever < other.max_size_ever || | |
916 | truncate_seq < other.truncate_seq || | |
917 | time_warp_seq < other.time_warp_seq || | |
918 | inline_data.version < other.inline_data.version || | |
919 | dirstat.version < other.dirstat.version || | |
920 | rstat.version < other.rstat.version || | |
921 | accounted_rstat.version < other.accounted_rstat.version || | |
922 | file_data_version < other.file_data_version || | |
923 | xattr_version < other.xattr_version || | |
924 | backtrace_version < other.backtrace_version) { | |
925 | return false; | |
926 | } | |
927 | return true; | |
928 | } | |
929 | ||
930 | template<template<typename> class Allocator> | |
931 | inline void encode(const inode_t<Allocator> &c, ::ceph::bufferlist &bl, uint64_t features) | |
932 | { | |
933 | ENCODE_DUMP_PRE(); | |
934 | c.encode(bl, features); | |
935 | ENCODE_DUMP_POST(cl); | |
936 | } | |
937 | template<template<typename> class Allocator> | |
11fdf7f2 | 938 | inline void decode(inode_t<Allocator> &c, ::ceph::bufferlist::const_iterator &p) |
94b18763 FG |
939 | { |
940 | c.decode(p); | |
941 | } | |
942 | ||
943 | template<template<typename> class Allocator> | |
944 | using alloc_string = std::basic_string<char,std::char_traits<char>,Allocator<char>>; | |
945 | ||
946 | template<template<typename> class Allocator> | |
947 | using xattr_map = compact_map<alloc_string<Allocator>, bufferptr, std::less<alloc_string<Allocator>>, Allocator<std::pair<const alloc_string<Allocator>, bufferptr>>>; // FIXME bufferptr not in mempool | |
7c673cae | 948 | |
e306af50 TL |
949 | template<template<typename> class Allocator> |
950 | inline void decode_noshare(xattr_map<Allocator>& xattrs, ceph::buffer::list::const_iterator &p) | |
951 | { | |
952 | __u32 n; | |
953 | decode(n, p); | |
954 | while (n-- > 0) { | |
955 | alloc_string<Allocator> key; | |
956 | decode(key, p); | |
957 | __u32 len; | |
958 | decode(len, p); | |
959 | p.copy_deep(len, xattrs[key]); | |
960 | } | |
961 | } | |
962 | ||
94b18763 | 963 | template<template<typename> class Allocator = std::allocator> |
7c673cae FG |
964 | struct old_inode_t { |
965 | snapid_t first; | |
94b18763 FG |
966 | inode_t<Allocator> inode; |
967 | xattr_map<Allocator> xattrs; | |
7c673cae FG |
968 | |
969 | void encode(bufferlist &bl, uint64_t features) const; | |
11fdf7f2 | 970 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 971 | void dump(Formatter *f) const; |
94b18763 | 972 | static void generate_test_instances(std::list<old_inode_t*>& ls); |
7c673cae | 973 | }; |
94b18763 FG |
974 | |
975 | // These methods may be moved back to mdstypes.cc when we have pmr | |
976 | template<template<typename> class Allocator> | |
977 | void old_inode_t<Allocator>::encode(bufferlist& bl, uint64_t features) const | |
978 | { | |
979 | ENCODE_START(2, 2, bl); | |
11fdf7f2 TL |
980 | encode(first, bl); |
981 | encode(inode, bl, features); | |
982 | encode(xattrs, bl); | |
94b18763 FG |
983 | ENCODE_FINISH(bl); |
984 | } | |
985 | ||
986 | template<template<typename> class Allocator> | |
11fdf7f2 | 987 | void old_inode_t<Allocator>::decode(bufferlist::const_iterator& bl) |
94b18763 FG |
988 | { |
989 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
11fdf7f2 TL |
990 | decode(first, bl); |
991 | decode(inode, bl); | |
e306af50 | 992 | decode_noshare<Allocator>(xattrs, bl); |
94b18763 FG |
993 | DECODE_FINISH(bl); |
994 | } | |
995 | ||
996 | template<template<typename> class Allocator> | |
997 | void old_inode_t<Allocator>::dump(Formatter *f) const | |
998 | { | |
999 | f->dump_unsigned("first", first); | |
1000 | inode.dump(f); | |
1001 | f->open_object_section("xattrs"); | |
1002 | for (const auto &p : xattrs) { | |
1003 | std::string v(p.second.c_str(), p.second.length()); | |
1004 | f->dump_string(p.first.c_str(), v); | |
1005 | } | |
1006 | f->close_section(); | |
1007 | } | |
1008 | ||
1009 | template<template<typename> class Allocator> | |
1010 | void old_inode_t<Allocator>::generate_test_instances(std::list<old_inode_t<Allocator>*>& ls) | |
1011 | { | |
1012 | ls.push_back(new old_inode_t<Allocator>); | |
1013 | ls.push_back(new old_inode_t<Allocator>); | |
1014 | ls.back()->first = 2; | |
1015 | std::list<inode_t<Allocator>*> ils; | |
1016 | inode_t<Allocator>::generate_test_instances(ils); | |
1017 | ls.back()->inode = *ils.back(); | |
1018 | ls.back()->xattrs["user.foo"] = buffer::copy("asdf", 4); | |
1019 | ls.back()->xattrs["user.unprintable"] = buffer::copy("\000\001\002", 3); | |
1020 | } | |
1021 | ||
1022 | template<template<typename> class Allocator> | |
1023 | inline void encode(const old_inode_t<Allocator> &c, ::ceph::bufferlist &bl, uint64_t features) | |
1024 | { | |
1025 | ENCODE_DUMP_PRE(); | |
1026 | c.encode(bl, features); | |
1027 | ENCODE_DUMP_POST(cl); | |
1028 | } | |
1029 | template<template<typename> class Allocator> | |
11fdf7f2 | 1030 | inline void decode(old_inode_t<Allocator> &c, ::ceph::bufferlist::const_iterator &p) |
94b18763 FG |
1031 | { |
1032 | c.decode(p); | |
1033 | } | |
7c673cae | 1034 | |
7c673cae FG |
1035 | /* |
1036 | * like an inode, but for a dir frag | |
1037 | */ | |
1038 | struct fnode_t { | |
9f95a23c TL |
1039 | void encode(bufferlist &bl) const; |
1040 | void decode(bufferlist::const_iterator& bl); | |
1041 | void dump(Formatter *f) const; | |
1042 | static void generate_test_instances(std::list<fnode_t*>& ls); | |
1043 | ||
94b18763 | 1044 | version_t version = 0; |
7c673cae FG |
1045 | snapid_t snap_purged_thru; // the max_last_destroy snapid we've been purged thru |
1046 | frag_info_t fragstat, accounted_fragstat; | |
1047 | nest_info_t rstat, accounted_rstat; | |
94b18763 | 1048 | damage_flags_t damage_flags = 0; |
7c673cae FG |
1049 | |
1050 | // we know we and all our descendants have been scrubbed since this version | |
94b18763 | 1051 | version_t recursive_scrub_version = 0; |
7c673cae FG |
1052 | utime_t recursive_scrub_stamp; |
1053 | // version at which we last scrubbed our personal data structures | |
94b18763 | 1054 | version_t localized_scrub_version = 0; |
7c673cae | 1055 | utime_t localized_scrub_stamp; |
7c673cae FG |
1056 | }; |
1057 | WRITE_CLASS_ENCODER(fnode_t) | |
1058 | ||
1059 | ||
1060 | struct old_rstat_t { | |
7c673cae | 1061 | void encode(bufferlist& bl) const; |
11fdf7f2 | 1062 | void decode(bufferlist::const_iterator& p); |
7c673cae | 1063 | void dump(Formatter *f) const; |
9f95a23c TL |
1064 | static void generate_test_instances(std::list<old_rstat_t*>& ls); |
1065 | ||
1066 | snapid_t first; | |
1067 | nest_info_t rstat, accounted_rstat; | |
7c673cae FG |
1068 | }; |
1069 | WRITE_CLASS_ENCODER(old_rstat_t) | |
1070 | ||
1071 | inline std::ostream& operator<<(std::ostream& out, const old_rstat_t& o) { | |
1072 | return out << "old_rstat(first " << o.first << " " << o.rstat << " " << o.accounted_rstat << ")"; | |
1073 | } | |
1074 | ||
11fdf7f2 TL |
1075 | class feature_bitset_t { |
1076 | public: | |
1077 | typedef uint64_t block_type; | |
1078 | static const size_t bits_per_block = sizeof(block_type) * 8; | |
1079 | ||
1080 | feature_bitset_t(const feature_bitset_t& other) : _vec(other._vec) {} | |
1081 | feature_bitset_t(feature_bitset_t&& other) : _vec(std::move(other._vec)) {} | |
1082 | feature_bitset_t(unsigned long value = 0); | |
1083 | feature_bitset_t(const vector<size_t>& array); | |
1084 | feature_bitset_t& operator=(const feature_bitset_t& other) { | |
1085 | _vec = other._vec; | |
1086 | return *this; | |
1087 | } | |
1088 | feature_bitset_t& operator=(feature_bitset_t&& other) { | |
1089 | _vec = std::move(other._vec); | |
1090 | return *this; | |
1091 | } | |
9f95a23c | 1092 | feature_bitset_t& operator-=(const feature_bitset_t& other); |
11fdf7f2 | 1093 | bool empty() const { |
9f95a23c | 1094 | //block_type is a uint64_t. If the vector is only composed of 0s, then it's still "empty" |
11fdf7f2 TL |
1095 | for (auto& v : _vec) { |
1096 | if (v) | |
1097 | return false; | |
1098 | } | |
1099 | return true; | |
1100 | } | |
1101 | bool test(size_t bit) const { | |
1102 | if (bit >= bits_per_block * _vec.size()) | |
1103 | return false; | |
1104 | return _vec[bit / bits_per_block] & ((block_type)1 << (bit % bits_per_block)); | |
1105 | } | |
1106 | void clear() { | |
1107 | _vec.clear(); | |
1108 | } | |
11fdf7f2 TL |
1109 | void encode(bufferlist& bl) const; |
1110 | void decode(bufferlist::const_iterator &p); | |
9f95a23c | 1111 | void dump(Formatter *f) const; |
11fdf7f2 TL |
1112 | void print(ostream& out) const; |
1113 | private: | |
1114 | vector<block_type> _vec; | |
1115 | }; | |
1116 | WRITE_CLASS_ENCODER(feature_bitset_t) | |
1117 | ||
1118 | inline std::ostream& operator<<(std::ostream& out, const feature_bitset_t& s) { | |
1119 | s.print(out); | |
1120 | return out; | |
1121 | } | |
1122 | ||
9f95a23c TL |
1123 | struct metric_spec_t { |
1124 | metric_spec_t() {} | |
1125 | metric_spec_t(const metric_spec_t& other) : | |
1126 | metric_flags(other.metric_flags) {} | |
1127 | metric_spec_t(metric_spec_t&& other) : | |
1128 | metric_flags(std::move(other.metric_flags)) {} | |
1129 | metric_spec_t(const feature_bitset_t& mf) : | |
1130 | metric_flags(mf) {} | |
1131 | metric_spec_t(feature_bitset_t&& mf) : | |
1132 | metric_flags(std::move(mf)) {} | |
1133 | ||
1134 | metric_spec_t& operator=(const metric_spec_t& other) { | |
1135 | metric_flags = other.metric_flags; | |
1136 | return *this; | |
1137 | } | |
1138 | metric_spec_t& operator=(metric_spec_t&& other) { | |
1139 | metric_flags = std::move(other.metric_flags); | |
1140 | return *this; | |
1141 | } | |
1142 | ||
1143 | bool empty() const { | |
1144 | return metric_flags.empty(); | |
1145 | } | |
1146 | ||
1147 | void clear() { | |
1148 | metric_flags.clear(); | |
1149 | } | |
1150 | ||
1151 | void encode(bufferlist& bl) const; | |
1152 | void decode(bufferlist::const_iterator& p); | |
1153 | void dump(Formatter *f) const; | |
1154 | void print(ostream& out) const; | |
1155 | ||
1156 | // set of metrics that a client is capable of forwarding | |
1157 | feature_bitset_t metric_flags; | |
1158 | }; | |
1159 | WRITE_CLASS_ENCODER(metric_spec_t) | |
1160 | ||
1161 | inline std::ostream& operator<<(std::ostream& out, const metric_spec_t& mst) { | |
1162 | mst.print(out); | |
1163 | return out; | |
1164 | } | |
1165 | ||
11fdf7f2 TL |
1166 | /* |
1167 | * client_metadata_t | |
1168 | */ | |
1169 | struct client_metadata_t { | |
1170 | using kv_map_t = std::map<std::string,std::string>; | |
1171 | using iterator = kv_map_t::const_iterator; | |
1172 | ||
11fdf7f2 | 1173 | client_metadata_t() {} |
9f95a23c TL |
1174 | client_metadata_t(const kv_map_t& kv, const feature_bitset_t &f, const metric_spec_t &mst) : |
1175 | kv_map(kv), | |
1176 | features(f), | |
1177 | metric_spec(mst) {} | |
11fdf7f2 TL |
1178 | client_metadata_t& operator=(const client_metadata_t& other) { |
1179 | kv_map = other.kv_map; | |
1180 | features = other.features; | |
9f95a23c | 1181 | metric_spec = other.metric_spec; |
11fdf7f2 TL |
1182 | return *this; |
1183 | } | |
1184 | ||
9f95a23c | 1185 | bool empty() const { return kv_map.empty() && features.empty() && metric_spec.empty(); } |
11fdf7f2 TL |
1186 | iterator find(const std::string& key) const { return kv_map.find(key); } |
1187 | iterator begin() const { return kv_map.begin(); } | |
1188 | iterator end() const { return kv_map.end(); } | |
92f5a8d4 | 1189 | void erase(iterator it) { kv_map.erase(it); } |
11fdf7f2 TL |
1190 | std::string& operator[](const std::string& key) { return kv_map[key]; } |
1191 | void merge(const client_metadata_t& other) { | |
1192 | kv_map.insert(other.kv_map.begin(), other.kv_map.end()); | |
1193 | features = other.features; | |
9f95a23c | 1194 | metric_spec = other.metric_spec; |
11fdf7f2 TL |
1195 | } |
1196 | void clear() { | |
1197 | kv_map.clear(); | |
1198 | features.clear(); | |
9f95a23c | 1199 | metric_spec.clear(); |
11fdf7f2 TL |
1200 | } |
1201 | ||
1202 | void encode(bufferlist& bl) const; | |
1203 | void decode(bufferlist::const_iterator& p); | |
1204 | void dump(Formatter *f) const; | |
9f95a23c TL |
1205 | |
1206 | kv_map_t kv_map; | |
1207 | feature_bitset_t features; | |
1208 | metric_spec_t metric_spec; | |
11fdf7f2 TL |
1209 | }; |
1210 | WRITE_CLASS_ENCODER(client_metadata_t) | |
7c673cae FG |
1211 | |
1212 | /* | |
9f95a23c | 1213 | * session_info_t - durable part of a Session |
7c673cae | 1214 | */ |
7c673cae | 1215 | struct session_info_t { |
7c673cae | 1216 | client_t get_client() const { return client_t(inst.name.num()); } |
11fdf7f2 | 1217 | bool has_feature(size_t bit) const { return client_metadata.features.test(bit); } |
7c673cae FG |
1218 | const entity_name_t& get_source() const { return inst.name; } |
1219 | ||
1220 | void clear_meta() { | |
1221 | prealloc_inos.clear(); | |
1222 | used_inos.clear(); | |
1223 | completed_requests.clear(); | |
1224 | completed_flushes.clear(); | |
11fdf7f2 | 1225 | client_metadata.clear(); |
7c673cae FG |
1226 | } |
1227 | ||
1228 | void encode(bufferlist& bl, uint64_t features) const; | |
11fdf7f2 | 1229 | void decode(bufferlist::const_iterator& p); |
7c673cae | 1230 | void dump(Formatter *f) const; |
9f95a23c TL |
1231 | static void generate_test_instances(std::list<session_info_t*>& ls); |
1232 | ||
1233 | entity_inst_t inst; | |
1234 | std::map<ceph_tid_t,inodeno_t> completed_requests; | |
1235 | interval_set<inodeno_t> prealloc_inos; // preallocated, ready to use. | |
1236 | interval_set<inodeno_t> used_inos; // journaling use | |
1237 | client_metadata_t client_metadata; | |
1238 | std::set<ceph_tid_t> completed_flushes; | |
1239 | EntityName auth_name; | |
7c673cae FG |
1240 | }; |
1241 | WRITE_CLASS_ENCODER_FEATURES(session_info_t) | |
1242 | ||
7c673cae | 1243 | // dentries |
7c673cae | 1244 | struct dentry_key_t { |
94b18763 | 1245 | dentry_key_t() {} |
11fdf7f2 | 1246 | dentry_key_t(snapid_t s, std::string_view n, __u32 h=0) : |
7c673cae FG |
1247 | snapid(s), name(n), hash(h) {} |
1248 | ||
94b18763 | 1249 | bool is_valid() { return name.length() || snapid; } |
7c673cae FG |
1250 | |
1251 | // encode into something that can be decoded as a string. | |
1252 | // name_ (head) or name_%x (!head) | |
1253 | void encode(bufferlist& bl) const { | |
1254 | string key; | |
1255 | encode(key); | |
11fdf7f2 TL |
1256 | using ceph::encode; |
1257 | encode(key, bl); | |
7c673cae FG |
1258 | } |
1259 | void encode(string& key) const { | |
1260 | char b[20]; | |
1261 | if (snapid != CEPH_NOSNAP) { | |
1262 | uint64_t val(snapid); | |
1263 | snprintf(b, sizeof(b), "%" PRIx64, val); | |
1264 | } else { | |
1265 | snprintf(b, sizeof(b), "%s", "head"); | |
1266 | } | |
1267 | ostringstream oss; | |
1268 | oss << name << "_" << b; | |
1269 | key = oss.str(); | |
1270 | } | |
11fdf7f2 | 1271 | static void decode_helper(bufferlist::const_iterator& bl, string& nm, snapid_t& sn) { |
7c673cae | 1272 | string key; |
11fdf7f2 | 1273 | decode(key, bl); |
7c673cae FG |
1274 | decode_helper(key, nm, sn); |
1275 | } | |
11fdf7f2 | 1276 | static void decode_helper(std::string_view key, string& nm, snapid_t& sn) { |
7c673cae | 1277 | size_t i = key.find_last_of('_'); |
11fdf7f2 TL |
1278 | ceph_assert(i != string::npos); |
1279 | if (key.compare(i+1, std::string_view::npos, "head") == 0) { | |
7c673cae FG |
1280 | // name_head |
1281 | sn = CEPH_NOSNAP; | |
1282 | } else { | |
1283 | // name_%x | |
1284 | long long unsigned x = 0; | |
94b18763 FG |
1285 | std::string x_str(key.substr(i+1)); |
1286 | sscanf(x_str.c_str(), "%llx", &x); | |
7c673cae | 1287 | sn = x; |
9f95a23c | 1288 | } |
11fdf7f2 | 1289 | nm = key.substr(0, i); |
7c673cae | 1290 | } |
9f95a23c TL |
1291 | |
1292 | snapid_t snapid = 0; | |
1293 | std::string_view name; | |
1294 | __u32 hash = 0; | |
7c673cae FG |
1295 | }; |
1296 | ||
1297 | inline std::ostream& operator<<(std::ostream& out, const dentry_key_t &k) | |
1298 | { | |
1299 | return out << "(" << k.name << "," << k.snapid << ")"; | |
1300 | } | |
1301 | ||
1302 | inline bool operator<(const dentry_key_t& k1, const dentry_key_t& k2) | |
1303 | { | |
1304 | /* | |
1305 | * order by hash, name, snap | |
1306 | */ | |
1307 | int c = ceph_frag_value(k1.hash) - ceph_frag_value(k2.hash); | |
1308 | if (c) | |
1309 | return c < 0; | |
94b18763 | 1310 | c = k1.name.compare(k2.name); |
7c673cae FG |
1311 | if (c) |
1312 | return c < 0; | |
1313 | return k1.snapid < k2.snapid; | |
1314 | } | |
1315 | ||
7c673cae FG |
1316 | /* |
1317 | * string_snap_t is a simple (string, snapid_t) pair | |
1318 | */ | |
1319 | struct string_snap_t { | |
7c673cae | 1320 | string_snap_t() {} |
11fdf7f2 | 1321 | string_snap_t(std::string_view n, snapid_t s) : name(n), snapid(s) {} |
7c673cae FG |
1322 | |
1323 | void encode(bufferlist& bl) const; | |
11fdf7f2 | 1324 | void decode(bufferlist::const_iterator& p); |
7c673cae | 1325 | void dump(Formatter *f) const; |
9f95a23c TL |
1326 | static void generate_test_instances(std::list<string_snap_t*>& ls); |
1327 | ||
1328 | string name; | |
1329 | snapid_t snapid; | |
7c673cae FG |
1330 | }; |
1331 | WRITE_CLASS_ENCODER(string_snap_t) | |
1332 | ||
1333 | inline bool operator<(const string_snap_t& l, const string_snap_t& r) { | |
94b18763 | 1334 | int c = l.name.compare(r.name); |
7c673cae FG |
1335 | return c < 0 || (c == 0 && l.snapid < r.snapid); |
1336 | } | |
1337 | ||
1338 | inline std::ostream& operator<<(std::ostream& out, const string_snap_t &k) | |
1339 | { | |
1340 | return out << "(" << k.name << "," << k.snapid << ")"; | |
1341 | } | |
1342 | ||
1343 | /* | |
1344 | * mds_table_pending_t | |
1345 | * | |
9f95a23c | 1346 | * For mds's requesting any pending ops, child needs to encode the corresponding |
7c673cae FG |
1347 | * pending mutation state in the table. |
1348 | */ | |
1349 | struct mds_table_pending_t { | |
7c673cae | 1350 | void encode(bufferlist& bl) const; |
11fdf7f2 | 1351 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 1352 | void dump(Formatter *f) const; |
9f95a23c TL |
1353 | static void generate_test_instances(std::list<mds_table_pending_t*>& ls); |
1354 | ||
1355 | uint64_t reqid = 0; | |
1356 | __s32 mds = 0; | |
1357 | version_t tid = 0; | |
7c673cae FG |
1358 | }; |
1359 | WRITE_CLASS_ENCODER(mds_table_pending_t) | |
1360 | ||
7c673cae | 1361 | // requests |
7c673cae | 1362 | struct metareqid_t { |
94b18763 | 1363 | metareqid_t() {} |
7c673cae FG |
1364 | metareqid_t(entity_name_t n, ceph_tid_t t) : name(n), tid(t) {} |
1365 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
1366 | using ceph::encode; |
1367 | encode(name, bl); | |
1368 | encode(tid, bl); | |
7c673cae | 1369 | } |
11fdf7f2 TL |
1370 | void decode(bufferlist::const_iterator &p) { |
1371 | using ceph::decode; | |
1372 | decode(name, p); | |
1373 | decode(tid, p); | |
7c673cae | 1374 | } |
9f95a23c TL |
1375 | |
1376 | entity_name_t name; | |
1377 | uint64_t tid = 0; | |
7c673cae FG |
1378 | }; |
1379 | WRITE_CLASS_ENCODER(metareqid_t) | |
1380 | ||
1381 | inline std::ostream& operator<<(std::ostream& out, const metareqid_t& r) { | |
1382 | return out << r.name << ":" << r.tid; | |
1383 | } | |
1384 | ||
1385 | inline bool operator==(const metareqid_t& l, const metareqid_t& r) { | |
1386 | return (l.name == r.name) && (l.tid == r.tid); | |
1387 | } | |
1388 | inline bool operator!=(const metareqid_t& l, const metareqid_t& r) { | |
1389 | return (l.name != r.name) || (l.tid != r.tid); | |
1390 | } | |
1391 | inline bool operator<(const metareqid_t& l, const metareqid_t& r) { | |
1392 | return (l.name < r.name) || | |
1393 | (l.name == r.name && l.tid < r.tid); | |
1394 | } | |
1395 | inline bool operator<=(const metareqid_t& l, const metareqid_t& r) { | |
1396 | return (l.name < r.name) || | |
1397 | (l.name == r.name && l.tid <= r.tid); | |
1398 | } | |
1399 | inline bool operator>(const metareqid_t& l, const metareqid_t& r) { return !(l <= r); } | |
1400 | inline bool operator>=(const metareqid_t& l, const metareqid_t& r) { return !(l < r); } | |
1401 | ||
1402 | namespace std { | |
1403 | template<> struct hash<metareqid_t> { | |
1404 | size_t operator()(const metareqid_t &r) const { | |
1405 | hash<uint64_t> H; | |
1406 | return H(r.name.num()) ^ H(r.name.type()) ^ H(r.tid); | |
1407 | } | |
1408 | }; | |
1409 | } // namespace std | |
1410 | ||
7c673cae FG |
1411 | // cap info for client reconnect |
1412 | struct cap_reconnect_t { | |
9f95a23c | 1413 | cap_reconnect_t() {} |
11fdf7f2 | 1414 | cap_reconnect_t(uint64_t cap_id, inodeno_t pino, std::string_view p, int w, int i, |
7c673cae FG |
1415 | inodeno_t sr, snapid_t sf, bufferlist& lb) : |
1416 | path(p) { | |
1417 | capinfo.cap_id = cap_id; | |
1418 | capinfo.wanted = w; | |
1419 | capinfo.issued = i; | |
1420 | capinfo.snaprealm = sr; | |
1421 | capinfo.pathbase = pino; | |
1422 | capinfo.flock_len = 0; | |
1423 | snap_follows = sf; | |
1424 | flockbl.claim(lb); | |
1425 | } | |
1426 | void encode(bufferlist& bl) const; | |
11fdf7f2 | 1427 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 1428 | void encode_old(bufferlist& bl) const; |
11fdf7f2 | 1429 | void decode_old(bufferlist::const_iterator& bl); |
7c673cae FG |
1430 | |
1431 | void dump(Formatter *f) const; | |
9f95a23c TL |
1432 | static void generate_test_instances(std::list<cap_reconnect_t*>& ls); |
1433 | ||
1434 | string path; | |
1435 | mutable ceph_mds_cap_reconnect capinfo = {}; | |
1436 | snapid_t snap_follows = 0; | |
1437 | bufferlist flockbl; | |
7c673cae FG |
1438 | }; |
1439 | WRITE_CLASS_ENCODER(cap_reconnect_t) | |
1440 | ||
11fdf7f2 | 1441 | struct snaprealm_reconnect_t { |
9f95a23c | 1442 | snaprealm_reconnect_t() {} |
11fdf7f2 TL |
1443 | snaprealm_reconnect_t(inodeno_t ino, snapid_t seq, inodeno_t parent) { |
1444 | realm.ino = ino; | |
1445 | realm.seq = seq; | |
1446 | realm.parent = parent; | |
1447 | } | |
1448 | void encode(bufferlist& bl) const; | |
1449 | void decode(bufferlist::const_iterator& bl); | |
1450 | void encode_old(bufferlist& bl) const; | |
1451 | void decode_old(bufferlist::const_iterator& bl); | |
1452 | ||
1453 | void dump(Formatter *f) const; | |
9f95a23c TL |
1454 | static void generate_test_instances(std::list<snaprealm_reconnect_t*>& ls); |
1455 | ||
1456 | mutable ceph_mds_snaprealm_reconnect realm = {}; | |
11fdf7f2 TL |
1457 | }; |
1458 | WRITE_CLASS_ENCODER(snaprealm_reconnect_t) | |
7c673cae FG |
1459 | |
1460 | // compat for pre-FLOCK feature | |
1461 | struct old_ceph_mds_cap_reconnect { | |
eafe8130 TL |
1462 | ceph_le64 cap_id; |
1463 | ceph_le32 wanted; | |
1464 | ceph_le32 issued; | |
1465 | ceph_le64 old_size; | |
7c673cae | 1466 | struct ceph_timespec old_mtime, old_atime; |
eafe8130 TL |
1467 | ceph_le64 snaprealm; |
1468 | ceph_le64 pathbase; /* base ino for our path to this ino */ | |
7c673cae FG |
1469 | } __attribute__ ((packed)); |
1470 | WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect) | |
1471 | ||
1472 | struct old_cap_reconnect_t { | |
7c673cae FG |
1473 | const old_cap_reconnect_t& operator=(const cap_reconnect_t& n) { |
1474 | path = n.path; | |
1475 | capinfo.cap_id = n.capinfo.cap_id; | |
1476 | capinfo.wanted = n.capinfo.wanted; | |
1477 | capinfo.issued = n.capinfo.issued; | |
1478 | capinfo.snaprealm = n.capinfo.snaprealm; | |
1479 | capinfo.pathbase = n.capinfo.pathbase; | |
1480 | return *this; | |
1481 | } | |
1482 | operator cap_reconnect_t() { | |
1483 | cap_reconnect_t n; | |
1484 | n.path = path; | |
1485 | n.capinfo.cap_id = capinfo.cap_id; | |
1486 | n.capinfo.wanted = capinfo.wanted; | |
1487 | n.capinfo.issued = capinfo.issued; | |
1488 | n.capinfo.snaprealm = capinfo.snaprealm; | |
1489 | n.capinfo.pathbase = capinfo.pathbase; | |
1490 | return n; | |
1491 | } | |
1492 | ||
1493 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
1494 | using ceph::encode; |
1495 | encode(path, bl); | |
1496 | encode(capinfo, bl); | |
7c673cae | 1497 | } |
11fdf7f2 TL |
1498 | void decode(bufferlist::const_iterator& bl) { |
1499 | using ceph::decode; | |
1500 | decode(path, bl); | |
1501 | decode(capinfo, bl); | |
7c673cae | 1502 | } |
9f95a23c TL |
1503 | |
1504 | string path; | |
1505 | old_ceph_mds_cap_reconnect capinfo; | |
7c673cae FG |
1506 | }; |
1507 | WRITE_CLASS_ENCODER(old_cap_reconnect_t) | |
1508 | ||
7c673cae | 1509 | // dir frag |
7c673cae | 1510 | struct dirfrag_t { |
94b18763 | 1511 | dirfrag_t() {} |
7c673cae FG |
1512 | dirfrag_t(inodeno_t i, frag_t f) : ino(i), frag(f) { } |
1513 | ||
1514 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
1515 | using ceph::encode; |
1516 | encode(ino, bl); | |
1517 | encode(frag, bl); | |
7c673cae | 1518 | } |
11fdf7f2 TL |
1519 | void decode(bufferlist::const_iterator& bl) { |
1520 | using ceph::decode; | |
1521 | decode(ino, bl); | |
1522 | decode(frag, bl); | |
7c673cae | 1523 | } |
9f95a23c TL |
1524 | |
1525 | inodeno_t ino = 0; | |
1526 | frag_t frag; | |
7c673cae FG |
1527 | }; |
1528 | WRITE_CLASS_ENCODER(dirfrag_t) | |
1529 | ||
7c673cae FG |
1530 | inline std::ostream& operator<<(std::ostream& out, const dirfrag_t &df) { |
1531 | out << df.ino; | |
1532 | if (!df.frag.is_root()) out << "." << df.frag; | |
1533 | return out; | |
1534 | } | |
1535 | inline bool operator<(dirfrag_t l, dirfrag_t r) { | |
1536 | if (l.ino < r.ino) return true; | |
1537 | if (l.ino == r.ino && l.frag < r.frag) return true; | |
1538 | return false; | |
1539 | } | |
1540 | inline bool operator==(dirfrag_t l, dirfrag_t r) { | |
1541 | return l.ino == r.ino && l.frag == r.frag; | |
1542 | } | |
1543 | ||
1544 | namespace std { | |
1545 | template<> struct hash<dirfrag_t> { | |
1546 | size_t operator()(const dirfrag_t &df) const { | |
1547 | static rjhash<uint64_t> H; | |
1548 | static rjhash<uint32_t> I; | |
1549 | return H(df.ino) ^ I(df.frag); | |
1550 | } | |
1551 | }; | |
1552 | } // namespace std | |
1553 | ||
// ================================================================
// Slot indices used by dirfrag_load_vec_t::meta_load() to address its
// counters. META_NPOP is the number of slots.
#define META_POP_IRD     0
#define META_POP_IWR     1
#define META_POP_READDIR 2
#define META_POP_FETCH   3
#define META_POP_STORE   4
#define META_NPOP        5
1561 | ||
1562 | class inode_load_vec_t { | |
7c673cae | 1563 | public: |
11fdf7f2 TL |
1564 | using time = DecayCounter::time; |
1565 | using clock = DecayCounter::clock; | |
1566 | static const size_t NUM = 2; | |
1567 | ||
1568 | inode_load_vec_t() : vec{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {} | |
1569 | inode_load_vec_t(const DecayRate &rate) : vec{DecayCounter(rate), DecayCounter(rate)} {} | |
1570 | ||
7c673cae | 1571 | DecayCounter &get(int t) { |
7c673cae FG |
1572 | return vec[t]; |
1573 | } | |
11fdf7f2 TL |
1574 | void zero() { |
1575 | for (auto &d : vec) { | |
1576 | d.reset(); | |
1577 | } | |
7c673cae FG |
1578 | } |
1579 | void encode(bufferlist &bl) const; | |
11fdf7f2 TL |
1580 | void decode(bufferlist::const_iterator& p); |
1581 | void dump(Formatter *f) const; | |
9f95a23c | 1582 | static void generate_test_instances(std::list<inode_load_vec_t*>& ls); |
11fdf7f2 TL |
1583 | |
1584 | private: | |
1585 | std::array<DecayCounter, NUM> vec; | |
7c673cae | 1586 | }; |
11fdf7f2 TL |
1587 | inline void encode(const inode_load_vec_t &c, bufferlist &bl) { |
1588 | c.encode(bl); | |
7c673cae | 1589 | } |
11fdf7f2 TL |
1590 | inline void decode(inode_load_vec_t & c, bufferlist::const_iterator &p) { |
1591 | c.decode(p); | |
7c673cae FG |
1592 | } |
1593 | ||
1594 | class dirfrag_load_vec_t { | |
1595 | public: | |
11fdf7f2 TL |
1596 | using time = DecayCounter::time; |
1597 | using clock = DecayCounter::clock; | |
1598 | static const size_t NUM = 5; | |
1599 | ||
1600 | dirfrag_load_vec_t() : | |
1601 | vec{DecayCounter(DecayRate()), | |
1602 | DecayCounter(DecayRate()), | |
1603 | DecayCounter(DecayRate()), | |
1604 | DecayCounter(DecayRate()), | |
1605 | DecayCounter(DecayRate()) | |
1606 | } | |
7c673cae | 1607 | {} |
11fdf7f2 TL |
1608 | dirfrag_load_vec_t(const DecayRate &rate) : |
1609 | vec{DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate)} | |
1610 | {} | |
1611 | ||
7c673cae FG |
1612 | void encode(bufferlist &bl) const { |
1613 | ENCODE_START(2, 2, bl); | |
94b18763 | 1614 | for (const auto &i : vec) { |
11fdf7f2 | 1615 | encode(i, bl); |
94b18763 | 1616 | } |
7c673cae FG |
1617 | ENCODE_FINISH(bl); |
1618 | } | |
11fdf7f2 | 1619 | void decode(bufferlist::const_iterator &p) { |
7c673cae | 1620 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p); |
94b18763 | 1621 | for (auto &i : vec) { |
11fdf7f2 | 1622 | decode(i, p); |
94b18763 | 1623 | } |
7c673cae FG |
1624 | DECODE_FINISH(p); |
1625 | } | |
7c673cae | 1626 | void dump(Formatter *f) const; |
11fdf7f2 TL |
1627 | void dump(Formatter *f, const DecayRate& rate) const; |
1628 | static void generate_test_instances(std::list<dirfrag_load_vec_t*>& ls); | |
7c673cae | 1629 | |
11fdf7f2 TL |
1630 | const DecayCounter &get(int t) const { |
1631 | return vec[t]; | |
7c673cae | 1632 | } |
11fdf7f2 TL |
1633 | DecayCounter &get(int t) { |
1634 | return vec[t]; | |
1635 | } | |
1636 | void adjust(double d) { | |
94b18763 | 1637 | for (auto &i : vec) { |
11fdf7f2 | 1638 | i.adjust(d); |
94b18763 | 1639 | } |
7c673cae | 1640 | } |
11fdf7f2 | 1641 | void zero() { |
94b18763 | 1642 | for (auto &i : vec) { |
11fdf7f2 | 1643 | i.reset(); |
94b18763 | 1644 | } |
7c673cae | 1645 | } |
28e407b8 | 1646 | double meta_load() const { |
7c673cae | 1647 | return |
11fdf7f2 TL |
1648 | 1*vec[META_POP_IRD].get() + |
1649 | 2*vec[META_POP_IWR].get() + | |
1650 | 1*vec[META_POP_READDIR].get() + | |
1651 | 2*vec[META_POP_FETCH].get() + | |
1652 | 4*vec[META_POP_STORE].get(); | |
7c673cae FG |
1653 | } |
1654 | ||
11fdf7f2 TL |
1655 | void add(dirfrag_load_vec_t& r) { |
1656 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1657 | vec[i].adjust(r.vec[i].get()); | |
7c673cae | 1658 | } |
11fdf7f2 TL |
1659 | void sub(dirfrag_load_vec_t& r) { |
1660 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1661 | vec[i].adjust(-r.vec[i].get()); | |
7c673cae FG |
1662 | } |
1663 | void scale(double f) { | |
11fdf7f2 | 1664 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) |
7c673cae FG |
1665 | vec[i].scale(f); |
1666 | } | |
11fdf7f2 TL |
1667 | |
1668 | private: | |
1669 | friend inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl); | |
1670 | std::array<DecayCounter, NUM> vec; | |
7c673cae FG |
1671 | }; |
1672 | ||
11fdf7f2 TL |
1673 | inline void encode(const dirfrag_load_vec_t &c, bufferlist &bl) { |
1674 | c.encode(bl); | |
7c673cae | 1675 | } |
11fdf7f2 TL |
1676 | inline void decode(dirfrag_load_vec_t& c, bufferlist::const_iterator &p) { |
1677 | c.decode(p); | |
7c673cae FG |
1678 | } |
1679 | ||
28e407b8 | 1680 | inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl) |
7c673cae | 1681 | { |
11fdf7f2 TL |
1682 | std::ostringstream ss; |
1683 | ss << std::setprecision(1) << std::fixed | |
1684 | << "[pop" | |
1685 | " IRD:" << dl.vec[0] | |
1686 | << " IWR:" << dl.vec[1] | |
1687 | << " RDR:" << dl.vec[2] | |
1688 | << " FET:" << dl.vec[3] | |
1689 | << " STR:" << dl.vec[4] | |
1690 | << " *LOAD:" << dl.meta_load() << "]"; | |
1691 | return out << ss.str() << std::endl; | |
7c673cae FG |
1692 | } |
1693 | ||
7c673cae | 1694 | struct mds_load_t { |
11fdf7f2 TL |
1695 | using clock = dirfrag_load_vec_t::clock; |
1696 | using time = dirfrag_load_vec_t::time; | |
1697 | ||
7c673cae FG |
1698 | dirfrag_load_vec_t auth; |
1699 | dirfrag_load_vec_t all; | |
1700 | ||
11fdf7f2 TL |
1701 | mds_load_t() : auth(DecayRate()), all(DecayRate()) {} |
1702 | mds_load_t(const DecayRate &rate) : auth(rate), all(rate) {} | |
1703 | ||
94b18763 FG |
1704 | double req_rate = 0.0; |
1705 | double cache_hit_rate = 0.0; | |
1706 | double queue_len = 0.0; | |
7c673cae | 1707 | |
94b18763 | 1708 | double cpu_load_avg = 0.0; |
7c673cae | 1709 | |
11fdf7f2 | 1710 | double mds_load() const; // defiend in MDBalancer.cc |
7c673cae | 1711 | void encode(bufferlist& bl) const; |
11fdf7f2 | 1712 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 1713 | void dump(Formatter *f) const; |
11fdf7f2 | 1714 | static void generate_test_instances(std::list<mds_load_t*>& ls); |
7c673cae | 1715 | }; |
11fdf7f2 TL |
1716 | inline void encode(const mds_load_t &c, bufferlist &bl) { |
1717 | c.encode(bl); | |
7c673cae | 1718 | } |
11fdf7f2 TL |
1719 | inline void decode(mds_load_t &c, bufferlist::const_iterator &p) { |
1720 | c.decode(p); | |
7c673cae FG |
1721 | } |
1722 | ||
28e407b8 | 1723 | inline std::ostream& operator<<(std::ostream& out, const mds_load_t& load) |
7c673cae FG |
1724 | { |
1725 | return out << "mdsload<" << load.auth << "/" << load.all | |
1726 | << ", req " << load.req_rate | |
1727 | << ", hr " << load.cache_hit_rate | |
1728 | << ", qlen " << load.queue_len | |
1729 | << ", cpu " << load.cpu_load_avg | |
1730 | << ">"; | |
1731 | } | |
1732 | ||
1733 | class load_spread_t { | |
1734 | public: | |
11fdf7f2 TL |
1735 | using time = DecayCounter::time; |
1736 | using clock = DecayCounter::clock; | |
7c673cae | 1737 | static const int MAX = 4; |
7c673cae | 1738 | |
11fdf7f2 | 1739 | load_spread_t(const DecayRate &rate) : count(rate) |
9f95a23c TL |
1740 | {} |
1741 | ||
1742 | load_spread_t() = delete; | |
7c673cae | 1743 | |
11fdf7f2 | 1744 | double hit(int who) { |
7c673cae FG |
1745 | for (int i=0; i<n; i++) |
1746 | if (last[i] == who) | |
1747 | return count.get_last(); | |
1748 | ||
1749 | // we're new(ish) | |
1750 | last[p++] = who; | |
1751 | if (n < MAX) n++; | |
1752 | if (n == 1) return 0.0; | |
1753 | ||
1754 | if (p == MAX) p = 0; | |
1755 | ||
11fdf7f2 | 1756 | return count.hit(); |
7c673cae | 1757 | } |
11fdf7f2 TL |
1758 | double get() const { |
1759 | return count.get(); | |
7c673cae | 1760 | } |
7c673cae | 1761 | |
9f95a23c TL |
1762 | std::array<int, MAX> last = {-1, -1, -1, -1}; |
1763 | int p = 0, n = 0; | |
1764 | DecayCounter count; | |
1765 | }; | |
7c673cae FG |
1766 | |
1767 | // ================================================================ | |
1768 | typedef std::pair<mds_rank_t, mds_rank_t> mds_authority_t; | |
1769 | ||
1770 | // -- authority delegation -- | |
1771 | // directory authority types | |
1772 | // >= 0 is the auth mds | |
1773 | #define CDIR_AUTH_PARENT mds_rank_t(-1) // default | |
1774 | #define CDIR_AUTH_UNKNOWN mds_rank_t(-2) | |
1775 | #define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN) | |
1776 | #define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN) | |
1777 | //#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2) | |
1778 | ||
1779 | class MDSCacheObjectInfo { | |
1780 | public: | |
9f95a23c TL |
1781 | void encode(bufferlist& bl) const; |
1782 | void decode(bufferlist::const_iterator& bl); | |
1783 | void dump(Formatter *f) const; | |
1784 | static void generate_test_instances(std::list<MDSCacheObjectInfo*>& ls); | |
1785 | ||
94b18763 | 1786 | inodeno_t ino = 0; |
7c673cae FG |
1787 | dirfrag_t dirfrag; |
1788 | string dname; | |
1789 | snapid_t snapid; | |
7c673cae FG |
1790 | }; |
1791 | ||
1792 | inline std::ostream& operator<<(std::ostream& out, const MDSCacheObjectInfo &info) { | |
1793 | if (info.ino) return out << info.ino << "." << info.snapid; | |
1794 | if (info.dname.length()) return out << info.dirfrag << "/" << info.dname | |
1795 | << " snap " << info.snapid; | |
1796 | return out << info.dirfrag; | |
1797 | } | |
1798 | ||
1799 | inline bool operator==(const MDSCacheObjectInfo& l, const MDSCacheObjectInfo& r) { | |
1800 | if (l.ino || r.ino) | |
1801 | return l.ino == r.ino && l.snapid == r.snapid; | |
1802 | else | |
1803 | return l.dirfrag == r.dirfrag && l.dname == r.dname; | |
1804 | } | |
1805 | WRITE_CLASS_ENCODER(MDSCacheObjectInfo) | |
1806 | ||
7c673cae FG |
1807 | // parse a map of keys/values. |
1808 | namespace qi = boost::spirit::qi; | |
1809 | ||
1810 | template <typename Iterator> | |
1811 | struct keys_and_values | |
1812 | : qi::grammar<Iterator, std::map<string, string>()> | |
1813 | { | |
1814 | keys_and_values() | |
1815 | : keys_and_values::base_type(query) | |
1816 | { | |
1817 | query = pair >> *(qi::lit(' ') >> pair); | |
1818 | pair = key >> '=' >> value; | |
1819 | key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9"); | |
e306af50 | 1820 | value = +qi::char_("a-zA-Z0-9-_."); |
7c673cae FG |
1821 | } |
1822 | qi::rule<Iterator, std::map<string, string>()> query; | |
1823 | qi::rule<Iterator, std::pair<string, string>()> pair; | |
1824 | qi::rule<Iterator, string()> key, value; | |
1825 | }; | |
1826 | ||
1827 | #endif |