]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | #ifndef CEPH_MDSTYPES_H | |
4 | #define CEPH_MDSTYPES_H | |
5 | ||
6 | #include "include/int_types.h" | |
7 | ||
8 | #include <math.h> | |
9 | #include <ostream> | |
10 | #include <set> | |
11 | #include <map> | |
11fdf7f2 | 12 | #include <string_view> |
7c673cae FG |
13 | |
14 | #include "common/config.h" | |
15 | #include "common/Clock.h" | |
16 | #include "common/DecayCounter.h" | |
17 | #include "common/entity_name.h" | |
18 | ||
19 | #include "include/Context.h" | |
20 | #include "include/frag.h" | |
21 | #include "include/xlist.h" | |
22 | #include "include/interval_set.h" | |
23 | #include "include/compact_map.h" | |
24 | #include "include/compact_set.h" | |
25 | #include "include/fs_types.h" | |
26 | ||
27 | #include "inode_backtrace.h" | |
28 | ||
29 | #include <boost/spirit/include/qi.hpp> | |
30 | #include <boost/pool/pool.hpp> | |
11fdf7f2 | 31 | #include "include/ceph_assert.h" |
7c673cae FG |
32 | #include <boost/serialization/strong_typedef.hpp> |
33 | ||
34 | #define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011" | |
35 | ||
36 | #define MDS_PORT_CACHE 0x200 | |
37 | #define MDS_PORT_LOCKER 0x300 | |
38 | #define MDS_PORT_MIGRATOR 0x400 | |
39 | ||
// Upper bound on the number of MDS ranks and the number of stray
// directories reserved per rank; both size the inode-number ranges below.
#define MAX_MDS                   0x100
#define NUM_STRAY                 10

#define MDS_INO_ROOT              1

// No longer created but recognised in existing filesystems
// so that we don't try to fragment it.
#define MDS_INO_CEPH              2

#define MDS_INO_GLOBAL_SNAPREALM  3

#define MDS_INO_MDSDIR_OFFSET     (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET      (6*MAX_MDS)

// Locations for journal data
#define MDS_INO_LOG_OFFSET        (2*MAX_MDS)
#define MDS_INO_LOG_BACKUP_OFFSET (3*MAX_MDS)
#define MDS_INO_LOG_POINTER_OFFSET    (4*MAX_MDS)
#define MDS_INO_PURGE_QUEUE       (5*MAX_MDS)

// First inode number past the stray range [6*MAX_MDS, 6*MAX_MDS + MAX_MDS*NUM_STRAY).
#define MDS_INO_SYSTEM_BASE       ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))

// Stray directory i of rank x, and the mdsdir of rank x.  Every macro
// argument is parenthesised before it is cast so that expression arguments
// (e.g. "a + b") are converted as a whole.
#define MDS_INO_STRAY(x,i)  (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
#define MDS_INO_MDSDIR(x)   (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

// Range predicates and their inverse mappings back to (rank, index).
#define MDS_INO_IS_STRAY(i)  ((i) >= MDS_INO_STRAY_OFFSET  && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i) ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
#define MDS_INO_IS_BASE(i)   ((i) == MDS_INO_ROOT || (i) == MDS_INO_GLOBAL_SNAPREALM || MDS_INO_IS_MDSDIR(i))
#define MDS_INO_STRAY_OWNER(i) (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i) (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)
71 | ||
72 | #define MDS_TRAVERSE_FORWARD 1 | |
73 | #define MDS_TRAVERSE_DISCOVER 2 // skips permissions checks etc. | |
74 | #define MDS_TRAVERSE_DISCOVERXLOCK 3 // succeeds on (foreign?) null, xlocked dentries. | |
75 | ||
76 | ||
// Signed 32-bit MDS rank.  MDS_RANK_NONE (-1) is the sentinel that
// mds_role_t's default constructor stores and is_none() tests for.
typedef int32_t mds_rank_t;
constexpr mds_rank_t MDS_RANK_NONE = -1;

// 64-bit MDS gid, declared through BOOST_STRONG_TYPEDEF; the "none" value
// is defined out of line.
BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t)
extern const mds_gid_t MDS_GID_NONE;

// Signed 32-bit filesystem cluster id; FS_CLUSTER_ID_NONE (-1) is the
// sentinel stored by mds_role_t's default constructor.
typedef int32_t fs_cluster_id_t;
constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = -1;
// The namespace ID of the anonymous default filesystem from legacy systems
constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS = 0;
7c673cae FG |
87 | |
88 | class mds_role_t | |
89 | { | |
90 | public: | |
91 | fs_cluster_id_t fscid; | |
92 | mds_rank_t rank; | |
93 | ||
94 | mds_role_t(fs_cluster_id_t fscid_, mds_rank_t rank_) | |
95 | : fscid(fscid_), rank(rank_) | |
96 | {} | |
97 | mds_role_t() | |
98 | : fscid(FS_CLUSTER_ID_NONE), rank(MDS_RANK_NONE) | |
99 | {} | |
100 | bool operator<(mds_role_t const &rhs) const | |
101 | { | |
102 | if (fscid < rhs.fscid) { | |
103 | return true; | |
104 | } else if (fscid == rhs.fscid) { | |
105 | return rank < rhs.rank; | |
106 | } else { | |
107 | return false; | |
108 | } | |
109 | } | |
110 | ||
111 | bool is_none() const | |
112 | { | |
113 | return (rank == MDS_RANK_NONE); | |
114 | } | |
115 | }; | |
116 | std::ostream& operator<<(std::ostream &out, const mds_role_t &role); | |
117 | ||
118 | ||
119 | // CAPS | |
120 | ||
121 | inline string gcap_string(int cap) | |
122 | { | |
123 | string s; | |
124 | if (cap & CEPH_CAP_GSHARED) s += "s"; | |
125 | if (cap & CEPH_CAP_GEXCL) s += "x"; | |
126 | if (cap & CEPH_CAP_GCACHE) s += "c"; | |
127 | if (cap & CEPH_CAP_GRD) s += "r"; | |
128 | if (cap & CEPH_CAP_GWR) s += "w"; | |
129 | if (cap & CEPH_CAP_GBUFFER) s += "b"; | |
130 | if (cap & CEPH_CAP_GWREXTEND) s += "a"; | |
131 | if (cap & CEPH_CAP_GLAZYIO) s += "l"; | |
132 | return s; | |
133 | } | |
134 | inline string ccap_string(int cap) | |
135 | { | |
136 | string s; | |
137 | if (cap & CEPH_CAP_PIN) s += "p"; | |
138 | ||
139 | int a = (cap >> CEPH_CAP_SAUTH) & 3; | |
140 | if (a) s += 'A' + gcap_string(a); | |
141 | ||
142 | a = (cap >> CEPH_CAP_SLINK) & 3; | |
143 | if (a) s += 'L' + gcap_string(a); | |
144 | ||
145 | a = (cap >> CEPH_CAP_SXATTR) & 3; | |
146 | if (a) s += 'X' + gcap_string(a); | |
147 | ||
148 | a = cap >> CEPH_CAP_SFILE; | |
149 | if (a) s += 'F' + gcap_string(a); | |
150 | ||
151 | if (s.length() == 0) | |
152 | s = "-"; | |
153 | return s; | |
154 | } | |
155 | ||
156 | ||
// Common base of frag_info_t and nest_info_t (both derive from it in this
// file).  It carries only a version counter, which starts at 0.
struct scatter_info_t {
  version_t version = 0;

  scatter_info_t() {}
};
162 | ||
163 | struct frag_info_t : public scatter_info_t { | |
164 | // this frag | |
165 | utime_t mtime; | |
94b18763 FG |
166 | uint64_t change_attr = 0; |
167 | int64_t nfiles = 0; // files | |
168 | int64_t nsubdirs = 0; // subdirs | |
7c673cae | 169 | |
94b18763 | 170 | frag_info_t() {} |
7c673cae FG |
171 | |
172 | int64_t size() const { return nfiles + nsubdirs; } | |
173 | ||
174 | void zero() { | |
175 | *this = frag_info_t(); | |
176 | } | |
177 | ||
178 | // *this += cur - acc; | |
179 | void add_delta(const frag_info_t &cur, const frag_info_t &acc, bool *touched_mtime=0, bool *touched_chattr=0) { | |
180 | if (cur.mtime > mtime) { | |
181 | mtime = cur.mtime; | |
182 | if (touched_mtime) | |
183 | *touched_mtime = true; | |
184 | } | |
185 | if (cur.change_attr > change_attr) { | |
186 | change_attr = cur.change_attr; | |
187 | if (touched_chattr) | |
188 | *touched_chattr = true; | |
189 | } | |
190 | nfiles += cur.nfiles - acc.nfiles; | |
191 | nsubdirs += cur.nsubdirs - acc.nsubdirs; | |
192 | } | |
193 | ||
194 | void add(const frag_info_t& other) { | |
195 | if (other.mtime > mtime) | |
196 | mtime = other.mtime; | |
197 | if (other.change_attr > change_attr) | |
198 | change_attr = other.change_attr; | |
199 | nfiles += other.nfiles; | |
200 | nsubdirs += other.nsubdirs; | |
201 | } | |
202 | ||
203 | bool same_sums(const frag_info_t &o) const { | |
204 | return mtime <= o.mtime && | |
205 | nfiles == o.nfiles && | |
206 | nsubdirs == o.nsubdirs; | |
207 | } | |
208 | ||
209 | void encode(bufferlist &bl) const; | |
11fdf7f2 | 210 | void decode(bufferlist::const_iterator& bl); |
7c673cae FG |
211 | void dump(Formatter *f) const; |
212 | static void generate_test_instances(list<frag_info_t*>& ls); | |
213 | }; | |
214 | WRITE_CLASS_ENCODER(frag_info_t) | |
215 | ||
216 | inline bool operator==(const frag_info_t &l, const frag_info_t &r) { | |
217 | return memcmp(&l, &r, sizeof(l)) == 0; | |
218 | } | |
219 | inline bool operator!=(const frag_info_t &l, const frag_info_t &r) { | |
220 | return !(l == r); | |
221 | } | |
222 | ||
223 | std::ostream& operator<<(std::ostream &out, const frag_info_t &f); | |
224 | ||
225 | ||
226 | struct nest_info_t : public scatter_info_t { | |
227 | // this frag + children | |
228 | utime_t rctime; | |
94b18763 FG |
229 | int64_t rbytes = 0; |
230 | int64_t rfiles = 0; | |
231 | int64_t rsubdirs = 0; | |
7c673cae FG |
232 | int64_t rsize() const { return rfiles + rsubdirs; } |
233 | ||
11fdf7f2 | 234 | int64_t rsnaps = 0; |
7c673cae | 235 | |
94b18763 | 236 | nest_info_t() {} |
7c673cae FG |
237 | |
238 | void zero() { | |
239 | *this = nest_info_t(); | |
240 | } | |
241 | ||
242 | void sub(const nest_info_t &other) { | |
243 | add(other, -1); | |
244 | } | |
245 | void add(const nest_info_t &other, int fac=1) { | |
246 | if (other.rctime > rctime) | |
247 | rctime = other.rctime; | |
248 | rbytes += fac*other.rbytes; | |
249 | rfiles += fac*other.rfiles; | |
250 | rsubdirs += fac*other.rsubdirs; | |
11fdf7f2 | 251 | rsnaps += fac*other.rsnaps; |
7c673cae FG |
252 | } |
253 | ||
254 | // *this += cur - acc; | |
255 | void add_delta(const nest_info_t &cur, const nest_info_t &acc) { | |
256 | if (cur.rctime > rctime) | |
257 | rctime = cur.rctime; | |
258 | rbytes += cur.rbytes - acc.rbytes; | |
259 | rfiles += cur.rfiles - acc.rfiles; | |
260 | rsubdirs += cur.rsubdirs - acc.rsubdirs; | |
11fdf7f2 | 261 | rsnaps += cur.rsnaps - acc.rsnaps; |
7c673cae FG |
262 | } |
263 | ||
264 | bool same_sums(const nest_info_t &o) const { | |
265 | return rctime <= o.rctime && | |
266 | rbytes == o.rbytes && | |
267 | rfiles == o.rfiles && | |
268 | rsubdirs == o.rsubdirs && | |
11fdf7f2 | 269 | rsnaps == o.rsnaps; |
7c673cae FG |
270 | } |
271 | ||
272 | void encode(bufferlist &bl) const; | |
11fdf7f2 | 273 | void decode(bufferlist::const_iterator& bl); |
7c673cae FG |
274 | void dump(Formatter *f) const; |
275 | static void generate_test_instances(list<nest_info_t*>& ls); | |
276 | }; | |
277 | WRITE_CLASS_ENCODER(nest_info_t) | |
278 | ||
279 | inline bool operator==(const nest_info_t &l, const nest_info_t &r) { | |
280 | return memcmp(&l, &r, sizeof(l)) == 0; | |
281 | } | |
282 | inline bool operator!=(const nest_info_t &l, const nest_info_t &r) { | |
283 | return !(l == r); | |
284 | } | |
285 | ||
286 | std::ostream& operator<<(std::ostream &out, const nest_info_t &n); | |
287 | ||
288 | ||
289 | struct vinodeno_t { | |
290 | inodeno_t ino; | |
291 | snapid_t snapid; | |
292 | vinodeno_t() {} | |
293 | vinodeno_t(inodeno_t i, snapid_t s) : ino(i), snapid(s) {} | |
294 | ||
295 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
296 | using ceph::encode; |
297 | encode(ino, bl); | |
298 | encode(snapid, bl); | |
7c673cae | 299 | } |
11fdf7f2 TL |
300 | void decode(bufferlist::const_iterator& p) { |
301 | using ceph::decode; | |
302 | decode(ino, p); | |
303 | decode(snapid, p); | |
7c673cae FG |
304 | } |
305 | }; | |
306 | WRITE_CLASS_ENCODER(vinodeno_t) | |
307 | ||
308 | inline bool operator==(const vinodeno_t &l, const vinodeno_t &r) { | |
309 | return l.ino == r.ino && l.snapid == r.snapid; | |
310 | } | |
311 | inline bool operator!=(const vinodeno_t &l, const vinodeno_t &r) { | |
312 | return !(l == r); | |
313 | } | |
314 | inline bool operator<(const vinodeno_t &l, const vinodeno_t &r) { | |
315 | return | |
316 | l.ino < r.ino || | |
317 | (l.ino == r.ino && l.snapid < r.snapid); | |
318 | } | |
319 | ||
320 | struct quota_info_t | |
321 | { | |
94b18763 FG |
322 | int64_t max_bytes = 0; |
323 | int64_t max_files = 0; | |
7c673cae | 324 | |
94b18763 | 325 | quota_info_t() {} |
7c673cae FG |
326 | |
327 | void encode(bufferlist& bl) const { | |
328 | ENCODE_START(1, 1, bl); | |
11fdf7f2 TL |
329 | encode(max_bytes, bl); |
330 | encode(max_files, bl); | |
7c673cae FG |
331 | ENCODE_FINISH(bl); |
332 | } | |
11fdf7f2 | 333 | void decode(bufferlist::const_iterator& p) { |
7c673cae | 334 | DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p); |
11fdf7f2 TL |
335 | decode(max_bytes, p); |
336 | decode(max_files, p); | |
7c673cae FG |
337 | DECODE_FINISH(p); |
338 | } | |
339 | ||
340 | void dump(Formatter *f) const; | |
341 | static void generate_test_instances(list<quota_info_t *>& ls); | |
342 | ||
343 | bool is_valid() const { | |
344 | return max_bytes >=0 && max_files >=0; | |
345 | } | |
346 | bool is_enable() const { | |
347 | return max_bytes || max_files; | |
348 | } | |
349 | }; | |
350 | WRITE_CLASS_ENCODER(quota_info_t) | |
351 | ||
352 | inline bool operator==(const quota_info_t &l, const quota_info_t &r) { | |
353 | return memcmp(&l, &r, sizeof(l)) == 0; | |
354 | } | |
355 | ||
356 | ostream& operator<<(ostream &out, const quota_info_t &n); | |
357 | ||
358 | namespace std { | |
359 | template<> struct hash<vinodeno_t> { | |
360 | size_t operator()(const vinodeno_t &vino) const { | |
361 | hash<inodeno_t> H; | |
362 | hash<uint64_t> I; | |
363 | return H(vino.ino) ^ I(vino.snapid); | |
364 | } | |
365 | }; | |
366 | } // namespace std | |
367 | ||
368 | ||
369 | ||
370 | ||
371 | inline std::ostream& operator<<(std::ostream &out, const vinodeno_t &vino) { | |
372 | out << vino.ino; | |
373 | if (vino.snapid == CEPH_NOSNAP) | |
374 | out << ".head"; | |
375 | else if (vino.snapid) | |
376 | out << '.' << vino.snapid; | |
377 | return out; | |
378 | } | |
379 | ||
380 | ||
381 | /* | |
382 | * client_writeable_range_t | |
383 | */ | |
// A byte interval a client may write to, together with the snapid it
// "follows".  encode/decode/dump are defined out of line.
struct client_writeable_range_t {
  // The [first, last] byte bounds; both start at 0.
  struct byte_range_t {
    uint64_t first = 0, last = 0;    // interval client can write to
    byte_range_t() {}
  };

  byte_range_t range;
  snapid_t follows = 0;     // aka "data+metadata flushed thru"

  client_writeable_range_t() {}

  void encode(bufferlist &bl) const;
  void decode(bufferlist::const_iterator& bl);
  void dump(Formatter *f) const;
  static void generate_test_instances(std::list<client_writeable_range_t*>& ls);
};
400 | ||
11fdf7f2 TL |
// Decode a bare byte_range_t: `first` then `last`.  Used by
// inode_t::decode() for encodings older than struct_v 3, which stored a
// map of plain byte ranges.
inline void decode(client_writeable_range_t::byte_range_t& range, bufferlist::const_iterator& bl) {
  decode(range.first, bl);
  decode(range.last, bl);
}
405 | ||
406 | WRITE_CLASS_ENCODER(client_writeable_range_t) | |
407 | ||
408 | std::ostream& operator<<(std::ostream& out, const client_writeable_range_t& r); | |
409 | ||
410 | inline bool operator==(const client_writeable_range_t& l, | |
411 | const client_writeable_range_t& r) { | |
412 | return l.range.first == r.range.first && l.range.last == r.range.last && | |
413 | l.follows == r.follows; | |
414 | } | |
415 | ||
416 | struct inline_data_t { | |
417 | private: | |
418 | std::unique_ptr<bufferlist> blp; | |
419 | public: | |
94b18763 | 420 | version_t version = 1; |
7c673cae FG |
421 | |
422 | void free_data() { | |
423 | blp.reset(); | |
424 | } | |
425 | bufferlist& get_data() { | |
426 | if (!blp) | |
427 | blp.reset(new bufferlist); | |
428 | return *blp; | |
429 | } | |
430 | size_t length() const { return blp ? blp->length() : 0; } | |
431 | ||
94b18763 | 432 | inline_data_t() {} |
7c673cae FG |
433 | inline_data_t(const inline_data_t& o) : version(o.version) { |
434 | if (o.blp) | |
435 | get_data() = *o.blp; | |
436 | } | |
437 | inline_data_t& operator=(const inline_data_t& o) { | |
438 | version = o.version; | |
439 | if (o.blp) | |
440 | get_data() = *o.blp; | |
441 | else | |
442 | free_data(); | |
443 | return *this; | |
444 | } | |
445 | bool operator==(const inline_data_t& o) const { | |
446 | return length() == o.length() && | |
447 | (length() == 0 || | |
448 | (*const_cast<bufferlist*>(blp.get()) == *const_cast<bufferlist*>(o.blp.get()))); | |
449 | } | |
450 | bool operator!=(const inline_data_t& o) const { | |
451 | return !(*this == o); | |
452 | } | |
453 | void encode(bufferlist &bl) const; | |
11fdf7f2 | 454 | void decode(bufferlist::const_iterator& bl); |
7c673cae FG |
455 | }; |
456 | WRITE_CLASS_ENCODER(inline_data_t) | |
457 | ||
// Kinds of metadata damage.  No explicit values are given, so the
// enumerators are 0, 1 and 2 in declaration order.
enum {
  DAMAGE_STATS,     // statistics (dirstat, size, etc)
  DAMAGE_RSTATS,    // recursive statistics (rstat, accounted_rstat)
  DAMAGE_FRAGTREE   // fragtree -- repair by searching
};
// NOTE(review): presumably a set of the DAMAGE_* kinds above; how the bits
// are formed is not visible here -- confirm against the users.
typedef uint32_t damage_flags_t;
464 | ||
465 | /* | |
466 | * inode_t | |
467 | */ | |
94b18763 | 468 | template<template<typename> class Allocator = std::allocator> |
7c673cae FG |
469 | struct inode_t { |
470 | /** | |
471 | * *************** | |
472 | * Do not forget to add any new fields to the compare() function. | |
473 | * *************** | |
474 | */ | |
475 | // base (immutable) | |
94b18763 FG |
476 | inodeno_t ino = 0; |
477 | uint32_t rdev = 0; // if special file | |
7c673cae FG |
478 | |
479 | // affected by any inode change... | |
480 | utime_t ctime; // inode change time | |
481 | utime_t btime; // birth time | |
482 | ||
483 | // perm (namespace permissions) | |
94b18763 FG |
484 | uint32_t mode = 0; |
485 | uid_t uid = 0; | |
486 | gid_t gid = 0; | |
7c673cae FG |
487 | |
488 | // nlink | |
94b18763 | 489 | int32_t nlink = 0; |
7c673cae FG |
490 | |
491 | // file (data access) | |
492 | ceph_dir_layout dir_layout; // [dir only] | |
493 | file_layout_t layout; | |
94b18763 FG |
494 | compact_set<int64_t, std::less<int64_t>, Allocator<int64_t>> old_pools; |
495 | uint64_t size = 0; // on directory, # dentries | |
496 | uint64_t max_size_ever = 0; // max size the file has ever been | |
497 | uint32_t truncate_seq = 0; | |
498 | uint64_t truncate_size = 0, truncate_from = 0; | |
499 | uint32_t truncate_pending = 0; | |
7c673cae FG |
500 | utime_t mtime; // file data modify time. |
501 | utime_t atime; // file data access time. | |
94b18763 FG |
502 | uint32_t time_warp_seq = 0; // count of (potential) mtime/atime timewarps (i.e., utimes()) |
503 | inline_data_t inline_data; // FIXME check | |
7c673cae FG |
504 | |
505 | // change attribute | |
94b18763 | 506 | uint64_t change_attr = 0; |
7c673cae | 507 | |
94b18763 FG |
508 | using client_range_map = std::map<client_t,client_writeable_range_t,std::less<client_t>,Allocator<std::pair<const client_t,client_writeable_range_t>>>; |
509 | client_range_map client_ranges; // client(s) can write to these ranges | |
7c673cae FG |
510 | |
511 | // dirfrag, recursive accountin | |
512 | frag_info_t dirstat; // protected by my filelock | |
513 | nest_info_t rstat; // protected by my nestlock | |
514 | nest_info_t accounted_rstat; // protected by parent's nestlock | |
515 | ||
516 | quota_info_t quota; | |
517 | ||
94b18763 | 518 | mds_rank_t export_pin = MDS_RANK_NONE; |
7c673cae FG |
519 | |
520 | // special stuff | |
94b18763 FG |
521 | version_t version = 0; // auth only |
522 | version_t file_data_version = 0; // auth only | |
523 | version_t xattr_version = 0; | |
7c673cae FG |
524 | |
525 | utime_t last_scrub_stamp; // start time of last complete scrub | |
94b18763 | 526 | version_t last_scrub_version = 0;// (parent) start version of last complete scrub |
7c673cae | 527 | |
94b18763 | 528 | version_t backtrace_version = 0; |
7c673cae FG |
529 | |
530 | snapid_t oldest_snap; | |
531 | ||
94b18763 FG |
532 | std::basic_string<char,std::char_traits<char>,Allocator<char>> stray_prior_path; //stores path before unlink |
533 | ||
534 | inode_t() | |
535 | { | |
7c673cae FG |
536 | clear_layout(); |
537 | memset(&dir_layout, 0, sizeof(dir_layout)); | |
7c673cae FG |
538 | } |
539 | ||
540 | // file type | |
541 | bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; } | |
542 | bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; } | |
543 | bool is_file() const { return (mode & S_IFMT) == S_IFREG; } | |
544 | ||
545 | bool is_truncating() const { return (truncate_pending > 0); } | |
546 | void truncate(uint64_t old_size, uint64_t new_size) { | |
11fdf7f2 | 547 | ceph_assert(new_size < old_size); |
7c673cae FG |
548 | if (old_size > max_size_ever) |
549 | max_size_ever = old_size; | |
550 | truncate_from = old_size; | |
551 | size = new_size; | |
552 | rstat.rbytes = new_size; | |
553 | truncate_size = size; | |
554 | truncate_seq++; | |
555 | truncate_pending++; | |
556 | } | |
557 | ||
558 | bool has_layout() const { | |
559 | return layout != file_layout_t(); | |
560 | } | |
561 | ||
562 | void clear_layout() { | |
563 | layout = file_layout_t(); | |
564 | } | |
565 | ||
566 | uint64_t get_layout_size_increment() const { | |
567 | return layout.get_period(); | |
568 | } | |
569 | ||
570 | bool is_dirty_rstat() const { return !(rstat == accounted_rstat); } | |
571 | ||
572 | uint64_t get_max_size() const { | |
573 | uint64_t max = 0; | |
574 | for (std::map<client_t,client_writeable_range_t>::const_iterator p = client_ranges.begin(); | |
575 | p != client_ranges.end(); | |
576 | ++p) | |
577 | if (p->second.range.last > max) | |
578 | max = p->second.range.last; | |
579 | return max; | |
580 | } | |
581 | void set_max_size(uint64_t new_max) { | |
582 | if (new_max == 0) { | |
583 | client_ranges.clear(); | |
584 | } else { | |
585 | for (std::map<client_t,client_writeable_range_t>::iterator p = client_ranges.begin(); | |
586 | p != client_ranges.end(); | |
587 | ++p) | |
588 | p->second.range.last = new_max; | |
589 | } | |
590 | } | |
591 | ||
592 | void trim_client_ranges(snapid_t last) { | |
593 | std::map<client_t, client_writeable_range_t>::iterator p = client_ranges.begin(); | |
594 | while (p != client_ranges.end()) { | |
595 | if (p->second.follows >= last) | |
596 | client_ranges.erase(p++); | |
597 | else | |
598 | ++p; | |
599 | } | |
600 | } | |
601 | ||
602 | bool is_backtrace_updated() const { | |
603 | return backtrace_version == version; | |
604 | } | |
605 | void update_backtrace(version_t pv=0) { | |
606 | backtrace_version = pv ? pv : version; | |
607 | } | |
608 | ||
609 | void add_old_pool(int64_t l) { | |
610 | backtrace_version = version; | |
611 | old_pools.insert(l); | |
612 | } | |
613 | ||
614 | void encode(bufferlist &bl, uint64_t features) const; | |
11fdf7f2 | 615 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 616 | void dump(Formatter *f) const; |
94b18763 | 617 | static void generate_test_instances(std::list<inode_t*>& ls); |
7c673cae FG |
618 | /** |
619 | * Compare this inode_t with another that represent *the same inode* | |
620 | * at different points in time. | |
621 | * @pre The inodes are the same ino | |
622 | * | |
623 | * @param other The inode_t to compare ourselves with | |
624 | * @param divergent A bool pointer which will be set to true | |
625 | * if the values are different in a way that can't be explained | |
626 | * by one being a newer version than the other. | |
627 | * | |
628 | * @returns 1 if we are newer than the other, 0 if equal, -1 if older. | |
629 | */ | |
630 | int compare(const inode_t &other, bool *divergent) const; | |
631 | private: | |
632 | bool older_is_consistent(const inode_t &other) const; | |
633 | }; | |
7c673cae | 634 | |
94b18763 FG |
635 | // These methods may be moved back to mdstypes.cc when we have pmr |
// Serialise the inode into bl as a versioned structure (version 15,
// compat 6).  `features` is only passed on to the layout encoder.
// The field order below is the wire format and must stay in step with
// inode_t::decode().
template<template<typename> class Allocator>
void inode_t<Allocator>::encode(bufferlist &bl, uint64_t features) const
{
  ENCODE_START(15, 6, bl);

  encode(ino, bl);
  encode(rdev, bl);
  encode(ctime, bl);

  encode(mode, bl);
  encode(uid, bl);
  encode(gid, bl);

  encode(nlink, bl);
  {
    // removed field
    // (a false placeholder is still written so the layout is unchanged)
    bool anchored = 0;
    encode(anchored, bl);
  }

  encode(dir_layout, bl);
  encode(layout, bl, features);
  encode(size, bl);
  encode(truncate_seq, bl);
  encode(truncate_size, bl);
  encode(truncate_from, bl);
  encode(truncate_pending, bl);
  encode(mtime, bl);
  encode(atime, bl);
  encode(time_warp_seq, bl);
  encode(client_ranges, bl);

  encode(dirstat, bl);
  encode(rstat, bl);
  encode(accounted_rstat, bl);

  encode(version, bl);
  encode(file_data_version, bl);
  encode(xattr_version, bl);
  encode(backtrace_version, bl);
  encode(old_pools, bl);
  encode(max_size_ever, bl);
  encode(inline_data, bl);
  encode(quota, bl);

  encode(stray_prior_path, bl);

  encode(last_scrub_version, bl);
  encode(last_scrub_stamp, bl);

  encode(btime, bl);
  encode(change_attr, bl);

  encode(export_pin, bl);

  ENCODE_FINISH(bl);
}
693 | ||
// Deserialise an inode written by inode_t::encode(), accepting older
// encodings as well.  `struct_v` is supplied by the DECODE_START macro
// (presumably the version found in the buffer); fields introduced in later
// versions are only read when struct_v is new enough, and several of them
// are given explicit fallback values otherwise.
template<template<typename> class Allocator>
void inode_t<Allocator>::decode(bufferlist::const_iterator &p)
{
  DECODE_START_LEGACY_COMPAT_LEN(15, 6, 6, p);

  decode(ino, p);
  decode(rdev, p);
  decode(ctime, p);

  decode(mode, p);
  decode(uid, p);
  decode(gid, p);

  decode(nlink, p);
  {
    // placeholder for the removed "anchored" field; read and discarded
    bool anchored;
    decode(anchored, p);
  }

  if (struct_v >= 4)
    decode(dir_layout, p);
  else
    memset(&dir_layout, 0, sizeof(dir_layout));
  decode(layout, p);
  decode(size, p);
  decode(truncate_seq, p);
  decode(truncate_size, p);
  decode(truncate_from, p);
  if (struct_v >= 5)
    decode(truncate_pending, p);
  else
    truncate_pending = 0;
  decode(mtime, p);
  decode(atime, p);
  decode(time_warp_seq, p);
  if (struct_v >= 3) {
    decode(client_ranges, p);
  } else {
    // older encodings stored bare byte ranges; copy them into the
    // range member of each client_ranges entry
    map<client_t, client_writeable_range_t::byte_range_t> m;
    decode(m, p);
    for (map<client_t, client_writeable_range_t::byte_range_t>::iterator
	  q = m.begin(); q != m.end(); ++q)
      client_ranges[q->first].range = q->second;
  }

  decode(dirstat, p);
  decode(rstat, p);
  decode(accounted_rstat, p);

  decode(version, p);
  decode(file_data_version, p);
  decode(xattr_version, p);
  if (struct_v >= 2)
    decode(backtrace_version, p);
  if (struct_v >= 7)
    decode(old_pools, p);
  if (struct_v >= 8)
    decode(max_size_ever, p);
  if (struct_v >= 9) {
    decode(inline_data, p);
  } else {
    inline_data.version = CEPH_INLINE_NONE;
  }
  if (struct_v < 10)
    backtrace_version = 0; // force update backtrace
  if (struct_v >= 11)
    decode(quota, p);

  if (struct_v >= 12) {
    // decoded into a plain std::string first, then copied into the
    // allocator-specific string member
    std::string tmp;
    decode(tmp, p);
    stray_prior_path = std::string_view(tmp);
  }

  if (struct_v >= 13) {
    decode(last_scrub_version, p);
    decode(last_scrub_stamp, p);
  }
  if (struct_v >= 14) {
    decode(btime, p);
    decode(change_attr, p);
  } else {
    btime = utime_t();
    change_attr = 0;
  }

  if (struct_v >= 15) {
    decode(export_pin, p);
  } else {
    export_pin = MDS_RANK_NONE;
  }

  DECODE_FINISH(p);
}
788 | ||
// Write the inode's fields to the Formatter: scalars as individual
// entries, dir_layout / dirstat / rstat / accounted_rstat as nested object
// sections, and old_pools / client_ranges as array sections.
// Only the fields listed below are emitted.
template<template<typename> class Allocator>
void inode_t<Allocator>::dump(Formatter *f) const
{
  f->dump_unsigned("ino", ino);
  f->dump_unsigned("rdev", rdev);
  f->dump_stream("ctime") << ctime;
  f->dump_stream("btime") << btime;
  f->dump_unsigned("mode", mode);
  f->dump_unsigned("uid", uid);
  f->dump_unsigned("gid", gid);
  f->dump_unsigned("nlink", nlink);

  f->open_object_section("dir_layout");
  ::dump(dir_layout, f);
  f->close_section();

  f->dump_object("layout", layout);

  f->open_array_section("old_pools");
  for (const auto &p : old_pools) {
    f->dump_int("pool", p);
  }
  f->close_section();

  f->dump_unsigned("size", size);
  f->dump_unsigned("truncate_seq", truncate_seq);
  f->dump_unsigned("truncate_size", truncate_size);
  f->dump_unsigned("truncate_from", truncate_from);
  f->dump_unsigned("truncate_pending", truncate_pending);
  f->dump_stream("mtime") << mtime;
  f->dump_stream("atime") << atime;
  f->dump_unsigned("time_warp_seq", time_warp_seq);
  f->dump_unsigned("change_attr", change_attr);
  f->dump_int("export_pin", export_pin);

  // one object per client: the client id followed by its range's own dump
  f->open_array_section("client_ranges");
  for (const auto &p : client_ranges) {
    f->open_object_section("client");
    f->dump_unsigned("client", p.first.v);
    p.second.dump(f);
    f->close_section();
  }
  f->close_section();

  f->open_object_section("dirstat");
  dirstat.dump(f);
  f->close_section();

  f->open_object_section("rstat");
  rstat.dump(f);
  f->close_section();

  f->open_object_section("accounted_rstat");
  accounted_rstat.dump(f);
  f->close_section();

  f->dump_unsigned("version", version);
  f->dump_unsigned("file_data_version", file_data_version);
  f->dump_unsigned("xattr_version", xattr_version);
  f->dump_unsigned("backtrace_version", backtrace_version);

  f->dump_string("stray_prior_path", stray_prior_path);
}
852 | ||
853 | template<template<typename> class Allocator> | |
854 | void inode_t<Allocator>::generate_test_instances(list<inode_t*>& ls) | |
855 | { | |
856 | ls.push_back(new inode_t<Allocator>); | |
857 | ls.push_back(new inode_t<Allocator>); | |
858 | ls.back()->ino = 1; | |
859 | // i am lazy. | |
860 | } | |
861 | ||
// Compare two records of the same inode (asserted via ino) by version.
// Returns 0 / 1 / -1 for equal / newer / older `version`.
// *divergent is always written: for equal versions it is set when any of
// the fields listed below differ; otherwise it is set when the record with
// the lower version fails older_is_consistent() against the newer one.
template<template<typename> class Allocator>
int inode_t<Allocator>::compare(const inode_t<Allocator> &other, bool *divergent) const
{
  ceph_assert(ino == other.ino);
  *divergent = false;
  if (version == other.version) {
    // same version: every compared field has to match
    if (rdev != other.rdev ||
        ctime != other.ctime ||
        btime != other.btime ||
        mode != other.mode ||
        uid != other.uid ||
        gid != other.gid ||
        nlink != other.nlink ||
        memcmp(&dir_layout, &other.dir_layout, sizeof(dir_layout)) ||
        layout != other.layout ||
        old_pools != other.old_pools ||
        size != other.size ||
        max_size_ever != other.max_size_ever ||
        truncate_seq != other.truncate_seq ||
        truncate_size != other.truncate_size ||
        truncate_from != other.truncate_from ||
        truncate_pending != other.truncate_pending ||
	change_attr != other.change_attr ||
        mtime != other.mtime ||
        atime != other.atime ||
        time_warp_seq != other.time_warp_seq ||
        inline_data != other.inline_data ||
        client_ranges != other.client_ranges ||
        !(dirstat == other.dirstat) ||
        !(rstat == other.rstat) ||
        !(accounted_rstat == other.accounted_rstat) ||
        file_data_version != other.file_data_version ||
        xattr_version != other.xattr_version ||
        backtrace_version != other.backtrace_version) {
      *divergent = true;
    }
    return 0;
  } else if (version > other.version) {
    // we are newer: the other record must be a consistent predecessor
    *divergent = !older_is_consistent(other);
    return 1;
  } else {
    ceph_assert(version < other.version);
    // we are older: check ourselves against the newer record
    *divergent = !other.older_is_consistent(*this);
    return -1;
  }
}
908 | ||
909 | template<template<typename> class Allocator> | |
910 | bool inode_t<Allocator>::older_is_consistent(const inode_t<Allocator> &other) const | |
911 | { | |
912 | if (max_size_ever < other.max_size_ever || | |
913 | truncate_seq < other.truncate_seq || | |
914 | time_warp_seq < other.time_warp_seq || | |
915 | inline_data.version < other.inline_data.version || | |
916 | dirstat.version < other.dirstat.version || | |
917 | rstat.version < other.rstat.version || | |
918 | accounted_rstat.version < other.accounted_rstat.version || | |
919 | file_data_version < other.file_data_version || | |
920 | xattr_version < other.xattr_version || | |
921 | backtrace_version < other.backtrace_version) { | |
922 | return false; | |
923 | } | |
924 | return true; | |
925 | } | |
926 | ||
927 | template<template<typename> class Allocator> | |
928 | inline void encode(const inode_t<Allocator> &c, ::ceph::bufferlist &bl, uint64_t features) | |
929 | { | |
930 | ENCODE_DUMP_PRE(); | |
931 | c.encode(bl, features); | |
932 | ENCODE_DUMP_POST(cl); | |
933 | } | |
934 | template<template<typename> class Allocator> | |
11fdf7f2 | 935 | inline void decode(inode_t<Allocator> &c, ::ceph::bufferlist::const_iterator &p) |
94b18763 FG |
936 | { |
937 | c.decode(p); | |
938 | } | |
939 | ||
940 | template<template<typename> class Allocator> | |
941 | using alloc_string = std::basic_string<char,std::char_traits<char>,Allocator<char>>; | |
942 | ||
943 | template<template<typename> class Allocator> | |
944 | using xattr_map = compact_map<alloc_string<Allocator>, bufferptr, std::less<alloc_string<Allocator>>, Allocator<std::pair<const alloc_string<Allocator>, bufferptr>>>; // FIXME bufferptr not in mempool | |
7c673cae FG |
945 | |
946 | /* | |
947 | * old_inode_t | |
948 | */ | |
94b18763 | 949 | template<template<typename> class Allocator = std::allocator> |
7c673cae FG |
950 | struct old_inode_t { |
951 | snapid_t first; | |
94b18763 FG |
952 | inode_t<Allocator> inode; |
953 | xattr_map<Allocator> xattrs; | |
7c673cae FG |
954 | |
955 | void encode(bufferlist &bl, uint64_t features) const; | |
11fdf7f2 | 956 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 957 | void dump(Formatter *f) const; |
94b18763 | 958 | static void generate_test_instances(std::list<old_inode_t*>& ls); |
7c673cae | 959 | }; |
94b18763 FG |
960 | |
961 | // These methods may be moved back to mdstypes.cc when we have pmr | |
962 | template<template<typename> class Allocator> | |
963 | void old_inode_t<Allocator>::encode(bufferlist& bl, uint64_t features) const | |
964 | { | |
965 | ENCODE_START(2, 2, bl); | |
11fdf7f2 TL |
966 | encode(first, bl); |
967 | encode(inode, bl, features); | |
968 | encode(xattrs, bl); | |
94b18763 FG |
969 | ENCODE_FINISH(bl); |
970 | } | |
971 | ||
972 | template<template<typename> class Allocator> | |
11fdf7f2 | 973 | void old_inode_t<Allocator>::decode(bufferlist::const_iterator& bl) |
94b18763 FG |
974 | { |
975 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
11fdf7f2 TL |
976 | decode(first, bl); |
977 | decode(inode, bl); | |
978 | decode(xattrs, bl); | |
94b18763 FG |
979 | DECODE_FINISH(bl); |
980 | } | |
981 | ||
982 | template<template<typename> class Allocator> | |
983 | void old_inode_t<Allocator>::dump(Formatter *f) const | |
984 | { | |
985 | f->dump_unsigned("first", first); | |
986 | inode.dump(f); | |
987 | f->open_object_section("xattrs"); | |
988 | for (const auto &p : xattrs) { | |
989 | std::string v(p.second.c_str(), p.second.length()); | |
990 | f->dump_string(p.first.c_str(), v); | |
991 | } | |
992 | f->close_section(); | |
993 | } | |
994 | ||
995 | template<template<typename> class Allocator> | |
996 | void old_inode_t<Allocator>::generate_test_instances(std::list<old_inode_t<Allocator>*>& ls) | |
997 | { | |
998 | ls.push_back(new old_inode_t<Allocator>); | |
999 | ls.push_back(new old_inode_t<Allocator>); | |
1000 | ls.back()->first = 2; | |
1001 | std::list<inode_t<Allocator>*> ils; | |
1002 | inode_t<Allocator>::generate_test_instances(ils); | |
1003 | ls.back()->inode = *ils.back(); | |
1004 | ls.back()->xattrs["user.foo"] = buffer::copy("asdf", 4); | |
1005 | ls.back()->xattrs["user.unprintable"] = buffer::copy("\000\001\002", 3); | |
1006 | } | |
1007 | ||
1008 | template<template<typename> class Allocator> | |
1009 | inline void encode(const old_inode_t<Allocator> &c, ::ceph::bufferlist &bl, uint64_t features) | |
1010 | { | |
1011 | ENCODE_DUMP_PRE(); | |
1012 | c.encode(bl, features); | |
1013 | ENCODE_DUMP_POST(cl); | |
1014 | } | |
1015 | template<template<typename> class Allocator> | |
11fdf7f2 | 1016 | inline void decode(old_inode_t<Allocator> &c, ::ceph::bufferlist::const_iterator &p) |
94b18763 FG |
1017 | { |
1018 | c.decode(p); | |
1019 | } | |
7c673cae FG |
1020 | |
1021 | ||
1022 | /* | |
1023 | * like an inode, but for a dir frag | |
1024 | */ | |
1025 | struct fnode_t { | |
94b18763 | 1026 | version_t version = 0; |
7c673cae FG |
1027 | snapid_t snap_purged_thru; // the max_last_destroy snapid we've been purged thru |
1028 | frag_info_t fragstat, accounted_fragstat; | |
1029 | nest_info_t rstat, accounted_rstat; | |
94b18763 | 1030 | damage_flags_t damage_flags = 0; |
7c673cae FG |
1031 | |
1032 | // we know we and all our descendants have been scrubbed since this version | |
94b18763 | 1033 | version_t recursive_scrub_version = 0; |
7c673cae FG |
1034 | utime_t recursive_scrub_stamp; |
1035 | // version at which we last scrubbed our personal data structures | |
94b18763 | 1036 | version_t localized_scrub_version = 0; |
7c673cae FG |
1037 | utime_t localized_scrub_stamp; |
1038 | ||
1039 | void encode(bufferlist &bl) const; | |
11fdf7f2 | 1040 | void decode(bufferlist::const_iterator& bl); |
7c673cae FG |
1041 | void dump(Formatter *f) const; |
1042 | static void generate_test_instances(list<fnode_t*>& ls); | |
94b18763 | 1043 | fnode_t() {} |
7c673cae FG |
1044 | }; |
1045 | WRITE_CLASS_ENCODER(fnode_t) | |
1046 | ||
1047 | ||
1048 | struct old_rstat_t { | |
1049 | snapid_t first; | |
1050 | nest_info_t rstat, accounted_rstat; | |
1051 | ||
1052 | void encode(bufferlist& bl) const; | |
11fdf7f2 | 1053 | void decode(bufferlist::const_iterator& p); |
7c673cae FG |
1054 | void dump(Formatter *f) const; |
1055 | static void generate_test_instances(list<old_rstat_t*>& ls); | |
1056 | }; | |
1057 | WRITE_CLASS_ENCODER(old_rstat_t) | |
1058 | ||
1059 | inline std::ostream& operator<<(std::ostream& out, const old_rstat_t& o) { | |
1060 | return out << "old_rstat(first " << o.first << " " << o.rstat << " " << o.accounted_rstat << ")"; | |
1061 | } | |
1062 | ||
11fdf7f2 TL |
1063 | /* |
1064 | * feature_bitset_t | |
1065 | */ | |
1066 | class feature_bitset_t { | |
1067 | public: | |
1068 | typedef uint64_t block_type; | |
1069 | static const size_t bits_per_block = sizeof(block_type) * 8; | |
1070 | ||
1071 | feature_bitset_t(const feature_bitset_t& other) : _vec(other._vec) {} | |
1072 | feature_bitset_t(feature_bitset_t&& other) : _vec(std::move(other._vec)) {} | |
1073 | feature_bitset_t(unsigned long value = 0); | |
1074 | feature_bitset_t(const vector<size_t>& array); | |
1075 | feature_bitset_t& operator=(const feature_bitset_t& other) { | |
1076 | _vec = other._vec; | |
1077 | return *this; | |
1078 | } | |
1079 | feature_bitset_t& operator=(feature_bitset_t&& other) { | |
1080 | _vec = std::move(other._vec); | |
1081 | return *this; | |
1082 | } | |
1083 | bool empty() const { | |
1084 | for (auto& v : _vec) { | |
1085 | if (v) | |
1086 | return false; | |
1087 | } | |
1088 | return true; | |
1089 | } | |
1090 | bool test(size_t bit) const { | |
1091 | if (bit >= bits_per_block * _vec.size()) | |
1092 | return false; | |
1093 | return _vec[bit / bits_per_block] & ((block_type)1 << (bit % bits_per_block)); | |
1094 | } | |
1095 | void clear() { | |
1096 | _vec.clear(); | |
1097 | } | |
1098 | feature_bitset_t& operator-=(const feature_bitset_t& other); | |
1099 | void encode(bufferlist& bl) const; | |
1100 | void decode(bufferlist::const_iterator &p); | |
1101 | void print(ostream& out) const; | |
1102 | private: | |
1103 | vector<block_type> _vec; | |
1104 | }; | |
1105 | WRITE_CLASS_ENCODER(feature_bitset_t) | |
1106 | ||
1107 | inline std::ostream& operator<<(std::ostream& out, const feature_bitset_t& s) { | |
1108 | s.print(out); | |
1109 | return out; | |
1110 | } | |
1111 | ||
1112 | /* | |
1113 | * client_metadata_t | |
1114 | */ | |
1115 | struct client_metadata_t { | |
1116 | using kv_map_t = std::map<std::string,std::string>; | |
1117 | using iterator = kv_map_t::const_iterator; | |
1118 | ||
1119 | kv_map_t kv_map; | |
1120 | feature_bitset_t features; | |
1121 | ||
1122 | client_metadata_t() {} | |
1123 | client_metadata_t(const client_metadata_t& other) : | |
1124 | kv_map(other.kv_map), features(other.features) {} | |
1125 | client_metadata_t(client_metadata_t&& other) : | |
1126 | kv_map(std::move(other.kv_map)), features(std::move(other.features)) {} | |
1127 | client_metadata_t(kv_map_t&& kv, feature_bitset_t &&f) : | |
1128 | kv_map(std::move(kv)), features(std::move(f)) {} | |
1129 | client_metadata_t(const kv_map_t& kv, const feature_bitset_t &f) : | |
1130 | kv_map(kv), features(f) {} | |
1131 | client_metadata_t& operator=(const client_metadata_t& other) { | |
1132 | kv_map = other.kv_map; | |
1133 | features = other.features; | |
1134 | return *this; | |
1135 | } | |
1136 | ||
1137 | bool empty() const { return kv_map.empty() && features.empty(); } | |
1138 | iterator find(const std::string& key) const { return kv_map.find(key); } | |
1139 | iterator begin() const { return kv_map.begin(); } | |
1140 | iterator end() const { return kv_map.end(); } | |
1141 | std::string& operator[](const std::string& key) { return kv_map[key]; } | |
1142 | void merge(const client_metadata_t& other) { | |
1143 | kv_map.insert(other.kv_map.begin(), other.kv_map.end()); | |
1144 | features = other.features; | |
1145 | } | |
1146 | void clear() { | |
1147 | kv_map.clear(); | |
1148 | features.clear(); | |
1149 | } | |
1150 | ||
1151 | void encode(bufferlist& bl) const; | |
1152 | void decode(bufferlist::const_iterator& p); | |
1153 | void dump(Formatter *f) const; | |
1154 | }; | |
1155 | WRITE_CLASS_ENCODER(client_metadata_t) | |
7c673cae FG |
1156 | |
1157 | /* | |
1158 | * session_info_t | |
1159 | */ | |
7c673cae FG |
1160 | struct session_info_t { |
1161 | entity_inst_t inst; | |
1162 | std::map<ceph_tid_t,inodeno_t> completed_requests; | |
1163 | interval_set<inodeno_t> prealloc_inos; // preallocated, ready to use. | |
1164 | interval_set<inodeno_t> used_inos; // journaling use | |
11fdf7f2 | 1165 | client_metadata_t client_metadata; |
7c673cae FG |
1166 | std::set<ceph_tid_t> completed_flushes; |
1167 | EntityName auth_name; | |
1168 | ||
1169 | client_t get_client() const { return client_t(inst.name.num()); } | |
11fdf7f2 | 1170 | bool has_feature(size_t bit) const { return client_metadata.features.test(bit); } |
7c673cae FG |
1171 | const entity_name_t& get_source() const { return inst.name; } |
1172 | ||
1173 | void clear_meta() { | |
1174 | prealloc_inos.clear(); | |
1175 | used_inos.clear(); | |
1176 | completed_requests.clear(); | |
1177 | completed_flushes.clear(); | |
11fdf7f2 | 1178 | client_metadata.clear(); |
7c673cae FG |
1179 | } |
1180 | ||
1181 | void encode(bufferlist& bl, uint64_t features) const; | |
11fdf7f2 | 1182 | void decode(bufferlist::const_iterator& p); |
7c673cae FG |
1183 | void dump(Formatter *f) const; |
1184 | static void generate_test_instances(list<session_info_t*>& ls); | |
1185 | }; | |
1186 | WRITE_CLASS_ENCODER_FEATURES(session_info_t) | |
1187 | ||
1188 | ||
1189 | // ======= | |
1190 | // dentries | |
1191 | ||
1192 | struct dentry_key_t { | |
94b18763 | 1193 | snapid_t snapid = 0; |
11fdf7f2 | 1194 | std::string_view name; |
94b18763 FG |
1195 | __u32 hash = 0; |
1196 | dentry_key_t() {} | |
11fdf7f2 | 1197 | dentry_key_t(snapid_t s, std::string_view n, __u32 h=0) : |
7c673cae FG |
1198 | snapid(s), name(n), hash(h) {} |
1199 | ||
94b18763 | 1200 | bool is_valid() { return name.length() || snapid; } |
7c673cae FG |
1201 | |
1202 | // encode into something that can be decoded as a string. | |
1203 | // name_ (head) or name_%x (!head) | |
1204 | void encode(bufferlist& bl) const { | |
1205 | string key; | |
1206 | encode(key); | |
11fdf7f2 TL |
1207 | using ceph::encode; |
1208 | encode(key, bl); | |
7c673cae FG |
1209 | } |
1210 | void encode(string& key) const { | |
1211 | char b[20]; | |
1212 | if (snapid != CEPH_NOSNAP) { | |
1213 | uint64_t val(snapid); | |
1214 | snprintf(b, sizeof(b), "%" PRIx64, val); | |
1215 | } else { | |
1216 | snprintf(b, sizeof(b), "%s", "head"); | |
1217 | } | |
1218 | ostringstream oss; | |
1219 | oss << name << "_" << b; | |
1220 | key = oss.str(); | |
1221 | } | |
11fdf7f2 | 1222 | static void decode_helper(bufferlist::const_iterator& bl, string& nm, snapid_t& sn) { |
7c673cae | 1223 | string key; |
11fdf7f2 | 1224 | decode(key, bl); |
7c673cae FG |
1225 | decode_helper(key, nm, sn); |
1226 | } | |
11fdf7f2 | 1227 | static void decode_helper(std::string_view key, string& nm, snapid_t& sn) { |
7c673cae | 1228 | size_t i = key.find_last_of('_'); |
11fdf7f2 TL |
1229 | ceph_assert(i != string::npos); |
1230 | if (key.compare(i+1, std::string_view::npos, "head") == 0) { | |
7c673cae FG |
1231 | // name_head |
1232 | sn = CEPH_NOSNAP; | |
1233 | } else { | |
1234 | // name_%x | |
1235 | long long unsigned x = 0; | |
94b18763 FG |
1236 | std::string x_str(key.substr(i+1)); |
1237 | sscanf(x_str.c_str(), "%llx", &x); | |
7c673cae FG |
1238 | sn = x; |
1239 | } | |
11fdf7f2 | 1240 | nm = key.substr(0, i); |
7c673cae FG |
1241 | } |
1242 | }; | |
1243 | ||
1244 | inline std::ostream& operator<<(std::ostream& out, const dentry_key_t &k) | |
1245 | { | |
1246 | return out << "(" << k.name << "," << k.snapid << ")"; | |
1247 | } | |
1248 | ||
1249 | inline bool operator<(const dentry_key_t& k1, const dentry_key_t& k2) | |
1250 | { | |
1251 | /* | |
1252 | * order by hash, name, snap | |
1253 | */ | |
1254 | int c = ceph_frag_value(k1.hash) - ceph_frag_value(k2.hash); | |
1255 | if (c) | |
1256 | return c < 0; | |
94b18763 | 1257 | c = k1.name.compare(k2.name); |
7c673cae FG |
1258 | if (c) |
1259 | return c < 0; | |
1260 | return k1.snapid < k2.snapid; | |
1261 | } | |
1262 | ||
1263 | ||
1264 | /* | |
1265 | * string_snap_t is a simple (string, snapid_t) pair | |
1266 | */ | |
1267 | struct string_snap_t { | |
1268 | string name; | |
1269 | snapid_t snapid; | |
1270 | string_snap_t() {} | |
11fdf7f2 | 1271 | string_snap_t(std::string_view n, snapid_t s) : name(n), snapid(s) {} |
7c673cae FG |
1272 | |
1273 | void encode(bufferlist& bl) const; | |
11fdf7f2 | 1274 | void decode(bufferlist::const_iterator& p); |
7c673cae FG |
1275 | void dump(Formatter *f) const; |
1276 | static void generate_test_instances(list<string_snap_t*>& ls); | |
1277 | }; | |
1278 | WRITE_CLASS_ENCODER(string_snap_t) | |
1279 | ||
1280 | inline bool operator<(const string_snap_t& l, const string_snap_t& r) { | |
94b18763 | 1281 | int c = l.name.compare(r.name); |
7c673cae FG |
1282 | return c < 0 || (c == 0 && l.snapid < r.snapid); |
1283 | } | |
1284 | ||
1285 | inline std::ostream& operator<<(std::ostream& out, const string_snap_t &k) | |
1286 | { | |
1287 | return out << "(" << k.name << "," << k.snapid << ")"; | |
1288 | } | |
1289 | ||
1290 | /* | |
1291 | * mds_table_pending_t | |
1292 | * | |
1293 | * mds's requesting any pending ops. child needs to encode the corresponding | |
1294 | * pending mutation state in the table. | |
1295 | */ | |
1296 | struct mds_table_pending_t { | |
94b18763 FG |
1297 | uint64_t reqid = 0; |
1298 | __s32 mds = 0; | |
1299 | version_t tid = 0; | |
1300 | mds_table_pending_t() {} | |
7c673cae | 1301 | void encode(bufferlist& bl) const; |
11fdf7f2 | 1302 | void decode(bufferlist::const_iterator& bl); |
7c673cae FG |
1303 | void dump(Formatter *f) const; |
1304 | static void generate_test_instances(list<mds_table_pending_t*>& ls); | |
1305 | }; | |
1306 | WRITE_CLASS_ENCODER(mds_table_pending_t) | |
1307 | ||
1308 | ||
1309 | // ========= | |
1310 | // requests | |
1311 | ||
1312 | struct metareqid_t { | |
1313 | entity_name_t name; | |
94b18763 FG |
1314 | uint64_t tid = 0; |
1315 | metareqid_t() {} | |
7c673cae FG |
1316 | metareqid_t(entity_name_t n, ceph_tid_t t) : name(n), tid(t) {} |
1317 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
1318 | using ceph::encode; |
1319 | encode(name, bl); | |
1320 | encode(tid, bl); | |
7c673cae | 1321 | } |
11fdf7f2 TL |
1322 | void decode(bufferlist::const_iterator &p) { |
1323 | using ceph::decode; | |
1324 | decode(name, p); | |
1325 | decode(tid, p); | |
7c673cae FG |
1326 | } |
1327 | }; | |
1328 | WRITE_CLASS_ENCODER(metareqid_t) | |
1329 | ||
1330 | inline std::ostream& operator<<(std::ostream& out, const metareqid_t& r) { | |
1331 | return out << r.name << ":" << r.tid; | |
1332 | } | |
1333 | ||
1334 | inline bool operator==(const metareqid_t& l, const metareqid_t& r) { | |
1335 | return (l.name == r.name) && (l.tid == r.tid); | |
1336 | } | |
1337 | inline bool operator!=(const metareqid_t& l, const metareqid_t& r) { | |
1338 | return (l.name != r.name) || (l.tid != r.tid); | |
1339 | } | |
1340 | inline bool operator<(const metareqid_t& l, const metareqid_t& r) { | |
1341 | return (l.name < r.name) || | |
1342 | (l.name == r.name && l.tid < r.tid); | |
1343 | } | |
1344 | inline bool operator<=(const metareqid_t& l, const metareqid_t& r) { | |
1345 | return (l.name < r.name) || | |
1346 | (l.name == r.name && l.tid <= r.tid); | |
1347 | } | |
1348 | inline bool operator>(const metareqid_t& l, const metareqid_t& r) { return !(l <= r); } | |
1349 | inline bool operator>=(const metareqid_t& l, const metareqid_t& r) { return !(l < r); } | |
1350 | ||
1351 | namespace std { | |
1352 | template<> struct hash<metareqid_t> { | |
1353 | size_t operator()(const metareqid_t &r) const { | |
1354 | hash<uint64_t> H; | |
1355 | return H(r.name.num()) ^ H(r.name.type()) ^ H(r.tid); | |
1356 | } | |
1357 | }; | |
1358 | } // namespace std | |
1359 | ||
1360 | ||
1361 | // cap info for client reconnect | |
1362 | struct cap_reconnect_t { | |
1363 | string path; | |
1364 | mutable ceph_mds_cap_reconnect capinfo; | |
1365 | snapid_t snap_follows; | |
1366 | bufferlist flockbl; | |
1367 | ||
1368 | cap_reconnect_t() { | |
1369 | memset(&capinfo, 0, sizeof(capinfo)); | |
1370 | snap_follows = 0; | |
1371 | } | |
11fdf7f2 | 1372 | cap_reconnect_t(uint64_t cap_id, inodeno_t pino, std::string_view p, int w, int i, |
7c673cae FG |
1373 | inodeno_t sr, snapid_t sf, bufferlist& lb) : |
1374 | path(p) { | |
1375 | capinfo.cap_id = cap_id; | |
1376 | capinfo.wanted = w; | |
1377 | capinfo.issued = i; | |
1378 | capinfo.snaprealm = sr; | |
1379 | capinfo.pathbase = pino; | |
1380 | capinfo.flock_len = 0; | |
1381 | snap_follows = sf; | |
1382 | flockbl.claim(lb); | |
1383 | } | |
1384 | void encode(bufferlist& bl) const; | |
11fdf7f2 | 1385 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 1386 | void encode_old(bufferlist& bl) const; |
11fdf7f2 | 1387 | void decode_old(bufferlist::const_iterator& bl); |
7c673cae FG |
1388 | |
1389 | void dump(Formatter *f) const; | |
1390 | static void generate_test_instances(list<cap_reconnect_t*>& ls); | |
1391 | }; | |
1392 | WRITE_CLASS_ENCODER(cap_reconnect_t) | |
1393 | ||
11fdf7f2 TL |
1394 | struct snaprealm_reconnect_t { |
1395 | mutable ceph_mds_snaprealm_reconnect realm; | |
1396 | ||
1397 | snaprealm_reconnect_t() { | |
1398 | memset(&realm, 0, sizeof(realm)); | |
1399 | } | |
1400 | snaprealm_reconnect_t(inodeno_t ino, snapid_t seq, inodeno_t parent) { | |
1401 | realm.ino = ino; | |
1402 | realm.seq = seq; | |
1403 | realm.parent = parent; | |
1404 | } | |
1405 | void encode(bufferlist& bl) const; | |
1406 | void decode(bufferlist::const_iterator& bl); | |
1407 | void encode_old(bufferlist& bl) const; | |
1408 | void decode_old(bufferlist::const_iterator& bl); | |
1409 | ||
1410 | void dump(Formatter *f) const; | |
1411 | static void generate_test_instances(list<snaprealm_reconnect_t*>& ls); | |
1412 | }; | |
1413 | WRITE_CLASS_ENCODER(snaprealm_reconnect_t) | |
7c673cae FG |
1414 | |
1415 | // compat for pre-FLOCK feature | |
1416 | struct old_ceph_mds_cap_reconnect { | |
1417 | __le64 cap_id; | |
1418 | __le32 wanted; | |
1419 | __le32 issued; | |
1420 | __le64 old_size; | |
1421 | struct ceph_timespec old_mtime, old_atime; | |
1422 | __le64 snaprealm; | |
1423 | __le64 pathbase; /* base ino for our path to this ino */ | |
1424 | } __attribute__ ((packed)); | |
1425 | WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect) | |
1426 | ||
1427 | struct old_cap_reconnect_t { | |
1428 | string path; | |
1429 | old_ceph_mds_cap_reconnect capinfo; | |
1430 | ||
1431 | const old_cap_reconnect_t& operator=(const cap_reconnect_t& n) { | |
1432 | path = n.path; | |
1433 | capinfo.cap_id = n.capinfo.cap_id; | |
1434 | capinfo.wanted = n.capinfo.wanted; | |
1435 | capinfo.issued = n.capinfo.issued; | |
1436 | capinfo.snaprealm = n.capinfo.snaprealm; | |
1437 | capinfo.pathbase = n.capinfo.pathbase; | |
1438 | return *this; | |
1439 | } | |
1440 | operator cap_reconnect_t() { | |
1441 | cap_reconnect_t n; | |
1442 | n.path = path; | |
1443 | n.capinfo.cap_id = capinfo.cap_id; | |
1444 | n.capinfo.wanted = capinfo.wanted; | |
1445 | n.capinfo.issued = capinfo.issued; | |
1446 | n.capinfo.snaprealm = capinfo.snaprealm; | |
1447 | n.capinfo.pathbase = capinfo.pathbase; | |
1448 | return n; | |
1449 | } | |
1450 | ||
1451 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
1452 | using ceph::encode; |
1453 | encode(path, bl); | |
1454 | encode(capinfo, bl); | |
7c673cae | 1455 | } |
11fdf7f2 TL |
1456 | void decode(bufferlist::const_iterator& bl) { |
1457 | using ceph::decode; | |
1458 | decode(path, bl); | |
1459 | decode(capinfo, bl); | |
7c673cae FG |
1460 | } |
1461 | }; | |
1462 | WRITE_CLASS_ENCODER(old_cap_reconnect_t) | |
1463 | ||
1464 | ||
1465 | // ================================================================ | |
1466 | // dir frag | |
1467 | ||
1468 | struct dirfrag_t { | |
94b18763 | 1469 | inodeno_t ino = 0; |
7c673cae FG |
1470 | frag_t frag; |
1471 | ||
94b18763 | 1472 | dirfrag_t() {} |
7c673cae FG |
1473 | dirfrag_t(inodeno_t i, frag_t f) : ino(i), frag(f) { } |
1474 | ||
1475 | void encode(bufferlist& bl) const { | |
11fdf7f2 TL |
1476 | using ceph::encode; |
1477 | encode(ino, bl); | |
1478 | encode(frag, bl); | |
7c673cae | 1479 | } |
11fdf7f2 TL |
1480 | void decode(bufferlist::const_iterator& bl) { |
1481 | using ceph::decode; | |
1482 | decode(ino, bl); | |
1483 | decode(frag, bl); | |
7c673cae FG |
1484 | } |
1485 | }; | |
1486 | WRITE_CLASS_ENCODER(dirfrag_t) | |
1487 | ||
1488 | ||
1489 | inline std::ostream& operator<<(std::ostream& out, const dirfrag_t &df) { | |
1490 | out << df.ino; | |
1491 | if (!df.frag.is_root()) out << "." << df.frag; | |
1492 | return out; | |
1493 | } | |
1494 | inline bool operator<(dirfrag_t l, dirfrag_t r) { | |
1495 | if (l.ino < r.ino) return true; | |
1496 | if (l.ino == r.ino && l.frag < r.frag) return true; | |
1497 | return false; | |
1498 | } | |
1499 | inline bool operator==(dirfrag_t l, dirfrag_t r) { | |
1500 | return l.ino == r.ino && l.frag == r.frag; | |
1501 | } | |
1502 | ||
1503 | namespace std { | |
1504 | template<> struct hash<dirfrag_t> { | |
1505 | size_t operator()(const dirfrag_t &df) const { | |
1506 | static rjhash<uint64_t> H; | |
1507 | static rjhash<uint32_t> I; | |
1508 | return H(df.ino) ^ I(df.frag); | |
1509 | } | |
1510 | }; | |
1511 | } // namespace std | |
1512 | ||
1513 | ||
1514 | ||
1515 | // ================================================================ | |
1516 | ||
// Indices of the per-dirfrag popularity counters; used below to index
// dirfrag_load_vec_t's counter array in meta_load().
#define META_POP_IRD      0
#define META_POP_IWR      1
#define META_POP_READDIR  2
#define META_POP_FETCH    3
#define META_POP_STORE    4
// one past the last index above
#define META_NPOP         5
1523 | ||
1524 | class inode_load_vec_t { | |
7c673cae | 1525 | public: |
11fdf7f2 TL |
1526 | using time = DecayCounter::time; |
1527 | using clock = DecayCounter::clock; | |
1528 | static const size_t NUM = 2; | |
1529 | ||
1530 | inode_load_vec_t() : vec{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {} | |
1531 | inode_load_vec_t(const DecayRate &rate) : vec{DecayCounter(rate), DecayCounter(rate)} {} | |
1532 | ||
7c673cae | 1533 | DecayCounter &get(int t) { |
7c673cae FG |
1534 | return vec[t]; |
1535 | } | |
11fdf7f2 TL |
1536 | void zero() { |
1537 | for (auto &d : vec) { | |
1538 | d.reset(); | |
1539 | } | |
7c673cae FG |
1540 | } |
1541 | void encode(bufferlist &bl) const; | |
11fdf7f2 TL |
1542 | void decode(bufferlist::const_iterator& p); |
1543 | void dump(Formatter *f) const; | |
7c673cae | 1544 | static void generate_test_instances(list<inode_load_vec_t*>& ls); |
11fdf7f2 TL |
1545 | |
1546 | private: | |
1547 | std::array<DecayCounter, NUM> vec; | |
7c673cae | 1548 | }; |
11fdf7f2 TL |
1549 | inline void encode(const inode_load_vec_t &c, bufferlist &bl) { |
1550 | c.encode(bl); | |
7c673cae | 1551 | } |
11fdf7f2 TL |
1552 | inline void decode(inode_load_vec_t & c, bufferlist::const_iterator &p) { |
1553 | c.decode(p); | |
7c673cae FG |
1554 | } |
1555 | ||
1556 | class dirfrag_load_vec_t { | |
1557 | public: | |
11fdf7f2 TL |
1558 | using time = DecayCounter::time; |
1559 | using clock = DecayCounter::clock; | |
1560 | static const size_t NUM = 5; | |
1561 | ||
1562 | dirfrag_load_vec_t() : | |
1563 | vec{DecayCounter(DecayRate()), | |
1564 | DecayCounter(DecayRate()), | |
1565 | DecayCounter(DecayRate()), | |
1566 | DecayCounter(DecayRate()), | |
1567 | DecayCounter(DecayRate()) | |
1568 | } | |
7c673cae | 1569 | {} |
11fdf7f2 TL |
1570 | dirfrag_load_vec_t(const DecayRate &rate) : |
1571 | vec{DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate)} | |
1572 | {} | |
1573 | ||
7c673cae FG |
1574 | void encode(bufferlist &bl) const { |
1575 | ENCODE_START(2, 2, bl); | |
94b18763 | 1576 | for (const auto &i : vec) { |
11fdf7f2 | 1577 | encode(i, bl); |
94b18763 | 1578 | } |
7c673cae FG |
1579 | ENCODE_FINISH(bl); |
1580 | } | |
11fdf7f2 | 1581 | void decode(bufferlist::const_iterator &p) { |
7c673cae | 1582 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p); |
94b18763 | 1583 | for (auto &i : vec) { |
11fdf7f2 | 1584 | decode(i, p); |
94b18763 | 1585 | } |
7c673cae FG |
1586 | DECODE_FINISH(p); |
1587 | } | |
7c673cae | 1588 | void dump(Formatter *f) const; |
11fdf7f2 TL |
1589 | void dump(Formatter *f, const DecayRate& rate) const; |
1590 | static void generate_test_instances(std::list<dirfrag_load_vec_t*>& ls); | |
7c673cae | 1591 | |
11fdf7f2 TL |
1592 | const DecayCounter &get(int t) const { |
1593 | return vec[t]; | |
7c673cae | 1594 | } |
11fdf7f2 TL |
1595 | DecayCounter &get(int t) { |
1596 | return vec[t]; | |
1597 | } | |
1598 | void adjust(double d) { | |
94b18763 | 1599 | for (auto &i : vec) { |
11fdf7f2 | 1600 | i.adjust(d); |
94b18763 | 1601 | } |
7c673cae | 1602 | } |
11fdf7f2 | 1603 | void zero() { |
94b18763 | 1604 | for (auto &i : vec) { |
11fdf7f2 | 1605 | i.reset(); |
94b18763 | 1606 | } |
7c673cae | 1607 | } |
28e407b8 | 1608 | double meta_load() const { |
7c673cae | 1609 | return |
11fdf7f2 TL |
1610 | 1*vec[META_POP_IRD].get() + |
1611 | 2*vec[META_POP_IWR].get() + | |
1612 | 1*vec[META_POP_READDIR].get() + | |
1613 | 2*vec[META_POP_FETCH].get() + | |
1614 | 4*vec[META_POP_STORE].get(); | |
7c673cae FG |
1615 | } |
1616 | ||
11fdf7f2 TL |
1617 | void add(dirfrag_load_vec_t& r) { |
1618 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1619 | vec[i].adjust(r.vec[i].get()); | |
7c673cae | 1620 | } |
11fdf7f2 TL |
1621 | void sub(dirfrag_load_vec_t& r) { |
1622 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1623 | vec[i].adjust(-r.vec[i].get()); | |
7c673cae FG |
1624 | } |
1625 | void scale(double f) { | |
11fdf7f2 | 1626 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) |
7c673cae FG |
1627 | vec[i].scale(f); |
1628 | } | |
11fdf7f2 TL |
1629 | |
1630 | private: | |
1631 | friend inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl); | |
1632 | std::array<DecayCounter, NUM> vec; | |
7c673cae FG |
1633 | }; |
1634 | ||
11fdf7f2 TL |
1635 | inline void encode(const dirfrag_load_vec_t &c, bufferlist &bl) { |
1636 | c.encode(bl); | |
7c673cae | 1637 | } |
11fdf7f2 TL |
1638 | inline void decode(dirfrag_load_vec_t& c, bufferlist::const_iterator &p) { |
1639 | c.decode(p); | |
7c673cae FG |
1640 | } |
1641 | ||
28e407b8 | 1642 | inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl) |
7c673cae | 1643 | { |
11fdf7f2 TL |
1644 | std::ostringstream ss; |
1645 | ss << std::setprecision(1) << std::fixed | |
1646 | << "[pop" | |
1647 | " IRD:" << dl.vec[0] | |
1648 | << " IWR:" << dl.vec[1] | |
1649 | << " RDR:" << dl.vec[2] | |
1650 | << " FET:" << dl.vec[3] | |
1651 | << " STR:" << dl.vec[4] | |
1652 | << " *LOAD:" << dl.meta_load() << "]"; | |
1653 | return out << ss.str() << std::endl; | |
7c673cae FG |
1654 | } |
1655 | ||
1656 | ||
7c673cae FG |
1657 | /* mds_load_t |
1658 | * mds load | |
1659 | */ | |
1660 | ||
1661 | struct mds_load_t { | |
11fdf7f2 TL |
1662 | using clock = dirfrag_load_vec_t::clock; |
1663 | using time = dirfrag_load_vec_t::time; | |
1664 | ||
7c673cae FG |
1665 | dirfrag_load_vec_t auth; |
1666 | dirfrag_load_vec_t all; | |
1667 | ||
11fdf7f2 TL |
1668 | mds_load_t() : auth(DecayRate()), all(DecayRate()) {} |
1669 | mds_load_t(const DecayRate &rate) : auth(rate), all(rate) {} | |
1670 | ||
94b18763 FG |
1671 | double req_rate = 0.0; |
1672 | double cache_hit_rate = 0.0; | |
1673 | double queue_len = 0.0; | |
7c673cae | 1674 | |
94b18763 | 1675 | double cpu_load_avg = 0.0; |
7c673cae | 1676 | |
11fdf7f2 | 1677 | double mds_load() const; // defiend in MDBalancer.cc |
7c673cae | 1678 | void encode(bufferlist& bl) const; |
11fdf7f2 | 1679 | void decode(bufferlist::const_iterator& bl); |
7c673cae | 1680 | void dump(Formatter *f) const; |
11fdf7f2 | 1681 | static void generate_test_instances(std::list<mds_load_t*>& ls); |
7c673cae | 1682 | }; |
11fdf7f2 TL |
1683 | inline void encode(const mds_load_t &c, bufferlist &bl) { |
1684 | c.encode(bl); | |
7c673cae | 1685 | } |
11fdf7f2 TL |
1686 | inline void decode(mds_load_t &c, bufferlist::const_iterator &p) { |
1687 | c.decode(p); | |
7c673cae FG |
1688 | } |
1689 | ||
28e407b8 | 1690 | inline std::ostream& operator<<(std::ostream& out, const mds_load_t& load) |
7c673cae FG |
1691 | { |
1692 | return out << "mdsload<" << load.auth << "/" << load.all | |
1693 | << ", req " << load.req_rate | |
1694 | << ", hr " << load.cache_hit_rate | |
1695 | << ", qlen " << load.queue_len | |
1696 | << ", cpu " << load.cpu_load_avg | |
1697 | << ">"; | |
1698 | } | |
1699 | ||
1700 | class load_spread_t { | |
1701 | public: | |
11fdf7f2 TL |
1702 | using time = DecayCounter::time; |
1703 | using clock = DecayCounter::clock; | |
7c673cae FG |
1704 | static const int MAX = 4; |
1705 | int last[MAX]; | |
94b18763 | 1706 | int p = 0, n = 0; |
7c673cae FG |
1707 | DecayCounter count; |
1708 | ||
1709 | public: | |
11fdf7f2 TL |
1710 | load_spread_t() = delete; |
1711 | load_spread_t(const DecayRate &rate) : count(rate) | |
7c673cae FG |
1712 | { |
1713 | for (int i=0; i<MAX; i++) | |
1714 | last[i] = -1; | |
1715 | } | |
1716 | ||
11fdf7f2 | 1717 | double hit(int who) { |
7c673cae FG |
1718 | for (int i=0; i<n; i++) |
1719 | if (last[i] == who) | |
1720 | return count.get_last(); | |
1721 | ||
1722 | // we're new(ish) | |
1723 | last[p++] = who; | |
1724 | if (n < MAX) n++; | |
1725 | if (n == 1) return 0.0; | |
1726 | ||
1727 | if (p == MAX) p = 0; | |
1728 | ||
11fdf7f2 | 1729 | return count.hit(); |
7c673cae | 1730 | } |
11fdf7f2 TL |
1731 | double get() const { |
1732 | return count.get(); | |
7c673cae FG |
1733 | } |
1734 | }; | |
1735 | ||
1736 | ||
1737 | ||
1738 | // ================================================================ | |
1739 | typedef std::pair<mds_rank_t, mds_rank_t> mds_authority_t; | |
1740 | ||
1741 | // -- authority delegation -- | |
1742 | // directory authority types | |
1743 | // >= 0 is the auth mds | |
1744 | #define CDIR_AUTH_PARENT mds_rank_t(-1) // default | |
1745 | #define CDIR_AUTH_UNKNOWN mds_rank_t(-2) | |
1746 | #define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN) | |
1747 | #define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN) | |
1748 | //#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2) | |
1749 | ||
1750 | class MDSCacheObjectInfo { | |
1751 | public: | |
94b18763 | 1752 | inodeno_t ino = 0; |
7c673cae FG |
1753 | dirfrag_t dirfrag; |
1754 | string dname; | |
1755 | snapid_t snapid; | |
1756 | ||
94b18763 | 1757 | MDSCacheObjectInfo() {} |
7c673cae FG |
1758 | |
1759 | void encode(bufferlist& bl) const; | |
11fdf7f2 | 1760 | void decode(bufferlist::const_iterator& bl); |
7c673cae FG |
1761 | void dump(Formatter *f) const; |
1762 | static void generate_test_instances(list<MDSCacheObjectInfo*>& ls); | |
1763 | }; | |
1764 | ||
1765 | inline std::ostream& operator<<(std::ostream& out, const MDSCacheObjectInfo &info) { | |
1766 | if (info.ino) return out << info.ino << "." << info.snapid; | |
1767 | if (info.dname.length()) return out << info.dirfrag << "/" << info.dname | |
1768 | << " snap " << info.snapid; | |
1769 | return out << info.dirfrag; | |
1770 | } | |
1771 | ||
1772 | inline bool operator==(const MDSCacheObjectInfo& l, const MDSCacheObjectInfo& r) { | |
1773 | if (l.ino || r.ino) | |
1774 | return l.ino == r.ino && l.snapid == r.snapid; | |
1775 | else | |
1776 | return l.dirfrag == r.dirfrag && l.dname == r.dname; | |
1777 | } | |
1778 | WRITE_CLASS_ENCODER(MDSCacheObjectInfo) | |
1779 | ||
1780 | ||
1781 | // parse a map of keys/values. | |
1782 | namespace qi = boost::spirit::qi; | |
1783 | ||
1784 | template <typename Iterator> | |
1785 | struct keys_and_values | |
1786 | : qi::grammar<Iterator, std::map<string, string>()> | |
1787 | { | |
1788 | keys_and_values() | |
1789 | : keys_and_values::base_type(query) | |
1790 | { | |
1791 | query = pair >> *(qi::lit(' ') >> pair); | |
1792 | pair = key >> '=' >> value; | |
1793 | key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9"); | |
1794 | value = +qi::char_("a-zA-Z_0-9"); | |
1795 | } | |
1796 | qi::rule<Iterator, std::map<string, string>()> query; | |
1797 | qi::rule<Iterator, std::pair<string, string>()> pair; | |
1798 | qi::rule<Iterator, string()> key, value; | |
1799 | }; | |
1800 | ||
1801 | #endif |