]>
Commit | Line | Data |
---|---|---|
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- | |
2 | // vim: ts=8 sw=2 smarttab | |
3 | #ifndef CEPH_MDSTYPES_H | |
4 | #define CEPH_MDSTYPES_H | |
5 | ||
6 | #include "include/int_types.h" | |
7 | ||
8 | #include <math.h> | |
9 | #include <ostream> | |
10 | #include <set> | |
11 | #include <map> | |
12 | #include <string_view> | |
13 | ||
14 | #include "common/config.h" | |
15 | #include "common/Clock.h" | |
16 | #include "common/DecayCounter.h" | |
17 | #include "common/entity_name.h" | |
18 | ||
19 | #include "include/Context.h" | |
20 | #include "include/frag.h" | |
21 | #include "include/xlist.h" | |
22 | #include "include/interval_set.h" | |
23 | #include "include/compact_map.h" | |
24 | #include "include/compact_set.h" | |
25 | #include "include/fs_types.h" | |
26 | #include "include/ceph_fs.h" | |
27 | ||
28 | #include "inode_backtrace.h" | |
29 | ||
30 | #include <boost/spirit/include/qi.hpp> | |
31 | #include <boost/pool/pool.hpp> | |
32 | #include "include/ceph_assert.h" | |
33 | #include <boost/serialization/strong_typedef.hpp> | |
34 | ||
#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011"

#define MDS_PORT_CACHE    0x200
#define MDS_PORT_LOCKER   0x300
#define MDS_PORT_MIGRATOR 0x400

#define MAX_MDS   0x100
#define NUM_STRAY 10

// For inode numbers 1, 2 and 4, see CEPH_INO_* in include/ceph_fs.h.

// Base inode numbers of the per-MDS ranges.  The bases are spaced MAX_MDS
// apart; the stray range spans MAX_MDS * NUM_STRAY numbers (see
// MDS_INO_IS_STRAY below).
#define MDS_INO_MDSDIR_OFFSET (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET  (6*MAX_MDS)

// Locations for journal data
#define MDS_INO_LOG_OFFSET         (2*MAX_MDS)
#define MDS_INO_LOG_BACKUP_OFFSET  (3*MAX_MDS)
#define MDS_INO_LOG_POINTER_OFFSET (4*MAX_MDS)
#define MDS_INO_PURGE_QUEUE        (5*MAX_MDS)

// First inode number past the stray range.
#define MDS_INO_SYSTEM_BASE ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))

// Inode number of stray directory i belonging to x, and of x's mdsdir.
// Every macro argument is parenthesised before it is cast, so that a
// compound argument such as `r < 0 ? 0 : r` is converted as a whole.
#define MDS_INO_STRAY(x,i) (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
#define MDS_INO_MDSDIR(x)  (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

// Range tests and the inverse mappings of the two macros above.
#define MDS_INO_IS_STRAY(i)  ((i) >= MDS_INO_STRAY_OFFSET && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i) ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
#define MDS_INO_IS_BASE(i) ((i) == CEPH_INO_ROOT || (i) == CEPH_INO_GLOBAL_SNAPREALM || MDS_INO_IS_MDSDIR(i))
#define MDS_INO_STRAY_OWNER(i) (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i) (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)

// True for inode numbers in [MDS_INO_MDSDIR_OFFSET, MDS_INO_SYSTEM_BASE).
#define MDS_IS_PRIVATE_INO(i) ((i) < MDS_INO_SYSTEM_BASE && (i) >= MDS_INO_MDSDIR_OFFSET)
68 | ||
69 | typedef int32_t mds_rank_t; | |
70 | constexpr mds_rank_t MDS_RANK_NONE = -1; | |
71 | ||
72 | BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t) | |
73 | extern const mds_gid_t MDS_GID_NONE; | |
74 | ||
75 | typedef int32_t fs_cluster_id_t; | |
76 | constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = -1; | |
77 | // The namespace ID of the anonymous default filesystem from legacy systems | |
78 | constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS = 0; | |
79 | ||
80 | class mds_role_t { | |
81 | public: | |
82 | mds_role_t(fs_cluster_id_t fscid_, mds_rank_t rank_) | |
83 | : fscid(fscid_), rank(rank_) | |
84 | {} | |
85 | mds_role_t() {} | |
86 | ||
87 | bool operator<(mds_role_t const &rhs) const { | |
88 | if (fscid < rhs.fscid) { | |
89 | return true; | |
90 | } else if (fscid == rhs.fscid) { | |
91 | return rank < rhs.rank; | |
92 | } else { | |
93 | return false; | |
94 | } | |
95 | } | |
96 | ||
97 | bool is_none() const { | |
98 | return (rank == MDS_RANK_NONE); | |
99 | } | |
100 | ||
101 | fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE; | |
102 | mds_rank_t rank = MDS_RANK_NONE; | |
103 | }; | |
104 | inline std::ostream& operator<<(std::ostream& out, const mds_role_t& role) { | |
105 | return out << role.fscid << ":" << role.rank; | |
106 | } | |
107 | ||
108 | // CAPS | |
109 | inline string gcap_string(int cap) | |
110 | { | |
111 | string s; | |
112 | if (cap & CEPH_CAP_GSHARED) s += "s"; | |
113 | if (cap & CEPH_CAP_GEXCL) s += "x"; | |
114 | if (cap & CEPH_CAP_GCACHE) s += "c"; | |
115 | if (cap & CEPH_CAP_GRD) s += "r"; | |
116 | if (cap & CEPH_CAP_GWR) s += "w"; | |
117 | if (cap & CEPH_CAP_GBUFFER) s += "b"; | |
118 | if (cap & CEPH_CAP_GWREXTEND) s += "a"; | |
119 | if (cap & CEPH_CAP_GLAZYIO) s += "l"; | |
120 | return s; | |
121 | } | |
122 | inline string ccap_string(int cap) | |
123 | { | |
124 | string s; | |
125 | if (cap & CEPH_CAP_PIN) s += "p"; | |
126 | ||
127 | int a = (cap >> CEPH_CAP_SAUTH) & 3; | |
128 | if (a) s += 'A' + gcap_string(a); | |
129 | ||
130 | a = (cap >> CEPH_CAP_SLINK) & 3; | |
131 | if (a) s += 'L' + gcap_string(a); | |
132 | ||
133 | a = (cap >> CEPH_CAP_SXATTR) & 3; | |
134 | if (a) s += 'X' + gcap_string(a); | |
135 | ||
136 | a = cap >> CEPH_CAP_SFILE; | |
137 | if (a) s += 'F' + gcap_string(a); | |
138 | ||
139 | if (s.length() == 0) | |
140 | s = "-"; | |
141 | return s; | |
142 | } | |
143 | ||
144 | struct scatter_info_t { | |
145 | version_t version = 0; | |
146 | }; | |
147 | ||
148 | struct frag_info_t : public scatter_info_t { | |
149 | int64_t size() const { return nfiles + nsubdirs; } | |
150 | ||
151 | void zero() { | |
152 | *this = frag_info_t(); | |
153 | } | |
154 | ||
155 | // *this += cur - acc; | |
156 | void add_delta(const frag_info_t &cur, const frag_info_t &acc, bool *touched_mtime=0, bool *touched_chattr=0) { | |
157 | if (cur.mtime > mtime) { | |
158 | mtime = cur.mtime; | |
159 | if (touched_mtime) | |
160 | *touched_mtime = true; | |
161 | } | |
162 | if (cur.change_attr > change_attr) { | |
163 | change_attr = cur.change_attr; | |
164 | if (touched_chattr) | |
165 | *touched_chattr = true; | |
166 | } | |
167 | nfiles += cur.nfiles - acc.nfiles; | |
168 | nsubdirs += cur.nsubdirs - acc.nsubdirs; | |
169 | } | |
170 | ||
171 | void add(const frag_info_t& other) { | |
172 | if (other.mtime > mtime) | |
173 | mtime = other.mtime; | |
174 | if (other.change_attr > change_attr) | |
175 | change_attr = other.change_attr; | |
176 | nfiles += other.nfiles; | |
177 | nsubdirs += other.nsubdirs; | |
178 | } | |
179 | ||
180 | bool same_sums(const frag_info_t &o) const { | |
181 | return mtime <= o.mtime && | |
182 | nfiles == o.nfiles && | |
183 | nsubdirs == o.nsubdirs; | |
184 | } | |
185 | ||
186 | void encode(bufferlist &bl) const; | |
187 | void decode(bufferlist::const_iterator& bl); | |
188 | void dump(Formatter *f) const; | |
189 | static void generate_test_instances(std::list<frag_info_t*>& ls); | |
190 | ||
191 | // this frag | |
192 | utime_t mtime; | |
193 | uint64_t change_attr = 0; | |
194 | int64_t nfiles = 0; // files | |
195 | int64_t nsubdirs = 0; // subdirs | |
196 | }; | |
197 | WRITE_CLASS_ENCODER(frag_info_t) | |
198 | ||
199 | inline bool operator==(const frag_info_t &l, const frag_info_t &r) { | |
200 | return memcmp(&l, &r, sizeof(l)) == 0; | |
201 | } | |
202 | inline bool operator!=(const frag_info_t &l, const frag_info_t &r) { | |
203 | return !(l == r); | |
204 | } | |
205 | ||
206 | std::ostream& operator<<(std::ostream &out, const frag_info_t &f); | |
207 | ||
208 | ||
209 | struct nest_info_t : public scatter_info_t { | |
210 | int64_t rsize() const { return rfiles + rsubdirs; } | |
211 | ||
212 | void zero() { | |
213 | *this = nest_info_t(); | |
214 | } | |
215 | ||
216 | void sub(const nest_info_t &other) { | |
217 | add(other, -1); | |
218 | } | |
219 | void add(const nest_info_t &other, int fac=1) { | |
220 | if (other.rctime > rctime) | |
221 | rctime = other.rctime; | |
222 | rbytes += fac*other.rbytes; | |
223 | rfiles += fac*other.rfiles; | |
224 | rsubdirs += fac*other.rsubdirs; | |
225 | rsnaps += fac*other.rsnaps; | |
226 | } | |
227 | ||
228 | // *this += cur - acc; | |
229 | void add_delta(const nest_info_t &cur, const nest_info_t &acc) { | |
230 | if (cur.rctime > rctime) | |
231 | rctime = cur.rctime; | |
232 | rbytes += cur.rbytes - acc.rbytes; | |
233 | rfiles += cur.rfiles - acc.rfiles; | |
234 | rsubdirs += cur.rsubdirs - acc.rsubdirs; | |
235 | rsnaps += cur.rsnaps - acc.rsnaps; | |
236 | } | |
237 | ||
238 | bool same_sums(const nest_info_t &o) const { | |
239 | return rctime <= o.rctime && | |
240 | rbytes == o.rbytes && | |
241 | rfiles == o.rfiles && | |
242 | rsubdirs == o.rsubdirs && | |
243 | rsnaps == o.rsnaps; | |
244 | } | |
245 | ||
246 | void encode(bufferlist &bl) const; | |
247 | void decode(bufferlist::const_iterator& bl); | |
248 | void dump(Formatter *f) const; | |
249 | static void generate_test_instances(std::list<nest_info_t*>& ls); | |
250 | ||
251 | // this frag + children | |
252 | utime_t rctime; | |
253 | int64_t rbytes = 0; | |
254 | int64_t rfiles = 0; | |
255 | int64_t rsubdirs = 0; | |
256 | int64_t rsnaps = 0; | |
257 | }; | |
258 | WRITE_CLASS_ENCODER(nest_info_t) | |
259 | ||
260 | inline bool operator==(const nest_info_t &l, const nest_info_t &r) { | |
261 | return memcmp(&l, &r, sizeof(l)) == 0; | |
262 | } | |
263 | inline bool operator!=(const nest_info_t &l, const nest_info_t &r) { | |
264 | return !(l == r); | |
265 | } | |
266 | ||
267 | std::ostream& operator<<(std::ostream &out, const nest_info_t &n); | |
268 | ||
269 | struct vinodeno_t { | |
270 | vinodeno_t() {} | |
271 | vinodeno_t(inodeno_t i, snapid_t s) : ino(i), snapid(s) {} | |
272 | ||
273 | void encode(bufferlist& bl) const { | |
274 | using ceph::encode; | |
275 | encode(ino, bl); | |
276 | encode(snapid, bl); | |
277 | } | |
278 | void decode(bufferlist::const_iterator& p) { | |
279 | using ceph::decode; | |
280 | decode(ino, p); | |
281 | decode(snapid, p); | |
282 | } | |
283 | ||
284 | inodeno_t ino; | |
285 | snapid_t snapid; | |
286 | }; | |
287 | WRITE_CLASS_ENCODER(vinodeno_t) | |
288 | ||
289 | inline bool operator==(const vinodeno_t &l, const vinodeno_t &r) { | |
290 | return l.ino == r.ino && l.snapid == r.snapid; | |
291 | } | |
292 | inline bool operator!=(const vinodeno_t &l, const vinodeno_t &r) { | |
293 | return !(l == r); | |
294 | } | |
295 | inline bool operator<(const vinodeno_t &l, const vinodeno_t &r) { | |
296 | return | |
297 | l.ino < r.ino || | |
298 | (l.ino == r.ino && l.snapid < r.snapid); | |
299 | } | |
300 | ||
301 | struct quota_info_t | |
302 | { | |
303 | void encode(bufferlist& bl) const { | |
304 | ENCODE_START(1, 1, bl); | |
305 | encode(max_bytes, bl); | |
306 | encode(max_files, bl); | |
307 | ENCODE_FINISH(bl); | |
308 | } | |
309 | void decode(bufferlist::const_iterator& p) { | |
310 | DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p); | |
311 | decode(max_bytes, p); | |
312 | decode(max_files, p); | |
313 | DECODE_FINISH(p); | |
314 | } | |
315 | ||
316 | void dump(Formatter *f) const; | |
317 | static void generate_test_instances(std::list<quota_info_t *>& ls); | |
318 | ||
319 | bool is_valid() const { | |
320 | return max_bytes >=0 && max_files >=0; | |
321 | } | |
322 | bool is_enable() const { | |
323 | return max_bytes || max_files; | |
324 | } | |
325 | ||
326 | int64_t max_bytes = 0; | |
327 | int64_t max_files = 0; | |
328 | }; | |
329 | WRITE_CLASS_ENCODER(quota_info_t) | |
330 | ||
331 | inline bool operator==(const quota_info_t &l, const quota_info_t &r) { | |
332 | return memcmp(&l, &r, sizeof(l)) == 0; | |
333 | } | |
334 | ||
335 | ostream& operator<<(ostream &out, const quota_info_t &n); | |
336 | ||
337 | namespace std { | |
338 | template<> struct hash<vinodeno_t> { | |
339 | size_t operator()(const vinodeno_t &vino) const { | |
340 | hash<inodeno_t> H; | |
341 | hash<uint64_t> I; | |
342 | return H(vino.ino) ^ I(vino.snapid); | |
343 | } | |
344 | }; | |
345 | } | |
346 | ||
347 | inline std::ostream& operator<<(std::ostream &out, const vinodeno_t &vino) { | |
348 | out << vino.ino; | |
349 | if (vino.snapid == CEPH_NOSNAP) | |
350 | out << ".head"; | |
351 | else if (vino.snapid) | |
352 | out << '.' << vino.snapid; | |
353 | return out; | |
354 | } | |
355 | ||
356 | struct client_writeable_range_t { | |
357 | struct byte_range_t { | |
358 | uint64_t first = 0, last = 0; // interval client can write to | |
359 | }; | |
360 | ||
361 | void encode(bufferlist &bl) const; | |
362 | void decode(bufferlist::const_iterator& bl); | |
363 | void dump(Formatter *f) const; | |
364 | static void generate_test_instances(std::list<client_writeable_range_t*>& ls); | |
365 | ||
366 | byte_range_t range; | |
367 | snapid_t follows = 0; // aka "data+metadata flushed thru" | |
368 | }; | |
369 | ||
370 | inline void decode(client_writeable_range_t::byte_range_t& range, bufferlist::const_iterator& bl) { | |
371 | decode(range.first, bl); | |
372 | decode(range.last, bl); | |
373 | } | |
374 | ||
375 | WRITE_CLASS_ENCODER(client_writeable_range_t) | |
376 | ||
377 | std::ostream& operator<<(std::ostream& out, const client_writeable_range_t& r); | |
378 | ||
379 | inline bool operator==(const client_writeable_range_t& l, | |
380 | const client_writeable_range_t& r) { | |
381 | return l.range.first == r.range.first && l.range.last == r.range.last && | |
382 | l.follows == r.follows; | |
383 | } | |
384 | ||
385 | struct inline_data_t { | |
386 | public: | |
387 | inline_data_t() {} | |
388 | inline_data_t(const inline_data_t& o) : version(o.version) { | |
389 | if (o.blp) | |
390 | get_data() = *o.blp; | |
391 | } | |
392 | inline_data_t& operator=(const inline_data_t& o) { | |
393 | version = o.version; | |
394 | if (o.blp) | |
395 | get_data() = *o.blp; | |
396 | else | |
397 | free_data(); | |
398 | return *this; | |
399 | } | |
400 | ||
401 | void free_data() { | |
402 | blp.reset(); | |
403 | } | |
404 | bufferlist& get_data() { | |
405 | if (!blp) | |
406 | blp.reset(new bufferlist); | |
407 | return *blp; | |
408 | } | |
409 | size_t length() const { return blp ? blp->length() : 0; } | |
410 | ||
411 | bool operator==(const inline_data_t& o) const { | |
412 | return length() == o.length() && | |
413 | (length() == 0 || | |
414 | (*const_cast<bufferlist*>(blp.get()) == *const_cast<bufferlist*>(o.blp.get()))); | |
415 | } | |
416 | bool operator!=(const inline_data_t& o) const { | |
417 | return !(*this == o); | |
418 | } | |
419 | void encode(bufferlist &bl) const; | |
420 | void decode(bufferlist::const_iterator& bl); | |
421 | ||
422 | version_t version = 1; | |
423 | ||
424 | private: | |
425 | std::unique_ptr<bufferlist> blp; | |
426 | }; | |
427 | WRITE_CLASS_ENCODER(inline_data_t) | |
428 | ||
// Kinds of damage.  The enumerators keep their implicit values 0, 1, 2.
enum {
  DAMAGE_STATS,     // statistics (dirstat, size, etc)
  DAMAGE_RSTATS,    // recursive statistics (rstat, accounted_rstat)
  DAMAGE_FRAGTREE   // fragtree -- repair by searching
};
using damage_flags_t = uint32_t;
435 | ||
436 | template<template<typename> class Allocator = std::allocator> | |
437 | struct inode_t { | |
438 | /** | |
439 | * *************** | |
440 | * Do not forget to add any new fields to the compare() function. | |
441 | * *************** | |
442 | */ | |
443 | using client_range_map = std::map<client_t,client_writeable_range_t,std::less<client_t>,Allocator<std::pair<const client_t,client_writeable_range_t>>>; | |
444 | ||
445 | inode_t() | |
446 | { | |
447 | clear_layout(); | |
448 | } | |
449 | ||
450 | // file type | |
451 | bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; } | |
452 | bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; } | |
453 | bool is_file() const { return (mode & S_IFMT) == S_IFREG; } | |
454 | ||
455 | bool is_truncating() const { return (truncate_pending > 0); } | |
456 | void truncate(uint64_t old_size, uint64_t new_size) { | |
457 | ceph_assert(new_size < old_size); | |
458 | if (old_size > max_size_ever) | |
459 | max_size_ever = old_size; | |
460 | truncate_from = old_size; | |
461 | size = new_size; | |
462 | rstat.rbytes = new_size; | |
463 | truncate_size = size; | |
464 | truncate_seq++; | |
465 | truncate_pending++; | |
466 | } | |
467 | ||
468 | bool has_layout() const { | |
469 | return layout != file_layout_t(); | |
470 | } | |
471 | ||
472 | void clear_layout() { | |
473 | layout = file_layout_t(); | |
474 | } | |
475 | ||
476 | uint64_t get_layout_size_increment() const { | |
477 | return layout.get_period(); | |
478 | } | |
479 | ||
480 | bool is_dirty_rstat() const { return !(rstat == accounted_rstat); } | |
481 | ||
482 | uint64_t get_client_range(client_t client) const { | |
483 | auto it = client_ranges.find(client); | |
484 | return it != client_ranges.end() ? it->second.range.last : 0; | |
485 | } | |
486 | ||
487 | uint64_t get_max_size() const { | |
488 | uint64_t max = 0; | |
489 | for (std::map<client_t,client_writeable_range_t>::const_iterator p = client_ranges.begin(); | |
490 | p != client_ranges.end(); | |
491 | ++p) | |
492 | if (p->second.range.last > max) | |
493 | max = p->second.range.last; | |
494 | return max; | |
495 | } | |
496 | void set_max_size(uint64_t new_max) { | |
497 | if (new_max == 0) { | |
498 | client_ranges.clear(); | |
499 | } else { | |
500 | for (std::map<client_t,client_writeable_range_t>::iterator p = client_ranges.begin(); | |
501 | p != client_ranges.end(); | |
502 | ++p) | |
503 | p->second.range.last = new_max; | |
504 | } | |
505 | } | |
506 | ||
507 | void trim_client_ranges(snapid_t last) { | |
508 | std::map<client_t, client_writeable_range_t>::iterator p = client_ranges.begin(); | |
509 | while (p != client_ranges.end()) { | |
510 | if (p->second.follows >= last) | |
511 | client_ranges.erase(p++); | |
512 | else | |
513 | ++p; | |
514 | } | |
515 | } | |
516 | ||
517 | bool is_backtrace_updated() const { | |
518 | return backtrace_version == version; | |
519 | } | |
520 | void update_backtrace(version_t pv=0) { | |
521 | backtrace_version = pv ? pv : version; | |
522 | } | |
523 | ||
524 | void add_old_pool(int64_t l) { | |
525 | backtrace_version = version; | |
526 | old_pools.insert(l); | |
527 | } | |
528 | ||
529 | void encode(bufferlist &bl, uint64_t features) const; | |
530 | void decode(bufferlist::const_iterator& bl); | |
531 | void dump(Formatter *f) const; | |
532 | static void generate_test_instances(std::list<inode_t*>& ls); | |
533 | /** | |
534 | * Compare this inode_t with another that represent *the same inode* | |
535 | * at different points in time. | |
536 | * @pre The inodes are the same ino | |
537 | * | |
538 | * @param other The inode_t to compare ourselves with | |
539 | * @param divergent A bool pointer which will be set to true | |
540 | * if the values are different in a way that can't be explained | |
541 | * by one being a newer version than the other. | |
542 | * | |
543 | * @returns 1 if we are newer than the other, 0 if equal, -1 if older. | |
544 | */ | |
545 | int compare(const inode_t &other, bool *divergent) const; | |
546 | ||
547 | // base (immutable) | |
548 | inodeno_t ino = 0; | |
549 | uint32_t rdev = 0; // if special file | |
550 | ||
551 | // affected by any inode change... | |
552 | utime_t ctime; // inode change time | |
553 | utime_t btime; // birth time | |
554 | ||
555 | // perm (namespace permissions) | |
556 | uint32_t mode = 0; | |
557 | uid_t uid = 0; | |
558 | gid_t gid = 0; | |
559 | ||
560 | // nlink | |
561 | int32_t nlink = 0; | |
562 | ||
563 | // file (data access) | |
564 | ceph_dir_layout dir_layout = {}; // [dir only] | |
565 | file_layout_t layout; | |
566 | compact_set<int64_t, std::less<int64_t>, Allocator<int64_t>> old_pools; | |
567 | uint64_t size = 0; // on directory, # dentries | |
568 | uint64_t max_size_ever = 0; // max size the file has ever been | |
569 | uint32_t truncate_seq = 0; | |
570 | uint64_t truncate_size = 0, truncate_from = 0; | |
571 | uint32_t truncate_pending = 0; | |
572 | utime_t mtime; // file data modify time. | |
573 | utime_t atime; // file data access time. | |
574 | uint32_t time_warp_seq = 0; // count of (potential) mtime/atime timewarps (i.e., utimes()) | |
575 | inline_data_t inline_data; // FIXME check | |
576 | ||
577 | // change attribute | |
578 | uint64_t change_attr = 0; | |
579 | ||
580 | client_range_map client_ranges; // client(s) can write to these ranges | |
581 | ||
582 | // dirfrag, recursive accountin | |
583 | frag_info_t dirstat; // protected by my filelock | |
584 | nest_info_t rstat; // protected by my nestlock | |
585 | nest_info_t accounted_rstat; // protected by parent's nestlock | |
586 | ||
587 | quota_info_t quota; | |
588 | ||
589 | mds_rank_t export_pin = MDS_RANK_NONE; | |
590 | ||
591 | double export_ephemeral_random_pin = 0; | |
592 | bool export_ephemeral_distributed_pin = false; | |
593 | ||
594 | // special stuff | |
595 | version_t version = 0; // auth only | |
596 | version_t file_data_version = 0; // auth only | |
597 | version_t xattr_version = 0; | |
598 | ||
599 | utime_t last_scrub_stamp; // start time of last complete scrub | |
600 | version_t last_scrub_version = 0;// (parent) start version of last complete scrub | |
601 | ||
602 | version_t backtrace_version = 0; | |
603 | ||
604 | snapid_t oldest_snap; | |
605 | ||
606 | std::basic_string<char,std::char_traits<char>,Allocator<char>> stray_prior_path; //stores path before unlink | |
607 | ||
608 | private: | |
609 | bool older_is_consistent(const inode_t &other) const; | |
610 | }; | |
611 | ||
612 | // These methods may be moved back to mdstypes.cc when we have pmr | |
613 | template<template<typename> class Allocator> | |
614 | void inode_t<Allocator>::encode(bufferlist &bl, uint64_t features) const | |
615 | { | |
616 | ENCODE_START(16, 6, bl); | |
617 | ||
618 | encode(ino, bl); | |
619 | encode(rdev, bl); | |
620 | encode(ctime, bl); | |
621 | ||
622 | encode(mode, bl); | |
623 | encode(uid, bl); | |
624 | encode(gid, bl); | |
625 | ||
626 | encode(nlink, bl); | |
627 | { | |
628 | // removed field | |
629 | bool anchored = 0; | |
630 | encode(anchored, bl); | |
631 | } | |
632 | ||
633 | encode(dir_layout, bl); | |
634 | encode(layout, bl, features); | |
635 | encode(size, bl); | |
636 | encode(truncate_seq, bl); | |
637 | encode(truncate_size, bl); | |
638 | encode(truncate_from, bl); | |
639 | encode(truncate_pending, bl); | |
640 | encode(mtime, bl); | |
641 | encode(atime, bl); | |
642 | encode(time_warp_seq, bl); | |
643 | encode(client_ranges, bl); | |
644 | ||
645 | encode(dirstat, bl); | |
646 | encode(rstat, bl); | |
647 | encode(accounted_rstat, bl); | |
648 | ||
649 | encode(version, bl); | |
650 | encode(file_data_version, bl); | |
651 | encode(xattr_version, bl); | |
652 | encode(backtrace_version, bl); | |
653 | encode(old_pools, bl); | |
654 | encode(max_size_ever, bl); | |
655 | encode(inline_data, bl); | |
656 | encode(quota, bl); | |
657 | ||
658 | encode(stray_prior_path, bl); | |
659 | ||
660 | encode(last_scrub_version, bl); | |
661 | encode(last_scrub_stamp, bl); | |
662 | ||
663 | encode(btime, bl); | |
664 | encode(change_attr, bl); | |
665 | ||
666 | encode(export_pin, bl); | |
667 | ||
668 | encode(export_ephemeral_random_pin, bl); | |
669 | encode(export_ephemeral_distributed_pin, bl); | |
670 | ||
671 | ENCODE_FINISH(bl); | |
672 | } | |
673 | ||
674 | template<template<typename> class Allocator> | |
675 | void inode_t<Allocator>::decode(bufferlist::const_iterator &p) | |
676 | { | |
677 | DECODE_START_LEGACY_COMPAT_LEN(16, 6, 6, p); | |
678 | ||
679 | decode(ino, p); | |
680 | decode(rdev, p); | |
681 | decode(ctime, p); | |
682 | ||
683 | decode(mode, p); | |
684 | decode(uid, p); | |
685 | decode(gid, p); | |
686 | ||
687 | decode(nlink, p); | |
688 | { | |
689 | bool anchored; | |
690 | decode(anchored, p); | |
691 | } | |
692 | ||
693 | if (struct_v >= 4) | |
694 | decode(dir_layout, p); | |
695 | else { | |
696 | // FIPS zeroization audit 20191117: this memset is not security related. | |
697 | memset(&dir_layout, 0, sizeof(dir_layout)); | |
698 | } | |
699 | decode(layout, p); | |
700 | decode(size, p); | |
701 | decode(truncate_seq, p); | |
702 | decode(truncate_size, p); | |
703 | decode(truncate_from, p); | |
704 | if (struct_v >= 5) | |
705 | decode(truncate_pending, p); | |
706 | else | |
707 | truncate_pending = 0; | |
708 | decode(mtime, p); | |
709 | decode(atime, p); | |
710 | decode(time_warp_seq, p); | |
711 | if (struct_v >= 3) { | |
712 | decode(client_ranges, p); | |
713 | } else { | |
714 | map<client_t, client_writeable_range_t::byte_range_t> m; | |
715 | decode(m, p); | |
716 | for (map<client_t, client_writeable_range_t::byte_range_t>::iterator | |
717 | q = m.begin(); q != m.end(); ++q) | |
718 | client_ranges[q->first].range = q->second; | |
719 | } | |
720 | ||
721 | decode(dirstat, p); | |
722 | decode(rstat, p); | |
723 | decode(accounted_rstat, p); | |
724 | ||
725 | decode(version, p); | |
726 | decode(file_data_version, p); | |
727 | decode(xattr_version, p); | |
728 | if (struct_v >= 2) | |
729 | decode(backtrace_version, p); | |
730 | if (struct_v >= 7) | |
731 | decode(old_pools, p); | |
732 | if (struct_v >= 8) | |
733 | decode(max_size_ever, p); | |
734 | if (struct_v >= 9) { | |
735 | decode(inline_data, p); | |
736 | } else { | |
737 | inline_data.version = CEPH_INLINE_NONE; | |
738 | } | |
739 | if (struct_v < 10) | |
740 | backtrace_version = 0; // force update backtrace | |
741 | if (struct_v >= 11) | |
742 | decode(quota, p); | |
743 | ||
744 | if (struct_v >= 12) { | |
745 | std::string tmp; | |
746 | decode(tmp, p); | |
747 | stray_prior_path = std::string_view(tmp); | |
748 | } | |
749 | ||
750 | if (struct_v >= 13) { | |
751 | decode(last_scrub_version, p); | |
752 | decode(last_scrub_stamp, p); | |
753 | } | |
754 | if (struct_v >= 14) { | |
755 | decode(btime, p); | |
756 | decode(change_attr, p); | |
757 | } else { | |
758 | btime = utime_t(); | |
759 | change_attr = 0; | |
760 | } | |
761 | ||
762 | if (struct_v >= 15) { | |
763 | decode(export_pin, p); | |
764 | } else { | |
765 | export_pin = MDS_RANK_NONE; | |
766 | } | |
767 | ||
768 | if (struct_v >= 16) { | |
769 | decode(export_ephemeral_random_pin, p); | |
770 | decode(export_ephemeral_distributed_pin, p); | |
771 | } else { | |
772 | export_ephemeral_random_pin = 0; | |
773 | export_ephemeral_distributed_pin = false; | |
774 | } | |
775 | ||
776 | DECODE_FINISH(p); | |
777 | } | |
778 | ||
779 | template<template<typename> class Allocator> | |
780 | void inode_t<Allocator>::dump(Formatter *f) const | |
781 | { | |
782 | f->dump_unsigned("ino", ino); | |
783 | f->dump_unsigned("rdev", rdev); | |
784 | f->dump_stream("ctime") << ctime; | |
785 | f->dump_stream("btime") << btime; | |
786 | f->dump_unsigned("mode", mode); | |
787 | f->dump_unsigned("uid", uid); | |
788 | f->dump_unsigned("gid", gid); | |
789 | f->dump_unsigned("nlink", nlink); | |
790 | ||
791 | f->open_object_section("dir_layout"); | |
792 | ::dump(dir_layout, f); | |
793 | f->close_section(); | |
794 | ||
795 | f->dump_object("layout", layout); | |
796 | ||
797 | f->open_array_section("old_pools"); | |
798 | for (const auto &p : old_pools) { | |
799 | f->dump_int("pool", p); | |
800 | } | |
801 | f->close_section(); | |
802 | ||
803 | f->dump_unsigned("size", size); | |
804 | f->dump_unsigned("truncate_seq", truncate_seq); | |
805 | f->dump_unsigned("truncate_size", truncate_size); | |
806 | f->dump_unsigned("truncate_from", truncate_from); | |
807 | f->dump_unsigned("truncate_pending", truncate_pending); | |
808 | f->dump_stream("mtime") << mtime; | |
809 | f->dump_stream("atime") << atime; | |
810 | f->dump_unsigned("time_warp_seq", time_warp_seq); | |
811 | f->dump_unsigned("change_attr", change_attr); | |
812 | f->dump_int("export_pin", export_pin); | |
813 | f->dump_int("export_ephemeral_random_pin", export_ephemeral_random_pin); | |
814 | f->dump_bool("export_ephemeral_distributed_pin", export_ephemeral_distributed_pin); | |
815 | ||
816 | f->open_array_section("client_ranges"); | |
817 | for (const auto &p : client_ranges) { | |
818 | f->open_object_section("client"); | |
819 | f->dump_unsigned("client", p.first.v); | |
820 | p.second.dump(f); | |
821 | f->close_section(); | |
822 | } | |
823 | f->close_section(); | |
824 | ||
825 | f->open_object_section("dirstat"); | |
826 | dirstat.dump(f); | |
827 | f->close_section(); | |
828 | ||
829 | f->open_object_section("rstat"); | |
830 | rstat.dump(f); | |
831 | f->close_section(); | |
832 | ||
833 | f->open_object_section("accounted_rstat"); | |
834 | accounted_rstat.dump(f); | |
835 | f->close_section(); | |
836 | ||
837 | f->dump_unsigned("version", version); | |
838 | f->dump_unsigned("file_data_version", file_data_version); | |
839 | f->dump_unsigned("xattr_version", xattr_version); | |
840 | f->dump_unsigned("backtrace_version", backtrace_version); | |
841 | ||
842 | f->dump_string("stray_prior_path", stray_prior_path); | |
843 | f->dump_unsigned("max_size_ever", max_size_ever); | |
844 | ||
845 | f->open_object_section("quota"); | |
846 | quota.dump(f); | |
847 | f->close_section(); | |
848 | ||
849 | f->dump_stream("last_scrub_stamp") << last_scrub_stamp; | |
850 | f->dump_unsigned("last_scrub_version", last_scrub_version); | |
851 | } | |
852 | ||
853 | template<template<typename> class Allocator> | |
854 | void inode_t<Allocator>::generate_test_instances(std::list<inode_t*>& ls) | |
855 | { | |
856 | ls.push_back(new inode_t<Allocator>); | |
857 | ls.push_back(new inode_t<Allocator>); | |
858 | ls.back()->ino = 1; | |
859 | // i am lazy. | |
860 | } | |
861 | ||
862 | template<template<typename> class Allocator> | |
863 | int inode_t<Allocator>::compare(const inode_t<Allocator> &other, bool *divergent) const | |
864 | { | |
865 | ceph_assert(ino == other.ino); | |
866 | *divergent = false; | |
867 | if (version == other.version) { | |
868 | if (rdev != other.rdev || | |
869 | ctime != other.ctime || | |
870 | btime != other.btime || | |
871 | mode != other.mode || | |
872 | uid != other.uid || | |
873 | gid != other.gid || | |
874 | nlink != other.nlink || | |
875 | memcmp(&dir_layout, &other.dir_layout, sizeof(dir_layout)) || | |
876 | layout != other.layout || | |
877 | old_pools != other.old_pools || | |
878 | size != other.size || | |
879 | max_size_ever != other.max_size_ever || | |
880 | truncate_seq != other.truncate_seq || | |
881 | truncate_size != other.truncate_size || | |
882 | truncate_from != other.truncate_from || | |
883 | truncate_pending != other.truncate_pending || | |
884 | change_attr != other.change_attr || | |
885 | mtime != other.mtime || | |
886 | atime != other.atime || | |
887 | time_warp_seq != other.time_warp_seq || | |
888 | inline_data != other.inline_data || | |
889 | client_ranges != other.client_ranges || | |
890 | !(dirstat == other.dirstat) || | |
891 | !(rstat == other.rstat) || | |
892 | !(accounted_rstat == other.accounted_rstat) || | |
893 | file_data_version != other.file_data_version || | |
894 | xattr_version != other.xattr_version || | |
895 | backtrace_version != other.backtrace_version) { | |
896 | *divergent = true; | |
897 | } | |
898 | return 0; | |
899 | } else if (version > other.version) { | |
900 | *divergent = !older_is_consistent(other); | |
901 | return 1; | |
902 | } else { | |
903 | ceph_assert(version < other.version); | |
904 | *divergent = !other.older_is_consistent(*this); | |
905 | return -1; | |
906 | } | |
907 | } | |
908 | ||
909 | template<template<typename> class Allocator> | |
910 | bool inode_t<Allocator>::older_is_consistent(const inode_t<Allocator> &other) const | |
911 | { | |
912 | if (max_size_ever < other.max_size_ever || | |
913 | truncate_seq < other.truncate_seq || | |
914 | time_warp_seq < other.time_warp_seq || | |
915 | inline_data.version < other.inline_data.version || | |
916 | dirstat.version < other.dirstat.version || | |
917 | rstat.version < other.rstat.version || | |
918 | accounted_rstat.version < other.accounted_rstat.version || | |
919 | file_data_version < other.file_data_version || | |
920 | xattr_version < other.xattr_version || | |
921 | backtrace_version < other.backtrace_version) { | |
922 | return false; | |
923 | } | |
924 | return true; | |
925 | } | |
926 | ||
927 | template<template<typename> class Allocator> | |
928 | inline void encode(const inode_t<Allocator> &c, ::ceph::bufferlist &bl, uint64_t features) | |
929 | { | |
930 | ENCODE_DUMP_PRE(); | |
931 | c.encode(bl, features); | |
932 | ENCODE_DUMP_POST(cl); | |
933 | } | |
934 | template<template<typename> class Allocator> | |
935 | inline void decode(inode_t<Allocator> &c, ::ceph::bufferlist::const_iterator &p) | |
936 | { | |
937 | c.decode(p); | |
938 | } | |
939 | ||
940 | template<template<typename> class Allocator> | |
941 | using alloc_string = std::basic_string<char,std::char_traits<char>,Allocator<char>>; | |
942 | ||
943 | template<template<typename> class Allocator> | |
944 | using xattr_map = compact_map<alloc_string<Allocator>, bufferptr, std::less<alloc_string<Allocator>>, Allocator<std::pair<const alloc_string<Allocator>, bufferptr>>>; // FIXME bufferptr not in mempool | |
945 | ||
946 | template<template<typename> class Allocator> | |
947 | inline void decode_noshare(xattr_map<Allocator>& xattrs, ceph::buffer::list::const_iterator &p) | |
948 | { | |
949 | __u32 n; | |
950 | decode(n, p); | |
951 | while (n-- > 0) { | |
952 | alloc_string<Allocator> key; | |
953 | decode(key, p); | |
954 | __u32 len; | |
955 | decode(len, p); | |
956 | p.copy_deep(len, xattrs[key]); | |
957 | } | |
958 | } | |
959 | ||
960 | template<template<typename> class Allocator = std::allocator> | |
961 | struct old_inode_t { | |
962 | snapid_t first; | |
963 | inode_t<Allocator> inode; | |
964 | xattr_map<Allocator> xattrs; | |
965 | ||
966 | void encode(bufferlist &bl, uint64_t features) const; | |
967 | void decode(bufferlist::const_iterator& bl); | |
968 | void dump(Formatter *f) const; | |
969 | static void generate_test_instances(std::list<old_inode_t*>& ls); | |
970 | }; | |
971 | ||
972 | // These methods may be moved back to mdstypes.cc when we have pmr | |
973 | template<template<typename> class Allocator> | |
974 | void old_inode_t<Allocator>::encode(bufferlist& bl, uint64_t features) const | |
975 | { | |
976 | ENCODE_START(2, 2, bl); | |
977 | encode(first, bl); | |
978 | encode(inode, bl, features); | |
979 | encode(xattrs, bl); | |
980 | ENCODE_FINISH(bl); | |
981 | } | |
982 | ||
983 | template<template<typename> class Allocator> | |
984 | void old_inode_t<Allocator>::decode(bufferlist::const_iterator& bl) | |
985 | { | |
986 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
987 | decode(first, bl); | |
988 | decode(inode, bl); | |
989 | decode_noshare<Allocator>(xattrs, bl); | |
990 | DECODE_FINISH(bl); | |
991 | } | |
992 | ||
993 | template<template<typename> class Allocator> | |
994 | void old_inode_t<Allocator>::dump(Formatter *f) const | |
995 | { | |
996 | f->dump_unsigned("first", first); | |
997 | inode.dump(f); | |
998 | f->open_object_section("xattrs"); | |
999 | for (const auto &p : xattrs) { | |
1000 | std::string v(p.second.c_str(), p.second.length()); | |
1001 | f->dump_string(p.first.c_str(), v); | |
1002 | } | |
1003 | f->close_section(); | |
1004 | } | |
1005 | ||
1006 | template<template<typename> class Allocator> | |
1007 | void old_inode_t<Allocator>::generate_test_instances(std::list<old_inode_t<Allocator>*>& ls) | |
1008 | { | |
1009 | ls.push_back(new old_inode_t<Allocator>); | |
1010 | ls.push_back(new old_inode_t<Allocator>); | |
1011 | ls.back()->first = 2; | |
1012 | std::list<inode_t<Allocator>*> ils; | |
1013 | inode_t<Allocator>::generate_test_instances(ils); | |
1014 | ls.back()->inode = *ils.back(); | |
1015 | ls.back()->xattrs["user.foo"] = buffer::copy("asdf", 4); | |
1016 | ls.back()->xattrs["user.unprintable"] = buffer::copy("\000\001\002", 3); | |
1017 | } | |
1018 | ||
1019 | template<template<typename> class Allocator> | |
1020 | inline void encode(const old_inode_t<Allocator> &c, ::ceph::bufferlist &bl, uint64_t features) | |
1021 | { | |
1022 | ENCODE_DUMP_PRE(); | |
1023 | c.encode(bl, features); | |
1024 | ENCODE_DUMP_POST(cl); | |
1025 | } | |
1026 | template<template<typename> class Allocator> | |
1027 | inline void decode(old_inode_t<Allocator> &c, ::ceph::bufferlist::const_iterator &p) | |
1028 | { | |
1029 | c.decode(p); | |
1030 | } | |
1031 | ||
1032 | /* | |
1033 | * like an inode, but for a dir frag | |
1034 | */ | |
1035 | struct fnode_t { | |
1036 | void encode(bufferlist &bl) const; | |
1037 | void decode(bufferlist::const_iterator& bl); | |
1038 | void dump(Formatter *f) const; | |
1039 | static void generate_test_instances(std::list<fnode_t*>& ls); | |
1040 | ||
1041 | version_t version = 0; | |
1042 | snapid_t snap_purged_thru; // the max_last_destroy snapid we've been purged thru | |
1043 | frag_info_t fragstat, accounted_fragstat; | |
1044 | nest_info_t rstat, accounted_rstat; | |
1045 | damage_flags_t damage_flags = 0; | |
1046 | ||
1047 | // we know we and all our descendants have been scrubbed since this version | |
1048 | version_t recursive_scrub_version = 0; | |
1049 | utime_t recursive_scrub_stamp; | |
1050 | // version at which we last scrubbed our personal data structures | |
1051 | version_t localized_scrub_version = 0; | |
1052 | utime_t localized_scrub_stamp; | |
1053 | }; | |
1054 | WRITE_CLASS_ENCODER(fnode_t) | |
1055 | ||
1056 | ||
1057 | struct old_rstat_t { | |
1058 | void encode(bufferlist& bl) const; | |
1059 | void decode(bufferlist::const_iterator& p); | |
1060 | void dump(Formatter *f) const; | |
1061 | static void generate_test_instances(std::list<old_rstat_t*>& ls); | |
1062 | ||
1063 | snapid_t first; | |
1064 | nest_info_t rstat, accounted_rstat; | |
1065 | }; | |
1066 | WRITE_CLASS_ENCODER(old_rstat_t) | |
1067 | ||
1068 | inline std::ostream& operator<<(std::ostream& out, const old_rstat_t& o) { | |
1069 | return out << "old_rstat(first " << o.first << " " << o.rstat << " " << o.accounted_rstat << ")"; | |
1070 | } | |
1071 | ||
1072 | class feature_bitset_t { | |
1073 | public: | |
1074 | typedef uint64_t block_type; | |
1075 | static const size_t bits_per_block = sizeof(block_type) * 8; | |
1076 | ||
1077 | feature_bitset_t(const feature_bitset_t& other) : _vec(other._vec) {} | |
1078 | feature_bitset_t(feature_bitset_t&& other) : _vec(std::move(other._vec)) {} | |
1079 | feature_bitset_t(unsigned long value = 0); | |
1080 | feature_bitset_t(const vector<size_t>& array); | |
1081 | feature_bitset_t& operator=(const feature_bitset_t& other) { | |
1082 | _vec = other._vec; | |
1083 | return *this; | |
1084 | } | |
1085 | feature_bitset_t& operator=(feature_bitset_t&& other) { | |
1086 | _vec = std::move(other._vec); | |
1087 | return *this; | |
1088 | } | |
1089 | feature_bitset_t& operator-=(const feature_bitset_t& other); | |
1090 | bool empty() const { | |
1091 | //block_type is a uint64_t. If the vector is only composed of 0s, then it's still "empty" | |
1092 | for (auto& v : _vec) { | |
1093 | if (v) | |
1094 | return false; | |
1095 | } | |
1096 | return true; | |
1097 | } | |
1098 | bool test(size_t bit) const { | |
1099 | if (bit >= bits_per_block * _vec.size()) | |
1100 | return false; | |
1101 | return _vec[bit / bits_per_block] & ((block_type)1 << (bit % bits_per_block)); | |
1102 | } | |
1103 | void clear() { | |
1104 | _vec.clear(); | |
1105 | } | |
1106 | void encode(bufferlist& bl) const; | |
1107 | void decode(bufferlist::const_iterator &p); | |
1108 | void dump(Formatter *f) const; | |
1109 | void print(ostream& out) const; | |
1110 | private: | |
1111 | vector<block_type> _vec; | |
1112 | }; | |
1113 | WRITE_CLASS_ENCODER(feature_bitset_t) | |
1114 | ||
1115 | inline std::ostream& operator<<(std::ostream& out, const feature_bitset_t& s) { | |
1116 | s.print(out); | |
1117 | return out; | |
1118 | } | |
1119 | ||
1120 | struct metric_spec_t { | |
1121 | metric_spec_t() {} | |
1122 | metric_spec_t(const metric_spec_t& other) : | |
1123 | metric_flags(other.metric_flags) {} | |
1124 | metric_spec_t(metric_spec_t&& other) : | |
1125 | metric_flags(std::move(other.metric_flags)) {} | |
1126 | metric_spec_t(const feature_bitset_t& mf) : | |
1127 | metric_flags(mf) {} | |
1128 | metric_spec_t(feature_bitset_t&& mf) : | |
1129 | metric_flags(std::move(mf)) {} | |
1130 | ||
1131 | metric_spec_t& operator=(const metric_spec_t& other) { | |
1132 | metric_flags = other.metric_flags; | |
1133 | return *this; | |
1134 | } | |
1135 | metric_spec_t& operator=(metric_spec_t&& other) { | |
1136 | metric_flags = std::move(other.metric_flags); | |
1137 | return *this; | |
1138 | } | |
1139 | ||
1140 | bool empty() const { | |
1141 | return metric_flags.empty(); | |
1142 | } | |
1143 | ||
1144 | void clear() { | |
1145 | metric_flags.clear(); | |
1146 | } | |
1147 | ||
1148 | void encode(bufferlist& bl) const; | |
1149 | void decode(bufferlist::const_iterator& p); | |
1150 | void dump(Formatter *f) const; | |
1151 | void print(ostream& out) const; | |
1152 | ||
1153 | // set of metrics that a client is capable of forwarding | |
1154 | feature_bitset_t metric_flags; | |
1155 | }; | |
1156 | WRITE_CLASS_ENCODER(metric_spec_t) | |
1157 | ||
1158 | inline std::ostream& operator<<(std::ostream& out, const metric_spec_t& mst) { | |
1159 | mst.print(out); | |
1160 | return out; | |
1161 | } | |
1162 | ||
1163 | /* | |
1164 | * client_metadata_t | |
1165 | */ | |
1166 | struct client_metadata_t { | |
1167 | using kv_map_t = std::map<std::string,std::string>; | |
1168 | using iterator = kv_map_t::const_iterator; | |
1169 | ||
1170 | client_metadata_t() {} | |
1171 | client_metadata_t(const kv_map_t& kv, const feature_bitset_t &f, const metric_spec_t &mst) : | |
1172 | kv_map(kv), | |
1173 | features(f), | |
1174 | metric_spec(mst) {} | |
1175 | client_metadata_t& operator=(const client_metadata_t& other) { | |
1176 | kv_map = other.kv_map; | |
1177 | features = other.features; | |
1178 | metric_spec = other.metric_spec; | |
1179 | return *this; | |
1180 | } | |
1181 | ||
1182 | bool empty() const { return kv_map.empty() && features.empty() && metric_spec.empty(); } | |
1183 | iterator find(const std::string& key) const { return kv_map.find(key); } | |
1184 | iterator begin() const { return kv_map.begin(); } | |
1185 | iterator end() const { return kv_map.end(); } | |
1186 | void erase(iterator it) { kv_map.erase(it); } | |
1187 | std::string& operator[](const std::string& key) { return kv_map[key]; } | |
1188 | void merge(const client_metadata_t& other) { | |
1189 | kv_map.insert(other.kv_map.begin(), other.kv_map.end()); | |
1190 | features = other.features; | |
1191 | metric_spec = other.metric_spec; | |
1192 | } | |
1193 | void clear() { | |
1194 | kv_map.clear(); | |
1195 | features.clear(); | |
1196 | metric_spec.clear(); | |
1197 | } | |
1198 | ||
1199 | void encode(bufferlist& bl) const; | |
1200 | void decode(bufferlist::const_iterator& p); | |
1201 | void dump(Formatter *f) const; | |
1202 | ||
1203 | kv_map_t kv_map; | |
1204 | feature_bitset_t features; | |
1205 | metric_spec_t metric_spec; | |
1206 | }; | |
1207 | WRITE_CLASS_ENCODER(client_metadata_t) | |
1208 | ||
1209 | /* | |
1210 | * session_info_t - durable part of a Session | |
1211 | */ | |
1212 | struct session_info_t { | |
1213 | client_t get_client() const { return client_t(inst.name.num()); } | |
1214 | bool has_feature(size_t bit) const { return client_metadata.features.test(bit); } | |
1215 | const entity_name_t& get_source() const { return inst.name; } | |
1216 | ||
1217 | void clear_meta() { | |
1218 | prealloc_inos.clear(); | |
1219 | used_inos.clear(); | |
1220 | completed_requests.clear(); | |
1221 | completed_flushes.clear(); | |
1222 | client_metadata.clear(); | |
1223 | } | |
1224 | ||
1225 | void encode(bufferlist& bl, uint64_t features) const; | |
1226 | void decode(bufferlist::const_iterator& p); | |
1227 | void dump(Formatter *f) const; | |
1228 | static void generate_test_instances(std::list<session_info_t*>& ls); | |
1229 | ||
1230 | entity_inst_t inst; | |
1231 | std::map<ceph_tid_t,inodeno_t> completed_requests; | |
1232 | interval_set<inodeno_t> prealloc_inos; // preallocated, ready to use. | |
1233 | interval_set<inodeno_t> used_inos; // journaling use | |
1234 | client_metadata_t client_metadata; | |
1235 | std::set<ceph_tid_t> completed_flushes; | |
1236 | EntityName auth_name; | |
1237 | }; | |
1238 | WRITE_CLASS_ENCODER_FEATURES(session_info_t) | |
1239 | ||
1240 | // dentries | |
1241 | struct dentry_key_t { | |
1242 | dentry_key_t() {} | |
1243 | dentry_key_t(snapid_t s, std::string_view n, __u32 h=0) : | |
1244 | snapid(s), name(n), hash(h) {} | |
1245 | ||
1246 | bool is_valid() { return name.length() || snapid; } | |
1247 | ||
1248 | // encode into something that can be decoded as a string. | |
1249 | // name_ (head) or name_%x (!head) | |
1250 | void encode(bufferlist& bl) const { | |
1251 | string key; | |
1252 | encode(key); | |
1253 | using ceph::encode; | |
1254 | encode(key, bl); | |
1255 | } | |
1256 | void encode(string& key) const { | |
1257 | char b[20]; | |
1258 | if (snapid != CEPH_NOSNAP) { | |
1259 | uint64_t val(snapid); | |
1260 | snprintf(b, sizeof(b), "%" PRIx64, val); | |
1261 | } else { | |
1262 | snprintf(b, sizeof(b), "%s", "head"); | |
1263 | } | |
1264 | ostringstream oss; | |
1265 | oss << name << "_" << b; | |
1266 | key = oss.str(); | |
1267 | } | |
1268 | static void decode_helper(bufferlist::const_iterator& bl, string& nm, snapid_t& sn) { | |
1269 | string key; | |
1270 | decode(key, bl); | |
1271 | decode_helper(key, nm, sn); | |
1272 | } | |
1273 | static void decode_helper(std::string_view key, string& nm, snapid_t& sn) { | |
1274 | size_t i = key.find_last_of('_'); | |
1275 | ceph_assert(i != string::npos); | |
1276 | if (key.compare(i+1, std::string_view::npos, "head") == 0) { | |
1277 | // name_head | |
1278 | sn = CEPH_NOSNAP; | |
1279 | } else { | |
1280 | // name_%x | |
1281 | long long unsigned x = 0; | |
1282 | std::string x_str(key.substr(i+1)); | |
1283 | sscanf(x_str.c_str(), "%llx", &x); | |
1284 | sn = x; | |
1285 | } | |
1286 | nm = key.substr(0, i); | |
1287 | } | |
1288 | ||
1289 | snapid_t snapid = 0; | |
1290 | std::string_view name; | |
1291 | __u32 hash = 0; | |
1292 | }; | |
1293 | ||
1294 | inline std::ostream& operator<<(std::ostream& out, const dentry_key_t &k) | |
1295 | { | |
1296 | return out << "(" << k.name << "," << k.snapid << ")"; | |
1297 | } | |
1298 | ||
1299 | inline bool operator<(const dentry_key_t& k1, const dentry_key_t& k2) | |
1300 | { | |
1301 | /* | |
1302 | * order by hash, name, snap | |
1303 | */ | |
1304 | int c = ceph_frag_value(k1.hash) - ceph_frag_value(k2.hash); | |
1305 | if (c) | |
1306 | return c < 0; | |
1307 | c = k1.name.compare(k2.name); | |
1308 | if (c) | |
1309 | return c < 0; | |
1310 | return k1.snapid < k2.snapid; | |
1311 | } | |
1312 | ||
1313 | /* | |
1314 | * string_snap_t is a simple (string, snapid_t) pair | |
1315 | */ | |
1316 | struct string_snap_t { | |
1317 | string_snap_t() {} | |
1318 | string_snap_t(std::string_view n, snapid_t s) : name(n), snapid(s) {} | |
1319 | ||
1320 | void encode(bufferlist& bl) const; | |
1321 | void decode(bufferlist::const_iterator& p); | |
1322 | void dump(Formatter *f) const; | |
1323 | static void generate_test_instances(std::list<string_snap_t*>& ls); | |
1324 | ||
1325 | string name; | |
1326 | snapid_t snapid; | |
1327 | }; | |
1328 | WRITE_CLASS_ENCODER(string_snap_t) | |
1329 | ||
1330 | inline bool operator<(const string_snap_t& l, const string_snap_t& r) { | |
1331 | int c = l.name.compare(r.name); | |
1332 | return c < 0 || (c == 0 && l.snapid < r.snapid); | |
1333 | } | |
1334 | ||
1335 | inline std::ostream& operator<<(std::ostream& out, const string_snap_t &k) | |
1336 | { | |
1337 | return out << "(" << k.name << "," << k.snapid << ")"; | |
1338 | } | |
1339 | ||
1340 | /* | |
1341 | * mds_table_pending_t | |
1342 | * | |
1343 | * For mds's requesting any pending ops, child needs to encode the corresponding | |
1344 | * pending mutation state in the table. | |
1345 | */ | |
1346 | struct mds_table_pending_t { | |
1347 | void encode(bufferlist& bl) const; | |
1348 | void decode(bufferlist::const_iterator& bl); | |
1349 | void dump(Formatter *f) const; | |
1350 | static void generate_test_instances(std::list<mds_table_pending_t*>& ls); | |
1351 | ||
1352 | uint64_t reqid = 0; | |
1353 | __s32 mds = 0; | |
1354 | version_t tid = 0; | |
1355 | }; | |
1356 | WRITE_CLASS_ENCODER(mds_table_pending_t) | |
1357 | ||
1358 | // requests | |
1359 | struct metareqid_t { | |
1360 | metareqid_t() {} | |
1361 | metareqid_t(entity_name_t n, ceph_tid_t t) : name(n), tid(t) {} | |
1362 | void encode(bufferlist& bl) const { | |
1363 | using ceph::encode; | |
1364 | encode(name, bl); | |
1365 | encode(tid, bl); | |
1366 | } | |
1367 | void decode(bufferlist::const_iterator &p) { | |
1368 | using ceph::decode; | |
1369 | decode(name, p); | |
1370 | decode(tid, p); | |
1371 | } | |
1372 | ||
1373 | entity_name_t name; | |
1374 | uint64_t tid = 0; | |
1375 | }; | |
1376 | WRITE_CLASS_ENCODER(metareqid_t) | |
1377 | ||
1378 | inline std::ostream& operator<<(std::ostream& out, const metareqid_t& r) { | |
1379 | return out << r.name << ":" << r.tid; | |
1380 | } | |
1381 | ||
1382 | inline bool operator==(const metareqid_t& l, const metareqid_t& r) { | |
1383 | return (l.name == r.name) && (l.tid == r.tid); | |
1384 | } | |
1385 | inline bool operator!=(const metareqid_t& l, const metareqid_t& r) { | |
1386 | return (l.name != r.name) || (l.tid != r.tid); | |
1387 | } | |
1388 | inline bool operator<(const metareqid_t& l, const metareqid_t& r) { | |
1389 | return (l.name < r.name) || | |
1390 | (l.name == r.name && l.tid < r.tid); | |
1391 | } | |
1392 | inline bool operator<=(const metareqid_t& l, const metareqid_t& r) { | |
1393 | return (l.name < r.name) || | |
1394 | (l.name == r.name && l.tid <= r.tid); | |
1395 | } | |
1396 | inline bool operator>(const metareqid_t& l, const metareqid_t& r) { return !(l <= r); } | |
1397 | inline bool operator>=(const metareqid_t& l, const metareqid_t& r) { return !(l < r); } | |
1398 | ||
1399 | namespace std { | |
1400 | template<> struct hash<metareqid_t> { | |
1401 | size_t operator()(const metareqid_t &r) const { | |
1402 | hash<uint64_t> H; | |
1403 | return H(r.name.num()) ^ H(r.name.type()) ^ H(r.tid); | |
1404 | } | |
1405 | }; | |
1406 | } // namespace std | |
1407 | ||
1408 | // cap info for client reconnect | |
1409 | struct cap_reconnect_t { | |
1410 | cap_reconnect_t() {} | |
1411 | cap_reconnect_t(uint64_t cap_id, inodeno_t pino, std::string_view p, int w, int i, | |
1412 | inodeno_t sr, snapid_t sf, bufferlist& lb) : | |
1413 | path(p) { | |
1414 | capinfo.cap_id = cap_id; | |
1415 | capinfo.wanted = w; | |
1416 | capinfo.issued = i; | |
1417 | capinfo.snaprealm = sr; | |
1418 | capinfo.pathbase = pino; | |
1419 | capinfo.flock_len = 0; | |
1420 | snap_follows = sf; | |
1421 | flockbl.claim(lb); | |
1422 | } | |
1423 | void encode(bufferlist& bl) const; | |
1424 | void decode(bufferlist::const_iterator& bl); | |
1425 | void encode_old(bufferlist& bl) const; | |
1426 | void decode_old(bufferlist::const_iterator& bl); | |
1427 | ||
1428 | void dump(Formatter *f) const; | |
1429 | static void generate_test_instances(std::list<cap_reconnect_t*>& ls); | |
1430 | ||
1431 | string path; | |
1432 | mutable ceph_mds_cap_reconnect capinfo = {}; | |
1433 | snapid_t snap_follows = 0; | |
1434 | bufferlist flockbl; | |
1435 | }; | |
1436 | WRITE_CLASS_ENCODER(cap_reconnect_t) | |
1437 | ||
1438 | struct snaprealm_reconnect_t { | |
1439 | snaprealm_reconnect_t() {} | |
1440 | snaprealm_reconnect_t(inodeno_t ino, snapid_t seq, inodeno_t parent) { | |
1441 | realm.ino = ino; | |
1442 | realm.seq = seq; | |
1443 | realm.parent = parent; | |
1444 | } | |
1445 | void encode(bufferlist& bl) const; | |
1446 | void decode(bufferlist::const_iterator& bl); | |
1447 | void encode_old(bufferlist& bl) const; | |
1448 | void decode_old(bufferlist::const_iterator& bl); | |
1449 | ||
1450 | void dump(Formatter *f) const; | |
1451 | static void generate_test_instances(std::list<snaprealm_reconnect_t*>& ls); | |
1452 | ||
1453 | mutable ceph_mds_snaprealm_reconnect realm = {}; | |
1454 | }; | |
1455 | WRITE_CLASS_ENCODER(snaprealm_reconnect_t) | |
1456 | ||
1457 | // compat for pre-FLOCK feature | |
1458 | struct old_ceph_mds_cap_reconnect { | |
1459 | ceph_le64 cap_id; | |
1460 | ceph_le32 wanted; | |
1461 | ceph_le32 issued; | |
1462 | ceph_le64 old_size; | |
1463 | struct ceph_timespec old_mtime, old_atime; | |
1464 | ceph_le64 snaprealm; | |
1465 | ceph_le64 pathbase; /* base ino for our path to this ino */ | |
1466 | } __attribute__ ((packed)); | |
1467 | WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect) | |
1468 | ||
1469 | struct old_cap_reconnect_t { | |
1470 | const old_cap_reconnect_t& operator=(const cap_reconnect_t& n) { | |
1471 | path = n.path; | |
1472 | capinfo.cap_id = n.capinfo.cap_id; | |
1473 | capinfo.wanted = n.capinfo.wanted; | |
1474 | capinfo.issued = n.capinfo.issued; | |
1475 | capinfo.snaprealm = n.capinfo.snaprealm; | |
1476 | capinfo.pathbase = n.capinfo.pathbase; | |
1477 | return *this; | |
1478 | } | |
1479 | operator cap_reconnect_t() { | |
1480 | cap_reconnect_t n; | |
1481 | n.path = path; | |
1482 | n.capinfo.cap_id = capinfo.cap_id; | |
1483 | n.capinfo.wanted = capinfo.wanted; | |
1484 | n.capinfo.issued = capinfo.issued; | |
1485 | n.capinfo.snaprealm = capinfo.snaprealm; | |
1486 | n.capinfo.pathbase = capinfo.pathbase; | |
1487 | return n; | |
1488 | } | |
1489 | ||
1490 | void encode(bufferlist& bl) const { | |
1491 | using ceph::encode; | |
1492 | encode(path, bl); | |
1493 | encode(capinfo, bl); | |
1494 | } | |
1495 | void decode(bufferlist::const_iterator& bl) { | |
1496 | using ceph::decode; | |
1497 | decode(path, bl); | |
1498 | decode(capinfo, bl); | |
1499 | } | |
1500 | ||
1501 | string path; | |
1502 | old_ceph_mds_cap_reconnect capinfo; | |
1503 | }; | |
1504 | WRITE_CLASS_ENCODER(old_cap_reconnect_t) | |
1505 | ||
1506 | // dir frag | |
1507 | struct dirfrag_t { | |
1508 | dirfrag_t() {} | |
1509 | dirfrag_t(inodeno_t i, frag_t f) : ino(i), frag(f) { } | |
1510 | ||
1511 | void encode(bufferlist& bl) const { | |
1512 | using ceph::encode; | |
1513 | encode(ino, bl); | |
1514 | encode(frag, bl); | |
1515 | } | |
1516 | void decode(bufferlist::const_iterator& bl) { | |
1517 | using ceph::decode; | |
1518 | decode(ino, bl); | |
1519 | decode(frag, bl); | |
1520 | } | |
1521 | ||
1522 | inodeno_t ino = 0; | |
1523 | frag_t frag; | |
1524 | }; | |
1525 | WRITE_CLASS_ENCODER(dirfrag_t) | |
1526 | ||
1527 | inline std::ostream& operator<<(std::ostream& out, const dirfrag_t &df) { | |
1528 | out << df.ino; | |
1529 | if (!df.frag.is_root()) out << "." << df.frag; | |
1530 | return out; | |
1531 | } | |
1532 | inline bool operator<(dirfrag_t l, dirfrag_t r) { | |
1533 | if (l.ino < r.ino) return true; | |
1534 | if (l.ino == r.ino && l.frag < r.frag) return true; | |
1535 | return false; | |
1536 | } | |
1537 | inline bool operator==(dirfrag_t l, dirfrag_t r) { | |
1538 | return l.ino == r.ino && l.frag == r.frag; | |
1539 | } | |
1540 | ||
1541 | namespace std { | |
1542 | template<> struct hash<dirfrag_t> { | |
1543 | size_t operator()(const dirfrag_t &df) const { | |
1544 | static rjhash<uint64_t> H; | |
1545 | static rjhash<uint32_t> I; | |
1546 | return H(df.ino) ^ I(df.frag); | |
1547 | } | |
1548 | }; | |
1549 | } // namespace std | |
1550 | ||
// ================================================================
// Indices of the individual popularity counters inside
// dirfrag_load_vec_t (see meta_load(), which reads vec[META_POP_*]).
// META_NPOP is the number of counters.
// NOTE(review): IRD/IWR presumably stand for inode reads/writes -- confirm.
#define META_POP_IRD     0
#define META_POP_IWR     1
#define META_POP_READDIR 2
#define META_POP_FETCH   3
#define META_POP_STORE   4
#define META_NPOP        5
1558 | ||
1559 | class inode_load_vec_t { | |
1560 | public: | |
1561 | using time = DecayCounter::time; | |
1562 | using clock = DecayCounter::clock; | |
1563 | static const size_t NUM = 2; | |
1564 | ||
1565 | inode_load_vec_t() : vec{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {} | |
1566 | inode_load_vec_t(const DecayRate &rate) : vec{DecayCounter(rate), DecayCounter(rate)} {} | |
1567 | ||
1568 | DecayCounter &get(int t) { | |
1569 | return vec[t]; | |
1570 | } | |
1571 | void zero() { | |
1572 | for (auto &d : vec) { | |
1573 | d.reset(); | |
1574 | } | |
1575 | } | |
1576 | void encode(bufferlist &bl) const; | |
1577 | void decode(bufferlist::const_iterator& p); | |
1578 | void dump(Formatter *f) const; | |
1579 | static void generate_test_instances(std::list<inode_load_vec_t*>& ls); | |
1580 | ||
1581 | private: | |
1582 | std::array<DecayCounter, NUM> vec; | |
1583 | }; | |
1584 | inline void encode(const inode_load_vec_t &c, bufferlist &bl) { | |
1585 | c.encode(bl); | |
1586 | } | |
1587 | inline void decode(inode_load_vec_t & c, bufferlist::const_iterator &p) { | |
1588 | c.decode(p); | |
1589 | } | |
1590 | ||
1591 | class dirfrag_load_vec_t { | |
1592 | public: | |
1593 | using time = DecayCounter::time; | |
1594 | using clock = DecayCounter::clock; | |
1595 | static const size_t NUM = 5; | |
1596 | ||
1597 | dirfrag_load_vec_t() : | |
1598 | vec{DecayCounter(DecayRate()), | |
1599 | DecayCounter(DecayRate()), | |
1600 | DecayCounter(DecayRate()), | |
1601 | DecayCounter(DecayRate()), | |
1602 | DecayCounter(DecayRate()) | |
1603 | } | |
1604 | {} | |
1605 | dirfrag_load_vec_t(const DecayRate &rate) : | |
1606 | vec{DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate)} | |
1607 | {} | |
1608 | ||
1609 | void encode(bufferlist &bl) const { | |
1610 | ENCODE_START(2, 2, bl); | |
1611 | for (const auto &i : vec) { | |
1612 | encode(i, bl); | |
1613 | } | |
1614 | ENCODE_FINISH(bl); | |
1615 | } | |
1616 | void decode(bufferlist::const_iterator &p) { | |
1617 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p); | |
1618 | for (auto &i : vec) { | |
1619 | decode(i, p); | |
1620 | } | |
1621 | DECODE_FINISH(p); | |
1622 | } | |
1623 | void dump(Formatter *f) const; | |
1624 | void dump(Formatter *f, const DecayRate& rate) const; | |
1625 | static void generate_test_instances(std::list<dirfrag_load_vec_t*>& ls); | |
1626 | ||
1627 | const DecayCounter &get(int t) const { | |
1628 | return vec[t]; | |
1629 | } | |
1630 | DecayCounter &get(int t) { | |
1631 | return vec[t]; | |
1632 | } | |
1633 | void adjust(double d) { | |
1634 | for (auto &i : vec) { | |
1635 | i.adjust(d); | |
1636 | } | |
1637 | } | |
1638 | void zero() { | |
1639 | for (auto &i : vec) { | |
1640 | i.reset(); | |
1641 | } | |
1642 | } | |
1643 | double meta_load() const { | |
1644 | return | |
1645 | 1*vec[META_POP_IRD].get() + | |
1646 | 2*vec[META_POP_IWR].get() + | |
1647 | 1*vec[META_POP_READDIR].get() + | |
1648 | 2*vec[META_POP_FETCH].get() + | |
1649 | 4*vec[META_POP_STORE].get(); | |
1650 | } | |
1651 | ||
1652 | void add(dirfrag_load_vec_t& r) { | |
1653 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1654 | vec[i].adjust(r.vec[i].get()); | |
1655 | } | |
1656 | void sub(dirfrag_load_vec_t& r) { | |
1657 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1658 | vec[i].adjust(-r.vec[i].get()); | |
1659 | } | |
1660 | void scale(double f) { | |
1661 | for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1662 | vec[i].scale(f); | |
1663 | } | |
1664 | ||
1665 | private: | |
1666 | friend inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl); | |
1667 | std::array<DecayCounter, NUM> vec; | |
1668 | }; | |
1669 | ||
1670 | inline void encode(const dirfrag_load_vec_t &c, bufferlist &bl) { | |
1671 | c.encode(bl); | |
1672 | } | |
1673 | inline void decode(dirfrag_load_vec_t& c, bufferlist::const_iterator &p) { | |
1674 | c.decode(p); | |
1675 | } | |
1676 | ||
1677 | inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl) | |
1678 | { | |
1679 | std::ostringstream ss; | |
1680 | ss << std::setprecision(1) << std::fixed | |
1681 | << "[pop" | |
1682 | " IRD:" << dl.vec[0] | |
1683 | << " IWR:" << dl.vec[1] | |
1684 | << " RDR:" << dl.vec[2] | |
1685 | << " FET:" << dl.vec[3] | |
1686 | << " STR:" << dl.vec[4] | |
1687 | << " *LOAD:" << dl.meta_load() << "]"; | |
1688 | return out << ss.str() << std::endl; | |
1689 | } | |
1690 | ||
1691 | struct mds_load_t { | |
1692 | using clock = dirfrag_load_vec_t::clock; | |
1693 | using time = dirfrag_load_vec_t::time; | |
1694 | ||
1695 | dirfrag_load_vec_t auth; | |
1696 | dirfrag_load_vec_t all; | |
1697 | ||
1698 | mds_load_t() : auth(DecayRate()), all(DecayRate()) {} | |
1699 | mds_load_t(const DecayRate &rate) : auth(rate), all(rate) {} | |
1700 | ||
1701 | double req_rate = 0.0; | |
1702 | double cache_hit_rate = 0.0; | |
1703 | double queue_len = 0.0; | |
1704 | ||
1705 | double cpu_load_avg = 0.0; | |
1706 | ||
1707 | double mds_load() const; // defiend in MDBalancer.cc | |
1708 | void encode(bufferlist& bl) const; | |
1709 | void decode(bufferlist::const_iterator& bl); | |
1710 | void dump(Formatter *f) const; | |
1711 | static void generate_test_instances(std::list<mds_load_t*>& ls); | |
1712 | }; | |
1713 | inline void encode(const mds_load_t &c, bufferlist &bl) { | |
1714 | c.encode(bl); | |
1715 | } | |
1716 | inline void decode(mds_load_t &c, bufferlist::const_iterator &p) { | |
1717 | c.decode(p); | |
1718 | } | |
1719 | ||
1720 | inline std::ostream& operator<<(std::ostream& out, const mds_load_t& load) | |
1721 | { | |
1722 | return out << "mdsload<" << load.auth << "/" << load.all | |
1723 | << ", req " << load.req_rate | |
1724 | << ", hr " << load.cache_hit_rate | |
1725 | << ", qlen " << load.queue_len | |
1726 | << ", cpu " << load.cpu_load_avg | |
1727 | << ">"; | |
1728 | } | |
1729 | ||
1730 | class load_spread_t { | |
1731 | public: | |
1732 | using time = DecayCounter::time; | |
1733 | using clock = DecayCounter::clock; | |
1734 | static const int MAX = 4; | |
1735 | ||
1736 | load_spread_t(const DecayRate &rate) : count(rate) | |
1737 | {} | |
1738 | ||
1739 | load_spread_t() = delete; | |
1740 | ||
1741 | double hit(int who) { | |
1742 | for (int i=0; i<n; i++) | |
1743 | if (last[i] == who) | |
1744 | return count.get_last(); | |
1745 | ||
1746 | // we're new(ish) | |
1747 | last[p++] = who; | |
1748 | if (n < MAX) n++; | |
1749 | if (n == 1) return 0.0; | |
1750 | ||
1751 | if (p == MAX) p = 0; | |
1752 | ||
1753 | return count.hit(); | |
1754 | } | |
1755 | double get() const { | |
1756 | return count.get(); | |
1757 | } | |
1758 | ||
1759 | std::array<int, MAX> last = {-1, -1, -1, -1}; | |
1760 | int p = 0, n = 0; | |
1761 | DecayCounter count; | |
1762 | }; | |
1763 | ||
1764 | // ================================================================ | |
1765 | typedef std::pair<mds_rank_t, mds_rank_t> mds_authority_t; | |
1766 | ||
1767 | // -- authority delegation -- | |
1768 | // directory authority types | |
1769 | // >= 0 is the auth mds | |
1770 | #define CDIR_AUTH_PARENT mds_rank_t(-1) // default | |
1771 | #define CDIR_AUTH_UNKNOWN mds_rank_t(-2) | |
1772 | #define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN) | |
1773 | #define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN) | |
1774 | //#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2) | |
1775 | ||
1776 | class MDSCacheObjectInfo { | |
1777 | public: | |
1778 | void encode(bufferlist& bl) const; | |
1779 | void decode(bufferlist::const_iterator& bl); | |
1780 | void dump(Formatter *f) const; | |
1781 | static void generate_test_instances(std::list<MDSCacheObjectInfo*>& ls); | |
1782 | ||
1783 | inodeno_t ino = 0; | |
1784 | dirfrag_t dirfrag; | |
1785 | string dname; | |
1786 | snapid_t snapid; | |
1787 | }; | |
1788 | ||
1789 | inline std::ostream& operator<<(std::ostream& out, const MDSCacheObjectInfo &info) { | |
1790 | if (info.ino) return out << info.ino << "." << info.snapid; | |
1791 | if (info.dname.length()) return out << info.dirfrag << "/" << info.dname | |
1792 | << " snap " << info.snapid; | |
1793 | return out << info.dirfrag; | |
1794 | } | |
1795 | ||
1796 | inline bool operator==(const MDSCacheObjectInfo& l, const MDSCacheObjectInfo& r) { | |
1797 | if (l.ino || r.ino) | |
1798 | return l.ino == r.ino && l.snapid == r.snapid; | |
1799 | else | |
1800 | return l.dirfrag == r.dirfrag && l.dname == r.dname; | |
1801 | } | |
1802 | WRITE_CLASS_ENCODER(MDSCacheObjectInfo) | |
1803 | ||
// parse a map of keys/values.
namespace qi = boost::spirit::qi;

// Boost.Spirit Qi grammar whose start rule `query` yields a
// std::map<string, string> from text of the form "k1=v1 k2=v2 ...".
template <typename Iterator>
struct keys_and_values
  : qi::grammar<Iterator, std::map<string, string>()>
{
    keys_and_values()
      : keys_and_values::base_type(query)
    {
      // one pair, then any number of further pairs, each preceded by one ' '
      query = pair >> *(qi::lit(' ') >> pair);
      // key and value are separated by '='
      pair  = key >> '=' >> value;
      // key: a letter or '_' followed by letters, digits or '_'
      key   = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
      // value: one or more characters from the set below.
      // NOTE(review): the set spec contains "9-_"; confirm whether Spirit
      // reads that as a character range rather than a literal '-' -- if so,
      // more characters than letters/digits/'-'/'_'/'.' are accepted.
      value = +qi::char_("a-zA-Z0-9-_.");
    }
  // rule attributes: whole map, one (key, value) pair, and plain strings
  qi::rule<Iterator, std::map<string, string>()> query;
  qi::rule<Iterator, std::pair<string, string>()> pair;
  qi::rule<Iterator, string()> key, value;
};
1823 | ||
1824 | #endif |