]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | #ifndef CEPH_MDSTYPES_H | |
4 | #define CEPH_MDSTYPES_H | |
5 | ||
6 | #include "include/int_types.h" | |
7 | ||
8 | #include <math.h> | |
9 | #include <ostream> | |
10 | #include <set> | |
11 | #include <map> | |
12 | ||
13 | #include "common/config.h" | |
14 | #include "common/Clock.h" | |
15 | #include "common/DecayCounter.h" | |
16 | #include "common/entity_name.h" | |
17 | ||
18 | #include "include/Context.h" | |
19 | #include "include/frag.h" | |
20 | #include "include/xlist.h" | |
21 | #include "include/interval_set.h" | |
22 | #include "include/compact_map.h" | |
23 | #include "include/compact_set.h" | |
24 | #include "include/fs_types.h" | |
25 | ||
26 | #include "inode_backtrace.h" | |
27 | ||
28 | #include <boost/spirit/include/qi.hpp> | |
29 | #include <boost/pool/pool.hpp> | |
30 | #include "include/assert.h" | |
31 | #include <boost/serialization/strong_typedef.hpp> | |
32 | ||
#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011"

#define MDS_PORT_CACHE    0x200
#define MDS_PORT_LOCKER   0x300
#define MDS_PORT_MIGRATOR 0x400

#define MAX_MDS   0x100
#define NUM_STRAY 10

#define MDS_INO_ROOT 1

// No longer created but recognised in existing filesystems
// so that we don't try to fragment it.
#define MDS_INO_CEPH 2

/*
 * Reserved inode-number ranges.  Every *_OFFSET below is a multiple of
 * MAX_MDS.  The stray range starts at MDS_INO_STRAY_OFFSET and is
 * MAX_MDS * NUM_STRAY numbers wide; MDS_INO_SYSTEM_BASE is the first
 * number past it.
 */
#define MDS_INO_MDSDIR_OFFSET (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET  (6*MAX_MDS)

// Locations for journal data
#define MDS_INO_LOG_OFFSET         (2*MAX_MDS)
#define MDS_INO_LOG_BACKUP_OFFSET  (3*MAX_MDS)
#define MDS_INO_LOG_POINTER_OFFSET (4*MAX_MDS)
#define MDS_INO_PURGE_QUEUE        (5*MAX_MDS)

#define MDS_INO_SYSTEM_BASE ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))

/*
 * Forward mappings.  MDS_INO_STRAY(x,i) picks slot i inside the x-th group
 * of NUM_STRAY numbers of the stray range; MDS_INO_MDSDIR(x) picks the x-th
 * number of the mdsdir range.  Every argument is parenthesised before the
 * cast so that a compound expression is converted as a whole rather than
 * having only its first operand cast.
 */
#define MDS_INO_STRAY(x,i) (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
#define MDS_INO_MDSDIR(x)  (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

/*
 * Range tests and inverse mappings.
 * NOTE: the MDS_INO_IS_* macros expand (i) more than once, so do not pass
 * an argument with side effects.
 */
#define MDS_INO_IS_STRAY(i)     ((i) >= MDS_INO_STRAY_OFFSET && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i)    ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
#define MDS_INO_IS_BASE(i)      (MDS_INO_ROOT == (i) || MDS_INO_IS_MDSDIR(i))
#define MDS_INO_STRAY_OWNER(i)  (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i)  (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)

#define MDS_TRAVERSE_FORWARD       1
#define MDS_TRAVERSE_DISCOVER      2 // skips permissions checks etc.
#define MDS_TRAVERSE_DISCOVERXLOCK 3 // succeeds on (foreign?) null, xlocked dentries.
72 | ||
73 | ||
74 | typedef int32_t mds_rank_t; | |
75 | typedef int32_t fs_cluster_id_t; | |
76 | ||
77 | BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t) | |
78 | extern const mds_gid_t MDS_GID_NONE; | |
79 | constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = {-1}; | |
80 | // The namespace ID of the anonymous default filesystem from legacy systems | |
81 | constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS = {0}; | |
82 | extern const mds_rank_t MDS_RANK_NONE; | |
83 | ||
84 | class mds_role_t | |
85 | { | |
86 | public: | |
87 | fs_cluster_id_t fscid; | |
88 | mds_rank_t rank; | |
89 | ||
90 | mds_role_t(fs_cluster_id_t fscid_, mds_rank_t rank_) | |
91 | : fscid(fscid_), rank(rank_) | |
92 | {} | |
93 | mds_role_t() | |
94 | : fscid(FS_CLUSTER_ID_NONE), rank(MDS_RANK_NONE) | |
95 | {} | |
96 | bool operator<(mds_role_t const &rhs) const | |
97 | { | |
98 | if (fscid < rhs.fscid) { | |
99 | return true; | |
100 | } else if (fscid == rhs.fscid) { | |
101 | return rank < rhs.rank; | |
102 | } else { | |
103 | return false; | |
104 | } | |
105 | } | |
106 | ||
107 | bool is_none() const | |
108 | { | |
109 | return (rank == MDS_RANK_NONE); | |
110 | } | |
111 | }; | |
112 | std::ostream& operator<<(std::ostream &out, const mds_role_t &role); | |
113 | ||
114 | ||
115 | // CAPS | |
116 | ||
117 | inline string gcap_string(int cap) | |
118 | { | |
119 | string s; | |
120 | if (cap & CEPH_CAP_GSHARED) s += "s"; | |
121 | if (cap & CEPH_CAP_GEXCL) s += "x"; | |
122 | if (cap & CEPH_CAP_GCACHE) s += "c"; | |
123 | if (cap & CEPH_CAP_GRD) s += "r"; | |
124 | if (cap & CEPH_CAP_GWR) s += "w"; | |
125 | if (cap & CEPH_CAP_GBUFFER) s += "b"; | |
126 | if (cap & CEPH_CAP_GWREXTEND) s += "a"; | |
127 | if (cap & CEPH_CAP_GLAZYIO) s += "l"; | |
128 | return s; | |
129 | } | |
130 | inline string ccap_string(int cap) | |
131 | { | |
132 | string s; | |
133 | if (cap & CEPH_CAP_PIN) s += "p"; | |
134 | ||
135 | int a = (cap >> CEPH_CAP_SAUTH) & 3; | |
136 | if (a) s += 'A' + gcap_string(a); | |
137 | ||
138 | a = (cap >> CEPH_CAP_SLINK) & 3; | |
139 | if (a) s += 'L' + gcap_string(a); | |
140 | ||
141 | a = (cap >> CEPH_CAP_SXATTR) & 3; | |
142 | if (a) s += 'X' + gcap_string(a); | |
143 | ||
144 | a = cap >> CEPH_CAP_SFILE; | |
145 | if (a) s += 'F' + gcap_string(a); | |
146 | ||
147 | if (s.length() == 0) | |
148 | s = "-"; | |
149 | return s; | |
150 | } | |
151 | ||
152 | ||
153 | struct scatter_info_t { | |
154 | version_t version; | |
155 | ||
156 | scatter_info_t() : version(0) {} | |
157 | }; | |
158 | ||
159 | struct frag_info_t : public scatter_info_t { | |
160 | // this frag | |
161 | utime_t mtime; | |
162 | uint64_t change_attr; | |
163 | int64_t nfiles; // files | |
164 | int64_t nsubdirs; // subdirs | |
165 | ||
166 | frag_info_t() : change_attr(0), nfiles(0), nsubdirs(0) {} | |
167 | ||
168 | int64_t size() const { return nfiles + nsubdirs; } | |
169 | ||
170 | void zero() { | |
171 | *this = frag_info_t(); | |
172 | } | |
173 | ||
174 | // *this += cur - acc; | |
175 | void add_delta(const frag_info_t &cur, const frag_info_t &acc, bool *touched_mtime=0, bool *touched_chattr=0) { | |
176 | if (cur.mtime > mtime) { | |
177 | mtime = cur.mtime; | |
178 | if (touched_mtime) | |
179 | *touched_mtime = true; | |
180 | } | |
181 | if (cur.change_attr > change_attr) { | |
182 | change_attr = cur.change_attr; | |
183 | if (touched_chattr) | |
184 | *touched_chattr = true; | |
185 | } | |
186 | nfiles += cur.nfiles - acc.nfiles; | |
187 | nsubdirs += cur.nsubdirs - acc.nsubdirs; | |
188 | } | |
189 | ||
190 | void add(const frag_info_t& other) { | |
191 | if (other.mtime > mtime) | |
192 | mtime = other.mtime; | |
193 | if (other.change_attr > change_attr) | |
194 | change_attr = other.change_attr; | |
195 | nfiles += other.nfiles; | |
196 | nsubdirs += other.nsubdirs; | |
197 | } | |
198 | ||
199 | bool same_sums(const frag_info_t &o) const { | |
200 | return mtime <= o.mtime && | |
201 | nfiles == o.nfiles && | |
202 | nsubdirs == o.nsubdirs; | |
203 | } | |
204 | ||
205 | void encode(bufferlist &bl) const; | |
206 | void decode(bufferlist::iterator& bl); | |
207 | void dump(Formatter *f) const; | |
208 | static void generate_test_instances(list<frag_info_t*>& ls); | |
209 | }; | |
210 | WRITE_CLASS_ENCODER(frag_info_t) | |
211 | ||
212 | inline bool operator==(const frag_info_t &l, const frag_info_t &r) { | |
213 | return memcmp(&l, &r, sizeof(l)) == 0; | |
214 | } | |
215 | inline bool operator!=(const frag_info_t &l, const frag_info_t &r) { | |
216 | return !(l == r); | |
217 | } | |
218 | ||
219 | std::ostream& operator<<(std::ostream &out, const frag_info_t &f); | |
220 | ||
221 | ||
222 | struct nest_info_t : public scatter_info_t { | |
223 | // this frag + children | |
224 | utime_t rctime; | |
225 | int64_t rbytes; | |
226 | int64_t rfiles; | |
227 | int64_t rsubdirs; | |
228 | int64_t rsize() const { return rfiles + rsubdirs; } | |
229 | ||
230 | int64_t rsnaprealms; | |
231 | ||
232 | nest_info_t() : rbytes(0), rfiles(0), rsubdirs(0), rsnaprealms(0) {} | |
233 | ||
234 | void zero() { | |
235 | *this = nest_info_t(); | |
236 | } | |
237 | ||
238 | void sub(const nest_info_t &other) { | |
239 | add(other, -1); | |
240 | } | |
241 | void add(const nest_info_t &other, int fac=1) { | |
242 | if (other.rctime > rctime) | |
243 | rctime = other.rctime; | |
244 | rbytes += fac*other.rbytes; | |
245 | rfiles += fac*other.rfiles; | |
246 | rsubdirs += fac*other.rsubdirs; | |
247 | rsnaprealms += fac*other.rsnaprealms; | |
248 | } | |
249 | ||
250 | // *this += cur - acc; | |
251 | void add_delta(const nest_info_t &cur, const nest_info_t &acc) { | |
252 | if (cur.rctime > rctime) | |
253 | rctime = cur.rctime; | |
254 | rbytes += cur.rbytes - acc.rbytes; | |
255 | rfiles += cur.rfiles - acc.rfiles; | |
256 | rsubdirs += cur.rsubdirs - acc.rsubdirs; | |
257 | rsnaprealms += cur.rsnaprealms - acc.rsnaprealms; | |
258 | } | |
259 | ||
260 | bool same_sums(const nest_info_t &o) const { | |
261 | return rctime <= o.rctime && | |
262 | rbytes == o.rbytes && | |
263 | rfiles == o.rfiles && | |
264 | rsubdirs == o.rsubdirs && | |
265 | rsnaprealms == o.rsnaprealms; | |
266 | } | |
267 | ||
268 | void encode(bufferlist &bl) const; | |
269 | void decode(bufferlist::iterator& bl); | |
270 | void dump(Formatter *f) const; | |
271 | static void generate_test_instances(list<nest_info_t*>& ls); | |
272 | }; | |
273 | WRITE_CLASS_ENCODER(nest_info_t) | |
274 | ||
275 | inline bool operator==(const nest_info_t &l, const nest_info_t &r) { | |
276 | return memcmp(&l, &r, sizeof(l)) == 0; | |
277 | } | |
278 | inline bool operator!=(const nest_info_t &l, const nest_info_t &r) { | |
279 | return !(l == r); | |
280 | } | |
281 | ||
282 | std::ostream& operator<<(std::ostream &out, const nest_info_t &n); | |
283 | ||
284 | ||
285 | struct vinodeno_t { | |
286 | inodeno_t ino; | |
287 | snapid_t snapid; | |
288 | vinodeno_t() {} | |
289 | vinodeno_t(inodeno_t i, snapid_t s) : ino(i), snapid(s) {} | |
290 | ||
291 | void encode(bufferlist& bl) const { | |
292 | ::encode(ino, bl); | |
293 | ::encode(snapid, bl); | |
294 | } | |
295 | void decode(bufferlist::iterator& p) { | |
296 | ::decode(ino, p); | |
297 | ::decode(snapid, p); | |
298 | } | |
299 | }; | |
300 | WRITE_CLASS_ENCODER(vinodeno_t) | |
301 | ||
302 | inline bool operator==(const vinodeno_t &l, const vinodeno_t &r) { | |
303 | return l.ino == r.ino && l.snapid == r.snapid; | |
304 | } | |
305 | inline bool operator!=(const vinodeno_t &l, const vinodeno_t &r) { | |
306 | return !(l == r); | |
307 | } | |
308 | inline bool operator<(const vinodeno_t &l, const vinodeno_t &r) { | |
309 | return | |
310 | l.ino < r.ino || | |
311 | (l.ino == r.ino && l.snapid < r.snapid); | |
312 | } | |
313 | ||
314 | struct quota_info_t | |
315 | { | |
316 | int64_t max_bytes; | |
317 | int64_t max_files; | |
318 | ||
319 | quota_info_t() : max_bytes(0), max_files(0) {} | |
320 | ||
321 | void encode(bufferlist& bl) const { | |
322 | ENCODE_START(1, 1, bl); | |
323 | ::encode(max_bytes, bl); | |
324 | ::encode(max_files, bl); | |
325 | ENCODE_FINISH(bl); | |
326 | } | |
327 | void decode(bufferlist::iterator& p) { | |
328 | DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p); | |
329 | ::decode(max_bytes, p); | |
330 | ::decode(max_files, p); | |
331 | DECODE_FINISH(p); | |
332 | } | |
333 | ||
334 | void dump(Formatter *f) const; | |
335 | static void generate_test_instances(list<quota_info_t *>& ls); | |
336 | ||
337 | bool is_valid() const { | |
338 | return max_bytes >=0 && max_files >=0; | |
339 | } | |
340 | bool is_enable() const { | |
341 | return max_bytes || max_files; | |
342 | } | |
343 | }; | |
344 | WRITE_CLASS_ENCODER(quota_info_t) | |
345 | ||
346 | inline bool operator==(const quota_info_t &l, const quota_info_t &r) { | |
347 | return memcmp(&l, &r, sizeof(l)) == 0; | |
348 | } | |
349 | ||
350 | ostream& operator<<(ostream &out, const quota_info_t &n); | |
351 | ||
352 | namespace std { | |
353 | template<> struct hash<vinodeno_t> { | |
354 | size_t operator()(const vinodeno_t &vino) const { | |
355 | hash<inodeno_t> H; | |
356 | hash<uint64_t> I; | |
357 | return H(vino.ino) ^ I(vino.snapid); | |
358 | } | |
359 | }; | |
360 | } // namespace std | |
361 | ||
362 | ||
363 | ||
364 | ||
365 | inline std::ostream& operator<<(std::ostream &out, const vinodeno_t &vino) { | |
366 | out << vino.ino; | |
367 | if (vino.snapid == CEPH_NOSNAP) | |
368 | out << ".head"; | |
369 | else if (vino.snapid) | |
370 | out << '.' << vino.snapid; | |
371 | return out; | |
372 | } | |
373 | ||
374 | ||
375 | /* | |
376 | * client_writeable_range_t | |
377 | */ | |
378 | struct client_writeable_range_t { | |
379 | struct byte_range_t { | |
380 | uint64_t first, last; // interval client can write to | |
381 | byte_range_t() : first(0), last(0) {} | |
382 | }; | |
383 | ||
384 | byte_range_t range; | |
385 | snapid_t follows; // aka "data+metadata flushed thru" | |
386 | ||
387 | client_writeable_range_t() : follows(0) {} | |
388 | ||
389 | void encode(bufferlist &bl) const; | |
390 | void decode(bufferlist::iterator& bl); | |
391 | void dump(Formatter *f) const; | |
392 | static void generate_test_instances(list<client_writeable_range_t*>& ls); | |
393 | }; | |
394 | ||
395 | inline void decode(client_writeable_range_t::byte_range_t& range, bufferlist::iterator& bl) { | |
396 | ::decode(range.first, bl); | |
397 | ::decode(range.last, bl); | |
398 | } | |
399 | ||
400 | WRITE_CLASS_ENCODER(client_writeable_range_t) | |
401 | ||
402 | std::ostream& operator<<(std::ostream& out, const client_writeable_range_t& r); | |
403 | ||
404 | inline bool operator==(const client_writeable_range_t& l, | |
405 | const client_writeable_range_t& r) { | |
406 | return l.range.first == r.range.first && l.range.last == r.range.last && | |
407 | l.follows == r.follows; | |
408 | } | |
409 | ||
410 | struct inline_data_t { | |
411 | private: | |
412 | std::unique_ptr<bufferlist> blp; | |
413 | public: | |
414 | version_t version; | |
415 | ||
416 | void free_data() { | |
417 | blp.reset(); | |
418 | } | |
419 | bufferlist& get_data() { | |
420 | if (!blp) | |
421 | blp.reset(new bufferlist); | |
422 | return *blp; | |
423 | } | |
424 | size_t length() const { return blp ? blp->length() : 0; } | |
425 | ||
426 | inline_data_t() : version(1) {} | |
427 | inline_data_t(const inline_data_t& o) : version(o.version) { | |
428 | if (o.blp) | |
429 | get_data() = *o.blp; | |
430 | } | |
431 | inline_data_t& operator=(const inline_data_t& o) { | |
432 | version = o.version; | |
433 | if (o.blp) | |
434 | get_data() = *o.blp; | |
435 | else | |
436 | free_data(); | |
437 | return *this; | |
438 | } | |
439 | bool operator==(const inline_data_t& o) const { | |
440 | return length() == o.length() && | |
441 | (length() == 0 || | |
442 | (*const_cast<bufferlist*>(blp.get()) == *const_cast<bufferlist*>(o.blp.get()))); | |
443 | } | |
444 | bool operator!=(const inline_data_t& o) const { | |
445 | return !(*this == o); | |
446 | } | |
447 | void encode(bufferlist &bl) const; | |
448 | void decode(bufferlist::iterator& bl); | |
449 | }; | |
450 | WRITE_CLASS_ENCODER(inline_data_t) | |
451 | ||
// Damage categories, numbered consecutively from zero.
enum {
  DAMAGE_STATS    = 0, // statistics (dirstat, size, etc)
  DAMAGE_RSTATS   = 1, // recursive statistics (rstat, accounted_rstat)
  DAMAGE_FRAGTREE = 2  // fragtree -- repair by searching
};
using damage_flags_t = uint32_t;
458 | ||
459 | /* | |
460 | * inode_t | |
461 | */ | |
462 | struct inode_t { | |
463 | /** | |
464 | * *************** | |
465 | * Do not forget to add any new fields to the compare() function. | |
466 | * *************** | |
467 | */ | |
468 | // base (immutable) | |
469 | inodeno_t ino; | |
470 | uint32_t rdev; // if special file | |
471 | ||
472 | // affected by any inode change... | |
473 | utime_t ctime; // inode change time | |
474 | utime_t btime; // birth time | |
475 | ||
476 | // perm (namespace permissions) | |
477 | uint32_t mode; | |
478 | uid_t uid; | |
479 | gid_t gid; | |
480 | ||
481 | // nlink | |
482 | int32_t nlink; | |
483 | ||
484 | // file (data access) | |
485 | ceph_dir_layout dir_layout; // [dir only] | |
486 | file_layout_t layout; | |
487 | compact_set <int64_t> old_pools; | |
488 | uint64_t size; // on directory, # dentries | |
489 | uint64_t max_size_ever; // max size the file has ever been | |
490 | uint32_t truncate_seq; | |
491 | uint64_t truncate_size, truncate_from; | |
492 | uint32_t truncate_pending; | |
493 | utime_t mtime; // file data modify time. | |
494 | utime_t atime; // file data access time. | |
495 | uint32_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes()) | |
496 | inline_data_t inline_data; | |
497 | ||
498 | // change attribute | |
499 | uint64_t change_attr; | |
500 | ||
501 | std::map<client_t,client_writeable_range_t> client_ranges; // client(s) can write to these ranges | |
502 | ||
503 | // dirfrag, recursive accountin | |
504 | frag_info_t dirstat; // protected by my filelock | |
505 | nest_info_t rstat; // protected by my nestlock | |
506 | nest_info_t accounted_rstat; // protected by parent's nestlock | |
507 | ||
508 | quota_info_t quota; | |
509 | ||
510 | mds_rank_t export_pin; | |
511 | ||
512 | // special stuff | |
513 | version_t version; // auth only | |
514 | version_t file_data_version; // auth only | |
515 | version_t xattr_version; | |
516 | ||
517 | utime_t last_scrub_stamp; // start time of last complete scrub | |
518 | version_t last_scrub_version;// (parent) start version of last complete scrub | |
519 | ||
520 | version_t backtrace_version; | |
521 | ||
522 | snapid_t oldest_snap; | |
523 | ||
524 | string stray_prior_path; //stores path before unlink | |
525 | ||
526 | inode_t() : ino(0), rdev(0), | |
527 | mode(0), uid(0), gid(0), nlink(0), | |
528 | size(0), max_size_ever(0), | |
529 | truncate_seq(0), truncate_size(0), truncate_from(0), | |
530 | truncate_pending(0), | |
531 | time_warp_seq(0), change_attr(0), | |
532 | export_pin(MDS_RANK_NONE), | |
533 | version(0), file_data_version(0), xattr_version(0), | |
534 | last_scrub_version(0), backtrace_version(0) { | |
535 | clear_layout(); | |
536 | memset(&dir_layout, 0, sizeof(dir_layout)); | |
537 | memset("a, 0, sizeof(quota)); | |
538 | } | |
539 | ||
540 | // file type | |
541 | bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; } | |
542 | bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; } | |
543 | bool is_file() const { return (mode & S_IFMT) == S_IFREG; } | |
544 | ||
545 | bool is_truncating() const { return (truncate_pending > 0); } | |
546 | void truncate(uint64_t old_size, uint64_t new_size) { | |
547 | assert(new_size < old_size); | |
548 | if (old_size > max_size_ever) | |
549 | max_size_ever = old_size; | |
550 | truncate_from = old_size; | |
551 | size = new_size; | |
552 | rstat.rbytes = new_size; | |
553 | truncate_size = size; | |
554 | truncate_seq++; | |
555 | truncate_pending++; | |
556 | } | |
557 | ||
558 | bool has_layout() const { | |
559 | return layout != file_layout_t(); | |
560 | } | |
561 | ||
562 | void clear_layout() { | |
563 | layout = file_layout_t(); | |
564 | } | |
565 | ||
566 | uint64_t get_layout_size_increment() const { | |
567 | return layout.get_period(); | |
568 | } | |
569 | ||
570 | bool is_dirty_rstat() const { return !(rstat == accounted_rstat); } | |
571 | ||
572 | uint64_t get_max_size() const { | |
573 | uint64_t max = 0; | |
574 | for (std::map<client_t,client_writeable_range_t>::const_iterator p = client_ranges.begin(); | |
575 | p != client_ranges.end(); | |
576 | ++p) | |
577 | if (p->second.range.last > max) | |
578 | max = p->second.range.last; | |
579 | return max; | |
580 | } | |
581 | void set_max_size(uint64_t new_max) { | |
582 | if (new_max == 0) { | |
583 | client_ranges.clear(); | |
584 | } else { | |
585 | for (std::map<client_t,client_writeable_range_t>::iterator p = client_ranges.begin(); | |
586 | p != client_ranges.end(); | |
587 | ++p) | |
588 | p->second.range.last = new_max; | |
589 | } | |
590 | } | |
591 | ||
592 | void trim_client_ranges(snapid_t last) { | |
593 | std::map<client_t, client_writeable_range_t>::iterator p = client_ranges.begin(); | |
594 | while (p != client_ranges.end()) { | |
595 | if (p->second.follows >= last) | |
596 | client_ranges.erase(p++); | |
597 | else | |
598 | ++p; | |
599 | } | |
600 | } | |
601 | ||
602 | bool is_backtrace_updated() const { | |
603 | return backtrace_version == version; | |
604 | } | |
605 | void update_backtrace(version_t pv=0) { | |
606 | backtrace_version = pv ? pv : version; | |
607 | } | |
608 | ||
609 | void add_old_pool(int64_t l) { | |
610 | backtrace_version = version; | |
611 | old_pools.insert(l); | |
612 | } | |
613 | ||
614 | void encode(bufferlist &bl, uint64_t features) const; | |
615 | void decode(bufferlist::iterator& bl); | |
616 | void dump(Formatter *f) const; | |
617 | static void generate_test_instances(list<inode_t*>& ls); | |
618 | /** | |
619 | * Compare this inode_t with another that represent *the same inode* | |
620 | * at different points in time. | |
621 | * @pre The inodes are the same ino | |
622 | * | |
623 | * @param other The inode_t to compare ourselves with | |
624 | * @param divergent A bool pointer which will be set to true | |
625 | * if the values are different in a way that can't be explained | |
626 | * by one being a newer version than the other. | |
627 | * | |
628 | * @returns 1 if we are newer than the other, 0 if equal, -1 if older. | |
629 | */ | |
630 | int compare(const inode_t &other, bool *divergent) const; | |
631 | private: | |
632 | bool older_is_consistent(const inode_t &other) const; | |
633 | }; | |
634 | WRITE_CLASS_ENCODER_FEATURES(inode_t) | |
635 | ||
636 | ||
637 | /* | |
638 | * old_inode_t | |
639 | */ | |
640 | struct old_inode_t { | |
641 | snapid_t first; | |
642 | inode_t inode; | |
643 | std::map<string,bufferptr> xattrs; | |
644 | ||
645 | void encode(bufferlist &bl, uint64_t features) const; | |
646 | void decode(bufferlist::iterator& bl); | |
647 | void dump(Formatter *f) const; | |
648 | static void generate_test_instances(list<old_inode_t*>& ls); | |
649 | }; | |
650 | WRITE_CLASS_ENCODER_FEATURES(old_inode_t) | |
651 | ||
652 | ||
653 | /* | |
654 | * like an inode, but for a dir frag | |
655 | */ | |
656 | struct fnode_t { | |
657 | version_t version; | |
658 | snapid_t snap_purged_thru; // the max_last_destroy snapid we've been purged thru | |
659 | frag_info_t fragstat, accounted_fragstat; | |
660 | nest_info_t rstat, accounted_rstat; | |
661 | damage_flags_t damage_flags; | |
662 | ||
663 | // we know we and all our descendants have been scrubbed since this version | |
664 | version_t recursive_scrub_version; | |
665 | utime_t recursive_scrub_stamp; | |
666 | // version at which we last scrubbed our personal data structures | |
667 | version_t localized_scrub_version; | |
668 | utime_t localized_scrub_stamp; | |
669 | ||
670 | void encode(bufferlist &bl) const; | |
671 | void decode(bufferlist::iterator& bl); | |
672 | void dump(Formatter *f) const; | |
673 | static void generate_test_instances(list<fnode_t*>& ls); | |
674 | fnode_t() : version(0), damage_flags(0), | |
675 | recursive_scrub_version(0), localized_scrub_version(0) {} | |
676 | }; | |
677 | WRITE_CLASS_ENCODER(fnode_t) | |
678 | ||
679 | ||
680 | struct old_rstat_t { | |
681 | snapid_t first; | |
682 | nest_info_t rstat, accounted_rstat; | |
683 | ||
684 | void encode(bufferlist& bl) const; | |
685 | void decode(bufferlist::iterator& p); | |
686 | void dump(Formatter *f) const; | |
687 | static void generate_test_instances(list<old_rstat_t*>& ls); | |
688 | }; | |
689 | WRITE_CLASS_ENCODER(old_rstat_t) | |
690 | ||
691 | inline std::ostream& operator<<(std::ostream& out, const old_rstat_t& o) { | |
692 | return out << "old_rstat(first " << o.first << " " << o.rstat << " " << o.accounted_rstat << ")"; | |
693 | } | |
694 | ||
695 | ||
696 | /* | |
697 | * session_info_t | |
698 | */ | |
699 | ||
700 | struct session_info_t { | |
701 | entity_inst_t inst; | |
702 | std::map<ceph_tid_t,inodeno_t> completed_requests; | |
703 | interval_set<inodeno_t> prealloc_inos; // preallocated, ready to use. | |
704 | interval_set<inodeno_t> used_inos; // journaling use | |
705 | std::map<std::string, std::string> client_metadata; | |
706 | std::set<ceph_tid_t> completed_flushes; | |
707 | EntityName auth_name; | |
708 | ||
709 | client_t get_client() const { return client_t(inst.name.num()); } | |
710 | const entity_name_t& get_source() const { return inst.name; } | |
711 | ||
712 | void clear_meta() { | |
713 | prealloc_inos.clear(); | |
714 | used_inos.clear(); | |
715 | completed_requests.clear(); | |
716 | completed_flushes.clear(); | |
717 | } | |
718 | ||
719 | void encode(bufferlist& bl, uint64_t features) const; | |
720 | void decode(bufferlist::iterator& p); | |
721 | void dump(Formatter *f) const; | |
722 | static void generate_test_instances(list<session_info_t*>& ls); | |
723 | }; | |
724 | WRITE_CLASS_ENCODER_FEATURES(session_info_t) | |
725 | ||
726 | ||
727 | // ======= | |
728 | // dentries | |
729 | ||
730 | struct dentry_key_t { | |
731 | snapid_t snapid; | |
732 | const char *name; | |
733 | __u32 hash; | |
734 | dentry_key_t() : snapid(0), name(0), hash(0) {} | |
735 | dentry_key_t(snapid_t s, const char *n, __u32 h=0) : | |
736 | snapid(s), name(n), hash(h) {} | |
737 | ||
738 | bool is_valid() { return name || snapid; } | |
739 | ||
740 | // encode into something that can be decoded as a string. | |
741 | // name_ (head) or name_%x (!head) | |
742 | void encode(bufferlist& bl) const { | |
743 | string key; | |
744 | encode(key); | |
745 | ::encode(key, bl); | |
746 | } | |
747 | void encode(string& key) const { | |
748 | char b[20]; | |
749 | if (snapid != CEPH_NOSNAP) { | |
750 | uint64_t val(snapid); | |
751 | snprintf(b, sizeof(b), "%" PRIx64, val); | |
752 | } else { | |
753 | snprintf(b, sizeof(b), "%s", "head"); | |
754 | } | |
755 | ostringstream oss; | |
756 | oss << name << "_" << b; | |
757 | key = oss.str(); | |
758 | } | |
759 | static void decode_helper(bufferlist::iterator& bl, string& nm, snapid_t& sn) { | |
760 | string key; | |
761 | ::decode(key, bl); | |
762 | decode_helper(key, nm, sn); | |
763 | } | |
764 | static void decode_helper(const string& key, string& nm, snapid_t& sn) { | |
765 | size_t i = key.find_last_of('_'); | |
766 | assert(i != string::npos); | |
767 | if (key.compare(i+1, string::npos, "head") == 0) { | |
768 | // name_head | |
769 | sn = CEPH_NOSNAP; | |
770 | } else { | |
771 | // name_%x | |
772 | long long unsigned x = 0; | |
773 | sscanf(key.c_str() + i + 1, "%llx", &x); | |
774 | sn = x; | |
775 | } | |
776 | nm = string(key.c_str(), i); | |
777 | } | |
778 | }; | |
779 | ||
780 | inline std::ostream& operator<<(std::ostream& out, const dentry_key_t &k) | |
781 | { | |
782 | return out << "(" << k.name << "," << k.snapid << ")"; | |
783 | } | |
784 | ||
785 | inline bool operator<(const dentry_key_t& k1, const dentry_key_t& k2) | |
786 | { | |
787 | /* | |
788 | * order by hash, name, snap | |
789 | */ | |
790 | int c = ceph_frag_value(k1.hash) - ceph_frag_value(k2.hash); | |
791 | if (c) | |
792 | return c < 0; | |
793 | c = strcmp(k1.name, k2.name); | |
794 | if (c) | |
795 | return c < 0; | |
796 | return k1.snapid < k2.snapid; | |
797 | } | |
798 | ||
799 | ||
800 | /* | |
801 | * string_snap_t is a simple (string, snapid_t) pair | |
802 | */ | |
803 | struct string_snap_t { | |
804 | string name; | |
805 | snapid_t snapid; | |
806 | string_snap_t() {} | |
807 | string_snap_t(const string& n, snapid_t s) : name(n), snapid(s) {} | |
808 | string_snap_t(const char *n, snapid_t s) : name(n), snapid(s) {} | |
809 | ||
810 | void encode(bufferlist& bl) const; | |
811 | void decode(bufferlist::iterator& p); | |
812 | void dump(Formatter *f) const; | |
813 | static void generate_test_instances(list<string_snap_t*>& ls); | |
814 | }; | |
815 | WRITE_CLASS_ENCODER(string_snap_t) | |
816 | ||
817 | inline bool operator<(const string_snap_t& l, const string_snap_t& r) { | |
818 | int c = strcmp(l.name.c_str(), r.name.c_str()); | |
819 | return c < 0 || (c == 0 && l.snapid < r.snapid); | |
820 | } | |
821 | ||
822 | inline std::ostream& operator<<(std::ostream& out, const string_snap_t &k) | |
823 | { | |
824 | return out << "(" << k.name << "," << k.snapid << ")"; | |
825 | } | |
826 | ||
827 | /* | |
828 | * mds_table_pending_t | |
829 | * | |
830 | * mds's requesting any pending ops. child needs to encode the corresponding | |
831 | * pending mutation state in the table. | |
832 | */ | |
833 | struct mds_table_pending_t { | |
834 | uint64_t reqid; | |
835 | __s32 mds; | |
836 | version_t tid; | |
837 | mds_table_pending_t() : reqid(0), mds(0), tid(0) {} | |
838 | void encode(bufferlist& bl) const; | |
839 | void decode(bufferlist::iterator& bl); | |
840 | void dump(Formatter *f) const; | |
841 | static void generate_test_instances(list<mds_table_pending_t*>& ls); | |
842 | }; | |
843 | WRITE_CLASS_ENCODER(mds_table_pending_t) | |
844 | ||
845 | ||
846 | // ========= | |
847 | // requests | |
848 | ||
849 | struct metareqid_t { | |
850 | entity_name_t name; | |
851 | uint64_t tid; | |
852 | metareqid_t() : tid(0) {} | |
853 | metareqid_t(entity_name_t n, ceph_tid_t t) : name(n), tid(t) {} | |
854 | void encode(bufferlist& bl) const { | |
855 | ::encode(name, bl); | |
856 | ::encode(tid, bl); | |
857 | } | |
858 | void decode(bufferlist::iterator &p) { | |
859 | ::decode(name, p); | |
860 | ::decode(tid, p); | |
861 | } | |
862 | }; | |
863 | WRITE_CLASS_ENCODER(metareqid_t) | |
864 | ||
865 | inline std::ostream& operator<<(std::ostream& out, const metareqid_t& r) { | |
866 | return out << r.name << ":" << r.tid; | |
867 | } | |
868 | ||
869 | inline bool operator==(const metareqid_t& l, const metareqid_t& r) { | |
870 | return (l.name == r.name) && (l.tid == r.tid); | |
871 | } | |
872 | inline bool operator!=(const metareqid_t& l, const metareqid_t& r) { | |
873 | return (l.name != r.name) || (l.tid != r.tid); | |
874 | } | |
875 | inline bool operator<(const metareqid_t& l, const metareqid_t& r) { | |
876 | return (l.name < r.name) || | |
877 | (l.name == r.name && l.tid < r.tid); | |
878 | } | |
879 | inline bool operator<=(const metareqid_t& l, const metareqid_t& r) { | |
880 | return (l.name < r.name) || | |
881 | (l.name == r.name && l.tid <= r.tid); | |
882 | } | |
883 | inline bool operator>(const metareqid_t& l, const metareqid_t& r) { return !(l <= r); } | |
884 | inline bool operator>=(const metareqid_t& l, const metareqid_t& r) { return !(l < r); } | |
885 | ||
886 | namespace std { | |
887 | template<> struct hash<metareqid_t> { | |
888 | size_t operator()(const metareqid_t &r) const { | |
889 | hash<uint64_t> H; | |
890 | return H(r.name.num()) ^ H(r.name.type()) ^ H(r.tid); | |
891 | } | |
892 | }; | |
893 | } // namespace std | |
894 | ||
895 | ||
896 | // cap info for client reconnect | |
897 | struct cap_reconnect_t { | |
898 | string path; | |
899 | mutable ceph_mds_cap_reconnect capinfo; | |
900 | snapid_t snap_follows; | |
901 | bufferlist flockbl; | |
902 | ||
903 | cap_reconnect_t() { | |
904 | memset(&capinfo, 0, sizeof(capinfo)); | |
905 | snap_follows = 0; | |
906 | } | |
907 | cap_reconnect_t(uint64_t cap_id, inodeno_t pino, const string& p, int w, int i, | |
908 | inodeno_t sr, snapid_t sf, bufferlist& lb) : | |
909 | path(p) { | |
910 | capinfo.cap_id = cap_id; | |
911 | capinfo.wanted = w; | |
912 | capinfo.issued = i; | |
913 | capinfo.snaprealm = sr; | |
914 | capinfo.pathbase = pino; | |
915 | capinfo.flock_len = 0; | |
916 | snap_follows = sf; | |
917 | flockbl.claim(lb); | |
918 | } | |
919 | void encode(bufferlist& bl) const; | |
920 | void decode(bufferlist::iterator& bl); | |
921 | void encode_old(bufferlist& bl) const; | |
922 | void decode_old(bufferlist::iterator& bl); | |
923 | ||
924 | void dump(Formatter *f) const; | |
925 | static void generate_test_instances(list<cap_reconnect_t*>& ls); | |
926 | }; | |
927 | WRITE_CLASS_ENCODER(cap_reconnect_t) | |
928 | ||
929 | ||
930 | // compat for pre-FLOCK feature | |
931 | struct old_ceph_mds_cap_reconnect { | |
932 | __le64 cap_id; | |
933 | __le32 wanted; | |
934 | __le32 issued; | |
935 | __le64 old_size; | |
936 | struct ceph_timespec old_mtime, old_atime; | |
937 | __le64 snaprealm; | |
938 | __le64 pathbase; /* base ino for our path to this ino */ | |
939 | } __attribute__ ((packed)); | |
940 | WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect) | |
941 | ||
942 | struct old_cap_reconnect_t { | |
943 | string path; | |
944 | old_ceph_mds_cap_reconnect capinfo; | |
945 | ||
946 | const old_cap_reconnect_t& operator=(const cap_reconnect_t& n) { | |
947 | path = n.path; | |
948 | capinfo.cap_id = n.capinfo.cap_id; | |
949 | capinfo.wanted = n.capinfo.wanted; | |
950 | capinfo.issued = n.capinfo.issued; | |
951 | capinfo.snaprealm = n.capinfo.snaprealm; | |
952 | capinfo.pathbase = n.capinfo.pathbase; | |
953 | return *this; | |
954 | } | |
955 | operator cap_reconnect_t() { | |
956 | cap_reconnect_t n; | |
957 | n.path = path; | |
958 | n.capinfo.cap_id = capinfo.cap_id; | |
959 | n.capinfo.wanted = capinfo.wanted; | |
960 | n.capinfo.issued = capinfo.issued; | |
961 | n.capinfo.snaprealm = capinfo.snaprealm; | |
962 | n.capinfo.pathbase = capinfo.pathbase; | |
963 | return n; | |
964 | } | |
965 | ||
966 | void encode(bufferlist& bl) const { | |
967 | ::encode(path, bl); | |
968 | ::encode(capinfo, bl); | |
969 | } | |
970 | void decode(bufferlist::iterator& bl) { | |
971 | ::decode(path, bl); | |
972 | ::decode(capinfo, bl); | |
973 | } | |
974 | }; | |
975 | WRITE_CLASS_ENCODER(old_cap_reconnect_t) | |
976 | ||
977 | ||
978 | // ================================================================ | |
979 | // dir frag | |
980 | ||
981 | struct dirfrag_t { | |
982 | inodeno_t ino; | |
983 | frag_t frag; | |
984 | ||
985 | dirfrag_t() : ino(0) { } | |
986 | dirfrag_t(inodeno_t i, frag_t f) : ino(i), frag(f) { } | |
987 | ||
988 | void encode(bufferlist& bl) const { | |
989 | ::encode(ino, bl); | |
990 | ::encode(frag, bl); | |
991 | } | |
992 | void decode(bufferlist::iterator& bl) { | |
993 | ::decode(ino, bl); | |
994 | ::decode(frag, bl); | |
995 | } | |
996 | }; | |
997 | WRITE_CLASS_ENCODER(dirfrag_t) | |
998 | ||
999 | ||
1000 | inline std::ostream& operator<<(std::ostream& out, const dirfrag_t &df) { | |
1001 | out << df.ino; | |
1002 | if (!df.frag.is_root()) out << "." << df.frag; | |
1003 | return out; | |
1004 | } | |
1005 | inline bool operator<(dirfrag_t l, dirfrag_t r) { | |
1006 | if (l.ino < r.ino) return true; | |
1007 | if (l.ino == r.ino && l.frag < r.frag) return true; | |
1008 | return false; | |
1009 | } | |
1010 | inline bool operator==(dirfrag_t l, dirfrag_t r) { | |
1011 | return l.ino == r.ino && l.frag == r.frag; | |
1012 | } | |
1013 | ||
1014 | namespace std { | |
1015 | template<> struct hash<dirfrag_t> { | |
1016 | size_t operator()(const dirfrag_t &df) const { | |
1017 | static rjhash<uint64_t> H; | |
1018 | static rjhash<uint32_t> I; | |
1019 | return H(df.ino) ^ I(df.frag); | |
1020 | } | |
1021 | }; | |
1022 | } // namespace std | |
1023 | ||
1024 | ||
1025 | ||
1026 | // ================================================================ | |
1027 | ||
// Popularity counter slots; used as indices into dirfrag_load_vec_t::vec.
#define META_POP_IRD      0
#define META_POP_IWR      1
#define META_POP_READDIR  2
#define META_POP_FETCH    3
#define META_POP_STORE    4
// One past the last slot above (same value as dirfrag_load_vec_t::NUM).
#define META_NPOP         5
1034 | ||
1035 | class inode_load_vec_t { | |
1036 | static const int NUM = 2; | |
1037 | std::vector < DecayCounter > vec; | |
1038 | public: | |
1039 | explicit inode_load_vec_t(const utime_t &now) | |
1040 | : vec(NUM, DecayCounter(now)) | |
1041 | {} | |
1042 | // for dencoder infrastructure | |
1043 | inode_load_vec_t() : | |
1044 | vec(NUM, DecayCounter()) | |
1045 | {} | |
1046 | DecayCounter &get(int t) { | |
1047 | assert(t < NUM); | |
1048 | return vec[t]; | |
1049 | } | |
1050 | void zero(utime_t now) { | |
1051 | for (int i=0; i<NUM; i++) | |
1052 | vec[i].reset(now); | |
1053 | } | |
1054 | void encode(bufferlist &bl) const; | |
1055 | void decode(const utime_t &t, bufferlist::iterator &p); | |
1056 | // for dencoder | |
1057 | void decode(bufferlist::iterator& p) { utime_t sample; decode(sample, p); } | |
1058 | void dump(Formatter *f); | |
1059 | static void generate_test_instances(list<inode_load_vec_t*>& ls); | |
1060 | }; | |
1061 | inline void encode(const inode_load_vec_t &c, bufferlist &bl) { c.encode(bl); } | |
1062 | inline void decode(inode_load_vec_t & c, const utime_t &t, bufferlist::iterator &p) { | |
1063 | c.decode(t, p); | |
1064 | } | |
1065 | // for dencoder | |
1066 | inline void decode(inode_load_vec_t & c, bufferlist::iterator &p) { | |
1067 | utime_t sample; | |
1068 | c.decode(sample, p); | |
1069 | } | |
1070 | ||
1071 | class dirfrag_load_vec_t { | |
1072 | public: | |
1073 | static const int NUM = 5; | |
1074 | std::vector < DecayCounter > vec; | |
1075 | explicit dirfrag_load_vec_t(const utime_t &now) | |
1076 | : vec(NUM, DecayCounter(now)) | |
1077 | { } | |
1078 | // for dencoder infrastructure | |
1079 | dirfrag_load_vec_t() | |
1080 | : vec(NUM, DecayCounter()) | |
1081 | {} | |
1082 | void encode(bufferlist &bl) const { | |
1083 | ENCODE_START(2, 2, bl); | |
1084 | for (int i=0; i<NUM; i++) | |
1085 | ::encode(vec[i], bl); | |
1086 | ENCODE_FINISH(bl); | |
1087 | } | |
1088 | void decode(const utime_t &t, bufferlist::iterator &p) { | |
1089 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p); | |
1090 | for (int i=0; i<NUM; i++) | |
1091 | ::decode(vec[i], t, p); | |
1092 | DECODE_FINISH(p); | |
1093 | } | |
1094 | // for dencoder infrastructure | |
1095 | void decode(bufferlist::iterator& p) { | |
1096 | utime_t sample; | |
1097 | decode(sample, p); | |
1098 | } | |
1099 | void dump(Formatter *f) const; | |
1100 | static void generate_test_instances(list<dirfrag_load_vec_t*>& ls); | |
1101 | ||
1102 | DecayCounter &get(int t) { | |
1103 | assert(t < NUM); | |
1104 | return vec[t]; | |
1105 | } | |
1106 | void adjust(utime_t now, const DecayRate& rate, double d) { | |
1107 | for (int i=0; i<NUM; i++) | |
1108 | vec[i].adjust(now, rate, d); | |
1109 | } | |
1110 | void zero(utime_t now) { | |
1111 | for (int i=0; i<NUM; i++) | |
1112 | vec[i].reset(now); | |
1113 | } | |
1114 | double meta_load(utime_t now, const DecayRate& rate) { | |
1115 | return | |
1116 | 1*vec[META_POP_IRD].get(now, rate) + | |
1117 | 2*vec[META_POP_IWR].get(now, rate) + | |
1118 | 1*vec[META_POP_READDIR].get(now, rate) + | |
1119 | 2*vec[META_POP_FETCH].get(now, rate) + | |
1120 | 4*vec[META_POP_STORE].get(now, rate); | |
1121 | } | |
1122 | double meta_load() { | |
1123 | return | |
1124 | 1*vec[META_POP_IRD].get_last() + | |
1125 | 2*vec[META_POP_IWR].get_last() + | |
1126 | 1*vec[META_POP_READDIR].get_last() + | |
1127 | 2*vec[META_POP_FETCH].get_last() + | |
1128 | 4*vec[META_POP_STORE].get_last(); | |
1129 | } | |
1130 | ||
1131 | void add(utime_t now, DecayRate& rate, dirfrag_load_vec_t& r) { | |
1132 | for (int i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1133 | vec[i].adjust(r.vec[i].get(now, rate)); | |
1134 | } | |
1135 | void sub(utime_t now, DecayRate& rate, dirfrag_load_vec_t& r) { | |
1136 | for (int i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1137 | vec[i].adjust(-r.vec[i].get(now, rate)); | |
1138 | } | |
1139 | void scale(double f) { | |
1140 | for (int i=0; i<dirfrag_load_vec_t::NUM; i++) | |
1141 | vec[i].scale(f); | |
1142 | } | |
1143 | }; | |
1144 | ||
1145 | inline void encode(const dirfrag_load_vec_t &c, bufferlist &bl) { c.encode(bl); } | |
1146 | inline void decode(dirfrag_load_vec_t& c, const utime_t &t, bufferlist::iterator &p) { | |
1147 | c.decode(t, p); | |
1148 | } | |
1149 | // this for dencoder | |
1150 | inline void decode(dirfrag_load_vec_t& c, bufferlist::iterator &p) { | |
1151 | utime_t sample; | |
1152 | c.decode(sample, p); | |
1153 | } | |
1154 | ||
1155 | inline std::ostream& operator<<(std::ostream& out, dirfrag_load_vec_t& dl) | |
1156 | { | |
1157 | // ugliness! | |
1158 | utime_t now = ceph_clock_now(); | |
1159 | DecayRate rate(g_conf->mds_decay_halflife); | |
1160 | return out << "[" << dl.vec[0].get(now, rate) << "," << dl.vec[1].get(now, rate) | |
1161 | << " " << dl.meta_load(now, rate) | |
1162 | << "]"; | |
1163 | } | |
1164 | ||
1165 | ||
1166 | ||
1167 | ||
1168 | ||
1169 | ||
1170 | /* mds_load_t | |
1171 | * mds load | |
1172 | */ | |
1173 | ||
1174 | struct mds_load_t { | |
1175 | dirfrag_load_vec_t auth; | |
1176 | dirfrag_load_vec_t all; | |
1177 | ||
1178 | double req_rate; | |
1179 | double cache_hit_rate; | |
1180 | double queue_len; | |
1181 | ||
1182 | double cpu_load_avg; | |
1183 | ||
1184 | explicit mds_load_t(const utime_t &t) : | |
1185 | auth(t), all(t), req_rate(0), cache_hit_rate(0), | |
1186 | queue_len(0), cpu_load_avg(0) | |
1187 | {} | |
1188 | // mostly for the dencoder infrastructure | |
1189 | mds_load_t() : | |
1190 | auth(), all(), | |
1191 | req_rate(0), cache_hit_rate(0), queue_len(0), cpu_load_avg(0) | |
1192 | {} | |
1193 | ||
1194 | double mds_load(); // defiend in MDBalancer.cc | |
1195 | void encode(bufferlist& bl) const; | |
1196 | void decode(const utime_t& now, bufferlist::iterator& bl); | |
1197 | //this one is for dencoder infrastructure | |
1198 | void decode(bufferlist::iterator& bl) { utime_t sample; decode(sample, bl); } | |
1199 | void dump(Formatter *f) const; | |
1200 | static void generate_test_instances(list<mds_load_t*>& ls); | |
1201 | }; | |
1202 | inline void encode(const mds_load_t &c, bufferlist &bl) { c.encode(bl); } | |
1203 | inline void decode(mds_load_t &c, const utime_t &t, bufferlist::iterator &p) { | |
1204 | c.decode(t, p); | |
1205 | } | |
1206 | // this one is for dencoder | |
1207 | inline void decode(mds_load_t &c, bufferlist::iterator &p) { | |
1208 | utime_t sample; | |
1209 | c.decode(sample, p); | |
1210 | } | |
1211 | ||
1212 | inline std::ostream& operator<<( std::ostream& out, mds_load_t& load ) | |
1213 | { | |
1214 | return out << "mdsload<" << load.auth << "/" << load.all | |
1215 | << ", req " << load.req_rate | |
1216 | << ", hr " << load.cache_hit_rate | |
1217 | << ", qlen " << load.queue_len | |
1218 | << ", cpu " << load.cpu_load_avg | |
1219 | << ">"; | |
1220 | } | |
1221 | ||
1222 | class load_spread_t { | |
1223 | public: | |
1224 | static const int MAX = 4; | |
1225 | int last[MAX]; | |
1226 | int p, n; | |
1227 | DecayCounter count; | |
1228 | ||
1229 | public: | |
1230 | load_spread_t() : p(0), n(0), count(ceph_clock_now()) | |
1231 | { | |
1232 | for (int i=0; i<MAX; i++) | |
1233 | last[i] = -1; | |
1234 | } | |
1235 | ||
1236 | double hit(utime_t now, const DecayRate& rate, int who) { | |
1237 | for (int i=0; i<n; i++) | |
1238 | if (last[i] == who) | |
1239 | return count.get_last(); | |
1240 | ||
1241 | // we're new(ish) | |
1242 | last[p++] = who; | |
1243 | if (n < MAX) n++; | |
1244 | if (n == 1) return 0.0; | |
1245 | ||
1246 | if (p == MAX) p = 0; | |
1247 | ||
1248 | return count.hit(now, rate); | |
1249 | } | |
1250 | double get(utime_t now, const DecayRate& rate) { | |
1251 | return count.get(now, rate); | |
1252 | } | |
1253 | }; | |
1254 | ||
1255 | ||
1256 | ||
1257 | // ================================================================ | |
1258 | typedef std::pair<mds_rank_t, mds_rank_t> mds_authority_t; | |
1259 | ||
1260 | // -- authority delegation -- | |
1261 | // directory authority types | |
1262 | // >= 0 is the auth mds | |
1263 | #define CDIR_AUTH_PARENT mds_rank_t(-1) // default | |
1264 | #define CDIR_AUTH_UNKNOWN mds_rank_t(-2) | |
1265 | #define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN) | |
1266 | #define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN) | |
1267 | //#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2) | |
1268 | ||
1269 | class MDSCacheObjectInfo { | |
1270 | public: | |
1271 | inodeno_t ino; | |
1272 | dirfrag_t dirfrag; | |
1273 | string dname; | |
1274 | snapid_t snapid; | |
1275 | ||
1276 | MDSCacheObjectInfo() : ino(0) {} | |
1277 | ||
1278 | void encode(bufferlist& bl) const; | |
1279 | void decode(bufferlist::iterator& bl); | |
1280 | void dump(Formatter *f) const; | |
1281 | static void generate_test_instances(list<MDSCacheObjectInfo*>& ls); | |
1282 | }; | |
1283 | ||
1284 | inline std::ostream& operator<<(std::ostream& out, const MDSCacheObjectInfo &info) { | |
1285 | if (info.ino) return out << info.ino << "." << info.snapid; | |
1286 | if (info.dname.length()) return out << info.dirfrag << "/" << info.dname | |
1287 | << " snap " << info.snapid; | |
1288 | return out << info.dirfrag; | |
1289 | } | |
1290 | ||
1291 | inline bool operator==(const MDSCacheObjectInfo& l, const MDSCacheObjectInfo& r) { | |
1292 | if (l.ino || r.ino) | |
1293 | return l.ino == r.ino && l.snapid == r.snapid; | |
1294 | else | |
1295 | return l.dirfrag == r.dirfrag && l.dname == r.dname; | |
1296 | } | |
1297 | WRITE_CLASS_ENCODER(MDSCacheObjectInfo) | |
1298 | ||
1299 | ||
1300 | // parse a map of keys/values. | |
1301 | namespace qi = boost::spirit::qi; | |
1302 | ||
1303 | template <typename Iterator> | |
1304 | struct keys_and_values | |
1305 | : qi::grammar<Iterator, std::map<string, string>()> | |
1306 | { | |
1307 | keys_and_values() | |
1308 | : keys_and_values::base_type(query) | |
1309 | { | |
1310 | query = pair >> *(qi::lit(' ') >> pair); | |
1311 | pair = key >> '=' >> value; | |
1312 | key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9"); | |
1313 | value = +qi::char_("a-zA-Z_0-9"); | |
1314 | } | |
1315 | qi::rule<Iterator, std::map<string, string>()> query; | |
1316 | qi::rule<Iterator, std::pair<string, string>()> pair; | |
1317 | qi::rule<Iterator, string()> key, value; | |
1318 | }; | |
1319 | ||
1320 | #endif |