]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/mdstypes.h
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / mds / mdstypes.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #ifndef CEPH_MDSTYPES_H
4 #define CEPH_MDSTYPES_H
5
6 #include "include/int_types.h"
7
8 #include <ostream>
9 #include <set>
10 #include <map>
11 #include <string_view>
12
13 #include "common/config.h"
14 #include "common/Clock.h"
15 #include "common/DecayCounter.h"
16 #include "common/StackStringStream.h"
17 #include "common/entity_name.h"
18
19 #include "include/compat.h"
20 #include "include/Context.h"
21 #include "include/frag.h"
22 #include "include/xlist.h"
23 #include "include/interval_set.h"
24 #include "include/compact_set.h"
25 #include "include/fs_types.h"
26 #include "include/ceph_fs.h"
27
28 #include "inode_backtrace.h"
29
30 #include <boost/spirit/include/qi.hpp>
31 #include <boost/pool/pool.hpp>
32 #include "include/ceph_assert.h"
33 #include "common/ceph_json.h"
34 #include "include/cephfs/types.h"
35
// Port identifiers for the MDS cache, locker and migrator subsystems.
#define MDS_PORT_CACHE    0x200
#define MDS_PORT_LOCKER   0x300
#define MDS_PORT_MIGRATOR 0x400

// Number of stray inode slots reserved per MDS (see MDS_INO_STRAY).
#define NUM_STRAY 10
41
// Inode numbers 1, 2 and 4: please see CEPH_INO_* in include/ceph_fs.h

// Start of the per-MDS "mdsdir" and "stray" inode ranges.
#define MDS_INO_MDSDIR_OFFSET (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET (6*MAX_MDS)

// Locations for journal data
#define MDS_INO_LOG_OFFSET (2*MAX_MDS)
#define MDS_INO_LOG_BACKUP_OFFSET (3*MAX_MDS)
#define MDS_INO_LOG_POINTER_OFFSET (4*MAX_MDS)
#define MDS_INO_PURGE_QUEUE (5*MAX_MDS)

// First inode number past the stray range (MAX_MDS * NUM_STRAY entries).
#define MDS_INO_SYSTEM_BASE ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))

// Inode number of stray slot i of MDS x.
#define MDS_INO_STRAY(x,i) (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
// Inode number of the mdsdir of MDS x.  The argument is parenthesized
// before the cast so that an expression argument is converted as a whole
// rather than only its first operand.
#define MDS_INO_MDSDIR(x) (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

// Range checks and inverse mappings for the two ranges above.
#define MDS_INO_IS_STRAY(i) ((i) >= MDS_INO_STRAY_OFFSET && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i) ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
#define MDS_INO_IS_BASE(i) ((i) == CEPH_INO_ROOT || (i) == CEPH_INO_GLOBAL_SNAPREALM || MDS_INO_IS_MDSDIR(i))
#define MDS_INO_STRAY_OWNER(i) (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i) (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)

// True for inode numbers in [MDS_INO_MDSDIR_OFFSET, MDS_INO_SYSTEM_BASE).
#define MDS_IS_PRIVATE_INO(i) ((i) < MDS_INO_SYSTEM_BASE && (i) >= MDS_INO_MDSDIR_OFFSET)
66
67 class mds_role_t {
68 public:
69 mds_role_t(fs_cluster_id_t fscid_, mds_rank_t rank_)
70 : fscid(fscid_), rank(rank_)
71 {}
72 mds_role_t() {}
73
74 bool operator<(mds_role_t const &rhs) const {
75 if (fscid < rhs.fscid) {
76 return true;
77 } else if (fscid == rhs.fscid) {
78 return rank < rhs.rank;
79 } else {
80 return false;
81 }
82 }
83
84 bool is_none() const {
85 return (rank == MDS_RANK_NONE);
86 }
87
88 fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE;
89 mds_rank_t rank = MDS_RANK_NONE;
90 };
91 inline std::ostream& operator<<(std::ostream& out, const mds_role_t& role) {
92 return out << role.fscid << ":" << role.rank;
93 }
94
95 // CAPS
96 inline std::string gcap_string(int cap)
97 {
98 std::string s;
99 if (cap & CEPH_CAP_GSHARED) s += "s";
100 if (cap & CEPH_CAP_GEXCL) s += "x";
101 if (cap & CEPH_CAP_GCACHE) s += "c";
102 if (cap & CEPH_CAP_GRD) s += "r";
103 if (cap & CEPH_CAP_GWR) s += "w";
104 if (cap & CEPH_CAP_GBUFFER) s += "b";
105 if (cap & CEPH_CAP_GWREXTEND) s += "a";
106 if (cap & CEPH_CAP_GLAZYIO) s += "l";
107 return s;
108 }
109 inline std::string ccap_string(int cap)
110 {
111 std::string s;
112 if (cap & CEPH_CAP_PIN) s += "p";
113
114 int a = (cap >> CEPH_CAP_SAUTH) & 3;
115 if (a) s += 'A' + gcap_string(a);
116
117 a = (cap >> CEPH_CAP_SLINK) & 3;
118 if (a) s += 'L' + gcap_string(a);
119
120 a = (cap >> CEPH_CAP_SXATTR) & 3;
121 if (a) s += 'X' + gcap_string(a);
122
123 a = cap >> CEPH_CAP_SFILE;
124 if (a) s += 'F' + gcap_string(a);
125
126 if (s.length() == 0)
127 s = "-";
128 return s;
129 }
130
131 namespace std {
132 template<> struct hash<vinodeno_t> {
133 size_t operator()(const vinodeno_t &vino) const {
134 hash<inodeno_t> H;
135 hash<uint64_t> I;
136 return H(vino.ino) ^ I(vino.snapid);
137 }
138 };
139 }
140
141 inline std::ostream& operator<<(std::ostream &out, const vinodeno_t &vino) {
142 out << vino.ino;
143 if (vino.snapid == CEPH_NOSNAP)
144 out << ".head";
145 else if (vino.snapid)
146 out << '.' << vino.snapid;
147 return out;
148 }
149
// Bit mask type for damage flags (see fnode_t::damage_flags).
using damage_flags_t = uint32_t;
151
// A std::basic_string<char> whose storage comes from Alloc<char>.
template<template<typename> class Alloc>
using alloc_string = std::basic_string<char, std::char_traits<char>, Alloc<char>>;
154
155 template<template<typename> class Allocator>
156 using xattr_map = std::map<alloc_string<Allocator>,
157 ceph::bufferptr,
158 std::less<alloc_string<Allocator>>,
159 Allocator<std::pair<const alloc_string<Allocator>,
160 ceph::bufferptr>>>; // FIXME bufferptr not in mempool
161
162 template<template<typename> class Allocator>
163 inline void decode_noshare(xattr_map<Allocator>& xattrs, ceph::buffer::list::const_iterator &p)
164 {
165 __u32 n;
166 decode(n, p);
167 while (n-- > 0) {
168 alloc_string<Allocator> key;
169 decode(key, p);
170 __u32 len;
171 decode(len, p);
172 p.copy_deep(len, xattrs[key]);
173 }
174 }
175
176 template<template<typename> class Allocator = std::allocator>
177 struct old_inode_t {
178 snapid_t first;
179 inode_t<Allocator> inode;
180 xattr_map<Allocator> xattrs;
181
182 void encode(ceph::buffer::list &bl, uint64_t features) const;
183 void decode(ceph::buffer::list::const_iterator& bl);
184 void dump(ceph::Formatter *f) const;
185 static void generate_test_instances(std::list<old_inode_t*>& ls);
186 };
187
188 // These methods may be moved back to mdstypes.cc when we have pmr
189 template<template<typename> class Allocator>
190 void old_inode_t<Allocator>::encode(ceph::buffer::list& bl, uint64_t features) const
191 {
192 ENCODE_START(2, 2, bl);
193 encode(first, bl);
194 encode(inode, bl, features);
195 encode(xattrs, bl);
196 ENCODE_FINISH(bl);
197 }
198
199 template<template<typename> class Allocator>
200 void old_inode_t<Allocator>::decode(ceph::buffer::list::const_iterator& bl)
201 {
202 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
203 decode(first, bl);
204 decode(inode, bl);
205 decode_noshare<Allocator>(xattrs, bl);
206 DECODE_FINISH(bl);
207 }
208
209 template<template<typename> class Allocator>
210 void old_inode_t<Allocator>::dump(ceph::Formatter *f) const
211 {
212 f->dump_unsigned("first", first);
213 inode.dump(f);
214 f->open_object_section("xattrs");
215 for (const auto &p : xattrs) {
216 std::string v(p.second.c_str(), p.second.length());
217 f->dump_string(p.first.c_str(), v);
218 }
219 f->close_section();
220 }
221
222 template<template<typename> class Allocator>
223 void old_inode_t<Allocator>::generate_test_instances(std::list<old_inode_t<Allocator>*>& ls)
224 {
225 ls.push_back(new old_inode_t<Allocator>);
226 ls.push_back(new old_inode_t<Allocator>);
227 ls.back()->first = 2;
228 std::list<inode_t<Allocator>*> ils;
229 inode_t<Allocator>::generate_test_instances(ils);
230 ls.back()->inode = *ils.back();
231 ls.back()->xattrs["user.foo"] = ceph::buffer::copy("asdf", 4);
232 ls.back()->xattrs["user.unprintable"] = ceph::buffer::copy("\000\001\002", 3);
233 }
234
235 template<template<typename> class Allocator>
236 inline void encode(const old_inode_t<Allocator> &c, ::ceph::buffer::list &bl, uint64_t features)
237 {
238 ENCODE_DUMP_PRE();
239 c.encode(bl, features);
240 ENCODE_DUMP_POST(cl);
241 }
242 template<template<typename> class Allocator>
243 inline void decode(old_inode_t<Allocator> &c, ::ceph::buffer::list::const_iterator &p)
244 {
245 c.decode(p);
246 }
247
248 /*
249 * like an inode, but for a dir frag
250 */
251 struct fnode_t {
252 void encode(ceph::buffer::list &bl) const;
253 void decode(ceph::buffer::list::const_iterator& bl);
254 void dump(ceph::Formatter *f) const;
255 void decode_json(JSONObj *obj);
256 static void generate_test_instances(std::list<fnode_t*>& ls);
257
258 version_t version = 0;
259 snapid_t snap_purged_thru; // the max_last_destroy snapid we've been purged thru
260 frag_info_t fragstat, accounted_fragstat;
261 nest_info_t rstat, accounted_rstat;
262 damage_flags_t damage_flags = 0;
263
264 // we know we and all our descendants have been scrubbed since this version
265 version_t recursive_scrub_version = 0;
266 utime_t recursive_scrub_stamp;
267 // version at which we last scrubbed our personal data structures
268 version_t localized_scrub_version = 0;
269 utime_t localized_scrub_stamp;
270 };
271 WRITE_CLASS_ENCODER(fnode_t)
272
273
274 struct old_rstat_t {
275 void encode(ceph::buffer::list& bl) const;
276 void decode(ceph::buffer::list::const_iterator& p);
277 void dump(ceph::Formatter *f) const;
278 static void generate_test_instances(std::list<old_rstat_t*>& ls);
279
280 snapid_t first;
281 nest_info_t rstat, accounted_rstat;
282 };
283 WRITE_CLASS_ENCODER(old_rstat_t)
284
285 inline std::ostream& operator<<(std::ostream& out, const old_rstat_t& o) {
286 return out << "old_rstat(first " << o.first << " " << o.rstat << " " << o.accounted_rstat << ")";
287 }
288
289 class feature_bitset_t {
290 public:
291 typedef uint64_t block_type;
292 static const size_t bits_per_block = sizeof(block_type) * 8;
293
294 feature_bitset_t(const feature_bitset_t& other) : _vec(other._vec) {}
295 feature_bitset_t(feature_bitset_t&& other) : _vec(std::move(other._vec)) {}
296 feature_bitset_t(unsigned long value = 0);
297 feature_bitset_t(const std::vector<size_t>& array);
298 feature_bitset_t& operator=(const feature_bitset_t& other) {
299 _vec = other._vec;
300 return *this;
301 }
302 feature_bitset_t& operator=(feature_bitset_t&& other) {
303 _vec = std::move(other._vec);
304 return *this;
305 }
306 feature_bitset_t& operator-=(const feature_bitset_t& other);
307 bool empty() const {
308 //block_type is a uint64_t. If the vector is only composed of 0s, then it's still "empty"
309 for (auto& v : _vec) {
310 if (v)
311 return false;
312 }
313 return true;
314 }
315 bool test(size_t bit) const {
316 if (bit >= bits_per_block * _vec.size())
317 return false;
318 return _vec[bit / bits_per_block] & ((block_type)1 << (bit % bits_per_block));
319 }
320 void insert(size_t bit) {
321 size_t n = bit / bits_per_block;
322 if (n >= _vec.size())
323 _vec.resize(n + 1);
324 _vec[n] |= ((block_type)1 << (bit % bits_per_block));
325 }
326 void erase(size_t bit) {
327 size_t n = bit / bits_per_block;
328 if (n >= _vec.size())
329 return;
330 _vec[n] &= ~((block_type)1 << (bit % bits_per_block));
331 if (n + 1 == _vec.size()) {
332 while (!_vec.empty() && _vec.back() == 0)
333 _vec.pop_back();
334 }
335 }
336 void clear() {
337 _vec.clear();
338 }
339 bool operator==(const feature_bitset_t& other) const {
340 return _vec == other._vec;
341 }
342 bool operator!=(const feature_bitset_t& other) const {
343 return _vec != other._vec;
344 }
345 void encode(ceph::buffer::list& bl) const;
346 void decode(ceph::buffer::list::const_iterator &p);
347 void dump(ceph::Formatter *f) const;
348 void print(std::ostream& out) const;
349 private:
350 std::vector<block_type> _vec;
351 };
352 WRITE_CLASS_ENCODER(feature_bitset_t)
353
354 inline std::ostream& operator<<(std::ostream& out, const feature_bitset_t& s) {
355 s.print(out);
356 return out;
357 }
358
359 struct metric_spec_t {
360 metric_spec_t() {}
361 metric_spec_t(const metric_spec_t& other) :
362 metric_flags(other.metric_flags) {}
363 metric_spec_t(metric_spec_t&& other) :
364 metric_flags(std::move(other.metric_flags)) {}
365 metric_spec_t(const feature_bitset_t& mf) :
366 metric_flags(mf) {}
367 metric_spec_t(feature_bitset_t&& mf) :
368 metric_flags(std::move(mf)) {}
369
370 metric_spec_t& operator=(const metric_spec_t& other) {
371 metric_flags = other.metric_flags;
372 return *this;
373 }
374 metric_spec_t& operator=(metric_spec_t&& other) {
375 metric_flags = std::move(other.metric_flags);
376 return *this;
377 }
378
379 bool empty() const {
380 return metric_flags.empty();
381 }
382
383 void clear() {
384 metric_flags.clear();
385 }
386
387 void encode(ceph::buffer::list& bl) const;
388 void decode(ceph::buffer::list::const_iterator& p);
389 void dump(ceph::Formatter *f) const;
390 void print(std::ostream& out) const;
391
392 // set of metrics that a client is capable of forwarding
393 feature_bitset_t metric_flags;
394 };
395 WRITE_CLASS_ENCODER(metric_spec_t)
396
397 inline std::ostream& operator<<(std::ostream& out, const metric_spec_t& mst) {
398 mst.print(out);
399 return out;
400 }
401
402 /*
403 * client_metadata_t
404 */
405 struct client_metadata_t {
406 using kv_map_t = std::map<std::string,std::string>;
407 using iterator = kv_map_t::const_iterator;
408
409 client_metadata_t() {}
410 client_metadata_t(const kv_map_t& kv, const feature_bitset_t &f, const metric_spec_t &mst) :
411 kv_map(kv),
412 features(f),
413 metric_spec(mst) {}
414 client_metadata_t& operator=(const client_metadata_t& other) {
415 kv_map = other.kv_map;
416 features = other.features;
417 metric_spec = other.metric_spec;
418 return *this;
419 }
420
421 bool empty() const { return kv_map.empty() && features.empty() && metric_spec.empty(); }
422 iterator find(const std::string& key) const { return kv_map.find(key); }
423 iterator begin() const { return kv_map.begin(); }
424 iterator end() const { return kv_map.end(); }
425 void erase(iterator it) { kv_map.erase(it); }
426 std::string& operator[](const std::string& key) { return kv_map[key]; }
427 void merge(const client_metadata_t& other) {
428 kv_map.insert(other.kv_map.begin(), other.kv_map.end());
429 features = other.features;
430 metric_spec = other.metric_spec;
431 }
432 void clear() {
433 kv_map.clear();
434 features.clear();
435 metric_spec.clear();
436 }
437
438 void encode(ceph::buffer::list& bl) const;
439 void decode(ceph::buffer::list::const_iterator& p);
440 void dump(ceph::Formatter *f) const;
441
442 kv_map_t kv_map;
443 feature_bitset_t features;
444 metric_spec_t metric_spec;
445 };
446 WRITE_CLASS_ENCODER(client_metadata_t)
447
448 /*
449 * session_info_t - durable part of a Session
450 */
451 struct session_info_t {
452 client_t get_client() const { return client_t(inst.name.num()); }
453 bool has_feature(size_t bit) const { return client_metadata.features.test(bit); }
454 const entity_name_t& get_source() const { return inst.name; }
455
456 void clear_meta() {
457 prealloc_inos.clear();
458 completed_requests.clear();
459 completed_flushes.clear();
460 client_metadata.clear();
461 }
462
463 void encode(ceph::buffer::list& bl, uint64_t features) const;
464 void decode(ceph::buffer::list::const_iterator& p);
465 void dump(ceph::Formatter *f) const;
466 static void generate_test_instances(std::list<session_info_t*>& ls);
467
468 entity_inst_t inst;
469 std::map<ceph_tid_t,inodeno_t> completed_requests;
470 interval_set<inodeno_t> prealloc_inos; // preallocated, ready to use.
471 client_metadata_t client_metadata;
472 std::set<ceph_tid_t> completed_flushes;
473 EntityName auth_name;
474 };
475 WRITE_CLASS_ENCODER_FEATURES(session_info_t)
476
477 // dentries
478 struct dentry_key_t {
479 dentry_key_t() {}
480 dentry_key_t(snapid_t s, std::string_view n, __u32 h=0) :
481 snapid(s), name(n), hash(h) {}
482
483 bool is_valid() { return name.length() || snapid; }
484
485 // encode into something that can be decoded as a string.
486 // name_ (head) or name_%x (!head)
487 void encode(ceph::buffer::list& bl) const {
488 std::string key;
489 encode(key);
490 using ceph::encode;
491 encode(key, bl);
492 }
493 void encode(std::string& key) const {
494 char b[20];
495 if (snapid != CEPH_NOSNAP) {
496 uint64_t val(snapid);
497 snprintf(b, sizeof(b), "%" PRIx64, val);
498 } else {
499 snprintf(b, sizeof(b), "%s", "head");
500 }
501 CachedStackStringStream css;
502 *css << name << "_" << b;
503 key = css->strv();
504 }
505 static void decode_helper(ceph::buffer::list::const_iterator& bl, std::string& nm,
506 snapid_t& sn) {
507 std::string key;
508 using ceph::decode;
509 decode(key, bl);
510 decode_helper(key, nm, sn);
511 }
512 static void decode_helper(std::string_view key, std::string& nm, snapid_t& sn) {
513 size_t i = key.find_last_of('_');
514 ceph_assert(i != std::string::npos);
515 if (key.compare(i+1, std::string_view::npos, "head") == 0) {
516 // name_head
517 sn = CEPH_NOSNAP;
518 } else {
519 // name_%x
520 long long unsigned x = 0;
521 std::string x_str(key.substr(i+1));
522 sscanf(x_str.c_str(), "%llx", &x);
523 sn = x;
524 }
525 nm = key.substr(0, i);
526 }
527
528 snapid_t snapid = 0;
529 std::string_view name;
530 __u32 hash = 0;
531 };
532
533 inline std::ostream& operator<<(std::ostream& out, const dentry_key_t &k)
534 {
535 return out << "(" << k.name << "," << k.snapid << ")";
536 }
537
538 inline bool operator<(const dentry_key_t& k1, const dentry_key_t& k2)
539 {
540 /*
541 * order by hash, name, snap
542 */
543 int c = ceph_frag_value(k1.hash) - ceph_frag_value(k2.hash);
544 if (c)
545 return c < 0;
546 c = k1.name.compare(k2.name);
547 if (c)
548 return c < 0;
549 return k1.snapid < k2.snapid;
550 }
551
552 /*
553 * string_snap_t is a simple (string, snapid_t) pair
554 */
555 struct string_snap_t {
556 string_snap_t() {}
557 string_snap_t(std::string_view n, snapid_t s) : name(n), snapid(s) {}
558
559 int compare(const string_snap_t& r) const {
560 int ret = name.compare(r.name);
561 if (ret)
562 return ret;
563 if (snapid == r.snapid)
564 return 0;
565 return snapid > r.snapid ? 1 : -1;
566 }
567
568 void encode(ceph::buffer::list& bl) const;
569 void decode(ceph::buffer::list::const_iterator& p);
570 void dump(ceph::Formatter *f) const;
571 static void generate_test_instances(std::list<string_snap_t*>& ls);
572
573 std::string name;
574 snapid_t snapid;
575 };
576 WRITE_CLASS_ENCODER(string_snap_t)
577
578 inline bool operator==(const string_snap_t& l, const string_snap_t& r) {
579 return l.name == r.name && l.snapid == r.snapid;
580 }
581
582 inline bool operator<(const string_snap_t& l, const string_snap_t& r) {
583 int c = l.name.compare(r.name);
584 return c < 0 || (c == 0 && l.snapid < r.snapid);
585 }
586
587 inline std::ostream& operator<<(std::ostream& out, const string_snap_t &k)
588 {
589 return out << "(" << k.name << "," << k.snapid << ")";
590 }
591
592 /*
593 * mds_table_pending_t
594 *
595 * For mds's requesting any pending ops, child needs to encode the corresponding
596 * pending mutation state in the table.
597 */
598 struct mds_table_pending_t {
599 void encode(ceph::buffer::list& bl) const;
600 void decode(ceph::buffer::list::const_iterator& bl);
601 void dump(ceph::Formatter *f) const;
602 static void generate_test_instances(std::list<mds_table_pending_t*>& ls);
603
604 uint64_t reqid = 0;
605 __s32 mds = 0;
606 version_t tid = 0;
607 };
608 WRITE_CLASS_ENCODER(mds_table_pending_t)
609
610 // requests
611 struct metareqid_t {
612 metareqid_t() {}
613 metareqid_t(entity_name_t n, ceph_tid_t t) : name(n), tid(t) {}
614 void encode(ceph::buffer::list& bl) const {
615 using ceph::encode;
616 encode(name, bl);
617 encode(tid, bl);
618 }
619 void decode(ceph::buffer::list::const_iterator &p) {
620 using ceph::decode;
621 decode(name, p);
622 decode(tid, p);
623 }
624 void dump(ceph::Formatter *f) const;
625
626 entity_name_t name;
627 uint64_t tid = 0;
628 };
629 WRITE_CLASS_ENCODER(metareqid_t)
630
631 inline std::ostream& operator<<(std::ostream& out, const metareqid_t& r) {
632 return out << r.name << ":" << r.tid;
633 }
634
635 inline bool operator==(const metareqid_t& l, const metareqid_t& r) {
636 return (l.name == r.name) && (l.tid == r.tid);
637 }
638 inline bool operator!=(const metareqid_t& l, const metareqid_t& r) {
639 return (l.name != r.name) || (l.tid != r.tid);
640 }
641 inline bool operator<(const metareqid_t& l, const metareqid_t& r) {
642 return (l.name < r.name) ||
643 (l.name == r.name && l.tid < r.tid);
644 }
645 inline bool operator<=(const metareqid_t& l, const metareqid_t& r) {
646 return (l.name < r.name) ||
647 (l.name == r.name && l.tid <= r.tid);
648 }
649 inline bool operator>(const metareqid_t& l, const metareqid_t& r) { return !(l <= r); }
650 inline bool operator>=(const metareqid_t& l, const metareqid_t& r) { return !(l < r); }
651
652 namespace std {
653 template<> struct hash<metareqid_t> {
654 size_t operator()(const metareqid_t &r) const {
655 hash<uint64_t> H;
656 return H(r.name.num()) ^ H(r.name.type()) ^ H(r.tid);
657 }
658 };
659 } // namespace std
660
661 // cap info for client reconnect
662 struct cap_reconnect_t {
663 cap_reconnect_t() {}
664 cap_reconnect_t(uint64_t cap_id, inodeno_t pino, std::string_view p, int w, int i,
665 inodeno_t sr, snapid_t sf, ceph::buffer::list& lb) :
666 path(p) {
667 capinfo.cap_id = cap_id;
668 capinfo.wanted = w;
669 capinfo.issued = i;
670 capinfo.snaprealm = sr;
671 capinfo.pathbase = pino;
672 capinfo.flock_len = 0;
673 snap_follows = sf;
674 flockbl = std::move(lb);
675 }
676 void encode(ceph::buffer::list& bl) const;
677 void decode(ceph::buffer::list::const_iterator& bl);
678 void encode_old(ceph::buffer::list& bl) const;
679 void decode_old(ceph::buffer::list::const_iterator& bl);
680
681 void dump(ceph::Formatter *f) const;
682 static void generate_test_instances(std::list<cap_reconnect_t*>& ls);
683
684 std::string path;
685 mutable ceph_mds_cap_reconnect capinfo = {};
686 snapid_t snap_follows = 0;
687 ceph::buffer::list flockbl;
688 };
689 WRITE_CLASS_ENCODER(cap_reconnect_t)
690
691 struct snaprealm_reconnect_t {
692 snaprealm_reconnect_t() {}
693 snaprealm_reconnect_t(inodeno_t ino, snapid_t seq, inodeno_t parent) {
694 realm.ino = ino;
695 realm.seq = seq;
696 realm.parent = parent;
697 }
698 void encode(ceph::buffer::list& bl) const;
699 void decode(ceph::buffer::list::const_iterator& bl);
700 void encode_old(ceph::buffer::list& bl) const;
701 void decode_old(ceph::buffer::list::const_iterator& bl);
702
703 void dump(ceph::Formatter *f) const;
704 static void generate_test_instances(std::list<snaprealm_reconnect_t*>& ls);
705
706 mutable ceph_mds_snaprealm_reconnect realm = {};
707 };
708 WRITE_CLASS_ENCODER(snaprealm_reconnect_t)
709
710 // compat for pre-FLOCK feature
711 struct old_ceph_mds_cap_reconnect {
712 ceph_le64 cap_id;
713 ceph_le32 wanted;
714 ceph_le32 issued;
715 ceph_le64 old_size;
716 struct ceph_timespec old_mtime, old_atime;
717 ceph_le64 snaprealm;
718 ceph_le64 pathbase; /* base ino for our path to this ino */
719 } __attribute__ ((packed));
720 WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect)
721
722 struct old_cap_reconnect_t {
723 const old_cap_reconnect_t& operator=(const cap_reconnect_t& n) {
724 path = n.path;
725 capinfo.cap_id = n.capinfo.cap_id;
726 capinfo.wanted = n.capinfo.wanted;
727 capinfo.issued = n.capinfo.issued;
728 capinfo.snaprealm = n.capinfo.snaprealm;
729 capinfo.pathbase = n.capinfo.pathbase;
730 return *this;
731 }
732 operator cap_reconnect_t() {
733 cap_reconnect_t n;
734 n.path = path;
735 n.capinfo.cap_id = capinfo.cap_id;
736 n.capinfo.wanted = capinfo.wanted;
737 n.capinfo.issued = capinfo.issued;
738 n.capinfo.snaprealm = capinfo.snaprealm;
739 n.capinfo.pathbase = capinfo.pathbase;
740 return n;
741 }
742
743 void encode(ceph::buffer::list& bl) const {
744 using ceph::encode;
745 encode(path, bl);
746 encode(capinfo, bl);
747 }
748 void decode(ceph::buffer::list::const_iterator& bl) {
749 using ceph::decode;
750 decode(path, bl);
751 decode(capinfo, bl);
752 }
753
754 std::string path;
755 old_ceph_mds_cap_reconnect capinfo;
756 };
757 WRITE_CLASS_ENCODER(old_cap_reconnect_t)
758
759 // dir frag
760 struct dirfrag_t {
761 dirfrag_t() {}
762 dirfrag_t(inodeno_t i, frag_t f) : ino(i), frag(f) { }
763
764 void encode(ceph::buffer::list& bl) const {
765 using ceph::encode;
766 encode(ino, bl);
767 encode(frag, bl);
768 }
769 void decode(ceph::buffer::list::const_iterator& bl) {
770 using ceph::decode;
771 decode(ino, bl);
772 decode(frag, bl);
773 }
774
775 inodeno_t ino = 0;
776 frag_t frag;
777 };
778 WRITE_CLASS_ENCODER(dirfrag_t)
779
780 inline std::ostream& operator<<(std::ostream& out, const dirfrag_t &df) {
781 out << df.ino;
782 if (!df.frag.is_root()) out << "." << df.frag;
783 return out;
784 }
785 inline bool operator<(dirfrag_t l, dirfrag_t r) {
786 if (l.ino < r.ino) return true;
787 if (l.ino == r.ino && l.frag < r.frag) return true;
788 return false;
789 }
790 inline bool operator==(dirfrag_t l, dirfrag_t r) {
791 return l.ino == r.ino && l.frag == r.frag;
792 }
793
794 namespace std {
795 template<> struct hash<dirfrag_t> {
796 size_t operator()(const dirfrag_t &df) const {
797 static rjhash<uint64_t> H;
798 static rjhash<uint32_t> I;
799 return H(df.ino) ^ I(df.frag);
800 }
801 };
802 } // namespace std
803
804 // ================================================================
// Indices of the popularity counters kept in dirfrag_load_vec_t;
// META_NPOP is the number of counters.
#define META_POP_IRD      0
#define META_POP_IWR      1
#define META_POP_READDIR  2
#define META_POP_FETCH    3
#define META_POP_STORE    4
#define META_NPOP         5
811
812 class inode_load_vec_t {
813 public:
814 using time = DecayCounter::time;
815 using clock = DecayCounter::clock;
816 static const size_t NUM = 2;
817
818 inode_load_vec_t() : vec{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {}
819 inode_load_vec_t(const DecayRate &rate) : vec{DecayCounter(rate), DecayCounter(rate)} {}
820
821 DecayCounter &get(int t) {
822 return vec[t];
823 }
824 void zero() {
825 for (auto &d : vec) {
826 d.reset();
827 }
828 }
829 void encode(ceph::buffer::list &bl) const;
830 void decode(ceph::buffer::list::const_iterator& p);
831 void dump(ceph::Formatter *f) const;
832 static void generate_test_instances(std::list<inode_load_vec_t*>& ls);
833
834 private:
835 std::array<DecayCounter, NUM> vec;
836 };
837 inline void encode(const inode_load_vec_t &c, ceph::buffer::list &bl) {
838 c.encode(bl);
839 }
840 inline void decode(inode_load_vec_t & c, ceph::buffer::list::const_iterator &p) {
841 c.decode(p);
842 }
843
844 class dirfrag_load_vec_t {
845 public:
846 using time = DecayCounter::time;
847 using clock = DecayCounter::clock;
848 static const size_t NUM = 5;
849
850 dirfrag_load_vec_t() :
851 vec{DecayCounter(DecayRate()),
852 DecayCounter(DecayRate()),
853 DecayCounter(DecayRate()),
854 DecayCounter(DecayRate()),
855 DecayCounter(DecayRate())
856 }
857 {}
858 dirfrag_load_vec_t(const DecayRate &rate) :
859 vec{DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate)}
860 {}
861
862 void encode(ceph::buffer::list &bl) const {
863 ENCODE_START(2, 2, bl);
864 for (const auto &i : vec) {
865 encode(i, bl);
866 }
867 ENCODE_FINISH(bl);
868 }
869 void decode(ceph::buffer::list::const_iterator &p) {
870 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p);
871 for (auto &i : vec) {
872 decode(i, p);
873 }
874 DECODE_FINISH(p);
875 }
876 void dump(ceph::Formatter *f) const;
877 void dump(ceph::Formatter *f, const DecayRate& rate) const;
878 static void generate_test_instances(std::list<dirfrag_load_vec_t*>& ls);
879
880 const DecayCounter &get(int t) const {
881 return vec[t];
882 }
883 DecayCounter &get(int t) {
884 return vec[t];
885 }
886 void adjust(double d) {
887 for (auto &i : vec) {
888 i.adjust(d);
889 }
890 }
891 void zero() {
892 for (auto &i : vec) {
893 i.reset();
894 }
895 }
896 double meta_load() const {
897 return
898 1*vec[META_POP_IRD].get() +
899 2*vec[META_POP_IWR].get() +
900 1*vec[META_POP_READDIR].get() +
901 2*vec[META_POP_FETCH].get() +
902 4*vec[META_POP_STORE].get();
903 }
904
905 void add(dirfrag_load_vec_t& r) {
906 for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++)
907 vec[i].adjust(r.vec[i].get());
908 }
909 void sub(dirfrag_load_vec_t& r) {
910 for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++)
911 vec[i].adjust(-r.vec[i].get());
912 }
913 void scale(double f) {
914 for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++)
915 vec[i].scale(f);
916 }
917
918 private:
919 friend inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl);
920 std::array<DecayCounter, NUM> vec;
921 };
922
923 inline void encode(const dirfrag_load_vec_t &c, ceph::buffer::list &bl) {
924 c.encode(bl);
925 }
926 inline void decode(dirfrag_load_vec_t& c, ceph::buffer::list::const_iterator &p) {
927 c.decode(p);
928 }
929
930 inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl)
931 {
932 CachedStackStringStream css;
933 *css << std::setprecision(1) << std::fixed
934 << "[pop"
935 " IRD:" << dl.vec[0]
936 << " IWR:" << dl.vec[1]
937 << " RDR:" << dl.vec[2]
938 << " FET:" << dl.vec[3]
939 << " STR:" << dl.vec[4]
940 << " *LOAD:" << dl.meta_load() << "]";
941 return out << css->strv();
942 }
943
944 struct mds_load_t {
945 using clock = dirfrag_load_vec_t::clock;
946 using time = dirfrag_load_vec_t::time;
947
948 dirfrag_load_vec_t auth;
949 dirfrag_load_vec_t all;
950
951 mds_load_t() : auth(DecayRate()), all(DecayRate()) {}
952 mds_load_t(const DecayRate &rate) : auth(rate), all(rate) {}
953
954 double req_rate = 0.0;
955 double cache_hit_rate = 0.0;
956 double queue_len = 0.0;
957
958 double cpu_load_avg = 0.0;
959
960 double mds_load() const; // defiend in MDBalancer.cc
961 void encode(ceph::buffer::list& bl) const;
962 void decode(ceph::buffer::list::const_iterator& bl);
963 void dump(ceph::Formatter *f) const;
964 static void generate_test_instances(std::list<mds_load_t*>& ls);
965 };
966 inline void encode(const mds_load_t &c, ceph::buffer::list &bl) {
967 c.encode(bl);
968 }
969 inline void decode(mds_load_t &c, ceph::buffer::list::const_iterator &p) {
970 c.decode(p);
971 }
972
973 inline std::ostream& operator<<(std::ostream& out, const mds_load_t& load)
974 {
975 return out << "mdsload<" << load.auth << "/" << load.all
976 << ", req " << load.req_rate
977 << ", hr " << load.cache_hit_rate
978 << ", qlen " << load.queue_len
979 << ", cpu " << load.cpu_load_avg
980 << ">";
981 }
982
983 // ================================================================
984 typedef std::pair<mds_rank_t, mds_rank_t> mds_authority_t;
985
986 // -- authority delegation --
987 // directory authority types
988 // >= 0 is the auth mds
989 #define CDIR_AUTH_PARENT mds_rank_t(-1) // default
990 #define CDIR_AUTH_UNKNOWN mds_rank_t(-2)
991 #define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN)
992 #define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN)
993 //#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2)
994
995 class MDSCacheObjectInfo {
996 public:
997 void encode(ceph::buffer::list& bl) const;
998 void decode(ceph::buffer::list::const_iterator& bl);
999 void dump(ceph::Formatter *f) const;
1000 static void generate_test_instances(std::list<MDSCacheObjectInfo*>& ls);
1001
1002 inodeno_t ino = 0;
1003 dirfrag_t dirfrag;
1004 std::string dname;
1005 snapid_t snapid;
1006 };
1007
1008 inline std::ostream& operator<<(std::ostream& out, const MDSCacheObjectInfo &info) {
1009 if (info.ino) return out << info.ino << "." << info.snapid;
1010 if (info.dname.length()) return out << info.dirfrag << "/" << info.dname
1011 << " snap " << info.snapid;
1012 return out << info.dirfrag;
1013 }
1014
1015 inline bool operator==(const MDSCacheObjectInfo& l, const MDSCacheObjectInfo& r) {
1016 if (l.ino || r.ino)
1017 return l.ino == r.ino && l.snapid == r.snapid;
1018 else
1019 return l.dirfrag == r.dirfrag && l.dname == r.dname;
1020 }
1021 WRITE_CLASS_ENCODER(MDSCacheObjectInfo)
1022
1023 #endif