]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/mdstypes.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / mds / mdstypes.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#ifndef CEPH_MDSTYPES_H
4#define CEPH_MDSTYPES_H
5
6#include "include/int_types.h"
7
7c673cae
FG
8#include <ostream>
9#include <set>
10#include <map>
11fdf7f2 11#include <string_view>
7c673cae
FG
12
13#include "common/config.h"
14#include "common/Clock.h"
15#include "common/DecayCounter.h"
f67539c2 16#include "common/StackStringStream.h"
7c673cae
FG
17#include "common/entity_name.h"
18
f67539c2 19#include "include/compat.h"
7c673cae
FG
20#include "include/Context.h"
21#include "include/frag.h"
22#include "include/xlist.h"
23#include "include/interval_set.h"
7c673cae
FG
24#include "include/compact_set.h"
25#include "include/fs_types.h"
b3b6e05e 26#include "include/ceph_fs.h"
7c673cae
FG
27
28#include "inode_backtrace.h"
29
30#include <boost/spirit/include/qi.hpp>
31#include <boost/pool/pool.hpp>
11fdf7f2 32#include "include/ceph_assert.h"
f67539c2 33#include "common/ceph_json.h"
1e59de90 34#include "include/cephfs/types.h"
7c673cae
FG
35
#define MDS_PORT_CACHE      0x200
#define MDS_PORT_LOCKER     0x300
#define MDS_PORT_MIGRATOR   0x400

// Number of stray slots per MDS (see MDS_INO_STRAY below).
#define NUM_STRAY                 10

// Inode numbers 1,2 and 4 please see CEPH_INO_* in include/ceph_fs.h

// Reserved inode ranges, expressed as multiples of MAX_MDS.
#define MDS_INO_MDSDIR_OFFSET     (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET      (6*MAX_MDS)

// Locations for journal data
#define MDS_INO_LOG_OFFSET        (2*MAX_MDS)
#define MDS_INO_LOG_BACKUP_OFFSET (3*MAX_MDS)
#define MDS_INO_LOG_POINTER_OFFSET    (4*MAX_MDS)
#define MDS_INO_PURGE_QUEUE       (5*MAX_MDS)

// First inode number past the stray range [6*MAX_MDS, 6*MAX_MDS + MAX_MDS*NUM_STRAY).
#define MDS_INO_SYSTEM_BASE       ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))

// Inode number of stray slot i belonging to MDS x.
#define MDS_INO_STRAY(x,i)  (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
// Inode number of the mdsdir belonging to MDS x.  The argument is fully
// parenthesised so that a compound expression is cast as a whole.
#define MDS_INO_MDSDIR(x)   (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

#define MDS_INO_IS_STRAY(i)  ((i) >= MDS_INO_STRAY_OFFSET  && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i) ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
#define MDS_INO_IS_BASE(i)   ((i) == CEPH_INO_ROOT || (i) == CEPH_INO_GLOBAL_SNAPREALM || MDS_INO_IS_MDSDIR(i))
// Inverse of MDS_INO_STRAY: recover the owning MDS and the slot index.
#define MDS_INO_STRAY_OWNER(i) (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i) (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)

// True for inode numbers in [MDS_INO_MDSDIR_OFFSET, MDS_INO_SYSTEM_BASE).
#define MDS_IS_PRIVATE_INO(i) ((i) < MDS_INO_SYSTEM_BASE && (i) >= MDS_INO_MDSDIR_OFFSET)
66
9f95a23c
TL
67class mds_role_t {
68public:
7c673cae
FG
69 mds_role_t(fs_cluster_id_t fscid_, mds_rank_t rank_)
70 : fscid(fscid_), rank(rank_)
71 {}
9f95a23c
TL
72 mds_role_t() {}
73
74 bool operator<(mds_role_t const &rhs) const {
7c673cae
FG
75 if (fscid < rhs.fscid) {
76 return true;
77 } else if (fscid == rhs.fscid) {
78 return rank < rhs.rank;
79 } else {
80 return false;
81 }
82 }
83
9f95a23c 84 bool is_none() const {
7c673cae
FG
85 return (rank == MDS_RANK_NONE);
86 }
7c673cae 87
9f95a23c
TL
88 fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE;
89 mds_rank_t rank = MDS_RANK_NONE;
90};
91inline std::ostream& operator<<(std::ostream& out, const mds_role_t& role) {
92 return out << role.fscid << ":" << role.rank;
93}
7c673cae
FG
94
95// CAPS
f67539c2 96inline std::string gcap_string(int cap)
7c673cae 97{
f67539c2 98 std::string s;
7c673cae
FG
99 if (cap & CEPH_CAP_GSHARED) s += "s";
100 if (cap & CEPH_CAP_GEXCL) s += "x";
101 if (cap & CEPH_CAP_GCACHE) s += "c";
102 if (cap & CEPH_CAP_GRD) s += "r";
103 if (cap & CEPH_CAP_GWR) s += "w";
104 if (cap & CEPH_CAP_GBUFFER) s += "b";
105 if (cap & CEPH_CAP_GWREXTEND) s += "a";
106 if (cap & CEPH_CAP_GLAZYIO) s += "l";
107 return s;
108}
f67539c2 109inline std::string ccap_string(int cap)
7c673cae 110{
f67539c2 111 std::string s;
7c673cae
FG
112 if (cap & CEPH_CAP_PIN) s += "p";
113
114 int a = (cap >> CEPH_CAP_SAUTH) & 3;
115 if (a) s += 'A' + gcap_string(a);
116
117 a = (cap >> CEPH_CAP_SLINK) & 3;
118 if (a) s += 'L' + gcap_string(a);
119
120 a = (cap >> CEPH_CAP_SXATTR) & 3;
121 if (a) s += 'X' + gcap_string(a);
122
123 a = cap >> CEPH_CAP_SFILE;
124 if (a) s += 'F' + gcap_string(a);
125
126 if (s.length() == 0)
127 s = "-";
128 return s;
129}
130
7c673cae
FG
131namespace std {
132 template<> struct hash<vinodeno_t> {
133 size_t operator()(const vinodeno_t &vino) const {
134 hash<inodeno_t> H;
135 hash<uint64_t> I;
136 return H(vino.ino) ^ I(vino.snapid);
137 }
138 };
9f95a23c 139}
7c673cae
FG
140
141inline std::ostream& operator<<(std::ostream &out, const vinodeno_t &vino) {
142 out << vino.ino;
143 if (vino.snapid == CEPH_NOSNAP)
144 out << ".head";
145 else if (vino.snapid)
146 out << '.' << vino.snapid;
147 return out;
148}
149
7c673cae
FG
150typedef uint32_t damage_flags_t;
151
94b18763
FG
152template<template<typename> class Allocator>
153using alloc_string = std::basic_string<char,std::char_traits<char>,Allocator<char>>;
154
155template<template<typename> class Allocator>
f67539c2
TL
156using xattr_map = std::map<alloc_string<Allocator>,
157 ceph::bufferptr,
158 std::less<alloc_string<Allocator>>,
159 Allocator<std::pair<const alloc_string<Allocator>,
160 ceph::bufferptr>>>; // FIXME bufferptr not in mempool
7c673cae 161
e306af50
TL
162template<template<typename> class Allocator>
163inline void decode_noshare(xattr_map<Allocator>& xattrs, ceph::buffer::list::const_iterator &p)
164{
165 __u32 n;
166 decode(n, p);
167 while (n-- > 0) {
168 alloc_string<Allocator> key;
169 decode(key, p);
170 __u32 len;
171 decode(len, p);
172 p.copy_deep(len, xattrs[key]);
173 }
174}
175
94b18763 176template<template<typename> class Allocator = std::allocator>
7c673cae
FG
177struct old_inode_t {
178 snapid_t first;
94b18763
FG
179 inode_t<Allocator> inode;
180 xattr_map<Allocator> xattrs;
7c673cae 181
f67539c2
TL
182 void encode(ceph::buffer::list &bl, uint64_t features) const;
183 void decode(ceph::buffer::list::const_iterator& bl);
184 void dump(ceph::Formatter *f) const;
94b18763 185 static void generate_test_instances(std::list<old_inode_t*>& ls);
7c673cae 186};
94b18763
FG
187
188// These methods may be moved back to mdstypes.cc when we have pmr
189template<template<typename> class Allocator>
f67539c2 190void old_inode_t<Allocator>::encode(ceph::buffer::list& bl, uint64_t features) const
94b18763
FG
191{
192 ENCODE_START(2, 2, bl);
11fdf7f2
TL
193 encode(first, bl);
194 encode(inode, bl, features);
195 encode(xattrs, bl);
94b18763
FG
196 ENCODE_FINISH(bl);
197}
198
199template<template<typename> class Allocator>
f67539c2 200void old_inode_t<Allocator>::decode(ceph::buffer::list::const_iterator& bl)
94b18763
FG
201{
202 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
11fdf7f2
TL
203 decode(first, bl);
204 decode(inode, bl);
e306af50 205 decode_noshare<Allocator>(xattrs, bl);
94b18763
FG
206 DECODE_FINISH(bl);
207}
208
209template<template<typename> class Allocator>
f67539c2 210void old_inode_t<Allocator>::dump(ceph::Formatter *f) const
94b18763
FG
211{
212 f->dump_unsigned("first", first);
213 inode.dump(f);
214 f->open_object_section("xattrs");
215 for (const auto &p : xattrs) {
216 std::string v(p.second.c_str(), p.second.length());
217 f->dump_string(p.first.c_str(), v);
218 }
219 f->close_section();
220}
221
222template<template<typename> class Allocator>
223void old_inode_t<Allocator>::generate_test_instances(std::list<old_inode_t<Allocator>*>& ls)
224{
225 ls.push_back(new old_inode_t<Allocator>);
226 ls.push_back(new old_inode_t<Allocator>);
227 ls.back()->first = 2;
228 std::list<inode_t<Allocator>*> ils;
229 inode_t<Allocator>::generate_test_instances(ils);
230 ls.back()->inode = *ils.back();
f67539c2
TL
231 ls.back()->xattrs["user.foo"] = ceph::buffer::copy("asdf", 4);
232 ls.back()->xattrs["user.unprintable"] = ceph::buffer::copy("\000\001\002", 3);
94b18763
FG
233}
234
235template<template<typename> class Allocator>
f67539c2 236inline void encode(const old_inode_t<Allocator> &c, ::ceph::buffer::list &bl, uint64_t features)
94b18763
FG
237{
238 ENCODE_DUMP_PRE();
239 c.encode(bl, features);
240 ENCODE_DUMP_POST(cl);
241}
242template<template<typename> class Allocator>
f67539c2 243inline void decode(old_inode_t<Allocator> &c, ::ceph::buffer::list::const_iterator &p)
94b18763
FG
244{
245 c.decode(p);
246}
7c673cae 247
7c673cae
FG
248/*
249 * like an inode, but for a dir frag
250 */
251struct fnode_t {
f67539c2
TL
252 void encode(ceph::buffer::list &bl) const;
253 void decode(ceph::buffer::list::const_iterator& bl);
254 void dump(ceph::Formatter *f) const;
255 void decode_json(JSONObj *obj);
9f95a23c
TL
256 static void generate_test_instances(std::list<fnode_t*>& ls);
257
94b18763 258 version_t version = 0;
7c673cae
FG
259 snapid_t snap_purged_thru; // the max_last_destroy snapid we've been purged thru
260 frag_info_t fragstat, accounted_fragstat;
261 nest_info_t rstat, accounted_rstat;
94b18763 262 damage_flags_t damage_flags = 0;
7c673cae
FG
263
264 // we know we and all our descendants have been scrubbed since this version
94b18763 265 version_t recursive_scrub_version = 0;
7c673cae
FG
266 utime_t recursive_scrub_stamp;
267 // version at which we last scrubbed our personal data structures
94b18763 268 version_t localized_scrub_version = 0;
7c673cae 269 utime_t localized_scrub_stamp;
7c673cae
FG
270};
271WRITE_CLASS_ENCODER(fnode_t)
272
273
274struct old_rstat_t {
f67539c2
TL
275 void encode(ceph::buffer::list& bl) const;
276 void decode(ceph::buffer::list::const_iterator& p);
277 void dump(ceph::Formatter *f) const;
9f95a23c
TL
278 static void generate_test_instances(std::list<old_rstat_t*>& ls);
279
280 snapid_t first;
281 nest_info_t rstat, accounted_rstat;
7c673cae
FG
282};
283WRITE_CLASS_ENCODER(old_rstat_t)
284
285inline std::ostream& operator<<(std::ostream& out, const old_rstat_t& o) {
286 return out << "old_rstat(first " << o.first << " " << o.rstat << " " << o.accounted_rstat << ")";
287}
288
11fdf7f2
TL
289class feature_bitset_t {
290public:
291 typedef uint64_t block_type;
292 static const size_t bits_per_block = sizeof(block_type) * 8;
293
294 feature_bitset_t(const feature_bitset_t& other) : _vec(other._vec) {}
295 feature_bitset_t(feature_bitset_t&& other) : _vec(std::move(other._vec)) {}
296 feature_bitset_t(unsigned long value = 0);
f67539c2 297 feature_bitset_t(const std::vector<size_t>& array);
11fdf7f2
TL
298 feature_bitset_t& operator=(const feature_bitset_t& other) {
299 _vec = other._vec;
300 return *this;
301 }
302 feature_bitset_t& operator=(feature_bitset_t&& other) {
303 _vec = std::move(other._vec);
304 return *this;
305 }
9f95a23c 306 feature_bitset_t& operator-=(const feature_bitset_t& other);
11fdf7f2 307 bool empty() const {
9f95a23c 308 //block_type is a uint64_t. If the vector is only composed of 0s, then it's still "empty"
11fdf7f2
TL
309 for (auto& v : _vec) {
310 if (v)
311 return false;
312 }
313 return true;
314 }
315 bool test(size_t bit) const {
316 if (bit >= bits_per_block * _vec.size())
317 return false;
318 return _vec[bit / bits_per_block] & ((block_type)1 << (bit % bits_per_block));
319 }
f67539c2
TL
320 void insert(size_t bit) {
321 size_t n = bit / bits_per_block;
322 if (n >= _vec.size())
323 _vec.resize(n + 1);
324 _vec[n] |= ((block_type)1 << (bit % bits_per_block));
325 }
326 void erase(size_t bit) {
327 size_t n = bit / bits_per_block;
328 if (n >= _vec.size())
329 return;
330 _vec[n] &= ~((block_type)1 << (bit % bits_per_block));
331 if (n + 1 == _vec.size()) {
332 while (!_vec.empty() && _vec.back() == 0)
333 _vec.pop_back();
334 }
335 }
11fdf7f2
TL
336 void clear() {
337 _vec.clear();
338 }
f67539c2
TL
339 bool operator==(const feature_bitset_t& other) const {
340 return _vec == other._vec;
341 }
342 bool operator!=(const feature_bitset_t& other) const {
343 return _vec != other._vec;
344 }
345 void encode(ceph::buffer::list& bl) const;
346 void decode(ceph::buffer::list::const_iterator &p);
347 void dump(ceph::Formatter *f) const;
348 void print(std::ostream& out) const;
11fdf7f2 349private:
f67539c2 350 std::vector<block_type> _vec;
11fdf7f2
TL
351};
352WRITE_CLASS_ENCODER(feature_bitset_t)
353
354inline std::ostream& operator<<(std::ostream& out, const feature_bitset_t& s) {
355 s.print(out);
356 return out;
357}
358
9f95a23c
TL
359struct metric_spec_t {
360 metric_spec_t() {}
361 metric_spec_t(const metric_spec_t& other) :
362 metric_flags(other.metric_flags) {}
363 metric_spec_t(metric_spec_t&& other) :
364 metric_flags(std::move(other.metric_flags)) {}
365 metric_spec_t(const feature_bitset_t& mf) :
366 metric_flags(mf) {}
367 metric_spec_t(feature_bitset_t&& mf) :
368 metric_flags(std::move(mf)) {}
369
370 metric_spec_t& operator=(const metric_spec_t& other) {
371 metric_flags = other.metric_flags;
372 return *this;
373 }
374 metric_spec_t& operator=(metric_spec_t&& other) {
375 metric_flags = std::move(other.metric_flags);
376 return *this;
377 }
378
379 bool empty() const {
380 return metric_flags.empty();
381 }
382
383 void clear() {
384 metric_flags.clear();
385 }
386
f67539c2
TL
387 void encode(ceph::buffer::list& bl) const;
388 void decode(ceph::buffer::list::const_iterator& p);
389 void dump(ceph::Formatter *f) const;
390 void print(std::ostream& out) const;
9f95a23c
TL
391
392 // set of metrics that a client is capable of forwarding
393 feature_bitset_t metric_flags;
394};
395WRITE_CLASS_ENCODER(metric_spec_t)
396
397inline std::ostream& operator<<(std::ostream& out, const metric_spec_t& mst) {
398 mst.print(out);
399 return out;
400}
401
11fdf7f2
TL
402/*
403 * client_metadata_t
404 */
405struct client_metadata_t {
406 using kv_map_t = std::map<std::string,std::string>;
407 using iterator = kv_map_t::const_iterator;
408
11fdf7f2 409 client_metadata_t() {}
9f95a23c
TL
410 client_metadata_t(const kv_map_t& kv, const feature_bitset_t &f, const metric_spec_t &mst) :
411 kv_map(kv),
412 features(f),
413 metric_spec(mst) {}
11fdf7f2
TL
414 client_metadata_t& operator=(const client_metadata_t& other) {
415 kv_map = other.kv_map;
416 features = other.features;
9f95a23c 417 metric_spec = other.metric_spec;
11fdf7f2
TL
418 return *this;
419 }
420
9f95a23c 421 bool empty() const { return kv_map.empty() && features.empty() && metric_spec.empty(); }
11fdf7f2
TL
422 iterator find(const std::string& key) const { return kv_map.find(key); }
423 iterator begin() const { return kv_map.begin(); }
424 iterator end() const { return kv_map.end(); }
92f5a8d4 425 void erase(iterator it) { kv_map.erase(it); }
11fdf7f2
TL
426 std::string& operator[](const std::string& key) { return kv_map[key]; }
427 void merge(const client_metadata_t& other) {
428 kv_map.insert(other.kv_map.begin(), other.kv_map.end());
429 features = other.features;
9f95a23c 430 metric_spec = other.metric_spec;
11fdf7f2
TL
431 }
432 void clear() {
433 kv_map.clear();
434 features.clear();
9f95a23c 435 metric_spec.clear();
11fdf7f2
TL
436 }
437
f67539c2
TL
438 void encode(ceph::buffer::list& bl) const;
439 void decode(ceph::buffer::list::const_iterator& p);
440 void dump(ceph::Formatter *f) const;
9f95a23c
TL
441
442 kv_map_t kv_map;
443 feature_bitset_t features;
444 metric_spec_t metric_spec;
11fdf7f2
TL
445};
446WRITE_CLASS_ENCODER(client_metadata_t)
7c673cae
FG
447
448/*
9f95a23c 449 * session_info_t - durable part of a Session
7c673cae 450 */
7c673cae 451struct session_info_t {
7c673cae 452 client_t get_client() const { return client_t(inst.name.num()); }
11fdf7f2 453 bool has_feature(size_t bit) const { return client_metadata.features.test(bit); }
7c673cae
FG
454 const entity_name_t& get_source() const { return inst.name; }
455
456 void clear_meta() {
457 prealloc_inos.clear();
7c673cae
FG
458 completed_requests.clear();
459 completed_flushes.clear();
11fdf7f2 460 client_metadata.clear();
7c673cae
FG
461 }
462
f67539c2
TL
463 void encode(ceph::buffer::list& bl, uint64_t features) const;
464 void decode(ceph::buffer::list::const_iterator& p);
465 void dump(ceph::Formatter *f) const;
9f95a23c
TL
466 static void generate_test_instances(std::list<session_info_t*>& ls);
467
468 entity_inst_t inst;
469 std::map<ceph_tid_t,inodeno_t> completed_requests;
470 interval_set<inodeno_t> prealloc_inos; // preallocated, ready to use.
9f95a23c
TL
471 client_metadata_t client_metadata;
472 std::set<ceph_tid_t> completed_flushes;
473 EntityName auth_name;
7c673cae
FG
474};
475WRITE_CLASS_ENCODER_FEATURES(session_info_t)
476
7c673cae 477// dentries
7c673cae 478struct dentry_key_t {
94b18763 479 dentry_key_t() {}
11fdf7f2 480 dentry_key_t(snapid_t s, std::string_view n, __u32 h=0) :
7c673cae
FG
481 snapid(s), name(n), hash(h) {}
482
94b18763 483 bool is_valid() { return name.length() || snapid; }
7c673cae
FG
484
485 // encode into something that can be decoded as a string.
486 // name_ (head) or name_%x (!head)
f67539c2
TL
487 void encode(ceph::buffer::list& bl) const {
488 std::string key;
7c673cae 489 encode(key);
11fdf7f2
TL
490 using ceph::encode;
491 encode(key, bl);
7c673cae 492 }
f67539c2 493 void encode(std::string& key) const {
7c673cae
FG
494 char b[20];
495 if (snapid != CEPH_NOSNAP) {
496 uint64_t val(snapid);
497 snprintf(b, sizeof(b), "%" PRIx64, val);
498 } else {
499 snprintf(b, sizeof(b), "%s", "head");
500 }
f67539c2
TL
501 CachedStackStringStream css;
502 *css << name << "_" << b;
503 key = css->strv();
7c673cae 504 }
f67539c2
TL
505 static void decode_helper(ceph::buffer::list::const_iterator& bl, std::string& nm,
506 snapid_t& sn) {
507 std::string key;
508 using ceph::decode;
11fdf7f2 509 decode(key, bl);
7c673cae
FG
510 decode_helper(key, nm, sn);
511 }
f67539c2 512 static void decode_helper(std::string_view key, std::string& nm, snapid_t& sn) {
7c673cae 513 size_t i = key.find_last_of('_');
f67539c2 514 ceph_assert(i != std::string::npos);
11fdf7f2 515 if (key.compare(i+1, std::string_view::npos, "head") == 0) {
7c673cae
FG
516 // name_head
517 sn = CEPH_NOSNAP;
518 } else {
519 // name_%x
520 long long unsigned x = 0;
94b18763
FG
521 std::string x_str(key.substr(i+1));
522 sscanf(x_str.c_str(), "%llx", &x);
7c673cae 523 sn = x;
9f95a23c 524 }
11fdf7f2 525 nm = key.substr(0, i);
7c673cae 526 }
9f95a23c
TL
527
528 snapid_t snapid = 0;
529 std::string_view name;
530 __u32 hash = 0;
7c673cae
FG
531};
532
533inline std::ostream& operator<<(std::ostream& out, const dentry_key_t &k)
534{
535 return out << "(" << k.name << "," << k.snapid << ")";
536}
537
538inline bool operator<(const dentry_key_t& k1, const dentry_key_t& k2)
539{
540 /*
541 * order by hash, name, snap
542 */
543 int c = ceph_frag_value(k1.hash) - ceph_frag_value(k2.hash);
544 if (c)
545 return c < 0;
94b18763 546 c = k1.name.compare(k2.name);
7c673cae
FG
547 if (c)
548 return c < 0;
549 return k1.snapid < k2.snapid;
550}
551
7c673cae
FG
552/*
553 * string_snap_t is a simple (string, snapid_t) pair
554 */
555struct string_snap_t {
7c673cae 556 string_snap_t() {}
11fdf7f2 557 string_snap_t(std::string_view n, snapid_t s) : name(n), snapid(s) {}
7c673cae 558
1e59de90
TL
559 int compare(const string_snap_t& r) const {
560 int ret = name.compare(r.name);
561 if (ret)
562 return ret;
563 if (snapid == r.snapid)
564 return 0;
565 return snapid > r.snapid ? 1 : -1;
566 }
567
f67539c2
TL
568 void encode(ceph::buffer::list& bl) const;
569 void decode(ceph::buffer::list::const_iterator& p);
570 void dump(ceph::Formatter *f) const;
9f95a23c
TL
571 static void generate_test_instances(std::list<string_snap_t*>& ls);
572
f67539c2 573 std::string name;
9f95a23c 574 snapid_t snapid;
7c673cae
FG
575};
576WRITE_CLASS_ENCODER(string_snap_t)
577
1e59de90
TL
578inline bool operator==(const string_snap_t& l, const string_snap_t& r) {
579 return l.name == r.name && l.snapid == r.snapid;
580}
581
7c673cae 582inline bool operator<(const string_snap_t& l, const string_snap_t& r) {
94b18763 583 int c = l.name.compare(r.name);
7c673cae
FG
584 return c < 0 || (c == 0 && l.snapid < r.snapid);
585}
586
587inline std::ostream& operator<<(std::ostream& out, const string_snap_t &k)
588{
589 return out << "(" << k.name << "," << k.snapid << ")";
590}
591
592/*
593 * mds_table_pending_t
594 *
9f95a23c 595 * For mds's requesting any pending ops, child needs to encode the corresponding
7c673cae
FG
596 * pending mutation state in the table.
597 */
598struct mds_table_pending_t {
f67539c2
TL
599 void encode(ceph::buffer::list& bl) const;
600 void decode(ceph::buffer::list::const_iterator& bl);
601 void dump(ceph::Formatter *f) const;
9f95a23c
TL
602 static void generate_test_instances(std::list<mds_table_pending_t*>& ls);
603
604 uint64_t reqid = 0;
605 __s32 mds = 0;
606 version_t tid = 0;
7c673cae
FG
607};
608WRITE_CLASS_ENCODER(mds_table_pending_t)
609
7c673cae 610// requests
7c673cae 611struct metareqid_t {
94b18763 612 metareqid_t() {}
7c673cae 613 metareqid_t(entity_name_t n, ceph_tid_t t) : name(n), tid(t) {}
f67539c2 614 void encode(ceph::buffer::list& bl) const {
11fdf7f2
TL
615 using ceph::encode;
616 encode(name, bl);
617 encode(tid, bl);
7c673cae 618 }
f67539c2 619 void decode(ceph::buffer::list::const_iterator &p) {
11fdf7f2
TL
620 using ceph::decode;
621 decode(name, p);
622 decode(tid, p);
7c673cae 623 }
9f95a23c
TL
624
625 entity_name_t name;
626 uint64_t tid = 0;
7c673cae
FG
627};
628WRITE_CLASS_ENCODER(metareqid_t)
629
630inline std::ostream& operator<<(std::ostream& out, const metareqid_t& r) {
631 return out << r.name << ":" << r.tid;
632}
633
634inline bool operator==(const metareqid_t& l, const metareqid_t& r) {
635 return (l.name == r.name) && (l.tid == r.tid);
636}
637inline bool operator!=(const metareqid_t& l, const metareqid_t& r) {
638 return (l.name != r.name) || (l.tid != r.tid);
639}
640inline bool operator<(const metareqid_t& l, const metareqid_t& r) {
641 return (l.name < r.name) ||
642 (l.name == r.name && l.tid < r.tid);
643}
644inline bool operator<=(const metareqid_t& l, const metareqid_t& r) {
645 return (l.name < r.name) ||
646 (l.name == r.name && l.tid <= r.tid);
647}
648inline bool operator>(const metareqid_t& l, const metareqid_t& r) { return !(l <= r); }
649inline bool operator>=(const metareqid_t& l, const metareqid_t& r) { return !(l < r); }
650
651namespace std {
652 template<> struct hash<metareqid_t> {
653 size_t operator()(const metareqid_t &r) const {
654 hash<uint64_t> H;
655 return H(r.name.num()) ^ H(r.name.type()) ^ H(r.tid);
656 }
657 };
658} // namespace std
659
7c673cae
FG
660// cap info for client reconnect
661struct cap_reconnect_t {
9f95a23c 662 cap_reconnect_t() {}
11fdf7f2 663 cap_reconnect_t(uint64_t cap_id, inodeno_t pino, std::string_view p, int w, int i,
f67539c2 664 inodeno_t sr, snapid_t sf, ceph::buffer::list& lb) :
7c673cae
FG
665 path(p) {
666 capinfo.cap_id = cap_id;
667 capinfo.wanted = w;
668 capinfo.issued = i;
669 capinfo.snaprealm = sr;
670 capinfo.pathbase = pino;
671 capinfo.flock_len = 0;
672 snap_follows = sf;
f67539c2 673 flockbl = std::move(lb);
7c673cae 674 }
f67539c2
TL
675 void encode(ceph::buffer::list& bl) const;
676 void decode(ceph::buffer::list::const_iterator& bl);
677 void encode_old(ceph::buffer::list& bl) const;
678 void decode_old(ceph::buffer::list::const_iterator& bl);
7c673cae 679
f67539c2 680 void dump(ceph::Formatter *f) const;
9f95a23c
TL
681 static void generate_test_instances(std::list<cap_reconnect_t*>& ls);
682
f67539c2 683 std::string path;
9f95a23c
TL
684 mutable ceph_mds_cap_reconnect capinfo = {};
685 snapid_t snap_follows = 0;
f67539c2 686 ceph::buffer::list flockbl;
7c673cae
FG
687};
688WRITE_CLASS_ENCODER(cap_reconnect_t)
689
11fdf7f2 690struct snaprealm_reconnect_t {
9f95a23c 691 snaprealm_reconnect_t() {}
11fdf7f2
TL
692 snaprealm_reconnect_t(inodeno_t ino, snapid_t seq, inodeno_t parent) {
693 realm.ino = ino;
694 realm.seq = seq;
695 realm.parent = parent;
696 }
f67539c2
TL
697 void encode(ceph::buffer::list& bl) const;
698 void decode(ceph::buffer::list::const_iterator& bl);
699 void encode_old(ceph::buffer::list& bl) const;
700 void decode_old(ceph::buffer::list::const_iterator& bl);
11fdf7f2 701
f67539c2 702 void dump(ceph::Formatter *f) const;
9f95a23c
TL
703 static void generate_test_instances(std::list<snaprealm_reconnect_t*>& ls);
704
705 mutable ceph_mds_snaprealm_reconnect realm = {};
11fdf7f2
TL
706};
707WRITE_CLASS_ENCODER(snaprealm_reconnect_t)
7c673cae
FG
708
709// compat for pre-FLOCK feature
710struct old_ceph_mds_cap_reconnect {
eafe8130
TL
711 ceph_le64 cap_id;
712 ceph_le32 wanted;
713 ceph_le32 issued;
714 ceph_le64 old_size;
7c673cae 715 struct ceph_timespec old_mtime, old_atime;
eafe8130
TL
716 ceph_le64 snaprealm;
717 ceph_le64 pathbase; /* base ino for our path to this ino */
7c673cae
FG
718} __attribute__ ((packed));
719WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect)
720
721struct old_cap_reconnect_t {
7c673cae
FG
722 const old_cap_reconnect_t& operator=(const cap_reconnect_t& n) {
723 path = n.path;
724 capinfo.cap_id = n.capinfo.cap_id;
725 capinfo.wanted = n.capinfo.wanted;
726 capinfo.issued = n.capinfo.issued;
727 capinfo.snaprealm = n.capinfo.snaprealm;
728 capinfo.pathbase = n.capinfo.pathbase;
729 return *this;
730 }
731 operator cap_reconnect_t() {
732 cap_reconnect_t n;
733 n.path = path;
734 n.capinfo.cap_id = capinfo.cap_id;
735 n.capinfo.wanted = capinfo.wanted;
736 n.capinfo.issued = capinfo.issued;
737 n.capinfo.snaprealm = capinfo.snaprealm;
738 n.capinfo.pathbase = capinfo.pathbase;
739 return n;
740 }
741
f67539c2 742 void encode(ceph::buffer::list& bl) const {
11fdf7f2
TL
743 using ceph::encode;
744 encode(path, bl);
745 encode(capinfo, bl);
7c673cae 746 }
f67539c2 747 void decode(ceph::buffer::list::const_iterator& bl) {
11fdf7f2
TL
748 using ceph::decode;
749 decode(path, bl);
750 decode(capinfo, bl);
7c673cae 751 }
9f95a23c 752
f67539c2 753 std::string path;
9f95a23c 754 old_ceph_mds_cap_reconnect capinfo;
7c673cae
FG
755};
756WRITE_CLASS_ENCODER(old_cap_reconnect_t)
757
7c673cae 758// dir frag
7c673cae 759struct dirfrag_t {
94b18763 760 dirfrag_t() {}
7c673cae
FG
761 dirfrag_t(inodeno_t i, frag_t f) : ino(i), frag(f) { }
762
f67539c2 763 void encode(ceph::buffer::list& bl) const {
11fdf7f2
TL
764 using ceph::encode;
765 encode(ino, bl);
766 encode(frag, bl);
7c673cae 767 }
f67539c2 768 void decode(ceph::buffer::list::const_iterator& bl) {
11fdf7f2
TL
769 using ceph::decode;
770 decode(ino, bl);
771 decode(frag, bl);
7c673cae 772 }
9f95a23c
TL
773
774 inodeno_t ino = 0;
775 frag_t frag;
7c673cae
FG
776};
777WRITE_CLASS_ENCODER(dirfrag_t)
778
7c673cae
FG
779inline std::ostream& operator<<(std::ostream& out, const dirfrag_t &df) {
780 out << df.ino;
781 if (!df.frag.is_root()) out << "." << df.frag;
782 return out;
783}
784inline bool operator<(dirfrag_t l, dirfrag_t r) {
785 if (l.ino < r.ino) return true;
786 if (l.ino == r.ino && l.frag < r.frag) return true;
787 return false;
788}
789inline bool operator==(dirfrag_t l, dirfrag_t r) {
790 return l.ino == r.ino && l.frag == r.frag;
791}
792
793namespace std {
794 template<> struct hash<dirfrag_t> {
795 size_t operator()(const dirfrag_t &df) const {
796 static rjhash<uint64_t> H;
797 static rjhash<uint32_t> I;
798 return H(df.ino) ^ I(df.frag);
799 }
800 };
801} // namespace std
802
7c673cae 803// ================================================================
7c673cae
FG
// Indices into dirfrag_load_vec_t's counter array; META_NPOP is the count.
#define META_POP_IRD      0
#define META_POP_IWR      1
#define META_POP_READDIR  2
#define META_POP_FETCH    3
#define META_POP_STORE    4
#define META_NPOP         5
810
811class inode_load_vec_t {
7c673cae 812public:
11fdf7f2
TL
813 using time = DecayCounter::time;
814 using clock = DecayCounter::clock;
815 static const size_t NUM = 2;
816
817 inode_load_vec_t() : vec{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {}
818 inode_load_vec_t(const DecayRate &rate) : vec{DecayCounter(rate), DecayCounter(rate)} {}
819
f67539c2
TL
820 DecayCounter &get(int t) {
821 return vec[t];
7c673cae 822 }
11fdf7f2
TL
823 void zero() {
824 for (auto &d : vec) {
825 d.reset();
826 }
7c673cae 827 }
f67539c2
TL
828 void encode(ceph::buffer::list &bl) const;
829 void decode(ceph::buffer::list::const_iterator& p);
830 void dump(ceph::Formatter *f) const;
9f95a23c 831 static void generate_test_instances(std::list<inode_load_vec_t*>& ls);
11fdf7f2
TL
832
833private:
834 std::array<DecayCounter, NUM> vec;
7c673cae 835};
f67539c2 836inline void encode(const inode_load_vec_t &c, ceph::buffer::list &bl) {
11fdf7f2 837 c.encode(bl);
7c673cae 838}
f67539c2 839inline void decode(inode_load_vec_t & c, ceph::buffer::list::const_iterator &p) {
11fdf7f2 840 c.decode(p);
7c673cae
FG
841}
842
843class dirfrag_load_vec_t {
844public:
11fdf7f2
TL
845 using time = DecayCounter::time;
846 using clock = DecayCounter::clock;
847 static const size_t NUM = 5;
848
849 dirfrag_load_vec_t() :
850 vec{DecayCounter(DecayRate()),
851 DecayCounter(DecayRate()),
852 DecayCounter(DecayRate()),
853 DecayCounter(DecayRate()),
854 DecayCounter(DecayRate())
855 }
7c673cae 856 {}
11fdf7f2
TL
857 dirfrag_load_vec_t(const DecayRate &rate) :
858 vec{DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate), DecayCounter(rate)}
859 {}
860
f67539c2 861 void encode(ceph::buffer::list &bl) const {
7c673cae 862 ENCODE_START(2, 2, bl);
94b18763 863 for (const auto &i : vec) {
11fdf7f2 864 encode(i, bl);
94b18763 865 }
7c673cae
FG
866 ENCODE_FINISH(bl);
867 }
f67539c2 868 void decode(ceph::buffer::list::const_iterator &p) {
7c673cae 869 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p);
94b18763 870 for (auto &i : vec) {
11fdf7f2 871 decode(i, p);
94b18763 872 }
7c673cae
FG
873 DECODE_FINISH(p);
874 }
f67539c2
TL
875 void dump(ceph::Formatter *f) const;
876 void dump(ceph::Formatter *f, const DecayRate& rate) const;
11fdf7f2 877 static void generate_test_instances(std::list<dirfrag_load_vec_t*>& ls);
7c673cae 878
11fdf7f2
TL
879 const DecayCounter &get(int t) const {
880 return vec[t];
7c673cae 881 }
11fdf7f2
TL
882 DecayCounter &get(int t) {
883 return vec[t];
884 }
885 void adjust(double d) {
94b18763 886 for (auto &i : vec) {
11fdf7f2 887 i.adjust(d);
94b18763 888 }
7c673cae 889 }
11fdf7f2 890 void zero() {
94b18763 891 for (auto &i : vec) {
11fdf7f2 892 i.reset();
94b18763 893 }
7c673cae 894 }
28e407b8 895 double meta_load() const {
7c673cae 896 return
11fdf7f2
TL
897 1*vec[META_POP_IRD].get() +
898 2*vec[META_POP_IWR].get() +
899 1*vec[META_POP_READDIR].get() +
900 2*vec[META_POP_FETCH].get() +
901 4*vec[META_POP_STORE].get();
7c673cae
FG
902 }
903
11fdf7f2
TL
904 void add(dirfrag_load_vec_t& r) {
905 for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++)
906 vec[i].adjust(r.vec[i].get());
7c673cae 907 }
11fdf7f2
TL
908 void sub(dirfrag_load_vec_t& r) {
909 for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++)
910 vec[i].adjust(-r.vec[i].get());
7c673cae
FG
911 }
912 void scale(double f) {
11fdf7f2 913 for (size_t i=0; i<dirfrag_load_vec_t::NUM; i++)
7c673cae
FG
914 vec[i].scale(f);
915 }
11fdf7f2
TL
916
917private:
918 friend inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl);
919 std::array<DecayCounter, NUM> vec;
7c673cae
FG
920};
921
f67539c2 922inline void encode(const dirfrag_load_vec_t &c, ceph::buffer::list &bl) {
11fdf7f2 923 c.encode(bl);
7c673cae 924}
f67539c2 925inline void decode(dirfrag_load_vec_t& c, ceph::buffer::list::const_iterator &p) {
11fdf7f2 926 c.decode(p);
7c673cae
FG
927}
928
28e407b8 929inline std::ostream& operator<<(std::ostream& out, const dirfrag_load_vec_t& dl)
7c673cae 930{
f67539c2
TL
931 CachedStackStringStream css;
932 *css << std::setprecision(1) << std::fixed
11fdf7f2
TL
933 << "[pop"
934 " IRD:" << dl.vec[0]
935 << " IWR:" << dl.vec[1]
936 << " RDR:" << dl.vec[2]
937 << " FET:" << dl.vec[3]
938 << " STR:" << dl.vec[4]
939 << " *LOAD:" << dl.meta_load() << "]";
1e59de90 940 return out << css->strv();
7c673cae
FG
941}
942
7c673cae 943struct mds_load_t {
11fdf7f2
TL
944 using clock = dirfrag_load_vec_t::clock;
945 using time = dirfrag_load_vec_t::time;
946
7c673cae
FG
947 dirfrag_load_vec_t auth;
948 dirfrag_load_vec_t all;
949
11fdf7f2
TL
950 mds_load_t() : auth(DecayRate()), all(DecayRate()) {}
951 mds_load_t(const DecayRate &rate) : auth(rate), all(rate) {}
952
94b18763
FG
953 double req_rate = 0.0;
954 double cache_hit_rate = 0.0;
955 double queue_len = 0.0;
7c673cae 956
94b18763 957 double cpu_load_avg = 0.0;
7c673cae 958
11fdf7f2 959 double mds_load() const; // defiend in MDBalancer.cc
f67539c2
TL
960 void encode(ceph::buffer::list& bl) const;
961 void decode(ceph::buffer::list::const_iterator& bl);
962 void dump(ceph::Formatter *f) const;
11fdf7f2 963 static void generate_test_instances(std::list<mds_load_t*>& ls);
7c673cae 964};
f67539c2 965inline void encode(const mds_load_t &c, ceph::buffer::list &bl) {
11fdf7f2 966 c.encode(bl);
7c673cae 967}
f67539c2 968inline void decode(mds_load_t &c, ceph::buffer::list::const_iterator &p) {
11fdf7f2 969 c.decode(p);
7c673cae
FG
970}
971
28e407b8 972inline std::ostream& operator<<(std::ostream& out, const mds_load_t& load)
7c673cae
FG
973{
974 return out << "mdsload<" << load.auth << "/" << load.all
975 << ", req " << load.req_rate
976 << ", hr " << load.cache_hit_rate
977 << ", qlen " << load.queue_len
978 << ", cpu " << load.cpu_load_avg
979 << ">";
980}
981
7c673cae
FG
982// ================================================================
983typedef std::pair<mds_rank_t, mds_rank_t> mds_authority_t;
984
985// -- authority delegation --
986// directory authority types
987// >= 0 is the auth mds
988#define CDIR_AUTH_PARENT mds_rank_t(-1) // default
989#define CDIR_AUTH_UNKNOWN mds_rank_t(-2)
990#define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN)
991#define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN)
992//#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2)
993
994class MDSCacheObjectInfo {
995public:
f67539c2
TL
996 void encode(ceph::buffer::list& bl) const;
997 void decode(ceph::buffer::list::const_iterator& bl);
998 void dump(ceph::Formatter *f) const;
9f95a23c
TL
999 static void generate_test_instances(std::list<MDSCacheObjectInfo*>& ls);
1000
94b18763 1001 inodeno_t ino = 0;
7c673cae 1002 dirfrag_t dirfrag;
f67539c2 1003 std::string dname;
7c673cae 1004 snapid_t snapid;
7c673cae
FG
1005};
1006
1007inline std::ostream& operator<<(std::ostream& out, const MDSCacheObjectInfo &info) {
1008 if (info.ino) return out << info.ino << "." << info.snapid;
1009 if (info.dname.length()) return out << info.dirfrag << "/" << info.dname
1010 << " snap " << info.snapid;
1011 return out << info.dirfrag;
1012}
1013
1014inline bool operator==(const MDSCacheObjectInfo& l, const MDSCacheObjectInfo& r) {
1015 if (l.ino || r.ino)
1016 return l.ino == r.ino && l.snapid == r.snapid;
1017 else
1018 return l.dirfrag == r.dirfrag && l.dname == r.dname;
1019}
1020WRITE_CLASS_ENCODER(MDSCacheObjectInfo)
1021
7c673cae 1022#endif