1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #ifndef CEPH_MDSTYPES_H
4 #define CEPH_MDSTYPES_H
6 #include "include/int_types.h"
11 #include <string_view>
13 #include "common/config.h"
14 #include "common/Clock.h"
15 #include "common/DecayCounter.h"
16 #include "common/StackStringStream.h"
17 #include "common/entity_name.h"
19 #include "include/compat.h"
20 #include "include/Context.h"
21 #include "include/frag.h"
22 #include "include/xlist.h"
23 #include "include/interval_set.h"
24 #include "include/compact_set.h"
25 #include "include/fs_types.h"
26 #include "include/ceph_fs.h"
28 #include "inode_backtrace.h"
30 #include <boost/spirit/include/qi.hpp>
31 #include <boost/pool/pool.hpp>
32 #include "include/ceph_assert.h"
33 #include <boost/serialization/strong_typedef.hpp>
34 #include "common/ceph_json.h"
// Magic string literal. The name suggests it tags the on-disk filesystem
// format (revision "v011") -- TODO confirm where it is written and checked.
#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011"

// MDS_PORT_* constants, spaced 0x100 apart.
// NOTE(review): presumably dispatch port ids for the cache, locker and
// migrator subsystems -- confirm against the code that consumes them.
#define MDS_PORT_CACHE 0x200
#define MDS_PORT_LOCKER 0x300
#define MDS_PORT_MIGRATOR 0x400
// For inode numbers 1, 2 and 4, see CEPH_INO_* in include/ceph_fs.h.
//
// The macros below lay out a run of inode-number ranges expressed in units
// of MAX_MDS. MAX_MDS and NUM_STRAY are not defined here; they must be
// visible wherever one of these macros is expanded.

// Start of the MDSDIR range (MAX_MDS entries) and of the stray range
// (MAX_MDS * NUM_STRAY entries).
#define MDS_INO_MDSDIR_OFFSET (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET (6*MAX_MDS)

// Locations for journal data
#define MDS_INO_LOG_OFFSET (2*MAX_MDS)
#define MDS_INO_LOG_BACKUP_OFFSET (3*MAX_MDS)
#define MDS_INO_LOG_POINTER_OFFSET (4*MAX_MDS)
#define MDS_INO_PURGE_QUEUE (5*MAX_MDS)

// First inode number past the stray range.
#define MDS_INO_SYSTEM_BASE ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))

// Stray inode number for owner x (presumably an MDS rank -- confirm) and
// stray index i, where i is expected to be < NUM_STRAY.
#define MDS_INO_STRAY(x,i) (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
// MDSDIR inode number for owner x. The argument is parenthesized before the
// cast so that expressions such as `r < 0 ? 0 : r` convert as a whole.
#define MDS_INO_MDSDIR(x) (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

// Range predicates: true when i falls inside the stray / MDSDIR range.
#define MDS_INO_IS_STRAY(i) ((i) >= MDS_INO_STRAY_OFFSET && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i) ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
// Inverse of MDS_INO_MDSDIR(): recovers x from the inode number.
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
// True for the root inode, the global snaprealm inode, or any MDSDIR inode.
#define MDS_INO_IS_BASE(i) ((i) == CEPH_INO_ROOT || (i) == CEPH_INO_GLOBAL_SNAPREALM || MDS_INO_IS_MDSDIR(i))
// Inverses of MDS_INO_STRAY(): recover the owner and the stray index.
#define MDS_INO_STRAY_OWNER(i) (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i) (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)

// True when i lies in [MDS_INO_MDSDIR_OFFSET, MDS_INO_SYSTEM_BASE).
#define MDS_IS_PRIVATE_INO(i) ((i) < MDS_INO_SYSTEM_BASE && (i) >= MDS_INO_MDSDIR_OFFSET)
// Signed 32-bit MDS rank.
typedef int32_t mds_rank_t;
// Sentinel meaning "no rank".
constexpr mds_rank_t MDS_RANK_NONE = -1;
// Further negative sentinels.
// NOTE(review): presumably select ephemeral distributed / random pinning
// rather than a concrete rank -- confirm against the export-pin code.
constexpr mds_rank_t MDS_RANK_EPHEMERAL_DIST = -2;
constexpr mds_rank_t MDS_RANK_EPHEMERAL_RAND = -3;
75 BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t
)
76 extern const mds_gid_t MDS_GID_NONE
;
// Signed 32-bit filesystem cluster id.
typedef int32_t fs_cluster_id_t;
// Sentinel meaning "no filesystem".
constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = -1;

// The namespace ID of the anonymous default filesystem from legacy systems
constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS = 0;
86 mds_role_t(fs_cluster_id_t fscid_
, mds_rank_t rank_
)
87 : fscid(fscid_
), rank(rank_
)
91 bool operator<(mds_role_t
const &rhs
) const {
92 if (fscid
< rhs
.fscid
) {
94 } else if (fscid
== rhs
.fscid
) {
95 return rank
< rhs
.rank
;
101 bool is_none() const {
102 return (rank
== MDS_RANK_NONE
);
105 fs_cluster_id_t fscid
= FS_CLUSTER_ID_NONE
;
106 mds_rank_t rank
= MDS_RANK_NONE
;
108 inline std::ostream
& operator<<(std::ostream
& out
, const mds_role_t
& role
) {
109 return out
<< role
.fscid
<< ":" << role
.rank
;
113 inline std::string
gcap_string(int cap
)
116 if (cap
& CEPH_CAP_GSHARED
) s
+= "s";
117 if (cap
& CEPH_CAP_GEXCL
) s
+= "x";
118 if (cap
& CEPH_CAP_GCACHE
) s
+= "c";
119 if (cap
& CEPH_CAP_GRD
) s
+= "r";
120 if (cap
& CEPH_CAP_GWR
) s
+= "w";
121 if (cap
& CEPH_CAP_GBUFFER
) s
+= "b";
122 if (cap
& CEPH_CAP_GWREXTEND
) s
+= "a";
123 if (cap
& CEPH_CAP_GLAZYIO
) s
+= "l";
126 inline std::string
ccap_string(int cap
)
129 if (cap
& CEPH_CAP_PIN
) s
+= "p";
131 int a
= (cap
>> CEPH_CAP_SAUTH
) & 3;
132 if (a
) s
+= 'A' + gcap_string(a
);
134 a
= (cap
>> CEPH_CAP_SLINK
) & 3;
135 if (a
) s
+= 'L' + gcap_string(a
);
137 a
= (cap
>> CEPH_CAP_SXATTR
) & 3;
138 if (a
) s
+= 'X' + gcap_string(a
);
140 a
= cap
>> CEPH_CAP_SFILE
;
141 if (a
) s
+= 'F' + gcap_string(a
);
148 struct scatter_info_t
{
149 version_t version
= 0;
152 struct frag_info_t
: public scatter_info_t
{
153 int64_t size() const { return nfiles
+ nsubdirs
; }
156 *this = frag_info_t();
159 // *this += cur - acc;
160 void add_delta(const frag_info_t
&cur
, const frag_info_t
&acc
, bool *touched_mtime
=0, bool *touched_chattr
=0) {
161 if (cur
.mtime
> mtime
) {
164 *touched_mtime
= true;
166 if (cur
.change_attr
> change_attr
) {
167 change_attr
= cur
.change_attr
;
169 *touched_chattr
= true;
171 nfiles
+= cur
.nfiles
- acc
.nfiles
;
172 nsubdirs
+= cur
.nsubdirs
- acc
.nsubdirs
;
175 void add(const frag_info_t
& other
) {
176 if (other
.mtime
> mtime
)
178 if (other
.change_attr
> change_attr
)
179 change_attr
= other
.change_attr
;
180 nfiles
+= other
.nfiles
;
181 nsubdirs
+= other
.nsubdirs
;
184 bool same_sums(const frag_info_t
&o
) const {
185 return mtime
<= o
.mtime
&&
186 nfiles
== o
.nfiles
&&
187 nsubdirs
== o
.nsubdirs
;
190 void encode(ceph::buffer::list
&bl
) const;
191 void decode(ceph::buffer::list::const_iterator
& bl
);
192 void dump(ceph::Formatter
*f
) const;
193 void decode_json(JSONObj
*obj
);
194 static void generate_test_instances(std::list
<frag_info_t
*>& ls
);
198 uint64_t change_attr
= 0;
199 int64_t nfiles
= 0; // files
200 int64_t nsubdirs
= 0; // subdirs
202 WRITE_CLASS_ENCODER(frag_info_t
)
204 inline bool operator==(const frag_info_t
&l
, const frag_info_t
&r
) {
205 return memcmp(&l
, &r
, sizeof(l
)) == 0;
207 inline bool operator!=(const frag_info_t
&l
, const frag_info_t
&r
) {
211 std::ostream
& operator<<(std::ostream
&out
, const frag_info_t
&f
);
214 struct nest_info_t
: public scatter_info_t
{
215 int64_t rsize() const { return rfiles
+ rsubdirs
; }
218 *this = nest_info_t();
221 void sub(const nest_info_t
&other
) {
224 void add(const nest_info_t
&other
, int fac
=1) {
225 if (other
.rctime
> rctime
)
226 rctime
= other
.rctime
;
227 rbytes
+= fac
*other
.rbytes
;
228 rfiles
+= fac
*other
.rfiles
;
229 rsubdirs
+= fac
*other
.rsubdirs
;
230 rsnaps
+= fac
*other
.rsnaps
;
233 // *this += cur - acc;
234 void add_delta(const nest_info_t
&cur
, const nest_info_t
&acc
) {
235 if (cur
.rctime
> rctime
)
237 rbytes
+= cur
.rbytes
- acc
.rbytes
;
238 rfiles
+= cur
.rfiles
- acc
.rfiles
;
239 rsubdirs
+= cur
.rsubdirs
- acc
.rsubdirs
;
240 rsnaps
+= cur
.rsnaps
- acc
.rsnaps
;
243 bool same_sums(const nest_info_t
&o
) const {
244 return rctime
<= o
.rctime
&&
245 rbytes
== o
.rbytes
&&
246 rfiles
== o
.rfiles
&&
247 rsubdirs
== o
.rsubdirs
&&
251 void encode(ceph::buffer::list
&bl
) const;
252 void decode(ceph::buffer::list::const_iterator
& bl
);
253 void dump(ceph::Formatter
*f
) const;
254 void decode_json(JSONObj
*obj
);
255 static void generate_test_instances(std::list
<nest_info_t
*>& ls
);
257 // this frag + children
261 int64_t rsubdirs
= 0;
264 WRITE_CLASS_ENCODER(nest_info_t
)
266 inline bool operator==(const nest_info_t
&l
, const nest_info_t
&r
) {
267 return memcmp(&l
, &r
, sizeof(l
)) == 0;
269 inline bool operator!=(const nest_info_t
&l
, const nest_info_t
&r
) {
273 std::ostream
& operator<<(std::ostream
&out
, const nest_info_t
&n
);
277 vinodeno_t(inodeno_t i
, snapid_t s
) : ino(i
), snapid(s
) {}
279 void encode(ceph::buffer::list
& bl
) const {
284 void decode(ceph::buffer::list::const_iterator
& p
) {
293 WRITE_CLASS_ENCODER(vinodeno_t
)
295 inline bool operator==(const vinodeno_t
&l
, const vinodeno_t
&r
) {
296 return l
.ino
== r
.ino
&& l
.snapid
== r
.snapid
;
298 inline bool operator!=(const vinodeno_t
&l
, const vinodeno_t
&r
) {
301 inline bool operator<(const vinodeno_t
&l
, const vinodeno_t
&r
) {
304 (l
.ino
== r
.ino
&& l
.snapid
< r
.snapid
);
309 void encode(ceph::buffer::list
& bl
) const {
310 ENCODE_START(1, 1, bl
);
311 encode(max_bytes
, bl
);
312 encode(max_files
, bl
);
315 void decode(ceph::buffer::list::const_iterator
& p
) {
316 DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p
);
317 decode(max_bytes
, p
);
318 decode(max_files
, p
);
322 void dump(ceph::Formatter
*f
) const;
323 static void generate_test_instances(std::list
<quota_info_t
*>& ls
);
325 bool is_valid() const {
326 return max_bytes
>=0 && max_files
>=0;
328 bool is_enable() const {
329 return max_bytes
|| max_files
;
331 void decode_json(JSONObj
*obj
);
333 int64_t max_bytes
= 0;
334 int64_t max_files
= 0;
336 WRITE_CLASS_ENCODER(quota_info_t
)
338 inline bool operator==(const quota_info_t
&l
, const quota_info_t
&r
) {
339 return memcmp(&l
, &r
, sizeof(l
)) == 0;
342 std::ostream
& operator<<(std::ostream
&out
, const quota_info_t
&n
);
345 template<> struct hash
<vinodeno_t
> {
346 size_t operator()(const vinodeno_t
&vino
) const {
349 return H(vino
.ino
) ^ I(vino
.snapid
);
354 inline std::ostream
& operator<<(std::ostream
&out
, const vinodeno_t
&vino
) {
356 if (vino
.snapid
== CEPH_NOSNAP
)
358 else if (vino
.snapid
)
359 out
<< '.' << vino
.snapid
;
363 struct client_writeable_range_t
{
364 struct byte_range_t
{
365 uint64_t first
= 0, last
= 0; // interval client can write to
367 void decode_json(JSONObj
*obj
);
370 void encode(ceph::buffer::list
&bl
) const;
371 void decode(ceph::buffer::list::const_iterator
& bl
);
372 void dump(ceph::Formatter
*f
) const;
373 static void generate_test_instances(std::list
<client_writeable_range_t
*>& ls
);
376 snapid_t follows
= 0; // aka "data+metadata flushed thru"
379 inline void decode(client_writeable_range_t::byte_range_t
& range
, ceph::buffer::list::const_iterator
& bl
) {
381 decode(range
.first
, bl
);
382 decode(range
.last
, bl
);
385 WRITE_CLASS_ENCODER(client_writeable_range_t
)
387 std::ostream
& operator<<(std::ostream
& out
, const client_writeable_range_t
& r
);
389 inline bool operator==(const client_writeable_range_t
& l
,
390 const client_writeable_range_t
& r
) {
391 return l
.range
.first
== r
.range
.first
&& l
.range
.last
== r
.range
.last
&&
392 l
.follows
== r
.follows
;
395 struct inline_data_t
{
398 inline_data_t(const inline_data_t
& o
) : version(o
.version
) {
402 inline_data_t
& operator=(const inline_data_t
& o
) {
414 void get_data(ceph::buffer::list
& ret
) const {
420 void set_data(const ceph::buffer::list
& bl
) {
422 blp
.reset(new ceph::buffer::list
);
425 size_t length() const { return blp
? blp
->length() : 0; }
427 bool operator==(const inline_data_t
& o
) const {
428 return length() == o
.length() &&
430 (*const_cast<ceph::buffer::list
*>(blp
.get()) == *const_cast<ceph::buffer::list
*>(o
.blp
.get())));
432 bool operator!=(const inline_data_t
& o
) const {
433 return !(*this == o
);
435 void encode(ceph::buffer::list
&bl
) const;
436 void decode(ceph::buffer::list::const_iterator
& bl
);
438 version_t version
= 1;
441 std::unique_ptr
<ceph::buffer::list
> blp
;
443 WRITE_CLASS_ENCODER(inline_data_t
)
446 DAMAGE_STATS
, // statistics (dirstat, size, etc)
447 DAMAGE_RSTATS
, // recursive statistics (rstat, accounted_rstat)
448 DAMAGE_FRAGTREE
// fragtree -- repair by searching
// Unsigned 32-bit flag word; fnode_t keeps one in its damage_flags member.
// NOTE(review): bits are presumably the DAMAGE_* values declared just
// above -- confirm.
typedef uint32_t damage_flags_t;
452 template<template<typename
> class Allocator
= std::allocator
>
456 * Do not forget to add any new fields to the compare() function.
459 using client_range_map
= std::map
<client_t
,client_writeable_range_t
,std::less
<client_t
>,Allocator
<std::pair
<const client_t
,client_writeable_range_t
>>>;
467 bool is_symlink() const { return (mode
& S_IFMT
) == S_IFLNK
; }
468 bool is_dir() const { return (mode
& S_IFMT
) == S_IFDIR
; }
469 bool is_file() const { return (mode
& S_IFMT
) == S_IFREG
; }
471 bool is_truncating() const { return (truncate_pending
> 0); }
472 void truncate(uint64_t old_size
, uint64_t new_size
) {
473 ceph_assert(new_size
< old_size
);
474 if (old_size
> max_size_ever
)
475 max_size_ever
= old_size
;
476 truncate_from
= old_size
;
478 rstat
.rbytes
= new_size
;
479 truncate_size
= size
;
484 bool has_layout() const {
485 return layout
!= file_layout_t();
488 void clear_layout() {
489 layout
= file_layout_t();
492 uint64_t get_layout_size_increment() const {
493 return layout
.get_period();
496 bool is_dirty_rstat() const { return !(rstat
== accounted_rstat
); }
498 uint64_t get_client_range(client_t client
) const {
499 auto it
= client_ranges
.find(client
);
500 return it
!= client_ranges
.end() ? it
->second
.range
.last
: 0;
503 uint64_t get_max_size() const {
505 for (std::map
<client_t
,client_writeable_range_t
>::const_iterator p
= client_ranges
.begin();
506 p
!= client_ranges
.end();
508 if (p
->second
.range
.last
> max
)
509 max
= p
->second
.range
.last
;
512 void set_max_size(uint64_t new_max
) {
514 client_ranges
.clear();
516 for (std::map
<client_t
,client_writeable_range_t
>::iterator p
= client_ranges
.begin();
517 p
!= client_ranges
.end();
519 p
->second
.range
.last
= new_max
;
523 void trim_client_ranges(snapid_t last
) {
524 std::map
<client_t
, client_writeable_range_t
>::iterator p
= client_ranges
.begin();
525 while (p
!= client_ranges
.end()) {
526 if (p
->second
.follows
>= last
)
527 client_ranges
.erase(p
++);
533 bool is_backtrace_updated() const {
534 return backtrace_version
== version
;
536 void update_backtrace(version_t pv
=0) {
537 backtrace_version
= pv
? pv
: version
;
540 void add_old_pool(int64_t l
) {
541 backtrace_version
= version
;
545 void encode(ceph::buffer::list
&bl
, uint64_t features
) const;
546 void decode(ceph::buffer::list::const_iterator
& bl
);
547 void dump(ceph::Formatter
*f
) const;
548 static void client_ranges_cb(client_range_map
& c
, JSONObj
*obj
);
549 static void old_pools_cb(compact_set
<int64_t, std::less
<int64_t>, Allocator
<int64_t> >& c
, JSONObj
*obj
);
550 void decode_json(JSONObj
*obj
);
551 static void generate_test_instances(std::list
<inode_t
*>& ls
);
553 * Compare this inode_t with another that represent *the same inode*
554 * at different points in time.
555 * @pre The inodes are the same ino
557 * @param other The inode_t to compare ourselves with
558 * @param divergent A bool pointer which will be set to true
559 * if the values are different in a way that can't be explained
560 * by one being a newer version than the other.
562 * @returns 1 if we are newer than the other, 0 if equal, -1 if older.
564 int compare(const inode_t
&other
, bool *divergent
) const;
568 uint32_t rdev
= 0; // if special file
570 // affected by any inode change...
571 utime_t ctime
; // inode change time
572 utime_t btime
; // birth time
574 // perm (namespace permissions)
582 // file (data access)
583 ceph_dir_layout dir_layout
= {}; // [dir only]
584 file_layout_t layout
;
585 compact_set
<int64_t, std::less
<int64_t>, Allocator
<int64_t>> old_pools
;
586 uint64_t size
= 0; // on directory, # dentries
587 uint64_t max_size_ever
= 0; // max size the file has ever been
588 uint32_t truncate_seq
= 0;
589 uint64_t truncate_size
= 0, truncate_from
= 0;
590 uint32_t truncate_pending
= 0;
591 utime_t mtime
; // file data modify time.
592 utime_t atime
; // file data access time.
593 uint32_t time_warp_seq
= 0; // count of (potential) mtime/atime timewarps (i.e., utimes())
594 inline_data_t inline_data
; // FIXME check
597 uint64_t change_attr
= 0;
599 client_range_map client_ranges
; // client(s) can write to these ranges
601 // dirfrag, recursive accounting
602 frag_info_t dirstat
; // protected by my filelock
603 nest_info_t rstat
; // protected by my nestlock
604 nest_info_t accounted_rstat
; // protected by parent's nestlock
608 mds_rank_t export_pin
= MDS_RANK_NONE
;
610 double export_ephemeral_random_pin
= 0;
611 bool export_ephemeral_distributed_pin
= false;
614 version_t version
= 0; // auth only
615 version_t file_data_version
= 0; // auth only
616 version_t xattr_version
= 0;
618 utime_t last_scrub_stamp
; // start time of last complete scrub
619 version_t last_scrub_version
= 0;// (parent) start version of last complete scrub
621 version_t backtrace_version
= 0;
623 snapid_t oldest_snap
;
625 std::basic_string
<char,std::char_traits
<char>,Allocator
<char>> stray_prior_path
; //stores path before unlink
627 bool fscrypt
= false; // fscrypt enabled ?
630 bool older_is_consistent(const inode_t
&other
) const;
633 // These methods may be moved back to mdstypes.cc when we have pmr
634 template<template<typename
> class Allocator
>
635 void inode_t
<Allocator
>::encode(ceph::buffer::list
&bl
, uint64_t features
) const
637 ENCODE_START(17, 6, bl
);
651 encode(anchored
, bl
);
654 encode(dir_layout
, bl
);
655 encode(layout
, bl
, features
);
657 encode(truncate_seq
, bl
);
658 encode(truncate_size
, bl
);
659 encode(truncate_from
, bl
);
660 encode(truncate_pending
, bl
);
663 encode(time_warp_seq
, bl
);
664 encode(client_ranges
, bl
);
668 encode(accounted_rstat
, bl
);
671 encode(file_data_version
, bl
);
672 encode(xattr_version
, bl
);
673 encode(backtrace_version
, bl
);
674 encode(old_pools
, bl
);
675 encode(max_size_ever
, bl
);
676 encode(inline_data
, bl
);
679 encode(stray_prior_path
, bl
);
681 encode(last_scrub_version
, bl
);
682 encode(last_scrub_stamp
, bl
);
685 encode(change_attr
, bl
);
687 encode(export_pin
, bl
);
689 encode(export_ephemeral_random_pin
, bl
);
690 encode(export_ephemeral_distributed_pin
, bl
);
697 template<template<typename
> class Allocator
>
698 void inode_t
<Allocator
>::decode(ceph::buffer::list::const_iterator
&p
)
700 DECODE_START_LEGACY_COMPAT_LEN(17, 6, 6, p
);
717 decode(dir_layout
, p
);
719 // FIPS zeroization audit 20191117: this memset is not security related.
720 memset(&dir_layout
, 0, sizeof(dir_layout
));
724 decode(truncate_seq
, p
);
725 decode(truncate_size
, p
);
726 decode(truncate_from
, p
);
728 decode(truncate_pending
, p
);
730 truncate_pending
= 0;
733 decode(time_warp_seq
, p
);
735 decode(client_ranges
, p
);
737 std::map
<client_t
, client_writeable_range_t::byte_range_t
> m
;
739 for (auto q
= m
.begin(); q
!= m
.end(); ++q
)
740 client_ranges
[q
->first
].range
= q
->second
;
745 decode(accounted_rstat
, p
);
748 decode(file_data_version
, p
);
749 decode(xattr_version
, p
);
751 decode(backtrace_version
, p
);
753 decode(old_pools
, p
);
755 decode(max_size_ever
, p
);
757 decode(inline_data
, p
);
759 inline_data
.version
= CEPH_INLINE_NONE
;
762 backtrace_version
= 0; // force update backtrace
766 if (struct_v
>= 12) {
769 stray_prior_path
= std::string_view(tmp
);
772 if (struct_v
>= 13) {
773 decode(last_scrub_version
, p
);
774 decode(last_scrub_stamp
, p
);
776 if (struct_v
>= 14) {
778 decode(change_attr
, p
);
784 if (struct_v
>= 15) {
785 decode(export_pin
, p
);
787 export_pin
= MDS_RANK_NONE
;
790 if (struct_v
>= 16) {
791 decode(export_ephemeral_random_pin
, p
);
792 decode(export_ephemeral_distributed_pin
, p
);
794 export_ephemeral_random_pin
= 0;
795 export_ephemeral_distributed_pin
= false;
798 if (struct_v
>= 17) {
807 template<template<typename
> class Allocator
>
808 void inode_t
<Allocator
>::dump(ceph::Formatter
*f
) const
810 f
->dump_unsigned("ino", ino
);
811 f
->dump_unsigned("rdev", rdev
);
812 f
->dump_stream("ctime") << ctime
;
813 f
->dump_stream("btime") << btime
;
814 f
->dump_unsigned("mode", mode
);
815 f
->dump_unsigned("uid", uid
);
816 f
->dump_unsigned("gid", gid
);
817 f
->dump_unsigned("nlink", nlink
);
819 f
->open_object_section("dir_layout");
820 ::dump(dir_layout
, f
);
823 f
->dump_object("layout", layout
);
825 f
->open_array_section("old_pools");
826 for (const auto &p
: old_pools
) {
827 f
->dump_int("pool", p
);
831 f
->dump_unsigned("size", size
);
832 f
->dump_unsigned("truncate_seq", truncate_seq
);
833 f
->dump_unsigned("truncate_size", truncate_size
);
834 f
->dump_unsigned("truncate_from", truncate_from
);
835 f
->dump_unsigned("truncate_pending", truncate_pending
);
836 f
->dump_stream("mtime") << mtime
;
837 f
->dump_stream("atime") << atime
;
838 f
->dump_unsigned("time_warp_seq", time_warp_seq
);
839 f
->dump_unsigned("change_attr", change_attr
);
840 f
->dump_int("export_pin", export_pin
);
841 f
->dump_int("export_ephemeral_random_pin", export_ephemeral_random_pin
);
842 f
->dump_bool("export_ephemeral_distributed_pin", export_ephemeral_distributed_pin
);
844 f
->open_array_section("client_ranges");
845 for (const auto &p
: client_ranges
) {
846 f
->open_object_section("client");
847 f
->dump_unsigned("client", p
.first
.v
);
853 f
->open_object_section("dirstat");
857 f
->open_object_section("rstat");
861 f
->open_object_section("accounted_rstat");
862 accounted_rstat
.dump(f
);
865 f
->dump_unsigned("version", version
);
866 f
->dump_unsigned("file_data_version", file_data_version
);
867 f
->dump_unsigned("xattr_version", xattr_version
);
868 f
->dump_unsigned("backtrace_version", backtrace_version
);
870 f
->dump_string("stray_prior_path", stray_prior_path
);
871 f
->dump_unsigned("max_size_ever", max_size_ever
);
873 f
->open_object_section("quota");
877 f
->dump_stream("last_scrub_stamp") << last_scrub_stamp
;
878 f
->dump_unsigned("last_scrub_version", last_scrub_version
);
881 template<template<typename
> class Allocator
>
882 void inode_t
<Allocator
>::client_ranges_cb(typename inode_t
<Allocator
>::client_range_map
& c
, JSONObj
*obj
){
885 JSONDecoder::decode_json("client", client
, obj
, true);
886 client_writeable_range_t client_range_tmp
;
887 JSONDecoder::decode_json("byte range", client_range_tmp
.range
, obj
, true);
888 JSONDecoder::decode_json("follows", client_range_tmp
.follows
.val
, obj
, true);
889 c
[client
] = client_range_tmp
;
892 template<template<typename
> class Allocator
>
893 void inode_t
<Allocator
>::old_pools_cb(compact_set
<int64_t, std::less
<int64_t>, Allocator
<int64_t> >& c
, JSONObj
*obj
){
896 decode_json_obj(tmp
, obj
);
900 template<template<typename
> class Allocator
>
901 void inode_t
<Allocator
>::decode_json(JSONObj
*obj
)
904 JSONDecoder::decode_json("ino", ino
.val
, obj
, true);
905 JSONDecoder::decode_json("rdev", rdev
, obj
, true);
906 //JSONDecoder::decode_json("ctime", ctime, obj, true);
907 //JSONDecoder::decode_json("btime", btime, obj, true);
908 JSONDecoder::decode_json("mode", mode
, obj
, true);
909 JSONDecoder::decode_json("uid", uid
, obj
, true);
910 JSONDecoder::decode_json("gid", gid
, obj
, true);
911 JSONDecoder::decode_json("nlink", nlink
, obj
, true);
912 JSONDecoder::decode_json("dir_layout", dir_layout
, obj
, true);
913 JSONDecoder::decode_json("layout", layout
, obj
, true);
914 JSONDecoder::decode_json("old_pools", old_pools
, inode_t
<Allocator
>::old_pools_cb
, obj
, true);
915 JSONDecoder::decode_json("size", size
, obj
, true);
916 JSONDecoder::decode_json("truncate_seq", truncate_seq
, obj
, true);
917 JSONDecoder::decode_json("truncate_size", truncate_size
, obj
, true);
918 JSONDecoder::decode_json("truncate_from", truncate_from
, obj
, true);
919 JSONDecoder::decode_json("truncate_pending", truncate_pending
, obj
, true);
920 //JSONDecoder::decode_json("mtime", mtime, obj, true);
921 //JSONDecoder::decode_json("atime", atime, obj, true);
922 JSONDecoder::decode_json("time_warp_seq", time_warp_seq
, obj
, true);
923 JSONDecoder::decode_json("change_attr", change_attr
, obj
, true);
924 JSONDecoder::decode_json("export_pin", export_pin
, obj
, true);
925 JSONDecoder::decode_json("client_ranges", client_ranges
, inode_t
<Allocator
>::client_ranges_cb
, obj
, true);
926 JSONDecoder::decode_json("dirstat", dirstat
, obj
, true);
927 JSONDecoder::decode_json("rstat", rstat
, obj
, true);
928 JSONDecoder::decode_json("accounted_rstat", accounted_rstat
, obj
, true);
929 JSONDecoder::decode_json("version", version
, obj
, true);
930 JSONDecoder::decode_json("file_data_version", file_data_version
, obj
, true);
931 JSONDecoder::decode_json("xattr_version", xattr_version
, obj
, true);
932 JSONDecoder::decode_json("backtrace_version", backtrace_version
, obj
, true);
933 JSONDecoder::decode_json("stray_prior_path", stray_prior_path
, obj
, true);
934 JSONDecoder::decode_json("max_size_ever", max_size_ever
, obj
, true);
935 JSONDecoder::decode_json("quota", quota
, obj
, true);
936 JSONDecoder::decode_json("last_scrub_stamp", last_scrub_stamp
, obj
, true);
937 JSONDecoder::decode_json("last_scrub_version", last_scrub_version
, obj
, true);
940 template<template<typename
> class Allocator
>
941 void inode_t
<Allocator
>::generate_test_instances(std::list
<inode_t
*>& ls
)
943 ls
.push_back(new inode_t
<Allocator
>);
944 ls
.push_back(new inode_t
<Allocator
>);
949 template<template<typename
> class Allocator
>
950 int inode_t
<Allocator
>::compare(const inode_t
<Allocator
> &other
, bool *divergent
) const
952 ceph_assert(ino
== other
.ino
);
954 if (version
== other
.version
) {
955 if (rdev
!= other
.rdev
||
956 ctime
!= other
.ctime
||
957 btime
!= other
.btime
||
958 mode
!= other
.mode
||
961 nlink
!= other
.nlink
||
962 memcmp(&dir_layout
, &other
.dir_layout
, sizeof(dir_layout
)) ||
963 layout
!= other
.layout
||
964 old_pools
!= other
.old_pools
||
965 size
!= other
.size
||
966 max_size_ever
!= other
.max_size_ever
||
967 truncate_seq
!= other
.truncate_seq
||
968 truncate_size
!= other
.truncate_size
||
969 truncate_from
!= other
.truncate_from
||
970 truncate_pending
!= other
.truncate_pending
||
971 change_attr
!= other
.change_attr
||
972 mtime
!= other
.mtime
||
973 atime
!= other
.atime
||
974 time_warp_seq
!= other
.time_warp_seq
||
975 inline_data
!= other
.inline_data
||
976 client_ranges
!= other
.client_ranges
||
977 !(dirstat
== other
.dirstat
) ||
978 !(rstat
== other
.rstat
) ||
979 !(accounted_rstat
== other
.accounted_rstat
) ||
980 file_data_version
!= other
.file_data_version
||
981 xattr_version
!= other
.xattr_version
||
982 backtrace_version
!= other
.backtrace_version
) {
986 } else if (version
> other
.version
) {
987 *divergent
= !older_is_consistent(other
);
990 ceph_assert(version
< other
.version
);
991 *divergent
= !other
.older_is_consistent(*this);
996 template<template<typename
> class Allocator
>
997 bool inode_t
<Allocator
>::older_is_consistent(const inode_t
<Allocator
> &other
) const
999 if (max_size_ever
< other
.max_size_ever
||
1000 truncate_seq
< other
.truncate_seq
||
1001 time_warp_seq
< other
.time_warp_seq
||
1002 inline_data
.version
< other
.inline_data
.version
||
1003 dirstat
.version
< other
.dirstat
.version
||
1004 rstat
.version
< other
.rstat
.version
||
1005 accounted_rstat
.version
< other
.accounted_rstat
.version
||
1006 file_data_version
< other
.file_data_version
||
1007 xattr_version
< other
.xattr_version
||
1008 backtrace_version
< other
.backtrace_version
) {
1014 template<template<typename
> class Allocator
>
1015 inline void encode(const inode_t
<Allocator
> &c
, ::ceph::buffer::list
&bl
, uint64_t features
)
1018 c
.encode(bl
, features
);
1019 ENCODE_DUMP_POST(cl
);
1021 template<template<typename
> class Allocator
>
1022 inline void decode(inode_t
<Allocator
> &c
, ::ceph::buffer::list::const_iterator
&p
)
// std::basic_string<char> whose storage comes from Allocator<char>.
// With Allocator = std::allocator this is exactly std::string.
template<template<typename> class Allocator>
using alloc_string = std::basic_string<char,std::char_traits<char>,Allocator<char>>;
1030 template<template<typename
> class Allocator
>
1031 using xattr_map
= std::map
<alloc_string
<Allocator
>,
1033 std::less
<alloc_string
<Allocator
>>,
1034 Allocator
<std::pair
<const alloc_string
<Allocator
>,
1035 ceph::bufferptr
>>>; // FIXME bufferptr not in mempool
1037 template<template<typename
> class Allocator
>
1038 inline void decode_noshare(xattr_map
<Allocator
>& xattrs
, ceph::buffer::list::const_iterator
&p
)
1043 alloc_string
<Allocator
> key
;
1047 p
.copy_deep(len
, xattrs
[key
]);
1051 template<template<typename
> class Allocator
= std::allocator
>
1052 struct old_inode_t
{
1054 inode_t
<Allocator
> inode
;
1055 xattr_map
<Allocator
> xattrs
;
1057 void encode(ceph::buffer::list
&bl
, uint64_t features
) const;
1058 void decode(ceph::buffer::list::const_iterator
& bl
);
1059 void dump(ceph::Formatter
*f
) const;
1060 static void generate_test_instances(std::list
<old_inode_t
*>& ls
);
1063 // These methods may be moved back to mdstypes.cc when we have pmr
1064 template<template<typename
> class Allocator
>
1065 void old_inode_t
<Allocator
>::encode(ceph::buffer::list
& bl
, uint64_t features
) const
1067 ENCODE_START(2, 2, bl
);
1069 encode(inode
, bl
, features
);
1074 template<template<typename
> class Allocator
>
1075 void old_inode_t
<Allocator
>::decode(ceph::buffer::list::const_iterator
& bl
)
1077 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl
);
1080 decode_noshare
<Allocator
>(xattrs
, bl
);
1084 template<template<typename
> class Allocator
>
1085 void old_inode_t
<Allocator
>::dump(ceph::Formatter
*f
) const
1087 f
->dump_unsigned("first", first
);
1089 f
->open_object_section("xattrs");
1090 for (const auto &p
: xattrs
) {
1091 std::string
v(p
.second
.c_str(), p
.second
.length());
1092 f
->dump_string(p
.first
.c_str(), v
);
1097 template<template<typename
> class Allocator
>
1098 void old_inode_t
<Allocator
>::generate_test_instances(std::list
<old_inode_t
<Allocator
>*>& ls
)
1100 ls
.push_back(new old_inode_t
<Allocator
>);
1101 ls
.push_back(new old_inode_t
<Allocator
>);
1102 ls
.back()->first
= 2;
1103 std::list
<inode_t
<Allocator
>*> ils
;
1104 inode_t
<Allocator
>::generate_test_instances(ils
);
1105 ls
.back()->inode
= *ils
.back();
1106 ls
.back()->xattrs
["user.foo"] = ceph::buffer::copy("asdf", 4);
1107 ls
.back()->xattrs
["user.unprintable"] = ceph::buffer::copy("\000\001\002", 3);
1110 template<template<typename
> class Allocator
>
1111 inline void encode(const old_inode_t
<Allocator
> &c
, ::ceph::buffer::list
&bl
, uint64_t features
)
1114 c
.encode(bl
, features
);
1115 ENCODE_DUMP_POST(cl
);
1117 template<template<typename
> class Allocator
>
1118 inline void decode(old_inode_t
<Allocator
> &c
, ::ceph::buffer::list::const_iterator
&p
)
1124 * like an inode, but for a dir frag
1127 void encode(ceph::buffer::list
&bl
) const;
1128 void decode(ceph::buffer::list::const_iterator
& bl
);
1129 void dump(ceph::Formatter
*f
) const;
1130 void decode_json(JSONObj
*obj
);
1131 static void generate_test_instances(std::list
<fnode_t
*>& ls
);
1133 version_t version
= 0;
1134 snapid_t snap_purged_thru
; // the max_last_destroy snapid we've been purged thru
1135 frag_info_t fragstat
, accounted_fragstat
;
1136 nest_info_t rstat
, accounted_rstat
;
1137 damage_flags_t damage_flags
= 0;
1139 // we know we and all our descendants have been scrubbed since this version
1140 version_t recursive_scrub_version
= 0;
1141 utime_t recursive_scrub_stamp
;
1142 // version at which we last scrubbed our personal data structures
1143 version_t localized_scrub_version
= 0;
1144 utime_t localized_scrub_stamp
;
1146 WRITE_CLASS_ENCODER(fnode_t
)
1149 struct old_rstat_t
{
1150 void encode(ceph::buffer::list
& bl
) const;
1151 void decode(ceph::buffer::list::const_iterator
& p
);
1152 void dump(ceph::Formatter
*f
) const;
1153 static void generate_test_instances(std::list
<old_rstat_t
*>& ls
);
1156 nest_info_t rstat
, accounted_rstat
;
1158 WRITE_CLASS_ENCODER(old_rstat_t
)
1160 inline std::ostream
& operator<<(std::ostream
& out
, const old_rstat_t
& o
) {
1161 return out
<< "old_rstat(first " << o
.first
<< " " << o
.rstat
<< " " << o
.accounted_rstat
<< ")";
1164 class feature_bitset_t
{
1166 typedef uint64_t block_type
;
1167 static const size_t bits_per_block
= sizeof(block_type
) * 8;
1169 feature_bitset_t(const feature_bitset_t
& other
) : _vec(other
._vec
) {}
1170 feature_bitset_t(feature_bitset_t
&& other
) : _vec(std::move(other
._vec
)) {}
1171 feature_bitset_t(unsigned long value
= 0);
1172 feature_bitset_t(const std::vector
<size_t>& array
);
1173 feature_bitset_t
& operator=(const feature_bitset_t
& other
) {
1177 feature_bitset_t
& operator=(feature_bitset_t
&& other
) {
1178 _vec
= std::move(other
._vec
);
1181 feature_bitset_t
& operator-=(const feature_bitset_t
& other
);
1182 bool empty() const {
1183 //block_type is a uint64_t. If the vector is only composed of 0s, then it's still "empty"
1184 for (auto& v
: _vec
) {
1190 bool test(size_t bit
) const {
1191 if (bit
>= bits_per_block
* _vec
.size())
1193 return _vec
[bit
/ bits_per_block
] & ((block_type
)1 << (bit
% bits_per_block
));
1195 void insert(size_t bit
) {
1196 size_t n
= bit
/ bits_per_block
;
1197 if (n
>= _vec
.size())
1199 _vec
[n
] |= ((block_type
)1 << (bit
% bits_per_block
));
1201 void erase(size_t bit
) {
1202 size_t n
= bit
/ bits_per_block
;
1203 if (n
>= _vec
.size())
1205 _vec
[n
] &= ~((block_type
)1 << (bit
% bits_per_block
));
1206 if (n
+ 1 == _vec
.size()) {
1207 while (!_vec
.empty() && _vec
.back() == 0)
1214 bool operator==(const feature_bitset_t
& other
) const {
1215 return _vec
== other
._vec
;
1217 bool operator!=(const feature_bitset_t
& other
) const {
1218 return _vec
!= other
._vec
;
1220 void encode(ceph::buffer::list
& bl
) const;
1221 void decode(ceph::buffer::list::const_iterator
&p
);
1222 void dump(ceph::Formatter
*f
) const;
1223 void print(std::ostream
& out
) const;
1225 std::vector
<block_type
> _vec
;
1227 WRITE_CLASS_ENCODER(feature_bitset_t
)
1229 inline std::ostream
& operator<<(std::ostream
& out
, const feature_bitset_t
& s
) {
1234 struct metric_spec_t
{
1236 metric_spec_t(const metric_spec_t
& other
) :
1237 metric_flags(other
.metric_flags
) {}
1238 metric_spec_t(metric_spec_t
&& other
) :
1239 metric_flags(std::move(other
.metric_flags
)) {}
1240 metric_spec_t(const feature_bitset_t
& mf
) :
1242 metric_spec_t(feature_bitset_t
&& mf
) :
1243 metric_flags(std::move(mf
)) {}
1245 metric_spec_t
& operator=(const metric_spec_t
& other
) {
1246 metric_flags
= other
.metric_flags
;
1249 metric_spec_t
& operator=(metric_spec_t
&& other
) {
1250 metric_flags
= std::move(other
.metric_flags
);
1254 bool empty() const {
1255 return metric_flags
.empty();
1259 metric_flags
.clear();
1262 void encode(ceph::buffer::list
& bl
) const;
1263 void decode(ceph::buffer::list::const_iterator
& p
);
1264 void dump(ceph::Formatter
*f
) const;
1265 void print(std::ostream
& out
) const;
1267 // set of metrics that a client is capable of forwarding
1268 feature_bitset_t metric_flags
;
1270 WRITE_CLASS_ENCODER(metric_spec_t
)
1272 inline std::ostream
& operator<<(std::ostream
& out
, const metric_spec_t
& mst
) {
1280 struct client_metadata_t
{
1281 using kv_map_t
= std::map
<std::string
,std::string
>;
1282 using iterator
= kv_map_t::const_iterator
;
1284 client_metadata_t() {}
1285 client_metadata_t(const kv_map_t
& kv
, const feature_bitset_t
&f
, const metric_spec_t
&mst
) :
1289 client_metadata_t
& operator=(const client_metadata_t
& other
) {
1290 kv_map
= other
.kv_map
;
1291 features
= other
.features
;
1292 metric_spec
= other
.metric_spec
;
1296 bool empty() const { return kv_map
.empty() && features
.empty() && metric_spec
.empty(); }
1297 iterator
find(const std::string
& key
) const { return kv_map
.find(key
); }
1298 iterator
begin() const { return kv_map
.begin(); }
1299 iterator
end() const { return kv_map
.end(); }
1300 void erase(iterator it
) { kv_map
.erase(it
); }
1301 std::string
& operator[](const std::string
& key
) { return kv_map
[key
]; }
1302 void merge(const client_metadata_t
& other
) {
1303 kv_map
.insert(other
.kv_map
.begin(), other
.kv_map
.end());
1304 features
= other
.features
;
1305 metric_spec
= other
.metric_spec
;
1310 metric_spec
.clear();
1313 void encode(ceph::buffer::list
& bl
) const;
1314 void decode(ceph::buffer::list::const_iterator
& p
);
1315 void dump(ceph::Formatter
*f
) const;
1318 feature_bitset_t features
;
1319 metric_spec_t metric_spec
;
1321 WRITE_CLASS_ENCODER(client_metadata_t
)
1324 * session_info_t - durable part of a Session
1326 struct session_info_t
{
1327 client_t
get_client() const { return client_t(inst
.name
.num()); }
1328 bool has_feature(size_t bit
) const { return client_metadata
.features
.test(bit
); }
1329 const entity_name_t
& get_source() const { return inst
.name
; }
1332 prealloc_inos
.clear();
1333 completed_requests
.clear();
1334 completed_flushes
.clear();
1335 client_metadata
.clear();
1338 void encode(ceph::buffer::list
& bl
, uint64_t features
) const;
1339 void decode(ceph::buffer::list::const_iterator
& p
);
1340 void dump(ceph::Formatter
*f
) const;
1341 static void generate_test_instances(std::list
<session_info_t
*>& ls
);
1344 std::map
<ceph_tid_t
,inodeno_t
> completed_requests
;
1345 interval_set
<inodeno_t
> prealloc_inos
; // preallocated, ready to use.
1346 client_metadata_t client_metadata
;
1347 std::set
<ceph_tid_t
> completed_flushes
;
1348 EntityName auth_name
;
1350 WRITE_CLASS_ENCODER_FEATURES(session_info_t
)
1353 struct dentry_key_t
{
1355 dentry_key_t(snapid_t s
, std::string_view n
, __u32 h
=0) :
1356 snapid(s
), name(n
), hash(h
) {}
1358 bool is_valid() { return name
.length() || snapid
; }
1360 // encode into something that can be decoded as a string.
1361 // name_ (head) or name_%x (!head)
1362 void encode(ceph::buffer::list
& bl
) const {
1368 void encode(std::string
& key
) const {
1370 if (snapid
!= CEPH_NOSNAP
) {
1371 uint64_t val(snapid
);
1372 snprintf(b
, sizeof(b
), "%" PRIx64
, val
);
1374 snprintf(b
, sizeof(b
), "%s", "head");
1376 CachedStackStringStream css
;
1377 *css
<< name
<< "_" << b
;
1380 static void decode_helper(ceph::buffer::list::const_iterator
& bl
, std::string
& nm
,
1385 decode_helper(key
, nm
, sn
);
1387 static void decode_helper(std::string_view key
, std::string
& nm
, snapid_t
& sn
) {
1388 size_t i
= key
.find_last_of('_');
1389 ceph_assert(i
!= std::string::npos
);
1390 if (key
.compare(i
+1, std::string_view::npos
, "head") == 0) {
1395 long long unsigned x
= 0;
1396 std::string
x_str(key
.substr(i
+1));
1397 sscanf(x_str
.c_str(), "%llx", &x
);
1400 nm
= key
.substr(0, i
);
1403 snapid_t snapid
= 0;
1404 std::string_view name
;
1408 inline std::ostream
& operator<<(std::ostream
& out
, const dentry_key_t
&k
)
1410 return out
<< "(" << k
.name
<< "," << k
.snapid
<< ")";
// Less-than for dentry keys. The visible statements compute the difference of
// ceph_frag_value() over the two hashes, then compare the names, and finally
// return the comparison of the snapids.
// NOTE(review): the statements that act on `c` after each of the first two
// comparisons are not visible in this listing — confirm against the full header.
1413 inline bool operator<(const dentry_key_t
& k1
, const dentry_key_t
& k2
)
1416 * order by hash, name, snap
1418 int c
= ceph_frag_value(k1
.hash
) - ceph_frag_value(k2
.hash
);
1421 c
= k1
.name
.compare(k2
.name
);
1424 return k1
.snapid
< k2
.snapid
;
1428 * string_snap_t is a simple (string, snapid_t) pair
1430 struct string_snap_t
{
1432 string_snap_t(std::string_view n
, snapid_t s
) : name(n
), snapid(s
) {}
1434 void encode(ceph::buffer::list
& bl
) const;
1435 void decode(ceph::buffer::list::const_iterator
& p
);
1436 void dump(ceph::Formatter
*f
) const;
1437 static void generate_test_instances(std::list
<string_snap_t
*>& ls
);
1442 WRITE_CLASS_ENCODER(string_snap_t
)
1444 inline bool operator<(const string_snap_t
& l
, const string_snap_t
& r
) {
1445 int c
= l
.name
.compare(r
.name
);
1446 return c
< 0 || (c
== 0 && l
.snapid
< r
.snapid
);
1449 inline std::ostream
& operator<<(std::ostream
& out
, const string_snap_t
&k
)
1451 return out
<< "(" << k
.name
<< "," << k
.snapid
<< ")";
1455 * mds_table_pending_t
1457 * For mds's requesting any pending ops, child needs to encode the corresponding
1458 * pending mutation state in the table.
1460 struct mds_table_pending_t
{
1461 void encode(ceph::buffer::list
& bl
) const;
1462 void decode(ceph::buffer::list::const_iterator
& bl
);
1463 void dump(ceph::Formatter
*f
) const;
1464 static void generate_test_instances(std::list
<mds_table_pending_t
*>& ls
);
1470 WRITE_CLASS_ENCODER(mds_table_pending_t
)
1473 struct metareqid_t
{
1475 metareqid_t(entity_name_t n
, ceph_tid_t t
) : name(n
), tid(t
) {}
1476 void encode(ceph::buffer::list
& bl
) const {
1481 void decode(ceph::buffer::list::const_iterator
&p
) {
1490 WRITE_CLASS_ENCODER(metareqid_t
)
1492 inline std::ostream
& operator<<(std::ostream
& out
, const metareqid_t
& r
) {
1493 return out
<< r
.name
<< ":" << r
.tid
;
1496 inline bool operator==(const metareqid_t
& l
, const metareqid_t
& r
) {
1497 return (l
.name
== r
.name
) && (l
.tid
== r
.tid
);
1499 inline bool operator!=(const metareqid_t
& l
, const metareqid_t
& r
) {
1500 return (l
.name
!= r
.name
) || (l
.tid
!= r
.tid
);
1502 inline bool operator<(const metareqid_t
& l
, const metareqid_t
& r
) {
1503 return (l
.name
< r
.name
) ||
1504 (l
.name
== r
.name
&& l
.tid
< r
.tid
);
1506 inline bool operator<=(const metareqid_t
& l
, const metareqid_t
& r
) {
1507 return (l
.name
< r
.name
) ||
1508 (l
.name
== r
.name
&& l
.tid
<= r
.tid
);
1510 inline bool operator>(const metareqid_t
& l
, const metareqid_t
& r
) { return !(l
<= r
); }
1511 inline bool operator>=(const metareqid_t
& l
, const metareqid_t
& r
) { return !(l
< r
); }
// hash specialization for metareqid_t: the call operator XORs together the
// hashes of the requester's entity number (name.num()), its entity type
// (name.type()) and the tid.
// NOTE(review): the declaration of the hasher `H` and the closing braces are
// not visible in this listing — confirm against the full header.
1514 template<> struct hash
<metareqid_t
> {
1515 size_t operator()(const metareqid_t
&r
) const {
1517 return H(r
.name
.num()) ^ H(r
.name
.type()) ^ H(r
.tid
);
1522 // cap info for client reconnect
1523 struct cap_reconnect_t
{
1524 cap_reconnect_t() {}
1525 cap_reconnect_t(uint64_t cap_id
, inodeno_t pino
, std::string_view p
, int w
, int i
,
1526 inodeno_t sr
, snapid_t sf
, ceph::buffer::list
& lb
) :
1528 capinfo
.cap_id
= cap_id
;
1531 capinfo
.snaprealm
= sr
;
1532 capinfo
.pathbase
= pino
;
1533 capinfo
.flock_len
= 0;
1535 flockbl
= std::move(lb
);
1537 void encode(ceph::buffer::list
& bl
) const;
1538 void decode(ceph::buffer::list::const_iterator
& bl
);
1539 void encode_old(ceph::buffer::list
& bl
) const;
1540 void decode_old(ceph::buffer::list::const_iterator
& bl
);
1542 void dump(ceph::Formatter
*f
) const;
1543 static void generate_test_instances(std::list
<cap_reconnect_t
*>& ls
);
1546 mutable ceph_mds_cap_reconnect capinfo
= {};
1547 snapid_t snap_follows
= 0;
1548 ceph::buffer::list flockbl
;
1550 WRITE_CLASS_ENCODER(cap_reconnect_t
)
1552 struct snaprealm_reconnect_t
{
1553 snaprealm_reconnect_t() {}
1554 snaprealm_reconnect_t(inodeno_t ino
, snapid_t seq
, inodeno_t parent
) {
1557 realm
.parent
= parent
;
1559 void encode(ceph::buffer::list
& bl
) const;
1560 void decode(ceph::buffer::list::const_iterator
& bl
);
1561 void encode_old(ceph::buffer::list
& bl
) const;
1562 void decode_old(ceph::buffer::list::const_iterator
& bl
);
1564 void dump(ceph::Formatter
*f
) const;
1565 static void generate_test_instances(std::list
<snaprealm_reconnect_t
*>& ls
);
1567 mutable ceph_mds_snaprealm_reconnect realm
= {};
1569 WRITE_CLASS_ENCODER(snaprealm_reconnect_t
)
1571 // compat for pre-FLOCK feature
1572 struct old_ceph_mds_cap_reconnect
{
1577 struct ceph_timespec old_mtime
, old_atime
;
1578 ceph_le64 snaprealm
;
1579 ceph_le64 pathbase
; /* base ino for our path to this ino */
1580 } __attribute__ ((packed
));
1581 WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect
)
1583 struct old_cap_reconnect_t
{
1584 const old_cap_reconnect_t
& operator=(const cap_reconnect_t
& n
) {
1586 capinfo
.cap_id
= n
.capinfo
.cap_id
;
1587 capinfo
.wanted
= n
.capinfo
.wanted
;
1588 capinfo
.issued
= n
.capinfo
.issued
;
1589 capinfo
.snaprealm
= n
.capinfo
.snaprealm
;
1590 capinfo
.pathbase
= n
.capinfo
.pathbase
;
1593 operator cap_reconnect_t() {
1596 n
.capinfo
.cap_id
= capinfo
.cap_id
;
1597 n
.capinfo
.wanted
= capinfo
.wanted
;
1598 n
.capinfo
.issued
= capinfo
.issued
;
1599 n
.capinfo
.snaprealm
= capinfo
.snaprealm
;
1600 n
.capinfo
.pathbase
= capinfo
.pathbase
;
1604 void encode(ceph::buffer::list
& bl
) const {
1607 encode(capinfo
, bl
);
1609 void decode(ceph::buffer::list::const_iterator
& bl
) {
1612 decode(capinfo
, bl
);
1616 old_ceph_mds_cap_reconnect capinfo
;
1618 WRITE_CLASS_ENCODER(old_cap_reconnect_t
)
1623 dirfrag_t(inodeno_t i
, frag_t f
) : ino(i
), frag(f
) { }
1625 void encode(ceph::buffer::list
& bl
) const {
1630 void decode(ceph::buffer::list::const_iterator
& bl
) {
1639 WRITE_CLASS_ENCODER(dirfrag_t
)
// Stream a dirfrag_t. The visible statement appends "." followed by the frag
// only when the frag is not the root fragment.
// NOTE(review): the statements before and after it (original lines 1642 and
// 1644) are not visible in this listing — presumably they print the ino and
// return the stream; confirm against the full header.
1641 inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_t
&df
) {
1643 if (!df
.frag
.is_root()) out
<< "." << df
.frag
;
// Less-than for dirfrag_t (arguments taken by value). Returns true when l.ino
// is smaller, or when the inos are equal and l.frag is smaller.
// NOTE(review): the fall-through return is not visible in this listing —
// presumably `return false;`; confirm against the full header.
1646 inline bool operator<(dirfrag_t l
, dirfrag_t r
) {
1647 if (l
.ino
< r
.ino
) return true;
1648 if (l
.ino
== r
.ino
&& l
.frag
< r
.frag
) return true;
1651 inline bool operator==(dirfrag_t l
, dirfrag_t r
) {
1652 return l
.ino
== r
.ino
&& l
.frag
== r
.frag
;
1656 template<> struct hash
<dirfrag_t
> {
1657 size_t operator()(const dirfrag_t
&df
) const {
1658 static rjhash
<uint64_t> H
;
1659 static rjhash
<uint32_t> I
;
1660 return H(df
.ino
) ^ I(df
.frag
);
1665 // ================================================================
1666 #define META_POP_IRD 0
1667 #define META_POP_IWR 1
1668 #define META_POP_READDIR 2
1669 #define META_POP_FETCH 3
1670 #define META_POP_STORE 4
1673 class inode_load_vec_t
{
1675 using time
= DecayCounter::time
;
1676 using clock
= DecayCounter::clock
;
1677 static const size_t NUM
= 2;
1679 inode_load_vec_t() : vec
{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {}
1680 inode_load_vec_t(const DecayRate
&rate
) : vec
{DecayCounter(rate
), DecayCounter(rate
)} {}
1682 DecayCounter
&get(int t
) {
1686 for (auto &d
: vec
) {
1690 void encode(ceph::buffer::list
&bl
) const;
1691 void decode(ceph::buffer::list::const_iterator
& p
);
1692 void dump(ceph::Formatter
*f
) const;
1693 static void generate_test_instances(std::list
<inode_load_vec_t
*>& ls
);
1696 std::array
<DecayCounter
, NUM
> vec
;
1698 inline void encode(const inode_load_vec_t
&c
, ceph::buffer::list
&bl
) {
1701 inline void decode(inode_load_vec_t
& c
, ceph::buffer::list::const_iterator
&p
) {
1705 class dirfrag_load_vec_t
{
1707 using time
= DecayCounter::time
;
1708 using clock
= DecayCounter::clock
;
1709 static const size_t NUM
= 5;
1711 dirfrag_load_vec_t() :
1712 vec
{DecayCounter(DecayRate()),
1713 DecayCounter(DecayRate()),
1714 DecayCounter(DecayRate()),
1715 DecayCounter(DecayRate()),
1716 DecayCounter(DecayRate())
1719 dirfrag_load_vec_t(const DecayRate
&rate
) :
1720 vec
{DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
)}
1723 void encode(ceph::buffer::list
&bl
) const {
1724 ENCODE_START(2, 2, bl
);
1725 for (const auto &i
: vec
) {
1730 void decode(ceph::buffer::list::const_iterator
&p
) {
1731 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p
);
1732 for (auto &i
: vec
) {
1737 void dump(ceph::Formatter
*f
) const;
1738 void dump(ceph::Formatter
*f
, const DecayRate
& rate
) const;
1739 static void generate_test_instances(std::list
<dirfrag_load_vec_t
*>& ls
);
1741 const DecayCounter
&get(int t
) const {
1744 DecayCounter
&get(int t
) {
1747 void adjust(double d
) {
1748 for (auto &i
: vec
) {
1753 for (auto &i
: vec
) {
1757 double meta_load() const {
1759 1*vec
[META_POP_IRD
].get() +
1760 2*vec
[META_POP_IWR
].get() +
1761 1*vec
[META_POP_READDIR
].get() +
1762 2*vec
[META_POP_FETCH
].get() +
1763 4*vec
[META_POP_STORE
].get();
1766 void add(dirfrag_load_vec_t
& r
) {
1767 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1768 vec
[i
].adjust(r
.vec
[i
].get());
1770 void sub(dirfrag_load_vec_t
& r
) {
1771 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1772 vec
[i
].adjust(-r
.vec
[i
].get());
1774 void scale(double f
) {
1775 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1780 friend inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_load_vec_t
& dl
);
1781 std::array
<DecayCounter
, NUM
> vec
;
1784 inline void encode(const dirfrag_load_vec_t
&c
, ceph::buffer::list
&bl
) {
1787 inline void decode(dirfrag_load_vec_t
& c
, ceph::buffer::list::const_iterator
&p
) {
// Stream a dirfrag_load_vec_t. The text is first formatted into a
// CachedStackStringStream with fixed notation and one digit of precision:
// the five counters vec[0..4] labelled IRD/IWR/RDR/FET/STR, followed by the
// weighted meta_load() and a closing "]". The result is then written to `out`.
// NOTE(review): original line 1795 (the start of the formatted text) is not
// visible in this listing.
// NOTE(review): the final std::endl appends a newline AND flushes `out` on
// every call, which is unusual for a stream operator — confirm it is intended.
1791 inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_load_vec_t
& dl
)
1793 CachedStackStringStream css
;
1794 *css
<< std::setprecision(1) << std::fixed
1796 " IRD:" << dl
.vec
[0]
1797 << " IWR:" << dl
.vec
[1]
1798 << " RDR:" << dl
.vec
[2]
1799 << " FET:" << dl
.vec
[3]
1800 << " STR:" << dl
.vec
[4]
1801 << " *LOAD:" << dl
.meta_load() << "]";
1802 return out
<< css
->strv() << std::endl
;
1806 using clock
= dirfrag_load_vec_t::clock
;
1807 using time
= dirfrag_load_vec_t::time
;
1809 dirfrag_load_vec_t auth
;
1810 dirfrag_load_vec_t all
;
1812 mds_load_t() : auth(DecayRate()), all(DecayRate()) {}
1813 mds_load_t(const DecayRate
&rate
) : auth(rate
), all(rate
) {}
1815 double req_rate
= 0.0;
1816 double cache_hit_rate
= 0.0;
1817 double queue_len
= 0.0;
1819 double cpu_load_avg
= 0.0;
1821 double mds_load() const; // defined in MDBalancer.cc
1822 void encode(ceph::buffer::list
& bl
) const;
1823 void decode(ceph::buffer::list::const_iterator
& bl
);
1824 void dump(ceph::Formatter
*f
) const;
1825 static void generate_test_instances(std::list
<mds_load_t
*>& ls
);
1827 inline void encode(const mds_load_t
&c
, ceph::buffer::list
&bl
) {
1830 inline void decode(mds_load_t
&c
, ceph::buffer::list::const_iterator
&p
) {
// Stream an mds_load_t as "mdsload<" followed by the auth and all load
// vectors separated by "/", then the req_rate, cache_hit_rate, queue_len and
// cpu_load_avg fields labelled "req", "hr", "qlen" and "cpu".
// NOTE(review): the end of the expression (original line 1841 onward) is not
// visible in this listing — confirm the closing text against the full header.
1834 inline std::ostream
& operator<<(std::ostream
& out
, const mds_load_t
& load
)
1836 return out
<< "mdsload<" << load
.auth
<< "/" << load
.all
1837 << ", req " << load
.req_rate
1838 << ", hr " << load
.cache_hit_rate
1839 << ", qlen " << load
.queue_len
1840 << ", cpu " << load
.cpu_load_avg
1844 class load_spread_t
{
1846 using time
= DecayCounter::time
;
1847 using clock
= DecayCounter::clock
;
1848 static const int MAX
= 4;
1850 load_spread_t(const DecayRate
&rate
) : count(rate
)
1853 load_spread_t() = delete;
1855 double hit(int who
) {
1856 for (int i
=0; i
<n
; i
++)
1858 return count
.get_last();
1863 if (n
== 1) return 0.0;
1865 if (p
== MAX
) p
= 0;
1869 double get() const {
1873 std::array
<int, MAX
> last
= {-1, -1, -1, -1};
1878 // ================================================================
1879 typedef std::pair
<mds_rank_t
, mds_rank_t
> mds_authority_t
;
1881 // -- authority delegation --
1882 // directory authority types
1883 // >= 0 is the auth mds
1884 #define CDIR_AUTH_PARENT mds_rank_t(-1) // default
1885 #define CDIR_AUTH_UNKNOWN mds_rank_t(-2)
1886 #define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN)
1887 #define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN)
1888 //#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2)
1890 class MDSCacheObjectInfo
{
1892 void encode(ceph::buffer::list
& bl
) const;
1893 void decode(ceph::buffer::list::const_iterator
& bl
);
1894 void dump(ceph::Formatter
*f
) const;
1895 static void generate_test_instances(std::list
<MDSCacheObjectInfo
*>& ls
);
1903 inline std::ostream
& operator<<(std::ostream
& out
, const MDSCacheObjectInfo
&info
) {
1904 if (info
.ino
) return out
<< info
.ino
<< "." << info
.snapid
;
1905 if (info
.dname
.length()) return out
<< info
.dirfrag
<< "/" << info
.dname
1906 << " snap " << info
.snapid
;
1907 return out
<< info
.dirfrag
;
// Equality for MDSCacheObjectInfo. Two comparisons are visible: one on
// (ino, snapid) and one on (dirfrag, dname).
// NOTE(review): the conditions selecting between the two returns (original
// lines 1911 and 1913) are not visible in this listing — confirm against the
// full header. The visible dirfrag/dname comparison does not include snapid.
1910 inline bool operator==(const MDSCacheObjectInfo
& l
, const MDSCacheObjectInfo
& r
) {
1912 return l
.ino
== r
.ino
&& l
.snapid
== r
.snapid
;
1914 return l
.dirfrag
== r
.dirfrag
&& l
.dname
== r
.dname
;
1916 WRITE_CLASS_ENCODER(MDSCacheObjectInfo
)
1918 // parse a map of keys/values.
1919 namespace qi
= boost::spirit::qi
;
1921 template <typename Iterator
>
1922 struct keys_and_values
1923 : qi::grammar
<Iterator
, std::map
<std::string
, std::string
>()>
1926 : keys_and_values::base_type(query
)
1928 query
= pair
>> *(qi::lit(' ') >> pair
);
1929 pair
= key
>> '=' >> value
;
1930 key
= qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
1931 value
= +qi::char_("a-zA-Z0-9-_.");
1933 qi::rule
<Iterator
, std::map
<std::string
, std::string
>()> query
;
1934 qi::rule
<Iterator
, std::pair
<std::string
, std::string
>()> pair
;
1935 qi::rule
<Iterator
, std::string()> key
, value
;