1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #ifndef CEPH_MDSTYPES_H
4 #define CEPH_MDSTYPES_H
6 #include "include/int_types.h"
12 #include <string_view>
14 #include "common/config.h"
15 #include "common/Clock.h"
16 #include "common/DecayCounter.h"
17 #include "common/entity_name.h"
19 #include "include/Context.h"
20 #include "include/frag.h"
21 #include "include/xlist.h"
22 #include "include/interval_set.h"
23 #include "include/compact_map.h"
24 #include "include/compact_set.h"
25 #include "include/fs_types.h"
27 #include "inode_backtrace.h"
29 #include <boost/spirit/include/qi.hpp>
30 #include <boost/pool/pool.hpp>
31 #include "include/ceph_assert.h"
32 #include <boost/serialization/strong_typedef.hpp>
34 #define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011"
36 #define MDS_PORT_CACHE 0x200
37 #define MDS_PORT_LOCKER 0x300
38 #define MDS_PORT_MIGRATOR 0x400
// Fixed inode numbers and the per-rank inode ranges derived from MAX_MDS
// and NUM_STRAY (both must be defined before any of these macros is used).
#define MDS_INO_ROOT 1

// No longer created but recognised in existing filesystems
// so that we don't try to fragment it.
#define MDS_INO_CEPH 2

#define MDS_INO_GLOBAL_SNAPREALM 3

// Start of the mdsdir range and of the stray range, in units of MAX_MDS.
#define MDS_INO_MDSDIR_OFFSET (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET (6*MAX_MDS)

// Locations for journal data
#define MDS_INO_LOG_OFFSET (2*MAX_MDS)
#define MDS_INO_LOG_BACKUP_OFFSET (3*MAX_MDS)
#define MDS_INO_LOG_POINTER_OFFSET (4*MAX_MDS)
#define MDS_INO_PURGE_QUEUE (5*MAX_MDS)

// First value past the stray range: the stray range starts at 6*MAX_MDS
// and spans MAX_MDS*NUM_STRAY entries.
#define MDS_INO_SYSTEM_BASE ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))

// Stray inode number for owner x and slot i; MDS_INO_STRAY_OWNER and
// MDS_INO_STRAY_INDEX below invert this mapping.
#define MDS_INO_STRAY(x,i) (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
// mdsdir inode number for owner x. The argument is parenthesized before the
// cast so that an expression argument (e.g. `a / b`) is converted as a whole
// instead of having only its first operand cast.
#define MDS_INO_MDSDIR(x) (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

// Range predicates: true when i falls inside the stray / mdsdir range.
#define MDS_INO_IS_STRAY(i) ((i) >= MDS_INO_STRAY_OFFSET && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i) ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
// Inverse of MDS_INO_MDSDIR: recovers x from an mdsdir inode number.
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
// True for the root inode, the global snaprealm inode and any mdsdir inode.
#define MDS_INO_IS_BASE(i) ((i) == MDS_INO_ROOT || (i) == MDS_INO_GLOBAL_SNAPREALM || MDS_INO_IS_MDSDIR(i))
// Inverse of MDS_INO_STRAY: recover the owner (x) and the slot (i).
#define MDS_INO_STRAY_OWNER(i) (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i) (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)
// Signed 32-bit MDS rank. MDS_RANK_NONE (-1) is the "no rank" sentinel that
// mds_role_t::is_none() compares against.
using mds_rank_t = int32_t;
constexpr mds_rank_t MDS_RANK_NONE = -1;
75 BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t
)
76 extern const mds_gid_t MDS_GID_NONE
;
// Signed 32-bit filesystem cluster id; FS_CLUSTER_ID_NONE (-1) is the
// default value of mds_role_t::fscid.
using fs_cluster_id_t = int32_t;
constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = -1;
// The namespace ID of the anonymous default filesystem from legacy systems
constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS = 0;
85 mds_role_t(fs_cluster_id_t fscid_
, mds_rank_t rank_
)
86 : fscid(fscid_
), rank(rank_
)
90 bool operator<(mds_role_t
const &rhs
) const {
91 if (fscid
< rhs
.fscid
) {
93 } else if (fscid
== rhs
.fscid
) {
94 return rank
< rhs
.rank
;
100 bool is_none() const {
101 return (rank
== MDS_RANK_NONE
);
104 fs_cluster_id_t fscid
= FS_CLUSTER_ID_NONE
;
105 mds_rank_t rank
= MDS_RANK_NONE
;
107 inline std::ostream
& operator<<(std::ostream
& out
, const mds_role_t
& role
) {
108 return out
<< role
.fscid
<< ":" << role
.rank
;
112 inline string
gcap_string(int cap
)
115 if (cap
& CEPH_CAP_GSHARED
) s
+= "s";
116 if (cap
& CEPH_CAP_GEXCL
) s
+= "x";
117 if (cap
& CEPH_CAP_GCACHE
) s
+= "c";
118 if (cap
& CEPH_CAP_GRD
) s
+= "r";
119 if (cap
& CEPH_CAP_GWR
) s
+= "w";
120 if (cap
& CEPH_CAP_GBUFFER
) s
+= "b";
121 if (cap
& CEPH_CAP_GWREXTEND
) s
+= "a";
122 if (cap
& CEPH_CAP_GLAZYIO
) s
+= "l";
125 inline string
ccap_string(int cap
)
128 if (cap
& CEPH_CAP_PIN
) s
+= "p";
130 int a
= (cap
>> CEPH_CAP_SAUTH
) & 3;
131 if (a
) s
+= 'A' + gcap_string(a
);
133 a
= (cap
>> CEPH_CAP_SLINK
) & 3;
134 if (a
) s
+= 'L' + gcap_string(a
);
136 a
= (cap
>> CEPH_CAP_SXATTR
) & 3;
137 if (a
) s
+= 'X' + gcap_string(a
);
139 a
= cap
>> CEPH_CAP_SFILE
;
140 if (a
) s
+= 'F' + gcap_string(a
);
147 struct scatter_info_t
{
148 version_t version
= 0;
151 struct frag_info_t
: public scatter_info_t
{
152 int64_t size() const { return nfiles
+ nsubdirs
; }
155 *this = frag_info_t();
158 // *this += cur - acc;
159 void add_delta(const frag_info_t
&cur
, const frag_info_t
&acc
, bool *touched_mtime
=0, bool *touched_chattr
=0) {
160 if (cur
.mtime
> mtime
) {
163 *touched_mtime
= true;
165 if (cur
.change_attr
> change_attr
) {
166 change_attr
= cur
.change_attr
;
168 *touched_chattr
= true;
170 nfiles
+= cur
.nfiles
- acc
.nfiles
;
171 nsubdirs
+= cur
.nsubdirs
- acc
.nsubdirs
;
174 void add(const frag_info_t
& other
) {
175 if (other
.mtime
> mtime
)
177 if (other
.change_attr
> change_attr
)
178 change_attr
= other
.change_attr
;
179 nfiles
+= other
.nfiles
;
180 nsubdirs
+= other
.nsubdirs
;
183 bool same_sums(const frag_info_t
&o
) const {
184 return mtime
<= o
.mtime
&&
185 nfiles
== o
.nfiles
&&
186 nsubdirs
== o
.nsubdirs
;
189 void encode(bufferlist
&bl
) const;
190 void decode(bufferlist::const_iterator
& bl
);
191 void dump(Formatter
*f
) const;
192 static void generate_test_instances(std::list
<frag_info_t
*>& ls
);
196 uint64_t change_attr
= 0;
197 int64_t nfiles
= 0; // files
198 int64_t nsubdirs
= 0; // subdirs
200 WRITE_CLASS_ENCODER(frag_info_t
)
202 inline bool operator==(const frag_info_t
&l
, const frag_info_t
&r
) {
203 return memcmp(&l
, &r
, sizeof(l
)) == 0;
205 inline bool operator!=(const frag_info_t
&l
, const frag_info_t
&r
) {
209 std::ostream
& operator<<(std::ostream
&out
, const frag_info_t
&f
);
212 struct nest_info_t
: public scatter_info_t
{
213 int64_t rsize() const { return rfiles
+ rsubdirs
; }
216 *this = nest_info_t();
219 void sub(const nest_info_t
&other
) {
222 void add(const nest_info_t
&other
, int fac
=1) {
223 if (other
.rctime
> rctime
)
224 rctime
= other
.rctime
;
225 rbytes
+= fac
*other
.rbytes
;
226 rfiles
+= fac
*other
.rfiles
;
227 rsubdirs
+= fac
*other
.rsubdirs
;
228 rsnaps
+= fac
*other
.rsnaps
;
231 // *this += cur - acc;
232 void add_delta(const nest_info_t
&cur
, const nest_info_t
&acc
) {
233 if (cur
.rctime
> rctime
)
235 rbytes
+= cur
.rbytes
- acc
.rbytes
;
236 rfiles
+= cur
.rfiles
- acc
.rfiles
;
237 rsubdirs
+= cur
.rsubdirs
- acc
.rsubdirs
;
238 rsnaps
+= cur
.rsnaps
- acc
.rsnaps
;
241 bool same_sums(const nest_info_t
&o
) const {
242 return rctime
<= o
.rctime
&&
243 rbytes
== o
.rbytes
&&
244 rfiles
== o
.rfiles
&&
245 rsubdirs
== o
.rsubdirs
&&
249 void encode(bufferlist
&bl
) const;
250 void decode(bufferlist::const_iterator
& bl
);
251 void dump(Formatter
*f
) const;
252 static void generate_test_instances(std::list
<nest_info_t
*>& ls
);
254 // this frag + children
258 int64_t rsubdirs
= 0;
261 WRITE_CLASS_ENCODER(nest_info_t
)
263 inline bool operator==(const nest_info_t
&l
, const nest_info_t
&r
) {
264 return memcmp(&l
, &r
, sizeof(l
)) == 0;
266 inline bool operator!=(const nest_info_t
&l
, const nest_info_t
&r
) {
270 std::ostream
& operator<<(std::ostream
&out
, const nest_info_t
&n
);
274 vinodeno_t(inodeno_t i
, snapid_t s
) : ino(i
), snapid(s
) {}
276 void encode(bufferlist
& bl
) const {
281 void decode(bufferlist::const_iterator
& p
) {
290 WRITE_CLASS_ENCODER(vinodeno_t
)
292 inline bool operator==(const vinodeno_t
&l
, const vinodeno_t
&r
) {
293 return l
.ino
== r
.ino
&& l
.snapid
== r
.snapid
;
295 inline bool operator!=(const vinodeno_t
&l
, const vinodeno_t
&r
) {
298 inline bool operator<(const vinodeno_t
&l
, const vinodeno_t
&r
) {
301 (l
.ino
== r
.ino
&& l
.snapid
< r
.snapid
);
306 void encode(bufferlist
& bl
) const {
307 ENCODE_START(1, 1, bl
);
308 encode(max_bytes
, bl
);
309 encode(max_files
, bl
);
312 void decode(bufferlist::const_iterator
& p
) {
313 DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p
);
314 decode(max_bytes
, p
);
315 decode(max_files
, p
);
319 void dump(Formatter
*f
) const;
320 static void generate_test_instances(std::list
<quota_info_t
*>& ls
);
322 bool is_valid() const {
323 return max_bytes
>=0 && max_files
>=0;
325 bool is_enable() const {
326 return max_bytes
|| max_files
;
329 int64_t max_bytes
= 0;
330 int64_t max_files
= 0;
332 WRITE_CLASS_ENCODER(quota_info_t
)
334 inline bool operator==(const quota_info_t
&l
, const quota_info_t
&r
) {
335 return memcmp(&l
, &r
, sizeof(l
)) == 0;
338 ostream
& operator<<(ostream
&out
, const quota_info_t
&n
);
341 template<> struct hash
<vinodeno_t
> {
342 size_t operator()(const vinodeno_t
&vino
) const {
345 return H(vino
.ino
) ^ I(vino
.snapid
);
350 inline std::ostream
& operator<<(std::ostream
&out
, const vinodeno_t
&vino
) {
352 if (vino
.snapid
== CEPH_NOSNAP
)
354 else if (vino
.snapid
)
355 out
<< '.' << vino
.snapid
;
359 struct client_writeable_range_t
{
360 struct byte_range_t
{
361 uint64_t first
= 0, last
= 0; // interval client can write to
364 void encode(bufferlist
&bl
) const;
365 void decode(bufferlist::const_iterator
& bl
);
366 void dump(Formatter
*f
) const;
367 static void generate_test_instances(std::list
<client_writeable_range_t
*>& ls
);
370 snapid_t follows
= 0; // aka "data+metadata flushed thru"
373 inline void decode(client_writeable_range_t::byte_range_t
& range
, bufferlist::const_iterator
& bl
) {
374 decode(range
.first
, bl
);
375 decode(range
.last
, bl
);
378 WRITE_CLASS_ENCODER(client_writeable_range_t
)
380 std::ostream
& operator<<(std::ostream
& out
, const client_writeable_range_t
& r
);
382 inline bool operator==(const client_writeable_range_t
& l
,
383 const client_writeable_range_t
& r
) {
384 return l
.range
.first
== r
.range
.first
&& l
.range
.last
== r
.range
.last
&&
385 l
.follows
== r
.follows
;
388 struct inline_data_t
{
391 inline_data_t(const inline_data_t
& o
) : version(o
.version
) {
395 inline_data_t
& operator=(const inline_data_t
& o
) {
407 bufferlist
& get_data() {
409 blp
.reset(new bufferlist
);
412 size_t length() const { return blp
? blp
->length() : 0; }
414 bool operator==(const inline_data_t
& o
) const {
415 return length() == o
.length() &&
417 (*const_cast<bufferlist
*>(blp
.get()) == *const_cast<bufferlist
*>(o
.blp
.get())));
419 bool operator!=(const inline_data_t
& o
) const {
420 return !(*this == o
);
422 void encode(bufferlist
&bl
) const;
423 void decode(bufferlist::const_iterator
& bl
);
425 version_t version
= 1;
428 std::unique_ptr
<bufferlist
> blp
;
430 WRITE_CLASS_ENCODER(inline_data_t
)
433 DAMAGE_STATS
, // statistics (dirstat, size, etc)
434 DAMAGE_RSTATS
, // recursive statistics (rstat, accounted_rstat)
435 DAMAGE_FRAGTREE
// fragtree -- repair by searching
437 typedef uint32_t damage_flags_t
;
439 template<template<typename
> class Allocator
= std::allocator
>
443 * Do not forget to add any new fields to the compare() function.
446 using client_range_map
= std::map
<client_t
,client_writeable_range_t
,std::less
<client_t
>,Allocator
<std::pair
<const client_t
,client_writeable_range_t
>>>;
454 bool is_symlink() const { return (mode
& S_IFMT
) == S_IFLNK
; }
455 bool is_dir() const { return (mode
& S_IFMT
) == S_IFDIR
; }
456 bool is_file() const { return (mode
& S_IFMT
) == S_IFREG
; }
458 bool is_truncating() const { return (truncate_pending
> 0); }
459 void truncate(uint64_t old_size
, uint64_t new_size
) {
460 ceph_assert(new_size
< old_size
);
461 if (old_size
> max_size_ever
)
462 max_size_ever
= old_size
;
463 truncate_from
= old_size
;
465 rstat
.rbytes
= new_size
;
466 truncate_size
= size
;
471 bool has_layout() const {
472 return layout
!= file_layout_t();
475 void clear_layout() {
476 layout
= file_layout_t();
479 uint64_t get_layout_size_increment() const {
480 return layout
.get_period();
483 bool is_dirty_rstat() const { return !(rstat
== accounted_rstat
); }
485 uint64_t get_max_size() const {
487 for (std::map
<client_t
,client_writeable_range_t
>::const_iterator p
= client_ranges
.begin();
488 p
!= client_ranges
.end();
490 if (p
->second
.range
.last
> max
)
491 max
= p
->second
.range
.last
;
494 void set_max_size(uint64_t new_max
) {
496 client_ranges
.clear();
498 for (std::map
<client_t
,client_writeable_range_t
>::iterator p
= client_ranges
.begin();
499 p
!= client_ranges
.end();
501 p
->second
.range
.last
= new_max
;
505 void trim_client_ranges(snapid_t last
) {
506 std::map
<client_t
, client_writeable_range_t
>::iterator p
= client_ranges
.begin();
507 while (p
!= client_ranges
.end()) {
508 if (p
->second
.follows
>= last
)
509 client_ranges
.erase(p
++);
515 bool is_backtrace_updated() const {
516 return backtrace_version
== version
;
518 void update_backtrace(version_t pv
=0) {
519 backtrace_version
= pv
? pv
: version
;
522 void add_old_pool(int64_t l
) {
523 backtrace_version
= version
;
527 void encode(bufferlist
&bl
, uint64_t features
) const;
528 void decode(bufferlist::const_iterator
& bl
);
529 void dump(Formatter
*f
) const;
530 static void generate_test_instances(std::list
<inode_t
*>& ls
);
532 * Compare this inode_t with another that represent *the same inode*
533 * at different points in time.
534 * @pre The inodes are the same ino
536 * @param other The inode_t to compare ourselves with
537 * @param divergent A bool pointer which will be set to true
538 * if the values are different in a way that can't be explained
539 * by one being a newer version than the other.
541 * @returns 1 if we are newer than the other, 0 if equal, -1 if older.
543 int compare(const inode_t
&other
, bool *divergent
) const;
547 uint32_t rdev
= 0; // if special file
549 // affected by any inode change...
550 utime_t ctime
; // inode change time
551 utime_t btime
; // birth time
553 // perm (namespace permissions)
561 // file (data access)
562 ceph_dir_layout dir_layout
= {}; // [dir only]
563 file_layout_t layout
;
564 compact_set
<int64_t, std::less
<int64_t>, Allocator
<int64_t>> old_pools
;
565 uint64_t size
= 0; // on directory, # dentries
566 uint64_t max_size_ever
= 0; // max size the file has ever been
567 uint32_t truncate_seq
= 0;
568 uint64_t truncate_size
= 0, truncate_from
= 0;
569 uint32_t truncate_pending
= 0;
570 utime_t mtime
; // file data modify time.
571 utime_t atime
; // file data access time.
572 uint32_t time_warp_seq
= 0; // count of (potential) mtime/atime timewarps (i.e., utimes())
573 inline_data_t inline_data
; // FIXME check
576 uint64_t change_attr
= 0;
578 client_range_map client_ranges
; // client(s) can write to these ranges
580 // dirfrag, recursive accounting
581 frag_info_t dirstat
; // protected by my filelock
582 nest_info_t rstat
; // protected by my nestlock
583 nest_info_t accounted_rstat
; // protected by parent's nestlock
587 mds_rank_t export_pin
= MDS_RANK_NONE
;
589 double export_ephemeral_random_pin
= 0;
590 bool export_ephemeral_distributed_pin
= false;
593 version_t version
= 0; // auth only
594 version_t file_data_version
= 0; // auth only
595 version_t xattr_version
= 0;
597 utime_t last_scrub_stamp
; // start time of last complete scrub
598 version_t last_scrub_version
= 0;// (parent) start version of last complete scrub
600 version_t backtrace_version
= 0;
602 snapid_t oldest_snap
;
604 std::basic_string
<char,std::char_traits
<char>,Allocator
<char>> stray_prior_path
; //stores path before unlink
607 bool older_is_consistent(const inode_t
&other
) const;
610 // These methods may be moved back to mdstypes.cc when we have pmr
611 template<template<typename
> class Allocator
>
612 void inode_t
<Allocator
>::encode(bufferlist
&bl
, uint64_t features
) const
614 ENCODE_START(16, 6, bl
);
628 encode(anchored
, bl
);
631 encode(dir_layout
, bl
);
632 encode(layout
, bl
, features
);
634 encode(truncate_seq
, bl
);
635 encode(truncate_size
, bl
);
636 encode(truncate_from
, bl
);
637 encode(truncate_pending
, bl
);
640 encode(time_warp_seq
, bl
);
641 encode(client_ranges
, bl
);
645 encode(accounted_rstat
, bl
);
648 encode(file_data_version
, bl
);
649 encode(xattr_version
, bl
);
650 encode(backtrace_version
, bl
);
651 encode(old_pools
, bl
);
652 encode(max_size_ever
, bl
);
653 encode(inline_data
, bl
);
656 encode(stray_prior_path
, bl
);
658 encode(last_scrub_version
, bl
);
659 encode(last_scrub_stamp
, bl
);
662 encode(change_attr
, bl
);
664 encode(export_pin
, bl
);
666 encode(export_ephemeral_random_pin
, bl
);
667 encode(export_ephemeral_distributed_pin
, bl
);
672 template<template<typename
> class Allocator
>
673 void inode_t
<Allocator
>::decode(bufferlist::const_iterator
&p
)
675 DECODE_START_LEGACY_COMPAT_LEN(16, 6, 6, p
);
692 decode(dir_layout
, p
);
694 // FIPS zeroization audit 20191117: this memset is not security related.
695 memset(&dir_layout
, 0, sizeof(dir_layout
));
699 decode(truncate_seq
, p
);
700 decode(truncate_size
, p
);
701 decode(truncate_from
, p
);
703 decode(truncate_pending
, p
);
705 truncate_pending
= 0;
708 decode(time_warp_seq
, p
);
710 decode(client_ranges
, p
);
712 map
<client_t
, client_writeable_range_t::byte_range_t
> m
;
714 for (map
<client_t
, client_writeable_range_t::byte_range_t
>::iterator
715 q
= m
.begin(); q
!= m
.end(); ++q
)
716 client_ranges
[q
->first
].range
= q
->second
;
721 decode(accounted_rstat
, p
);
724 decode(file_data_version
, p
);
725 decode(xattr_version
, p
);
727 decode(backtrace_version
, p
);
729 decode(old_pools
, p
);
731 decode(max_size_ever
, p
);
733 decode(inline_data
, p
);
735 inline_data
.version
= CEPH_INLINE_NONE
;
738 backtrace_version
= 0; // force update backtrace
742 if (struct_v
>= 12) {
745 stray_prior_path
= std::string_view(tmp
);
748 if (struct_v
>= 13) {
749 decode(last_scrub_version
, p
);
750 decode(last_scrub_stamp
, p
);
752 if (struct_v
>= 14) {
754 decode(change_attr
, p
);
760 if (struct_v
>= 15) {
761 decode(export_pin
, p
);
763 export_pin
= MDS_RANK_NONE
;
766 if (struct_v
>= 16) {
767 decode(export_ephemeral_random_pin
, p
);
768 decode(export_ephemeral_distributed_pin
, p
);
770 export_ephemeral_random_pin
= 0;
771 export_ephemeral_distributed_pin
= false;
777 template<template<typename
> class Allocator
>
778 void inode_t
<Allocator
>::dump(Formatter
*f
) const
780 f
->dump_unsigned("ino", ino
);
781 f
->dump_unsigned("rdev", rdev
);
782 f
->dump_stream("ctime") << ctime
;
783 f
->dump_stream("btime") << btime
;
784 f
->dump_unsigned("mode", mode
);
785 f
->dump_unsigned("uid", uid
);
786 f
->dump_unsigned("gid", gid
);
787 f
->dump_unsigned("nlink", nlink
);
789 f
->open_object_section("dir_layout");
790 ::dump(dir_layout
, f
);
793 f
->dump_object("layout", layout
);
795 f
->open_array_section("old_pools");
796 for (const auto &p
: old_pools
) {
797 f
->dump_int("pool", p
);
801 f
->dump_unsigned("size", size
);
802 f
->dump_unsigned("truncate_seq", truncate_seq
);
803 f
->dump_unsigned("truncate_size", truncate_size
);
804 f
->dump_unsigned("truncate_from", truncate_from
);
805 f
->dump_unsigned("truncate_pending", truncate_pending
);
806 f
->dump_stream("mtime") << mtime
;
807 f
->dump_stream("atime") << atime
;
808 f
->dump_unsigned("time_warp_seq", time_warp_seq
);
809 f
->dump_unsigned("change_attr", change_attr
);
810 f
->dump_int("export_pin", export_pin
);
811 f
->dump_int("export_ephemeral_random_pin", export_ephemeral_random_pin
);
812 f
->dump_bool("export_ephemeral_distributed_pin", export_ephemeral_distributed_pin
);
814 f
->open_array_section("client_ranges");
815 for (const auto &p
: client_ranges
) {
816 f
->open_object_section("client");
817 f
->dump_unsigned("client", p
.first
.v
);
823 f
->open_object_section("dirstat");
827 f
->open_object_section("rstat");
831 f
->open_object_section("accounted_rstat");
832 accounted_rstat
.dump(f
);
835 f
->dump_unsigned("version", version
);
836 f
->dump_unsigned("file_data_version", file_data_version
);
837 f
->dump_unsigned("xattr_version", xattr_version
);
838 f
->dump_unsigned("backtrace_version", backtrace_version
);
840 f
->dump_string("stray_prior_path", stray_prior_path
);
841 f
->dump_unsigned("max_size_ever", max_size_ever
);
843 f
->open_object_section("quota");
847 f
->dump_stream("last_scrub_stamp") << last_scrub_stamp
;
848 f
->dump_unsigned("last_scrub_version", last_scrub_version
);
851 template<template<typename
> class Allocator
>
852 void inode_t
<Allocator
>::generate_test_instances(std::list
<inode_t
*>& ls
)
854 ls
.push_back(new inode_t
<Allocator
>);
855 ls
.push_back(new inode_t
<Allocator
>);
860 template<template<typename
> class Allocator
>
861 int inode_t
<Allocator
>::compare(const inode_t
<Allocator
> &other
, bool *divergent
) const
863 ceph_assert(ino
== other
.ino
);
865 if (version
== other
.version
) {
866 if (rdev
!= other
.rdev
||
867 ctime
!= other
.ctime
||
868 btime
!= other
.btime
||
869 mode
!= other
.mode
||
872 nlink
!= other
.nlink
||
873 memcmp(&dir_layout
, &other
.dir_layout
, sizeof(dir_layout
)) ||
874 layout
!= other
.layout
||
875 old_pools
!= other
.old_pools
||
876 size
!= other
.size
||
877 max_size_ever
!= other
.max_size_ever
||
878 truncate_seq
!= other
.truncate_seq
||
879 truncate_size
!= other
.truncate_size
||
880 truncate_from
!= other
.truncate_from
||
881 truncate_pending
!= other
.truncate_pending
||
882 change_attr
!= other
.change_attr
||
883 mtime
!= other
.mtime
||
884 atime
!= other
.atime
||
885 time_warp_seq
!= other
.time_warp_seq
||
886 inline_data
!= other
.inline_data
||
887 client_ranges
!= other
.client_ranges
||
888 !(dirstat
== other
.dirstat
) ||
889 !(rstat
== other
.rstat
) ||
890 !(accounted_rstat
== other
.accounted_rstat
) ||
891 file_data_version
!= other
.file_data_version
||
892 xattr_version
!= other
.xattr_version
||
893 backtrace_version
!= other
.backtrace_version
) {
897 } else if (version
> other
.version
) {
898 *divergent
= !older_is_consistent(other
);
901 ceph_assert(version
< other
.version
);
902 *divergent
= !other
.older_is_consistent(*this);
907 template<template<typename
> class Allocator
>
908 bool inode_t
<Allocator
>::older_is_consistent(const inode_t
<Allocator
> &other
) const
910 if (max_size_ever
< other
.max_size_ever
||
911 truncate_seq
< other
.truncate_seq
||
912 time_warp_seq
< other
.time_warp_seq
||
913 inline_data
.version
< other
.inline_data
.version
||
914 dirstat
.version
< other
.dirstat
.version
||
915 rstat
.version
< other
.rstat
.version
||
916 accounted_rstat
.version
< other
.accounted_rstat
.version
||
917 file_data_version
< other
.file_data_version
||
918 xattr_version
< other
.xattr_version
||
919 backtrace_version
< other
.backtrace_version
) {
925 template<template<typename
> class Allocator
>
926 inline void encode(const inode_t
<Allocator
> &c
, ::ceph::bufferlist
&bl
, uint64_t features
)
929 c
.encode(bl
, features
);
930 ENCODE_DUMP_POST(cl
);
932 template<template<typename
> class Allocator
>
933 inline void decode(inode_t
<Allocator
> &c
, ::ceph::bufferlist::const_iterator
&p
)
938 template<template<typename
> class Allocator
>
939 using alloc_string
= std::basic_string
<char,std::char_traits
<char>,Allocator
<char>>;
941 template<template<typename
> class Allocator
>
942 using xattr_map
= compact_map
<alloc_string
<Allocator
>, bufferptr
, std::less
<alloc_string
<Allocator
>>, Allocator
<std::pair
<const alloc_string
<Allocator
>, bufferptr
>>>; // FIXME bufferptr not in mempool
944 template<template<typename
> class Allocator
>
945 inline void decode_noshare(xattr_map
<Allocator
>& xattrs
, ceph::buffer::list::const_iterator
&p
)
950 alloc_string
<Allocator
> key
;
954 p
.copy_deep(len
, xattrs
[key
]);
958 template<template<typename
> class Allocator
= std::allocator
>
961 inode_t
<Allocator
> inode
;
962 xattr_map
<Allocator
> xattrs
;
964 void encode(bufferlist
&bl
, uint64_t features
) const;
965 void decode(bufferlist::const_iterator
& bl
);
966 void dump(Formatter
*f
) const;
967 static void generate_test_instances(std::list
<old_inode_t
*>& ls
);
970 // These methods may be moved back to mdstypes.cc when we have pmr
971 template<template<typename
> class Allocator
>
972 void old_inode_t
<Allocator
>::encode(bufferlist
& bl
, uint64_t features
) const
974 ENCODE_START(2, 2, bl
);
976 encode(inode
, bl
, features
);
981 template<template<typename
> class Allocator
>
982 void old_inode_t
<Allocator
>::decode(bufferlist::const_iterator
& bl
)
984 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl
);
987 decode_noshare
<Allocator
>(xattrs
, bl
);
991 template<template<typename
> class Allocator
>
992 void old_inode_t
<Allocator
>::dump(Formatter
*f
) const
994 f
->dump_unsigned("first", first
);
996 f
->open_object_section("xattrs");
997 for (const auto &p
: xattrs
) {
998 std::string
v(p
.second
.c_str(), p
.second
.length());
999 f
->dump_string(p
.first
.c_str(), v
);
1004 template<template<typename
> class Allocator
>
1005 void old_inode_t
<Allocator
>::generate_test_instances(std::list
<old_inode_t
<Allocator
>*>& ls
)
1007 ls
.push_back(new old_inode_t
<Allocator
>);
1008 ls
.push_back(new old_inode_t
<Allocator
>);
1009 ls
.back()->first
= 2;
1010 std::list
<inode_t
<Allocator
>*> ils
;
1011 inode_t
<Allocator
>::generate_test_instances(ils
);
1012 ls
.back()->inode
= *ils
.back();
1013 ls
.back()->xattrs
["user.foo"] = buffer::copy("asdf", 4);
1014 ls
.back()->xattrs
["user.unprintable"] = buffer::copy("\000\001\002", 3);
1017 template<template<typename
> class Allocator
>
1018 inline void encode(const old_inode_t
<Allocator
> &c
, ::ceph::bufferlist
&bl
, uint64_t features
)
1021 c
.encode(bl
, features
);
1022 ENCODE_DUMP_POST(cl
);
1024 template<template<typename
> class Allocator
>
1025 inline void decode(old_inode_t
<Allocator
> &c
, ::ceph::bufferlist::const_iterator
&p
)
1031 * like an inode, but for a dir frag
1034 void encode(bufferlist
&bl
) const;
1035 void decode(bufferlist::const_iterator
& bl
);
1036 void dump(Formatter
*f
) const;
1037 static void generate_test_instances(std::list
<fnode_t
*>& ls
);
1039 version_t version
= 0;
1040 snapid_t snap_purged_thru
; // the max_last_destroy snapid we've been purged thru
1041 frag_info_t fragstat
, accounted_fragstat
;
1042 nest_info_t rstat
, accounted_rstat
;
1043 damage_flags_t damage_flags
= 0;
1045 // we know we and all our descendants have been scrubbed since this version
1046 version_t recursive_scrub_version
= 0;
1047 utime_t recursive_scrub_stamp
;
1048 // version at which we last scrubbed our personal data structures
1049 version_t localized_scrub_version
= 0;
1050 utime_t localized_scrub_stamp
;
1052 WRITE_CLASS_ENCODER(fnode_t
)
1055 struct old_rstat_t
{
1056 void encode(bufferlist
& bl
) const;
1057 void decode(bufferlist::const_iterator
& p
);
1058 void dump(Formatter
*f
) const;
1059 static void generate_test_instances(std::list
<old_rstat_t
*>& ls
);
1062 nest_info_t rstat
, accounted_rstat
;
1064 WRITE_CLASS_ENCODER(old_rstat_t
)
1066 inline std::ostream
& operator<<(std::ostream
& out
, const old_rstat_t
& o
) {
1067 return out
<< "old_rstat(first " << o
.first
<< " " << o
.rstat
<< " " << o
.accounted_rstat
<< ")";
1070 class feature_bitset_t
{
1072 typedef uint64_t block_type
;
1073 static const size_t bits_per_block
= sizeof(block_type
) * 8;
1075 feature_bitset_t(const feature_bitset_t
& other
) : _vec(other
._vec
) {}
1076 feature_bitset_t(feature_bitset_t
&& other
) : _vec(std::move(other
._vec
)) {}
1077 feature_bitset_t(unsigned long value
= 0);
1078 feature_bitset_t(const vector
<size_t>& array
);
1079 feature_bitset_t
& operator=(const feature_bitset_t
& other
) {
1083 feature_bitset_t
& operator=(feature_bitset_t
&& other
) {
1084 _vec
= std::move(other
._vec
);
1087 feature_bitset_t
& operator-=(const feature_bitset_t
& other
);
1088 bool empty() const {
1089 //block_type is a uint64_t. If the vector is only composed of 0s, then it's still "empty"
1090 for (auto& v
: _vec
) {
1096 bool test(size_t bit
) const {
1097 if (bit
>= bits_per_block
* _vec
.size())
1099 return _vec
[bit
/ bits_per_block
] & ((block_type
)1 << (bit
% bits_per_block
));
1104 void encode(bufferlist
& bl
) const;
1105 void decode(bufferlist::const_iterator
&p
);
1106 void dump(Formatter
*f
) const;
1107 void print(ostream
& out
) const;
1109 vector
<block_type
> _vec
;
1111 WRITE_CLASS_ENCODER(feature_bitset_t
)
1113 inline std::ostream
& operator<<(std::ostream
& out
, const feature_bitset_t
& s
) {
1118 struct metric_spec_t
{
1120 metric_spec_t(const metric_spec_t
& other
) :
1121 metric_flags(other
.metric_flags
) {}
1122 metric_spec_t(metric_spec_t
&& other
) :
1123 metric_flags(std::move(other
.metric_flags
)) {}
1124 metric_spec_t(const feature_bitset_t
& mf
) :
1126 metric_spec_t(feature_bitset_t
&& mf
) :
1127 metric_flags(std::move(mf
)) {}
1129 metric_spec_t
& operator=(const metric_spec_t
& other
) {
1130 metric_flags
= other
.metric_flags
;
1133 metric_spec_t
& operator=(metric_spec_t
&& other
) {
1134 metric_flags
= std::move(other
.metric_flags
);
1138 bool empty() const {
1139 return metric_flags
.empty();
1143 metric_flags
.clear();
1146 void encode(bufferlist
& bl
) const;
1147 void decode(bufferlist::const_iterator
& p
);
1148 void dump(Formatter
*f
) const;
1149 void print(ostream
& out
) const;
1151 // set of metrics that a client is capable of forwarding
1152 feature_bitset_t metric_flags
;
1154 WRITE_CLASS_ENCODER(metric_spec_t
)
1156 inline std::ostream
& operator<<(std::ostream
& out
, const metric_spec_t
& mst
) {
1164 struct client_metadata_t
{
1165 using kv_map_t
= std::map
<std::string
,std::string
>;
1166 using iterator
= kv_map_t::const_iterator
;
1168 client_metadata_t() {}
1169 client_metadata_t(const kv_map_t
& kv
, const feature_bitset_t
&f
, const metric_spec_t
&mst
) :
1173 client_metadata_t
& operator=(const client_metadata_t
& other
) {
1174 kv_map
= other
.kv_map
;
1175 features
= other
.features
;
1176 metric_spec
= other
.metric_spec
;
1180 bool empty() const { return kv_map
.empty() && features
.empty() && metric_spec
.empty(); }
1181 iterator
find(const std::string
& key
) const { return kv_map
.find(key
); }
1182 iterator
begin() const { return kv_map
.begin(); }
1183 iterator
end() const { return kv_map
.end(); }
1184 void erase(iterator it
) { kv_map
.erase(it
); }
1185 std::string
& operator[](const std::string
& key
) { return kv_map
[key
]; }
1186 void merge(const client_metadata_t
& other
) {
1187 kv_map
.insert(other
.kv_map
.begin(), other
.kv_map
.end());
1188 features
= other
.features
;
1189 metric_spec
= other
.metric_spec
;
1194 metric_spec
.clear();
1197 void encode(bufferlist
& bl
) const;
1198 void decode(bufferlist::const_iterator
& p
);
1199 void dump(Formatter
*f
) const;
1202 feature_bitset_t features
;
1203 metric_spec_t metric_spec
;
1205 WRITE_CLASS_ENCODER(client_metadata_t
)
1208 * session_info_t - durable part of a Session
1210 struct session_info_t
{
1211 client_t
get_client() const { return client_t(inst
.name
.num()); }
1212 bool has_feature(size_t bit
) const { return client_metadata
.features
.test(bit
); }
1213 const entity_name_t
& get_source() const { return inst
.name
; }
1216 prealloc_inos
.clear();
1218 completed_requests
.clear();
1219 completed_flushes
.clear();
1220 client_metadata
.clear();
1223 void encode(bufferlist
& bl
, uint64_t features
) const;
1224 void decode(bufferlist::const_iterator
& p
);
1225 void dump(Formatter
*f
) const;
1226 static void generate_test_instances(std::list
<session_info_t
*>& ls
);
1229 std::map
<ceph_tid_t
,inodeno_t
> completed_requests
;
1230 interval_set
<inodeno_t
> prealloc_inos
; // preallocated, ready to use.
1231 interval_set
<inodeno_t
> used_inos
; // journaling use
1232 client_metadata_t client_metadata
;
1233 std::set
<ceph_tid_t
> completed_flushes
;
1234 EntityName auth_name
;
1236 WRITE_CLASS_ENCODER_FEATURES(session_info_t
)
// Key made of (snapid, name, hash). It can be rendered as a single string
// "<name>_<suffix>" and parsed back with decode_helper(). Some original
// lines of this struct are not visible in this view.
1239 struct dentry_key_t
{
// Stores the three components as given; `h` defaults to 0.
1241 dentry_key_t(snapid_t s
, std::string_view n
, __u32 h
=0) :
1242 snapid(s
), name(n
), hash(h
) {}
// A key counts as valid when it has a non-empty name or a non-zero snapid.
// NOTE(review): not const-qualified although it only reads members --
// consider adding const.
1244 bool is_valid() { return name
.length() || snapid
; }
1246 // Encode into something that can be decoded as a string:
1247 // name_head (snapid == CEPH_NOSNAP) or name_%x (any other snapid, in hex).
1248 void encode(bufferlist
& bl
) const {
// String form of the key: name, '_', then either the snapid in hex or the
// literal "head". `b` and `oss` are declared on lines not shown here.
1254 void encode(string
& key
) const {
1256 if (snapid
!= CEPH_NOSNAP
) {
1257 uint64_t val(snapid
);
1258 snprintf(b
, sizeof(b
), "%" PRIx64
, val
);
1260 snprintf(b
, sizeof(b
), "%s", "head");
1263 oss
<< name
<< "_" << b
;
// Bufferlist overload: obtains `key` (on lines not shown) and forwards to the
// string_view overload below.
1266 static void decode_helper(bufferlist::const_iterator
& bl
, string
& nm
, snapid_t
& sn
) {
1269 decode_helper(key
, nm
, sn
);
// Splits `key` at its LAST '_' (asserting that one exists). The suffix is
// compared against "head"; otherwise it is parsed as hexadecimal with sscanf.
// Everything before the separator becomes `nm`. The assignments to `sn` are
// on lines not shown here.
1271 static void decode_helper(std::string_view key
, string
& nm
, snapid_t
& sn
) {
1272 size_t i
= key
.find_last_of('_');
1273 ceph_assert(i
!= string::npos
);
1274 if (key
.compare(i
+1, std::string_view::npos
, "head") == 0) {
1279 long long unsigned x
= 0;
1280 std::string
x_str(key
.substr(i
+1));
1281 sscanf(x_str
.c_str(), "%llx", &x
);
1284 nm
= key
.substr(0, i
);
1287 snapid_t snapid
= 0;
// Non-owning view: the characters it refers to must outlive this key.
1288 std::string_view name
;
// Streams a dentry key as "(name,snapid)".
1292 inline std::ostream
& operator<<(std::ostream
& out
, const dentry_key_t
&k
)
1294 return out
<< "(" << k
.name
<< "," << k
.snapid
<< ")";
// Ordering for dentry keys: first by ceph_frag_value() of the hash, then by
// name, then by snapid. The branches between the visible statements are on
// lines not shown here.
// NOTE(review): the hash difference is stored in an int -- assumes
// ceph_frag_value() results are small enough not to overflow; confirm.
1297 inline bool operator<(const dentry_key_t
& k1
, const dentry_key_t
& k2
)
1300 * order by hash, name, snap
1302 int c
= ceph_frag_value(k1
.hash
) - ceph_frag_value(k2
.hash
);
1305 c
= k1
.name
.compare(k2
.name
);
1308 return k1
.snapid
< k2
.snapid
;
1312 * string_snap_t is a simple (string, snapid_t) pair
// (name, snapid) pair. The member declarations are on lines not shown here.
1314 struct string_snap_t
{
// Initialises name from `n` and snapid from `s`.
1316 string_snap_t(std::string_view n
, snapid_t s
) : name(n
), snapid(s
) {}
// Serialization, dump and test-instance hooks; declarations only.
1318 void encode(bufferlist
& bl
) const;
1319 void decode(bufferlist::const_iterator
& p
);
1320 void dump(Formatter
*f
) const;
1321 static void generate_test_instances(std::list
<string_snap_t
*>& ls
);
1326 WRITE_CLASS_ENCODER(string_snap_t
)
// Lexicographic ordering: by name first; equal names are ordered by snapid.
1328 inline bool operator<(const string_snap_t
& l
, const string_snap_t
& r
) {
1329 int c
= l
.name
.compare(r
.name
);
1330 return c
< 0 || (c
== 0 && l
.snapid
< r
.snapid
);
// Streams a string_snap_t as "(name,snapid)".
1333 inline std::ostream
& operator<<(std::ostream
& out
, const string_snap_t
&k
)
1335 return out
<< "(" << k
.name
<< "," << k
.snapid
<< ")";
1339 * mds_table_pending_t
1341 * For MDSs requesting any pending ops: the child needs to encode the corresponding
1342 * pending mutation state in the table.
// Only the serialization/dump/test hooks of this struct are visible; its data
// members are on lines not shown here.
1344 struct mds_table_pending_t
{
1345 void encode(bufferlist
& bl
) const;
1346 void decode(bufferlist::const_iterator
& bl
);
1347 void dump(Formatter
*f
) const;
1348 static void generate_test_instances(std::list
<mds_table_pending_t
*>& ls
);
1354 WRITE_CLASS_ENCODER(mds_table_pending_t
)
// Request id made of an entity name and a tid. The member declarations and
// the bodies of encode()/decode() are on lines not shown here.
1357 struct metareqid_t
{
// Initialises name from `n` and tid from `t`.
1359 metareqid_t(entity_name_t n
, ceph_tid_t t
) : name(n
), tid(t
) {}
1360 void encode(bufferlist
& bl
) const {
1365 void decode(bufferlist::const_iterator
&p
) {
1374 WRITE_CLASS_ENCODER(metareqid_t
)
// Streams a request id as "<name>:<tid>".
1376 inline std::ostream
& operator<<(std::ostream
& out
, const metareqid_t
& r
) {
1377 return out
<< r
.name
<< ":" << r
.tid
;
// Two request ids are equal when both the name and the tid match.
1380 inline bool operator==(const metareqid_t
& l
, const metareqid_t
& r
) {
1381 return (l
.name
== r
.name
) && (l
.tid
== r
.tid
);
// Inequality: the name or the tid differs (the exact negation of operator==).
1383 inline bool operator!=(const metareqid_t
& l
, const metareqid_t
& r
) {
1384 return (l
.name
!= r
.name
) || (l
.tid
!= r
.tid
);
// Strict ordering: by name first; ids with equal names are ordered by tid.
1386 inline bool operator<(const metareqid_t
& l
, const metareqid_t
& r
) {
1387 return (l
.name
< r
.name
) ||
1388 (l
.name
== r
.name
&& l
.tid
< r
.tid
);
// Same ordering as operator<, but also true when name and tid are both equal.
1390 inline bool operator<=(const metareqid_t
& l
, const metareqid_t
& r
) {
1391 return (l
.name
< r
.name
) ||
1392 (l
.name
== r
.name
&& l
.tid
<= r
.tid
);
// Defined as the negation of operator<=.
1394 inline bool operator>(const metareqid_t
& l
, const metareqid_t
& r
) { return !(l
<= r
); }
// Defined as the negation of operator<.
1395 inline bool operator>=(const metareqid_t
& l
, const metareqid_t
& r
) { return !(l
< r
); }
// Hash specialisation for metareqid_t: XORs the hashes of the name's number,
// the name's type and the tid. The hasher `H` is declared on a line not shown
// here, as is the enclosing namespace.
1398 template<> struct hash
<metareqid_t
> {
1399 size_t operator()(const metareqid_t
&r
) const {
1401 return H(r
.name
.num()) ^ H(r
.name
.type()) ^ H(r
.tid
);
1406 // cap info for client reconnect
// Wraps a ceph_mds_cap_reconnect record plus a snap_follows value. Parts of
// the constructor and some members are on lines not shown here.
1407 struct cap_reconnect_t
{
1408 cap_reconnect_t() {}
// Fills capinfo from the arguments. Visible assignments: cap_id <- cap_id,
// snaprealm <- sr, pathbase <- pino, flock_len <- 0. The handling of
// p, w, i, sf and lb is on lines not shown here.
1409 cap_reconnect_t(uint64_t cap_id
, inodeno_t pino
, std::string_view p
, int w
, int i
,
1410 inodeno_t sr
, snapid_t sf
, bufferlist
& lb
) :
1412 capinfo
.cap_id
= cap_id
;
1415 capinfo
.snaprealm
= sr
;
1416 capinfo
.pathbase
= pino
;
1417 capinfo
.flock_len
= 0;
// Current and "old" wire-format encoders/decoders; declarations only.
1421 void encode(bufferlist
& bl
) const;
1422 void decode(bufferlist::const_iterator
& bl
);
1423 void encode_old(bufferlist
& bl
) const;
1424 void decode_old(bufferlist::const_iterator
& bl
);
1426 void dump(Formatter
*f
) const;
1427 static void generate_test_instances(std::list
<cap_reconnect_t
*>& ls
);
// Declared mutable, so const member functions may modify it; value-initialised.
1430 mutable ceph_mds_cap_reconnect capinfo
= {};
1431 snapid_t snap_follows
= 0;
1434 WRITE_CLASS_ENCODER(cap_reconnect_t
)
// Wraps a ceph_mds_snaprealm_reconnect record. Parts of the constructor body
// are on lines not shown here.
1436 struct snaprealm_reconnect_t
{
1437 snaprealm_reconnect_t() {}
// Fills `realm` from the arguments; only the assignment of `parent` is
// visible in this view.
1438 snaprealm_reconnect_t(inodeno_t ino
, snapid_t seq
, inodeno_t parent
) {
1441 realm
.parent
= parent
;
// Current and "old" wire-format encoders/decoders; declarations only.
1443 void encode(bufferlist
& bl
) const;
1444 void decode(bufferlist::const_iterator
& bl
);
1445 void encode_old(bufferlist
& bl
) const;
1446 void decode_old(bufferlist::const_iterator
& bl
);
1448 void dump(Formatter
*f
) const;
1449 static void generate_test_instances(std::list
<snaprealm_reconnect_t
*>& ls
);
// Declared mutable, so const member functions may modify it; value-initialised.
1451 mutable ceph_mds_snaprealm_reconnect realm
= {};
1453 WRITE_CLASS_ENCODER(snaprealm_reconnect_t
)
1455 // compat for pre-FLOCK feature
// Packed record (no padding between members) encoded with the raw encoder
// macro below. The leading members are on lines not shown here;
// old_cap_reconnect_t reads/writes cap_id, wanted, issued, snaprealm and
// pathbase on it.
1456 struct old_ceph_mds_cap_reconnect
{
1461 struct ceph_timespec old_mtime
, old_atime
;
1462 ceph_le64 snaprealm
;
1463 ceph_le64 pathbase
; /* base ino for our path to this ino */
1464 } __attribute__ ((packed
));
1465 WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect
)
// Legacy counterpart of cap_reconnect_t built around
// old_ceph_mds_cap_reconnect. It converts to and from cap_reconnect_t by
// copying the five shared capinfo fields. Some lines of each member function
// are not visible in this view.
1467 struct old_cap_reconnect_t
{
// Assign from the current representation: copies cap_id, wanted, issued,
// snaprealm and pathbase.
// NOTE(review): returns a const reference, which is unusual for operator=.
1468 const old_cap_reconnect_t
& operator=(const cap_reconnect_t
& n
) {
1470 capinfo
.cap_id
= n
.capinfo
.cap_id
;
1471 capinfo
.wanted
= n
.capinfo
.wanted
;
1472 capinfo
.issued
= n
.capinfo
.issued
;
1473 capinfo
.snaprealm
= n
.capinfo
.snaprealm
;
1474 capinfo
.pathbase
= n
.capinfo
.pathbase
;
// Implicit conversion to the current representation: copies the same five
// fields into a cap_reconnect_t `n` (declared on a line not shown here).
1477 operator cap_reconnect_t() {
1480 n
.capinfo
.cap_id
= capinfo
.cap_id
;
1481 n
.capinfo
.wanted
= capinfo
.wanted
;
1482 n
.capinfo
.issued
= capinfo
.issued
;
1483 n
.capinfo
.snaprealm
= capinfo
.snaprealm
;
1484 n
.capinfo
.pathbase
= capinfo
.pathbase
;
// Encodes capinfo into `bl`; any other encoded fields are on lines not shown.
1488 void encode(bufferlist
& bl
) const {
1491 encode(capinfo
, bl
);
// Decodes capinfo from `bl`; mirrors encode() above.
1493 void decode(bufferlist::const_iterator
& bl
) {
1496 decode(capinfo
, bl
);
1500 old_ceph_mds_cap_reconnect capinfo
;
1502 WRITE_CLASS_ENCODER(old_cap_reconnect_t
)
// Interior of dirfrag_t (the struct header is on a line not shown here).
// Initialises ino from `i` and frag from `f`.
1507 dirfrag_t(inodeno_t i
, frag_t f
) : ino(i
), frag(f
) { }
// Inline encode/decode; their bodies are on lines not shown here.
1509 void encode(bufferlist
& bl
) const {
1514 void decode(bufferlist::const_iterator
& bl
) {
1523 WRITE_CLASS_ENCODER(dirfrag_t
)
// Streams a dirfrag_t. The visible statement appends "." followed by the frag
// only when the frag is not the root frag; the output before it and the
// return are on lines not shown here.
1525 inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_t
&df
) {
1527 if (!df
.frag
.is_root()) out
<< "." << df
.frag
;
// Ordering for dirfrag_t: by ino first, then by frag when the inos are equal.
// Arguments are taken by value. The fall-through return is on a line not
// shown here.
1530 inline bool operator<(dirfrag_t l
, dirfrag_t r
) {
1531 if (l
.ino
< r
.ino
) return true;
1532 if (l
.ino
== r
.ino
&& l
.frag
< r
.frag
) return true;
// Two dirfrags are equal when both ino and frag match.
1535 inline bool operator==(dirfrag_t l
, dirfrag_t r
) {
1536 return l
.ino
== r
.ino
&& l
.frag
== r
.frag
;
// Hash specialisation for dirfrag_t: XORs a 64-bit rjhash of the ino with a
// 32-bit rjhash of the frag. Both hashers are function-local statics.
1540 template<> struct hash
<dirfrag_t
> {
1541 size_t operator()(const dirfrag_t
&df
) const {
1542 static rjhash
<uint64_t> H
;
1543 static rjhash
<uint32_t> I
;
1544 return H(df
.ino
) ^ I(df
.frag
);
1549 // ================================================================
// Slot indices into dirfrag_load_vec_t::vec (five counters);
// dirfrag_load_vec_t::meta_load() weights each slot.
1550 #define META_POP_IRD 0
1551 #define META_POP_IWR 1
1552 #define META_POP_READDIR 2
1553 #define META_POP_FETCH 3
1554 #define META_POP_STORE 4
// Fixed array of NUM (= 2) DecayCounters. Access specifiers and several
// method bodies are on lines not shown here.
1557 class inode_load_vec_t
{
1559 using time
= DecayCounter::time
;
1560 using clock
= DecayCounter::clock
;
1561 static const size_t NUM
= 2;
// Default: both counters built from a default-constructed DecayRate.
1563 inode_load_vec_t() : vec
{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {}
// Both counters built from the supplied rate.
1564 inode_load_vec_t(const DecayRate
&rate
) : vec
{DecayCounter(rate
), DecayCounter(rate
)} {}
// Returns a mutable reference to a counter selected by `t` (body not visible).
1566 DecayCounter
&get(int t
) {
// Loop over every counter; the enclosing function and the loop body are on
// lines not shown here.
1570 for (auto &d
: vec
) {
1574 void encode(bufferlist
&bl
) const;
1575 void decode(bufferlist::const_iterator
& p
);
1576 void dump(Formatter
*f
) const;
1577 static void generate_test_instances(std::list
<inode_load_vec_t
*>& ls
);
1580 std::array
<DecayCounter
, NUM
> vec
;
// Free-function encode/decode overloads for inode_load_vec_t. Their bodies
// are on lines not shown here -- presumably they forward to the member
// functions; confirm.
1582 inline void encode(const inode_load_vec_t
&c
, bufferlist
&bl
) {
1585 inline void decode(inode_load_vec_t
& c
, bufferlist::const_iterator
&p
) {
// Fixed array of NUM (= 5) DecayCounters, one per META_POP_* slot, plus
// arithmetic helpers and a weighted meta_load() summary. Access specifiers
// and several statement lines are not visible in this view.
1589 class dirfrag_load_vec_t
{
1591 using time
= DecayCounter::time
;
1592 using clock
= DecayCounter::clock
;
1593 static const size_t NUM
= 5;
// Default: all five counters built from a default-constructed DecayRate.
1595 dirfrag_load_vec_t() :
1596 vec
{DecayCounter(DecayRate()),
1597 DecayCounter(DecayRate()),
1598 DecayCounter(DecayRate()),
1599 DecayCounter(DecayRate()),
1600 DecayCounter(DecayRate())
// All five counters built from the supplied rate.
1603 dirfrag_load_vec_t(const DecayRate
&rate
) :
1604 vec
{DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
)}
// Versioned encode (struct version 2, compat 2): iterates over every counter.
1607 void encode(bufferlist
&bl
) const {
1608 ENCODE_START(2, 2, bl
);
1609 for (const auto &i
: vec
) {
// Matching decode using the legacy-compatible start macro; iterates over
// every counter.
1614 void decode(bufferlist::const_iterator
&p
) {
1615 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p
);
1616 for (auto &i
: vec
) {
1621 void dump(Formatter
*f
) const;
1622 void dump(Formatter
*f
, const DecayRate
& rate
) const;
1623 static void generate_test_instances(std::list
<dirfrag_load_vec_t
*>& ls
);
// Const and mutable accessors for the counter selected by `t` (bodies not
// visible).
1625 const DecayCounter
&get(int t
) const {
1628 DecayCounter
&get(int t
) {
// Applies `d` to every counter (loop body not visible).
1631 void adjust(double d
) {
1632 for (auto &i
: vec
) {
// A second loop over every counter; its enclosing function is on a line not
// shown here.
1637 for (auto &i
: vec
) {
// Weighted sum of the five counters:
// 1*IRD + 2*IWR + 1*READDIR + 2*FETCH + 4*STORE.
1641 double meta_load() const {
1643 1*vec
[META_POP_IRD
].get() +
1644 2*vec
[META_POP_IWR
].get() +
1645 1*vec
[META_POP_READDIR
].get() +
1646 2*vec
[META_POP_FETCH
].get() +
1647 4*vec
[META_POP_STORE
].get();
// Element-wise add: adjusts each counter by the matching counter value of `r`.
// NOTE(review): `r` is a non-const reference although only get() is called on
// its counters (get() is also used from the const meta_load()) -- could
// probably be const&; confirm.
1650 void add(dirfrag_load_vec_t
& r
) {
1651 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1652 vec
[i
].adjust(r
.vec
[i
].get());
// Element-wise subtract: adjusts each counter by the negated value from `r`.
1654 void sub(dirfrag_load_vec_t
& r
) {
1655 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1656 vec
[i
].adjust(-r
.vec
[i
].get());
// Iterates over every counter index with factor `f`; the per-element
// operation is on a line not shown here.
1658 void scale(double f
) {
1659 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
// The stream operator is a friend because it reads `vec` directly.
1664 friend inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_load_vec_t
& dl
);
1665 std::array
<DecayCounter
, NUM
> vec
;
// Free-function encode/decode overloads for dirfrag_load_vec_t. Their bodies
// are on lines not shown here -- presumably they forward to the member
// functions; confirm.
1668 inline void encode(const dirfrag_load_vec_t
&c
, bufferlist
&bl
) {
1671 inline void decode(dirfrag_load_vec_t
& c
, bufferlist::const_iterator
&p
) {
// Formats the five counters and the weighted meta_load() into a temporary
// ostringstream using fixed notation with one decimal place, then writes the
// result to `out`. Formatting in a temporary stream keeps the precision and
// fixed flags from being applied to `out`.
// NOTE(review): the result is followed by std::endl, which appends a newline
// and flushes. operator<< for mds_load_t streams two of these inline
// ("mdsload<" auth "/" all ...), so the newline lands mid-record -- confirm
// this is intended before changing it, since log consumers may depend on it.
1675 inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_load_vec_t
& dl
)
1677 std::ostringstream ss
;
1678 ss
<< std::setprecision(1) << std::fixed
1680 " IRD:" << dl
.vec
[0]
1681 << " IWR:" << dl
.vec
[1]
1682 << " RDR:" << dl
.vec
[2]
1683 << " FET:" << dl
.vec
[3]
1684 << " STR:" << dl
.vec
[4]
1685 << " *LOAD:" << dl
.meta_load() << "]";
1686 return out
<< ss
.str() << std::endl
;
// Interior of mds_load_t (the struct header is on a line not shown here):
// two dirfrag load vectors plus scalar load figures.
1690 using clock
= dirfrag_load_vec_t::clock
;
1691 using time
= dirfrag_load_vec_t::time
;
1693 dirfrag_load_vec_t auth
;
1694 dirfrag_load_vec_t all
;
// Default: both vectors built from a default-constructed DecayRate.
1696 mds_load_t() : auth(DecayRate()), all(DecayRate()) {}
// Both vectors built from the supplied rate.
1697 mds_load_t(const DecayRate
&rate
) : auth(rate
), all(rate
) {}
// Scalar figures, all defaulting to 0.0; printed by operator<< as
// "req", "hr", "qlen" and "cpu" respectively.
1699 double req_rate
= 0.0;
1700 double cache_hit_rate
= 0.0;
1701 double queue_len
= 0.0;
1703 double cpu_load_avg
= 0.0;
1705 double mds_load() const; // defined in MDBalancer.cc
1706 void encode(bufferlist
& bl
) const;
1707 void decode(bufferlist::const_iterator
& bl
);
1708 void dump(Formatter
*f
) const;
1709 static void generate_test_instances(std::list
<mds_load_t
*>& ls
);
// Free-function encode/decode overloads for mds_load_t. Their bodies are on
// lines not shown here -- presumably they forward to the member functions;
// confirm.
1711 inline void encode(const mds_load_t
&c
, bufferlist
&bl
) {
1714 inline void decode(mds_load_t
&c
, bufferlist::const_iterator
&p
) {
// Streams an mds_load_t as "mdsload<auth/all, req R, hr H, qlen Q, cpu C".
// The closing part of the expression is on a line not shown here. `auth` and
// `all` are printed through dirfrag_load_vec_t's operator<<, which ends its
// own output with std::endl.
1718 inline std::ostream
& operator<<(std::ostream
& out
, const mds_load_t
& load
)
1720 return out
<< "mdsload<" << load
.auth
<< "/" << load
.all
1721 << ", req " << load
.req_rate
1722 << ", hr " << load
.cache_hit_rate
1723 << ", qlen " << load
.queue_len
1724 << ", cpu " << load
.cpu_load_avg
// Keeps up to MAX (= 4) recent integer entries in `last`, together with a
// DecayCounter `count`. The members `p`, `n` and `count`, and most of the
// hit() body, are on lines not shown here.
1728 class load_spread_t
{
1730 using time
= DecayCounter::time
;
1731 using clock
= DecayCounter::clock
;
1732 static const int MAX
= 4;
// A decay rate is mandatory: the default constructor is deleted below.
1734 load_spread_t(const DecayRate
&rate
) : count(rate
)
1737 load_spread_t() = delete;
// Records a hit from `who`. Visible behaviour: scans the first n entries,
// with one exit that returns count.get_last(); returns 0.0 when n == 1;
// wraps the cursor p back to 0 when it reaches MAX.
1739 double hit(int who
) {
1740 for (int i
=0; i
<n
; i
++)
1742 return count
.get_last();
1747 if (n
== 1) return 0.0;
1749 if (p
== MAX
) p
= 0;
1753 double get() const {
// All MAX slots start at -1; the initialiser list has exactly MAX entries.
1757 std::array
<int, MAX
> last
= {-1, -1, -1, -1};
1762 // ================================================================
// Authority is expressed as a pair of MDS ranks.
1763 typedef std::pair
<mds_rank_t
, mds_rank_t
> mds_authority_t
;
1765 // -- authority delegation --
1766 // directory authority types
1767 // >= 0 is the auth mds
// Negative ranks are sentinels: -1 is "parent", -2 is "unknown".
1768 #define CDIR_AUTH_PARENT mds_rank_t(-1) // default
1769 #define CDIR_AUTH_UNKNOWN mds_rank_t(-2)
// Ready-made pairs: DEFAULT = (PARENT, UNKNOWN), UNDEF = (UNKNOWN, UNKNOWN).
1770 #define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN)
1771 #define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN)
1772 //#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2)
// Only the serialization/dump/test hooks are visible. The data members are on
// lines not shown here; the free operators that follow read ino, snapid,
// dirfrag and dname.
1774 class MDSCacheObjectInfo
{
1776 void encode(bufferlist
& bl
) const;
1777 void decode(bufferlist::const_iterator
& bl
);
1778 void dump(Formatter
*f
) const;
1779 static void generate_test_instances(std::list
<MDSCacheObjectInfo
*>& ls
);
// Streams an MDSCacheObjectInfo in one of three forms, checked in order:
//   ino is non-zero    -> "<ino>.<snapid>"
//   dname is non-empty -> "<dirfrag>/<dname> snap <snapid>"
//   otherwise          -> "<dirfrag>"
1787 inline std::ostream
& operator<<(std::ostream
& out
, const MDSCacheObjectInfo
&info
) {
1788 if (info
.ino
) return out
<< info
.ino
<< "." << info
.snapid
;
1789 if (info
.dname
.length()) return out
<< info
.dirfrag
<< "/" << info
.dname
1790 << " snap " << info
.snapid
;
1791 return out
<< info
.dirfrag
;
// Equality has two visible forms: compare (ino, snapid), or compare
// (dirfrag, dname). The condition that chooses between them is on a line not
// shown here.
// NOTE(review): the second form does not compare snapid -- confirm intended.
1794 inline bool operator==(const MDSCacheObjectInfo
& l
, const MDSCacheObjectInfo
& r
) {
1796 return l
.ino
== r
.ino
&& l
.snapid
== r
.snapid
;
1798 return l
.dirfrag
== r
.dirfrag
&& l
.dname
== r
.dname
;
1800 WRITE_CLASS_ENCODER(MDSCacheObjectInfo
)
1802 // parse a map of keys/values.
1803 namespace qi
= boost::spirit::qi
;
// Boost.Spirit Qi grammar producing std::map<string, string> from text of the
// form "key=value key=value ...", with pairs separated by a single space.
// The constructor header and the end of the struct are on lines not shown.
1805 template <typename Iterator
>
1806 struct keys_and_values
1807 : qi::grammar
<Iterator
, std::map
<string
, string
>()>
1810 : keys_and_values::base_type(query
)
// query: one pair followed by zero or more " pair".
1812 query
= pair
>> *(qi::lit(' ') >> pair
);
// pair: key, a literal '=', then value.
1813 pair
= key
>> '=' >> value
;
// key: a letter or underscore, followed by letters, digits or underscores.
1814 key
= qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
// value: one or more characters from the set below.
// NOTE(review): in a char_ set definition '-' between two characters denotes
// a range, so "9-_" here may be read as the range '9'..'_' (which would admit
// ':', ';', '<', '=', '>', '?', '@', '[', '\', ']', '^') rather than a literal
// '-'. Confirm against the Boost.Spirit docs; if a literal hyphen is intended
// it may need to be placed last in the string.
1815 value
= +qi::char_("a-zA-Z0-9-_.");
// Rule declarations; the attribute types match the grammar's map/pair/string.
1817 qi::rule
<Iterator
, std::map
<string
, string
>()> query
;
1818 qi::rule
<Iterator
, std::pair
<string
, string
>()> pair
;
1819 qi::rule
<Iterator
, string()> key
, value
;