1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #ifndef CEPH_MDSTYPES_H
4 #define CEPH_MDSTYPES_H
6 #include "include/int_types.h"
12 #include <string_view>
14 #include "common/config.h"
15 #include "common/Clock.h"
16 #include "common/DecayCounter.h"
17 #include "common/entity_name.h"
19 #include "include/Context.h"
20 #include "include/frag.h"
21 #include "include/xlist.h"
22 #include "include/interval_set.h"
23 #include "include/compact_map.h"
24 #include "include/compact_set.h"
25 #include "include/fs_types.h"
27 #include "inode_backtrace.h"
29 #include <boost/spirit/include/qi.hpp>
30 #include <boost/pool/pool.hpp>
31 #include "include/ceph_assert.h"
32 #include <boost/serialization/strong_typedef.hpp>
34 #define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011"
36 #define MDS_PORT_CACHE 0x200
37 #define MDS_PORT_LOCKER 0x300
38 #define MDS_PORT_MIGRATOR 0x400
43 #define MDS_INO_ROOT 1
45 // No longer created but recognised in existing filesystems
46 // so that we don't try to fragment it.
47 #define MDS_INO_CEPH 2
49 #define MDS_INO_GLOBAL_SNAPREALM 3
51 #define MDS_INO_MDSDIR_OFFSET (1*MAX_MDS)
52 #define MDS_INO_STRAY_OFFSET (6*MAX_MDS)
54 // Locations for journal data
55 #define MDS_INO_LOG_OFFSET (2*MAX_MDS)
56 #define MDS_INO_LOG_BACKUP_OFFSET (3*MAX_MDS)
57 #define MDS_INO_LOG_POINTER_OFFSET (4*MAX_MDS)
58 #define MDS_INO_PURGE_QUEUE (5*MAX_MDS)
60 #define MDS_INO_SYSTEM_BASE ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))
// Inode-number arithmetic for the reserved per-MDS ranges.
//
// Every macro argument is fully parenthesised so that compound expressions
// (e.g. `a + b`) are cast/compared as a whole rather than operand-by-operand.

// Stray inode number: MDS_INO_STRAY_OFFSET + x*NUM_STRAY + i.
// MDS_INO_STRAY_OWNER / MDS_INO_STRAY_INDEX below recover x and i from it.
#define MDS_INO_STRAY(x,i)  (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
// Mdsdir inode number: MDS_INO_MDSDIR_OFFSET + x.
// MDS_INO_MDSDIR_OWNER below recovers x from it.
#define MDS_INO_MDSDIR(x) (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

// True when i lies in [MDS_INO_STRAY_OFFSET, MDS_INO_STRAY_OFFSET + MAX_MDS*NUM_STRAY).
#define MDS_INO_IS_STRAY(i)  ((i) >= MDS_INO_STRAY_OFFSET  && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
// True when i lies in [MDS_INO_MDSDIR_OFFSET, MDS_INO_MDSDIR_OFFSET + MAX_MDS).
#define MDS_INO_IS_MDSDIR(i) ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
// Inverse of MDS_INO_MDSDIR: the x an mdsdir inode number was built from.
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
// True for the root inode, the global snaprealm inode, or any mdsdir inode.
#define MDS_INO_IS_BASE(i)   ((i) == MDS_INO_ROOT || (i) == MDS_INO_GLOBAL_SNAPREALM || MDS_INO_IS_MDSDIR(i))
// Inverses of MDS_INO_STRAY: the x (owner) and i (index) a stray inode
// number was built from.
#define MDS_INO_STRAY_OWNER(i) (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i) (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)
72 #define MDS_TRAVERSE_FORWARD 1
73 #define MDS_TRAVERSE_DISCOVER 2 // skips permissions checks etc.
74 #define MDS_TRAVERSE_DISCOVERXLOCK 3 // succeeds on (foreign?) null, xlocked dentries.
// Rank identifier type (signed 32-bit). MDS_RANK_NONE is the sentinel that
// mds_role_t uses for its default-constructed rank.
using mds_rank_t = int32_t;
constexpr mds_rank_t MDS_RANK_NONE{-1};
80 BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t
)
81 extern const mds_gid_t MDS_GID_NONE
;
// Filesystem cluster identifier type (signed 32-bit).
using fs_cluster_id_t = int32_t;
// Sentinel that mds_role_t uses for its default-constructed fscid.
constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE{-1};
// The namespace ID of the anonymous default filesystem from legacy systems
constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS{0};
91 fs_cluster_id_t fscid
;
94 mds_role_t(fs_cluster_id_t fscid_
, mds_rank_t rank_
)
95 : fscid(fscid_
), rank(rank_
)
98 : fscid(FS_CLUSTER_ID_NONE
), rank(MDS_RANK_NONE
)
100 bool operator<(mds_role_t
const &rhs
) const
102 if (fscid
< rhs
.fscid
) {
104 } else if (fscid
== rhs
.fscid
) {
105 return rank
< rhs
.rank
;
113 return (rank
== MDS_RANK_NONE
);
116 std::ostream
& operator<<(std::ostream
&out
, const mds_role_t
&role
);
121 inline string
gcap_string(int cap
)
124 if (cap
& CEPH_CAP_GSHARED
) s
+= "s";
125 if (cap
& CEPH_CAP_GEXCL
) s
+= "x";
126 if (cap
& CEPH_CAP_GCACHE
) s
+= "c";
127 if (cap
& CEPH_CAP_GRD
) s
+= "r";
128 if (cap
& CEPH_CAP_GWR
) s
+= "w";
129 if (cap
& CEPH_CAP_GBUFFER
) s
+= "b";
130 if (cap
& CEPH_CAP_GWREXTEND
) s
+= "a";
131 if (cap
& CEPH_CAP_GLAZYIO
) s
+= "l";
134 inline string
ccap_string(int cap
)
137 if (cap
& CEPH_CAP_PIN
) s
+= "p";
139 int a
= (cap
>> CEPH_CAP_SAUTH
) & 3;
140 if (a
) s
+= 'A' + gcap_string(a
);
142 a
= (cap
>> CEPH_CAP_SLINK
) & 3;
143 if (a
) s
+= 'L' + gcap_string(a
);
145 a
= (cap
>> CEPH_CAP_SXATTR
) & 3;
146 if (a
) s
+= 'X' + gcap_string(a
);
148 a
= cap
>> CEPH_CAP_SFILE
;
149 if (a
) s
+= 'F' + gcap_string(a
);
157 struct scatter_info_t
{
158 version_t version
= 0;
163 struct frag_info_t
: public scatter_info_t
{
166 uint64_t change_attr
= 0;
167 int64_t nfiles
= 0; // files
168 int64_t nsubdirs
= 0; // subdirs
172 int64_t size() const { return nfiles
+ nsubdirs
; }
175 *this = frag_info_t();
178 // *this += cur - acc;
179 void add_delta(const frag_info_t
&cur
, const frag_info_t
&acc
, bool *touched_mtime
=0, bool *touched_chattr
=0) {
180 if (cur
.mtime
> mtime
) {
183 *touched_mtime
= true;
185 if (cur
.change_attr
> change_attr
) {
186 change_attr
= cur
.change_attr
;
188 *touched_chattr
= true;
190 nfiles
+= cur
.nfiles
- acc
.nfiles
;
191 nsubdirs
+= cur
.nsubdirs
- acc
.nsubdirs
;
194 void add(const frag_info_t
& other
) {
195 if (other
.mtime
> mtime
)
197 if (other
.change_attr
> change_attr
)
198 change_attr
= other
.change_attr
;
199 nfiles
+= other
.nfiles
;
200 nsubdirs
+= other
.nsubdirs
;
203 bool same_sums(const frag_info_t
&o
) const {
204 return mtime
<= o
.mtime
&&
205 nfiles
== o
.nfiles
&&
206 nsubdirs
== o
.nsubdirs
;
209 void encode(bufferlist
&bl
) const;
210 void decode(bufferlist::const_iterator
& bl
);
211 void dump(Formatter
*f
) const;
212 static void generate_test_instances(list
<frag_info_t
*>& ls
);
214 WRITE_CLASS_ENCODER(frag_info_t
)
216 inline bool operator==(const frag_info_t
&l
, const frag_info_t
&r
) {
217 return memcmp(&l
, &r
, sizeof(l
)) == 0;
219 inline bool operator!=(const frag_info_t
&l
, const frag_info_t
&r
) {
223 std::ostream
& operator<<(std::ostream
&out
, const frag_info_t
&f
);
226 struct nest_info_t
: public scatter_info_t
{
227 // this frag + children
231 int64_t rsubdirs
= 0;
232 int64_t rsize() const { return rfiles
+ rsubdirs
; }
239 *this = nest_info_t();
242 void sub(const nest_info_t
&other
) {
245 void add(const nest_info_t
&other
, int fac
=1) {
246 if (other
.rctime
> rctime
)
247 rctime
= other
.rctime
;
248 rbytes
+= fac
*other
.rbytes
;
249 rfiles
+= fac
*other
.rfiles
;
250 rsubdirs
+= fac
*other
.rsubdirs
;
251 rsnaps
+= fac
*other
.rsnaps
;
254 // *this += cur - acc;
255 void add_delta(const nest_info_t
&cur
, const nest_info_t
&acc
) {
256 if (cur
.rctime
> rctime
)
258 rbytes
+= cur
.rbytes
- acc
.rbytes
;
259 rfiles
+= cur
.rfiles
- acc
.rfiles
;
260 rsubdirs
+= cur
.rsubdirs
- acc
.rsubdirs
;
261 rsnaps
+= cur
.rsnaps
- acc
.rsnaps
;
264 bool same_sums(const nest_info_t
&o
) const {
265 return rctime
<= o
.rctime
&&
266 rbytes
== o
.rbytes
&&
267 rfiles
== o
.rfiles
&&
268 rsubdirs
== o
.rsubdirs
&&
272 void encode(bufferlist
&bl
) const;
273 void decode(bufferlist::const_iterator
& bl
);
274 void dump(Formatter
*f
) const;
275 static void generate_test_instances(list
<nest_info_t
*>& ls
);
277 WRITE_CLASS_ENCODER(nest_info_t
)
279 inline bool operator==(const nest_info_t
&l
, const nest_info_t
&r
) {
280 return memcmp(&l
, &r
, sizeof(l
)) == 0;
282 inline bool operator!=(const nest_info_t
&l
, const nest_info_t
&r
) {
286 std::ostream
& operator<<(std::ostream
&out
, const nest_info_t
&n
);
293 vinodeno_t(inodeno_t i
, snapid_t s
) : ino(i
), snapid(s
) {}
295 void encode(bufferlist
& bl
) const {
300 void decode(bufferlist::const_iterator
& p
) {
306 WRITE_CLASS_ENCODER(vinodeno_t
)
308 inline bool operator==(const vinodeno_t
&l
, const vinodeno_t
&r
) {
309 return l
.ino
== r
.ino
&& l
.snapid
== r
.snapid
;
311 inline bool operator!=(const vinodeno_t
&l
, const vinodeno_t
&r
) {
314 inline bool operator<(const vinodeno_t
&l
, const vinodeno_t
&r
) {
317 (l
.ino
== r
.ino
&& l
.snapid
< r
.snapid
);
322 int64_t max_bytes
= 0;
323 int64_t max_files
= 0;
327 void encode(bufferlist
& bl
) const {
328 ENCODE_START(1, 1, bl
);
329 encode(max_bytes
, bl
);
330 encode(max_files
, bl
);
333 void decode(bufferlist::const_iterator
& p
) {
334 DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p
);
335 decode(max_bytes
, p
);
336 decode(max_files
, p
);
340 void dump(Formatter
*f
) const;
341 static void generate_test_instances(list
<quota_info_t
*>& ls
);
343 bool is_valid() const {
344 return max_bytes
>=0 && max_files
>=0;
346 bool is_enable() const {
347 return max_bytes
|| max_files
;
350 WRITE_CLASS_ENCODER(quota_info_t
)
352 inline bool operator==(const quota_info_t
&l
, const quota_info_t
&r
) {
353 return memcmp(&l
, &r
, sizeof(l
)) == 0;
356 ostream
& operator<<(ostream
&out
, const quota_info_t
&n
);
359 template<> struct hash
<vinodeno_t
> {
360 size_t operator()(const vinodeno_t
&vino
) const {
363 return H(vino
.ino
) ^ I(vino
.snapid
);
371 inline std::ostream
& operator<<(std::ostream
&out
, const vinodeno_t
&vino
) {
373 if (vino
.snapid
== CEPH_NOSNAP
)
375 else if (vino
.snapid
)
376 out
<< '.' << vino
.snapid
;
382 * client_writeable_range_t
384 struct client_writeable_range_t
{
385 struct byte_range_t
{
386 uint64_t first
= 0, last
= 0; // interval client can write to
391 snapid_t follows
= 0; // aka "data+metadata flushed thru"
393 client_writeable_range_t() {}
395 void encode(bufferlist
&bl
) const;
396 void decode(bufferlist::const_iterator
& bl
);
397 void dump(Formatter
*f
) const;
398 static void generate_test_instances(std::list
<client_writeable_range_t
*>& ls
);
401 inline void decode(client_writeable_range_t::byte_range_t
& range
, bufferlist::const_iterator
& bl
) {
402 decode(range
.first
, bl
);
403 decode(range
.last
, bl
);
406 WRITE_CLASS_ENCODER(client_writeable_range_t
)
408 std::ostream
& operator<<(std::ostream
& out
, const client_writeable_range_t
& r
);
410 inline bool operator==(const client_writeable_range_t
& l
,
411 const client_writeable_range_t
& r
) {
412 return l
.range
.first
== r
.range
.first
&& l
.range
.last
== r
.range
.last
&&
413 l
.follows
== r
.follows
;
416 struct inline_data_t
{
418 std::unique_ptr
<bufferlist
> blp
;
420 version_t version
= 1;
425 bufferlist
& get_data() {
427 blp
.reset(new bufferlist
);
430 size_t length() const { return blp
? blp
->length() : 0; }
433 inline_data_t(const inline_data_t
& o
) : version(o
.version
) {
437 inline_data_t
& operator=(const inline_data_t
& o
) {
445 bool operator==(const inline_data_t
& o
) const {
446 return length() == o
.length() &&
448 (*const_cast<bufferlist
*>(blp
.get()) == *const_cast<bufferlist
*>(o
.blp
.get())));
450 bool operator!=(const inline_data_t
& o
) const {
451 return !(*this == o
);
453 void encode(bufferlist
&bl
) const;
454 void decode(bufferlist::const_iterator
& bl
);
456 WRITE_CLASS_ENCODER(inline_data_t
)
459 DAMAGE_STATS
, // statistics (dirstat, size, etc)
460 DAMAGE_RSTATS
, // recursive statistics (rstat, accounted_rstat)
461 DAMAGE_FRAGTREE
// fragtree -- repair by searching
463 typedef uint32_t damage_flags_t
;
468 template<template<typename
> class Allocator
= std::allocator
>
472 * Do not forget to add any new fields to the compare() function.
477 uint32_t rdev
= 0; // if special file
479 // affected by any inode change...
480 utime_t ctime
; // inode change time
481 utime_t btime
; // birth time
483 // perm (namespace permissions)
491 // file (data access)
492 ceph_dir_layout dir_layout
; // [dir only]
493 file_layout_t layout
;
494 compact_set
<int64_t, std::less
<int64_t>, Allocator
<int64_t>> old_pools
;
495 uint64_t size
= 0; // on directory, # dentries
496 uint64_t max_size_ever
= 0; // max size the file has ever been
497 uint32_t truncate_seq
= 0;
498 uint64_t truncate_size
= 0, truncate_from
= 0;
499 uint32_t truncate_pending
= 0;
500 utime_t mtime
; // file data modify time.
501 utime_t atime
; // file data access time.
502 uint32_t time_warp_seq
= 0; // count of (potential) mtime/atime timewarps (i.e., utimes())
503 inline_data_t inline_data
; // FIXME check
506 uint64_t change_attr
= 0;
508 using client_range_map
= std::map
<client_t
,client_writeable_range_t
,std::less
<client_t
>,Allocator
<std::pair
<const client_t
,client_writeable_range_t
>>>;
509 client_range_map client_ranges
; // client(s) can write to these ranges
// dirfrag, recursive accounting
512 frag_info_t dirstat
; // protected by my filelock
513 nest_info_t rstat
; // protected by my nestlock
514 nest_info_t accounted_rstat
; // protected by parent's nestlock
518 mds_rank_t export_pin
= MDS_RANK_NONE
;
521 version_t version
= 0; // auth only
522 version_t file_data_version
= 0; // auth only
523 version_t xattr_version
= 0;
525 utime_t last_scrub_stamp
; // start time of last complete scrub
526 version_t last_scrub_version
= 0;// (parent) start version of last complete scrub
528 version_t backtrace_version
= 0;
530 snapid_t oldest_snap
;
532 std::basic_string
<char,std::char_traits
<char>,Allocator
<char>> stray_prior_path
; //stores path before unlink
537 memset(&dir_layout
, 0, sizeof(dir_layout
));
541 bool is_symlink() const { return (mode
& S_IFMT
) == S_IFLNK
; }
542 bool is_dir() const { return (mode
& S_IFMT
) == S_IFDIR
; }
543 bool is_file() const { return (mode
& S_IFMT
) == S_IFREG
; }
545 bool is_truncating() const { return (truncate_pending
> 0); }
546 void truncate(uint64_t old_size
, uint64_t new_size
) {
547 ceph_assert(new_size
< old_size
);
548 if (old_size
> max_size_ever
)
549 max_size_ever
= old_size
;
550 truncate_from
= old_size
;
552 rstat
.rbytes
= new_size
;
553 truncate_size
= size
;
558 bool has_layout() const {
559 return layout
!= file_layout_t();
562 void clear_layout() {
563 layout
= file_layout_t();
566 uint64_t get_layout_size_increment() const {
567 return layout
.get_period();
570 bool is_dirty_rstat() const { return !(rstat
== accounted_rstat
); }
572 uint64_t get_max_size() const {
574 for (std::map
<client_t
,client_writeable_range_t
>::const_iterator p
= client_ranges
.begin();
575 p
!= client_ranges
.end();
577 if (p
->second
.range
.last
> max
)
578 max
= p
->second
.range
.last
;
581 void set_max_size(uint64_t new_max
) {
583 client_ranges
.clear();
585 for (std::map
<client_t
,client_writeable_range_t
>::iterator p
= client_ranges
.begin();
586 p
!= client_ranges
.end();
588 p
->second
.range
.last
= new_max
;
592 void trim_client_ranges(snapid_t last
) {
593 std::map
<client_t
, client_writeable_range_t
>::iterator p
= client_ranges
.begin();
594 while (p
!= client_ranges
.end()) {
595 if (p
->second
.follows
>= last
)
596 client_ranges
.erase(p
++);
602 bool is_backtrace_updated() const {
603 return backtrace_version
== version
;
605 void update_backtrace(version_t pv
=0) {
606 backtrace_version
= pv
? pv
: version
;
609 void add_old_pool(int64_t l
) {
610 backtrace_version
= version
;
614 void encode(bufferlist
&bl
, uint64_t features
) const;
615 void decode(bufferlist::const_iterator
& bl
);
616 void dump(Formatter
*f
) const;
617 static void generate_test_instances(std::list
<inode_t
*>& ls
);
619 * Compare this inode_t with another that represent *the same inode*
620 * at different points in time.
621 * @pre The inodes are the same ino
623 * @param other The inode_t to compare ourselves with
624 * @param divergent A bool pointer which will be set to true
625 * if the values are different in a way that can't be explained
626 * by one being a newer version than the other.
628 * @returns 1 if we are newer than the other, 0 if equal, -1 if older.
630 int compare(const inode_t
&other
, bool *divergent
) const;
632 bool older_is_consistent(const inode_t
&other
) const;
635 // These methods may be moved back to mdstypes.cc when we have pmr
636 template<template<typename
> class Allocator
>
637 void inode_t
<Allocator
>::encode(bufferlist
&bl
, uint64_t features
) const
639 ENCODE_START(15, 6, bl
);
653 encode(anchored
, bl
);
656 encode(dir_layout
, bl
);
657 encode(layout
, bl
, features
);
659 encode(truncate_seq
, bl
);
660 encode(truncate_size
, bl
);
661 encode(truncate_from
, bl
);
662 encode(truncate_pending
, bl
);
665 encode(time_warp_seq
, bl
);
666 encode(client_ranges
, bl
);
670 encode(accounted_rstat
, bl
);
673 encode(file_data_version
, bl
);
674 encode(xattr_version
, bl
);
675 encode(backtrace_version
, bl
);
676 encode(old_pools
, bl
);
677 encode(max_size_ever
, bl
);
678 encode(inline_data
, bl
);
681 encode(stray_prior_path
, bl
);
683 encode(last_scrub_version
, bl
);
684 encode(last_scrub_stamp
, bl
);
687 encode(change_attr
, bl
);
689 encode(export_pin
, bl
);
694 template<template<typename
> class Allocator
>
695 void inode_t
<Allocator
>::decode(bufferlist::const_iterator
&p
)
697 DECODE_START_LEGACY_COMPAT_LEN(15, 6, 6, p
);
714 decode(dir_layout
, p
);
716 memset(&dir_layout
, 0, sizeof(dir_layout
));
719 decode(truncate_seq
, p
);
720 decode(truncate_size
, p
);
721 decode(truncate_from
, p
);
723 decode(truncate_pending
, p
);
725 truncate_pending
= 0;
728 decode(time_warp_seq
, p
);
730 decode(client_ranges
, p
);
732 map
<client_t
, client_writeable_range_t::byte_range_t
> m
;
734 for (map
<client_t
, client_writeable_range_t::byte_range_t
>::iterator
735 q
= m
.begin(); q
!= m
.end(); ++q
)
736 client_ranges
[q
->first
].range
= q
->second
;
741 decode(accounted_rstat
, p
);
744 decode(file_data_version
, p
);
745 decode(xattr_version
, p
);
747 decode(backtrace_version
, p
);
749 decode(old_pools
, p
);
751 decode(max_size_ever
, p
);
753 decode(inline_data
, p
);
755 inline_data
.version
= CEPH_INLINE_NONE
;
758 backtrace_version
= 0; // force update backtrace
762 if (struct_v
>= 12) {
765 stray_prior_path
= std::string_view(tmp
);
768 if (struct_v
>= 13) {
769 decode(last_scrub_version
, p
);
770 decode(last_scrub_stamp
, p
);
772 if (struct_v
>= 14) {
774 decode(change_attr
, p
);
780 if (struct_v
>= 15) {
781 decode(export_pin
, p
);
783 export_pin
= MDS_RANK_NONE
;
789 template<template<typename
> class Allocator
>
790 void inode_t
<Allocator
>::dump(Formatter
*f
) const
792 f
->dump_unsigned("ino", ino
);
793 f
->dump_unsigned("rdev", rdev
);
794 f
->dump_stream("ctime") << ctime
;
795 f
->dump_stream("btime") << btime
;
796 f
->dump_unsigned("mode", mode
);
797 f
->dump_unsigned("uid", uid
);
798 f
->dump_unsigned("gid", gid
);
799 f
->dump_unsigned("nlink", nlink
);
801 f
->open_object_section("dir_layout");
802 ::dump(dir_layout
, f
);
805 f
->dump_object("layout", layout
);
807 f
->open_array_section("old_pools");
808 for (const auto &p
: old_pools
) {
809 f
->dump_int("pool", p
);
813 f
->dump_unsigned("size", size
);
814 f
->dump_unsigned("truncate_seq", truncate_seq
);
815 f
->dump_unsigned("truncate_size", truncate_size
);
816 f
->dump_unsigned("truncate_from", truncate_from
);
817 f
->dump_unsigned("truncate_pending", truncate_pending
);
818 f
->dump_stream("mtime") << mtime
;
819 f
->dump_stream("atime") << atime
;
820 f
->dump_unsigned("time_warp_seq", time_warp_seq
);
821 f
->dump_unsigned("change_attr", change_attr
);
822 f
->dump_int("export_pin", export_pin
);
824 f
->open_array_section("client_ranges");
825 for (const auto &p
: client_ranges
) {
826 f
->open_object_section("client");
827 f
->dump_unsigned("client", p
.first
.v
);
833 f
->open_object_section("dirstat");
837 f
->open_object_section("rstat");
841 f
->open_object_section("accounted_rstat");
842 accounted_rstat
.dump(f
);
845 f
->dump_unsigned("version", version
);
846 f
->dump_unsigned("file_data_version", file_data_version
);
847 f
->dump_unsigned("xattr_version", xattr_version
);
848 f
->dump_unsigned("backtrace_version", backtrace_version
);
850 f
->dump_string("stray_prior_path", stray_prior_path
);
853 template<template<typename
> class Allocator
>
854 void inode_t
<Allocator
>::generate_test_instances(list
<inode_t
*>& ls
)
856 ls
.push_back(new inode_t
<Allocator
>);
857 ls
.push_back(new inode_t
<Allocator
>);
862 template<template<typename
> class Allocator
>
863 int inode_t
<Allocator
>::compare(const inode_t
<Allocator
> &other
, bool *divergent
) const
865 ceph_assert(ino
== other
.ino
);
867 if (version
== other
.version
) {
868 if (rdev
!= other
.rdev
||
869 ctime
!= other
.ctime
||
870 btime
!= other
.btime
||
871 mode
!= other
.mode
||
874 nlink
!= other
.nlink
||
875 memcmp(&dir_layout
, &other
.dir_layout
, sizeof(dir_layout
)) ||
876 layout
!= other
.layout
||
877 old_pools
!= other
.old_pools
||
878 size
!= other
.size
||
879 max_size_ever
!= other
.max_size_ever
||
880 truncate_seq
!= other
.truncate_seq
||
881 truncate_size
!= other
.truncate_size
||
882 truncate_from
!= other
.truncate_from
||
883 truncate_pending
!= other
.truncate_pending
||
884 change_attr
!= other
.change_attr
||
885 mtime
!= other
.mtime
||
886 atime
!= other
.atime
||
887 time_warp_seq
!= other
.time_warp_seq
||
888 inline_data
!= other
.inline_data
||
889 client_ranges
!= other
.client_ranges
||
890 !(dirstat
== other
.dirstat
) ||
891 !(rstat
== other
.rstat
) ||
892 !(accounted_rstat
== other
.accounted_rstat
) ||
893 file_data_version
!= other
.file_data_version
||
894 xattr_version
!= other
.xattr_version
||
895 backtrace_version
!= other
.backtrace_version
) {
899 } else if (version
> other
.version
) {
900 *divergent
= !older_is_consistent(other
);
903 ceph_assert(version
< other
.version
);
904 *divergent
= !other
.older_is_consistent(*this);
909 template<template<typename
> class Allocator
>
910 bool inode_t
<Allocator
>::older_is_consistent(const inode_t
<Allocator
> &other
) const
912 if (max_size_ever
< other
.max_size_ever
||
913 truncate_seq
< other
.truncate_seq
||
914 time_warp_seq
< other
.time_warp_seq
||
915 inline_data
.version
< other
.inline_data
.version
||
916 dirstat
.version
< other
.dirstat
.version
||
917 rstat
.version
< other
.rstat
.version
||
918 accounted_rstat
.version
< other
.accounted_rstat
.version
||
919 file_data_version
< other
.file_data_version
||
920 xattr_version
< other
.xattr_version
||
921 backtrace_version
< other
.backtrace_version
) {
927 template<template<typename
> class Allocator
>
928 inline void encode(const inode_t
<Allocator
> &c
, ::ceph::bufferlist
&bl
, uint64_t features
)
931 c
.encode(bl
, features
);
932 ENCODE_DUMP_POST(cl
);
934 template<template<typename
> class Allocator
>
935 inline void decode(inode_t
<Allocator
> &c
, ::ceph::bufferlist::const_iterator
&p
)
940 template<template<typename
> class Allocator
>
941 using alloc_string
= std::basic_string
<char,std::char_traits
<char>,Allocator
<char>>;
943 template<template<typename
> class Allocator
>
944 using xattr_map
= compact_map
<alloc_string
<Allocator
>, bufferptr
, std::less
<alloc_string
<Allocator
>>, Allocator
<std::pair
<const alloc_string
<Allocator
>, bufferptr
>>>; // FIXME bufferptr not in mempool
949 template<template<typename
> class Allocator
= std::allocator
>
952 inode_t
<Allocator
> inode
;
953 xattr_map
<Allocator
> xattrs
;
955 void encode(bufferlist
&bl
, uint64_t features
) const;
956 void decode(bufferlist::const_iterator
& bl
);
957 void dump(Formatter
*f
) const;
958 static void generate_test_instances(std::list
<old_inode_t
*>& ls
);
961 // These methods may be moved back to mdstypes.cc when we have pmr
962 template<template<typename
> class Allocator
>
963 void old_inode_t
<Allocator
>::encode(bufferlist
& bl
, uint64_t features
) const
965 ENCODE_START(2, 2, bl
);
967 encode(inode
, bl
, features
);
972 template<template<typename
> class Allocator
>
973 void old_inode_t
<Allocator
>::decode(bufferlist::const_iterator
& bl
)
975 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl
);
982 template<template<typename
> class Allocator
>
983 void old_inode_t
<Allocator
>::dump(Formatter
*f
) const
985 f
->dump_unsigned("first", first
);
987 f
->open_object_section("xattrs");
988 for (const auto &p
: xattrs
) {
989 std::string
v(p
.second
.c_str(), p
.second
.length());
990 f
->dump_string(p
.first
.c_str(), v
);
995 template<template<typename
> class Allocator
>
996 void old_inode_t
<Allocator
>::generate_test_instances(std::list
<old_inode_t
<Allocator
>*>& ls
)
998 ls
.push_back(new old_inode_t
<Allocator
>);
999 ls
.push_back(new old_inode_t
<Allocator
>);
1000 ls
.back()->first
= 2;
1001 std::list
<inode_t
<Allocator
>*> ils
;
1002 inode_t
<Allocator
>::generate_test_instances(ils
);
1003 ls
.back()->inode
= *ils
.back();
1004 ls
.back()->xattrs
["user.foo"] = buffer::copy("asdf", 4);
1005 ls
.back()->xattrs
["user.unprintable"] = buffer::copy("\000\001\002", 3);
1008 template<template<typename
> class Allocator
>
1009 inline void encode(const old_inode_t
<Allocator
> &c
, ::ceph::bufferlist
&bl
, uint64_t features
)
1012 c
.encode(bl
, features
);
1013 ENCODE_DUMP_POST(cl
);
1015 template<template<typename
> class Allocator
>
1016 inline void decode(old_inode_t
<Allocator
> &c
, ::ceph::bufferlist::const_iterator
&p
)
1023 * like an inode, but for a dir frag
1026 version_t version
= 0;
1027 snapid_t snap_purged_thru
; // the max_last_destroy snapid we've been purged thru
1028 frag_info_t fragstat
, accounted_fragstat
;
1029 nest_info_t rstat
, accounted_rstat
;
1030 damage_flags_t damage_flags
= 0;
1032 // we know we and all our descendants have been scrubbed since this version
1033 version_t recursive_scrub_version
= 0;
1034 utime_t recursive_scrub_stamp
;
1035 // version at which we last scrubbed our personal data structures
1036 version_t localized_scrub_version
= 0;
1037 utime_t localized_scrub_stamp
;
1039 void encode(bufferlist
&bl
) const;
1040 void decode(bufferlist::const_iterator
& bl
);
1041 void dump(Formatter
*f
) const;
1042 static void generate_test_instances(list
<fnode_t
*>& ls
);
1045 WRITE_CLASS_ENCODER(fnode_t
)
1048 struct old_rstat_t
{
1050 nest_info_t rstat
, accounted_rstat
;
1052 void encode(bufferlist
& bl
) const;
1053 void decode(bufferlist::const_iterator
& p
);
1054 void dump(Formatter
*f
) const;
1055 static void generate_test_instances(list
<old_rstat_t
*>& ls
);
1057 WRITE_CLASS_ENCODER(old_rstat_t
)
1059 inline std::ostream
& operator<<(std::ostream
& out
, const old_rstat_t
& o
) {
1060 return out
<< "old_rstat(first " << o
.first
<< " " << o
.rstat
<< " " << o
.accounted_rstat
<< ")";
1066 class feature_bitset_t
{
1068 typedef uint64_t block_type
;
1069 static const size_t bits_per_block
= sizeof(block_type
) * 8;
1071 feature_bitset_t(const feature_bitset_t
& other
) : _vec(other
._vec
) {}
1072 feature_bitset_t(feature_bitset_t
&& other
) : _vec(std::move(other
._vec
)) {}
1073 feature_bitset_t(unsigned long value
= 0);
1074 feature_bitset_t(const vector
<size_t>& array
);
1075 feature_bitset_t
& operator=(const feature_bitset_t
& other
) {
1079 feature_bitset_t
& operator=(feature_bitset_t
&& other
) {
1080 _vec
= std::move(other
._vec
);
1083 bool empty() const {
1084 for (auto& v
: _vec
) {
1090 bool test(size_t bit
) const {
1091 if (bit
>= bits_per_block
* _vec
.size())
1093 return _vec
[bit
/ bits_per_block
] & ((block_type
)1 << (bit
% bits_per_block
));
1098 feature_bitset_t
& operator-=(const feature_bitset_t
& other
);
1099 void encode(bufferlist
& bl
) const;
1100 void decode(bufferlist::const_iterator
&p
);
1101 void print(ostream
& out
) const;
1103 vector
<block_type
> _vec
;
1105 WRITE_CLASS_ENCODER(feature_bitset_t
)
1107 inline std::ostream
& operator<<(std::ostream
& out
, const feature_bitset_t
& s
) {
1115 struct client_metadata_t
{
1116 using kv_map_t
= std::map
<std::string
,std::string
>;
1117 using iterator
= kv_map_t::const_iterator
;
1120 feature_bitset_t features
;
1122 client_metadata_t() {}
1123 client_metadata_t(const client_metadata_t
& other
) :
1124 kv_map(other
.kv_map
), features(other
.features
) {}
1125 client_metadata_t(client_metadata_t
&& other
) :
1126 kv_map(std::move(other
.kv_map
)), features(std::move(other
.features
)) {}
1127 client_metadata_t(kv_map_t
&& kv
, feature_bitset_t
&&f
) :
1128 kv_map(std::move(kv
)), features(std::move(f
)) {}
1129 client_metadata_t(const kv_map_t
& kv
, const feature_bitset_t
&f
) :
1130 kv_map(kv
), features(f
) {}
1131 client_metadata_t
& operator=(const client_metadata_t
& other
) {
1132 kv_map
= other
.kv_map
;
1133 features
= other
.features
;
1137 bool empty() const { return kv_map
.empty() && features
.empty(); }
1138 iterator
find(const std::string
& key
) const { return kv_map
.find(key
); }
1139 iterator
begin() const { return kv_map
.begin(); }
1140 iterator
end() const { return kv_map
.end(); }
1141 std::string
& operator[](const std::string
& key
) { return kv_map
[key
]; }
1142 void merge(const client_metadata_t
& other
) {
1143 kv_map
.insert(other
.kv_map
.begin(), other
.kv_map
.end());
1144 features
= other
.features
;
1151 void encode(bufferlist
& bl
) const;
1152 void decode(bufferlist::const_iterator
& p
);
1153 void dump(Formatter
*f
) const;
1155 WRITE_CLASS_ENCODER(client_metadata_t
)
1160 struct session_info_t
{
1162 std::map
<ceph_tid_t
,inodeno_t
> completed_requests
;
1163 interval_set
<inodeno_t
> prealloc_inos
; // preallocated, ready to use.
1164 interval_set
<inodeno_t
> used_inos
; // journaling use
1165 client_metadata_t client_metadata
;
1166 std::set
<ceph_tid_t
> completed_flushes
;
1167 EntityName auth_name
;
1169 client_t
get_client() const { return client_t(inst
.name
.num()); }
1170 bool has_feature(size_t bit
) const { return client_metadata
.features
.test(bit
); }
1171 const entity_name_t
& get_source() const { return inst
.name
; }
1174 prealloc_inos
.clear();
1176 completed_requests
.clear();
1177 completed_flushes
.clear();
1178 client_metadata
.clear();
1181 void encode(bufferlist
& bl
, uint64_t features
) const;
1182 void decode(bufferlist::const_iterator
& p
);
1183 void dump(Formatter
*f
) const;
1184 static void generate_test_instances(list
<session_info_t
*>& ls
);
1186 WRITE_CLASS_ENCODER_FEATURES(session_info_t
)
1192 struct dentry_key_t
{
1193 snapid_t snapid
= 0;
1194 std::string_view name
;
1197 dentry_key_t(snapid_t s
, std::string_view n
, __u32 h
=0) :
1198 snapid(s
), name(n
), hash(h
) {}
1200 bool is_valid() { return name
.length() || snapid
; }
1202 // encode into something that can be decoded as a string.
1203 // name_ (head) or name_%x (!head)
1204 void encode(bufferlist
& bl
) const {
1210 void encode(string
& key
) const {
1212 if (snapid
!= CEPH_NOSNAP
) {
1213 uint64_t val(snapid
);
1214 snprintf(b
, sizeof(b
), "%" PRIx64
, val
);
1216 snprintf(b
, sizeof(b
), "%s", "head");
1219 oss
<< name
<< "_" << b
;
1222 static void decode_helper(bufferlist::const_iterator
& bl
, string
& nm
, snapid_t
& sn
) {
1225 decode_helper(key
, nm
, sn
);
1227 static void decode_helper(std::string_view key
, string
& nm
, snapid_t
& sn
) {
1228 size_t i
= key
.find_last_of('_');
1229 ceph_assert(i
!= string::npos
);
1230 if (key
.compare(i
+1, std::string_view::npos
, "head") == 0) {
1235 long long unsigned x
= 0;
1236 std::string
x_str(key
.substr(i
+1));
1237 sscanf(x_str
.c_str(), "%llx", &x
);
1240 nm
= key
.substr(0, i
);
1244 inline std::ostream
& operator<<(std::ostream
& out
, const dentry_key_t
&k
)
1246 return out
<< "(" << k
.name
<< "," << k
.snapid
<< ")";
1249 inline bool operator<(const dentry_key_t
& k1
, const dentry_key_t
& k2
)
1252 * order by hash, name, snap
1254 int c
= ceph_frag_value(k1
.hash
) - ceph_frag_value(k2
.hash
);
1257 c
= k1
.name
.compare(k2
.name
);
1260 return k1
.snapid
< k2
.snapid
;
1265 * string_snap_t is a simple (string, snapid_t) pair
1267 struct string_snap_t
{
1271 string_snap_t(std::string_view n
, snapid_t s
) : name(n
), snapid(s
) {}
1273 void encode(bufferlist
& bl
) const;
1274 void decode(bufferlist::const_iterator
& p
);
1275 void dump(Formatter
*f
) const;
1276 static void generate_test_instances(list
<string_snap_t
*>& ls
);
1278 WRITE_CLASS_ENCODER(string_snap_t
)
1280 inline bool operator<(const string_snap_t
& l
, const string_snap_t
& r
) {
1281 int c
= l
.name
.compare(r
.name
);
1282 return c
< 0 || (c
== 0 && l
.snapid
< r
.snapid
);
1285 inline std::ostream
& operator<<(std::ostream
& out
, const string_snap_t
&k
)
1287 return out
<< "(" << k
.name
<< "," << k
.snapid
<< ")";
1291 * mds_table_pending_t
1293 * mds's requesting any pending ops. child needs to encode the corresponding
1294 * pending mutation state in the table.
1296 struct mds_table_pending_t
{
1300 mds_table_pending_t() {}
1301 void encode(bufferlist
& bl
) const;
1302 void decode(bufferlist::const_iterator
& bl
);
1303 void dump(Formatter
*f
) const;
1304 static void generate_test_instances(list
<mds_table_pending_t
*>& ls
);
1306 WRITE_CLASS_ENCODER(mds_table_pending_t
)
1312 struct metareqid_t
{
1316 metareqid_t(entity_name_t n
, ceph_tid_t t
) : name(n
), tid(t
) {}
1317 void encode(bufferlist
& bl
) const {
1322 void decode(bufferlist::const_iterator
&p
) {
1328 WRITE_CLASS_ENCODER(metareqid_t
)
1330 inline std::ostream
& operator<<(std::ostream
& out
, const metareqid_t
& r
) {
1331 return out
<< r
.name
<< ":" << r
.tid
;
1334 inline bool operator==(const metareqid_t
& l
, const metareqid_t
& r
) {
1335 return (l
.name
== r
.name
) && (l
.tid
== r
.tid
);
1337 inline bool operator!=(const metareqid_t
& l
, const metareqid_t
& r
) {
1338 return (l
.name
!= r
.name
) || (l
.tid
!= r
.tid
);
1340 inline bool operator<(const metareqid_t
& l
, const metareqid_t
& r
) {
1341 return (l
.name
< r
.name
) ||
1342 (l
.name
== r
.name
&& l
.tid
< r
.tid
);
1344 inline bool operator<=(const metareqid_t
& l
, const metareqid_t
& r
) {
1345 return (l
.name
< r
.name
) ||
1346 (l
.name
== r
.name
&& l
.tid
<= r
.tid
);
1348 inline bool operator>(const metareqid_t
& l
, const metareqid_t
& r
) { return !(l
<= r
); }
1349 inline bool operator>=(const metareqid_t
& l
, const metareqid_t
& r
) { return !(l
< r
); }
1352 template<> struct hash
<metareqid_t
> {
1353 size_t operator()(const metareqid_t
&r
) const {
1355 return H(r
.name
.num()) ^ H(r
.name
.type()) ^ H(r
.tid
);
1361 // cap info for client reconnect
1362 struct cap_reconnect_t
{
1364 mutable ceph_mds_cap_reconnect capinfo
;
1365 snapid_t snap_follows
;
1369 memset(&capinfo
, 0, sizeof(capinfo
));
1372 cap_reconnect_t(uint64_t cap_id
, inodeno_t pino
, std::string_view p
, int w
, int i
,
1373 inodeno_t sr
, snapid_t sf
, bufferlist
& lb
) :
1375 capinfo
.cap_id
= cap_id
;
1378 capinfo
.snaprealm
= sr
;
1379 capinfo
.pathbase
= pino
;
1380 capinfo
.flock_len
= 0;
1384 void encode(bufferlist
& bl
) const;
1385 void decode(bufferlist::const_iterator
& bl
);
1386 void encode_old(bufferlist
& bl
) const;
1387 void decode_old(bufferlist::const_iterator
& bl
);
1389 void dump(Formatter
*f
) const;
1390 static void generate_test_instances(list
<cap_reconnect_t
*>& ls
);
1392 WRITE_CLASS_ENCODER(cap_reconnect_t
)
1394 struct snaprealm_reconnect_t
{
1395 mutable ceph_mds_snaprealm_reconnect realm
;
1397 snaprealm_reconnect_t() {
1398 memset(&realm
, 0, sizeof(realm
));
1400 snaprealm_reconnect_t(inodeno_t ino
, snapid_t seq
, inodeno_t parent
) {
1403 realm
.parent
= parent
;
1405 void encode(bufferlist
& bl
) const;
1406 void decode(bufferlist::const_iterator
& bl
);
1407 void encode_old(bufferlist
& bl
) const;
1408 void decode_old(bufferlist::const_iterator
& bl
);
1410 void dump(Formatter
*f
) const;
1411 static void generate_test_instances(list
<snaprealm_reconnect_t
*>& ls
);
1413 WRITE_CLASS_ENCODER(snaprealm_reconnect_t
)
1415 // compat for pre-FLOCK feature
1416 struct old_ceph_mds_cap_reconnect
{
1421 struct ceph_timespec old_mtime
, old_atime
;
1422 ceph_le64 snaprealm
;
1423 ceph_le64 pathbase
; /* base ino for our path to this ino */
1424 } __attribute__ ((packed
));
1425 WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect
)
1427 struct old_cap_reconnect_t
{
1429 old_ceph_mds_cap_reconnect capinfo
;
1431 const old_cap_reconnect_t
& operator=(const cap_reconnect_t
& n
) {
1433 capinfo
.cap_id
= n
.capinfo
.cap_id
;
1434 capinfo
.wanted
= n
.capinfo
.wanted
;
1435 capinfo
.issued
= n
.capinfo
.issued
;
1436 capinfo
.snaprealm
= n
.capinfo
.snaprealm
;
1437 capinfo
.pathbase
= n
.capinfo
.pathbase
;
1440 operator cap_reconnect_t() {
1443 n
.capinfo
.cap_id
= capinfo
.cap_id
;
1444 n
.capinfo
.wanted
= capinfo
.wanted
;
1445 n
.capinfo
.issued
= capinfo
.issued
;
1446 n
.capinfo
.snaprealm
= capinfo
.snaprealm
;
1447 n
.capinfo
.pathbase
= capinfo
.pathbase
;
1451 void encode(bufferlist
& bl
) const {
1454 encode(capinfo
, bl
);
1456 void decode(bufferlist::const_iterator
& bl
) {
1459 decode(capinfo
, bl
);
1462 WRITE_CLASS_ENCODER(old_cap_reconnect_t
)
1465 // ================================================================
1473 dirfrag_t(inodeno_t i
, frag_t f
) : ino(i
), frag(f
) { }
1475 void encode(bufferlist
& bl
) const {
1480 void decode(bufferlist::const_iterator
& bl
) {
1486 WRITE_CLASS_ENCODER(dirfrag_t
)
1489 inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_t
&df
) {
1491 if (!df
.frag
.is_root()) out
<< "." << df
.frag
;
1494 inline bool operator<(dirfrag_t l
, dirfrag_t r
) {
1495 if (l
.ino
< r
.ino
) return true;
1496 if (l
.ino
== r
.ino
&& l
.frag
< r
.frag
) return true;
1499 inline bool operator==(dirfrag_t l
, dirfrag_t r
) {
1500 return l
.ino
== r
.ino
&& l
.frag
== r
.frag
;
1504 template<> struct hash
<dirfrag_t
> {
1505 size_t operator()(const dirfrag_t
&df
) const {
1506 static rjhash
<uint64_t> H
;
1507 static rjhash
<uint32_t> I
;
1508 return H(df
.ino
) ^ I(df
.frag
);
1515 // ================================================================
1517 #define META_POP_IRD 0
1518 #define META_POP_IWR 1
1519 #define META_POP_READDIR 2
1520 #define META_POP_FETCH 3
1521 #define META_POP_STORE 4
1524 class inode_load_vec_t
{
1526 using time
= DecayCounter::time
;
1527 using clock
= DecayCounter::clock
;
1528 static const size_t NUM
= 2;
1530 inode_load_vec_t() : vec
{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {}
1531 inode_load_vec_t(const DecayRate
&rate
) : vec
{DecayCounter(rate
), DecayCounter(rate
)} {}
1533 DecayCounter
&get(int t
) {
1537 for (auto &d
: vec
) {
1541 void encode(bufferlist
&bl
) const;
1542 void decode(bufferlist::const_iterator
& p
);
1543 void dump(Formatter
*f
) const;
1544 static void generate_test_instances(list
<inode_load_vec_t
*>& ls
);
1547 std::array
<DecayCounter
, NUM
> vec
;
1549 inline void encode(const inode_load_vec_t
&c
, bufferlist
&bl
) {
1552 inline void decode(inode_load_vec_t
& c
, bufferlist::const_iterator
&p
) {
1556 class dirfrag_load_vec_t
{
1558 using time
= DecayCounter::time
;
1559 using clock
= DecayCounter::clock
;
1560 static const size_t NUM
= 5;
1562 dirfrag_load_vec_t() :
1563 vec
{DecayCounter(DecayRate()),
1564 DecayCounter(DecayRate()),
1565 DecayCounter(DecayRate()),
1566 DecayCounter(DecayRate()),
1567 DecayCounter(DecayRate())
1570 dirfrag_load_vec_t(const DecayRate
&rate
) :
1571 vec
{DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
)}
1574 void encode(bufferlist
&bl
) const {
1575 ENCODE_START(2, 2, bl
);
1576 for (const auto &i
: vec
) {
1581 void decode(bufferlist::const_iterator
&p
) {
1582 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p
);
1583 for (auto &i
: vec
) {
1588 void dump(Formatter
*f
) const;
1589 void dump(Formatter
*f
, const DecayRate
& rate
) const;
1590 static void generate_test_instances(std::list
<dirfrag_load_vec_t
*>& ls
);
1592 const DecayCounter
&get(int t
) const {
1595 DecayCounter
&get(int t
) {
1598 void adjust(double d
) {
1599 for (auto &i
: vec
) {
1604 for (auto &i
: vec
) {
1608 double meta_load() const {
1610 1*vec
[META_POP_IRD
].get() +
1611 2*vec
[META_POP_IWR
].get() +
1612 1*vec
[META_POP_READDIR
].get() +
1613 2*vec
[META_POP_FETCH
].get() +
1614 4*vec
[META_POP_STORE
].get();
1617 void add(dirfrag_load_vec_t
& r
) {
1618 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1619 vec
[i
].adjust(r
.vec
[i
].get());
1621 void sub(dirfrag_load_vec_t
& r
) {
1622 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1623 vec
[i
].adjust(-r
.vec
[i
].get());
1625 void scale(double f
) {
1626 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1631 friend inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_load_vec_t
& dl
);
1632 std::array
<DecayCounter
, NUM
> vec
;
1635 inline void encode(const dirfrag_load_vec_t
&c
, bufferlist
&bl
) {
1638 inline void decode(dirfrag_load_vec_t
& c
, bufferlist::const_iterator
&p
) {
1642 inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_load_vec_t
& dl
)
1644 std::ostringstream ss
;
1645 ss
<< std::setprecision(1) << std::fixed
1647 " IRD:" << dl
.vec
[0]
1648 << " IWR:" << dl
.vec
[1]
1649 << " RDR:" << dl
.vec
[2]
1650 << " FET:" << dl
.vec
[3]
1651 << " STR:" << dl
.vec
[4]
1652 << " *LOAD:" << dl
.meta_load() << "]";
1653 return out
<< ss
.str() << std::endl
;
1662 using clock
= dirfrag_load_vec_t::clock
;
1663 using time
= dirfrag_load_vec_t::time
;
1665 dirfrag_load_vec_t auth
;
1666 dirfrag_load_vec_t all
;
1668 mds_load_t() : auth(DecayRate()), all(DecayRate()) {}
1669 mds_load_t(const DecayRate
&rate
) : auth(rate
), all(rate
) {}
1671 double req_rate
= 0.0;
1672 double cache_hit_rate
= 0.0;
1673 double queue_len
= 0.0;
1675 double cpu_load_avg
= 0.0;
1677 double mds_load() const; // defined in MDBalancer.cc
1678 void encode(bufferlist
& bl
) const;
1679 void decode(bufferlist::const_iterator
& bl
);
1680 void dump(Formatter
*f
) const;
1681 static void generate_test_instances(std::list
<mds_load_t
*>& ls
);
1683 inline void encode(const mds_load_t
&c
, bufferlist
&bl
) {
1686 inline void decode(mds_load_t
&c
, bufferlist::const_iterator
&p
) {
1690 inline std::ostream
& operator<<(std::ostream
& out
, const mds_load_t
& load
)
1692 return out
<< "mdsload<" << load
.auth
<< "/" << load
.all
1693 << ", req " << load
.req_rate
1694 << ", hr " << load
.cache_hit_rate
1695 << ", qlen " << load
.queue_len
1696 << ", cpu " << load
.cpu_load_avg
1700 class load_spread_t
{
1702 using time
= DecayCounter::time
;
1703 using clock
= DecayCounter::clock
;
1704 static const int MAX
= 4;
1710 load_spread_t() = delete;
1711 load_spread_t(const DecayRate
&rate
) : count(rate
)
1713 for (int i
=0; i
<MAX
; i
++)
1717 double hit(int who
) {
1718 for (int i
=0; i
<n
; i
++)
1720 return count
.get_last();
1725 if (n
== 1) return 0.0;
1727 if (p
== MAX
) p
= 0;
1731 double get() const {
1738 // ================================================================
1739 typedef std::pair
<mds_rank_t
, mds_rank_t
> mds_authority_t
;
1741 // -- authority delegation --
1742 // directory authority types
1743 // >= 0 is the auth mds
1744 #define CDIR_AUTH_PARENT mds_rank_t(-1) // default
1745 #define CDIR_AUTH_UNKNOWN mds_rank_t(-2)
1746 #define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN)
1747 #define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN)
1748 //#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2)
1750 class MDSCacheObjectInfo
{
1757 MDSCacheObjectInfo() {}
1759 void encode(bufferlist
& bl
) const;
1760 void decode(bufferlist::const_iterator
& bl
);
1761 void dump(Formatter
*f
) const;
1762 static void generate_test_instances(list
<MDSCacheObjectInfo
*>& ls
);
1765 inline std::ostream
& operator<<(std::ostream
& out
, const MDSCacheObjectInfo
&info
) {
1766 if (info
.ino
) return out
<< info
.ino
<< "." << info
.snapid
;
1767 if (info
.dname
.length()) return out
<< info
.dirfrag
<< "/" << info
.dname
1768 << " snap " << info
.snapid
;
1769 return out
<< info
.dirfrag
;
1772 inline bool operator==(const MDSCacheObjectInfo
& l
, const MDSCacheObjectInfo
& r
) {
1774 return l
.ino
== r
.ino
&& l
.snapid
== r
.snapid
;
1776 return l
.dirfrag
== r
.dirfrag
&& l
.dname
== r
.dname
;
1778 WRITE_CLASS_ENCODER(MDSCacheObjectInfo
)
1781 // parse a map of keys/values.
1782 namespace qi
= boost::spirit::qi
;
1784 template <typename Iterator
>
1785 struct keys_and_values
1786 : qi::grammar
<Iterator
, std::map
<string
, string
>()>
1789 : keys_and_values::base_type(query
)
1791 query
= pair
>> *(qi::lit(' ') >> pair
);
1792 pair
= key
>> '=' >> value
;
1793 key
= qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
1794 value
= +qi::char_("a-zA-Z_0-9");
1796 qi::rule
<Iterator
, std::map
<string
, string
>()> query
;
1797 qi::rule
<Iterator
, std::pair
<string
, string
>()> pair
;
1798 qi::rule
<Iterator
, string()> key
, value
;