1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #ifndef CEPH_MDSTYPES_H
4 #define CEPH_MDSTYPES_H
6 #include "include/int_types.h"
12 #include <string_view>
14 #include "common/config.h"
15 #include "common/Clock.h"
16 #include "common/DecayCounter.h"
17 #include "common/entity_name.h"
19 #include "include/Context.h"
20 #include "include/frag.h"
21 #include "include/xlist.h"
22 #include "include/interval_set.h"
23 #include "include/compact_map.h"
24 #include "include/compact_set.h"
25 #include "include/fs_types.h"
26 #include "include/ceph_fs.h"
28 #include "inode_backtrace.h"
30 #include <boost/spirit/include/qi.hpp>
31 #include <boost/pool/pool.hpp>
32 #include "include/ceph_assert.h"
33 #include <boost/serialization/strong_typedef.hpp>
35 #define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011"
37 #define MDS_PORT_CACHE 0x200
38 #define MDS_PORT_LOCKER 0x300
39 #define MDS_PORT_MIGRATOR 0x400
// For inode numbers 1, 2 and 4, see the CEPH_INO_* definitions in include/ceph_fs.h.
// Reserved inode-number regions. Each *_OFFSET is a multiple of MAX_MDS; the
// stray region additionally spans NUM_STRAY entries per MAX_MDS slot.
#define MDS_INO_MDSDIR_OFFSET (1*MAX_MDS)
#define MDS_INO_STRAY_OFFSET (6*MAX_MDS)

// Locations for journal data
#define MDS_INO_LOG_OFFSET (2*MAX_MDS)
#define MDS_INO_LOG_BACKUP_OFFSET (3*MAX_MDS)
#define MDS_INO_LOG_POINTER_OFFSET (4*MAX_MDS)
#define MDS_INO_PURGE_QUEUE (5*MAX_MDS)

// First inode number past the stray region.
#define MDS_INO_SYSTEM_BASE ((6*MAX_MDS) + (MAX_MDS * NUM_STRAY))

// Inode number of stray slot i belonging to x, and of the mdsdir belonging to x.
// Macro arguments are fully parenthesized so compound expressions are cast as a whole.
#define MDS_INO_STRAY(x,i) (MDS_INO_STRAY_OFFSET+((((unsigned)(x))*NUM_STRAY)+((unsigned)(i))))
#define MDS_INO_MDSDIR(x) (MDS_INO_MDSDIR_OFFSET+((unsigned)(x)))

// Range predicates and the inverse mappings of the two macros above.
#define MDS_INO_IS_STRAY(i) ((i) >= MDS_INO_STRAY_OFFSET && (i) < (MDS_INO_STRAY_OFFSET+(MAX_MDS*NUM_STRAY)))
#define MDS_INO_IS_MDSDIR(i) ((i) >= MDS_INO_MDSDIR_OFFSET && (i) < (MDS_INO_MDSDIR_OFFSET+MAX_MDS))
#define MDS_INO_MDSDIR_OWNER(i) (signed ((unsigned (i)) - MDS_INO_MDSDIR_OFFSET))
#define MDS_INO_IS_BASE(i) ((i) == CEPH_INO_ROOT || (i) == CEPH_INO_GLOBAL_SNAPREALM || MDS_INO_IS_MDSDIR(i))
#define MDS_INO_STRAY_OWNER(i) (signed (((unsigned (i)) - MDS_INO_STRAY_OFFSET) / NUM_STRAY))
#define MDS_INO_STRAY_INDEX(i) (((unsigned (i)) - MDS_INO_STRAY_OFFSET) % NUM_STRAY)

// True for inode numbers in [MDS_INO_MDSDIR_OFFSET, MDS_INO_SYSTEM_BASE).
#define MDS_IS_PRIVATE_INO(i) ((i) < MDS_INO_SYSTEM_BASE && (i) >= MDS_INO_MDSDIR_OFFSET)
// Signed 32-bit MDS rank identifier; MDS_RANK_NONE (-1) is the "no rank" sentinel.
using mds_rank_t = int32_t;
constexpr mds_rank_t MDS_RANK_NONE = -1;
72 BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t
)
73 extern const mds_gid_t MDS_GID_NONE
;
// Signed 32-bit filesystem (namespace) identifier; FS_CLUSTER_ID_NONE (-1) is
// the "no filesystem" sentinel.
using fs_cluster_id_t = int32_t;
constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = -1;
// The namespace ID of the anonymous default filesystem from legacy systems
constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS = 0;
82 mds_role_t(fs_cluster_id_t fscid_
, mds_rank_t rank_
)
83 : fscid(fscid_
), rank(rank_
)
87 bool operator<(mds_role_t
const &rhs
) const {
88 if (fscid
< rhs
.fscid
) {
90 } else if (fscid
== rhs
.fscid
) {
91 return rank
< rhs
.rank
;
97 bool is_none() const {
98 return (rank
== MDS_RANK_NONE
);
101 fs_cluster_id_t fscid
= FS_CLUSTER_ID_NONE
;
102 mds_rank_t rank
= MDS_RANK_NONE
;
104 inline std::ostream
& operator<<(std::ostream
& out
, const mds_role_t
& role
) {
105 return out
<< role
.fscid
<< ":" << role
.rank
;
109 inline string
gcap_string(int cap
)
112 if (cap
& CEPH_CAP_GSHARED
) s
+= "s";
113 if (cap
& CEPH_CAP_GEXCL
) s
+= "x";
114 if (cap
& CEPH_CAP_GCACHE
) s
+= "c";
115 if (cap
& CEPH_CAP_GRD
) s
+= "r";
116 if (cap
& CEPH_CAP_GWR
) s
+= "w";
117 if (cap
& CEPH_CAP_GBUFFER
) s
+= "b";
118 if (cap
& CEPH_CAP_GWREXTEND
) s
+= "a";
119 if (cap
& CEPH_CAP_GLAZYIO
) s
+= "l";
122 inline string
ccap_string(int cap
)
125 if (cap
& CEPH_CAP_PIN
) s
+= "p";
127 int a
= (cap
>> CEPH_CAP_SAUTH
) & 3;
128 if (a
) s
+= 'A' + gcap_string(a
);
130 a
= (cap
>> CEPH_CAP_SLINK
) & 3;
131 if (a
) s
+= 'L' + gcap_string(a
);
133 a
= (cap
>> CEPH_CAP_SXATTR
) & 3;
134 if (a
) s
+= 'X' + gcap_string(a
);
136 a
= cap
>> CEPH_CAP_SFILE
;
137 if (a
) s
+= 'F' + gcap_string(a
);
144 struct scatter_info_t
{
145 version_t version
= 0;
148 struct frag_info_t
: public scatter_info_t
{
149 int64_t size() const { return nfiles
+ nsubdirs
; }
152 *this = frag_info_t();
155 // *this += cur - acc;
156 void add_delta(const frag_info_t
&cur
, const frag_info_t
&acc
, bool *touched_mtime
=0, bool *touched_chattr
=0) {
157 if (cur
.mtime
> mtime
) {
160 *touched_mtime
= true;
162 if (cur
.change_attr
> change_attr
) {
163 change_attr
= cur
.change_attr
;
165 *touched_chattr
= true;
167 nfiles
+= cur
.nfiles
- acc
.nfiles
;
168 nsubdirs
+= cur
.nsubdirs
- acc
.nsubdirs
;
171 void add(const frag_info_t
& other
) {
172 if (other
.mtime
> mtime
)
174 if (other
.change_attr
> change_attr
)
175 change_attr
= other
.change_attr
;
176 nfiles
+= other
.nfiles
;
177 nsubdirs
+= other
.nsubdirs
;
180 bool same_sums(const frag_info_t
&o
) const {
181 return mtime
<= o
.mtime
&&
182 nfiles
== o
.nfiles
&&
183 nsubdirs
== o
.nsubdirs
;
186 void encode(bufferlist
&bl
) const;
187 void decode(bufferlist::const_iterator
& bl
);
188 void dump(Formatter
*f
) const;
189 static void generate_test_instances(std::list
<frag_info_t
*>& ls
);
193 uint64_t change_attr
= 0;
194 int64_t nfiles
= 0; // files
195 int64_t nsubdirs
= 0; // subdirs
197 WRITE_CLASS_ENCODER(frag_info_t
)
199 inline bool operator==(const frag_info_t
&l
, const frag_info_t
&r
) {
200 return memcmp(&l
, &r
, sizeof(l
)) == 0;
202 inline bool operator!=(const frag_info_t
&l
, const frag_info_t
&r
) {
206 std::ostream
& operator<<(std::ostream
&out
, const frag_info_t
&f
);
209 struct nest_info_t
: public scatter_info_t
{
210 int64_t rsize() const { return rfiles
+ rsubdirs
; }
213 *this = nest_info_t();
216 void sub(const nest_info_t
&other
) {
219 void add(const nest_info_t
&other
, int fac
=1) {
220 if (other
.rctime
> rctime
)
221 rctime
= other
.rctime
;
222 rbytes
+= fac
*other
.rbytes
;
223 rfiles
+= fac
*other
.rfiles
;
224 rsubdirs
+= fac
*other
.rsubdirs
;
225 rsnaps
+= fac
*other
.rsnaps
;
228 // *this += cur - acc;
229 void add_delta(const nest_info_t
&cur
, const nest_info_t
&acc
) {
230 if (cur
.rctime
> rctime
)
232 rbytes
+= cur
.rbytes
- acc
.rbytes
;
233 rfiles
+= cur
.rfiles
- acc
.rfiles
;
234 rsubdirs
+= cur
.rsubdirs
- acc
.rsubdirs
;
235 rsnaps
+= cur
.rsnaps
- acc
.rsnaps
;
238 bool same_sums(const nest_info_t
&o
) const {
239 return rctime
<= o
.rctime
&&
240 rbytes
== o
.rbytes
&&
241 rfiles
== o
.rfiles
&&
242 rsubdirs
== o
.rsubdirs
&&
246 void encode(bufferlist
&bl
) const;
247 void decode(bufferlist::const_iterator
& bl
);
248 void dump(Formatter
*f
) const;
249 static void generate_test_instances(std::list
<nest_info_t
*>& ls
);
251 // this frag + children
255 int64_t rsubdirs
= 0;
258 WRITE_CLASS_ENCODER(nest_info_t
)
260 inline bool operator==(const nest_info_t
&l
, const nest_info_t
&r
) {
261 return memcmp(&l
, &r
, sizeof(l
)) == 0;
263 inline bool operator!=(const nest_info_t
&l
, const nest_info_t
&r
) {
267 std::ostream
& operator<<(std::ostream
&out
, const nest_info_t
&n
);
271 vinodeno_t(inodeno_t i
, snapid_t s
) : ino(i
), snapid(s
) {}
273 void encode(bufferlist
& bl
) const {
278 void decode(bufferlist::const_iterator
& p
) {
287 WRITE_CLASS_ENCODER(vinodeno_t
)
289 inline bool operator==(const vinodeno_t
&l
, const vinodeno_t
&r
) {
290 return l
.ino
== r
.ino
&& l
.snapid
== r
.snapid
;
292 inline bool operator!=(const vinodeno_t
&l
, const vinodeno_t
&r
) {
295 inline bool operator<(const vinodeno_t
&l
, const vinodeno_t
&r
) {
298 (l
.ino
== r
.ino
&& l
.snapid
< r
.snapid
);
303 void encode(bufferlist
& bl
) const {
304 ENCODE_START(1, 1, bl
);
305 encode(max_bytes
, bl
);
306 encode(max_files
, bl
);
309 void decode(bufferlist::const_iterator
& p
) {
310 DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p
);
311 decode(max_bytes
, p
);
312 decode(max_files
, p
);
316 void dump(Formatter
*f
) const;
317 static void generate_test_instances(std::list
<quota_info_t
*>& ls
);
319 bool is_valid() const {
320 return max_bytes
>=0 && max_files
>=0;
322 bool is_enable() const {
323 return max_bytes
|| max_files
;
326 int64_t max_bytes
= 0;
327 int64_t max_files
= 0;
329 WRITE_CLASS_ENCODER(quota_info_t
)
331 inline bool operator==(const quota_info_t
&l
, const quota_info_t
&r
) {
332 return memcmp(&l
, &r
, sizeof(l
)) == 0;
335 ostream
& operator<<(ostream
&out
, const quota_info_t
&n
);
338 template<> struct hash
<vinodeno_t
> {
339 size_t operator()(const vinodeno_t
&vino
) const {
342 return H(vino
.ino
) ^ I(vino
.snapid
);
347 inline std::ostream
& operator<<(std::ostream
&out
, const vinodeno_t
&vino
) {
349 if (vino
.snapid
== CEPH_NOSNAP
)
351 else if (vino
.snapid
)
352 out
<< '.' << vino
.snapid
;
356 struct client_writeable_range_t
{
357 struct byte_range_t
{
358 uint64_t first
= 0, last
= 0; // interval client can write to
361 void encode(bufferlist
&bl
) const;
362 void decode(bufferlist::const_iterator
& bl
);
363 void dump(Formatter
*f
) const;
364 static void generate_test_instances(std::list
<client_writeable_range_t
*>& ls
);
367 snapid_t follows
= 0; // aka "data+metadata flushed thru"
370 inline void decode(client_writeable_range_t::byte_range_t
& range
, bufferlist::const_iterator
& bl
) {
371 decode(range
.first
, bl
);
372 decode(range
.last
, bl
);
375 WRITE_CLASS_ENCODER(client_writeable_range_t
)
377 std::ostream
& operator<<(std::ostream
& out
, const client_writeable_range_t
& r
);
379 inline bool operator==(const client_writeable_range_t
& l
,
380 const client_writeable_range_t
& r
) {
381 return l
.range
.first
== r
.range
.first
&& l
.range
.last
== r
.range
.last
&&
382 l
.follows
== r
.follows
;
385 struct inline_data_t
{
388 inline_data_t(const inline_data_t
& o
) : version(o
.version
) {
392 inline_data_t
& operator=(const inline_data_t
& o
) {
404 bufferlist
& get_data() {
406 blp
.reset(new bufferlist
);
409 size_t length() const { return blp
? blp
->length() : 0; }
411 bool operator==(const inline_data_t
& o
) const {
412 return length() == o
.length() &&
414 (*const_cast<bufferlist
*>(blp
.get()) == *const_cast<bufferlist
*>(o
.blp
.get())));
416 bool operator!=(const inline_data_t
& o
) const {
417 return !(*this == o
);
419 void encode(bufferlist
&bl
) const;
420 void decode(bufferlist::const_iterator
& bl
);
422 version_t version
= 1;
425 std::unique_ptr
<bufferlist
> blp
;
427 WRITE_CLASS_ENCODER(inline_data_t
)
430 DAMAGE_STATS
, // statistics (dirstat, size, etc)
431 DAMAGE_RSTATS
, // recursive statistics (rstat, accounted_rstat)
432 DAMAGE_FRAGTREE
// fragtree -- repair by searching
434 typedef uint32_t damage_flags_t
;
436 template<template<typename
> class Allocator
= std::allocator
>
440 * Do not forget to add any new fields to the compare() function.
443 using client_range_map
= std::map
<client_t
,client_writeable_range_t
,std::less
<client_t
>,Allocator
<std::pair
<const client_t
,client_writeable_range_t
>>>;
451 bool is_symlink() const { return (mode
& S_IFMT
) == S_IFLNK
; }
452 bool is_dir() const { return (mode
& S_IFMT
) == S_IFDIR
; }
453 bool is_file() const { return (mode
& S_IFMT
) == S_IFREG
; }
455 bool is_truncating() const { return (truncate_pending
> 0); }
456 void truncate(uint64_t old_size
, uint64_t new_size
) {
457 ceph_assert(new_size
< old_size
);
458 if (old_size
> max_size_ever
)
459 max_size_ever
= old_size
;
460 truncate_from
= old_size
;
462 rstat
.rbytes
= new_size
;
463 truncate_size
= size
;
468 bool has_layout() const {
469 return layout
!= file_layout_t();
472 void clear_layout() {
473 layout
= file_layout_t();
476 uint64_t get_layout_size_increment() const {
477 return layout
.get_period();
480 bool is_dirty_rstat() const { return !(rstat
== accounted_rstat
); }
482 uint64_t get_client_range(client_t client
) const {
483 auto it
= client_ranges
.find(client
);
484 return it
!= client_ranges
.end() ? it
->second
.range
.last
: 0;
487 uint64_t get_max_size() const {
489 for (std::map
<client_t
,client_writeable_range_t
>::const_iterator p
= client_ranges
.begin();
490 p
!= client_ranges
.end();
492 if (p
->second
.range
.last
> max
)
493 max
= p
->second
.range
.last
;
496 void set_max_size(uint64_t new_max
) {
498 client_ranges
.clear();
500 for (std::map
<client_t
,client_writeable_range_t
>::iterator p
= client_ranges
.begin();
501 p
!= client_ranges
.end();
503 p
->second
.range
.last
= new_max
;
507 void trim_client_ranges(snapid_t last
) {
508 std::map
<client_t
, client_writeable_range_t
>::iterator p
= client_ranges
.begin();
509 while (p
!= client_ranges
.end()) {
510 if (p
->second
.follows
>= last
)
511 client_ranges
.erase(p
++);
517 bool is_backtrace_updated() const {
518 return backtrace_version
== version
;
520 void update_backtrace(version_t pv
=0) {
521 backtrace_version
= pv
? pv
: version
;
524 void add_old_pool(int64_t l
) {
525 backtrace_version
= version
;
529 void encode(bufferlist
&bl
, uint64_t features
) const;
530 void decode(bufferlist::const_iterator
& bl
);
531 void dump(Formatter
*f
) const;
532 static void generate_test_instances(std::list
<inode_t
*>& ls
);
534 * Compare this inode_t with another that represent *the same inode*
535 * at different points in time.
536 * @pre The inodes are the same ino
538 * @param other The inode_t to compare ourselves with
539 * @param divergent A bool pointer which will be set to true
540 * if the values are different in a way that can't be explained
541 * by one being a newer version than the other.
543 * @returns 1 if we are newer than the other, 0 if equal, -1 if older.
545 int compare(const inode_t
&other
, bool *divergent
) const;
549 uint32_t rdev
= 0; // if special file
551 // affected by any inode change...
552 utime_t ctime
; // inode change time
553 utime_t btime
; // birth time
555 // perm (namespace permissions)
563 // file (data access)
564 ceph_dir_layout dir_layout
= {}; // [dir only]
565 file_layout_t layout
;
566 compact_set
<int64_t, std::less
<int64_t>, Allocator
<int64_t>> old_pools
;
567 uint64_t size
= 0; // on directory, # dentries
568 uint64_t max_size_ever
= 0; // max size the file has ever been
569 uint32_t truncate_seq
= 0;
570 uint64_t truncate_size
= 0, truncate_from
= 0;
571 uint32_t truncate_pending
= 0;
572 utime_t mtime
; // file data modify time.
573 utime_t atime
; // file data access time.
574 uint32_t time_warp_seq
= 0; // count of (potential) mtime/atime timewarps (i.e., utimes())
575 inline_data_t inline_data
; // FIXME check
578 uint64_t change_attr
= 0;
580 client_range_map client_ranges
; // client(s) can write to these ranges
582 // dirfrag, recursive accountin
583 frag_info_t dirstat
; // protected by my filelock
584 nest_info_t rstat
; // protected by my nestlock
585 nest_info_t accounted_rstat
; // protected by parent's nestlock
589 mds_rank_t export_pin
= MDS_RANK_NONE
;
591 double export_ephemeral_random_pin
= 0;
592 bool export_ephemeral_distributed_pin
= false;
595 version_t version
= 0; // auth only
596 version_t file_data_version
= 0; // auth only
597 version_t xattr_version
= 0;
599 utime_t last_scrub_stamp
; // start time of last complete scrub
600 version_t last_scrub_version
= 0;// (parent) start version of last complete scrub
602 version_t backtrace_version
= 0;
604 snapid_t oldest_snap
;
606 std::basic_string
<char,std::char_traits
<char>,Allocator
<char>> stray_prior_path
; //stores path before unlink
609 bool older_is_consistent(const inode_t
&other
) const;
612 // These methods may be moved back to mdstypes.cc when we have pmr
613 template<template<typename
> class Allocator
>
614 void inode_t
<Allocator
>::encode(bufferlist
&bl
, uint64_t features
) const
616 ENCODE_START(16, 6, bl
);
630 encode(anchored
, bl
);
633 encode(dir_layout
, bl
);
634 encode(layout
, bl
, features
);
636 encode(truncate_seq
, bl
);
637 encode(truncate_size
, bl
);
638 encode(truncate_from
, bl
);
639 encode(truncate_pending
, bl
);
642 encode(time_warp_seq
, bl
);
643 encode(client_ranges
, bl
);
647 encode(accounted_rstat
, bl
);
650 encode(file_data_version
, bl
);
651 encode(xattr_version
, bl
);
652 encode(backtrace_version
, bl
);
653 encode(old_pools
, bl
);
654 encode(max_size_ever
, bl
);
655 encode(inline_data
, bl
);
658 encode(stray_prior_path
, bl
);
660 encode(last_scrub_version
, bl
);
661 encode(last_scrub_stamp
, bl
);
664 encode(change_attr
, bl
);
666 encode(export_pin
, bl
);
668 encode(export_ephemeral_random_pin
, bl
);
669 encode(export_ephemeral_distributed_pin
, bl
);
674 template<template<typename
> class Allocator
>
675 void inode_t
<Allocator
>::decode(bufferlist::const_iterator
&p
)
677 DECODE_START_LEGACY_COMPAT_LEN(16, 6, 6, p
);
694 decode(dir_layout
, p
);
696 // FIPS zeroization audit 20191117: this memset is not security related.
697 memset(&dir_layout
, 0, sizeof(dir_layout
));
701 decode(truncate_seq
, p
);
702 decode(truncate_size
, p
);
703 decode(truncate_from
, p
);
705 decode(truncate_pending
, p
);
707 truncate_pending
= 0;
710 decode(time_warp_seq
, p
);
712 decode(client_ranges
, p
);
714 map
<client_t
, client_writeable_range_t::byte_range_t
> m
;
716 for (map
<client_t
, client_writeable_range_t::byte_range_t
>::iterator
717 q
= m
.begin(); q
!= m
.end(); ++q
)
718 client_ranges
[q
->first
].range
= q
->second
;
723 decode(accounted_rstat
, p
);
726 decode(file_data_version
, p
);
727 decode(xattr_version
, p
);
729 decode(backtrace_version
, p
);
731 decode(old_pools
, p
);
733 decode(max_size_ever
, p
);
735 decode(inline_data
, p
);
737 inline_data
.version
= CEPH_INLINE_NONE
;
740 backtrace_version
= 0; // force update backtrace
744 if (struct_v
>= 12) {
747 stray_prior_path
= std::string_view(tmp
);
750 if (struct_v
>= 13) {
751 decode(last_scrub_version
, p
);
752 decode(last_scrub_stamp
, p
);
754 if (struct_v
>= 14) {
756 decode(change_attr
, p
);
762 if (struct_v
>= 15) {
763 decode(export_pin
, p
);
765 export_pin
= MDS_RANK_NONE
;
768 if (struct_v
>= 16) {
769 decode(export_ephemeral_random_pin
, p
);
770 decode(export_ephemeral_distributed_pin
, p
);
772 export_ephemeral_random_pin
= 0;
773 export_ephemeral_distributed_pin
= false;
779 template<template<typename
> class Allocator
>
780 void inode_t
<Allocator
>::dump(Formatter
*f
) const
782 f
->dump_unsigned("ino", ino
);
783 f
->dump_unsigned("rdev", rdev
);
784 f
->dump_stream("ctime") << ctime
;
785 f
->dump_stream("btime") << btime
;
786 f
->dump_unsigned("mode", mode
);
787 f
->dump_unsigned("uid", uid
);
788 f
->dump_unsigned("gid", gid
);
789 f
->dump_unsigned("nlink", nlink
);
791 f
->open_object_section("dir_layout");
792 ::dump(dir_layout
, f
);
795 f
->dump_object("layout", layout
);
797 f
->open_array_section("old_pools");
798 for (const auto &p
: old_pools
) {
799 f
->dump_int("pool", p
);
803 f
->dump_unsigned("size", size
);
804 f
->dump_unsigned("truncate_seq", truncate_seq
);
805 f
->dump_unsigned("truncate_size", truncate_size
);
806 f
->dump_unsigned("truncate_from", truncate_from
);
807 f
->dump_unsigned("truncate_pending", truncate_pending
);
808 f
->dump_stream("mtime") << mtime
;
809 f
->dump_stream("atime") << atime
;
810 f
->dump_unsigned("time_warp_seq", time_warp_seq
);
811 f
->dump_unsigned("change_attr", change_attr
);
812 f
->dump_int("export_pin", export_pin
);
813 f
->dump_int("export_ephemeral_random_pin", export_ephemeral_random_pin
);
814 f
->dump_bool("export_ephemeral_distributed_pin", export_ephemeral_distributed_pin
);
816 f
->open_array_section("client_ranges");
817 for (const auto &p
: client_ranges
) {
818 f
->open_object_section("client");
819 f
->dump_unsigned("client", p
.first
.v
);
825 f
->open_object_section("dirstat");
829 f
->open_object_section("rstat");
833 f
->open_object_section("accounted_rstat");
834 accounted_rstat
.dump(f
);
837 f
->dump_unsigned("version", version
);
838 f
->dump_unsigned("file_data_version", file_data_version
);
839 f
->dump_unsigned("xattr_version", xattr_version
);
840 f
->dump_unsigned("backtrace_version", backtrace_version
);
842 f
->dump_string("stray_prior_path", stray_prior_path
);
843 f
->dump_unsigned("max_size_ever", max_size_ever
);
845 f
->open_object_section("quota");
849 f
->dump_stream("last_scrub_stamp") << last_scrub_stamp
;
850 f
->dump_unsigned("last_scrub_version", last_scrub_version
);
853 template<template<typename
> class Allocator
>
854 void inode_t
<Allocator
>::generate_test_instances(std::list
<inode_t
*>& ls
)
856 ls
.push_back(new inode_t
<Allocator
>);
857 ls
.push_back(new inode_t
<Allocator
>);
862 template<template<typename
> class Allocator
>
863 int inode_t
<Allocator
>::compare(const inode_t
<Allocator
> &other
, bool *divergent
) const
865 ceph_assert(ino
== other
.ino
);
867 if (version
== other
.version
) {
868 if (rdev
!= other
.rdev
||
869 ctime
!= other
.ctime
||
870 btime
!= other
.btime
||
871 mode
!= other
.mode
||
874 nlink
!= other
.nlink
||
875 memcmp(&dir_layout
, &other
.dir_layout
, sizeof(dir_layout
)) ||
876 layout
!= other
.layout
||
877 old_pools
!= other
.old_pools
||
878 size
!= other
.size
||
879 max_size_ever
!= other
.max_size_ever
||
880 truncate_seq
!= other
.truncate_seq
||
881 truncate_size
!= other
.truncate_size
||
882 truncate_from
!= other
.truncate_from
||
883 truncate_pending
!= other
.truncate_pending
||
884 change_attr
!= other
.change_attr
||
885 mtime
!= other
.mtime
||
886 atime
!= other
.atime
||
887 time_warp_seq
!= other
.time_warp_seq
||
888 inline_data
!= other
.inline_data
||
889 client_ranges
!= other
.client_ranges
||
890 !(dirstat
== other
.dirstat
) ||
891 !(rstat
== other
.rstat
) ||
892 !(accounted_rstat
== other
.accounted_rstat
) ||
893 file_data_version
!= other
.file_data_version
||
894 xattr_version
!= other
.xattr_version
||
895 backtrace_version
!= other
.backtrace_version
) {
899 } else if (version
> other
.version
) {
900 *divergent
= !older_is_consistent(other
);
903 ceph_assert(version
< other
.version
);
904 *divergent
= !other
.older_is_consistent(*this);
909 template<template<typename
> class Allocator
>
910 bool inode_t
<Allocator
>::older_is_consistent(const inode_t
<Allocator
> &other
) const
912 if (max_size_ever
< other
.max_size_ever
||
913 truncate_seq
< other
.truncate_seq
||
914 time_warp_seq
< other
.time_warp_seq
||
915 inline_data
.version
< other
.inline_data
.version
||
916 dirstat
.version
< other
.dirstat
.version
||
917 rstat
.version
< other
.rstat
.version
||
918 accounted_rstat
.version
< other
.accounted_rstat
.version
||
919 file_data_version
< other
.file_data_version
||
920 xattr_version
< other
.xattr_version
||
921 backtrace_version
< other
.backtrace_version
) {
927 template<template<typename
> class Allocator
>
928 inline void encode(const inode_t
<Allocator
> &c
, ::ceph::bufferlist
&bl
, uint64_t features
)
931 c
.encode(bl
, features
);
932 ENCODE_DUMP_POST(cl
);
934 template<template<typename
> class Allocator
>
935 inline void decode(inode_t
<Allocator
> &c
, ::ceph::bufferlist::const_iterator
&p
)
940 template<template<typename
> class Allocator
>
941 using alloc_string
= std::basic_string
<char,std::char_traits
<char>,Allocator
<char>>;
943 template<template<typename
> class Allocator
>
944 using xattr_map
= compact_map
<alloc_string
<Allocator
>, bufferptr
, std::less
<alloc_string
<Allocator
>>, Allocator
<std::pair
<const alloc_string
<Allocator
>, bufferptr
>>>; // FIXME bufferptr not in mempool
946 template<template<typename
> class Allocator
>
947 inline void decode_noshare(xattr_map
<Allocator
>& xattrs
, ceph::buffer::list::const_iterator
&p
)
952 alloc_string
<Allocator
> key
;
956 p
.copy_deep(len
, xattrs
[key
]);
960 template<template<typename
> class Allocator
= std::allocator
>
963 inode_t
<Allocator
> inode
;
964 xattr_map
<Allocator
> xattrs
;
966 void encode(bufferlist
&bl
, uint64_t features
) const;
967 void decode(bufferlist::const_iterator
& bl
);
968 void dump(Formatter
*f
) const;
969 static void generate_test_instances(std::list
<old_inode_t
*>& ls
);
972 // These methods may be moved back to mdstypes.cc when we have pmr
973 template<template<typename
> class Allocator
>
974 void old_inode_t
<Allocator
>::encode(bufferlist
& bl
, uint64_t features
) const
976 ENCODE_START(2, 2, bl
);
978 encode(inode
, bl
, features
);
983 template<template<typename
> class Allocator
>
984 void old_inode_t
<Allocator
>::decode(bufferlist::const_iterator
& bl
)
986 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl
);
989 decode_noshare
<Allocator
>(xattrs
, bl
);
993 template<template<typename
> class Allocator
>
994 void old_inode_t
<Allocator
>::dump(Formatter
*f
) const
996 f
->dump_unsigned("first", first
);
998 f
->open_object_section("xattrs");
999 for (const auto &p
: xattrs
) {
1000 std::string
v(p
.second
.c_str(), p
.second
.length());
1001 f
->dump_string(p
.first
.c_str(), v
);
1006 template<template<typename
> class Allocator
>
1007 void old_inode_t
<Allocator
>::generate_test_instances(std::list
<old_inode_t
<Allocator
>*>& ls
)
1009 ls
.push_back(new old_inode_t
<Allocator
>);
1010 ls
.push_back(new old_inode_t
<Allocator
>);
1011 ls
.back()->first
= 2;
1012 std::list
<inode_t
<Allocator
>*> ils
;
1013 inode_t
<Allocator
>::generate_test_instances(ils
);
1014 ls
.back()->inode
= *ils
.back();
1015 ls
.back()->xattrs
["user.foo"] = buffer::copy("asdf", 4);
1016 ls
.back()->xattrs
["user.unprintable"] = buffer::copy("\000\001\002", 3);
1019 template<template<typename
> class Allocator
>
1020 inline void encode(const old_inode_t
<Allocator
> &c
, ::ceph::bufferlist
&bl
, uint64_t features
)
1023 c
.encode(bl
, features
);
1024 ENCODE_DUMP_POST(cl
);
1026 template<template<typename
> class Allocator
>
1027 inline void decode(old_inode_t
<Allocator
> &c
, ::ceph::bufferlist::const_iterator
&p
)
1033 * like an inode, but for a dir frag
1036 void encode(bufferlist
&bl
) const;
1037 void decode(bufferlist::const_iterator
& bl
);
1038 void dump(Formatter
*f
) const;
1039 static void generate_test_instances(std::list
<fnode_t
*>& ls
);
1041 version_t version
= 0;
1042 snapid_t snap_purged_thru
; // the max_last_destroy snapid we've been purged thru
1043 frag_info_t fragstat
, accounted_fragstat
;
1044 nest_info_t rstat
, accounted_rstat
;
1045 damage_flags_t damage_flags
= 0;
1047 // we know we and all our descendants have been scrubbed since this version
1048 version_t recursive_scrub_version
= 0;
1049 utime_t recursive_scrub_stamp
;
1050 // version at which we last scrubbed our personal data structures
1051 version_t localized_scrub_version
= 0;
1052 utime_t localized_scrub_stamp
;
1054 WRITE_CLASS_ENCODER(fnode_t
)
1057 struct old_rstat_t
{
1058 void encode(bufferlist
& bl
) const;
1059 void decode(bufferlist::const_iterator
& p
);
1060 void dump(Formatter
*f
) const;
1061 static void generate_test_instances(std::list
<old_rstat_t
*>& ls
);
1064 nest_info_t rstat
, accounted_rstat
;
1066 WRITE_CLASS_ENCODER(old_rstat_t
)
1068 inline std::ostream
& operator<<(std::ostream
& out
, const old_rstat_t
& o
) {
1069 return out
<< "old_rstat(first " << o
.first
<< " " << o
.rstat
<< " " << o
.accounted_rstat
<< ")";
1072 class feature_bitset_t
{
1074 typedef uint64_t block_type
;
1075 static const size_t bits_per_block
= sizeof(block_type
) * 8;
1077 feature_bitset_t(const feature_bitset_t
& other
) : _vec(other
._vec
) {}
1078 feature_bitset_t(feature_bitset_t
&& other
) : _vec(std::move(other
._vec
)) {}
1079 feature_bitset_t(unsigned long value
= 0);
1080 feature_bitset_t(const vector
<size_t>& array
);
1081 feature_bitset_t
& operator=(const feature_bitset_t
& other
) {
1085 feature_bitset_t
& operator=(feature_bitset_t
&& other
) {
1086 _vec
= std::move(other
._vec
);
1089 feature_bitset_t
& operator-=(const feature_bitset_t
& other
);
1090 bool empty() const {
1091 //block_type is a uint64_t. If the vector is only composed of 0s, then it's still "empty"
1092 for (auto& v
: _vec
) {
1098 bool test(size_t bit
) const {
1099 if (bit
>= bits_per_block
* _vec
.size())
1101 return _vec
[bit
/ bits_per_block
] & ((block_type
)1 << (bit
% bits_per_block
));
1106 void encode(bufferlist
& bl
) const;
1107 void decode(bufferlist::const_iterator
&p
);
1108 void dump(Formatter
*f
) const;
1109 void print(ostream
& out
) const;
1111 vector
<block_type
> _vec
;
1113 WRITE_CLASS_ENCODER(feature_bitset_t
)
1115 inline std::ostream
& operator<<(std::ostream
& out
, const feature_bitset_t
& s
) {
1120 struct metric_spec_t
{
1122 metric_spec_t(const metric_spec_t
& other
) :
1123 metric_flags(other
.metric_flags
) {}
1124 metric_spec_t(metric_spec_t
&& other
) :
1125 metric_flags(std::move(other
.metric_flags
)) {}
1126 metric_spec_t(const feature_bitset_t
& mf
) :
1128 metric_spec_t(feature_bitset_t
&& mf
) :
1129 metric_flags(std::move(mf
)) {}
1131 metric_spec_t
& operator=(const metric_spec_t
& other
) {
1132 metric_flags
= other
.metric_flags
;
1135 metric_spec_t
& operator=(metric_spec_t
&& other
) {
1136 metric_flags
= std::move(other
.metric_flags
);
1140 bool empty() const {
1141 return metric_flags
.empty();
1145 metric_flags
.clear();
1148 void encode(bufferlist
& bl
) const;
1149 void decode(bufferlist::const_iterator
& p
);
1150 void dump(Formatter
*f
) const;
1151 void print(ostream
& out
) const;
1153 // set of metrics that a client is capable of forwarding
1154 feature_bitset_t metric_flags
;
1156 WRITE_CLASS_ENCODER(metric_spec_t
)
1158 inline std::ostream
& operator<<(std::ostream
& out
, const metric_spec_t
& mst
) {
1166 struct client_metadata_t
{
1167 using kv_map_t
= std::map
<std::string
,std::string
>;
1168 using iterator
= kv_map_t::const_iterator
;
1170 client_metadata_t() {}
1171 client_metadata_t(const kv_map_t
& kv
, const feature_bitset_t
&f
, const metric_spec_t
&mst
) :
1175 client_metadata_t
& operator=(const client_metadata_t
& other
) {
1176 kv_map
= other
.kv_map
;
1177 features
= other
.features
;
1178 metric_spec
= other
.metric_spec
;
1182 bool empty() const { return kv_map
.empty() && features
.empty() && metric_spec
.empty(); }
1183 iterator
find(const std::string
& key
) const { return kv_map
.find(key
); }
1184 iterator
begin() const { return kv_map
.begin(); }
1185 iterator
end() const { return kv_map
.end(); }
1186 void erase(iterator it
) { kv_map
.erase(it
); }
1187 std::string
& operator[](const std::string
& key
) { return kv_map
[key
]; }
1188 void merge(const client_metadata_t
& other
) {
1189 kv_map
.insert(other
.kv_map
.begin(), other
.kv_map
.end());
1190 features
= other
.features
;
1191 metric_spec
= other
.metric_spec
;
1196 metric_spec
.clear();
1199 void encode(bufferlist
& bl
) const;
1200 void decode(bufferlist::const_iterator
& p
);
1201 void dump(Formatter
*f
) const;
1204 feature_bitset_t features
;
1205 metric_spec_t metric_spec
;
1207 WRITE_CLASS_ENCODER(client_metadata_t
)
1210 * session_info_t - durable part of a Session
1212 struct session_info_t
{
1213 client_t
get_client() const { return client_t(inst
.name
.num()); }
1214 bool has_feature(size_t bit
) const { return client_metadata
.features
.test(bit
); }
1215 const entity_name_t
& get_source() const { return inst
.name
; }
1218 prealloc_inos
.clear();
1220 completed_requests
.clear();
1221 completed_flushes
.clear();
1222 client_metadata
.clear();
1225 void encode(bufferlist
& bl
, uint64_t features
) const;
1226 void decode(bufferlist::const_iterator
& p
);
1227 void dump(Formatter
*f
) const;
1228 static void generate_test_instances(std::list
<session_info_t
*>& ls
);
1231 std::map
<ceph_tid_t
,inodeno_t
> completed_requests
;
1232 interval_set
<inodeno_t
> prealloc_inos
; // preallocated, ready to use.
1233 interval_set
<inodeno_t
> used_inos
; // journaling use
1234 client_metadata_t client_metadata
;
1235 std::set
<ceph_tid_t
> completed_flushes
;
1236 EntityName auth_name
;
1238 WRITE_CLASS_ENCODER_FEATURES(session_info_t
)
1241 struct dentry_key_t
{
1243 dentry_key_t(snapid_t s
, std::string_view n
, __u32 h
=0) :
1244 snapid(s
), name(n
), hash(h
) {}
1246 bool is_valid() { return name
.length() || snapid
; }
1248 // encode into something that can be decoded as a string.
1249 // name_ (head) or name_%x (!head)
1250 void encode(bufferlist
& bl
) const {
1256 void encode(string
& key
) const {
1258 if (snapid
!= CEPH_NOSNAP
) {
1259 uint64_t val(snapid
);
1260 snprintf(b
, sizeof(b
), "%" PRIx64
, val
);
1262 snprintf(b
, sizeof(b
), "%s", "head");
1265 oss
<< name
<< "_" << b
;
1268 static void decode_helper(bufferlist::const_iterator
& bl
, string
& nm
, snapid_t
& sn
) {
1271 decode_helper(key
, nm
, sn
);
1273 static void decode_helper(std::string_view key
, string
& nm
, snapid_t
& sn
) {
1274 size_t i
= key
.find_last_of('_');
1275 ceph_assert(i
!= string::npos
);
1276 if (key
.compare(i
+1, std::string_view::npos
, "head") == 0) {
1281 long long unsigned x
= 0;
1282 std::string
x_str(key
.substr(i
+1));
1283 sscanf(x_str
.c_str(), "%llx", &x
);
1286 nm
= key
.substr(0, i
);
1289 snapid_t snapid
= 0;
1290 std::string_view name
;
1294 inline std::ostream
& operator<<(std::ostream
& out
, const dentry_key_t
&k
)
1296 return out
<< "(" << k
.name
<< "," << k
.snapid
<< ")";
1299 inline bool operator<(const dentry_key_t
& k1
, const dentry_key_t
& k2
)
1302 * order by hash, name, snap
1304 int c
= ceph_frag_value(k1
.hash
) - ceph_frag_value(k2
.hash
);
1307 c
= k1
.name
.compare(k2
.name
);
1310 return k1
.snapid
< k2
.snapid
;
1314 * string_snap_t is a simple (string, snapid_t) pair
1316 struct string_snap_t
{
1318 string_snap_t(std::string_view n
, snapid_t s
) : name(n
), snapid(s
) {}
1320 void encode(bufferlist
& bl
) const;
1321 void decode(bufferlist::const_iterator
& p
);
1322 void dump(Formatter
*f
) const;
1323 static void generate_test_instances(std::list
<string_snap_t
*>& ls
);
1328 WRITE_CLASS_ENCODER(string_snap_t
)
1330 inline bool operator<(const string_snap_t
& l
, const string_snap_t
& r
) {
1331 int c
= l
.name
.compare(r
.name
);
1332 return c
< 0 || (c
== 0 && l
.snapid
< r
.snapid
);
1335 inline std::ostream
& operator<<(std::ostream
& out
, const string_snap_t
&k
)
1337 return out
<< "(" << k
.name
<< "," << k
.snapid
<< ")";
1341 * mds_table_pending_t
1343 * For mds's requesting any pending ops, child needs to encode the corresponding
1344 * pending mutation state in the table.
1346 struct mds_table_pending_t
{
1347 void encode(bufferlist
& bl
) const;
1348 void decode(bufferlist::const_iterator
& bl
);
1349 void dump(Formatter
*f
) const;
1350 static void generate_test_instances(std::list
<mds_table_pending_t
*>& ls
);
1356 WRITE_CLASS_ENCODER(mds_table_pending_t
)
1359 struct metareqid_t
{
1361 metareqid_t(entity_name_t n
, ceph_tid_t t
) : name(n
), tid(t
) {}
1362 void encode(bufferlist
& bl
) const {
1367 void decode(bufferlist::const_iterator
&p
) {
1376 WRITE_CLASS_ENCODER(metareqid_t
)
1378 inline std::ostream
& operator<<(std::ostream
& out
, const metareqid_t
& r
) {
1379 return out
<< r
.name
<< ":" << r
.tid
;
1382 inline bool operator==(const metareqid_t
& l
, const metareqid_t
& r
) {
1383 return (l
.name
== r
.name
) && (l
.tid
== r
.tid
);
1385 inline bool operator!=(const metareqid_t
& l
, const metareqid_t
& r
) {
1386 return (l
.name
!= r
.name
) || (l
.tid
!= r
.tid
);
1388 inline bool operator<(const metareqid_t
& l
, const metareqid_t
& r
) {
1389 return (l
.name
< r
.name
) ||
1390 (l
.name
== r
.name
&& l
.tid
< r
.tid
);
1392 inline bool operator<=(const metareqid_t
& l
, const metareqid_t
& r
) {
1393 return (l
.name
< r
.name
) ||
1394 (l
.name
== r
.name
&& l
.tid
<= r
.tid
);
1396 inline bool operator>(const metareqid_t
& l
, const metareqid_t
& r
) { return !(l
<= r
); }
1397 inline bool operator>=(const metareqid_t
& l
, const metareqid_t
& r
) { return !(l
< r
); }
1400 template<> struct hash
<metareqid_t
> {
1401 size_t operator()(const metareqid_t
&r
) const {
1403 return H(r
.name
.num()) ^ H(r
.name
.type()) ^ H(r
.tid
);
1408 // cap info for client reconnect
1409 struct cap_reconnect_t
{
1410 cap_reconnect_t() {}
1411 cap_reconnect_t(uint64_t cap_id
, inodeno_t pino
, std::string_view p
, int w
, int i
,
1412 inodeno_t sr
, snapid_t sf
, bufferlist
& lb
) :
1414 capinfo
.cap_id
= cap_id
;
1417 capinfo
.snaprealm
= sr
;
1418 capinfo
.pathbase
= pino
;
1419 capinfo
.flock_len
= 0;
1423 void encode(bufferlist
& bl
) const;
1424 void decode(bufferlist::const_iterator
& bl
);
1425 void encode_old(bufferlist
& bl
) const;
1426 void decode_old(bufferlist::const_iterator
& bl
);
1428 void dump(Formatter
*f
) const;
1429 static void generate_test_instances(std::list
<cap_reconnect_t
*>& ls
);
1432 mutable ceph_mds_cap_reconnect capinfo
= {};
1433 snapid_t snap_follows
= 0;
1436 WRITE_CLASS_ENCODER(cap_reconnect_t
)
1438 struct snaprealm_reconnect_t
{
1439 snaprealm_reconnect_t() {}
1440 snaprealm_reconnect_t(inodeno_t ino
, snapid_t seq
, inodeno_t parent
) {
1443 realm
.parent
= parent
;
1445 void encode(bufferlist
& bl
) const;
1446 void decode(bufferlist::const_iterator
& bl
);
1447 void encode_old(bufferlist
& bl
) const;
1448 void decode_old(bufferlist::const_iterator
& bl
);
1450 void dump(Formatter
*f
) const;
1451 static void generate_test_instances(std::list
<snaprealm_reconnect_t
*>& ls
);
1453 mutable ceph_mds_snaprealm_reconnect realm
= {};
1455 WRITE_CLASS_ENCODER(snaprealm_reconnect_t
)
1457 // compat for pre-FLOCK feature
1458 struct old_ceph_mds_cap_reconnect
{
1463 struct ceph_timespec old_mtime
, old_atime
;
1464 ceph_le64 snaprealm
;
1465 ceph_le64 pathbase
; /* base ino for our path to this ino */
1466 } __attribute__ ((packed
));
1467 WRITE_RAW_ENCODER(old_ceph_mds_cap_reconnect
)
1469 struct old_cap_reconnect_t
{
1470 const old_cap_reconnect_t
& operator=(const cap_reconnect_t
& n
) {
1472 capinfo
.cap_id
= n
.capinfo
.cap_id
;
1473 capinfo
.wanted
= n
.capinfo
.wanted
;
1474 capinfo
.issued
= n
.capinfo
.issued
;
1475 capinfo
.snaprealm
= n
.capinfo
.snaprealm
;
1476 capinfo
.pathbase
= n
.capinfo
.pathbase
;
1479 operator cap_reconnect_t() {
1482 n
.capinfo
.cap_id
= capinfo
.cap_id
;
1483 n
.capinfo
.wanted
= capinfo
.wanted
;
1484 n
.capinfo
.issued
= capinfo
.issued
;
1485 n
.capinfo
.snaprealm
= capinfo
.snaprealm
;
1486 n
.capinfo
.pathbase
= capinfo
.pathbase
;
1490 void encode(bufferlist
& bl
) const {
1493 encode(capinfo
, bl
);
1495 void decode(bufferlist::const_iterator
& bl
) {
1498 decode(capinfo
, bl
);
1502 old_ceph_mds_cap_reconnect capinfo
;
1504 WRITE_CLASS_ENCODER(old_cap_reconnect_t
)
1509 dirfrag_t(inodeno_t i
, frag_t f
) : ino(i
), frag(f
) { }
1511 void encode(bufferlist
& bl
) const {
1516 void decode(bufferlist::const_iterator
& bl
) {
1525 WRITE_CLASS_ENCODER(dirfrag_t
)
1527 inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_t
&df
) {
1529 if (!df
.frag
.is_root()) out
<< "." << df
.frag
;
1532 inline bool operator<(dirfrag_t l
, dirfrag_t r
) {
1533 if (l
.ino
< r
.ino
) return true;
1534 if (l
.ino
== r
.ino
&& l
.frag
< r
.frag
) return true;
1537 inline bool operator==(dirfrag_t l
, dirfrag_t r
) {
1538 return l
.ino
== r
.ino
&& l
.frag
== r
.frag
;
1542 template<> struct hash
<dirfrag_t
> {
1543 size_t operator()(const dirfrag_t
&df
) const {
1544 static rjhash
<uint64_t> H
;
1545 static rjhash
<uint32_t> I
;
1546 return H(df
.ino
) ^ I(df
.frag
);
1551 // ================================================================
1552 #define META_POP_IRD 0
1553 #define META_POP_IWR 1
1554 #define META_POP_READDIR 2
1555 #define META_POP_FETCH 3
1556 #define META_POP_STORE 4
1559 class inode_load_vec_t
{
1561 using time
= DecayCounter::time
;
1562 using clock
= DecayCounter::clock
;
1563 static const size_t NUM
= 2;
1565 inode_load_vec_t() : vec
{DecayCounter(DecayRate()), DecayCounter(DecayRate())} {}
1566 inode_load_vec_t(const DecayRate
&rate
) : vec
{DecayCounter(rate
), DecayCounter(rate
)} {}
1568 DecayCounter
&get(int t
) {
1572 for (auto &d
: vec
) {
1576 void encode(bufferlist
&bl
) const;
1577 void decode(bufferlist::const_iterator
& p
);
1578 void dump(Formatter
*f
) const;
1579 static void generate_test_instances(std::list
<inode_load_vec_t
*>& ls
);
1582 std::array
<DecayCounter
, NUM
> vec
;
1584 inline void encode(const inode_load_vec_t
&c
, bufferlist
&bl
) {
1587 inline void decode(inode_load_vec_t
& c
, bufferlist::const_iterator
&p
) {
1591 class dirfrag_load_vec_t
{
1593 using time
= DecayCounter::time
;
1594 using clock
= DecayCounter::clock
;
1595 static const size_t NUM
= 5;
1597 dirfrag_load_vec_t() :
1598 vec
{DecayCounter(DecayRate()),
1599 DecayCounter(DecayRate()),
1600 DecayCounter(DecayRate()),
1601 DecayCounter(DecayRate()),
1602 DecayCounter(DecayRate())
1605 dirfrag_load_vec_t(const DecayRate
&rate
) :
1606 vec
{DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
), DecayCounter(rate
)}
1609 void encode(bufferlist
&bl
) const {
1610 ENCODE_START(2, 2, bl
);
1611 for (const auto &i
: vec
) {
1616 void decode(bufferlist::const_iterator
&p
) {
1617 DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p
);
1618 for (auto &i
: vec
) {
1623 void dump(Formatter
*f
) const;
1624 void dump(Formatter
*f
, const DecayRate
& rate
) const;
1625 static void generate_test_instances(std::list
<dirfrag_load_vec_t
*>& ls
);
1627 const DecayCounter
&get(int t
) const {
1630 DecayCounter
&get(int t
) {
1633 void adjust(double d
) {
1634 for (auto &i
: vec
) {
1639 for (auto &i
: vec
) {
1643 double meta_load() const {
1645 1*vec
[META_POP_IRD
].get() +
1646 2*vec
[META_POP_IWR
].get() +
1647 1*vec
[META_POP_READDIR
].get() +
1648 2*vec
[META_POP_FETCH
].get() +
1649 4*vec
[META_POP_STORE
].get();
1652 void add(dirfrag_load_vec_t
& r
) {
1653 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1654 vec
[i
].adjust(r
.vec
[i
].get());
1656 void sub(dirfrag_load_vec_t
& r
) {
1657 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1658 vec
[i
].adjust(-r
.vec
[i
].get());
1660 void scale(double f
) {
1661 for (size_t i
=0; i
<dirfrag_load_vec_t::NUM
; i
++)
1666 friend inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_load_vec_t
& dl
);
1667 std::array
<DecayCounter
, NUM
> vec
;
1670 inline void encode(const dirfrag_load_vec_t
&c
, bufferlist
&bl
) {
1673 inline void decode(dirfrag_load_vec_t
& c
, bufferlist::const_iterator
&p
) {
1677 inline std::ostream
& operator<<(std::ostream
& out
, const dirfrag_load_vec_t
& dl
)
1679 std::ostringstream ss
;
1680 ss
<< std::setprecision(1) << std::fixed
1682 " IRD:" << dl
.vec
[0]
1683 << " IWR:" << dl
.vec
[1]
1684 << " RDR:" << dl
.vec
[2]
1685 << " FET:" << dl
.vec
[3]
1686 << " STR:" << dl
.vec
[4]
1687 << " *LOAD:" << dl
.meta_load() << "]";
1688 return out
<< ss
.str() << std::endl
;
1692 using clock
= dirfrag_load_vec_t::clock
;
1693 using time
= dirfrag_load_vec_t::time
;
1695 dirfrag_load_vec_t auth
;
1696 dirfrag_load_vec_t all
;
1698 mds_load_t() : auth(DecayRate()), all(DecayRate()) {}
1699 mds_load_t(const DecayRate
&rate
) : auth(rate
), all(rate
) {}
1701 double req_rate
= 0.0;
1702 double cache_hit_rate
= 0.0;
1703 double queue_len
= 0.0;
1705 double cpu_load_avg
= 0.0;
1707 double mds_load() const; // defined in MDBalancer.cc
1708 void encode(bufferlist
& bl
) const;
1709 void decode(bufferlist::const_iterator
& bl
);
1710 void dump(Formatter
*f
) const;
1711 static void generate_test_instances(std::list
<mds_load_t
*>& ls
);
1713 inline void encode(const mds_load_t
&c
, bufferlist
&bl
) {
1716 inline void decode(mds_load_t
&c
, bufferlist::const_iterator
&p
) {
1720 inline std::ostream
& operator<<(std::ostream
& out
, const mds_load_t
& load
)
1722 return out
<< "mdsload<" << load
.auth
<< "/" << load
.all
1723 << ", req " << load
.req_rate
1724 << ", hr " << load
.cache_hit_rate
1725 << ", qlen " << load
.queue_len
1726 << ", cpu " << load
.cpu_load_avg
1730 class load_spread_t
{
1732 using time
= DecayCounter::time
;
1733 using clock
= DecayCounter::clock
;
1734 static const int MAX
= 4;
1736 load_spread_t(const DecayRate
&rate
) : count(rate
)
1739 load_spread_t() = delete;
1741 double hit(int who
) {
1742 for (int i
=0; i
<n
; i
++)
1744 return count
.get_last();
1749 if (n
== 1) return 0.0;
1751 if (p
== MAX
) p
= 0;
1755 double get() const {
1759 std::array
<int, MAX
> last
= {-1, -1, -1, -1};
1764 // ================================================================
1765 typedef std::pair
<mds_rank_t
, mds_rank_t
> mds_authority_t
;
1767 // -- authority delegation --
1768 // directory authority types
1769 // >= 0 is the auth mds
1770 #define CDIR_AUTH_PARENT mds_rank_t(-1) // default
1771 #define CDIR_AUTH_UNKNOWN mds_rank_t(-2)
1772 #define CDIR_AUTH_DEFAULT mds_authority_t(CDIR_AUTH_PARENT, CDIR_AUTH_UNKNOWN)
1773 #define CDIR_AUTH_UNDEF mds_authority_t(CDIR_AUTH_UNKNOWN, CDIR_AUTH_UNKNOWN)
1774 //#define CDIR_AUTH_ROOTINODE pair<int,int>( 0, -2)
1776 class MDSCacheObjectInfo
{
1778 void encode(bufferlist
& bl
) const;
1779 void decode(bufferlist::const_iterator
& bl
);
1780 void dump(Formatter
*f
) const;
1781 static void generate_test_instances(std::list
<MDSCacheObjectInfo
*>& ls
);
1789 inline std::ostream
& operator<<(std::ostream
& out
, const MDSCacheObjectInfo
&info
) {
1790 if (info
.ino
) return out
<< info
.ino
<< "." << info
.snapid
;
1791 if (info
.dname
.length()) return out
<< info
.dirfrag
<< "/" << info
.dname
1792 << " snap " << info
.snapid
;
1793 return out
<< info
.dirfrag
;
1796 inline bool operator==(const MDSCacheObjectInfo
& l
, const MDSCacheObjectInfo
& r
) {
1798 return l
.ino
== r
.ino
&& l
.snapid
== r
.snapid
;
1800 return l
.dirfrag
== r
.dirfrag
&& l
.dname
== r
.dname
;
1802 WRITE_CLASS_ENCODER(MDSCacheObjectInfo
)
1804 // parse a map of keys/values.
1805 namespace qi
= boost::spirit::qi
;
1807 template <typename Iterator
>
1808 struct keys_and_values
1809 : qi::grammar
<Iterator
, std::map
<string
, string
>()>
1812 : keys_and_values::base_type(query
)
1814 query
= pair
>> *(qi::lit(' ') >> pair
);
1815 pair
= key
>> '=' >> value
;
1816 key
= qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
1817 value
= +qi::char_("a-zA-Z0-9-_.");
1819 qi::rule
<Iterator
, std::map
<string
, string
>()> query
;
1820 qi::rule
<Iterator
, std::pair
<string
, string
>()> pair
;
1821 qi::rule
<Iterator
, string()> key
, value
;