]>
git.proxmox.com Git - ceph.git/blob - ceph/src/mds/MDBalancer.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
14 #ifndef CEPH_MDBALANCER_H
15 #define CEPH_MDBALANCER_H
17 #include "include/types.h"
18 #include "common/Clock.h"
19 #include "common/Cond.h"
21 #include "msg/Message.h"
22 #include "messages/MHeartbeat.h"
35 using clock
= ceph::coarse_mono_clock
;
36 using time
= ceph::coarse_mono_time
;
37 friend class C_Bal_SendHeartbeat
;
39 MDBalancer(MDSRank
*m
, Messenger
*msgr
, MonClient
*monc
);
41 void handle_conf_change(const std::set
<std::string
>& changed
, const MDSMap
& mds_map
);
43 int proc_message(const cref_t
<Message
> &m
);
46 * Regularly called upkeep function.
48 * Sends MHeartbeat messages to the mons.
// Takes no arguments and returns nothing; defined out of line.
// (An empty parameter list is the C++ spelling of `(void)`.)
void handle_export_pins();
54 void subtract_export(CDir
*ex
);
55 void add_import(CDir
*im
);
56 void adjust_pop_for_rename(CDir
*pdir
, CDir
*dir
, bool inc
);
58 void hit_inode(CInode
*in
, int type
, int who
=-1);
59 void hit_dir(CDir
*dir
, int type
, int who
=-1, double amount
=1.0);
61 void queue_split(const CDir
*dir
, bool fast
);
62 void queue_merge(CDir
*dir
);
63 bool is_fragment_pending(dirfrag_t df
) {
64 return split_pending
.count(df
) || merge_pending
.count(df
);
68 * Based on size and configuration, decide whether to issue a queue_split
69 * or queue_merge for this CDir.
71 * \param hot whether the directory's temperature is enough to split it
73 void maybe_fragment(CDir
*dir
, bool hot
);
75 void handle_mds_failure(mds_rank_t who
);
77 int dump_loads(Formatter
*f
) const;
81 std::map
<mds_rank_t
, double> targets
;
82 std::map
<mds_rank_t
, double> imported
;
83 std::map
<mds_rank_t
, double> exported
;
// Set up the rebalancing targets for export, and perform an export if the
// MDSMap is up to date.
void prep_rebalance(int beat);

// Returns an int status; its meaning is defined by the implementation.
int mantle_prep_rebalance();
91 mds_load_t
get_load();
92 int localize_balancer();
93 void send_heartbeat();
94 void handle_heartbeat(const cref_t
<MHeartbeat
> &m
);
95 void find_exports(CDir
*dir
,
97 std::vector
<CDir
*>* exports
,
99 set
<CDir
*>& already_exporting
);
101 double try_match(balance_state_t
&state
,
102 mds_rank_t ex
, double& maxex
,
103 mds_rank_t im
, double& maxim
);
105 double get_maxim(balance_state_t
&state
, mds_rank_t im
) {
106 return target_load
- mds_meta_load
[im
] - state
.imported
[im
];
108 double get_maxex(balance_state_t
&state
, mds_rank_t ex
) {
109 return mds_meta_load
[ex
] - target_load
- state
.exported
[ex
];
115 * Check if the monitor has recorded the current export targets;
116 * if it has then do the actual export. Otherwise send off our
117 * export targets message again.
119 void try_rebalance(balance_state_t
& state
);
// Fragmentation settings. Neither has an in-class initializer, so they must
// be assigned elsewhere before they are read (presumably from configuration
// -- confirm in the constructor / handle_conf_change).
bool bal_fragment_dirs;
int64_t bal_fragment_interval;

// Compile-time constant. constexpr (implicitly inline from C++17 on) lets it
// be bound to a const reference without a separate out-of-class definition.
static constexpr unsigned int AUTH_TREES_THRESHOLD = 5;
126 Messenger
*messenger
;
127 MonClient
*mon_client
;
133 time last_heartbeat
= clock::zero();
134 time last_sample
= clock::zero();
135 time rebalance_time
= clock::zero(); //ensure a consistent view of load for rebalance
137 time last_get_load
= clock::zero();
// Previous-sample counters, all starting at zero (presumably maintained by
// get_load() -- confirm against the implementation).
uint64_t last_num_requests{0};
uint64_t last_cpu_time{0};
uint64_t last_num_traverse{0};
uint64_t last_num_traverse_hit{0};
143 // Dirfrags which are marked to be passed on to MDCache::[split|merge]_dir
144 // just as soon as a delayed context comes back and triggers it.
145 // These sets just prevent us from spawning extra timer contexts for
146 // dirfrags that already have one in flight.
147 set
<dirfrag_t
> split_pending
, merge_pending
;
149 // per-epoch scatter/gathered info
150 std::map
<mds_rank_t
, mds_load_t
> mds_load
;
151 std::map
<mds_rank_t
, double> mds_meta_load
;
152 std::map
<mds_rank_t
, map
<mds_rank_t
, float> > mds_import_map
;
153 std::map
<mds_rank_t
, int> mds_last_epoch_under_map
;
// Load level that get_maxim() / get_maxex() measure each rank against.
// Starts at zero.
double target_load{0.0};