]>
git.proxmox.com Git - ceph.git/blob - ceph/src/mds/MDBalancer.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
17 #ifndef CEPH_MDBALANCER_H
18 #define CEPH_MDBALANCER_H
25 #include "include/types.h"
26 #include "common/Clock.h"
27 #include "common/Cond.h"
38 friend class C_Bal_SendHeartbeat
;
40 MDBalancer(MDSRank
*m
, Messenger
*msgr
, MonClient
*monc
) :
45 last_epoch_under(0), my_load(0.0), target_load(0.0)
48 mds_load_t
get_load(utime_t
);
50 int proc_message(Message
*m
);
53 * Regularly called upkeep function.
55 * Sends MHeartbeat messages to the mons.
59 void subtract_export(CDir
*ex
, utime_t now
);
60 void add_import(CDir
*im
, utime_t now
);
62 void hit_inode(utime_t now
, CInode
*in
, int type
, int who
=-1);
63 void hit_dir(utime_t now
, CDir
*dir
, int type
, int who
=-1, double amount
=1.0);
65 void queue_split(const CDir
*dir
, bool fast
);
66 void queue_merge(CDir
*dir
);
/**
 * Based on size and configuration, decide whether to issue a queue_split
 * or queue_merge for this CDir.
 *
 * \param hot whether the directory's temperature is enough to split it
 */
74 void maybe_fragment(CDir
*dir
, bool hot
);
76 void handle_mds_failure(mds_rank_t who
);
80 std::map
<mds_rank_t
, double> targets
;
81 std::map
<mds_rank_t
, double> imported
;
82 std::map
<mds_rank_t
, double> exported
;
// Set up the rebalancing targets for export and do one if the
// MDSMap is up to date.
87 void prep_rebalance(int beat
);
88 int mantle_prep_rebalance();
90 void handle_export_pins(void);
92 void export_empties();
93 int localize_balancer();
94 void send_heartbeat();
95 void handle_heartbeat(MHeartbeat
*m
);
96 void find_exports(CDir
*dir
,
100 set
<CDir
*>& already_exporting
);
102 double try_match(balance_state_t
&state
,
103 mds_rank_t ex
, double& maxex
,
104 mds_rank_t im
, double& maxim
);
106 double get_maxim(balance_state_t
&state
, mds_rank_t im
) {
107 return target_load
- mds_meta_load
[im
] - state
.imported
[im
];
109 double get_maxex(balance_state_t
&state
, mds_rank_t ex
) {
110 return mds_meta_load
[ex
] - target_load
- state
.exported
[ex
];
/**
 * Check if the monitor has recorded the current export targets;
 * if it has then do the actual export. Otherwise send off our
 * export targets message again.
 */
120 void try_rebalance(balance_state_t
& state
);
123 Messenger
*messenger
;
124 MonClient
*mon_client
;
127 int last_epoch_under
;
131 utime_t last_heartbeat
;
133 utime_t rebalance_time
; //ensure a consistent view of load for rebalance
135 // Dirfrags which are marked to be passed on to MDCache::[split|merge]_dir
136 // just as soon as a delayed context comes back and triggers it.
137 // These sets just prevent us from spawning extra timer contexts for
138 // dirfrags that already have one in flight.
139 set
<dirfrag_t
> split_pending
, merge_pending
;
141 // per-epoch scatter/gathered info
142 map
<mds_rank_t
, mds_load_t
> mds_load
;
143 map
<mds_rank_t
, double> mds_meta_load
;
144 map
<mds_rank_t
, map
<mds_rank_t
, float> > mds_import_map
;
147 double my_load
, target_load
;