1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
13 * Handles the import and export of mds authorities and actual cache data.
14 * See src/doc/exports.txt for a description.
17 #ifndef CEPH_MDS_MIGRATOR_H
18 #define CEPH_MDS_MIGRATOR_H
20 #include "include/types.h"
// Forward declarations of migration message classes. In this header they are
// only used as pointer parameters of the handle_export_* member functions.
36 class MExportDirDiscover
;
37 class MExportDirDiscoverAck
;
38 class MExportDirCancel
;
40 class MExportDirPrepAck
;
43 class MExportDirNotify
;
44 class MExportDirNotifyAck
;
45 class MExportDirFinish
;
55 // export stages. used to clean up intelligently if there's a failure.
56 const static int EXPORT_CANCELLED
= 0; // cancelled
57 const static int EXPORT_CANCELLING
= 1; // waiting for cancel notifyacks
58 const static int EXPORT_LOCKING
= 2; // acquiring locks
59 const static int EXPORT_DISCOVERING
= 3; // dest is discovering export dir
60 const static int EXPORT_FREEZING
= 4; // we're freezing the dir tree
61 const static int EXPORT_PREPPING
= 5; // sending dest spanning tree to export bounds
62 const static int EXPORT_WARNING
= 6; // warning bystanders of dir_auth_pending
63 const static int EXPORT_EXPORTING
= 7; // sent actual export, waiting for ack
64 const static int EXPORT_LOGGINGFINISH
= 8; // logging EExportFinish
65 const static int EXPORT_NOTIFYING
= 9; // waiting for notifyacks
// Maps an export stage value @s to a short lowercase name string.
// Any value without a case label reaches the default branch, which calls
// ceph_abort().
// NOTE(review): no case for EXPORT_CANCELLED is visible in this copy, so that
// value appears to take the aborting default -- confirm this is intended.
66 static const char *get_export_statename(int s
) {
68 case EXPORT_CANCELLING
: return "cancelling";
69 case EXPORT_LOCKING
: return "locking";
70 case EXPORT_DISCOVERING
: return "discovering";
71 case EXPORT_FREEZING
: return "freezing";
72 case EXPORT_PREPPING
: return "prepping";
73 case EXPORT_WARNING
: return "warning";
74 case EXPORT_EXPORTING
: return "exporting";
75 case EXPORT_LOGGINGFINISH
: return "loggingfinish";
76 case EXPORT_NOTIFYING
: return "notifying";
77 default: ceph_abort(); return 0;
// import stages. is_ambiguous_import() compares these values numerically
// (>= IMPORT_LOGGINGSTART and < IMPORT_ABORTING), so their order is significant.
82 const static int IMPORT_DISCOVERING
= 1; // waiting for prep
// NOTE(review): the comment below is identical to IMPORT_DISCOVERING's --
// confirm whether a distinct description was intended.
83 const static int IMPORT_DISCOVERED
= 2; // waiting for prep
84 const static int IMPORT_PREPPING
= 3; // opening dirs on bounds
85 const static int IMPORT_PREPPED
= 4; // opened bounds, waiting for import
86 const static int IMPORT_LOGGINGSTART
= 5; // got import, logging EImportStart
87 const static int IMPORT_ACKING
= 6; // logged EImportStart, sent ack, waiting for finish
88 const static int IMPORT_FINISHING
= 7; // sent cap imports, waiting for finish
89 const static int IMPORT_ABORTING
= 8; // notifying bystanders of an abort before unfreezing
// Maps an import stage value @s to a short lowercase name string.
// Each IMPORT_* constant declared above has a case here; any other value
// reaches the default branch, which calls ceph_abort().
90 static const char *get_import_statename(int s
) {
92 case IMPORT_DISCOVERING
: return "discovering";
93 case IMPORT_DISCOVERED
: return "discovered";
94 case IMPORT_PREPPING
: return "prepping";
95 case IMPORT_PREPPED
: return "prepped";
96 case IMPORT_LOGGINGSTART
: return "loggingstart";
97 case IMPORT_ACKING
: return "acking";
98 case IMPORT_FINISHING
: return "finishing";
99 case IMPORT_ABORTING
: return "aborting";
100 default: ceph_abort(); return 0;
// Constructs a Migrator that keeps the given MDSRank (@m) and MDCache (@c)
// pointers in mds and cache, then reads the boolean config option
// "mds_inject_migrator_session_race" from g_conf into inject_session_race.
105 Migrator(MDSRank
*m
, MDCache
*c
) : mds(m
), cache(c
) {
106 inject_session_race
= g_conf
->get_val
<bool>("mds_inject_migrator_session_race");
// Configuration-change hook; declaration only, the body is not in this header.
// @conf     configuration object
// @changed  set of option names (strings)
// @mds_map  MDS map passed by const reference
// NOTE(review): presumably refreshes inject_session_race when its option is
// listed in @changed -- confirm against the definition.
109 void handle_conf_change(const struct md_config_t
*conf
,
110 const std::set
<std::string
> &changed
,
111 const MDSMap
&mds_map
);
// Bookkeeping for a single export; instances are stored per CDir in the
// export_state map.
// NOTE(review): the constructor initializes state, peer, tid and mut, but
// their declarations are not visible in this copy of the struct.
115 struct export_state_t
{
// export_has_warned() returns true for a rank only when it is absent from
// this set.
119 set
<mds_rank_t
> warning_ack_waiting
;
// export_has_notified() returns true for a rank only when it is absent from
// this set.
120 set
<mds_rank_t
> notify_ack_waiting
;
// Capability::Import records keyed by inode number, then by client.
121 map
<inodeno_t
,map
<client_t
,Capability::Import
> > peer_imported
;
122 set
<CDir
*> residual_dirs
;
125 // for freeze tree deadlock detection
126 utime_t last_cum_auth_pins_change
;
127 int last_cum_auth_pins
;
128 int num_remote_waiters
; // number of remote authpin waiters
// Sets state, peer and tid to 0, value-initializes mut and zeroes both
// counters. last_cum_auth_pins_change is not in the initializer list.
129 export_state_t() : state(0), peer(0), tid(0), mut(),
130 last_cum_auth_pins(0), num_remote_waiters(0) {}
// Export bookkeeping keyed by directory pointer. The is_exporting(),
// get_export_state() and export_has_*() accessors all look entries up here.
133 map
<CDir
*, export_state_t
> export_state
;
// Queue of (dirfrag, rank) pairs; emptied by clear_export_queue().
// NOTE(review): the rank is presumably the export destination -- confirm.
135 list
<pair
<dirfrag_t
,mds_rank_t
> > export_queue
;
// Bookkeeping for a single import; instances are stored per dirfrag_t in the
// import_state map.
// NOTE(review): the constructor initializes state, peer, tid and mut, but
// their declarations are not visible in this copy of the struct.
138 struct import_state_t
{
142 set
<mds_rank_t
> bystanders
;
143 list
<dirfrag_t
> bound_ls
;
144 list
<ScatterLock
*> updated_scatterlocks
;
// Per-client (Session pointer, uint64_t) pairs.
145 map
<client_t
,pair
<Session
*,uint64_t> > session_map
;
// Capability::Export records keyed by inode pointer, then by client.
146 map
<CInode
*, map
<client_t
,Capability::Export
> > peer_exports
;
// Sets state, peer and tid to 0 and value-initializes mut.
148 import_state_t() : state(0), peer(0), tid(0), mut() {}
// Import bookkeeping keyed by dirfrag. The is_importing(),
// is_ambiguous_import(), get_import_state() and get_import_peer() accessors
// all look entries up here.
151 map
<dirfrag_t
, import_state_t
> import_state
;
// Export-path message handlers and helper steps. These are declarations only;
// no bodies appear in this header. export_try_cancel() defaults notify_peer
// to true.
153 void handle_export_discover_ack(MExportDirDiscoverAck
*m
);
154 void export_frozen(CDir
*dir
, uint64_t tid
);
155 void check_export_size(CDir
*dir
, export_state_t
& stat
, set
<client_t
> &client_set
);
156 void handle_export_prep_ack(MExportDirPrepAck
*m
);
157 void export_sessions_flushed(CDir
*dir
, uint64_t tid
);
158 void export_go(CDir
*dir
);
159 void export_go_synced(CDir
*dir
, uint64_t tid
);
160 void export_try_cancel(CDir
*dir
, bool notify_peer
=true);
161 void export_cancel_finish(CDir
*dir
);
162 void export_reverse(CDir
*dir
, export_state_t
& stat
);
163 void export_notify_abort(CDir
*dir
, export_state_t
& stat
, set
<CDir
*>& bounds
);
164 void handle_export_ack(MExportDirAck
*m
);
165 void export_logged_finish(CDir
*dir
);
166 void handle_export_notify_ack(MExportDirNotifyAck
*m
);
167 void export_finish(CDir
*dir
);
169 void handle_gather_caps(MGatherCaps
*m
);
// Helper classes granted friend access to this class's members.
171 friend class C_MDC_ExportFreeze
;
172 friend class C_MDS_ExportFinishLogged
;
173 friend class C_M_ExportGo
;
174 friend class C_M_ExportSessionsFlushed
;
175 friend class MigratorContext
;
176 friend class MigratorLogContext
;
// Import-path message handlers plus the import_reverse_*, import_notify_* and
// logging helpers. These are declarations only; no bodies appear in this
// header.
179 void handle_export_discover(MExportDirDiscover
*m
);
180 void handle_export_cancel(MExportDirCancel
*m
);
181 void handle_export_prep(MExportDirPrep
*m
);
182 void handle_export_dir(MExportDir
*m
);
184 void import_reverse_discovering(dirfrag_t df
);
185 void import_reverse_discovered(dirfrag_t df
, CInode
*diri
);
186 void import_reverse_prepping(CDir
*dir
, import_state_t
& stat
);
187 void import_remove_pins(CDir
*dir
, set
<CDir
*>& bounds
);
188 void import_reverse_unfreeze(CDir
*dir
);
189 void import_reverse_final(CDir
*dir
);
190 void import_notify_abort(CDir
*dir
, set
<CDir
*>& bounds
);
191 void import_notify_finish(CDir
*dir
, set
<CDir
*>& bounds
);
192 void import_logged_start(dirfrag_t df
, CDir
*dir
, mds_rank_t from
,
193 map
<client_t
,pair
<Session
*,uint64_t> >& imported_session_map
);
194 void handle_export_finish(MExportDirFinish
*m
);
196 void handle_export_caps(MExportCaps
*m
);
197 void logged_import_caps(CInode
*in
,
199 map
<client_t
,pair
<Session
*,uint64_t> >& imported_session_map
,
200 map
<CInode
*, map
<client_t
,Capability::Export
> >& cap_imports
);
// Helper classes granted friend access to this class's members.
203 friend class C_MDS_ImportDirLoggedStart
;
204 friend class C_MDS_ImportDirLoggedFinish
;
205 friend class C_M_LoggedImportCaps
;
// Notify handler, generic message entry point and the two show_* helpers.
// These are declarations only.
// NOTE(review): dispatch() presumably routes a Message to the handle_*
// functions declared in this class -- confirm against the definition.
208 void handle_export_notify(MExportDirNotify
*m
);
213 void dispatch(Message
*);
215 void show_importing();
216 void show_exporting();
// Returns the number of entries in export_state (container size converted to int).
218 int get_num_exporting() const { return export_state
.size(); }
// Returns the number of entries in export_queue (container size converted to int).
219 int get_export_queue_size() const { return export_queue
.size(); }
// Looks @dir up in export_state and, when an entry exists, returns its stored
// state value (an int, not a bool).
// NOTE(review): the return for the not-found case is not visible in this copy.
222 int is_exporting(CDir
*dir
) const {
223 map
<CDir
*, export_state_t
>::const_iterator it
= export_state
.find(dir
);
224 if (it
!= export_state
.end()) return it
->second
.state
;
// True when export_state holds at least one entry.
227 bool is_exporting() const { return !export_state
.empty(); }
// Looks @df up in import_state and, when an entry exists, returns its stored
// state value (an int, not a bool).
// NOTE(review): the return for the not-found case is not visible in this copy.
228 int is_importing(dirfrag_t df
) const {
229 map
<dirfrag_t
, import_state_t
>::const_iterator it
= import_state
.find(df
);
230 if (it
!= import_state
.end()) return it
->second
.state
;
// True when import_state holds at least one entry.
233 bool is_importing() const { return !import_state
.empty(); }
// Checks whether @df has an import_state entry whose state lies in the range
// [IMPORT_LOGGINGSTART, IMPORT_ABORTING).
// NOTE(review): the statements executed by each branch (the actual return
// values) are not visible in this copy.
235 bool is_ambiguous_import(dirfrag_t df
) const {
236 map
<dirfrag_t
, import_state_t
>::const_iterator p
= import_state
.find(df
);
237 if (p
== import_state
.end())
239 if (p
->second
.state
>= IMPORT_LOGGINGSTART
&&
240 p
->second
.state
< IMPORT_ABORTING
)
// Returns the stored state for @df. Asserts that @df has an entry in
// import_state, so callers must only pass a dirfrag that is being imported.
245 int get_import_state(dirfrag_t df
) const {
246 map
<dirfrag_t
, import_state_t
>::const_iterator it
= import_state
.find(df
);
247 assert(it
!= import_state
.end());
248 return it
->second
.state
;
// Returns the stored peer for @df as an int. Asserts that @df has an entry in
// import_state.
250 int get_import_peer(dirfrag_t df
) const {
251 map
<dirfrag_t
, import_state_t
>::const_iterator it
= import_state
.find(df
);
252 assert(it
!= import_state
.end());
253 return it
->second
.peer
;
// Returns the stored state for @dir. Asserts that @dir has an entry in
// export_state.
256 int get_export_state(CDir
*dir
) const {
257 map
<CDir
*, export_state_t
>::const_iterator it
= export_state
.find(dir
);
258 assert(it
!= export_state
.end());
259 return it
->second
.state
;
261 // this returns true if we are exporting @dir,
262 // and are not waiting for @who to be
263 // warned of ambiguous auth.
264 // only returns meaningful results during EXPORT_WARNING state.
// Asserts that @dir has an export_state entry and that its state is
// EXPORT_WARNING; the result is whether @who is absent from
// warning_ack_waiting.
// NOTE(review): unlike export_has_notified(), this accessor is not
// const-qualified although it only reads the map.
265 bool export_has_warned(CDir
*dir
, mds_rank_t who
) {
266 map
<CDir
*, export_state_t
>::iterator it
= export_state
.find(dir
);
267 assert(it
!= export_state
.end());
268 assert(it
->second
.state
== EXPORT_WARNING
);
269 return (it
->second
.warning_ack_waiting
.count(who
) == 0);
// Returns true when @who is absent from notify_ack_waiting for @dir.
// Asserts that @dir has an export_state entry and that its state is
// EXPORT_NOTIFYING.
272 bool export_has_notified(CDir
*dir
, mds_rank_t who
) const {
273 map
<CDir
*, export_state_t
>::const_iterator it
= export_state
.find(dir
);
274 assert(it
!= export_state
.end());
275 assert(it
->second
.state
== EXPORT_NOTIFYING
);
276 return (it
->second
.notify_ack_waiting
.count(who
) == 0);
// Increments num_remote_waiters on @dir's export_state entry.
// Asserts that the entry exists.
279 void export_freeze_inc_num_waiters(CDir
*dir
) {
280 map
<CDir
*, export_state_t
>::iterator it
= export_state
.find(dir
);
281 assert(it
!= export_state
.end());
282 it
->second
.num_remote_waiters
++;
// Declarations only; bodies are not in this header.
// NOTE(review): find_stale_export_freeze() presumably uses the
// "freeze tree deadlock detection" fields of export_state_t -- confirm.
284 void find_stale_export_freeze();
287 void handle_mds_failure_or_stop(mds_rank_t who
);
291 // -- import/export --
// Entry points for starting exports of a directory to a destination rank.
// These are declarations only.
293 void dispatch_export_dir(MDRequestRef
& mdr
, int count
);
294 void export_dir(CDir
*dir
, mds_rank_t dest
);
295 void export_empty_import(CDir
*dir
);
297 void export_dir_nicely(CDir
*dir
, mds_rank_t dest
);
298 void maybe_do_queued_export();
// Discards every entry in export_queue.
299 void clear_export_queue() {
300 export_queue
.clear();
// Export-side helpers that collect locks and clients, encode inodes, caps and
// directories into a bufferlist, and finish the export afterwards.
// These are declarations only.
303 void get_export_lock_set(CDir
*dir
, set
<SimpleLock
*>& locks
);
304 void get_export_client_set(CInode
*in
, set
<client_t
> &client_set
);
306 void encode_export_inode(CInode
*in
, bufferlist
& bl
,
307 map
<client_t
,entity_inst_t
>& exported_client_map
);
308 void encode_export_inode_caps(CInode
*in
, bool auth_cap
, bufferlist
& bl
,
309 map
<client_t
,entity_inst_t
>& exported_client_map
);
310 void finish_export_inode(CInode
*in
, utime_t now
, mds_rank_t target
,
311 map
<client_t
,Capability::Import
>& peer_imported
,
312 list
<MDSInternalContextBase
*>& finished
);
313 void finish_export_inode_caps(CInode
*in
, mds_rank_t target
,
314 map
<client_t
,Capability::Import
>& peer_imported
);
// NOTE(review): the encode_export_dir parameter list below is truncated in
// this copy (it runs straight into the next declaration).
317 uint64_t encode_export_dir(bufferlist
& exportbl
,
319 map
<client_t
,entity_inst_t
>& exported_client_map
,
321 void finish_export_dir(CDir
*dir
, utime_t now
, mds_rank_t target
,
322 map
<inodeno_t
,map
<client_t
,Capability::Import
> >& peer_imported
,
323 list
<MDSInternalContextBase
*>& finished
, int *num_dentries
);
325 void clear_export_proxy_pins(CDir
*dir
);
327 void export_caps(CInode
*in
);
// Import-side helpers that decode inodes, caps and directories from a
// bufferlist iterator and finish or reverse the import. These are
// declarations only. import_finish() defaults last to true.
329 void decode_import_inode(CDentry
*dn
, bufferlist::iterator
& blp
,
330 mds_rank_t oldauth
, LogSegment
*ls
,
331 map
<CInode
*, map
<client_t
,Capability::Export
> >& cap_imports
,
332 list
<ScatterLock
*>& updated_scatterlocks
);
333 void decode_import_inode_caps(CInode
*in
, bool auth_cap
, bufferlist::iterator
&blp
,
334 map
<CInode
*, map
<client_t
,Capability::Export
> >& cap_imports
);
335 void finish_import_inode_caps(CInode
*in
, mds_rank_t from
, bool auth_cap
,
336 const map
<client_t
,pair
<Session
*,uint64_t> >& smap
,
337 const map
<client_t
,Capability::Export
> &export_map
,
338 map
<client_t
,Capability::Import
> &import_map
);
// NOTE(review): some decode_import_dir parameters appear to be missing from
// this copy (the embedded line numbers jump from 339 to 344).
339 int decode_import_dir(bufferlist::iterator
& blp
,
344 map
<CInode
*, map
<client_t
,Capability::Export
> >& cap_imports
,
345 list
<ScatterLock
*>& updated_scatterlocks
, utime_t now
);
347 void import_reverse(CDir
*dir
);
349 void import_finish(CDir
*dir
, bool notify
, bool last
=true);
// In-class default is false; the constructor overwrites it with the value of
// the "mds_inject_migrator_session_race" config option.
354 bool inject_session_race
= false;