1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
13 * Handles the import and export of mds authorities and actual cache data.
14 * See src/doc/exports.txt for a description.
17 #ifndef CEPH_MDS_MIGRATOR_H
18 #define CEPH_MDS_MIGRATOR_H
20 #include "include/types.h"
// Forward declarations of migration message types; the prototypes in this
// header only take pointers to them, so the names alone are sufficient.
class MExportDirDiscover;
class MExportDirDiscoverAck;
class MExportDirCancel;
class MExportDirPrepAck;
class MExportDirNotify;
class MExportDirNotifyAck;
class MExportDirFinish;
// Export stages, in ascending numeric order.  Used to clean up
// intelligently if there's a failure.
static const int EXPORT_CANCELLED     = 0;  // cancelled
static const int EXPORT_CANCELLING    = 1;  // waiting for cancel notifyacks
static const int EXPORT_LOCKING       = 2;  // acquiring locks
static const int EXPORT_DISCOVERING   = 3;  // dest is discovering export dir
static const int EXPORT_FREEZING      = 4;  // we're freezing the dir tree
static const int EXPORT_PREPPING      = 5;  // sending dest spanning tree to export bounds
static const int EXPORT_WARNING       = 6;  // warning bystanders of dir_auth_pending
static const int EXPORT_EXPORTING     = 7;  // sent actual export, waiting for ack
static const int EXPORT_LOGGINGFINISH = 8;  // logging EExportFinish
static const int EXPORT_NOTIFYING     = 9;  // waiting for notifyacks
65 static const char *get_export_statename(int s
) {
67 case EXPORT_CANCELLING
: return "cancelling";
68 case EXPORT_LOCKING
: return "locking";
69 case EXPORT_DISCOVERING
: return "discovering";
70 case EXPORT_FREEZING
: return "freezing";
71 case EXPORT_PREPPING
: return "prepping";
72 case EXPORT_WARNING
: return "warning";
73 case EXPORT_EXPORTING
: return "exporting";
74 case EXPORT_LOGGINGFINISH
: return "loggingfinish";
75 case EXPORT_NOTIFYING
: return "notifying";
76 default: ceph_abort(); return 0;
// Import stages, in ascending numeric order (numbering starts at 1).
static const int IMPORT_DISCOVERING  = 1;  // waiting for prep
static const int IMPORT_DISCOVERED   = 2;  // waiting for prep
static const int IMPORT_PREPPING     = 3;  // opening dirs on bounds
static const int IMPORT_PREPPED      = 4;  // opened bounds, waiting for import
static const int IMPORT_LOGGINGSTART = 5;  // got import, logging EImportStart
static const int IMPORT_ACKING       = 6;  // logged EImportStart, sent ack, waiting for finish
static const int IMPORT_FINISHING    = 7;  // sent cap imports, waiting for finish
static const int IMPORT_ABORTING     = 8;  // notifying bystanders of an abort before unfreezing
89 static const char *get_import_statename(int s
) {
91 case IMPORT_DISCOVERING
: return "discovering";
92 case IMPORT_DISCOVERED
: return "discovered";
93 case IMPORT_PREPPING
: return "prepping";
94 case IMPORT_PREPPED
: return "prepped";
95 case IMPORT_LOGGINGSTART
: return "loggingstart";
96 case IMPORT_ACKING
: return "acking";
97 case IMPORT_FINISHING
: return "finishing";
98 case IMPORT_ABORTING
: return "aborting";
99 default: ceph_abort(); return 0;
104 Migrator(MDSRank
*m
, MDCache
*c
) : mds(m
), cache(c
) {}
// Per-export bookkeeping record (the mapped type of export_state).
// Visible members: two sets of ranks (warning_ack_waiting,
// notify_ack_waiting), a per-inode map of client Capability::Import
// entries (peer_imported), a set of CDir pointers (residual_dirs), and
// the counters/timestamp used for freeze tree deadlock detection.
// NOTE(review): the constructor also initializes state, peer, tid and
// mut, whose declarations are not visible in this chunk -- confirm their
// types against the full header.
110 struct export_state_t
{
114 set
<mds_rank_t
> warning_ack_waiting
;
115 set
<mds_rank_t
> notify_ack_waiting
;
116 map
<inodeno_t
,map
<client_t
,Capability::Import
> > peer_imported
;
117 set
<CDir
*> residual_dirs
;
120 // for freeze tree deadlock detection
121 utime_t last_cum_auth_pins_change
;
122 int last_cum_auth_pins
;
123 int num_remote_waiters
; // number of remote authpin waiters
// Default constructor: zero-initializes the scalar members named below.
124 export_state_t() : state(0), peer(0), tid(0), mut(),
125 last_cum_auth_pins(0), num_remote_waiters(0) {}
128 map
<CDir
*, export_state_t
> export_state
;
130 list
<pair
<dirfrag_t
,mds_rank_t
> > export_queue
;
// Per-import bookkeeping record (the mapped type of import_state).
// Visible members: a set of ranks (bystanders), a list of dirfrags
// (bound_ls), a list of ScatterLock pointers (updated_scatterlocks), a
// client -> entity_inst_t map (client_map), and a per-inode map of client
// Capability::Export entries (peer_exports).
// NOTE(review): the constructor also initializes state, peer, tid and
// mut, whose declarations are not visible in this chunk -- confirm their
// types against the full header.
133 struct import_state_t
{
137 set
<mds_rank_t
> bystanders
;
138 list
<dirfrag_t
> bound_ls
;
139 list
<ScatterLock
*> updated_scatterlocks
;
140 map
<client_t
,entity_inst_t
> client_map
;
141 map
<CInode
*, map
<client_t
,Capability::Export
> > peer_exports
;
// Default constructor: zero-initializes state, peer and tid.
143 import_state_t() : state(0), peer(0), tid(0), mut() {}
146 map
<dirfrag_t
, import_state_t
> import_state
;
148 void handle_export_discover_ack(MExportDirDiscoverAck
*m
);
149 void export_frozen(CDir
*dir
, uint64_t tid
);
150 void check_export_size(CDir
*dir
, export_state_t
& stat
, set
<client_t
> &client_set
);
151 void handle_export_prep_ack(MExportDirPrepAck
*m
);
152 void export_sessions_flushed(CDir
*dir
, uint64_t tid
);
153 void export_go(CDir
*dir
);
154 void export_go_synced(CDir
*dir
, uint64_t tid
);
155 void export_try_cancel(CDir
*dir
, bool notify_peer
=true);
156 void export_cancel_finish(CDir
*dir
);
157 void export_reverse(CDir
*dir
, export_state_t
& stat
);
158 void export_notify_abort(CDir
*dir
, export_state_t
& stat
, set
<CDir
*>& bounds
);
159 void handle_export_ack(MExportDirAck
*m
);
160 void export_logged_finish(CDir
*dir
);
161 void handle_export_notify_ack(MExportDirNotifyAck
*m
);
162 void export_finish(CDir
*dir
);
164 void handle_gather_caps(MGatherCaps
*m
);
166 friend class C_MDC_ExportFreeze
;
167 friend class C_MDS_ExportFinishLogged
;
168 friend class C_M_ExportGo
;
169 friend class C_M_ExportSessionsFlushed
;
170 friend class MigratorContext
;
171 friend class MigratorLogContext
;
174 void handle_export_discover(MExportDirDiscover
*m
);
175 void handle_export_cancel(MExportDirCancel
*m
);
176 void handle_export_prep(MExportDirPrep
*m
);
177 void handle_export_dir(MExportDir
*m
);
179 void import_reverse_discovering(dirfrag_t df
);
180 void import_reverse_discovered(dirfrag_t df
, CInode
*diri
);
181 void import_reverse_prepping(CDir
*dir
, import_state_t
& stat
);
182 void import_remove_pins(CDir
*dir
, set
<CDir
*>& bounds
);
183 void import_reverse_unfreeze(CDir
*dir
);
184 void import_reverse_final(CDir
*dir
);
185 void import_notify_abort(CDir
*dir
, set
<CDir
*>& bounds
);
186 void import_notify_finish(CDir
*dir
, set
<CDir
*>& bounds
);
187 void import_logged_start(dirfrag_t df
, CDir
*dir
, mds_rank_t from
,
188 map
<client_t
,entity_inst_t
> &imported_client_map
,
189 map
<client_t
,uint64_t>& sseqmap
);
190 void handle_export_finish(MExportDirFinish
*m
);
192 void handle_export_caps(MExportCaps
*m
);
// Declaration of logged_import_caps.  Visible parameters: the inode, the
// per-inode map of client Capability::Export entries (cap_imports), the
// client -> entity_inst_t map, and the client -> uint64_t map (sseqmap).
// NOTE(review): an original line between the first and second visible
// parameters is missing from this chunk, so the parameter list shown here
// may be incomplete -- confirm against the full header.
193 void logged_import_caps(CInode
*in
,
195 map
<CInode
*, map
<client_t
,Capability::Export
> >& cap_imports
,
196 map
<client_t
,entity_inst_t
>& client_map
,
197 map
<client_t
,uint64_t>& sseqmap
);
200 friend class C_MDS_ImportDirLoggedStart
;
201 friend class C_MDS_ImportDirLoggedFinish
;
202 friend class C_M_LoggedImportCaps
;
205 void handle_export_notify(MExportDirNotify
*m
);
210 void dispatch(Message
*);
212 void show_importing();
213 void show_exporting();
215 int get_num_exporting() const { return export_state
.size(); }
216 int get_export_queue_size() const { return export_queue
.size(); }
// Looks dir up in export_state; when an entry exists, returns that
// entry's state value (one of the EXPORT_* stages).
// NOTE(review): the statement executed when dir is not found is not
// visible in this chunk -- confirm the fallback return value in the full
// header.
219 int is_exporting(CDir
*dir
) const {
220 map
<CDir
*, export_state_t
>::const_iterator it
= export_state
.find(dir
);
221 if (it
!= export_state
.end()) return it
->second
.state
;
224 bool is_exporting() const { return !export_state
.empty(); }
// Looks df up in import_state; when an entry exists, returns that
// entry's state value (one of the IMPORT_* stages).
// NOTE(review): the statement executed when df is not found is not
// visible in this chunk -- confirm the fallback return value in the full
// header.
225 int is_importing(dirfrag_t df
) const {
226 map
<dirfrag_t
, import_state_t
>::const_iterator it
= import_state
.find(df
);
227 if (it
!= import_state
.end()) return it
->second
.state
;
230 bool is_importing() const { return !import_state
.empty(); }
// Looks df up in import_state and, for an existing entry, tests whether
// its state lies in the half-open range
// [IMPORT_LOGGINGSTART, IMPORT_ABORTING).
// NOTE(review): the statements selected by the two conditions below (and
// the final fallthrough) are not visible in this chunk -- confirm the
// returned values in the full header before relying on this summary.
232 bool is_ambiguous_import(dirfrag_t df
) const {
233 map
<dirfrag_t
, import_state_t
>::const_iterator p
= import_state
.find(df
);
234 if (p
== import_state
.end())
236 if (p
->second
.state
>= IMPORT_LOGGINGSTART
&&
237 p
->second
.state
< IMPORT_ABORTING
)
242 int get_import_state(dirfrag_t df
) const {
243 map
<dirfrag_t
, import_state_t
>::const_iterator it
= import_state
.find(df
);
244 assert(it
!= import_state
.end());
245 return it
->second
.state
;
247 int get_import_peer(dirfrag_t df
) const {
248 map
<dirfrag_t
, import_state_t
>::const_iterator it
= import_state
.find(df
);
249 assert(it
!= import_state
.end());
250 return it
->second
.peer
;
253 int get_export_state(CDir
*dir
) const {
254 map
<CDir
*, export_state_t
>::const_iterator it
= export_state
.find(dir
);
255 assert(it
!= export_state
.end());
256 return it
->second
.state
;
258 // this returns true if we are export @dir,
259 // and are not waiting for @who to be
260 // be warned of ambiguous auth.
261 // only returns meaningful results during EXPORT_WARNING state.
262 bool export_has_warned(CDir
*dir
, mds_rank_t who
) {
263 map
<CDir
*, export_state_t
>::iterator it
= export_state
.find(dir
);
264 assert(it
!= export_state
.end());
265 assert(it
->second
.state
== EXPORT_WARNING
);
266 return (it
->second
.warning_ack_waiting
.count(who
) == 0);
269 bool export_has_notified(CDir
*dir
, mds_rank_t who
) const {
270 map
<CDir
*, export_state_t
>::const_iterator it
= export_state
.find(dir
);
271 assert(it
!= export_state
.end());
272 assert(it
->second
.state
== EXPORT_NOTIFYING
);
273 return (it
->second
.notify_ack_waiting
.count(who
) == 0);
276 void export_freeze_inc_num_waiters(CDir
*dir
) {
277 map
<CDir
*, export_state_t
>::iterator it
= export_state
.find(dir
);
278 assert(it
!= export_state
.end());
279 it
->second
.num_remote_waiters
++;
281 void find_stale_export_freeze();
284 void handle_mds_failure_or_stop(mds_rank_t who
);
288 // -- import/export --
290 void dispatch_export_dir(MDRequestRef
& mdr
, int count
);
291 void export_dir(CDir
*dir
, mds_rank_t dest
);
292 void export_empty_import(CDir
*dir
);
294 void export_dir_nicely(CDir
*dir
, mds_rank_t dest
);
295 void maybe_do_queued_export();
296 void clear_export_queue() {
297 export_queue
.clear();
300 void get_export_lock_set(CDir
*dir
, set
<SimpleLock
*>& locks
);
301 void get_export_client_set(CInode
*in
, set
<client_t
> &client_set
);
303 void encode_export_inode(CInode
*in
, bufferlist
& bl
,
304 map
<client_t
,entity_inst_t
>& exported_client_map
);
305 void encode_export_inode_caps(CInode
*in
, bool auth_cap
, bufferlist
& bl
,
306 map
<client_t
,entity_inst_t
>& exported_client_map
);
307 void finish_export_inode(CInode
*in
, utime_t now
, mds_rank_t target
,
308 map
<client_t
,Capability::Import
>& peer_imported
,
309 list
<MDSInternalContextBase
*>& finished
);
310 void finish_export_inode_caps(CInode
*in
, mds_rank_t target
,
311 map
<client_t
,Capability::Import
>& peer_imported
);
// Declaration of encode_export_dir, returning uint64_t.  Visible
// parameters: the output bufferlist (exportbl) and the
// client -> entity_inst_t map (exported_client_map).
// NOTE(review): the declaration is truncated in this chunk -- original
// lines between and after the visible parameters are missing, so the full
// parameter list must be taken from the complete header.
314 uint64_t encode_export_dir(bufferlist
& exportbl
,
316 map
<client_t
,entity_inst_t
>& exported_client_map
,
318 void finish_export_dir(CDir
*dir
, utime_t now
, mds_rank_t target
,
319 map
<inodeno_t
,map
<client_t
,Capability::Import
> >& peer_imported
,
320 list
<MDSInternalContextBase
*>& finished
, int *num_dentries
);
322 void clear_export_proxy_pins(CDir
*dir
);
324 void export_caps(CInode
*in
);
326 void decode_import_inode(CDentry
*dn
, bufferlist::iterator
& blp
,
327 mds_rank_t oldauth
, LogSegment
*ls
,
328 map
<CInode
*, map
<client_t
,Capability::Export
> >& cap_imports
,
329 list
<ScatterLock
*>& updated_scatterlocks
);
330 void decode_import_inode_caps(CInode
*in
, bool auth_cap
, bufferlist::iterator
&blp
,
331 map
<CInode
*, map
<client_t
,Capability::Export
> >& cap_imports
);
332 void finish_import_inode_caps(CInode
*in
, mds_rank_t from
, bool auth_cap
,
333 map
<client_t
,Capability::Export
> &export_map
,
334 map
<client_t
,Capability::Import
> &import_map
);
// Declaration of decode_import_dir, returning int.  Visible parameters:
// the bufferlist iterator (blp), the per-inode map of client
// Capability::Export entries (cap_imports), the list of ScatterLock
// pointers (updated_scatterlocks) and a utime_t (now).
// NOTE(review): original lines between blp and cap_imports are missing
// from this chunk, so additional parameters likely exist -- confirm
// against the complete header.
335 int decode_import_dir(bufferlist::iterator
& blp
,
340 map
<CInode
*, map
<client_t
,Capability::Export
> >& cap_imports
,
341 list
<ScatterLock
*>& updated_scatterlocks
, utime_t now
);
343 void import_reverse(CDir
*dir
);
345 void import_finish(CDir
*dir
, bool notify
, bool last
=true);