]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | * Handles the import and export of mds authorities and actual cache data. | |
14 | * See src/doc/exports.txt for a description. | |
15 | */ | |
16 | ||
17 | #ifndef CEPH_MDS_MIGRATOR_H | |
18 | #define CEPH_MDS_MIGRATOR_H | |
19 | ||
20 | #include "include/types.h" | |
21 | ||
22 | #include <map> | |
23 | #include <list> | |
24 | #include <set> | |
25 | using std::map; | |
26 | using std::list; | |
27 | using std::set; | |
28 | ||
29 | ||
30 | class MDSRank; | |
31 | class CDir; | |
32 | class CInode; | |
33 | class CDentry; | |
28e407b8 | 34 | class Session; |
7c673cae FG |
35 | |
36 | class MExportDirDiscover; | |
37 | class MExportDirDiscoverAck; | |
38 | class MExportDirCancel; | |
39 | class MExportDirPrep; | |
40 | class MExportDirPrepAck; | |
41 | class MExportDir; | |
42 | class MExportDirAck; | |
43 | class MExportDirNotify; | |
44 | class MExportDirNotifyAck; | |
45 | class MExportDirFinish; | |
46 | ||
47 | class MExportCaps; | |
48 | class MExportCapsAck; | |
49 | class MGatherCaps; | |
50 | ||
51 | class EImportStart; | |
52 | ||
53 | class Migrator { | |
54 | public: | |
55 | // export stages. used to clean up intelligently if there's a failure. | |
56 | const static int EXPORT_CANCELLED = 0; // cancelled | |
57 | const static int EXPORT_CANCELLING = 1; // waiting for cancel notifyacks | |
58 | const static int EXPORT_LOCKING = 2; // acquiring locks | |
59 | const static int EXPORT_DISCOVERING = 3; // dest is discovering export dir | |
60 | const static int EXPORT_FREEZING = 4; // we're freezing the dir tree | |
61 | const static int EXPORT_PREPPING = 5; // sending dest spanning tree to export bounds | |
62 | const static int EXPORT_WARNING = 6; // warning bystanders of dir_auth_pending | |
63 | const static int EXPORT_EXPORTING = 7; // sent actual export, waiting for ack | |
64 | const static int EXPORT_LOGGINGFINISH = 8; // logging EExportFinish | |
65 | const static int EXPORT_NOTIFYING = 9; // waiting for notifyacks | |
66 | static const char *get_export_statename(int s) { | |
67 | switch (s) { | |
68 | case EXPORT_CANCELLING: return "cancelling"; | |
69 | case EXPORT_LOCKING: return "locking"; | |
70 | case EXPORT_DISCOVERING: return "discovering"; | |
71 | case EXPORT_FREEZING: return "freezing"; | |
72 | case EXPORT_PREPPING: return "prepping"; | |
73 | case EXPORT_WARNING: return "warning"; | |
74 | case EXPORT_EXPORTING: return "exporting"; | |
75 | case EXPORT_LOGGINGFINISH: return "loggingfinish"; | |
76 | case EXPORT_NOTIFYING: return "notifying"; | |
77 | default: ceph_abort(); return 0; | |
78 | } | |
79 | } | |
80 | ||
81 | // -- imports -- | |
82 | const static int IMPORT_DISCOVERING = 1; // waiting for prep | |
83 | const static int IMPORT_DISCOVERED = 2; // waiting for prep | |
84 | const static int IMPORT_PREPPING = 3; // opening dirs on bounds | |
85 | const static int IMPORT_PREPPED = 4; // opened bounds, waiting for import | |
86 | const static int IMPORT_LOGGINGSTART = 5; // got import, logging EImportStart | |
87 | const static int IMPORT_ACKING = 6; // logged EImportStart, sent ack, waiting for finish | |
88 | const static int IMPORT_FINISHING = 7; // sent cap imports, waiting for finish | |
89 | const static int IMPORT_ABORTING = 8; // notifying bystanders of an abort before unfreezing | |
90 | static const char *get_import_statename(int s) { | |
91 | switch (s) { | |
92 | case IMPORT_DISCOVERING: return "discovering"; | |
93 | case IMPORT_DISCOVERED: return "discovered"; | |
94 | case IMPORT_PREPPING: return "prepping"; | |
95 | case IMPORT_PREPPED: return "prepped"; | |
96 | case IMPORT_LOGGINGSTART: return "loggingstart"; | |
97 | case IMPORT_ACKING: return "acking"; | |
98 | case IMPORT_FINISHING: return "finishing"; | |
99 | case IMPORT_ABORTING: return "aborting"; | |
100 | default: ceph_abort(); return 0; | |
101 | } | |
102 | } | |
103 | ||
104 | // -- cons -- | |
28e407b8 AA |
105 | Migrator(MDSRank *m, MDCache *c) : mds(m), cache(c) { |
106 | inject_session_race = g_conf->get_val<bool>("mds_inject_migrator_session_race"); | |
1adf2230 | 107 | inject_message_loss = g_conf->get_val<int64_t>("mds_inject_migrator_message_loss"); |
28e407b8 | 108 | } |
7c673cae | 109 | |
28e407b8 AA |
110 | void handle_conf_change(const struct md_config_t *conf, |
111 | const std::set <std::string> &changed, | |
112 | const MDSMap &mds_map); | |
7c673cae FG |
113 | |
114 | protected: | |
115 | // export fun | |
116 | struct export_state_t { | |
117 | int state; | |
118 | mds_rank_t peer; | |
119 | uint64_t tid; | |
120 | set<mds_rank_t> warning_ack_waiting; | |
121 | set<mds_rank_t> notify_ack_waiting; | |
122 | map<inodeno_t,map<client_t,Capability::Import> > peer_imported; | |
b32b8144 FG |
123 | set<CDir*> residual_dirs; |
124 | ||
7c673cae FG |
125 | MutationRef mut; |
126 | // for freeze tree deadlock detection | |
127 | utime_t last_cum_auth_pins_change; | |
128 | int last_cum_auth_pins; | |
129 | int num_remote_waiters; // number of remote authpin waiters | |
130 | export_state_t() : state(0), peer(0), tid(0), mut(), | |
131 | last_cum_auth_pins(0), num_remote_waiters(0) {} | |
132 | }; | |
133 | ||
134 | map<CDir*, export_state_t> export_state; | |
135 | ||
136 | list<pair<dirfrag_t,mds_rank_t> > export_queue; | |
137 | ||
138 | // import fun | |
139 | struct import_state_t { | |
140 | int state; | |
141 | mds_rank_t peer; | |
142 | uint64_t tid; | |
143 | set<mds_rank_t> bystanders; | |
144 | list<dirfrag_t> bound_ls; | |
145 | list<ScatterLock*> updated_scatterlocks; | |
28e407b8 | 146 | map<client_t,pair<Session*,uint64_t> > session_map; |
7c673cae FG |
147 | map<CInode*, map<client_t,Capability::Export> > peer_exports; |
148 | MutationRef mut; | |
149 | import_state_t() : state(0), peer(0), tid(0), mut() {} | |
150 | }; | |
151 | ||
152 | map<dirfrag_t, import_state_t> import_state; | |
153 | ||
154 | void handle_export_discover_ack(MExportDirDiscoverAck *m); | |
155 | void export_frozen(CDir *dir, uint64_t tid); | |
b32b8144 | 156 | void check_export_size(CDir *dir, export_state_t& stat, set<client_t> &client_set); |
7c673cae FG |
157 | void handle_export_prep_ack(MExportDirPrepAck *m); |
158 | void export_sessions_flushed(CDir *dir, uint64_t tid); | |
159 | void export_go(CDir *dir); | |
160 | void export_go_synced(CDir *dir, uint64_t tid); | |
161 | void export_try_cancel(CDir *dir, bool notify_peer=true); | |
162 | void export_cancel_finish(CDir *dir); | |
b32b8144 FG |
163 | void export_reverse(CDir *dir, export_state_t& stat); |
164 | void export_notify_abort(CDir *dir, export_state_t& stat, set<CDir*>& bounds); | |
7c673cae FG |
165 | void handle_export_ack(MExportDirAck *m); |
166 | void export_logged_finish(CDir *dir); | |
167 | void handle_export_notify_ack(MExportDirNotifyAck *m); | |
168 | void export_finish(CDir *dir); | |
169 | ||
170 | void handle_gather_caps(MGatherCaps *m); | |
171 | ||
172 | friend class C_MDC_ExportFreeze; | |
173 | friend class C_MDS_ExportFinishLogged; | |
174 | friend class C_M_ExportGo; | |
175 | friend class C_M_ExportSessionsFlushed; | |
176 | friend class MigratorContext; | |
177 | friend class MigratorLogContext; | |
178 | ||
179 | // importer | |
180 | void handle_export_discover(MExportDirDiscover *m); | |
181 | void handle_export_cancel(MExportDirCancel *m); | |
182 | void handle_export_prep(MExportDirPrep *m); | |
183 | void handle_export_dir(MExportDir *m); | |
184 | ||
185 | void import_reverse_discovering(dirfrag_t df); | |
186 | void import_reverse_discovered(dirfrag_t df, CInode *diri); | |
b32b8144 | 187 | void import_reverse_prepping(CDir *dir, import_state_t& stat); |
7c673cae FG |
188 | void import_remove_pins(CDir *dir, set<CDir*>& bounds); |
189 | void import_reverse_unfreeze(CDir *dir); | |
190 | void import_reverse_final(CDir *dir); | |
191 | void import_notify_abort(CDir *dir, set<CDir*>& bounds); | |
192 | void import_notify_finish(CDir *dir, set<CDir*>& bounds); | |
193 | void import_logged_start(dirfrag_t df, CDir *dir, mds_rank_t from, | |
28e407b8 | 194 | map<client_t,pair<Session*,uint64_t> >& imported_session_map); |
7c673cae FG |
195 | void handle_export_finish(MExportDirFinish *m); |
196 | ||
197 | void handle_export_caps(MExportCaps *m); | |
1adf2230 | 198 | void handle_export_caps_ack(MExportCapsAck *m); |
7c673cae FG |
199 | void logged_import_caps(CInode *in, |
200 | mds_rank_t from, | |
28e407b8 AA |
201 | map<client_t,pair<Session*,uint64_t> >& imported_session_map, |
202 | map<CInode*, map<client_t,Capability::Export> >& cap_imports); | |
7c673cae FG |
203 | |
204 | ||
205 | friend class C_MDS_ImportDirLoggedStart; | |
206 | friend class C_MDS_ImportDirLoggedFinish; | |
207 | friend class C_M_LoggedImportCaps; | |
208 | ||
209 | // bystander | |
210 | void handle_export_notify(MExportDirNotify *m); | |
211 | ||
212 | ||
213 | public: | |
214 | ||
215 | void dispatch(Message*); | |
216 | ||
217 | void show_importing(); | |
218 | void show_exporting(); | |
219 | ||
220 | int get_num_exporting() const { return export_state.size(); } | |
221 | int get_export_queue_size() const { return export_queue.size(); } | |
222 | ||
223 | // -- status -- | |
224 | int is_exporting(CDir *dir) const { | |
225 | map<CDir*, export_state_t>::const_iterator it = export_state.find(dir); | |
226 | if (it != export_state.end()) return it->second.state; | |
227 | return 0; | |
228 | } | |
229 | bool is_exporting() const { return !export_state.empty(); } | |
230 | int is_importing(dirfrag_t df) const { | |
231 | map<dirfrag_t, import_state_t>::const_iterator it = import_state.find(df); | |
232 | if (it != import_state.end()) return it->second.state; | |
233 | return 0; | |
234 | } | |
235 | bool is_importing() const { return !import_state.empty(); } | |
236 | ||
237 | bool is_ambiguous_import(dirfrag_t df) const { | |
238 | map<dirfrag_t, import_state_t>::const_iterator p = import_state.find(df); | |
239 | if (p == import_state.end()) | |
240 | return false; | |
241 | if (p->second.state >= IMPORT_LOGGINGSTART && | |
242 | p->second.state < IMPORT_ABORTING) | |
243 | return true; | |
244 | return false; | |
245 | } | |
246 | ||
247 | int get_import_state(dirfrag_t df) const { | |
248 | map<dirfrag_t, import_state_t>::const_iterator it = import_state.find(df); | |
249 | assert(it != import_state.end()); | |
250 | return it->second.state; | |
251 | } | |
252 | int get_import_peer(dirfrag_t df) const { | |
253 | map<dirfrag_t, import_state_t>::const_iterator it = import_state.find(df); | |
254 | assert(it != import_state.end()); | |
255 | return it->second.peer; | |
256 | } | |
257 | ||
258 | int get_export_state(CDir *dir) const { | |
259 | map<CDir*, export_state_t>::const_iterator it = export_state.find(dir); | |
260 | assert(it != export_state.end()); | |
261 | return it->second.state; | |
262 | } | |
263 | // this returns true if we are exporting @dir, | |
264 | // and are not waiting for @who to be | |
265 | // warned of ambiguous auth. | |
266 | // only returns meaningful results during EXPORT_WARNING state. | |
267 | bool export_has_warned(CDir *dir, mds_rank_t who) { | |
268 | map<CDir*, export_state_t>::iterator it = export_state.find(dir); | |
269 | assert(it != export_state.end()); | |
270 | assert(it->second.state == EXPORT_WARNING); | |
271 | return (it->second.warning_ack_waiting.count(who) == 0); | |
272 | } | |
273 | ||
274 | bool export_has_notified(CDir *dir, mds_rank_t who) const { | |
275 | map<CDir*, export_state_t>::const_iterator it = export_state.find(dir); | |
276 | assert(it != export_state.end()); | |
277 | assert(it->second.state == EXPORT_NOTIFYING); | |
278 | return (it->second.notify_ack_waiting.count(who) == 0); | |
279 | } | |
280 | ||
281 | void export_freeze_inc_num_waiters(CDir *dir) { | |
282 | map<CDir*, export_state_t>::iterator it = export_state.find(dir); | |
283 | assert(it != export_state.end()); | |
284 | it->second.num_remote_waiters++; | |
285 | } | |
286 | void find_stale_export_freeze(); | |
287 | ||
288 | // -- misc -- | |
289 | void handle_mds_failure_or_stop(mds_rank_t who); | |
290 | ||
291 | void audit(); | |
292 | ||
293 | // -- import/export -- | |
294 | // exporter | |
295 | void dispatch_export_dir(MDRequestRef& mdr, int count); | |
296 | void export_dir(CDir *dir, mds_rank_t dest); | |
297 | void export_empty_import(CDir *dir); | |
298 | ||
299 | void export_dir_nicely(CDir *dir, mds_rank_t dest); | |
300 | void maybe_do_queued_export(); | |
301 | void clear_export_queue() { | |
302 | export_queue.clear(); | |
303 | } | |
304 | ||
305 | void get_export_lock_set(CDir *dir, set<SimpleLock*>& locks); | |
7c673cae FG |
306 | void get_export_client_set(CInode *in, set<client_t> &client_set); |
307 | ||
308 | void encode_export_inode(CInode *in, bufferlist& bl, | |
309 | map<client_t,entity_inst_t>& exported_client_map); | |
310 | void encode_export_inode_caps(CInode *in, bool auth_cap, bufferlist& bl, | |
311 | map<client_t,entity_inst_t>& exported_client_map); | |
312 | void finish_export_inode(CInode *in, utime_t now, mds_rank_t target, | |
313 | map<client_t,Capability::Import>& peer_imported, | |
314 | list<MDSInternalContextBase*>& finished); | |
315 | void finish_export_inode_caps(CInode *in, mds_rank_t target, | |
316 | map<client_t,Capability::Import>& peer_imported); | |
317 | ||
318 | ||
319 | uint64_t encode_export_dir(bufferlist& exportbl, | |
320 | CDir *dir, | |
321 | map<client_t,entity_inst_t>& exported_client_map, | |
322 | utime_t now); | |
323 | void finish_export_dir(CDir *dir, utime_t now, mds_rank_t target, | |
324 | map<inodeno_t,map<client_t,Capability::Import> >& peer_imported, | |
325 | list<MDSInternalContextBase*>& finished, int *num_dentries); | |
326 | ||
327 | void clear_export_proxy_pins(CDir *dir); | |
328 | ||
329 | void export_caps(CInode *in); | |
330 | ||
331 | void decode_import_inode(CDentry *dn, bufferlist::iterator& blp, | |
332 | mds_rank_t oldauth, LogSegment *ls, | |
333 | map<CInode*, map<client_t,Capability::Export> >& cap_imports, | |
334 | list<ScatterLock*>& updated_scatterlocks); | |
335 | void decode_import_inode_caps(CInode *in, bool auth_cap, bufferlist::iterator &blp, | |
336 | map<CInode*, map<client_t,Capability::Export> >& cap_imports); | |
337 | void finish_import_inode_caps(CInode *in, mds_rank_t from, bool auth_cap, | |
28e407b8 AA |
338 | const map<client_t,pair<Session*,uint64_t> >& smap, |
339 | const map<client_t,Capability::Export> &export_map, | |
7c673cae FG |
340 | map<client_t,Capability::Import> &import_map); |
341 | int decode_import_dir(bufferlist::iterator& blp, | |
342 | mds_rank_t oldauth, | |
343 | CDir *import_root, | |
344 | EImportStart *le, | |
345 | LogSegment *ls, | |
346 | map<CInode*, map<client_t,Capability::Export> >& cap_imports, | |
347 | list<ScatterLock*>& updated_scatterlocks, utime_t now); | |
348 | ||
349 | void import_reverse(CDir *dir); | |
350 | ||
351 | void import_finish(CDir *dir, bool notify, bool last=true); | |
352 | ||
353 | private: | |
354 | MDSRank *mds; | |
355 | MDCache *cache; | |
28e407b8 | 356 | bool inject_session_race = false; |
1adf2230 | 357 | int inject_message_loss = 0; |
7c673cae FG |
358 | }; |
359 | ||
360 | #endif |