]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/Migrator.h
Import ceph 15.2.8
[ceph.git] / ceph / src / mds / Migrator.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 * Handles the import and export of mds authorities and actual cache data.
14 * See src/doc/exports.txt for a description.
15 */
16
17 #ifndef CEPH_MDS_MIGRATOR_H
18 #define CEPH_MDS_MIGRATOR_H
19
#include "include/types.h"

#include "MDSContext.h"

#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "messages/MExportCaps.h"
#include "messages/MExportCapsAck.h"
#include "messages/MExportDir.h"
#include "messages/MExportDirAck.h"
#include "messages/MExportDirCancel.h"
#include "messages/MExportDirDiscover.h"
#include "messages/MExportDirDiscoverAck.h"
#include "messages/MExportDirFinish.h"
#include "messages/MExportDirNotify.h"
#include "messages/MExportDirNotifyAck.h"
#include "messages/MExportDirPrep.h"
#include "messages/MExportDirPrepAck.h"
#include "messages/MGatherCaps.h"
42
// Forward declarations of MDS types that Migrator only handles through
// pointers; their full definitions are not required by this header.
class MDSRank;
class MDCache;   // used by Migrator's constructor and its `cache` member
class CDir;
class CInode;
class CDentry;
class Session;
class EImportStart;
49
50 class Migrator {
51 public:
52 // export stages. used to clean up intelligently if there's a failure.
53 const static int EXPORT_CANCELLED = 0; // cancelled
54 const static int EXPORT_CANCELLING = 1; // waiting for cancel notifyacks
55 const static int EXPORT_LOCKING = 2; // acquiring locks
56 const static int EXPORT_DISCOVERING = 3; // dest is disovering export dir
57 const static int EXPORT_FREEZING = 4; // we're freezing the dir tree
58 const static int EXPORT_PREPPING = 5; // sending dest spanning tree to export bounds
59 const static int EXPORT_WARNING = 6; // warning bystanders of dir_auth_pending
60 const static int EXPORT_EXPORTING = 7; // sent actual export, waiting for ack
61 const static int EXPORT_LOGGINGFINISH = 8; // logging EExportFinish
62 const static int EXPORT_NOTIFYING = 9; // waiting for notifyacks
63
64 // -- imports --
65 const static int IMPORT_DISCOVERING = 1; // waiting for prep
66 const static int IMPORT_DISCOVERED = 2; // waiting for prep
67 const static int IMPORT_PREPPING = 3; // opening dirs on bounds
68 const static int IMPORT_PREPPED = 4; // opened bounds, waiting for import
69 const static int IMPORT_LOGGINGSTART = 5; // got import, logging EImportStart
70 const static int IMPORT_ACKING = 6; // logged EImportStart, sent ack, waiting for finish
71 const static int IMPORT_FINISHING = 7; // sent cap imports, waiting for finish
72 const static int IMPORT_ABORTING = 8; // notifying bystanders of an abort before unfreezing
73
74 // -- cons --
75 Migrator(MDSRank *m, MDCache *c);
76
77 static std::string_view get_export_statename(int s) {
78 switch (s) {
79 case EXPORT_CANCELLING: return "cancelling";
80 case EXPORT_LOCKING: return "locking";
81 case EXPORT_DISCOVERING: return "discovering";
82 case EXPORT_FREEZING: return "freezing";
83 case EXPORT_PREPPING: return "prepping";
84 case EXPORT_WARNING: return "warning";
85 case EXPORT_EXPORTING: return "exporting";
86 case EXPORT_LOGGINGFINISH: return "loggingfinish";
87 case EXPORT_NOTIFYING: return "notifying";
88 default: ceph_abort(); return std::string_view();
89 }
90 }
91
92 static std::string_view get_import_statename(int s) {
93 switch (s) {
94 case IMPORT_DISCOVERING: return "discovering";
95 case IMPORT_DISCOVERED: return "discovered";
96 case IMPORT_PREPPING: return "prepping";
97 case IMPORT_PREPPED: return "prepped";
98 case IMPORT_LOGGINGSTART: return "loggingstart";
99 case IMPORT_ACKING: return "acking";
100 case IMPORT_FINISHING: return "finishing";
101 case IMPORT_ABORTING: return "aborting";
102 default: ceph_abort(); return std::string_view();
103 }
104 }
105
106 void handle_conf_change(const std::set<std::string>& changed, const MDSMap& mds_map);
107
108 void dispatch(const cref_t<Message> &);
109
110 void show_importing();
111 void show_exporting();
112
113 int get_num_exporting() const { return export_state.size(); }
114 int get_export_queue_size() const { return export_queue.size(); }
115
116 // -- status --
117 int is_exporting(CDir *dir) const {
118 auto it = export_state.find(dir);
119 if (it != export_state.end()) return it->second.state;
120 return 0;
121 }
122 bool is_exporting() const { return !export_state.empty(); }
123 int is_importing(dirfrag_t df) const {
124 auto it = import_state.find(df);
125 if (it != import_state.end()) return it->second.state;
126 return 0;
127 }
128 bool is_importing() const { return !import_state.empty(); }
129
130 bool is_ambiguous_import(dirfrag_t df) const {
131 auto it = import_state.find(df);
132 if (it == import_state.end())
133 return false;
134 if (it->second.state >= IMPORT_LOGGINGSTART &&
135 it->second.state < IMPORT_ABORTING)
136 return true;
137 return false;
138 }
139
140 int get_import_state(dirfrag_t df) const {
141 auto it = import_state.find(df);
142 ceph_assert(it != import_state.end());
143 return it->second.state;
144 }
145 int get_import_peer(dirfrag_t df) const {
146 auto it = import_state.find(df);
147 ceph_assert(it != import_state.end());
148 return it->second.peer;
149 }
150
151 int get_export_state(CDir *dir) const {
152 auto it = export_state.find(dir);
153 ceph_assert(it != export_state.end());
154 return it->second.state;
155 }
156 // this returns true if we are export @dir,
157 // and are not waiting for @who to be
158 // be warned of ambiguous auth.
159 // only returns meaningful results during EXPORT_WARNING state.
160 bool export_has_warned(CDir *dir, mds_rank_t who) {
161 auto it = export_state.find(dir);
162 ceph_assert(it != export_state.end());
163 ceph_assert(it->second.state == EXPORT_WARNING);
164 return (it->second.warning_ack_waiting.count(who) == 0);
165 }
166
167 bool export_has_notified(CDir *dir, mds_rank_t who) const {
168 auto it = export_state.find(dir);
169 ceph_assert(it != export_state.end());
170 ceph_assert(it->second.state == EXPORT_NOTIFYING);
171 return (it->second.notify_ack_waiting.count(who) == 0);
172 }
173
174 void export_freeze_inc_num_waiters(CDir *dir) {
175 auto it = export_state.find(dir);
176 ceph_assert(it != export_state.end());
177 it->second.num_remote_waiters++;
178 }
179 void find_stale_export_freeze();
180
181 // -- misc --
182 void handle_mds_failure_or_stop(mds_rank_t who);
183
184 void audit();
185
186 // -- import/export --
187 // exporter
188 void dispatch_export_dir(MDRequestRef& mdr, int count);
189 void export_dir(CDir *dir, mds_rank_t dest);
190 void export_empty_import(CDir *dir);
191
192 void export_dir_nicely(CDir *dir, mds_rank_t dest);
193 void maybe_do_queued_export();
194 void clear_export_queue() {
195 export_queue.clear();
196 export_queue_gen++;
197 }
198
199 void maybe_split_export(CDir* dir, uint64_t max_size, bool null_okay,
200 vector<pair<CDir*, size_t> >& results);
201
202 bool export_try_grab_locks(CDir *dir, MutationRef& mut);
203 void get_export_client_set(CDir *dir, std::set<client_t> &client_set);
204 void get_export_client_set(CInode *in, std::set<client_t> &client_set);
205
206 void encode_export_inode(CInode *in, bufferlist& bl,
207 std::map<client_t,entity_inst_t>& exported_client_map,
208 std::map<client_t,client_metadata_t>& exported_client_metadata_map);
209 void encode_export_inode_caps(CInode *in, bool auth_cap, bufferlist& bl,
210 std::map<client_t,entity_inst_t>& exported_client_map,
211 std::map<client_t,client_metadata_t>& exported_client_metadata_map);
212 void finish_export_inode(CInode *in, mds_rank_t target,
213 std::map<client_t,Capability::Import>& peer_imported,
214 MDSContext::vec& finished);
215 void finish_export_inode_caps(CInode *in, mds_rank_t target,
216 std::map<client_t,Capability::Import>& peer_imported);
217
218
219 void encode_export_dir(bufferlist& exportbl,
220 CDir *dir,
221 std::map<client_t,entity_inst_t>& exported_client_map,
222 std::map<client_t,client_metadata_t>& exported_client_metadata_map,
223 uint64_t &num_exported);
224 void finish_export_dir(CDir *dir, mds_rank_t target,
225 std::map<inodeno_t,std::map<client_t,Capability::Import> >& peer_imported,
226 MDSContext::vec& finished, int *num_dentries);
227
228 void clear_export_proxy_pins(CDir *dir);
229
230 void export_caps(CInode *in);
231
232 void decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp,
233 mds_rank_t oldauth, LogSegment *ls,
234 std::map<CInode*, std::map<client_t,Capability::Export> >& cap_imports,
235 std::list<ScatterLock*>& updated_scatterlocks);
236 void decode_import_inode_caps(CInode *in, bool auth_cap, bufferlist::const_iterator &blp,
237 std::map<CInode*, std::map<client_t,Capability::Export> >& cap_imports);
238 void finish_import_inode_caps(CInode *in, mds_rank_t from, bool auth_cap,
239 const std::map<client_t,pair<Session*,uint64_t> >& smap,
240 const std::map<client_t,Capability::Export> &export_map,
241 std::map<client_t,Capability::Import> &import_map);
242 void decode_import_dir(bufferlist::const_iterator& blp,
243 mds_rank_t oldauth,
244 CDir *import_root,
245 EImportStart *le,
246 LogSegment *ls,
247 std::map<CInode*, std::map<client_t,Capability::Export> >& cap_imports,
248 std::list<ScatterLock*>& updated_scatterlocks, int &num_imported);
249
250 void import_reverse(CDir *dir);
251
252 void import_finish(CDir *dir, bool notify, bool last=true);
253
254 protected:
255 struct export_base_t {
256 export_base_t(dirfrag_t df, mds_rank_t d, unsigned c, uint64_t g) :
257 dirfrag(df), dest(d), pending_children(c), export_queue_gen(g) {}
258 dirfrag_t dirfrag;
259 mds_rank_t dest;
260 unsigned pending_children;
261 uint64_t export_queue_gen;
262 bool restart = false;
263 };
264
265 // export fun
266 struct export_state_t {
267 export_state_t() {}
268
269 int state = 0;
270 mds_rank_t peer = MDS_RANK_NONE;
271 uint64_t tid = 0;
272 std::set<mds_rank_t> warning_ack_waiting;
273 std::set<mds_rank_t> notify_ack_waiting;
274 std::map<inodeno_t,std::map<client_t,Capability::Import> > peer_imported;
275 MutationRef mut;
276 size_t approx_size = 0;
277 // for freeze tree deadlock detection
278 utime_t last_cum_auth_pins_change;
279 int last_cum_auth_pins = 0;
280 int num_remote_waiters = 0; // number of remote authpin waiters
281 std::shared_ptr<export_base_t> parent;
282 };
283
284 // import fun
285 struct import_state_t {
286 import_state_t() : mut() {}
287 int state = 0;
288 mds_rank_t peer = 0;
289 uint64_t tid = 0;
290 std::set<mds_rank_t> bystanders;
291 std::list<dirfrag_t> bound_ls;
292 std::list<ScatterLock*> updated_scatterlocks;
293 std::map<client_t,pair<Session*,uint64_t> > session_map;
294 std::map<CInode*, std::map<client_t,Capability::Export> > peer_exports;
295 MutationRef mut;
296 };
297
298 typedef map<CDir*, export_state_t>::iterator export_state_iterator;
299
300 friend class C_MDC_ExportFreeze;
301 friend class C_MDS_ExportFinishLogged;
302 friend class C_M_ExportGo;
303 friend class C_M_ExportSessionsFlushed;
304 friend class C_MDS_ExportDiscover;
305 friend class C_MDS_ExportPrep;
306 friend class MigratorContext;
307 friend class MigratorLogContext;
308 friend class C_MDS_ImportDirLoggedStart;
309 friend class C_MDS_ImportDirLoggedFinish;
310 friend class C_M_LoggedImportCaps;
311
312 void handle_export_discover_ack(const cref_t<MExportDirDiscoverAck> &m);
313 void export_frozen(CDir *dir, uint64_t tid);
314 void handle_export_prep_ack(const cref_t<MExportDirPrepAck> &m);
315 void export_sessions_flushed(CDir *dir, uint64_t tid);
316 void export_go(CDir *dir);
317 void export_go_synced(CDir *dir, uint64_t tid);
318 void export_try_cancel(CDir *dir, bool notify_peer=true);
319 void export_cancel_finish(export_state_iterator& it);
320 void export_reverse(CDir *dir, export_state_t& stat);
321 void export_notify_abort(CDir *dir, export_state_t& stat, std::set<CDir*>& bounds);
322 void handle_export_ack(const cref_t<MExportDirAck> &m);
323 void export_logged_finish(CDir *dir);
324 void handle_export_notify_ack(const cref_t<MExportDirNotifyAck> &m);
325 void export_finish(CDir *dir);
326 void child_export_finish(std::shared_ptr<export_base_t>& parent, bool success);
327 void encode_export_prep_trace(bufferlist& bl, CDir *bound, CDir *dir, export_state_t &es,
328 set<inodeno_t> &inodes_added, set<dirfrag_t> &dirfrags_added);
329 void decode_export_prep_trace(bufferlist::const_iterator& blp, mds_rank_t oldauth, MDSContext::vec &finished);
330
331 void handle_gather_caps(const cref_t<MGatherCaps> &m);
332
333 // importer
334 void handle_export_discover(const cref_t<MExportDirDiscover> &m, bool started=false);
335 void handle_export_cancel(const cref_t<MExportDirCancel> &m);
336 void handle_export_prep(const cref_t<MExportDirPrep> &m, bool did_assim=false);
337 void handle_export_dir(const cref_t<MExportDir> &m);
338
339 void import_reverse_discovering(dirfrag_t df);
340 void import_reverse_discovered(dirfrag_t df, CInode *diri);
341 void import_reverse_prepping(CDir *dir, import_state_t& stat);
342 void import_remove_pins(CDir *dir, std::set<CDir*>& bounds);
343 void import_reverse_unfreeze(CDir *dir);
344 void import_reverse_final(CDir *dir);
345 void import_notify_abort(CDir *dir, std::set<CDir*>& bounds);
346 void import_notify_finish(CDir *dir, std::set<CDir*>& bounds);
347 void import_logged_start(dirfrag_t df, CDir *dir, mds_rank_t from,
348 std::map<client_t,pair<Session*,uint64_t> >& imported_session_map);
349 void handle_export_finish(const cref_t<MExportDirFinish> &m);
350
351 void handle_export_caps(const cref_t<MExportCaps> &m);
352 void handle_export_caps_ack(const cref_t<MExportCapsAck> &m);
353 void logged_import_caps(CInode *in,
354 mds_rank_t from,
355 std::map<client_t,pair<Session*,uint64_t> >& imported_session_map,
356 std::map<CInode*, std::map<client_t,Capability::Export> >& cap_imports);
357
358 // bystander
359 void handle_export_notify(const cref_t<MExportDirNotify> &m);
360
361 std::map<CDir*, export_state_t> export_state;
362
363 uint64_t total_exporting_size = 0;
364 unsigned num_locking_exports = 0; // exports in locking state (approx_size == 0)
365
366 std::list<pair<dirfrag_t,mds_rank_t> > export_queue;
367 uint64_t export_queue_gen = 1;
368
369 std::map<dirfrag_t, import_state_t> import_state;
370
371 private:
372 MDSRank *mds;
373 MDCache *cache;
374 uint64_t max_export_size = 0;
375 bool inject_session_race = false;
376 };
377
378 #endif