1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
13 * Handles the import and export of mds authorities and actual cache data.
14 * See src/doc/exports.txt for a description.
17 #ifndef CEPH_MDS_MIGRATOR_H
18 #define CEPH_MDS_MIGRATOR_H
20 #include "include/types.h"
22 #include "MDSContext.h"
27 #include <string_view>
29 #include "messages/MExportCaps.h"
30 #include "messages/MExportCapsAck.h"
31 #include "messages/MExportDir.h"
32 #include "messages/MExportDirAck.h"
33 #include "messages/MExportDirCancel.h"
34 #include "messages/MExportDirDiscover.h"
35 #include "messages/MExportDirDiscoverAck.h"
36 #include "messages/MExportDirFinish.h"
37 #include "messages/MExportDirNotify.h"
38 #include "messages/MExportDirNotifyAck.h"
39 #include "messages/MExportDirPrep.h"
40 #include "messages/MExportDirPrepAck.h"
41 #include "messages/MGatherCaps.h"
// Export stages, numbered in ascending order. Used to clean up
// intelligently if there's a failure.
//
// Declared constexpr: a static constexpr data member is implicitly inline
// (C++17), so odr-using one of these (e.g. binding it to a const reference)
// does not require a separate out-of-class definition.
static constexpr int EXPORT_CANCELLED     = 0;  // cancelled
static constexpr int EXPORT_CANCELLING    = 1;  // waiting for cancel notifyacks
static constexpr int EXPORT_LOCKING       = 2;  // acquiring locks
static constexpr int EXPORT_DISCOVERING   = 3;  // dest is discovering export dir
static constexpr int EXPORT_FREEZING      = 4;  // we're freezing the dir tree
static constexpr int EXPORT_PREPPING      = 5;  // sending dest spanning tree to export bounds
static constexpr int EXPORT_WARNING       = 6;  // warning bystanders of dir_auth_pending
static constexpr int EXPORT_EXPORTING     = 7;  // sent actual export, waiting for ack
static constexpr int EXPORT_LOGGINGFINISH = 8;  // logging EExportFinish
static constexpr int EXPORT_NOTIFYING     = 9;  // waiting for notifyacks
// Import stages, numbered in ascending order starting at 1.
//
// Declared constexpr: a static constexpr data member is implicitly inline
// (C++17), so odr-using one of these (e.g. binding it to a const reference)
// does not require a separate out-of-class definition.
//
// NOTE(review): IMPORT_DISCOVERING and IMPORT_DISCOVERED carry the same
// description ("waiting for prep"); confirm what distinguishes the two.
static constexpr int IMPORT_DISCOVERING  = 1;  // waiting for prep
static constexpr int IMPORT_DISCOVERED   = 2;  // waiting for prep
static constexpr int IMPORT_PREPPING     = 3;  // opening dirs on bounds
static constexpr int IMPORT_PREPPED      = 4;  // opened bounds, waiting for import
static constexpr int IMPORT_LOGGINGSTART = 5;  // got import, logging EImportStart
static constexpr int IMPORT_ACKING       = 6;  // logged EImportStart, sent ack, waiting for finish
static constexpr int IMPORT_FINISHING    = 7;  // sent cap imports, waiting for finish
static constexpr int IMPORT_ABORTING     = 8;  // notifying bystanders of an abort before unfreezing
75 Migrator(MDSRank
*m
, MDCache
*c
);
// Map an export stage constant (EXPORT_*) to a short lowercase name.
// Any value without a case label reaches the default label, which calls
// ceph_abort(); the return after it yields an empty string_view.
// NOTE(review): the line introducing the switch and the closing braces are
// not visible in this excerpt. No case for EXPORT_CANCELLED is visible
// either, so that value would take the default label -- confirm this is
// intended.
77 static std::string_view
get_export_statename(int s
) {
79 case EXPORT_CANCELLING
: return "cancelling";
80 case EXPORT_LOCKING
: return "locking";
81 case EXPORT_DISCOVERING
: return "discovering";
82 case EXPORT_FREEZING
: return "freezing";
83 case EXPORT_PREPPING
: return "prepping";
84 case EXPORT_WARNING
: return "warning";
85 case EXPORT_EXPORTING
: return "exporting";
86 case EXPORT_LOGGINGFINISH
: return "loggingfinish";
87 case EXPORT_NOTIFYING
: return "notifying";
// unrecognized state: abort
88 default: ceph_abort(); return std::string_view();
// Map an import stage constant (IMPORT_*) to a short lowercase name.
// Any value without a case label reaches the default label, which calls
// ceph_abort(); the return after it yields an empty string_view.
// NOTE(review): the line introducing the switch and the closing braces are
// not visible in this excerpt.
92 static std::string_view
get_import_statename(int s
) {
94 case IMPORT_DISCOVERING
: return "discovering";
95 case IMPORT_DISCOVERED
: return "discovered";
96 case IMPORT_PREPPING
: return "prepping";
97 case IMPORT_PREPPED
: return "prepped";
98 case IMPORT_LOGGINGSTART
: return "loggingstart";
99 case IMPORT_ACKING
: return "acking";
100 case IMPORT_FINISHING
: return "finishing";
101 case IMPORT_ABORTING
: return "aborting";
// unrecognized state: abort
102 default: ceph_abort(); return std::string_view();
106 void handle_conf_change(const std::set
<std::string
>& changed
, const MDSMap
& mds_map
);
108 void dispatch(const cref_t
<Message
> &);
110 void show_importing();
111 void show_exporting();
113 int get_num_exporting() const { return export_state
.size(); }
114 int get_export_queue_size() const { return export_queue
.size(); }
// Look up `dir` in export_state; when an entry exists, return its `state`
// field (one of the EXPORT_* values).
// NOTE(review): the return taken when `dir` has no entry, and the closing
// brace, are not visible in this excerpt.
117 int is_exporting(CDir
*dir
) const {
118 auto it
= export_state
.find(dir
);
119 if (it
!= export_state
.end()) return it
->second
.state
;
122 bool is_exporting() const { return !export_state
.empty(); }
// Look up `df` in import_state; when an entry exists, return its `state`
// field (one of the IMPORT_* values).
// NOTE(review): the return taken when `df` has no entry, and the closing
// brace, are not visible in this excerpt.
123 int is_importing(dirfrag_t df
) const {
124 auto it
= import_state
.find(df
);
125 if (it
!= import_state
.end()) return it
->second
.state
;
128 bool is_importing() const { return !import_state
.empty(); }
// Look up `df` in import_state and test whether its state lies in the
// half-open range [IMPORT_LOGGINGSTART, IMPORT_ABORTING).
// NOTE(review): the statements controlled by both `if`s, the final return
// and the closing brace are not visible in this excerpt, so the actual
// return values cannot be confirmed from here.
130 bool is_ambiguous_import(dirfrag_t df
) const {
131 auto it
= import_state
.find(df
);
// no import recorded for df
132 if (it
== import_state
.end())
// state is at or past LOGGINGSTART but has not reached ABORTING
134 if (it
->second
.state
>= IMPORT_LOGGINGSTART
&&
135 it
->second
.state
< IMPORT_ABORTING
)
140 int get_import_state(dirfrag_t df
) const {
141 auto it
= import_state
.find(df
);
142 ceph_assert(it
!= import_state
.end());
143 return it
->second
.state
;
145 int get_import_peer(dirfrag_t df
) const {
146 auto it
= import_state
.find(df
);
147 ceph_assert(it
!= import_state
.end());
148 return it
->second
.peer
;
151 int get_export_state(CDir
*dir
) const {
152 auto it
= export_state
.find(dir
);
153 ceph_assert(it
!= export_state
.end());
154 return it
->second
.state
;
156 // this returns true if we are export @dir,
157 // and are not waiting for @who to be
158 // be warned of ambiguous auth.
159 // only returns meaningful results during EXPORT_WARNING state.
160 bool export_has_warned(CDir
*dir
, mds_rank_t who
) {
161 auto it
= export_state
.find(dir
);
162 ceph_assert(it
!= export_state
.end());
163 ceph_assert(it
->second
.state
== EXPORT_WARNING
);
164 return (it
->second
.warning_ack_waiting
.count(who
) == 0);
167 bool export_has_notified(CDir
*dir
, mds_rank_t who
) const {
168 auto it
= export_state
.find(dir
);
169 ceph_assert(it
!= export_state
.end());
170 ceph_assert(it
->second
.state
== EXPORT_NOTIFYING
);
171 return (it
->second
.notify_ack_waiting
.count(who
) == 0);
174 void export_freeze_inc_num_waiters(CDir
*dir
) {
175 auto it
= export_state
.find(dir
);
176 ceph_assert(it
!= export_state
.end());
177 it
->second
.num_remote_waiters
++;
179 void find_stale_export_freeze();
182 void handle_mds_failure_or_stop(mds_rank_t who
);
186 // -- import/export --
188 void dispatch_export_dir(MDRequestRef
& mdr
, int count
);
189 void export_dir(CDir
*dir
, mds_rank_t dest
);
190 void export_empty_import(CDir
*dir
);
192 void export_dir_nicely(CDir
*dir
, mds_rank_t dest
);
193 void maybe_do_queued_export();
// Drop every pending entry from export_queue.
// NOTE(review): the remainder of this body (including the closing brace) is
// not visible in this excerpt; there may be further statements after the
// clear() -- confirm against the full file.
194 void clear_export_queue() {
195 export_queue
.clear();
199 void maybe_split_export(CDir
* dir
, uint64_t max_size
, bool null_okay
,
200 vector
<pair
<CDir
*, size_t> >& results
);
202 bool export_try_grab_locks(CDir
*dir
, MutationRef
& mut
);
203 void get_export_client_set(CDir
*dir
, std::set
<client_t
> &client_set
);
204 void get_export_client_set(CInode
*in
, std::set
<client_t
> &client_set
);
206 void encode_export_inode(CInode
*in
, bufferlist
& bl
,
207 std::map
<client_t
,entity_inst_t
>& exported_client_map
,
208 std::map
<client_t
,client_metadata_t
>& exported_client_metadata_map
);
209 void encode_export_inode_caps(CInode
*in
, bool auth_cap
, bufferlist
& bl
,
210 std::map
<client_t
,entity_inst_t
>& exported_client_map
,
211 std::map
<client_t
,client_metadata_t
>& exported_client_metadata_map
);
212 void finish_export_inode(CInode
*in
, mds_rank_t target
,
213 std::map
<client_t
,Capability::Import
>& peer_imported
,
214 MDSContext::vec
& finished
);
215 void finish_export_inode_caps(CInode
*in
, mds_rank_t target
,
216 std::map
<client_t
,Capability::Import
>& peer_imported
);
219 void encode_export_dir(bufferlist
& exportbl
,
221 std::map
<client_t
,entity_inst_t
>& exported_client_map
,
222 std::map
<client_t
,client_metadata_t
>& exported_client_metadata_map
,
223 uint64_t &num_exported
);
224 void finish_export_dir(CDir
*dir
, mds_rank_t target
,
225 std::map
<inodeno_t
,std::map
<client_t
,Capability::Import
> >& peer_imported
,
226 MDSContext::vec
& finished
, int *num_dentries
);
228 void clear_export_proxy_pins(CDir
*dir
);
230 void export_caps(CInode
*in
);
232 void decode_import_inode(CDentry
*dn
, bufferlist::const_iterator
& blp
,
233 mds_rank_t oldauth
, LogSegment
*ls
,
234 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> >& cap_imports
,
235 std::list
<ScatterLock
*>& updated_scatterlocks
);
236 void decode_import_inode_caps(CInode
*in
, bool auth_cap
, bufferlist::const_iterator
&blp
,
237 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> >& cap_imports
);
238 void finish_import_inode_caps(CInode
*in
, mds_rank_t from
, bool auth_cap
,
239 const std::map
<client_t
,pair
<Session
*,uint64_t> >& smap
,
240 const std::map
<client_t
,Capability::Export
> &export_map
,
241 std::map
<client_t
,Capability::Import
> &import_map
);
242 void decode_import_dir(bufferlist::const_iterator
& blp
,
247 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> >& cap_imports
,
248 std::list
<ScatterLock
*>& updated_scatterlocks
, int &num_imported
);
250 void import_reverse(CDir
*dir
);
252 void import_finish(CDir
*dir
, bool notify
, bool last
=true);
// Bookkeeping record constructed from a dirfrag, a destination rank, a
// pending-children count and an export-queue generation number.
// NOTE(review): the declarations of the `dirfrag` and `dest` members named
// in the constructor's initializer list, and the closing brace, are not
// visible in this excerpt.
255 struct export_base_t
{
// stores each argument in the member of the corresponding name
256 export_base_t(dirfrag_t df
, mds_rank_t d
, unsigned c
, uint64_t g
) :
257 dirfrag(df
), dest(d
), pending_children(c
), export_queue_gen(g
) {}
260 unsigned pending_children
;
261 uint64_t export_queue_gen
;
// defaults to false; not set by the constructor above
262 bool restart
= false;
// Per-export record; the mapped type of export_state (keyed by CDir*).
// NOTE(review): several members (for instance the `state` field read by the
// accessors in this class) and the closing brace are not visible in this
// excerpt.
266 struct export_state_t
{
// defaults to MDS_RANK_NONE
270 mds_rank_t peer
= MDS_RANK_NONE
;
// ranks checked by export_has_warned(): absent from the set => "has warned"
272 std::set
<mds_rank_t
> warning_ack_waiting
;
// ranks checked by export_has_notified(): absent from the set => "has notified"
273 std::set
<mds_rank_t
> notify_ack_waiting
;
274 std::map
<inodeno_t
,std::map
<client_t
,Capability::Import
> > peer_imported
;
276 size_t approx_size
= 0;
277 // for freeze tree deadlock detection
278 utime_t last_cum_auth_pins_change
;
279 int last_cum_auth_pins
= 0;
// incremented by export_freeze_inc_num_waiters()
280 int num_remote_waiters
= 0; // number of remote authpin waiters
281 std::shared_ptr
<export_base_t
> parent
;
// Per-import record; the mapped type of import_state (keyed by dirfrag_t).
// NOTE(review): several members (for instance `state`, `peer` and the `mut`
// member named in the constructor's initializer list) and the closing brace
// are not visible in this excerpt.
285 struct import_state_t
{
// value-initializes `mut`; other members use their own defaults
286 import_state_t() : mut() {}
290 std::set
<mds_rank_t
> bystanders
;
291 std::list
<dirfrag_t
> bound_ls
;
292 std::list
<ScatterLock
*> updated_scatterlocks
;
293 std::map
<client_t
,pair
<Session
*,uint64_t> > session_map
;
294 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> > peer_exports
;
298 typedef map
<CDir
*, export_state_t
>::iterator export_state_iterator
;
300 friend class C_MDC_ExportFreeze
;
301 friend class C_MDS_ExportFinishLogged
;
302 friend class C_M_ExportGo
;
303 friend class C_M_ExportSessionsFlushed
;
304 friend class C_MDS_ExportDiscover
;
305 friend class C_MDS_ExportPrep
;
306 friend class MigratorContext
;
307 friend class MigratorLogContext
;
308 friend class C_MDS_ImportDirLoggedStart
;
309 friend class C_MDS_ImportDirLoggedFinish
;
310 friend class C_M_LoggedImportCaps
;
312 void handle_export_discover_ack(const cref_t
<MExportDirDiscoverAck
> &m
);
313 void export_frozen(CDir
*dir
, uint64_t tid
);
314 void handle_export_prep_ack(const cref_t
<MExportDirPrepAck
> &m
);
315 void export_sessions_flushed(CDir
*dir
, uint64_t tid
);
316 void export_go(CDir
*dir
);
317 void export_go_synced(CDir
*dir
, uint64_t tid
);
318 void export_try_cancel(CDir
*dir
, bool notify_peer
=true);
319 void export_cancel_finish(export_state_iterator
& it
);
320 void export_reverse(CDir
*dir
, export_state_t
& stat
);
321 void export_notify_abort(CDir
*dir
, export_state_t
& stat
, std::set
<CDir
*>& bounds
);
322 void handle_export_ack(const cref_t
<MExportDirAck
> &m
);
323 void export_logged_finish(CDir
*dir
);
324 void handle_export_notify_ack(const cref_t
<MExportDirNotifyAck
> &m
);
325 void export_finish(CDir
*dir
);
326 void child_export_finish(std::shared_ptr
<export_base_t
>& parent
, bool success
);
327 void encode_export_prep_trace(bufferlist
& bl
, CDir
*bound
, CDir
*dir
, export_state_t
&es
,
328 set
<inodeno_t
> &inodes_added
, set
<dirfrag_t
> &dirfrags_added
);
329 void decode_export_prep_trace(bufferlist::const_iterator
& blp
, mds_rank_t oldauth
, MDSContext::vec
&finished
);
331 void handle_gather_caps(const cref_t
<MGatherCaps
> &m
);
334 void handle_export_discover(const cref_t
<MExportDirDiscover
> &m
, bool started
=false);
335 void handle_export_cancel(const cref_t
<MExportDirCancel
> &m
);
336 void handle_export_prep(const cref_t
<MExportDirPrep
> &m
, bool did_assim
=false);
337 void handle_export_dir(const cref_t
<MExportDir
> &m
);
339 void import_reverse_discovering(dirfrag_t df
);
340 void import_reverse_discovered(dirfrag_t df
, CInode
*diri
);
341 void import_reverse_prepping(CDir
*dir
, import_state_t
& stat
);
342 void import_remove_pins(CDir
*dir
, std::set
<CDir
*>& bounds
);
343 void import_reverse_unfreeze(CDir
*dir
);
344 void import_reverse_final(CDir
*dir
);
345 void import_notify_abort(CDir
*dir
, std::set
<CDir
*>& bounds
);
346 void import_notify_finish(CDir
*dir
, std::set
<CDir
*>& bounds
);
347 void import_logged_start(dirfrag_t df
, CDir
*dir
, mds_rank_t from
,
348 std::map
<client_t
,pair
<Session
*,uint64_t> >& imported_session_map
);
349 void handle_export_finish(const cref_t
<MExportDirFinish
> &m
);
351 void handle_export_caps(const cref_t
<MExportCaps
> &m
);
352 void handle_export_caps_ack(const cref_t
<MExportCapsAck
> &m
);
353 void logged_import_caps(CInode
*in
,
355 std::map
<client_t
,pair
<Session
*,uint64_t> >& imported_session_map
,
356 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> >& cap_imports
);
359 void handle_export_notify(const cref_t
<MExportDirNotify
> &m
);
// In-progress exports, keyed by the directory being exported.
361 std::map
<CDir
*, export_state_t
> export_state
;
363 uint64_t total_exporting_size
= 0;
364 unsigned num_locking_exports
= 0; // exports in locking state (approx_size == 0)
// Queued (dirfrag, destination rank) pairs; emptied by clear_export_queue().
366 std::list
<pair
<dirfrag_t
,mds_rank_t
> > export_queue
;
// Starts at 1.
367 uint64_t export_queue_gen
= 1;
// In-progress imports, keyed by dirfrag.
369 std::map
<dirfrag_t
, import_state_t
> import_state
;
374 uint64_t max_export_size
= 0;
// NOTE(review): presumably a fault-injection toggle for testing -- confirm
// where it is set and consumed.
375 bool inject_session_race
= false;