1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
13 * Handles the import and export of mds authorities and actual cache data.
14 * See src/doc/exports.txt for a description.
17 #ifndef CEPH_MDS_MIGRATOR_H
18 #define CEPH_MDS_MIGRATOR_H
20 #include "include/types.h"
22 #include "MDSContext.h"
27 #include <string_view>
35 #include "messages/MExportCaps.h"
36 #include "messages/MExportCapsAck.h"
37 #include "messages/MExportDir.h"
38 #include "messages/MExportDirAck.h"
39 #include "messages/MExportDirCancel.h"
40 #include "messages/MExportDirDiscover.h"
41 #include "messages/MExportDirDiscoverAck.h"
42 #include "messages/MExportDirFinish.h"
43 #include "messages/MExportDirNotify.h"
44 #include "messages/MExportDirNotifyAck.h"
45 #include "messages/MExportDirPrep.h"
46 #include "messages/MExportDirPrepAck.h"
47 #include "messages/MGatherCaps.h"
// Export stages. Used to clean up intelligently if there's a failure.
// The values are consecutive integers, 0 (EXPORT_CANCELLED) through
// 9 (EXPORT_NOTIFYING).
const static int EXPORT_CANCELLED     = 0;  // cancelled
const static int EXPORT_CANCELLING    = 1;  // waiting for cancel notifyacks
const static int EXPORT_LOCKING       = 2;  // acquiring locks
const static int EXPORT_DISCOVERING   = 3;  // dest is discovering export dir
const static int EXPORT_FREEZING      = 4;  // we're freezing the dir tree
const static int EXPORT_PREPPING      = 5;  // sending dest spanning tree to export bounds
const static int EXPORT_WARNING       = 6;  // warning bystanders of dir_auth_pending
const static int EXPORT_EXPORTING     = 7;  // sent actual export, waiting for ack
const static int EXPORT_LOGGINGFINISH = 8;  // logging EExportFinish
const static int EXPORT_NOTIFYING     = 9;  // waiting for notifyacks
64 static std::string_view
get_export_statename(int s
) {
66 case EXPORT_CANCELLING
: return "cancelling";
67 case EXPORT_LOCKING
: return "locking";
68 case EXPORT_DISCOVERING
: return "discovering";
69 case EXPORT_FREEZING
: return "freezing";
70 case EXPORT_PREPPING
: return "prepping";
71 case EXPORT_WARNING
: return "warning";
72 case EXPORT_EXPORTING
: return "exporting";
73 case EXPORT_LOGGINGFINISH
: return "loggingfinish";
74 case EXPORT_NOTIFYING
: return "notifying";
75 default: ceph_abort(); return std::string_view();
// Import stages: consecutive integers, 1 (IMPORT_DISCOVERING) through
// 8 (IMPORT_ABORTING).
const static int IMPORT_DISCOVERING  = 1;  // waiting for prep
const static int IMPORT_DISCOVERED   = 2;  // waiting for prep
const static int IMPORT_PREPPING     = 3;  // opening dirs on bounds
const static int IMPORT_PREPPED      = 4;  // opened bounds, waiting for import
const static int IMPORT_LOGGINGSTART = 5;  // got import, logging EImportStart
const static int IMPORT_ACKING       = 6;  // logged EImportStart, sent ack, waiting for finish
const static int IMPORT_FINISHING    = 7;  // sent cap imports, waiting for finish
const static int IMPORT_ABORTING     = 8;  // notifying bystanders of an abort before unfreezing
88 static std::string_view
get_import_statename(int s
) {
90 case IMPORT_DISCOVERING
: return "discovering";
91 case IMPORT_DISCOVERED
: return "discovered";
92 case IMPORT_PREPPING
: return "prepping";
93 case IMPORT_PREPPED
: return "prepped";
94 case IMPORT_LOGGINGSTART
: return "loggingstart";
95 case IMPORT_ACKING
: return "acking";
96 case IMPORT_FINISHING
: return "finishing";
97 case IMPORT_ABORTING
: return "aborting";
98 default: ceph_abort(); return std::string_view();
103 Migrator(MDSRank
*m
, MDCache
*c
);
105 void handle_conf_change(const ConfigProxy
& conf
,
106 const std::set
<std::string
> &changed
,
107 const MDSMap
&mds_map
);
110 struct export_base_t
{
113 unsigned pending_children
;
114 uint64_t export_queue_gen
;
115 bool restart
= false;
116 export_base_t(dirfrag_t df
, mds_rank_t d
, unsigned c
, uint64_t g
) :
117 dirfrag(df
), dest(d
), pending_children(c
), export_queue_gen(g
) {}
121 struct export_state_t
{
123 mds_rank_t peer
= MDS_RANK_NONE
;
125 std::set
<mds_rank_t
> warning_ack_waiting
;
126 std::set
<mds_rank_t
> notify_ack_waiting
;
127 std::map
<inodeno_t
,std::map
<client_t
,Capability::Import
> > peer_imported
;
129 size_t approx_size
= 0;
130 // for freeze tree deadlock detection
131 utime_t last_cum_auth_pins_change
;
132 int last_cum_auth_pins
= 0;
133 int num_remote_waiters
= 0; // number of remote authpin waiters
136 std::shared_ptr
<export_base_t
> parent
;
138 std::map
<CDir
*, export_state_t
> export_state
;
139 typedef map
<CDir
*, export_state_t
>::iterator export_state_iterator
;
141 uint64_t total_exporting_size
= 0;
142 unsigned num_locking_exports
= 0; // exports in locking state (approx_size == 0)
144 std::list
<pair
<dirfrag_t
,mds_rank_t
> > export_queue
;
145 uint64_t export_queue_gen
= 1;
148 struct import_state_t
{
152 std::set
<mds_rank_t
> bystanders
;
153 std::list
<dirfrag_t
> bound_ls
;
154 std::list
<ScatterLock
*> updated_scatterlocks
;
155 std::map
<client_t
,pair
<Session
*,uint64_t> > session_map
;
156 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> > peer_exports
;
158 import_state_t() : state(0), peer(0), tid(0), mut() {}
161 std::map
<dirfrag_t
, import_state_t
> import_state
;
163 void handle_export_discover_ack(const MExportDirDiscoverAck::const_ref
&m
);
164 void export_frozen(CDir
*dir
, uint64_t tid
);
165 void handle_export_prep_ack(const MExportDirPrepAck::const_ref
&m
);
166 void export_sessions_flushed(CDir
*dir
, uint64_t tid
);
167 void export_go(CDir
*dir
);
168 void export_go_synced(CDir
*dir
, uint64_t tid
);
169 void export_try_cancel(CDir
*dir
, bool notify_peer
=true);
170 void export_cancel_finish(export_state_iterator
& it
);
171 void export_reverse(CDir
*dir
, export_state_t
& stat
);
172 void export_notify_abort(CDir
*dir
, export_state_t
& stat
, std::set
<CDir
*>& bounds
);
173 void handle_export_ack(const MExportDirAck::const_ref
&m
);
174 void export_logged_finish(CDir
*dir
);
175 void handle_export_notify_ack(const MExportDirNotifyAck::const_ref
&m
);
176 void export_finish(CDir
*dir
);
178 void handle_gather_caps(const MGatherCaps::const_ref
&m
);
180 friend class C_MDC_ExportFreeze
;
181 friend class C_MDS_ExportFinishLogged
;
182 friend class C_M_ExportGo
;
183 friend class C_M_ExportSessionsFlushed
;
184 friend class C_MDS_ExportDiscover
;
185 friend class C_MDS_ExportPrep
;
186 friend class MigratorContext
;
187 friend class MigratorLogContext
;
190 void handle_export_discover(const MExportDirDiscover::const_ref
&m
, bool started
=false);
191 void handle_export_cancel(const MExportDirCancel::const_ref
&m
);
192 void handle_export_prep(const MExportDirPrep::const_ref
&m
, bool did_assim
=false);
193 void handle_export_dir(const MExportDir::const_ref
&m
);
195 void import_reverse_discovering(dirfrag_t df
);
196 void import_reverse_discovered(dirfrag_t df
, CInode
*diri
);
197 void import_reverse_prepping(CDir
*dir
, import_state_t
& stat
);
198 void import_remove_pins(CDir
*dir
, std::set
<CDir
*>& bounds
);
199 void import_reverse_unfreeze(CDir
*dir
);
200 void import_reverse_final(CDir
*dir
);
201 void import_notify_abort(CDir
*dir
, std::set
<CDir
*>& bounds
);
202 void import_notify_finish(CDir
*dir
, std::set
<CDir
*>& bounds
);
203 void import_logged_start(dirfrag_t df
, CDir
*dir
, mds_rank_t from
,
204 std::map
<client_t
,pair
<Session
*,uint64_t> >& imported_session_map
);
205 void handle_export_finish(const MExportDirFinish::const_ref
&m
);
207 void handle_export_caps(const MExportCaps::const_ref
&m
);
208 void handle_export_caps_ack(const MExportCapsAck::const_ref
&m
);
209 void logged_import_caps(CInode
*in
,
211 std::map
<client_t
,pair
<Session
*,uint64_t> >& imported_session_map
,
212 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> >& cap_imports
);
215 friend class C_MDS_ImportDirLoggedStart
;
216 friend class C_MDS_ImportDirLoggedFinish
;
217 friend class C_M_LoggedImportCaps
;
220 void handle_export_notify(const MExportDirNotify::const_ref
&m
);
225 void dispatch(const Message::const_ref
&);
227 void show_importing();
228 void show_exporting();
230 int get_num_exporting() const { return export_state
.size(); }
231 int get_export_queue_size() const { return export_queue
.size(); }
234 int is_exporting(CDir
*dir
) const {
235 auto it
= export_state
.find(dir
);
236 if (it
!= export_state
.end()) return it
->second
.state
;
239 bool is_exporting() const { return !export_state
.empty(); }
240 int is_importing(dirfrag_t df
) const {
241 auto it
= import_state
.find(df
);
242 if (it
!= import_state
.end()) return it
->second
.state
;
245 bool is_importing() const { return !import_state
.empty(); }
247 bool is_ambiguous_import(dirfrag_t df
) const {
248 auto it
= import_state
.find(df
);
249 if (it
== import_state
.end())
251 if (it
->second
.state
>= IMPORT_LOGGINGSTART
&&
252 it
->second
.state
< IMPORT_ABORTING
)
257 int get_import_state(dirfrag_t df
) const {
258 auto it
= import_state
.find(df
);
259 ceph_assert(it
!= import_state
.end());
260 return it
->second
.state
;
262 int get_import_peer(dirfrag_t df
) const {
263 auto it
= import_state
.find(df
);
264 ceph_assert(it
!= import_state
.end());
265 return it
->second
.peer
;
268 int get_export_state(CDir
*dir
) const {
269 auto it
= export_state
.find(dir
);
270 ceph_assert(it
!= export_state
.end());
271 return it
->second
.state
;
273 // this returns true if we are export @dir,
274 // and are not waiting for @who to be
275 // be warned of ambiguous auth.
276 // only returns meaningful results during EXPORT_WARNING state.
277 bool export_has_warned(CDir
*dir
, mds_rank_t who
) {
278 auto it
= export_state
.find(dir
);
279 ceph_assert(it
!= export_state
.end());
280 ceph_assert(it
->second
.state
== EXPORT_WARNING
);
281 return (it
->second
.warning_ack_waiting
.count(who
) == 0);
284 bool export_has_notified(CDir
*dir
, mds_rank_t who
) const {
285 auto it
= export_state
.find(dir
);
286 ceph_assert(it
!= export_state
.end());
287 ceph_assert(it
->second
.state
== EXPORT_NOTIFYING
);
288 return (it
->second
.notify_ack_waiting
.count(who
) == 0);
291 void export_freeze_inc_num_waiters(CDir
*dir
) {
292 auto it
= export_state
.find(dir
);
293 ceph_assert(it
!= export_state
.end());
294 it
->second
.num_remote_waiters
++;
296 void find_stale_export_freeze();
299 void handle_mds_failure_or_stop(mds_rank_t who
);
303 // -- import/export --
305 void dispatch_export_dir(MDRequestRef
& mdr
, int count
);
306 void export_dir(CDir
*dir
, mds_rank_t dest
);
307 void export_empty_import(CDir
*dir
);
309 void export_dir_nicely(CDir
*dir
, mds_rank_t dest
);
310 void maybe_do_queued_export();
311 void clear_export_queue() {
312 export_queue
.clear();
316 void maybe_split_export(CDir
* dir
, uint64_t max_size
, bool null_okay
,
317 vector
<pair
<CDir
*, size_t> >& results
);
318 void child_export_finish(std::shared_ptr
<export_base_t
>& parent
, bool success
);
320 void get_export_lock_set(CDir
*dir
, MutationImpl::LockOpVec
& lov
);
321 void get_export_client_set(CDir
*dir
, std::set
<client_t
> &client_set
);
322 void get_export_client_set(CInode
*in
, std::set
<client_t
> &client_set
);
324 void encode_export_inode(CInode
*in
, bufferlist
& bl
,
325 std::map
<client_t
,entity_inst_t
>& exported_client_map
,
326 std::map
<client_t
,client_metadata_t
>& exported_client_metadata_map
);
327 void encode_export_inode_caps(CInode
*in
, bool auth_cap
, bufferlist
& bl
,
328 std::map
<client_t
,entity_inst_t
>& exported_client_map
,
329 std::map
<client_t
,client_metadata_t
>& exported_client_metadata_map
);
330 void finish_export_inode(CInode
*in
, mds_rank_t target
,
331 std::map
<client_t
,Capability::Import
>& peer_imported
,
332 MDSContext::vec
& finished
);
333 void finish_export_inode_caps(CInode
*in
, mds_rank_t target
,
334 std::map
<client_t
,Capability::Import
>& peer_imported
);
337 uint64_t encode_export_dir(bufferlist
& exportbl
,
339 std::map
<client_t
,entity_inst_t
>& exported_client_map
,
340 std::map
<client_t
,client_metadata_t
>& exported_client_metadata_map
);
341 void finish_export_dir(CDir
*dir
, mds_rank_t target
,
342 std::map
<inodeno_t
,std::map
<client_t
,Capability::Import
> >& peer_imported
,
343 MDSContext::vec
& finished
, int *num_dentries
);
345 void clear_export_proxy_pins(CDir
*dir
);
347 void export_caps(CInode
*in
);
349 void decode_import_inode(CDentry
*dn
, bufferlist::const_iterator
& blp
,
350 mds_rank_t oldauth
, LogSegment
*ls
,
351 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> >& cap_imports
,
352 std::list
<ScatterLock
*>& updated_scatterlocks
);
353 void decode_import_inode_caps(CInode
*in
, bool auth_cap
, bufferlist::const_iterator
&blp
,
354 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> >& cap_imports
);
355 void finish_import_inode_caps(CInode
*in
, mds_rank_t from
, bool auth_cap
,
356 const std::map
<client_t
,pair
<Session
*,uint64_t> >& smap
,
357 const std::map
<client_t
,Capability::Export
> &export_map
,
358 std::map
<client_t
,Capability::Import
> &import_map
);
359 int decode_import_dir(bufferlist::const_iterator
& blp
,
364 std::map
<CInode
*, std::map
<client_t
,Capability::Export
> >& cap_imports
,
365 std::list
<ScatterLock
*>& updated_scatterlocks
);
367 void import_reverse(CDir
*dir
);
369 void import_finish(CDir
*dir
, bool notify
, bool last
=true);
// Both members are value-initialized in place (0 / false).
uint64_t max_export_size = 0;
bool inject_session_race = false;