1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef CEPH_MDS_SERVER_H
16 #define CEPH_MDS_SERVER_H
18 #include <boost/utility/string_view.hpp>
// Forward declaration. Within this view the type is only ever named as a
// pointer parameter (MMDSSlaveRequest *), which does not need the full
// class definition.
28 class MMDSSlaveRequest
;
// l_mdss_* enumerators.
// NOTE(review): the enum's opening line and several enumerators are not
// visible in this view, so the values below are only a subset. Presumably
// these are the perf-counter ids used with `logger` -- confirm.
36 l_mdss_dispatch_client_request
,
37 l_mdss_dispatch_slave_request
,
38 l_mdss_handle_client_request
,
39 l_mdss_handle_client_session
,
40 l_mdss_handle_slave_request
,
43 l_mdss_req_getfilelock
,
46 l_mdss_req_lookuphash
,
48 l_mdss_req_lookupname
,
49 l_mdss_req_lookupparent
,
50 l_mdss_req_lookupsnap
,
58 l_mdss_req_renamesnap
,
63 l_mdss_req_setdirlayout
,
64 l_mdss_req_setfilelock
,
79 // OSDMap full status, used to generate ENOSPC on some operations
82 // State used while in reconnect
// Raw pointer member; a `delete reconnect_done` statement appears further
// down in this file.
83 MDSInternalContext
*reconnect_done
;
84 int failed_reconnects
;
85 bool reconnect_evicting
; // true if I am waiting for evictions to complete
86 // before proceeding to reconnect_gather_finish
// The classes below are granted access to this class's non-public members.
88 friend class MDSContinuation
;
89 friend class ServerContext
;
90 friend class ServerLogContext
;
// NOTE(review): presumably true while terminate_sessions() is in progress --
// confirm against the definition.
93 bool terminating_sessions
;
// Constructor taking an MDSRank pointer; `explicit` rules out implicit
// conversion from MDSRank* to Server.
95 explicit Server(MDSRank
*m
);
// NOTE(review): the statements below look like part of the destructor body;
// its opening line (and possibly further statements) are not visible in
// this view.
// Remove `logger` from the perf-counters collection of g_ceph_context.
97 g_ceph_context
->get_perfcounters_collection()->remove(logger
);
// delete on a null pointer is a no-op, so this is safe when reconnect_done
// is null.
99 delete reconnect_done
;
// Declarations only; the definitions are outside this view.
102 void create_logger();
// Takes a raw Message pointer.
// NOTE(review): ownership of *m cannot be determined from this declaration.
105 void dispatch(Message
*m
);
107 void handle_osd_map();
109 // -- sessions and recovery --
110 utime_t reconnect_start
;
111 set
<client_t
> client_reconnect_gather
; // clients I need a reconnect msg from.
// const member taking a client id by value.
// NOTE(review): presumably a membership test on client_reconnect_gather --
// confirm against the definition.
112 bool waiting_for_reconnect(client_t c
) const;
// const member taking a Formatter pointer (presumably the output sink).
113 void dump_reconnect_status(Formatter
*f
) const;
// Session-handling declarations; definitions are outside this view.
// `class MClientSession` is an elaborated type specifier, so this declaration
// does not depend on MClientSession having been declared earlier.
115 void handle_client_session(class MClientSession
*m
);
// `inos` is taken by non-const reference; all other arguments are by value
// or raw pointer.
116 void _session_logged(Session
*session
, uint64_t state_seq
,
117 bool open
, version_t pv
, interval_set
<inodeno_t
>& inos
,version_t piv
);
// prepare_/finish_force_open_sessions share the same two map parameters, both
// by non-const reference. prepare_ returns a version_t; finish_ additionally
// takes dec_import, which defaults to true.
118 version_t
prepare_force_open_sessions(map
<client_t
,entity_inst_t
> &cm
,
119 map
<client_t
,uint64_t>& sseqmap
);
120 void finish_force_open_sessions(map
<client_t
,entity_inst_t
> &cm
,
121 map
<client_t
,uint64_t>& sseqmap
,
122 bool dec_import
=true);
// Both the client set and the gather builder are non-const references.
123 void flush_client_sessions(set
<client_t
>& client_set
, MDSGatherBuilder
& gather
);
124 void finish_flush_session(Session
*session
, version_t seq
);
125 void terminate_sessions();
126 void find_idle_sessions();
// NOTE(review): on_safe is a raw Context*; when (or whether) it is completed
// is not visible from this declaration.
127 void kill_session(Session
*session
, Context
*on_safe
);
// Takes the blacklist by const reference and returns a size_t.
// NOTE(review): presumably the number of sessions affected -- confirm.
128 size_t apply_blacklist(const std::set
<entity_addr_t
> &blacklist
);
129 void journal_close_session(Session
*session
, int state
, Context
*on_safe
);
// Reconnect / recovery declarations; definitions are outside this view.
// The trailing underscore keeps the parameter name distinct from the
// reconnect_done member declared earlier in this file.
// NOTE(review): presumably the argument is stored in that member -- confirm.
130 void reconnect_clients(MDSInternalContext
*reconnect_done_
);
// `class MClientReconnect` is an elaborated type specifier (no prior
// declaration of the type is required).
131 void handle_client_reconnect(class MClientReconnect
*m
);
132 //void process_reconnect_cap(CInode *in, int from, ceph_mds_cap_reconnect& capinfo);
133 void reconnect_gather_finish();
134 void reconnect_tick();
// `locks` is a bufferlist taken by value.
135 void recover_filelocks(CInode
*in
, bufferlist locks
, int64_t client
);
// `(void)` is the C spelling of an empty parameter list; in C++ it is
// equivalent to `()` as used by the neighbouring declarations.
137 void recall_client_state(void);
138 void force_clients_readonly();
// Client request plumbing; declarations only, definitions are outside this
// view. Apart from handle_client_request (raw message pointer), the request
// is passed as MDRequestRef by non-const reference.
141 void handle_client_request(MClientRequest
*m
);
143 void journal_and_reply(MDRequestRef
& mdr
, CInode
*tracei
, CDentry
*tracedn
,
144 LogEvent
*le
, MDSLogContextBase
*fin
);
// `evt` is a C string.
// NOTE(review): presumably an event label for tracing/logging -- confirm.
145 void submit_mdlog_entry(LogEvent
*le
, MDSLogContextBase
*fin
,
146 MDRequestRef
& mdr
, const char *evt
);
147 void dispatch_client_request(MDRequestRef
& mdr
);
148 void early_reply(MDRequestRef
& mdr
, CInode
*tracei
, CDentry
*tracedn
);
// `r` defaults to 0 when omitted.
149 void respond_to_request(MDRequestRef
& mdr
, int r
= 0);
// NOTE(review): this declaration is truncated in this view -- the parameter
// list ends on a dangling comma and the closing `);` is not visible.
150 void set_trace_dist(Session
*session
, MClientReply
*reply
, CInode
*in
, CDentry
*dn
,
152 int num_dentries_wanted
,
// The three encode_* helpers each take the output bufferlist by non-const
// reference.
155 void encode_empty_dirstat(bufferlist
& bl
);
156 void encode_infinite_lease(bufferlist
& bl
);
157 void encode_null_lease(bufferlist
& bl
);
// Slave-request declarations; definitions are outside this view. Each takes
// either the raw MMDSSlaveRequest message pointer or the MDRequestRef by
// non-const reference.
159 void handle_slave_request(MMDSSlaveRequest
*m
);
160 void handle_slave_request_reply(MMDSSlaveRequest
*m
);
161 void dispatch_slave_request(MDRequestRef
& mdr
);
162 void handle_slave_auth_pin(MDRequestRef
& mdr
);
// Takes both the request and the acknowledging message.
163 void handle_slave_auth_pin_ack(MDRequestRef
& mdr
, MMDSSlaveRequest
*ack
);
// Checks and preparation helpers; declarations only.
// Note: the CDir* parameter here is named `in`, the same name the
// neighbouring declarations use for a CInode*.
166 bool check_fragment_space(MDRequestRef
& mdr
, CDir
*in
);
// check_access takes the request; _check_access takes a Session plus explicit
// caller and setattr uid/gid values instead.
// NOTE(review): presumably check_access derives those from mdr and forwards
// to _check_access -- confirm against the definitions.
167 bool check_access(MDRequestRef
& mdr
, CInode
*in
, unsigned mask
);
168 bool _check_access(Session
*session
, CInode
*in
, unsigned mask
, int caller_uid
, int caller_gid
, int setattr_uid
, int setattr_gid
);
// `dname` is a boost::string_view taken by value (a non-owning view).
169 CDir
*validate_dentry_dir(MDRequestRef
& mdr
, CInode
*diri
, boost::string_view dname
);
// `trace` is a non-const reference; `refpath` is taken by value.
170 CDir
*traverse_to_auth_dir(MDRequestRef
& mdr
, vector
<CDentry
*> &trace
, filepath refpath
);
// `okexist` defaults to false.
171 CDentry
*prepare_null_dentry(MDRequestRef
& mdr
, CDir
*dir
, boost::string_view dname
, bool okexist
=false);
172 CDentry
*prepare_stray_dentry(MDRequestRef
& mdr
, CInode
*in
);
// `layout` is optional and defaults to NULL.
173 CInode
* prepare_new_inode(MDRequestRef
& mdr
, CDir
*dir
, inodeno_t useino
, unsigned mode
,
174 file_layout_t
*layout
=NULL
);
175 void journal_allocated_inos(MDRequestRef
& mdr
, EMetaBlob
*blob
);
176 void apply_allocated_inos(MDRequestRef
& mdr
, Session
*session
);
// Path-locking helpers; declarations only.
// The lock sets are non-const references to sets of SimpleLock pointers.
// Defaults: no_want_auth=false, layout=NULL, no_lookup=false.
// NOTE(review): `layout` is a pointer-to-pointer, presumably an optional
// out-parameter -- confirm against the definition.
178 CInode
* rdlock_path_pin_ref(MDRequestRef
& mdr
, int n
, set
<SimpleLock
*>& rdlocks
, bool want_auth
,
179 bool no_want_auth
=false,
180 file_layout_t
**layout
=NULL
,
181 bool no_lookup
=false);
// Takes three separate lock sets (rd/wr/x), all by non-const reference.
// Only `layout` has a default (NULL); the three bool flags must be supplied.
182 CDentry
* rdlock_path_xlock_dentry(MDRequestRef
& mdr
, int n
,
183 set
<SimpleLock
*>& rdlocks
,
184 set
<SimpleLock
*>& wrlocks
,
185 set
<SimpleLock
*>& xlocks
, bool okexist
,
186 bool mustexist
, bool alwaysxlock
,
187 file_layout_t
**layout
=NULL
);
// Note the parameter order: the request comes last here, unlike most other
// declarations in this header.
189 CDir
* try_open_auth_dirfrag(CInode
*diri
, frag_t fg
, MDRequestRef
& mdr
);
192 // requests on existing inodes.
// Declarations only; each handler takes the request as MDRequestRef by
// non-const reference.
// `is_lookup` is a caller-supplied flag.
// NOTE(review): presumably lets one handler serve both getattr and lookup --
// confirm.
193 void handle_client_getattr(MDRequestRef
& mdr
, bool is_lookup
);
194 void handle_client_lookup_ino(MDRequestRef
& mdr
,
195 bool want_parent
, bool want_dentry
);
// Takes an int `r` in addition to the request.
// NOTE(review): presumably a result code delivered by a continuation of
// handle_client_lookup_ino -- confirm.
196 void _lookup_ino_2(MDRequestRef
& mdr
, int r
);
197 void handle_client_readdir(MDRequestRef
& mdr
);
198 void handle_client_file_setlock(MDRequestRef
& mdr
);
199 void handle_client_file_readlock(MDRequestRef
& mdr
);
201 void handle_client_setattr(MDRequestRef
& mdr
);
202 void handle_client_setlayout(MDRequestRef
& mdr
);
203 void handle_client_setdirlayout(MDRequestRef
& mdr
);
// Virtual-xattr / xattr declarations; definitions are outside this view.
// `name` and `value` are strings taken by value; `osdmap` is a const
// reference; the result goes through the `layout` pointer. `validate`
// defaults to true. Returns int.
// NOTE(review): presumably 0 or a negative errno -- confirm.
205 int parse_layout_vxattr(string name
, string value
, const OSDMap
& osdmap
,
206 file_layout_t
*layout
, bool validate
=true);
207 int parse_quota_vxattr(string name
, string value
, quota_info_t
*quota
);
// NOTE(review): this declaration looks truncated in this view -- the text
// jumps from 208 to 211, so parameters between `mdr` and `layout` may be
// missing.
208 int check_layout_vxattr(MDRequestRef
& mdr
,
211 file_layout_t
*layout
);
// The three lock sets are taken BY VALUE here (copied on each call), unlike
// the rdlock_path_* declarations in this header, which take them by
// reference.
212 void handle_set_vxattr(MDRequestRef
& mdr
, CInode
*cur
,
213 file_layout_t
*dir_layout
,
214 set
<SimpleLock
*> rdlocks
,
215 set
<SimpleLock
*> wrlocks
,
216 set
<SimpleLock
*> xlocks
);
// Same parameter list as handle_set_vxattr, including the by-value lock sets.
217 void handle_remove_vxattr(MDRequestRef
& mdr
, CInode
*cur
,
218 file_layout_t
*dir_layout
,
219 set
<SimpleLock
*> rdlocks
,
220 set
<SimpleLock
*> wrlocks
,
221 set
<SimpleLock
*> xlocks
);
222 void handle_client_setxattr(MDRequestRef
& mdr
);
223 void handle_client_removexattr(MDRequestRef
& mdr
);
// fsync and open-family declarations; definitions are outside this view.
225 void handle_client_fsync(MDRequestRef
& mdr
);
228 void handle_client_open(MDRequestRef
& mdr
);
229 void handle_client_openc(MDRequestRef
& mdr
); // O_CREAT variant.
// Takes an int `cmode` in addition to the request.
230 void do_open_truncate(MDRequestRef
& mdr
, int cmode
); // O_TRUNC variant.
// Handlers named after mknod / mkdir / symlink; declarations only, each
// taking the request as MDRequestRef by non-const reference.
233 void handle_client_mknod(MDRequestRef
& mdr
);
234 void handle_client_mkdir(MDRequestRef
& mdr
);
235 void handle_client_symlink(MDRequestRef
& mdr
);
// Link-family declarations: client handler, local/remote variants with their
// *_finish counterparts, slave-side steps, and rollback. Definitions are
// outside this view.
238 void handle_client_link(MDRequestRef
& mdr
);
239 void _link_local(MDRequestRef
& mdr
, CDentry
*dn
, CInode
*targeti
);
// The two trailing version_t parameters are unnamed in this declaration;
// their meaning is only available at the definition.
240 void _link_local_finish(MDRequestRef
& mdr
,
241 CDentry
*dn
, CInode
*targeti
,
242 version_t
, version_t
);
// `inc` is a caller-supplied flag.
// NOTE(review): presumably selects link (true) versus unlink (false) of the
// remote inode -- confirm.
244 void _link_remote(MDRequestRef
& mdr
, bool inc
, CDentry
*dn
, CInode
*targeti
);
// NOTE(review): this declaration is truncated in this view -- the parameter
// list ends on a dangling comma and the closing `);` is not visible.
245 void _link_remote_finish(MDRequestRef
& mdr
, bool inc
, CDentry
*dn
, CInode
*targeti
,
248 void handle_slave_link_prep(MDRequestRef
& mdr
);
249 void _logged_slave_link(MDRequestRef
& mdr
, CInode
*targeti
);
250 void _commit_slave_link(MDRequestRef
& mdr
, int r
, CInode
*targeti
);
251 void _committed_slave(MDRequestRef
& mdr
); // use for rename, too
252 void handle_slave_link_prep_ack(MDRequestRef
& mdr
, MMDSSlaveRequest
*m
);
// `rbl` is a bufferlist taken by non-const reference; `master` is an MDS rank.
253 void do_link_rollback(bufferlist
&rbl
, mds_rank_t master
, MDRequestRef
& mdr
);
254 void _link_rollback_finish(MutationRef
& mut
, MDRequestRef
& mdr
);
// Unlink / rmdir declarations; definitions are outside this view.
257 void handle_client_unlink(MDRequestRef
& mdr
);
// Two variants with identical signatures.
// NOTE(review): the `_unlocked` suffix presumably means the check is made
// without holding the relevant locks -- confirm against the definitions.
258 bool _dir_is_nonempty_unlocked(MDRequestRef
& mdr
, CInode
*rmdiri
);
259 bool _dir_is_nonempty(MDRequestRef
& mdr
, CInode
*rmdiri
);
260 void _unlink_local(MDRequestRef
& mdr
, CDentry
*dn
, CDentry
*straydn
);
// NOTE(review): this declaration is truncated in this view -- the parameter
// list ends on a dangling comma and the closing `);` is not visible.
261 void _unlink_local_finish(MDRequestRef
& mdr
,
262 CDentry
*dn
, CDentry
*straydn
,
// Returns bool; `trace` is a non-const reference to a vector of CDentry
// pointers.
264 bool _rmdir_prepare_witness(MDRequestRef
& mdr
, mds_rank_t who
, vector
<CDentry
*>& trace
, CDentry
*straydn
);
265 void handle_slave_rmdir_prep(MDRequestRef
& mdr
);
266 void _logged_slave_rmdir(MDRequestRef
& mdr
, CDentry
*srcdn
, CDentry
*straydn
);
267 void _commit_slave_rmdir(MDRequestRef
& mdr
, int r
, CDentry
*straydn
);
268 void handle_slave_rmdir_prep_ack(MDRequestRef
& mdr
, MMDSSlaveRequest
*ack
);
// Same parameter list as do_link_rollback earlier in this header.
269 void do_rmdir_rollback(bufferlist
&rbl
, mds_rank_t master
, MDRequestRef
& mdr
);
// `reqid` is taken by value.
270 void _rmdir_rollback_finish(MDRequestRef
& mdr
, metareqid_t reqid
, CDentry
*dn
, CDentry
*straydn
);
// Rename entry point and its finish step; declarations only.
273 void handle_client_rename(MDRequestRef
& mdr
);
// Takes the source, destination and stray dentries as raw pointers.
274 void _rename_finish(MDRequestRef
& mdr
,
275 CDentry
*srcdn
, CDentry
*destdn
, CDentry
*straydn
);
// Snapshot handlers (ls / mk / rm / rename) and their *_finish steps;
// declarations only, definitions are outside this view.
277 void handle_client_lssnap(MDRequestRef
& mdr
);
278 void handle_client_mksnap(MDRequestRef
& mdr
);
// `info` is a non-const SnapInfo reference.
279 void _mksnap_finish(MDRequestRef
& mdr
, CInode
*diri
, SnapInfo
&info
);
280 void handle_client_rmsnap(MDRequestRef
& mdr
);
// _rmsnap_finish and _renamesnap_finish take the snapshot id by value.
281 void _rmsnap_finish(MDRequestRef
& mdr
, CInode
*diri
, snapid_t snapid
);
282 void handle_client_renamesnap(MDRequestRef
& mdr
);
283 void _renamesnap_finish(MDRequestRef
& mdr
, CInode
*diri
, snapid_t snapid
);
// Rename helper declarations; definitions are outside this view.
// Returns bool. `witnesse` (spelled this way in the declaration) is a
// non-const reference to a set of MDS ranks; the two traces are non-const
// references to vectors of CDentry pointers.
287 bool _rename_prepare_witness(MDRequestRef
& mdr
, mds_rank_t who
, set
<mds_rank_t
> &witnesse
,
288 vector
<CDentry
*>& srctrace
, vector
<CDentry
*>& dsttrace
, CDentry
*straydn
);
// Returns a version_t; `client_map_bl` is a bufferlist pointer.
289 version_t
_rename_prepare_import(MDRequestRef
& mdr
, CDentry
*srcdn
, bufferlist
*client_map_bl
);
290 bool _need_force_journal(CInode
*diri
, bool empty
);
291 void _rename_prepare(MDRequestRef
& mdr
,
292 EMetaBlob
*metablob
, bufferlist
*client_map_bl
,
293 CDentry
*srcdn
, CDentry
*destdn
, CDentry
*straydn
);
294 /* set not_journaling=true if you're going to discard the results --
295 * this bypasses the asserts to make sure we're journaling the right
296 * things on the right nodes
 *
 * NOTE(review): neither _rename_prepare above nor _rename_apply below
 * takes a not_journaling parameter in this header, so this comment may
 * be stale -- confirm against the definitions before relying on it. */
297 void _rename_apply(MDRequestRef
& mdr
, CDentry
*srcdn
, CDentry
*destdn
, CDentry
*straydn
);
// Slave-side rename declarations and rename rollback; definitions are
// outside this view.
300 void handle_slave_rename_prep(MDRequestRef
& mdr
);
// The two *_ack handlers take the request plus the acknowledging message.
301 void handle_slave_rename_prep_ack(MDRequestRef
& mdr
, MMDSSlaveRequest
*m
);
302 void handle_slave_rename_notify_ack(MDRequestRef
& mdr
, MMDSSlaveRequest
*m
);
303 void _slave_rename_sessions_flushed(MDRequestRef
& mdr
);
304 void _logged_slave_rename(MDRequestRef
& mdr
, CDentry
*srcdn
, CDentry
*destdn
, CDentry
*straydn
);
// Same dentry triple as _logged_slave_rename, plus an int `r`.
305 void _commit_slave_rename(MDRequestRef
& mdr
, int r
, CDentry
*srcdn
, CDentry
*destdn
, CDentry
*straydn
);
// Same leading parameters as do_link_rollback / do_rmdir_rollback, plus
// finish_mdr, which defaults to false.
306 void do_rename_rollback(bufferlist
&rbl
, mds_rank_t master
, MDRequestRef
& mdr
, bool finish_mdr
=false);
// Declaration only; the definition is outside this view.
// Takes the mutation and the request by non-const reference, the source
// dentry together with a version_t (srcdnpv), and the destination and stray
// dentries as raw pointers. Unlike do_rename_rollback's finish_mdr=false,
// finish_mdr has no default here and must always be supplied.
// The stray-dentry parameter is spelled `straydn`, matching every other
// declaration in this header; a parameter name in a declaration affects
// neither callers nor the definition.
307 void _rename_rollback_finish(MutationRef
& mut
, MDRequestRef
& mdr
, CDentry
*srcdn
, version_t srcdnpv
,
308 CDentry
*destdn
, CDentry
*straydn
, bool finish_mdr
);
// Declaration only. Takes the request by non-const reference and a raw
// MClientReply pointer.
// NOTE(review): ownership of *reply cannot be determined from this
// declaration.
311 void reply_client_request(MDRequestRef
& mdr
, MClientReply
*reply
);