// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */

#ifndef CEPH_MDS_SERVER_H
#define CEPH_MDS_SERVER_H

#include <cstddef>
#include <cstdint>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include <boost/utility/string_view.hpp>

#include <common/DecayCounter.h>

#include "MDSRank.h"
#include "Mutation.h"

// Forward declarations: these types are only named by pointer or reference
// in the declarations below, so their full definitions are not needed here.
class OSDMap;
class PerfCounters;
class LogEvent;
class EMetaBlob;
class EUpdate;
class MMDSSlaveRequest;
struct SnapInfo;
class MClientRequest;
class MClientReply;
class MDLog;

// Counter identifiers for Server (prefix "l_mdss_").
//
// Values are assigned sequentially starting at l_mdss_first (1000), so the
// ORDER of the enumerators is significant: inserting or reordering entries
// renumbers everything after them.  l_mdss_last is a sentinel that is one
// greater than the highest real counter id.
// NOTE(review): presumably these index the PerfCounters instance built by
// Server::create_logger() -- confirm against Server.cc.
enum {
  l_mdss_first = 1000,

  // dispatch / handle counts
  l_mdss_dispatch_client_request,
  l_mdss_dispatch_slave_request,
  l_mdss_handle_client_request,
  l_mdss_handle_client_session,
  l_mdss_handle_slave_request,

  // per-request-type latency counters
  l_mdss_req_create_latency,
  l_mdss_req_getattr_latency,
  l_mdss_req_getfilelock_latency,
  l_mdss_req_link_latency,
  l_mdss_req_lookup_latency,
  l_mdss_req_lookuphash_latency,
  l_mdss_req_lookupino_latency,
  l_mdss_req_lookupname_latency,
  l_mdss_req_lookupparent_latency,
  l_mdss_req_lookupsnap_latency,
  l_mdss_req_lssnap_latency,
  l_mdss_req_mkdir_latency,
  l_mdss_req_mknod_latency,
  l_mdss_req_mksnap_latency,
  l_mdss_req_open_latency,
  l_mdss_req_readdir_latency,
  l_mdss_req_rename_latency,
  l_mdss_req_renamesnap_latency,
  l_mdss_req_rmdir_latency,
  l_mdss_req_rmsnap_latency,
  l_mdss_req_rmxattr_latency,
  l_mdss_req_setattr_latency,
  l_mdss_req_setdirlayout_latency,
  l_mdss_req_setfilelock_latency,
  l_mdss_req_setlayout_latency,
  l_mdss_req_setxattr_latency,
  l_mdss_req_symlink_latency,
  l_mdss_req_unlink_latency,

  l_mdss_cap_revoke_eviction,

  l_mdss_last,  // sentinel; keep last
};

75 | class Server { | |
91327a77 AA |
76 | public: |
77 | using clock = ceph::coarse_mono_clock; | |
78 | using time = ceph::coarse_mono_time; | |
79 | ||
7c673cae FG |
80 | private: |
81 | MDSRank *mds; | |
82 | MDCache *mdcache; | |
83 | MDLog *mdlog; | |
84 | PerfCounters *logger; | |
85 | ||
86 | // OSDMap full status, used to generate ENOSPC on some operations | |
87 | bool is_full; | |
88 | ||
89 | // State for while in reconnect | |
90 | MDSInternalContext *reconnect_done; | |
91 | int failed_reconnects; | |
31f18b77 FG |
92 | bool reconnect_evicting; // true if I am waiting for evictions to complete |
93 | // before proceeding to reconnect_gather_finish | |
f64942e4 AA |
94 | time reconnect_start = time::min(); |
95 | time reconnect_last_seen = time::min(); | |
96 | set<client_t> client_reconnect_gather; // clients i need a reconnect msg from. | |
7c673cae | 97 | |
91327a77 AA |
98 | double cap_revoke_eviction_timeout = 0; |
99 | ||
7c673cae FG |
100 | friend class MDSContinuation; |
101 | friend class ServerContext; | |
102 | friend class ServerLogContext; | |
103 | ||
104 | public: | |
105 | bool terminating_sessions; | |
106 | ||
107 | explicit Server(MDSRank *m); | |
108 | ~Server() { | |
109 | g_ceph_context->get_perfcounters_collection()->remove(logger); | |
110 | delete logger; | |
111 | delete reconnect_done; | |
112 | } | |
113 | ||
114 | void create_logger(); | |
115 | ||
116 | // message handler | |
117 | void dispatch(Message *m); | |
118 | ||
119 | void handle_osd_map(); | |
120 | ||
121 | // -- sessions and recovery -- | |
7c673cae FG |
122 | bool waiting_for_reconnect(client_t c) const; |
123 | void dump_reconnect_status(Formatter *f) const; | |
124 | ||
7c673cae | 125 | void handle_client_session(class MClientSession *m); |
a8e16298 TL |
126 | time last_recalled() const { |
127 | return last_recall_state; | |
128 | } | |
7c673cae FG |
129 | void _session_logged(Session *session, uint64_t state_seq, |
130 | bool open, version_t pv, interval_set<inodeno_t>& inos,version_t piv); | |
131 | version_t prepare_force_open_sessions(map<client_t,entity_inst_t> &cm, | |
28e407b8 AA |
132 | map<client_t,pair<Session*,uint64_t> >& smap); |
133 | void finish_force_open_sessions(const map<client_t,pair<Session*,uint64_t> >& smap, | |
7c673cae FG |
134 | bool dec_import=true); |
135 | void flush_client_sessions(set<client_t>& client_set, MDSGatherBuilder& gather); | |
136 | void finish_flush_session(Session *session, version_t seq); | |
137 | void terminate_sessions(); | |
138 | void find_idle_sessions(); | |
139 | void kill_session(Session *session, Context *on_safe); | |
31f18b77 | 140 | size_t apply_blacklist(const std::set<entity_addr_t> &blacklist); |
7c673cae FG |
141 | void journal_close_session(Session *session, int state, Context *on_safe); |
142 | void reconnect_clients(MDSInternalContext *reconnect_done_); | |
143 | void handle_client_reconnect(class MClientReconnect *m); | |
144 | //void process_reconnect_cap(CInode *in, int from, ceph_mds_cap_reconnect& capinfo); | |
145 | void reconnect_gather_finish(); | |
146 | void reconnect_tick(); | |
147 | void recover_filelocks(CInode *in, bufferlist locks, int64_t client); | |
148 | ||
a8e16298 TL |
149 | enum RecallFlags { |
150 | NONE = 0, | |
151 | STEADY = (1<<0), | |
152 | ENFORCE_MAX = (1<<1), | |
153 | }; | |
154 | std::pair<bool, uint64_t> recall_client_state(MDSGatherBuilder* gather, enum RecallFlags=RecallFlags::NONE); | |
7c673cae FG |
155 | void force_clients_readonly(); |
156 | ||
157 | // -- requests -- | |
158 | void handle_client_request(MClientRequest *m); | |
159 | ||
160 | void journal_and_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn, | |
161 | LogEvent *le, MDSLogContextBase *fin); | |
162 | void submit_mdlog_entry(LogEvent *le, MDSLogContextBase *fin, | |
163 | MDRequestRef& mdr, const char *evt); | |
164 | void dispatch_client_request(MDRequestRef& mdr); | |
91327a77 | 165 | void perf_gather_op_latency(const MClientRequest* req, utime_t lat); |
7c673cae FG |
166 | void early_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn); |
167 | void respond_to_request(MDRequestRef& mdr, int r = 0); | |
168 | void set_trace_dist(Session *session, MClientReply *reply, CInode *in, CDentry *dn, | |
169 | snapid_t snapid, | |
170 | int num_dentries_wanted, | |
171 | MDRequestRef& mdr); | |
172 | ||
173 | void encode_empty_dirstat(bufferlist& bl); | |
174 | void encode_infinite_lease(bufferlist& bl); | |
175 | void encode_null_lease(bufferlist& bl); | |
176 | ||
177 | void handle_slave_request(MMDSSlaveRequest *m); | |
178 | void handle_slave_request_reply(MMDSSlaveRequest *m); | |
179 | void dispatch_slave_request(MDRequestRef& mdr); | |
180 | void handle_slave_auth_pin(MDRequestRef& mdr); | |
181 | void handle_slave_auth_pin_ack(MDRequestRef& mdr, MMDSSlaveRequest *ack); | |
182 | ||
183 | // some helpers | |
184 | bool check_fragment_space(MDRequestRef& mdr, CDir *in); | |
185 | bool check_access(MDRequestRef& mdr, CInode *in, unsigned mask); | |
186 | bool _check_access(Session *session, CInode *in, unsigned mask, int caller_uid, int caller_gid, int setattr_uid, int setattr_gid); | |
94b18763 | 187 | CDir *validate_dentry_dir(MDRequestRef& mdr, CInode *diri, boost::string_view dname); |
7c673cae | 188 | CDir *traverse_to_auth_dir(MDRequestRef& mdr, vector<CDentry*> &trace, filepath refpath); |
94b18763 | 189 | CDentry *prepare_null_dentry(MDRequestRef& mdr, CDir *dir, boost::string_view dname, bool okexist=false); |
7c673cae FG |
190 | CDentry *prepare_stray_dentry(MDRequestRef& mdr, CInode *in); |
191 | CInode* prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode, | |
192 | file_layout_t *layout=NULL); | |
193 | void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob); | |
194 | void apply_allocated_inos(MDRequestRef& mdr, Session *session); | |
195 | ||
196 | CInode* rdlock_path_pin_ref(MDRequestRef& mdr, int n, set<SimpleLock*>& rdlocks, bool want_auth, | |
197 | bool no_want_auth=false, | |
198 | file_layout_t **layout=NULL, | |
199 | bool no_lookup=false); | |
200 | CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, int n, | |
201 | set<SimpleLock*>& rdlocks, | |
202 | set<SimpleLock*>& wrlocks, | |
203 | set<SimpleLock*>& xlocks, bool okexist, | |
204 | bool mustexist, bool alwaysxlock, | |
205 | file_layout_t **layout=NULL); | |
206 | ||
207 | CDir* try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequestRef& mdr); | |
208 | ||
209 | ||
210 | // requests on existing inodes. | |
211 | void handle_client_getattr(MDRequestRef& mdr, bool is_lookup); | |
212 | void handle_client_lookup_ino(MDRequestRef& mdr, | |
213 | bool want_parent, bool want_dentry); | |
214 | void _lookup_ino_2(MDRequestRef& mdr, int r); | |
215 | void handle_client_readdir(MDRequestRef& mdr); | |
216 | void handle_client_file_setlock(MDRequestRef& mdr); | |
217 | void handle_client_file_readlock(MDRequestRef& mdr); | |
218 | ||
219 | void handle_client_setattr(MDRequestRef& mdr); | |
220 | void handle_client_setlayout(MDRequestRef& mdr); | |
221 | void handle_client_setdirlayout(MDRequestRef& mdr); | |
222 | ||
223 | int parse_layout_vxattr(string name, string value, const OSDMap& osdmap, | |
224 | file_layout_t *layout, bool validate=true); | |
225 | int parse_quota_vxattr(string name, string value, quota_info_t *quota); | |
226 | int check_layout_vxattr(MDRequestRef& mdr, | |
227 | string name, | |
228 | string value, | |
229 | file_layout_t *layout); | |
230 | void handle_set_vxattr(MDRequestRef& mdr, CInode *cur, | |
231 | file_layout_t *dir_layout, | |
232 | set<SimpleLock*> rdlocks, | |
233 | set<SimpleLock*> wrlocks, | |
234 | set<SimpleLock*> xlocks); | |
235 | void handle_remove_vxattr(MDRequestRef& mdr, CInode *cur, | |
236 | file_layout_t *dir_layout, | |
237 | set<SimpleLock*> rdlocks, | |
238 | set<SimpleLock*> wrlocks, | |
239 | set<SimpleLock*> xlocks); | |
240 | void handle_client_setxattr(MDRequestRef& mdr); | |
241 | void handle_client_removexattr(MDRequestRef& mdr); | |
242 | ||
243 | void handle_client_fsync(MDRequestRef& mdr); | |
244 | ||
245 | // open | |
246 | void handle_client_open(MDRequestRef& mdr); | |
247 | void handle_client_openc(MDRequestRef& mdr); // O_CREAT variant. | |
248 | void do_open_truncate(MDRequestRef& mdr, int cmode); // O_TRUNC variant. | |
249 | ||
250 | // namespace changes | |
251 | void handle_client_mknod(MDRequestRef& mdr); | |
252 | void handle_client_mkdir(MDRequestRef& mdr); | |
253 | void handle_client_symlink(MDRequestRef& mdr); | |
254 | ||
255 | // link | |
256 | void handle_client_link(MDRequestRef& mdr); | |
257 | void _link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti); | |
258 | void _link_local_finish(MDRequestRef& mdr, | |
259 | CDentry *dn, CInode *targeti, | |
260 | version_t, version_t); | |
261 | ||
262 | void _link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti); | |
263 | void _link_remote_finish(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti, | |
264 | version_t); | |
265 | ||
266 | void handle_slave_link_prep(MDRequestRef& mdr); | |
267 | void _logged_slave_link(MDRequestRef& mdr, CInode *targeti); | |
268 | void _commit_slave_link(MDRequestRef& mdr, int r, CInode *targeti); | |
269 | void _committed_slave(MDRequestRef& mdr); // use for rename, too | |
270 | void handle_slave_link_prep_ack(MDRequestRef& mdr, MMDSSlaveRequest *m); | |
271 | void do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr); | |
272 | void _link_rollback_finish(MutationRef& mut, MDRequestRef& mdr); | |
273 | ||
274 | // unlink | |
275 | void handle_client_unlink(MDRequestRef& mdr); | |
276 | bool _dir_is_nonempty_unlocked(MDRequestRef& mdr, CInode *rmdiri); | |
277 | bool _dir_is_nonempty(MDRequestRef& mdr, CInode *rmdiri); | |
278 | void _unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn); | |
279 | void _unlink_local_finish(MDRequestRef& mdr, | |
280 | CDentry *dn, CDentry *straydn, | |
281 | version_t); | |
282 | bool _rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vector<CDentry*>& trace, CDentry *straydn); | |
283 | void handle_slave_rmdir_prep(MDRequestRef& mdr); | |
284 | void _logged_slave_rmdir(MDRequestRef& mdr, CDentry *srcdn, CDentry *straydn); | |
31f18b77 | 285 | void _commit_slave_rmdir(MDRequestRef& mdr, int r, CDentry *straydn); |
7c673cae FG |
286 | void handle_slave_rmdir_prep_ack(MDRequestRef& mdr, MMDSSlaveRequest *ack); |
287 | void do_rmdir_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr); | |
288 | void _rmdir_rollback_finish(MDRequestRef& mdr, metareqid_t reqid, CDentry *dn, CDentry *straydn); | |
289 | ||
290 | // rename | |
291 | void handle_client_rename(MDRequestRef& mdr); | |
292 | void _rename_finish(MDRequestRef& mdr, | |
293 | CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
294 | ||
295 | void handle_client_lssnap(MDRequestRef& mdr); | |
296 | void handle_client_mksnap(MDRequestRef& mdr); | |
297 | void _mksnap_finish(MDRequestRef& mdr, CInode *diri, SnapInfo &info); | |
298 | void handle_client_rmsnap(MDRequestRef& mdr); | |
299 | void _rmsnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid); | |
300 | void handle_client_renamesnap(MDRequestRef& mdr); | |
301 | void _renamesnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid); | |
302 | ||
303 | ||
304 | // helpers | |
305 | bool _rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, set<mds_rank_t> &witnesse, | |
306 | vector<CDentry*>& srctrace, vector<CDentry*>& dsttrace, CDentry *straydn); | |
307 | version_t _rename_prepare_import(MDRequestRef& mdr, CDentry *srcdn, bufferlist *client_map_bl); | |
308 | bool _need_force_journal(CInode *diri, bool empty); | |
309 | void _rename_prepare(MDRequestRef& mdr, | |
310 | EMetaBlob *metablob, bufferlist *client_map_bl, | |
311 | CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
312 | /* set not_journaling=true if you're going to discard the results -- | |
313 | * this bypasses the asserts to make sure we're journaling the right | |
314 | * things on the right nodes */ | |
315 | void _rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
316 | ||
317 | // slaving | |
318 | void handle_slave_rename_prep(MDRequestRef& mdr); | |
319 | void handle_slave_rename_prep_ack(MDRequestRef& mdr, MMDSSlaveRequest *m); | |
320 | void handle_slave_rename_notify_ack(MDRequestRef& mdr, MMDSSlaveRequest *m); | |
321 | void _slave_rename_sessions_flushed(MDRequestRef& mdr); | |
322 | void _logged_slave_rename(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
323 | void _commit_slave_rename(MDRequestRef& mdr, int r, CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
324 | void do_rename_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr, bool finish_mdr=false); | |
325 | void _rename_rollback_finish(MutationRef& mut, MDRequestRef& mdr, CDentry *srcdn, version_t srcdnpv, | |
326 | CDentry *destdn, CDentry *staydn, bool finish_mdr); | |
327 | ||
91327a77 AA |
328 | void evict_cap_revoke_non_responders(); |
329 | void handle_conf_change(const struct md_config_t *, | |
330 | const std::set <std::string> &changed); | |
331 | ||
7c673cae FG |
332 | private: |
333 | void reply_client_request(MDRequestRef& mdr, MClientReply *reply); | |
f64942e4 | 334 | void flush_session(Session *session, MDSGatherBuilder *gather); |
a8e16298 TL |
335 | |
336 | DecayCounter recall_throttle; | |
337 | time last_recall_state; | |
7c673cae FG |
338 | }; |
339 | ||
#endif