]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef CEPH_MDS_SERVER_H | |
16 | #define CEPH_MDS_SERVER_H | |
17 | ||
11fdf7f2 | 18 | #include <string_view> |
94b18763 | 19 | |
a8e16298 TL |
20 | #include <common/DecayCounter.h> |
21 | ||
11fdf7f2 TL |
22 | #include "messages/MClientReconnect.h" |
23 | #include "messages/MClientReply.h" | |
24 | #include "messages/MClientRequest.h" | |
25 | #include "messages/MClientSession.h" | |
26 | #include "messages/MClientSnap.h" | |
27 | #include "messages/MClientReclaim.h" | |
28 | #include "messages/MClientReclaimReply.h" | |
29 | #include "messages/MLock.h" | |
30 | ||
7c673cae FG |
31 | #include "MDSRank.h" |
32 | #include "Mutation.h" | |
11fdf7f2 | 33 | #include "MDSContext.h" |
7c673cae FG |
34 | |
35 | class OSDMap; | |
36 | class PerfCounters; | |
37 | class LogEvent; | |
38 | class EMetaBlob; | |
39 | class EUpdate; | |
7c673cae | 40 | class MDLog; |
11fdf7f2 | 41 | struct SnapInfo; |
7c673cae FG |
42 | |
// Counter indices for the Server, bracketed by the sentinels l_mdss_first
// (fixed at 1000) and l_mdss_last; the real counters are the enumerators
// strictly between the two.
// NOTE(review): presumably these are registered with the PerfCounters logger
// in Server::create_logger() -- confirm in the implementation file.
43 | enum { | |
44 | l_mdss_first = 1000, | |
d2e6a577 FG |
45 | l_mdss_dispatch_client_request, |
46 | l_mdss_dispatch_slave_request, | |
7c673cae | 47 | l_mdss_handle_client_request, |
7c673cae | 48 | l_mdss_handle_client_session, |
d2e6a577 | 49 | l_mdss_handle_slave_request, |
91327a77 AA |
50 | l_mdss_req_create_latency, |
51 | l_mdss_req_getattr_latency, | |
52 | l_mdss_req_getfilelock_latency, | |
53 | l_mdss_req_link_latency, | |
54 | l_mdss_req_lookup_latency, | |
55 | l_mdss_req_lookuphash_latency, | |
56 | l_mdss_req_lookupino_latency, | |
57 | l_mdss_req_lookupname_latency, | |
58 | l_mdss_req_lookupparent_latency, | |
59 | l_mdss_req_lookupsnap_latency, | |
60 | l_mdss_req_lssnap_latency, | |
61 | l_mdss_req_mkdir_latency, | |
62 | l_mdss_req_mknod_latency, | |
63 | l_mdss_req_mksnap_latency, | |
64 | l_mdss_req_open_latency, | |
65 | l_mdss_req_readdir_latency, | |
66 | l_mdss_req_rename_latency, | |
67 | l_mdss_req_renamesnap_latency, | |
68 | l_mdss_req_rmdir_latency, | |
69 | l_mdss_req_rmsnap_latency, | |
70 | l_mdss_req_rmxattr_latency, | |
71 | l_mdss_req_setattr_latency, | |
72 | l_mdss_req_setdirlayout_latency, | |
73 | l_mdss_req_setfilelock_latency, | |
74 | l_mdss_req_setlayout_latency, | |
75 | l_mdss_req_setxattr_latency, | |
76 | l_mdss_req_symlink_latency, | |
77 | l_mdss_req_unlink_latency, | |
78 | l_mdss_cap_revoke_eviction, | |
7c673cae FG |
79 | l_mdss_last, |
80 | }; | |
81 | ||
// Server: declares the MDS-side handlers for client sessions, reconnect and
// reclaim, client requests, and slave requests. It is constructed from an
// MDSRank pointer and keeps pointers to the rank, its cache and its log; the
// destructor shown here frees only logger and reconnect_done.
82 | class Server { | |
91327a77 AA |
83 | public: |
84 | using clock = ceph::coarse_mono_clock; | |
85 | using time = ceph::coarse_mono_time; | |
86 | ||
7c673cae FG |
87 | private: |
88 | MDSRank *mds; | |
89 | MDCache *mdcache; | |
90 | MDLog *mdlog; | |
91 | PerfCounters *logger; | |
92 | ||
93 | // OSDMap full status, used to generate ENOSPC on some operations | |
94 | bool is_full; | |
95 | ||
96 | // State for while in reconnect | |
11fdf7f2 | 97 | MDSContext *reconnect_done; |
7c673cae | 98 | int failed_reconnects; |
31f18b77 FG |
99 | bool reconnect_evicting; // true if I am waiting for evictions to complete |
100 | // before proceeding to reconnect_gather_finish | |
11fdf7f2 TL |
101 | time reconnect_start = clock::zero(); |
102 | time reconnect_last_seen = clock::zero(); | |
f64942e4 | 103 | set<client_t> client_reconnect_gather; // clients i need a reconnect msg from. |
7c673cae | 104 | |
11fdf7f2 TL |
105 | feature_bitset_t supported_features; |
106 | feature_bitset_t required_client_features; | |
107 | ||
92f5a8d4 | 108 | bool replay_unsafe_with_closed_session = false; |
91327a77 AA |
109 | double cap_revoke_eviction_timeout = 0; |
110 | ||
7c673cae FG |
111 | friend class MDSContinuation; |
112 | friend class ServerContext; | |
113 | friend class ServerLogContext; | |
114 | ||
115 | public: | |
116 | bool terminating_sessions; | |
117 | ||
118 | explicit Server(MDSRank *m); | |
  // Removes logger from the global perf counters collection, then deletes
  // logger and reconnect_done.
  // NOTE(review): logger is passed to remove() without a null check; whether
  // it can still be null here depends on the constructor/create_logger() --
  // confirm in the implementation file.
119 | ~Server() { | |
120 | g_ceph_context->get_perfcounters_collection()->remove(logger); | |
121 | delete logger; | |
122 | delete reconnect_done; | |
123 | } | |
124 | ||
125 | void create_logger(); | |
126 | ||
127 | // message handler | |
11fdf7f2 | 128 | void dispatch(const Message::const_ref &m); |
7c673cae FG |
129 |
130 | void handle_osd_map(); | |
131 | ||
132 | // -- sessions and recovery -- | |
7c673cae FG |
133 | bool waiting_for_reconnect(client_t c) const; |
134 | void dump_reconnect_status(Formatter *f) const; | |
135 | ||
  // Accessor for last_recall_state (declared in the trailing private section).
a8e16298 TL |
136 | time last_recalled() const { |
137 | return last_recall_state; | |
138 | } | |
11fdf7f2 TL |
139 |
140 | void handle_client_session(const MClientSession::const_ref &m); | |
7c673cae FG |
141 | void _session_logged(Session *session, uint64_t state_seq, |
142 | bool open, version_t pv, interval_set<inodeno_t>& inos,version_t piv); | |
143 | version_t prepare_force_open_sessions(map<client_t,entity_inst_t> &cm, | |
11fdf7f2 | 144 | map<client_t,client_metadata_t>& cmm, |
28e407b8 AA |
145 | map<client_t,pair<Session*,uint64_t> >& smap); |
146 | void finish_force_open_sessions(const map<client_t,pair<Session*,uint64_t> >& smap, | |
7c673cae FG |
147 | bool dec_import=true); |
148 | void flush_client_sessions(set<client_t>& client_set, MDSGatherBuilder& gather); | |
149 | void finish_flush_session(Session *session, version_t seq); | |
150 | void terminate_sessions(); | |
151 | void find_idle_sessions(); | |
152 | void kill_session(Session *session, Context *on_safe); | |
31f18b77 | 153 | size_t apply_blacklist(const std::set<entity_addr_t> &blacklist); |
7c673cae | 154 | void journal_close_session(Session *session, int state, Context *on_safe); |
11fdf7f2 TL |
155 |
  // Clients tracked for session reclaim; get_num_pending_reclaim() reports
  // the size of this set.
156 | set<client_t> client_reclaim_gather; | |
157 | size_t get_num_pending_reclaim() const { return client_reclaim_gather.size(); } | |
158 | Session *find_session_by_uuid(std::string_view uuid); | |
159 | void reclaim_session(Session *session, const MClientReclaim::const_ref &m); | |
160 | void finish_reclaim_session(Session *session, const MClientReclaimReply::ref &reply=nullptr); | |
161 | void handle_client_reclaim(const MClientReclaim::const_ref &m); | |
162 | ||
163 | void reconnect_clients(MDSContext *reconnect_done_); | |
164 | void handle_client_reconnect(const MClientReconnect::const_ref &m); | |
165 | void infer_supported_features(Session *session, client_metadata_t& client_metadata); | |
166 | void update_required_client_features(); | |
167 | ||
7c673cae FG |
168 | //void process_reconnect_cap(CInode *in, int from, ceph_mds_cap_reconnect& capinfo); |
169 | void reconnect_gather_finish(); | |
170 | void reconnect_tick(); | |
171 | void recover_filelocks(CInode *in, bufferlist locks, int64_t client); | |
172 | ||
  // Bit flags accepted by recall_client_state(); each non-NONE enumerator
  // occupies its own bit so values can be combined with the operator| and
  // tested with the operator& defined after this class.
92f5a8d4 | 173 | enum class RecallFlags : uint64_t { |
a8e16298 TL |
174 | NONE = 0, |
175 | STEADY = (1<<0), | |
176 | ENFORCE_MAX = (1<<1), | |
92f5a8d4 TL |
177 | TRIM = (1<<2), |
178 | ENFORCE_LIVENESS = (1<<3), | |
a8e16298 | 179 | }; |
92f5a8d4 | 180 | std::pair<bool, uint64_t> recall_client_state(MDSGatherBuilder* gather, RecallFlags=RecallFlags::NONE); |
7c673cae FG |
181 | void force_clients_readonly(); |
182 | ||
183 | // -- requests -- | |
11fdf7f2 | 184 | void handle_client_request(const MClientRequest::const_ref &m); |
7c673cae FG |
185 |
186 | void journal_and_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn, | |
187 | LogEvent *le, MDSLogContextBase *fin); | |
188 | void submit_mdlog_entry(LogEvent *le, MDSLogContextBase *fin, | |
11fdf7f2 | 189 | MDRequestRef& mdr, std::string_view event); |
7c673cae | 190 | void dispatch_client_request(MDRequestRef& mdr); |
11fdf7f2 | 191 | void perf_gather_op_latency(const MClientRequest::const_ref &req, utime_t lat); |
7c673cae FG |
192 | void early_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn); |
193 | void respond_to_request(MDRequestRef& mdr, int r = 0); | |
11fdf7f2 | 194 | void set_trace_dist(Session *session, const MClientReply::ref &reply, CInode *in, CDentry *dn, |
7c673cae FG |
195 | snapid_t snapid, |
196 | int num_dentries_wanted, | |
197 | MDRequestRef& mdr); | |
198 | ||
7c673cae | 199 | |
11fdf7f2 TL |
200 | void handle_slave_request(const MMDSSlaveRequest::const_ref &m); |
201 | void handle_slave_request_reply(const MMDSSlaveRequest::const_ref &m); | |
7c673cae FG |
202 | void dispatch_slave_request(MDRequestRef& mdr); |
203 | void handle_slave_auth_pin(MDRequestRef& mdr); | |
11fdf7f2 | 204 | void handle_slave_auth_pin_ack(MDRequestRef& mdr, const MMDSSlaveRequest::const_ref &ack); |
7c673cae FG |
205 |
206 | // some helpers | |
207 | bool check_fragment_space(MDRequestRef& mdr, CDir *in); | |
208 | bool check_access(MDRequestRef& mdr, CInode *in, unsigned mask); | |
209 | bool _check_access(Session *session, CInode *in, unsigned mask, int caller_uid, int caller_gid, int setattr_uid, int setattr_gid); | |
11fdf7f2 | 210 | CDir *validate_dentry_dir(MDRequestRef& mdr, CInode *diri, std::string_view dname); |
7c673cae | 211 | CDir *traverse_to_auth_dir(MDRequestRef& mdr, vector<CDentry*> &trace, filepath refpath); |
11fdf7f2 | 212 | CDentry *prepare_null_dentry(MDRequestRef& mdr, CDir *dir, std::string_view dname, bool okexist=false); |
7c673cae FG |
213 | CDentry *prepare_stray_dentry(MDRequestRef& mdr, CInode *in); |
214 | CInode* prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode, | |
215 | file_layout_t *layout=NULL); | |
216 | void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob); | |
217 | void apply_allocated_inos(MDRequestRef& mdr, Session *session); | |
218 | ||
11fdf7f2 TL |
219 | CInode* rdlock_path_pin_ref(MDRequestRef& mdr, int n, MutationImpl::LockOpVec& lov, |
220 | bool want_auth, bool no_want_auth=false, | |
221 | file_layout_t **layout=nullptr, | |
7c673cae FG |
222 | bool no_lookup=false); |
223 | CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, int n, | |
11fdf7f2 TL |
224 | MutationImpl::LockOpVec& lov, |
225 | bool okexist, bool mustexist, bool alwaysxlock, | |
226 | file_layout_t **layout=nullptr); | |
7c673cae FG |
227 |
228 | CDir* try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequestRef& mdr); | |
229 | ||
230 | ||
231 | // requests on existing inodes. | |
232 | void handle_client_getattr(MDRequestRef& mdr, bool is_lookup); | |
233 | void handle_client_lookup_ino(MDRequestRef& mdr, | |
234 | bool want_parent, bool want_dentry); | |
11fdf7f2 | 235 | void _lookup_snap_ino(MDRequestRef& mdr); |
7c673cae FG |
236 | void _lookup_ino_2(MDRequestRef& mdr, int r); |
237 | void handle_client_readdir(MDRequestRef& mdr); | |
238 | void handle_client_file_setlock(MDRequestRef& mdr); | |
239 | void handle_client_file_readlock(MDRequestRef& mdr); | |
240 | ||
241 | void handle_client_setattr(MDRequestRef& mdr); | |
242 | void handle_client_setlayout(MDRequestRef& mdr); | |
243 | void handle_client_setdirlayout(MDRequestRef& mdr); | |
244 | ||
11fdf7f2 TL |
245 | int parse_quota_vxattr(string name, string value, quota_info_t *quota); |
246 | void create_quota_realm(CInode *in); | |
7c673cae FG |
247 | int parse_layout_vxattr(string name, string value, const OSDMap& osdmap, |
248 | file_layout_t *layout, bool validate=true); | |
7c673cae FG |
249 | int check_layout_vxattr(MDRequestRef& mdr, |
250 | string name, | |
251 | string value, | |
252 | file_layout_t *layout); | |
253 | void handle_set_vxattr(MDRequestRef& mdr, CInode *cur, | |
254 | file_layout_t *dir_layout, | |
11fdf7f2 | 255 | MutationImpl::LockOpVec& lov); |
7c673cae FG |
256 | void handle_remove_vxattr(MDRequestRef& mdr, CInode *cur, |
257 | file_layout_t *dir_layout, | |
11fdf7f2 | 258 | MutationImpl::LockOpVec& lov); |
7c673cae FG |
259 | void handle_client_setxattr(MDRequestRef& mdr); |
260 | void handle_client_removexattr(MDRequestRef& mdr); | |
261 | ||
262 | void handle_client_fsync(MDRequestRef& mdr); | |
263 | ||
264 | // open | |
265 | void handle_client_open(MDRequestRef& mdr); | |
266 | void handle_client_openc(MDRequestRef& mdr); // O_CREAT variant. | |
267 | void do_open_truncate(MDRequestRef& mdr, int cmode); // O_TRUNC variant. | |
268 | ||
269 | // namespace changes | |
270 | void handle_client_mknod(MDRequestRef& mdr); | |
271 | void handle_client_mkdir(MDRequestRef& mdr); | |
272 | void handle_client_symlink(MDRequestRef& mdr); | |
273 | ||
274 | // link | |
275 | void handle_client_link(MDRequestRef& mdr); | |
276 | void _link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti); | |
11fdf7f2 TL |
277 | void _link_local_finish(MDRequestRef& mdr, CDentry *dn, CInode *targeti, |
278 | version_t, version_t, bool); | |
7c673cae FG |
279 |
280 | void _link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti); | |
281 | void _link_remote_finish(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti, | |
282 | version_t); | |
283 | ||
284 | void handle_slave_link_prep(MDRequestRef& mdr); | |
11fdf7f2 | 285 | void _logged_slave_link(MDRequestRef& mdr, CInode *targeti, bool adjust_realm); |
7c673cae FG |
286 | void _commit_slave_link(MDRequestRef& mdr, int r, CInode *targeti); |
287 | void _committed_slave(MDRequestRef& mdr); // use for rename, too | |
11fdf7f2 | 288 | void handle_slave_link_prep_ack(MDRequestRef& mdr, const MMDSSlaveRequest::const_ref &m); |
7c673cae | 289 | void do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr); |
11fdf7f2 TL |
290 | void _link_rollback_finish(MutationRef& mut, MDRequestRef& mdr, |
291 | map<client_t,MClientSnap::ref>& split); | |
7c673cae FG |
292 |
293 | // unlink | |
294 | void handle_client_unlink(MDRequestRef& mdr); | |
295 | bool _dir_is_nonempty_unlocked(MDRequestRef& mdr, CInode *rmdiri); | |
296 | bool _dir_is_nonempty(MDRequestRef& mdr, CInode *rmdiri); | |
297 | void _unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn); | |
298 | void _unlink_local_finish(MDRequestRef& mdr, | |
299 | CDentry *dn, CDentry *straydn, | |
300 | version_t); | |
301 | bool _rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vector<CDentry*>& trace, CDentry *straydn); | |
302 | void handle_slave_rmdir_prep(MDRequestRef& mdr); | |
303 | void _logged_slave_rmdir(MDRequestRef& mdr, CDentry *srcdn, CDentry *straydn); | |
31f18b77 | 304 | void _commit_slave_rmdir(MDRequestRef& mdr, int r, CDentry *straydn); |
11fdf7f2 | 305 | void handle_slave_rmdir_prep_ack(MDRequestRef& mdr, const MMDSSlaveRequest::const_ref &ack); |
7c673cae FG |
306 | void do_rmdir_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr); |
307 | void _rmdir_rollback_finish(MDRequestRef& mdr, metareqid_t reqid, CDentry *dn, CDentry *straydn); | |
308 | ||
309 | // rename | |
310 | void handle_client_rename(MDRequestRef& mdr); | |
311 | void _rename_finish(MDRequestRef& mdr, | |
312 | CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
313 | ||
314 | void handle_client_lssnap(MDRequestRef& mdr); | |
315 | void handle_client_mksnap(MDRequestRef& mdr); | |
316 | void _mksnap_finish(MDRequestRef& mdr, CInode *diri, SnapInfo &info); | |
317 | void handle_client_rmsnap(MDRequestRef& mdr); | |
318 | void _rmsnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid); | |
319 | void handle_client_renamesnap(MDRequestRef& mdr); | |
320 | void _renamesnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid); | |
321 | ||
322 | ||
323 | // helpers | |
324 | bool _rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, set<mds_rank_t> &witnesse, | |
325 | vector<CDentry*>& srctrace, vector<CDentry*>& dsttrace, CDentry *straydn); | |
326 | version_t _rename_prepare_import(MDRequestRef& mdr, CDentry *srcdn, bufferlist *client_map_bl); | |
327 | bool _need_force_journal(CInode *diri, bool empty); | |
328 | void _rename_prepare(MDRequestRef& mdr, | |
329 | EMetaBlob *metablob, bufferlist *client_map_bl, | |
330 | CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
331 | /* set not_journaling=true if you're going to discard the results -- | |
332 | * this bypasses the asserts to make sure we're journaling the right | |
333 | * things on the right nodes */ | |
  // NOTE(review): _rename_apply as declared below takes no not_journaling
  // parameter, so the comment above looks stale -- confirm against the
  // definition before relying on it.
334 | void _rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
335 | ||
336 | // slaving | |
337 | void handle_slave_rename_prep(MDRequestRef& mdr); | |
11fdf7f2 TL |
338 | void handle_slave_rename_prep_ack(MDRequestRef& mdr, const MMDSSlaveRequest::const_ref &m); |
339 | void handle_slave_rename_notify_ack(MDRequestRef& mdr, const MMDSSlaveRequest::const_ref &m); | |
7c673cae FG |
340 | void _slave_rename_sessions_flushed(MDRequestRef& mdr); |
341 | void _logged_slave_rename(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
342 | void _commit_slave_rename(MDRequestRef& mdr, int r, CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
343 | void do_rename_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr, bool finish_mdr=false); | |
344 | void _rename_rollback_finish(MutationRef& mut, MDRequestRef& mdr, CDentry *srcdn, version_t srcdnpv, | |
11fdf7f2 TL |
345 | CDentry *destdn, CDentry *staydn, map<client_t,MClientSnap::ref> splits[2], |
346 | bool finish_mdr); | |
7c673cae | 347 | |
91327a77 | 348 | void evict_cap_revoke_non_responders(); |
92f5a8d4 | 349 | void handle_conf_change(const std::set<std::string>& changed); |
91327a77 | 350 | |
7c673cae | 351 | private: |
11fdf7f2 | 352 | void reply_client_request(MDRequestRef& mdr, const MClientReply::ref &reply); |
f64942e4 | 353 | void flush_session(Session *session, MDSGatherBuilder *gather); |
a8e16298 TL |
354 |
355 | DecayCounter recall_throttle; | |
  // Exposed read-only through last_recalled().
356 | time last_recall_state; | |
7c673cae FG |
357 | }; |
358 | ||
92f5a8d4 TL |
359 | static inline constexpr auto operator|(Server::RecallFlags a, Server::RecallFlags b) { |
360 | using T = std::underlying_type<Server::RecallFlags>::type; | |
361 | return static_cast<Server::RecallFlags>(static_cast<T>(a) | static_cast<T>(b)); | |
362 | } | |
363 | static inline constexpr auto operator&(Server::RecallFlags a, Server::RecallFlags b) { | |
364 | using T = std::underlying_type<Server::RecallFlags>::type; | |
365 | return static_cast<Server::RecallFlags>(static_cast<T>(a) & static_cast<T>(b)); | |
366 | } | |
367 | static inline std::ostream& operator<<(std::ostream& os, const Server::RecallFlags& f) { | |
368 | using T = std::underlying_type<Server::RecallFlags>::type; | |
369 | return os << "0x" << std::hex << static_cast<T>(f) << std::dec; | |
370 | } | |
371 | static inline constexpr bool operator!(const Server::RecallFlags& f) { | |
372 | using T = std::underlying_type<Server::RecallFlags>::type; | |
373 | return static_cast<T>(f) == static_cast<T>(0); | |
374 | } | |
375 | ||
7c673cae | 376 | #endif |