]>
Commit | Line | Data |
---|---|---|
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */
14 | ||
15 | #ifndef CEPH_MDS_SERVER_H | |
16 | #define CEPH_MDS_SERVER_H | |
17 | ||
11fdf7f2 | 18 | #include <string_view> |
94b18763 | 19 | |
a8e16298 TL |
20 | #include <common/DecayCounter.h> |
21 | ||
9f95a23c TL |
22 | #include "include/common_fwd.h" |
23 | ||
11fdf7f2 TL |
24 | #include "messages/MClientReconnect.h" |
25 | #include "messages/MClientReply.h" | |
26 | #include "messages/MClientRequest.h" | |
27 | #include "messages/MClientSession.h" | |
28 | #include "messages/MClientSnap.h" | |
29 | #include "messages/MClientReclaim.h" | |
30 | #include "messages/MClientReclaimReply.h" | |
31 | #include "messages/MLock.h" | |
32 | ||
7c673cae FG |
33 | #include "MDSRank.h" |
34 | #include "Mutation.h" | |
11fdf7f2 | 35 | #include "MDSContext.h" |
7c673cae FG |
36 | |
37 | class OSDMap; | |
7c673cae FG |
38 | class LogEvent; |
39 | class EMetaBlob; | |
40 | class EUpdate; | |
7c673cae | 41 | class MDLog; |
11fdf7f2 | 42 | struct SnapInfo; |
7c673cae FG |
43 | |
// Identifiers in the l_mdss_* range, bracketed by l_mdss_first (1000) and
// l_mdss_last.  Every enumerator after l_mdss_first takes the next
// consecutive value, so the ORDER below is significant: insert new entries
// immediately before l_mdss_last and never reorder existing ones.
// NOTE(review): presumably these are the PerfCounters indices registered by
// Server::create_logger() -- confirm against the implementation file.
enum {
  l_mdss_first = 1000,

  // dispatch / handle counters
  l_mdss_dispatch_client_request,
  l_mdss_dispatch_slave_request,
  l_mdss_handle_client_request,
  l_mdss_handle_client_session,
  l_mdss_handle_slave_request,

  // per-request-type latency entries
  l_mdss_req_create_latency,
  l_mdss_req_getattr_latency,
  l_mdss_req_getfilelock_latency,
  l_mdss_req_link_latency,
  l_mdss_req_lookup_latency,
  l_mdss_req_lookuphash_latency,
  l_mdss_req_lookupino_latency,
  l_mdss_req_lookupname_latency,
  l_mdss_req_lookupparent_latency,
  l_mdss_req_lookupsnap_latency,
  l_mdss_req_lssnap_latency,
  l_mdss_req_mkdir_latency,
  l_mdss_req_mknod_latency,
  l_mdss_req_mksnap_latency,
  l_mdss_req_open_latency,
  l_mdss_req_readdir_latency,
  l_mdss_req_rename_latency,
  l_mdss_req_renamesnap_latency,
  l_mdss_req_rmdir_latency,
  l_mdss_req_rmsnap_latency,
  l_mdss_req_rmxattr_latency,
  l_mdss_req_setattr_latency,
  l_mdss_req_setdirlayout_latency,
  l_mdss_req_setfilelock_latency,
  l_mdss_req_setlayout_latency,
  l_mdss_req_setxattr_latency,
  l_mdss_req_symlink_latency,
  l_mdss_req_unlink_latency,

  // client capability entries
  l_mdss_cap_revoke_eviction,
  l_mdss_cap_acquisition_throttle,

  // end-of-range sentinel; keep last
  l_mdss_last,
};
83 | ||
84 | class Server { | |
91327a77 AA |
85 | public: |
86 | using clock = ceph::coarse_mono_clock; | |
87 | using time = ceph::coarse_mono_time; | |
88 | ||
9f95a23c TL |
89 | enum class RecallFlags : uint64_t { |
90 | NONE = 0, | |
91 | STEADY = (1<<0), | |
92 | ENFORCE_MAX = (1<<1), | |
93 | TRIM = (1<<2), | |
94 | ENFORCE_LIVENESS = (1<<3), | |
95 | }; | |
7c673cae FG |
96 | explicit Server(MDSRank *m); |
97 | ~Server() { | |
98 | g_ceph_context->get_perfcounters_collection()->remove(logger); | |
99 | delete logger; | |
100 | delete reconnect_done; | |
101 | } | |
102 | ||
103 | void create_logger(); | |
104 | ||
105 | // message handler | |
9f95a23c | 106 | void dispatch(const cref_t<Message> &m); |
7c673cae FG |
107 | |
108 | void handle_osd_map(); | |
109 | ||
110 | // -- sessions and recovery -- | |
7c673cae FG |
111 | bool waiting_for_reconnect(client_t c) const; |
112 | void dump_reconnect_status(Formatter *f) const; | |
113 | ||
a8e16298 TL |
114 | time last_recalled() const { |
115 | return last_recall_state; | |
116 | } | |
11fdf7f2 | 117 | |
9f95a23c | 118 | void handle_client_session(const cref_t<MClientSession> &m); |
7c673cae | 119 | void _session_logged(Session *session, uint64_t state_seq, |
9f95a23c TL |
120 | bool open, version_t pv, const interval_set<inodeno_t>& inos,version_t piv, |
121 | const interval_set<inodeno_t>& purge_inos, LogSegment *ls); | |
7c673cae | 122 | version_t prepare_force_open_sessions(map<client_t,entity_inst_t> &cm, |
11fdf7f2 | 123 | map<client_t,client_metadata_t>& cmm, |
28e407b8 AA |
124 | map<client_t,pair<Session*,uint64_t> >& smap); |
125 | void finish_force_open_sessions(const map<client_t,pair<Session*,uint64_t> >& smap, | |
7c673cae FG |
126 | bool dec_import=true); |
127 | void flush_client_sessions(set<client_t>& client_set, MDSGatherBuilder& gather); | |
128 | void finish_flush_session(Session *session, version_t seq); | |
129 | void terminate_sessions(); | |
130 | void find_idle_sessions(); | |
9f95a23c | 131 | void kill_session(Session *session, Context *on_safe, bool need_purge_inos = false); |
31f18b77 | 132 | size_t apply_blacklist(const std::set<entity_addr_t> &blacklist); |
9f95a23c | 133 | void journal_close_session(Session *session, int state, Context *on_safe, bool need_purge_inos = false); |
11fdf7f2 | 134 | |
11fdf7f2 TL |
135 | size_t get_num_pending_reclaim() const { return client_reclaim_gather.size(); } |
136 | Session *find_session_by_uuid(std::string_view uuid); | |
9f95a23c TL |
137 | void reclaim_session(Session *session, const cref_t<MClientReclaim> &m); |
138 | void finish_reclaim_session(Session *session, const ref_t<MClientReclaimReply> &reply=nullptr); | |
139 | void handle_client_reclaim(const cref_t<MClientReclaim> &m); | |
11fdf7f2 TL |
140 | |
141 | void reconnect_clients(MDSContext *reconnect_done_); | |
9f95a23c | 142 | void handle_client_reconnect(const cref_t<MClientReconnect> &m); |
11fdf7f2 TL |
143 | void infer_supported_features(Session *session, client_metadata_t& client_metadata); |
144 | void update_required_client_features(); | |
145 | ||
7c673cae FG |
146 | //void process_reconnect_cap(CInode *in, int from, ceph_mds_cap_reconnect& capinfo); |
147 | void reconnect_gather_finish(); | |
148 | void reconnect_tick(); | |
149 | void recover_filelocks(CInode *in, bufferlist locks, int64_t client); | |
150 | ||
92f5a8d4 | 151 | std::pair<bool, uint64_t> recall_client_state(MDSGatherBuilder* gather, RecallFlags=RecallFlags::NONE); |
7c673cae FG |
152 | void force_clients_readonly(); |
153 | ||
154 | // -- requests -- | |
9f95a23c | 155 | void handle_client_request(const cref_t<MClientRequest> &m); |
7c673cae FG |
156 | |
157 | void journal_and_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn, | |
158 | LogEvent *le, MDSLogContextBase *fin); | |
159 | void submit_mdlog_entry(LogEvent *le, MDSLogContextBase *fin, | |
11fdf7f2 | 160 | MDRequestRef& mdr, std::string_view event); |
7c673cae | 161 | void dispatch_client_request(MDRequestRef& mdr); |
9f95a23c | 162 | void perf_gather_op_latency(const cref_t<MClientRequest> &req, utime_t lat); |
7c673cae FG |
163 | void early_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn); |
164 | void respond_to_request(MDRequestRef& mdr, int r = 0); | |
9f95a23c | 165 | void set_trace_dist(const ref_t<MClientReply> &reply, CInode *in, CDentry *dn, |
7c673cae FG |
166 | MDRequestRef& mdr); |
167 | ||
9f95a23c TL |
168 | void handle_slave_request(const cref_t<MMDSSlaveRequest> &m); |
169 | void handle_slave_request_reply(const cref_t<MMDSSlaveRequest> &m); | |
7c673cae FG |
170 | void dispatch_slave_request(MDRequestRef& mdr); |
171 | void handle_slave_auth_pin(MDRequestRef& mdr); | |
9f95a23c | 172 | void handle_slave_auth_pin_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &ack); |
7c673cae FG |
173 | |
174 | // some helpers | |
175 | bool check_fragment_space(MDRequestRef& mdr, CDir *in); | |
176 | bool check_access(MDRequestRef& mdr, CInode *in, unsigned mask); | |
177 | bool _check_access(Session *session, CInode *in, unsigned mask, int caller_uid, int caller_gid, int setattr_uid, int setattr_gid); | |
7c673cae FG |
178 | CDentry *prepare_stray_dentry(MDRequestRef& mdr, CInode *in); |
179 | CInode* prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode, | |
180 | file_layout_t *layout=NULL); | |
181 | void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob); | |
182 | void apply_allocated_inos(MDRequestRef& mdr, Session *session); | |
183 | ||
9f95a23c TL |
184 | CInode* rdlock_path_pin_ref(MDRequestRef& mdr, bool want_auth, |
185 | bool no_want_auth=false); | |
186 | CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, bool create, | |
187 | bool okexist=false, bool want_layout=false); | |
188 | std::pair<CDentry*, CDentry*> | |
189 | rdlock_two_paths_xlock_destdn(MDRequestRef& mdr, bool xlock_srcdn); | |
7c673cae FG |
190 | |
191 | CDir* try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequestRef& mdr); | |
192 | ||
7c673cae FG |
193 | // requests on existing inodes. |
194 | void handle_client_getattr(MDRequestRef& mdr, bool is_lookup); | |
195 | void handle_client_lookup_ino(MDRequestRef& mdr, | |
196 | bool want_parent, bool want_dentry); | |
11fdf7f2 | 197 | void _lookup_snap_ino(MDRequestRef& mdr); |
7c673cae FG |
198 | void _lookup_ino_2(MDRequestRef& mdr, int r); |
199 | void handle_client_readdir(MDRequestRef& mdr); | |
200 | void handle_client_file_setlock(MDRequestRef& mdr); | |
201 | void handle_client_file_readlock(MDRequestRef& mdr); | |
202 | ||
9f95a23c TL |
203 | bool xlock_policylock(MDRequestRef& mdr, CInode *in, |
204 | bool want_layout=false, bool xlock_snaplock=false); | |
205 | CInode* try_get_auth_inode(MDRequestRef& mdr, inodeno_t ino); | |
7c673cae FG |
206 | void handle_client_setattr(MDRequestRef& mdr); |
207 | void handle_client_setlayout(MDRequestRef& mdr); | |
208 | void handle_client_setdirlayout(MDRequestRef& mdr); | |
209 | ||
11fdf7f2 TL |
210 | int parse_quota_vxattr(string name, string value, quota_info_t *quota); |
211 | void create_quota_realm(CInode *in); | |
7c673cae FG |
212 | int parse_layout_vxattr(string name, string value, const OSDMap& osdmap, |
213 | file_layout_t *layout, bool validate=true); | |
7c673cae FG |
214 | int check_layout_vxattr(MDRequestRef& mdr, |
215 | string name, | |
216 | string value, | |
217 | file_layout_t *layout); | |
9f95a23c TL |
218 | void handle_set_vxattr(MDRequestRef& mdr, CInode *cur); |
219 | void handle_remove_vxattr(MDRequestRef& mdr, CInode *cur); | |
7c673cae FG |
220 | void handle_client_setxattr(MDRequestRef& mdr); |
221 | void handle_client_removexattr(MDRequestRef& mdr); | |
222 | ||
223 | void handle_client_fsync(MDRequestRef& mdr); | |
224 | ||
225 | // open | |
226 | void handle_client_open(MDRequestRef& mdr); | |
227 | void handle_client_openc(MDRequestRef& mdr); // O_CREAT variant. | |
228 | void do_open_truncate(MDRequestRef& mdr, int cmode); // O_TRUNC variant. | |
229 | ||
230 | // namespace changes | |
231 | void handle_client_mknod(MDRequestRef& mdr); | |
232 | void handle_client_mkdir(MDRequestRef& mdr); | |
233 | void handle_client_symlink(MDRequestRef& mdr); | |
234 | ||
235 | // link | |
236 | void handle_client_link(MDRequestRef& mdr); | |
adb31ebb | 237 | void _link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti, SnapRealm *target_realm); |
11fdf7f2 TL |
238 | void _link_local_finish(MDRequestRef& mdr, CDentry *dn, CInode *targeti, |
239 | version_t, version_t, bool); | |
7c673cae FG |
240 | |
241 | void _link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti); | |
242 | void _link_remote_finish(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti, | |
243 | version_t); | |
244 | ||
245 | void handle_slave_link_prep(MDRequestRef& mdr); | |
11fdf7f2 | 246 | void _logged_slave_link(MDRequestRef& mdr, CInode *targeti, bool adjust_realm); |
7c673cae FG |
247 | void _commit_slave_link(MDRequestRef& mdr, int r, CInode *targeti); |
248 | void _committed_slave(MDRequestRef& mdr); // use for rename, too | |
9f95a23c | 249 | void handle_slave_link_prep_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &m); |
7c673cae | 250 | void do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr); |
11fdf7f2 | 251 | void _link_rollback_finish(MutationRef& mut, MDRequestRef& mdr, |
9f95a23c | 252 | map<client_t,ref_t<MClientSnap>>& split); |
7c673cae FG |
253 | |
254 | // unlink | |
255 | void handle_client_unlink(MDRequestRef& mdr); | |
256 | bool _dir_is_nonempty_unlocked(MDRequestRef& mdr, CInode *rmdiri); | |
257 | bool _dir_is_nonempty(MDRequestRef& mdr, CInode *rmdiri); | |
258 | void _unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn); | |
259 | void _unlink_local_finish(MDRequestRef& mdr, | |
260 | CDentry *dn, CDentry *straydn, | |
261 | version_t); | |
262 | bool _rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vector<CDentry*>& trace, CDentry *straydn); | |
263 | void handle_slave_rmdir_prep(MDRequestRef& mdr); | |
264 | void _logged_slave_rmdir(MDRequestRef& mdr, CDentry *srcdn, CDentry *straydn); | |
31f18b77 | 265 | void _commit_slave_rmdir(MDRequestRef& mdr, int r, CDentry *straydn); |
9f95a23c | 266 | void handle_slave_rmdir_prep_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &ack); |
7c673cae FG |
267 | void do_rmdir_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr); |
268 | void _rmdir_rollback_finish(MDRequestRef& mdr, metareqid_t reqid, CDentry *dn, CDentry *straydn); | |
269 | ||
270 | // rename | |
271 | void handle_client_rename(MDRequestRef& mdr); | |
272 | void _rename_finish(MDRequestRef& mdr, | |
273 | CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
274 | ||
275 | void handle_client_lssnap(MDRequestRef& mdr); | |
276 | void handle_client_mksnap(MDRequestRef& mdr); | |
277 | void _mksnap_finish(MDRequestRef& mdr, CInode *diri, SnapInfo &info); | |
278 | void handle_client_rmsnap(MDRequestRef& mdr); | |
279 | void _rmsnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid); | |
280 | void handle_client_renamesnap(MDRequestRef& mdr); | |
281 | void _renamesnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid); | |
282 | ||
7c673cae FG |
283 | // helpers |
284 | bool _rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, set<mds_rank_t> &witnesse, | |
285 | vector<CDentry*>& srctrace, vector<CDentry*>& dsttrace, CDentry *straydn); | |
286 | version_t _rename_prepare_import(MDRequestRef& mdr, CDentry *srcdn, bufferlist *client_map_bl); | |
287 | bool _need_force_journal(CInode *diri, bool empty); | |
288 | void _rename_prepare(MDRequestRef& mdr, | |
289 | EMetaBlob *metablob, bufferlist *client_map_bl, | |
290 | CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
291 | /* set not_journaling=true if you're going to discard the results -- | |
292 | * this bypasses the asserts to make sure we're journaling the right | |
293 | * things on the right nodes */ | |
294 | void _rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
295 | ||
296 | // slaving | |
297 | void handle_slave_rename_prep(MDRequestRef& mdr); | |
9f95a23c TL |
298 | void handle_slave_rename_prep_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &m); |
299 | void handle_slave_rename_notify_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &m); | |
7c673cae FG |
300 | void _slave_rename_sessions_flushed(MDRequestRef& mdr); |
301 | void _logged_slave_rename(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
302 | void _commit_slave_rename(MDRequestRef& mdr, int r, CDentry *srcdn, CDentry *destdn, CDentry *straydn); | |
303 | void do_rename_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr, bool finish_mdr=false); | |
304 | void _rename_rollback_finish(MutationRef& mut, MDRequestRef& mdr, CDentry *srcdn, version_t srcdnpv, | |
9f95a23c | 305 | CDentry *destdn, CDentry *staydn, map<client_t,ref_t<MClientSnap>> splits[2], |
11fdf7f2 | 306 | bool finish_mdr); |
7c673cae | 307 | |
91327a77 | 308 | void evict_cap_revoke_non_responders(); |
92f5a8d4 | 309 | void handle_conf_change(const std::set<std::string>& changed); |
91327a77 | 310 | |
9f95a23c TL |
311 | bool terminating_sessions = false; |
312 | ||
313 | set<client_t> client_reclaim_gather; | |
314 | ||
7c673cae | 315 | private: |
9f95a23c TL |
316 | friend class MDSContinuation; |
317 | friend class ServerContext; | |
318 | friend class ServerLogContext; | |
319 | friend class Batch_Getattr_Lookup; | |
320 | ||
321 | void reply_client_request(MDRequestRef& mdr, const ref_t<MClientReply> &reply); | |
f91f0fd5 | 322 | void flush_session(Session *session, MDSGatherBuilder& gather); |
9f95a23c TL |
323 | |
324 | MDSRank *mds; | |
325 | MDCache *mdcache; | |
326 | MDLog *mdlog; | |
327 | PerfCounters *logger = nullptr; | |
328 | ||
329 | // OSDMap full status, used to generate ENOSPC on some operations | |
330 | bool is_full = false; | |
331 | ||
332 | // State for while in reconnect | |
333 | MDSContext *reconnect_done = nullptr; | |
334 | int failed_reconnects = 0; | |
335 | bool reconnect_evicting = false; // true if I am waiting for evictions to complete | |
336 | // before proceeding to reconnect_gather_finish | |
337 | time reconnect_start = clock::zero(); | |
338 | time reconnect_last_seen = clock::zero(); | |
339 | set<client_t> client_reconnect_gather; // clients i need a reconnect msg from. | |
340 | ||
341 | feature_bitset_t supported_features; | |
342 | feature_bitset_t required_client_features; | |
343 | ||
f91f0fd5 | 344 | bool forward_all_requests_to_auth = false; |
9f95a23c TL |
345 | bool replay_unsafe_with_closed_session = false; |
346 | double cap_revoke_eviction_timeout = 0; | |
347 | uint64_t max_snaps_per_dir = 100; | |
348 | unsigned delegate_inos_pct = 0; | |
a8e16298 TL |
349 | |
350 | DecayCounter recall_throttle; | |
351 | time last_recall_state; | |
adb31ebb TL |
352 | |
353 | // Cache cap acquisition throttle configs | |
354 | uint64_t max_caps_per_client; | |
355 | uint64_t cap_acquisition_throttle; | |
356 | double max_caps_throttle_ratio; | |
357 | double caps_throttle_retry_request_timeout; | |
7c673cae FG |
358 | }; |
359 | ||
92f5a8d4 TL |
360 | static inline constexpr auto operator|(Server::RecallFlags a, Server::RecallFlags b) { |
361 | using T = std::underlying_type<Server::RecallFlags>::type; | |
362 | return static_cast<Server::RecallFlags>(static_cast<T>(a) | static_cast<T>(b)); | |
363 | } | |
364 | static inline constexpr auto operator&(Server::RecallFlags a, Server::RecallFlags b) { | |
365 | using T = std::underlying_type<Server::RecallFlags>::type; | |
366 | return static_cast<Server::RecallFlags>(static_cast<T>(a) & static_cast<T>(b)); | |
367 | } | |
368 | static inline std::ostream& operator<<(std::ostream& os, const Server::RecallFlags& f) { | |
369 | using T = std::underlying_type<Server::RecallFlags>::type; | |
370 | return os << "0x" << std::hex << static_cast<T>(f) << std::dec; | |
371 | } | |
372 | static inline constexpr bool operator!(const Server::RecallFlags& f) { | |
373 | using T = std::underlying_type<Server::RecallFlags>::type; | |
374 | return static_cast<T>(f) == static_cast<T>(0); | |
375 | } | |
7c673cae | 376 | #endif |