]> git.proxmox.com Git - ceph.git/blob - ceph/src/mds/Server.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / mds / Server.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_MDS_SERVER_H
16 #define CEPH_MDS_SERVER_H
17
18 #include <string_view>
19
20 #include <common/DecayCounter.h>
21
22 #include "messages/MClientReconnect.h"
23 #include "messages/MClientReply.h"
24 #include "messages/MClientRequest.h"
25 #include "messages/MClientSession.h"
26 #include "messages/MClientSnap.h"
27 #include "messages/MClientReclaim.h"
28 #include "messages/MClientReclaimReply.h"
29 #include "messages/MLock.h"
30
31 #include "MDSRank.h"
32 #include "Mutation.h"
33 #include "MDSContext.h"
34
// Forward declarations only: the declarations in this header do not need the
// full definitions of these types.
class OSDMap;
class PerfCounters;
class LogEvent;
class EMetaBlob;
class EUpdate;
class MDLog;
struct SnapInfo;
42
// Identifiers in the l_mdss_* range. Values start at l_mdss_first (1000) and
// increase by one per enumerator; l_mdss_last is the end marker and must stay
// the final entry. New identifiers belong between the two markers.
// NOTE(review): presumably these index the counters held by Server's
// PerfCounters logger (see Server::create_logger) -- confirm in Server.cc.
enum {
  l_mdss_first = 1000,
  l_mdss_dispatch_client_request,
  l_mdss_dispatch_slave_request,
  l_mdss_handle_client_request,
  l_mdss_handle_client_session,
  l_mdss_handle_slave_request,
  // per-request-type latency identifiers
  l_mdss_req_create_latency,
  l_mdss_req_getattr_latency,
  l_mdss_req_getfilelock_latency,
  l_mdss_req_link_latency,
  l_mdss_req_lookup_latency,
  l_mdss_req_lookuphash_latency,
  l_mdss_req_lookupino_latency,
  l_mdss_req_lookupname_latency,
  l_mdss_req_lookupparent_latency,
  l_mdss_req_lookupsnap_latency,
  l_mdss_req_lssnap_latency,
  l_mdss_req_mkdir_latency,
  l_mdss_req_mknod_latency,
  l_mdss_req_mksnap_latency,
  l_mdss_req_open_latency,
  l_mdss_req_readdir_latency,
  l_mdss_req_rename_latency,
  l_mdss_req_renamesnap_latency,
  l_mdss_req_rmdir_latency,
  l_mdss_req_rmsnap_latency,
  l_mdss_req_rmxattr_latency,
  l_mdss_req_setattr_latency,
  l_mdss_req_setdirlayout_latency,
  l_mdss_req_setfilelock_latency,
  l_mdss_req_setlayout_latency,
  l_mdss_req_setxattr_latency,
  l_mdss_req_symlink_latency,
  l_mdss_req_unlink_latency,
  l_mdss_cap_revoke_eviction,
  l_mdss_last,
};
81
82 class Server {
83 public:
84 using clock = ceph::coarse_mono_clock;
85 using time = ceph::coarse_mono_time;
86
87 private:
88 MDSRank *mds;
89 MDCache *mdcache;
90 MDLog *mdlog;
91 PerfCounters *logger;
92
93 // OSDMap full status, used to generate ENOSPC on some operations
94 bool is_full;
95
96 // State for while in reconnect
97 MDSContext *reconnect_done;
98 int failed_reconnects;
99 bool reconnect_evicting; // true if I am waiting for evictions to complete
100 // before proceeding to reconnect_gather_finish
101 time reconnect_start = clock::zero();
102 time reconnect_last_seen = clock::zero();
103 set<client_t> client_reconnect_gather; // clients i need a reconnect msg from.
104
105 feature_bitset_t supported_features;
106 feature_bitset_t required_client_features;
107
108 double cap_revoke_eviction_timeout = 0;
109
110 friend class MDSContinuation;
111 friend class ServerContext;
112 friend class ServerLogContext;
113
114 public:
115 bool terminating_sessions;
116
117 explicit Server(MDSRank *m);
118 ~Server() {
119 g_ceph_context->get_perfcounters_collection()->remove(logger);
120 delete logger;
121 delete reconnect_done;
122 }
123
124 void create_logger();
125
126 // message handler
127 void dispatch(const Message::const_ref &m);
128
129 void handle_osd_map();
130
131 // -- sessions and recovery --
132 bool waiting_for_reconnect(client_t c) const;
133 void dump_reconnect_status(Formatter *f) const;
134
135 time last_recalled() const {
136 return last_recall_state;
137 }
138
139 void handle_client_session(const MClientSession::const_ref &m);
140 void _session_logged(Session *session, uint64_t state_seq,
141 bool open, version_t pv, interval_set<inodeno_t>& inos,version_t piv);
142 version_t prepare_force_open_sessions(map<client_t,entity_inst_t> &cm,
143 map<client_t,client_metadata_t>& cmm,
144 map<client_t,pair<Session*,uint64_t> >& smap);
145 void finish_force_open_sessions(const map<client_t,pair<Session*,uint64_t> >& smap,
146 bool dec_import=true);
147 void flush_client_sessions(set<client_t>& client_set, MDSGatherBuilder& gather);
148 void finish_flush_session(Session *session, version_t seq);
149 void terminate_sessions();
150 void find_idle_sessions();
151 void kill_session(Session *session, Context *on_safe);
152 size_t apply_blacklist(const std::set<entity_addr_t> &blacklist);
153 void journal_close_session(Session *session, int state, Context *on_safe);
154
155 set<client_t> client_reclaim_gather;
156 size_t get_num_pending_reclaim() const { return client_reclaim_gather.size(); }
157 Session *find_session_by_uuid(std::string_view uuid);
158 void reclaim_session(Session *session, const MClientReclaim::const_ref &m);
159 void finish_reclaim_session(Session *session, const MClientReclaimReply::ref &reply=nullptr);
160 void handle_client_reclaim(const MClientReclaim::const_ref &m);
161
162 void reconnect_clients(MDSContext *reconnect_done_);
163 void handle_client_reconnect(const MClientReconnect::const_ref &m);
164 void infer_supported_features(Session *session, client_metadata_t& client_metadata);
165 void update_required_client_features();
166
167 //void process_reconnect_cap(CInode *in, int from, ceph_mds_cap_reconnect& capinfo);
168 void reconnect_gather_finish();
169 void reconnect_tick();
170 void recover_filelocks(CInode *in, bufferlist locks, int64_t client);
171
172 enum RecallFlags {
173 NONE = 0,
174 STEADY = (1<<0),
175 ENFORCE_MAX = (1<<1),
176 };
177 std::pair<bool, uint64_t> recall_client_state(MDSGatherBuilder* gather, enum RecallFlags=RecallFlags::NONE);
178 void force_clients_readonly();
179
180 // -- requests --
181 void handle_client_request(const MClientRequest::const_ref &m);
182
183 void journal_and_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn,
184 LogEvent *le, MDSLogContextBase *fin);
185 void submit_mdlog_entry(LogEvent *le, MDSLogContextBase *fin,
186 MDRequestRef& mdr, std::string_view event);
187 void dispatch_client_request(MDRequestRef& mdr);
188 void perf_gather_op_latency(const MClientRequest::const_ref &req, utime_t lat);
189 void early_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn);
190 void respond_to_request(MDRequestRef& mdr, int r = 0);
191 void set_trace_dist(Session *session, const MClientReply::ref &reply, CInode *in, CDentry *dn,
192 snapid_t snapid,
193 int num_dentries_wanted,
194 MDRequestRef& mdr);
195
196
197 void handle_slave_request(const MMDSSlaveRequest::const_ref &m);
198 void handle_slave_request_reply(const MMDSSlaveRequest::const_ref &m);
199 void dispatch_slave_request(MDRequestRef& mdr);
200 void handle_slave_auth_pin(MDRequestRef& mdr);
201 void handle_slave_auth_pin_ack(MDRequestRef& mdr, const MMDSSlaveRequest::const_ref &ack);
202
203 // some helpers
204 bool check_fragment_space(MDRequestRef& mdr, CDir *in);
205 bool check_access(MDRequestRef& mdr, CInode *in, unsigned mask);
206 bool _check_access(Session *session, CInode *in, unsigned mask, int caller_uid, int caller_gid, int setattr_uid, int setattr_gid);
207 CDir *validate_dentry_dir(MDRequestRef& mdr, CInode *diri, std::string_view dname);
208 CDir *traverse_to_auth_dir(MDRequestRef& mdr, vector<CDentry*> &trace, filepath refpath);
209 CDentry *prepare_null_dentry(MDRequestRef& mdr, CDir *dir, std::string_view dname, bool okexist=false);
210 CDentry *prepare_stray_dentry(MDRequestRef& mdr, CInode *in);
211 CInode* prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode,
212 file_layout_t *layout=NULL);
213 void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob);
214 void apply_allocated_inos(MDRequestRef& mdr, Session *session);
215
216 CInode* rdlock_path_pin_ref(MDRequestRef& mdr, int n, MutationImpl::LockOpVec& lov,
217 bool want_auth, bool no_want_auth=false,
218 file_layout_t **layout=nullptr,
219 bool no_lookup=false);
220 CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, int n,
221 MutationImpl::LockOpVec& lov,
222 bool okexist, bool mustexist, bool alwaysxlock,
223 file_layout_t **layout=nullptr);
224
225 CDir* try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequestRef& mdr);
226
227
228 // requests on existing inodes.
229 void handle_client_getattr(MDRequestRef& mdr, bool is_lookup);
230 void handle_client_lookup_ino(MDRequestRef& mdr,
231 bool want_parent, bool want_dentry);
232 void _lookup_snap_ino(MDRequestRef& mdr);
233 void _lookup_ino_2(MDRequestRef& mdr, int r);
234 void handle_client_readdir(MDRequestRef& mdr);
235 void handle_client_file_setlock(MDRequestRef& mdr);
236 void handle_client_file_readlock(MDRequestRef& mdr);
237
238 void handle_client_setattr(MDRequestRef& mdr);
239 void handle_client_setlayout(MDRequestRef& mdr);
240 void handle_client_setdirlayout(MDRequestRef& mdr);
241
242 int parse_quota_vxattr(string name, string value, quota_info_t *quota);
243 void create_quota_realm(CInode *in);
244 int parse_layout_vxattr(string name, string value, const OSDMap& osdmap,
245 file_layout_t *layout, bool validate=true);
246 int check_layout_vxattr(MDRequestRef& mdr,
247 string name,
248 string value,
249 file_layout_t *layout);
250 void handle_set_vxattr(MDRequestRef& mdr, CInode *cur,
251 file_layout_t *dir_layout,
252 MutationImpl::LockOpVec& lov);
253 void handle_remove_vxattr(MDRequestRef& mdr, CInode *cur,
254 file_layout_t *dir_layout,
255 MutationImpl::LockOpVec& lov);
256 void handle_client_setxattr(MDRequestRef& mdr);
257 void handle_client_removexattr(MDRequestRef& mdr);
258
259 void handle_client_fsync(MDRequestRef& mdr);
260
261 // open
262 void handle_client_open(MDRequestRef& mdr);
263 void handle_client_openc(MDRequestRef& mdr); // O_CREAT variant.
264 void do_open_truncate(MDRequestRef& mdr, int cmode); // O_TRUNC variant.
265
266 // namespace changes
267 void handle_client_mknod(MDRequestRef& mdr);
268 void handle_client_mkdir(MDRequestRef& mdr);
269 void handle_client_symlink(MDRequestRef& mdr);
270
271 // link
272 void handle_client_link(MDRequestRef& mdr);
273 void _link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti);
274 void _link_local_finish(MDRequestRef& mdr, CDentry *dn, CInode *targeti,
275 version_t, version_t, bool);
276
277 void _link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti);
278 void _link_remote_finish(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti,
279 version_t);
280
281 void handle_slave_link_prep(MDRequestRef& mdr);
282 void _logged_slave_link(MDRequestRef& mdr, CInode *targeti, bool adjust_realm);
283 void _commit_slave_link(MDRequestRef& mdr, int r, CInode *targeti);
284 void _committed_slave(MDRequestRef& mdr); // use for rename, too
285 void handle_slave_link_prep_ack(MDRequestRef& mdr, const MMDSSlaveRequest::const_ref &m);
286 void do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr);
287 void _link_rollback_finish(MutationRef& mut, MDRequestRef& mdr,
288 map<client_t,MClientSnap::ref>& split);
289
290 // unlink
291 void handle_client_unlink(MDRequestRef& mdr);
292 bool _dir_is_nonempty_unlocked(MDRequestRef& mdr, CInode *rmdiri);
293 bool _dir_is_nonempty(MDRequestRef& mdr, CInode *rmdiri);
294 void _unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn);
295 void _unlink_local_finish(MDRequestRef& mdr,
296 CDentry *dn, CDentry *straydn,
297 version_t);
298 bool _rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vector<CDentry*>& trace, CDentry *straydn);
299 void handle_slave_rmdir_prep(MDRequestRef& mdr);
300 void _logged_slave_rmdir(MDRequestRef& mdr, CDentry *srcdn, CDentry *straydn);
301 void _commit_slave_rmdir(MDRequestRef& mdr, int r, CDentry *straydn);
302 void handle_slave_rmdir_prep_ack(MDRequestRef& mdr, const MMDSSlaveRequest::const_ref &ack);
303 void do_rmdir_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr);
304 void _rmdir_rollback_finish(MDRequestRef& mdr, metareqid_t reqid, CDentry *dn, CDentry *straydn);
305
306 // rename
307 void handle_client_rename(MDRequestRef& mdr);
308 void _rename_finish(MDRequestRef& mdr,
309 CDentry *srcdn, CDentry *destdn, CDentry *straydn);
310
311 void handle_client_lssnap(MDRequestRef& mdr);
312 void handle_client_mksnap(MDRequestRef& mdr);
313 void _mksnap_finish(MDRequestRef& mdr, CInode *diri, SnapInfo &info);
314 void handle_client_rmsnap(MDRequestRef& mdr);
315 void _rmsnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid);
316 void handle_client_renamesnap(MDRequestRef& mdr);
317 void _renamesnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid);
318
319
320 // helpers
321 bool _rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, set<mds_rank_t> &witnesse,
322 vector<CDentry*>& srctrace, vector<CDentry*>& dsttrace, CDentry *straydn);
323 version_t _rename_prepare_import(MDRequestRef& mdr, CDentry *srcdn, bufferlist *client_map_bl);
324 bool _need_force_journal(CInode *diri, bool empty);
325 void _rename_prepare(MDRequestRef& mdr,
326 EMetaBlob *metablob, bufferlist *client_map_bl,
327 CDentry *srcdn, CDentry *destdn, CDentry *straydn);
328 /* set not_journaling=true if you're going to discard the results --
329 * this bypasses the asserts to make sure we're journaling the right
330 * things on the right nodes */
331 void _rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
332
333 // slaving
334 void handle_slave_rename_prep(MDRequestRef& mdr);
335 void handle_slave_rename_prep_ack(MDRequestRef& mdr, const MMDSSlaveRequest::const_ref &m);
336 void handle_slave_rename_notify_ack(MDRequestRef& mdr, const MMDSSlaveRequest::const_ref &m);
337 void _slave_rename_sessions_flushed(MDRequestRef& mdr);
338 void _logged_slave_rename(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
339 void _commit_slave_rename(MDRequestRef& mdr, int r, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
340 void do_rename_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr, bool finish_mdr=false);
341 void _rename_rollback_finish(MutationRef& mut, MDRequestRef& mdr, CDentry *srcdn, version_t srcdnpv,
342 CDentry *destdn, CDentry *staydn, map<client_t,MClientSnap::ref> splits[2],
343 bool finish_mdr);
344
345 void evict_cap_revoke_non_responders();
346 void handle_conf_change(const ConfigProxy& conf,
347 const std::set <std::string> &changed);
348
349 private:
350 void reply_client_request(MDRequestRef& mdr, const MClientReply::ref &reply);
351 void flush_session(Session *session, MDSGatherBuilder *gather);
352
353 DecayCounter recall_throttle;
354 time last_recall_state;
355 };
356
357 #endif