]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/Server.h
import ceph 12.2.12
[ceph.git] / ceph / src / mds / Server.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef CEPH_MDS_SERVER_H
16#define CEPH_MDS_SERVER_H
17
94b18763
FG
18#include <boost/utility/string_view.hpp>
19
a8e16298
TL
20#include <common/DecayCounter.h>
21
7c673cae
FG
22#include "MDSRank.h"
23#include "Mutation.h"
24
// Forward declarations. In the declarations visible in this header these
// types appear only behind pointers or references, so their full definitions
// are not needed here.
// NOTE(review): EUpdate is forward-declared but not referenced anywhere else
// in the visible header -- confirm whether it is still needed.
25class OSDMap;
26class PerfCounters;
27class LogEvent;
28class EMetaBlob;
29class EUpdate;
30class MMDSSlaveRequest;
31struct SnapInfo;
32class MClientRequest;
33class MClientReply;
34class MDLog;
35
// Unnamed enum of l_mdss_* identifiers.
// l_mdss_first is pinned to 1000; every enumerator after it takes the next
// consecutive value, and l_mdss_last closes the range (one past the final
// real entry).
// Presumably these are the indices of the perf counters registered on
// Server::logger by Server::create_logger() -- confirm in Server.cc.
36enum {
37 l_mdss_first = 1000,
d2e6a577
FG
38 l_mdss_dispatch_client_request,
39 l_mdss_dispatch_slave_request,
7c673cae 40 l_mdss_handle_client_request,
7c673cae 41 l_mdss_handle_client_session,
d2e6a577 42 l_mdss_handle_slave_request,
91327a77
AA
  // One *_latency entry per request kind; presumably fed by
  // Server::perf_gather_op_latency() -- confirm in Server.cc.
43 l_mdss_req_create_latency,
44 l_mdss_req_getattr_latency,
45 l_mdss_req_getfilelock_latency,
46 l_mdss_req_link_latency,
47 l_mdss_req_lookup_latency,
48 l_mdss_req_lookuphash_latency,
49 l_mdss_req_lookupino_latency,
50 l_mdss_req_lookupname_latency,
51 l_mdss_req_lookupparent_latency,
52 l_mdss_req_lookupsnap_latency,
53 l_mdss_req_lssnap_latency,
54 l_mdss_req_mkdir_latency,
55 l_mdss_req_mknod_latency,
56 l_mdss_req_mksnap_latency,
57 l_mdss_req_open_latency,
58 l_mdss_req_readdir_latency,
59 l_mdss_req_rename_latency,
60 l_mdss_req_renamesnap_latency,
61 l_mdss_req_rmdir_latency,
62 l_mdss_req_rmsnap_latency,
63 l_mdss_req_rmxattr_latency,
64 l_mdss_req_setattr_latency,
65 l_mdss_req_setdirlayout_latency,
66 l_mdss_req_setfilelock_latency,
67 l_mdss_req_setlayout_latency,
68 l_mdss_req_setxattr_latency,
69 l_mdss_req_symlink_latency,
70 l_mdss_req_unlink_latency,
71 l_mdss_cap_revoke_eviction,
7c673cae
FG
  // Sentinel: must stay the final enumerator.
72 l_mdss_last,
73};
74
// Server: declaration of the MDS request/session handling class.
//
// Only the destructor and last_recalled() are defined inline; every other
// member function is declared here and defined elsewhere.
// The object stores raw pointers to an MDSRank, MDCache, MDLog and
// PerfCounters. The destructor shown below frees `logger` and
// `reconnect_done`; it does not free `mds`, `mdcache` or `mdlog`.
75class Server {
91327a77
AA
76public:
  // Aliases for the ceph coarse mono clock and its time point; `time` is the
  // type of the reconnect timestamps and of last_recall_state below.
77 using clock = ceph::coarse_mono_clock;
78 using time = ceph::coarse_mono_time;
79
7c673cae
FG
80private:
  // NOTE(review): mds, mdcache, mdlog, logger, is_full, reconnect_done,
  // failed_reconnects and reconnect_evicting have no default member
  // initializer in this header. Presumably the constructor sets them --
  // confirm in Server.cc, because ~Server() deletes logger and
  // reconnect_done unconditionally.
81 MDSRank *mds;
82 MDCache *mdcache;
83 MDLog *mdlog;
84 PerfCounters *logger;
85
86 // OSDMap full status, used to generate ENOSPC on some operations
87 bool is_full;
88
89 // State for while in reconnect
90 MDSInternalContext *reconnect_done;
91 int failed_reconnects;
31f18b77
FG
92 bool reconnect_evicting; // true if I am waiting for evictions to complete
93 // before proceeding to reconnect_gather_finish
f64942e4
AA
  // Both timestamps start at time::min().
94 time reconnect_start = time::min();
95 time reconnect_last_seen = time::min();
96 set<client_t> client_reconnect_gather; // clients i need a reconnect msg from.
7c673cae 97
91327a77
AA
  // Defaults to 0; the unit and the meaning of 0 are not visible in this
  // header -- presumably updated from handle_conf_change(); confirm.
98 double cap_revoke_eviction_timeout = 0;
99
7c673cae
FG
100 friend class MDSContinuation;
101 friend class ServerContext;
102 friend class ServerLogContext;
103
104public:
  // Public flag with no default member initializer here; presumably set by
  // the constructor -- confirm in Server.cc.
105 bool terminating_sessions;
106
107 explicit Server(MDSRank *m);
  // Removes `logger` from the global context's perf-counter collection, then
  // deletes `logger` and `reconnect_done`.
  // NOTE(review): `logger` is handed to remove() without a null check;
  // whether that is safe when create_logger() was never called cannot be
  // determined from this header.
108 ~Server() {
109 g_ceph_context->get_perfcounters_collection()->remove(logger);
110 delete logger;
111 delete reconnect_done;
112 }
113
114 void create_logger();
115
116 // message handler
117 void dispatch(Message *m);
118
119 void handle_osd_map();
120
121 // -- sessions and recovery --
7c673cae
FG
122 bool waiting_for_reconnect(client_t c) const;
123 void dump_reconnect_status(Formatter *f) const;
124
7c673cae 125 void handle_client_session(class MClientSession *m);
a8e16298
TL
  // Accessor: returns the private member last_recall_state.
126 time last_recalled() const {
127 return last_recall_state;
128 }
7c673cae
FG
129 void _session_logged(Session *session, uint64_t state_seq,
130 bool open, version_t pv, interval_set<inodeno_t>& inos,version_t piv);
131 version_t prepare_force_open_sessions(map<client_t,entity_inst_t> &cm,
28e407b8
AA
132 map<client_t,pair<Session*,uint64_t> >& smap);
133 void finish_force_open_sessions(const map<client_t,pair<Session*,uint64_t> >& smap,
7c673cae
FG
134 bool dec_import=true);
135 void flush_client_sessions(set<client_t>& client_set, MDSGatherBuilder& gather);
136 void finish_flush_session(Session *session, version_t seq);
137 void terminate_sessions();
138 void find_idle_sessions();
139 void kill_session(Session *session, Context *on_safe);
31f18b77 140 size_t apply_blacklist(const std::set<entity_addr_t> &blacklist);
7c673cae
FG
141 void journal_close_session(Session *session, int state, Context *on_safe);
142 void reconnect_clients(MDSInternalContext *reconnect_done_);
143 void handle_client_reconnect(class MClientReconnect *m);
144 //void process_reconnect_cap(CInode *in, int from, ceph_mds_cap_reconnect& capinfo);
145 void reconnect_gather_finish();
146 void reconnect_tick();
147 void recover_filelocks(CInode *in, bufferlist locks, int64_t client);
148
a8e16298
TL
  // Flag values for recall_client_state(): NONE is 0, STEADY is bit 0 and
  // ENFORCE_MAX is bit 1. As distinct bits they can presumably be OR-ed
  // together -- confirm at the call sites.
149 enum RecallFlags {
150 NONE = 0,
151 STEADY = (1<<0),
152 ENFORCE_MAX = (1<<1),
153 };
  // The flags parameter is unnamed in this declaration and defaults to NONE.
  // Returns a (bool, uint64_t) pair; the meaning of the two fields is not
  // documented in this header.
154 std::pair<bool, uint64_t> recall_client_state(MDSGatherBuilder* gather, enum RecallFlags=RecallFlags::NONE);
7c673cae
FG
155 void force_clients_readonly();
156
157 // -- requests --
158 void handle_client_request(MClientRequest *m);
159
160 void journal_and_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn,
161 LogEvent *le, MDSLogContextBase *fin);
162 void submit_mdlog_entry(LogEvent *le, MDSLogContextBase *fin,
163 MDRequestRef& mdr, const char *evt);
164 void dispatch_client_request(MDRequestRef& mdr);
91327a77 165 void perf_gather_op_latency(const MClientRequest* req, utime_t lat);
7c673cae
FG
166 void early_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn);
167 void respond_to_request(MDRequestRef& mdr, int r = 0);
168 void set_trace_dist(Session *session, MClientReply *reply, CInode *in, CDentry *dn,
169 snapid_t snapid,
170 int num_dentries_wanted,
171 MDRequestRef& mdr);
172
173 void encode_empty_dirstat(bufferlist& bl);
174 void encode_infinite_lease(bufferlist& bl);
175 void encode_null_lease(bufferlist& bl);
176
177 void handle_slave_request(MMDSSlaveRequest *m);
178 void handle_slave_request_reply(MMDSSlaveRequest *m);
179 void dispatch_slave_request(MDRequestRef& mdr);
180 void handle_slave_auth_pin(MDRequestRef& mdr);
181 void handle_slave_auth_pin_ack(MDRequestRef& mdr, MMDSSlaveRequest *ack);
182
183 // some helpers
184 bool check_fragment_space(MDRequestRef& mdr, CDir *in);
185 bool check_access(MDRequestRef& mdr, CInode *in, unsigned mask);
186 bool _check_access(Session *session, CInode *in, unsigned mask, int caller_uid, int caller_gid, int setattr_uid, int setattr_gid);
94b18763 187 CDir *validate_dentry_dir(MDRequestRef& mdr, CInode *diri, boost::string_view dname);
7c673cae 188 CDir *traverse_to_auth_dir(MDRequestRef& mdr, vector<CDentry*> &trace, filepath refpath);
94b18763 189 CDentry *prepare_null_dentry(MDRequestRef& mdr, CDir *dir, boost::string_view dname, bool okexist=false);
7c673cae
FG
190 CDentry *prepare_stray_dentry(MDRequestRef& mdr, CInode *in);
191 CInode* prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode,
192 file_layout_t *layout=NULL);
193 void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob);
194 void apply_allocated_inos(MDRequestRef& mdr, Session *session);
195
196 CInode* rdlock_path_pin_ref(MDRequestRef& mdr, int n, set<SimpleLock*>& rdlocks, bool want_auth,
197 bool no_want_auth=false,
198 file_layout_t **layout=NULL,
199 bool no_lookup=false);
200 CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, int n,
201 set<SimpleLock*>& rdlocks,
202 set<SimpleLock*>& wrlocks,
203 set<SimpleLock*>& xlocks, bool okexist,
204 bool mustexist, bool alwaysxlock,
205 file_layout_t **layout=NULL);
206
207 CDir* try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequestRef& mdr);
208
209
210 // requests on existing inodes.
211 void handle_client_getattr(MDRequestRef& mdr, bool is_lookup);
212 void handle_client_lookup_ino(MDRequestRef& mdr,
213 bool want_parent, bool want_dentry);
214 void _lookup_ino_2(MDRequestRef& mdr, int r);
215 void handle_client_readdir(MDRequestRef& mdr);
216 void handle_client_file_setlock(MDRequestRef& mdr);
217 void handle_client_file_readlock(MDRequestRef& mdr);
218
219 void handle_client_setattr(MDRequestRef& mdr);
220 void handle_client_setlayout(MDRequestRef& mdr);
221 void handle_client_setdirlayout(MDRequestRef& mdr);
222
223 int parse_layout_vxattr(string name, string value, const OSDMap& osdmap,
224 file_layout_t *layout, bool validate=true);
225 int parse_quota_vxattr(string name, string value, quota_info_t *quota);
226 int check_layout_vxattr(MDRequestRef& mdr,
227 string name,
228 string value,
229 file_layout_t *layout);
  // NOTE(review): handle_set_vxattr() and handle_remove_vxattr() take the
  // three lock sets by value, so every call copies them (the rdlock_path_*
  // helpers above take the same set type by reference).
230 void handle_set_vxattr(MDRequestRef& mdr, CInode *cur,
231 file_layout_t *dir_layout,
232 set<SimpleLock*> rdlocks,
233 set<SimpleLock*> wrlocks,
234 set<SimpleLock*> xlocks);
235 void handle_remove_vxattr(MDRequestRef& mdr, CInode *cur,
236 file_layout_t *dir_layout,
237 set<SimpleLock*> rdlocks,
238 set<SimpleLock*> wrlocks,
239 set<SimpleLock*> xlocks);
240 void handle_client_setxattr(MDRequestRef& mdr);
241 void handle_client_removexattr(MDRequestRef& mdr);
242
243 void handle_client_fsync(MDRequestRef& mdr);
244
245 // open
246 void handle_client_open(MDRequestRef& mdr);
247 void handle_client_openc(MDRequestRef& mdr); // O_CREAT variant.
248 void do_open_truncate(MDRequestRef& mdr, int cmode); // O_TRUNC variant.
249
250 // namespace changes
251 void handle_client_mknod(MDRequestRef& mdr);
252 void handle_client_mkdir(MDRequestRef& mdr);
253 void handle_client_symlink(MDRequestRef& mdr);
254
255 // link
256 void handle_client_link(MDRequestRef& mdr);
257 void _link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti);
258 void _link_local_finish(MDRequestRef& mdr,
259 CDentry *dn, CInode *targeti,
260 version_t, version_t);
261
262 void _link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti);
263 void _link_remote_finish(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti,
264 version_t);
265
266 void handle_slave_link_prep(MDRequestRef& mdr);
267 void _logged_slave_link(MDRequestRef& mdr, CInode *targeti);
268 void _commit_slave_link(MDRequestRef& mdr, int r, CInode *targeti);
269 void _committed_slave(MDRequestRef& mdr); // use for rename, too
270 void handle_slave_link_prep_ack(MDRequestRef& mdr, MMDSSlaveRequest *m);
271 void do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr);
272 void _link_rollback_finish(MutationRef& mut, MDRequestRef& mdr);
273
274 // unlink
275 void handle_client_unlink(MDRequestRef& mdr);
276 bool _dir_is_nonempty_unlocked(MDRequestRef& mdr, CInode *rmdiri);
277 bool _dir_is_nonempty(MDRequestRef& mdr, CInode *rmdiri);
278 void _unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn);
279 void _unlink_local_finish(MDRequestRef& mdr,
280 CDentry *dn, CDentry *straydn,
281 version_t);
282 bool _rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vector<CDentry*>& trace, CDentry *straydn);
283 void handle_slave_rmdir_prep(MDRequestRef& mdr);
284 void _logged_slave_rmdir(MDRequestRef& mdr, CDentry *srcdn, CDentry *straydn);
31f18b77 285 void _commit_slave_rmdir(MDRequestRef& mdr, int r, CDentry *straydn);
7c673cae
FG
286 void handle_slave_rmdir_prep_ack(MDRequestRef& mdr, MMDSSlaveRequest *ack);
287 void do_rmdir_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr);
288 void _rmdir_rollback_finish(MDRequestRef& mdr, metareqid_t reqid, CDentry *dn, CDentry *straydn);
289
290 // rename
291 void handle_client_rename(MDRequestRef& mdr);
292 void _rename_finish(MDRequestRef& mdr,
293 CDentry *srcdn, CDentry *destdn, CDentry *straydn);
294
295 void handle_client_lssnap(MDRequestRef& mdr);
296 void handle_client_mksnap(MDRequestRef& mdr);
297 void _mksnap_finish(MDRequestRef& mdr, CInode *diri, SnapInfo &info);
298 void handle_client_rmsnap(MDRequestRef& mdr);
299 void _rmsnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid);
300 void handle_client_renamesnap(MDRequestRef& mdr);
301 void _renamesnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid);
302
303
304 // helpers
  // NOTE(review): the parameter name `witnesse` looks like a truncated
  // `witnesses`; names in a declaration do not affect callers.
305 bool _rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, set<mds_rank_t> &witnesse,
306 vector<CDentry*>& srctrace, vector<CDentry*>& dsttrace, CDentry *straydn);
307 version_t _rename_prepare_import(MDRequestRef& mdr, CDentry *srcdn, bufferlist *client_map_bl);
308 bool _need_force_journal(CInode *diri, bool empty);
309 void _rename_prepare(MDRequestRef& mdr,
310 EMetaBlob *metablob, bufferlist *client_map_bl,
311 CDentry *srcdn, CDentry *destdn, CDentry *straydn);
312 /* set not_journaling=true if you're going to discard the results --
313 * this bypasses the asserts to make sure we're journaling the right
314 * things on the right nodes */
  // NOTE(review): the _rename_apply() declaration below has no
  // not_journaling parameter, so the comment above looks stale -- confirm
  // against Server.cc before relying on it.
315 void _rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
316
317 // slaving
318 void handle_slave_rename_prep(MDRequestRef& mdr);
319 void handle_slave_rename_prep_ack(MDRequestRef& mdr, MMDSSlaveRequest *m);
320 void handle_slave_rename_notify_ack(MDRequestRef& mdr, MMDSSlaveRequest *m);
321 void _slave_rename_sessions_flushed(MDRequestRef& mdr);
322 void _logged_slave_rename(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
323 void _commit_slave_rename(MDRequestRef& mdr, int r, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
324 void do_rename_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr, bool finish_mdr=false);
  // NOTE(review): `staydn` is presumably a typo for `straydn`, the spelling
  // used by the other rename declarations in this class.
325 void _rename_rollback_finish(MutationRef& mut, MDRequestRef& mdr, CDentry *srcdn, version_t srcdnpv,
326 CDentry *destdn, CDentry *staydn, bool finish_mdr);
327
91327a77
AA
328 void evict_cap_revoke_non_responders();
329 void handle_conf_change(const struct md_config_t *,
330 const std::set <std::string> &changed);
331
7c673cae
FG
332private:
333 void reply_client_request(MDRequestRef& mdr, MClientReply *reply);
f64942e4 334 void flush_session(Session *session, MDSGatherBuilder *gather);
a8e16298
TL
335
  // last_recall_state is the value returned by last_recalled(). Neither
  // member has an initializer in this header; presumably both are set up by
  // the constructor -- confirm in Server.cc.
336 DecayCounter recall_throttle;
337 time last_recall_state;
7c673cae
FG
338};
339
340#endif