]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/Server.h
Import ceph 15.2.8
[ceph.git] / ceph / src / mds / Server.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef CEPH_MDS_SERVER_H
16#define CEPH_MDS_SERVER_H
17
11fdf7f2 18#include <string_view>
94b18763 19
a8e16298
TL
20#include <common/DecayCounter.h>
21
9f95a23c
TL
22#include "include/common_fwd.h"
23
11fdf7f2
TL
24#include "messages/MClientReconnect.h"
25#include "messages/MClientReply.h"
26#include "messages/MClientRequest.h"
27#include "messages/MClientSession.h"
28#include "messages/MClientSnap.h"
29#include "messages/MClientReclaim.h"
30#include "messages/MClientReclaimReply.h"
31#include "messages/MLock.h"
32
7c673cae
FG
33#include "MDSRank.h"
34#include "Mutation.h"
11fdf7f2 35#include "MDSContext.h"
7c673cae
FG
36
37class OSDMap;
7c673cae
FG
38class LogEvent;
39class EMetaBlob;
40class EUpdate;
7c673cae 41class MDLog;
11fdf7f2 42struct SnapInfo;
7c673cae
FG
43
// Counter indices for the MDS server (presumably consumed through the
// PerfCounters object held in Server::logger -- confirm against
// Server::create_logger()).
//
// Values are consecutive: l_mdss_first is pinned to 1000 and every
// following enumerator is one greater than its predecessor, so the
// order below is significant. l_mdss_last is the closing sentinel and
// must stay last.
enum {
  l_mdss_first = 1000,

  // dispatch / handle counters
  l_mdss_dispatch_client_request,
  l_mdss_dispatch_slave_request,
  l_mdss_handle_client_request,
  l_mdss_handle_client_session,
  l_mdss_handle_slave_request,

  // per-operation request latency counters
  l_mdss_req_create_latency,
  l_mdss_req_getattr_latency,
  l_mdss_req_getfilelock_latency,
  l_mdss_req_link_latency,
  l_mdss_req_lookup_latency,
  l_mdss_req_lookuphash_latency,
  l_mdss_req_lookupino_latency,
  l_mdss_req_lookupname_latency,
  l_mdss_req_lookupparent_latency,
  l_mdss_req_lookupsnap_latency,
  l_mdss_req_lssnap_latency,
  l_mdss_req_mkdir_latency,
  l_mdss_req_mknod_latency,
  l_mdss_req_mksnap_latency,
  l_mdss_req_open_latency,
  l_mdss_req_readdir_latency,
  l_mdss_req_rename_latency,
  l_mdss_req_renamesnap_latency,
  l_mdss_req_rmdir_latency,
  l_mdss_req_rmsnap_latency,
  l_mdss_req_rmxattr_latency,
  l_mdss_req_setattr_latency,
  l_mdss_req_setdirlayout_latency,
  l_mdss_req_setfilelock_latency,
  l_mdss_req_setlayout_latency,
  l_mdss_req_setxattr_latency,
  l_mdss_req_symlink_latency,
  l_mdss_req_unlink_latency,

  l_mdss_cap_revoke_eviction,

  l_mdss_last,
};
82
83class Server {
91327a77
AA
84public:
85 using clock = ceph::coarse_mono_clock;
86 using time = ceph::coarse_mono_time;
87
9f95a23c
TL
88 enum class RecallFlags : uint64_t {
89 NONE = 0,
90 STEADY = (1<<0),
91 ENFORCE_MAX = (1<<1),
92 TRIM = (1<<2),
93 ENFORCE_LIVENESS = (1<<3),
94 };
7c673cae
FG
95 explicit Server(MDSRank *m);
96 ~Server() {
97 g_ceph_context->get_perfcounters_collection()->remove(logger);
98 delete logger;
99 delete reconnect_done;
100 }
101
102 void create_logger();
103
104 // message handler
9f95a23c 105 void dispatch(const cref_t<Message> &m);
7c673cae
FG
106
107 void handle_osd_map();
108
109 // -- sessions and recovery --
7c673cae
FG
110 bool waiting_for_reconnect(client_t c) const;
111 void dump_reconnect_status(Formatter *f) const;
112
a8e16298
TL
113 time last_recalled() const {
114 return last_recall_state;
115 }
11fdf7f2 116
9f95a23c 117 void handle_client_session(const cref_t<MClientSession> &m);
7c673cae 118 void _session_logged(Session *session, uint64_t state_seq,
9f95a23c
TL
119 bool open, version_t pv, const interval_set<inodeno_t>& inos,version_t piv,
120 const interval_set<inodeno_t>& purge_inos, LogSegment *ls);
7c673cae 121 version_t prepare_force_open_sessions(map<client_t,entity_inst_t> &cm,
11fdf7f2 122 map<client_t,client_metadata_t>& cmm,
28e407b8
AA
123 map<client_t,pair<Session*,uint64_t> >& smap);
124 void finish_force_open_sessions(const map<client_t,pair<Session*,uint64_t> >& smap,
7c673cae
FG
125 bool dec_import=true);
126 void flush_client_sessions(set<client_t>& client_set, MDSGatherBuilder& gather);
127 void finish_flush_session(Session *session, version_t seq);
128 void terminate_sessions();
129 void find_idle_sessions();
9f95a23c 130 void kill_session(Session *session, Context *on_safe, bool need_purge_inos = false);
31f18b77 131 size_t apply_blacklist(const std::set<entity_addr_t> &blacklist);
9f95a23c 132 void journal_close_session(Session *session, int state, Context *on_safe, bool need_purge_inos = false);
11fdf7f2 133
11fdf7f2
TL
134 size_t get_num_pending_reclaim() const { return client_reclaim_gather.size(); }
135 Session *find_session_by_uuid(std::string_view uuid);
9f95a23c
TL
136 void reclaim_session(Session *session, const cref_t<MClientReclaim> &m);
137 void finish_reclaim_session(Session *session, const ref_t<MClientReclaimReply> &reply=nullptr);
138 void handle_client_reclaim(const cref_t<MClientReclaim> &m);
11fdf7f2
TL
139
140 void reconnect_clients(MDSContext *reconnect_done_);
9f95a23c 141 void handle_client_reconnect(const cref_t<MClientReconnect> &m);
11fdf7f2
TL
142 void infer_supported_features(Session *session, client_metadata_t& client_metadata);
143 void update_required_client_features();
144
7c673cae
FG
145 //void process_reconnect_cap(CInode *in, int from, ceph_mds_cap_reconnect& capinfo);
146 void reconnect_gather_finish();
147 void reconnect_tick();
148 void recover_filelocks(CInode *in, bufferlist locks, int64_t client);
149
92f5a8d4 150 std::pair<bool, uint64_t> recall_client_state(MDSGatherBuilder* gather, RecallFlags=RecallFlags::NONE);
7c673cae
FG
151 void force_clients_readonly();
152
153 // -- requests --
9f95a23c 154 void handle_client_request(const cref_t<MClientRequest> &m);
7c673cae
FG
155
156 void journal_and_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn,
157 LogEvent *le, MDSLogContextBase *fin);
158 void submit_mdlog_entry(LogEvent *le, MDSLogContextBase *fin,
11fdf7f2 159 MDRequestRef& mdr, std::string_view event);
7c673cae 160 void dispatch_client_request(MDRequestRef& mdr);
9f95a23c 161 void perf_gather_op_latency(const cref_t<MClientRequest> &req, utime_t lat);
7c673cae
FG
162 void early_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn);
163 void respond_to_request(MDRequestRef& mdr, int r = 0);
9f95a23c 164 void set_trace_dist(const ref_t<MClientReply> &reply, CInode *in, CDentry *dn,
7c673cae
FG
165 MDRequestRef& mdr);
166
9f95a23c
TL
167 void handle_slave_request(const cref_t<MMDSSlaveRequest> &m);
168 void handle_slave_request_reply(const cref_t<MMDSSlaveRequest> &m);
7c673cae
FG
169 void dispatch_slave_request(MDRequestRef& mdr);
170 void handle_slave_auth_pin(MDRequestRef& mdr);
9f95a23c 171 void handle_slave_auth_pin_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &ack);
7c673cae
FG
172
173 // some helpers
174 bool check_fragment_space(MDRequestRef& mdr, CDir *in);
175 bool check_access(MDRequestRef& mdr, CInode *in, unsigned mask);
176 bool _check_access(Session *session, CInode *in, unsigned mask, int caller_uid, int caller_gid, int setattr_uid, int setattr_gid);
7c673cae
FG
177 CDentry *prepare_stray_dentry(MDRequestRef& mdr, CInode *in);
178 CInode* prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino, unsigned mode,
179 file_layout_t *layout=NULL);
180 void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob);
181 void apply_allocated_inos(MDRequestRef& mdr, Session *session);
182
9f95a23c
TL
183 CInode* rdlock_path_pin_ref(MDRequestRef& mdr, bool want_auth,
184 bool no_want_auth=false);
185 CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, bool create,
186 bool okexist=false, bool want_layout=false);
187 std::pair<CDentry*, CDentry*>
188 rdlock_two_paths_xlock_destdn(MDRequestRef& mdr, bool xlock_srcdn);
7c673cae
FG
189
190 CDir* try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequestRef& mdr);
191
7c673cae
FG
192 // requests on existing inodes.
193 void handle_client_getattr(MDRequestRef& mdr, bool is_lookup);
194 void handle_client_lookup_ino(MDRequestRef& mdr,
195 bool want_parent, bool want_dentry);
11fdf7f2 196 void _lookup_snap_ino(MDRequestRef& mdr);
7c673cae
FG
197 void _lookup_ino_2(MDRequestRef& mdr, int r);
198 void handle_client_readdir(MDRequestRef& mdr);
199 void handle_client_file_setlock(MDRequestRef& mdr);
200 void handle_client_file_readlock(MDRequestRef& mdr);
201
9f95a23c
TL
202 bool xlock_policylock(MDRequestRef& mdr, CInode *in,
203 bool want_layout=false, bool xlock_snaplock=false);
204 CInode* try_get_auth_inode(MDRequestRef& mdr, inodeno_t ino);
7c673cae
FG
205 void handle_client_setattr(MDRequestRef& mdr);
206 void handle_client_setlayout(MDRequestRef& mdr);
207 void handle_client_setdirlayout(MDRequestRef& mdr);
208
11fdf7f2
TL
209 int parse_quota_vxattr(string name, string value, quota_info_t *quota);
210 void create_quota_realm(CInode *in);
7c673cae
FG
211 int parse_layout_vxattr(string name, string value, const OSDMap& osdmap,
212 file_layout_t *layout, bool validate=true);
7c673cae
FG
213 int check_layout_vxattr(MDRequestRef& mdr,
214 string name,
215 string value,
216 file_layout_t *layout);
9f95a23c
TL
217 void handle_set_vxattr(MDRequestRef& mdr, CInode *cur);
218 void handle_remove_vxattr(MDRequestRef& mdr, CInode *cur);
7c673cae
FG
219 void handle_client_setxattr(MDRequestRef& mdr);
220 void handle_client_removexattr(MDRequestRef& mdr);
221
222 void handle_client_fsync(MDRequestRef& mdr);
223
224 // open
225 void handle_client_open(MDRequestRef& mdr);
226 void handle_client_openc(MDRequestRef& mdr); // O_CREAT variant.
227 void do_open_truncate(MDRequestRef& mdr, int cmode); // O_TRUNC variant.
228
229 // namespace changes
230 void handle_client_mknod(MDRequestRef& mdr);
231 void handle_client_mkdir(MDRequestRef& mdr);
232 void handle_client_symlink(MDRequestRef& mdr);
233
234 // link
235 void handle_client_link(MDRequestRef& mdr);
236 void _link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti);
11fdf7f2
TL
237 void _link_local_finish(MDRequestRef& mdr, CDentry *dn, CInode *targeti,
238 version_t, version_t, bool);
7c673cae
FG
239
240 void _link_remote(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti);
241 void _link_remote_finish(MDRequestRef& mdr, bool inc, CDentry *dn, CInode *targeti,
242 version_t);
243
244 void handle_slave_link_prep(MDRequestRef& mdr);
11fdf7f2 245 void _logged_slave_link(MDRequestRef& mdr, CInode *targeti, bool adjust_realm);
7c673cae
FG
246 void _commit_slave_link(MDRequestRef& mdr, int r, CInode *targeti);
247 void _committed_slave(MDRequestRef& mdr); // use for rename, too
9f95a23c 248 void handle_slave_link_prep_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &m);
7c673cae 249 void do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr);
11fdf7f2 250 void _link_rollback_finish(MutationRef& mut, MDRequestRef& mdr,
9f95a23c 251 map<client_t,ref_t<MClientSnap>>& split);
7c673cae
FG
252
253 // unlink
254 void handle_client_unlink(MDRequestRef& mdr);
255 bool _dir_is_nonempty_unlocked(MDRequestRef& mdr, CInode *rmdiri);
256 bool _dir_is_nonempty(MDRequestRef& mdr, CInode *rmdiri);
257 void _unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn);
258 void _unlink_local_finish(MDRequestRef& mdr,
259 CDentry *dn, CDentry *straydn,
260 version_t);
261 bool _rmdir_prepare_witness(MDRequestRef& mdr, mds_rank_t who, vector<CDentry*>& trace, CDentry *straydn);
262 void handle_slave_rmdir_prep(MDRequestRef& mdr);
263 void _logged_slave_rmdir(MDRequestRef& mdr, CDentry *srcdn, CDentry *straydn);
31f18b77 264 void _commit_slave_rmdir(MDRequestRef& mdr, int r, CDentry *straydn);
9f95a23c 265 void handle_slave_rmdir_prep_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &ack);
7c673cae
FG
266 void do_rmdir_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr);
267 void _rmdir_rollback_finish(MDRequestRef& mdr, metareqid_t reqid, CDentry *dn, CDentry *straydn);
268
269 // rename
270 void handle_client_rename(MDRequestRef& mdr);
271 void _rename_finish(MDRequestRef& mdr,
272 CDentry *srcdn, CDentry *destdn, CDentry *straydn);
273
274 void handle_client_lssnap(MDRequestRef& mdr);
275 void handle_client_mksnap(MDRequestRef& mdr);
276 void _mksnap_finish(MDRequestRef& mdr, CInode *diri, SnapInfo &info);
277 void handle_client_rmsnap(MDRequestRef& mdr);
278 void _rmsnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid);
279 void handle_client_renamesnap(MDRequestRef& mdr);
280 void _renamesnap_finish(MDRequestRef& mdr, CInode *diri, snapid_t snapid);
281
7c673cae
FG
282 // helpers
283 bool _rename_prepare_witness(MDRequestRef& mdr, mds_rank_t who, set<mds_rank_t> &witnesse,
284 vector<CDentry*>& srctrace, vector<CDentry*>& dsttrace, CDentry *straydn);
285 version_t _rename_prepare_import(MDRequestRef& mdr, CDentry *srcdn, bufferlist *client_map_bl);
286 bool _need_force_journal(CInode *diri, bool empty);
287 void _rename_prepare(MDRequestRef& mdr,
288 EMetaBlob *metablob, bufferlist *client_map_bl,
289 CDentry *srcdn, CDentry *destdn, CDentry *straydn);
290 /* set not_journaling=true if you're going to discard the results --
291 * this bypasses the asserts to make sure we're journaling the right
292 * things on the right nodes */
293 void _rename_apply(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
294
295 // slaving
296 void handle_slave_rename_prep(MDRequestRef& mdr);
9f95a23c
TL
297 void handle_slave_rename_prep_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &m);
298 void handle_slave_rename_notify_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &m);
7c673cae
FG
299 void _slave_rename_sessions_flushed(MDRequestRef& mdr);
300 void _logged_slave_rename(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
301 void _commit_slave_rename(MDRequestRef& mdr, int r, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
302 void do_rename_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr, bool finish_mdr=false);
303 void _rename_rollback_finish(MutationRef& mut, MDRequestRef& mdr, CDentry *srcdn, version_t srcdnpv,
9f95a23c 304 CDentry *destdn, CDentry *staydn, map<client_t,ref_t<MClientSnap>> splits[2],
11fdf7f2 305 bool finish_mdr);
7c673cae 306
91327a77 307 void evict_cap_revoke_non_responders();
92f5a8d4 308 void handle_conf_change(const std::set<std::string>& changed);
91327a77 309
9f95a23c
TL
310 bool terminating_sessions = false;
311
312 set<client_t> client_reclaim_gather;
313
7c673cae 314private:
9f95a23c
TL
315 friend class MDSContinuation;
316 friend class ServerContext;
317 friend class ServerLogContext;
318 friend class Batch_Getattr_Lookup;
319
320 void reply_client_request(MDRequestRef& mdr, const ref_t<MClientReply> &reply);
f91f0fd5 321 void flush_session(Session *session, MDSGatherBuilder& gather);
9f95a23c
TL
322
323 MDSRank *mds;
324 MDCache *mdcache;
325 MDLog *mdlog;
326 PerfCounters *logger = nullptr;
327
328 // OSDMap full status, used to generate ENOSPC on some operations
329 bool is_full = false;
330
331 // State for while in reconnect
332 MDSContext *reconnect_done = nullptr;
333 int failed_reconnects = 0;
334 bool reconnect_evicting = false; // true if I am waiting for evictions to complete
335 // before proceeding to reconnect_gather_finish
336 time reconnect_start = clock::zero();
337 time reconnect_last_seen = clock::zero();
338 set<client_t> client_reconnect_gather; // clients i need a reconnect msg from.
339
340 feature_bitset_t supported_features;
341 feature_bitset_t required_client_features;
342
f91f0fd5 343 bool forward_all_requests_to_auth = false;
9f95a23c
TL
344 bool replay_unsafe_with_closed_session = false;
345 double cap_revoke_eviction_timeout = 0;
346 uint64_t max_snaps_per_dir = 100;
347 unsigned delegate_inos_pct = 0;
a8e16298
TL
348
349 DecayCounter recall_throttle;
350 time last_recall_state;
7c673cae
FG
351};
352
92f5a8d4
TL
353static inline constexpr auto operator|(Server::RecallFlags a, Server::RecallFlags b) {
354 using T = std::underlying_type<Server::RecallFlags>::type;
355 return static_cast<Server::RecallFlags>(static_cast<T>(a) | static_cast<T>(b));
356}
357static inline constexpr auto operator&(Server::RecallFlags a, Server::RecallFlags b) {
358 using T = std::underlying_type<Server::RecallFlags>::type;
359 return static_cast<Server::RecallFlags>(static_cast<T>(a) & static_cast<T>(b));
360}
361static inline std::ostream& operator<<(std::ostream& os, const Server::RecallFlags& f) {
362 using T = std::underlying_type<Server::RecallFlags>::type;
363 return os << "0x" << std::hex << static_cast<T>(f) << std::dec;
364}
365static inline constexpr bool operator!(const Server::RecallFlags& f) {
366 using T = std::underlying_type<Server::RecallFlags>::type;
367 return static_cast<T>(f) == static_cast<T>(0);
368}
7c673cae 369#endif