]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include <unistd.h> | |
16 | ||
17 | #include "include/compat.h" | |
7c673cae FG |
18 | #include "include/types.h" |
19 | #include "include/str_list.h" | |
c07f9fc5 | 20 | |
7c673cae | 21 | #include "common/Clock.h" |
c07f9fc5 FG |
22 | #include "common/HeartbeatMap.h" |
23 | #include "common/Timer.h" | |
7c673cae | 24 | #include "common/ceph_argparse.h" |
c07f9fc5 FG |
25 | #include "common/config.h" |
26 | #include "common/entity_name.h" | |
7c673cae | 27 | #include "common/errno.h" |
c07f9fc5 FG |
28 | #include "common/perf_counters.h" |
29 | #include "common/signal.h" | |
30 | #include "common/version.h" | |
31 | ||
32 | #include "global/signal_handler.h" | |
7c673cae FG |
33 | |
34 | #include "msg/Messenger.h" | |
35 | #include "mon/MonClient.h" | |
36 | ||
37 | #include "osdc/Objecter.h" | |
38 | ||
39 | #include "MDSMap.h" | |
40 | ||
41 | #include "MDSDaemon.h" | |
42 | #include "Server.h" | |
43 | #include "Locker.h" | |
44 | ||
45 | #include "SnapServer.h" | |
46 | #include "SnapClient.h" | |
47 | ||
7c673cae FG |
48 | #include "events/ESession.h" |
49 | #include "events/ESubtreeMap.h" | |
50 | ||
7c673cae FG |
51 | #include "auth/AuthAuthorizeHandler.h" |
52 | #include "auth/RotatingKeyRing.h" | |
53 | #include "auth/KeyRing.h" | |
54 | ||
7c673cae FG |
55 | #include "perfglue/cpu_profiler.h" |
56 | #include "perfglue/heap_profiler.h" | |
57 | ||
58 | #define dout_context g_ceph_context | |
59 | #define dout_subsys ceph_subsys_mds | |
60 | #undef dout_prefix | |
61 | #define dout_prefix *_dout << "mds." << name << ' ' | |
20effc67 TL |
62 | |
63 | using std::string; | |
64 | using std::vector; | |
9f95a23c | 65 | using TOPNSPC::common::cmd_getval; |
20effc67 | 66 | |
7c673cae | 67 | // cons/des |
f67539c2 TL |
// Construct an MDS daemon bound to a messenger and mon client.
// Note: members are initialized in declaration order (e.g. `timer` is built
// against `mds_lock` before any dispatch can run); the body only performs
// work that cannot live in the init-list.
MDSDaemon::MDSDaemon(std::string_view n, Messenger *m, MonClient *mc,
		     boost::asio::io_context& ioctx) :
  Dispatcher(m->cct),
  timer(m->cct, mds_lock),
  gss_ktfile_client(m->cct->_conf.get_val<std::string>("gss_ktab_client_file")),
  beacon(m->cct, mc, n),
  name(n),
  messenger(m),
  monc(mc),
  ioctx(ioctx),
  mgrc(m->cct, m, &mc->monmap),
  log_client(m->cct, messenger, &mc->monmap, LogClient::NO_FLAGS),
  starttime(mono_clock::now())
{
  // argv/argc for respawn() are filled in later (see orig_argv users).
  orig_argc = 0;
  orig_argv = NULL;

  clog = log_client.create_channel();

  if (!gss_ktfile_client.empty()) {
    // Assert we can export environment variable
    /*
      The default client keytab is used, if it is present and readable,
      to automatically obtain initial credentials for GSSAPI client
      applications. The principal name of the first entry in the client
      keytab is used by default when obtaining initial credentials.
      1. The KRB5_CLIENT_KTNAME environment variable.
      2. The default_client_keytab_name profile variable in [libdefaults].
      3. The hardcoded default, DEFCKTNAME.
    */
    // Export the configured keytab path so the Kerberos library picks it up.
    const int32_t set_result(setenv("KRB5_CLIENT_KTNAME",
                                    gss_ktfile_client.c_str(), 1));
    ceph_assert(set_result == 0);
  }

  // Start with an empty (epoch 0) map; handle_mds_map() replaces it.
  mdsmap.reset(new MDSMap);
}
104 | ||
105 | MDSDaemon::~MDSDaemon() { | |
11fdf7f2 | 106 | std::lock_guard lock(mds_lock); |
7c673cae FG |
107 | |
108 | delete mds_rank; | |
109 | mds_rank = NULL; | |
7c673cae FG |
110 | } |
111 | ||
// Admin-socket hook that forwards every registered MDS command to
// MDSDaemon::asok_command. Only the async entry point is used; the
// synchronous call() must never be reached.
class MDSSocketHook : public AdminSocketHook {
  MDSDaemon *mds;  // non-owning back-pointer to the daemon
public:
  explicit MDSSocketHook(MDSDaemon *m) : mds(m) {}
  // Synchronous path is intentionally unreachable: AdminSocket is expected
  // to route everything through call_async for this hook.
  int call(
    std::string_view command,
    const cmdmap_t& cmdmap,
    const bufferlist&,
    Formatter *f,
    std::ostream& errss,
    ceph::buffer::list& out) override {
    ceph_abort("should go to call_async");
  }
  // Asynchronous path: hand the command to the daemon; on_finish delivers
  // (retcode, status string, output payload) back to the admin socket.
  void call_async(
    std::string_view command,
    const cmdmap_t& cmdmap,
    Formatter *f,
    const bufferlist& inbl,
    std::function<void(int,const std::string&,bufferlist&)> on_finish) override {
    mds->asok_command(command, cmdmap, f, inbl, on_finish);
  }
};
134 | ||
9f95a23c TL |
// Handle an admin-socket command for the daemon itself; anything not
// recognized here is delegated to the rank (MDSRank::handle_asok_command).
// on_finish must be invoked exactly once — either here, or by the rank
// (note the early `return` after delegation).
void MDSDaemon::asok_command(
  std::string_view command,
  const cmdmap_t& cmdmap,
  Formatter *f,
  const bufferlist& inbl,
  std::function<void(int,const std::string&,bufferlist&)> on_finish)
{
  dout(1) << "asok_command: " << command << " " << cmdmap
	  << " (starting...)" << dendl;

  // Default result: command not recognized anywhere.
  int r = -CEPHFS_ENOSYS;
  bufferlist outbl;
  CachedStackStringStream css;
  auto& ss = *css;
  if (command == "status") {
    dump_status(f);
    r = 0;
  } else if (command == "exit") {
    outbl.append("Exiting...\n");
    r = 0;
    // Suicide from a detached thread so the reply below can be sent first.
    std::thread t([this](){
      // Wait a little to improve chances of caller getting
      // our response before seeing us disappear from mdsmap
      sleep(1);
      std::lock_guard l(mds_lock);
      suicide();
    });
    t.detach();
  } else if (command == "respawn") {
    outbl.append("Respawning...\n");
    r = 0;
    // Same deferred pattern as "exit": reply first, then respawn.
    std::thread t([this](){
      // Wait a little to improve chances of caller getting
      // our response before seeing us disappear from mdsmap
      sleep(1);
      std::lock_guard l(mds_lock);
      respawn();
    });
    t.detach();
  } else if (command == "heap") {
    if (!ceph_using_tcmalloc()) {
      ss << "not using tcmalloc";
      r = -CEPHFS_EOPNOTSUPP;
    } else {
      // Forward the sub-command (dump/start_profiler/...) plus optional
      // value argument to the tcmalloc heap profiler.
      string heapcmd;
      cmd_getval(cmdmap, "heapcmd", heapcmd);
      vector<string> heapcmd_vec;
      get_str_vec(heapcmd, heapcmd_vec);
      string value;
      if (cmd_getval(cmdmap, "value", value)) {
	heapcmd_vec.push_back(value);
      }
      std::stringstream outss;
      ceph_heap_profiler_handle_command(heapcmd_vec, outss);
      outbl.append(outss);
      r = 0;
    }
  } else if (command == "cpu_profiler") {
    string arg;
    cmd_getval(cmdmap, "arg", arg);
    vector<string> argvec;
    get_str_vec(arg, argvec);
    cpu_profiler_handle_command(argvec, ss);
    r = 0;
  } else {
    // Rank-level command: requires an active rank.
    if (mds_rank == NULL) {
      dout(1) << "Can't run that command on an inactive MDS!" << dendl;
      f->dump_string("error", "mds_not_active");
    } else {
      try {
	// The rank takes ownership of completing on_finish — do not call
	// it again here.
	mds_rank->handle_asok_command(command, cmdmap, f, inbl, on_finish);
	return;
      } catch (const TOPNSPC::common::bad_cmd_get& e) {
	ss << e.what();
	r = -CEPHFS_EINVAL;
      }
    }
  }
  on_finish(r, ss.str(), outbl);
}
215 | ||
// Emit a JSON/Formatter summary of daemon state: identity, beacon want-state,
// mdsmap/osdmap epochs, uptime, and (when a rank is held) the rank's own
// status. Only the rank dump is taken under mds_lock.
void MDSDaemon::dump_status(Formatter *f)
{
  f->open_object_section("status");
  f->dump_stream("cluster_fsid") << monc->get_fsid();
  if (mds_rank) {
    f->dump_int("whoami", mds_rank->get_nodeid());
  } else {
    // No rank assigned (standby / booting).
    f->dump_int("whoami", MDS_RANK_NONE);
  }

  f->dump_int("id", monc->get_global_id());
  f->dump_string("want_state", ceph_mds_state_name(beacon.get_want_state()));
  // State as recorded in the map for our gid (may lag want_state).
  f->dump_string("state", ceph_mds_state_name(mdsmap->get_state_gid(mds_gid_t(
	  monc->get_global_id()))));
  if (mds_rank) {
    std::lock_guard l(mds_lock);
    mds_rank->dump_status(f);
  }

  f->dump_unsigned("mdsmap_epoch", mdsmap->get_epoch());
  if (mds_rank) {
    f->dump_unsigned("osdmap_epoch", mds_rank->get_osd_epoch());
    f->dump_unsigned("osdmap_epoch_barrier", mds_rank->get_osd_epoch_barrier());
  } else {
    // Rankless daemons track no OSD map; report zeros.
    f->dump_unsigned("osdmap_epoch", 0);
    f->dump_unsigned("osdmap_epoch_barrier", 0);
  }

  f->dump_float("uptime", get_uptime().count());

  f->close_section(); // status
}
248 | ||
249 | void MDSDaemon::set_up_admin_socket() | |
250 | { | |
251 | int r; | |
252 | AdminSocket *admin_socket = g_ceph_context->get_admin_socket(); | |
11fdf7f2 | 253 | ceph_assert(asok_hook == nullptr); |
7c673cae | 254 | asok_hook = new MDSSocketHook(this); |
9f95a23c | 255 | r = admin_socket->register_command("status", asok_hook, |
7c673cae | 256 | "high-level status of MDS"); |
11fdf7f2 | 257 | ceph_assert(r == 0); |
9f95a23c | 258 | r = admin_socket->register_command("dump_ops_in_flight", asok_hook, |
7c673cae | 259 | "show the ops currently in flight"); |
11fdf7f2 | 260 | ceph_assert(r == 0); |
9f95a23c | 261 | r = admin_socket->register_command("ops", asok_hook, |
7c673cae | 262 | "show the ops currently in flight"); |
11fdf7f2 | 263 | ceph_assert(r == 0); |
9f95a23c | 264 | r = admin_socket->register_command("dump_blocked_ops", |
7c673cae FG |
265 | asok_hook, |
266 | "show the blocked ops currently in flight"); | |
11fdf7f2 | 267 | ceph_assert(r == 0); |
1e59de90 TL |
268 | r = admin_socket->register_command("dump_blocked_ops_count", |
269 | asok_hook, | |
270 | "show the count of blocked ops currently in flight"); | |
271 | ceph_assert(r == 0); | |
9f95a23c | 272 | r = admin_socket->register_command("dump_historic_ops", |
7c673cae | 273 | asok_hook, |
11fdf7f2 TL |
274 | "show recent ops"); |
275 | ceph_assert(r == 0); | |
9f95a23c | 276 | r = admin_socket->register_command("dump_historic_ops_by_duration", |
7c673cae | 277 | asok_hook, |
11fdf7f2 TL |
278 | "show recent ops, sorted by op duration"); |
279 | ceph_assert(r == 0); | |
9f95a23c | 280 | r = admin_socket->register_command("scrub_path name=path,type=CephString " |
7c673cae | 281 | "name=scrubops,type=CephChoices," |
9f95a23c TL |
282 | "strings=force|recursive|repair,n=N,req=false " |
283 | "name=tag,type=CephString,req=false", | |
7c673cae FG |
284 | asok_hook, |
285 | "scrub an inode and output results"); | |
11fdf7f2 | 286 | ceph_assert(r == 0); |
9f95a23c TL |
287 | r = admin_socket->register_command("scrub start " |
288 | "name=path,type=CephString " | |
289 | "name=scrubops,type=CephChoices,strings=force|recursive|repair,n=N,req=false " | |
290 | "name=tag,type=CephString,req=false", | |
291 | asok_hook, | |
292 | "scrub and inode and output results"); | |
293 | ceph_assert(r == 0); | |
294 | r = admin_socket->register_command("scrub abort", | |
295 | asok_hook, | |
296 | "Abort in progress scrub operations(s)"); | |
297 | ceph_assert(r == 0); | |
298 | r = admin_socket->register_command("scrub pause", | |
299 | asok_hook, | |
300 | "Pause in progress scrub operations(s)"); | |
301 | ceph_assert(r == 0); | |
302 | r = admin_socket->register_command("scrub resume", | |
303 | asok_hook, | |
304 | "Resume paused scrub operations(s)"); | |
305 | ceph_assert(r == 0); | |
306 | r = admin_socket->register_command("scrub status", | |
307 | asok_hook, | |
308 | "Status of scrub operations(s)"); | |
309 | ceph_assert(r == 0); | |
310 | r = admin_socket->register_command("tag path name=path,type=CephString" | |
7c673cae FG |
311 | " name=tag,type=CephString", |
312 | asok_hook, | |
313 | "Apply scrub tag recursively"); | |
11fdf7f2 | 314 | ceph_assert(r == 0); |
9f95a23c | 315 | r = admin_socket->register_command("flush_path name=path,type=CephString", |
7c673cae FG |
316 | asok_hook, |
317 | "flush an inode (and its dirfrags)"); | |
11fdf7f2 | 318 | ceph_assert(r == 0); |
9f95a23c | 319 | r = admin_socket->register_command("export dir " |
7c673cae FG |
320 | "name=path,type=CephString " |
321 | "name=rank,type=CephInt", | |
322 | asok_hook, | |
323 | "migrate a subtree to named MDS"); | |
11fdf7f2 | 324 | ceph_assert(r == 0); |
20effc67 TL |
325 | r = admin_socket->register_command("dump cache " |
326 | "name=path,type=CephString,req=false " | |
327 | "name=timeout,type=CephInt,range=0,req=false", | |
7c673cae FG |
328 | asok_hook, |
329 | "dump metadata cache (optionally to a file)"); | |
11fdf7f2 | 330 | ceph_assert(r == 0); |
9f95a23c TL |
331 | r = admin_socket->register_command("cache drop " |
332 | "name=timeout,type=CephInt,range=0,req=false", | |
333 | asok_hook, | |
334 | "trim cache and optionally request client to release all caps and flush the journal"); | |
335 | ceph_assert(r == 0); | |
181888fb | 336 | r = admin_socket->register_command("cache status", |
181888fb FG |
337 | asok_hook, |
338 | "show cache status"); | |
11fdf7f2 | 339 | ceph_assert(r == 0); |
9f95a23c | 340 | r = admin_socket->register_command("dump tree " |
7c673cae FG |
341 | "name=root,type=CephString,req=true " |
342 | "name=depth,type=CephInt,req=false ", | |
343 | asok_hook, | |
344 | "dump metadata cache for subtree"); | |
11fdf7f2 | 345 | ceph_assert(r == 0); |
1e59de90 TL |
346 | r = admin_socket->register_command("dump loads " |
347 | "name=depth,type=CephInt,range=0,req=false", | |
28e407b8 AA |
348 | asok_hook, |
349 | "dump metadata loads"); | |
11fdf7f2 | 350 | ceph_assert(r == 0); |
9f95a23c | 351 | r = admin_socket->register_command("dump snaps name=server,type=CephChoices,strings=--server,req=false", |
11fdf7f2 TL |
352 | asok_hook, |
353 | "dump snapshots"); | |
354 | ceph_assert(r == 0); | |
adb31ebb TL |
355 | r = admin_socket->register_command("session ls " |
356 | "name=cap_dump,type=CephBool,req=false " | |
357 | "name=filters,type=CephString,n=N,req=false ", | |
9f95a23c TL |
358 | asok_hook, |
359 | "List client sessions based on a filter"); | |
360 | ceph_assert(r == 0); | |
adb31ebb TL |
361 | r = admin_socket->register_command("client ls " |
362 | "name=cap_dump,type=CephBool,req=false " | |
363 | "name=filters,type=CephString,n=N,req=false ", | |
9f95a23c TL |
364 | asok_hook, |
365 | "List client sessions based on a filter"); | |
366 | ceph_assert(r == 0); | |
367 | r = admin_socket->register_command("session evict name=filters,type=CephString,n=N,req=false", | |
368 | asok_hook, | |
369 | "Evict client session(s) based on a filter"); | |
370 | ceph_assert(r == 0); | |
371 | r = admin_socket->register_command("client evict name=filters,type=CephString,n=N,req=false", | |
7c673cae | 372 | asok_hook, |
9f95a23c TL |
373 | "Evict client session(s) based on a filter"); |
374 | ceph_assert(r == 0); | |
375 | r = admin_socket->register_command("session kill name=client_id,type=CephString", | |
376 | asok_hook, | |
377 | "Evict a client session by id"); | |
11fdf7f2 | 378 | ceph_assert(r == 0); |
adb31ebb | 379 | r = admin_socket->register_command("session ls name=cap_dump,type=CephBool,req=false", |
7c673cae FG |
380 | asok_hook, |
381 | "Enumerate connected CephFS clients"); | |
11fdf7f2 | 382 | ceph_assert(r == 0); |
9f95a23c TL |
383 | r = admin_socket->register_command("session config " |
384 | "name=client_id,type=CephInt,req=true " | |
385 | "name=option,type=CephString,req=true " | |
386 | "name=value,type=CephString,req=false ", | |
387 | asok_hook, | |
388 | "Config a CephFS client session"); | |
20effc67 | 389 | ceph_assert(r == 0); |
9f95a23c TL |
390 | r = admin_socket->register_command("client config " |
391 | "name=client_id,type=CephInt,req=true " | |
92f5a8d4 TL |
392 | "name=option,type=CephString,req=true " |
393 | "name=value,type=CephString,req=false ", | |
394 | asok_hook, | |
395 | "Config a CephFS client session"); | |
20effc67 | 396 | ceph_assert(r == 0); |
9f95a23c TL |
397 | r = admin_socket->register_command("damage ls", |
398 | asok_hook, | |
399 | "List detected metadata damage"); | |
20effc67 | 400 | ceph_assert(r == 0); |
9f95a23c TL |
401 | r = admin_socket->register_command("damage rm " |
402 | "name=damage_id,type=CephInt", | |
403 | asok_hook, | |
404 | "Remove a damage table entry"); | |
20effc67 | 405 | ceph_assert(r == 0); |
9f95a23c | 406 | r = admin_socket->register_command("osdmap barrier name=target_epoch,type=CephInt", |
92f5a8d4 TL |
407 | asok_hook, |
408 | "Wait until the MDS has this OSD map epoch"); | |
409 | ceph_assert(r == 0); | |
7c673cae | 410 | r = admin_socket->register_command("flush journal", |
7c673cae FG |
411 | asok_hook, |
412 | "Flush the journal to the backing store"); | |
11fdf7f2 | 413 | ceph_assert(r == 0); |
7c673cae | 414 | r = admin_socket->register_command("force_readonly", |
7c673cae FG |
415 | asok_hook, |
416 | "Force MDS to read-only mode"); | |
11fdf7f2 | 417 | ceph_assert(r == 0); |
7c673cae | 418 | r = admin_socket->register_command("get subtrees", |
7c673cae FG |
419 | asok_hook, |
420 | "Return the subtree map"); | |
11fdf7f2 | 421 | ceph_assert(r == 0); |
9f95a23c | 422 | r = admin_socket->register_command("dirfrag split " |
7c673cae FG |
423 | "name=path,type=CephString,req=true " |
424 | "name=frag,type=CephString,req=true " | |
425 | "name=bits,type=CephInt,req=true ", | |
426 | asok_hook, | |
427 | "Fragment directory by path"); | |
11fdf7f2 | 428 | ceph_assert(r == 0); |
9f95a23c | 429 | r = admin_socket->register_command("dirfrag merge " |
7c673cae FG |
430 | "name=path,type=CephString,req=true " |
431 | "name=frag,type=CephString,req=true", | |
432 | asok_hook, | |
433 | "De-fragment directory by path"); | |
11fdf7f2 | 434 | ceph_assert(r == 0); |
9f95a23c | 435 | r = admin_socket->register_command("dirfrag ls " |
7c673cae FG |
436 | "name=path,type=CephString,req=true", |
437 | asok_hook, | |
438 | "List fragments in directory"); | |
11fdf7f2 TL |
439 | ceph_assert(r == 0); |
440 | r = admin_socket->register_command("openfiles ls", | |
11fdf7f2 TL |
441 | asok_hook, |
442 | "List the opening files and their caps"); | |
443 | ceph_assert(r == 0); | |
9f95a23c | 444 | r = admin_socket->register_command("dump inode " |
11fdf7f2 TL |
445 | "name=number,type=CephInt,req=true", |
446 | asok_hook, | |
447 | "dump inode by inode number"); | |
448 | ceph_assert(r == 0); | |
9f95a23c TL |
449 | r = admin_socket->register_command("exit", |
450 | asok_hook, | |
451 | "Terminate this MDS"); | |
452 | r = admin_socket->register_command("respawn", | |
453 | asok_hook, | |
454 | "Respawn this MDS"); | |
455 | ceph_assert(r == 0); | |
456 | r = admin_socket->register_command( | |
457 | "heap " \ | |
458 | "name=heapcmd,type=CephChoices,strings=" \ | |
459 | "dump|start_profiler|stop_profiler|release|get_release_rate|set_release_rate|stats " \ | |
460 | "name=value,type=CephString,req=false", | |
461 | asok_hook, | |
462 | "show heap usage info (available only if compiled with tcmalloc)"); | |
463 | ceph_assert(r == 0); | |
464 | r = admin_socket->register_command( | |
465 | "cpu_profiler " \ | |
466 | "name=arg,type=CephChoices,strings=status|flush", | |
467 | asok_hook, | |
468 | "run cpu profiling on daemon"); | |
469 | ceph_assert(r == 0); | |
7c673cae FG |
470 | } |
471 | ||
472 | void MDSDaemon::clean_up_admin_socket() | |
473 | { | |
11fdf7f2 | 474 | g_ceph_context->get_admin_socket()->unregister_commands(asok_hook); |
7c673cae FG |
475 | delete asok_hook; |
476 | asok_hook = NULL; | |
477 | } | |
478 | ||
7c673cae FG |
// Bring the daemon up: wire dispatchers, initialize and authenticate the
// mon client, obtain rotating service keys, subscribe to the mdsmap, set up
// the admin socket, and schedule the first tick. Returns 0 on success (or
// on an early DNE shutdown request), negative CEPHFS_* on failure.
// Locking: mds_lock is taken/released in several short windows; note the
// deliberate ordering of set_up_admin_socket() before the final lock.
int MDSDaemon::init()
{
#ifdef _WIN32
  // Some file related flags and types are stubbed on Windows. In order to avoid
  // incorrect behavior, we're going to prevent the MDS from running on Windows
  // until those limitations are addressed. MDS clients, however, are allowed
  // to run on Windows.
  derr << "The Ceph MDS does not support running on Windows at the moment."
       << dendl;
  return -CEPHFS_ENOSYS;
#endif // _WIN32

  // Debug aid: log sizes of the core metadata structures at startup.
  dout(10) << "Dumping misc struct sizes:" << dendl;
  dout(10) << sizeof(MDSCacheObject) << "\tMDSCacheObject" << dendl;
  dout(10) << sizeof(CInode) << "\tCInode" << dendl;
  dout(10) << sizeof(elist<void*>::item) << "\telist<>::item" << dendl;
  dout(10) << sizeof(CInode::mempool_inode) << "\tinode" << dendl;
  dout(10) << sizeof(CInode::mempool_old_inode) << "\told_inode" << dendl;
  dout(10) << sizeof(nest_info_t) << "\tnest_info_t" << dendl;
  dout(10) << sizeof(frag_info_t) << "\tfrag_info_t" << dendl;
  dout(10) << sizeof(SimpleLock) << "\tSimpleLock" << dendl;
  dout(10) << sizeof(ScatterLock) << "\tScatterLock" << dendl;
  dout(10) << sizeof(CDentry) << "\tCDentry" << dendl;
  dout(10) << sizeof(elist<void*>::item) << "\telist<>::item" << dendl;
  dout(10) << sizeof(SimpleLock) << "\tSimpleLock" << dendl;
  dout(10) << sizeof(CDir) << "\tCDir" << dendl;
  dout(10) << sizeof(elist<void*>::item) << "\telist<>::item" << dendl;
  dout(10) << sizeof(fnode_t) << "\tfnode_t" << dendl;
  dout(10) << sizeof(nest_info_t) << "\tnest_info_t" << dendl;
  dout(10) << sizeof(frag_info_t) << "\tfrag_info_t" << dendl;
  dout(10) << sizeof(Capability) << "\tCapability" << dendl;
  dout(10) << sizeof(xlist<void*>::item) << "\txlist<>::item" << dendl;

  // Beacon must see messages before the daemon's own dispatcher.
  messenger->add_dispatcher_tail(&beacon);
  messenger->add_dispatcher_tail(this);

  // init monc
  monc->set_messenger(messenger);

  monc->set_want_keys(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD |
                      CEPH_ENTITY_TYPE_MDS | CEPH_ENTITY_TYPE_MGR);
  int r = 0;
  r = monc->init();
  if (r < 0) {
    derr << "ERROR: failed to init monc: " << cpp_strerror(-r) << dendl;
    // suicide() expects mds_lock held.
    mds_lock.lock();
    suicide();
    mds_lock.unlock();
    return r;
  }

  messenger->set_auth_client(monc);
  messenger->set_auth_server(monc);
  monc->set_handle_authentication_dispatcher(this);

  // tell monc about log_client so it will know about mon session resets
  monc->set_log_client(&log_client);

  r = monc->authenticate();
  if (r < 0) {
    derr << "ERROR: failed to authenticate: " << cpp_strerror(-r) << dendl;
    mds_lock.lock();
    suicide();
    mds_lock.unlock();
    return r;
  }

  // Retry fetching rotating service keys up to the configured attempt cap;
  // give up with ETIMEDOUT (likely clock skew vs the monitors).
  int rotating_auth_attempts = 0;
  auto rotating_auth_timeout =
    g_conf().get_val<int64_t>("rotating_keys_bootstrap_timeout");
  while (monc->wait_auth_rotating(rotating_auth_timeout) < 0) {
    if (++rotating_auth_attempts <= g_conf()->max_rotating_auth_attempts) {
      derr << "unable to obtain rotating service keys; retrying" << dendl;
      continue;
    }
    derr << "ERROR: failed to refresh rotating keys, "
         << "maximum retry time reached."
         << " Maybe I have a clock skew against the monitors?" << dendl;
    std::lock_guard locker{mds_lock};
    suicide();
    return -CEPHFS_ETIMEDOUT;
  }

  mds_lock.lock();
  if (beacon.get_want_state() == CEPH_MDS_STATE_DNE) {
    // Something (e.g. a signal) already asked us to die during startup.
    dout(4) << __func__ << ": terminated already, dropping out" << dendl;
    mds_lock.unlock();
    return 0;
  }

  // Subscribe to mdsmap updates from the monitors.
  monc->sub_want("mdsmap", 0, 0);
  monc->renew_subs();

  mds_lock.unlock();

  // Set up admin socket before taking mds_lock, so that ordering
  // is consistent (later we take mds_lock within asok callbacks)
  set_up_admin_socket();
  std::lock_guard locker{mds_lock};
  if (beacon.get_want_state() == MDSMap::STATE_DNE) {
    suicide(); // we could do something more graceful here
    dout(4) << __func__ << ": terminated already, dropping out" << dendl;
    return 0;
  }

  timer.init();

  // Beacon starts announcing us to the monitors from here on.
  beacon.init(*mdsmap);
  messenger->set_myname(entity_name_t::MDS(MDS_RANK_NONE));

  // schedule tick
  reset_tick();
  return 0;
}
593 | ||
594 | void MDSDaemon::reset_tick() | |
595 | { | |
596 | // cancel old | |
597 | if (tick_event) timer.cancel_event(tick_event); | |
598 | ||
599 | // schedule | |
3efd9988 | 600 | tick_event = timer.add_event_after( |
11fdf7f2 | 601 | g_conf()->mds_tick_interval, |
9f95a23c TL |
602 | new LambdaContext([this](int) { |
603 | ceph_assert(ceph_mutex_is_locked_by_me(mds_lock)); | |
3efd9988 FG |
604 | tick(); |
605 | })); | |
7c673cae FG |
606 | } |
607 | ||
608 | void MDSDaemon::tick() | |
609 | { | |
7c673cae FG |
610 | // reschedule |
611 | reset_tick(); | |
612 | ||
613 | // Call through to subsystems' tick functions | |
614 | if (mds_rank) { | |
615 | mds_rank->tick(); | |
616 | } | |
617 | } | |
618 | ||
9f95a23c | 619 | void MDSDaemon::handle_command(const cref_t<MCommand> &m) |
7c673cae | 620 | { |
11fdf7f2 TL |
621 | auto priv = m->get_connection()->get_priv(); |
622 | auto session = static_cast<Session *>(priv.get()); | |
623 | ceph_assert(session != NULL); | |
9f95a23c TL |
624 | |
625 | int r = 0; | |
626 | cmdmap_t cmdmap; | |
f67539c2 TL |
627 | CachedStackStringStream css; |
628 | auto& ss = *css; | |
9f95a23c TL |
629 | bufferlist outbl; |
630 | ||
7c673cae FG |
631 | // If someone is using a closed session for sending commands (e.g. |
632 | // the ceph CLI) then we should feel free to clean up this connection | |
633 | // as soon as we've sent them a response. | |
94b18763 FG |
634 | const bool live_session = |
635 | session->get_state_seq() > 0 && | |
636 | mds_rank && | |
637 | mds_rank->sessionmap.get_session(session->info.inst.name); | |
7c673cae FG |
638 | |
639 | if (!live_session) { | |
640 | // This session only existed to issue commands, so terminate it | |
641 | // as soon as we can. | |
11fdf7f2 TL |
642 | ceph_assert(session->is_closed()); |
643 | session->get_connection()->mark_disposable(); | |
7c673cae | 644 | } |
11fdf7f2 | 645 | priv.reset(); |
7c673cae | 646 | |
7c673cae FG |
647 | if (!session->auth_caps.allow_all()) { |
648 | dout(1) << __func__ | |
649 | << ": received command from client without `tell` capability: " | |
11fdf7f2 | 650 | << *m->get_connection()->peer_addrs << dendl; |
7c673cae FG |
651 | |
652 | ss << "permission denied"; | |
f67539c2 | 653 | r = -CEPHFS_EACCES; |
7c673cae | 654 | } else if (m->cmd.empty()) { |
f67539c2 | 655 | r = -CEPHFS_EINVAL; |
7c673cae | 656 | ss << "no command given"; |
9f95a23c | 657 | } else if (!TOPNSPC::common::cmdmap_from_json(m->cmd, &cmdmap, ss)) { |
f67539c2 | 658 | r = -CEPHFS_EINVAL; |
7c673cae | 659 | } else { |
9f95a23c TL |
660 | cct->get_admin_socket()->queue_tell_command(m); |
661 | return; | |
7c673cae FG |
662 | } |
663 | ||
f67539c2 | 664 | auto reply = make_message<MCommandReply>(r, ss.str()); |
9f95a23c TL |
665 | reply->set_tid(m->get_tid()); |
666 | reply->set_data(outbl); | |
667 | m->get_connection()->send_message2(reply); | |
7c673cae FG |
668 | } |
669 | ||
9f95a23c | 670 | void MDSDaemon::handle_mds_map(const cref_t<MMDSMap> &m) |
7c673cae FG |
671 | { |
672 | version_t epoch = m->get_epoch(); | |
7c673cae FG |
673 | |
674 | // is it new? | |
675 | if (epoch <= mdsmap->get_epoch()) { | |
1adf2230 AA |
676 | dout(5) << "handle_mds_map old map epoch " << epoch << " <= " |
677 | << mdsmap->get_epoch() << ", discarding" << dendl; | |
7c673cae FG |
678 | return; |
679 | } | |
680 | ||
1adf2230 AA |
681 | dout(1) << "Updating MDS map to version " << epoch << " from " << m->get_source() << dendl; |
682 | ||
7c673cae | 683 | // keep old map, for a moment |
11fdf7f2 TL |
684 | std::unique_ptr<MDSMap> oldmap; |
685 | oldmap.swap(mdsmap); | |
7c673cae FG |
686 | |
687 | // decode and process | |
11fdf7f2 | 688 | mdsmap.reset(new MDSMap); |
7c673cae | 689 | mdsmap->decode(m->get_encoded()); |
7c673cae FG |
690 | |
691 | monc->sub_got("mdsmap", mdsmap->get_epoch()); | |
692 | ||
7c673cae | 693 | // verify compatset |
1adf2230 | 694 | CompatSet mdsmap_compat(MDSMap::get_compat_set_all()); |
7c673cae FG |
695 | dout(10) << " my compat " << mdsmap_compat << dendl; |
696 | dout(10) << " mdsmap compat " << mdsmap->compat << dendl; | |
697 | if (!mdsmap_compat.writeable(mdsmap->compat)) { | |
698 | dout(0) << "handle_mds_map mdsmap compatset " << mdsmap->compat | |
699 | << " not writeable with daemon features " << mdsmap_compat | |
700 | << ", killing myself" << dendl; | |
701 | suicide(); | |
9f95a23c | 702 | return; |
7c673cae FG |
703 | } |
704 | ||
9f95a23c TL |
705 | // Calculate my effective rank (either my owned rank or the rank I'm following if STATE_STANDBY_REPLAY |
706 | const auto addrs = messenger->get_myaddrs(); | |
707 | const auto myid = monc->get_global_id(); | |
708 | const auto mygid = mds_gid_t(myid); | |
709 | const auto whoami = mdsmap->get_rank_gid(mygid); | |
710 | const auto old_state = oldmap->get_state_gid(mygid); | |
711 | const auto new_state = mdsmap->get_state_gid(mygid); | |
712 | const auto incarnation = mdsmap->get_inc_gid(mygid); | |
713 | dout(10) << "my gid is " << myid << dendl; | |
11fdf7f2 | 714 | dout(10) << "map says I am mds." << whoami << "." << incarnation |
7c673cae | 715 | << " state " << ceph_mds_state_name(new_state) << dendl; |
9f95a23c TL |
716 | dout(10) << "msgr says I am " << addrs << dendl; |
717 | ||
718 | // If we're removed from the MDSMap, stop all processing. | |
719 | using DS = MDSMap::DaemonState; | |
720 | if (old_state != DS::STATE_NULL && new_state == DS::STATE_NULL) { | |
721 | const auto& oldinfo = oldmap->get_info_gid(mygid); | |
722 | dout(1) << "Map removed me " << oldinfo | |
723 | << " from cluster; respawning! See cluster/monitor logs for details." << dendl; | |
724 | respawn(); | |
725 | } | |
726 | ||
727 | if (old_state == DS::STATE_NULL && new_state != DS::STATE_NULL) { | |
728 | /* The MDS has been added to the FSMap, now we can init the MgrClient */ | |
729 | mgrc.init(); | |
730 | messenger->add_dispatcher_tail(&mgrc); | |
731 | monc->sub_want("mgrmap", 0, 0); | |
732 | monc->renew_subs(); /* MgrMap receipt drives connection to ceph-mgr */ | |
733 | } | |
7c673cae | 734 | |
9f95a23c TL |
735 | // mark down any failed peers |
736 | for (const auto& [gid, info] : oldmap->get_mds_info()) { | |
737 | if (mdsmap->get_mds_info().count(gid) == 0) { | |
738 | dout(10) << " peer mds gid " << gid << " removed from map" << dendl; | |
739 | messenger->mark_down_addrs(info.addrs); | |
740 | } | |
741 | } | |
11fdf7f2 | 742 | |
7c673cae | 743 | if (whoami == MDS_RANK_NONE) { |
9f95a23c TL |
744 | // We do not hold a rank: |
745 | dout(10) << __func__ << ": handling map in rankless mode" << dendl; | |
7c673cae | 746 | |
9f95a23c TL |
747 | if (new_state == DS::STATE_STANDBY) { |
748 | /* Note: STATE_BOOT is never an actual state in the FSMap. The Monitors | |
749 | * generally mark a new MDS as STANDBY (although it's possible to | |
750 | * immediately be assigned a rank). | |
751 | */ | |
752 | if (old_state == DS::STATE_NULL) { | |
753 | dout(1) << "Monitors have assigned me to become a standby." << dendl; | |
754 | beacon.set_want_state(*mdsmap, new_state); | |
755 | } else if (old_state == DS::STATE_STANDBY) { | |
756 | dout(5) << "I am still standby" << dendl; | |
757 | } | |
758 | } else if (new_state == DS::STATE_NULL) { | |
759 | /* We are not in the MDSMap yet! Keep waiting: */ | |
760 | ceph_assert(beacon.get_want_state() == DS::STATE_BOOT); | |
761 | dout(10) << "not in map yet" << dendl; | |
762 | } else { | |
763 | /* We moved to standby somehow from another state */ | |
764 | ceph_abort("invalid transition to standby"); | |
7c673cae | 765 | } |
7c673cae | 766 | } else { |
7c673cae FG |
767 | // Did we already hold a different rank? MDSMonitor shouldn't try |
768 | // to change that out from under me! | |
769 | if (mds_rank && whoami != mds_rank->get_nodeid()) { | |
770 | derr << "Invalid rank transition " << mds_rank->get_nodeid() << "->" | |
771 | << whoami << dendl; | |
772 | respawn(); | |
773 | } | |
774 | ||
775 | // Did I previously not hold a rank? Initialize! | |
776 | if (mds_rank == NULL) { | |
20effc67 | 777 | mds_rank = new MDSRankDispatcher(whoami, mds_lock, clog, |
9f95a23c TL |
778 | timer, beacon, mdsmap, messenger, monc, &mgrc, |
779 | new LambdaContext([this](int r){respawn();}), | |
f67539c2 TL |
780 | new LambdaContext([this](int r){suicide();}), |
781 | ioctx); | |
7c673cae FG |
782 | dout(10) << __func__ << ": initializing MDS rank " |
783 | << mds_rank->get_nodeid() << dendl; | |
784 | mds_rank->init(); | |
785 | } | |
786 | ||
787 | // MDSRank is active: let him process the map, we have no say. | |
788 | dout(10) << __func__ << ": handling map as rank " | |
789 | << mds_rank->get_nodeid() << dendl; | |
11fdf7f2 | 790 | mds_rank->handle_mds_map(m, *oldmap); |
7c673cae FG |
791 | } |
792 | ||
11fdf7f2 | 793 | beacon.notify_mdsmap(*mdsmap); |
7c673cae FG |
794 | } |
795 | ||
7c673cae FG |
796 | void MDSDaemon::handle_signal(int signum) |
797 | { | |
11fdf7f2 | 798 | ceph_assert(signum == SIGINT || signum == SIGTERM); |
7c673cae FG |
799 | derr << "*** got signal " << sig_str(signum) << " ***" << dendl; |
800 | { | |
11fdf7f2 | 801 | std::lock_guard l(mds_lock); |
7c673cae FG |
802 | if (stopping) { |
803 | return; | |
804 | } | |
805 | suicide(); | |
806 | } | |
807 | } | |
808 | ||
// Tear the daemon down in place and return to the caller (unlike
// respawn(), which never returns).  Caller must hold mds_lock; runs at
// most once, enforced by the `stopping` flag.  The shutdown order below
// (tick -> admin socket -> beacon -> mgrc -> rank/timer/monc/messenger)
// is deliberate; do not reorder casually.
void MDSDaemon::suicide()
{
  ceph_assert(ceph_mutex_is_locked(mds_lock));

  // make sure we don't suicide twice
  ceph_assert(stopping == false);
  stopping = true;

  dout(1) << "suicide!  Wanted state "
          << ceph_mds_state_name(beacon.get_want_state()) << dendl;

  // Stop the periodic tick before dismantling the subsystems it pokes.
  if (tick_event) {
    timer.cancel_event(tick_event);
    tick_event = 0;
  }

  clean_up_admin_socket();

  // Notify the Monitors (MDSMonitor) that we're dying, so that it doesn't have
  // to wait for us to go laggy.  Only do this if we're actually in the MDSMap,
  // because otherwise the MDSMonitor will drop our message.
  beacon.set_want_state(*mdsmap, MDSMap::STATE_DNE);
  if (!mdsmap->is_dne_gid(mds_gid_t(monc->get_global_id()))) {
    // Blocking send so the DNE beacon actually makes it out before we
    // shut the beacon down.
    beacon.send_and_wait(1);
  }
  beacon.shutdown();

  // mgrc.init() only happens once we appear in the FSMap (see
  // handle_mds_map), so it may legitimately be uninitialized here.
  if (mgrc.is_initialized())
    mgrc.shutdown();

  if (mds_rank) {
    // The rank owns the shutdown sequencing of timer/monc/messenger.
    mds_rank->shutdown();
  } else {
    // Rankless: shut the shared services down ourselves.
    timer.shutdown();

    monc->shutdown();
    messenger->shutdown();
  }
}
848 | ||
// Replace the current process image with a fresh ceph-mds via execv(),
// preserving the original argv.  Never returns: on execv failure we
// ceph_abort(), because callers rely on respawn() not returning.
void MDSDaemon::respawn()
{
  // --- WARNING TO FUTURE COPY/PASTERS ---
  // You must also add a call like
  //
  //   ceph_pthread_setname(pthread_self(), "ceph-mds");
  //
  // to main() so that /proc/$pid/stat field 2 contains "(ceph-mds)"
  // instead of "(exe)", so that killall (and log rotation) will work.

  dout(1) << "respawn!" << dendl;

  /* Dump recent in case the MDS was stuck doing something which caused it to
   * be removed from the MDSMap leading to respawn. */
  g_ceph_context->_log->dump_recent();

  /* valgrind can't handle execve; just exit and let QA infra restart */
  if (g_conf().get_val<bool>("mds_valgrind_exit")) {
    _exit(0);
  }

  // NOTE: VLA (GNU extension, not standard C++); sized by the saved
  // original argc, plus one slot for the NULL terminator execv requires.
  char *new_argv[orig_argc+1];
  dout(1) << " e: '" << orig_argv[0] << "'" << dendl;
  for (int i=0; i<orig_argc; i++) {
    new_argv[i] = (char *)orig_argv[i];
    dout(1) << " " << i << ": '" << orig_argv[i] << "'" << dendl;
  }
  new_argv[orig_argc] = NULL;

  /* Determine the path to our executable, test if Linux /proc/self/exe exists.
   * This allows us to exec the same executable even if it has since been
   * unlinked.
   */
  // Zero-initialized, so the readlink() result below is always
  // NUL-terminated (readlink itself does not terminate the buffer).
  char exe_path[PATH_MAX] = "";
#ifdef PROCPREFIX
  if (readlink(PROCPREFIX "/proc/self/exe", exe_path, PATH_MAX-1) != -1) {
    // readlink succeeded: log the resolved target, but exec the
    // /proc/self/exe symlink itself so this works even if the binary
    // was unlinked/replaced since we started.
    dout(1) << "respawning with exe " << exe_path << dendl;
    strcpy(exe_path, PROCPREFIX "/proc/self/exe");
  } else {
#else
  {
#endif
    /* Print CWD for the user's interest */
    char buf[PATH_MAX];
    char *cwd = getcwd(buf, sizeof(buf));
    ceph_assert(cwd);
    dout(1) << " cwd " << cwd << dendl;

    /* Fall back to a best-effort: just running in our CWD */
    strncpy(exe_path, orig_argv[0], PATH_MAX-1);
  }

  dout(1) << " exe_path " << exe_path << dendl;

  // Restore default signal dispositions before exec'ing the new image.
  unblock_all_signals(NULL);
  execv(exe_path, new_argv);

  // Only reached if execv failed.
  dout(0) << "respawn execv " << orig_argv[0]
          << " failed with " << cpp_strerror(errno) << dendl;

  // We have to assert out here, because suicide() returns, and callers
  // to respawn expect it never to return.
  ceph_abort();
}
913 | ||
914 | ||
915 | ||
9f95a23c | 916 | bool MDSDaemon::ms_dispatch2(const ref_t<Message> &m) |
7c673cae | 917 | { |
11fdf7f2 | 918 | std::lock_guard l(mds_lock); |
7c673cae FG |
919 | if (stopping) { |
920 | return false; | |
921 | } | |
922 | ||
923 | // Drop out early if shutting down | |
924 | if (beacon.get_want_state() == CEPH_MDS_STATE_DNE) { | |
925 | dout(10) << " stopping, discarding " << *m << dendl; | |
7c673cae FG |
926 | return true; |
927 | } | |
928 | ||
929 | // First see if it's a daemon message | |
930 | const bool handled_core = handle_core_message(m); | |
931 | if (handled_core) { | |
932 | return true; | |
933 | } | |
934 | ||
935 | // Not core, try it as a rank message | |
936 | if (mds_rank) { | |
937 | return mds_rank->ms_dispatch(m); | |
938 | } else { | |
939 | return false; | |
940 | } | |
941 | } | |
942 | ||
7c673cae FG |
943 | /* |
944 | * high priority messages we always process | |
945 | */ | |
f6b5b4d7 TL |
946 | |
947 | #define ALLOW_MESSAGES_FROM(peers) \ | |
948 | do { \ | |
949 | if (m->get_connection() && (m->get_connection()->get_peer_type() & (peers)) == 0) { \ | |
950 | dout(0) << __FILE__ << "." << __LINE__ << ": filtered out request, peer=" \ | |
951 | << m->get_connection()->get_peer_type() << " allowing=" \ | |
952 | << #peers << " message=" << *m << dendl; \ | |
953 | return true; \ | |
954 | } \ | |
955 | } while (0) | |
956 | ||
9f95a23c | 957 | bool MDSDaemon::handle_core_message(const cref_t<Message> &m) |
7c673cae FG |
958 | { |
959 | switch (m->get_type()) { | |
960 | case CEPH_MSG_MON_MAP: | |
961 | ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON); | |
7c673cae FG |
962 | break; |
963 | ||
964 | // MDS | |
965 | case CEPH_MSG_MDS_MAP: | |
966 | ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_MDS); | |
9f95a23c TL |
967 | handle_mds_map(ref_cast<MMDSMap>(m)); |
968 | break; | |
969 | ||
970 | case MSG_REMOVE_SNAPS: | |
971 | ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON); | |
972 | mds_rank->snapserver->handle_remove_snaps(ref_cast<MRemoveSnaps>(m)); | |
7c673cae FG |
973 | break; |
974 | ||
975 | // OSD | |
976 | case MSG_COMMAND: | |
9f95a23c | 977 | handle_command(ref_cast<MCommand>(m)); |
7c673cae FG |
978 | break; |
979 | case CEPH_MSG_OSD_MAP: | |
980 | ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD); | |
981 | ||
982 | if (mds_rank) { | |
983 | mds_rank->handle_osd_map(); | |
984 | } | |
7c673cae FG |
985 | break; |
986 | ||
987 | case MSG_MON_COMMAND: | |
988 | ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON); | |
989 | clog->warn() << "dropping `mds tell` command from legacy monitor"; | |
7c673cae FG |
990 | break; |
991 | ||
992 | default: | |
993 | return false; | |
994 | } | |
995 | return true; | |
996 | } | |
997 | ||
// Messenger callback for a completed outbound connection: intentionally
// a no-op — the MDS has no per-connection setup to do here.
void MDSDaemon::ms_handle_connect(Connection *con)
{
}
1001 | ||
// Messenger callback for a locally-detected connection reset.  Only
// client connections get special treatment; always returns false (we
// never ask the messenger layer to re-establish on our behalf).
bool MDSDaemon::ms_handle_reset(Connection *con)
{
  if (con->get_peer_type() != CEPH_ENTITY_TYPE_CLIENT)
    return false;

  std::lock_guard l(mds_lock);
  if (stopping) {
    return false;
  }
  dout(5) << "ms_handle_reset on " << con->get_peer_socket_addr() << dendl;
  if (beacon.get_want_state() == CEPH_MDS_STATE_DNE)
    return false;

  // `priv` keeps the Session alive for the duration of this function.
  auto priv = con->get_priv();
  if (auto session = static_cast<Session *>(priv.get()); session) {
    if (session->is_closed()) {
      // Session already closed: sever the connection and drop the
      // connection's reference to the Session.
      dout(3) << "ms_handle_reset closing connection for session " << session->info.inst << dendl;
      con->mark_down();
      con->set_priv(nullptr);
    }
  } else {
    // No session was ever attached; nothing to keep this con around for.
    con->mark_down();
  }
  return false;
}
1027 | ||
1028 | ||
// Messenger callback for a peer-reported (remote) connection reset.
// Mirrors ms_handle_reset() for client connections, except the
// no-session case is left alone here (no mark_down) — NOTE(review):
// this asymmetry with ms_handle_reset looks deliberate but is not
// explained; confirm before "fixing".
void MDSDaemon::ms_handle_remote_reset(Connection *con)
{
  if (con->get_peer_type() != CEPH_ENTITY_TYPE_CLIENT)
    return;

  std::lock_guard l(mds_lock);
  if (stopping) {
    return;
  }

  dout(5) << "ms_handle_remote_reset on " << con->get_peer_socket_addr() << dendl;
  if (beacon.get_want_state() == CEPH_MDS_STATE_DNE)
    return;

  // `priv` keeps the Session alive for the duration of this function.
  auto priv = con->get_priv();
  if (auto session = static_cast<Session *>(priv.get()); session) {
    if (session->is_closed()) {
      // Session already closed: sever the connection and drop the
      // connection's reference to the Session.
      dout(3) << "ms_handle_remote_reset closing connection for session " << session->info.inst << dendl;
      con->mark_down();
      con->set_priv(nullptr);
    }
  }
}
1052 | ||
// Messenger callback for a refused connection attempt.  Returning false
// means we take no special action.
bool MDSDaemon::ms_handle_refused(Connection *con)
{
  // do nothing for now
  return false;
}
1058 | ||
11fdf7f2 TL |
1059 | bool MDSDaemon::parse_caps(const AuthCapsInfo& info, MDSAuthCaps& caps) |
1060 | { | |
1061 | caps.clear(); | |
1062 | if (info.allow_all) { | |
1063 | caps.set_allow_all(); | |
7c673cae | 1064 | return true; |
c07f9fc5 | 1065 | } else { |
11fdf7f2 TL |
1066 | auto it = info.caps.begin(); |
1067 | string auth_cap_str; | |
1068 | try { | |
1069 | decode(auth_cap_str, it); | |
1070 | } catch (const buffer::error& e) { | |
1071 | dout(1) << __func__ << ": cannot decode auth caps buffer of length " << info.caps.length() << dendl; | |
1072 | return false; | |
7c673cae FG |
1073 | } |
1074 | ||
11fdf7f2 TL |
1075 | dout(10) << __func__ << ": parsing auth_cap_str='" << auth_cap_str << "'" << dendl; |
1076 | CachedStackStringStream cs; | |
1077 | if (caps.parse(g_ceph_context, auth_cap_str, cs.get())) { | |
1078 | return true; | |
b5b8bbf5 | 1079 | } else { |
11fdf7f2 TL |
1080 | dout(1) << __func__ << ": auth cap parse error: " << cs->strv() << " parsing '" << auth_cap_str << "'" << dendl; |
1081 | return false; | |
7c673cae FG |
1082 | } |
1083 | } | |
7c673cae FG |
1084 | } |
1085 | ||
11fdf7f2 TL |
1086 | int MDSDaemon::ms_handle_authentication(Connection *con) |
1087 | { | |
1088 | /* N.B. without mds_lock! */ | |
1089 | MDSAuthCaps caps; | |
1090 | return parse_caps(con->get_peer_caps_info(), caps) ? 0 : -1; | |
1091 | } | |
7c673cae FG |
1092 | |
// Messenger callback when an inbound connection is accepted: attach a
// Session (new or pre-existing) to the connection, refresh its auth
// caps, and flush any messages queued before the session opened.
void MDSDaemon::ms_handle_accept(Connection *con)
{
  entity_name_t n(con->get_peer_type(), con->get_peer_global_id());
  std::lock_guard l(mds_lock);
  if (stopping) {
    return;
  }

  // We allow connections and assign Session instances to connections
  // even if we have not been assigned a rank, because clients with
  // "allow *" are allowed to connect and do 'tell' operations before
  // we have a rank.
  Session *s = NULL;
  if (mds_rank) {
    // If we do hold a rank, see if this is an existing client establishing
    // a new connection, rather than a new client
    s = mds_rank->sessionmap.get_session(n);
  }

  // Wire up a Session* to this connection
  // It doesn't go into a SessionMap instance until it sends an explicit
  // request to open a session (initial state of Session is `closed`)
  if (!s) {
    s = new Session(con);
    dout(10) << " new session " << s << " for " << s->info.inst
	     << " con " << con << dendl;
    // {s, false}: hand the freshly new'd Session's initial reference to
    // the connection without taking an extra ref — presumably the
    // RefCountedPtr "adopt" form; confirm against RefCountedObj docs.
    con->set_priv(RefCountedPtr{s, false});
    if (mds_rank) {
      mds_rank->kick_waiters_for_any_client_connection();
    }
  } else {
    dout(10) << " existing session " << s << " for " << s->info.inst
	     << " existing con " << s->get_connection()
	     << ", new/authorizing con " << con << dendl;
    // Existing session: this form takes an additional reference for the
    // new connection.
    con->set_priv(RefCountedPtr{s});
  }

  // Refresh the session's caps from the (re-)authenticated connection.
  parse_caps(con->get_peer_caps_info(), s->auth_caps);

  dout(10) << "ms_handle_accept " << con->get_peer_socket_addr() << " con " << con << " session " << s << dendl;
  // NOTE(review): `s` is always non-null here (both branches above
  // ensure it); this check is redundant but harmless.
  if (s) {
    if (s->get_connection() != con) {
      // Session is switching to this connection: repoint it and flush
      // anything queued while no open connection was available.
      dout(10) << " session connection " << s->get_connection()
	       << " -> " << con << dendl;
      s->set_connection(con);

      // send out any queued messages
      while (!s->preopen_out_queue.empty()) {
	con->send_message2(s->preopen_out_queue.front());
	s->preopen_out_queue.pop_front();
      }
    }
  }
}
1147 | ||
1148 | bool MDSDaemon::is_clean_shutdown() | |
1149 | { | |
1150 | if (mds_rank) { | |
1151 | return mds_rank->is_stopped(); | |
1152 | } else { | |
1153 | return true; | |
1154 | } | |
1155 | } |