]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include <unistd.h> | |
16 | ||
17 | #include "include/compat.h" | |
7c673cae FG |
18 | #include "include/types.h" |
19 | #include "include/str_list.h" | |
c07f9fc5 | 20 | |
7c673cae | 21 | #include "common/Clock.h" |
c07f9fc5 FG |
22 | #include "common/HeartbeatMap.h" |
23 | #include "common/Timer.h" | |
7c673cae | 24 | #include "common/ceph_argparse.h" |
c07f9fc5 FG |
25 | #include "common/config.h" |
26 | #include "common/entity_name.h" | |
7c673cae | 27 | #include "common/errno.h" |
c07f9fc5 FG |
28 | #include "common/perf_counters.h" |
29 | #include "common/signal.h" | |
30 | #include "common/version.h" | |
31 | ||
32 | #include "global/signal_handler.h" | |
7c673cae FG |
33 | |
34 | #include "msg/Messenger.h" | |
35 | #include "mon/MonClient.h" | |
36 | ||
37 | #include "osdc/Objecter.h" | |
38 | ||
39 | #include "MDSMap.h" | |
40 | ||
41 | #include "MDSDaemon.h" | |
42 | #include "Server.h" | |
43 | #include "Locker.h" | |
44 | ||
45 | #include "SnapServer.h" | |
46 | #include "SnapClient.h" | |
47 | ||
7c673cae FG |
48 | #include "events/ESession.h" |
49 | #include "events/ESubtreeMap.h" | |
50 | ||
7c673cae FG |
51 | #include "auth/AuthAuthorizeHandler.h" |
52 | #include "auth/RotatingKeyRing.h" | |
53 | #include "auth/KeyRing.h" | |
54 | ||
7c673cae FG |
55 | #include "perfglue/cpu_profiler.h" |
56 | #include "perfglue/heap_profiler.h" | |
57 | ||
58 | #define dout_context g_ceph_context | |
59 | #define dout_subsys ceph_subsys_mds | |
60 | #undef dout_prefix | |
61 | #define dout_prefix *_dout << "mds." << name << ' ' | |
20effc67 TL |
62 | |
63 | using std::string; | |
64 | using std::vector; | |
9f95a23c | 65 | using TOPNSPC::common::cmd_getval; |
20effc67 | 66 | |
7c673cae | 67 | // cons/des |
f67539c2 TL |
68 | MDSDaemon::MDSDaemon(std::string_view n, Messenger *m, MonClient *mc, |
69 | boost::asio::io_context& ioctx) : | |
7c673cae | 70 | Dispatcher(m->cct), |
7c673cae | 71 | timer(m->cct, mds_lock), |
11fdf7f2 | 72 | gss_ktfile_client(m->cct->_conf.get_val<std::string>("gss_ktab_client_file")), |
7c673cae | 73 | beacon(m->cct, mc, n), |
7c673cae FG |
74 | name(n), |
75 | messenger(m), | |
76 | monc(mc), | |
f67539c2 | 77 | ioctx(ioctx), |
9f95a23c | 78 | mgrc(m->cct, m, &mc->monmap), |
7c673cae | 79 | log_client(m->cct, messenger, &mc->monmap, LogClient::NO_FLAGS), |
94b18763 | 80 | starttime(mono_clock::now()) |
7c673cae FG |
81 | { |
82 | orig_argc = 0; | |
83 | orig_argv = NULL; | |
84 | ||
85 | clog = log_client.create_channel(); | |
11fdf7f2 TL |
86 | if (!gss_ktfile_client.empty()) { |
87 | // Assert we can export environment variable | |
88 | /* | |
89 | The default client keytab is used, if it is present and readable, | |
90 | to automatically obtain initial credentials for GSSAPI client | |
91 | applications. The principal name of the first entry in the client | |
92 | keytab is used by default when obtaining initial credentials. | |
93 | 1. The KRB5_CLIENT_KTNAME environment variable. | |
94 | 2. The default_client_keytab_name profile variable in [libdefaults]. | |
95 | 3. The hardcoded default, DEFCKTNAME. | |
96 | */ | |
97 | const int32_t set_result(setenv("KRB5_CLIENT_KTNAME", | |
98 | gss_ktfile_client.c_str(), 1)); | |
99 | ceph_assert(set_result == 0); | |
100 | } | |
7c673cae | 101 | |
11fdf7f2 | 102 | mdsmap.reset(new MDSMap); |
7c673cae FG |
103 | } |
104 | ||
105 | MDSDaemon::~MDSDaemon() { | |
11fdf7f2 | 106 | std::lock_guard lock(mds_lock); |
7c673cae FG |
107 | |
108 | delete mds_rank; | |
109 | mds_rank = NULL; | |
7c673cae FG |
110 | } |
111 | ||
/**
 * Admin-socket hook that forwards asok commands to the owning MDSDaemon.
 *
 * All MDS commands are handled asynchronously via call_async(); the
 * synchronous call() entry point must never be reached and aborts if it is.
 * The hook does not own `mds`; MDSDaemon registers/unregisters it in
 * set_up_admin_socket()/clean_up_admin_socket().
 */
class MDSSocketHook : public AdminSocketHook {
  MDSDaemon *mds;  // back-pointer to owner; not owned by the hook
public:
  explicit MDSSocketHook(MDSDaemon *m) : mds(m) {}
  // Synchronous path: unused for the MDS, everything routes to call_async.
  int call(
    std::string_view command,
    const cmdmap_t& cmdmap,
    const bufferlist&,
    Formatter *f,
    std::ostream& errss,
    ceph::buffer::list& out) override {
    ceph_abort("should go to call_async");
  }
  // Asynchronous path: on_finish delivers (retcode, message, output data)
  // once the command completes; MDSDaemon::asok_command owns the dispatch.
  void call_async(
    std::string_view command,
    const cmdmap_t& cmdmap,
    Formatter *f,
    const bufferlist& inbl,
    std::function<void(int,const std::string&,bufferlist&)> on_finish) override {
    mds->asok_command(command, cmdmap, f, inbl, on_finish);
  }
};
134 | ||
9f95a23c TL |
/**
 * Handle an admin-socket command for this daemon.
 *
 * Daemon-level commands (status/exit/respawn/heap/cpu_profiler) are handled
 * here; anything else is delegated to the MDSRank, which takes ownership of
 * calling on_finish. On the delegation path we return early and must NOT
 * invoke on_finish ourselves.
 *
 * @param command   asok command prefix.
 * @param cmdmap    parsed command arguments.
 * @param f         formatter for structured output.
 * @param inbl      input data blob.
 * @param on_finish completion callback: (retcode, error string, output blob).
 */
void MDSDaemon::asok_command(
  std::string_view command,
  const cmdmap_t& cmdmap,
  Formatter *f,
  const bufferlist& inbl,
  std::function<void(int,const std::string&,bufferlist&)> on_finish)
{
  dout(1) << "asok_command: " << command << " " << cmdmap
	  << " (starting...)" << dendl;

  // Default: command not recognized at this level.
  int r = -CEPHFS_ENOSYS;
  bufferlist outbl;
  CachedStackStringStream css;
  auto& ss = *css;
  if (command == "status") {
    dump_status(f);
    r = 0;
  } else if (command == "exit") {
    outbl.append("Exiting...\n");
    r = 0;
    // suicide() is deferred to a detached thread so the asok response
    // (sent via on_finish below) can reach the caller first.
    std::thread t([this](){
      // Wait a little to improve chances of caller getting
      // our response before seeing us disappear from mdsmap
      sleep(1);
      std::lock_guard l(mds_lock);
      suicide();
    });
    t.detach();
  } else if (command == "respawn") {
    outbl.append("Respawning...\n");
    r = 0;
    // Same deferred pattern as "exit", but re-exec instead of dying.
    std::thread t([this](){
      // Wait a little to improve chances of caller getting
      // our response before seeing us disappear from mdsmap
      sleep(1);
      std::lock_guard l(mds_lock);
      respawn();
    });
    t.detach();
  } else if (command == "heap") {
    if (!ceph_using_tcmalloc()) {
      ss << "not using tcmalloc";
      r = -CEPHFS_EOPNOTSUPP;
    } else {
      // Build argv-style vector for the tcmalloc heap profiler handler;
      // optional "value" argument is appended as the last token.
      string heapcmd;
      cmd_getval(cmdmap, "heapcmd", heapcmd);
      vector<string> heapcmd_vec;
      get_str_vec(heapcmd, heapcmd_vec);
      string value;
      if (cmd_getval(cmdmap, "value", value)) {
	heapcmd_vec.push_back(value);
      }
      std::stringstream outss;
      ceph_heap_profiler_handle_command(heapcmd_vec, outss);
      outbl.append(outss);
      r = 0;
    }
  } else if (command == "cpu_profiler") {
    string arg;
    cmd_getval(cmdmap, "arg", arg);
    vector<string> argvec;
    get_str_vec(arg, argvec);
    cpu_profiler_handle_command(argvec, ss);
    r = 0;
  } else {
    // Rank-level command: only valid once we hold a rank.
    if (mds_rank == NULL) {
      dout(1) << "Can't run that command on an inactive MDS!" << dendl;
      f->dump_string("error", "mds_not_active");
    } else {
      try {
	// MDSRank takes over; it will call on_finish itself.
	mds_rank->handle_asok_command(command, cmdmap, f, inbl, on_finish);
	return;
      } catch (const TOPNSPC::common::bad_cmd_get& e) {
	ss << e.what();
	r = -CEPHFS_EINVAL;
      }
    }
  }
  on_finish(r, ss.str(), outbl);
}
215 | ||
216 | void MDSDaemon::dump_status(Formatter *f) | |
217 | { | |
218 | f->open_object_section("status"); | |
219 | f->dump_stream("cluster_fsid") << monc->get_fsid(); | |
220 | if (mds_rank) { | |
221 | f->dump_int("whoami", mds_rank->get_nodeid()); | |
222 | } else { | |
223 | f->dump_int("whoami", MDS_RANK_NONE); | |
224 | } | |
225 | ||
226 | f->dump_int("id", monc->get_global_id()); | |
227 | f->dump_string("want_state", ceph_mds_state_name(beacon.get_want_state())); | |
228 | f->dump_string("state", ceph_mds_state_name(mdsmap->get_state_gid(mds_gid_t( | |
229 | monc->get_global_id())))); | |
230 | if (mds_rank) { | |
11fdf7f2 | 231 | std::lock_guard l(mds_lock); |
7c673cae FG |
232 | mds_rank->dump_status(f); |
233 | } | |
234 | ||
235 | f->dump_unsigned("mdsmap_epoch", mdsmap->get_epoch()); | |
236 | if (mds_rank) { | |
237 | f->dump_unsigned("osdmap_epoch", mds_rank->get_osd_epoch()); | |
238 | f->dump_unsigned("osdmap_epoch_barrier", mds_rank->get_osd_epoch_barrier()); | |
239 | } else { | |
240 | f->dump_unsigned("osdmap_epoch", 0); | |
241 | f->dump_unsigned("osdmap_epoch_barrier", 0); | |
242 | } | |
94b18763 FG |
243 | |
244 | f->dump_float("uptime", get_uptime().count()); | |
245 | ||
7c673cae FG |
246 | f->close_section(); // status |
247 | } | |
248 | ||
249 | void MDSDaemon::set_up_admin_socket() | |
250 | { | |
251 | int r; | |
252 | AdminSocket *admin_socket = g_ceph_context->get_admin_socket(); | |
11fdf7f2 | 253 | ceph_assert(asok_hook == nullptr); |
7c673cae | 254 | asok_hook = new MDSSocketHook(this); |
9f95a23c | 255 | r = admin_socket->register_command("status", asok_hook, |
7c673cae | 256 | "high-level status of MDS"); |
11fdf7f2 | 257 | ceph_assert(r == 0); |
9f95a23c | 258 | r = admin_socket->register_command("dump_ops_in_flight", asok_hook, |
7c673cae | 259 | "show the ops currently in flight"); |
11fdf7f2 | 260 | ceph_assert(r == 0); |
9f95a23c | 261 | r = admin_socket->register_command("ops", asok_hook, |
7c673cae | 262 | "show the ops currently in flight"); |
11fdf7f2 | 263 | ceph_assert(r == 0); |
9f95a23c | 264 | r = admin_socket->register_command("dump_blocked_ops", |
7c673cae FG |
265 | asok_hook, |
266 | "show the blocked ops currently in flight"); | |
11fdf7f2 | 267 | ceph_assert(r == 0); |
9f95a23c | 268 | r = admin_socket->register_command("dump_historic_ops", |
7c673cae | 269 | asok_hook, |
11fdf7f2 TL |
270 | "show recent ops"); |
271 | ceph_assert(r == 0); | |
9f95a23c | 272 | r = admin_socket->register_command("dump_historic_ops_by_duration", |
7c673cae | 273 | asok_hook, |
11fdf7f2 TL |
274 | "show recent ops, sorted by op duration"); |
275 | ceph_assert(r == 0); | |
9f95a23c | 276 | r = admin_socket->register_command("scrub_path name=path,type=CephString " |
7c673cae | 277 | "name=scrubops,type=CephChoices," |
9f95a23c TL |
278 | "strings=force|recursive|repair,n=N,req=false " |
279 | "name=tag,type=CephString,req=false", | |
7c673cae FG |
280 | asok_hook, |
281 | "scrub an inode and output results"); | |
11fdf7f2 | 282 | ceph_assert(r == 0); |
9f95a23c TL |
283 | r = admin_socket->register_command("scrub start " |
284 | "name=path,type=CephString " | |
285 | "name=scrubops,type=CephChoices,strings=force|recursive|repair,n=N,req=false " | |
286 | "name=tag,type=CephString,req=false", | |
287 | asok_hook, | |
288 | "scrub and inode and output results"); | |
289 | ceph_assert(r == 0); | |
290 | r = admin_socket->register_command("scrub abort", | |
291 | asok_hook, | |
292 | "Abort in progress scrub operations(s)"); | |
293 | ceph_assert(r == 0); | |
294 | r = admin_socket->register_command("scrub pause", | |
295 | asok_hook, | |
296 | "Pause in progress scrub operations(s)"); | |
297 | ceph_assert(r == 0); | |
298 | r = admin_socket->register_command("scrub resume", | |
299 | asok_hook, | |
300 | "Resume paused scrub operations(s)"); | |
301 | ceph_assert(r == 0); | |
302 | r = admin_socket->register_command("scrub status", | |
303 | asok_hook, | |
304 | "Status of scrub operations(s)"); | |
305 | ceph_assert(r == 0); | |
306 | r = admin_socket->register_command("tag path name=path,type=CephString" | |
7c673cae FG |
307 | " name=tag,type=CephString", |
308 | asok_hook, | |
309 | "Apply scrub tag recursively"); | |
11fdf7f2 | 310 | ceph_assert(r == 0); |
9f95a23c | 311 | r = admin_socket->register_command("flush_path name=path,type=CephString", |
7c673cae FG |
312 | asok_hook, |
313 | "flush an inode (and its dirfrags)"); | |
11fdf7f2 | 314 | ceph_assert(r == 0); |
9f95a23c | 315 | r = admin_socket->register_command("export dir " |
7c673cae FG |
316 | "name=path,type=CephString " |
317 | "name=rank,type=CephInt", | |
318 | asok_hook, | |
319 | "migrate a subtree to named MDS"); | |
11fdf7f2 | 320 | ceph_assert(r == 0); |
20effc67 TL |
321 | r = admin_socket->register_command("dump cache " |
322 | "name=path,type=CephString,req=false " | |
323 | "name=timeout,type=CephInt,range=0,req=false", | |
7c673cae FG |
324 | asok_hook, |
325 | "dump metadata cache (optionally to a file)"); | |
11fdf7f2 | 326 | ceph_assert(r == 0); |
9f95a23c TL |
327 | r = admin_socket->register_command("cache drop " |
328 | "name=timeout,type=CephInt,range=0,req=false", | |
329 | asok_hook, | |
330 | "trim cache and optionally request client to release all caps and flush the journal"); | |
331 | ceph_assert(r == 0); | |
181888fb | 332 | r = admin_socket->register_command("cache status", |
181888fb FG |
333 | asok_hook, |
334 | "show cache status"); | |
11fdf7f2 | 335 | ceph_assert(r == 0); |
9f95a23c | 336 | r = admin_socket->register_command("dump tree " |
7c673cae FG |
337 | "name=root,type=CephString,req=true " |
338 | "name=depth,type=CephInt,req=false ", | |
339 | asok_hook, | |
340 | "dump metadata cache for subtree"); | |
11fdf7f2 | 341 | ceph_assert(r == 0); |
28e407b8 | 342 | r = admin_socket->register_command("dump loads", |
28e407b8 AA |
343 | asok_hook, |
344 | "dump metadata loads"); | |
11fdf7f2 | 345 | ceph_assert(r == 0); |
9f95a23c | 346 | r = admin_socket->register_command("dump snaps name=server,type=CephChoices,strings=--server,req=false", |
11fdf7f2 TL |
347 | asok_hook, |
348 | "dump snapshots"); | |
349 | ceph_assert(r == 0); | |
adb31ebb TL |
350 | r = admin_socket->register_command("session ls " |
351 | "name=cap_dump,type=CephBool,req=false " | |
352 | "name=filters,type=CephString,n=N,req=false ", | |
9f95a23c TL |
353 | asok_hook, |
354 | "List client sessions based on a filter"); | |
355 | ceph_assert(r == 0); | |
adb31ebb TL |
356 | r = admin_socket->register_command("client ls " |
357 | "name=cap_dump,type=CephBool,req=false " | |
358 | "name=filters,type=CephString,n=N,req=false ", | |
9f95a23c TL |
359 | asok_hook, |
360 | "List client sessions based on a filter"); | |
361 | ceph_assert(r == 0); | |
362 | r = admin_socket->register_command("session evict name=filters,type=CephString,n=N,req=false", | |
363 | asok_hook, | |
364 | "Evict client session(s) based on a filter"); | |
365 | ceph_assert(r == 0); | |
366 | r = admin_socket->register_command("client evict name=filters,type=CephString,n=N,req=false", | |
7c673cae | 367 | asok_hook, |
9f95a23c TL |
368 | "Evict client session(s) based on a filter"); |
369 | ceph_assert(r == 0); | |
370 | r = admin_socket->register_command("session kill name=client_id,type=CephString", | |
371 | asok_hook, | |
372 | "Evict a client session by id"); | |
11fdf7f2 | 373 | ceph_assert(r == 0); |
adb31ebb | 374 | r = admin_socket->register_command("session ls name=cap_dump,type=CephBool,req=false", |
7c673cae FG |
375 | asok_hook, |
376 | "Enumerate connected CephFS clients"); | |
11fdf7f2 | 377 | ceph_assert(r == 0); |
9f95a23c TL |
378 | r = admin_socket->register_command("session config " |
379 | "name=client_id,type=CephInt,req=true " | |
380 | "name=option,type=CephString,req=true " | |
381 | "name=value,type=CephString,req=false ", | |
382 | asok_hook, | |
383 | "Config a CephFS client session"); | |
20effc67 | 384 | ceph_assert(r == 0); |
9f95a23c TL |
385 | r = admin_socket->register_command("client config " |
386 | "name=client_id,type=CephInt,req=true " | |
92f5a8d4 TL |
387 | "name=option,type=CephString,req=true " |
388 | "name=value,type=CephString,req=false ", | |
389 | asok_hook, | |
390 | "Config a CephFS client session"); | |
20effc67 | 391 | ceph_assert(r == 0); |
9f95a23c TL |
392 | r = admin_socket->register_command("damage ls", |
393 | asok_hook, | |
394 | "List detected metadata damage"); | |
20effc67 | 395 | ceph_assert(r == 0); |
9f95a23c TL |
396 | r = admin_socket->register_command("damage rm " |
397 | "name=damage_id,type=CephInt", | |
398 | asok_hook, | |
399 | "Remove a damage table entry"); | |
20effc67 | 400 | ceph_assert(r == 0); |
9f95a23c | 401 | r = admin_socket->register_command("osdmap barrier name=target_epoch,type=CephInt", |
92f5a8d4 TL |
402 | asok_hook, |
403 | "Wait until the MDS has this OSD map epoch"); | |
404 | ceph_assert(r == 0); | |
7c673cae | 405 | r = admin_socket->register_command("flush journal", |
7c673cae FG |
406 | asok_hook, |
407 | "Flush the journal to the backing store"); | |
11fdf7f2 | 408 | ceph_assert(r == 0); |
7c673cae | 409 | r = admin_socket->register_command("force_readonly", |
7c673cae FG |
410 | asok_hook, |
411 | "Force MDS to read-only mode"); | |
11fdf7f2 | 412 | ceph_assert(r == 0); |
7c673cae | 413 | r = admin_socket->register_command("get subtrees", |
7c673cae FG |
414 | asok_hook, |
415 | "Return the subtree map"); | |
11fdf7f2 | 416 | ceph_assert(r == 0); |
9f95a23c | 417 | r = admin_socket->register_command("dirfrag split " |
7c673cae FG |
418 | "name=path,type=CephString,req=true " |
419 | "name=frag,type=CephString,req=true " | |
420 | "name=bits,type=CephInt,req=true ", | |
421 | asok_hook, | |
422 | "Fragment directory by path"); | |
11fdf7f2 | 423 | ceph_assert(r == 0); |
9f95a23c | 424 | r = admin_socket->register_command("dirfrag merge " |
7c673cae FG |
425 | "name=path,type=CephString,req=true " |
426 | "name=frag,type=CephString,req=true", | |
427 | asok_hook, | |
428 | "De-fragment directory by path"); | |
11fdf7f2 | 429 | ceph_assert(r == 0); |
9f95a23c | 430 | r = admin_socket->register_command("dirfrag ls " |
7c673cae FG |
431 | "name=path,type=CephString,req=true", |
432 | asok_hook, | |
433 | "List fragments in directory"); | |
11fdf7f2 TL |
434 | ceph_assert(r == 0); |
435 | r = admin_socket->register_command("openfiles ls", | |
11fdf7f2 TL |
436 | asok_hook, |
437 | "List the opening files and their caps"); | |
438 | ceph_assert(r == 0); | |
9f95a23c | 439 | r = admin_socket->register_command("dump inode " |
11fdf7f2 TL |
440 | "name=number,type=CephInt,req=true", |
441 | asok_hook, | |
442 | "dump inode by inode number"); | |
443 | ceph_assert(r == 0); | |
9f95a23c TL |
444 | r = admin_socket->register_command("exit", |
445 | asok_hook, | |
446 | "Terminate this MDS"); | |
447 | r = admin_socket->register_command("respawn", | |
448 | asok_hook, | |
449 | "Respawn this MDS"); | |
450 | ceph_assert(r == 0); | |
451 | r = admin_socket->register_command( | |
452 | "heap " \ | |
453 | "name=heapcmd,type=CephChoices,strings=" \ | |
454 | "dump|start_profiler|stop_profiler|release|get_release_rate|set_release_rate|stats " \ | |
455 | "name=value,type=CephString,req=false", | |
456 | asok_hook, | |
457 | "show heap usage info (available only if compiled with tcmalloc)"); | |
458 | ceph_assert(r == 0); | |
459 | r = admin_socket->register_command( | |
460 | "cpu_profiler " \ | |
461 | "name=arg,type=CephChoices,strings=status|flush", | |
462 | asok_hook, | |
463 | "run cpu profiling on daemon"); | |
464 | ceph_assert(r == 0); | |
7c673cae FG |
465 | } |
466 | ||
467 | void MDSDaemon::clean_up_admin_socket() | |
468 | { | |
11fdf7f2 | 469 | g_ceph_context->get_admin_socket()->unregister_commands(asok_hook); |
7c673cae FG |
470 | delete asok_hook; |
471 | asok_hook = NULL; | |
472 | } | |
473 | ||
7c673cae FG |
/**
 * One-time daemon startup: wire dispatchers, initialize and authenticate the
 * monitor client, obtain rotating service keys, subscribe to the mdsmap,
 * set up the admin socket, and start the tick timer.
 *
 * @return 0 on success (including the benign "already terminated" case),
 *         negative CEPHFS_* error code on failure (after suicide()).
 *
 * Locking: mds_lock is taken and released manually around the early-exit
 * checks so that monc calls happen unlocked; do not convert blindly to a
 * single scoped guard.
 */
int MDSDaemon::init()
{
#ifdef _WIN32
  // Some file related flags and types are stubbed on Windows. In order to avoid
  // incorrect behavior, we're going to prevent the MDS from running on Windows
  // until those limitations are addressed. MDS clients, however, are allowed
  // to run on Windows.
  derr << "The Ceph MDS does not support running on Windows at the moment."
       << dendl;
  return -CEPHFS_ENOSYS;
#endif // _WIN32

  // Debug aid: log sizes of the hot metadata structures so memory use per
  // cached object can be estimated from logs.
  dout(10) << "Dumping misc struct sizes:" << dendl;
  dout(10) << sizeof(MDSCacheObject) << "\tMDSCacheObject" << dendl;
  dout(10) << sizeof(CInode) << "\tCInode" << dendl;
  dout(10) << sizeof(elist<void*>::item) << "\telist<>::item" << dendl;
  dout(10) << sizeof(CInode::mempool_inode) << "\tinode" << dendl;
  dout(10) << sizeof(CInode::mempool_old_inode) << "\told_inode" << dendl;
  dout(10) << sizeof(nest_info_t) << "\tnest_info_t" << dendl;
  dout(10) << sizeof(frag_info_t) << "\tfrag_info_t" << dendl;
  dout(10) << sizeof(SimpleLock) << "\tSimpleLock" << dendl;
  dout(10) << sizeof(ScatterLock) << "\tScatterLock" << dendl;
  dout(10) << sizeof(CDentry) << "\tCDentry" << dendl;
  dout(10) << sizeof(elist<void*>::item) << "\telist<>::item" << dendl;
  dout(10) << sizeof(SimpleLock) << "\tSimpleLock" << dendl;
  dout(10) << sizeof(CDir) << "\tCDir" << dendl;
  dout(10) << sizeof(elist<void*>::item) << "\telist<>::item" << dendl;
  dout(10) << sizeof(fnode_t) << "\tfnode_t" << dendl;
  dout(10) << sizeof(nest_info_t) << "\tnest_info_t" << dendl;
  dout(10) << sizeof(frag_info_t) << "\tfrag_info_t" << dendl;
  dout(10) << sizeof(Capability) << "\tCapability" << dendl;
  dout(10) << sizeof(xlist<void*>::item) << "\txlist<>::item" << dendl;

  // Beacon must see messages before the daemon's own dispatcher.
  messenger->add_dispatcher_tail(&beacon);
  messenger->add_dispatcher_tail(this);

  // init monc
  monc->set_messenger(messenger);

  monc->set_want_keys(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD |
		      CEPH_ENTITY_TYPE_MDS | CEPH_ENTITY_TYPE_MGR);
  int r = 0;
  r = monc->init();
  if (r < 0) {
    derr << "ERROR: failed to init monc: " << cpp_strerror(-r) << dendl;
    // suicide() requires mds_lock held.
    mds_lock.lock();
    suicide();
    mds_lock.unlock();
    return r;
  }

  messenger->set_auth_client(monc);
  messenger->set_auth_server(monc);
  monc->set_handle_authentication_dispatcher(this);

  // tell monc about log_client so it will know about mon session resets
  monc->set_log_client(&log_client);

  r = monc->authenticate();
  if (r < 0) {
    derr << "ERROR: failed to authenticate: " << cpp_strerror(-r) << dendl;
    mds_lock.lock();
    suicide();
    mds_lock.unlock();
    return r;
  }

  // Keep retrying rotating-key fetch up to the configured attempt limit;
  // persistent failure usually indicates clock skew vs. the monitors.
  int rotating_auth_attempts = 0;
  auto rotating_auth_timeout =
    g_conf().get_val<int64_t>("rotating_keys_bootstrap_timeout");
  while (monc->wait_auth_rotating(rotating_auth_timeout) < 0) {
    if (++rotating_auth_attempts <= g_conf()->max_rotating_auth_attempts) {
      derr << "unable to obtain rotating service keys; retrying" << dendl;
      continue;
    }
    derr << "ERROR: failed to refresh rotating keys, "
	 << "maximum retry time reached."
	 << " Maybe I have a clock skew against the monitors?" << dendl;
    std::lock_guard locker{mds_lock};
    suicide();
    return -CEPHFS_ETIMEDOUT;
  }

  mds_lock.lock();
  // Someone may have asked us to shut down while we were authenticating.
  if (beacon.get_want_state() == CEPH_MDS_STATE_DNE) {
    dout(4) << __func__ << ": terminated already, dropping out" << dendl;
    mds_lock.unlock();
    return 0;
  }

  // Subscribe to mdsmap updates from the monitors.
  monc->sub_want("mdsmap", 0, 0);
  monc->renew_subs();

  mds_lock.unlock();

  // Set up admin socket before taking mds_lock, so that ordering
  // is consistent (later we take mds_lock within asok callbacks)
  set_up_admin_socket();
  std::lock_guard locker{mds_lock};
  // Re-check after re-acquiring the lock.
  if (beacon.get_want_state() == MDSMap::STATE_DNE) {
    suicide(); // we could do something more graceful here
    dout(4) << __func__ << ": terminated already, dropping out" << dendl;
    return 0;
  }

  timer.init();

  // Begin beaconing to the monitors; rankless until a map assigns us one.
  beacon.init(*mdsmap);
  messenger->set_myname(entity_name_t::MDS(MDS_RANK_NONE));

  // schedule tick
  reset_tick();
  return 0;
}
588 | ||
589 | void MDSDaemon::reset_tick() | |
590 | { | |
591 | // cancel old | |
592 | if (tick_event) timer.cancel_event(tick_event); | |
593 | ||
594 | // schedule | |
3efd9988 | 595 | tick_event = timer.add_event_after( |
11fdf7f2 | 596 | g_conf()->mds_tick_interval, |
9f95a23c TL |
597 | new LambdaContext([this](int) { |
598 | ceph_assert(ceph_mutex_is_locked_by_me(mds_lock)); | |
3efd9988 FG |
599 | tick(); |
600 | })); | |
7c673cae FG |
601 | } |
602 | ||
603 | void MDSDaemon::tick() | |
604 | { | |
7c673cae FG |
605 | // reschedule |
606 | reset_tick(); | |
607 | ||
608 | // Call through to subsystems' tick functions | |
609 | if (mds_rank) { | |
610 | mds_rank->tick(); | |
611 | } | |
612 | } | |
613 | ||
/**
 * Handle an MCommand ("tell") sent over a client connection.
 *
 * Performs session/capability checks, then either rejects with an
 * MCommandReply (bad caps, empty command, unparseable JSON) or hands the
 * message to the admin socket's tell-command queue (which then owns the
 * reply). The early return on the queue path is deliberate.
 */
void MDSDaemon::handle_command(const cref_t<MCommand> &m)
{
  auto priv = m->get_connection()->get_priv();
  auto session = static_cast<Session *>(priv.get());
  ceph_assert(session != NULL);

  int r = 0;
  cmdmap_t cmdmap;
  CachedStackStringStream css;
  auto& ss = *css;
  bufferlist outbl;

  // If someone is using a closed session for sending commands (e.g.
  // the ceph CLI) then we should feel free to clean up this connection
  // as soon as we've sent them a response.
  const bool live_session =
    session->get_state_seq() > 0 &&
    mds_rank &&
    mds_rank->sessionmap.get_session(session->info.inst.name);

  if (!live_session) {
    // This session only existed to issue commands, so terminate it
    // as soon as we can.
    ceph_assert(session->is_closed());
    session->get_connection()->mark_disposable();
  }
  // NOTE(review): `session` is still dereferenced below after priv.reset()
  // drops our reference — presumably the connection keeps the Session alive
  // for the duration of this call; confirm against Connection/Session
  // refcounting before reordering.
  priv.reset();

  if (!session->auth_caps.allow_all()) {
    dout(1) << __func__
	    << ": received command from client without `tell` capability: "
	    << *m->get_connection()->peer_addrs << dendl;

    ss << "permission denied";
    r = -CEPHFS_EACCES;
  } else if (m->cmd.empty()) {
    r = -CEPHFS_EINVAL;
    ss << "no command given";
  } else if (!TOPNSPC::common::cmdmap_from_json(m->cmd, &cmdmap, ss)) {
    // cmdmap_from_json wrote the parse error into ss.
    r = -CEPHFS_EINVAL;
  } else {
    // Valid command: the admin socket machinery handles it and replies.
    cct->get_admin_socket()->queue_tell_command(m);
    return;
  }

  // Error path: reply directly with the failure code and message.
  auto reply = make_message<MCommandReply>(r, ss.str());
  reply->set_tid(m->get_tid());
  reply->set_data(outbl);
  m->get_connection()->send_message2(reply);
}
664 | ||
/**
 * Process a new MDSMap from the monitors.
 *
 * Discards stale epochs, swaps in the decoded map, verifies compatibility,
 * then drives the daemon's state machine based on our gid's transition from
 * the old map to the new one: removed -> respawn; newly added -> init the
 * MgrClient; assigned a rank -> create and init MDSRankDispatcher (or
 * forward the map to the existing rank); rankless -> track standby state.
 * Finally notifies the beacon of the new map.
 */
void MDSDaemon::handle_mds_map(const cref_t<MMDSMap> &m)
{
  version_t epoch = m->get_epoch();

  // is it new?
  if (epoch <= mdsmap->get_epoch()) {
    dout(5) << "handle_mds_map old map epoch " << epoch << " <= "
	    << mdsmap->get_epoch() << ", discarding" << dendl;
    return;
  }

  dout(1) << "Updating MDS map to version " << epoch << " from " << m->get_source() << dendl;

  // keep old map, for a moment
  std::unique_ptr<MDSMap> oldmap;
  oldmap.swap(mdsmap);

  // decode and process
  mdsmap.reset(new MDSMap);
  mdsmap->decode(m->get_encoded());

  // Acknowledge the subscription so the mon only sends newer epochs.
  monc->sub_got("mdsmap", mdsmap->get_epoch());

  // verify compatset
  CompatSet mdsmap_compat(MDSMap::get_compat_set_all());
  dout(10) << " my compat " << mdsmap_compat << dendl;
  dout(10) << " mdsmap compat " << mdsmap->compat << dendl;
  if (!mdsmap_compat.writeable(mdsmap->compat)) {
    dout(0) << "handle_mds_map mdsmap compatset " << mdsmap->compat
	    << " not writeable with daemon features " << mdsmap_compat
	    << ", killing myself" << dendl;
    suicide();
    return;
  }

  // Calculate my effective rank (either my owned rank or the rank I'm following if STATE_STANDBY_REPLAY
  const auto addrs = messenger->get_myaddrs();
  const auto myid = monc->get_global_id();
  const auto mygid = mds_gid_t(myid);
  const auto whoami = mdsmap->get_rank_gid(mygid);
  const auto old_state = oldmap->get_state_gid(mygid);
  const auto new_state = mdsmap->get_state_gid(mygid);
  const auto incarnation = mdsmap->get_inc_gid(mygid);
  dout(10) << "my gid is " << myid << dendl;
  dout(10) << "map says I am mds." << whoami << "." << incarnation
	   << " state " << ceph_mds_state_name(new_state) << dendl;
  dout(10) << "msgr says I am " << addrs << dendl;

  // If we're removed from the MDSMap, stop all processing.
  using DS = MDSMap::DaemonState;
  if (old_state != DS::STATE_NULL && new_state == DS::STATE_NULL) {
    const auto& oldinfo = oldmap->get_info_gid(mygid);
    dout(1) << "Map removed me " << oldinfo
	    << " from cluster; respawning! See cluster/monitor logs for details." << dendl;
    respawn();
  }

  if (old_state == DS::STATE_NULL && new_state != DS::STATE_NULL) {
    /* The MDS has been added to the FSMap, now we can init the MgrClient */
    mgrc.init();
    messenger->add_dispatcher_tail(&mgrc);
    monc->sub_want("mgrmap", 0, 0);
    monc->renew_subs(); /* MgrMap receipt drives connection to ceph-mgr */
  }

  // mark down any failed peers
  for (const auto& [gid, info] : oldmap->get_mds_info()) {
    if (mdsmap->get_mds_info().count(gid) == 0) {
      dout(10) << " peer mds gid " << gid << " removed from map" << dendl;
      messenger->mark_down_addrs(info.addrs);
    }
  }

  if (whoami == MDS_RANK_NONE) {
    // We do not hold a rank:
    dout(10) << __func__ << ": handling map in rankless mode" << dendl;

    if (new_state == DS::STATE_STANDBY) {
      /* Note: STATE_BOOT is never an actual state in the FSMap. The Monitors
       * generally mark a new MDS as STANDBY (although it's possible to
       * immediately be assigned a rank).
       */
      if (old_state == DS::STATE_NULL) {
	dout(1) << "Monitors have assigned me to become a standby." << dendl;
	beacon.set_want_state(*mdsmap, new_state);
      } else if (old_state == DS::STATE_STANDBY) {
	dout(5) << "I am still standby" << dendl;
      }
    } else if (new_state == DS::STATE_NULL) {
      /* We are not in the MDSMap yet! Keep waiting: */
      ceph_assert(beacon.get_want_state() == DS::STATE_BOOT);
      dout(10) << "not in map yet" << dendl;
    } else {
      /* We moved to standby somehow from another state */
      ceph_abort("invalid transition to standby");
    }
  } else {
    // Did we already hold a different rank? MDSMonitor shouldn't try
    // to change that out from under me!
    if (mds_rank && whoami != mds_rank->get_nodeid()) {
      derr << "Invalid rank transition " << mds_rank->get_nodeid() << "->"
	   << whoami << dendl;
      respawn();
    }

    // Did I previously not hold a rank? Initialize!
    if (mds_rank == NULL) {
      // Respawn on rank failure, suicide on fatal error — both contexts
      // are owned (and eventually freed) by the rank dispatcher.
      mds_rank = new MDSRankDispatcher(whoami, mds_lock, clog,
	  timer, beacon, mdsmap, messenger, monc, &mgrc,
	  new LambdaContext([this](int r){respawn();}),
	  new LambdaContext([this](int r){suicide();}),
	  ioctx);
      dout(10) << __func__ << ": initializing MDS rank "
	       << mds_rank->get_nodeid() << dendl;
      mds_rank->init();
    }

    // MDSRank is active: let him process the map, we have no say.
    dout(10) << __func__ << ": handling map as rank "
	     << mds_rank->get_nodeid() << dendl;
    mds_rank->handle_mds_map(m, *oldmap);
  }

  beacon.notify_mdsmap(*mdsmap);
}
790 | ||
7c673cae FG |
791 | void MDSDaemon::handle_signal(int signum) |
792 | { | |
11fdf7f2 | 793 | ceph_assert(signum == SIGINT || signum == SIGTERM); |
7c673cae FG |
794 | derr << "*** got signal " << sig_str(signum) << " ***" << dendl; |
795 | { | |
11fdf7f2 | 796 | std::lock_guard l(mds_lock); |
7c673cae FG |
797 | if (stopping) { |
798 | return; | |
799 | } | |
800 | suicide(); | |
801 | } | |
802 | } | |
803 | ||
// Cleanly shut down this daemon.  Caller must hold mds_lock, and this must
// only ever be invoked once per process (guarded by the `stopping` flag).
void MDSDaemon::suicide()
{
  ceph_assert(ceph_mutex_is_locked(mds_lock));

  // make sure we don't suicide twice
  ceph_assert(stopping == false);
  stopping = true;

  dout(1) << "suicide! Wanted state "
	  << ceph_mds_state_name(beacon.get_want_state()) << dendl;

  // Stop the periodic tick so no further tick callbacks fire mid-teardown.
  if (tick_event) {
    timer.cancel_event(tick_event);
    tick_event = 0;
  }

  clean_up_admin_socket();

  // Notify the Monitors (MDSMonitor) that we're dying, so that it doesn't have
  // to wait for us to go laggy.  Only do this if we're actually in the MDSMap,
  // because otherwise the MDSMonitor will drop our message.
  beacon.set_want_state(*mdsmap, MDSMap::STATE_DNE);
  if (!mdsmap->is_dne_gid(mds_gid_t(monc->get_global_id()))) {
    // send_and_wait(1): one final beacon, synchronously, before shutdown.
    beacon.send_and_wait(1);
  }
  beacon.shutdown();

  // MgrClient is only init'ed once we appear in the MDSMap (see
  // handle_mds_map), so the shutdown must be conditional too.
  if (mgrc.is_initialized())
    mgrc.shutdown();

  if (mds_rank) {
    // NOTE(review): presumably MDSRank::shutdown() tears down the shared
    // timer/monc/messenger itself, since we skip doing so here — confirm.
    mds_rank->shutdown();
  } else {
    timer.shutdown();

    monc->shutdown();
    messenger->shutdown();
  }
}
843 | ||
// Replace this process image by re-exec'ing the MDS with its original
// command line.  Never returns: on execv() failure we ceph_abort().
void MDSDaemon::respawn()
{
  // --- WARNING TO FUTURE COPY/PASTERS ---
  // You must also add a call like
  //
  //   ceph_pthread_setname(pthread_self(), "ceph-mds");
  //
  // to main() so that /proc/$pid/stat field 2 contains "(ceph-mds)"
  // instead of "(exe)", so that killall (and log rotation) will work.

  dout(1) << "respawn!" << dendl;

  /* Dump recent in case the MDS was stuck doing something which caused it to
   * be removed from the MDSMap leading to respawn. */
  g_ceph_context->_log->dump_recent();

  /* valgrind can't handle execve; just exit and let QA infra restart */
  if (g_conf().get_val<bool>("mds_valgrind_exit")) {
    _exit(0);
  }

  // Rebuild the original argv as a NULL-terminated array for execv().
  char *new_argv[orig_argc+1];
  dout(1) << " e: '" << orig_argv[0] << "'" << dendl;
  for (int i=0; i<orig_argc; i++) {
    new_argv[i] = (char *)orig_argv[i];
    dout(1) << " " << i << ": '" << orig_argv[i] << "'" << dendl;
  }
  new_argv[orig_argc] = NULL;

  /* Determine the path to our executable, test if Linux /proc/self/exe exists.
   * This allows us to exec the same executable even if it has since been
   * unlinked.
   */
  char exe_path[PATH_MAX] = "";
#ifdef PROCPREFIX
  if (readlink(PROCPREFIX "/proc/self/exe", exe_path, PATH_MAX-1) != -1) {
    dout(1) << "respawning with exe " << exe_path << dendl;
    // The readlink() above is only a probe (and supplies the resolved path
    // for the log line); we deliberately exec the /proc/self/exe symlink
    // itself so the originally-running binary is used even if unlinked.
    strcpy(exe_path, PROCPREFIX "/proc/self/exe");
  } else {
#else
  {
#endif
    /* Print CWD for the user's interest */
    char buf[PATH_MAX];
    char *cwd = getcwd(buf, sizeof(buf));
    ceph_assert(cwd);
    dout(1) << " cwd " << cwd << dendl;

    /* Fall back to a best-effort: just running in our CWD */
    strncpy(exe_path, orig_argv[0], PATH_MAX-1);
  }

  dout(1) << " exe_path " << exe_path << dendl;

  unblock_all_signals(NULL);
  execv(exe_path, new_argv);

  // execv() only returns on error.
  dout(0) << "respawn execv " << orig_argv[0]
	  << " failed with " << cpp_strerror(errno) << dendl;

  // We have to assert out here, because suicide() returns, and callers
  // to respawn expect it never to return.
  ceph_abort();
}
908 | ||
909 | ||
910 | ||
9f95a23c | 911 | bool MDSDaemon::ms_dispatch2(const ref_t<Message> &m) |
7c673cae | 912 | { |
11fdf7f2 | 913 | std::lock_guard l(mds_lock); |
7c673cae FG |
914 | if (stopping) { |
915 | return false; | |
916 | } | |
917 | ||
918 | // Drop out early if shutting down | |
919 | if (beacon.get_want_state() == CEPH_MDS_STATE_DNE) { | |
920 | dout(10) << " stopping, discarding " << *m << dendl; | |
7c673cae FG |
921 | return true; |
922 | } | |
923 | ||
924 | // First see if it's a daemon message | |
925 | const bool handled_core = handle_core_message(m); | |
926 | if (handled_core) { | |
927 | return true; | |
928 | } | |
929 | ||
930 | // Not core, try it as a rank message | |
931 | if (mds_rank) { | |
932 | return mds_rank->ms_dispatch(m); | |
933 | } else { | |
934 | return false; | |
935 | } | |
936 | } | |
937 | ||
/*
 * high priority messages we always process
 */

// Guard macro used inside handle_core_message(): if the message's
// connection exists and its peer type is not in the `peers` mask, log the
// filtered message and return true (i.e. claim it as handled and drop it).
#define ALLOW_MESSAGES_FROM(peers) \
  do { \
    if (m->get_connection() && (m->get_connection()->get_peer_type() & (peers)) == 0) { \
      dout(0) << __FILE__ << "." << __LINE__ << ": filtered out request, peer=" \
	      << m->get_connection()->get_peer_type() << " allowing=" \
	      << #peers << " message=" << *m << dendl; \
      return true; \
    } \
  } while (0)
951 | ||
9f95a23c | 952 | bool MDSDaemon::handle_core_message(const cref_t<Message> &m) |
7c673cae FG |
953 | { |
954 | switch (m->get_type()) { | |
955 | case CEPH_MSG_MON_MAP: | |
956 | ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON); | |
7c673cae FG |
957 | break; |
958 | ||
959 | // MDS | |
960 | case CEPH_MSG_MDS_MAP: | |
961 | ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_MDS); | |
9f95a23c TL |
962 | handle_mds_map(ref_cast<MMDSMap>(m)); |
963 | break; | |
964 | ||
965 | case MSG_REMOVE_SNAPS: | |
966 | ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON); | |
967 | mds_rank->snapserver->handle_remove_snaps(ref_cast<MRemoveSnaps>(m)); | |
7c673cae FG |
968 | break; |
969 | ||
970 | // OSD | |
971 | case MSG_COMMAND: | |
9f95a23c | 972 | handle_command(ref_cast<MCommand>(m)); |
7c673cae FG |
973 | break; |
974 | case CEPH_MSG_OSD_MAP: | |
975 | ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD); | |
976 | ||
977 | if (mds_rank) { | |
978 | mds_rank->handle_osd_map(); | |
979 | } | |
7c673cae FG |
980 | break; |
981 | ||
982 | case MSG_MON_COMMAND: | |
983 | ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON); | |
984 | clog->warn() << "dropping `mds tell` command from legacy monitor"; | |
7c673cae FG |
985 | break; |
986 | ||
987 | default: | |
988 | return false; | |
989 | } | |
990 | return true; | |
991 | } | |
992 | ||
// Messenger callback for a completed outgoing connection.  Intentionally a
// no-op: the MDS daemon has no per-connection setup to perform here.
void MDSDaemon::ms_handle_connect(Connection *con)
{
}
996 | ||
997 | bool MDSDaemon::ms_handle_reset(Connection *con) | |
998 | { | |
999 | if (con->get_peer_type() != CEPH_ENTITY_TYPE_CLIENT) | |
1000 | return false; | |
1001 | ||
11fdf7f2 | 1002 | std::lock_guard l(mds_lock); |
7c673cae FG |
1003 | if (stopping) { |
1004 | return false; | |
1005 | } | |
11fdf7f2 | 1006 | dout(5) << "ms_handle_reset on " << con->get_peer_socket_addr() << dendl; |
7c673cae FG |
1007 | if (beacon.get_want_state() == CEPH_MDS_STATE_DNE) |
1008 | return false; | |
1009 | ||
11fdf7f2 TL |
1010 | auto priv = con->get_priv(); |
1011 | if (auto session = static_cast<Session *>(priv.get()); session) { | |
7c673cae FG |
1012 | if (session->is_closed()) { |
1013 | dout(3) << "ms_handle_reset closing connection for session " << session->info.inst << dendl; | |
1014 | con->mark_down(); | |
11fdf7f2 | 1015 | con->set_priv(nullptr); |
7c673cae | 1016 | } |
7c673cae FG |
1017 | } else { |
1018 | con->mark_down(); | |
1019 | } | |
1020 | return false; | |
1021 | } | |
1022 | ||
1023 | ||
1024 | void MDSDaemon::ms_handle_remote_reset(Connection *con) | |
1025 | { | |
1026 | if (con->get_peer_type() != CEPH_ENTITY_TYPE_CLIENT) | |
1027 | return; | |
1028 | ||
11fdf7f2 | 1029 | std::lock_guard l(mds_lock); |
7c673cae FG |
1030 | if (stopping) { |
1031 | return; | |
1032 | } | |
1033 | ||
11fdf7f2 | 1034 | dout(5) << "ms_handle_remote_reset on " << con->get_peer_socket_addr() << dendl; |
7c673cae FG |
1035 | if (beacon.get_want_state() == CEPH_MDS_STATE_DNE) |
1036 | return; | |
1037 | ||
11fdf7f2 TL |
1038 | auto priv = con->get_priv(); |
1039 | if (auto session = static_cast<Session *>(priv.get()); session) { | |
7c673cae FG |
1040 | if (session->is_closed()) { |
1041 | dout(3) << "ms_handle_remote_reset closing connection for session " << session->info.inst << dendl; | |
1042 | con->mark_down(); | |
11fdf7f2 | 1043 | con->set_priv(nullptr); |
7c673cae | 1044 | } |
7c673cae FG |
1045 | } |
1046 | } | |
1047 | ||
1048 | bool MDSDaemon::ms_handle_refused(Connection *con) | |
1049 | { | |
1050 | // do nothing for now | |
1051 | return false; | |
1052 | } | |
1053 | ||
11fdf7f2 TL |
1054 | bool MDSDaemon::parse_caps(const AuthCapsInfo& info, MDSAuthCaps& caps) |
1055 | { | |
1056 | caps.clear(); | |
1057 | if (info.allow_all) { | |
1058 | caps.set_allow_all(); | |
7c673cae | 1059 | return true; |
c07f9fc5 | 1060 | } else { |
11fdf7f2 TL |
1061 | auto it = info.caps.begin(); |
1062 | string auth_cap_str; | |
1063 | try { | |
1064 | decode(auth_cap_str, it); | |
1065 | } catch (const buffer::error& e) { | |
1066 | dout(1) << __func__ << ": cannot decode auth caps buffer of length " << info.caps.length() << dendl; | |
1067 | return false; | |
7c673cae FG |
1068 | } |
1069 | ||
11fdf7f2 TL |
1070 | dout(10) << __func__ << ": parsing auth_cap_str='" << auth_cap_str << "'" << dendl; |
1071 | CachedStackStringStream cs; | |
1072 | if (caps.parse(g_ceph_context, auth_cap_str, cs.get())) { | |
1073 | return true; | |
b5b8bbf5 | 1074 | } else { |
11fdf7f2 TL |
1075 | dout(1) << __func__ << ": auth cap parse error: " << cs->strv() << " parsing '" << auth_cap_str << "'" << dendl; |
1076 | return false; | |
7c673cae FG |
1077 | } |
1078 | } | |
7c673cae FG |
1079 | } |
1080 | ||
11fdf7f2 TL |
1081 | int MDSDaemon::ms_handle_authentication(Connection *con) |
1082 | { | |
1083 | /* N.B. without mds_lock! */ | |
1084 | MDSAuthCaps caps; | |
1085 | return parse_caps(con->get_peer_caps_info(), caps) ? 0 : -1; | |
1086 | } | |
7c673cae FG |
1087 | |
1088 | void MDSDaemon::ms_handle_accept(Connection *con) | |
1089 | { | |
11fdf7f2 TL |
1090 | entity_name_t n(con->get_peer_type(), con->get_peer_global_id()); |
1091 | std::lock_guard l(mds_lock); | |
7c673cae FG |
1092 | if (stopping) { |
1093 | return; | |
1094 | } | |
1095 | ||
11fdf7f2 TL |
1096 | // We allow connections and assign Session instances to connections |
1097 | // even if we have not been assigned a rank, because clients with | |
1098 | // "allow *" are allowed to connect and do 'tell' operations before | |
1099 | // we have a rank. | |
1100 | Session *s = NULL; | |
1101 | if (mds_rank) { | |
1102 | // If we do hold a rank, see if this is an existing client establishing | |
1103 | // a new connection, rather than a new client | |
1104 | s = mds_rank->sessionmap.get_session(n); | |
1105 | } | |
1106 | ||
1107 | // Wire up a Session* to this connection | |
1108 | // It doesn't go into a SessionMap instance until it sends an explicit | |
1109 | // request to open a session (initial state of Session is `closed`) | |
1110 | if (!s) { | |
1111 | s = new Session(con); | |
11fdf7f2 TL |
1112 | dout(10) << " new session " << s << " for " << s->info.inst |
1113 | << " con " << con << dendl; | |
1114 | con->set_priv(RefCountedPtr{s, false}); | |
1115 | if (mds_rank) { | |
1116 | mds_rank->kick_waiters_for_any_client_connection(); | |
1117 | } | |
1118 | } else { | |
1119 | dout(10) << " existing session " << s << " for " << s->info.inst | |
1120 | << " existing con " << s->get_connection() | |
1121 | << ", new/authorizing con " << con << dendl; | |
1122 | con->set_priv(RefCountedPtr{s}); | |
1123 | } | |
1124 | ||
1125 | parse_caps(con->get_peer_caps_info(), s->auth_caps); | |
1126 | ||
1127 | dout(10) << "ms_handle_accept " << con->get_peer_socket_addr() << " con " << con << " session " << s << dendl; | |
7c673cae | 1128 | if (s) { |
11fdf7f2 TL |
1129 | if (s->get_connection() != con) { |
1130 | dout(10) << " session connection " << s->get_connection() | |
1131 | << " -> " << con << dendl; | |
1132 | s->set_connection(con); | |
7c673cae FG |
1133 | |
1134 | // send out any queued messages | |
1135 | while (!s->preopen_out_queue.empty()) { | |
11fdf7f2 | 1136 | con->send_message2(s->preopen_out_queue.front()); |
7c673cae FG |
1137 | s->preopen_out_queue.pop_front(); |
1138 | } | |
1139 | } | |
7c673cae FG |
1140 | } |
1141 | } | |
1142 | ||
1143 | bool MDSDaemon::is_clean_shutdown() | |
1144 | { | |
1145 | if (mds_rank) { | |
1146 | return mds_rank->is_stopped(); | |
1147 | } else { | |
1148 | return true; | |
1149 | } | |
1150 | } |