ceph/src/mds/MDSDaemon.cc
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <unistd.h>
16
17#include "include/compat.h"
18#include "include/types.h"
19#include "include/str_list.h"
20
21#include "common/Clock.h"
22#include "common/HeartbeatMap.h"
23#include "common/Timer.h"
24#include "common/backport14.h"
25#include "common/ceph_argparse.h"
26#include "common/config.h"
27#include "common/entity_name.h"
28#include "common/errno.h"
29#include "common/perf_counters.h"
30#include "common/signal.h"
31#include "common/version.h"
32
33#include "global/signal_handler.h"
34
35#include "msg/Messenger.h"
36#include "mon/MonClient.h"
37
38#include "osdc/Objecter.h"
39
40#include "MDSMap.h"
41
42#include "MDSDaemon.h"
43#include "Server.h"
44#include "Locker.h"
45
46#include "SnapServer.h"
47#include "SnapClient.h"
48
49#include "events/ESession.h"
50#include "events/ESubtreeMap.h"
51
52#include "messages/MMDSMap.h"
53
54#include "messages/MGenericMessage.h"
55
56#include "messages/MMonCommand.h"
57#include "messages/MCommand.h"
58#include "messages/MCommandReply.h"
59
60#include "auth/AuthAuthorizeHandler.h"
61#include "auth/RotatingKeyRing.h"
62#include "auth/KeyRing.h"
63
64#include "perfglue/cpu_profiler.h"
65#include "perfglue/heap_profiler.h"
66
67#define dout_context g_ceph_context
68#define dout_subsys ceph_subsys_mds
69#undef dout_prefix
70#define dout_prefix *_dout << "mds." << name << ' '
71
72// cons/des
73MDSDaemon::MDSDaemon(boost::string_view n, Messenger *m, MonClient *mc) :
74 Dispatcher(m->cct),
75 mds_lock("MDSDaemon::mds_lock"),
76 stopping(false),
77 timer(m->cct, mds_lock),
78 beacon(m->cct, mc, n),
79 authorize_handler_cluster_registry(new AuthAuthorizeHandlerRegistry(m->cct,
80 m->cct->_conf->auth_supported.empty() ?
81 m->cct->_conf->auth_cluster_required :
82 m->cct->_conf->auth_supported)),
83 authorize_handler_service_registry(new AuthAuthorizeHandlerRegistry(m->cct,
84 m->cct->_conf->auth_supported.empty() ?
85 m->cct->_conf->auth_service_required :
86 m->cct->_conf->auth_supported)),
87 name(n),
88 messenger(m),
89 monc(mc),
90 mgrc(m->cct, m),
91 log_client(m->cct, messenger, &mc->monmap, LogClient::NO_FLAGS),
92 mds_rank(NULL),
93 asok_hook(NULL),
94 starttime(mono_clock::now())
95{
96 orig_argc = 0;
97 orig_argv = NULL;
98
99 clog = log_client.create_channel();
100
101 monc->set_messenger(messenger);
102
103 mdsmap = new MDSMap;
104}
105
106MDSDaemon::~MDSDaemon() {
107 Mutex::Locker lock(mds_lock);
108
109 delete mds_rank;
110 mds_rank = NULL;
111 delete mdsmap;
112 mdsmap = NULL;
113
114 delete authorize_handler_service_registry;
115 delete authorize_handler_cluster_registry;
116}
117
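// MDSSocketHook adapts AdminSocket callbacks into MDSDaemon::asok_command(),
// which answers "status" itself and forwards everything else to the active
// MDSRank, if any (the commands are registered in set_up_admin_socket() below).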
118class MDSSocketHook : public AdminSocketHook {
119 MDSDaemon *mds;
120public:
121 explicit MDSSocketHook(MDSDaemon *m) : mds(m) {}
122 bool call(std::string command, cmdmap_t& cmdmap, std::string format,
123 bufferlist& out) override {
124 stringstream ss;
125 bool r = mds->asok_command(command, cmdmap, format, ss);
126 out.append(ss);
127 return r;
128 }
129};
130
131bool MDSDaemon::asok_command(string command, cmdmap_t& cmdmap, string format,
132 ostream& ss)
133{
134 dout(1) << "asok_command: " << command << " (starting...)" << dendl;
135
136 Formatter *f = Formatter::create(format, "json-pretty", "json-pretty");
137 bool handled = false;
138 if (command == "status") {
139 dump_status(f);
140 handled = true;
141 } else {
142 if (mds_rank == NULL) {
143 dout(1) << "Can't run that command on an inactive MDS!" << dendl;
144 f->dump_string("error", "mds_not_active");
145 } else {
146 handled = mds_rank->handle_asok_command(command, cmdmap, f, ss);
147 }
148 }
149 f->flush(ss);
150 delete f;
151
152 dout(1) << "asok_command: " << command << " (complete)" << dendl;
153
154 return handled;
155}
156
157void MDSDaemon::dump_status(Formatter *f)
158{
159 f->open_object_section("status");
160 f->dump_stream("cluster_fsid") << monc->get_fsid();
161 if (mds_rank) {
162 f->dump_int("whoami", mds_rank->get_nodeid());
163 } else {
164 f->dump_int("whoami", MDS_RANK_NONE);
165 }
166
167 f->dump_int("id", monc->get_global_id());
168 f->dump_string("want_state", ceph_mds_state_name(beacon.get_want_state()));
169 f->dump_string("state", ceph_mds_state_name(mdsmap->get_state_gid(mds_gid_t(
170 monc->get_global_id()))));
171 if (mds_rank) {
172 Mutex::Locker l(mds_lock);
173 mds_rank->dump_status(f);
174 }
175
176 f->dump_unsigned("mdsmap_epoch", mdsmap->get_epoch());
177 if (mds_rank) {
178 f->dump_unsigned("osdmap_epoch", mds_rank->get_osd_epoch());
179 f->dump_unsigned("osdmap_epoch_barrier", mds_rank->get_osd_epoch_barrier());
180 } else {
181 f->dump_unsigned("osdmap_epoch", 0);
182 f->dump_unsigned("osdmap_epoch_barrier", 0);
183 }
184
185 f->dump_float("uptime", get_uptime().count());
186
187 f->close_section(); // status
188}
189
190void MDSDaemon::set_up_admin_socket()
191{
192 int r;
193 AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
194 assert(asok_hook == nullptr);
195 asok_hook = new MDSSocketHook(this);
196 r = admin_socket->register_command("status", "status", asok_hook,
197 "high-level status of MDS");
198 assert(r == 0);
199 r = admin_socket->register_command("dump_ops_in_flight",
200 "dump_ops_in_flight", asok_hook,
201 "show the ops currently in flight");
202 assert(r == 0);
203 r = admin_socket->register_command("ops",
204 "ops", asok_hook,
205 "show the ops currently in flight");
206 assert(r == 0);
207 r = admin_socket->register_command("dump_blocked_ops", "dump_blocked_ops",
208 asok_hook,
209 "show the blocked ops currently in flight");
210 assert(r == 0);
211 r = admin_socket->register_command("dump_historic_ops", "dump_historic_ops",
212 asok_hook,
213 "show slowest recent ops");
214 assert(r == 0);
215 r = admin_socket->register_command("dump_historic_ops_by_duration", "dump_historic_ops_by_duration",
216 asok_hook,
217 "show slowest recent ops, sorted by op duration");
218 assert(r == 0);
219 r = admin_socket->register_command("scrub_path",
220 "scrub_path name=path,type=CephString "
221 "name=scrubops,type=CephChoices,"
222 "strings=force|recursive|repair,n=N,req=false",
223 asok_hook,
224 "scrub an inode and output results");
225 assert(r == 0);
226 r = admin_socket->register_command("tag path",
227 "tag path name=path,type=CephString"
228 " name=tag,type=CephString",
229 asok_hook,
230 "Apply scrub tag recursively");
231 assert(r == 0);
232 r = admin_socket->register_command("flush_path",
233 "flush_path name=path,type=CephString",
234 asok_hook,
235 "flush an inode (and its dirfrags)");
236 assert(r == 0);
237 r = admin_socket->register_command("export dir",
238 "export dir "
239 "name=path,type=CephString "
240 "name=rank,type=CephInt",
241 asok_hook,
242 "migrate a subtree to named MDS");
243 assert(r == 0);
244 r = admin_socket->register_command("dump cache",
245 "dump cache name=path,type=CephString,req=false",
246 asok_hook,
247 "dump metadata cache (optionally to a file)");
248 assert(r == 0);
249 r = admin_socket->register_command("cache status",
250 "cache status",
251 asok_hook,
252 "show cache status");
253 assert(r == 0);
254 r = admin_socket->register_command("dump tree",
255 "dump tree "
256 "name=root,type=CephString,req=true "
257 "name=depth,type=CephInt,req=false ",
258 asok_hook,
259 "dump metadata cache for subtree");
260 assert(r == 0);
261 r = admin_socket->register_command("dump loads",
262 "dump loads",
263 asok_hook,
264 "dump metadata loads");
265 assert(r == 0);
266 r = admin_socket->register_command("session evict",
267 "session evict name=client_id,type=CephString",
268 asok_hook,
269 "Evict a CephFS client");
270 assert(r == 0);
271 r = admin_socket->register_command("osdmap barrier",
272 "osdmap barrier name=target_epoch,type=CephInt",
273 asok_hook,
274 "Wait until the MDS has this OSD map epoch");
275 assert(r == 0);
276 r = admin_socket->register_command("session ls",
277 "session ls",
278 asok_hook,
279 "Enumerate connected CephFS clients");
280 assert(r == 0);
281 r = admin_socket->register_command("flush journal",
282 "flush journal",
283 asok_hook,
284 "Flush the journal to the backing store");
285 assert(r == 0);
286 r = admin_socket->register_command("force_readonly",
287 "force_readonly",
288 asok_hook,
289 "Force MDS to read-only mode");
290 assert(r == 0);
291 r = admin_socket->register_command("get subtrees",
292 "get subtrees",
293 asok_hook,
294 "Return the subtree map");
295 assert(r == 0);
296 r = admin_socket->register_command("dirfrag split",
297 "dirfrag split "
298 "name=path,type=CephString,req=true "
299 "name=frag,type=CephString,req=true "
300 "name=bits,type=CephInt,req=true ",
301 asok_hook,
302 "Fragment directory by path");
303 assert(r == 0);
304 r = admin_socket->register_command("dirfrag merge",
305 "dirfrag merge "
306 "name=path,type=CephString,req=true "
307 "name=frag,type=CephString,req=true",
308 asok_hook,
309 "De-fragment directory by path");
310 assert(r == 0);
311 r = admin_socket->register_command("dirfrag ls",
312 "dirfrag ls "
313 "name=path,type=CephString,req=true",
314 asok_hook,
315 "List fragments in directory");
316 assert(r == 0);
317}
318
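// clean_up_admin_socket() must stay in sync with set_up_admin_socket():
// every command registered above is unregistered here before the hook is deleted.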
319void MDSDaemon::clean_up_admin_socket()
320{
321 AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
322 admin_socket->unregister_command("status");
323 admin_socket->unregister_command("dump_ops_in_flight");
324 admin_socket->unregister_command("ops");
325 admin_socket->unregister_command("dump_blocked_ops");
326 admin_socket->unregister_command("dump_historic_ops");
327 admin_socket->unregister_command("dump_historic_ops_by_duration");
328 admin_socket->unregister_command("scrub_path");
329 admin_socket->unregister_command("tag path");
330 admin_socket->unregister_command("flush_path");
331 admin_socket->unregister_command("export dir");
332 admin_socket->unregister_command("dump cache");
333  admin_socket->unregister_command("cache status");
334  admin_socket->unregister_command("dump tree");
335  admin_socket->unregister_command("dump loads");
336 admin_socket->unregister_command("session evict");
337 admin_socket->unregister_command("osdmap barrier");
338 admin_socket->unregister_command("session ls");
339 admin_socket->unregister_command("flush journal");
340 admin_socket->unregister_command("force_readonly");
341 admin_socket->unregister_command("get subtrees");
342 admin_socket->unregister_command("dirfrag split");
343 admin_socket->unregister_command("dirfrag merge");
344 admin_socket->unregister_command("dirfrag ls");
345 delete asok_hook;
346 asok_hook = NULL;
347}
348
349const char** MDSDaemon::get_tracked_conf_keys() const
350{
351 static const char* KEYS[] = {
352 "mds_op_complaint_time", "mds_op_log_threshold",
353 "mds_op_history_size", "mds_op_history_duration",
354 "mds_enable_op_tracker",
355 "mds_log_pause",
356 // clog & admin clog
357 "clog_to_monitors",
358 "clog_to_syslog",
359 "clog_to_syslog_facility",
360 "clog_to_syslog_level",
361 // PurgeQueue
362 "mds_max_purge_ops",
363 "mds_max_purge_ops_per_pg",
364 "mds_max_purge_files",
365    "mds_inject_migrator_session_race",
366 "clog_to_graylog",
367 "clog_to_graylog_host",
368 "clog_to_graylog_port",
369 "host",
370 "fsid",
371 NULL
372 };
373 return KEYS;
374}
375
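// handle_conf_change() is called through the config-observer interface for the
// keys listed in get_tracked_conf_keys() above (the daemon registers itself via
// g_conf->add_observer() in init()); most changes are simply forwarded to the
// MDSRank, when one exists.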
376void MDSDaemon::handle_conf_change(const struct md_config_t *conf,
377 const std::set <std::string> &changed)
378{
379 // We may be called within mds_lock (via `tell`) or outwith the
380 // lock (via admin socket `config set`), so handle either case.
381 const bool initially_locked = mds_lock.is_locked_by_me();
382 if (!initially_locked) {
383 mds_lock.Lock();
384 }
385
386 if (changed.count("mds_op_complaint_time") ||
387 changed.count("mds_op_log_threshold")) {
388 if (mds_rank) {
389 mds_rank->op_tracker.set_complaint_and_threshold(conf->mds_op_complaint_time,
390 conf->mds_op_log_threshold);
391 }
392 }
393 if (changed.count("mds_op_history_size") ||
394 changed.count("mds_op_history_duration")) {
395 if (mds_rank) {
396 mds_rank->op_tracker.set_history_size_and_duration(conf->mds_op_history_size,
397 conf->mds_op_history_duration);
398 }
399 }
400 if (changed.count("mds_enable_op_tracker")) {
401 if (mds_rank) {
402 mds_rank->op_tracker.set_tracking(conf->mds_enable_op_tracker);
403 }
404 }
405 if (changed.count("clog_to_monitors") ||
406 changed.count("clog_to_syslog") ||
407 changed.count("clog_to_syslog_level") ||
408 changed.count("clog_to_syslog_facility") ||
409 changed.count("clog_to_graylog") ||
410 changed.count("clog_to_graylog_host") ||
411 changed.count("clog_to_graylog_port") ||
412 changed.count("host") ||
413 changed.count("fsid")) {
414 if (mds_rank) {
415 mds_rank->update_log_config();
416 }
417 }
418
419 if (!g_conf->mds_log_pause && changed.count("mds_log_pause")) {
420 if (mds_rank) {
421 mds_rank->mdlog->kick_submitter();
422 }
423 }
424
425 if (mds_rank) {
426 mds_rank->handle_conf_change(conf, changed);
427 }
428
429 if (!initially_locked) {
430 mds_lock.Unlock();
431 }
432}
433
434
435int MDSDaemon::init()
436{
437 dout(10) << sizeof(MDSCacheObject) << "\tMDSCacheObject" << dendl;
438 dout(10) << sizeof(CInode) << "\tCInode" << dendl;
439 dout(10) << sizeof(elist<void*>::item) << "\t elist<>::item *7=" << 7*sizeof(elist<void*>::item) << dendl;
440 dout(10) << sizeof(CInode::mempool_inode) << "\t inode " << dendl;
441 dout(10) << sizeof(CInode::mempool_old_inode) << "\t old_inode " << dendl;
442 dout(10) << sizeof(nest_info_t) << "\t nest_info_t " << dendl;
443 dout(10) << sizeof(frag_info_t) << "\t frag_info_t " << dendl;
444 dout(10) << sizeof(SimpleLock) << "\t SimpleLock *5=" << 5*sizeof(SimpleLock) << dendl;
445 dout(10) << sizeof(ScatterLock) << "\t ScatterLock *3=" << 3*sizeof(ScatterLock) << dendl;
446 dout(10) << sizeof(CDentry) << "\tCDentry" << dendl;
447 dout(10) << sizeof(elist<void*>::item) << "\t elist<>::item" << dendl;
448 dout(10) << sizeof(SimpleLock) << "\t SimpleLock" << dendl;
449 dout(10) << sizeof(CDir) << "\tCDir " << dendl;
450 dout(10) << sizeof(elist<void*>::item) << "\t elist<>::item *2=" << 2*sizeof(elist<void*>::item) << dendl;
451 dout(10) << sizeof(fnode_t) << "\t fnode_t " << dendl;
452 dout(10) << sizeof(nest_info_t) << "\t nest_info_t *2" << dendl;
453 dout(10) << sizeof(frag_info_t) << "\t frag_info_t *2" << dendl;
454 dout(10) << sizeof(Capability) << "\tCapability " << dendl;
455 dout(10) << sizeof(xlist<void*>::item) << "\t xlist<>::item *2=" << 2*sizeof(xlist<void*>::item) << dendl;
456
457 messenger->add_dispatcher_tail(&beacon);
458 messenger->add_dispatcher_tail(this);
459
460 // get monmap
461 monc->set_messenger(messenger);
462
463 monc->set_want_keys(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD |
464 CEPH_ENTITY_TYPE_MDS | CEPH_ENTITY_TYPE_MGR);
465 int r = 0;
466 r = monc->init();
467 if (r < 0) {
468 derr << "ERROR: failed to get monmap: " << cpp_strerror(-r) << dendl;
469 mds_lock.Lock();
470 suicide();
471 mds_lock.Unlock();
472 return r;
473 }
474
475 // tell monc about log_client so it will know about mon session resets
476 monc->set_log_client(&log_client);
477
478 r = monc->authenticate();
479 if (r < 0) {
480 derr << "ERROR: failed to authenticate: " << cpp_strerror(-r) << dendl;
481 mds_lock.Lock();
482 suicide();
483 mds_lock.Unlock();
484 return r;
485 }
486
487 int rotating_auth_attempts = 0;
488 while (monc->wait_auth_rotating(30.0) < 0) {
489 if (++rotating_auth_attempts <= g_conf->max_rotating_auth_attempts) {
490 derr << "unable to obtain rotating service keys; retrying" << dendl;
491 continue;
492 }
493 derr << "ERROR: failed to refresh rotating keys, "
494 << "maximum retry time reached." << dendl;
495 mds_lock.Lock();
496 suicide();
497 mds_lock.Unlock();
498 return -ETIMEDOUT;
499 }
500
501 mgrc.init();
502 messenger->add_dispatcher_head(&mgrc);
503
504 mds_lock.Lock();
505 if (beacon.get_want_state() == CEPH_MDS_STATE_DNE) {
506 dout(4) << __func__ << ": terminated already, dropping out" << dendl;
507 mds_lock.Unlock();
508 return 0;
509 }
510
511 monc->sub_want("mdsmap", 0, 0);
512 monc->sub_want("mgrmap", 0, 0);
513 monc->renew_subs();
514
515 mds_lock.Unlock();
516
517 // Set up admin socket before taking mds_lock, so that ordering
518 // is consistent (later we take mds_lock within asok callbacks)
519 set_up_admin_socket();
520 g_conf->add_observer(this);
521 mds_lock.Lock();
522 if (beacon.get_want_state() == MDSMap::STATE_DNE) {
523 suicide(); // we could do something more graceful here
524 dout(4) << __func__ << ": terminated already, dropping out" << dendl;
525 mds_lock.Unlock();
526 return 0;
527 }
528
529 timer.init();
530
531 beacon.init(mdsmap);
532 messenger->set_myname(entity_name_t::MDS(MDS_RANK_NONE));
533
534 // schedule tick
535 reset_tick();
536 mds_lock.Unlock();
537
538 return 0;
539}
540
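// reset_tick() (re)arms the periodic tick; the callback runs with mds_lock held
// (see the assert below), and tick() reschedules itself on every pass before
// delegating to MDSRank::tick().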
541void MDSDaemon::reset_tick()
542{
543 // cancel old
544 if (tick_event) timer.cancel_event(tick_event);
545
546 // schedule
547 tick_event = timer.add_event_after(
548 g_conf->mds_tick_interval,
549 new FunctionContext([this](int) {
550 assert(mds_lock.is_locked_by_me());
551 tick();
552 }));
553}
554
555void MDSDaemon::tick()
556{
557 // reschedule
558 reset_tick();
559
560 // Call through to subsystems' tick functions
561 if (mds_rank) {
562 mds_rank->tick();
563 }
564}
565
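// send_command_reply() answers an MCommand. If the sender's Session exists only
// for issuing commands (never opened, not present in the SessionMap), its
// connection is marked disposable so it can be torn down once the reply is sent.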
566void MDSDaemon::send_command_reply(MCommand *m, MDSRank *mds_rank,
567 int r, bufferlist outbl,
568                                   boost::string_view outs)
569{
570 Session *session = static_cast<Session *>(m->get_connection()->get_priv());
571 assert(session != NULL);
572 // If someone is using a closed session for sending commands (e.g.
573 // the ceph CLI) then we should feel free to clean up this connection
574 // as soon as we've sent them a response.
575 const bool live_session =
576 session->get_state_seq() > 0 &&
577 mds_rank &&
578 mds_rank->sessionmap.get_session(session->info.inst.name);
579
580 if (!live_session) {
581 // This session only existed to issue commands, so terminate it
582 // as soon as we can.
583 assert(session->is_closed());
584 session->connection->mark_disposable();
585  }
586  session->put();
587
588 MCommandReply *reply = new MCommandReply(r, outs);
589 reply->set_tid(m->get_tid());
590 reply->set_data(outbl);
591 m->get_connection()->send_message(reply);
592}
593
594/* This function DOES put the passed message before returning */
595void MDSDaemon::handle_command(MCommand *m)
596{
597 Session *session = static_cast<Session *>(m->get_connection()->get_priv());
598 assert(session != NULL);
599
600 int r = 0;
601 cmdmap_t cmdmap;
602 std::stringstream ss;
603 std::string outs;
604 bufferlist outbl;
605 Context *run_after = NULL;
606 bool need_reply = true;
607
608 if (!session->auth_caps.allow_all()) {
609 dout(1) << __func__
610 << ": received command from client without `tell` capability: "
611 << m->get_connection()->peer_addr << dendl;
612
613 ss << "permission denied";
614 r = -EPERM;
615 } else if (m->cmd.empty()) {
616 r = -EINVAL;
617 ss << "no command given";
618 outs = ss.str();
619 } else if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
620 r = -EINVAL;
621 outs = ss.str();
622 } else {
623 r = _handle_command(cmdmap, m, &outbl, &outs, &run_after, &need_reply);
624 }
625  session->put();
626
627 if (need_reply) {
628 send_command_reply(m, mds_rank, r, outbl, outs);
629 }
630
631 if (run_after) {
632 run_after->complete(0);
633 }
634
635 m->put();
636}
637
638
639struct MDSCommand {
640 string cmdstring;
641 string helpstring;
642 string module;
643 string perm;
644 string availability;
645} mds_commands[] = {
646
647#define COMMAND(parsesig, helptext, module, perm, availability) \
648 {parsesig, helptext, module, perm, availability},
649
650COMMAND("injectargs " \
651 "name=injected_args,type=CephString,n=N",
652 "inject configuration arguments into running MDS",
653 "mds", "*", "cli,rest")
654COMMAND("config set " \
655 "name=key,type=CephString name=value,type=CephString",
656 "Set a configuration option at runtime (not persistent)",
657 "mds", "*", "cli,rest")
658COMMAND("exit",
659 "Terminate this MDS",
660 "mds", "*", "cli,rest")
661COMMAND("respawn",
662 "Restart this MDS",
663 "mds", "*", "cli,rest")
664COMMAND("session kill " \
665 "name=session_id,type=CephInt",
666 "End a client session",
667 "mds", "*", "cli,rest")
668COMMAND("cpu_profiler " \
669 "name=arg,type=CephChoices,strings=status|flush",
670 "run cpu profiling on daemon", "mds", "rw", "cli,rest")
671COMMAND("session ls " \
672 "name=filters,type=CephString,n=N,req=false",
673 "List client sessions", "mds", "r", "cli,rest")
674COMMAND("client ls " \
675 "name=filters,type=CephString,n=N,req=false",
676 "List client sessions", "mds", "r", "cli,rest")
677COMMAND("session evict " \
678 "name=filters,type=CephString,n=N,req=false",
679 "Evict client session(s)", "mds", "rw", "cli,rest")
680COMMAND("client evict " \
681 "name=filters,type=CephString,n=N,req=false",
682 "Evict client session(s)", "mds", "rw", "cli,rest")
683COMMAND("damage ls",
684 "List detected metadata damage", "mds", "r", "cli,rest")
685COMMAND("damage rm name=damage_id,type=CephInt",
686 "Remove a damage table entry", "mds", "rw", "cli,rest")
687COMMAND("version", "report version of MDS", "mds", "r", "cli,rest")
688COMMAND("heap " \
689 "name=heapcmd,type=CephChoices,strings=dump|start_profiler|stop_profiler|release|stats", \
690 "show heap usage info (available only if compiled with tcmalloc)", \
691 "mds", "*", "cli,rest")
692};
693
694
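// _handle_command() services the table above. As a rough usage sketch (assuming
// the usual ceph CLI plumbing, not shown in this file), a command such as
// `ceph tell mds.<id> version` or `ceph tell mds.<id> session ls` arrives here
// as an MCommand; anything this daemon does not recognize itself is offered to
// MDSRank::handle_command() before falling back to -EINVAL.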
695int MDSDaemon::_handle_command(
696 const cmdmap_t &cmdmap,
697 MCommand *m,
698 bufferlist *outbl,
699 std::string *outs,
700 Context **run_later,
701 bool *need_reply)
702{
703 assert(outbl != NULL);
704 assert(outs != NULL);
705
706 class SuicideLater : public Context
707 {
708 MDSDaemon *mds;
709
710 public:
711 explicit SuicideLater(MDSDaemon *mds_) : mds(mds_) {}
712 void finish(int r) override {
713 // Wait a little to improve chances of caller getting
714 // our response before seeing us disappear from mdsmap
715 sleep(1);
716
717 mds->suicide();
718 }
719 };
720
721
722 class RespawnLater : public Context
723 {
724 MDSDaemon *mds;
725
726 public:
727
728 explicit RespawnLater(MDSDaemon *mds_) : mds(mds_) {}
729 void finish(int r) override {
730 // Wait a little to improve chances of caller getting
731 // our response before seeing us disappear from mdsmap
732 sleep(1);
733
734 mds->respawn();
735 }
736 };
737
738 std::stringstream ds;
739 std::stringstream ss;
740 std::string prefix;
741 std::string format;
742 std::unique_ptr<Formatter> f(Formatter::create(format));
743 cmd_getval(cct, cmdmap, "prefix", prefix);
744
745 int r = 0;
746
747 if (prefix == "get_command_descriptions") {
748 int cmdnum = 0;
749    std::unique_ptr<JSONFormatter> f(ceph::make_unique<JSONFormatter>());
750 f->open_object_section("command_descriptions");
751 for (MDSCommand *cp = mds_commands;
752 cp < &mds_commands[ARRAY_SIZE(mds_commands)]; cp++) {
753
754 ostringstream secname;
755 secname << "cmd" << setfill('0') << std::setw(3) << cmdnum;
756      dump_cmddesc_to_json(f.get(), secname.str(), cp->cmdstring, cp->helpstring,
757 cp->module, cp->perm, cp->availability, 0);
758 cmdnum++;
759 }
760 f->close_section(); // command_descriptions
761
762 f->flush(ds);
763 goto out;
764 }
765
766 cmd_getval(cct, cmdmap, "format", format);
767 if (prefix == "version") {
768 if (f) {
769 f->open_object_section("version");
770 f->dump_string("version", pretty_version_to_str());
771 f->close_section();
772 f->flush(ds);
773 } else {
774 ds << pretty_version_to_str();
775 }
776 } else if (prefix == "injectargs") {
777 vector<string> argsvec;
778 cmd_getval(cct, cmdmap, "injected_args", argsvec);
779
780 if (argsvec.empty()) {
781 r = -EINVAL;
782 ss << "ignoring empty injectargs";
783 goto out;
784 }
785 string args = argsvec.front();
786 for (vector<string>::iterator a = ++argsvec.begin(); a != argsvec.end(); ++a)
787 args += " " + *a;
788 r = cct->_conf->injectargs(args, &ss);
789 } else if (prefix == "config set") {
790 std::string key;
791 cmd_getval(cct, cmdmap, "key", key);
792 std::string val;
793 cmd_getval(cct, cmdmap, "value", val);
794 r = cct->_conf->set_val(key, val, true, &ss);
795 if (r == 0) {
796 cct->_conf->apply_changes(nullptr);
797 }
798 } else if (prefix == "exit") {
799 // We will send response before executing
800 ss << "Exiting...";
801 *run_later = new SuicideLater(this);
802  } else if (prefix == "respawn") {
803 // We will send response before executing
804 ss << "Respawning...";
805 *run_later = new RespawnLater(this);
806 } else if (prefix == "session kill") {
807 if (mds_rank == NULL) {
808 r = -EINVAL;
809 ss << "MDS not active";
810 goto out;
811 }
812 // FIXME harmonize `session kill` with admin socket session evict
813 int64_t session_id = 0;
814 bool got = cmd_getval(cct, cmdmap, "session_id", session_id);
815 assert(got);
816 bool killed = mds_rank->evict_client(session_id, false,
817 g_conf->mds_session_blacklist_on_evict,
818 ss);
819 if (!killed)
820 r = -ENOENT;
821 } else if (prefix == "heap") {
822 if (!ceph_using_tcmalloc()) {
823 r = -EOPNOTSUPP;
824 ss << "could not issue heap profiler command -- not using tcmalloc!";
825 } else {
826 string heapcmd;
827 cmd_getval(cct, cmdmap, "heapcmd", heapcmd);
828 vector<string> heapcmd_vec;
829 get_str_vec(heapcmd, heapcmd_vec);
830 ceph_heap_profiler_handle_command(heapcmd_vec, ds);
831 }
832 } else if (prefix == "cpu_profiler") {
833 string arg;
834 cmd_getval(cct, cmdmap, "arg", arg);
835 vector<string> argvec;
836 get_str_vec(arg, argvec);
837 cpu_profiler_handle_command(argvec, ds);
838 } else {
839 // Give MDSRank a shot at the command
840 if (!mds_rank) {
841 ss << "MDS not active";
842 r = -EINVAL;
843 }
844 else {
845 bool handled = mds_rank->handle_command(cmdmap, m, &r, &ds, &ss,
846 need_reply);
847 if (!handled) {
848 // MDSDaemon doesn't know this command
849 ss << "unrecognized command! " << prefix;
850 r = -EINVAL;
851 }
852 }
853 }
854
855out:
856 *outs = ss.str();
857 outbl->append(ds);
858 return r;
859}
860
861/* This function deletes the passed message before returning. */
862
863void MDSDaemon::handle_mds_map(MMDSMap *m)
864{
865 version_t epoch = m->get_epoch();
866 dout(5) << "handle_mds_map epoch " << epoch << " from " << m->get_source() << dendl;
867
868 // is it new?
869 if (epoch <= mdsmap->get_epoch()) {
870 dout(5) << " old map epoch " << epoch << " <= " << mdsmap->get_epoch()
871 << ", discarding" << dendl;
872 m->put();
873 return;
874 }
875
876 entity_addr_t addr;
877
878 // keep old map, for a moment
879 MDSMap *oldmap = mdsmap;
880
881 // decode and process
882 mdsmap = new MDSMap;
883 mdsmap->decode(m->get_encoded());
884 const MDSMap::DaemonState new_state = mdsmap->get_state_gid(mds_gid_t(monc->get_global_id()));
885 const int incarnation = mdsmap->get_inc_gid(mds_gid_t(monc->get_global_id()));
886
887 monc->sub_got("mdsmap", mdsmap->get_epoch());
888
889 // Calculate my effective rank (either my owned rank or my
890 // standby_for_rank if in standby replay)
891 mds_rank_t whoami = mdsmap->get_rank_gid(mds_gid_t(monc->get_global_id()));
892
893 // verify compatset
894 CompatSet mdsmap_compat(get_mdsmap_compat_set_all());
895 dout(10) << " my compat " << mdsmap_compat << dendl;
896 dout(10) << " mdsmap compat " << mdsmap->compat << dendl;
897 if (!mdsmap_compat.writeable(mdsmap->compat)) {
898 dout(0) << "handle_mds_map mdsmap compatset " << mdsmap->compat
899 << " not writeable with daemon features " << mdsmap_compat
900 << ", killing myself" << dendl;
901 suicide();
902 goto out;
903 }
904
905 // mark down any failed peers
906 for (map<mds_gid_t,MDSMap::mds_info_t>::const_iterator p = oldmap->get_mds_info().begin();
907 p != oldmap->get_mds_info().end();
908 ++p) {
909 if (mdsmap->get_mds_info().count(p->first) == 0) {
910 dout(10) << " peer mds gid " << p->first << " removed from map" << dendl;
911 messenger->mark_down(p->second.addr);
912 }
913 }
914
915 if (whoami == MDS_RANK_NONE &&
916 new_state == MDSMap::STATE_STANDBY_REPLAY) {
917 whoami = mdsmap->get_mds_info_gid(mds_gid_t(monc->get_global_id())).standby_for_rank;
918 }
919
920 // see who i am
921 addr = messenger->get_myaddr();
922  dout(10) << "map says I am " << addr << " mds." << whoami << "." << incarnation
923 << " state " << ceph_mds_state_name(new_state) << dendl;
924
925 if (whoami == MDS_RANK_NONE) {
926 if (mds_rank != NULL) {
927      const auto myid = monc->get_global_id();
928 // We have entered a rank-holding state, we shouldn't be back
929 // here!
930 if (g_conf->mds_enforce_unique_name) {
931 if (mds_gid_t existing = mdsmap->find_mds_gid_by_name(name)) {
932 const MDSMap::mds_info_t& i = mdsmap->get_info_gid(existing);
933 if (i.global_id > myid) {
934 dout(1) << "map replaced me with another mds." << whoami
935 << " with gid (" << i.global_id << ") larger than myself ("
936 << myid << "); quitting!" << dendl;
937 // Call suicide() rather than respawn() because if someone else
938 // has taken our ID, we don't want to keep restarting and
939 // fighting them for the ID.
940 suicide();
941 m->put();
942 return;
943 }
944 }
945 }
946
947 dout(1) << "map removed me (mds." << whoami << " gid:"
948 << myid << ") from cluster due to lost contact; respawning" << dendl;
949 respawn();
950 }
951 // MDSRank not active: process the map here to see if we have
952 // been assigned a rank.
953 dout(10) << __func__ << ": handling map in rankless mode" << dendl;
954 _handle_mds_map(oldmap);
955 } else {
956
957 // Did we already hold a different rank? MDSMonitor shouldn't try
958 // to change that out from under me!
959 if (mds_rank && whoami != mds_rank->get_nodeid()) {
960 derr << "Invalid rank transition " << mds_rank->get_nodeid() << "->"
961 << whoami << dendl;
962 respawn();
963 }
964
965 // Did I previously not hold a rank? Initialize!
966 if (mds_rank == NULL) {
967 mds_rank = new MDSRankDispatcher(whoami, mds_lock, clog,
968 timer, beacon, mdsmap, messenger, monc,
969 new FunctionContext([this](int r){respawn();}),
970 new FunctionContext([this](int r){suicide();}));
971 dout(10) << __func__ << ": initializing MDS rank "
972 << mds_rank->get_nodeid() << dendl;
973 mds_rank->init();
974 }
975
976 // MDSRank is active: let him process the map, we have no say.
977 dout(10) << __func__ << ": handling map as rank "
978 << mds_rank->get_nodeid() << dendl;
979 mds_rank->handle_mds_map(m, oldmap);
980 }
981
982out:
983 beacon.notify_mdsmap(mdsmap);
984 m->put();
985 delete oldmap;
986}
987
988void MDSDaemon::_handle_mds_map(MDSMap *oldmap)
989{
990 MDSMap::DaemonState new_state = mdsmap->get_state_gid(mds_gid_t(monc->get_global_id()));
991
992 // Normal rankless case, we're marked as standby
993 if (new_state == MDSMap::STATE_STANDBY) {
994 beacon.set_want_state(mdsmap, new_state);
995 dout(1) << "handle_mds_map standby" << dendl;
996
997 return;
998 }
999
1000 // Case where we thought we were standby, but MDSMap disagrees
1001 if (beacon.get_want_state() == MDSMap::STATE_STANDBY) {
1002 dout(10) << "dropped out of mdsmap, try to re-add myself" << dendl;
1003 new_state = MDSMap::STATE_BOOT;
1004 beacon.set_want_state(mdsmap, new_state);
1005 return;
1006 }
1007
1008 // Case where we have sent a boot beacon that isn't reflected yet
1009 if (beacon.get_want_state() == MDSMap::STATE_BOOT) {
1010 dout(10) << "not in map yet" << dendl;
1011 }
1012}
1013
1014void MDSDaemon::handle_signal(int signum)
1015{
1016 assert(signum == SIGINT || signum == SIGTERM);
1017 derr << "*** got signal " << sig_str(signum) << " ***" << dendl;
1018 {
1019 Mutex::Locker l(mds_lock);
1020 if (stopping) {
1021 return;
1022 }
1023 suicide();
1024 }
1025}
1026
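// suicide() requires mds_lock to be held and runs at most once (guarded by
// `stopping`). Teardown order: tick timer, config observer, admin socket,
// beacon (after telling the MDSMonitor we want STATE_DNE), mgr client, then
// either MDSRank::shutdown() or, if rankless, timer/monc/messenger directly.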
1027void MDSDaemon::suicide()
1028{
1029 assert(mds_lock.is_locked());
1030
1031 // make sure we don't suicide twice
1032 assert(stopping == false);
1033 stopping = true;
1034
1035 dout(1) << "suicide. wanted state "
1036 << ceph_mds_state_name(beacon.get_want_state()) << dendl;
1037
1038 if (tick_event) {
1039 timer.cancel_event(tick_event);
1040 tick_event = 0;
1041 }
1042
1043  // add_observer() is called after set_up_admin_socket(), so a non-null asok_hook
1044  // means we registered as a config observer; check it to avoid asserting in remove_observer()
1045 if (asok_hook != NULL)
1046 g_conf->remove_observer(this);
1047
1048 clean_up_admin_socket();
1049
1050 // Inform MDS we are going away, then shut down beacon
1051 beacon.set_want_state(mdsmap, MDSMap::STATE_DNE);
1052 if (!mdsmap->is_dne_gid(mds_gid_t(monc->get_global_id()))) {
1053 // Notify the MDSMonitor that we're dying, so that it doesn't have to
1054 // wait for us to go laggy. Only do this if we're actually in the
1055 // MDSMap, because otherwise the MDSMonitor will drop our message.
1056 beacon.send_and_wait(1);
1057 }
1058 beacon.shutdown();
1059
1060 mgrc.shutdown();
1061
1062 if (mds_rank) {
1063 mds_rank->shutdown();
1064 } else {
1065 timer.shutdown();
1066
1067 monc->shutdown();
1068 messenger->shutdown();
1069 }
1070}
1071
1072void MDSDaemon::respawn()
1073{
1074 dout(1) << "respawn" << dendl;
1075
1076 char *new_argv[orig_argc+1];
1077 dout(1) << " e: '" << orig_argv[0] << "'" << dendl;
1078 for (int i=0; i<orig_argc; i++) {
1079 new_argv[i] = (char *)orig_argv[i];
1080 dout(1) << " " << i << ": '" << orig_argv[i] << "'" << dendl;
1081 }
1082 new_argv[orig_argc] = NULL;
1083
1084 /* Determine the path to our executable, test if Linux /proc/self/exe exists.
1085 * This allows us to exec the same executable even if it has since been
1086 * unlinked.
1087 */
1088 char exe_path[PATH_MAX] = "";
1089 if (readlink(PROCPREFIX "/proc/self/exe", exe_path, PATH_MAX-1) == -1) {
1090 /* Print CWD for the user's interest */
1091 char buf[PATH_MAX];
1092 char *cwd = getcwd(buf, sizeof(buf));
1093 assert(cwd);
1094 dout(1) << " cwd " << cwd << dendl;
1095
1096 /* Fall back to a best-effort: just running in our CWD */
1097 strncpy(exe_path, orig_argv[0], PATH_MAX-1);
1098 } else {
1099 dout(1) << "respawning with exe " << exe_path << dendl;
1100 strcpy(exe_path, PROCPREFIX "/proc/self/exe");
1101 }
1102
1103 dout(1) << " exe_path " << exe_path << dendl;
1104
1105 unblock_all_signals(NULL);
1106 execv(exe_path, new_argv);
1107
1108 dout(0) << "respawn execv " << orig_argv[0]
1109 << " failed with " << cpp_strerror(errno) << dendl;
1110
1111 // We have to assert out here, because suicide() returns, and callers
1112 // to respawn expect it never to return.
1113 ceph_abort();
1114}
1115
1116
1117
1118bool MDSDaemon::ms_dispatch(Message *m)
1119{
1120 Mutex::Locker l(mds_lock);
1121 if (stopping) {
1122 return false;
1123 }
1124
1125 // Drop out early if shutting down
1126 if (beacon.get_want_state() == CEPH_MDS_STATE_DNE) {
1127 dout(10) << " stopping, discarding " << *m << dendl;
1128 m->put();
1129 return true;
1130 }
1131
1132 // First see if it's a daemon message
1133 const bool handled_core = handle_core_message(m);
1134 if (handled_core) {
1135 return true;
1136 }
1137
1138 // Not core, try it as a rank message
1139 if (mds_rank) {
1140 return mds_rank->ms_dispatch(m);
1141 } else {
1142 return false;
1143 }
1144}
1145
1146bool MDSDaemon::ms_get_authorizer(int dest_type, AuthAuthorizer **authorizer, bool force_new)
1147{
1148 dout(10) << "MDSDaemon::ms_get_authorizer type="
1149 << ceph_entity_type_name(dest_type) << dendl;
1150
1151 /* monitor authorization is being handled on different layer */
1152 if (dest_type == CEPH_ENTITY_TYPE_MON)
1153 return true;
1154
1155 if (force_new) {
1156 if (monc->wait_auth_rotating(10) < 0)
1157 return false;
1158 }
1159
1160 *authorizer = monc->build_authorizer(dest_type);
1161 return *authorizer != NULL;
1162}
1163
1164
1165/*
1166 * high priority messages we always process
1167 */
1168bool MDSDaemon::handle_core_message(Message *m)
1169{
1170 switch (m->get_type()) {
1171 case CEPH_MSG_MON_MAP:
1172 ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON);
1173 m->put();
1174 break;
1175
1176 // MDS
1177 case CEPH_MSG_MDS_MAP:
1178 ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_MDS);
1179 handle_mds_map(static_cast<MMDSMap*>(m));
1180 break;
1181
1182 // OSD
1183 case MSG_COMMAND:
1184 handle_command(static_cast<MCommand*>(m));
1185 break;
1186 case CEPH_MSG_OSD_MAP:
1187 ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD);
1188
1189 if (mds_rank) {
1190 mds_rank->handle_osd_map();
1191 }
1192 m->put();
1193 break;
1194
1195 case MSG_MON_COMMAND:
1196 ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON);
1197 clog->warn() << "dropping `mds tell` command from legacy monitor";
1198 m->put();
1199 break;
1200
1201 default:
1202 return false;
1203 }
1204 return true;
1205}
1206
1207void MDSDaemon::ms_handle_connect(Connection *con)
1208{
1209}
1210
1211bool MDSDaemon::ms_handle_reset(Connection *con)
1212{
1213 if (con->get_peer_type() != CEPH_ENTITY_TYPE_CLIENT)
1214 return false;
1215
1216 Mutex::Locker l(mds_lock);
1217 if (stopping) {
1218 return false;
1219 }
1220 dout(5) << "ms_handle_reset on " << con->get_peer_addr() << dendl;
1221 if (beacon.get_want_state() == CEPH_MDS_STATE_DNE)
1222 return false;
1223
1224 Session *session = static_cast<Session *>(con->get_priv());
1225 if (session) {
1226 if (session->is_closed()) {
1227 dout(3) << "ms_handle_reset closing connection for session " << session->info.inst << dendl;
1228 con->mark_down();
1229 con->set_priv(NULL);
1230 }
1231 session->put();
1232 } else {
1233 con->mark_down();
1234 }
1235 return false;
1236}
1237
1238
1239void MDSDaemon::ms_handle_remote_reset(Connection *con)
1240{
1241 if (con->get_peer_type() != CEPH_ENTITY_TYPE_CLIENT)
1242 return;
1243
1244 Mutex::Locker l(mds_lock);
1245 if (stopping) {
1246 return;
1247 }
1248
1249 dout(5) << "ms_handle_remote_reset on " << con->get_peer_addr() << dendl;
1250 if (beacon.get_want_state() == CEPH_MDS_STATE_DNE)
1251 return;
1252
1253 Session *session = static_cast<Session *>(con->get_priv());
1254 if (session) {
1255 if (session->is_closed()) {
1256 dout(3) << "ms_handle_remote_reset closing connection for session " << session->info.inst << dendl;
1257 con->mark_down();
1258 con->set_priv(NULL);
1259 }
1260 session->put();
1261 }
1262}
1263
1264bool MDSDaemon::ms_handle_refused(Connection *con)
1265{
1266 // do nothing for now
1267 return false;
1268}
1269
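// ms_verify_authorizer() authenticates an incoming connection against the
// rotating service keys and attaches a Session* to the Connection (reusing an
// existing session when we hold a rank). Note that the return value only means
// "a decision was made"; the actual verdict is reported via is_valid.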
1270bool MDSDaemon::ms_verify_authorizer(Connection *con, int peer_type,
1271 int protocol, bufferlist& authorizer_data, bufferlist& authorizer_reply,
1272 bool& is_valid, CryptoKey& session_key,
1273 std::unique_ptr<AuthAuthorizerChallenge> *challenge)
1274{
1275 Mutex::Locker l(mds_lock);
1276 if (stopping) {
1277 return false;
1278 }
1279 if (beacon.get_want_state() == CEPH_MDS_STATE_DNE)
1280 return false;
1281
1282 AuthAuthorizeHandler *authorize_handler = 0;
1283 switch (peer_type) {
1284 case CEPH_ENTITY_TYPE_MDS:
1285 authorize_handler = authorize_handler_cluster_registry->get_handler(protocol);
1286 break;
1287 default:
1288 authorize_handler = authorize_handler_service_registry->get_handler(protocol);
1289 }
1290 if (!authorize_handler) {
1291 dout(0) << "No AuthAuthorizeHandler found for protocol " << protocol << dendl;
1292 is_valid = false;
1293 return true;
1294 }
1295
1296 AuthCapsInfo caps_info;
1297 EntityName name;
1298 uint64_t global_id;
1299
1300 RotatingKeyRing *keys = monc->rotating_secrets.get();
1301 if (keys) {
1302 is_valid = authorize_handler->verify_authorizer(
1303 cct, keys,
1304 authorizer_data, authorizer_reply, name, global_id, caps_info,
1305      session_key, nullptr, challenge);
1306 } else {
1307 dout(10) << __func__ << " no rotating_keys (yet), denied" << dendl;
1308 is_valid = false;
1309 }
1310
1311 if (is_valid) {
1312 entity_name_t n(con->get_peer_type(), global_id);
1313
1314 // We allow connections and assign Session instances to connections
1315 // even if we have not been assigned a rank, because clients with
1316 // "allow *" are allowed to connect and do 'tell' operations before
1317 // we have a rank.
1318 Session *s = NULL;
1319 if (mds_rank) {
1320 // If we do hold a rank, see if this is an existing client establishing
1321 // a new connection, rather than a new client
1322 s = mds_rank->sessionmap.get_session(n);
1323 }
1324
1325 // Wire up a Session* to this connection
1326 // It doesn't go into a SessionMap instance until it sends an explicit
1327 // request to open a session (initial state of Session is `closed`)
1328 if (!s) {
1329 s = new Session;
1330 s->info.auth_name = name;
1331 s->info.inst.addr = con->get_peer_addr();
1332 s->info.inst.name = n;
1333 dout(10) << " new session " << s << " for " << s->info.inst << " con " << con << dendl;
1334 con->set_priv(s);
1335 s->connection = con;
1336 if (mds_rank) {
1337 mds_rank->kick_waiters_for_any_client_connection();
1338 }
1339 } else {
1340 dout(10) << " existing session " << s << " for " << s->info.inst << " existing con " << s->connection
1341 << ", new/authorizing con " << con << dendl;
1342 con->set_priv(s->get());
1343
1344
1345
1346 // Wait until we fully accept the connection before setting
1347 // s->connection. In particular, if there are multiple incoming
1348 // connection attempts, they will all get their authorizer
1349 // validated, but some of them may "lose the race" and get
1350 // dropped. We only want to consider the winner(s). See
1351 // ms_handle_accept(). This is important for Sessions we replay
1352 // from the journal on recovery that don't have established
1353 // messenger state; we want the con from only the winning
1354 // connect attempt(s). (Normal reconnects that don't follow MDS
1355 // recovery are reconnected to the existing con by the
1356 // messenger.)
1357 }
1358
1359 if (caps_info.allow_all) {
1360 // Flag for auth providers that don't provide cap strings
1361 s->auth_caps.set_allow_all();
1362 } else {
1363 bufferlist::iterator p = caps_info.caps.begin();
1364 string auth_cap_str;
1365 try {
1366 ::decode(auth_cap_str, p);
1367
1368 dout(10) << __func__ << ": parsing auth_cap_str='" << auth_cap_str << "'" << dendl;
1369 std::ostringstream errstr;
1370 if (!s->auth_caps.parse(g_ceph_context, auth_cap_str, &errstr)) {
1371 dout(1) << __func__ << ": auth cap parse error: " << errstr.str()
1372 << " parsing '" << auth_cap_str << "'" << dendl;
1373 clog->warn() << name << " mds cap '" << auth_cap_str
1374 << "' does not parse: " << errstr.str();
1375 is_valid = false;
1376 }
1377 } catch (buffer::error& e) {
1378 // Assume legacy auth, defaults to:
1379 // * permit all filesystem ops
1380 // * permit no `tell` ops
1381 dout(1) << __func__ << ": cannot decode auth caps bl of length " << caps_info.caps.length() << dendl;
1382 is_valid = false;
1383      }
1384 }
1385 }
1386
1387 return true; // we made a decision (see is_valid)
1388}
1389
1390
1391void MDSDaemon::ms_handle_accept(Connection *con)
1392{
1393 Mutex::Locker l(mds_lock);
1394 if (stopping) {
1395 return;
1396 }
1397
1398 Session *s = static_cast<Session *>(con->get_priv());
1399 dout(10) << "ms_handle_accept " << con->get_peer_addr() << " con " << con << " session " << s << dendl;
1400 if (s) {
1401 if (s->connection != con) {
1402 dout(10) << " session connection " << s->connection << " -> " << con << dendl;
1403 s->connection = con;
1404
1405 // send out any queued messages
1406 while (!s->preopen_out_queue.empty()) {
1407 con->send_message(s->preopen_out_queue.front());
1408 s->preopen_out_queue.pop_front();
1409 }
1410 }
1411 s->put();
1412 }
1413}
1414
1415bool MDSDaemon::is_clean_shutdown()
1416{
1417 if (mds_rank) {
1418 return mds_rank->is_stopped();
1419 } else {
1420 return true;
1421 }
1422}