]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/MDSDaemon.cc
update sources to 12.2.8
[ceph.git] / ceph / src / mds / MDSDaemon.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <unistd.h>
16
17#include "include/compat.h"
7c673cae
FG
18#include "include/types.h"
19#include "include/str_list.h"
c07f9fc5 20
7c673cae 21#include "common/Clock.h"
c07f9fc5
FG
22#include "common/HeartbeatMap.h"
23#include "common/Timer.h"
24#include "common/backport14.h"
7c673cae 25#include "common/ceph_argparse.h"
c07f9fc5
FG
26#include "common/config.h"
27#include "common/entity_name.h"
7c673cae 28#include "common/errno.h"
c07f9fc5
FG
29#include "common/perf_counters.h"
30#include "common/signal.h"
31#include "common/version.h"
32
33#include "global/signal_handler.h"
7c673cae
FG
34
35#include "msg/Messenger.h"
36#include "mon/MonClient.h"
37
38#include "osdc/Objecter.h"
39
40#include "MDSMap.h"
41
42#include "MDSDaemon.h"
43#include "Server.h"
44#include "Locker.h"
45
46#include "SnapServer.h"
47#include "SnapClient.h"
48
7c673cae
FG
49#include "events/ESession.h"
50#include "events/ESubtreeMap.h"
51
52#include "messages/MMDSMap.h"
53
54#include "messages/MGenericMessage.h"
55
56#include "messages/MMonCommand.h"
57#include "messages/MCommand.h"
58#include "messages/MCommandReply.h"
59
60#include "auth/AuthAuthorizeHandler.h"
61#include "auth/RotatingKeyRing.h"
62#include "auth/KeyRing.h"
63
7c673cae
FG
64#include "perfglue/cpu_profiler.h"
65#include "perfglue/heap_profiler.h"
66
67#define dout_context g_ceph_context
68#define dout_subsys ceph_subsys_mds
69#undef dout_prefix
70#define dout_prefix *_dout << "mds." << name << ' '
71
7c673cae 72// cons/des
94b18763 73MDSDaemon::MDSDaemon(boost::string_view n, Messenger *m, MonClient *mc) :
7c673cae
FG
74 Dispatcher(m->cct),
75 mds_lock("MDSDaemon::mds_lock"),
76 stopping(false),
77 timer(m->cct, mds_lock),
78 beacon(m->cct, mc, n),
79 authorize_handler_cluster_registry(new AuthAuthorizeHandlerRegistry(m->cct,
80 m->cct->_conf->auth_supported.empty() ?
81 m->cct->_conf->auth_cluster_required :
82 m->cct->_conf->auth_supported)),
83 authorize_handler_service_registry(new AuthAuthorizeHandlerRegistry(m->cct,
84 m->cct->_conf->auth_supported.empty() ?
85 m->cct->_conf->auth_service_required :
86 m->cct->_conf->auth_supported)),
87 name(n),
88 messenger(m),
89 monc(mc),
90 mgrc(m->cct, m),
91 log_client(m->cct, messenger, &mc->monmap, LogClient::NO_FLAGS),
92 mds_rank(NULL),
94b18763
FG
93 asok_hook(NULL),
94 starttime(mono_clock::now())
7c673cae
FG
95{
96 orig_argc = 0;
97 orig_argv = NULL;
98
99 clog = log_client.create_channel();
100
101 monc->set_messenger(messenger);
102
103 mdsmap = new MDSMap;
104}
105
106MDSDaemon::~MDSDaemon() {
107 Mutex::Locker lock(mds_lock);
108
109 delete mds_rank;
110 mds_rank = NULL;
111 delete mdsmap;
112 mdsmap = NULL;
113
114 delete authorize_handler_service_registry;
115 delete authorize_handler_cluster_registry;
116}
117
118class MDSSocketHook : public AdminSocketHook {
119 MDSDaemon *mds;
120public:
121 explicit MDSSocketHook(MDSDaemon *m) : mds(m) {}
122 bool call(std::string command, cmdmap_t& cmdmap, std::string format,
123 bufferlist& out) override {
124 stringstream ss;
125 bool r = mds->asok_command(command, cmdmap, format, ss);
126 out.append(ss);
127 return r;
128 }
129};
130
131bool MDSDaemon::asok_command(string command, cmdmap_t& cmdmap, string format,
132 ostream& ss)
133{
134 dout(1) << "asok_command: " << command << " (starting...)" << dendl;
135
136 Formatter *f = Formatter::create(format, "json-pretty", "json-pretty");
137 bool handled = false;
138 if (command == "status") {
139 dump_status(f);
140 handled = true;
141 } else {
142 if (mds_rank == NULL) {
143 dout(1) << "Can't run that command on an inactive MDS!" << dendl;
144 f->dump_string("error", "mds_not_active");
145 } else {
146 handled = mds_rank->handle_asok_command(command, cmdmap, f, ss);
147 }
148 }
149 f->flush(ss);
150 delete f;
151
152 dout(1) << "asok_command: " << command << " (complete)" << dendl;
153
154 return handled;
155}
156
157void MDSDaemon::dump_status(Formatter *f)
158{
159 f->open_object_section("status");
160 f->dump_stream("cluster_fsid") << monc->get_fsid();
161 if (mds_rank) {
162 f->dump_int("whoami", mds_rank->get_nodeid());
163 } else {
164 f->dump_int("whoami", MDS_RANK_NONE);
165 }
166
167 f->dump_int("id", monc->get_global_id());
168 f->dump_string("want_state", ceph_mds_state_name(beacon.get_want_state()));
169 f->dump_string("state", ceph_mds_state_name(mdsmap->get_state_gid(mds_gid_t(
170 monc->get_global_id()))));
171 if (mds_rank) {
172 Mutex::Locker l(mds_lock);
173 mds_rank->dump_status(f);
174 }
175
176 f->dump_unsigned("mdsmap_epoch", mdsmap->get_epoch());
177 if (mds_rank) {
178 f->dump_unsigned("osdmap_epoch", mds_rank->get_osd_epoch());
179 f->dump_unsigned("osdmap_epoch_barrier", mds_rank->get_osd_epoch_barrier());
180 } else {
181 f->dump_unsigned("osdmap_epoch", 0);
182 f->dump_unsigned("osdmap_epoch_barrier", 0);
183 }
94b18763
FG
184
185 f->dump_float("uptime", get_uptime().count());
186
7c673cae
FG
187 f->close_section(); // status
188}
189
190void MDSDaemon::set_up_admin_socket()
191{
192 int r;
193 AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
194 assert(asok_hook == nullptr);
195 asok_hook = new MDSSocketHook(this);
196 r = admin_socket->register_command("status", "status", asok_hook,
197 "high-level status of MDS");
198 assert(r == 0);
199 r = admin_socket->register_command("dump_ops_in_flight",
200 "dump_ops_in_flight", asok_hook,
201 "show the ops currently in flight");
202 assert(r == 0);
203 r = admin_socket->register_command("ops",
204 "ops", asok_hook,
205 "show the ops currently in flight");
206 assert(r == 0);
207 r = admin_socket->register_command("dump_blocked_ops", "dump_blocked_ops",
208 asok_hook,
209 "show the blocked ops currently in flight");
210 assert(r == 0);
211 r = admin_socket->register_command("dump_historic_ops", "dump_historic_ops",
212 asok_hook,
213 "show slowest recent ops");
214 assert(r == 0);
215 r = admin_socket->register_command("dump_historic_ops_by_duration", "dump_historic_ops_by_duration",
216 asok_hook,
217 "show slowest recent ops, sorted by op duration");
218 assert(r == 0);
219 r = admin_socket->register_command("scrub_path",
220 "scrub_path name=path,type=CephString "
221 "name=scrubops,type=CephChoices,"
222 "strings=force|recursive|repair,n=N,req=false",
223 asok_hook,
224 "scrub an inode and output results");
225 assert(r == 0);
226 r = admin_socket->register_command("tag path",
227 "tag path name=path,type=CephString"
228 " name=tag,type=CephString",
229 asok_hook,
230 "Apply scrub tag recursively");
231 assert(r == 0);
232 r = admin_socket->register_command("flush_path",
233 "flush_path name=path,type=CephString",
234 asok_hook,
235 "flush an inode (and its dirfrags)");
236 assert(r == 0);
237 r = admin_socket->register_command("export dir",
238 "export dir "
239 "name=path,type=CephString "
240 "name=rank,type=CephInt",
241 asok_hook,
242 "migrate a subtree to named MDS");
243 assert(r == 0);
244 r = admin_socket->register_command("dump cache",
245 "dump cache name=path,type=CephString,req=false",
246 asok_hook,
247 "dump metadata cache (optionally to a file)");
248 assert(r == 0);
181888fb
FG
249 r = admin_socket->register_command("cache status",
250 "cache status",
251 asok_hook,
252 "show cache status");
253 assert(r == 0);
7c673cae
FG
254 r = admin_socket->register_command("dump tree",
255 "dump tree "
256 "name=root,type=CephString,req=true "
257 "name=depth,type=CephInt,req=false ",
258 asok_hook,
259 "dump metadata cache for subtree");
260 assert(r == 0);
28e407b8
AA
261 r = admin_socket->register_command("dump loads",
262 "dump loads",
263 asok_hook,
264 "dump metadata loads");
265 assert(r == 0);
7c673cae
FG
266 r = admin_socket->register_command("session evict",
267 "session evict name=client_id,type=CephString",
268 asok_hook,
269 "Evict a CephFS client");
270 assert(r == 0);
271 r = admin_socket->register_command("osdmap barrier",
272 "osdmap barrier name=target_epoch,type=CephInt",
273 asok_hook,
274 "Wait until the MDS has this OSD map epoch");
275 assert(r == 0);
276 r = admin_socket->register_command("session ls",
277 "session ls",
278 asok_hook,
279 "Enumerate connected CephFS clients");
280 assert(r == 0);
281 r = admin_socket->register_command("flush journal",
282 "flush journal",
283 asok_hook,
284 "Flush the journal to the backing store");
285 assert(r == 0);
286 r = admin_socket->register_command("force_readonly",
287 "force_readonly",
288 asok_hook,
289 "Force MDS to read-only mode");
290 assert(r == 0);
291 r = admin_socket->register_command("get subtrees",
292 "get subtrees",
293 asok_hook,
294 "Return the subtree map");
295 assert(r == 0);
296 r = admin_socket->register_command("dirfrag split",
297 "dirfrag split "
298 "name=path,type=CephString,req=true "
299 "name=frag,type=CephString,req=true "
300 "name=bits,type=CephInt,req=true ",
301 asok_hook,
302 "Fragment directory by path");
303 assert(r == 0);
304 r = admin_socket->register_command("dirfrag merge",
305 "dirfrag merge "
306 "name=path,type=CephString,req=true "
307 "name=frag,type=CephString,req=true",
308 asok_hook,
309 "De-fragment directory by path");
310 assert(r == 0);
311 r = admin_socket->register_command("dirfrag ls",
312 "dirfrag ls "
313 "name=path,type=CephString,req=true",
314 asok_hook,
315 "List fragments in directory");
316 assert(r == 0);
317}
318
319void MDSDaemon::clean_up_admin_socket()
320{
321 AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
322 admin_socket->unregister_command("status");
323 admin_socket->unregister_command("dump_ops_in_flight");
324 admin_socket->unregister_command("ops");
325 admin_socket->unregister_command("dump_blocked_ops");
326 admin_socket->unregister_command("dump_historic_ops");
327 admin_socket->unregister_command("dump_historic_ops_by_duration");
328 admin_socket->unregister_command("scrub_path");
329 admin_socket->unregister_command("tag path");
330 admin_socket->unregister_command("flush_path");
331 admin_socket->unregister_command("export dir");
332 admin_socket->unregister_command("dump cache");
181888fb 333 admin_socket->unregister_command("cache status");
7c673cae 334 admin_socket->unregister_command("dump tree");
28e407b8 335 admin_socket->unregister_command("dump loads");
7c673cae
FG
336 admin_socket->unregister_command("session evict");
337 admin_socket->unregister_command("osdmap barrier");
338 admin_socket->unregister_command("session ls");
339 admin_socket->unregister_command("flush journal");
340 admin_socket->unregister_command("force_readonly");
341 admin_socket->unregister_command("get subtrees");
342 admin_socket->unregister_command("dirfrag split");
343 admin_socket->unregister_command("dirfrag merge");
344 admin_socket->unregister_command("dirfrag ls");
345 delete asok_hook;
346 asok_hook = NULL;
347}
348
349const char** MDSDaemon::get_tracked_conf_keys() const
350{
351 static const char* KEYS[] = {
352 "mds_op_complaint_time", "mds_op_log_threshold",
353 "mds_op_history_size", "mds_op_history_duration",
354 "mds_enable_op_tracker",
355 "mds_log_pause",
356 // clog & admin clog
357 "clog_to_monitors",
358 "clog_to_syslog",
359 "clog_to_syslog_facility",
360 "clog_to_syslog_level",
361 // PurgeQueue
362 "mds_max_purge_ops",
363 "mds_max_purge_ops_per_pg",
364 "mds_max_purge_files",
28e407b8 365 "mds_inject_migrator_session_race",
1adf2230 366 "mds_inject_migrator_message_loss",
7c673cae
FG
367 "clog_to_graylog",
368 "clog_to_graylog_host",
369 "clog_to_graylog_port",
370 "host",
371 "fsid",
372 NULL
373 };
374 return KEYS;
375}
376
377void MDSDaemon::handle_conf_change(const struct md_config_t *conf,
378 const std::set <std::string> &changed)
379{
380 // We may be called within mds_lock (via `tell`) or outwith the
381 // lock (via admin socket `config set`), so handle either case.
382 const bool initially_locked = mds_lock.is_locked_by_me();
383 if (!initially_locked) {
384 mds_lock.Lock();
385 }
386
387 if (changed.count("mds_op_complaint_time") ||
388 changed.count("mds_op_log_threshold")) {
389 if (mds_rank) {
390 mds_rank->op_tracker.set_complaint_and_threshold(conf->mds_op_complaint_time,
391 conf->mds_op_log_threshold);
392 }
393 }
394 if (changed.count("mds_op_history_size") ||
395 changed.count("mds_op_history_duration")) {
396 if (mds_rank) {
397 mds_rank->op_tracker.set_history_size_and_duration(conf->mds_op_history_size,
398 conf->mds_op_history_duration);
399 }
400 }
401 if (changed.count("mds_enable_op_tracker")) {
402 if (mds_rank) {
403 mds_rank->op_tracker.set_tracking(conf->mds_enable_op_tracker);
404 }
405 }
406 if (changed.count("clog_to_monitors") ||
407 changed.count("clog_to_syslog") ||
408 changed.count("clog_to_syslog_level") ||
409 changed.count("clog_to_syslog_facility") ||
410 changed.count("clog_to_graylog") ||
411 changed.count("clog_to_graylog_host") ||
412 changed.count("clog_to_graylog_port") ||
413 changed.count("host") ||
414 changed.count("fsid")) {
415 if (mds_rank) {
416 mds_rank->update_log_config();
417 }
418 }
419
420 if (!g_conf->mds_log_pause && changed.count("mds_log_pause")) {
421 if (mds_rank) {
422 mds_rank->mdlog->kick_submitter();
423 }
424 }
425
426 if (mds_rank) {
427 mds_rank->handle_conf_change(conf, changed);
428 }
429
430 if (!initially_locked) {
431 mds_lock.Unlock();
432 }
433}
434
435
436int MDSDaemon::init()
437{
438 dout(10) << sizeof(MDSCacheObject) << "\tMDSCacheObject" << dendl;
439 dout(10) << sizeof(CInode) << "\tCInode" << dendl;
440 dout(10) << sizeof(elist<void*>::item) << "\t elist<>::item *7=" << 7*sizeof(elist<void*>::item) << dendl;
94b18763
FG
441 dout(10) << sizeof(CInode::mempool_inode) << "\t inode " << dendl;
442 dout(10) << sizeof(CInode::mempool_old_inode) << "\t old_inode " << dendl;
7c673cae
FG
443 dout(10) << sizeof(nest_info_t) << "\t nest_info_t " << dendl;
444 dout(10) << sizeof(frag_info_t) << "\t frag_info_t " << dendl;
445 dout(10) << sizeof(SimpleLock) << "\t SimpleLock *5=" << 5*sizeof(SimpleLock) << dendl;
446 dout(10) << sizeof(ScatterLock) << "\t ScatterLock *3=" << 3*sizeof(ScatterLock) << dendl;
447 dout(10) << sizeof(CDentry) << "\tCDentry" << dendl;
448 dout(10) << sizeof(elist<void*>::item) << "\t elist<>::item" << dendl;
449 dout(10) << sizeof(SimpleLock) << "\t SimpleLock" << dendl;
450 dout(10) << sizeof(CDir) << "\tCDir " << dendl;
451 dout(10) << sizeof(elist<void*>::item) << "\t elist<>::item *2=" << 2*sizeof(elist<void*>::item) << dendl;
452 dout(10) << sizeof(fnode_t) << "\t fnode_t " << dendl;
453 dout(10) << sizeof(nest_info_t) << "\t nest_info_t *2" << dendl;
454 dout(10) << sizeof(frag_info_t) << "\t frag_info_t *2" << dendl;
455 dout(10) << sizeof(Capability) << "\tCapability " << dendl;
456 dout(10) << sizeof(xlist<void*>::item) << "\t xlist<>::item *2=" << 2*sizeof(xlist<void*>::item) << dendl;
457
458 messenger->add_dispatcher_tail(&beacon);
459 messenger->add_dispatcher_tail(this);
460
461 // get monmap
462 monc->set_messenger(messenger);
463
464 monc->set_want_keys(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD |
465 CEPH_ENTITY_TYPE_MDS | CEPH_ENTITY_TYPE_MGR);
466 int r = 0;
467 r = monc->init();
468 if (r < 0) {
469 derr << "ERROR: failed to get monmap: " << cpp_strerror(-r) << dendl;
470 mds_lock.Lock();
471 suicide();
472 mds_lock.Unlock();
473 return r;
474 }
475
476 // tell monc about log_client so it will know about mon session resets
477 monc->set_log_client(&log_client);
478
479 r = monc->authenticate();
480 if (r < 0) {
481 derr << "ERROR: failed to authenticate: " << cpp_strerror(-r) << dendl;
482 mds_lock.Lock();
483 suicide();
484 mds_lock.Unlock();
485 return r;
486 }
487
488 int rotating_auth_attempts = 0;
489 while (monc->wait_auth_rotating(30.0) < 0) {
490 if (++rotating_auth_attempts <= g_conf->max_rotating_auth_attempts) {
491 derr << "unable to obtain rotating service keys; retrying" << dendl;
492 continue;
493 }
494 derr << "ERROR: failed to refresh rotating keys, "
495 << "maximum retry time reached." << dendl;
496 mds_lock.Lock();
497 suicide();
498 mds_lock.Unlock();
499 return -ETIMEDOUT;
500 }
501
502 mgrc.init();
503 messenger->add_dispatcher_head(&mgrc);
504
505 mds_lock.Lock();
506 if (beacon.get_want_state() == CEPH_MDS_STATE_DNE) {
507 dout(4) << __func__ << ": terminated already, dropping out" << dendl;
508 mds_lock.Unlock();
509 return 0;
510 }
511
512 monc->sub_want("mdsmap", 0, 0);
513 monc->sub_want("mgrmap", 0, 0);
514 monc->renew_subs();
515
516 mds_lock.Unlock();
517
518 // Set up admin socket before taking mds_lock, so that ordering
519 // is consistent (later we take mds_lock within asok callbacks)
520 set_up_admin_socket();
521 g_conf->add_observer(this);
522 mds_lock.Lock();
523 if (beacon.get_want_state() == MDSMap::STATE_DNE) {
524 suicide(); // we could do something more graceful here
525 dout(4) << __func__ << ": terminated already, dropping out" << dendl;
526 mds_lock.Unlock();
527 return 0;
528 }
529
530 timer.init();
531
532 beacon.init(mdsmap);
533 messenger->set_myname(entity_name_t::MDS(MDS_RANK_NONE));
534
535 // schedule tick
536 reset_tick();
537 mds_lock.Unlock();
538
539 return 0;
540}
541
542void MDSDaemon::reset_tick()
543{
544 // cancel old
545 if (tick_event) timer.cancel_event(tick_event);
546
547 // schedule
3efd9988
FG
548 tick_event = timer.add_event_after(
549 g_conf->mds_tick_interval,
550 new FunctionContext([this](int) {
551 assert(mds_lock.is_locked_by_me());
552 tick();
553 }));
7c673cae
FG
554}
555
556void MDSDaemon::tick()
557{
7c673cae
FG
558 // reschedule
559 reset_tick();
560
561 // Call through to subsystems' tick functions
562 if (mds_rank) {
563 mds_rank->tick();
564 }
565}
566
567void MDSDaemon::send_command_reply(MCommand *m, MDSRank *mds_rank,
568 int r, bufferlist outbl,
94b18763 569 boost::string_view outs)
7c673cae
FG
570{
571 Session *session = static_cast<Session *>(m->get_connection()->get_priv());
572 assert(session != NULL);
573 // If someone is using a closed session for sending commands (e.g.
574 // the ceph CLI) then we should feel free to clean up this connection
575 // as soon as we've sent them a response.
94b18763
FG
576 const bool live_session =
577 session->get_state_seq() > 0 &&
578 mds_rank &&
579 mds_rank->sessionmap.get_session(session->info.inst.name);
7c673cae
FG
580
581 if (!live_session) {
582 // This session only existed to issue commands, so terminate it
583 // as soon as we can.
584 assert(session->is_closed());
585 session->connection->mark_disposable();
7c673cae 586 }
94b18763 587 session->put();
7c673cae
FG
588
589 MCommandReply *reply = new MCommandReply(r, outs);
590 reply->set_tid(m->get_tid());
591 reply->set_data(outbl);
592 m->get_connection()->send_message(reply);
593}
594
595/* This function DOES put the passed message before returning*/
596void MDSDaemon::handle_command(MCommand *m)
597{
598 Session *session = static_cast<Session *>(m->get_connection()->get_priv());
599 assert(session != NULL);
600
601 int r = 0;
602 cmdmap_t cmdmap;
603 std::stringstream ss;
604 std::string outs;
605 bufferlist outbl;
606 Context *run_after = NULL;
607 bool need_reply = true;
608
609 if (!session->auth_caps.allow_all()) {
610 dout(1) << __func__
611 << ": received command from client without `tell` capability: "
612 << m->get_connection()->peer_addr << dendl;
613
614 ss << "permission denied";
615 r = -EPERM;
616 } else if (m->cmd.empty()) {
617 r = -EINVAL;
618 ss << "no command given";
619 outs = ss.str();
620 } else if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
621 r = -EINVAL;
622 outs = ss.str();
623 } else {
624 r = _handle_command(cmdmap, m, &outbl, &outs, &run_after, &need_reply);
625 }
94b18763 626 session->put();
7c673cae
FG
627
628 if (need_reply) {
629 send_command_reply(m, mds_rank, r, outbl, outs);
630 }
631
632 if (run_after) {
633 run_after->complete(0);
634 }
635
636 m->put();
637}
638
639
640struct MDSCommand {
641 string cmdstring;
642 string helpstring;
643 string module;
644 string perm;
645 string availability;
646} mds_commands[] = {
647
648#define COMMAND(parsesig, helptext, module, perm, availability) \
649 {parsesig, helptext, module, perm, availability},
650
651COMMAND("injectargs " \
652 "name=injected_args,type=CephString,n=N",
653 "inject configuration arguments into running MDS",
654 "mds", "*", "cli,rest")
c07f9fc5
FG
655COMMAND("config set " \
656 "name=key,type=CephString name=value,type=CephString",
657 "Set a configuration option at runtime (not persistent)",
658 "mds", "*", "cli,rest")
7c673cae
FG
659COMMAND("exit",
660 "Terminate this MDS",
661 "mds", "*", "cli,rest")
662COMMAND("respawn",
663 "Restart this MDS",
664 "mds", "*", "cli,rest")
665COMMAND("session kill " \
666 "name=session_id,type=CephInt",
667 "End a client session",
668 "mds", "*", "cli,rest")
669COMMAND("cpu_profiler " \
670 "name=arg,type=CephChoices,strings=status|flush",
671 "run cpu profiling on daemon", "mds", "rw", "cli,rest")
672COMMAND("session ls " \
673 "name=filters,type=CephString,n=N,req=false",
674 "List client sessions", "mds", "r", "cli,rest")
31f18b77
FG
675COMMAND("client ls " \
676 "name=filters,type=CephString,n=N,req=false",
677 "List client sessions", "mds", "r", "cli,rest")
7c673cae
FG
678COMMAND("session evict " \
679 "name=filters,type=CephString,n=N,req=false",
680 "Evict client session(s)", "mds", "rw", "cli,rest")
31f18b77
FG
681COMMAND("client evict " \
682 "name=filters,type=CephString,n=N,req=false",
683 "Evict client session(s)", "mds", "rw", "cli,rest")
7c673cae
FG
684COMMAND("damage ls",
685 "List detected metadata damage", "mds", "r", "cli,rest")
686COMMAND("damage rm name=damage_id,type=CephInt",
687 "Remove a damage table entry", "mds", "rw", "cli,rest")
c07f9fc5 688COMMAND("version", "report version of MDS", "mds", "r", "cli,rest")
7c673cae
FG
689COMMAND("heap " \
690 "name=heapcmd,type=CephChoices,strings=dump|start_profiler|stop_profiler|release|stats", \
691 "show heap usage info (available only if compiled with tcmalloc)", \
692 "mds", "*", "cli,rest")
693};
694
695
696int MDSDaemon::_handle_command(
697 const cmdmap_t &cmdmap,
698 MCommand *m,
699 bufferlist *outbl,
700 std::string *outs,
701 Context **run_later,
702 bool *need_reply)
703{
704 assert(outbl != NULL);
705 assert(outs != NULL);
706
707 class SuicideLater : public Context
708 {
709 MDSDaemon *mds;
710
711 public:
712 explicit SuicideLater(MDSDaemon *mds_) : mds(mds_) {}
713 void finish(int r) override {
714 // Wait a little to improve chances of caller getting
715 // our response before seeing us disappear from mdsmap
716 sleep(1);
717
718 mds->suicide();
719 }
720 };
721
722
723 class RespawnLater : public Context
724 {
725 MDSDaemon *mds;
726
727 public:
728
729 explicit RespawnLater(MDSDaemon *mds_) : mds(mds_) {}
730 void finish(int r) override {
731 // Wait a little to improve chances of caller getting
732 // our response before seeing us disappear from mdsmap
733 sleep(1);
734
735 mds->respawn();
736 }
737 };
738
739 std::stringstream ds;
740 std::stringstream ss;
741 std::string prefix;
c07f9fc5
FG
742 std::string format;
743 std::unique_ptr<Formatter> f(Formatter::create(format));
7c673cae
FG
744 cmd_getval(cct, cmdmap, "prefix", prefix);
745
746 int r = 0;
747
748 if (prefix == "get_command_descriptions") {
749 int cmdnum = 0;
c07f9fc5 750 std::unique_ptr<JSONFormatter> f(ceph::make_unique<JSONFormatter>());
7c673cae
FG
751 f->open_object_section("command_descriptions");
752 for (MDSCommand *cp = mds_commands;
753 cp < &mds_commands[ARRAY_SIZE(mds_commands)]; cp++) {
754
755 ostringstream secname;
756 secname << "cmd" << setfill('0') << std::setw(3) << cmdnum;
c07f9fc5 757 dump_cmddesc_to_json(f.get(), secname.str(), cp->cmdstring, cp->helpstring,
7c673cae
FG
758 cp->module, cp->perm, cp->availability, 0);
759 cmdnum++;
760 }
761 f->close_section(); // command_descriptions
762
763 f->flush(ds);
c07f9fc5
FG
764 goto out;
765 }
766
767 cmd_getval(cct, cmdmap, "format", format);
768 if (prefix == "version") {
769 if (f) {
770 f->open_object_section("version");
771 f->dump_string("version", pretty_version_to_str());
772 f->close_section();
773 f->flush(ds);
774 } else {
775 ds << pretty_version_to_str();
776 }
7c673cae
FG
777 } else if (prefix == "injectargs") {
778 vector<string> argsvec;
779 cmd_getval(cct, cmdmap, "injected_args", argsvec);
780
781 if (argsvec.empty()) {
782 r = -EINVAL;
783 ss << "ignoring empty injectargs";
784 goto out;
785 }
786 string args = argsvec.front();
787 for (vector<string>::iterator a = ++argsvec.begin(); a != argsvec.end(); ++a)
788 args += " " + *a;
789 r = cct->_conf->injectargs(args, &ss);
c07f9fc5
FG
790 } else if (prefix == "config set") {
791 std::string key;
792 cmd_getval(cct, cmdmap, "key", key);
793 std::string val;
794 cmd_getval(cct, cmdmap, "value", val);
795 r = cct->_conf->set_val(key, val, true, &ss);
d2e6a577
FG
796 if (r == 0) {
797 cct->_conf->apply_changes(nullptr);
798 }
7c673cae
FG
799 } else if (prefix == "exit") {
800 // We will send response before executing
801 ss << "Exiting...";
802 *run_later = new SuicideLater(this);
c07f9fc5 803 } else if (prefix == "respawn") {
7c673cae
FG
804 // We will send response before executing
805 ss << "Respawning...";
806 *run_later = new RespawnLater(this);
807 } else if (prefix == "session kill") {
808 if (mds_rank == NULL) {
809 r = -EINVAL;
810 ss << "MDS not active";
811 goto out;
812 }
813 // FIXME harmonize `session kill` with admin socket session evict
814 int64_t session_id = 0;
815 bool got = cmd_getval(cct, cmdmap, "session_id", session_id);
816 assert(got);
31f18b77
FG
817 bool killed = mds_rank->evict_client(session_id, false,
818 g_conf->mds_session_blacklist_on_evict,
819 ss);
7c673cae
FG
820 if (!killed)
821 r = -ENOENT;
822 } else if (prefix == "heap") {
823 if (!ceph_using_tcmalloc()) {
824 r = -EOPNOTSUPP;
825 ss << "could not issue heap profiler command -- not using tcmalloc!";
826 } else {
827 string heapcmd;
828 cmd_getval(cct, cmdmap, "heapcmd", heapcmd);
829 vector<string> heapcmd_vec;
830 get_str_vec(heapcmd, heapcmd_vec);
831 ceph_heap_profiler_handle_command(heapcmd_vec, ds);
832 }
833 } else if (prefix == "cpu_profiler") {
834 string arg;
835 cmd_getval(cct, cmdmap, "arg", arg);
836 vector<string> argvec;
837 get_str_vec(arg, argvec);
838 cpu_profiler_handle_command(argvec, ds);
839 } else {
840 // Give MDSRank a shot at the command
b32b8144
FG
841 if (!mds_rank) {
842 ss << "MDS not active";
843 r = -EINVAL;
844 }
845 else {
7c673cae
FG
846 bool handled = mds_rank->handle_command(cmdmap, m, &r, &ds, &ss,
847 need_reply);
b32b8144
FG
848 if (!handled) {
849 // MDSDaemon doesn't know this command
850 ss << "unrecognized command! " << prefix;
851 r = -EINVAL;
7c673cae
FG
852 }
853 }
7c673cae
FG
854 }
855
856out:
857 *outs = ss.str();
858 outbl->append(ds);
859 return r;
860}
861
862/* This function deletes the passed message before returning. */
863
864void MDSDaemon::handle_mds_map(MMDSMap *m)
865{
866 version_t epoch = m->get_epoch();
7c673cae
FG
867
868 // is it new?
869 if (epoch <= mdsmap->get_epoch()) {
1adf2230
AA
870 dout(5) << "handle_mds_map old map epoch " << epoch << " <= "
871 << mdsmap->get_epoch() << ", discarding" << dendl;
7c673cae
FG
872 m->put();
873 return;
874 }
875
1adf2230
AA
876 dout(1) << "Updating MDS map to version " << epoch << " from " << m->get_source() << dendl;
877
7c673cae
FG
878 entity_addr_t addr;
879
880 // keep old map, for a moment
881 MDSMap *oldmap = mdsmap;
882
883 // decode and process
884 mdsmap = new MDSMap;
885 mdsmap->decode(m->get_encoded());
886 const MDSMap::DaemonState new_state = mdsmap->get_state_gid(mds_gid_t(monc->get_global_id()));
887 const int incarnation = mdsmap->get_inc_gid(mds_gid_t(monc->get_global_id()));
888
889 monc->sub_got("mdsmap", mdsmap->get_epoch());
890
891 // Calculate my effective rank (either my owned rank or my
892 // standby_for_rank if in standby replay)
893 mds_rank_t whoami = mdsmap->get_rank_gid(mds_gid_t(monc->get_global_id()));
894
895 // verify compatset
1adf2230 896 CompatSet mdsmap_compat(MDSMap::get_compat_set_all());
7c673cae
FG
897 dout(10) << " my compat " << mdsmap_compat << dendl;
898 dout(10) << " mdsmap compat " << mdsmap->compat << dendl;
899 if (!mdsmap_compat.writeable(mdsmap->compat)) {
900 dout(0) << "handle_mds_map mdsmap compatset " << mdsmap->compat
901 << " not writeable with daemon features " << mdsmap_compat
902 << ", killing myself" << dendl;
903 suicide();
904 goto out;
905 }
906
907 // mark down any failed peers
908 for (map<mds_gid_t,MDSMap::mds_info_t>::const_iterator p = oldmap->get_mds_info().begin();
909 p != oldmap->get_mds_info().end();
910 ++p) {
911 if (mdsmap->get_mds_info().count(p->first) == 0) {
912 dout(10) << " peer mds gid " << p->first << " removed from map" << dendl;
913 messenger->mark_down(p->second.addr);
914 }
915 }
916
917 if (whoami == MDS_RANK_NONE &&
918 new_state == MDSMap::STATE_STANDBY_REPLAY) {
919 whoami = mdsmap->get_mds_info_gid(mds_gid_t(monc->get_global_id())).standby_for_rank;
920 }
921
922 // see who i am
923 addr = messenger->get_myaddr();
c07f9fc5 924 dout(10) << "map says I am " << addr << " mds." << whoami << "." << incarnation
7c673cae
FG
925 << " state " << ceph_mds_state_name(new_state) << dendl;
926
927 if (whoami == MDS_RANK_NONE) {
928 if (mds_rank != NULL) {
c07f9fc5 929 const auto myid = monc->get_global_id();
7c673cae
FG
930 // We have entered a rank-holding state, we shouldn't be back
931 // here!
932 if (g_conf->mds_enforce_unique_name) {
933 if (mds_gid_t existing = mdsmap->find_mds_gid_by_name(name)) {
934 const MDSMap::mds_info_t& i = mdsmap->get_info_gid(existing);
c07f9fc5 935 if (i.global_id > myid) {
1adf2230 936 dout(1) << "Map replaced me with another mds." << whoami
c07f9fc5
FG
937 << " with gid (" << i.global_id << ") larger than myself ("
938 << myid << "); quitting!" << dendl;
7c673cae
FG
939 // Call suicide() rather than respawn() because if someone else
940 // has taken our ID, we don't want to keep restarting and
941 // fighting them for the ID.
942 suicide();
943 m->put();
944 return;
945 }
946 }
947 }
948
1adf2230 949 dout(1) << "Map removed me (mds." << whoami << " gid:"
c07f9fc5 950 << myid << ") from cluster due to lost contact; respawning" << dendl;
7c673cae
FG
951 respawn();
952 }
953 // MDSRank not active: process the map here to see if we have
954 // been assigned a rank.
955 dout(10) << __func__ << ": handling map in rankless mode" << dendl;
956 _handle_mds_map(oldmap);
957 } else {
958
959 // Did we already hold a different rank? MDSMonitor shouldn't try
960 // to change that out from under me!
961 if (mds_rank && whoami != mds_rank->get_nodeid()) {
962 derr << "Invalid rank transition " << mds_rank->get_nodeid() << "->"
963 << whoami << dendl;
964 respawn();
965 }
966
967 // Did I previously not hold a rank? Initialize!
968 if (mds_rank == NULL) {
969 mds_rank = new MDSRankDispatcher(whoami, mds_lock, clog,
970 timer, beacon, mdsmap, messenger, monc,
971 new FunctionContext([this](int r){respawn();}),
972 new FunctionContext([this](int r){suicide();}));
973 dout(10) << __func__ << ": initializing MDS rank "
974 << mds_rank->get_nodeid() << dendl;
975 mds_rank->init();
976 }
977
978 // MDSRank is active: let him process the map, we have no say.
979 dout(10) << __func__ << ": handling map as rank "
980 << mds_rank->get_nodeid() << dendl;
981 mds_rank->handle_mds_map(m, oldmap);
982 }
983
984out:
985 beacon.notify_mdsmap(mdsmap);
986 m->put();
987 delete oldmap;
988}
989
990void MDSDaemon::_handle_mds_map(MDSMap *oldmap)
991{
992 MDSMap::DaemonState new_state = mdsmap->get_state_gid(mds_gid_t(monc->get_global_id()));
993
994 // Normal rankless case, we're marked as standby
995 if (new_state == MDSMap::STATE_STANDBY) {
996 beacon.set_want_state(mdsmap, new_state);
1adf2230 997 dout(1) << "Map has assigned me to become a standby" << dendl;
7c673cae
FG
998
999 return;
1000 }
1001
1002 // Case where we thought we were standby, but MDSMap disagrees
1003 if (beacon.get_want_state() == MDSMap::STATE_STANDBY) {
1004 dout(10) << "dropped out of mdsmap, try to re-add myself" << dendl;
1005 new_state = MDSMap::STATE_BOOT;
1006 beacon.set_want_state(mdsmap, new_state);
1007 return;
1008 }
1009
1010 // Case where we have sent a boot beacon that isn't reflected yet
1011 if (beacon.get_want_state() == MDSMap::STATE_BOOT) {
1012 dout(10) << "not in map yet" << dendl;
1013 }
1014}
1015
1016void MDSDaemon::handle_signal(int signum)
1017{
1018 assert(signum == SIGINT || signum == SIGTERM);
1019 derr << "*** got signal " << sig_str(signum) << " ***" << dendl;
1020 {
1021 Mutex::Locker l(mds_lock);
1022 if (stopping) {
1023 return;
1024 }
1025 suicide();
1026 }
1027}
1028
1029void MDSDaemon::suicide()
1030{
1031 assert(mds_lock.is_locked());
1032
1033 // make sure we don't suicide twice
1034 assert(stopping == false);
1035 stopping = true;
1036
1adf2230 1037 dout(1) << "suicide! Wanted state "
7c673cae
FG
1038 << ceph_mds_state_name(beacon.get_want_state()) << dendl;
1039
1040 if (tick_event) {
1041 timer.cancel_event(tick_event);
1042 tick_event = 0;
1043 }
1044
1045 //because add_observer is called after set_up_admin_socket
1046 //so we can use asok_hook to avoid assert in the remove_observer
1047 if (asok_hook != NULL)
1048 g_conf->remove_observer(this);
1049
1050 clean_up_admin_socket();
1051
1052 // Inform MDS we are going away, then shut down beacon
1053 beacon.set_want_state(mdsmap, MDSMap::STATE_DNE);
1054 if (!mdsmap->is_dne_gid(mds_gid_t(monc->get_global_id()))) {
1055 // Notify the MDSMonitor that we're dying, so that it doesn't have to
1056 // wait for us to go laggy. Only do this if we're actually in the
1057 // MDSMap, because otherwise the MDSMonitor will drop our message.
1058 beacon.send_and_wait(1);
1059 }
1060 beacon.shutdown();
1061
1062 mgrc.shutdown();
1063
1064 if (mds_rank) {
1065 mds_rank->shutdown();
1066 } else {
1067 timer.shutdown();
1068
1069 monc->shutdown();
1070 messenger->shutdown();
1071 }
1072}
1073
1074void MDSDaemon::respawn()
1075{
1adf2230
AA
1076 dout(1) << "respawn!" << dendl;
1077
1078 /* Dump recent in case the MDS was stuck doing something which caused it to
1079 * be removed from the MDSMap leading to respawn. */
1080 g_ceph_context->_log->dump_recent();
7c673cae
FG
1081
1082 char *new_argv[orig_argc+1];
1083 dout(1) << " e: '" << orig_argv[0] << "'" << dendl;
1084 for (int i=0; i<orig_argc; i++) {
1085 new_argv[i] = (char *)orig_argv[i];
1086 dout(1) << " " << i << ": '" << orig_argv[i] << "'" << dendl;
1087 }
1088 new_argv[orig_argc] = NULL;
1089
1090 /* Determine the path to our executable, test if Linux /proc/self/exe exists.
1091 * This allows us to exec the same executable even if it has since been
1092 * unlinked.
1093 */
1094 char exe_path[PATH_MAX] = "";
1095 if (readlink(PROCPREFIX "/proc/self/exe", exe_path, PATH_MAX-1) == -1) {
1096 /* Print CWD for the user's interest */
1097 char buf[PATH_MAX];
1098 char *cwd = getcwd(buf, sizeof(buf));
1099 assert(cwd);
1100 dout(1) << " cwd " << cwd << dendl;
1101
1102 /* Fall back to a best-effort: just running in our CWD */
1103 strncpy(exe_path, orig_argv[0], PATH_MAX-1);
1104 } else {
1105 dout(1) << "respawning with exe " << exe_path << dendl;
1106 strcpy(exe_path, PROCPREFIX "/proc/self/exe");
1107 }
1108
1109 dout(1) << " exe_path " << exe_path << dendl;
1110
1111 unblock_all_signals(NULL);
1112 execv(exe_path, new_argv);
1113
1114 dout(0) << "respawn execv " << orig_argv[0]
1115 << " failed with " << cpp_strerror(errno) << dendl;
1116
1117 // We have to assert out here, because suicide() returns, and callers
1118 // to respawn expect it never to return.
1119 ceph_abort();
1120}
1121
1122
1123
1124bool MDSDaemon::ms_dispatch(Message *m)
1125{
1126 Mutex::Locker l(mds_lock);
1127 if (stopping) {
1128 return false;
1129 }
1130
1131 // Drop out early if shutting down
1132 if (beacon.get_want_state() == CEPH_MDS_STATE_DNE) {
1133 dout(10) << " stopping, discarding " << *m << dendl;
1134 m->put();
1135 return true;
1136 }
1137
1138 // First see if it's a daemon message
1139 const bool handled_core = handle_core_message(m);
1140 if (handled_core) {
1141 return true;
1142 }
1143
1144 // Not core, try it as a rank message
1145 if (mds_rank) {
1146 return mds_rank->ms_dispatch(m);
1147 } else {
1148 return false;
1149 }
1150}
1151
1152bool MDSDaemon::ms_get_authorizer(int dest_type, AuthAuthorizer **authorizer, bool force_new)
1153{
1154 dout(10) << "MDSDaemon::ms_get_authorizer type="
1155 << ceph_entity_type_name(dest_type) << dendl;
1156
1157 /* monitor authorization is being handled on different layer */
1158 if (dest_type == CEPH_ENTITY_TYPE_MON)
1159 return true;
1160
1161 if (force_new) {
1162 if (monc->wait_auth_rotating(10) < 0)
1163 return false;
1164 }
1165
1166 *authorizer = monc->build_authorizer(dest_type);
1167 return *authorizer != NULL;
1168}
1169
1170
1171/*
1172 * high priority messages we always process
1173 */
1174bool MDSDaemon::handle_core_message(Message *m)
1175{
1176 switch (m->get_type()) {
1177 case CEPH_MSG_MON_MAP:
1178 ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON);
1179 m->put();
1180 break;
1181
1182 // MDS
1183 case CEPH_MSG_MDS_MAP:
1184 ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_MDS);
1185 handle_mds_map(static_cast<MMDSMap*>(m));
1186 break;
1187
1188 // OSD
1189 case MSG_COMMAND:
1190 handle_command(static_cast<MCommand*>(m));
1191 break;
1192 case CEPH_MSG_OSD_MAP:
1193 ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD);
1194
1195 if (mds_rank) {
1196 mds_rank->handle_osd_map();
1197 }
1198 m->put();
1199 break;
1200
1201 case MSG_MON_COMMAND:
1202 ALLOW_MESSAGES_FROM(CEPH_ENTITY_TYPE_MON);
1203 clog->warn() << "dropping `mds tell` command from legacy monitor";
1204 m->put();
1205 break;
1206
1207 default:
1208 return false;
1209 }
1210 return true;
1211}
1212
1213void MDSDaemon::ms_handle_connect(Connection *con)
1214{
1215}
1216
1217bool MDSDaemon::ms_handle_reset(Connection *con)
1218{
1219 if (con->get_peer_type() != CEPH_ENTITY_TYPE_CLIENT)
1220 return false;
1221
1222 Mutex::Locker l(mds_lock);
1223 if (stopping) {
1224 return false;
1225 }
1226 dout(5) << "ms_handle_reset on " << con->get_peer_addr() << dendl;
1227 if (beacon.get_want_state() == CEPH_MDS_STATE_DNE)
1228 return false;
1229
1230 Session *session = static_cast<Session *>(con->get_priv());
1231 if (session) {
1232 if (session->is_closed()) {
1233 dout(3) << "ms_handle_reset closing connection for session " << session->info.inst << dendl;
1234 con->mark_down();
1235 con->set_priv(NULL);
1236 }
1237 session->put();
1238 } else {
1239 con->mark_down();
1240 }
1241 return false;
1242}
1243
1244
1245void MDSDaemon::ms_handle_remote_reset(Connection *con)
1246{
1247 if (con->get_peer_type() != CEPH_ENTITY_TYPE_CLIENT)
1248 return;
1249
1250 Mutex::Locker l(mds_lock);
1251 if (stopping) {
1252 return;
1253 }
1254
1255 dout(5) << "ms_handle_remote_reset on " << con->get_peer_addr() << dendl;
1256 if (beacon.get_want_state() == CEPH_MDS_STATE_DNE)
1257 return;
1258
1259 Session *session = static_cast<Session *>(con->get_priv());
1260 if (session) {
1261 if (session->is_closed()) {
1262 dout(3) << "ms_handle_remote_reset closing connection for session " << session->info.inst << dendl;
1263 con->mark_down();
1264 con->set_priv(NULL);
1265 }
1266 session->put();
1267 }
1268}
1269
1270bool MDSDaemon::ms_handle_refused(Connection *con)
1271{
1272 // do nothing for now
1273 return false;
1274}
1275
1276bool MDSDaemon::ms_verify_authorizer(Connection *con, int peer_type,
1277 int protocol, bufferlist& authorizer_data, bufferlist& authorizer_reply,
28e407b8
AA
1278 bool& is_valid, CryptoKey& session_key,
1279 std::unique_ptr<AuthAuthorizerChallenge> *challenge)
7c673cae
FG
1280{
1281 Mutex::Locker l(mds_lock);
1282 if (stopping) {
1283 return false;
1284 }
1285 if (beacon.get_want_state() == CEPH_MDS_STATE_DNE)
1286 return false;
1287
1288 AuthAuthorizeHandler *authorize_handler = 0;
1289 switch (peer_type) {
1290 case CEPH_ENTITY_TYPE_MDS:
1291 authorize_handler = authorize_handler_cluster_registry->get_handler(protocol);
1292 break;
1293 default:
1294 authorize_handler = authorize_handler_service_registry->get_handler(protocol);
1295 }
1296 if (!authorize_handler) {
1297 dout(0) << "No AuthAuthorizeHandler found for protocol " << protocol << dendl;
1298 is_valid = false;
1299 return true;
1300 }
1301
1302 AuthCapsInfo caps_info;
1303 EntityName name;
1304 uint64_t global_id;
1305
c07f9fc5
FG
1306 RotatingKeyRing *keys = monc->rotating_secrets.get();
1307 if (keys) {
1308 is_valid = authorize_handler->verify_authorizer(
1309 cct, keys,
1310 authorizer_data, authorizer_reply, name, global_id, caps_info,
28e407b8 1311 session_key, nullptr, challenge);
c07f9fc5
FG
1312 } else {
1313 dout(10) << __func__ << " no rotating_keys (yet), denied" << dendl;
1314 is_valid = false;
1315 }
7c673cae
FG
1316
1317 if (is_valid) {
1318 entity_name_t n(con->get_peer_type(), global_id);
1319
1320 // We allow connections and assign Session instances to connections
1321 // even if we have not been assigned a rank, because clients with
1322 // "allow *" are allowed to connect and do 'tell' operations before
1323 // we have a rank.
1324 Session *s = NULL;
1325 if (mds_rank) {
1326 // If we do hold a rank, see if this is an existing client establishing
1327 // a new connection, rather than a new client
1328 s = mds_rank->sessionmap.get_session(n);
1329 }
1330
1331 // Wire up a Session* to this connection
1332 // It doesn't go into a SessionMap instance until it sends an explicit
1333 // request to open a session (initial state of Session is `closed`)
1334 if (!s) {
1335 s = new Session;
1336 s->info.auth_name = name;
1337 s->info.inst.addr = con->get_peer_addr();
1338 s->info.inst.name = n;
1339 dout(10) << " new session " << s << " for " << s->info.inst << " con " << con << dendl;
1340 con->set_priv(s);
1341 s->connection = con;
28e407b8
AA
1342 if (mds_rank) {
1343 mds_rank->kick_waiters_for_any_client_connection();
1344 }
7c673cae
FG
1345 } else {
1346 dout(10) << " existing session " << s << " for " << s->info.inst << " existing con " << s->connection
1347 << ", new/authorizing con " << con << dendl;
1348 con->set_priv(s->get());
1349
1350
1351
1352 // Wait until we fully accept the connection before setting
1353 // s->connection. In particular, if there are multiple incoming
1354 // connection attempts, they will all get their authorizer
1355 // validated, but some of them may "lose the race" and get
1356 // dropped. We only want to consider the winner(s). See
1357 // ms_handle_accept(). This is important for Sessions we replay
1358 // from the journal on recovery that don't have established
1359 // messenger state; we want the con from only the winning
1360 // connect attempt(s). (Normal reconnects that don't follow MDS
1361 // recovery are reconnected to the existing con by the
1362 // messenger.)
1363 }
1364
1365 if (caps_info.allow_all) {
1366 // Flag for auth providers that don't provide cap strings
1367 s->auth_caps.set_allow_all();
b5b8bbf5
FG
1368 } else {
1369 bufferlist::iterator p = caps_info.caps.begin();
1370 string auth_cap_str;
1371 try {
1372 ::decode(auth_cap_str, p);
1373
1374 dout(10) << __func__ << ": parsing auth_cap_str='" << auth_cap_str << "'" << dendl;
1375 std::ostringstream errstr;
1376 if (!s->auth_caps.parse(g_ceph_context, auth_cap_str, &errstr)) {
1377 dout(1) << __func__ << ": auth cap parse error: " << errstr.str()
1378 << " parsing '" << auth_cap_str << "'" << dendl;
1379 clog->warn() << name << " mds cap '" << auth_cap_str
1380 << "' does not parse: " << errstr.str();
1381 is_valid = false;
1382 }
1383 } catch (buffer::error& e) {
1384 // Assume legacy auth, defaults to:
1385 // * permit all filesystem ops
1386 // * permit no `tell` ops
1387 dout(1) << __func__ << ": cannot decode auth caps bl of length " << caps_info.caps.length() << dendl;
1388 is_valid = false;
7c673cae 1389 }
7c673cae
FG
1390 }
1391 }
1392
1393 return true; // we made a decision (see is_valid)
1394}
1395
1396
1397void MDSDaemon::ms_handle_accept(Connection *con)
1398{
1399 Mutex::Locker l(mds_lock);
1400 if (stopping) {
1401 return;
1402 }
1403
1404 Session *s = static_cast<Session *>(con->get_priv());
1405 dout(10) << "ms_handle_accept " << con->get_peer_addr() << " con " << con << " session " << s << dendl;
1406 if (s) {
1407 if (s->connection != con) {
1408 dout(10) << " session connection " << s->connection << " -> " << con << dendl;
1409 s->connection = con;
1410
1411 // send out any queued messages
1412 while (!s->preopen_out_queue.empty()) {
1413 con->send_message(s->preopen_out_queue.front());
1414 s->preopen_out_queue.pop_front();
1415 }
1416 }
1417 s->put();
1418 }
1419}
1420
1421bool MDSDaemon::is_clean_shutdown()
1422{
1423 if (mds_rank) {
1424 return mds_rank->is_stopped();
1425 } else {
1426 return true;
1427 }
1428}