1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
18 #include "SessionMap.h"
19 #include "osdc/Filer.h"
20 #include "common/Finisher.h"
22 #include "common/config.h"
23 #include "common/errno.h"
24 #include "common/DecayCounter.h"
25 #include "include/ceph_assert.h"
26 #include "include/stringify.h"
28 #define dout_context g_ceph_context
29 #define dout_subsys ceph_subsys_mds
31 #define dout_prefix *_dout << "mds." << rank << ".sessionmap "
34 class SessionMapIOContext
: public MDSIOContextBase
37 SessionMap
*sessionmap
;
38 MDSRank
*get_mds() override
{return sessionmap
->mds
;}
40 explicit SessionMapIOContext(SessionMap
*sessionmap_
) : sessionmap(sessionmap_
) {
41 ceph_assert(sessionmap
!= NULL
);
46 void SessionMap::register_perfcounters()
48 PerfCountersBuilder
plb(g_ceph_context
, "mds_sessions",
49 l_mdssm_first
, l_mdssm_last
);
51 plb
.add_u64(l_mdssm_session_count
, "session_count",
52 "Session count", "sess", PerfCountersBuilder::PRIO_INTERESTING
);
54 plb
.set_prio_default(PerfCountersBuilder::PRIO_USEFUL
);
55 plb
.add_u64_counter(l_mdssm_session_add
, "session_add",
57 plb
.add_u64_counter(l_mdssm_session_remove
, "session_remove",
59 plb
.add_u64(l_mdssm_session_open
, "sessions_open",
60 "Sessions currently open");
61 plb
.add_u64(l_mdssm_session_stale
, "sessions_stale",
62 "Sessions currently stale");
63 plb
.add_u64(l_mdssm_total_load
, "total_load", "Total Load");
64 plb
.add_u64(l_mdssm_avg_load
, "average_load", "Average Load");
65 plb
.add_u64(l_mdssm_avg_session_uptime
, "avg_session_uptime",
66 "Average session uptime");
68 logger
= plb
.create_perf_counters();
69 g_ceph_context
->get_perfcounters_collection()->add(logger
);
72 void SessionMap::dump()
74 dout(10) << "dump" << dendl
;
75 for (ceph::unordered_map
<entity_name_t
,Session
*>::iterator p
= session_map
.begin();
76 p
!= session_map
.end();
78 dout(10) << p
->first
<< " " << p
->second
79 << " state " << p
->second
->get_state_name()
80 << " completed " << p
->second
->info
.completed_requests
81 << " prealloc_inos " << p
->second
->info
.prealloc_inos
82 << " delegated_inos " << p
->second
->delegated_inos
83 << " used_inos " << p
->second
->info
.used_inos
92 object_t
SessionMap::get_object_name() const
95 snprintf(s
, sizeof(s
), "mds%d_sessionmap", int(mds
->get_nodeid()));
100 class C_IO_SM_Load
: public SessionMapIOContext
{
102 const bool first
; //< Am I the initial (header) load?
103 int header_r
; //< Return value from OMAP header read
104 int values_r
; //< Return value from OMAP value read
105 bufferlist header_bl
;
106 std::map
<std::string
, bufferlist
> session_vals
;
107 bool more_session_vals
= false;
109 C_IO_SM_Load(SessionMap
*cm
, const bool f
)
110 : SessionMapIOContext(cm
), first(f
), header_r(0), values_r(0) {}
112 void finish(int r
) override
{
113 sessionmap
->_load_finish(r
, header_r
, values_r
, first
, header_bl
, session_vals
,
116 void print(ostream
& out
) const override
{
117 out
<< "session_load";
124 * Decode OMAP header. Call this once when loading.
126 void SessionMapStore::decode_header(
127 bufferlist
&header_bl
)
129 auto q
= header_bl
.cbegin();
135 void SessionMapStore::encode_header(
136 bufferlist
*header_bl
)
138 ENCODE_START(1, 1, *header_bl
);
139 encode(version
, *header_bl
);
140 ENCODE_FINISH(*header_bl
);
144 * Decode and insert some serialized OMAP values. Call this
145 * repeatedly to insert batched loads.
147 void SessionMapStore::decode_values(std::map
<std::string
, bufferlist
> &session_vals
)
149 for (std::map
<std::string
, bufferlist
>::iterator i
= session_vals
.begin();
150 i
!= session_vals
.end(); ++i
) {
154 bool parsed
= inst
.name
.parse(i
->first
);
156 derr
<< "Corrupt entity name '" << i
->first
<< "' in sessionmap" << dendl
;
157 throw buffer::malformed_input("Corrupt entity name in sessionmap");
160 Session
*s
= get_or_add_session(inst
);
161 if (s
->is_closed()) {
162 s
->set_state(Session::STATE_OPEN
);
163 s
->set_load_avg_decay_rate(decay_rate
);
165 auto q
= i
->second
.cbegin();
171 * An OMAP read finished.
173 void SessionMap::_load_finish(
178 bufferlist
&header_bl
,
179 std::map
<std::string
, bufferlist
> &session_vals
,
180 bool more_session_vals
)
182 if (operation_r
< 0) {
183 derr
<< "_load_finish got " << cpp_strerror(operation_r
) << dendl
;
184 mds
->clog
->error() << "error reading sessionmap '" << get_object_name()
185 << "' " << operation_r
<< " ("
186 << cpp_strerror(operation_r
) << ")";
188 ceph_abort(); // Should be unreachable because damaged() calls respawn()
194 derr
<< __func__
<< ": header error: " << cpp_strerror(header_r
) << dendl
;
195 mds
->clog
->error() << "error reading sessionmap header "
196 << header_r
<< " (" << cpp_strerror(header_r
) << ")";
198 ceph_abort(); // Should be unreachable because damaged() calls respawn()
201 if(header_bl
.length() == 0) {
202 dout(4) << __func__
<< ": header missing, loading legacy..." << dendl
;
208 decode_header(header_bl
);
209 } catch (buffer::error
&e
) {
210 mds
->clog
->error() << "corrupt sessionmap header: " << e
.what();
212 ceph_abort(); // Should be unreachable because damaged() calls respawn()
214 dout(10) << __func__
<< " loaded version " << version
<< dendl
;
218 derr
<< __func__
<< ": error reading values: "
219 << cpp_strerror(values_r
) << dendl
;
220 mds
->clog
->error() << "error reading sessionmap values: "
221 << values_r
<< " (" << cpp_strerror(values_r
) << ")";
223 ceph_abort(); // Should be unreachable because damaged() calls respawn()
226 // Decode session_vals
228 decode_values(session_vals
);
229 } catch (buffer::error
&e
) {
230 mds
->clog
->error() << "corrupt sessionmap values: " << e
.what();
232 ceph_abort(); // Should be unreachable because damaged() calls respawn()
235 if (more_session_vals
) {
236 // Issue another read if we're not at the end of the omap
237 const std::string last_key
= session_vals
.rbegin()->first
;
238 dout(10) << __func__
<< ": continue omap load from '"
239 << last_key
<< "'" << dendl
;
240 object_t oid
= get_object_name();
241 object_locator_t
oloc(mds
->mdsmap
->get_metadata_pool());
242 C_IO_SM_Load
*c
= new C_IO_SM_Load(this, false);
244 op
.omap_get_vals(last_key
, "", g_conf()->mds_sessionmap_keys_per_op
,
245 &c
->session_vals
, &c
->more_session_vals
, &c
->values_r
);
246 mds
->objecter
->read(oid
, oloc
, op
, CEPH_NOSNAP
, NULL
, 0,
247 new C_OnFinisher(c
, mds
->finisher
));
249 // I/O is complete. Update `by_state`
250 dout(10) << __func__
<< ": omap load complete" << dendl
;
251 for (ceph::unordered_map
<entity_name_t
, Session
*>::iterator i
= session_map
.begin();
252 i
!= session_map
.end(); ++i
) {
253 Session
*s
= i
->second
;
254 auto by_state_entry
= by_state
.find(s
->get_state());
255 if (by_state_entry
== by_state
.end())
256 by_state_entry
= by_state
.emplace(s
->get_state(),
257 new xlist
<Session
*>).first
;
258 by_state_entry
->second
->push_back(&s
->item_session_list
);
261 // Population is complete. Trigger load waiters.
262 dout(10) << __func__
<< ": v " << version
263 << ", " << session_map
.size() << " sessions" << dendl
;
264 projected
= committing
= committed
= version
;
266 finish_contexts(g_ceph_context
, waiting_for_load
);
271 * Populate session state from OMAP records in this
272 * rank's sessionmap object.
274 void SessionMap::load(MDSContext
*onload
)
276 dout(10) << "load" << dendl
;
279 waiting_for_load
.push_back(onload
);
281 C_IO_SM_Load
*c
= new C_IO_SM_Load(this, true);
282 object_t oid
= get_object_name();
283 object_locator_t
oloc(mds
->mdsmap
->get_metadata_pool());
286 op
.omap_get_header(&c
->header_bl
, &c
->header_r
);
287 op
.omap_get_vals("", "", g_conf()->mds_sessionmap_keys_per_op
,
288 &c
->session_vals
, &c
->more_session_vals
, &c
->values_r
);
290 mds
->objecter
->read(oid
, oloc
, op
, CEPH_NOSNAP
, NULL
, 0, new C_OnFinisher(c
, mds
->finisher
));
294 class C_IO_SM_LoadLegacy
: public SessionMapIOContext
{
297 explicit C_IO_SM_LoadLegacy(SessionMap
*cm
) : SessionMapIOContext(cm
) {}
298 void finish(int r
) override
{
299 sessionmap
->_load_legacy_finish(r
, bl
);
301 void print(ostream
& out
) const override
{
302 out
<< "session_load_legacy";
309 * Load legacy (object data blob) SessionMap format, assuming
310 * that waiting_for_load has already been populated with
311 * the relevant completion. This is the fallback if we do not
312 * find an OMAP header when attempting to load normally.
// Start an asynchronous full-object read of this rank's sessionmap object
// (named by get_object_name(), located in the MDS map's metadata pool).
// The data is read into the completion's `bl` member and the completion
// (a C_IO_SM_LoadLegacy) is dispatched through mds->finisher.
314 void SessionMap::load_legacy()
316 dout(10) << __func__
<< dendl
;
// Completion object; it also owns the buffer the read fills in.
318 C_IO_SM_LoadLegacy
*c
= new C_IO_SM_LoadLegacy(this);
319 object_t oid
= get_object_name();
320 object_locator_t
oloc(mds
->mdsmap
->get_metadata_pool());
// Read the head (CEPH_NOSNAP) version of the whole object into c->bl.
322 mds
->objecter
->read_full(oid
, oloc
, CEPH_NOSNAP
, &c
->bl
, 0,
323 new C_OnFinisher(c
, mds
->finisher
));
326 void SessionMap::_load_legacy_finish(int r
, bufferlist
&bl
)
328 auto blp
= bl
.cbegin();
330 derr
<< "_load_finish got " << cpp_strerror(r
) << dendl
;
331 ceph_abort_msg("failed to load sessionmap");
334 decode_legacy(blp
); // note: this sets last_cap_renew = now()
335 dout(10) << "_load_finish v " << version
336 << ", " << session_map
.size() << " sessions, "
337 << bl
.length() << " bytes"
339 projected
= committing
= committed
= version
;
342 // Mark all sessions dirty, so that on next save() we will write
343 // a complete OMAP version of the data loaded from the legacy format
344 for (ceph::unordered_map
<entity_name_t
, Session
*>::iterator i
= session_map
.begin();
345 i
!= session_map
.end(); ++i
) {
346 // Don't use mark_dirty because on this occasion we want to ignore the
347 // keys_per_op limit and do one big write (upgrade must be atomic)
348 dirty_sessions
.insert(i
->first
);
350 loaded_legacy
= true;
352 finish_contexts(g_ceph_context
, waiting_for_load
);
360 class C_IO_SM_Save
: public SessionMapIOContext
{
363 C_IO_SM_Save(SessionMap
*cm
, version_t v
) : SessionMapIOContext(cm
), version(v
) {}
364 void finish(int r
) override
{
366 get_mds()->handle_write_error(r
);
368 sessionmap
->_save_finish(version
);
371 void print(ostream
& out
) const override
{
372 out
<< "session_save";
377 void SessionMap::save(MDSContext
*onsave
, version_t needv
)
379 dout(10) << __func__
<< ": needv " << needv
<< ", v " << version
<< dendl
;
381 if (needv
&& committing
>= needv
) {
382 ceph_assert(committing
> committed
);
383 commit_waiters
[committing
].push_back(onsave
);
387 commit_waiters
[version
].push_back(onsave
);
389 committing
= version
;
391 object_t oid
= get_object_name();
392 object_locator_t
oloc(mds
->mdsmap
->get_metadata_pool());
396 /* Compose OSD OMAP transaction for full write */
397 bufferlist header_bl
;
398 encode_header(&header_bl
);
399 op
.omap_set_header(header_bl
);
401 /* If we loaded a legacy sessionmap, then erase the old data. If
402 * an old-versioned MDS tries to read it, it'll fail out safely
403 * with an end_of_buffer exception */
405 dout(4) << __func__
<< " erasing legacy sessionmap" << dendl
;
407 loaded_legacy
= false; // only need to truncate once.
410 dout(20) << " updating keys:" << dendl
;
411 map
<string
, bufferlist
> to_set
;
412 for(std::set
<entity_name_t
>::iterator i
= dirty_sessions
.begin();
413 i
!= dirty_sessions
.end(); ++i
) {
414 const entity_name_t name
= *i
;
415 Session
*session
= session_map
[name
];
417 if (session
->is_open() ||
418 session
->is_closing() ||
419 session
->is_stale() ||
420 session
->is_killing()) {
421 dout(20) << " " << name
<< dendl
;
423 std::ostringstream k
;
428 session
->info
.encode(bl
, mds
->mdsmap
->get_up_features());
431 to_set
[k
.str()] = bl
;
433 session
->clear_dirty_completed_requests();
435 dout(20) << " " << name
<< " (ignoring)" << dendl
;
438 if (!to_set
.empty()) {
442 dout(20) << " removing keys:" << dendl
;
443 set
<string
> to_remove
;
444 for(std::set
<entity_name_t
>::const_iterator i
= null_sessions
.begin();
445 i
!= null_sessions
.end(); ++i
) {
446 dout(20) << " " << *i
<< dendl
;
447 std::ostringstream k
;
449 to_remove
.insert(k
.str());
451 if (!to_remove
.empty()) {
452 op
.omap_rm_keys(to_remove
);
455 dirty_sessions
.clear();
456 null_sessions
.clear();
458 mds
->objecter
->mutate(oid
, oloc
, op
, snapc
,
459 ceph::real_clock::now(),
461 new C_OnFinisher(new C_IO_SM_Save(this, version
),
// Completion hook for a sessionmap save of version `v`: logs the version,
// completes every context queued under commit_waiters[v], and then removes
// that entry from commit_waiters.
465 void SessionMap::_save_finish(version_t v
)
467 dout(10) << "_save_finish v" << v
<< dendl
;
// Wake everything that was waiting on version v, then forget the entry.
470 finish_contexts(g_ceph_context
, commit_waiters
[v
]);
471 commit_waiters
.erase(v
);
476 * Deserialize sessions, and update by_state index
// Decode a legacy-format sessionmap from `p` via the SessionMapStore
// decoder, then walk every entry of session_map and append the session to
// the by_state list that matches its current state, creating that list the
// first time a state is seen.
478 void SessionMap::decode_legacy(bufferlist::const_iterator
&p
)
480 // Populate `sessions`
481 SessionMapStore::decode_legacy(p
);
// Rebuild the per-state index from the freshly decoded sessions.
484 for (ceph::unordered_map
<entity_name_t
, Session
*>::iterator i
= session_map
.begin();
485 i
!= session_map
.end(); ++i
) {
486 Session
*s
= i
->second
;
// Look up the list for this state; allocate one only when none exists yet.
487 auto by_state_entry
= by_state
.find(s
->get_state());
488 if (by_state_entry
== by_state
.end())
489 by_state_entry
= by_state
.emplace(s
->get_state(),
490 new xlist
<Session
*>).first
;
491 by_state_entry
->second
->push_back(&s
->item_session_list
);
// Transition `session` to state `s`.
//
// When the session's current state differs from `s`, the session's own
// state is updated and it is appended to the by_state list for `s` (the
// list is created on first use). A session that is then open or stale has
// its load-average decay rate set from decay_rate. The open and stale
// session-count perf counters are refreshed from the by_state index.
//
// Returns the session's state sequence number (get_state_seq()).
495 uint64_t SessionMap::set_state(Session
*session
, int s
) {
496 if (session
->state
!= s
) {
497 session
->set_state(s
);
// Find (or lazily create) the list that indexes sessions in state `s`.
498 auto by_state_entry
= by_state
.find(s
);
499 if (by_state_entry
== by_state
.end())
500 by_state_entry
= by_state
.emplace(s
, new xlist
<Session
*>).first
;
501 by_state_entry
->second
->push_back(&session
->item_session_list
);
503 if (session
->is_open() || session
->is_stale()) {
504 session
->set_load_avg_decay_rate(decay_rate
);
507 // refresh number of sessions for states which have perf
508 // counters associated
509 logger
->set(l_mdssm_session_open
,
510 get_session_count_in_state(Session::STATE_OPEN
));
511 logger
->set(l_mdssm_session_stale
,
512 get_session_count_in_state(Session::STATE_STALE
));
515 return session
->get_state_seq();
518 void SessionMapStore::decode_legacy(bufferlist::const_iterator
& p
)
520 auto now
= clock::now();
523 if (pre
== (uint64_t)-1) {
524 DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, p
);
525 ceph_assert(struct_v
>= 2);
531 decode(inst
.name
, p
);
532 Session
*s
= get_or_add_session(inst
);
533 if (s
->is_closed()) {
534 s
->set_state(Session::STATE_OPEN
);
535 s
->set_load_avg_decay_rate(decay_rate
);
542 // --- old format ----
545 // this is a meaningless upper bound. can be ignored.
549 while (n
-- && !p
.end()) {
551 Session
*s
= new Session(ConnectionRef());
554 auto& name
= s
->info
.inst
.name
;
555 auto it
= session_map
.find(name
);
556 if (it
!= session_map
.end()) {
557 // eager client connected too fast! aie.
558 dout(10) << " already had session for " << name
<< ", recovering" << dendl
;
567 s
->set_state(Session::STATE_OPEN
);
568 s
->set_load_avg_decay_rate(decay_rate
);
569 s
->last_cap_renew
= now
;
574 void Session::dump(Formatter
*f
, bool cap_dump
) const
576 f
->dump_int("id", info
.inst
.name
.num());
577 f
->dump_object("entity", info
.inst
);
578 f
->dump_string("state", get_state_name());
579 f
->dump_int("num_leases", leases
.size());
580 f
->dump_int("num_caps", caps
.size());
582 f
->open_array_section("caps");
583 for (const auto& cap
: caps
) {
584 f
->dump_object("cap", *cap
);
588 if (is_open() || is_stale()) {
589 f
->dump_unsigned("request_load_avg", get_load_avg());
591 f
->dump_float("uptime", get_session_uptime());
592 f
->dump_unsigned("requests_in_flight", get_request_count());
593 f
->dump_unsigned("num_completed_requests", get_num_completed_requests());
594 f
->dump_unsigned("num_completed_flushes", get_num_completed_flushes());
595 f
->dump_bool("reconnecting", reconnecting
);
596 f
->dump_object("recall_caps", recall_caps
);
597 f
->dump_object("release_caps", release_caps
);
598 f
->dump_object("recall_caps_throttle", recall_caps_throttle
);
599 f
->dump_object("recall_caps_throttle2o", recall_caps_throttle2o
);
600 f
->dump_object("session_cache_liveness", session_cache_liveness
);
601 f
->dump_object("cap_acquisition", cap_acquisition
);
605 void SessionMapStore::dump(Formatter
*f
) const
607 f
->open_array_section("sessions");
608 for (const auto& p
: session_map
) {
609 f
->dump_object("session", *p
.second
);
611 f
->close_section(); // Sessions
614 void SessionMapStore::generate_test_instances(std::list
<SessionMapStore
*>& ls
)
616 // pretty boring for now
617 ls
.push_back(new SessionMapStore());
620 void SessionMap::wipe()
622 dout(1) << "wipe start" << dendl
;
624 while (!session_map
.empty()) {
625 Session
*s
= session_map
.begin()->second
;
628 version
= ++projected
;
629 dout(1) << "wipe result" << dendl
;
631 dout(1) << "wipe done" << dendl
;
634 void SessionMap::wipe_ino_prealloc()
636 for (ceph::unordered_map
<entity_name_t
,Session
*>::iterator p
= session_map
.begin();
637 p
!= session_map
.end();
639 p
->second
->pending_prealloc_inos
.clear();
640 p
->second
->delegated_inos
.clear();
641 p
->second
->info
.prealloc_inos
.clear();
642 p
->second
->info
.used_inos
.clear();
644 projected
= ++version
;
// Register session `s` with the map.
//
// Asserts that no session with the same entity name is already present,
// stores `s` in session_map under that name, appends it to the by_state
// list for its current state (creating the list on first use), folds it
// into the average birth time, and updates the session_count and
// session_add perf counters.
647 void SessionMap::add_session(Session
*s
)
649 dout(10) << __func__
<< " s=" << s
<< " name=" << s
->info
.inst
.name
<< dendl
;
// A given entity name may only be registered once.
651 ceph_assert(session_map
.count(s
->info
.inst
.name
) == 0);
652 session_map
[s
->info
.inst
.name
] = s
;
// Index the session under its current state.
653 auto by_state_entry
= by_state
.find(s
->state
);
654 if (by_state_entry
== by_state
.end())
655 by_state_entry
= by_state
.emplace(s
->state
, new xlist
<Session
*>).first
;
656 by_state_entry
->second
->push_back(&s
->item_session_list
);
659 update_average_birth_time(*s
);
661 logger
->set(l_mdssm_session_count
, session_map
.size());
662 logger
->inc(l_mdssm_session_add
);
// Unregister session `s` from the map.
//
// Removes the session from the average birth time, trims its completed
// requests, unlinks it from its by_state list, and erases it from
// session_map and dirty_sessions. The name is recorded in null_sessions,
// which save() turns into OMAP key removals. Finally the session_count and
// session_remove perf counters are updated.
665 void SessionMap::remove_session(Session
*s
)
667 dout(10) << __func__
<< " s=" << s
<< " name=" << s
->info
.inst
.name
<< dendl
;
669 update_average_birth_time(*s
, false);
671 s
->trim_completed_requests(0);
672 s
->item_session_list
.remove_myself();
673 session_map
.erase(s
->info
.inst
.name
);
// No longer dirty; instead remember the name as removed.
674 dirty_sessions
.erase(s
->info
.inst
.name
);
675 null_sessions
.insert(s
->info
.inst
.name
);
678 logger
->set(l_mdssm_session_count
, session_map
.size());
679 logger
->inc(l_mdssm_session_remove
);
// Mark `session` as recently active: re-append it to the by_state list for
// its current state and stamp last_cap_renew with the current clock time.
// The session must already be linked on a session list.
682 void SessionMap::touch_session(Session
*session
)
684 dout(10) << __func__
<< " s=" << session
<< " name=" << session
->info
.inst
.name
<< dendl
;
686 // Move to the back of the session list for this state (should
687 // already be on a list courtesy of add_session and set_state)
688 ceph_assert(session
->item_session_list
.is_on_list());
// The list normally exists already; create it if this state has none yet.
689 auto by_state_entry
= by_state
.find(session
->state
);
690 if (by_state_entry
== by_state
.end())
691 by_state_entry
= by_state
.emplace(session
->state
,
692 new xlist
<Session
*>).first
;
693 by_state_entry
->second
->push_back(&session
->item_session_list
);
695 session
->last_cap_renew
= clock::now();
698 void SessionMap::_mark_dirty(Session
*s
, bool may_save
)
700 if (dirty_sessions
.count(s
->info
.inst
.name
))
704 dirty_sessions
.size() >= g_conf()->mds_sessionmap_keys_per_op
) {
705 // Pre-empt the usual save() call from journal segment trim, in
706 // order to avoid building up an oversized OMAP update operation
707 // from too many sessions modified at once
708 save(new C_MDSInternalNoop
, version
);
711 null_sessions
.erase(s
->info
.inst
.name
);
712 dirty_sessions
.insert(s
->info
.inst
.name
);
715 void SessionMap::mark_dirty(Session
*s
, bool may_save
)
717 dout(20) << __func__
<< " s=" << s
<< " name=" << s
->info
.inst
.name
718 << " v=" << version
<< dendl
;
720 _mark_dirty(s
, may_save
);
725 void SessionMap::replay_dirty_session(Session
*s
)
727 dout(20) << __func__
<< " s=" << s
<< " name=" << s
->info
.inst
.name
728 << " v=" << version
<< dendl
;
730 _mark_dirty(s
, false);
732 replay_advance_version();
735 void SessionMap::replay_advance_version()
741 void SessionMap::replay_open_sessions(version_t event_cmapv
,
742 map
<client_t
,entity_inst_t
>& client_map
,
743 map
<client_t
,client_metadata_t
>& client_metadata_map
)
745 unsigned already_saved
;
747 if (version
+ client_map
.size() < event_cmapv
)
750 // Server::finish_force_open_sessions() marks sessions dirty one by one.
751 // Marking a session dirty may flush all existing dirty sessions. So it's
752 // possible that some sessions are already saved in sessionmap.
753 already_saved
= client_map
.size() - (event_cmapv
- version
);
754 for (const auto& p
: client_map
) {
755 Session
*s
= get_or_add_session(p
.second
);
756 auto q
= client_metadata_map
.find(p
.first
);
757 if (q
!= client_metadata_map
.end())
758 s
->info
.client_metadata
.merge(q
->second
);
760 if (already_saved
> 0) {
768 set_state(s
, Session::STATE_OPEN
);
769 replay_dirty_session(s
);
774 mds
->clog
->error() << "error replaying open sessions(" << client_map
.size()
775 << ") sessionmap v " << event_cmapv
<< " table " << version
;
776 ceph_assert(g_conf()->mds_wipe_sessions
);
777 mds
->sessionmap
.wipe();
778 mds
->sessionmap
.set_version(event_cmapv
);
781 version_t
SessionMap::mark_projected(Session
*s
)
783 dout(20) << __func__
<< " s=" << s
<< " name=" << s
->info
.inst
.name
784 << " pv=" << projected
<< " -> " << projected
+ 1 << dendl
;
786 s
->push_pv(projected
);
791 class C_IO_SM_Save_One
: public SessionMapIOContext
{
794 C_IO_SM_Save_One(SessionMap
*cm
, MDSContext
*on_safe_
)
795 : SessionMapIOContext(cm
), on_safe(on_safe_
) {}
796 void finish(int r
) override
{
798 get_mds()->handle_write_error(r
);
800 on_safe
->complete(r
);
803 void print(ostream
& out
) const override
{
804 out
<< "session_save_one";
810 void SessionMap::save_if_dirty(const std::set
<entity_name_t
> &tgt_sessions
,
811 MDSGatherBuilder
*gather_bld
)
813 ceph_assert(gather_bld
!= NULL
);
815 std::vector
<entity_name_t
> write_sessions
;
817 // Decide which sessions require a write
818 for (std::set
<entity_name_t
>::iterator i
= tgt_sessions
.begin();
819 i
!= tgt_sessions
.end(); ++i
) {
820 const entity_name_t
&session_id
= *i
;
822 if (session_map
.count(session_id
) == 0) {
823 // Session isn't around any more, never mind.
827 Session
*session
= session_map
[session_id
];
828 if (!session
->has_dirty_completed_requests()) {
829 // Session hasn't had completed_requests
830 // modified since last write, no need to
835 if (dirty_sessions
.count(session_id
) > 0) {
836 // Session is already dirtied, will be written, no
837 // need to pre-empt that.
840 // Okay, passed all our checks, now we write
841 // this session out. The version we write
842 // into the OMAP may now be higher-versioned
843 // than the version in the header, but that's
844 // okay because it's never a problem to have
845 // an overly-fresh copy of a session.
846 write_sessions
.push_back(*i
);
849 dout(4) << __func__
<< ": writing " << write_sessions
.size() << dendl
;
851 // Batch writes into mds_sessionmap_keys_per_op
852 const uint32_t kpo
= g_conf()->mds_sessionmap_keys_per_op
;
853 map
<string
, bufferlist
> to_set
;
854 for (uint32_t i
= 0; i
< write_sessions
.size(); ++i
) {
855 const entity_name_t
&session_id
= write_sessions
[i
];
856 Session
*session
= session_map
[session_id
];
857 session
->clear_dirty_completed_requests();
860 std::ostringstream k
;
865 session
->info
.encode(bl
, mds
->mdsmap
->get_up_features());
868 to_set
[k
.str()] = bl
;
870 // Complete this write transaction?
871 if (i
== write_sessions
.size() - 1
872 || i
% kpo
== kpo
- 1) {
875 to_set
.clear(); // clear to start a new transaction
878 object_t oid
= get_object_name();
879 object_locator_t
oloc(mds
->mdsmap
->get_metadata_pool());
880 MDSContext
*on_safe
= gather_bld
->new_sub();
881 mds
->objecter
->mutate(oid
, oloc
, op
, snapc
,
882 ceph::real_clock::now(), 0,
884 new C_IO_SM_Save_One(this, on_safe
),
894 #define dout_prefix *_dout << "Session "
897 * Calculate the length of the `requests` member list,
898 * because elist does not have a size() method.
// Walks the `requests` list from begin() until the iterator reports end().
// NOTE(review): the loop body and return statement are not visible here;
// presumably each step increments a counter that is returned - confirm.
902 size_t Session::get_request_count() const
905 for (auto p
= requests
.begin(); !p
.end(); ++p
)
911 * Called in response to a CEPH_MSG_CLIENT_CAPRELEASE message,
912 * with n_caps equal to the number of caps that were released
913 * in the message. Used to update state about how many caps a
914 * client has released since it was last instructed to RECALL_STATE.
916 void Session::notify_cap_release(size_t n_caps
)
918 recall_caps
.hit(-(double)n_caps
);
919 release_caps
.hit(n_caps
);
923 * Called when a CEPH_MSG_CLIENT_SESSION->CEPH_SESSION_RECALL_STATE
924 * message is sent to the client. Update our recall-related state
925 * in order to generate health metrics if the session doesn't see
926 * a commensurate number of calls to ::notify_cap_release
928 uint64_t Session::notify_recall_sent(size_t new_limit
)
930 const auto num_caps
= caps
.size();
931 ceph_assert(new_limit
< num_caps
); // Behaviour of Server::recall_client_state
932 const auto count
= num_caps
-new_limit
;
934 if (recall_limit
!= new_limit
) {
937 new_change
= 0; /* no change! */
940 /* Always hit the session counter as a RECALL message is still sent to the
941 * client and we do not want the MDS to burn its global counter tokens on a
942 * session that is not releasing caps (i.e. allow the session counter to
943 * throttle future RECALL messages).
945 recall_caps_throttle
.hit(count
);
946 recall_caps_throttle2o
.hit(count
);
947 recall_caps
.hit(count
);
952 * Use client metadata to generate a somewhat-friendlier
953 * name for the client than its session ID.
955 * This is *not* guaranteed to be unique, and any machine
956 * consumers of session-related output should always use
957 * the session ID as a primary key and use this only
958 * as a presentation hint.
960 void Session::_update_human_name()
962 auto info_client_metadata_entry
= info
.client_metadata
.find("hostname");
963 if (info_client_metadata_entry
!= info
.client_metadata
.end()) {
964 // Happy path, refer to clients by hostname
965 human_name
= info_client_metadata_entry
->second
;
966 if (!info
.auth_name
.has_default_id()) {
967 // When a non-default entity ID is set by the user, assume they
968 // would like to see it in references to the client, if it's
969 // reasonable short. Limit the length because we don't want
970 // to put e.g. uuid-generated names into a "human readable"
972 const int arbitrarily_short
= 16;
973 if (info
.auth_name
.get_id().size() < arbitrarily_short
) {
974 human_name
+= std::string(":") + info
.auth_name
.get_id();
978 // Fallback, refer to clients by ID e.g. client.4567
979 human_name
= stringify(info
.inst
.name
.num());
983 void Session::decode(bufferlist::const_iterator
&p
)
987 _update_human_name();
990 int Session::check_access(CInode
*in
, unsigned mask
,
991 int caller_uid
, int caller_gid
,
992 const vector
<uint64_t> *caller_gid_list
,
993 int new_uid
, int new_gid
)
998 diri
= in
->get_projected_parent_dn()->get_dir()->get_inode();
999 if (diri
&& diri
->is_stray()){
1000 path
= in
->get_projected_inode()->stray_prior_path
;
1001 dout(20) << __func__
<< " stray_prior_path " << path
<< dendl
;
1003 in
->make_path_string(path
, true);
1004 dout(20) << __func__
<< " path " << path
<< dendl
;
1007 path
= path
.substr(1); // drop leading /
1009 if (in
->inode
.is_dir() &&
1010 in
->inode
.has_layout() &&
1011 in
->inode
.layout
.pool_ns
.length() &&
1012 !connection
->has_feature(CEPH_FEATURE_FS_FILE_LAYOUT_V2
)) {
1013 dout(10) << __func__
<< " client doesn't support FS_FILE_LAYOUT_V2" << dendl
;
1017 if (!auth_caps
.is_capable(path
, in
->inode
.uid
, in
->inode
.gid
, in
->inode
.mode
,
1018 caller_uid
, caller_gid
, caller_gid_list
, mask
,
1026 // track total and per session load
// Record one unit of request load against the map and against `session`.
//
// The number of sessions in the OPEN, STALE and CLOSING states is summed
// (and asserted non-zero), the map-wide decaying load counter is hit, and
// the resulting total and per-session average are published to the
// total_load / average_load perf counters, truncated to integers. The hit
// is then forwarded to the session itself.
1027 void SessionMap::hit_session(Session
*session
) {
1028 uint64_t sessions
= get_session_count_in_state(Session::STATE_OPEN
) +
1029 get_session_count_in_state(Session::STATE_STALE
) +
1030 get_session_count_in_state(Session::STATE_CLOSING
);
// Guards the division below.
1031 ceph_assert(sessions
!= 0);
1033 double total_load
= total_load_avg
.hit();
1034 double avg_load
= total_load
/ sessions
;
1036 logger
->set(l_mdssm_total_load
, (uint64_t)total_load
);
1037 logger
->set(l_mdssm_avg_load
, (uint64_t)avg_load
);
1039 session
->hit_session();
1042 void SessionMap::handle_conf_change(const std::set
<std::string
>& changed
)
1044 auto apply_to_open_sessions
= [this](auto f
) {
1045 if (auto it
= by_state
.find(Session::STATE_OPEN
); it
!= by_state
.end()) {
1046 for (const auto &session
: *(it
->second
)) {
1050 if (auto it
= by_state
.find(Session::STATE_STALE
); it
!= by_state
.end()) {
1051 for (const auto &session
: *(it
->second
)) {
1057 if (changed
.count("mds_request_load_average_decay_rate")) {
1058 auto d
= g_conf().get_val
<double>("mds_request_load_average_decay_rate");
1061 total_load_avg
= DecayCounter(d
);
1063 auto mut
= [d
](auto s
) {
1064 s
->set_load_avg_decay_rate(d
);
1066 apply_to_open_sessions(mut
);
1068 if (changed
.count("mds_recall_max_decay_rate")) {
1069 auto d
= g_conf().get_val
<double>("mds_recall_max_decay_rate");
1070 auto mut
= [d
](auto s
) {
1071 s
->recall_caps_throttle
= DecayCounter(d
);
1073 apply_to_open_sessions(mut
);
1075 if (changed
.count("mds_recall_warning_decay_rate")) {
1076 auto d
= g_conf().get_val
<double>("mds_recall_warning_decay_rate");
1077 auto mut
= [d
](auto s
) {
1078 s
->recall_caps
= DecayCounter(d
);
1079 s
->release_caps
= DecayCounter(d
);
1081 apply_to_open_sessions(mut
);
1083 if (changed
.count("mds_session_cache_liveness_decay_rate")) {
1084 auto d
= g_conf().get_val
<double>("mds_session_cache_liveness_decay_rate");
1085 auto mut
= [d
](auto s
) {
1086 s
->session_cache_liveness
= DecayCounter(d
);
1087 s
->session_cache_liveness
.hit(s
->caps
.size()); /* so the MDS doesn't immediately start trimming a new session */
1089 apply_to_open_sessions(mut
);
1091 if (changed
.count("mds_session_cap_acquisition_decay_rate")) {
1092 auto d
= g_conf().get_val
<double>("mds_session_cap_acquisition_decay_rate");
1093 auto mut
= [d
](auto s
) {
1094 s
->cap_acquisition
= DecayCounter(d
);
1096 apply_to_open_sessions(mut
);
// Publish the avg_session_uptime perf counter: the time elapsed between
// avg_birth_time and now, in seconds, truncated to an integer.
// NOTE(review): an empty session_map is special-cased first; the body of
// that branch is not visible here - presumably an early return, confirm.
1100 void SessionMap::update_average_session_age() {
1101 if (!session_map
.size()) {
1105 double avg_uptime
= std::chrono::duration
<double>(clock::now()-avg_birth_time
).count();
1106 logger
->set(l_mdssm_avg_session_uptime
, (uint64_t)avg_uptime
);
1109 int SessionFilter::parse(
1110 const std::vector
<std::string
> &args
,
1111 std::stringstream
*ss
)
1113 ceph_assert(ss
!= NULL
);
1115 for (const auto &s
: args
) {
1116 dout(20) << __func__
<< " parsing filter '" << s
<< "'" << dendl
;
1118 auto eq
= s
.find("=");
1119 if (eq
== std::string::npos
|| eq
== s
.size()) {
1120 // allow this to be a bare id for compatibility with pre-octopus asok
1123 id
= strict_strtoll(s
.c_str(), 10, &err
);
1125 *ss
<< "Invalid filter '" << s
<< "'";
1131 // Keys that start with this are to be taken as referring
1132 // to freeform client metadata fields.
1133 const std::string
metadata_prefix("client_metadata.");
1135 auto k
= s
.substr(0, eq
);
1136 auto v
= s
.substr(eq
+ 1);
1138 dout(20) << __func__
<< " parsed k='" << k
<< "', v='" << v
<< "'" << dendl
;
1140 if (k
.compare(0, metadata_prefix
.size(), metadata_prefix
) == 0
1141 && k
.size() > metadata_prefix
.size()) {
1142 // Filter on arbitrary metadata key (no fixed schema for this,
1143 // so anything after the dot is a valid field to filter on)
1144 auto metadata_key
= k
.substr(metadata_prefix
.size());
1145 metadata
.insert(std::make_pair(metadata_key
, v
));
1146 } else if (k
== "auth_name") {
1147 // Filter on client entity name
1149 } else if (k
== "state") {
1151 } else if (k
== "id") {
1153 id
= strict_strtoll(v
.c_str(), 10, &err
);
1158 } else if (k
== "reconnecting") {
1161 * Strict boolean parser. Allow true/false/0/1.
1162 * Anything else is -EINVAL.
1164 auto is_true
= [](std::string_view bstr
, bool *out
) -> bool
1166 ceph_assert(out
!= nullptr);
1168 if (bstr
== "true" || bstr
== "1") {
1171 } else if (bstr
== "false" || bstr
== "0") {
1180 int r
= is_true(v
, &bval
);
1182 set_reconnecting(bval
);
1184 *ss
<< "Invalid boolean value '" << v
<< "'";
1188 *ss
<< "Invalid filter key '" << k
<< "'";
1196 bool SessionFilter::match(
1197 const Session
&session
,
1198 std::function
<bool(client_t
)> is_reconnecting
) const
1200 for (const auto &m
: metadata
) {
1201 const auto &k
= m
.first
;
1202 const auto &v
= m
.second
;
1203 auto it
= session
.info
.client_metadata
.find(k
);
1204 if (it
== session
.info
.client_metadata
.end()) {
1207 if (it
->second
!= v
) {
1212 if (!auth_name
.empty() && auth_name
!= session
.info
.auth_name
.get_id()) {
1216 if (!state
.empty() && state
!= session
.get_state_name()) {
1220 if (id
!= 0 && id
!= session
.info
.inst
.name
.num()) {
1224 if (reconnecting
.first
) {
1225 const bool am_reconnecting
= is_reconnecting(session
.info
.inst
.name
.num());
1226 if (reconnecting
.second
!= am_reconnecting
) {
1234 std::ostream
& operator<<(std::ostream
&out
, const Session
&s
)
1236 if (s
.get_human_name() == stringify(s
.get_client())) {
1237 out
<< s
.get_human_name();
1239 out
<< s
.get_human_name() << " (" << std::dec
<< s
.get_client() << ")";