]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "MDSRank.h" | |
16 | #include "MDCache.h" | |
17 | #include "Mutation.h" | |
18 | #include "SessionMap.h" | |
19 | #include "osdc/Filer.h" | |
20 | #include "common/Finisher.h" | |
21 | ||
22 | #include "common/config.h" | |
23 | #include "common/errno.h" | |
91327a77 | 24 | #include "common/DecayCounter.h" |
11fdf7f2 | 25 | #include "include/ceph_assert.h" |
7c673cae FG |
26 | #include "include/stringify.h" |
27 | ||
28 | #define dout_context g_ceph_context | |
29 | #define dout_subsys ceph_subsys_mds | |
30 | #undef dout_prefix | |
31 | #define dout_prefix *_dout << "mds." << rank << ".sessionmap " | |
32 | ||
20effc67 TL |
33 | using namespace std; |
34 | ||
7c673cae FG |
35 | namespace { |
36 | class SessionMapIOContext : public MDSIOContextBase | |
37 | { | |
38 | protected: | |
39 | SessionMap *sessionmap; | |
40 | MDSRank *get_mds() override {return sessionmap->mds;} | |
41 | public: | |
42 | explicit SessionMapIOContext(SessionMap *sessionmap_) : sessionmap(sessionmap_) { | |
11fdf7f2 | 43 | ceph_assert(sessionmap != NULL); |
7c673cae FG |
44 | } |
45 | }; | |
46 | }; | |
47 | ||
aee94f69 TL |
48 | SessionMap::SessionMap(MDSRank *m) |
49 | : mds(m), | |
50 | mds_session_metadata_threshold(g_conf().get_val<Option::size_t>("mds_session_metadata_threshold")) { | |
51 | } | |
52 | ||
7c673cae FG |
53 | void SessionMap::register_perfcounters() |
54 | { | |
55 | PerfCountersBuilder plb(g_ceph_context, "mds_sessions", | |
56 | l_mdssm_first, l_mdssm_last); | |
91327a77 | 57 | |
7c673cae | 58 | plb.add_u64(l_mdssm_session_count, "session_count", |
b32b8144 | 59 | "Session count", "sess", PerfCountersBuilder::PRIO_INTERESTING); |
91327a77 AA |
60 | |
61 | plb.set_prio_default(PerfCountersBuilder::PRIO_USEFUL); | |
7c673cae FG |
62 | plb.add_u64_counter(l_mdssm_session_add, "session_add", |
63 | "Sessions added"); | |
64 | plb.add_u64_counter(l_mdssm_session_remove, "session_remove", | |
65 | "Sessions removed"); | |
91327a77 AA |
66 | plb.add_u64(l_mdssm_session_open, "sessions_open", |
67 | "Sessions currently open"); | |
68 | plb.add_u64(l_mdssm_session_stale, "sessions_stale", | |
69 | "Sessions currently stale"); | |
70 | plb.add_u64(l_mdssm_total_load, "total_load", "Total Load"); | |
71 | plb.add_u64(l_mdssm_avg_load, "average_load", "Average Load"); | |
72 | plb.add_u64(l_mdssm_avg_session_uptime, "avg_session_uptime", | |
73 | "Average session uptime"); | |
aee94f69 TL |
74 | plb.add_u64(l_mdssm_metadata_threshold_sessions_evicted, "mdthresh_evicted", |
75 | "Sessions evicted on reaching metadata threshold"); | |
91327a77 | 76 | |
7c673cae FG |
77 | logger = plb.create_perf_counters(); |
78 | g_ceph_context->get_perfcounters_collection()->add(logger); | |
79 | } | |
80 | ||
81 | void SessionMap::dump() | |
82 | { | |
83 | dout(10) << "dump" << dendl; | |
84 | for (ceph::unordered_map<entity_name_t,Session*>::iterator p = session_map.begin(); | |
85 | p != session_map.end(); | |
86 | ++p) | |
87 | dout(10) << p->first << " " << p->second | |
88 | << " state " << p->second->get_state_name() | |
89 | << " completed " << p->second->info.completed_requests | |
f67539c2 | 90 | << " free_prealloc_inos " << p->second->free_prealloc_inos |
9f95a23c | 91 | << " delegated_inos " << p->second->delegated_inos |
7c673cae FG |
92 | << dendl; |
93 | } | |
94 | ||
95 | ||
96 | // ---------------- | |
97 | // LOAD | |
98 | ||
99 | ||
100 | object_t SessionMap::get_object_name() const | |
101 | { | |
102 | char s[30]; | |
103 | snprintf(s, sizeof(s), "mds%d_sessionmap", int(mds->get_nodeid())); | |
104 | return object_t(s); | |
105 | } | |
106 | ||
107 | namespace { | |
108 | class C_IO_SM_Load : public SessionMapIOContext { | |
109 | public: | |
110 | const bool first; //< Am I the initial (header) load? | |
111 | int header_r; //< Return value from OMAP header read | |
112 | int values_r; //< Return value from OMAP value read | |
113 | bufferlist header_bl; | |
114 | std::map<std::string, bufferlist> session_vals; | |
115 | bool more_session_vals = false; | |
116 | ||
117 | C_IO_SM_Load(SessionMap *cm, const bool f) | |
118 | : SessionMapIOContext(cm), first(f), header_r(0), values_r(0) {} | |
119 | ||
120 | void finish(int r) override { | |
121 | sessionmap->_load_finish(r, header_r, values_r, first, header_bl, session_vals, | |
122 | more_session_vals); | |
123 | } | |
91327a77 AA |
124 | void print(ostream& out) const override { |
125 | out << "session_load"; | |
126 | } | |
7c673cae FG |
127 | }; |
128 | } | |
129 | ||
130 | ||
131 | /** | |
132 | * Decode OMAP header. Call this once when loading. | |
133 | */ | |
134 | void SessionMapStore::decode_header( | |
135 | bufferlist &header_bl) | |
136 | { | |
11fdf7f2 | 137 | auto q = header_bl.cbegin(); |
7c673cae | 138 | DECODE_START(1, q) |
11fdf7f2 | 139 | decode(version, q); |
7c673cae FG |
140 | DECODE_FINISH(q); |
141 | } | |
142 | ||
143 | void SessionMapStore::encode_header( | |
144 | bufferlist *header_bl) | |
145 | { | |
146 | ENCODE_START(1, 1, *header_bl); | |
11fdf7f2 | 147 | encode(version, *header_bl); |
7c673cae FG |
148 | ENCODE_FINISH(*header_bl); |
149 | } | |
150 | ||
151 | /** | |
152 | * Decode and insert some serialized OMAP values. Call this | |
153 | * repeatedly to insert batched loads. | |
154 | */ | |
155 | void SessionMapStore::decode_values(std::map<std::string, bufferlist> &session_vals) | |
156 | { | |
157 | for (std::map<std::string, bufferlist>::iterator i = session_vals.begin(); | |
158 | i != session_vals.end(); ++i) { | |
159 | ||
160 | entity_inst_t inst; | |
161 | ||
162 | bool parsed = inst.name.parse(i->first); | |
163 | if (!parsed) { | |
164 | derr << "Corrupt entity name '" << i->first << "' in sessionmap" << dendl; | |
165 | throw buffer::malformed_input("Corrupt entity name in sessionmap"); | |
166 | } | |
167 | ||
168 | Session *s = get_or_add_session(inst); | |
91327a77 | 169 | if (s->is_closed()) { |
7c673cae | 170 | s->set_state(Session::STATE_OPEN); |
91327a77 AA |
171 | s->set_load_avg_decay_rate(decay_rate); |
172 | } | |
11fdf7f2 | 173 | auto q = i->second.cbegin(); |
7c673cae FG |
174 | s->decode(q); |
175 | } | |
176 | } | |
177 | ||
178 | /** | |
179 | * An OMAP read finished. | |
180 | */ | |
181 | void SessionMap::_load_finish( | |
182 | int operation_r, | |
183 | int header_r, | |
184 | int values_r, | |
185 | bool first, | |
186 | bufferlist &header_bl, | |
187 | std::map<std::string, bufferlist> &session_vals, | |
188 | bool more_session_vals) | |
189 | { | |
190 | if (operation_r < 0) { | |
191 | derr << "_load_finish got " << cpp_strerror(operation_r) << dendl; | |
192 | mds->clog->error() << "error reading sessionmap '" << get_object_name() | |
193 | << "' " << operation_r << " (" | |
194 | << cpp_strerror(operation_r) << ")"; | |
195 | mds->damaged(); | |
196 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
197 | } | |
198 | ||
199 | // Decode header | |
200 | if (first) { | |
201 | if (header_r != 0) { | |
202 | derr << __func__ << ": header error: " << cpp_strerror(header_r) << dendl; | |
203 | mds->clog->error() << "error reading sessionmap header " | |
204 | << header_r << " (" << cpp_strerror(header_r) << ")"; | |
205 | mds->damaged(); | |
206 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
207 | } | |
208 | ||
209 | if(header_bl.length() == 0) { | |
210 | dout(4) << __func__ << ": header missing, loading legacy..." << dendl; | |
211 | load_legacy(); | |
212 | return; | |
213 | } | |
214 | ||
215 | try { | |
216 | decode_header(header_bl); | |
217 | } catch (buffer::error &e) { | |
218 | mds->clog->error() << "corrupt sessionmap header: " << e.what(); | |
219 | mds->damaged(); | |
220 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
221 | } | |
222 | dout(10) << __func__ << " loaded version " << version << dendl; | |
223 | } | |
224 | ||
225 | if (values_r != 0) { | |
226 | derr << __func__ << ": error reading values: " | |
227 | << cpp_strerror(values_r) << dendl; | |
228 | mds->clog->error() << "error reading sessionmap values: " | |
229 | << values_r << " (" << cpp_strerror(values_r) << ")"; | |
230 | mds->damaged(); | |
231 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
232 | } | |
233 | ||
234 | // Decode session_vals | |
235 | try { | |
236 | decode_values(session_vals); | |
237 | } catch (buffer::error &e) { | |
238 | mds->clog->error() << "corrupt sessionmap values: " << e.what(); | |
239 | mds->damaged(); | |
240 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
241 | } | |
242 | ||
243 | if (more_session_vals) { | |
244 | // Issue another read if we're not at the end of the omap | |
245 | const std::string last_key = session_vals.rbegin()->first; | |
246 | dout(10) << __func__ << ": continue omap load from '" | |
247 | << last_key << "'" << dendl; | |
248 | object_t oid = get_object_name(); | |
b3b6e05e | 249 | object_locator_t oloc(mds->get_metadata_pool()); |
7c673cae FG |
250 | C_IO_SM_Load *c = new C_IO_SM_Load(this, false); |
251 | ObjectOperation op; | |
11fdf7f2 | 252 | op.omap_get_vals(last_key, "", g_conf()->mds_sessionmap_keys_per_op, |
7c673cae FG |
253 | &c->session_vals, &c->more_session_vals, &c->values_r); |
254 | mds->objecter->read(oid, oloc, op, CEPH_NOSNAP, NULL, 0, | |
255 | new C_OnFinisher(c, mds->finisher)); | |
256 | } else { | |
257 | // I/O is complete. Update `by_state` | |
258 | dout(10) << __func__ << ": omap load complete" << dendl; | |
259 | for (ceph::unordered_map<entity_name_t, Session*>::iterator i = session_map.begin(); | |
260 | i != session_map.end(); ++i) { | |
261 | Session *s = i->second; | |
262 | auto by_state_entry = by_state.find(s->get_state()); | |
263 | if (by_state_entry == by_state.end()) | |
264 | by_state_entry = by_state.emplace(s->get_state(), | |
265 | new xlist<Session*>).first; | |
266 | by_state_entry->second->push_back(&s->item_session_list); | |
267 | } | |
268 | ||
269 | // Population is complete. Trigger load waiters. | |
270 | dout(10) << __func__ << ": v " << version | |
271 | << ", " << session_map.size() << " sessions" << dendl; | |
272 | projected = committing = committed = version; | |
273 | dump(); | |
274 | finish_contexts(g_ceph_context, waiting_for_load); | |
275 | } | |
276 | } | |
277 | ||
278 | /** | |
279 | * Populate session state from OMAP records in this | |
280 | * rank's sessionmap object. | |
281 | */ | |
11fdf7f2 | 282 | void SessionMap::load(MDSContext *onload) |
7c673cae FG |
283 | { |
284 | dout(10) << "load" << dendl; | |
285 | ||
286 | if (onload) | |
287 | waiting_for_load.push_back(onload); | |
288 | ||
289 | C_IO_SM_Load *c = new C_IO_SM_Load(this, true); | |
290 | object_t oid = get_object_name(); | |
b3b6e05e | 291 | object_locator_t oloc(mds->get_metadata_pool()); |
7c673cae FG |
292 | |
293 | ObjectOperation op; | |
294 | op.omap_get_header(&c->header_bl, &c->header_r); | |
11fdf7f2 | 295 | op.omap_get_vals("", "", g_conf()->mds_sessionmap_keys_per_op, |
7c673cae FG |
296 | &c->session_vals, &c->more_session_vals, &c->values_r); |
297 | ||
298 | mds->objecter->read(oid, oloc, op, CEPH_NOSNAP, NULL, 0, new C_OnFinisher(c, mds->finisher)); | |
299 | } | |
300 | ||
301 | namespace { | |
302 | class C_IO_SM_LoadLegacy : public SessionMapIOContext { | |
303 | public: | |
304 | bufferlist bl; | |
305 | explicit C_IO_SM_LoadLegacy(SessionMap *cm) : SessionMapIOContext(cm) {} | |
306 | void finish(int r) override { | |
307 | sessionmap->_load_legacy_finish(r, bl); | |
308 | } | |
91327a77 AA |
309 | void print(ostream& out) const override { |
310 | out << "session_load_legacy"; | |
311 | } | |
7c673cae FG |
312 | }; |
313 | } | |
314 | ||
315 | ||
316 | /** | |
317 | * Load legacy (object data blob) SessionMap format, assuming | |
318 | * that waiting_for_load has already been populated with | |
319 | * the relevant completion. This is the fallback if we do not | |
320 | * find an OMAP header when attempting to load normally. | |
321 | */ | |
322 | void SessionMap::load_legacy() | |
323 | { | |
324 | dout(10) << __func__ << dendl; | |
325 | ||
326 | C_IO_SM_LoadLegacy *c = new C_IO_SM_LoadLegacy(this); | |
327 | object_t oid = get_object_name(); | |
b3b6e05e | 328 | object_locator_t oloc(mds->get_metadata_pool()); |
7c673cae FG |
329 | |
330 | mds->objecter->read_full(oid, oloc, CEPH_NOSNAP, &c->bl, 0, | |
331 | new C_OnFinisher(c, mds->finisher)); | |
332 | } | |
333 | ||
334 | void SessionMap::_load_legacy_finish(int r, bufferlist &bl) | |
335 | { | |
11fdf7f2 | 336 | auto blp = bl.cbegin(); |
7c673cae FG |
337 | if (r < 0) { |
338 | derr << "_load_finish got " << cpp_strerror(r) << dendl; | |
11fdf7f2 | 339 | ceph_abort_msg("failed to load sessionmap"); |
7c673cae FG |
340 | } |
341 | dump(); | |
342 | decode_legacy(blp); // note: this sets last_cap_renew = now() | |
343 | dout(10) << "_load_finish v " << version | |
344 | << ", " << session_map.size() << " sessions, " | |
345 | << bl.length() << " bytes" | |
346 | << dendl; | |
347 | projected = committing = committed = version; | |
348 | dump(); | |
349 | ||
350 | // Mark all sessions dirty, so that on next save() we will write | |
351 | // a complete OMAP version of the data loaded from the legacy format | |
352 | for (ceph::unordered_map<entity_name_t, Session*>::iterator i = session_map.begin(); | |
353 | i != session_map.end(); ++i) { | |
354 | // Don't use mark_dirty because on this occasion we want to ignore the | |
355 | // keys_per_op limit and do one big write (upgrade must be atomic) | |
356 | dirty_sessions.insert(i->first); | |
357 | } | |
358 | loaded_legacy = true; | |
359 | ||
360 | finish_contexts(g_ceph_context, waiting_for_load); | |
361 | } | |
362 | ||
363 | ||
364 | // ---------------- | |
365 | // SAVE | |
366 | ||
367 | namespace { | |
368 | class C_IO_SM_Save : public SessionMapIOContext { | |
369 | version_t version; | |
370 | public: | |
371 | C_IO_SM_Save(SessionMap *cm, version_t v) : SessionMapIOContext(cm), version(v) {} | |
372 | void finish(int r) override { | |
373 | if (r != 0) { | |
374 | get_mds()->handle_write_error(r); | |
375 | } else { | |
376 | sessionmap->_save_finish(version); | |
377 | } | |
378 | } | |
91327a77 AA |
379 | void print(ostream& out) const override { |
380 | out << "session_save"; | |
381 | } | |
7c673cae FG |
382 | }; |
383 | } | |
384 | ||
aee94f69 TL |
385 | bool SessionMap::validate_and_encode_session(MDSRank *mds, Session *session, bufferlist& bl) { |
386 | session->info.encode(bl, mds->mdsmap->get_up_features()); | |
387 | return bl.length() < mds_session_metadata_threshold; | |
388 | } | |
389 | ||
11fdf7f2 | 390 | void SessionMap::save(MDSContext *onsave, version_t needv) |
7c673cae FG |
391 | { |
392 | dout(10) << __func__ << ": needv " << needv << ", v " << version << dendl; | |
393 | ||
394 | if (needv && committing >= needv) { | |
11fdf7f2 | 395 | ceph_assert(committing > committed); |
7c673cae FG |
396 | commit_waiters[committing].push_back(onsave); |
397 | return; | |
398 | } | |
399 | ||
400 | commit_waiters[version].push_back(onsave); | |
401 | ||
402 | committing = version; | |
403 | SnapContext snapc; | |
404 | object_t oid = get_object_name(); | |
b3b6e05e | 405 | object_locator_t oloc(mds->get_metadata_pool()); |
7c673cae FG |
406 | |
407 | ObjectOperation op; | |
408 | ||
409 | /* Compose OSD OMAP transaction for full write */ | |
410 | bufferlist header_bl; | |
411 | encode_header(&header_bl); | |
412 | op.omap_set_header(header_bl); | |
413 | ||
414 | /* If we loaded a legacy sessionmap, then erase the old data. If | |
415 | * an old-versioned MDS tries to read it, it'll fail out safely | |
416 | * with an end_of_buffer exception */ | |
417 | if (loaded_legacy) { | |
418 | dout(4) << __func__ << " erasing legacy sessionmap" << dendl; | |
419 | op.truncate(0); | |
420 | loaded_legacy = false; // only need to truncate once. | |
421 | } | |
422 | ||
423 | dout(20) << " updating keys:" << dendl; | |
424 | map<string, bufferlist> to_set; | |
aee94f69 | 425 | std::set<entity_name_t> to_blocklist; |
7c673cae FG |
426 | for(std::set<entity_name_t>::iterator i = dirty_sessions.begin(); |
427 | i != dirty_sessions.end(); ++i) { | |
428 | const entity_name_t name = *i; | |
429 | Session *session = session_map[name]; | |
430 | ||
431 | if (session->is_open() || | |
432 | session->is_closing() || | |
433 | session->is_stale() || | |
434 | session->is_killing()) { | |
435 | dout(20) << " " << name << dendl; | |
7c673cae FG |
436 | |
437 | // Serialize V | |
438 | bufferlist bl; | |
aee94f69 TL |
439 | if (!validate_and_encode_session(mds, session, bl)) { |
440 | derr << __func__ << ": session (" << name << ") exceeds" | |
441 | << " sesion metadata threshold - blocklisting" << dendl; | |
442 | to_blocklist.emplace(name); | |
443 | continue; | |
444 | } | |
445 | ||
446 | // Serialize K | |
447 | CachedStackStringStream css; | |
448 | *css << name; | |
7c673cae FG |
449 | |
450 | // Add to RADOS op | |
f67539c2 | 451 | to_set[std::string(css->strv())] = bl; |
7c673cae FG |
452 | |
453 | session->clear_dirty_completed_requests(); | |
454 | } else { | |
455 | dout(20) << " " << name << " (ignoring)" << dendl; | |
456 | } | |
457 | } | |
458 | if (!to_set.empty()) { | |
459 | op.omap_set(to_set); | |
460 | } | |
461 | ||
462 | dout(20) << " removing keys:" << dendl; | |
463 | set<string> to_remove; | |
464 | for(std::set<entity_name_t>::const_iterator i = null_sessions.begin(); | |
465 | i != null_sessions.end(); ++i) { | |
466 | dout(20) << " " << *i << dendl; | |
f67539c2 TL |
467 | CachedStackStringStream css; |
468 | *css << *i; | |
469 | to_remove.insert(css->str()); | |
7c673cae FG |
470 | } |
471 | if (!to_remove.empty()) { | |
472 | op.omap_rm_keys(to_remove); | |
473 | } | |
474 | ||
475 | dirty_sessions.clear(); | |
476 | null_sessions.clear(); | |
477 | ||
478 | mds->objecter->mutate(oid, oloc, op, snapc, | |
479 | ceph::real_clock::now(), | |
480 | 0, | |
481 | new C_OnFinisher(new C_IO_SM_Save(this, version), | |
482 | mds->finisher)); | |
aee94f69 TL |
483 | apply_blocklist(to_blocklist); |
484 | logger->inc(l_mdssm_metadata_threshold_sessions_evicted, to_blocklist.size()); | |
7c673cae FG |
485 | } |
486 | ||
487 | void SessionMap::_save_finish(version_t v) | |
488 | { | |
489 | dout(10) << "_save_finish v" << v << dendl; | |
490 | committed = v; | |
491 | ||
492 | finish_contexts(g_ceph_context, commit_waiters[v]); | |
493 | commit_waiters.erase(v); | |
494 | } | |
495 | ||
496 | ||
497 | /** | |
498 | * Deserialize sessions, and update by_state index | |
499 | */ | |
11fdf7f2 | 500 | void SessionMap::decode_legacy(bufferlist::const_iterator &p) |
7c673cae FG |
501 | { |
502 | // Populate `sessions` | |
503 | SessionMapStore::decode_legacy(p); | |
504 | ||
505 | // Update `by_state` | |
506 | for (ceph::unordered_map<entity_name_t, Session*>::iterator i = session_map.begin(); | |
507 | i != session_map.end(); ++i) { | |
508 | Session *s = i->second; | |
509 | auto by_state_entry = by_state.find(s->get_state()); | |
510 | if (by_state_entry == by_state.end()) | |
511 | by_state_entry = by_state.emplace(s->get_state(), | |
512 | new xlist<Session*>).first; | |
513 | by_state_entry->second->push_back(&s->item_session_list); | |
514 | } | |
515 | } | |
516 | ||
517 | uint64_t SessionMap::set_state(Session *session, int s) { | |
518 | if (session->state != s) { | |
519 | session->set_state(s); | |
520 | auto by_state_entry = by_state.find(s); | |
521 | if (by_state_entry == by_state.end()) | |
522 | by_state_entry = by_state.emplace(s, new xlist<Session*>).first; | |
523 | by_state_entry->second->push_back(&session->item_session_list); | |
91327a77 AA |
524 | |
525 | if (session->is_open() || session->is_stale()) { | |
526 | session->set_load_avg_decay_rate(decay_rate); | |
527 | } | |
528 | ||
529 | // refresh number of sessions for states which have perf | |
530 | // couters associated | |
531 | logger->set(l_mdssm_session_open, | |
532 | get_session_count_in_state(Session::STATE_OPEN)); | |
533 | logger->set(l_mdssm_session_stale, | |
534 | get_session_count_in_state(Session::STATE_STALE)); | |
7c673cae | 535 | } |
91327a77 | 536 | |
7c673cae FG |
537 | return session->get_state_seq(); |
538 | } | |
539 | ||
11fdf7f2 | 540 | void SessionMapStore::decode_legacy(bufferlist::const_iterator& p) |
7c673cae | 541 | { |
91327a77 | 542 | auto now = clock::now(); |
7c673cae | 543 | uint64_t pre; |
11fdf7f2 | 544 | decode(pre, p); |
7c673cae FG |
545 | if (pre == (uint64_t)-1) { |
546 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, p); | |
11fdf7f2 | 547 | ceph_assert(struct_v >= 2); |
7c673cae | 548 | |
11fdf7f2 | 549 | decode(version, p); |
7c673cae FG |
550 | |
551 | while (!p.end()) { | |
552 | entity_inst_t inst; | |
11fdf7f2 | 553 | decode(inst.name, p); |
7c673cae | 554 | Session *s = get_or_add_session(inst); |
91327a77 | 555 | if (s->is_closed()) { |
7c673cae | 556 | s->set_state(Session::STATE_OPEN); |
91327a77 AA |
557 | s->set_load_avg_decay_rate(decay_rate); |
558 | } | |
7c673cae FG |
559 | s->decode(p); |
560 | } | |
561 | ||
562 | DECODE_FINISH(p); | |
563 | } else { | |
564 | // --- old format ---- | |
565 | version = pre; | |
566 | ||
567 | // this is a meaningless upper bound. can be ignored. | |
568 | __u32 n; | |
11fdf7f2 | 569 | decode(n, p); |
7c673cae FG |
570 | |
571 | while (n-- && !p.end()) { | |
a8e16298 TL |
572 | auto p2 = p; |
573 | Session *s = new Session(ConnectionRef()); | |
7c673cae | 574 | s->info.decode(p); |
92f5a8d4 TL |
575 | { |
576 | auto& name = s->info.inst.name; | |
577 | auto it = session_map.find(name); | |
578 | if (it != session_map.end()) { | |
579 | // eager client connected too fast! aie. | |
580 | dout(10) << " already had session for " << name << ", recovering" << dendl; | |
581 | delete s; | |
582 | s = it->second; | |
583 | p = p2; | |
584 | s->info.decode(p); | |
585 | } else { | |
586 | it->second = s; | |
587 | } | |
7c673cae FG |
588 | } |
589 | s->set_state(Session::STATE_OPEN); | |
91327a77 | 590 | s->set_load_avg_decay_rate(decay_rate); |
7c673cae FG |
591 | s->last_cap_renew = now; |
592 | } | |
593 | } | |
594 | } | |
595 | ||
adb31ebb | 596 | void Session::dump(Formatter *f, bool cap_dump) const |
92f5a8d4 TL |
597 | { |
598 | f->dump_int("id", info.inst.name.num()); | |
599 | f->dump_object("entity", info.inst); | |
600 | f->dump_string("state", get_state_name()); | |
601 | f->dump_int("num_leases", leases.size()); | |
602 | f->dump_int("num_caps", caps.size()); | |
adb31ebb TL |
603 | if (cap_dump) { |
604 | f->open_array_section("caps"); | |
605 | for (const auto& cap : caps) { | |
606 | f->dump_object("cap", *cap); | |
607 | } | |
608 | f->close_section(); | |
609 | } | |
92f5a8d4 TL |
610 | if (is_open() || is_stale()) { |
611 | f->dump_unsigned("request_load_avg", get_load_avg()); | |
612 | } | |
613 | f->dump_float("uptime", get_session_uptime()); | |
614 | f->dump_unsigned("requests_in_flight", get_request_count()); | |
b3b6e05e TL |
615 | f->dump_unsigned("num_completed_requests", get_num_completed_requests()); |
616 | f->dump_unsigned("num_completed_flushes", get_num_completed_flushes()); | |
92f5a8d4 TL |
617 | f->dump_bool("reconnecting", reconnecting); |
618 | f->dump_object("recall_caps", recall_caps); | |
619 | f->dump_object("release_caps", release_caps); | |
620 | f->dump_object("recall_caps_throttle", recall_caps_throttle); | |
621 | f->dump_object("recall_caps_throttle2o", recall_caps_throttle2o); | |
622 | f->dump_object("session_cache_liveness", session_cache_liveness); | |
adb31ebb | 623 | f->dump_object("cap_acquisition", cap_acquisition); |
f67539c2 | 624 | |
f38dd50b TL |
625 | f->dump_unsigned("last_trim_completed_requests_tid", last_trim_completed_requests_tid); |
626 | f->dump_unsigned("last_trim_completed_flushes_tid", last_trim_completed_flushes_tid); | |
627 | ||
f67539c2 TL |
628 | f->open_array_section("delegated_inos"); |
629 | for (const auto& [start, len] : delegated_inos) { | |
630 | f->open_object_section("ino_range"); | |
631 | f->dump_stream("start") << start; | |
632 | f->dump_unsigned("length", len); | |
633 | f->close_section(); | |
634 | } | |
635 | f->close_section(); | |
636 | ||
92f5a8d4 TL |
637 | info.dump(f); |
638 | } | |
639 | ||
7c673cae FG |
640 | void SessionMapStore::dump(Formatter *f) const |
641 | { | |
92f5a8d4 TL |
642 | f->open_array_section("sessions"); |
643 | for (const auto& p : session_map) { | |
644 | f->dump_object("session", *p.second); | |
7c673cae FG |
645 | } |
646 | f->close_section(); // Sessions | |
647 | } | |
648 | ||
9f95a23c | 649 | void SessionMapStore::generate_test_instances(std::list<SessionMapStore*>& ls) |
7c673cae FG |
650 | { |
651 | // pretty boring for now | |
652 | ls.push_back(new SessionMapStore()); | |
653 | } | |
654 | ||
655 | void SessionMap::wipe() | |
656 | { | |
657 | dout(1) << "wipe start" << dendl; | |
658 | dump(); | |
659 | while (!session_map.empty()) { | |
660 | Session *s = session_map.begin()->second; | |
661 | remove_session(s); | |
662 | } | |
663 | version = ++projected; | |
664 | dout(1) << "wipe result" << dendl; | |
665 | dump(); | |
666 | dout(1) << "wipe done" << dendl; | |
667 | } | |
668 | ||
669 | void SessionMap::wipe_ino_prealloc() | |
670 | { | |
671 | for (ceph::unordered_map<entity_name_t,Session*>::iterator p = session_map.begin(); | |
672 | p != session_map.end(); | |
673 | ++p) { | |
674 | p->second->pending_prealloc_inos.clear(); | |
f67539c2 | 675 | p->second->free_prealloc_inos.clear(); |
9f95a23c | 676 | p->second->delegated_inos.clear(); |
7c673cae | 677 | p->second->info.prealloc_inos.clear(); |
7c673cae FG |
678 | } |
679 | projected = ++version; | |
680 | } | |
681 | ||
682 | void SessionMap::add_session(Session *s) | |
683 | { | |
684 | dout(10) << __func__ << " s=" << s << " name=" << s->info.inst.name << dendl; | |
685 | ||
11fdf7f2 | 686 | ceph_assert(session_map.count(s->info.inst.name) == 0); |
7c673cae FG |
687 | session_map[s->info.inst.name] = s; |
688 | auto by_state_entry = by_state.find(s->state); | |
689 | if (by_state_entry == by_state.end()) | |
690 | by_state_entry = by_state.emplace(s->state, new xlist<Session*>).first; | |
691 | by_state_entry->second->push_back(&s->item_session_list); | |
692 | s->get(); | |
693 | ||
91327a77 AA |
694 | update_average_birth_time(*s); |
695 | ||
7c673cae FG |
696 | logger->set(l_mdssm_session_count, session_map.size()); |
697 | logger->inc(l_mdssm_session_add); | |
698 | } | |
699 | ||
700 | void SessionMap::remove_session(Session *s) | |
701 | { | |
702 | dout(10) << __func__ << " s=" << s << " name=" << s->info.inst.name << dendl; | |
703 | ||
91327a77 AA |
704 | update_average_birth_time(*s, false); |
705 | ||
7c673cae FG |
706 | s->trim_completed_requests(0); |
707 | s->item_session_list.remove_myself(); | |
f38dd50b | 708 | broken_root_squash_clients.erase(s); |
7c673cae FG |
709 | session_map.erase(s->info.inst.name); |
710 | dirty_sessions.erase(s->info.inst.name); | |
711 | null_sessions.insert(s->info.inst.name); | |
712 | s->put(); | |
713 | ||
714 | logger->set(l_mdssm_session_count, session_map.size()); | |
715 | logger->inc(l_mdssm_session_remove); | |
716 | } | |
717 | ||
718 | void SessionMap::touch_session(Session *session) | |
719 | { | |
720 | dout(10) << __func__ << " s=" << session << " name=" << session->info.inst.name << dendl; | |
721 | ||
722 | // Move to the back of the session list for this state (should | |
723 | // already be on a list courtesy of add_session and set_state) | |
11fdf7f2 | 724 | ceph_assert(session->item_session_list.is_on_list()); |
7c673cae FG |
725 | auto by_state_entry = by_state.find(session->state); |
726 | if (by_state_entry == by_state.end()) | |
727 | by_state_entry = by_state.emplace(session->state, | |
728 | new xlist<Session*>).first; | |
729 | by_state_entry->second->push_back(&session->item_session_list); | |
730 | ||
91327a77 | 731 | session->last_cap_renew = clock::now(); |
7c673cae FG |
732 | } |
733 | ||
81eedcae | 734 | void SessionMap::_mark_dirty(Session *s, bool may_save) |
7c673cae | 735 | { |
31f18b77 FG |
736 | if (dirty_sessions.count(s->info.inst.name)) |
737 | return; | |
738 | ||
81eedcae TL |
739 | if (may_save && |
740 | dirty_sessions.size() >= g_conf()->mds_sessionmap_keys_per_op) { | |
7c673cae FG |
741 | // Pre-empt the usual save() call from journal segment trim, in |
742 | // order to avoid building up an oversized OMAP update operation | |
743 | // from too many sessions modified at once | |
744 | save(new C_MDSInternalNoop, version); | |
745 | } | |
746 | ||
31f18b77 | 747 | null_sessions.erase(s->info.inst.name); |
7c673cae FG |
748 | dirty_sessions.insert(s->info.inst.name); |
749 | } | |
750 | ||
81eedcae | 751 | void SessionMap::mark_dirty(Session *s, bool may_save) |
7c673cae FG |
752 | { |
753 | dout(20) << __func__ << " s=" << s << " name=" << s->info.inst.name | |
754 | << " v=" << version << dendl; | |
755 | ||
81eedcae | 756 | _mark_dirty(s, may_save); |
7c673cae FG |
757 | version++; |
758 | s->pop_pv(version); | |
759 | } | |
760 | ||
761 | void SessionMap::replay_dirty_session(Session *s) | |
762 | { | |
763 | dout(20) << __func__ << " s=" << s << " name=" << s->info.inst.name | |
764 | << " v=" << version << dendl; | |
765 | ||
81eedcae | 766 | _mark_dirty(s, false); |
7c673cae FG |
767 | |
768 | replay_advance_version(); | |
769 | } | |
770 | ||
771 | void SessionMap::replay_advance_version() | |
772 | { | |
773 | version++; | |
774 | projected = version; | |
775 | } | |
776 | ||
81eedcae TL |
777 | void SessionMap::replay_open_sessions(version_t event_cmapv, |
778 | map<client_t,entity_inst_t>& client_map, | |
779 | map<client_t,client_metadata_t>& client_metadata_map) | |
780 | { | |
781 | unsigned already_saved; | |
782 | ||
783 | if (version + client_map.size() < event_cmapv) | |
784 | goto bad; | |
785 | ||
786 | // Server::finish_force_open_sessions() marks sessions dirty one by one. | |
787 | // Marking a session dirty may flush all existing dirty sessions. So it's | |
788 | // possible that some sessions are already saved in sessionmap. | |
789 | already_saved = client_map.size() - (event_cmapv - version); | |
790 | for (const auto& p : client_map) { | |
791 | Session *s = get_or_add_session(p.second); | |
792 | auto q = client_metadata_map.find(p.first); | |
793 | if (q != client_metadata_map.end()) | |
794 | s->info.client_metadata.merge(q->second); | |
795 | ||
796 | if (already_saved > 0) { | |
797 | if (s->is_closed()) | |
798 | goto bad; | |
799 | ||
800 | --already_saved; | |
801 | continue; | |
802 | } | |
803 | ||
804 | set_state(s, Session::STATE_OPEN); | |
805 | replay_dirty_session(s); | |
806 | } | |
807 | return; | |
808 | ||
809 | bad: | |
810 | mds->clog->error() << "error replaying open sessions(" << client_map.size() | |
811 | << ") sessionmap v " << event_cmapv << " table " << version; | |
812 | ceph_assert(g_conf()->mds_wipe_sessions); | |
813 | mds->sessionmap.wipe(); | |
814 | mds->sessionmap.set_version(event_cmapv); | |
815 | } | |
816 | ||
7c673cae FG |
817 | version_t SessionMap::mark_projected(Session *s) |
818 | { | |
819 | dout(20) << __func__ << " s=" << s << " name=" << s->info.inst.name | |
820 | << " pv=" << projected << " -> " << projected + 1 << dendl; | |
821 | ++projected; | |
822 | s->push_pv(projected); | |
823 | return projected; | |
824 | } | |
825 | ||
826 | namespace { | |
827 | class C_IO_SM_Save_One : public SessionMapIOContext { | |
11fdf7f2 | 828 | MDSContext *on_safe; |
7c673cae | 829 | public: |
11fdf7f2 | 830 | C_IO_SM_Save_One(SessionMap *cm, MDSContext *on_safe_) |
7c673cae FG |
831 | : SessionMapIOContext(cm), on_safe(on_safe_) {} |
832 | void finish(int r) override { | |
833 | if (r != 0) { | |
834 | get_mds()->handle_write_error(r); | |
835 | } else { | |
836 | on_safe->complete(r); | |
837 | } | |
838 | } | |
91327a77 AA |
839 | void print(ostream& out) const override { |
840 | out << "session_save_one"; | |
841 | } | |
7c673cae FG |
842 | }; |
843 | } | |
844 | ||
845 | ||
846 | void SessionMap::save_if_dirty(const std::set<entity_name_t> &tgt_sessions, | |
847 | MDSGatherBuilder *gather_bld) | |
848 | { | |
11fdf7f2 | 849 | ceph_assert(gather_bld != NULL); |
7c673cae | 850 | |
aee94f69 TL |
851 | std::set<entity_name_t> to_blocklist; |
852 | std::map<entity_name_t, bufferlist> write_sessions; | |
7c673cae FG |
853 | |
854 | // Decide which sessions require a write | |
855 | for (std::set<entity_name_t>::iterator i = tgt_sessions.begin(); | |
856 | i != tgt_sessions.end(); ++i) { | |
857 | const entity_name_t &session_id = *i; | |
858 | ||
859 | if (session_map.count(session_id) == 0) { | |
860 | // Session isn't around any more, never mind. | |
861 | continue; | |
862 | } | |
863 | ||
864 | Session *session = session_map[session_id]; | |
865 | if (!session->has_dirty_completed_requests()) { | |
866 | // Session hasn't had completed_requests | |
867 | // modified since last write, no need to | |
868 | // write it now. | |
869 | continue; | |
870 | } | |
871 | ||
872 | if (dirty_sessions.count(session_id) > 0) { | |
873 | // Session is already dirtied, will be written, no | |
874 | // need to pre-empt that. | |
875 | continue; | |
876 | } | |
aee94f69 TL |
877 | |
878 | // Serialize V | |
879 | bufferlist bl; | |
880 | if (!validate_and_encode_session(mds, session, bl)) { | |
881 | derr << __func__ << ": session (" << session_id << ") exceeds" | |
882 | << " sesion metadata threshold - blocklisting" << dendl; | |
883 | to_blocklist.emplace(session_id); | |
884 | continue; | |
885 | } | |
886 | ||
7c673cae FG |
887 | // Okay, passed all our checks, now we write |
888 | // this session out. The version we write | |
889 | // into the OMAP may now be higher-versioned | |
890 | // than the version in the header, but that's | |
891 | // okay because it's never a problem to have | |
892 | // an overly-fresh copy of a session. | |
aee94f69 TL |
893 | write_sessions.emplace(session_id, std::move(bl)); |
894 | session->clear_dirty_completed_requests(); | |
7c673cae FG |
895 | } |
896 | ||
897 | dout(4) << __func__ << ": writing " << write_sessions.size() << dendl; | |
898 | ||
899 | // Batch writes into mds_sessionmap_keys_per_op | |
11fdf7f2 | 900 | const uint32_t kpo = g_conf()->mds_sessionmap_keys_per_op; |
7c673cae | 901 | map<string, bufferlist> to_set; |
7c673cae | 902 | |
aee94f69 TL |
903 | uint32_t i = 0; |
904 | for (auto &[session_id, bl] : write_sessions) { | |
7c673cae | 905 | // Serialize K |
f67539c2 TL |
906 | CachedStackStringStream css; |
907 | *css << session_id; | |
7c673cae | 908 | |
7c673cae | 909 | // Add to RADOS op |
aee94f69 | 910 | to_set[css->str()] = std::move(bl); |
7c673cae FG |
911 | |
912 | // Complete this write transaction? | |
913 | if (i == write_sessions.size() - 1 | |
914 | || i % kpo == kpo - 1) { | |
915 | ObjectOperation op; | |
916 | op.omap_set(to_set); | |
11fdf7f2 | 917 | to_set.clear(); // clear to start a new transaction |
7c673cae FG |
918 | |
919 | SnapContext snapc; | |
920 | object_t oid = get_object_name(); | |
b3b6e05e | 921 | object_locator_t oloc(mds->get_metadata_pool()); |
11fdf7f2 | 922 | MDSContext *on_safe = gather_bld->new_sub(); |
7c673cae | 923 | mds->objecter->mutate(oid, oloc, op, snapc, |
91327a77 AA |
924 | ceph::real_clock::now(), 0, |
925 | new C_OnFinisher( | |
7c673cae FG |
926 | new C_IO_SM_Save_One(this, on_safe), |
927 | mds->finisher)); | |
928 | } | |
aee94f69 | 929 | ++i; |
7c673cae | 930 | } |
aee94f69 TL |
931 | |
932 | apply_blocklist(to_blocklist); | |
933 | logger->inc(l_mdssm_metadata_threshold_sessions_evicted, to_blocklist.size()); | |
7c673cae FG |
934 | } |
935 | ||
936 | // ================= | |
937 | // Session | |
938 | ||
939 | #undef dout_prefix | |
940 | #define dout_prefix *_dout << "Session " | |
941 | ||
942 | /** | |
943 | * Calculate the length of the `requests` member list, | |
944 | * because elist does not have a size() method. | |
945 | * | |
92f5a8d4 | 946 | * O(N) runtime. |
7c673cae | 947 | */ |
92f5a8d4 | 948 | size_t Session::get_request_count() const |
7c673cae FG |
949 | { |
950 | size_t result = 0; | |
9f95a23c | 951 | for (auto p = requests.begin(); !p.end(); ++p) |
7c673cae | 952 | ++result; |
7c673cae FG |
953 | return result; |
954 | } | |
955 | ||
956 | /** | |
957 | * Capped in response to a CEPH_MSG_CLIENT_CAPRELEASE message, | |
958 | * with n_caps equal to the number of caps that were released | |
959 | * in the message. Used to update state about how many caps a | |
960 | * client has released since it was last instructed to RECALL_STATE. | |
961 | */ | |
962 | void Session::notify_cap_release(size_t n_caps) | |
963 | { | |
11fdf7f2 TL |
964 | recall_caps.hit(-(double)n_caps); |
965 | release_caps.hit(n_caps); | |
7c673cae FG |
966 | } |
967 | ||
968 | /** | |
969 | * Called when a CEPH_MSG_CLIENT_SESSION->CEPH_SESSION_RECALL_STATE | |
970 | * message is sent to the client. Update our recall-related state | |
971 | * in order to generate health metrics if the session doesn't see | |
972 | * a commensurate number of calls to ::notify_cap_release | |
973 | */ | |
a8e16298 | 974 | uint64_t Session::notify_recall_sent(size_t new_limit) |
7c673cae | 975 | { |
a8e16298 TL |
976 | const auto num_caps = caps.size(); |
977 | ceph_assert(new_limit < num_caps); // Behaviour of Server::recall_client_state | |
978 | const auto count = num_caps-new_limit; | |
979 | uint64_t new_change; | |
980 | if (recall_limit != new_limit) { | |
981 | new_change = count; | |
7c673cae | 982 | } else { |
a8e16298 | 983 | new_change = 0; /* no change! */ |
7c673cae | 984 | } |
7c673cae | 985 | |
a8e16298 TL |
986 | /* Always hit the session counter as a RECALL message is still sent to the |
987 | * client and we do not want the MDS to burn its global counter tokens on a | |
988 | * session that is not releasing caps (i.e. allow the session counter to | |
989 | * throttle future RECALL messages). | |
990 | */ | |
11fdf7f2 TL |
991 | recall_caps_throttle.hit(count); |
992 | recall_caps_throttle2o.hit(count); | |
993 | recall_caps.hit(count); | |
a8e16298 | 994 | return new_change; |
7c673cae FG |
995 | } |
996 | ||
997 | /** | |
998 | * Use client metadata to generate a somewhat-friendlier | |
999 | * name for the client than its session ID. | |
1000 | * | |
1001 | * This is *not* guaranteed to be unique, and any machine | |
1002 | * consumers of session-related output should always use | |
1003 | * the session ID as a primary capacity and use this only | |
1004 | * as a presentation hint. | |
1005 | */ | |
1006 | void Session::_update_human_name() | |
1007 | { | |
1008 | auto info_client_metadata_entry = info.client_metadata.find("hostname"); | |
1009 | if (info_client_metadata_entry != info.client_metadata.end()) { | |
1010 | // Happy path, refer to clients by hostname | |
1011 | human_name = info_client_metadata_entry->second; | |
1012 | if (!info.auth_name.has_default_id()) { | |
1013 | // When a non-default entity ID is set by the user, assume they | |
1014 | // would like to see it in references to the client, if it's | |
1015 | // reasonable short. Limit the length because we don't want | |
1016 | // to put e.g. uuid-generated names into a "human readable" | |
1017 | // rendering. | |
1018 | const int arbitrarily_short = 16; | |
1019 | if (info.auth_name.get_id().size() < arbitrarily_short) { | |
1020 | human_name += std::string(":") + info.auth_name.get_id(); | |
1021 | } | |
1022 | } | |
1023 | } else { | |
1024 | // Fallback, refer to clients by ID e.g. client.4567 | |
1025 | human_name = stringify(info.inst.name.num()); | |
1026 | } | |
1027 | } | |
1028 | ||
11fdf7f2 | 1029 | void Session::decode(bufferlist::const_iterator &p) |
7c673cae FG |
1030 | { |
1031 | info.decode(p); | |
1032 | ||
f67539c2 TL |
1033 | free_prealloc_inos = info.prealloc_inos; |
1034 | ||
7c673cae FG |
1035 | _update_human_name(); |
1036 | } | |
1037 | ||
1038 | int Session::check_access(CInode *in, unsigned mask, | |
1039 | int caller_uid, int caller_gid, | |
1040 | const vector<uint64_t> *caller_gid_list, | |
1041 | int new_uid, int new_gid) | |
1042 | { | |
1043 | string path; | |
1044 | CInode *diri = NULL; | |
1045 | if (!in->is_base()) | |
1046 | diri = in->get_projected_parent_dn()->get_dir()->get_inode(); | |
1047 | if (diri && diri->is_stray()){ | |
11fdf7f2 | 1048 | path = in->get_projected_inode()->stray_prior_path; |
7c673cae FG |
1049 | dout(20) << __func__ << " stray_prior_path " << path << dendl; |
1050 | } else { | |
1051 | in->make_path_string(path, true); | |
1052 | dout(20) << __func__ << " path " << path << dendl; | |
1053 | } | |
1054 | if (path.length()) | |
1055 | path = path.substr(1); // drop leading / | |
1056 | ||
f67539c2 TL |
1057 | const auto& inode = in->get_inode(); |
1058 | if (in->is_dir() && | |
1059 | inode->has_layout() && | |
1060 | inode->layout.pool_ns.length() && | |
7c673cae FG |
1061 | !connection->has_feature(CEPH_FEATURE_FS_FILE_LAYOUT_V2)) { |
1062 | dout(10) << __func__ << " client doesn't support FS_FILE_LAYOUT_V2" << dendl; | |
f67539c2 | 1063 | return -CEPHFS_EIO; |
7c673cae FG |
1064 | } |
1065 | ||
f67539c2 | 1066 | if (!auth_caps.is_capable(path, inode->uid, inode->gid, inode->mode, |
7c673cae | 1067 | caller_uid, caller_gid, caller_gid_list, mask, |
11fdf7f2 | 1068 | new_uid, new_gid, |
92f5a8d4 | 1069 | info.inst.addr)) { |
f67539c2 | 1070 | return -CEPHFS_EACCES; |
7c673cae FG |
1071 | } |
1072 | return 0; | |
1073 | } | |
1074 | ||
91327a77 AA |
1075 | // track total and per session load |
1076 | void SessionMap::hit_session(Session *session) { | |
1077 | uint64_t sessions = get_session_count_in_state(Session::STATE_OPEN) + | |
f91f0fd5 TL |
1078 | get_session_count_in_state(Session::STATE_STALE) + |
1079 | get_session_count_in_state(Session::STATE_CLOSING); | |
11fdf7f2 | 1080 | ceph_assert(sessions != 0); |
91327a77 | 1081 | |
11fdf7f2 | 1082 | double total_load = total_load_avg.hit(); |
91327a77 AA |
1083 | double avg_load = total_load / sessions; |
1084 | ||
1085 | logger->set(l_mdssm_total_load, (uint64_t)total_load); | |
1086 | logger->set(l_mdssm_avg_load, (uint64_t)avg_load); | |
1087 | ||
1088 | session->hit_session(); | |
1089 | } | |
1090 | ||
92f5a8d4 | 1091 | void SessionMap::handle_conf_change(const std::set<std::string>& changed) |
a8e16298 | 1092 | { |
11fdf7f2 TL |
1093 | auto apply_to_open_sessions = [this](auto f) { |
1094 | if (auto it = by_state.find(Session::STATE_OPEN); it != by_state.end()) { | |
a8e16298 | 1095 | for (const auto &session : *(it->second)) { |
11fdf7f2 | 1096 | f(session); |
a8e16298 TL |
1097 | } |
1098 | } | |
11fdf7f2 | 1099 | if (auto it = by_state.find(Session::STATE_STALE); it != by_state.end()) { |
a8e16298 | 1100 | for (const auto &session : *(it->second)) { |
11fdf7f2 | 1101 | f(session); |
a8e16298 TL |
1102 | } |
1103 | } | |
11fdf7f2 TL |
1104 | }; |
1105 | ||
1106 | if (changed.count("mds_request_load_average_decay_rate")) { | |
1107 | auto d = g_conf().get_val<double>("mds_request_load_average_decay_rate"); | |
11fdf7f2 TL |
1108 | |
1109 | decay_rate = d; | |
1110 | total_load_avg = DecayCounter(d); | |
1111 | ||
1112 | auto mut = [d](auto s) { | |
1113 | s->set_load_avg_decay_rate(d); | |
1114 | }; | |
1115 | apply_to_open_sessions(mut); | |
a8e16298 TL |
1116 | } |
1117 | if (changed.count("mds_recall_max_decay_rate")) { | |
11fdf7f2 TL |
1118 | auto d = g_conf().get_val<double>("mds_recall_max_decay_rate"); |
1119 | auto mut = [d](auto s) { | |
1120 | s->recall_caps_throttle = DecayCounter(d); | |
1121 | }; | |
1122 | apply_to_open_sessions(mut); | |
a8e16298 TL |
1123 | } |
1124 | if (changed.count("mds_recall_warning_decay_rate")) { | |
11fdf7f2 TL |
1125 | auto d = g_conf().get_val<double>("mds_recall_warning_decay_rate"); |
1126 | auto mut = [d](auto s) { | |
1127 | s->recall_caps = DecayCounter(d); | |
1128 | s->release_caps = DecayCounter(d); | |
1129 | }; | |
1130 | apply_to_open_sessions(mut); | |
91327a77 | 1131 | } |
92f5a8d4 TL |
1132 | if (changed.count("mds_session_cache_liveness_decay_rate")) { |
1133 | auto d = g_conf().get_val<double>("mds_session_cache_liveness_decay_rate"); | |
1134 | auto mut = [d](auto s) { | |
1135 | s->session_cache_liveness = DecayCounter(d); | |
1136 | s->session_cache_liveness.hit(s->caps.size()); /* so the MDS doesn't immediately start trimming a new session */ | |
1137 | }; | |
1138 | apply_to_open_sessions(mut); | |
1139 | } | |
adb31ebb TL |
1140 | if (changed.count("mds_session_cap_acquisition_decay_rate")) { |
1141 | auto d = g_conf().get_val<double>("mds_session_cap_acquisition_decay_rate"); | |
1142 | auto mut = [d](auto s) { | |
1143 | s->cap_acquisition = DecayCounter(d); | |
1144 | }; | |
1145 | apply_to_open_sessions(mut); | |
1146 | } | |
aee94f69 TL |
1147 | |
1148 | if (changed.count("mds_session_metadata_threshold")) { | |
1149 | mds_session_metadata_threshold = g_conf().get_val<Option::size_t>("mds_session_metadata_threshold"); | |
1150 | } | |
91327a77 AA |
1151 | } |
1152 | ||
1153 | void SessionMap::update_average_session_age() { | |
1154 | if (!session_map.size()) { | |
1155 | return; | |
1156 | } | |
1157 | ||
1158 | double avg_uptime = std::chrono::duration<double>(clock::now()-avg_birth_time).count(); | |
1159 | logger->set(l_mdssm_avg_session_uptime, (uint64_t)avg_uptime); | |
1160 | } | |
1161 | ||
aee94f69 TL |
1162 | void SessionMap::apply_blocklist(const std::set<entity_name_t>& victims) { |
1163 | if (victims.empty()) { | |
1164 | return; | |
1165 | } | |
1166 | ||
1167 | C_GatherBuilder gather(g_ceph_context, new C_MDSInternalNoop); | |
1168 | for (auto &victim : victims) { | |
1169 | CachedStackStringStream css; | |
1170 | mds->evict_client(victim.num(), false, g_conf()->mds_session_blocklist_on_evict, *css, | |
1171 | gather.new_sub()); | |
1172 | } | |
1173 | gather.activate(); | |
1174 | } | |
1175 | ||
7c673cae FG |
1176 | int SessionFilter::parse( |
1177 | const std::vector<std::string> &args, | |
f67539c2 | 1178 | std::ostream *ss) |
7c673cae | 1179 | { |
11fdf7f2 | 1180 | ceph_assert(ss != NULL); |
7c673cae FG |
1181 | |
1182 | for (const auto &s : args) { | |
1183 | dout(20) << __func__ << " parsing filter '" << s << "'" << dendl; | |
1184 | ||
1185 | auto eq = s.find("="); | |
1186 | if (eq == std::string::npos || eq == s.size()) { | |
9f95a23c TL |
1187 | // allow this to be a bare id for compatibility with pre-octopus asok |
1188 | // 'session evict'. | |
1189 | std::string err; | |
1190 | id = strict_strtoll(s.c_str(), 10, &err); | |
1191 | if (!err.empty()) { | |
1192 | *ss << "Invalid filter '" << s << "'"; | |
f67539c2 | 1193 | return -CEPHFS_EINVAL; |
9f95a23c TL |
1194 | } |
1195 | return 0; | |
7c673cae FG |
1196 | } |
1197 | ||
1198 | // Keys that start with this are to be taken as referring | |
1199 | // to freeform client metadata fields. | |
1200 | const std::string metadata_prefix("client_metadata."); | |
1201 | ||
1202 | auto k = s.substr(0, eq); | |
1203 | auto v = s.substr(eq + 1); | |
1204 | ||
1205 | dout(20) << __func__ << " parsed k='" << k << "', v='" << v << "'" << dendl; | |
1206 | ||
1207 | if (k.compare(0, metadata_prefix.size(), metadata_prefix) == 0 | |
1208 | && k.size() > metadata_prefix.size()) { | |
1209 | // Filter on arbitrary metadata key (no fixed schema for this, | |
1210 | // so anything after the dot is a valid field to filter on) | |
1211 | auto metadata_key = k.substr(metadata_prefix.size()); | |
1212 | metadata.insert(std::make_pair(metadata_key, v)); | |
1213 | } else if (k == "auth_name") { | |
1214 | // Filter on client entity name | |
1215 | auth_name = v; | |
1216 | } else if (k == "state") { | |
1217 | state = v; | |
1218 | } else if (k == "id") { | |
1219 | std::string err; | |
1220 | id = strict_strtoll(v.c_str(), 10, &err); | |
1221 | if (!err.empty()) { | |
1222 | *ss << err; | |
f67539c2 | 1223 | return -CEPHFS_EINVAL; |
7c673cae FG |
1224 | } |
1225 | } else if (k == "reconnecting") { | |
1226 | ||
1227 | /** | |
1228 | * Strict boolean parser. Allow true/false/0/1. | |
f67539c2 | 1229 | * Anything else is -CEPHFS_EINVAL. |
7c673cae | 1230 | */ |
11fdf7f2 | 1231 | auto is_true = [](std::string_view bstr, bool *out) -> bool |
7c673cae | 1232 | { |
11fdf7f2 | 1233 | ceph_assert(out != nullptr); |
7c673cae FG |
1234 | |
1235 | if (bstr == "true" || bstr == "1") { | |
1236 | *out = true; | |
1237 | return 0; | |
1238 | } else if (bstr == "false" || bstr == "0") { | |
1239 | *out = false; | |
1240 | return 0; | |
1241 | } else { | |
f67539c2 | 1242 | return -CEPHFS_EINVAL; |
7c673cae FG |
1243 | } |
1244 | }; | |
1245 | ||
1246 | bool bval; | |
1247 | int r = is_true(v, &bval); | |
1248 | if (r == 0) { | |
1249 | set_reconnecting(bval); | |
1250 | } else { | |
1251 | *ss << "Invalid boolean value '" << v << "'"; | |
f67539c2 | 1252 | return -CEPHFS_EINVAL; |
7c673cae FG |
1253 | } |
1254 | } else { | |
1255 | *ss << "Invalid filter key '" << k << "'"; | |
f67539c2 | 1256 | return -CEPHFS_EINVAL; |
7c673cae FG |
1257 | } |
1258 | } | |
1259 | ||
1260 | return 0; | |
1261 | } | |
1262 | ||
1263 | bool SessionFilter::match( | |
1264 | const Session &session, | |
1265 | std::function<bool(client_t)> is_reconnecting) const | |
1266 | { | |
1267 | for (const auto &m : metadata) { | |
1268 | const auto &k = m.first; | |
1269 | const auto &v = m.second; | |
11fdf7f2 TL |
1270 | auto it = session.info.client_metadata.find(k); |
1271 | if (it == session.info.client_metadata.end()) { | |
7c673cae FG |
1272 | return false; |
1273 | } | |
11fdf7f2 | 1274 | if (it->second != v) { |
7c673cae FG |
1275 | return false; |
1276 | } | |
1277 | } | |
1278 | ||
1279 | if (!auth_name.empty() && auth_name != session.info.auth_name.get_id()) { | |
1280 | return false; | |
1281 | } | |
1282 | ||
1283 | if (!state.empty() && state != session.get_state_name()) { | |
1284 | return false; | |
1285 | } | |
1286 | ||
1287 | if (id != 0 && id != session.info.inst.name.num()) { | |
1288 | return false; | |
1289 | } | |
1290 | ||
1291 | if (reconnecting.first) { | |
1292 | const bool am_reconnecting = is_reconnecting(session.info.inst.name.num()); | |
1293 | if (reconnecting.second != am_reconnecting) { | |
1294 | return false; | |
1295 | } | |
1296 | } | |
1297 | ||
1298 | return true; | |
1299 | } | |
1300 | ||
1301 | std::ostream& operator<<(std::ostream &out, const Session &s) | |
1302 | { | |
11fdf7f2 | 1303 | if (s.get_human_name() == stringify(s.get_client())) { |
7c673cae FG |
1304 | out << s.get_human_name(); |
1305 | } else { | |
11fdf7f2 | 1306 | out << s.get_human_name() << " (" << std::dec << s.get_client() << ")"; |
7c673cae FG |
1307 | } |
1308 | return out; | |
1309 | } | |
1310 |