]>
Commit | Line | Data |
---|---|---|
f67539c2 | 1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
7c673cae FG |
2 | // vim: ts=8 sw=2 smarttab |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
f67539c2 | 10 | * License version 2.1, as published by the Free Software |
7c673cae | 11 | * Foundation. See file COPYING. |
f67539c2 | 12 | * |
7c673cae FG |
13 | */ |
14 | ||
f67539c2 TL |
15 | /* |
16 | * This is the top level monitor. It runs on each machine in the Monitor | |
17 | * Cluster. The election of a leader for the paxos algorithm only happens | |
18 | * once per machine via the elector. There is a separate paxos instance (state) | |
19 | * kept for each of the system components: Object Store Device (OSD) Monitor, | |
7c673cae FG |
20 | * Placement Group (PG) Monitor, Metadata Server (MDS) Monitor, and Client Monitor. |
21 | */ | |
22 | ||
23 | #ifndef CEPH_MONITOR_H | |
24 | #define CEPH_MONITOR_H | |
25 | ||
26 | #include <errno.h> | |
27 | #include <cmath> | |
11fdf7f2 | 28 | #include <string> |
f67539c2 | 29 | #include <array> |
7c673cae FG |
30 | |
31 | #include "include/types.h" | |
224ce89b | 32 | #include "include/health.h" |
7c673cae FG |
33 | #include "msg/Messenger.h" |
34 | ||
35 | #include "common/Timer.h" | |
36 | ||
224ce89b | 37 | #include "health_check.h" |
7c673cae FG |
38 | #include "MonMap.h" |
39 | #include "Elector.h" | |
40 | #include "Paxos.h" | |
41 | #include "Session.h" | |
c07f9fc5 | 42 | #include "MonCommand.h" |
7c673cae | 43 | |
11fdf7f2 TL |
44 | |
45 | #include "common/config_obs.h" | |
7c673cae | 46 | #include "common/LogClient.h" |
11fdf7f2 TL |
47 | #include "auth/AuthClient.h" |
48 | #include "auth/AuthServer.h" | |
7c673cae FG |
49 | #include "auth/cephx/CephxKeyServer.h" |
50 | #include "auth/AuthMethodList.h" | |
51 | #include "auth/KeyRing.h" | |
9f95a23c | 52 | #include "include/common_fwd.h" |
7c673cae FG |
53 | #include "messages/MMonCommand.h" |
54 | #include "mon/MonitorDBStore.h" | |
7c673cae FG |
55 | #include "mgr/MgrClient.h" |
56 | ||
57 | #include "mon/MonOpRequest.h" | |
58 | #include "common/WorkQueue.h" | |
59 | ||
adb31ebb | 60 | using namespace TOPNSPC::common; |
7c673cae FG |
61 | |
62 | #define CEPH_MON_PROTOCOL 13 /* cluster internal */ | |
63 | ||
64 | ||
/// Identifiers for the cluster-wide counters (l_cluster_*).
/// Values are consecutive, starting at l_cluster_first and ending at
/// l_cluster_last.
enum {
  l_cluster_first = 555000,

  // monitors
  l_cluster_num_mon,
  l_cluster_num_mon_quorum,

  // OSDs
  l_cluster_num_osd,
  l_cluster_num_osd_up,
  l_cluster_num_osd_in,
  l_cluster_osd_epoch,
  l_cluster_osd_bytes,
  l_cluster_osd_bytes_used,
  l_cluster_osd_bytes_avail,

  // pools and placement groups
  l_cluster_num_pool,
  l_cluster_num_pg,
  l_cluster_num_pg_active_clean,
  l_cluster_num_pg_active,
  l_cluster_num_pg_peering,

  // objects
  l_cluster_num_object,
  l_cluster_num_object_degraded,
  l_cluster_num_object_misplaced,
  l_cluster_num_object_unfound,
  l_cluster_num_bytes,

  l_cluster_last,
};
88 | ||
/// Identifiers for the per-monitor counters (l_mon_*).
/// Values are consecutive, starting at l_mon_first and ending at l_mon_last.
enum {
  l_mon_first = 456000,

  // sessions
  l_mon_num_sessions,
  l_mon_session_add,
  l_mon_session_rm,
  l_mon_session_trim,

  // elections
  l_mon_num_elections,
  l_mon_election_call,
  l_mon_election_win,
  l_mon_election_lose,

  l_mon_last,
};
101 | ||
7c673cae FG |
102 | class PaxosService; |
103 | ||
7c673cae FG |
104 | class AdminSocketHook; |
105 | ||
7c673cae FG |
106 | #define COMPAT_SET_LOC "feature_set" |
107 | ||
7c673cae | 108 | class Monitor : public Dispatcher, |
11fdf7f2 TL |
109 | public AuthClient, |
110 | public AuthServer, | |
7c673cae FG |
111 | public md_config_obs_t { |
112 | public: | |
11fdf7f2 TL |
113 | int orig_argc = 0; |
114 | const char **orig_argv = nullptr; | |
115 | ||
7c673cae | 116 | // me |
f67539c2 | 117 | std::string name; |
7c673cae FG |
118 | int rank; |
119 | Messenger *messenger; | |
120 | ConnectionRef con_self; | |
9f95a23c | 121 | ceph::mutex lock = ceph::make_mutex("Monitor::lock"); |
7c673cae FG |
122 | SafeTimer timer; |
123 | Finisher finisher; | |
124 | ThreadPool cpu_tp; ///< threadpool for CPU intensive work | |
11fdf7f2 TL |
125 | |
126 | ceph::mutex auth_lock = ceph::make_mutex("Monitor::auth_lock"); | |
127 | ||
7c673cae FG |
128 | /// true if we have ever joined a quorum. if false, we are either a |
129 | /// new cluster, a newly joining monitor, or a just-upgraded | |
130 | /// monitor. | |
131 | bool has_ever_joined; | |
132 | ||
133 | PerfCounters *logger, *cluster_logger; | |
134 | bool cluster_logger_registered; | |
135 | ||
136 | void register_cluster_logger(); | |
137 | void unregister_cluster_logger(); | |
138 | ||
139 | MonMap *monmap; | |
140 | uuid_d fingerprint; | |
141 | ||
f67539c2 | 142 | std::set<entity_addrvec_t> extra_probe_peers; |
7c673cae FG |
143 | |
144 | LogClient log_client; | |
145 | LogChannelRef clog; | |
146 | LogChannelRef audit_clog; | |
147 | KeyRing keyring; | |
148 | KeyServer key_server; | |
149 | ||
150 | AuthMethodList auth_cluster_required; | |
151 | AuthMethodList auth_service_required; | |
152 | ||
153 | CompatSet features; | |
154 | ||
f67539c2 TL |
155 | std::vector<MonCommand> leader_mon_commands; // quorum leader's commands |
156 | std::vector<MonCommand> local_mon_commands; // commands i support | |
157 | ceph::buffer::list local_mon_commands_bl; // encoded version of above | |
d2e6a577 | 158 | |
f67539c2 TL |
159 | std::vector<MonCommand> prenautilus_local_mon_commands; |
160 | ceph::buffer::list prenautilus_local_mon_commands_bl; | |
7c673cae FG |
161 | |
162 | Messenger *mgr_messenger; | |
163 | MgrClient mgr_client; | |
164 | uint64_t mgr_proxy_bytes = 0; // in-flight proxied mgr command message bytes | |
11fdf7f2 | 165 | std::string gss_ktfile_client{}; |
31f18b77 | 166 | |
7c673cae FG |
167 | private: |
168 | void new_tick(); | |
169 | ||
170 | // -- local storage -- | |
171 | public: | |
172 | MonitorDBStore *store; | |
f67539c2 TL |
173 | static const std::string MONITOR_NAME; |
174 | static const std::string MONITOR_STORE_PREFIX; | |
7c673cae FG |
175 | |
176 | // -- monitor state -- | |
177 | private: | |
178 | enum { | |
11fdf7f2 TL |
179 | STATE_INIT = 1, |
180 | STATE_PROBING, | |
7c673cae FG |
181 | STATE_SYNCHRONIZING, |
182 | STATE_ELECTING, | |
183 | STATE_LEADER, | |
184 | STATE_PEON, | |
185 | STATE_SHUTDOWN | |
186 | }; | |
11fdf7f2 | 187 | int state = STATE_INIT; |
7c673cae FG |
188 | |
189 | public: | |
190 | static const char *get_state_name(int s) { | |
191 | switch (s) { | |
192 | case STATE_PROBING: return "probing"; | |
193 | case STATE_SYNCHRONIZING: return "synchronizing"; | |
194 | case STATE_ELECTING: return "electing"; | |
195 | case STATE_LEADER: return "leader"; | |
196 | case STATE_PEON: return "peon"; | |
197 | case STATE_SHUTDOWN: return "shutdown"; | |
198 | default: return "???"; | |
199 | } | |
200 | } | |
201 | const char *get_state_name() const { | |
202 | return get_state_name(state); | |
203 | } | |
204 | ||
11fdf7f2 | 205 | bool is_init() const { return state == STATE_INIT; } |
7c673cae FG |
206 | bool is_shutdown() const { return state == STATE_SHUTDOWN; } |
207 | bool is_probing() const { return state == STATE_PROBING; } | |
208 | bool is_synchronizing() const { return state == STATE_SYNCHRONIZING; } | |
209 | bool is_electing() const { return state == STATE_ELECTING; } | |
210 | bool is_leader() const { return state == STATE_LEADER; } | |
211 | bool is_peon() const { return state == STATE_PEON; } | |
212 | ||
213 | const utime_t &get_leader_since() const; | |
214 | ||
215 | void prepare_new_fingerprint(MonitorDBStore::TransactionRef t); | |
216 | ||
11fdf7f2 TL |
217 | std::vector<DaemonHealthMetric> get_health_metrics(); |
218 | ||
20effc67 TL |
219 | int quorum_age() const { |
220 | auto age = std::chrono::duration_cast<std::chrono::seconds>( | |
221 | ceph::mono_clock::now() - quorum_since); | |
222 | return age.count(); | |
223 | } | |
224 | ||
225 | bool is_mon_down() const { | |
226 | int max = monmap->size(); | |
227 | int actual = get_quorum().size(); | |
228 | auto now = ceph::real_clock::now(); | |
229 | return actual < max && now > monmap->created.to_real_time(); | |
230 | } | |
231 | ||
7c673cae FG |
232 | // -- elector -- |
233 | private: | |
f67539c2 | 234 | std::unique_ptr<Paxos> paxos; |
7c673cae FG |
235 | Elector elector; |
236 | friend class Elector; | |
237 | ||
238 | /// features we require of peers (based on on-disk compatset) | |
239 | uint64_t required_features; | |
240 | ||
241 | int leader; // current leader (to best of knowledge) | |
f67539c2 TL |
242 | std::set<int> quorum; // current active set of monitors (if !starting) |
243 | ceph::mono_clock::time_point quorum_since; // when quorum formed | |
7c673cae FG |
244 | utime_t leader_since; // when this monitor became the leader, if it is the leader |
245 | utime_t exited_quorum; // time detected as not in quorum; 0 if in | |
31f18b77 FG |
246 | |
247 | // map of counts of connected clients, by type and features, for | |
248 | // each quorum mon | |
f67539c2 | 249 | std::map<int,FeatureMap> quorum_feature_map; |
31f18b77 | 250 | |
7c673cae FG |
251 | /** |
252 | * Intersection of quorum member's connection feature bits. | |
253 | */ | |
254 | uint64_t quorum_con_features; | |
255 | /** | |
256 | * Intersection of quorum members mon-specific feature bits | |
257 | */ | |
258 | mon_feature_t quorum_mon_features; | |
7c673cae | 259 | |
9f95a23c | 260 | ceph_release_t quorum_min_mon_release{ceph_release_t::unknown}; |
11fdf7f2 | 261 | |
f67539c2 TL |
262 | std::set<std::string> outside_quorum; |
263 | ||
264 | bool stretch_mode_engaged{false}; | |
265 | bool degraded_stretch_mode{false}; | |
266 | bool recovering_stretch_mode{false}; | |
20effc67 TL |
267 | std::string stretch_bucket_divider; |
268 | std::map<std::string, std::set<std::string>> dead_mon_buckets; // bucket->mon ranks, locations with no live mons | |
269 | std::set<std::string> up_mon_buckets; // locations with a live mon | |
f67539c2 TL |
270 | void do_stretch_mode_election_work(); |
271 | ||
272 | bool session_stretch_allowed(MonSession *s, MonOpRequestRef& op); | |
273 | void disconnect_disallowed_stretch_sessions(); | |
274 | void set_elector_disallowed_leaders(bool allow_election); | |
b3b6e05e | 275 | |
20effc67 | 276 | std::map<std::string,std::string> crush_loc; |
b3b6e05e | 277 | bool need_set_crush_loc{false}; |
f67539c2 TL |
278 | public: |
279 | bool is_stretch_mode() { return stretch_mode_engaged; } | |
280 | bool is_degraded_stretch_mode() { return degraded_stretch_mode; } | |
281 | bool is_recovering_stretch_mode() { return recovering_stretch_mode; } | |
b3b6e05e TL |
282 | |
283 | /** | |
284 | * This set of functions maintains the in-memory stretch state | |
285 | * and sets up transitions of the map states by calling in to | |
286 | * MonmapMonitor and OSDMonitor. | |
287 | * | |
288 | * The [maybe_]go_* functions are called on the leader to | |
289 | * decide if transitions should happen; the trigger_* functions | |
290 | * set up the map transitions; and the set_* functions actually | |
291 | * change the memory state -- but these are only called | |
292 | * via OSDMonitor::update_from_paxos, to guarantee consistent | |
293 | * updates across the entire cluster. | |
294 | */ | |
295 | void try_engage_stretch_mode(); | |
f67539c2 | 296 | void maybe_go_degraded_stretch_mode(); |
20effc67 TL |
297 | void trigger_degraded_stretch_mode(const std::set<std::string>& dead_mons, |
298 | const std::set<int>& dead_buckets); | |
f67539c2 TL |
299 | void set_degraded_stretch_mode(); |
300 | void go_recovery_stretch_mode(); | |
b3b6e05e | 301 | void set_recovery_stretch_mode(); |
f67539c2 TL |
302 | void trigger_healthy_stretch_mode(); |
303 | void set_healthy_stretch_mode(); | |
304 | void enable_stretch_mode(); | |
20effc67 | 305 | void set_mon_crush_location(const std::string& loc); |
f67539c2 TL |
306 | |
307 | ||
308 | private: | |
7c673cae FG |
309 | |
310 | /** | |
311 | * @defgroup Monitor_h_scrub | |
312 | * @{ | |
313 | */ | |
314 | version_t scrub_version; ///< paxos version we are scrubbing | |
f67539c2 | 315 | std::map<int,ScrubResult> scrub_result; ///< results so far |
7c673cae FG |
316 | |
317 | /** | |
318 | * trigger a cross-mon scrub | |
319 | * | |
320 | * Verify all mons are storing identical content | |
321 | */ | |
322 | int scrub_start(); | |
323 | int scrub(); | |
324 | void handle_scrub(MonOpRequestRef op); | |
325 | bool _scrub(ScrubResult *r, | |
f67539c2 | 326 | std::pair<std::string,std::string> *start, |
7c673cae FG |
327 | int *num_keys); |
328 | void scrub_check_results(); | |
329 | void scrub_timeout(); | |
330 | void scrub_finish(); | |
331 | void scrub_reset(); | |
f67539c2 | 332 | void scrub_update_interval(ceph::timespan interval); |
7c673cae FG |
333 | |
334 | Context *scrub_event; ///< periodic event to trigger scrub (leader) | |
335 | Context *scrub_timeout_event; ///< scrub round timeout (leader) | |
336 | void scrub_event_start(); | |
337 | void scrub_event_cancel(); | |
338 | void scrub_reset_timeout(); | |
339 | void scrub_cancel_timeout(); | |
340 | ||
/// Bookkeeping for a scrub: the last key scrubbed and a finished flag.
struct ScrubState {
  std::pair<std::string,std::string> last_key; ///< last scrubbed key
  bool finished = false;                       ///< starts out false

  ScrubState() = default;
  virtual ~ScrubState() = default;
};
11fdf7f2 | 348 | std::shared_ptr<ScrubState> scrub_state; ///< keeps track of current scrub |
7c673cae FG |
349 | |
350 | /** | |
351 | * @defgroup Monitor_h_sync Synchronization | |
352 | * @{ | |
353 | */ | |
354 | /** | |
355 | * @} // provider state | |
356 | */ | |
357 | struct SyncProvider { | |
11fdf7f2 | 358 | entity_addrvec_t addrs; |
7c673cae FG |
359 | uint64_t cookie; ///< unique cookie for this sync attempt |
360 | utime_t timeout; ///< when we give up and expire this attempt | |
361 | version_t last_committed; ///< last paxos version on peer | |
f67539c2 | 362 | std::pair<std::string,std::string> last_key; ///< last key sent to (or on) peer |
7c673cae FG |
363 | bool full; ///< full scan? |
364 | MonitorDBStore::Synchronizer synchronizer; ///< iterator | |
365 | ||
366 | SyncProvider() : cookie(0), last_committed(0), full(false) {} | |
367 | ||
368 | void reset_timeout(CephContext *cct, int grace) { | |
369 | timeout = ceph_clock_now(); | |
370 | timeout += grace; | |
371 | } | |
372 | }; | |
373 | ||
f67539c2 | 374 | std::map<std::uint64_t, SyncProvider> sync_providers; ///< cookie -> SyncProvider for those syncing from us |
7c673cae FG |
375 | uint64_t sync_provider_count; ///< counter for issued cookies to keep them unique |
376 | ||
377 | /** | |
378 | * @} // requester state | |
379 | */ | |
11fdf7f2 | 380 | entity_addrvec_t sync_provider; ///< who we are syncing from |
7c673cae FG |
381 | uint64_t sync_cookie; ///< 0 if we are starting, non-zero otherwise |
382 | bool sync_full; ///< true if we are a full sync, false for recent catch-up | |
383 | version_t sync_start_version; ///< last_committed at sync start | |
384 | Context *sync_timeout_event; ///< timeout event | |
385 | ||
386 | /** | |
387 | * floor for sync source | |
388 | * | |
389 | * When we sync we forget about our old last_committed value which | |
390 | * can be dangerous. For example, if we have a cluster of: | |
391 | * | |
392 | * mon.a: lc 100 | |
393 | * mon.b: lc 80 | |
394 | * mon.c: lc 100 (us) | |
395 | * | |
396 | * If something forces us to sync (say, corruption, or manual | |
397 | * intervention, or bug), we forget last_committed, and might abort. | |
398 | * If mon.a happens to be down when we come back, we will see: | |
399 | * | |
400 | * mon.b: lc 80 | |
401 | * mon.c: lc 0 (us) | |
402 | * | |
402 | * and sync from mon.b, at which point b+c will both have lc 80 and | |
404 | * come online with a majority holding out of date commits. | |
405 | * | |
406 | * Avoid this by preserving our old last_committed value prior to | |
407 | * sync and never going backwards. | |
408 | */ | |
409 | version_t sync_last_committed_floor; | |
410 | ||
411 | /** | |
412 | * Obtain the synchronization target prefixes in set form. | |
413 | * | |
414 | * We consider a target prefix all those that are relevant when | |
415 | * synchronizing two stores. That is, all those that hold paxos service's | |
416 | * versions, as well as paxos versions, or any control keys such as the | |
417 | * first or last committed version. | |
418 | * | |
419 | * Given the current design, this function should return the name of all and | |
420 | * any available paxos service, plus the paxos name. | |
421 | * | |
422 | * @returns a set of strings referring to the prefixes being synchronized | |
423 | */ | |
f67539c2 | 424 | std::set<std::string> get_sync_targets_names(); |
7c673cae FG |
425 | |
426 | /** | |
427 | * Reset the monitor's sync-related data structures for syncing *from* a peer | |
428 | */ | |
429 | void sync_reset_requester(); | |
430 | ||
431 | /** | |
432 | * Reset sync state related to allowing others to sync from us | |
433 | */ | |
434 | void sync_reset_provider(); | |
435 | ||
436 | /** | |
437 | * Called when a sync attempt times out (requester-side) | |
438 | */ | |
439 | void sync_timeout(); | |
440 | ||
441 | /** | |
442 | * Get the latest monmap for backup purposes during sync | |
443 | */ | |
f67539c2 | 444 | void sync_obtain_latest_monmap(ceph::buffer::list &bl); |
7c673cae FG |
445 | |
446 | /** | |
447 | * Start sync process | |
448 | * | |
449 | * Start pulling committed state from another monitor. | |
450 | * | |
451 | * @param addrs where to pull committed state from | |
452 | * @param full whether to do a full sync or just catch up on recent paxos | |
453 | */ | |
11fdf7f2 | 454 | void sync_start(entity_addrvec_t &addrs, bool full); |
7c673cae FG |
455 | |
456 | public: | |
457 | /** | |
458 | * force a sync on next mon restart | |
459 | */ | |
f67539c2 | 460 | void sync_force(ceph::Formatter *f); |
7c673cae FG |
461 | |
462 | private: | |
463 | /** | |
464 | * store critical state for safekeeping during sync | |
465 | * | |
466 | * We store a few things on the side that we don't want to get clobbered by sync. This | |
467 | * includes the latest monmap and a lower bound on last_committed. | |
468 | */ | |
469 | void sync_stash_critical_state(MonitorDBStore::TransactionRef tx); | |
470 | ||
471 | /** | |
472 | * reset the sync timeout | |
473 | * | |
474 | * This is used on the client to restart if things aren't progressing | |
475 | */ | |
476 | void sync_reset_timeout(); | |
477 | ||
478 | /** | |
479 | * trim stale sync provider state | |
480 | * | |
481 | * If someone is syncing from us and hasn't talked to us recently, expire their state. | |
482 | */ | |
483 | void sync_trim_providers(); | |
484 | ||
485 | /** | |
486 | * Complete a sync | |
487 | * | |
488 | * Finish up a sync after we've gotten all of the chunks. | |
489 | * | |
490 | * @param last_committed final last_committed value from provider | |
491 | */ | |
492 | void sync_finish(version_t last_committed); | |
493 | ||
494 | /** | |
495 | * request the next chunk from the provider | |
496 | */ | |
497 | void sync_get_next_chunk(); | |
498 | ||
499 | /** | |
500 | * handle sync message | |
501 | * | |
502 | * @param op Sync message with operation type MMonSync::OP_START_CHUNKS | |
503 | */ | |
504 | void handle_sync(MonOpRequestRef op); | |
505 | ||
506 | void _sync_reply_no_cookie(MonOpRequestRef op); | |
507 | ||
508 | void handle_sync_get_cookie(MonOpRequestRef op); | |
509 | void handle_sync_get_chunk(MonOpRequestRef op); | |
510 | void handle_sync_finish(MonOpRequestRef op); | |
511 | ||
512 | void handle_sync_cookie(MonOpRequestRef op); | |
513 | void handle_sync_forward(MonOpRequestRef op); | |
514 | void handle_sync_chunk(MonOpRequestRef op); | |
515 | void handle_sync_no_cookie(MonOpRequestRef op); | |
516 | ||
517 | /** | |
518 | * @} // Synchronization | |
519 | */ | |
520 | ||
f67539c2 TL |
521 | std::list<Context*> waitfor_quorum; |
522 | std::list<Context*> maybe_wait_for_quorum; | |
7c673cae FG |
523 | |
524 | /** | |
525 | * @defgroup Monitor_h_TimeCheck Monitor Clock Drift Early Warning System | |
526 | * @{ | |
527 | * | |
528 | * We use time checks to keep track of any clock drifting going on in the | |
529 | * cluster. This is accomplished by periodically pinging each monitor in the | |
530 | * quorum and registering its response time on a map, assessing how much its | |
531 | * clock has drifted. We also take this opportunity to assess the latency | |
532 | * on response. | |
533 | * | |
534 | * This mechanism works as follows: | |
535 | * | |
536 | * - Leader sends out a 'PING' message to each other monitor in the quorum. | |
537 | * The message is timestamped with the leader's current time. The leader's | |
538 | * current time is recorded in a map, associated with each peon's | |
539 | * instance. | |
540 | * - The peon replies to the leader with a timestamped 'PONG' message. | |
541 | * - The leader calculates a delta between the peon's timestamp and its | |
542 | * current time and stashes it. | |
543 | * - The leader also calculates the time it took to receive the 'PONG' | |
544 | * since the 'PING' was sent, and stashes an approximate latency estimate. | |
545 | * - Once all the quorum members have pong'ed, the leader will share the | |
546 | * clock skew and latency maps with all the monitors in the quorum. | |
547 | */ | |
f67539c2 TL |
548 | std::map<int, utime_t> timecheck_waiting; |
549 | std::map<int, double> timecheck_skews; | |
550 | std::map<int, double> timecheck_latencies; | |
7c673cae FG |
551 | // odd value means we are mid-round; even value means the round has |
552 | // finished. | |
553 | version_t timecheck_round; | |
554 | unsigned int timecheck_acks; | |
555 | utime_t timecheck_round_start; | |
224ce89b | 556 | friend class HealthMonitor; |
7c673cae FG |
557 | /* When we hit a skew we will start a new round based off of |
558 | * 'mon_timecheck_skew_interval'. Each new round will be backed off | |
559 | * until we hit 'mon_timecheck_interval' -- which is the typical | |
560 | * interval when not in the presence of a skew. | |
561 | * | |
562 | * This variable tracks the number of rounds with skews since last clean | |
563 | * so that we can report to the user and properly adjust the backoff. | |
564 | */ | |
565 | uint64_t timecheck_rounds_since_clean; | |
566 | /** | |
567 | * Time Check event. | |
568 | */ | |
569 | Context *timecheck_event; | |
570 | ||
571 | void timecheck_start(); | |
572 | void timecheck_finish(); | |
573 | void timecheck_start_round(); | |
574 | void timecheck_finish_round(bool success = true); | |
575 | void timecheck_cancel_round(); | |
576 | void timecheck_cleanup(); | |
577 | void timecheck_reset_event(); | |
578 | void timecheck_check_skews(); | |
579 | void timecheck_report(); | |
580 | void timecheck(); | |
f67539c2 | 581 | health_status_t timecheck_status(std::ostringstream &ss, |
7c673cae FG |
582 | const double skew_bound, |
583 | const double latency); | |
584 | void handle_timecheck_leader(MonOpRequestRef op); | |
585 | void handle_timecheck_peon(MonOpRequestRef op); | |
586 | void handle_timecheck(MonOpRequestRef op); | |
587 | ||
588 | /** | |
589 | * Returns 'true' if this is considered to be a skew; 'false' otherwise. | |
590 | */ | |
591 | bool timecheck_has_skew(const double skew_bound, double *abs) const { | |
592 | double abs_skew = std::fabs(skew_bound); | |
593 | if (abs) | |
594 | *abs = abs_skew; | |
11fdf7f2 | 595 | return (abs_skew > g_conf()->mon_clock_drift_allowed); |
7c673cae FG |
596 | } |
597 | ||
598 | /** | |
599 | * @} | |
600 | */ | |
601 | /** | |
602 | * Handle ping messages from others. | |
603 | */ | |
604 | void handle_ping(MonOpRequestRef op); | |
605 | ||
606 | Context *probe_timeout_event = nullptr; // for probing | |
607 | ||
608 | void reset_probe_timeout(); | |
609 | void cancel_probe_timeout(); | |
610 | void probe_timeout(int r); | |
611 | ||
612 | void _apply_compatset_features(CompatSet &new_features); | |
613 | ||
614 | public: | |
615 | epoch_t get_epoch(); | |
616 | int get_leader() const { return leader; } | |
f67539c2 TL |
617 | std::string get_leader_name() { |
618 | return quorum.empty() ? std::string() : monmap->get_name(leader); | |
224ce89b | 619 | } |
f67539c2 TL |
620 | const std::set<int>& get_quorum() const { return quorum; } |
621 | std::list<std::string> get_quorum_names() { | |
622 | std::list<std::string> q; | |
623 | for (auto p = quorum.begin(); p != quorum.end(); ++p) | |
7c673cae FG |
624 | q.push_back(monmap->get_name(*p)); |
625 | return q; | |
626 | } | |
627 | uint64_t get_quorum_con_features() const { | |
628 | return quorum_con_features; | |
629 | } | |
630 | mon_feature_t get_quorum_mon_features() const { | |
631 | return quorum_mon_features; | |
632 | } | |
633 | uint64_t get_required_features() const { | |
634 | return required_features; | |
635 | } | |
636 | mon_feature_t get_required_mon_features() const { | |
637 | return monmap->get_required_features(); | |
638 | } | |
639 | void apply_quorum_to_compatset_features(); | |
640 | void apply_monmap_to_compatset_features(); | |
641 | void calc_quorum_requirements(); | |
642 | ||
31f18b77 FG |
643 | void get_combined_feature_map(FeatureMap *fm); |
644 | ||
7c673cae FG |
645 | private: |
646 | void _reset(); ///< called from bootstrap, start_, or join_election | |
647 | void wait_for_paxos_write(); | |
648 | void _finish_svc_election(); ///< called by {win,lose}_election | |
11fdf7f2 | 649 | void respawn(); |
7c673cae FG |
650 | public: |
651 | void bootstrap(); | |
652 | void join_election(); | |
653 | void start_election(); | |
654 | void win_standalone_election(); | |
655 | // end election (called by Elector) | |
f67539c2 | 656 | void win_election(epoch_t epoch, const std::set<int>& q, |
7c673cae FG |
657 | uint64_t features, |
658 | const mon_feature_t& mon_features, | |
9f95a23c | 659 | ceph_release_t min_mon_release, |
f67539c2 TL |
660 | const std::map<int,Metadata>& metadata); |
661 | void lose_election(epoch_t epoch, std::set<int>& q, int l, | |
7c673cae | 662 | uint64_t features, |
11fdf7f2 | 663 | const mon_feature_t& mon_features, |
9f95a23c | 664 | ceph_release_t min_mon_release); |
7c673cae FG |
665 | // end election (called by Elector) |
666 | void finish_election(); | |
667 | ||
7c673cae FG |
668 | void update_logger(); |
669 | ||
670 | /** | |
671 | * Array holding the Paxos services run by this Monitor, indexed by PAXOS_* id. | |
672 | */ | |
f67539c2 | 673 | std::array<std::unique_ptr<PaxosService>, PAXOS_NUM> paxos_service; |
7c673cae FG |
674 | |
675 | class MDSMonitor *mdsmon() { | |
11fdf7f2 | 676 | return (class MDSMonitor *)paxos_service[PAXOS_MDSMAP].get(); |
7c673cae FG |
677 | } |
678 | ||
679 | class MonmapMonitor *monmon() { | |
11fdf7f2 | 680 | return (class MonmapMonitor *)paxos_service[PAXOS_MONMAP].get(); |
7c673cae FG |
681 | } |
682 | ||
683 | class OSDMonitor *osdmon() { | |
11fdf7f2 | 684 | return (class OSDMonitor *)paxos_service[PAXOS_OSDMAP].get(); |
7c673cae FG |
685 | } |
686 | ||
687 | class AuthMonitor *authmon() { | |
11fdf7f2 | 688 | return (class AuthMonitor *)paxos_service[PAXOS_AUTH].get(); |
7c673cae FG |
689 | } |
690 | ||
691 | class LogMonitor *logmon() { | |
11fdf7f2 | 692 | return (class LogMonitor*) paxos_service[PAXOS_LOG].get(); |
7c673cae FG |
693 | } |
694 | ||
695 | class MgrMonitor *mgrmon() { | |
11fdf7f2 | 696 | return (class MgrMonitor*) paxos_service[PAXOS_MGR].get(); |
7c673cae FG |
697 | } |
698 | ||
31f18b77 | 699 | class MgrStatMonitor *mgrstatmon() { |
11fdf7f2 | 700 | return (class MgrStatMonitor*) paxos_service[PAXOS_MGRSTAT].get(); |
31f18b77 FG |
701 | } |
702 | ||
b32b8144 | 703 | class HealthMonitor *healthmon() { |
11fdf7f2 TL |
704 | return (class HealthMonitor*) paxos_service[PAXOS_HEALTH].get(); |
705 | } | |
706 | ||
707 | class ConfigMonitor *configmon() { | |
708 | return (class ConfigMonitor*) paxos_service[PAXOS_CONFIG].get(); | |
224ce89b WB |
709 | } |
710 | ||
f67539c2 TL |
711 | class KVMonitor *kvmon() { |
712 | return (class KVMonitor*) paxos_service[PAXOS_KV].get(); | |
713 | } | |
714 | ||
7c673cae FG |
715 | friend class Paxos; |
716 | friend class OSDMonitor; | |
717 | friend class MDSMonitor; | |
718 | friend class MonmapMonitor; | |
7c673cae | 719 | friend class LogMonitor; |
f67539c2 | 720 | friend class KVMonitor; |
7c673cae FG |
721 | |
722 | // -- sessions -- | |
723 | MonSessionMap session_map; | |
9f95a23c | 724 | ceph::mutex session_map_lock = ceph::make_mutex("Monitor::session_map_lock"); |
7c673cae FG |
725 | AdminSocketHook *admin_hook; |
726 | ||
727 | template<typename Func, typename...Args> | |
728 | void with_session_map(Func&& func) { | |
11fdf7f2 | 729 | std::lock_guard l(session_map_lock); |
7c673cae FG |
730 | std::forward<Func>(func)(session_map); |
731 | } | |
732 | void send_latest_monmap(Connection *con); | |
733 | ||
734 | // messages | |
735 | void handle_get_version(MonOpRequestRef op); | |
736 | void handle_subscribe(MonOpRequestRef op); | |
737 | void handle_mon_get_map(MonOpRequestRef op); | |
738 | ||
11fdf7f2 | 739 | static void _generate_command_map(cmdmap_t& cmdmap, |
f67539c2 | 740 | std::map<std::string,std::string> ¶m_str_map); |
c07f9fc5 | 741 | static const MonCommand *_get_moncommand( |
f67539c2 TL |
742 | const std::string &cmd_prefix, |
743 | const std::vector<MonCommand>& cmds); | |
744 | bool _allowed_command(MonSession *s, const std::string& module, | |
745 | const std::string& prefix, | |
11fdf7f2 | 746 | const cmdmap_t& cmdmap, |
f67539c2 | 747 | const std::map<std::string,std::string>& param_str_map, |
7c673cae | 748 | const MonCommand *this_cmd); |
f67539c2 TL |
749 | void get_mon_status(ceph::Formatter *f); |
750 | void _quorum_status(ceph::Formatter *f, std::ostream& ss); | |
11fdf7f2 TL |
751 | bool _add_bootstrap_peer_hint(std::string_view cmd, const cmdmap_t& cmdmap, |
752 | std::ostream& ss); | |
9f95a23c | 753 | void handle_tell_command(MonOpRequestRef op); |
7c673cae FG |
754 | void handle_command(MonOpRequestRef op); |
755 | void handle_route(MonOpRequestRef op); | |
756 | ||
f67539c2 TL |
757 | int get_mon_metadata(int mon, ceph::Formatter *f, std::ostream& err); |
758 | int print_nodes(ceph::Formatter *f, std::ostream& err); | |
7c673cae | 759 | |
f67539c2 TL |
760 | // track metadata reported by win_election() |
761 | std::map<int, Metadata> mon_metadata; | |
762 | std::map<int, Metadata> pending_metadata; | |
7c673cae FG |
763 | |
764 | /** | |
765 | * | |
766 | */ | |
767 | struct health_cache_t { | |
768 | health_status_t overall; | |
f67539c2 | 769 | std::string summary; |
7c673cae FG |
770 | |
771 | void reset() { | |
772 | // health_status_t doesn't really have a NONE value and we're not | |
773 | // okay with setting something else (say, HEALTH_ERR). so just | |
774 | // leave it be. | |
775 | summary.clear(); | |
776 | } | |
777 | } health_status_cache; | |
778 | ||
779 | Context *health_tick_event = nullptr; | |
780 | Context *health_interval_event = nullptr; | |
781 | ||
782 | void health_tick_start(); | |
783 | void health_tick_stop(); | |
9f95a23c | 784 | ceph::real_clock::time_point health_interval_calc_next_update(); |
7c673cae FG |
785 | void health_interval_start(); |
786 | void health_interval_stop(); | |
787 | void health_events_cleanup(); | |
788 | ||
789 | void health_to_clog_update_conf(const std::set<std::string> &changed); | |
790 | ||
791 | void do_health_to_clog_interval(); | |
792 | void do_health_to_clog(bool force = false); | |
793 | ||
224ce89b WB |
794 | void log_health( |
795 | const health_check_map_t& updated, | |
796 | const health_check_map_t& previous, | |
797 | MonitorDBStore::TransactionRef t); | |
798 | ||
2a845540 TL |
799 | void update_pending_metadata(); |
800 | ||
181888fb FG |
801 | protected: |
802 | ||
803 | class HealthCheckLogStatus { | |
804 | public: | |
805 | health_status_t severity; | |
806 | std::string last_message; | |
807 | utime_t updated_at = 0; | |
808 | HealthCheckLogStatus(health_status_t severity_, | |
809 | const std::string &last_message_, | |
810 | utime_t updated_at_) | |
811 | : severity(severity_), | |
812 | last_message(last_message_), | |
813 | updated_at(updated_at_) | |
814 | {} | |
815 | }; | |
816 | std::map<std::string, HealthCheckLogStatus> health_check_log_times; | |
817 | ||
818 | public: | |
819 | ||
f67539c2 TL |
820 | void get_cluster_status(std::stringstream &ss, ceph::Formatter *f, |
821 | MonSession *session); | |
7c673cae | 822 | |
f67539c2 TL |
823 | void reply_command(MonOpRequestRef op, int rc, const std::string &rs, version_t version); |
824 | void reply_command(MonOpRequestRef op, int rc, const std::string &rs, ceph::buffer::list& rdata, version_t version); | |
7c673cae | 825 | |
f67539c2 | 826 | void reply_tell_command(MonOpRequestRef op, int rc, const std::string &rs); |
9f95a23c TL |
827 | |
828 | ||
7c673cae FG |
829 | |
830 | void handle_probe(MonOpRequestRef op); | |
831 | /** | |
832 | * Handle a Probe Operation, replying with our name, quorum and known versions. | |
833 | * | |
834 | * We use the MMonProbe message class for anything and everything related to | |
835 | * Monitor probing. One of the operations relates directly to the probing | |
836 | * itself, in which we receive a probe request and to which we reply with | |
837 | * our name, our quorum and the known versions for each Paxos service. Thus the | |
838 | * redundant function name. This reply will obviously be sent to the one | |
839 | * probing/requesting this information. | |
840 | * | |
841 | * @todo Add @pre and @post | |
842 | * | |
843 | * @param m A Probe message, with an operation of type Probe. | |
844 | */ | |
845 | void handle_probe_probe(MonOpRequestRef op); | |
846 | void handle_probe_reply(MonOpRequestRef op); | |
847 | ||
848 | // request routing | |
849 | struct RoutedRequest { | |
850 | uint64_t tid; | |
f67539c2 | 851 | ceph::buffer::list request_bl; |
7c673cae FG |
852 | MonSession *session; |
853 | ConnectionRef con; | |
854 | uint64_t con_features; | |
7c673cae FG |
855 | MonOpRequestRef op; |
856 | ||
857 | RoutedRequest() : tid(0), session(NULL), con_features(0) {} | |
858 | ~RoutedRequest() { | |
859 | if (session) | |
860 | session->put(); | |
861 | } | |
862 | }; | |
863 | uint64_t routed_request_tid; | |
f67539c2 TL |
864 | std::map<uint64_t, RoutedRequest*> routed_requests; |
865 | ||
7c673cae FG |
866 | void forward_request_leader(MonOpRequestRef op); |
867 | void handle_forward(MonOpRequestRef op); | |
7c673cae FG |
868 | void send_reply(MonOpRequestRef op, Message *reply); |
869 | void no_reply(MonOpRequestRef op); | |
870 | void resend_routed_requests(); | |
871 | void remove_session(MonSession *s); | |
872 | void remove_all_sessions(); | |
873 | void waitlist_or_zap_client(MonOpRequestRef op); | |
874 | ||
11fdf7f2 | 875 | void send_mon_message(Message *m, int rank); |
b3b6e05e TL |
876 | /** can_change_external_state: whether we can do things like |
877 | * call elections as a result of the new map. | |
878 | */ | |
39ae355f | 879 | void notify_new_monmap(bool can_change_external_state=false, bool remove_rank_elector=true); |
7c673cae FG |
880 | |
881 | public: | |
882 | struct C_Command : public C_MonOp { | |
f67539c2 | 883 | Monitor &mon; |
7c673cae | 884 | int rc; |
f67539c2 TL |
885 | std::string rs; |
886 | ceph::buffer::list rdata; | |
7c673cae | 887 | version_t version; |
f67539c2 | 888 | C_Command(Monitor &_mm, MonOpRequestRef _op, int r, std::string s, version_t v) : |
7c673cae | 889 | C_MonOp(_op), mon(_mm), rc(r), rs(s), version(v){} |
f67539c2 | 890 | C_Command(Monitor &_mm, MonOpRequestRef _op, int r, std::string s, ceph::buffer::list rd, version_t v) : |
7c673cae FG |
891 | C_MonOp(_op), mon(_mm), rc(r), rs(s), rdata(rd), version(v){} |
892 | ||
893 | void _finish(int r) override { | |
9f95a23c | 894 | auto m = op->get_req<MMonCommand>(); |
7c673cae | 895 | if (r >= 0) { |
f67539c2 | 896 | std::ostringstream ss; |
7c673cae FG |
897 | if (!op->get_req()->get_connection()) { |
898 | ss << "connection dropped for command "; | |
899 | } else { | |
900 | MonSession *s = op->get_session(); | |
901 | ||
902 | // if client drops we may not have a session to draw information from. | |
903 | if (s) { | |
11fdf7f2 | 904 | ss << "from='" << s->name << " " << s->addrs << "' " |
7c673cae FG |
905 | << "entity='" << s->entity_name << "' "; |
906 | } else { | |
907 | ss << "session dropped for command "; | |
908 | } | |
909 | } | |
adb31ebb | 910 | cmdmap_t cmdmap; |
f67539c2 | 911 | std::ostringstream ds; |
20effc67 | 912 | std::string prefix; |
adb31ebb TL |
913 | cmdmap_from_json(m->cmd, &cmdmap, ds); |
914 | cmd_getval(cmdmap, "prefix", prefix); | |
915 | if (prefix != "config set" && prefix != "config-key set") | |
916 | ss << "cmd='" << m->cmd << "': finished"; | |
7c673cae | 917 | |
f67539c2 TL |
918 | mon.audit_clog->info() << ss.str(); |
919 | mon.reply_command(op, rc, rs, rdata, version); | |
7c673cae FG |
920 | } |
921 | else if (r == -ECANCELED) | |
922 | return; | |
923 | else if (r == -EAGAIN) | |
f67539c2 | 924 | mon.dispatch_op(op); |
7c673cae | 925 | else |
11fdf7f2 | 926 | ceph_abort_msg("bad C_Command return value"); |
7c673cae FG |
927 | } |
928 | }; | |
929 | ||
930 | private: | |
931 | class C_RetryMessage : public C_MonOp { | |
932 | Monitor *mon; | |
933 | public: | |
934 | C_RetryMessage(Monitor *m, MonOpRequestRef op) : | |
935 | C_MonOp(op), mon(m) { } | |
936 | ||
937 | void _finish(int r) override { | |
938 | if (r == -EAGAIN || r >= 0) | |
939 | mon->dispatch_op(op); | |
940 | else if (r == -ECANCELED) | |
941 | return; | |
942 | else | |
11fdf7f2 | 943 | ceph_abort_msg("bad C_RetryMessage return value"); |
7c673cae FG |
944 | } |
945 | }; | |
946 | ||
947 | //ms_dispatch handles a lot of logic and we want to reuse it | |
948 | //on forwarded messages, so we create a non-locking version for this class | |
949 | void _ms_dispatch(Message *m); | |
950 | bool ms_dispatch(Message *m) override { | |
9f95a23c | 951 | std::lock_guard l{lock}; |
7c673cae | 952 | _ms_dispatch(m); |
7c673cae FG |
953 | return true; |
954 | } | |
955 | void dispatch_op(MonOpRequestRef op); | |
956 | //mon_caps is used for un-connected messages from monitors | |
11fdf7f2 | 957 | MonCap mon_caps; |
9f95a23c | 958 | bool get_authorizer(int dest_type, AuthAuthorizer **authorizer); |
11fdf7f2 | 959 | public: // for AuthMonitor msgr1: |
aee94f69 | 960 | int ms_handle_fast_authentication(Connection *con) override; |
11fdf7f2 TL |
961 | private: |
962 | void ms_handle_accept(Connection *con) override; | |
7c673cae FG |
963 | bool ms_handle_reset(Connection *con) override; |
964 | void ms_handle_remote_reset(Connection *con) override {} | |
965 | bool ms_handle_refused(Connection *con) override; | |
966 | ||
11fdf7f2 TL |
967 | // AuthClient |
968 | int get_auth_request( | |
969 | Connection *con, | |
970 | AuthConnectionMeta *auth_meta, | |
971 | uint32_t *method, | |
f67539c2 TL |
972 | std::vector<uint32_t> *preferred_modes, |
973 | ceph::buffer::list *out) override; | |
11fdf7f2 TL |
974 | int handle_auth_reply_more( |
975 | Connection *con, | |
976 | AuthConnectionMeta *auth_meta, | |
f67539c2 TL |
977 | const ceph::buffer::list& bl, |
978 | ceph::buffer::list *reply) override; | |
11fdf7f2 TL |
979 | int handle_auth_done( |
980 | Connection *con, | |
981 | AuthConnectionMeta *auth_meta, | |
982 | uint64_t global_id, | |
983 | uint32_t con_mode, | |
f67539c2 | 984 | const ceph::buffer::list& bl, |
11fdf7f2 TL |
985 | CryptoKey *session_key, |
986 | std::string *connection_secret) override; | |
987 | int handle_auth_bad_method( | |
988 | Connection *con, | |
989 | AuthConnectionMeta *auth_meta, | |
990 | uint32_t old_auth_method, | |
991 | int result, | |
992 | const std::vector<uint32_t>& allowed_methods, | |
993 | const std::vector<uint32_t>& allowed_modes) override; | |
994 | // /AuthClient | |
995 | // AuthServer | |
996 | int handle_auth_request( | |
997 | Connection *con, | |
998 | AuthConnectionMeta *auth_meta, | |
999 | bool more, | |
1000 | uint32_t auth_method, | |
f67539c2 TL |
1001 | const ceph::buffer::list& bl, |
1002 | ceph::buffer::list *reply) override; | |
11fdf7f2 TL |
1003 | // /AuthServer |
1004 | ||
f67539c2 | 1005 | int write_default_keyring(ceph::buffer::list& bl); |
7c673cae FG |
1006 | void extract_save_mon_key(KeyRing& keyring); |
1007 | ||
224ce89b | 1008 | void collect_metadata(Metadata *m); |
224ce89b | 1009 | int load_metadata(); |
f67539c2 TL |
1010 | void count_metadata(const std::string& field, ceph::Formatter *f); |
1011 | void count_metadata(const std::string& field, std::map<std::string,int> *out); | |
1012 | // get_all_versions() gathers version information from daemons for health check | |
20effc67 TL |
1013 | void get_all_versions(std::map<std::string, std::list<std::string>> &versions); |
1014 | void get_versions(std::map<std::string, std::list<std::string>> &versions); | |
7c673cae FG |
1015 | |
1016 | // features | |
1017 | static CompatSet get_initial_supported_features(); | |
1018 | static CompatSet get_supported_features(); | |
1019 | static CompatSet get_legacy_features(); | |
1020 | /// read the ondisk features into the CompatSet pointed to by read_features | |
1021 | static void read_features_off_disk(MonitorDBStore *store, CompatSet *read_features); | |
1022 | void read_features(); | |
1023 | void write_features(MonitorDBStore::TransactionRef t); | |
1024 | ||
1025 | OpTracker op_tracker; | |
1026 | ||
1027 | public: | |
f67539c2 | 1028 | Monitor(CephContext *cct_, std::string nm, MonitorDBStore *s, |
7c673cae FG |
1029 | Messenger *m, Messenger *mgr_m, MonMap *map); |
1030 | ~Monitor() override; | |
1031 | ||
1032 | static int check_features(MonitorDBStore *store); | |
1033 | ||
1034 | // config observer | |
1035 | const char** get_tracked_conf_keys() const override; | |
11fdf7f2 | 1036 | void handle_conf_change(const ConfigProxy& conf, |
7c673cae FG |
1037 | const std::set<std::string> &changed) override; |
1038 | ||
1039 | void update_log_clients(); | |
1040 | int sanitize_options(); | |
1041 | int preinit(); | |
1042 | int init(); | |
1043 | void init_paxos(); | |
1044 | void refresh_from_paxos(bool *need_bootstrap); | |
1045 | void shutdown(); | |
1046 | void tick(); | |
1047 | ||
1048 | void handle_signal(int sig); | |
1049 | ||
f67539c2 | 1050 | int mkfs(ceph::buffer::list& osdmapbl); |
7c673cae FG |
1051 | |
1052 | /** | |
1053 | * check cluster_fsid file | |
1054 | * | |
1055 | * @return EEXIST if file exists and doesn't match, 0 on match, or negative error code | |
1056 | */ | |
1057 | int check_fsid(); | |
1058 | ||
1059 | /** | |
1060 | * write cluster_fsid file | |
1061 | * | |
1062 | * @return 0 on success, or negative error code | |
1063 | */ | |
1064 | int write_fsid(); | |
1065 | int write_fsid(MonitorDBStore::TransactionRef t); | |
1066 | ||
9f95a23c | 1067 | int do_admin_command(std::string_view command, const cmdmap_t& cmdmap, |
f67539c2 | 1068 | ceph::Formatter *f, |
9f95a23c TL |
1069 | std::ostream& err, |
1070 | std::ostream& out); | |
7c673cae FG |
1071 | |
1072 | private: | |
1073 | // don't allow copying | |
1074 | Monitor(const Monitor& rhs); | |
1075 | Monitor& operator=(const Monitor &rhs); | |
1076 | ||
1077 | public: | |
c07f9fc5 | 1078 | static void format_command_descriptions(const std::vector<MonCommand> &commands, |
f67539c2 | 1079 | ceph::Formatter *f, |
11fdf7f2 | 1080 | uint64_t features, |
f67539c2 | 1081 | ceph::buffer::list *rdata); |
d2e6a577 FG |
1082 | |
1083 | const std::vector<MonCommand> &get_local_commands(mon_feature_t f) { | |
11fdf7f2 | 1084 | if (f.contains_all(ceph::features::mon::FEATURE_NAUTILUS)) { |
d2e6a577 | 1085 | return local_mon_commands; |
11fdf7f2 TL |
1086 | } else { |
1087 | return prenautilus_local_mon_commands; | |
1088 | } | |
d2e6a577 | 1089 | } |
f67539c2 | 1090 | const ceph::buffer::list& get_local_commands_bl(mon_feature_t f) { |
11fdf7f2 | 1091 | if (f.contains_all(ceph::features::mon::FEATURE_NAUTILUS)) { |
d2e6a577 | 1092 | return local_mon_commands_bl; |
11fdf7f2 TL |
1093 | } else { |
1094 | return prenautilus_local_mon_commands_bl; | |
1095 | } | |
d2e6a577 FG |
1096 | } |
1097 | void set_leader_commands(const std::vector<MonCommand>& cmds) { | |
1098 | leader_mon_commands = cmds; | |
1099 | } | |
1100 | ||
11fdf7f2 | 1101 | bool is_keyring_required(); |
7c673cae FG |
1102 | }; |
1103 | ||
1104 | #define CEPH_MON_FEATURE_INCOMPAT_BASE CompatSet::Feature (1, "initial feature set (~v.18)") | |
1105 | #define CEPH_MON_FEATURE_INCOMPAT_GV CompatSet::Feature (2, "global version sequencing (v0.52)") | |
1106 | #define CEPH_MON_FEATURE_INCOMPAT_SINGLE_PAXOS CompatSet::Feature (3, "single paxos with k/v store (v0.\?)") | |
1107 | #define CEPH_MON_FEATURE_INCOMPAT_OSD_ERASURE_CODES CompatSet::Feature(4, "support erasure code pools") | |
1108 | #define CEPH_MON_FEATURE_INCOMPAT_OSDMAP_ENC CompatSet::Feature(5, "new-style osdmap encoding") | |
1109 | #define CEPH_MON_FEATURE_INCOMPAT_ERASURE_CODE_PLUGINS_V2 CompatSet::Feature(6, "support isa/lrc erasure code") | |
1110 | #define CEPH_MON_FEATURE_INCOMPAT_ERASURE_CODE_PLUGINS_V3 CompatSet::Feature(7, "support shec erasure code") | |
1111 | #define CEPH_MON_FEATURE_INCOMPAT_KRAKEN CompatSet::Feature(8, "support monmap features") | |
181888fb | 1112 | #define CEPH_MON_FEATURE_INCOMPAT_LUMINOUS CompatSet::Feature(9, "luminous ondisk layout") |
11fdf7f2 TL |
1113 | #define CEPH_MON_FEATURE_INCOMPAT_MIMIC CompatSet::Feature(10, "mimic ondisk layout") |
1114 | #define CEPH_MON_FEATURE_INCOMPAT_NAUTILUS CompatSet::Feature(11, "nautilus ondisk layout") | |
9f95a23c | 1115 | #define CEPH_MON_FEATURE_INCOMPAT_OCTOPUS CompatSet::Feature(12, "octopus ondisk layout") |
f67539c2 | 1116 | #define CEPH_MON_FEATURE_INCOMPAT_PACIFIC CompatSet::Feature(13, "pacific ondisk layout") |
20effc67 | 1117 | #define CEPH_MON_FEATURE_INCOMPAT_QUINCY CompatSet::Feature(14, "quincy ondisk layout") |
1e59de90 | 1118 | #define CEPH_MON_FEATURE_INCOMPAT_REEF CompatSet::Feature(15, "reef ondisk layout") |
7c673cae FG |
1119 | // make sure you add your feature to Monitor::get_supported_features |
1120 | ||
7c673cae | 1121 | |
9f95a23c TL |
1122 | /* Callers use: |
1123 | * | |
1124 | * new C_MonContext{...} | |
1125 | * | |
1126 | * instead of | |
1127 | * | |
1128 | * new C_MonContext(...) | |
1129 | * | |
1130 | * because of gcc bug [1]. | |
1131 | * | |
1132 | * [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85883 | |
1133 | */ | |
1134 | template<typename T> | |
1135 | class C_MonContext : public LambdaContext<T> { | |
1136 | public: | |
1137 | C_MonContext(const Monitor* m, T&& f) : | |
1138 | LambdaContext<T>(std::forward<T>(f)), | |
1139 | mon(m) | |
1140 | {} | |
1141 | void finish(int r) override { | |
1142 | if (mon->is_shutdown()) | |
1143 | return; | |
1144 | LambdaContext<T>::finish(r); | |
1145 | } | |
1146 | private: | |
1147 | const Monitor* mon; | |
1148 | }; | |
7c673cae FG |
1149 | |
1150 | #endif |