1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
16 * This is the top level monitor. It runs on each machine in the Monitor
17 * Cluster. The election of a leader for the paxos algorithm only happens
18 * once per machine via the elector. There is a separate paxos instance (state)
19 * kept for each of the system components: Object Store Device (OSD) Monitor,
20 * Placement Group (PG) Monitor, Metadata Server (MDS) Monitor, and Client Monitor.
23 #ifndef CEPH_MONITOR_H
24 #define CEPH_MONITOR_H
29 #include "include/types.h"
30 #include "msg/Messenger.h"
32 #include "common/Timer.h"
38 #include "PGStatService.h"
40 #include "common/LogClient.h"
41 #include "auth/cephx/CephxKeyServer.h"
42 #include "auth/AuthMethodList.h"
43 #include "auth/KeyRing.h"
44 #include "messages/MMonCommand.h"
45 #include "mon/MonitorDBStore.h"
46 #include "include/memory.h"
47 #include "mgr/MgrClient.h"
49 #include "mon/MonOpRequest.h"
50 #include "common/WorkQueue.h"
53 #define CEPH_MON_PROTOCOL 13 /* cluster internal */
57 l_cluster_first
= 555000,
59 l_cluster_num_mon_quorum
,
65 l_cluster_osd_bytes_used
,
66 l_cluster_osd_bytes_avail
,
69 l_cluster_num_pg_active_clean
,
70 l_cluster_num_pg_active
,
71 l_cluster_num_pg_peering
,
73 l_cluster_num_object_degraded
,
74 l_cluster_num_object_misplaced
,
75 l_cluster_num_object_unfound
,
79 l_cluster_num_mds_failed
,
101 class AdminSocketHook
;
104 class MMonGetVersion
;
109 struct MMonSubscribe
;
116 #define COMPAT_SET_LOC "feature_set"
// Completion callback tied to a Monitor: derives from FunctionContext and is
// built from a boost::function<void(int)> plus a Monitor pointer.
// NOTE(review): the declaration of the `mon` member, the access specifier and
// the closing `};` are not present in this listing -- confirm against the
// full header before relying on this fragment.
118 class C_MonContext final
: public FunctionContext
{
// Moves `callback` into the FunctionContext base and stores `m` in `mon`.
121 explicit C_MonContext(Monitor
*m
, boost::function
<void(int)>&& callback
)
122 : FunctionContext(std::move(callback
)), mon(m
) {}
// Overrides the inherited completion hook; only declared here, the body is
// not part of this listing.
123 void finish(int r
) override
;
126 class Monitor
: public Dispatcher
,
127 public md_config_obs_t
{
132 Messenger
*messenger
;
133 ConnectionRef con_self
;
137 ThreadPool cpu_tp
; ///< threadpool for CPU intensive work
139 /// true if we have ever joined a quorum. if false, we are either a
140 /// new cluster, a newly joining monitor, or a just-upgraded
142 bool has_ever_joined
;
144 PerfCounters
*logger
, *cluster_logger
;
145 bool cluster_logger_registered
;
147 void register_cluster_logger();
148 void unregister_cluster_logger();
153 set
<entity_addr_t
> extra_probe_peers
;
155 LogClient log_client
;
157 LogChannelRef audit_clog
;
159 KeyServer key_server
;
161 AuthMethodList auth_cluster_required
;
162 AuthMethodList auth_service_required
;
166 const MonCommand
*leader_supported_mon_commands
;
167 int leader_supported_mon_commands_size
;
169 Messenger
*mgr_messenger
;
170 MgrClient mgr_client
;
171 uint64_t mgr_proxy_bytes
= 0; // in-flight proxied mgr command message bytes
173 const MonPGStatService
*pgservice
;
178 // -- local storage --
180 MonitorDBStore
*store
;
181 static const string MONITOR_NAME
;
182 static const string MONITOR_STORE_PREFIX
;
184 // -- monitor state --
197 static const char *get_state_name(int s
) {
199 case STATE_PROBING
: return "probing";
200 case STATE_SYNCHRONIZING
: return "synchronizing";
201 case STATE_ELECTING
: return "electing";
202 case STATE_LEADER
: return "leader";
203 case STATE_PEON
: return "peon";
204 case STATE_SHUTDOWN
: return "shutdown";
205 default: return "???";
208 const char *get_state_name() const {
209 return get_state_name(state
);
212 bool is_shutdown() const { return state
== STATE_SHUTDOWN
; }
213 bool is_probing() const { return state
== STATE_PROBING
; }
214 bool is_synchronizing() const { return state
== STATE_SYNCHRONIZING
; }
215 bool is_electing() const { return state
== STATE_ELECTING
; }
216 bool is_leader() const { return state
== STATE_LEADER
; }
217 bool is_peon() const { return state
== STATE_PEON
; }
219 const utime_t
&get_leader_since() const;
221 void prepare_new_fingerprint(MonitorDBStore::TransactionRef t
);
227 friend class Elector
;
229 /// features we require of peers (based on on-disk compatset)
230 uint64_t required_features
;
232 int leader
; // current leader (to best of knowledge)
233 set
<int> quorum
; // current active set of monitors (if !starting)
234 utime_t leader_since
; // when this monitor became the leader, if it is the leader
235 utime_t exited_quorum
; // time detected as not in quorum; 0 if in
237 // map of counts of connected clients, by type and features, for
239 map
<int,FeatureMap
> quorum_feature_map
;
242 * Intersection of quorum members' connection feature bits.
244 uint64_t quorum_con_features
;
246 * Intersection of quorum members' mon-specific feature bits
248 mon_feature_t quorum_mon_features
;
249 bufferlist supported_commands_bl
; // encoded MonCommands we support
251 set
<string
> outside_quorum
;
254 * @defgroup Monitor_h_scrub
257 version_t scrub_version
; ///< paxos version we are scrubbing
258 map
<int,ScrubResult
> scrub_result
; ///< results so far
261 * trigger a cross-mon scrub
263 * Verify all mons are storing identical content
267 void handle_scrub(MonOpRequestRef op
);
268 bool _scrub(ScrubResult
*r
,
269 pair
<string
,string
> *start
,
271 void scrub_check_results();
272 void scrub_timeout();
275 void scrub_update_interval(int secs
);
277 Context
*scrub_event
; ///< periodic event to trigger scrub (leader)
278 Context
*scrub_timeout_event
; ///< scrub round timeout (leader)
279 void scrub_event_start();
280 void scrub_event_cancel();
281 void scrub_reset_timeout();
282 void scrub_cancel_timeout();
285 pair
<string
,string
> last_key
; ///< last scrubbed key
288 ScrubState() : finished(false) { }
289 virtual ~ScrubState() { }
291 ceph::shared_ptr
<ScrubState
> scrub_state
; ///< keeps track of current scrub
294 * @defgroup Monitor_h_sync Synchronization
298 * @} // provider state
// Per-peer bookkeeping for a store sync that another monitor is pulling from
// this one; the per-field trailing comments describe each member.
// NOTE(review): the end of reset_timeout() and the closing braces of the
// struct are not present in this listing.
300 struct SyncProvider
{
301 entity_inst_t entity
; ///< who
302 uint64_t cookie
; ///< unique cookie for this sync attempt
303 utime_t timeout
; ///< when we give up and expire this attempt
304 version_t last_committed
; ///< last paxos version on peer
305 pair
<string
,string
> last_key
; ///< last key sent to (or on) peer
306 bool full
; ///< full scan?
307 MonitorDBStore::Synchronizer synchronizer
; ///< iterator
// Zero-initialises cookie and last_committed and clears `full`; the other
// members are default-constructed.
309 SyncProvider() : cookie(0), last_committed(0), full(false) {}
// Restarts the expiry clock. The visible statement stamps `timeout` with the
// current time; neither `cct` nor `grace` is used in the lines shown.
// NOTE(review): presumably `grace` is added to `timeout` on a following line
// that is missing here -- confirm against the full header.
311 void reset_timeout(CephContext
*cct
, int grace
) {
312 timeout
= ceph_clock_now();
317 map
<uint64_t, SyncProvider
> sync_providers
; ///< cookie -> SyncProvider for those syncing from us
318 uint64_t sync_provider_count
; ///< counter for issued cookies to keep them unique
321 * @} // requester state
323 entity_inst_t sync_provider
; ///< who we are syncing from
324 uint64_t sync_cookie
; ///< 0 if we are starting, non-zero otherwise
325 bool sync_full
; ///< true if we are a full sync, false for recent catch-up
326 version_t sync_start_version
; ///< last_committed at sync start
327 Context
*sync_timeout_event
; ///< timeout event
330 * floor for sync source
332 * When we sync we forget about our old last_committed value which
333 * can be dangerous. For example, if we have a cluster of:
339 * If something forces us to sync (say, corruption, or manual
340 * intervention, or bug), we forget last_committed, and might abort.
341 * If mon.a happens to be down when we come back, we will see:
346 * and sync from mon.b, at which point a+b will both have lc 80 and
347 * come online with a majority holding out of date commits.
349 * Avoid this by preserving our old last_committed value prior to
350 * sync and never going backwards.
352 version_t sync_last_committed_floor
;
355 * Obtain the synchronization target prefixes in set form.
357 * We consider a target prefix all those that are relevant when
358 * synchronizing two stores. That is, all those that hold paxos service's
359 * versions, as well as paxos versions, or any control keys such as the
360 * first or last committed version.
362 * Given the current design, this function should return the name of all and
363 * any available paxos service, plus the paxos name.
365 * @returns a set of strings referring to the prefixes being synchronized
367 set
<string
> get_sync_targets_names();
370 * Reset the monitor's sync-related data structures for syncing *from* a peer
372 void sync_reset_requester();
375 * Reset sync state related to allowing others to sync from us
377 void sync_reset_provider();
380 * Called when a sync attempt times out (requester-side)
385 * Get the latest monmap for backup purposes during sync
387 void sync_obtain_latest_monmap(bufferlist
&bl
);
392 * Start pulling committed state from another monitor.
394 * @param entity where to pull committed state from
395 * @param full whether to do a full sync or just catch up on recent paxos
397 void sync_start(entity_inst_t
&entity
, bool full
);
401 * force a sync on next mon restart
403 void sync_force(Formatter
*f
, ostream
& ss
);
407 * store critical state for safekeeping during sync
409 * We store a few things on the side that we don't want to get clobbered by sync. This
410 * includes the latest monmap and a lower bound on last_committed.
412 void sync_stash_critical_state(MonitorDBStore::TransactionRef tx
);
415 * reset the sync timeout
417 * This is used on the client to restart if things aren't progressing
419 void sync_reset_timeout();
422 * trim stale sync provider state
424 * If someone is syncing from us and hasn't talked to us recently, expire their state.
426 void sync_trim_providers();
431 * Finish up a sync after we've gotten all of the chunks.
433 * @param last_committed final last_committed value from provider
435 void sync_finish(version_t last_committed
);
438 * request the next chunk from the provider
440 void sync_get_next_chunk();
443 * handle sync message
445 * @param m Sync message with operation type MMonSync::OP_START_CHUNKS
447 void handle_sync(MonOpRequestRef op
);
449 void _sync_reply_no_cookie(MonOpRequestRef op
);
451 void handle_sync_get_cookie(MonOpRequestRef op
);
452 void handle_sync_get_chunk(MonOpRequestRef op
);
453 void handle_sync_finish(MonOpRequestRef op
);
455 void handle_sync_cookie(MonOpRequestRef op
);
456 void handle_sync_forward(MonOpRequestRef op
);
457 void handle_sync_chunk(MonOpRequestRef op
);
458 void handle_sync_no_cookie(MonOpRequestRef op
);
461 * @} // Synchronization
464 list
<Context
*> waitfor_quorum
;
465 list
<Context
*> maybe_wait_for_quorum
;
468 * @defgroup Monitor_h_TimeCheck Monitor Clock Drift Early Warning System
471 * We use time checks to keep track of any clock drifting going on in the
472 * cluster. This is accomplished by periodically pinging each monitor in the
473 * quorum and registering its response time in a map, assessing how much its
474 * clock has drifted. We also take this opportunity to assess the latency
477 * This mechanism works as follows:
479 * - Leader sends out a 'PING' message to each other monitor in the quorum.
480 * The message is timestamped with the leader's current time. The leader's
481 * current time is recorded in a map, associated with each peon's
483 * - The peon replies to the leader with a timestamped 'PONG' message.
484 * - The leader calculates a delta between the peon's timestamp and its
485 * current time and stashes it.
486 * - The leader also calculates the time it took to receive the 'PONG'
487 * since the 'PING' was sent, and stashes an approximate latency estimate.
488 * - Once all the quorum members have pong'ed, the leader will share the
489 * clock skew and latency maps with all the monitors in the quorum.
491 map
<entity_inst_t
, utime_t
> timecheck_waiting
;
492 map
<entity_inst_t
, double> timecheck_skews
;
493 map
<entity_inst_t
, double> timecheck_latencies
;
494 // odd value means we are mid-round; even value means the round has
496 version_t timecheck_round
;
497 unsigned int timecheck_acks
;
498 utime_t timecheck_round_start
;
499 /* When we hit a skew we will start a new round based off of
500 * 'mon_timecheck_skew_interval'. Each new round will be backed off
501 * until we hit 'mon_timecheck_interval' -- which is the typical
502 * interval when not in the presence of a skew.
504 * This variable tracks the number of rounds with skews since last clean
505 * so that we can report to the user and properly adjust the backoff.
507 uint64_t timecheck_rounds_since_clean
;
511 Context
*timecheck_event
;
513 void timecheck_start();
514 void timecheck_finish();
515 void timecheck_start_round();
516 void timecheck_finish_round(bool success
= true);
517 void timecheck_cancel_round();
518 void timecheck_cleanup();
519 void timecheck_reset_event();
520 void timecheck_check_skews();
521 void timecheck_report();
523 health_status_t
timecheck_status(ostringstream
&ss
,
524 const double skew_bound
,
525 const double latency
);
526 void handle_timecheck_leader(MonOpRequestRef op
);
527 void handle_timecheck_peon(MonOpRequestRef op
);
528 void handle_timecheck(MonOpRequestRef op
);
531 * Returns 'true' if this is considered to be a skew; 'false' otherwise.
533 bool timecheck_has_skew(const double skew_bound
, double *abs
) const {
534 double abs_skew
= std::fabs(skew_bound
);
537 return (abs_skew
> g_conf
->mon_clock_drift_allowed
);
544 * Handle ping messages from others.
546 void handle_ping(MonOpRequestRef op
);
548 Context
*probe_timeout_event
= nullptr; // for probing
550 void reset_probe_timeout();
551 void cancel_probe_timeout();
552 void probe_timeout(int r
);
554 void _apply_compatset_features(CompatSet
&new_features
);
558 int get_leader() const { return leader
; }
559 const set
<int>& get_quorum() const { return quorum
; }
560 list
<string
> get_quorum_names() {
562 for (set
<int>::iterator p
= quorum
.begin(); p
!= quorum
.end(); ++p
)
563 q
.push_back(monmap
->get_name(*p
));
566 uint64_t get_quorum_con_features() const {
567 return quorum_con_features
;
569 mon_feature_t
get_quorum_mon_features() const {
570 return quorum_mon_features
;
572 uint64_t get_required_features() const {
573 return required_features
;
575 mon_feature_t
get_required_mon_features() const {
576 return monmap
->get_required_features();
578 void apply_quorum_to_compatset_features();
579 void apply_monmap_to_compatset_features();
580 void calc_quorum_requirements();
582 void get_combined_feature_map(FeatureMap
*fm
);
585 void _reset(); ///< called from bootstrap, start_, or join_election
586 void wait_for_paxos_write();
587 void _finish_svc_election(); ///< called by {win,lose}_election
590 void join_election();
591 void start_election();
592 void win_standalone_election();
593 // end election (called by Elector)
594 void win_election(epoch_t epoch
, set
<int>& q
,
596 const mon_feature_t
& mon_features
,
597 const MonCommand
*cmdset
, int cmdsize
);
598 void lose_election(epoch_t epoch
, set
<int>& q
, int l
,
600 const mon_feature_t
& mon_features
);
601 // end election (called by Elector)
602 void finish_election();
604 const bufferlist
& get_supported_commands_bl() {
605 return supported_commands_bl
;
608 void update_logger();
611 * Vector holding the Services serviced by this Monitor.
613 vector
<PaxosService
*> paxos_service
;
615 PaxosService
*get_paxos_service_by_name(const string
& name
);
617 class PGMonitor
*pgmon() {
618 return (class PGMonitor
*)paxos_service
[PAXOS_PGMAP
];
621 class MDSMonitor
*mdsmon() {
622 return (class MDSMonitor
*)paxos_service
[PAXOS_MDSMAP
];
625 class MonmapMonitor
*monmon() {
626 return (class MonmapMonitor
*)paxos_service
[PAXOS_MONMAP
];
629 class OSDMonitor
*osdmon() {
630 return (class OSDMonitor
*)paxos_service
[PAXOS_OSDMAP
];
633 class AuthMonitor
*authmon() {
634 return (class AuthMonitor
*)paxos_service
[PAXOS_AUTH
];
637 class LogMonitor
*logmon() {
638 return (class LogMonitor
*) paxos_service
[PAXOS_LOG
];
641 class MgrMonitor
*mgrmon() {
642 return (class MgrMonitor
*) paxos_service
[PAXOS_MGR
];
645 class MgrStatMonitor
*mgrstatmon() {
646 return (class MgrStatMonitor
*) paxos_service
[PAXOS_MGRSTAT
];
650 friend class OSDMonitor
;
651 friend class MDSMonitor
;
652 friend class MonmapMonitor
;
653 friend class PGMonitor
;
654 friend class LogMonitor
;
655 friend class ConfigKeyService
;
657 QuorumService
*health_monitor
;
658 QuorumService
*config_key_service
;
661 MonSessionMap session_map
;
662 Mutex session_map_lock
{"Monitor::session_map_lock"};
663 AdminSocketHook
*admin_hook
;
665 template<typename Func
, typename
...Args
>
666 void with_session_map(Func
&& func
) {
667 Mutex::Locker
l(session_map_lock
);
668 std::forward
<Func
>(func
)(session_map
);
670 void send_latest_monmap(Connection
*con
);
673 void handle_get_version(MonOpRequestRef op
);
674 void handle_subscribe(MonOpRequestRef op
);
675 void handle_mon_get_map(MonOpRequestRef op
);
677 static void _generate_command_map(map
<string
,cmd_vartype
>& cmdmap
,
678 map
<string
,string
> ¶m_str_map
);
679 static const MonCommand
*_get_moncommand(const string
&cmd_prefix
,
680 MonCommand
*cmds
, int cmds_size
);
681 bool _allowed_command(MonSession
*s
, string
&module
, string
&prefix
,
682 const map
<string
,cmd_vartype
>& cmdmap
,
683 const map
<string
,string
>& param_str_map
,
684 const MonCommand
*this_cmd
);
685 void get_mon_status(Formatter
*f
, ostream
& ss
);
686 void _quorum_status(Formatter
*f
, ostream
& ss
);
687 bool _add_bootstrap_peer_hint(string cmd
, cmdmap_t
& cmdmap
, ostream
& ss
);
688 void handle_command(MonOpRequestRef op
);
689 void handle_route(MonOpRequestRef op
);
691 void handle_mon_metadata(MonOpRequestRef op
);
692 int get_mon_metadata(int mon
, Formatter
*f
, ostream
& err
);
693 int print_nodes(Formatter
*f
, ostream
& err
);
695 // Accumulate metadata across calls to update_mon_metadata
696 map
<int, Metadata
> pending_metadata
;
701 struct health_cache_t
{
702 health_status_t overall
;
706 // health_status_t doesn't really have a NONE value and we're not
707 // okay with setting something else (say, HEALTH_ERR). so just
711 } health_status_cache
;
713 Context
*health_tick_event
= nullptr;
714 Context
*health_interval_event
= nullptr;
716 void health_tick_start();
717 void health_tick_stop();
718 utime_t
health_interval_calc_next_update();
719 void health_interval_start();
720 void health_interval_stop();
721 void health_events_cleanup();
723 void health_to_clog_update_conf(const std::set
<std::string
> &changed
);
725 void do_health_to_clog_interval();
726 void do_health_to_clog(bool force
= false);
729 * Generate health report
731 * @param status one-line status summary
732 * @param detailbl optional bufferlist* to fill with a detailed report
733 * @returns health status
735 health_status_t
get_health(list
<string
>& status
, bufferlist
*detailbl
,
737 void get_cluster_status(stringstream
&ss
, Formatter
*f
);
739 void reply_command(MonOpRequestRef op
, int rc
, const string
&rs
, version_t version
);
740 void reply_command(MonOpRequestRef op
, int rc
, const string
&rs
, bufferlist
& rdata
, version_t version
);
743 void handle_probe(MonOpRequestRef op
);
745 * Handle a Probe Operation, replying with our name, quorum and known versions.
747 * We use the MMonProbe message class for anything and everything related to
748 * Monitor probing. One of the operations relates directly to the probing
749 * itself, in which we receive a probe request and to which we reply with
750 * our name, our quorum and the known versions for each Paxos service. Thus the
751 * redundant function name. This reply will obviously be sent to the one
752 * probing/requesting these infos.
754 * @todo Add @pre and @post
756 * @param m A Probe message, with an operation of type Probe.
758 void handle_probe_probe(MonOpRequestRef op
);
759 void handle_probe_reply(MonOpRequestRef op
);
// Record kept for a request tracked in `routed_requests`.
// NOTE(review): the declarations of `tid` and `session` (both initialised by
// the constructor below) and the closing brace are not present in this
// listing.
762 struct RoutedRequest
{
// Encoded request payload.
// NOTE(review): presumably the original message bytes -- confirm.
764 bufferlist request_bl
;
767 uint64_t con_features
;
768 entity_inst_t client_inst
;
// Starts with tid 0, a null session and no connection features.
771 RoutedRequest() : tid(0), session(NULL
), con_features(0) {}
777 uint64_t routed_request_tid
;
778 map
<uint64_t, RoutedRequest
*> routed_requests
;
780 void forward_request_leader(MonOpRequestRef op
);
781 void handle_forward(MonOpRequestRef op
);
782 void try_send_message(Message
*m
, const entity_inst_t
& to
);
783 void send_reply(MonOpRequestRef op
, Message
*reply
);
784 void no_reply(MonOpRequestRef op
);
785 void resend_routed_requests();
786 void remove_session(MonSession
*s
);
787 void remove_all_sessions();
788 void waitlist_or_zap_client(MonOpRequestRef op
);
790 void send_command(const entity_inst_t
& inst
,
791 const vector
<string
>& com
);
// Completion context for a monitor command: carries the return code (`rc`),
// status string (`rs`), optional payload (`rdata`) and `version` that
// _finish() passes to Monitor::reply_command().
// NOTE(review): the member declarations and several control-flow lines of
// _finish() (the outer `if`, the `else` branches, closing braces) are not
// present in this listing; the comments below describe only the statements
// that are visible.
794 struct C_Command
: public C_MonOp
{
// Result without a payload: `rdata` is not named in the initialiser list.
800 C_Command(Monitor
*_mm
, MonOpRequestRef _op
, int r
, string s
, version_t v
) :
801 C_MonOp(_op
), mon(_mm
), rc(r
), rs(s
), version(v
){}
// Result with a payload: `rd` is copied into `rdata`.
802 C_Command(Monitor
*_mm
, MonOpRequestRef _op
, int r
, string s
, bufferlist rd
, version_t v
) :
803 C_MonOp(_op
), mon(_mm
), rc(r
), rs(s
), rdata(rd
), version(v
){}
// Completion hook. `r` selects between replying, re-dispatching and
// asserting (see the branches below).
805 void _finish(int r
) override
{
// The request is treated as an MMonCommand so its `cmd` can be logged.
806 MMonCommand
*m
= static_cast<MMonCommand
*>(op
->get_req());
// The audit line is prefixed according to what is still known about the
// sender: no connection, a session (inst + entity name), or no session.
809 if (!op
->get_req()->get_connection()) {
810 ss
<< "connection dropped for command ";
812 MonSession
*s
= op
->get_session();
814 // if client drops we may not have a session to draw information from.
816 ss
<< "from='" << s
->inst
<< "' "
817 << "entity='" << s
->entity_name
<< "' ";
819 ss
<< "session dropped for command ";
822 ss
<< "cmd='" << m
->cmd
<< "': finished";
// Write the audit record, then send the stored result back to the client.
824 mon
->audit_clog
->info() << ss
.str();
825 mon
->reply_command(op
, rc
, rs
, rdata
, version
);
// NOTE(review): the statement executed for -ECANCELED is not shown here.
827 else if (r
== -ECANCELED
)
// -EAGAIN: hand the op back to the monitor's dispatcher.
829 else if (r
== -EAGAIN
)
830 mon
->dispatch_op(op
);
// NOTE(review): presumably the final `else` of the chain; fires for any
// other value of r.
832 assert(0 == "bad C_Command return value");
// Completion context that re-dispatches its op on the owning Monitor.
// NOTE(review): the `mon` member declaration, the access specifier, the
// statement taken for -ECANCELED, the final `else` and the closing braces are
// not present in this listing.
837 class C_RetryMessage
: public C_MonOp
{
// Stores the op in the C_MonOp base and the monitor in `mon`.
840 C_RetryMessage(Monitor
*m
, MonOpRequestRef op
) :
841 C_MonOp(op
), mon(m
) { }
// Completion hook: -EAGAIN and any non-negative result re-dispatch the op.
843 void _finish(int r
) override
{
844 if (r
== -EAGAIN
|| r
>= 0)
845 mon
->dispatch_op(op
);
// NOTE(review): the statement executed for -ECANCELED is not shown here.
846 else if (r
== -ECANCELED
)
// NOTE(review): presumably the final `else`; fires for any other value of r.
849 assert(0 == "bad C_RetryMessage return value");
853 //ms_dispatch handles a lot of logic and we want to reuse it
854 //on forwarded messages, so we create a non-locking version for this class
855 void _ms_dispatch(Message
*m
);
856 bool ms_dispatch(Message
*m
) override
{
862 void dispatch_op(MonOpRequestRef op
);
863 //mon_caps is used for un-connected messages from monitors
865 bool ms_get_authorizer(int dest_type
, AuthAuthorizer
**authorizer
, bool force_new
) override
;
866 bool ms_verify_authorizer(Connection
*con
, int peer_type
,
867 int protocol
, bufferlist
& authorizer_data
, bufferlist
& authorizer_reply
,
868 bool& isvalid
, CryptoKey
& session_key
) override
;
869 bool ms_handle_reset(Connection
*con
) override
;
870 void ms_handle_remote_reset(Connection
*con
) override
{}
871 bool ms_handle_refused(Connection
*con
) override
;
873 int write_default_keyring(bufferlist
& bl
);
874 void extract_save_mon_key(KeyRing
& keyring
);
876 void update_mon_metadata(int from
, Metadata
&& m
);
877 int load_metadata(map
<int, Metadata
>& m
);
878 void count_metadata(const string
& field
, Formatter
*f
);
881 static CompatSet
get_initial_supported_features();
882 static CompatSet
get_supported_features();
883 static CompatSet
get_legacy_features();
884 /// read the ondisk features into the CompatSet pointed to by read_features
885 static void read_features_off_disk(MonitorDBStore
*store
, CompatSet
*read_features
);
886 void read_features();
887 void write_features(MonitorDBStore::TransactionRef t
);
889 OpTracker op_tracker
;
892 Monitor(CephContext
*cct_
, string nm
, MonitorDBStore
*s
,
893 Messenger
*m
, Messenger
*mgr_m
, MonMap
*map
);
896 static int check_features(MonitorDBStore
*store
);
899 const char** get_tracked_conf_keys() const override
;
900 void handle_conf_change(const struct md_config_t
*conf
,
901 const std::set
<std::string
> &changed
) override
;
903 void update_log_clients();
904 int sanitize_options();
908 void refresh_from_paxos(bool *need_bootstrap
);
912 void handle_signal(int sig
);
914 int mkfs(bufferlist
& osdmapbl
);
917 * check cluster_fsid file
919 * @return EEXIST if file exists and doesn't match, 0 on match, or negative error code
924 * write cluster_fsid file
926 * @return 0 on success, or negative error code
929 int write_fsid(MonitorDBStore::TransactionRef t
);
931 void do_admin_command(std::string command
, cmdmap_t
& cmdmap
,
932 std::string format
, ostream
& ss
);
935 // don't allow copying
936 Monitor(const Monitor
& rhs
);
937 Monitor
& operator=(const Monitor
&rhs
);
940 static void format_command_descriptions(const MonCommand
*commands
,
941 unsigned commands_size
,
944 bool hide_mgr_flag
=false);
945 void get_locally_supported_monitor_commands(const MonCommand
**cmds
, int *count
);
946 /// the Monitor owns this pointer once you pass it in
947 void set_leader_supported_commands(const MonCommand
*cmds
, int size
);
948 static bool is_keyring_required();
951 #define CEPH_MON_FEATURE_INCOMPAT_BASE CompatSet::Feature (1, "initial feature set (~v.18)")
952 #define CEPH_MON_FEATURE_INCOMPAT_GV CompatSet::Feature (2, "global version sequencing (v0.52)")
953 #define CEPH_MON_FEATURE_INCOMPAT_SINGLE_PAXOS CompatSet::Feature (3, "single paxos with k/v store (v0.\?)")
954 #define CEPH_MON_FEATURE_INCOMPAT_OSD_ERASURE_CODES CompatSet::Feature(4, "support erasure code pools")
955 #define CEPH_MON_FEATURE_INCOMPAT_OSDMAP_ENC CompatSet::Feature(5, "new-style osdmap encoding")
956 #define CEPH_MON_FEATURE_INCOMPAT_ERASURE_CODE_PLUGINS_V2 CompatSet::Feature(6, "support isa/lrc erasure code")
957 #define CEPH_MON_FEATURE_INCOMPAT_ERASURE_CODE_PLUGINS_V3 CompatSet::Feature(7, "support shec erasure code")
958 #define CEPH_MON_FEATURE_INCOMPAT_KRAKEN CompatSet::Feature(8, "support monmap features")
959 // make sure you add your feature to Monitor::get_supported_features
// Bit masks tested by has_flag()/set_flag()/unset_flag() against `flags`.
// The shifted literal is 64 bits wide so that a future flag at bit 31 or
// above cannot overflow a plain `int` shift; the current values are unchanged.
static const uint64_t FLAG_NONE       = 0;
static const uint64_t FLAG_NOFORWARD  = 1ULL << 0;
static const uint64_t FLAG_OBSOLETE   = 1ULL << 1;
static const uint64_t FLAG_DEPRECATED = 1ULL << 2;
static const uint64_t FLAG_MGR        = 1ULL << 3;
976 bool has_flag(uint64_t flag
) const { return (flags
& flag
) != 0; }
977 void set_flag(uint64_t flag
) { flags
|= flag
; }
978 void unset_flag(uint64_t flag
) { flags
&= ~flag
; }
980 void encode(bufferlist
&bl
) const {
982 * very naughty: deliberately unversioned because individual commands
983 * shouldn't be encoded standalone, only as a full set (which we do
984 * version, see encode_array() below).
986 ::encode(cmdstring
, bl
);
987 ::encode(helpstring
, bl
);
988 ::encode(module
, bl
);
989 ::encode(req_perms
, bl
);
990 ::encode(availability
, bl
);
992 void decode(bufferlist::iterator
&bl
) {
993 ::decode(cmdstring
, bl
);
994 ::decode(helpstring
, bl
);
995 ::decode(module
, bl
);
996 ::decode(req_perms
, bl
);
997 ::decode(availability
, bl
);
999 bool is_compat(const MonCommand
* o
) const {
1000 return cmdstring
== o
->cmdstring
&&
1001 module
== o
->module
&& req_perms
== o
->req_perms
&&
1002 availability
== o
->availability
;
1005 bool is_noforward() const {
1006 return has_flag(MonCommand::FLAG_NOFORWARD
);
1009 bool is_obsolete() const {
1010 return has_flag(MonCommand::FLAG_OBSOLETE
);
1013 bool is_deprecated() const {
1014 return has_flag(MonCommand::FLAG_DEPRECATED
);
1017 bool is_mgr() const {
1018 return has_flag(MonCommand::FLAG_MGR
);
// Encode an array of `size` commands into `bl` as one versioned unit.
// NOTE(review): the lines between ENCODE_START and encode_array_nohead, and
// everything after the flags loop (including the closing brace), are not
// present in this listing.
1021 static void encode_array(const MonCommand
*cmds
, int size
, bufferlist
&bl
) {
// Opens a versioned encoding block with arguments (2, 1).
1022 ENCODE_START(2, 1, bl
);
// Encodes the command bodies without a leading element count.
1025 ::encode_array_nohead(cmds
, size
, bl
);
// The flags of every command are appended after the bodies.
1026 for (int i
= 0; i
< size
; i
++)
1027 ::encode(cmds
[i
].flags
, bl
);
// Decode an array of commands from `bl` into a newly allocated array.
// `*cmds` receives storage obtained with new[]; nothing in the visible lines
// frees it.
// NOTE(review): the lines that set `*size`, the `else` between the two flag
// loops and the closing lines are not present in this listing.
1030 static void decode_array(MonCommand
**cmds
, int *size
,
1031 bufferlist::iterator
&bl
) {
// Opens a versioned decoding block; `struct_v` is tested below.
1032 DECODE_START(2, bl
);
// Allocate `*size` commands, then decode the command bodies into them.
1036 *cmds
= new MonCommand
[*size
];
1037 ::decode_array_nohead(*cmds
, *size
, bl
);
// From struct version 2 on, one flags value per command follows the bodies.
1038 if (struct_v
>= 2) {
1039 for (int i
= 0; i
< *size
; i
++)
1040 ::decode((*cmds
)[i
].flags
, bl
);
// NOTE(review): presumably the `else` branch for older encodings; it sets
// every command's flags to 0.
1042 for (int i
= 0; i
< *size
; i
++)
1043 (*cmds
)[i
].flags
= 0;
1048 bool requires_perm(char p
) const {
1049 return (req_perms
.find(p
) != string::npos
);
1052 WRITE_CLASS_ENCODER(MonCommand
)