]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MonMap.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / mon / MonMap.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#ifndef CEPH_MONMAP_H
16#define CEPH_MONMAP_H
17
11fdf7f2
TL
18#ifdef WITH_SEASTAR
19#include <seastar/core/future.hh>
20#endif
7c673cae 21
11fdf7f2 22#include "common/config_fwd.h"
9f95a23c 23#include "common/ceph_releases.h"
11fdf7f2
TL
24
25#include "include/err.h"
7c673cae 26#include "include/types.h"
11fdf7f2 27
7c673cae 28#include "mon/mon_types.h"
11fdf7f2
TL
29#include "msg/Message.h"
30
f67539c2 31class health_check_map_t;
11fdf7f2
TL
32
33#ifdef WITH_SEASTAR
f91f0fd5 34namespace crimson::common {
11fdf7f2
TL
35 class ConfigProxy;
36}
37#endif
7c673cae
FG
38
39namespace ceph {
40 class Formatter;
41}
42
43struct mon_info_t {
44 /**
45 * monitor name
46 *
47 * i.e., 'foo' in 'mon.foo'
48 */
9f95a23c 49 std::string name;
7c673cae 50 /**
11fdf7f2 51 * monitor's public address(es)
7c673cae 52 *
11fdf7f2
TL
53 * public facing address(es), used to communicate with all clients
54 * and with other monitors.
7c673cae 55 */
11fdf7f2 56 entity_addrvec_t public_addrs;
224ce89b
WB
57 /**
58 * the priority of the mon, the lower value the more preferred
59 */
60 uint16_t priority{0};
9f95a23c 61 uint16_t weight{0};
7c673cae 62
f67539c2
TL
63 /**
64 * The location of the monitor, in CRUSH hierarchy terms
65 */
66 std::map<std::string,std::string> crush_loc;
67
11fdf7f2 68 // <REMOVE ME>
9f95a23c 69 mon_info_t(const std::string& n, const entity_addr_t& p_addr, uint16_t p)
11fdf7f2
TL
70 : name(n), public_addrs(p_addr), priority(p)
71 {}
72 // </REMOVE ME>
73
9f95a23c
TL
74 mon_info_t(const std::string& n, const entity_addrvec_t& p_addrs,
75 uint16_t p, uint16_t w)
76 : name(n), public_addrs(p_addrs), priority(p), weight(w)
224ce89b 77 {}
9f95a23c 78 mon_info_t(const std::string &n, const entity_addrvec_t& p_addrs)
11fdf7f2 79 : name(n), public_addrs(p_addrs)
7c673cae
FG
80 { }
81
82 mon_info_t() { }
83
84
9f95a23c
TL
85 void encode(ceph::buffer::list& bl, uint64_t features) const;
86 void decode(ceph::buffer::list::const_iterator& p);
87 void print(std::ostream& out) const;
7c673cae
FG
88};
89WRITE_CLASS_ENCODER_FEATURES(mon_info_t)
90
9f95a23c 91inline std::ostream& operator<<(std::ostream& out, const mon_info_t& mon) {
7c673cae
FG
92 mon.print(out);
93 return out;
94}
95
96class MonMap {
97 public:
98 epoch_t epoch; // what epoch/version of the monmap
99 uuid_d fsid;
100 utime_t last_changed;
101 utime_t created;
102
9f95a23c
TL
103 std::map<std::string, mon_info_t> mon_info;
104 std::map<entity_addr_t, std::string> addr_mons;
7c673cae 105
9f95a23c 106 std::vector<std::string> ranks;
f67539c2
TL
107 /* ranks which were removed when this map took effect.
108 There should only be one at a time, but leave support
109 for arbitrary numbers just to be safe. */
110 std::set<int> removed_ranks;
7c673cae
FG
111
112 /**
113 * Persistent Features are all those features that once set on a
114 * monmap cannot, and should not, be removed. These will define the
115 * non-negotiable features that a given monitor must support to
116 * properly operate in a given quorum.
117 *
118 * Should be reserved for features that we really want to make sure
119 * are sticky, and are important enough to tolerate not being able
120 * to downgrade a monitor.
121 */
122 mon_feature_t persistent_features;
123 /**
124 * Optional Features are all those features that can be enabled or
125 * disabled following a given criteria -- e.g., user-mandated via the
126 * cli --, and act much like indicators of what the cluster currently
127 * supports.
128 *
129 * They are by no means "optional" in the sense that monitors can
130 * ignore them. Just that they are not persistent.
131 */
132 mon_feature_t optional_features;
133
134 /**
135 * Returns the set of features required by this monmap.
136 *
137 * The features required by this monmap is the union of all the
138 * currently set persistent features and the currently set optional
139 * features.
140 *
141 * @returns the set of features required by this monmap
142 */
143 mon_feature_t get_required_features() const {
144 return (persistent_features | optional_features);
145 }
146
11fdf7f2 147 // upgrade gate
9f95a23c 148 ceph_release_t min_mon_release{ceph_release_t::unknown};
11fdf7f2 149
9f95a23c
TL
150 void _add_ambiguous_addr(const std::string& name,
151 entity_addr_t addr,
152 int priority,
153 int weight,
154 bool for_mkfs);
11fdf7f2 155
f67539c2
TL
156 enum election_strategy {
157 // Keep in sync with ElectionLogic.h!
158 CLASSIC = 1, // the original rank-based one
159 DISALLOW = 2, // disallow a set from being leader
160 CONNECTIVITY = 3 // includes DISALLOW, extends to prefer stronger connections
161 };
162 election_strategy strategy = CLASSIC;
163 std::set<std::string> disallowed_leaders; // can't be leader under CONNECTIVITY/DISALLOW
164 bool stretch_mode_enabled = false;
20effc67
TL
165 std::string tiebreaker_mon;
166 std::set<std::string> stretch_marked_down_mons; // can't be leader until fully recovered
f67539c2 167
7c673cae 168public:
11fdf7f2
TL
169 void calc_legacy_ranks();
170 void calc_addr_mons() {
171 // populate addr_mons
172 addr_mons.clear();
173 for (auto& p : mon_info) {
174 for (auto& a : p.second.public_addrs.v) {
175 addr_mons[a] = p.first;
176 }
177 }
178 }
7c673cae
FG
179
180 MonMap()
181 : epoch(0) {
7c673cae
FG
182 }
183
184 uuid_d& get_fsid() { return fsid; }
185
186 unsigned size() const {
187 return mon_info.size();
188 }
189
11fdf7f2
TL
190 unsigned min_quorum_size(unsigned total_mons=0) const {
191 if (total_mons == 0) {
192 total_mons = size();
193 }
194 return total_mons / 2 + 1;
195 }
196
7c673cae
FG
197 epoch_t get_epoch() const { return epoch; }
198 void set_epoch(epoch_t e) { epoch = e; }
199
200 /**
201 * Obtain list of public facing addresses
202 *
203 * @param ls list to populate with the monitors' addresses
204 */
9f95a23c 205 void list_addrs(std::list<entity_addr_t>& ls) const {
11fdf7f2
TL
206 for (auto& i : mon_info) {
207 for (auto& j : i.second.public_addrs.v) {
208 ls.push_back(j);
209 }
7c673cae
FG
210 }
211 }
212
224ce89b
WB
213 /**
214 * Add new monitor to the monmap
215 *
216 * @param m monitor info of the new monitor
217 */
11fdf7f2
TL
218 void add(const mon_info_t& m) {
219 ceph_assert(mon_info.count(m.name) == 0);
220 for (auto& a : m.public_addrs.v) {
221 ceph_assert(addr_mons.count(a) == 0);
222 }
223 mon_info[m.name] = m;
224 if (get_required_features().contains_all(
225 ceph::features::mon::FEATURE_NAUTILUS)) {
226 ranks.push_back(m.name);
227 ceph_assert(ranks.size() == mon_info.size());
228 } else {
229 calc_legacy_ranks();
230 }
231 calc_addr_mons();
224ce89b
WB
232 }
233
7c673cae
FG
234 /**
235 * Add new monitor to the monmap
236 *
237 * @param name Monitor name (i.e., 'foo' in 'mon.foo')
238 * @param addr Monitor's public address
239 */
9f95a23c
TL
240 void add(const std::string &name, const entity_addrvec_t &addrv,
241 uint16_t priority=0, uint16_t weight=0) {
242 add(mon_info_t(name, addrv, priority, weight));
7c673cae 243 }
224ce89b 244
7c673cae
FG
245 /**
246 * Remove monitor from the monmap
247 *
248 * @param name Monitor name (i.e., 'foo' in 'mon.foo')
249 */
9f95a23c 250 void remove(const std::string &name) {
f67539c2 251 // this must match what we do in ConnectionTracker::notify_rank_removed
11fdf7f2 252 ceph_assert(mon_info.count(name));
f67539c2 253 int rank = get_rank(name);
7c673cae 254 mon_info.erase(name);
f67539c2 255 disallowed_leaders.erase(name);
11fdf7f2 256 ceph_assert(mon_info.count(name) == 0);
f67539c2
TL
257 if (rank >= 0 ) {
258 removed_ranks.insert(rank);
259 }
11fdf7f2
TL
260 if (get_required_features().contains_all(
261 ceph::features::mon::FEATURE_NAUTILUS)) {
262 ranks.erase(std::find(ranks.begin(), ranks.end(), name));
263 ceph_assert(ranks.size() == mon_info.size());
264 } else {
265 calc_legacy_ranks();
266 }
267 calc_addr_mons();
7c673cae
FG
268 }
269
270 /**
271 * Rename monitor from @p oldname to @p newname
272 *
273 * @param oldname monitor's current name (i.e., 'foo' in 'mon.foo')
274 * @param newname monitor's new name (i.e., 'bar' in 'mon.bar')
275 */
9f95a23c 276 void rename(std::string oldname, std::string newname) {
11fdf7f2
TL
277 ceph_assert(contains(oldname));
278 ceph_assert(!contains(newname));
7c673cae
FG
279 mon_info[newname] = mon_info[oldname];
280 mon_info.erase(oldname);
281 mon_info[newname].name = newname;
11fdf7f2
TL
282 if (get_required_features().contains_all(
283 ceph::features::mon::FEATURE_NAUTILUS)) {
284 *std::find(ranks.begin(), ranks.end(), oldname) = newname;
285 ceph_assert(ranks.size() == mon_info.size());
286 } else {
287 calc_legacy_ranks();
288 }
289 calc_addr_mons();
290 }
291
9f95a23c 292 int set_rank(const std::string& name, int rank) {
11fdf7f2
TL
293 int oldrank = get_rank(name);
294 if (oldrank < 0) {
295 return -ENOENT;
296 }
297 if (rank < 0 || rank >= (int)ranks.size()) {
298 return -EINVAL;
299 }
300 if (oldrank != rank) {
301 ranks.erase(ranks.begin() + oldrank);
302 ranks.insert(ranks.begin() + rank, name);
303 }
304 return 0;
7c673cae
FG
305 }
306
9f95a23c 307 bool contains(const std::string& name) const {
7c673cae
FG
308 return mon_info.count(name);
309 }
310
311 /**
312 * Check if monmap contains a monitor with address @p a
313 *
314 * @note checks for all addresses a monitor may have, public or otherwise.
315 *
316 * @param a monitor address
317 * @returns true if monmap contains a monitor with address @p;
318 * false otherwise.
319 */
9f95a23c 320 bool contains(const entity_addr_t &a, std::string *name=nullptr) const {
11fdf7f2
TL
321 for (auto& i : mon_info) {
322 for (auto& j : i.second.public_addrs.v) {
323 if (j == a) {
324 if (name) {
325 *name = i.first;
326 }
327 return true;
328 }
329 }
330 }
331 return false;
332 }
9f95a23c 333 bool contains(const entity_addrvec_t &av, std::string *name=nullptr) const {
11fdf7f2
TL
334 for (auto& i : mon_info) {
335 for (auto& j : i.second.public_addrs.v) {
336 for (auto& k : av.v) {
337 if (j == k) {
338 if (name) {
339 *name = i.first;
340 }
341 return true;
342 }
343 }
344 }
7c673cae
FG
345 }
346 return false;
347 }
348
9f95a23c 349 std::string get_name(unsigned n) const {
11fdf7f2 350 ceph_assert(n < ranks.size());
7c673cae
FG
351 return ranks[n];
352 }
9f95a23c
TL
353 std::string get_name(const entity_addr_t& a) const {
354 std::map<entity_addr_t, std::string>::const_iterator p = addr_mons.find(a);
7c673cae 355 if (p == addr_mons.end())
9f95a23c 356 return std::string();
7c673cae
FG
357 else
358 return p->second;
359 }
9f95a23c 360 std::string get_name(const entity_addrvec_t& av) const {
11fdf7f2 361 for (auto& i : av.v) {
9f95a23c 362 std::map<entity_addr_t, std::string>::const_iterator p = addr_mons.find(i);
11fdf7f2
TL
363 if (p != addr_mons.end())
364 return p->second;
365 }
9f95a23c 366 return std::string();
11fdf7f2 367 }
7c673cae 368
9f95a23c 369 int get_rank(const std::string& n) const {
11fdf7f2
TL
370 if (auto found = std::find(ranks.begin(), ranks.end(), n);
371 found != ranks.end()) {
372 return std::distance(ranks.begin(), found);
373 } else {
374 return -1;
375 }
7c673cae 376 }
11fdf7f2 377 int get_rank(const entity_addr_t& a) const {
9f95a23c 378 std::string n = get_name(a);
11fdf7f2
TL
379 if (!n.empty()) {
380 return get_rank(n);
381 }
382 return -1;
383 }
384 int get_rank(const entity_addrvec_t& av) const {
9f95a23c 385 std::string n = get_name(av);
11fdf7f2
TL
386 if (!n.empty()) {
387 return get_rank(n);
388 }
7c673cae
FG
389 return -1;
390 }
9f95a23c 391 bool get_addr_name(const entity_addr_t& a, std::string& name) {
7c673cae
FG
392 if (addr_mons.count(a) == 0)
393 return false;
394 name = addr_mons[a];
395 return true;
396 }
397
9f95a23c 398 const entity_addrvec_t& get_addrs(const std::string& n) const {
11fdf7f2 399 ceph_assert(mon_info.count(n));
9f95a23c 400 std::map<std::string,mon_info_t>::const_iterator p = mon_info.find(n);
11fdf7f2 401 return p->second.public_addrs;
7c673cae 402 }
11fdf7f2
TL
403 const entity_addrvec_t& get_addrs(unsigned m) const {
404 ceph_assert(m < ranks.size());
405 return get_addrs(ranks[m]);
7c673cae 406 }
9f95a23c 407 void set_addrvec(const std::string& n, const entity_addrvec_t& a) {
11fdf7f2
TL
408 ceph_assert(mon_info.count(n));
409 mon_info[n].public_addrs = a;
410 calc_addr_mons();
7c673cae 411 }
9f95a23c
TL
412 uint16_t get_priority(const std::string& n) const {
413 auto it = mon_info.find(n);
414 ceph_assert(it != mon_info.end());
415 return it->second.priority;
416 }
417 uint16_t get_weight(const std::string& n) const {
418 auto it = mon_info.find(n);
419 ceph_assert(it != mon_info.end());
420 return it->second.weight;
421 }
422 void set_weight(const std::string& n, uint16_t v) {
423 auto it = mon_info.find(n);
424 ceph_assert(it != mon_info.end());
425 it->second.weight = v;
426 }
7c673cae 427
9f95a23c
TL
428 void encode(ceph::buffer::list& blist, uint64_t con_features) const;
429 void decode(ceph::buffer::list& blist) {
11fdf7f2 430 auto p = std::cbegin(blist);
7c673cae
FG
431 decode(p);
432 }
9f95a23c 433 void decode(ceph::buffer::list::const_iterator& p);
7c673cae
FG
434
435 void generate_fsid() {
436 fsid.generate_random();
437 }
438
439 // read from/write to a file
440 int write(const char *fn);
441 int read(const char *fn);
442
443 /**
444 * build an initial bootstrap monmap from conf
445 *
446 * Build an initial bootstrap monmap from the config. This will
447 * try, in this order:
448 *
449 * 1 monmap -- an explicitly provided monmap
450 * 2 mon_host -- list of monitors
451 * 3 config [mon.*] sections, and 'mon addr' fields in those sections
452 *
453 * @param cct context (and associated config)
9f95a23c 454 * @param errout std::ostream to send error messages too
7c673cae 455 */
11fdf7f2 456#ifdef WITH_SEASTAR
9f95a23c 457 seastar::future<> build_initial(const crimson::common::ConfigProxy& conf, bool for_mkfs);
11fdf7f2 458#else
9f95a23c 459 int build_initial(CephContext *cct, bool for_mkfs, std::ostream& errout);
11fdf7f2 460#endif
7c673cae
FG
461 /**
462 * filter monmap given a set of initial members.
463 *
464 * Remove mons that aren't in the initial_members list. Add missing
465 * mons and give them dummy IPs (blank IPv4, with a non-zero
466 * nonce). If the name matches my_name, then my_addr will be used in
467 * place of a dummy addr.
468 *
469 * @param initial_members list of initial member names
470 * @param my_name name of self, can be blank
471 * @param my_addr my addr
472 * @param removed optional pointer to set to insert removed mon addrs to
473 */
474 void set_initial_members(CephContext *cct,
9f95a23c
TL
475 std::list<std::string>& initial_members,
476 std::string my_name,
11fdf7f2 477 const entity_addrvec_t& my_addrs,
9f95a23c 478 std::set<entity_addrvec_t> *removed);
7c673cae 479
9f95a23c
TL
480 void print(std::ostream& out) const;
481 void print_summary(std::ostream& out) const;
7c673cae 482 void dump(ceph::Formatter *f) const;
9f95a23c 483 void dump_summary(ceph::Formatter *f) const;
7c673cae 484
f67539c2
TL
485 void check_health(health_check_map_t *checks) const;
486
9f95a23c 487 static void generate_test_instances(std::list<MonMap*>& o);
11fdf7f2 488protected:
f91f0fd5
TL
489 /**
490 * build a monmap from a list of entity_addrvec_t's
491 *
492 * Give mons dummy names.
493 *
494 * @param addrs list of entity_addrvec_t's
495 * @param prefix prefix to prepend to generated mon names
496 */
497 void init_with_addrs(const std::vector<entity_addrvec_t>& addrs,
498 bool for_mkfs,
499 std::string_view prefix);
11fdf7f2
TL
500 /**
501 * build a monmap from a list of ips
502 *
503 * Give mons dummy names.
504 *
505 * @param hosts list of ips, space or comma separated
506 * @param prefix prefix to prepend to generated mon names
507 * @return 0 for success, -errno on error
508 */
509 int init_with_ips(const std::string& ips,
510 bool for_mkfs,
f91f0fd5 511 std::string_view prefix);
11fdf7f2
TL
512 /**
513 * build a monmap from a list of hostnames
514 *
515 * Give mons dummy names.
516 *
517 * @param hosts list of ips, space or comma separated
518 * @param prefix prefix to prepend to generated mon names
519 * @return 0 for success, -errno on error
520 */
521 int init_with_hosts(const std::string& hostlist,
522 bool for_mkfs,
f91f0fd5 523 std::string_view prefix);
11fdf7f2
TL
524 int init_with_config_file(const ConfigProxy& conf, std::ostream& errout);
525#if WITH_SEASTAR
526 seastar::future<> read_monmap(const std::string& monmap);
527 /// try to build monmap with different settings, like
528 /// mon_host, mon* sections, and mon_dns_srv_name
9f95a23c 529 seastar::future<> build_monmap(const crimson::common::ConfigProxy& conf, bool for_mkfs);
11fdf7f2
TL
530 /// initialize monmap by resolving given service name
531 seastar::future<> init_with_dns_srv(bool for_mkfs, const std::string& name);
20effc67
TL
532 /// initialize monmap with `mon_host` or `mon_host_override`
533 bool maybe_init_with_mon_host(const std::string& mon_host, bool for_mkfs);
11fdf7f2
TL
534#else
535 /// read from encoded monmap file
536 int init_with_monmap(const std::string& monmap, std::ostream& errout);
537 int init_with_dns_srv(CephContext* cct, std::string srv_name, bool for_mkfs,
538 std::ostream& errout);
539#endif
7c673cae
FG
540};
541WRITE_CLASS_ENCODER_FEATURES(MonMap)
542
9f95a23c 543inline std::ostream& operator<<(std::ostream &out, const MonMap &m) {
7c673cae
FG
544 m.print_summary(out);
545 return out;
546}
547
548#endif