]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MonMap.cc
buildsys: switch source download to quincy
[ceph.git] / ceph / src / mon / MonMap.cc
CommitLineData
11fdf7f2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
7c673cae
FG
3
4#include "MonMap.h"
5
6#include <algorithm>
7#include <sys/types.h>
8#include <sys/stat.h>
9#include <fcntl.h>
10
11fdf7f2
TL
11#ifdef WITH_SEASTAR
12#include <seastar/core/fstream.hh>
13#include <seastar/core/reactor.hh>
14#include <seastar/net/dns.hh>
15#include "crimson/common/config_proxy.h"
16#endif
17
7c673cae
FG
18#include "common/Formatter.h"
19
20#include "include/ceph_features.h"
21#include "include/addr_parsing.h"
22#include "common/ceph_argparse.h"
23#include "common/dns_resolve.h"
24#include "common/errno.h"
7c673cae 25#include "common/dout.h"
11fdf7f2 26#include "common/Clock.h"
f67539c2 27#include "mon/health_check.h"
7c673cae 28
9f95a23c
TL
29using std::list;
30using std::map;
31using std::ostream;
32using std::set;
33using std::string;
34using std::vector;
35
36using ceph::DNSResolver;
7c673cae
FG
37using ceph::Formatter;
38
9f95a23c 39void mon_info_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae 40{
f67539c2
TL
41 uint8_t v = 5;
42 uint8_t min_v = 1;
43 if (!crush_loc.empty()) {
44 // we added crush_loc in version 5, but need to let old clients decode it
a4b75251 45 // so just leave the min_v at version 1. Monitors are protected
f67539c2
TL
46 // from misunderstandings about location because setting it is blocked
47 // on FEATURE_PINGING
a4b75251 48 min_v = 1;
f67539c2 49 }
11fdf7f2
TL
50 if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
51 v = 2;
52 }
f67539c2 53 ENCODE_START(v, min_v, bl);
11fdf7f2
TL
54 encode(name, bl);
55 if (v < 3) {
f67539c2 56 ceph_assert(min_v == 1);
eafe8130
TL
57 auto a = public_addrs.legacy_addr();
58 if (a != entity_addr_t()) {
59 encode(a, bl, features);
60 } else {
61 // note: we don't have a legacy addr here, so lie so that it looks
62 // like one, just so that old clients get a valid-looking map.
63 // they won't be able to talk to the v2 mons, but that's better
64 // than nothing.
65 encode(public_addrs.as_legacy_addr(), bl, features);
66 }
11fdf7f2
TL
67 } else {
68 encode(public_addrs, bl, features);
69 }
70 encode(priority, bl);
9f95a23c 71 encode(weight, bl);
f67539c2 72 encode(crush_loc, bl);
7c673cae
FG
73 ENCODE_FINISH(bl);
74}
75
9f95a23c 76void mon_info_t::decode(ceph::buffer::list::const_iterator& p)
7c673cae 77{
f67539c2 78 DECODE_START(5, p);
11fdf7f2
TL
79 decode(name, p);
80 decode(public_addrs, p);
224ce89b 81 if (struct_v >= 2) {
11fdf7f2 82 decode(priority, p);
224ce89b 83 }
9f95a23c
TL
84 if (struct_v >= 4) {
85 decode(weight, p);
86 }
f67539c2
TL
87 if (struct_v >= 5) {
88 decode(crush_loc, p);
89 }
7c673cae
FG
90 DECODE_FINISH(p);
91}
92
93void mon_info_t::print(ostream& out) const
94{
95 out << "mon." << name
11fdf7f2 96 << " addrs " << public_addrs
9f95a23c 97 << " priority " << priority
f67539c2
TL
98 << " weight " << weight
99 << " crush location " << crush_loc;
7c673cae
FG
100}
101
7c673cae
FG
102namespace {
103 struct rank_cmp {
104 bool operator()(const mon_info_t &a, const mon_info_t &b) const {
11fdf7f2 105 if (a.public_addrs.legacy_or_front_addr() == b.public_addrs.legacy_or_front_addr())
7c673cae 106 return a.name < b.name;
11fdf7f2 107 return a.public_addrs.legacy_or_front_addr() < b.public_addrs.legacy_or_front_addr();
7c673cae
FG
108 }
109 };
110}
111
11fdf7f2
TL
112void MonMap::calc_legacy_ranks()
113{
7c673cae 114 ranks.resize(mon_info.size());
7c673cae
FG
115
116 // Used to order entries according to public_addr, because that's
117 // how the ranks are expected to be ordered by. We may expand this
118 // later on, according to some other criteria, by specifying a
119 // different comparator.
120 //
121 // Please note that we use a 'set' here instead of resorting to
122 // std::sort() because we need more info than that's available in
123 // the vector. The vector will thus be ordered by, e.g., public_addr
124 // while only containing the names of each individual monitor.
125 // The only way of achieving this with std::sort() would be to first
126 // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo'
127 // with custom comparison functions, and then copy each invidual entry
128 // to a new vector. Unless there's a simpler way, we don't think the
129 // added complexity makes up for the additional memory usage of a 'set'.
130 set<mon_info_t, rank_cmp> tmp;
131
9f95a23c 132 for (auto p = mon_info.begin(); p != mon_info.end(); ++p) {
7c673cae
FG
133 mon_info_t &m = p->second;
134 tmp.insert(m);
7c673cae
FG
135 }
136
137 // map the set to the actual ranks etc
138 unsigned i = 0;
9f95a23c 139 for (auto p = tmp.begin(); p != tmp.end(); ++p, ++i) {
7c673cae
FG
140 ranks[i] = p->name;
141 }
142}
143
9f95a23c 144void MonMap::encode(ceph::buffer::list& blist, uint64_t con_features) const
7c673cae 145{
7c673cae 146 if ((con_features & CEPH_FEATURE_MONNAMES) == 0) {
11fdf7f2 147 using ceph::encode;
7c673cae 148 __u16 v = 1;
11fdf7f2 149 encode(v, blist);
9f95a23c 150 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
151 encode(epoch, blist);
152 vector<entity_inst_t> mon_inst(ranks.size());
153 for (unsigned n = 0; n < ranks.size(); n++) {
154 mon_inst[n].name = entity_name_t::MON(n);
155 mon_inst[n].addr = get_addrs(n).legacy_addr();
156 }
157 encode(mon_inst, blist, con_features);
158 encode(last_changed, blist);
159 encode(created, blist);
7c673cae
FG
160 return;
161 }
162
11fdf7f2
TL
163 map<string,entity_addr_t> legacy_mon_addr;
164 if (!HAVE_FEATURE(con_features, MONENC) ||
165 !HAVE_FEATURE(con_features, SERVER_NAUTILUS)) {
166 for (auto& [name, info] : mon_info) {
167 legacy_mon_addr[name] = info.public_addrs.legacy_addr();
168 }
169 }
170
171 if (!HAVE_FEATURE(con_features, MONENC)) {
172 /* we keep the mon_addr map when encoding to ensure compatibility
173 * with clients and other monitors that do not yet support the 'mons'
174 * map. This map keeps its original behavior, containing a mapping of
175 * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public
176 * address -- which is obtained from the public address of each entry
177 * in the 'mons' map.
178 */
179 using ceph::encode;
7c673cae 180 __u16 v = 2;
11fdf7f2 181 encode(v, blist);
9f95a23c 182 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
183 encode(epoch, blist);
184 encode(legacy_mon_addr, blist, con_features);
185 encode(last_changed, blist);
186 encode(created, blist);
187 return;
188 }
189
190 if (!HAVE_FEATURE(con_features, SERVER_NAUTILUS)) {
191 ENCODE_START(5, 3, blist);
9f95a23c 192 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
193 encode(epoch, blist);
194 encode(legacy_mon_addr, blist, con_features);
195 encode(last_changed, blist);
196 encode(created, blist);
197 encode(persistent_features, blist);
198 encode(optional_features, blist);
199 encode(mon_info, blist, con_features);
200 ENCODE_FINISH(blist);
201 return;
202 }
203
f67539c2 204 ENCODE_START(9, 6, blist);
9f95a23c 205 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
206 encode(epoch, blist);
207 encode(last_changed, blist);
208 encode(created, blist);
209 encode(persistent_features, blist);
210 encode(optional_features, blist);
211 encode(mon_info, blist, con_features);
212 encode(ranks, blist);
213 encode(min_mon_release, blist);
f67539c2
TL
214 encode(removed_ranks, blist);
215 uint8_t t = strategy;
216 encode(t, blist);
217 encode(disallowed_leaders, blist);
218 encode(stretch_mode_enabled, blist);
219 encode(tiebreaker_mon, blist);
220 encode(stretch_marked_down_mons, blist);
7c673cae
FG
221 ENCODE_FINISH(blist);
222}
223
9f95a23c 224void MonMap::decode(ceph::buffer::list::const_iterator& p)
7c673cae
FG
225{
226 map<string,entity_addr_t> mon_addr;
f67539c2 227 DECODE_START_LEGACY_COMPAT_LEN_16(9, 3, 3, p);
9f95a23c 228 ceph::decode_raw(fsid, p);
11fdf7f2 229 decode(epoch, p);
7c673cae
FG
230 if (struct_v == 1) {
231 vector<entity_inst_t> mon_inst;
11fdf7f2 232 decode(mon_inst, p);
7c673cae
FG
233 for (unsigned i = 0; i < mon_inst.size(); i++) {
234 char n[2];
235 n[0] = '0' + i;
236 n[1] = 0;
237 string name = n;
238 mon_addr[name] = mon_inst[i].addr;
239 }
11fdf7f2
TL
240 } else if (struct_v < 6) {
241 decode(mon_addr, p);
7c673cae 242 }
11fdf7f2
TL
243 decode(last_changed, p);
244 decode(created, p);
7c673cae 245 if (struct_v >= 4) {
11fdf7f2
TL
246 decode(persistent_features, p);
247 decode(optional_features, p);
7c673cae 248 }
11fdf7f2
TL
249 if (struct_v < 5) {
250 // generate mon_info from legacy mon_addr
251 for (auto& [name, addr] : mon_addr) {
252 mon_info_t &m = mon_info[name];
253 m.name = name;
254 m.public_addrs = entity_addrvec_t(addr);
255 }
256 } else {
257 decode(mon_info, p);
258 }
259 if (struct_v < 6) {
260 calc_legacy_ranks();
261 } else {
262 decode(ranks, p);
263 }
264 if (struct_v >= 7) {
265 decode(min_mon_release, p);
7c673cae 266 } else {
11fdf7f2 267 min_mon_release = infer_ceph_release_from_mon_features(persistent_features);
7c673cae 268 }
f67539c2
TL
269 if (struct_v >= 8) {
270 decode(removed_ranks, p);
271 uint8_t t;
272 decode(t, p);
273 strategy = static_cast<election_strategy>(t);
274 decode(disallowed_leaders, p);
275 }
276 if (struct_v >= 9) {
277 decode(stretch_mode_enabled, p);
278 decode(tiebreaker_mon, p);
279 decode(stretch_marked_down_mons, p);
280 } else {
281 stretch_mode_enabled = false;
282 tiebreaker_mon = "";
283 stretch_marked_down_mons.clear();
284 }
11fdf7f2 285 calc_addr_mons();
7c673cae 286 DECODE_FINISH(p);
7c673cae
FG
287}
288
289void MonMap::generate_test_instances(list<MonMap*>& o)
290{
291 o.push_back(new MonMap);
292 o.push_back(new MonMap);
293 o.back()->epoch = 1;
294 o.back()->last_changed = utime_t(123, 456);
295 o.back()->created = utime_t(789, 101112);
11fdf7f2 296 o.back()->add("one", entity_addrvec_t());
7c673cae
FG
297
298 MonMap *m = new MonMap;
299 {
300 m->epoch = 1;
301 m->last_changed = utime_t(123, 456);
302
11fdf7f2
TL
303 entity_addrvec_t empty_addr_one = entity_addrvec_t(entity_addr_t());
304 empty_addr_one.v[0].set_nonce(1);
7c673cae 305 m->add("empty_addr_one", empty_addr_one);
11fdf7f2
TL
306 entity_addrvec_t empty_addr_two = entity_addrvec_t(entity_addr_t());
307 empty_addr_two.v[0].set_nonce(2);
308 m->add("empty_addr_two", empty_addr_two);
7c673cae
FG
309
310 const char *local_pub_addr_s = "127.0.1.2";
311
312 const char *end_p = local_pub_addr_s + strlen(local_pub_addr_s);
11fdf7f2 313 entity_addrvec_t local_pub_addr;
7c673cae
FG
314 local_pub_addr.parse(local_pub_addr_s, &end_p);
315
9f95a23c 316 m->add(mon_info_t("filled_pub_addr", entity_addrvec_t(local_pub_addr), 1, 1));
7c673cae 317
11fdf7f2 318 m->add("empty_addr_zero", entity_addrvec_t());
7c673cae
FG
319 }
320 o.push_back(m);
321}
322
323// read from/write to a file
324int MonMap::write(const char *fn)
325{
326 // encode
9f95a23c 327 ceph::buffer::list bl;
7c673cae
FG
328 encode(bl, CEPH_FEATURES_ALL);
329
330 return bl.write_file(fn);
331}
332
333int MonMap::read(const char *fn)
334{
335 // read
9f95a23c 336 ceph::buffer::list bl;
7c673cae
FG
337 std::string error;
338 int r = bl.read_file(fn, &error);
339 if (r < 0)
340 return r;
341 decode(bl);
342 return 0;
343}
344
345void MonMap::print_summary(ostream& out) const
346{
347 out << "e" << epoch << ": "
348 << mon_info.size() << " mons at {";
349 // the map that we used to print, as it was, no longer
350 // maps strings to the monitor's public address, but to
351 // mon_info_t instead. As such, print the map in a way
352 // that keeps the expected format.
353 bool has_printed = false;
9f95a23c 354 for (auto p = mon_info.begin(); p != mon_info.end(); ++p) {
7c673cae
FG
355 if (has_printed)
356 out << ",";
11fdf7f2 357 out << p->first << "=" << p->second.public_addrs;
7c673cae
FG
358 has_printed = true;
359 }
360 out << "}";
361}
362
363void MonMap::print(ostream& out) const
364{
365 out << "epoch " << epoch << "\n";
366 out << "fsid " << fsid << "\n";
367 out << "last_changed " << last_changed << "\n";
368 out << "created " << created << "\n";
f67539c2 369 out << "min_mon_release " << to_integer<unsigned>(min_mon_release)
9f95a23c 370 << " (" << min_mon_release << ")\n";
f67539c2 371 out << "election_strategy: " << strategy << "\n";
a4b75251
TL
372 if (stretch_mode_enabled) {
373 out << "stretch_mode_enabled " << stretch_mode_enabled << "\n";
374 out << "tiebreaker_mon " << tiebreaker_mon << "\n";
375 }
376 if (stretch_mode_enabled ||
377 !disallowed_leaders.empty()) {
f67539c2
TL
378 out << "disallowed_leaders " << disallowed_leaders << "\n";
379 }
7c673cae 380 unsigned i = 0;
9f95a23c 381 for (auto p = ranks.begin(); p != ranks.end(); ++p) {
f67539c2
TL
382 const auto &mi = mon_info.find(*p);
383 ceph_assert(mi != mon_info.end());
384 out << i++ << ": " << mi->second.public_addrs << " mon." << *p;
385 if (!mi->second.crush_loc.empty()) {
386 out << "; crush_location " << mi->second.crush_loc;
387 }
388 out << "\n";
7c673cae
FG
389 }
390}
391
392void MonMap::dump(Formatter *f) const
393{
394 f->dump_unsigned("epoch", epoch);
395 f->dump_stream("fsid") << fsid;
9f95a23c
TL
396 last_changed.gmtime(f->dump_stream("modified"));
397 created.gmtime(f->dump_stream("created"));
f67539c2
TL
398 f->dump_unsigned("min_mon_release", to_integer<unsigned>(min_mon_release));
399 f->dump_string("min_mon_release_name", to_string(min_mon_release));
400 f->dump_int ("election_strategy", strategy);
401 f->dump_stream("disallowed_leaders: ") << disallowed_leaders;
402 f->dump_bool("stretch_mode", stretch_mode_enabled);
a4b75251 403 f->dump_string("tiebreaker_mon", tiebreaker_mon);
7c673cae
FG
404 f->open_object_section("features");
405 persistent_features.dump(f, "persistent");
406 optional_features.dump(f, "optional");
407 f->close_section();
408 f->open_array_section("mons");
409 int i = 0;
9f95a23c 410 for (auto p = ranks.begin(); p != ranks.end(); ++p, ++i) {
7c673cae
FG
411 f->open_object_section("mon");
412 f->dump_int("rank", i);
413 f->dump_string("name", *p);
11fdf7f2
TL
414 f->dump_object("public_addrs", get_addrs(*p));
415 // compat: make these look like pre-nautilus entity_addr_t
416 f->dump_stream("addr") << get_addrs(*p).get_legacy_str();
417 f->dump_stream("public_addr") << get_addrs(*p).get_legacy_str();
9f95a23c
TL
418 f->dump_unsigned("priority", get_priority(*p));
419 f->dump_unsigned("weight", get_weight(*p));
f67539c2
TL
420 const auto &mi = mon_info.find(*p);
421 // we don't need to assert this validity as all the get_* functions did
422 f->dump_stream("crush_location") << mi->second.crush_loc;
7c673cae
FG
423 f->close_section();
424 }
425 f->close_section();
426}
427
9f95a23c
TL
428void MonMap::dump_summary(Formatter *f) const
429{
430 f->dump_unsigned("epoch", epoch);
f67539c2 431 f->dump_string("min_mon_release_name", to_string(min_mon_release));
9f95a23c
TL
432 f->dump_unsigned("num_mons", ranks.size());
433}
434
11fdf7f2
TL
435// an ambiguous mon addr may be legacy or may be msgr2--we aren' sure.
436// when that happens we need to try them both (unless we can
437// reasonably infer from the port number which it is).
438void MonMap::_add_ambiguous_addr(const string& name,
9f95a23c
TL
439 entity_addr_t addr,
440 int priority,
441 int weight,
442 bool for_mkfs)
11fdf7f2
TL
443{
444 if (addr.get_type() != entity_addr_t::TYPE_ANY) {
445 // a v1: or v2: prefix was specified
446 if (addr.get_port() == 0) {
447 // use default port
9f95a23c 448 if (addr.get_type() == entity_addr_t::TYPE_LEGACY) {
11fdf7f2
TL
449 addr.set_port(CEPH_MON_PORT_LEGACY);
450 } else if (addr.get_type() == entity_addr_t::TYPE_MSGR2) {
451 addr.set_port(CEPH_MON_PORT_IANA);
452 } else {
453 // wth
454 return;
455 }
456 if (!contains(addr)) {
9f95a23c 457 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
458 }
459 } else {
460 if (!contains(addr)) {
9f95a23c 461 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
462 }
463 }
464 } else {
465 // no v1: or v2: prefix specified
466 if (addr.get_port() == CEPH_MON_PORT_LEGACY) {
467 // legacy port implies legacy addr
468 addr.set_type(entity_addr_t::TYPE_LEGACY);
469 if (!contains(addr)) {
470 if (!for_mkfs) {
9f95a23c 471 add(name + "-legacy", entity_addrvec_t(addr), priority, weight);
11fdf7f2 472 } else {
9f95a23c 473 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
474 }
475 }
476 } else if (addr.get_port() == CEPH_MON_PORT_IANA) {
477 // iana port implies msgr2 addr
478 addr.set_type(entity_addr_t::TYPE_MSGR2);
479 if (!contains(addr)) {
9f95a23c 480 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
481 }
482 } else if (addr.get_port() == 0) {
483 // no port; include both msgr2 and legacy ports
484 if (!for_mkfs) {
485 addr.set_type(entity_addr_t::TYPE_MSGR2);
486 addr.set_port(CEPH_MON_PORT_IANA);
487 if (!contains(addr)) {
9f95a23c 488 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
489 }
490 addr.set_type(entity_addr_t::TYPE_LEGACY);
491 addr.set_port(CEPH_MON_PORT_LEGACY);
492 if (!contains(addr)) {
9f95a23c 493 add(name + "-legacy", entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
494 }
495 } else {
496 entity_addrvec_t av;
497 addr.set_type(entity_addr_t::TYPE_MSGR2);
498 addr.set_port(CEPH_MON_PORT_IANA);
499 av.v.push_back(addr);
500 addr.set_type(entity_addr_t::TYPE_LEGACY);
501 addr.set_port(CEPH_MON_PORT_LEGACY);
502 av.v.push_back(addr);
503 if (!contains(av)) {
9f95a23c 504 add(name, av, priority, weight);
11fdf7f2
TL
505 }
506 }
507 } else {
508 addr.set_type(entity_addr_t::TYPE_MSGR2);
509 if (!contains(addr)) {
9f95a23c 510 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
511 }
512 if (!for_mkfs) {
513 // try legacy on same port too
514 addr.set_type(entity_addr_t::TYPE_LEGACY);
515 if (!contains(addr)) {
9f95a23c 516 add(name + "-legacy", entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
517 }
518 }
519 }
520 }
521}
7c673cae 522
f91f0fd5
TL
523void MonMap::init_with_addrs(const std::vector<entity_addrvec_t>& addrs,
524 bool for_mkfs,
525 std::string_view prefix)
526{
527 char id = 'a';
528 for (auto& addr : addrs) {
529 string name{prefix};
530 name += id++;
531 if (addr.v.size() == 1) {
532 _add_ambiguous_addr(name, addr.front(), 0, 0, for_mkfs);
533 } else {
534 // they specified an addrvec, so let's assume they also specified
535 // the addr *type* and *port*. (we could possibly improve this?)
536 add(name, addr, 0);
537 }
538 }
539}
540
11fdf7f2
TL
541int MonMap::init_with_ips(const std::string& ips,
542 bool for_mkfs,
f91f0fd5 543 std::string_view prefix)
7c673cae 544{
11fdf7f2
TL
545 vector<entity_addrvec_t> addrs;
546 if (!parse_ip_port_vec(
547 ips.c_str(), addrs,
548 entity_addr_t::TYPE_ANY)) {
549 return -EINVAL;
550 }
551 if (addrs.empty())
552 return -ENOENT;
f91f0fd5 553 init_with_addrs(addrs, for_mkfs, prefix);
11fdf7f2
TL
554 return 0;
555}
7c673cae 556
11fdf7f2
TL
557int MonMap::init_with_hosts(const std::string& hostlist,
558 bool for_mkfs,
f91f0fd5 559 std::string_view prefix)
11fdf7f2 560{
7c673cae 561 // maybe they passed us a DNS-resolvable name
11fdf7f2 562 char *hosts = resolve_addrs(hostlist.c_str());
7c673cae
FG
563 if (!hosts)
564 return -EINVAL;
11fdf7f2
TL
565
566 vector<entity_addrvec_t> addrs;
567 bool success = parse_ip_port_vec(
568 hosts, addrs,
92f5a8d4 569 entity_addr_t::TYPE_ANY);
7c673cae
FG
570 free(hosts);
571 if (!success)
572 return -EINVAL;
7c673cae
FG
573 if (addrs.empty())
574 return -ENOENT;
f91f0fd5 575 init_with_addrs(addrs, for_mkfs, prefix);
11fdf7f2 576 calc_legacy_ranks();
7c673cae
FG
577 return 0;
578}
579
580void MonMap::set_initial_members(CephContext *cct,
581 list<std::string>& initial_members,
11fdf7f2
TL
582 string my_name,
583 const entity_addrvec_t& my_addrs,
584 set<entity_addrvec_t> *removed)
7c673cae
FG
585{
586 // remove non-initial members
587 unsigned i = 0;
588 while (i < size()) {
589 string n = get_name(i);
11fdf7f2
TL
590 if (std::find(initial_members.begin(), initial_members.end(), n)
591 != initial_members.end()) {
592 lgeneric_dout(cct, 1) << " keeping " << n << " " << get_addrs(i) << dendl;
7c673cae
FG
593 i++;
594 continue;
595 }
596
11fdf7f2
TL
597 lgeneric_dout(cct, 1) << " removing " << get_name(i) << " " << get_addrs(i)
598 << dendl;
599 if (removed) {
600 removed->insert(get_addrs(i));
601 }
7c673cae 602 remove(n);
11fdf7f2 603 ceph_assert(!contains(n));
7c673cae
FG
604 }
605
606 // add missing initial members
11fdf7f2
TL
607 for (auto& p : initial_members) {
608 if (!contains(p)) {
609 if (p == my_name) {
610 lgeneric_dout(cct, 1) << " adding self " << p << " " << my_addrs
611 << dendl;
612 add(p, my_addrs);
7c673cae
FG
613 } else {
614 entity_addr_t a;
615 a.set_type(entity_addr_t::TYPE_LEGACY);
616 a.set_family(AF_INET);
617 for (int n=1; ; n++) {
618 a.set_nonce(n);
619 if (!contains(a))
620 break;
621 }
11fdf7f2
TL
622 lgeneric_dout(cct, 1) << " adding " << p << " " << a << dendl;
623 add(p, entity_addrvec_t(a));
7c673cae 624 }
11fdf7f2 625 ceph_assert(contains(p));
7c673cae
FG
626 }
627 }
11fdf7f2 628 calc_legacy_ranks();
7c673cae
FG
629}
630
11fdf7f2
TL
631int MonMap::init_with_config_file(const ConfigProxy& conf,
632 std::ostream& errout)
7c673cae 633{
11fdf7f2
TL
634 std::vector<std::string> sections;
635 int ret = conf.get_all_sections(sections);
7c673cae
FG
636 if (ret) {
637 errout << "Unable to find any monitors in the configuration "
638 << "file, because there was an error listing the sections. error "
639 << ret << std::endl;
640 return -ENOENT;
641 }
11fdf7f2
TL
642 std::vector<std::string> mon_names;
643 for (const auto& section : sections) {
644 if (section.substr(0, 4) == "mon." && section.size() > 4) {
645 mon_names.push_back(section.substr(4));
7c673cae
FG
646 }
647 }
648
649 // Find an address for each monitor in the config file.
11fdf7f2
TL
650 for (const auto& mon_name : mon_names) {
651 std::vector<std::string> sections;
7c673cae
FG
652 std::string m_name("mon");
653 m_name += ".";
11fdf7f2 654 m_name += mon_name;
7c673cae
FG
655 sections.push_back(m_name);
656 sections.push_back("mon");
657 sections.push_back("global");
658 std::string val;
11fdf7f2 659 int res = conf.get_val_from_conf_file(sections, "mon addr", val, true);
7c673cae 660 if (res) {
11fdf7f2
TL
661 errout << "failed to get an address for mon." << mon_name
662 << ": error " << res << std::endl;
7c673cae
FG
663 continue;
664 }
11fdf7f2
TL
665 // the 'mon addr' field is a legacy field, so assume anything
666 // there on a weird port is a v1 address, and do not handle
667 // addrvecs.
7c673cae 668 entity_addr_t addr;
11fdf7f2
TL
669 if (!addr.parse(val.c_str(), nullptr, entity_addr_t::TYPE_LEGACY)) {
670 errout << "unable to parse address for mon." << mon_name
671 << ": addr='" << val << "'" << std::endl;
7c673cae
FG
672 continue;
673 }
11fdf7f2
TL
674 if (addr.get_port() == 0) {
675 addr.set_port(CEPH_MON_PORT_LEGACY);
676 }
224ce89b 677 uint16_t priority = 0;
11fdf7f2 678 if (!conf.get_val_from_conf_file(sections, "mon priority", val, false)) {
224ce89b
WB
679 try {
680 priority = std::stoul(val);
681 } catch (std::logic_error&) {
11fdf7f2 682 errout << "unable to parse priority for mon." << mon_name
224ce89b
WB
683 << ": priority='" << val << "'" << std::endl;
684 continue;
685 }
686 }
9f95a23c
TL
687 uint16_t weight = 0;
688 if (!conf.get_val_from_conf_file(sections, "mon weight", val, false)) {
689 try {
690 weight = std::stoul(val);
691 } catch (std::logic_error&) {
692 errout << "unable to parse weight for mon." << mon_name
693 << ": weight='" << val << "'"
694 << std::endl;
695 continue;
696 }
697 }
11fdf7f2 698
9f95a23c 699 // make sure this mon isn't already in the map
7c673cae
FG
700 if (contains(addr))
701 remove(get_name(addr));
11fdf7f2
TL
702 if (contains(mon_name))
703 remove(mon_name);
9f95a23c 704 _add_ambiguous_addr(mon_name, addr, priority, weight, false);
11fdf7f2
TL
705 }
706 return 0;
707}
708
f67539c2
TL
709void MonMap::check_health(health_check_map_t *checks) const
710{
711 if (stretch_mode_enabled) {
712 list<string> detail;
713 for (auto& p : mon_info) {
714 if (p.second.crush_loc.empty()) {
715 ostringstream ss;
716 ss << "mon " << p.first << " has no location set while in stretch mode";
717 detail.push_back(ss.str());
718 }
719 }
720 if (!detail.empty()) {
721 ostringstream ss;
722 ss << detail.size() << " monitor(s) have no location set while in stretch mode"
723 << "; this may cause issues with failover, OSD connections, netsplit handling, etc";
724 auto& d = checks->add("MON_LOCATION_NOT_SET", HEALTH_WARN,
725 ss.str(), detail.size());
726 d.detail.swap(detail);
727 }
728 }
729}
730
11fdf7f2
TL
731#ifdef WITH_SEASTAR
732
733using namespace seastar;
734
f67539c2 735seastar::future<> MonMap::read_monmap(const std::string& monmap)
11fdf7f2
TL
736{
737 return open_file_dma(monmap, open_flags::ro).then([this] (file f) {
738 return f.size().then([this, f = std::move(f)](size_t s) {
739 return do_with(make_file_input_stream(f), [this, s](input_stream<char>& in) {
740 return in.read_exactly(s).then([this](temporary_buffer<char> buf) {
9f95a23c
TL
741 ceph::buffer::list bl;
742 bl.push_back(ceph::buffer::ptr_node::create(
743 ceph::buffer::create(std::move(buf))));
11fdf7f2
TL
744 decode(bl);
745 });
746 });
747 });
748 });
749}
7c673cae 750
f67539c2 751seastar::future<> MonMap::init_with_dns_srv(bool for_mkfs, const std::string& name)
11fdf7f2
TL
752{
753 string domain;
754 string service = name;
755 // check if domain is also provided and extract it from srv_name
756 size_t idx = name.find("_");
757 if (idx != name.npos) {
758 domain = name.substr(idx + 1);
759 service = name.substr(0, idx);
7c673cae 760 }
9f95a23c
TL
761 return seastar::net::dns::get_srv_records(
762 seastar::net::dns_resolver::srv_proto::tcp,
763 service, domain).then([this](seastar::net::dns_resolver::srv_records records) {
11fdf7f2 764 return parallel_for_each(records, [this](auto record) {
9f95a23c
TL
765 return seastar::net::dns::resolve_name(record.target).then(
766 [record,this](seastar::net::inet_address a) {
11fdf7f2
TL
767 // the resolved address does not contain ceph specific info like nonce
768 // nonce or msgr proto (legacy, msgr2), so set entity_addr_t manually
769 entity_addr_t addr;
770 addr.set_type(entity_addr_t::TYPE_ANY);
771 addr.set_family(int(a.in_family()));
772 addr.set_port(record.port);
773 switch (a.in_family()) {
9f95a23c 774 case seastar::net::inet_address::family::INET:
11fdf7f2
TL
775 addr.in4_addr().sin_addr = a;
776 break;
9f95a23c 777 case seastar::net::inet_address::family::INET6:
11fdf7f2
TL
778 addr.in6_addr().sin6_addr = a;
779 break;
780 }
9f95a23c
TL
781 _add_ambiguous_addr(record.target,
782 addr,
783 record.priority,
784 record.weight,
785 false);
11fdf7f2
TL
786 });
787 });
788 }).handle_exception_type([](const std::system_error& e) {
789 // ignore DNS failures
790 return seastar::make_ready_future<>();
791 });
792}
7c673cae 793
9f95a23c 794seastar::future<> MonMap::build_monmap(const crimson::common::ConfigProxy& conf,
11fdf7f2
TL
795 bool for_mkfs)
796{
797 // -m foo?
798 if (const auto mon_host = conf.get_val<std::string>("mon_host");
799 !mon_host.empty()) {
800 if (auto ret = init_with_ips(mon_host, for_mkfs, "noname-"); ret == 0) {
801 return make_ready_future<>();
7c673cae 802 }
11fdf7f2
TL
803 // TODO: resolve_addrs() is a blocking call
804 if (auto ret = init_with_hosts(mon_host, for_mkfs, "noname-"); ret == 0) {
805 return make_ready_future<>();
806 } else {
807 throw std::runtime_error(cpp_strerror(ret));
808 }
809 }
7c673cae 810
11fdf7f2
TL
811 // What monitors are in the config file?
812 ostringstream errout;
813 if (auto ret = init_with_config_file(conf, errout); ret < 0) {
814 throw std::runtime_error(errout.str());
815 }
816 if (size() > 0) {
817 return make_ready_future<>();
818 }
819 // no info found from conf options lets try use DNS SRV records
820 const string srv_name = conf.get_val<std::string>("mon_dns_srv_name");
821 return init_with_dns_srv(for_mkfs, srv_name).then([this] {
822 if (size() == 0) {
823 throw std::runtime_error("no monitors specified to connect to.");
824 }
825 });
826}
7c673cae 827
f67539c2 828seastar::future<> MonMap::build_initial(const crimson::common::ConfigProxy& conf, bool for_mkfs)
11fdf7f2
TL
829{
830 // file?
831 if (const auto monmap = conf.get_val<std::string>("monmap");
832 !monmap.empty()) {
833 return read_monmap(monmap);
834 } else {
835 // fsid from conf?
836 if (const auto new_fsid = conf.get_val<uuid_d>("fsid");
837 !new_fsid.is_zero()) {
838 fsid = new_fsid;
7c673cae 839 }
11fdf7f2
TL
840 return build_monmap(conf, for_mkfs).then([this] {
841 created = ceph_clock_now();
842 last_changed = created;
843 calc_legacy_ranks();
844 });
845 }
846}
847
848#else // WITH_SEASTAR
849
850int MonMap::init_with_monmap(const std::string& monmap, std::ostream& errout)
851{
852 int r;
853 try {
854 r = read(monmap.c_str());
9f95a23c 855 } catch (ceph::buffer::error&) {
11fdf7f2
TL
856 r = -EINVAL;
857 }
858 if (r >= 0)
859 return 0;
860 errout << "unable to read/decode monmap from " << monmap
861 << ": " << cpp_strerror(-r) << std::endl;
862 return r;
863}
864
865int MonMap::init_with_dns_srv(CephContext* cct,
866 std::string srv_name,
867 bool for_mkfs,
868 std::ostream& errout)
869{
870 string domain;
871 // check if domain is also provided and extract it from srv_name
872 size_t idx = srv_name.find("_");
873 if (idx != string::npos) {
874 domain = srv_name.substr(idx + 1);
875 srv_name = srv_name.substr(0, idx);
876 }
877
878 map<string, DNSResolver::Record> records;
879 if (DNSResolver::get_instance()->resolve_srv_hosts(cct, srv_name,
880 DNSResolver::SRV_Protocol::TCP, domain, &records) != 0) {
881
882 errout << "unable to get monitor info from DNS SRV with service name: "
883 << "ceph-mon" << std::endl;
884 return -1;
885 } else {
886 for (auto& record : records) {
887 record.second.addr.set_type(entity_addr_t::TYPE_ANY);
9f95a23c
TL
888 _add_ambiguous_addr(record.first,
889 record.second.addr,
890 record.second.priority,
891 record.second.weight,
892 false);
7c673cae 893 }
11fdf7f2
TL
894 return 0;
895 }
896}
897
898int MonMap::build_initial(CephContext *cct, bool for_mkfs, ostream& errout)
899{
900 const auto& conf = cct->_conf;
f91f0fd5
TL
901
902 // mon_host_override?
903 auto mon_host_override = conf.get_val<std::string>("mon_host_override");
904 if (!mon_host_override.empty()) {
905 lgeneric_dout(cct, 1) << "Using mon_host_override " << mon_host_override << dendl;
906 auto ret = init_with_ips(mon_host_override, for_mkfs, "noname-");
907 if (ret == -EINVAL) {
908 ret = init_with_hosts(mon_host_override, for_mkfs, "noname-");
909 }
910 if (ret < 0) {
911 errout << "unable to parse addrs in '" << mon_host_override << "'"
912 << std::endl;
913 }
914 return ret;
915 }
916
917 // cct?
918 auto addrs = cct->get_mon_addrs();
919 if (addrs != nullptr && (addrs->size() > 0)) {
920 init_with_addrs(*addrs, for_mkfs, "noname-");
921 return 0;
922 }
923
11fdf7f2
TL
924 // file?
925 if (const auto monmap = conf.get_val<std::string>("monmap");
926 !monmap.empty()) {
927 return init_with_monmap(monmap, errout);
7c673cae
FG
928 }
929
11fdf7f2
TL
930 // fsid from conf?
931 if (const auto new_fsid = conf.get_val<uuid_d>("fsid");
932 !new_fsid.is_zero()) {
933 fsid = new_fsid;
934 }
935 // -m foo?
936 if (const auto mon_host = conf.get_val<std::string>("mon_host");
937 !mon_host.empty()) {
938 auto ret = init_with_ips(mon_host, for_mkfs, "noname-");
939 if (ret == -EINVAL) {
940 ret = init_with_hosts(mon_host, for_mkfs, "noname-");
941 }
942 if (ret < 0) {
943 errout << "unable to parse addrs in '" << mon_host << "'"
944 << std::endl;
945 return ret;
946 }
947 }
948 if (size() == 0) {
949 // What monitors are in the config file?
950 if (auto ret = init_with_config_file(conf, errout); ret < 0) {
951 return ret;
952 }
953 }
954 if (size() == 0) {
955 // no info found from conf options lets try use DNS SRV records
956 string srv_name = conf.get_val<std::string>("mon_dns_srv_name");
957 if (auto ret = init_with_dns_srv(cct, srv_name, for_mkfs, errout); ret < 0) {
958 return -ENOENT;
959 }
960 }
7c673cae
FG
961 if (size() == 0) {
962 errout << "no monitors specified to connect to." << std::endl;
963 return -ENOENT;
964 }
f67539c2 965 strategy = static_cast<election_strategy>(conf.get_val<uint64_t>("mon_election_default_strategy"));
7c673cae
FG
966 created = ceph_clock_now();
967 last_changed = created;
11fdf7f2 968 calc_legacy_ranks();
7c673cae
FG
969 return 0;
970}
11fdf7f2 971#endif // WITH_SEASTAR