]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MonMap.cc
import ceph quincy 17.2.6
[ceph.git] / ceph / src / mon / MonMap.cc
CommitLineData
11fdf7f2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
7c673cae
FG
3
4#include "MonMap.h"
5
6#include <algorithm>
20effc67 7#include <sstream>
7c673cae
FG
8#include <sys/types.h>
9#include <sys/stat.h>
10#include <fcntl.h>
11
11fdf7f2
TL
12#ifdef WITH_SEASTAR
13#include <seastar/core/fstream.hh>
14#include <seastar/core/reactor.hh>
15#include <seastar/net/dns.hh>
16#include "crimson/common/config_proxy.h"
17#endif
18
7c673cae
FG
19#include "common/Formatter.h"
20
21#include "include/ceph_features.h"
22#include "include/addr_parsing.h"
23#include "common/ceph_argparse.h"
24#include "common/dns_resolve.h"
25#include "common/errno.h"
7c673cae 26#include "common/dout.h"
11fdf7f2 27#include "common/Clock.h"
f67539c2 28#include "mon/health_check.h"
7c673cae 29
9f95a23c
TL
30using std::list;
31using std::map;
32using std::ostream;
20effc67 33using std::ostringstream;
9f95a23c
TL
34using std::set;
35using std::string;
36using std::vector;
37
38using ceph::DNSResolver;
7c673cae
FG
39using ceph::Formatter;
40
20effc67
TL
41#ifdef WITH_SEASTAR
42namespace {
43 seastar::logger& logger()
44 {
45 return crimson::get_logger(ceph_subsys_monc);
46 }
47}
48#endif
49
9f95a23c 50void mon_info_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae 51{
f67539c2
TL
52 uint8_t v = 5;
53 uint8_t min_v = 1;
54 if (!crush_loc.empty()) {
55 // we added crush_loc in version 5, but need to let old clients decode it
a4b75251 56 // so just leave the min_v at version 1. Monitors are protected
f67539c2
TL
57 // from misunderstandings about location because setting it is blocked
58 // on FEATURE_PINGING
a4b75251 59 min_v = 1;
f67539c2 60 }
11fdf7f2
TL
61 if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
62 v = 2;
63 }
f67539c2 64 ENCODE_START(v, min_v, bl);
11fdf7f2
TL
65 encode(name, bl);
66 if (v < 3) {
f67539c2 67 ceph_assert(min_v == 1);
eafe8130
TL
68 auto a = public_addrs.legacy_addr();
69 if (a != entity_addr_t()) {
70 encode(a, bl, features);
71 } else {
72 // note: we don't have a legacy addr here, so lie so that it looks
73 // like one, just so that old clients get a valid-looking map.
74 // they won't be able to talk to the v2 mons, but that's better
75 // than nothing.
76 encode(public_addrs.as_legacy_addr(), bl, features);
77 }
11fdf7f2
TL
78 } else {
79 encode(public_addrs, bl, features);
80 }
81 encode(priority, bl);
9f95a23c 82 encode(weight, bl);
f67539c2 83 encode(crush_loc, bl);
7c673cae
FG
84 ENCODE_FINISH(bl);
85}
86
9f95a23c 87void mon_info_t::decode(ceph::buffer::list::const_iterator& p)
7c673cae 88{
f67539c2 89 DECODE_START(5, p);
11fdf7f2
TL
90 decode(name, p);
91 decode(public_addrs, p);
224ce89b 92 if (struct_v >= 2) {
11fdf7f2 93 decode(priority, p);
224ce89b 94 }
9f95a23c
TL
95 if (struct_v >= 4) {
96 decode(weight, p);
97 }
f67539c2
TL
98 if (struct_v >= 5) {
99 decode(crush_loc, p);
100 }
7c673cae
FG
101 DECODE_FINISH(p);
102}
103
104void mon_info_t::print(ostream& out) const
105{
106 out << "mon." << name
11fdf7f2 107 << " addrs " << public_addrs
9f95a23c 108 << " priority " << priority
f67539c2
TL
109 << " weight " << weight
110 << " crush location " << crush_loc;
7c673cae
FG
111}
112
7c673cae
FG
113namespace {
114 struct rank_cmp {
115 bool operator()(const mon_info_t &a, const mon_info_t &b) const {
11fdf7f2 116 if (a.public_addrs.legacy_or_front_addr() == b.public_addrs.legacy_or_front_addr())
7c673cae 117 return a.name < b.name;
11fdf7f2 118 return a.public_addrs.legacy_or_front_addr() < b.public_addrs.legacy_or_front_addr();
7c673cae
FG
119 }
120 };
121}
122
11fdf7f2
TL
123void MonMap::calc_legacy_ranks()
124{
7c673cae 125 ranks.resize(mon_info.size());
7c673cae
FG
126
127 // Used to order entries according to public_addr, because that's
128 // how the ranks are expected to be ordered by. We may expand this
129 // later on, according to some other criteria, by specifying a
130 // different comparator.
131 //
132 // Please note that we use a 'set' here instead of resorting to
133 // std::sort() because we need more info than that's available in
134 // the vector. The vector will thus be ordered by, e.g., public_addr
135 // while only containing the names of each individual monitor.
136 // The only way of achieving this with std::sort() would be to first
137 // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo'
138 // with custom comparison functions, and then copy each invidual entry
139 // to a new vector. Unless there's a simpler way, we don't think the
140 // added complexity makes up for the additional memory usage of a 'set'.
141 set<mon_info_t, rank_cmp> tmp;
142
9f95a23c 143 for (auto p = mon_info.begin(); p != mon_info.end(); ++p) {
7c673cae
FG
144 mon_info_t &m = p->second;
145 tmp.insert(m);
7c673cae
FG
146 }
147
148 // map the set to the actual ranks etc
149 unsigned i = 0;
9f95a23c 150 for (auto p = tmp.begin(); p != tmp.end(); ++p, ++i) {
7c673cae
FG
151 ranks[i] = p->name;
152 }
153}
154
9f95a23c 155void MonMap::encode(ceph::buffer::list& blist, uint64_t con_features) const
7c673cae 156{
7c673cae 157 if ((con_features & CEPH_FEATURE_MONNAMES) == 0) {
11fdf7f2 158 using ceph::encode;
7c673cae 159 __u16 v = 1;
11fdf7f2 160 encode(v, blist);
9f95a23c 161 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
162 encode(epoch, blist);
163 vector<entity_inst_t> mon_inst(ranks.size());
164 for (unsigned n = 0; n < ranks.size(); n++) {
165 mon_inst[n].name = entity_name_t::MON(n);
166 mon_inst[n].addr = get_addrs(n).legacy_addr();
167 }
168 encode(mon_inst, blist, con_features);
169 encode(last_changed, blist);
170 encode(created, blist);
7c673cae
FG
171 return;
172 }
173
11fdf7f2
TL
174 map<string,entity_addr_t> legacy_mon_addr;
175 if (!HAVE_FEATURE(con_features, MONENC) ||
176 !HAVE_FEATURE(con_features, SERVER_NAUTILUS)) {
177 for (auto& [name, info] : mon_info) {
178 legacy_mon_addr[name] = info.public_addrs.legacy_addr();
179 }
180 }
181
182 if (!HAVE_FEATURE(con_features, MONENC)) {
183 /* we keep the mon_addr map when encoding to ensure compatibility
184 * with clients and other monitors that do not yet support the 'mons'
185 * map. This map keeps its original behavior, containing a mapping of
186 * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public
187 * address -- which is obtained from the public address of each entry
188 * in the 'mons' map.
189 */
190 using ceph::encode;
7c673cae 191 __u16 v = 2;
11fdf7f2 192 encode(v, blist);
9f95a23c 193 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
194 encode(epoch, blist);
195 encode(legacy_mon_addr, blist, con_features);
196 encode(last_changed, blist);
197 encode(created, blist);
198 return;
199 }
200
201 if (!HAVE_FEATURE(con_features, SERVER_NAUTILUS)) {
202 ENCODE_START(5, 3, blist);
9f95a23c 203 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
204 encode(epoch, blist);
205 encode(legacy_mon_addr, blist, con_features);
206 encode(last_changed, blist);
207 encode(created, blist);
208 encode(persistent_features, blist);
209 encode(optional_features, blist);
210 encode(mon_info, blist, con_features);
211 ENCODE_FINISH(blist);
212 return;
213 }
214
f67539c2 215 ENCODE_START(9, 6, blist);
9f95a23c 216 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
217 encode(epoch, blist);
218 encode(last_changed, blist);
219 encode(created, blist);
220 encode(persistent_features, blist);
221 encode(optional_features, blist);
222 encode(mon_info, blist, con_features);
223 encode(ranks, blist);
224 encode(min_mon_release, blist);
f67539c2
TL
225 encode(removed_ranks, blist);
226 uint8_t t = strategy;
227 encode(t, blist);
228 encode(disallowed_leaders, blist);
229 encode(stretch_mode_enabled, blist);
230 encode(tiebreaker_mon, blist);
231 encode(stretch_marked_down_mons, blist);
7c673cae
FG
232 ENCODE_FINISH(blist);
233}
234
9f95a23c 235void MonMap::decode(ceph::buffer::list::const_iterator& p)
7c673cae
FG
236{
237 map<string,entity_addr_t> mon_addr;
f67539c2 238 DECODE_START_LEGACY_COMPAT_LEN_16(9, 3, 3, p);
9f95a23c 239 ceph::decode_raw(fsid, p);
11fdf7f2 240 decode(epoch, p);
7c673cae
FG
241 if (struct_v == 1) {
242 vector<entity_inst_t> mon_inst;
11fdf7f2 243 decode(mon_inst, p);
7c673cae
FG
244 for (unsigned i = 0; i < mon_inst.size(); i++) {
245 char n[2];
246 n[0] = '0' + i;
247 n[1] = 0;
248 string name = n;
249 mon_addr[name] = mon_inst[i].addr;
250 }
11fdf7f2
TL
251 } else if (struct_v < 6) {
252 decode(mon_addr, p);
7c673cae 253 }
11fdf7f2
TL
254 decode(last_changed, p);
255 decode(created, p);
7c673cae 256 if (struct_v >= 4) {
11fdf7f2
TL
257 decode(persistent_features, p);
258 decode(optional_features, p);
7c673cae 259 }
11fdf7f2
TL
260 if (struct_v < 5) {
261 // generate mon_info from legacy mon_addr
262 for (auto& [name, addr] : mon_addr) {
263 mon_info_t &m = mon_info[name];
264 m.name = name;
265 m.public_addrs = entity_addrvec_t(addr);
266 }
267 } else {
268 decode(mon_info, p);
269 }
270 if (struct_v < 6) {
271 calc_legacy_ranks();
272 } else {
273 decode(ranks, p);
274 }
275 if (struct_v >= 7) {
276 decode(min_mon_release, p);
7c673cae 277 } else {
11fdf7f2 278 min_mon_release = infer_ceph_release_from_mon_features(persistent_features);
7c673cae 279 }
f67539c2
TL
280 if (struct_v >= 8) {
281 decode(removed_ranks, p);
282 uint8_t t;
283 decode(t, p);
284 strategy = static_cast<election_strategy>(t);
285 decode(disallowed_leaders, p);
286 }
287 if (struct_v >= 9) {
288 decode(stretch_mode_enabled, p);
289 decode(tiebreaker_mon, p);
290 decode(stretch_marked_down_mons, p);
291 } else {
292 stretch_mode_enabled = false;
293 tiebreaker_mon = "";
294 stretch_marked_down_mons.clear();
295 }
11fdf7f2 296 calc_addr_mons();
7c673cae 297 DECODE_FINISH(p);
7c673cae
FG
298}
299
300void MonMap::generate_test_instances(list<MonMap*>& o)
301{
302 o.push_back(new MonMap);
303 o.push_back(new MonMap);
304 o.back()->epoch = 1;
305 o.back()->last_changed = utime_t(123, 456);
306 o.back()->created = utime_t(789, 101112);
11fdf7f2 307 o.back()->add("one", entity_addrvec_t());
7c673cae
FG
308
309 MonMap *m = new MonMap;
310 {
311 m->epoch = 1;
312 m->last_changed = utime_t(123, 456);
313
11fdf7f2
TL
314 entity_addrvec_t empty_addr_one = entity_addrvec_t(entity_addr_t());
315 empty_addr_one.v[0].set_nonce(1);
7c673cae 316 m->add("empty_addr_one", empty_addr_one);
11fdf7f2
TL
317 entity_addrvec_t empty_addr_two = entity_addrvec_t(entity_addr_t());
318 empty_addr_two.v[0].set_nonce(2);
319 m->add("empty_addr_two", empty_addr_two);
7c673cae
FG
320
321 const char *local_pub_addr_s = "127.0.1.2";
322
323 const char *end_p = local_pub_addr_s + strlen(local_pub_addr_s);
11fdf7f2 324 entity_addrvec_t local_pub_addr;
7c673cae
FG
325 local_pub_addr.parse(local_pub_addr_s, &end_p);
326
9f95a23c 327 m->add(mon_info_t("filled_pub_addr", entity_addrvec_t(local_pub_addr), 1, 1));
7c673cae 328
11fdf7f2 329 m->add("empty_addr_zero", entity_addrvec_t());
7c673cae
FG
330 }
331 o.push_back(m);
332}
333
334// read from/write to a file
335int MonMap::write(const char *fn)
336{
337 // encode
9f95a23c 338 ceph::buffer::list bl;
7c673cae
FG
339 encode(bl, CEPH_FEATURES_ALL);
340
341 return bl.write_file(fn);
342}
343
344int MonMap::read(const char *fn)
345{
346 // read
9f95a23c 347 ceph::buffer::list bl;
7c673cae
FG
348 std::string error;
349 int r = bl.read_file(fn, &error);
350 if (r < 0)
351 return r;
352 decode(bl);
353 return 0;
354}
355
356void MonMap::print_summary(ostream& out) const
357{
358 out << "e" << epoch << ": "
359 << mon_info.size() << " mons at {";
360 // the map that we used to print, as it was, no longer
361 // maps strings to the monitor's public address, but to
362 // mon_info_t instead. As such, print the map in a way
363 // that keeps the expected format.
364 bool has_printed = false;
9f95a23c 365 for (auto p = mon_info.begin(); p != mon_info.end(); ++p) {
7c673cae
FG
366 if (has_printed)
367 out << ",";
11fdf7f2 368 out << p->first << "=" << p->second.public_addrs;
7c673cae
FG
369 has_printed = true;
370 }
39ae355f 371 out << "}" << " removed_ranks: {" << removed_ranks << "}";
7c673cae
FG
372}
373
374void MonMap::print(ostream& out) const
375{
376 out << "epoch " << epoch << "\n";
377 out << "fsid " << fsid << "\n";
378 out << "last_changed " << last_changed << "\n";
379 out << "created " << created << "\n";
f67539c2 380 out << "min_mon_release " << to_integer<unsigned>(min_mon_release)
9f95a23c 381 << " (" << min_mon_release << ")\n";
f67539c2 382 out << "election_strategy: " << strategy << "\n";
a4b75251
TL
383 if (stretch_mode_enabled) {
384 out << "stretch_mode_enabled " << stretch_mode_enabled << "\n";
385 out << "tiebreaker_mon " << tiebreaker_mon << "\n";
386 }
387 if (stretch_mode_enabled ||
388 !disallowed_leaders.empty()) {
f67539c2
TL
389 out << "disallowed_leaders " << disallowed_leaders << "\n";
390 }
7c673cae 391 unsigned i = 0;
9f95a23c 392 for (auto p = ranks.begin(); p != ranks.end(); ++p) {
f67539c2
TL
393 const auto &mi = mon_info.find(*p);
394 ceph_assert(mi != mon_info.end());
395 out << i++ << ": " << mi->second.public_addrs << " mon." << *p;
396 if (!mi->second.crush_loc.empty()) {
397 out << "; crush_location " << mi->second.crush_loc;
398 }
399 out << "\n";
7c673cae
FG
400 }
401}
402
403void MonMap::dump(Formatter *f) const
404{
405 f->dump_unsigned("epoch", epoch);
406 f->dump_stream("fsid") << fsid;
9f95a23c
TL
407 last_changed.gmtime(f->dump_stream("modified"));
408 created.gmtime(f->dump_stream("created"));
f67539c2
TL
409 f->dump_unsigned("min_mon_release", to_integer<unsigned>(min_mon_release));
410 f->dump_string("min_mon_release_name", to_string(min_mon_release));
411 f->dump_int ("election_strategy", strategy);
412 f->dump_stream("disallowed_leaders: ") << disallowed_leaders;
413 f->dump_bool("stretch_mode", stretch_mode_enabled);
a4b75251 414 f->dump_string("tiebreaker_mon", tiebreaker_mon);
39ae355f 415 f->dump_stream("removed_ranks: ") << removed_ranks;
7c673cae
FG
416 f->open_object_section("features");
417 persistent_features.dump(f, "persistent");
418 optional_features.dump(f, "optional");
419 f->close_section();
420 f->open_array_section("mons");
421 int i = 0;
9f95a23c 422 for (auto p = ranks.begin(); p != ranks.end(); ++p, ++i) {
7c673cae
FG
423 f->open_object_section("mon");
424 f->dump_int("rank", i);
425 f->dump_string("name", *p);
11fdf7f2
TL
426 f->dump_object("public_addrs", get_addrs(*p));
427 // compat: make these look like pre-nautilus entity_addr_t
428 f->dump_stream("addr") << get_addrs(*p).get_legacy_str();
429 f->dump_stream("public_addr") << get_addrs(*p).get_legacy_str();
9f95a23c
TL
430 f->dump_unsigned("priority", get_priority(*p));
431 f->dump_unsigned("weight", get_weight(*p));
f67539c2
TL
432 const auto &mi = mon_info.find(*p);
433 // we don't need to assert this validity as all the get_* functions did
434 f->dump_stream("crush_location") << mi->second.crush_loc;
7c673cae
FG
435 f->close_section();
436 }
437 f->close_section();
438}
439
9f95a23c
TL
440void MonMap::dump_summary(Formatter *f) const
441{
442 f->dump_unsigned("epoch", epoch);
f67539c2 443 f->dump_string("min_mon_release_name", to_string(min_mon_release));
9f95a23c
TL
444 f->dump_unsigned("num_mons", ranks.size());
445}
446
11fdf7f2
TL
447// an ambiguous mon addr may be legacy or may be msgr2--we aren' sure.
448// when that happens we need to try them both (unless we can
449// reasonably infer from the port number which it is).
450void MonMap::_add_ambiguous_addr(const string& name,
9f95a23c
TL
451 entity_addr_t addr,
452 int priority,
453 int weight,
454 bool for_mkfs)
11fdf7f2
TL
455{
456 if (addr.get_type() != entity_addr_t::TYPE_ANY) {
457 // a v1: or v2: prefix was specified
458 if (addr.get_port() == 0) {
459 // use default port
9f95a23c 460 if (addr.get_type() == entity_addr_t::TYPE_LEGACY) {
11fdf7f2
TL
461 addr.set_port(CEPH_MON_PORT_LEGACY);
462 } else if (addr.get_type() == entity_addr_t::TYPE_MSGR2) {
463 addr.set_port(CEPH_MON_PORT_IANA);
464 } else {
465 // wth
466 return;
467 }
468 if (!contains(addr)) {
9f95a23c 469 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
470 }
471 } else {
472 if (!contains(addr)) {
9f95a23c 473 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
474 }
475 }
476 } else {
477 // no v1: or v2: prefix specified
478 if (addr.get_port() == CEPH_MON_PORT_LEGACY) {
479 // legacy port implies legacy addr
480 addr.set_type(entity_addr_t::TYPE_LEGACY);
481 if (!contains(addr)) {
482 if (!for_mkfs) {
9f95a23c 483 add(name + "-legacy", entity_addrvec_t(addr), priority, weight);
11fdf7f2 484 } else {
9f95a23c 485 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
486 }
487 }
488 } else if (addr.get_port() == CEPH_MON_PORT_IANA) {
489 // iana port implies msgr2 addr
490 addr.set_type(entity_addr_t::TYPE_MSGR2);
491 if (!contains(addr)) {
9f95a23c 492 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
493 }
494 } else if (addr.get_port() == 0) {
495 // no port; include both msgr2 and legacy ports
496 if (!for_mkfs) {
497 addr.set_type(entity_addr_t::TYPE_MSGR2);
498 addr.set_port(CEPH_MON_PORT_IANA);
499 if (!contains(addr)) {
9f95a23c 500 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
501 }
502 addr.set_type(entity_addr_t::TYPE_LEGACY);
503 addr.set_port(CEPH_MON_PORT_LEGACY);
504 if (!contains(addr)) {
9f95a23c 505 add(name + "-legacy", entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
506 }
507 } else {
508 entity_addrvec_t av;
509 addr.set_type(entity_addr_t::TYPE_MSGR2);
510 addr.set_port(CEPH_MON_PORT_IANA);
511 av.v.push_back(addr);
512 addr.set_type(entity_addr_t::TYPE_LEGACY);
513 addr.set_port(CEPH_MON_PORT_LEGACY);
514 av.v.push_back(addr);
515 if (!contains(av)) {
9f95a23c 516 add(name, av, priority, weight);
11fdf7f2
TL
517 }
518 }
519 } else {
520 addr.set_type(entity_addr_t::TYPE_MSGR2);
521 if (!contains(addr)) {
9f95a23c 522 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
523 }
524 if (!for_mkfs) {
525 // try legacy on same port too
526 addr.set_type(entity_addr_t::TYPE_LEGACY);
527 if (!contains(addr)) {
9f95a23c 528 add(name + "-legacy", entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
529 }
530 }
531 }
532 }
533}
7c673cae 534
f91f0fd5
TL
535void MonMap::init_with_addrs(const std::vector<entity_addrvec_t>& addrs,
536 bool for_mkfs,
537 std::string_view prefix)
538{
539 char id = 'a';
540 for (auto& addr : addrs) {
541 string name{prefix};
542 name += id++;
543 if (addr.v.size() == 1) {
544 _add_ambiguous_addr(name, addr.front(), 0, 0, for_mkfs);
545 } else {
546 // they specified an addrvec, so let's assume they also specified
547 // the addr *type* and *port*. (we could possibly improve this?)
548 add(name, addr, 0);
549 }
550 }
551}
552
11fdf7f2
TL
553int MonMap::init_with_ips(const std::string& ips,
554 bool for_mkfs,
f91f0fd5 555 std::string_view prefix)
7c673cae 556{
11fdf7f2
TL
557 vector<entity_addrvec_t> addrs;
558 if (!parse_ip_port_vec(
559 ips.c_str(), addrs,
560 entity_addr_t::TYPE_ANY)) {
561 return -EINVAL;
562 }
563 if (addrs.empty())
564 return -ENOENT;
f91f0fd5 565 init_with_addrs(addrs, for_mkfs, prefix);
11fdf7f2
TL
566 return 0;
567}
7c673cae 568
11fdf7f2
TL
569int MonMap::init_with_hosts(const std::string& hostlist,
570 bool for_mkfs,
f91f0fd5 571 std::string_view prefix)
11fdf7f2 572{
7c673cae 573 // maybe they passed us a DNS-resolvable name
11fdf7f2 574 char *hosts = resolve_addrs(hostlist.c_str());
7c673cae
FG
575 if (!hosts)
576 return -EINVAL;
11fdf7f2
TL
577
578 vector<entity_addrvec_t> addrs;
579 bool success = parse_ip_port_vec(
580 hosts, addrs,
92f5a8d4 581 entity_addr_t::TYPE_ANY);
7c673cae
FG
582 free(hosts);
583 if (!success)
584 return -EINVAL;
7c673cae
FG
585 if (addrs.empty())
586 return -ENOENT;
f91f0fd5 587 init_with_addrs(addrs, for_mkfs, prefix);
11fdf7f2 588 calc_legacy_ranks();
7c673cae
FG
589 return 0;
590}
591
592void MonMap::set_initial_members(CephContext *cct,
593 list<std::string>& initial_members,
11fdf7f2
TL
594 string my_name,
595 const entity_addrvec_t& my_addrs,
596 set<entity_addrvec_t> *removed)
7c673cae
FG
597{
598 // remove non-initial members
599 unsigned i = 0;
600 while (i < size()) {
601 string n = get_name(i);
11fdf7f2
TL
602 if (std::find(initial_members.begin(), initial_members.end(), n)
603 != initial_members.end()) {
604 lgeneric_dout(cct, 1) << " keeping " << n << " " << get_addrs(i) << dendl;
7c673cae
FG
605 i++;
606 continue;
607 }
608
11fdf7f2
TL
609 lgeneric_dout(cct, 1) << " removing " << get_name(i) << " " << get_addrs(i)
610 << dendl;
611 if (removed) {
612 removed->insert(get_addrs(i));
613 }
7c673cae 614 remove(n);
11fdf7f2 615 ceph_assert(!contains(n));
7c673cae
FG
616 }
617
618 // add missing initial members
11fdf7f2
TL
619 for (auto& p : initial_members) {
620 if (!contains(p)) {
621 if (p == my_name) {
622 lgeneric_dout(cct, 1) << " adding self " << p << " " << my_addrs
623 << dendl;
624 add(p, my_addrs);
7c673cae
FG
625 } else {
626 entity_addr_t a;
627 a.set_type(entity_addr_t::TYPE_LEGACY);
628 a.set_family(AF_INET);
629 for (int n=1; ; n++) {
630 a.set_nonce(n);
631 if (!contains(a))
632 break;
633 }
11fdf7f2
TL
634 lgeneric_dout(cct, 1) << " adding " << p << " " << a << dendl;
635 add(p, entity_addrvec_t(a));
7c673cae 636 }
11fdf7f2 637 ceph_assert(contains(p));
7c673cae
FG
638 }
639 }
11fdf7f2 640 calc_legacy_ranks();
7c673cae
FG
641}
642
11fdf7f2
TL
643int MonMap::init_with_config_file(const ConfigProxy& conf,
644 std::ostream& errout)
7c673cae 645{
11fdf7f2
TL
646 std::vector<std::string> sections;
647 int ret = conf.get_all_sections(sections);
7c673cae
FG
648 if (ret) {
649 errout << "Unable to find any monitors in the configuration "
650 << "file, because there was an error listing the sections. error "
651 << ret << std::endl;
652 return -ENOENT;
653 }
11fdf7f2
TL
654 std::vector<std::string> mon_names;
655 for (const auto& section : sections) {
656 if (section.substr(0, 4) == "mon." && section.size() > 4) {
657 mon_names.push_back(section.substr(4));
7c673cae
FG
658 }
659 }
660
661 // Find an address for each monitor in the config file.
11fdf7f2
TL
662 for (const auto& mon_name : mon_names) {
663 std::vector<std::string> sections;
7c673cae
FG
664 std::string m_name("mon");
665 m_name += ".";
11fdf7f2 666 m_name += mon_name;
7c673cae
FG
667 sections.push_back(m_name);
668 sections.push_back("mon");
669 sections.push_back("global");
670 std::string val;
11fdf7f2 671 int res = conf.get_val_from_conf_file(sections, "mon addr", val, true);
7c673cae 672 if (res) {
11fdf7f2
TL
673 errout << "failed to get an address for mon." << mon_name
674 << ": error " << res << std::endl;
7c673cae
FG
675 continue;
676 }
11fdf7f2
TL
677 // the 'mon addr' field is a legacy field, so assume anything
678 // there on a weird port is a v1 address, and do not handle
679 // addrvecs.
7c673cae 680 entity_addr_t addr;
20effc67 681 if (!addr.parse(val, entity_addr_t::TYPE_LEGACY)) {
11fdf7f2
TL
682 errout << "unable to parse address for mon." << mon_name
683 << ": addr='" << val << "'" << std::endl;
7c673cae
FG
684 continue;
685 }
11fdf7f2
TL
686 if (addr.get_port() == 0) {
687 addr.set_port(CEPH_MON_PORT_LEGACY);
688 }
224ce89b 689 uint16_t priority = 0;
11fdf7f2 690 if (!conf.get_val_from_conf_file(sections, "mon priority", val, false)) {
224ce89b
WB
691 try {
692 priority = std::stoul(val);
693 } catch (std::logic_error&) {
11fdf7f2 694 errout << "unable to parse priority for mon." << mon_name
224ce89b
WB
695 << ": priority='" << val << "'" << std::endl;
696 continue;
697 }
698 }
9f95a23c
TL
699 uint16_t weight = 0;
700 if (!conf.get_val_from_conf_file(sections, "mon weight", val, false)) {
701 try {
702 weight = std::stoul(val);
703 } catch (std::logic_error&) {
704 errout << "unable to parse weight for mon." << mon_name
705 << ": weight='" << val << "'"
706 << std::endl;
707 continue;
708 }
709 }
11fdf7f2 710
9f95a23c 711 // make sure this mon isn't already in the map
7c673cae
FG
712 if (contains(addr))
713 remove(get_name(addr));
11fdf7f2
TL
714 if (contains(mon_name))
715 remove(mon_name);
9f95a23c 716 _add_ambiguous_addr(mon_name, addr, priority, weight, false);
11fdf7f2
TL
717 }
718 return 0;
719}
720
f67539c2
TL
721void MonMap::check_health(health_check_map_t *checks) const
722{
723 if (stretch_mode_enabled) {
724 list<string> detail;
725 for (auto& p : mon_info) {
726 if (p.second.crush_loc.empty()) {
727 ostringstream ss;
728 ss << "mon " << p.first << " has no location set while in stretch mode";
729 detail.push_back(ss.str());
730 }
731 }
732 if (!detail.empty()) {
733 ostringstream ss;
734 ss << detail.size() << " monitor(s) have no location set while in stretch mode"
735 << "; this may cause issues with failover, OSD connections, netsplit handling, etc";
736 auto& d = checks->add("MON_LOCATION_NOT_SET", HEALTH_WARN,
737 ss.str(), detail.size());
738 d.detail.swap(detail);
739 }
740 }
741}
742
11fdf7f2
TL
743#ifdef WITH_SEASTAR
744
f67539c2 745seastar::future<> MonMap::read_monmap(const std::string& monmap)
11fdf7f2 746{
20effc67 747 using namespace seastar;
11fdf7f2
TL
748 return open_file_dma(monmap, open_flags::ro).then([this] (file f) {
749 return f.size().then([this, f = std::move(f)](size_t s) {
750 return do_with(make_file_input_stream(f), [this, s](input_stream<char>& in) {
751 return in.read_exactly(s).then([this](temporary_buffer<char> buf) {
9f95a23c
TL
752 ceph::buffer::list bl;
753 bl.push_back(ceph::buffer::ptr_node::create(
754 ceph::buffer::create(std::move(buf))));
11fdf7f2
TL
755 decode(bl);
756 });
757 });
758 });
759 });
760}
7c673cae 761
f67539c2 762seastar::future<> MonMap::init_with_dns_srv(bool for_mkfs, const std::string& name)
11fdf7f2 763{
20effc67 764 logger().debug("{}: for_mkfs={}, name={}", __func__, for_mkfs, name);
11fdf7f2
TL
765 string domain;
766 string service = name;
767 // check if domain is also provided and extract it from srv_name
768 size_t idx = name.find("_");
769 if (idx != name.npos) {
770 domain = name.substr(idx + 1);
771 service = name.substr(0, idx);
7c673cae 772 }
9f95a23c
TL
773 return seastar::net::dns::get_srv_records(
774 seastar::net::dns_resolver::srv_proto::tcp,
775 service, domain).then([this](seastar::net::dns_resolver::srv_records records) {
20effc67 776 return seastar::parallel_for_each(records, [this](auto record) {
9f95a23c
TL
777 return seastar::net::dns::resolve_name(record.target).then(
778 [record,this](seastar::net::inet_address a) {
11fdf7f2
TL
779 // the resolved address does not contain ceph specific info like nonce
780 // nonce or msgr proto (legacy, msgr2), so set entity_addr_t manually
781 entity_addr_t addr;
782 addr.set_type(entity_addr_t::TYPE_ANY);
783 addr.set_family(int(a.in_family()));
784 addr.set_port(record.port);
785 switch (a.in_family()) {
9f95a23c 786 case seastar::net::inet_address::family::INET:
11fdf7f2
TL
787 addr.in4_addr().sin_addr = a;
788 break;
9f95a23c 789 case seastar::net::inet_address::family::INET6:
11fdf7f2
TL
790 addr.in6_addr().sin6_addr = a;
791 break;
792 }
9f95a23c
TL
793 _add_ambiguous_addr(record.target,
794 addr,
795 record.priority,
796 record.weight,
797 false);
20effc67
TL
798 }).handle_exception_type([t=record.target](const std::system_error& e) {
799 logger().debug("{}: unable to resolve name for {}: {}",
800 "init_with_dns_srv", t, e);
11fdf7f2
TL
801 });
802 });
20effc67
TL
803 }).handle_exception_type([name](const std::system_error& e) {
804 logger().debug("{}: unable to get monitor info from DNS SRV with {}: {}",
805 "init_with_dns_srv", name, e);
11fdf7f2
TL
806 // ignore DNS failures
807 return seastar::make_ready_future<>();
808 });
809}
7c673cae 810
20effc67
TL
811bool MonMap::maybe_init_with_mon_host(const std::string& mon_host,
812 const bool for_mkfs)
11fdf7f2 813{
20effc67 814 if (!mon_host.empty()) {
11fdf7f2 815 if (auto ret = init_with_ips(mon_host, for_mkfs, "noname-"); ret == 0) {
20effc67 816 return true;
7c673cae 817 }
11fdf7f2
TL
818 // TODO: resolve_addrs() is a blocking call
819 if (auto ret = init_with_hosts(mon_host, for_mkfs, "noname-"); ret == 0) {
20effc67 820 return true;
11fdf7f2
TL
821 } else {
822 throw std::runtime_error(cpp_strerror(ret));
823 }
824 }
20effc67
TL
825 return false;
826}
827
828seastar::future<> MonMap::build_monmap(const crimson::common::ConfigProxy& conf,
829 bool for_mkfs)
830{
831 logger().debug("{}: for_mkfs={}", __func__, for_mkfs);
832 // -m foo?
833 if (maybe_init_with_mon_host(conf.get_val<std::string>("mon_host"), for_mkfs)) {
834 return seastar::make_ready_future<>();
835 }
7c673cae 836
11fdf7f2
TL
837 // What monitors are in the config file?
838 ostringstream errout;
839 if (auto ret = init_with_config_file(conf, errout); ret < 0) {
840 throw std::runtime_error(errout.str());
841 }
842 if (size() > 0) {
20effc67 843 return seastar::make_ready_future<>();
11fdf7f2
TL
844 }
845 // no info found from conf options lets try use DNS SRV records
846 const string srv_name = conf.get_val<std::string>("mon_dns_srv_name");
847 return init_with_dns_srv(for_mkfs, srv_name).then([this] {
848 if (size() == 0) {
849 throw std::runtime_error("no monitors specified to connect to.");
850 }
851 });
852}
7c673cae 853
f67539c2 854seastar::future<> MonMap::build_initial(const crimson::common::ConfigProxy& conf, bool for_mkfs)
11fdf7f2 855{
20effc67
TL
856 // mon_host_override?
857 if (maybe_init_with_mon_host(conf.get_val<std::string>("mon_host_override"),
858 for_mkfs)) {
859 return seastar::make_ready_future<>();
860 }
861
11fdf7f2
TL
862 // file?
863 if (const auto monmap = conf.get_val<std::string>("monmap");
864 !monmap.empty()) {
865 return read_monmap(monmap);
866 } else {
867 // fsid from conf?
868 if (const auto new_fsid = conf.get_val<uuid_d>("fsid");
869 !new_fsid.is_zero()) {
870 fsid = new_fsid;
7c673cae 871 }
11fdf7f2
TL
872 return build_monmap(conf, for_mkfs).then([this] {
873 created = ceph_clock_now();
874 last_changed = created;
875 calc_legacy_ranks();
876 });
877 }
878}
879
880#else // WITH_SEASTAR
881
882int MonMap::init_with_monmap(const std::string& monmap, std::ostream& errout)
883{
884 int r;
885 try {
886 r = read(monmap.c_str());
9f95a23c 887 } catch (ceph::buffer::error&) {
11fdf7f2
TL
888 r = -EINVAL;
889 }
890 if (r >= 0)
891 return 0;
892 errout << "unable to read/decode monmap from " << monmap
893 << ": " << cpp_strerror(-r) << std::endl;
894 return r;
895}
896
897int MonMap::init_with_dns_srv(CephContext* cct,
898 std::string srv_name,
899 bool for_mkfs,
900 std::ostream& errout)
901{
20effc67
TL
902 lgeneric_dout(cct, 1) << __func__ << " srv_name: " << srv_name << dendl;
903
11fdf7f2
TL
904 string domain;
905 // check if domain is also provided and extract it from srv_name
906 size_t idx = srv_name.find("_");
907 if (idx != string::npos) {
908 domain = srv_name.substr(idx + 1);
909 srv_name = srv_name.substr(0, idx);
910 }
911
912 map<string, DNSResolver::Record> records;
913 if (DNSResolver::get_instance()->resolve_srv_hosts(cct, srv_name,
914 DNSResolver::SRV_Protocol::TCP, domain, &records) != 0) {
915
916 errout << "unable to get monitor info from DNS SRV with service name: "
917 << "ceph-mon" << std::endl;
918 return -1;
919 } else {
920 for (auto& record : records) {
921 record.second.addr.set_type(entity_addr_t::TYPE_ANY);
9f95a23c
TL
922 _add_ambiguous_addr(record.first,
923 record.second.addr,
924 record.second.priority,
925 record.second.weight,
926 false);
7c673cae 927 }
11fdf7f2
TL
928 return 0;
929 }
930}
931
932int MonMap::build_initial(CephContext *cct, bool for_mkfs, ostream& errout)
933{
20effc67 934 lgeneric_dout(cct, 1) << __func__ << " for_mkfs: " << for_mkfs << dendl;
11fdf7f2 935 const auto& conf = cct->_conf;
f91f0fd5
TL
936
937 // mon_host_override?
938 auto mon_host_override = conf.get_val<std::string>("mon_host_override");
939 if (!mon_host_override.empty()) {
940 lgeneric_dout(cct, 1) << "Using mon_host_override " << mon_host_override << dendl;
941 auto ret = init_with_ips(mon_host_override, for_mkfs, "noname-");
942 if (ret == -EINVAL) {
943 ret = init_with_hosts(mon_host_override, for_mkfs, "noname-");
944 }
945 if (ret < 0) {
946 errout << "unable to parse addrs in '" << mon_host_override << "'"
947 << std::endl;
948 }
949 return ret;
950 }
951
952 // cct?
953 auto addrs = cct->get_mon_addrs();
954 if (addrs != nullptr && (addrs->size() > 0)) {
955 init_with_addrs(*addrs, for_mkfs, "noname-");
956 return 0;
957 }
958
11fdf7f2
TL
959 // file?
960 if (const auto monmap = conf.get_val<std::string>("monmap");
961 !monmap.empty()) {
962 return init_with_monmap(monmap, errout);
7c673cae
FG
963 }
964
11fdf7f2
TL
965 // fsid from conf?
966 if (const auto new_fsid = conf.get_val<uuid_d>("fsid");
967 !new_fsid.is_zero()) {
968 fsid = new_fsid;
969 }
970 // -m foo?
971 if (const auto mon_host = conf.get_val<std::string>("mon_host");
972 !mon_host.empty()) {
973 auto ret = init_with_ips(mon_host, for_mkfs, "noname-");
974 if (ret == -EINVAL) {
975 ret = init_with_hosts(mon_host, for_mkfs, "noname-");
976 }
977 if (ret < 0) {
978 errout << "unable to parse addrs in '" << mon_host << "'"
979 << std::endl;
980 return ret;
981 }
982 }
983 if (size() == 0) {
984 // What monitors are in the config file?
985 if (auto ret = init_with_config_file(conf, errout); ret < 0) {
986 return ret;
987 }
988 }
989 if (size() == 0) {
990 // no info found from conf options lets try use DNS SRV records
991 string srv_name = conf.get_val<std::string>("mon_dns_srv_name");
992 if (auto ret = init_with_dns_srv(cct, srv_name, for_mkfs, errout); ret < 0) {
993 return -ENOENT;
994 }
995 }
7c673cae
FG
996 if (size() == 0) {
997 errout << "no monitors specified to connect to." << std::endl;
998 return -ENOENT;
999 }
f67539c2 1000 strategy = static_cast<election_strategy>(conf.get_val<uint64_t>("mon_election_default_strategy"));
7c673cae
FG
1001 created = ceph_clock_now();
1002 last_changed = created;
11fdf7f2 1003 calc_legacy_ranks();
7c673cae
FG
1004 return 0;
1005}
11fdf7f2 1006#endif // WITH_SEASTAR