]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MonMap.cc
bump version to 15.2.6-pve1
[ceph.git] / ceph / src / mon / MonMap.cc
CommitLineData
11fdf7f2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
7c673cae
FG
3
4#include "MonMap.h"
5
6#include <algorithm>
7#include <sys/types.h>
8#include <sys/stat.h>
9#include <fcntl.h>
10
11fdf7f2
TL
11#ifdef WITH_SEASTAR
12#include <seastar/core/fstream.hh>
13#include <seastar/core/reactor.hh>
14#include <seastar/net/dns.hh>
15#include "crimson/common/config_proxy.h"
16#endif
17
7c673cae
FG
18#include "common/Formatter.h"
19
20#include "include/ceph_features.h"
21#include "include/addr_parsing.h"
22#include "common/ceph_argparse.h"
23#include "common/dns_resolve.h"
24#include "common/errno.h"
7c673cae 25#include "common/dout.h"
11fdf7f2 26#include "common/Clock.h"
7c673cae 27
9f95a23c
TL
28using std::list;
29using std::map;
30using std::ostream;
31using std::set;
32using std::string;
33using std::vector;
34
35using ceph::DNSResolver;
7c673cae
FG
36using ceph::Formatter;
37
9f95a23c 38void mon_info_t::encode(ceph::buffer::list& bl, uint64_t features) const
7c673cae 39{
9f95a23c 40 uint8_t v = 4;
11fdf7f2
TL
41 if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
42 v = 2;
43 }
44 ENCODE_START(v, 1, bl);
45 encode(name, bl);
46 if (v < 3) {
eafe8130
TL
47 auto a = public_addrs.legacy_addr();
48 if (a != entity_addr_t()) {
49 encode(a, bl, features);
50 } else {
51 // note: we don't have a legacy addr here, so lie so that it looks
52 // like one, just so that old clients get a valid-looking map.
53 // they won't be able to talk to the v2 mons, but that's better
54 // than nothing.
55 encode(public_addrs.as_legacy_addr(), bl, features);
56 }
11fdf7f2
TL
57 } else {
58 encode(public_addrs, bl, features);
59 }
60 encode(priority, bl);
9f95a23c 61 encode(weight, bl);
7c673cae
FG
62 ENCODE_FINISH(bl);
63}
64
9f95a23c 65void mon_info_t::decode(ceph::buffer::list::const_iterator& p)
7c673cae 66{
9f95a23c 67 DECODE_START(4, p);
11fdf7f2
TL
68 decode(name, p);
69 decode(public_addrs, p);
224ce89b 70 if (struct_v >= 2) {
11fdf7f2 71 decode(priority, p);
224ce89b 72 }
9f95a23c
TL
73 if (struct_v >= 4) {
74 decode(weight, p);
75 }
7c673cae
FG
76 DECODE_FINISH(p);
77}
78
79void mon_info_t::print(ostream& out) const
80{
81 out << "mon." << name
11fdf7f2 82 << " addrs " << public_addrs
9f95a23c
TL
83 << " priority " << priority
84 << " weight " << weight;
7c673cae
FG
85}
86
7c673cae
FG
87namespace {
88 struct rank_cmp {
89 bool operator()(const mon_info_t &a, const mon_info_t &b) const {
11fdf7f2 90 if (a.public_addrs.legacy_or_front_addr() == b.public_addrs.legacy_or_front_addr())
7c673cae 91 return a.name < b.name;
11fdf7f2 92 return a.public_addrs.legacy_or_front_addr() < b.public_addrs.legacy_or_front_addr();
7c673cae
FG
93 }
94 };
95}
96
11fdf7f2
TL
97void MonMap::calc_legacy_ranks()
98{
7c673cae 99 ranks.resize(mon_info.size());
7c673cae
FG
100
101 // Used to order entries according to public_addr, because that's
102 // how the ranks are expected to be ordered by. We may expand this
103 // later on, according to some other criteria, by specifying a
104 // different comparator.
105 //
106 // Please note that we use a 'set' here instead of resorting to
107 // std::sort() because we need more info than that's available in
108 // the vector. The vector will thus be ordered by, e.g., public_addr
109 // while only containing the names of each individual monitor.
110 // The only way of achieving this with std::sort() would be to first
111 // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo'
112 // with custom comparison functions, and then copy each invidual entry
113 // to a new vector. Unless there's a simpler way, we don't think the
114 // added complexity makes up for the additional memory usage of a 'set'.
115 set<mon_info_t, rank_cmp> tmp;
116
9f95a23c 117 for (auto p = mon_info.begin(); p != mon_info.end(); ++p) {
7c673cae
FG
118 mon_info_t &m = p->second;
119 tmp.insert(m);
7c673cae
FG
120 }
121
122 // map the set to the actual ranks etc
123 unsigned i = 0;
9f95a23c 124 for (auto p = tmp.begin(); p != tmp.end(); ++p, ++i) {
7c673cae
FG
125 ranks[i] = p->name;
126 }
127}
128
9f95a23c 129void MonMap::encode(ceph::buffer::list& blist, uint64_t con_features) const
7c673cae 130{
7c673cae 131 if ((con_features & CEPH_FEATURE_MONNAMES) == 0) {
11fdf7f2 132 using ceph::encode;
7c673cae 133 __u16 v = 1;
11fdf7f2 134 encode(v, blist);
9f95a23c 135 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
136 encode(epoch, blist);
137 vector<entity_inst_t> mon_inst(ranks.size());
138 for (unsigned n = 0; n < ranks.size(); n++) {
139 mon_inst[n].name = entity_name_t::MON(n);
140 mon_inst[n].addr = get_addrs(n).legacy_addr();
141 }
142 encode(mon_inst, blist, con_features);
143 encode(last_changed, blist);
144 encode(created, blist);
7c673cae
FG
145 return;
146 }
147
11fdf7f2
TL
148 map<string,entity_addr_t> legacy_mon_addr;
149 if (!HAVE_FEATURE(con_features, MONENC) ||
150 !HAVE_FEATURE(con_features, SERVER_NAUTILUS)) {
151 for (auto& [name, info] : mon_info) {
152 legacy_mon_addr[name] = info.public_addrs.legacy_addr();
153 }
154 }
155
156 if (!HAVE_FEATURE(con_features, MONENC)) {
157 /* we keep the mon_addr map when encoding to ensure compatibility
158 * with clients and other monitors that do not yet support the 'mons'
159 * map. This map keeps its original behavior, containing a mapping of
160 * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public
161 * address -- which is obtained from the public address of each entry
162 * in the 'mons' map.
163 */
164 using ceph::encode;
7c673cae 165 __u16 v = 2;
11fdf7f2 166 encode(v, blist);
9f95a23c 167 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
168 encode(epoch, blist);
169 encode(legacy_mon_addr, blist, con_features);
170 encode(last_changed, blist);
171 encode(created, blist);
172 return;
173 }
174
175 if (!HAVE_FEATURE(con_features, SERVER_NAUTILUS)) {
176 ENCODE_START(5, 3, blist);
9f95a23c 177 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
178 encode(epoch, blist);
179 encode(legacy_mon_addr, blist, con_features);
180 encode(last_changed, blist);
181 encode(created, blist);
182 encode(persistent_features, blist);
183 encode(optional_features, blist);
184 encode(mon_info, blist, con_features);
185 ENCODE_FINISH(blist);
186 return;
187 }
188
189 ENCODE_START(7, 6, blist);
9f95a23c 190 ceph::encode_raw(fsid, blist);
11fdf7f2
TL
191 encode(epoch, blist);
192 encode(last_changed, blist);
193 encode(created, blist);
194 encode(persistent_features, blist);
195 encode(optional_features, blist);
196 encode(mon_info, blist, con_features);
197 encode(ranks, blist);
198 encode(min_mon_release, blist);
7c673cae
FG
199 ENCODE_FINISH(blist);
200}
201
9f95a23c 202void MonMap::decode(ceph::buffer::list::const_iterator& p)
7c673cae
FG
203{
204 map<string,entity_addr_t> mon_addr;
11fdf7f2 205 DECODE_START_LEGACY_COMPAT_LEN_16(7, 3, 3, p);
9f95a23c 206 ceph::decode_raw(fsid, p);
11fdf7f2 207 decode(epoch, p);
7c673cae
FG
208 if (struct_v == 1) {
209 vector<entity_inst_t> mon_inst;
11fdf7f2 210 decode(mon_inst, p);
7c673cae
FG
211 for (unsigned i = 0; i < mon_inst.size(); i++) {
212 char n[2];
213 n[0] = '0' + i;
214 n[1] = 0;
215 string name = n;
216 mon_addr[name] = mon_inst[i].addr;
217 }
11fdf7f2
TL
218 } else if (struct_v < 6) {
219 decode(mon_addr, p);
7c673cae 220 }
11fdf7f2
TL
221 decode(last_changed, p);
222 decode(created, p);
7c673cae 223 if (struct_v >= 4) {
11fdf7f2
TL
224 decode(persistent_features, p);
225 decode(optional_features, p);
7c673cae 226 }
11fdf7f2
TL
227 if (struct_v < 5) {
228 // generate mon_info from legacy mon_addr
229 for (auto& [name, addr] : mon_addr) {
230 mon_info_t &m = mon_info[name];
231 m.name = name;
232 m.public_addrs = entity_addrvec_t(addr);
233 }
234 } else {
235 decode(mon_info, p);
236 }
237 if (struct_v < 6) {
238 calc_legacy_ranks();
239 } else {
240 decode(ranks, p);
241 }
242 if (struct_v >= 7) {
243 decode(min_mon_release, p);
7c673cae 244 } else {
11fdf7f2 245 min_mon_release = infer_ceph_release_from_mon_features(persistent_features);
7c673cae 246 }
11fdf7f2 247 calc_addr_mons();
7c673cae 248 DECODE_FINISH(p);
7c673cae
FG
249}
250
251void MonMap::generate_test_instances(list<MonMap*>& o)
252{
253 o.push_back(new MonMap);
254 o.push_back(new MonMap);
255 o.back()->epoch = 1;
256 o.back()->last_changed = utime_t(123, 456);
257 o.back()->created = utime_t(789, 101112);
11fdf7f2 258 o.back()->add("one", entity_addrvec_t());
7c673cae
FG
259
260 MonMap *m = new MonMap;
261 {
262 m->epoch = 1;
263 m->last_changed = utime_t(123, 456);
264
11fdf7f2
TL
265 entity_addrvec_t empty_addr_one = entity_addrvec_t(entity_addr_t());
266 empty_addr_one.v[0].set_nonce(1);
7c673cae 267 m->add("empty_addr_one", empty_addr_one);
11fdf7f2
TL
268 entity_addrvec_t empty_addr_two = entity_addrvec_t(entity_addr_t());
269 empty_addr_two.v[0].set_nonce(2);
270 m->add("empty_addr_two", empty_addr_two);
7c673cae
FG
271
272 const char *local_pub_addr_s = "127.0.1.2";
273
274 const char *end_p = local_pub_addr_s + strlen(local_pub_addr_s);
11fdf7f2 275 entity_addrvec_t local_pub_addr;
7c673cae
FG
276 local_pub_addr.parse(local_pub_addr_s, &end_p);
277
9f95a23c 278 m->add(mon_info_t("filled_pub_addr", entity_addrvec_t(local_pub_addr), 1, 1));
7c673cae 279
11fdf7f2 280 m->add("empty_addr_zero", entity_addrvec_t());
7c673cae
FG
281 }
282 o.push_back(m);
283}
284
285// read from/write to a file
286int MonMap::write(const char *fn)
287{
288 // encode
9f95a23c 289 ceph::buffer::list bl;
7c673cae
FG
290 encode(bl, CEPH_FEATURES_ALL);
291
292 return bl.write_file(fn);
293}
294
295int MonMap::read(const char *fn)
296{
297 // read
9f95a23c 298 ceph::buffer::list bl;
7c673cae
FG
299 std::string error;
300 int r = bl.read_file(fn, &error);
301 if (r < 0)
302 return r;
303 decode(bl);
304 return 0;
305}
306
307void MonMap::print_summary(ostream& out) const
308{
309 out << "e" << epoch << ": "
310 << mon_info.size() << " mons at {";
311 // the map that we used to print, as it was, no longer
312 // maps strings to the monitor's public address, but to
313 // mon_info_t instead. As such, print the map in a way
314 // that keeps the expected format.
315 bool has_printed = false;
9f95a23c 316 for (auto p = mon_info.begin(); p != mon_info.end(); ++p) {
7c673cae
FG
317 if (has_printed)
318 out << ",";
11fdf7f2 319 out << p->first << "=" << p->second.public_addrs;
7c673cae
FG
320 has_printed = true;
321 }
322 out << "}";
323}
324
325void MonMap::print(ostream& out) const
326{
327 out << "epoch " << epoch << "\n";
328 out << "fsid " << fsid << "\n";
329 out << "last_changed " << last_changed << "\n";
330 out << "created " << created << "\n";
9f95a23c
TL
331 out << "min_mon_release " << ceph::to_integer<unsigned>(min_mon_release)
332 << " (" << min_mon_release << ")\n";
7c673cae 333 unsigned i = 0;
9f95a23c 334 for (auto p = ranks.begin(); p != ranks.end(); ++p) {
11fdf7f2 335 out << i++ << ": " << get_addrs(*p) << " mon." << *p << "\n";
7c673cae
FG
336 }
337}
338
339void MonMap::dump(Formatter *f) const
340{
341 f->dump_unsigned("epoch", epoch);
342 f->dump_stream("fsid") << fsid;
9f95a23c
TL
343 last_changed.gmtime(f->dump_stream("modified"));
344 created.gmtime(f->dump_stream("created"));
345 f->dump_unsigned("min_mon_release", ceph::to_integer<unsigned>(min_mon_release));
346 f->dump_string("min_mon_release_name", ceph::to_string(min_mon_release));
7c673cae
FG
347 f->open_object_section("features");
348 persistent_features.dump(f, "persistent");
349 optional_features.dump(f, "optional");
350 f->close_section();
351 f->open_array_section("mons");
352 int i = 0;
9f95a23c 353 for (auto p = ranks.begin(); p != ranks.end(); ++p, ++i) {
7c673cae
FG
354 f->open_object_section("mon");
355 f->dump_int("rank", i);
356 f->dump_string("name", *p);
11fdf7f2
TL
357 f->dump_object("public_addrs", get_addrs(*p));
358 // compat: make these look like pre-nautilus entity_addr_t
359 f->dump_stream("addr") << get_addrs(*p).get_legacy_str();
360 f->dump_stream("public_addr") << get_addrs(*p).get_legacy_str();
9f95a23c
TL
361 f->dump_unsigned("priority", get_priority(*p));
362 f->dump_unsigned("weight", get_weight(*p));
7c673cae
FG
363 f->close_section();
364 }
365 f->close_section();
366}
367
9f95a23c
TL
368void MonMap::dump_summary(Formatter *f) const
369{
370 f->dump_unsigned("epoch", epoch);
371 f->dump_string("min_mon_release_name", ceph::to_string(min_mon_release));
372 f->dump_unsigned("num_mons", ranks.size());
373}
374
375
11fdf7f2
TL
376// an ambiguous mon addr may be legacy or may be msgr2--we aren' sure.
377// when that happens we need to try them both (unless we can
378// reasonably infer from the port number which it is).
379void MonMap::_add_ambiguous_addr(const string& name,
9f95a23c
TL
380 entity_addr_t addr,
381 int priority,
382 int weight,
383 bool for_mkfs)
11fdf7f2
TL
384{
385 if (addr.get_type() != entity_addr_t::TYPE_ANY) {
386 // a v1: or v2: prefix was specified
387 if (addr.get_port() == 0) {
388 // use default port
9f95a23c 389 if (addr.get_type() == entity_addr_t::TYPE_LEGACY) {
11fdf7f2
TL
390 addr.set_port(CEPH_MON_PORT_LEGACY);
391 } else if (addr.get_type() == entity_addr_t::TYPE_MSGR2) {
392 addr.set_port(CEPH_MON_PORT_IANA);
393 } else {
394 // wth
395 return;
396 }
397 if (!contains(addr)) {
9f95a23c 398 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
399 }
400 } else {
401 if (!contains(addr)) {
9f95a23c 402 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
403 }
404 }
405 } else {
406 // no v1: or v2: prefix specified
407 if (addr.get_port() == CEPH_MON_PORT_LEGACY) {
408 // legacy port implies legacy addr
409 addr.set_type(entity_addr_t::TYPE_LEGACY);
410 if (!contains(addr)) {
411 if (!for_mkfs) {
9f95a23c 412 add(name + "-legacy", entity_addrvec_t(addr), priority, weight);
11fdf7f2 413 } else {
9f95a23c 414 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
415 }
416 }
417 } else if (addr.get_port() == CEPH_MON_PORT_IANA) {
418 // iana port implies msgr2 addr
419 addr.set_type(entity_addr_t::TYPE_MSGR2);
420 if (!contains(addr)) {
9f95a23c 421 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
422 }
423 } else if (addr.get_port() == 0) {
424 // no port; include both msgr2 and legacy ports
425 if (!for_mkfs) {
426 addr.set_type(entity_addr_t::TYPE_MSGR2);
427 addr.set_port(CEPH_MON_PORT_IANA);
428 if (!contains(addr)) {
9f95a23c 429 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
430 }
431 addr.set_type(entity_addr_t::TYPE_LEGACY);
432 addr.set_port(CEPH_MON_PORT_LEGACY);
433 if (!contains(addr)) {
9f95a23c 434 add(name + "-legacy", entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
435 }
436 } else {
437 entity_addrvec_t av;
438 addr.set_type(entity_addr_t::TYPE_MSGR2);
439 addr.set_port(CEPH_MON_PORT_IANA);
440 av.v.push_back(addr);
441 addr.set_type(entity_addr_t::TYPE_LEGACY);
442 addr.set_port(CEPH_MON_PORT_LEGACY);
443 av.v.push_back(addr);
444 if (!contains(av)) {
9f95a23c 445 add(name, av, priority, weight);
11fdf7f2
TL
446 }
447 }
448 } else {
449 addr.set_type(entity_addr_t::TYPE_MSGR2);
450 if (!contains(addr)) {
9f95a23c 451 add(name, entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
452 }
453 if (!for_mkfs) {
454 // try legacy on same port too
455 addr.set_type(entity_addr_t::TYPE_LEGACY);
456 if (!contains(addr)) {
9f95a23c 457 add(name + "-legacy", entity_addrvec_t(addr), priority, weight);
11fdf7f2
TL
458 }
459 }
460 }
461 }
462}
7c673cae 463
11fdf7f2
TL
464int MonMap::init_with_ips(const std::string& ips,
465 bool for_mkfs,
466 const std::string &prefix)
7c673cae 467{
11fdf7f2
TL
468 vector<entity_addrvec_t> addrs;
469 if (!parse_ip_port_vec(
470 ips.c_str(), addrs,
471 entity_addr_t::TYPE_ANY)) {
472 return -EINVAL;
473 }
474 if (addrs.empty())
475 return -ENOENT;
476 for (unsigned i=0; i<addrs.size(); i++) {
477 char n[2];
478 n[0] = 'a' + i;
479 n[1] = 0;
480 string name;
481 name = prefix;
482 name += n;
483 if (addrs[i].v.size() == 1) {
9f95a23c 484 _add_ambiguous_addr(name, addrs[i].front(), 0, 0, for_mkfs);
11fdf7f2
TL
485 } else {
486 // they specified an addrvec, so let's assume they also specified
487 // the addr *type* and *port*. (we could possibly improve this?)
488 add(name, addrs[i], 0);
7c673cae 489 }
7c673cae 490 }
11fdf7f2
TL
491 return 0;
492}
7c673cae 493
11fdf7f2
TL
494int MonMap::init_with_hosts(const std::string& hostlist,
495 bool for_mkfs,
496 const std::string& prefix)
497{
7c673cae 498 // maybe they passed us a DNS-resolvable name
11fdf7f2 499 char *hosts = resolve_addrs(hostlist.c_str());
7c673cae
FG
500 if (!hosts)
501 return -EINVAL;
11fdf7f2
TL
502
503 vector<entity_addrvec_t> addrs;
504 bool success = parse_ip_port_vec(
505 hosts, addrs,
92f5a8d4 506 entity_addr_t::TYPE_ANY);
7c673cae
FG
507 free(hosts);
508 if (!success)
509 return -EINVAL;
7c673cae
FG
510 if (addrs.empty())
511 return -ENOENT;
7c673cae
FG
512 for (unsigned i=0; i<addrs.size(); i++) {
513 char n[2];
514 n[0] = 'a' + i;
515 n[1] = 0;
7c673cae
FG
516 string name = prefix;
517 name += n;
11fdf7f2 518 if (addrs[i].v.size() == 1) {
9f95a23c 519 _add_ambiguous_addr(name, addrs[i].front(), 0, 0, for_mkfs);
11fdf7f2 520 } else {
92f5a8d4
TL
521 // they specified an addrvec, so let's assume they also specified
522 // the addr *type* and *port*. (we could possibly improve this?)
11fdf7f2
TL
523 add(name, addrs[i], 0);
524 }
7c673cae 525 }
11fdf7f2 526 calc_legacy_ranks();
7c673cae
FG
527 return 0;
528}
529
530void MonMap::set_initial_members(CephContext *cct,
531 list<std::string>& initial_members,
11fdf7f2
TL
532 string my_name,
533 const entity_addrvec_t& my_addrs,
534 set<entity_addrvec_t> *removed)
7c673cae
FG
535{
536 // remove non-initial members
537 unsigned i = 0;
538 while (i < size()) {
539 string n = get_name(i);
11fdf7f2
TL
540 if (std::find(initial_members.begin(), initial_members.end(), n)
541 != initial_members.end()) {
542 lgeneric_dout(cct, 1) << " keeping " << n << " " << get_addrs(i) << dendl;
7c673cae
FG
543 i++;
544 continue;
545 }
546
11fdf7f2
TL
547 lgeneric_dout(cct, 1) << " removing " << get_name(i) << " " << get_addrs(i)
548 << dendl;
549 if (removed) {
550 removed->insert(get_addrs(i));
551 }
7c673cae 552 remove(n);
11fdf7f2 553 ceph_assert(!contains(n));
7c673cae
FG
554 }
555
556 // add missing initial members
11fdf7f2
TL
557 for (auto& p : initial_members) {
558 if (!contains(p)) {
559 if (p == my_name) {
560 lgeneric_dout(cct, 1) << " adding self " << p << " " << my_addrs
561 << dendl;
562 add(p, my_addrs);
7c673cae
FG
563 } else {
564 entity_addr_t a;
565 a.set_type(entity_addr_t::TYPE_LEGACY);
566 a.set_family(AF_INET);
567 for (int n=1; ; n++) {
568 a.set_nonce(n);
569 if (!contains(a))
570 break;
571 }
11fdf7f2
TL
572 lgeneric_dout(cct, 1) << " adding " << p << " " << a << dendl;
573 add(p, entity_addrvec_t(a));
7c673cae 574 }
11fdf7f2 575 ceph_assert(contains(p));
7c673cae
FG
576 }
577 }
11fdf7f2 578 calc_legacy_ranks();
7c673cae
FG
579}
580
11fdf7f2
TL
581int MonMap::init_with_config_file(const ConfigProxy& conf,
582 std::ostream& errout)
7c673cae 583{
11fdf7f2
TL
584 std::vector<std::string> sections;
585 int ret = conf.get_all_sections(sections);
7c673cae
FG
586 if (ret) {
587 errout << "Unable to find any monitors in the configuration "
588 << "file, because there was an error listing the sections. error "
589 << ret << std::endl;
590 return -ENOENT;
591 }
11fdf7f2
TL
592 std::vector<std::string> mon_names;
593 for (const auto& section : sections) {
594 if (section.substr(0, 4) == "mon." && section.size() > 4) {
595 mon_names.push_back(section.substr(4));
7c673cae
FG
596 }
597 }
598
599 // Find an address for each monitor in the config file.
11fdf7f2
TL
600 for (const auto& mon_name : mon_names) {
601 std::vector<std::string> sections;
7c673cae
FG
602 std::string m_name("mon");
603 m_name += ".";
11fdf7f2 604 m_name += mon_name;
7c673cae
FG
605 sections.push_back(m_name);
606 sections.push_back("mon");
607 sections.push_back("global");
608 std::string val;
11fdf7f2 609 int res = conf.get_val_from_conf_file(sections, "mon addr", val, true);
7c673cae 610 if (res) {
11fdf7f2
TL
611 errout << "failed to get an address for mon." << mon_name
612 << ": error " << res << std::endl;
7c673cae
FG
613 continue;
614 }
11fdf7f2
TL
615 // the 'mon addr' field is a legacy field, so assume anything
616 // there on a weird port is a v1 address, and do not handle
617 // addrvecs.
7c673cae 618 entity_addr_t addr;
11fdf7f2
TL
619 if (!addr.parse(val.c_str(), nullptr, entity_addr_t::TYPE_LEGACY)) {
620 errout << "unable to parse address for mon." << mon_name
621 << ": addr='" << val << "'" << std::endl;
7c673cae
FG
622 continue;
623 }
11fdf7f2
TL
624 if (addr.get_port() == 0) {
625 addr.set_port(CEPH_MON_PORT_LEGACY);
626 }
224ce89b 627 uint16_t priority = 0;
11fdf7f2 628 if (!conf.get_val_from_conf_file(sections, "mon priority", val, false)) {
224ce89b
WB
629 try {
630 priority = std::stoul(val);
631 } catch (std::logic_error&) {
11fdf7f2 632 errout << "unable to parse priority for mon." << mon_name
224ce89b
WB
633 << ": priority='" << val << "'" << std::endl;
634 continue;
635 }
636 }
9f95a23c
TL
637 uint16_t weight = 0;
638 if (!conf.get_val_from_conf_file(sections, "mon weight", val, false)) {
639 try {
640 weight = std::stoul(val);
641 } catch (std::logic_error&) {
642 errout << "unable to parse weight for mon." << mon_name
643 << ": weight='" << val << "'"
644 << std::endl;
645 continue;
646 }
647 }
11fdf7f2 648
9f95a23c 649 // make sure this mon isn't already in the map
7c673cae
FG
650 if (contains(addr))
651 remove(get_name(addr));
11fdf7f2
TL
652 if (contains(mon_name))
653 remove(mon_name);
9f95a23c 654 _add_ambiguous_addr(mon_name, addr, priority, weight, false);
11fdf7f2
TL
655 }
656 return 0;
657}
658
659#ifdef WITH_SEASTAR
660
661using namespace seastar;
662
663future<> MonMap::read_monmap(const std::string& monmap)
664{
665 return open_file_dma(monmap, open_flags::ro).then([this] (file f) {
666 return f.size().then([this, f = std::move(f)](size_t s) {
667 return do_with(make_file_input_stream(f), [this, s](input_stream<char>& in) {
668 return in.read_exactly(s).then([this](temporary_buffer<char> buf) {
9f95a23c
TL
669 ceph::buffer::list bl;
670 bl.push_back(ceph::buffer::ptr_node::create(
671 ceph::buffer::create(std::move(buf))));
11fdf7f2
TL
672 decode(bl);
673 });
674 });
675 });
676 });
677}
7c673cae 678
11fdf7f2
TL
679future<> MonMap::init_with_dns_srv(bool for_mkfs, const std::string& name)
680{
681 string domain;
682 string service = name;
683 // check if domain is also provided and extract it from srv_name
684 size_t idx = name.find("_");
685 if (idx != name.npos) {
686 domain = name.substr(idx + 1);
687 service = name.substr(0, idx);
7c673cae 688 }
9f95a23c
TL
689 return seastar::net::dns::get_srv_records(
690 seastar::net::dns_resolver::srv_proto::tcp,
691 service, domain).then([this](seastar::net::dns_resolver::srv_records records) {
11fdf7f2 692 return parallel_for_each(records, [this](auto record) {
9f95a23c
TL
693 return seastar::net::dns::resolve_name(record.target).then(
694 [record,this](seastar::net::inet_address a) {
11fdf7f2
TL
695 // the resolved address does not contain ceph specific info like nonce
696 // nonce or msgr proto (legacy, msgr2), so set entity_addr_t manually
697 entity_addr_t addr;
698 addr.set_type(entity_addr_t::TYPE_ANY);
699 addr.set_family(int(a.in_family()));
700 addr.set_port(record.port);
701 switch (a.in_family()) {
9f95a23c 702 case seastar::net::inet_address::family::INET:
11fdf7f2
TL
703 addr.in4_addr().sin_addr = a;
704 break;
9f95a23c 705 case seastar::net::inet_address::family::INET6:
11fdf7f2
TL
706 addr.in6_addr().sin6_addr = a;
707 break;
708 }
9f95a23c
TL
709 _add_ambiguous_addr(record.target,
710 addr,
711 record.priority,
712 record.weight,
713 false);
11fdf7f2
TL
714 });
715 });
716 }).handle_exception_type([](const std::system_error& e) {
717 // ignore DNS failures
718 return seastar::make_ready_future<>();
719 });
720}
7c673cae 721
9f95a23c 722seastar::future<> MonMap::build_monmap(const crimson::common::ConfigProxy& conf,
11fdf7f2
TL
723 bool for_mkfs)
724{
725 // -m foo?
726 if (const auto mon_host = conf.get_val<std::string>("mon_host");
727 !mon_host.empty()) {
728 if (auto ret = init_with_ips(mon_host, for_mkfs, "noname-"); ret == 0) {
729 return make_ready_future<>();
7c673cae 730 }
11fdf7f2
TL
731 // TODO: resolve_addrs() is a blocking call
732 if (auto ret = init_with_hosts(mon_host, for_mkfs, "noname-"); ret == 0) {
733 return make_ready_future<>();
734 } else {
735 throw std::runtime_error(cpp_strerror(ret));
736 }
737 }
7c673cae 738
11fdf7f2
TL
739 // What monitors are in the config file?
740 ostringstream errout;
741 if (auto ret = init_with_config_file(conf, errout); ret < 0) {
742 throw std::runtime_error(errout.str());
743 }
744 if (size() > 0) {
745 return make_ready_future<>();
746 }
747 // no info found from conf options lets try use DNS SRV records
748 const string srv_name = conf.get_val<std::string>("mon_dns_srv_name");
749 return init_with_dns_srv(for_mkfs, srv_name).then([this] {
750 if (size() == 0) {
751 throw std::runtime_error("no monitors specified to connect to.");
752 }
753 });
754}
7c673cae 755
9f95a23c 756future<> MonMap::build_initial(const crimson::common::ConfigProxy& conf, bool for_mkfs)
11fdf7f2
TL
757{
758 // file?
759 if (const auto monmap = conf.get_val<std::string>("monmap");
760 !monmap.empty()) {
761 return read_monmap(monmap);
762 } else {
763 // fsid from conf?
764 if (const auto new_fsid = conf.get_val<uuid_d>("fsid");
765 !new_fsid.is_zero()) {
766 fsid = new_fsid;
7c673cae 767 }
11fdf7f2
TL
768 return build_monmap(conf, for_mkfs).then([this] {
769 created = ceph_clock_now();
770 last_changed = created;
771 calc_legacy_ranks();
772 });
773 }
774}
775
776#else // WITH_SEASTAR
777
778int MonMap::init_with_monmap(const std::string& monmap, std::ostream& errout)
779{
780 int r;
781 try {
782 r = read(monmap.c_str());
9f95a23c 783 } catch (ceph::buffer::error&) {
11fdf7f2
TL
784 r = -EINVAL;
785 }
786 if (r >= 0)
787 return 0;
788 errout << "unable to read/decode monmap from " << monmap
789 << ": " << cpp_strerror(-r) << std::endl;
790 return r;
791}
792
793int MonMap::init_with_dns_srv(CephContext* cct,
794 std::string srv_name,
795 bool for_mkfs,
796 std::ostream& errout)
797{
798 string domain;
799 // check if domain is also provided and extract it from srv_name
800 size_t idx = srv_name.find("_");
801 if (idx != string::npos) {
802 domain = srv_name.substr(idx + 1);
803 srv_name = srv_name.substr(0, idx);
804 }
805
806 map<string, DNSResolver::Record> records;
807 if (DNSResolver::get_instance()->resolve_srv_hosts(cct, srv_name,
808 DNSResolver::SRV_Protocol::TCP, domain, &records) != 0) {
809
810 errout << "unable to get monitor info from DNS SRV with service name: "
811 << "ceph-mon" << std::endl;
812 return -1;
813 } else {
814 for (auto& record : records) {
815 record.second.addr.set_type(entity_addr_t::TYPE_ANY);
9f95a23c
TL
816 _add_ambiguous_addr(record.first,
817 record.second.addr,
818 record.second.priority,
819 record.second.weight,
820 false);
7c673cae 821 }
11fdf7f2
TL
822 return 0;
823 }
824}
825
826int MonMap::build_initial(CephContext *cct, bool for_mkfs, ostream& errout)
827{
828 const auto& conf = cct->_conf;
829 // file?
830 if (const auto monmap = conf.get_val<std::string>("monmap");
831 !monmap.empty()) {
832 return init_with_monmap(monmap, errout);
7c673cae
FG
833 }
834
11fdf7f2
TL
835 // fsid from conf?
836 if (const auto new_fsid = conf.get_val<uuid_d>("fsid");
837 !new_fsid.is_zero()) {
838 fsid = new_fsid;
839 }
840 // -m foo?
841 if (const auto mon_host = conf.get_val<std::string>("mon_host");
842 !mon_host.empty()) {
843 auto ret = init_with_ips(mon_host, for_mkfs, "noname-");
844 if (ret == -EINVAL) {
845 ret = init_with_hosts(mon_host, for_mkfs, "noname-");
846 }
847 if (ret < 0) {
848 errout << "unable to parse addrs in '" << mon_host << "'"
849 << std::endl;
850 return ret;
851 }
852 }
853 if (size() == 0) {
854 // What monitors are in the config file?
855 if (auto ret = init_with_config_file(conf, errout); ret < 0) {
856 return ret;
857 }
858 }
859 if (size() == 0) {
860 // no info found from conf options lets try use DNS SRV records
861 string srv_name = conf.get_val<std::string>("mon_dns_srv_name");
862 if (auto ret = init_with_dns_srv(cct, srv_name, for_mkfs, errout); ret < 0) {
863 return -ENOENT;
864 }
865 }
7c673cae
FG
866 if (size() == 0) {
867 errout << "no monitors specified to connect to." << std::endl;
868 return -ENOENT;
869 }
870 created = ceph_clock_now();
871 last_changed = created;
11fdf7f2 872 calc_legacy_ranks();
7c673cae
FG
873 return 0;
874}
11fdf7f2 875#endif // WITH_SEASTAR