1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
12 #include <seastar/core/fstream.hh>
13 #include <seastar/core/reactor.hh>
14 #include <seastar/net/dns.hh>
15 #include "crimson/common/config_proxy.h"
18 #include "common/Formatter.h"
20 #include "include/ceph_features.h"
21 #include "include/addr_parsing.h"
22 #include "common/ceph_argparse.h"
23 #include "common/dns_resolve.h"
24 #include "common/errno.h"
25 #include "common/dout.h"
26 #include "common/Clock.h"
28 using ceph::Formatter
;
30 void mon_info_t::encode(bufferlist
& bl
, uint64_t features
) const
33 if (!HAVE_FEATURE(features
, SERVER_NAUTILUS
)) {
36 ENCODE_START(v
, 1, bl
);
39 auto a
= public_addrs
.legacy_addr();
40 if (a
!= entity_addr_t()) {
41 encode(a
, bl
, features
);
43 // note: we don't have a legacy addr here, so lie so that it looks
44 // like one, just so that old clients get a valid-looking map.
45 // they won't be able to talk to the v2 mons, but that's better
47 encode(public_addrs
.as_legacy_addr(), bl
, features
);
50 encode(public_addrs
, bl
, features
);
56 void mon_info_t::decode(bufferlist::const_iterator
& p
)
60 decode(public_addrs
, p
);
67 void mon_info_t::print(ostream
& out
) const
70 << " addrs " << public_addrs
71 << " priority " << priority
;
76 bool operator()(const mon_info_t
&a
, const mon_info_t
&b
) const {
77 if (a
.public_addrs
.legacy_or_front_addr() == b
.public_addrs
.legacy_or_front_addr())
78 return a
.name
< b
.name
;
79 return a
.public_addrs
.legacy_or_front_addr() < b
.public_addrs
.legacy_or_front_addr();
84 void MonMap::calc_legacy_ranks()
86 ranks
.resize(mon_info
.size());
88 // Used to order entries according to public_addr, because that's
89 // how the ranks are expected to be ordered. We may expand this
90 // later on, according to some other criteria, by specifying a
91 // different comparator.
93 // Please note that we use a 'set' here instead of resorting to
94 // std::sort() because we need more info than what's available in
95 // the vector. The vector will thus be ordered by, e.g., public_addr
96 // while only containing the names of each individual monitor.
97 // The only way of achieving this with std::sort() would be to first
98 // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo'
99 // with custom comparison functions, and then copy each individual entry
100 // to a new vector. Unless there's a simpler way, we don't think the
101 // added complexity makes up for the additional memory usage of a 'set'.
102 set
<mon_info_t
, rank_cmp
> tmp
;
104 for (map
<string
,mon_info_t
>::iterator p
= mon_info
.begin();
107 mon_info_t
&m
= p
->second
;
111 // map the set to the actual ranks etc
113 for (set
<mon_info_t
>::iterator p
= tmp
.begin();
120 void MonMap::encode(bufferlist
& blist
, uint64_t con_features
) const
122 if ((con_features
& CEPH_FEATURE_MONNAMES
) == 0) {
126 encode_raw(fsid
, blist
);
127 encode(epoch
, blist
);
128 vector
<entity_inst_t
> mon_inst(ranks
.size());
129 for (unsigned n
= 0; n
< ranks
.size(); n
++) {
130 mon_inst
[n
].name
= entity_name_t::MON(n
);
131 mon_inst
[n
].addr
= get_addrs(n
).legacy_addr();
133 encode(mon_inst
, blist
, con_features
);
134 encode(last_changed
, blist
);
135 encode(created
, blist
);
139 map
<string
,entity_addr_t
> legacy_mon_addr
;
140 if (!HAVE_FEATURE(con_features
, MONENC
) ||
141 !HAVE_FEATURE(con_features
, SERVER_NAUTILUS
)) {
142 for (auto& [name
, info
] : mon_info
) {
143 legacy_mon_addr
[name
] = info
.public_addrs
.legacy_addr();
147 if (!HAVE_FEATURE(con_features
, MONENC
)) {
148 /* we keep the mon_addr map when encoding to ensure compatibility
149 * with clients and other monitors that do not yet support the 'mons'
150 * map. This map keeps its original behavior, containing a mapping of
151 * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public
152 * address -- which is obtained from the public address of each entry
158 encode_raw(fsid
, blist
);
159 encode(epoch
, blist
);
160 encode(legacy_mon_addr
, blist
, con_features
);
161 encode(last_changed
, blist
);
162 encode(created
, blist
);
166 if (!HAVE_FEATURE(con_features
, SERVER_NAUTILUS
)) {
167 ENCODE_START(5, 3, blist
);
168 encode_raw(fsid
, blist
);
169 encode(epoch
, blist
);
170 encode(legacy_mon_addr
, blist
, con_features
);
171 encode(last_changed
, blist
);
172 encode(created
, blist
);
173 encode(persistent_features
, blist
);
174 encode(optional_features
, blist
);
175 encode(mon_info
, blist
, con_features
);
176 ENCODE_FINISH(blist
);
180 ENCODE_START(7, 6, blist
);
181 encode_raw(fsid
, blist
);
182 encode(epoch
, blist
);
183 encode(last_changed
, blist
);
184 encode(created
, blist
);
185 encode(persistent_features
, blist
);
186 encode(optional_features
, blist
);
187 encode(mon_info
, blist
, con_features
);
188 encode(ranks
, blist
);
189 encode(min_mon_release
, blist
);
190 ENCODE_FINISH(blist
);
193 void MonMap::decode(bufferlist::const_iterator
& p
)
195 map
<string
,entity_addr_t
> mon_addr
;
196 DECODE_START_LEGACY_COMPAT_LEN_16(7, 3, 3, p
);
200 vector
<entity_inst_t
> mon_inst
;
202 for (unsigned i
= 0; i
< mon_inst
.size(); i
++) {
207 mon_addr
[name
] = mon_inst
[i
].addr
;
209 } else if (struct_v
< 6) {
212 decode(last_changed
, p
);
215 decode(persistent_features
, p
);
216 decode(optional_features
, p
);
219 // generate mon_info from legacy mon_addr
220 for (auto& [name
, addr
] : mon_addr
) {
221 mon_info_t
&m
= mon_info
[name
];
223 m
.public_addrs
= entity_addrvec_t(addr
);
234 decode(min_mon_release
, p
);
236 min_mon_release
= infer_ceph_release_from_mon_features(persistent_features
);
242 void MonMap::generate_test_instances(list
<MonMap
*>& o
)
244 o
.push_back(new MonMap
);
245 o
.push_back(new MonMap
);
247 o
.back()->last_changed
= utime_t(123, 456);
248 o
.back()->created
= utime_t(789, 101112);
249 o
.back()->add("one", entity_addrvec_t());
251 MonMap
*m
= new MonMap
;
254 m
->last_changed
= utime_t(123, 456);
256 entity_addrvec_t empty_addr_one
= entity_addrvec_t(entity_addr_t());
257 empty_addr_one
.v
[0].set_nonce(1);
258 m
->add("empty_addr_one", empty_addr_one
);
259 entity_addrvec_t empty_addr_two
= entity_addrvec_t(entity_addr_t());
260 empty_addr_two
.v
[0].set_nonce(2);
261 m
->add("empty_addr_two", empty_addr_two
);
263 const char *local_pub_addr_s
= "127.0.1.2";
265 const char *end_p
= local_pub_addr_s
+ strlen(local_pub_addr_s
);
266 entity_addrvec_t local_pub_addr
;
267 local_pub_addr
.parse(local_pub_addr_s
, &end_p
);
269 m
->add(mon_info_t("filled_pub_addr", entity_addrvec_t(local_pub_addr
), 1));
271 m
->add("empty_addr_zero", entity_addrvec_t());
276 // read from/write to a file
277 int MonMap::write(const char *fn
)
281 encode(bl
, CEPH_FEATURES_ALL
);
283 return bl
.write_file(fn
);
286 int MonMap::read(const char *fn
)
291 int r
= bl
.read_file(fn
, &error
);
298 void MonMap::print_summary(ostream
& out
) const
300 out
<< "e" << epoch
<< ": "
301 << mon_info
.size() << " mons at {";
302 // the map that we used to print, as it was, no longer
303 // maps strings to the monitor's public address, but to
304 // mon_info_t instead. As such, print the map in a way
305 // that keeps the expected format.
306 bool has_printed
= false;
307 for (map
<string
,mon_info_t
>::const_iterator p
= mon_info
.begin();
312 out
<< p
->first
<< "=" << p
->second
.public_addrs
;
318 void MonMap::print(ostream
& out
) const
320 out
<< "epoch " << epoch
<< "\n";
321 out
<< "fsid " << fsid
<< "\n";
322 out
<< "last_changed " << last_changed
<< "\n";
323 out
<< "created " << created
<< "\n";
324 out
<< "min_mon_release " << (int)min_mon_release
325 << " (" << ceph_release_name(min_mon_release
) << ")\n";
327 for (vector
<string
>::const_iterator p
= ranks
.begin();
330 out
<< i
++ << ": " << get_addrs(*p
) << " mon." << *p
<< "\n";
334 void MonMap::dump(Formatter
*f
) const
336 f
->dump_unsigned("epoch", epoch
);
337 f
->dump_stream("fsid") << fsid
;
338 f
->dump_stream("modified") << last_changed
;
339 f
->dump_stream("created") << created
;
340 f
->dump_unsigned("min_mon_release", min_mon_release
);
341 f
->dump_string("min_mon_release_name", ceph_release_name(min_mon_release
));
342 f
->open_object_section("features");
343 persistent_features
.dump(f
, "persistent");
344 optional_features
.dump(f
, "optional");
346 f
->open_array_section("mons");
348 for (vector
<string
>::const_iterator p
= ranks
.begin();
351 f
->open_object_section("mon");
352 f
->dump_int("rank", i
);
353 f
->dump_string("name", *p
);
354 f
->dump_object("public_addrs", get_addrs(*p
));
355 // compat: make these look like pre-nautilus entity_addr_t
356 f
->dump_stream("addr") << get_addrs(*p
).get_legacy_str();
357 f
->dump_stream("public_addr") << get_addrs(*p
).get_legacy_str();
363 // an ambiguous mon addr may be legacy or may be msgr2--we aren't sure.
364 // when that happens we need to try them both (unless we can
365 // reasonably infer from the port number which it is).
366 void MonMap::_add_ambiguous_addr(const string
& name
,
371 if (addr
.get_type() != entity_addr_t::TYPE_ANY
) {
372 // a v1: or v2: prefix was specified
373 if (addr
.get_port() == 0) {
375 if (addr
.get_type() == entity_addr_t::TYPE_ANY
) {
376 addr
.set_port(CEPH_MON_PORT_IANA
);
377 } else if (addr
.get_type() == entity_addr_t::TYPE_LEGACY
) {
378 addr
.set_port(CEPH_MON_PORT_LEGACY
);
379 } else if (addr
.get_type() == entity_addr_t::TYPE_MSGR2
) {
380 addr
.set_port(CEPH_MON_PORT_IANA
);
385 if (!contains(addr
)) {
386 add(name
, entity_addrvec_t(addr
));
389 if (!contains(addr
)) {
390 add(name
, entity_addrvec_t(addr
), priority
);
394 // no v1: or v2: prefix specified
395 if (addr
.get_port() == CEPH_MON_PORT_LEGACY
) {
396 // legacy port implies legacy addr
397 addr
.set_type(entity_addr_t::TYPE_LEGACY
);
398 if (!contains(addr
)) {
400 add(name
+ "-legacy", entity_addrvec_t(addr
));
402 add(name
, entity_addrvec_t(addr
));
405 } else if (addr
.get_port() == CEPH_MON_PORT_IANA
) {
406 // iana port implies msgr2 addr
407 addr
.set_type(entity_addr_t::TYPE_MSGR2
);
408 if (!contains(addr
)) {
409 add(name
, entity_addrvec_t(addr
));
411 } else if (addr
.get_port() == 0) {
412 // no port; include both msgr2 and legacy ports
414 addr
.set_type(entity_addr_t::TYPE_MSGR2
);
415 addr
.set_port(CEPH_MON_PORT_IANA
);
416 if (!contains(addr
)) {
417 add(name
, entity_addrvec_t(addr
));
419 addr
.set_type(entity_addr_t::TYPE_LEGACY
);
420 addr
.set_port(CEPH_MON_PORT_LEGACY
);
421 if (!contains(addr
)) {
422 add(name
+ "-legacy", entity_addrvec_t(addr
));
426 addr
.set_type(entity_addr_t::TYPE_MSGR2
);
427 addr
.set_port(CEPH_MON_PORT_IANA
);
428 av
.v
.push_back(addr
);
429 addr
.set_type(entity_addr_t::TYPE_LEGACY
);
430 addr
.set_port(CEPH_MON_PORT_LEGACY
);
431 av
.v
.push_back(addr
);
437 addr
.set_type(entity_addr_t::TYPE_MSGR2
);
438 if (!contains(addr
)) {
439 add(name
, entity_addrvec_t(addr
), priority
);
442 // try legacy on same port too
443 addr
.set_type(entity_addr_t::TYPE_LEGACY
);
444 if (!contains(addr
)) {
445 add(name
+ "-legacy", entity_addrvec_t(addr
), priority
);
452 int MonMap::init_with_ips(const std::string
& ips
,
454 const std::string
&prefix
)
456 vector
<entity_addrvec_t
> addrs
;
457 if (!parse_ip_port_vec(
459 entity_addr_t::TYPE_ANY
)) {
464 for (unsigned i
=0; i
<addrs
.size(); i
++) {
471 if (addrs
[i
].v
.size() == 1) {
472 _add_ambiguous_addr(name
, addrs
[i
].front(), 0, for_mkfs
);
474 // they specified an addrvec, so let's assume they also specified
475 // the addr *type* and *port*. (we could possibly improve this?)
476 add(name
, addrs
[i
], 0);
482 int MonMap::init_with_hosts(const std::string
& hostlist
,
484 const std::string
& prefix
)
486 // maybe they passed us a DNS-resolvable name
487 char *hosts
= resolve_addrs(hostlist
.c_str());
491 vector
<entity_addrvec_t
> addrs
;
492 bool success
= parse_ip_port_vec(
494 entity_addr_t::TYPE_ANY
);
500 for (unsigned i
=0; i
<addrs
.size(); i
++) {
504 string name
= prefix
;
506 if (addrs
[i
].v
.size() == 1) {
507 _add_ambiguous_addr(name
, addrs
[i
].front(), 0, for_mkfs
);
509 // they specified an addrvec, so let's assume they also specified
510 // the addr *type* and *port*. (we could possibly improve this?)
511 add(name
, addrs
[i
], 0);
518 void MonMap::set_initial_members(CephContext
*cct
,
519 list
<std::string
>& initial_members
,
521 const entity_addrvec_t
& my_addrs
,
522 set
<entity_addrvec_t
> *removed
)
524 // remove non-initial members
527 string n
= get_name(i
);
528 if (std::find(initial_members
.begin(), initial_members
.end(), n
)
529 != initial_members
.end()) {
530 lgeneric_dout(cct
, 1) << " keeping " << n
<< " " << get_addrs(i
) << dendl
;
535 lgeneric_dout(cct
, 1) << " removing " << get_name(i
) << " " << get_addrs(i
)
538 removed
->insert(get_addrs(i
));
541 ceph_assert(!contains(n
));
544 // add missing initial members
545 for (auto& p
: initial_members
) {
548 lgeneric_dout(cct
, 1) << " adding self " << p
<< " " << my_addrs
553 a
.set_type(entity_addr_t::TYPE_LEGACY
);
554 a
.set_family(AF_INET
);
555 for (int n
=1; ; n
++) {
560 lgeneric_dout(cct
, 1) << " adding " << p
<< " " << a
<< dendl
;
561 add(p
, entity_addrvec_t(a
));
563 ceph_assert(contains(p
));
569 int MonMap::init_with_config_file(const ConfigProxy
& conf
,
570 std::ostream
& errout
)
572 std::vector
<std::string
> sections
;
573 int ret
= conf
.get_all_sections(sections
);
575 errout
<< "Unable to find any monitors in the configuration "
576 << "file, because there was an error listing the sections. error "
580 std::vector
<std::string
> mon_names
;
581 for (const auto& section
: sections
) {
582 if (section
.substr(0, 4) == "mon." && section
.size() > 4) {
583 mon_names
.push_back(section
.substr(4));
587 // Find an address for each monitor in the config file.
588 for (const auto& mon_name
: mon_names
) {
589 std::vector
<std::string
> sections
;
590 std::string
m_name("mon");
593 sections
.push_back(m_name
);
594 sections
.push_back("mon");
595 sections
.push_back("global");
597 int res
= conf
.get_val_from_conf_file(sections
, "mon addr", val
, true);
599 errout
<< "failed to get an address for mon." << mon_name
600 << ": error " << res
<< std::endl
;
603 // the 'mon addr' field is a legacy field, so assume anything
604 // there on a weird port is a v1 address, and do not handle
607 if (!addr
.parse(val
.c_str(), nullptr, entity_addr_t::TYPE_LEGACY
)) {
608 errout
<< "unable to parse address for mon." << mon_name
609 << ": addr='" << val
<< "'" << std::endl
;
612 if (addr
.get_port() == 0) {
613 addr
.set_port(CEPH_MON_PORT_LEGACY
);
615 uint16_t priority
= 0;
616 if (!conf
.get_val_from_conf_file(sections
, "mon priority", val
, false)) {
618 priority
= std::stoul(val
);
619 } catch (std::logic_error
&) {
620 errout
<< "unable to parse priority for mon." << mon_name
621 << ": priority='" << val
<< "'" << std::endl
;
626 // make sure this mon isn't already in the map
628 remove(get_name(addr
));
629 if (contains(mon_name
))
631 _add_ambiguous_addr(mon_name
, addr
, priority
);
638 using namespace seastar
;
640 future
<> MonMap::read_monmap(const std::string
& monmap
)
642 return open_file_dma(monmap
, open_flags::ro
).then([this] (file f
) {
643 return f
.size().then([this, f
= std::move(f
)](size_t s
) {
644 return do_with(make_file_input_stream(f
), [this, s
](input_stream
<char>& in
) {
645 return in
.read_exactly(s
).then([this](temporary_buffer
<char> buf
) {
647 bl
.append(buffer::create(std::move(buf
)));
655 future
<> MonMap::init_with_dns_srv(bool for_mkfs
, const std::string
& name
)
658 string service
= name
;
659 // check if domain is also provided and extract it from srv_name
660 size_t idx
= name
.find("_");
661 if (idx
!= name
.npos
) {
662 domain
= name
.substr(idx
+ 1);
663 service
= name
.substr(0, idx
);
665 return net::dns::get_srv_records(
666 net::dns_resolver::srv_proto::tcp
,
667 service
, domain
).then([this](net::dns_resolver::srv_records records
) {
668 return parallel_for_each(records
, [this](auto record
) {
669 return net::dns::resolve_name(record
.target
).then(
670 [record
,this](net::inet_address a
) {
671 // the resolved address does not contain ceph specific info like
672 // nonce or msgr proto (legacy, msgr2), so set entity_addr_t manually
674 addr
.set_type(entity_addr_t::TYPE_ANY
);
675 addr
.set_family(int(a
.in_family()));
676 addr
.set_port(record
.port
);
677 switch (a
.in_family()) {
678 case net::inet_address::family::INET
:
679 addr
.in4_addr().sin_addr
= a
;
681 case net::inet_address::family::INET6
:
682 addr
.in6_addr().sin6_addr
= a
;
685 _add_ambiguous_addr(record
.target
, addr
, record
.priority
);
688 }).handle_exception_type([](const std::system_error
& e
) {
689 // ignore DNS failures
690 return seastar::make_ready_future
<>();
694 seastar::future
<> MonMap::build_monmap(const ceph::common::ConfigProxy
& conf
,
698 if (const auto mon_host
= conf
.get_val
<std::string
>("mon_host");
700 if (auto ret
= init_with_ips(mon_host
, for_mkfs
, "noname-"); ret
== 0) {
701 return make_ready_future
<>();
703 // TODO: resolve_addrs() is a blocking call
704 if (auto ret
= init_with_hosts(mon_host
, for_mkfs
, "noname-"); ret
== 0) {
705 return make_ready_future
<>();
707 throw std::runtime_error(cpp_strerror(ret
));
711 // What monitors are in the config file?
712 ostringstream errout
;
713 if (auto ret
= init_with_config_file(conf
, errout
); ret
< 0) {
714 throw std::runtime_error(errout
.str());
717 return make_ready_future
<>();
719 // no info found from conf options; let's try to use DNS SRV records
720 const string srv_name
= conf
.get_val
<std::string
>("mon_dns_srv_name");
721 return init_with_dns_srv(for_mkfs
, srv_name
).then([this] {
723 throw std::runtime_error("no monitors specified to connect to.");
728 future
<> MonMap::build_initial(const ceph::common::ConfigProxy
& conf
, bool for_mkfs
)
731 if (const auto monmap
= conf
.get_val
<std::string
>("monmap");
733 return read_monmap(monmap
);
736 if (const auto new_fsid
= conf
.get_val
<uuid_d
>("fsid");
737 !new_fsid
.is_zero()) {
740 return build_monmap(conf
, for_mkfs
).then([this] {
741 created
= ceph_clock_now();
742 last_changed
= created
;
748 #else // WITH_SEASTAR
750 int MonMap::init_with_monmap(const std::string
& monmap
, std::ostream
& errout
)
754 r
= read(monmap
.c_str());
755 } catch (buffer::error
&) {
760 errout
<< "unable to read/decode monmap from " << monmap
761 << ": " << cpp_strerror(-r
) << std::endl
;
765 int MonMap::init_with_dns_srv(CephContext
* cct
,
766 std::string srv_name
,
768 std::ostream
& errout
)
771 // check if domain is also provided and extract it from srv_name
772 size_t idx
= srv_name
.find("_");
773 if (idx
!= string::npos
) {
774 domain
= srv_name
.substr(idx
+ 1);
775 srv_name
= srv_name
.substr(0, idx
);
778 map
<string
, DNSResolver::Record
> records
;
779 if (DNSResolver::get_instance()->resolve_srv_hosts(cct
, srv_name
,
780 DNSResolver::SRV_Protocol::TCP
, domain
, &records
) != 0) {
782 errout
<< "unable to get monitor info from DNS SRV with service name: "
783 << "ceph-mon" << std::endl
;
786 for (auto& record
: records
) {
787 record
.second
.addr
.set_type(entity_addr_t::TYPE_ANY
);
788 _add_ambiguous_addr(record
.first
, record
.second
.addr
,
789 record
.second
.priority
);
795 int MonMap::build_initial(CephContext
*cct
, bool for_mkfs
, ostream
& errout
)
797 const auto& conf
= cct
->_conf
;
799 if (const auto monmap
= conf
.get_val
<std::string
>("monmap");
801 return init_with_monmap(monmap
, errout
);
805 if (const auto new_fsid
= conf
.get_val
<uuid_d
>("fsid");
806 !new_fsid
.is_zero()) {
810 if (const auto mon_host
= conf
.get_val
<std::string
>("mon_host");
812 auto ret
= init_with_ips(mon_host
, for_mkfs
, "noname-");
813 if (ret
== -EINVAL
) {
814 ret
= init_with_hosts(mon_host
, for_mkfs
, "noname-");
817 errout
<< "unable to parse addrs in '" << mon_host
<< "'"
823 // What monitors are in the config file?
824 if (auto ret
= init_with_config_file(conf
, errout
); ret
< 0) {
829 // no info found from conf options; let's try to use DNS SRV records
830 string srv_name
= conf
.get_val
<std::string
>("mon_dns_srv_name");
831 if (auto ret
= init_with_dns_srv(cct
, srv_name
, for_mkfs
, errout
); ret
< 0) {
836 errout
<< "no monitors specified to connect to." << std::endl
;
839 created
= ceph_clock_now();
840 last_changed
= created
;
844 #endif // WITH_SEASTAR