9 #include "common/Formatter.h"
11 #include "include/ceph_features.h"
12 #include "include/addr_parsing.h"
13 #include "common/ceph_argparse.h"
14 #include "common/dns_resolve.h"
15 #include "common/errno.h"
17 #include "common/dout.h"
19 using ceph::Formatter
;
21 void mon_info_t::encode(bufferlist
& bl
, uint64_t features
) const
23 ENCODE_START(2, 1, bl
);
25 ::encode(public_addr
, bl
, features
);
26 ::encode(priority
, bl
);
30 void mon_info_t::decode(bufferlist::iterator
& p
)
34 ::decode(public_addr
, p
);
36 ::decode(priority
, p
);
41 void mon_info_t::print(ostream
& out
) const
44 << " public " << public_addr
45 << " priority " << priority
;
48 void MonMap::sanitize_mons(map
<string
,entity_addr_t
>& o
)
50 // if mon_info is populated, it means we decoded a map encoded
51 // by someone who understands the new format (i.e., is able to
52 // encode 'mon_info'). This means they must also have provided
53 // a properly populated 'mon_addr' (which we have dropped with
54 // this patch), 'o' being the contents of said map. In this
55 // case, 'o' must have the same number of entries as 'mon_info'.
57 // Also, for each entry in 'o', there has to be a matching
58 // 'mon_info' entry, properly populated with a name and a matching
61 // OTOH, if 'mon_info' is not populated, it means the one that
62 // originally encoded the map does not know the new format, and
63 // 'o' will be our only source of info about the monitors in the
64 // cluster -- and we will use it to populate our 'mon_info' map.
66 bool has_mon_info
= false;
67 if (mon_info
.size() > 0) {
68 assert(o
.size() == mon_info
.size());
74 // make sure the info we have is accurate
75 assert(mon_info
.count(p
.first
));
76 assert(mon_info
[p
.first
].name
== p
.first
);
77 assert(mon_info
[p
.first
].public_addr
== p
.second
);
79 mon_info_t
&m
= mon_info
[p
.first
];
81 m
.public_addr
= p
.second
;
88 bool operator()(const mon_info_t
&a
, const mon_info_t
&b
) const {
89 if (a
.public_addr
== b
.public_addr
)
90 return a
.name
< b
.name
;
91 return a
.public_addr
< b
.public_addr
;
96 void MonMap::calc_ranks() {
98 ranks
.resize(mon_info
.size());
101 // Used to order entries according to public_addr, because that's
102 // how the ranks are expected to be ordered. We may expand this
103 // later on, according to some other criteria, by specifying a
104 // different comparator.
106 // Please note that we use a 'set' here instead of resorting to
107 // std::sort() because we need more info than is available in
108 // the vector. The vector will thus be ordered by, e.g., public_addr
109 // while only containing the names of each individual monitor.
110 // The only way of achieving this with std::sort() would be to first
111 // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo'
112 // with custom comparison functions, and then copy each individual entry
113 // to a new vector. Unless there's a simpler way, we don't think the
114 // added complexity makes up for the additional memory usage of a 'set'.
115 set
<mon_info_t
, rank_cmp
> tmp
;
117 for (map
<string
,mon_info_t
>::iterator p
= mon_info
.begin();
120 mon_info_t
&m
= p
->second
;
123 // populate addr_mons
124 assert(addr_mons
.count(m
.public_addr
) == 0);
125 addr_mons
[m
.public_addr
] = m
.name
;
128 // map the set to the actual ranks etc
130 for (set
<mon_info_t
>::iterator p
= tmp
.begin();
137 void MonMap::encode(bufferlist
& blist
, uint64_t con_features
) const
139 /* we keep the mon_addr map when encoding to ensure compatibility
140 * with clients and other monitors that do not yet support the 'mons'
141 * map. This map keeps its original behavior, containing a mapping of
142 * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public
143 * address -- which is obtained from the public address of each entry
146 map
<string
,entity_addr_t
> mon_addr
;
147 for (map
<string
,mon_info_t
>::const_iterator p
= mon_info
.begin();
150 mon_addr
[p
->first
] = p
->second
.public_addr
;
153 if ((con_features
& CEPH_FEATURE_MONNAMES
) == 0) {
156 ::encode_raw(fsid
, blist
);
157 ::encode(epoch
, blist
);
158 vector
<entity_inst_t
> mon_inst(mon_addr
.size());
159 for (unsigned n
= 0; n
< mon_addr
.size(); n
++)
160 mon_inst
[n
] = get_inst(n
);
161 ::encode(mon_inst
, blist
, con_features
);
162 ::encode(last_changed
, blist
);
163 ::encode(created
, blist
);
167 if ((con_features
& CEPH_FEATURE_MONENC
) == 0) {
170 ::encode_raw(fsid
, blist
);
171 ::encode(epoch
, blist
);
172 ::encode(mon_addr
, blist
, con_features
);
173 ::encode(last_changed
, blist
);
174 ::encode(created
, blist
);
177 ENCODE_START(5, 3, blist
);
178 ::encode_raw(fsid
, blist
);
179 ::encode(epoch
, blist
);
180 ::encode(mon_addr
, blist
, con_features
);
181 ::encode(last_changed
, blist
);
182 ::encode(created
, blist
);
183 ::encode(persistent_features
, blist
);
184 ::encode(optional_features
, blist
);
185 // this supersedes 'mon_addr'
186 ::encode(mon_info
, blist
, con_features
);
187 ENCODE_FINISH(blist
);
190 void MonMap::decode(bufferlist::iterator
&p
)
192 map
<string
,entity_addr_t
> mon_addr
;
193 DECODE_START_LEGACY_COMPAT_LEN_16(5, 3, 3, p
);
194 ::decode_raw(fsid
, p
);
197 vector
<entity_inst_t
> mon_inst
;
198 ::decode(mon_inst
, p
);
199 for (unsigned i
= 0; i
< mon_inst
.size(); i
++) {
204 mon_addr
[name
] = mon_inst
[i
].addr
;
207 ::decode(mon_addr
, p
);
209 ::decode(last_changed
, p
);
210 ::decode(created
, p
);
212 ::decode(persistent_features
, p
);
213 ::decode(optional_features
, p
);
216 ::decode(mon_info
, p
);
218 // we may be decoding to an existing monmap; if we do not
219 // clear the mon_info map now, we will likely run into problems
220 // later on in MonMap::sanitize_mons()
224 sanitize_mons(mon_addr
);
228 void MonMap::generate_test_instances(list
<MonMap
*>& o
)
230 o
.push_back(new MonMap
);
231 o
.push_back(new MonMap
);
233 o
.back()->last_changed
= utime_t(123, 456);
234 o
.back()->created
= utime_t(789, 101112);
235 o
.back()->add("one", entity_addr_t());
237 MonMap
*m
= new MonMap
;
240 m
->last_changed
= utime_t(123, 456);
242 entity_addr_t empty_addr_one
;
243 empty_addr_one
.set_nonce(1);
244 m
->add("empty_addr_one", empty_addr_one
);
245 entity_addr_t empty_addr_two
;
246 empty_addr_two
.set_nonce(2);
247 m
->add("empty_adrr_two", empty_addr_two
);
249 const char *local_pub_addr_s
= "127.0.1.2";
251 const char *end_p
= local_pub_addr_s
+ strlen(local_pub_addr_s
);
252 entity_addr_t local_pub_addr
;
253 local_pub_addr
.parse(local_pub_addr_s
, &end_p
);
255 m
->add(mon_info_t("filled_pub_addr", local_pub_addr
, 1));
257 m
->add("empty_addr_zero", entity_addr_t());
262 // read from/write to a file
263 int MonMap::write(const char *fn
)
267 encode(bl
, CEPH_FEATURES_ALL
);
269 return bl
.write_file(fn
);
272 int MonMap::read(const char *fn
)
277 int r
= bl
.read_file(fn
, &error
);
284 void MonMap::print_summary(ostream
& out
) const
286 out
<< "e" << epoch
<< ": "
287 << mon_info
.size() << " mons at {";
288 // the map that we used to print, as it was, no longer
289 // maps strings to the monitor's public address, but to
290 // mon_info_t instead. As such, print the map in a way
291 // that keeps the expected format.
292 bool has_printed
= false;
293 for (map
<string
,mon_info_t
>::const_iterator p
= mon_info
.begin();
298 out
<< p
->first
<< "=" << p
->second
.public_addr
;
304 void MonMap::print(ostream
& out
) const
306 out
<< "epoch " << epoch
<< "\n";
307 out
<< "fsid " << fsid
<< "\n";
308 out
<< "last_changed " << last_changed
<< "\n";
309 out
<< "created " << created
<< "\n";
311 for (vector
<string
>::const_iterator p
= ranks
.begin();
314 out
<< i
++ << ": " << get_addr(*p
) << " mon." << *p
<< "\n";
318 void MonMap::dump(Formatter
*f
) const
320 f
->dump_unsigned("epoch", epoch
);
321 f
->dump_stream("fsid") << fsid
;
322 f
->dump_stream("modified") << last_changed
;
323 f
->dump_stream("created") << created
;
324 f
->open_object_section("features");
325 persistent_features
.dump(f
, "persistent");
326 optional_features
.dump(f
, "optional");
328 f
->open_array_section("mons");
330 for (vector
<string
>::const_iterator p
= ranks
.begin();
333 f
->open_object_section("mon");
334 f
->dump_int("rank", i
);
335 f
->dump_string("name", *p
);
336 f
->dump_stream("addr") << get_addr(*p
);
337 f
->dump_stream("public_addr") << get_addr(*p
);
344 int MonMap::build_from_host_list(std::string hostlist
, std::string prefix
)
346 vector
<entity_addr_t
> addrs
;
347 if (parse_ip_port_vec(hostlist
.c_str(), addrs
)) {
350 for (unsigned i
=0; i
<addrs
.size(); i
++) {
354 if (addrs
[i
].get_port() == 0)
355 addrs
[i
].set_port(CEPH_MON_PORT
);
356 string name
= prefix
;
358 if (!contains(addrs
[i
]))
364 // maybe they passed us a DNS-resolvable name
366 hosts
= resolve_addrs(hostlist
.c_str());
369 bool success
= parse_ip_port_vec(hosts
, addrs
);
377 for (unsigned i
=0; i
<addrs
.size(); i
++) {
381 if (addrs
[i
].get_port() == 0)
382 addrs
[i
].set_port(CEPH_MON_PORT
);
383 string name
= prefix
;
385 if (!contains(addrs
[i
]) &&
392 void MonMap::set_initial_members(CephContext
*cct
,
393 list
<std::string
>& initial_members
,
394 string my_name
, const entity_addr_t
& my_addr
,
395 set
<entity_addr_t
> *removed
)
397 // remove non-initial members
400 string n
= get_name(i
);
401 if (std::find(initial_members
.begin(), initial_members
.end(), n
) != initial_members
.end()) {
402 lgeneric_dout(cct
, 1) << " keeping " << n
<< " " << get_addr(i
) << dendl
;
407 lgeneric_dout(cct
, 1) << " removing " << get_name(i
) << " " << get_addr(i
) << dendl
;
409 removed
->insert(get_addr(i
));
411 assert(!contains(n
));
414 // add missing initial members
415 for (list
<string
>::iterator p
= initial_members
.begin(); p
!= initial_members
.end(); ++p
) {
418 lgeneric_dout(cct
, 1) << " adding self " << *p
<< " " << my_addr
<< dendl
;
422 a
.set_type(entity_addr_t::TYPE_LEGACY
);
423 a
.set_family(AF_INET
);
424 for (int n
=1; ; n
++) {
429 lgeneric_dout(cct
, 1) << " adding " << *p
<< " " << a
<< dendl
;
432 assert(contains(*p
));
438 int MonMap::build_initial(CephContext
*cct
, ostream
& errout
)
440 const md_config_t
*conf
= cct
->_conf
;
442 if (!conf
->monmap
.empty()) {
445 r
= read(conf
->monmap
.c_str());
447 catch (const buffer::error
&e
) {
452 errout
<< "unable to read/decode monmap from " << conf
->monmap
453 << ": " << cpp_strerror(-r
) << std::endl
;
458 if (!cct
->_conf
->fsid
.is_zero()) {
459 fsid
= cct
->_conf
->fsid
;
463 if (!conf
->mon_host
.empty()) {
464 int r
= build_from_host_list(conf
->mon_host
, "noname-");
466 errout
<< "unable to parse addrs in '" << conf
->mon_host
<< "'"
470 created
= ceph_clock_now();
471 last_changed
= created
;
475 // What monitors are in the config file?
476 std::vector
<std::string
> sections
;
477 int ret
= conf
->get_all_sections(sections
);
479 errout
<< "Unable to find any monitors in the configuration "
480 << "file, because there was an error listing the sections. error "
484 std::vector
<std::string
> mon_names
;
485 for (std::vector
<std::string
>::const_iterator s
= sections
.begin();
486 s
!= sections
.end(); ++s
) {
487 if ((s
->substr(0, 4) == "mon.") && (s
->size() > 4)) {
488 mon_names
.push_back(s
->substr(4));
492 // Find an address for each monitor in the config file.
493 for (std::vector
<std::string
>::const_iterator m
= mon_names
.begin();
494 m
!= mon_names
.end(); ++m
) {
495 std::vector
<std::string
> sections
;
496 std::string
m_name("mon");
499 sections
.push_back(m_name
);
500 sections
.push_back("mon");
501 sections
.push_back("global");
503 int res
= conf
->get_val_from_conf_file(sections
, "mon addr", val
, true);
505 errout
<< "failed to get an address for mon." << *m
<< ": error "
510 if (!addr
.parse(val
.c_str())) {
511 errout
<< "unable to parse address for mon." << *m
512 << ": addr='" << val
<< "'" << std::endl
;
515 if (addr
.get_port() == 0)
516 addr
.set_port(CEPH_MON_PORT
);
518 uint16_t priority
= 0;
519 if (!conf
->get_val_from_conf_file(sections
, "mon priority", val
, false)) {
521 priority
= std::stoul(val
);
522 } catch (std::logic_error
&) {
523 errout
<< "unable to parse priority for mon." << *m
524 << ": priority='" << val
<< "'" << std::endl
;
528 // make sure this mon isn't already in the map
530 remove(get_name(addr
));
534 add(mon_info_t
{*m
, addr
, priority
});
538 // no info found from conf options; let's try to use DNS SRV records
539 string srv_name
= conf
->mon_dns_srv_name
;
541 // check if domain is also provided and extract it from srv_name
542 size_t idx
= srv_name
.find("_");
543 if (idx
!= string::npos
) {
544 domain
= srv_name
.substr(idx
+ 1);
545 srv_name
= srv_name
.substr(0, idx
);
548 map
<string
, DNSResolver::Record
> records
;
549 if (DNSResolver::get_instance()->resolve_srv_hosts(cct
, srv_name
,
550 DNSResolver::SRV_Protocol::TCP
, domain
, &records
) != 0) {
552 errout
<< "unable to get monitor info from DNS SRV with service name: " <<
553 "ceph-mon" << std::endl
;
556 for (const auto& record
: records
) {
557 add(mon_info_t
{record
.first
,
559 record
.second
.priority
});
565 errout
<< "no monitors specified to connect to." << std::endl
;
568 created
= ceph_clock_now();
569 last_changed
= created
;