9 #include "common/Formatter.h"
11 #include "include/ceph_features.h"
12 #include "include/addr_parsing.h"
13 #include "common/ceph_argparse.h"
14 #include "common/dns_resolve.h"
15 #include "common/errno.h"
17 #include "common/dout.h"
19 using ceph::Formatter
;
21 void mon_info_t::encode(bufferlist
& bl
, uint64_t features
) const
23 ENCODE_START(1, 1, bl
);
25 ::encode(public_addr
, bl
, features
);
29 void mon_info_t::decode(bufferlist::iterator
& p
)
33 ::decode(public_addr
, p
);
37 void mon_info_t::print(ostream
& out
) const
40 << " public " << public_addr
;
43 void MonMap::sanitize_mons(map
<string
,entity_addr_t
>& o
)
45 // if mon_info is populated, it means we decoded a map encoded
46 // by someone who understands the new format (i.e., is able to
47 // encode 'mon_info'). This means they must also have provided
48 // a properly populated 'mon_addr' (which we have dropped with
49 // this patch), 'o' being the contents of said map. In this
50 // case, 'o' must have the same number of entries as 'mon_info'.
52 // Also, for each entry in 'o', there has to be a matching
53 // 'mon_info' entry, properly populated with a name and a matching
56 // OTOH, if 'mon_info' is not populated, it means the one that
57 // originally encoded the map does not know the new format, and
58 // 'o' will be our only source of info about the monitors in the
59 // cluster -- and we will use it to populate our 'mon_info' map.
61 bool has_mon_info
= false;
62 if (mon_info
.size() > 0) {
63 assert(o
.size() == mon_info
.size());
69 // make sure the info we have is accurate
70 assert(mon_info
.count(p
.first
));
71 assert(mon_info
[p
.first
].name
== p
.first
);
72 assert(mon_info
[p
.first
].public_addr
== p
.second
);
74 mon_info_t
&m
= mon_info
[p
.first
];
76 m
.public_addr
= p
.second
;
83 bool operator()(const mon_info_t
&a
, const mon_info_t
&b
) const {
84 if (a
.public_addr
== b
.public_addr
)
85 return a
.name
< b
.name
;
86 return a
.public_addr
< b
.public_addr
;
91 void MonMap::calc_ranks() {
93 ranks
.resize(mon_info
.size());
96 // Used to order entries according to public_addr, because that's
97 // how the ranks are expected to be ordered. We may expand this
98 // later on, according to some other criteria, by specifying a
99 // different comparator.
101 // Please note that we use a 'set' here instead of resorting to
102 // std::sort() because we need more info than is available in
103 // the vector. The vector will thus be ordered by, e.g., public_addr
104 // while only containing the names of each individual monitor.
105 // The only way of achieving this with std::sort() would be to first
106 // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo'
107 // with custom comparison functions, and then copy each individual entry
108 // to a new vector. Unless there's a simpler way, we don't think the
109 // added complexity makes up for the additional memory usage of a 'set'.
110 set
<mon_info_t
, rank_cmp
> tmp
;
112 for (map
<string
,mon_info_t
>::iterator p
= mon_info
.begin();
115 mon_info_t
&m
= p
->second
;
118 // populate addr_mons
119 assert(addr_mons
.count(m
.public_addr
) == 0);
120 addr_mons
[m
.public_addr
] = m
.name
;
123 // map the set to the actual ranks etc
125 for (set
<mon_info_t
>::iterator p
= tmp
.begin();
132 void MonMap::encode(bufferlist
& blist
, uint64_t con_features
) const
134 /* we keep the mon_addr map when encoding to ensure compatibility
135 * with clients and other monitors that do not yet support the 'mons'
136 * map. This map keeps its original behavior, containing a mapping of
137 * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public
138 * address -- which is obtained from the public address of each entry
141 map
<string
,entity_addr_t
> mon_addr
;
142 for (map
<string
,mon_info_t
>::const_iterator p
= mon_info
.begin();
145 mon_addr
[p
->first
] = p
->second
.public_addr
;
148 if ((con_features
& CEPH_FEATURE_MONNAMES
) == 0) {
151 ::encode_raw(fsid
, blist
);
152 ::encode(epoch
, blist
);
153 vector
<entity_inst_t
> mon_inst(mon_addr
.size());
154 for (unsigned n
= 0; n
< mon_addr
.size(); n
++)
155 mon_inst
[n
] = get_inst(n
);
156 ::encode(mon_inst
, blist
, con_features
);
157 ::encode(last_changed
, blist
);
158 ::encode(created
, blist
);
162 if ((con_features
& CEPH_FEATURE_MONENC
) == 0) {
165 ::encode_raw(fsid
, blist
);
166 ::encode(epoch
, blist
);
167 ::encode(mon_addr
, blist
, con_features
);
168 ::encode(last_changed
, blist
);
169 ::encode(created
, blist
);
172 ENCODE_START(5, 3, blist
);
173 ::encode_raw(fsid
, blist
);
174 ::encode(epoch
, blist
);
175 ::encode(mon_addr
, blist
, con_features
);
176 ::encode(last_changed
, blist
);
177 ::encode(created
, blist
);
178 ::encode(persistent_features
, blist
);
179 ::encode(optional_features
, blist
);
180 // this supersedes 'mon_addr'
181 ::encode(mon_info
, blist
, con_features
);
182 ENCODE_FINISH(blist
);
185 void MonMap::decode(bufferlist::iterator
&p
)
187 map
<string
,entity_addr_t
> mon_addr
;
188 DECODE_START_LEGACY_COMPAT_LEN_16(5, 3, 3, p
);
189 ::decode_raw(fsid
, p
);
192 vector
<entity_inst_t
> mon_inst
;
193 ::decode(mon_inst
, p
);
194 for (unsigned i
= 0; i
< mon_inst
.size(); i
++) {
199 mon_addr
[name
] = mon_inst
[i
].addr
;
202 ::decode(mon_addr
, p
);
204 ::decode(last_changed
, p
);
205 ::decode(created
, p
);
207 ::decode(persistent_features
, p
);
208 ::decode(optional_features
, p
);
211 ::decode(mon_info
, p
);
213 // we may be decoding to an existing monmap; if we do not
214 // clear the mon_info map now, we will likely run into problems
215 // later on in MonMap::sanitize_mons()
219 sanitize_mons(mon_addr
);
223 void MonMap::generate_test_instances(list
<MonMap
*>& o
)
225 o
.push_back(new MonMap
);
226 o
.push_back(new MonMap
);
228 o
.back()->last_changed
= utime_t(123, 456);
229 o
.back()->created
= utime_t(789, 101112);
230 o
.back()->add("one", entity_addr_t());
232 MonMap
*m
= new MonMap
;
235 m
->last_changed
= utime_t(123, 456);
237 entity_addr_t empty_addr_one
;
238 empty_addr_one
.set_nonce(1);
239 m
->add("empty_addr_one", empty_addr_one
);
240 entity_addr_t empty_addr_two
;
241 empty_addr_two
.set_nonce(2);
242 m
->add("empty_adrr_two", empty_addr_two
);
244 const char *local_pub_addr_s
= "127.0.1.2";
246 const char *end_p
= local_pub_addr_s
+ strlen(local_pub_addr_s
);
247 entity_addr_t local_pub_addr
;
248 local_pub_addr
.parse(local_pub_addr_s
, &end_p
);
250 m
->add("filled_pub_addr", local_pub_addr
);
252 m
->add("empty_addr_zero", entity_addr_t());
257 // read from/write to a file
258 int MonMap::write(const char *fn
)
262 encode(bl
, CEPH_FEATURES_ALL
);
264 return bl
.write_file(fn
);
267 int MonMap::read(const char *fn
)
272 int r
= bl
.read_file(fn
, &error
);
279 void MonMap::print_summary(ostream
& out
) const
281 out
<< "e" << epoch
<< ": "
282 << mon_info
.size() << " mons at {";
283 // the map that we used to print, as it was, no longer
284 // maps strings to the monitor's public address, but to
285 // mon_info_t instead. As such, print the map in a way
286 // that keeps the expected format.
287 bool has_printed
= false;
288 for (map
<string
,mon_info_t
>::const_iterator p
= mon_info
.begin();
293 out
<< p
->first
<< "=" << p
->second
.public_addr
;
299 void MonMap::print(ostream
& out
) const
301 out
<< "epoch " << epoch
<< "\n";
302 out
<< "fsid " << fsid
<< "\n";
303 out
<< "last_changed " << last_changed
<< "\n";
304 out
<< "created " << created
<< "\n";
306 for (vector
<string
>::const_iterator p
= ranks
.begin();
309 out
<< i
++ << ": " << get_addr(*p
) << " mon." << *p
<< "\n";
313 void MonMap::dump(Formatter
*f
) const
315 f
->dump_unsigned("epoch", epoch
);
316 f
->dump_stream("fsid") << fsid
;
317 f
->dump_stream("modified") << last_changed
;
318 f
->dump_stream("created") << created
;
319 f
->open_object_section("features");
320 persistent_features
.dump(f
, "persistent");
321 optional_features
.dump(f
, "optional");
323 f
->open_array_section("mons");
325 for (vector
<string
>::const_iterator p
= ranks
.begin();
328 f
->open_object_section("mon");
329 f
->dump_int("rank", i
);
330 f
->dump_string("name", *p
);
331 f
->dump_stream("addr") << get_addr(*p
);
332 f
->dump_stream("public_addr") << get_addr(*p
);
339 int MonMap::build_from_host_list(std::string hostlist
, std::string prefix
)
341 vector
<entity_addr_t
> addrs
;
342 if (parse_ip_port_vec(hostlist
.c_str(), addrs
)) {
345 for (unsigned i
=0; i
<addrs
.size(); i
++) {
349 if (addrs
[i
].get_port() == 0)
350 addrs
[i
].set_port(CEPH_MON_PORT
);
351 string name
= prefix
;
353 if (!contains(addrs
[i
]))
359 // maybe they passed us a DNS-resolvable name
361 hosts
= resolve_addrs(hostlist
.c_str());
364 bool success
= parse_ip_port_vec(hosts
, addrs
);
372 for (unsigned i
=0; i
<addrs
.size(); i
++) {
376 if (addrs
[i
].get_port() == 0)
377 addrs
[i
].set_port(CEPH_MON_PORT
);
378 string name
= prefix
;
380 if (!contains(addrs
[i
]) &&
387 void MonMap::set_initial_members(CephContext
*cct
,
388 list
<std::string
>& initial_members
,
389 string my_name
, const entity_addr_t
& my_addr
,
390 set
<entity_addr_t
> *removed
)
392 // remove non-initial members
395 string n
= get_name(i
);
396 if (std::find(initial_members
.begin(), initial_members
.end(), n
) != initial_members
.end()) {
397 lgeneric_dout(cct
, 1) << " keeping " << n
<< " " << get_addr(i
) << dendl
;
402 lgeneric_dout(cct
, 1) << " removing " << get_name(i
) << " " << get_addr(i
) << dendl
;
404 removed
->insert(get_addr(i
));
406 assert(!contains(n
));
409 // add missing initial members
410 for (list
<string
>::iterator p
= initial_members
.begin(); p
!= initial_members
.end(); ++p
) {
413 lgeneric_dout(cct
, 1) << " adding self " << *p
<< " " << my_addr
<< dendl
;
417 a
.set_type(entity_addr_t::TYPE_LEGACY
);
418 a
.set_family(AF_INET
);
419 for (int n
=1; ; n
++) {
424 lgeneric_dout(cct
, 1) << " adding " << *p
<< " " << a
<< dendl
;
427 assert(contains(*p
));
433 int MonMap::build_initial(CephContext
*cct
, ostream
& errout
)
435 const md_config_t
*conf
= cct
->_conf
;
437 if (!conf
->monmap
.empty()) {
440 r
= read(conf
->monmap
.c_str());
442 catch (const buffer::error
&e
) {
447 errout
<< "unable to read/decode monmap from " << conf
->monmap
448 << ": " << cpp_strerror(-r
) << std::endl
;
453 if (!cct
->_conf
->fsid
.is_zero()) {
454 fsid
= cct
->_conf
->fsid
;
458 if (!conf
->mon_host
.empty()) {
459 int r
= build_from_host_list(conf
->mon_host
, "noname-");
461 errout
<< "unable to parse addrs in '" << conf
->mon_host
<< "'"
465 created
= ceph_clock_now();
466 last_changed
= created
;
470 // What monitors are in the config file?
471 std::vector
<std::string
> sections
;
472 int ret
= conf
->get_all_sections(sections
);
474 errout
<< "Unable to find any monitors in the configuration "
475 << "file, because there was an error listing the sections. error "
479 std::vector
<std::string
> mon_names
;
480 for (std::vector
<std::string
>::const_iterator s
= sections
.begin();
481 s
!= sections
.end(); ++s
) {
482 if ((s
->substr(0, 4) == "mon.") && (s
->size() > 4)) {
483 mon_names
.push_back(s
->substr(4));
487 // Find an address for each monitor in the config file.
488 for (std::vector
<std::string
>::const_iterator m
= mon_names
.begin();
489 m
!= mon_names
.end(); ++m
) {
490 std::vector
<std::string
> sections
;
491 std::string
m_name("mon");
494 sections
.push_back(m_name
);
495 sections
.push_back("mon");
496 sections
.push_back("global");
498 int res
= conf
->get_val_from_conf_file(sections
, "mon addr", val
, true);
500 errout
<< "failed to get an address for mon." << *m
<< ": error "
505 if (!addr
.parse(val
.c_str())) {
506 errout
<< "unable to parse address for mon." << *m
507 << ": addr='" << val
<< "'" << std::endl
;
510 if (addr
.get_port() == 0)
511 addr
.set_port(CEPH_MON_PORT
);
513 // make sure this mon isn't already in the map
515 remove(get_name(addr
));
519 add(m
->c_str(), addr
);
523 // no info found from conf options; let's try to use DNS SRV records
524 string srv_name
= conf
->mon_dns_srv_name
;
526 // check if domain is also provided and extract it from srv_name
527 size_t idx
= srv_name
.find("_");
528 if (idx
!= string::npos
) {
529 domain
= srv_name
.substr(idx
+ 1);
530 srv_name
= srv_name
.substr(0, idx
);
533 map
<string
, entity_addr_t
> addrs
;
534 if (DNSResolver::get_instance()->resolve_srv_hosts(cct
, srv_name
,
535 DNSResolver::SRV_Protocol::TCP
, domain
, &addrs
) != 0) {
537 errout
<< "unable to get monitor info from DNS SRV with service name: " <<
538 "ceph-mon" << std::endl
;
541 for (const auto& addr
: addrs
) {
542 add(addr
.first
, addr
.second
);
548 errout
<< "no monitors specified to connect to." << std::endl
;
551 created
= ceph_clock_now();
552 last_changed
= created
;