]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MonMap.cc
Add patch for failing prerm scripts
[ceph.git] / ceph / src / mon / MonMap.cc
CommitLineData
11fdf7f2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
7c673cae
FG
3
4#include "MonMap.h"
5
6#include <algorithm>
7#include <sys/types.h>
8#include <sys/stat.h>
9#include <fcntl.h>
10
11fdf7f2
TL
11#ifdef WITH_SEASTAR
12#include <seastar/core/fstream.hh>
13#include <seastar/core/reactor.hh>
14#include <seastar/net/dns.hh>
15#include "crimson/common/config_proxy.h"
16#endif
17
7c673cae
FG
18#include "common/Formatter.h"
19
20#include "include/ceph_features.h"
21#include "include/addr_parsing.h"
22#include "common/ceph_argparse.h"
23#include "common/dns_resolve.h"
24#include "common/errno.h"
7c673cae 25#include "common/dout.h"
11fdf7f2 26#include "common/Clock.h"
7c673cae
FG
27
28using ceph::Formatter;
29
30void mon_info_t::encode(bufferlist& bl, uint64_t features) const
31{
11fdf7f2
TL
32 uint8_t v = 3;
33 if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) {
34 v = 2;
35 }
36 ENCODE_START(v, 1, bl);
37 encode(name, bl);
38 if (v < 3) {
eafe8130
TL
39 auto a = public_addrs.legacy_addr();
40 if (a != entity_addr_t()) {
41 encode(a, bl, features);
42 } else {
43 // note: we don't have a legacy addr here, so lie so that it looks
44 // like one, just so that old clients get a valid-looking map.
45 // they won't be able to talk to the v2 mons, but that's better
46 // than nothing.
47 encode(public_addrs.as_legacy_addr(), bl, features);
48 }
11fdf7f2
TL
49 } else {
50 encode(public_addrs, bl, features);
51 }
52 encode(priority, bl);
7c673cae
FG
53 ENCODE_FINISH(bl);
54}
55
11fdf7f2 56void mon_info_t::decode(bufferlist::const_iterator& p)
7c673cae 57{
11fdf7f2
TL
58 DECODE_START(3, p);
59 decode(name, p);
60 decode(public_addrs, p);
224ce89b 61 if (struct_v >= 2) {
11fdf7f2 62 decode(priority, p);
224ce89b 63 }
7c673cae
FG
64 DECODE_FINISH(p);
65}
66
67void mon_info_t::print(ostream& out) const
68{
69 out << "mon." << name
11fdf7f2 70 << " addrs " << public_addrs
224ce89b 71 << " priority " << priority;
7c673cae
FG
72}
73
7c673cae
FG
74namespace {
75 struct rank_cmp {
76 bool operator()(const mon_info_t &a, const mon_info_t &b) const {
11fdf7f2 77 if (a.public_addrs.legacy_or_front_addr() == b.public_addrs.legacy_or_front_addr())
7c673cae 78 return a.name < b.name;
11fdf7f2 79 return a.public_addrs.legacy_or_front_addr() < b.public_addrs.legacy_or_front_addr();
7c673cae
FG
80 }
81 };
82}
83
11fdf7f2
TL
84void MonMap::calc_legacy_ranks()
85{
7c673cae 86 ranks.resize(mon_info.size());
7c673cae
FG
87
88 // Used to order entries according to public_addr, because that's
89 // how the ranks are expected to be ordered by. We may expand this
90 // later on, according to some other criteria, by specifying a
91 // different comparator.
92 //
93 // Please note that we use a 'set' here instead of resorting to
94 // std::sort() because we need more info than that's available in
95 // the vector. The vector will thus be ordered by, e.g., public_addr
96 // while only containing the names of each individual monitor.
97 // The only way of achieving this with std::sort() would be to first
98 // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo'
99 // with custom comparison functions, and then copy each invidual entry
100 // to a new vector. Unless there's a simpler way, we don't think the
101 // added complexity makes up for the additional memory usage of a 'set'.
102 set<mon_info_t, rank_cmp> tmp;
103
104 for (map<string,mon_info_t>::iterator p = mon_info.begin();
105 p != mon_info.end();
106 ++p) {
107 mon_info_t &m = p->second;
108 tmp.insert(m);
7c673cae
FG
109 }
110
111 // map the set to the actual ranks etc
112 unsigned i = 0;
113 for (set<mon_info_t>::iterator p = tmp.begin();
114 p != tmp.end();
115 ++p, ++i) {
116 ranks[i] = p->name;
117 }
118}
119
120void MonMap::encode(bufferlist& blist, uint64_t con_features) const
121{
7c673cae 122 if ((con_features & CEPH_FEATURE_MONNAMES) == 0) {
11fdf7f2 123 using ceph::encode;
7c673cae 124 __u16 v = 1;
11fdf7f2
TL
125 encode(v, blist);
126 encode_raw(fsid, blist);
127 encode(epoch, blist);
128 vector<entity_inst_t> mon_inst(ranks.size());
129 for (unsigned n = 0; n < ranks.size(); n++) {
130 mon_inst[n].name = entity_name_t::MON(n);
131 mon_inst[n].addr = get_addrs(n).legacy_addr();
132 }
133 encode(mon_inst, blist, con_features);
134 encode(last_changed, blist);
135 encode(created, blist);
7c673cae
FG
136 return;
137 }
138
11fdf7f2
TL
139 map<string,entity_addr_t> legacy_mon_addr;
140 if (!HAVE_FEATURE(con_features, MONENC) ||
141 !HAVE_FEATURE(con_features, SERVER_NAUTILUS)) {
142 for (auto& [name, info] : mon_info) {
143 legacy_mon_addr[name] = info.public_addrs.legacy_addr();
144 }
145 }
146
147 if (!HAVE_FEATURE(con_features, MONENC)) {
148 /* we keep the mon_addr map when encoding to ensure compatibility
149 * with clients and other monitors that do not yet support the 'mons'
150 * map. This map keeps its original behavior, containing a mapping of
151 * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public
152 * address -- which is obtained from the public address of each entry
153 * in the 'mons' map.
154 */
155 using ceph::encode;
7c673cae 156 __u16 v = 2;
11fdf7f2
TL
157 encode(v, blist);
158 encode_raw(fsid, blist);
159 encode(epoch, blist);
160 encode(legacy_mon_addr, blist, con_features);
161 encode(last_changed, blist);
162 encode(created, blist);
163 return;
164 }
165
166 if (!HAVE_FEATURE(con_features, SERVER_NAUTILUS)) {
167 ENCODE_START(5, 3, blist);
168 encode_raw(fsid, blist);
169 encode(epoch, blist);
170 encode(legacy_mon_addr, blist, con_features);
171 encode(last_changed, blist);
172 encode(created, blist);
173 encode(persistent_features, blist);
174 encode(optional_features, blist);
175 encode(mon_info, blist, con_features);
176 ENCODE_FINISH(blist);
177 return;
178 }
179
180 ENCODE_START(7, 6, blist);
181 encode_raw(fsid, blist);
182 encode(epoch, blist);
183 encode(last_changed, blist);
184 encode(created, blist);
185 encode(persistent_features, blist);
186 encode(optional_features, blist);
187 encode(mon_info, blist, con_features);
188 encode(ranks, blist);
189 encode(min_mon_release, blist);
7c673cae
FG
190 ENCODE_FINISH(blist);
191}
192
11fdf7f2 193void MonMap::decode(bufferlist::const_iterator& p)
7c673cae
FG
194{
195 map<string,entity_addr_t> mon_addr;
11fdf7f2
TL
196 DECODE_START_LEGACY_COMPAT_LEN_16(7, 3, 3, p);
197 decode_raw(fsid, p);
198 decode(epoch, p);
7c673cae
FG
199 if (struct_v == 1) {
200 vector<entity_inst_t> mon_inst;
11fdf7f2 201 decode(mon_inst, p);
7c673cae
FG
202 for (unsigned i = 0; i < mon_inst.size(); i++) {
203 char n[2];
204 n[0] = '0' + i;
205 n[1] = 0;
206 string name = n;
207 mon_addr[name] = mon_inst[i].addr;
208 }
11fdf7f2
TL
209 } else if (struct_v < 6) {
210 decode(mon_addr, p);
7c673cae 211 }
11fdf7f2
TL
212 decode(last_changed, p);
213 decode(created, p);
7c673cae 214 if (struct_v >= 4) {
11fdf7f2
TL
215 decode(persistent_features, p);
216 decode(optional_features, p);
7c673cae 217 }
11fdf7f2
TL
218 if (struct_v < 5) {
219 // generate mon_info from legacy mon_addr
220 for (auto& [name, addr] : mon_addr) {
221 mon_info_t &m = mon_info[name];
222 m.name = name;
223 m.public_addrs = entity_addrvec_t(addr);
224 }
225 } else {
226 decode(mon_info, p);
227 }
228 if (struct_v < 6) {
229 calc_legacy_ranks();
230 } else {
231 decode(ranks, p);
232 }
233 if (struct_v >= 7) {
234 decode(min_mon_release, p);
7c673cae 235 } else {
11fdf7f2 236 min_mon_release = infer_ceph_release_from_mon_features(persistent_features);
7c673cae 237 }
11fdf7f2 238 calc_addr_mons();
7c673cae 239 DECODE_FINISH(p);
7c673cae
FG
240}
241
242void MonMap::generate_test_instances(list<MonMap*>& o)
243{
244 o.push_back(new MonMap);
245 o.push_back(new MonMap);
246 o.back()->epoch = 1;
247 o.back()->last_changed = utime_t(123, 456);
248 o.back()->created = utime_t(789, 101112);
11fdf7f2 249 o.back()->add("one", entity_addrvec_t());
7c673cae
FG
250
251 MonMap *m = new MonMap;
252 {
253 m->epoch = 1;
254 m->last_changed = utime_t(123, 456);
255
11fdf7f2
TL
256 entity_addrvec_t empty_addr_one = entity_addrvec_t(entity_addr_t());
257 empty_addr_one.v[0].set_nonce(1);
7c673cae 258 m->add("empty_addr_one", empty_addr_one);
11fdf7f2
TL
259 entity_addrvec_t empty_addr_two = entity_addrvec_t(entity_addr_t());
260 empty_addr_two.v[0].set_nonce(2);
261 m->add("empty_addr_two", empty_addr_two);
7c673cae
FG
262
263 const char *local_pub_addr_s = "127.0.1.2";
264
265 const char *end_p = local_pub_addr_s + strlen(local_pub_addr_s);
11fdf7f2 266 entity_addrvec_t local_pub_addr;
7c673cae
FG
267 local_pub_addr.parse(local_pub_addr_s, &end_p);
268
11fdf7f2 269 m->add(mon_info_t("filled_pub_addr", entity_addrvec_t(local_pub_addr), 1));
7c673cae 270
11fdf7f2 271 m->add("empty_addr_zero", entity_addrvec_t());
7c673cae
FG
272 }
273 o.push_back(m);
274}
275
276// read from/write to a file
277int MonMap::write(const char *fn)
278{
279 // encode
280 bufferlist bl;
281 encode(bl, CEPH_FEATURES_ALL);
282
283 return bl.write_file(fn);
284}
285
286int MonMap::read(const char *fn)
287{
288 // read
289 bufferlist bl;
290 std::string error;
291 int r = bl.read_file(fn, &error);
292 if (r < 0)
293 return r;
294 decode(bl);
295 return 0;
296}
297
298void MonMap::print_summary(ostream& out) const
299{
300 out << "e" << epoch << ": "
301 << mon_info.size() << " mons at {";
302 // the map that we used to print, as it was, no longer
303 // maps strings to the monitor's public address, but to
304 // mon_info_t instead. As such, print the map in a way
305 // that keeps the expected format.
306 bool has_printed = false;
307 for (map<string,mon_info_t>::const_iterator p = mon_info.begin();
308 p != mon_info.end();
309 ++p) {
310 if (has_printed)
311 out << ",";
11fdf7f2 312 out << p->first << "=" << p->second.public_addrs;
7c673cae
FG
313 has_printed = true;
314 }
315 out << "}";
316}
317
318void MonMap::print(ostream& out) const
319{
320 out << "epoch " << epoch << "\n";
321 out << "fsid " << fsid << "\n";
322 out << "last_changed " << last_changed << "\n";
323 out << "created " << created << "\n";
11fdf7f2
TL
324 out << "min_mon_release " << (int)min_mon_release
325 << " (" << ceph_release_name(min_mon_release) << ")\n";
7c673cae
FG
326 unsigned i = 0;
327 for (vector<string>::const_iterator p = ranks.begin();
328 p != ranks.end();
329 ++p) {
11fdf7f2 330 out << i++ << ": " << get_addrs(*p) << " mon." << *p << "\n";
7c673cae
FG
331 }
332}
333
334void MonMap::dump(Formatter *f) const
335{
336 f->dump_unsigned("epoch", epoch);
337 f->dump_stream("fsid") << fsid;
338 f->dump_stream("modified") << last_changed;
339 f->dump_stream("created") << created;
11fdf7f2
TL
340 f->dump_unsigned("min_mon_release", min_mon_release);
341 f->dump_string("min_mon_release_name", ceph_release_name(min_mon_release));
7c673cae
FG
342 f->open_object_section("features");
343 persistent_features.dump(f, "persistent");
344 optional_features.dump(f, "optional");
345 f->close_section();
346 f->open_array_section("mons");
347 int i = 0;
348 for (vector<string>::const_iterator p = ranks.begin();
349 p != ranks.end();
350 ++p, ++i) {
351 f->open_object_section("mon");
352 f->dump_int("rank", i);
353 f->dump_string("name", *p);
11fdf7f2
TL
354 f->dump_object("public_addrs", get_addrs(*p));
355 // compat: make these look like pre-nautilus entity_addr_t
356 f->dump_stream("addr") << get_addrs(*p).get_legacy_str();
357 f->dump_stream("public_addr") << get_addrs(*p).get_legacy_str();
7c673cae
FG
358 f->close_section();
359 }
360 f->close_section();
361}
362
11fdf7f2
TL
363// an ambiguous mon addr may be legacy or may be msgr2--we aren' sure.
364// when that happens we need to try them both (unless we can
365// reasonably infer from the port number which it is).
366void MonMap::_add_ambiguous_addr(const string& name,
367 entity_addr_t addr,
368 int priority,
369 bool for_mkfs)
370{
371 if (addr.get_type() != entity_addr_t::TYPE_ANY) {
372 // a v1: or v2: prefix was specified
373 if (addr.get_port() == 0) {
374 // use default port
375 if (addr.get_type() == entity_addr_t::TYPE_ANY) {
376 addr.set_port(CEPH_MON_PORT_IANA);
377 } else if (addr.get_type() == entity_addr_t::TYPE_LEGACY) {
378 addr.set_port(CEPH_MON_PORT_LEGACY);
379 } else if (addr.get_type() == entity_addr_t::TYPE_MSGR2) {
380 addr.set_port(CEPH_MON_PORT_IANA);
381 } else {
382 // wth
383 return;
384 }
385 if (!contains(addr)) {
386 add(name, entity_addrvec_t(addr));
387 }
388 } else {
389 if (!contains(addr)) {
390 add(name, entity_addrvec_t(addr), priority);
391 }
392 }
393 } else {
394 // no v1: or v2: prefix specified
395 if (addr.get_port() == CEPH_MON_PORT_LEGACY) {
396 // legacy port implies legacy addr
397 addr.set_type(entity_addr_t::TYPE_LEGACY);
398 if (!contains(addr)) {
399 if (!for_mkfs) {
400 add(name + "-legacy", entity_addrvec_t(addr));
401 } else {
402 add(name, entity_addrvec_t(addr));
403 }
404 }
405 } else if (addr.get_port() == CEPH_MON_PORT_IANA) {
406 // iana port implies msgr2 addr
407 addr.set_type(entity_addr_t::TYPE_MSGR2);
408 if (!contains(addr)) {
409 add(name, entity_addrvec_t(addr));
410 }
411 } else if (addr.get_port() == 0) {
412 // no port; include both msgr2 and legacy ports
413 if (!for_mkfs) {
414 addr.set_type(entity_addr_t::TYPE_MSGR2);
415 addr.set_port(CEPH_MON_PORT_IANA);
416 if (!contains(addr)) {
417 add(name, entity_addrvec_t(addr));
418 }
419 addr.set_type(entity_addr_t::TYPE_LEGACY);
420 addr.set_port(CEPH_MON_PORT_LEGACY);
421 if (!contains(addr)) {
422 add(name + "-legacy", entity_addrvec_t(addr));
423 }
424 } else {
425 entity_addrvec_t av;
426 addr.set_type(entity_addr_t::TYPE_MSGR2);
427 addr.set_port(CEPH_MON_PORT_IANA);
428 av.v.push_back(addr);
429 addr.set_type(entity_addr_t::TYPE_LEGACY);
430 addr.set_port(CEPH_MON_PORT_LEGACY);
431 av.v.push_back(addr);
432 if (!contains(av)) {
433 add(name, av);
434 }
435 }
436 } else {
437 addr.set_type(entity_addr_t::TYPE_MSGR2);
438 if (!contains(addr)) {
439 add(name, entity_addrvec_t(addr), priority);
440 }
441 if (!for_mkfs) {
442 // try legacy on same port too
443 addr.set_type(entity_addr_t::TYPE_LEGACY);
444 if (!contains(addr)) {
445 add(name + "-legacy", entity_addrvec_t(addr), priority);
446 }
447 }
448 }
449 }
450}
7c673cae 451
11fdf7f2
TL
452int MonMap::init_with_ips(const std::string& ips,
453 bool for_mkfs,
454 const std::string &prefix)
7c673cae 455{
11fdf7f2
TL
456 vector<entity_addrvec_t> addrs;
457 if (!parse_ip_port_vec(
458 ips.c_str(), addrs,
459 entity_addr_t::TYPE_ANY)) {
460 return -EINVAL;
461 }
462 if (addrs.empty())
463 return -ENOENT;
464 for (unsigned i=0; i<addrs.size(); i++) {
465 char n[2];
466 n[0] = 'a' + i;
467 n[1] = 0;
468 string name;
469 name = prefix;
470 name += n;
471 if (addrs[i].v.size() == 1) {
472 _add_ambiguous_addr(name, addrs[i].front(), 0, for_mkfs);
473 } else {
474 // they specified an addrvec, so let's assume they also specified
475 // the addr *type* and *port*. (we could possibly improve this?)
476 add(name, addrs[i], 0);
7c673cae 477 }
7c673cae 478 }
11fdf7f2
TL
479 return 0;
480}
7c673cae 481
11fdf7f2
TL
482int MonMap::init_with_hosts(const std::string& hostlist,
483 bool for_mkfs,
484 const std::string& prefix)
485{
7c673cae 486 // maybe they passed us a DNS-resolvable name
11fdf7f2 487 char *hosts = resolve_addrs(hostlist.c_str());
7c673cae
FG
488 if (!hosts)
489 return -EINVAL;
11fdf7f2
TL
490
491 vector<entity_addrvec_t> addrs;
492 bool success = parse_ip_port_vec(
493 hosts, addrs,
494 for_mkfs ? entity_addr_t::TYPE_MSGR2 : entity_addr_t::TYPE_ANY);
7c673cae
FG
495 free(hosts);
496 if (!success)
497 return -EINVAL;
7c673cae
FG
498 if (addrs.empty())
499 return -ENOENT;
7c673cae
FG
500 for (unsigned i=0; i<addrs.size(); i++) {
501 char n[2];
502 n[0] = 'a' + i;
503 n[1] = 0;
7c673cae
FG
504 string name = prefix;
505 name += n;
11fdf7f2
TL
506 if (addrs[i].v.size() == 1) {
507 _add_ambiguous_addr(name, addrs[i].front(), 0);
508 } else {
509 add(name, addrs[i], 0);
510 }
7c673cae 511 }
11fdf7f2 512 calc_legacy_ranks();
7c673cae
FG
513 return 0;
514}
515
516void MonMap::set_initial_members(CephContext *cct,
517 list<std::string>& initial_members,
11fdf7f2
TL
518 string my_name,
519 const entity_addrvec_t& my_addrs,
520 set<entity_addrvec_t> *removed)
7c673cae
FG
521{
522 // remove non-initial members
523 unsigned i = 0;
524 while (i < size()) {
525 string n = get_name(i);
11fdf7f2
TL
526 if (std::find(initial_members.begin(), initial_members.end(), n)
527 != initial_members.end()) {
528 lgeneric_dout(cct, 1) << " keeping " << n << " " << get_addrs(i) << dendl;
7c673cae
FG
529 i++;
530 continue;
531 }
532
11fdf7f2
TL
533 lgeneric_dout(cct, 1) << " removing " << get_name(i) << " " << get_addrs(i)
534 << dendl;
535 if (removed) {
536 removed->insert(get_addrs(i));
537 }
7c673cae 538 remove(n);
11fdf7f2 539 ceph_assert(!contains(n));
7c673cae
FG
540 }
541
542 // add missing initial members
11fdf7f2
TL
543 for (auto& p : initial_members) {
544 if (!contains(p)) {
545 if (p == my_name) {
546 lgeneric_dout(cct, 1) << " adding self " << p << " " << my_addrs
547 << dendl;
548 add(p, my_addrs);
7c673cae
FG
549 } else {
550 entity_addr_t a;
551 a.set_type(entity_addr_t::TYPE_LEGACY);
552 a.set_family(AF_INET);
553 for (int n=1; ; n++) {
554 a.set_nonce(n);
555 if (!contains(a))
556 break;
557 }
11fdf7f2
TL
558 lgeneric_dout(cct, 1) << " adding " << p << " " << a << dendl;
559 add(p, entity_addrvec_t(a));
7c673cae 560 }
11fdf7f2 561 ceph_assert(contains(p));
7c673cae
FG
562 }
563 }
11fdf7f2 564 calc_legacy_ranks();
7c673cae
FG
565}
566
11fdf7f2
TL
567int MonMap::init_with_config_file(const ConfigProxy& conf,
568 std::ostream& errout)
7c673cae 569{
11fdf7f2
TL
570 std::vector<std::string> sections;
571 int ret = conf.get_all_sections(sections);
7c673cae
FG
572 if (ret) {
573 errout << "Unable to find any monitors in the configuration "
574 << "file, because there was an error listing the sections. error "
575 << ret << std::endl;
576 return -ENOENT;
577 }
11fdf7f2
TL
578 std::vector<std::string> mon_names;
579 for (const auto& section : sections) {
580 if (section.substr(0, 4) == "mon." && section.size() > 4) {
581 mon_names.push_back(section.substr(4));
7c673cae
FG
582 }
583 }
584
585 // Find an address for each monitor in the config file.
11fdf7f2
TL
586 for (const auto& mon_name : mon_names) {
587 std::vector<std::string> sections;
7c673cae
FG
588 std::string m_name("mon");
589 m_name += ".";
11fdf7f2 590 m_name += mon_name;
7c673cae
FG
591 sections.push_back(m_name);
592 sections.push_back("mon");
593 sections.push_back("global");
594 std::string val;
11fdf7f2 595 int res = conf.get_val_from_conf_file(sections, "mon addr", val, true);
7c673cae 596 if (res) {
11fdf7f2
TL
597 errout << "failed to get an address for mon." << mon_name
598 << ": error " << res << std::endl;
7c673cae
FG
599 continue;
600 }
11fdf7f2
TL
601 // the 'mon addr' field is a legacy field, so assume anything
602 // there on a weird port is a v1 address, and do not handle
603 // addrvecs.
7c673cae 604 entity_addr_t addr;
11fdf7f2
TL
605 if (!addr.parse(val.c_str(), nullptr, entity_addr_t::TYPE_LEGACY)) {
606 errout << "unable to parse address for mon." << mon_name
607 << ": addr='" << val << "'" << std::endl;
7c673cae
FG
608 continue;
609 }
11fdf7f2
TL
610 if (addr.get_port() == 0) {
611 addr.set_port(CEPH_MON_PORT_LEGACY);
612 }
224ce89b 613 uint16_t priority = 0;
11fdf7f2 614 if (!conf.get_val_from_conf_file(sections, "mon priority", val, false)) {
224ce89b
WB
615 try {
616 priority = std::stoul(val);
617 } catch (std::logic_error&) {
11fdf7f2 618 errout << "unable to parse priority for mon." << mon_name
224ce89b
WB
619 << ": priority='" << val << "'" << std::endl;
620 continue;
621 }
622 }
11fdf7f2 623
7c673cae
FG
624 // the make sure this mon isn't already in the map
625 if (contains(addr))
626 remove(get_name(addr));
11fdf7f2
TL
627 if (contains(mon_name))
628 remove(mon_name);
629 _add_ambiguous_addr(mon_name, addr, priority);
630 }
631 return 0;
632}
633
634#ifdef WITH_SEASTAR
635
636using namespace seastar;
637
638future<> MonMap::read_monmap(const std::string& monmap)
639{
640 return open_file_dma(monmap, open_flags::ro).then([this] (file f) {
641 return f.size().then([this, f = std::move(f)](size_t s) {
642 return do_with(make_file_input_stream(f), [this, s](input_stream<char>& in) {
643 return in.read_exactly(s).then([this](temporary_buffer<char> buf) {
644 bufferlist bl;
645 bl.append(buffer::create(std::move(buf)));
646 decode(bl);
647 });
648 });
649 });
650 });
651}
7c673cae 652
11fdf7f2
TL
653future<> MonMap::init_with_dns_srv(bool for_mkfs, const std::string& name)
654{
655 string domain;
656 string service = name;
657 // check if domain is also provided and extract it from srv_name
658 size_t idx = name.find("_");
659 if (idx != name.npos) {
660 domain = name.substr(idx + 1);
661 service = name.substr(0, idx);
7c673cae 662 }
11fdf7f2
TL
663 return net::dns::get_srv_records(
664 net::dns_resolver::srv_proto::tcp,
665 service, domain).then([this](net::dns_resolver::srv_records records) {
666 return parallel_for_each(records, [this](auto record) {
667 return net::dns::resolve_name(record.target).then(
668 [record,this](net::inet_address a) {
669 // the resolved address does not contain ceph specific info like nonce
670 // nonce or msgr proto (legacy, msgr2), so set entity_addr_t manually
671 entity_addr_t addr;
672 addr.set_type(entity_addr_t::TYPE_ANY);
673 addr.set_family(int(a.in_family()));
674 addr.set_port(record.port);
675 switch (a.in_family()) {
676 case net::inet_address::family::INET:
677 addr.in4_addr().sin_addr = a;
678 break;
679 case net::inet_address::family::INET6:
680 addr.in6_addr().sin6_addr = a;
681 break;
682 }
683 _add_ambiguous_addr(record.target, addr, record.priority);
684 });
685 });
686 }).handle_exception_type([](const std::system_error& e) {
687 // ignore DNS failures
688 return seastar::make_ready_future<>();
689 });
690}
7c673cae 691
11fdf7f2
TL
692seastar::future<> MonMap::build_monmap(const ceph::common::ConfigProxy& conf,
693 bool for_mkfs)
694{
695 // -m foo?
696 if (const auto mon_host = conf.get_val<std::string>("mon_host");
697 !mon_host.empty()) {
698 if (auto ret = init_with_ips(mon_host, for_mkfs, "noname-"); ret == 0) {
699 return make_ready_future<>();
7c673cae 700 }
11fdf7f2
TL
701 // TODO: resolve_addrs() is a blocking call
702 if (auto ret = init_with_hosts(mon_host, for_mkfs, "noname-"); ret == 0) {
703 return make_ready_future<>();
704 } else {
705 throw std::runtime_error(cpp_strerror(ret));
706 }
707 }
7c673cae 708
11fdf7f2
TL
709 // What monitors are in the config file?
710 ostringstream errout;
711 if (auto ret = init_with_config_file(conf, errout); ret < 0) {
712 throw std::runtime_error(errout.str());
713 }
714 if (size() > 0) {
715 return make_ready_future<>();
716 }
717 // no info found from conf options lets try use DNS SRV records
718 const string srv_name = conf.get_val<std::string>("mon_dns_srv_name");
719 return init_with_dns_srv(for_mkfs, srv_name).then([this] {
720 if (size() == 0) {
721 throw std::runtime_error("no monitors specified to connect to.");
722 }
723 });
724}
7c673cae 725
11fdf7f2
TL
726future<> MonMap::build_initial(const ceph::common::ConfigProxy& conf, bool for_mkfs)
727{
728 // file?
729 if (const auto monmap = conf.get_val<std::string>("monmap");
730 !monmap.empty()) {
731 return read_monmap(monmap);
732 } else {
733 // fsid from conf?
734 if (const auto new_fsid = conf.get_val<uuid_d>("fsid");
735 !new_fsid.is_zero()) {
736 fsid = new_fsid;
7c673cae 737 }
11fdf7f2
TL
738 return build_monmap(conf, for_mkfs).then([this] {
739 created = ceph_clock_now();
740 last_changed = created;
741 calc_legacy_ranks();
742 });
743 }
744}
745
746#else // WITH_SEASTAR
747
748int MonMap::init_with_monmap(const std::string& monmap, std::ostream& errout)
749{
750 int r;
751 try {
752 r = read(monmap.c_str());
753 } catch (buffer::error&) {
754 r = -EINVAL;
755 }
756 if (r >= 0)
757 return 0;
758 errout << "unable to read/decode monmap from " << monmap
759 << ": " << cpp_strerror(-r) << std::endl;
760 return r;
761}
762
763int MonMap::init_with_dns_srv(CephContext* cct,
764 std::string srv_name,
765 bool for_mkfs,
766 std::ostream& errout)
767{
768 string domain;
769 // check if domain is also provided and extract it from srv_name
770 size_t idx = srv_name.find("_");
771 if (idx != string::npos) {
772 domain = srv_name.substr(idx + 1);
773 srv_name = srv_name.substr(0, idx);
774 }
775
776 map<string, DNSResolver::Record> records;
777 if (DNSResolver::get_instance()->resolve_srv_hosts(cct, srv_name,
778 DNSResolver::SRV_Protocol::TCP, domain, &records) != 0) {
779
780 errout << "unable to get monitor info from DNS SRV with service name: "
781 << "ceph-mon" << std::endl;
782 return -1;
783 } else {
784 for (auto& record : records) {
785 record.second.addr.set_type(entity_addr_t::TYPE_ANY);
786 _add_ambiguous_addr(record.first, record.second.addr,
787 record.second.priority);
7c673cae 788 }
11fdf7f2
TL
789 return 0;
790 }
791}
792
793int MonMap::build_initial(CephContext *cct, bool for_mkfs, ostream& errout)
794{
795 const auto& conf = cct->_conf;
796 // file?
797 if (const auto monmap = conf.get_val<std::string>("monmap");
798 !monmap.empty()) {
799 return init_with_monmap(monmap, errout);
7c673cae
FG
800 }
801
11fdf7f2
TL
802 // fsid from conf?
803 if (const auto new_fsid = conf.get_val<uuid_d>("fsid");
804 !new_fsid.is_zero()) {
805 fsid = new_fsid;
806 }
807 // -m foo?
808 if (const auto mon_host = conf.get_val<std::string>("mon_host");
809 !mon_host.empty()) {
810 auto ret = init_with_ips(mon_host, for_mkfs, "noname-");
811 if (ret == -EINVAL) {
812 ret = init_with_hosts(mon_host, for_mkfs, "noname-");
813 }
814 if (ret < 0) {
815 errout << "unable to parse addrs in '" << mon_host << "'"
816 << std::endl;
817 return ret;
818 }
819 }
820 if (size() == 0) {
821 // What monitors are in the config file?
822 if (auto ret = init_with_config_file(conf, errout); ret < 0) {
823 return ret;
824 }
825 }
826 if (size() == 0) {
827 // no info found from conf options lets try use DNS SRV records
828 string srv_name = conf.get_val<std::string>("mon_dns_srv_name");
829 if (auto ret = init_with_dns_srv(cct, srv_name, for_mkfs, errout); ret < 0) {
830 return -ENOENT;
831 }
832 }
7c673cae
FG
833 if (size() == 0) {
834 errout << "no monitors specified to connect to." << std::endl;
835 return -ENOENT;
836 }
837 created = ceph_clock_now();
838 last_changed = created;
11fdf7f2 839 calc_legacy_ranks();
7c673cae
FG
840 return 0;
841}
11fdf7f2 842#endif // WITH_SEASTAR