]> git.proxmox.com Git - ceph.git/blame - ceph/src/mon/MonMap.cc
update sources to v12.1.1
[ceph.git] / ceph / src / mon / MonMap.cc
CommitLineData
7c673cae
FG
1
2#include "MonMap.h"
3
4#include <algorithm>
5#include <sys/types.h>
6#include <sys/stat.h>
7#include <fcntl.h>
8
9#include "common/Formatter.h"
10
11#include "include/ceph_features.h"
12#include "include/addr_parsing.h"
13#include "common/ceph_argparse.h"
14#include "common/dns_resolve.h"
15#include "common/errno.h"
16
17#include "common/dout.h"
18
19using ceph::Formatter;
20
21void mon_info_t::encode(bufferlist& bl, uint64_t features) const
22{
224ce89b 23 ENCODE_START(2, 1, bl);
7c673cae
FG
24 ::encode(name, bl);
25 ::encode(public_addr, bl, features);
224ce89b 26 ::encode(priority, bl);
7c673cae
FG
27 ENCODE_FINISH(bl);
28}
29
30void mon_info_t::decode(bufferlist::iterator& p)
31{
32 DECODE_START(1, p);
33 ::decode(name, p);
34 ::decode(public_addr, p);
224ce89b
WB
35 if (struct_v >= 2) {
36 ::decode(priority, p);
37 }
7c673cae
FG
38 DECODE_FINISH(p);
39}
40
41void mon_info_t::print(ostream& out) const
42{
43 out << "mon." << name
224ce89b
WB
44 << " public " << public_addr
45 << " priority " << priority;
7c673cae
FG
46}
47
48void MonMap::sanitize_mons(map<string,entity_addr_t>& o)
49{
50 // if mon_info is populated, it means we decoded a map encoded
51 // by someone who understands the new format (i.e., is able to
52 // encode 'mon_info'). This means they must also have provided
53 // a properly populated 'mon_addr' (which we have dropped with
54 // this patch), 'o' being the contents of said map. In this
55 // case, 'o' must have the same number of entries as 'mon_info'.
56 //
57 // Also, for each entry in 'o', there has to be a matching
58 // 'mon_info' entry, properly populated with a name and a matching
59 // 'public_addr'.
60 //
61 // OTOH, if 'mon_info' is not populated, it means the one that
62 // originally encoded the map does not know the new format, and
63 // 'o' will be our only source of info about the monitors in the
64 // cluster -- and we will use it to populate our 'mon_info' map.
65
66 bool has_mon_info = false;
67 if (mon_info.size() > 0) {
68 assert(o.size() == mon_info.size());
69 has_mon_info = true;
70 }
71
72 for (auto p : o) {
73 if (has_mon_info) {
74 // make sure the info we have is accurate
75 assert(mon_info.count(p.first));
76 assert(mon_info[p.first].name == p.first);
77 assert(mon_info[p.first].public_addr == p.second);
78 } else {
79 mon_info_t &m = mon_info[p.first];
80 m.name = p.first;
81 m.public_addr = p.second;
82 }
83 }
84}
85
86namespace {
87 struct rank_cmp {
88 bool operator()(const mon_info_t &a, const mon_info_t &b) const {
89 if (a.public_addr == b.public_addr)
90 return a.name < b.name;
91 return a.public_addr < b.public_addr;
92 }
93 };
94}
95
96void MonMap::calc_ranks() {
97
98 ranks.resize(mon_info.size());
99 addr_mons.clear();
100
101 // Used to order entries according to public_addr, because that's
102 // how the ranks are expected to be ordered by. We may expand this
103 // later on, according to some other criteria, by specifying a
104 // different comparator.
105 //
106 // Please note that we use a 'set' here instead of resorting to
107 // std::sort() because we need more info than that's available in
108 // the vector. The vector will thus be ordered by, e.g., public_addr
109 // while only containing the names of each individual monitor.
110 // The only way of achieving this with std::sort() would be to first
111 // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo'
112 // with custom comparison functions, and then copy each invidual entry
113 // to a new vector. Unless there's a simpler way, we don't think the
114 // added complexity makes up for the additional memory usage of a 'set'.
115 set<mon_info_t, rank_cmp> tmp;
116
117 for (map<string,mon_info_t>::iterator p = mon_info.begin();
118 p != mon_info.end();
119 ++p) {
120 mon_info_t &m = p->second;
121 tmp.insert(m);
122
123 // populate addr_mons
124 assert(addr_mons.count(m.public_addr) == 0);
125 addr_mons[m.public_addr] = m.name;
126 }
127
128 // map the set to the actual ranks etc
129 unsigned i = 0;
130 for (set<mon_info_t>::iterator p = tmp.begin();
131 p != tmp.end();
132 ++p, ++i) {
133 ranks[i] = p->name;
134 }
135}
136
137void MonMap::encode(bufferlist& blist, uint64_t con_features) const
138{
139 /* we keep the mon_addr map when encoding to ensure compatibility
140 * with clients and other monitors that do not yet support the 'mons'
141 * map. This map keeps its original behavior, containing a mapping of
142 * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public
143 * address -- which is obtained from the public address of each entry
144 * in the 'mons' map.
145 */
146 map<string,entity_addr_t> mon_addr;
147 for (map<string,mon_info_t>::const_iterator p = mon_info.begin();
148 p != mon_info.end();
149 ++p) {
150 mon_addr[p->first] = p->second.public_addr;
151 }
152
153 if ((con_features & CEPH_FEATURE_MONNAMES) == 0) {
154 __u16 v = 1;
155 ::encode(v, blist);
156 ::encode_raw(fsid, blist);
157 ::encode(epoch, blist);
158 vector<entity_inst_t> mon_inst(mon_addr.size());
159 for (unsigned n = 0; n < mon_addr.size(); n++)
160 mon_inst[n] = get_inst(n);
161 ::encode(mon_inst, blist, con_features);
162 ::encode(last_changed, blist);
163 ::encode(created, blist);
164 return;
165 }
166
167 if ((con_features & CEPH_FEATURE_MONENC) == 0) {
168 __u16 v = 2;
169 ::encode(v, blist);
170 ::encode_raw(fsid, blist);
171 ::encode(epoch, blist);
172 ::encode(mon_addr, blist, con_features);
173 ::encode(last_changed, blist);
174 ::encode(created, blist);
175 }
176
177 ENCODE_START(5, 3, blist);
178 ::encode_raw(fsid, blist);
179 ::encode(epoch, blist);
180 ::encode(mon_addr, blist, con_features);
181 ::encode(last_changed, blist);
182 ::encode(created, blist);
183 ::encode(persistent_features, blist);
184 ::encode(optional_features, blist);
185 // this superseeds 'mon_addr'
186 ::encode(mon_info, blist, con_features);
187 ENCODE_FINISH(blist);
188}
189
190void MonMap::decode(bufferlist::iterator &p)
191{
192 map<string,entity_addr_t> mon_addr;
193 DECODE_START_LEGACY_COMPAT_LEN_16(5, 3, 3, p);
194 ::decode_raw(fsid, p);
195 ::decode(epoch, p);
196 if (struct_v == 1) {
197 vector<entity_inst_t> mon_inst;
198 ::decode(mon_inst, p);
199 for (unsigned i = 0; i < mon_inst.size(); i++) {
200 char n[2];
201 n[0] = '0' + i;
202 n[1] = 0;
203 string name = n;
204 mon_addr[name] = mon_inst[i].addr;
205 }
206 } else {
207 ::decode(mon_addr, p);
208 }
209 ::decode(last_changed, p);
210 ::decode(created, p);
211 if (struct_v >= 4) {
212 ::decode(persistent_features, p);
213 ::decode(optional_features, p);
214 }
215 if (struct_v >= 5) {
216 ::decode(mon_info, p);
217 } else {
218 // we may be decoding to an existing monmap; if we do not
219 // clear the mon_info map now, we will likely incur in problems
220 // later on MonMap::sanitize_mons()
221 mon_info.clear();
222 }
223 DECODE_FINISH(p);
224 sanitize_mons(mon_addr);
225 calc_ranks();
226}
227
228void MonMap::generate_test_instances(list<MonMap*>& o)
229{
230 o.push_back(new MonMap);
231 o.push_back(new MonMap);
232 o.back()->epoch = 1;
233 o.back()->last_changed = utime_t(123, 456);
234 o.back()->created = utime_t(789, 101112);
235 o.back()->add("one", entity_addr_t());
236
237 MonMap *m = new MonMap;
238 {
239 m->epoch = 1;
240 m->last_changed = utime_t(123, 456);
241
242 entity_addr_t empty_addr_one;
243 empty_addr_one.set_nonce(1);
244 m->add("empty_addr_one", empty_addr_one);
245 entity_addr_t empty_addr_two;
246 empty_addr_two.set_nonce(2);
247 m->add("empty_adrr_two", empty_addr_two);
248
249 const char *local_pub_addr_s = "127.0.1.2";
250
251 const char *end_p = local_pub_addr_s + strlen(local_pub_addr_s);
252 entity_addr_t local_pub_addr;
253 local_pub_addr.parse(local_pub_addr_s, &end_p);
254
224ce89b 255 m->add(mon_info_t("filled_pub_addr", local_pub_addr, 1));
7c673cae
FG
256
257 m->add("empty_addr_zero", entity_addr_t());
258 }
259 o.push_back(m);
260}
261
262// read from/write to a file
263int MonMap::write(const char *fn)
264{
265 // encode
266 bufferlist bl;
267 encode(bl, CEPH_FEATURES_ALL);
268
269 return bl.write_file(fn);
270}
271
272int MonMap::read(const char *fn)
273{
274 // read
275 bufferlist bl;
276 std::string error;
277 int r = bl.read_file(fn, &error);
278 if (r < 0)
279 return r;
280 decode(bl);
281 return 0;
282}
283
284void MonMap::print_summary(ostream& out) const
285{
286 out << "e" << epoch << ": "
287 << mon_info.size() << " mons at {";
288 // the map that we used to print, as it was, no longer
289 // maps strings to the monitor's public address, but to
290 // mon_info_t instead. As such, print the map in a way
291 // that keeps the expected format.
292 bool has_printed = false;
293 for (map<string,mon_info_t>::const_iterator p = mon_info.begin();
294 p != mon_info.end();
295 ++p) {
296 if (has_printed)
297 out << ",";
298 out << p->first << "=" << p->second.public_addr;
299 has_printed = true;
300 }
301 out << "}";
302}
303
304void MonMap::print(ostream& out) const
305{
306 out << "epoch " << epoch << "\n";
307 out << "fsid " << fsid << "\n";
308 out << "last_changed " << last_changed << "\n";
309 out << "created " << created << "\n";
310 unsigned i = 0;
311 for (vector<string>::const_iterator p = ranks.begin();
312 p != ranks.end();
313 ++p) {
314 out << i++ << ": " << get_addr(*p) << " mon." << *p << "\n";
315 }
316}
317
318void MonMap::dump(Formatter *f) const
319{
320 f->dump_unsigned("epoch", epoch);
321 f->dump_stream("fsid") << fsid;
322 f->dump_stream("modified") << last_changed;
323 f->dump_stream("created") << created;
324 f->open_object_section("features");
325 persistent_features.dump(f, "persistent");
326 optional_features.dump(f, "optional");
327 f->close_section();
328 f->open_array_section("mons");
329 int i = 0;
330 for (vector<string>::const_iterator p = ranks.begin();
331 p != ranks.end();
332 ++p, ++i) {
333 f->open_object_section("mon");
334 f->dump_int("rank", i);
335 f->dump_string("name", *p);
336 f->dump_stream("addr") << get_addr(*p);
337 f->dump_stream("public_addr") << get_addr(*p);
338 f->close_section();
339 }
340 f->close_section();
341}
342
343
344int MonMap::build_from_host_list(std::string hostlist, std::string prefix)
345{
346 vector<entity_addr_t> addrs;
347 if (parse_ip_port_vec(hostlist.c_str(), addrs)) {
348 if (addrs.empty())
349 return -ENOENT;
350 for (unsigned i=0; i<addrs.size(); i++) {
351 char n[2];
352 n[0] = 'a' + i;
353 n[1] = 0;
354 if (addrs[i].get_port() == 0)
355 addrs[i].set_port(CEPH_MON_PORT);
356 string name = prefix;
357 name += n;
358 if (!contains(addrs[i]))
359 add(name, addrs[i]);
360 }
361 return 0;
362 }
363
364 // maybe they passed us a DNS-resolvable name
365 char *hosts = NULL;
366 hosts = resolve_addrs(hostlist.c_str());
367 if (!hosts)
368 return -EINVAL;
369 bool success = parse_ip_port_vec(hosts, addrs);
370 free(hosts);
371 if (!success)
372 return -EINVAL;
373
374 if (addrs.empty())
375 return -ENOENT;
376
377 for (unsigned i=0; i<addrs.size(); i++) {
378 char n[2];
379 n[0] = 'a' + i;
380 n[1] = 0;
381 if (addrs[i].get_port() == 0)
382 addrs[i].set_port(CEPH_MON_PORT);
383 string name = prefix;
384 name += n;
385 if (!contains(addrs[i]) &&
386 !contains(name))
387 add(name, addrs[i]);
388 }
389 return 0;
390}
391
392void MonMap::set_initial_members(CephContext *cct,
393 list<std::string>& initial_members,
394 string my_name, const entity_addr_t& my_addr,
395 set<entity_addr_t> *removed)
396{
397 // remove non-initial members
398 unsigned i = 0;
399 while (i < size()) {
400 string n = get_name(i);
401 if (std::find(initial_members.begin(), initial_members.end(), n) != initial_members.end()) {
402 lgeneric_dout(cct, 1) << " keeping " << n << " " << get_addr(i) << dendl;
403 i++;
404 continue;
405 }
406
407 lgeneric_dout(cct, 1) << " removing " << get_name(i) << " " << get_addr(i) << dendl;
408 if (removed)
409 removed->insert(get_addr(i));
410 remove(n);
411 assert(!contains(n));
412 }
413
414 // add missing initial members
415 for (list<string>::iterator p = initial_members.begin(); p != initial_members.end(); ++p) {
416 if (!contains(*p)) {
417 if (*p == my_name) {
418 lgeneric_dout(cct, 1) << " adding self " << *p << " " << my_addr << dendl;
419 add(*p, my_addr);
420 } else {
421 entity_addr_t a;
422 a.set_type(entity_addr_t::TYPE_LEGACY);
423 a.set_family(AF_INET);
424 for (int n=1; ; n++) {
425 a.set_nonce(n);
426 if (!contains(a))
427 break;
428 }
429 lgeneric_dout(cct, 1) << " adding " << *p << " " << a << dendl;
430 add(*p, a);
431 }
432 assert(contains(*p));
433 }
434 }
435}
436
437
438int MonMap::build_initial(CephContext *cct, ostream& errout)
439{
440 const md_config_t *conf = cct->_conf;
441 // file?
442 if (!conf->monmap.empty()) {
443 int r;
444 try {
445 r = read(conf->monmap.c_str());
446 }
447 catch (const buffer::error &e) {
448 r = -EINVAL;
449 }
450 if (r >= 0)
451 return 0;
452 errout << "unable to read/decode monmap from " << conf->monmap
453 << ": " << cpp_strerror(-r) << std::endl;
454 return r;
455 }
456
457 // fsid from conf?
458 if (!cct->_conf->fsid.is_zero()) {
459 fsid = cct->_conf->fsid;
460 }
461
462 // -m foo?
463 if (!conf->mon_host.empty()) {
464 int r = build_from_host_list(conf->mon_host, "noname-");
465 if (r < 0) {
466 errout << "unable to parse addrs in '" << conf->mon_host << "'"
467 << std::endl;
468 return r;
469 }
470 created = ceph_clock_now();
471 last_changed = created;
472 return 0;
473 }
474
475 // What monitors are in the config file?
476 std::vector <std::string> sections;
477 int ret = conf->get_all_sections(sections);
478 if (ret) {
479 errout << "Unable to find any monitors in the configuration "
480 << "file, because there was an error listing the sections. error "
481 << ret << std::endl;
482 return -ENOENT;
483 }
484 std::vector <std::string> mon_names;
485 for (std::vector <std::string>::const_iterator s = sections.begin();
486 s != sections.end(); ++s) {
487 if ((s->substr(0, 4) == "mon.") && (s->size() > 4)) {
488 mon_names.push_back(s->substr(4));
489 }
490 }
491
492 // Find an address for each monitor in the config file.
493 for (std::vector <std::string>::const_iterator m = mon_names.begin();
494 m != mon_names.end(); ++m) {
495 std::vector <std::string> sections;
496 std::string m_name("mon");
497 m_name += ".";
498 m_name += *m;
499 sections.push_back(m_name);
500 sections.push_back("mon");
501 sections.push_back("global");
502 std::string val;
503 int res = conf->get_val_from_conf_file(sections, "mon addr", val, true);
504 if (res) {
505 errout << "failed to get an address for mon." << *m << ": error "
506 << res << std::endl;
507 continue;
508 }
509 entity_addr_t addr;
510 if (!addr.parse(val.c_str())) {
511 errout << "unable to parse address for mon." << *m
512 << ": addr='" << val << "'" << std::endl;
513 continue;
514 }
515 if (addr.get_port() == 0)
516 addr.set_port(CEPH_MON_PORT);
517
224ce89b
WB
518 uint16_t priority = 0;
519 if (!conf->get_val_from_conf_file(sections, "mon priority", val, false)) {
520 try {
521 priority = std::stoul(val);
522 } catch (std::logic_error&) {
523 errout << "unable to parse priority for mon." << *m
524 << ": priority='" << val << "'" << std::endl;
525 continue;
526 }
527 }
7c673cae
FG
528 // the make sure this mon isn't already in the map
529 if (contains(addr))
530 remove(get_name(addr));
531 if (contains(*m))
532 remove(*m);
533
224ce89b 534 add(mon_info_t{*m, addr, priority});
7c673cae
FG
535 }
536
537 if (size() == 0) {
538 // no info found from conf options lets try use DNS SRV records
539 string srv_name = conf->mon_dns_srv_name;
540 string domain;
541 // check if domain is also provided and extract it from srv_name
542 size_t idx = srv_name.find("_");
543 if (idx != string::npos) {
544 domain = srv_name.substr(idx + 1);
545 srv_name = srv_name.substr(0, idx);
546 }
547
224ce89b 548 map<string, DNSResolver::Record> records;
7c673cae 549 if (DNSResolver::get_instance()->resolve_srv_hosts(cct, srv_name,
224ce89b 550 DNSResolver::SRV_Protocol::TCP, domain, &records) != 0) {
7c673cae
FG
551
552 errout << "unable to get monitor info from DNS SRV with service name: " <<
553 "ceph-mon" << std::endl;
554 }
555 else {
224ce89b
WB
556 for (const auto& record : records) {
557 add(mon_info_t{record.first,
558 record.second.addr,
559 record.second.priority});
7c673cae
FG
560 }
561 }
562 }
563
564 if (size() == 0) {
565 errout << "no monitors specified to connect to." << std::endl;
566 return -ENOENT;
567 }
568 created = ceph_clock_now();
569 last_changed = created;
570 return 0;
571}