]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
7c673cae FG |
3 | |
4 | #include "MonMap.h" | |
5 | ||
6 | #include <algorithm> | |
7 | #include <sys/types.h> | |
8 | #include <sys/stat.h> | |
9 | #include <fcntl.h> | |
10 | ||
11fdf7f2 TL |
11 | #ifdef WITH_SEASTAR |
12 | #include <seastar/core/fstream.hh> | |
13 | #include <seastar/core/reactor.hh> | |
14 | #include <seastar/net/dns.hh> | |
15 | #include "crimson/common/config_proxy.h" | |
16 | #endif | |
17 | ||
7c673cae FG |
18 | #include "common/Formatter.h" |
19 | ||
20 | #include "include/ceph_features.h" | |
21 | #include "include/addr_parsing.h" | |
22 | #include "common/ceph_argparse.h" | |
23 | #include "common/dns_resolve.h" | |
24 | #include "common/errno.h" | |
7c673cae | 25 | #include "common/dout.h" |
11fdf7f2 | 26 | #include "common/Clock.h" |
7c673cae FG |
27 | |
28 | using ceph::Formatter; | |
29 | ||
30 | void mon_info_t::encode(bufferlist& bl, uint64_t features) const | |
31 | { | |
11fdf7f2 TL |
32 | uint8_t v = 3; |
33 | if (!HAVE_FEATURE(features, SERVER_NAUTILUS)) { | |
34 | v = 2; | |
35 | } | |
36 | ENCODE_START(v, 1, bl); | |
37 | encode(name, bl); | |
38 | if (v < 3) { | |
eafe8130 TL |
39 | auto a = public_addrs.legacy_addr(); |
40 | if (a != entity_addr_t()) { | |
41 | encode(a, bl, features); | |
42 | } else { | |
43 | // note: we don't have a legacy addr here, so lie so that it looks | |
44 | // like one, just so that old clients get a valid-looking map. | |
45 | // they won't be able to talk to the v2 mons, but that's better | |
46 | // than nothing. | |
47 | encode(public_addrs.as_legacy_addr(), bl, features); | |
48 | } | |
11fdf7f2 TL |
49 | } else { |
50 | encode(public_addrs, bl, features); | |
51 | } | |
52 | encode(priority, bl); | |
7c673cae FG |
53 | ENCODE_FINISH(bl); |
54 | } | |
55 | ||
11fdf7f2 | 56 | void mon_info_t::decode(bufferlist::const_iterator& p) |
7c673cae | 57 | { |
11fdf7f2 TL |
58 | DECODE_START(3, p); |
59 | decode(name, p); | |
60 | decode(public_addrs, p); | |
224ce89b | 61 | if (struct_v >= 2) { |
11fdf7f2 | 62 | decode(priority, p); |
224ce89b | 63 | } |
7c673cae FG |
64 | DECODE_FINISH(p); |
65 | } | |
66 | ||
67 | void mon_info_t::print(ostream& out) const | |
68 | { | |
69 | out << "mon." << name | |
11fdf7f2 | 70 | << " addrs " << public_addrs |
224ce89b | 71 | << " priority " << priority; |
7c673cae FG |
72 | } |
73 | ||
7c673cae FG |
74 | namespace { |
75 | struct rank_cmp { | |
76 | bool operator()(const mon_info_t &a, const mon_info_t &b) const { | |
11fdf7f2 | 77 | if (a.public_addrs.legacy_or_front_addr() == b.public_addrs.legacy_or_front_addr()) |
7c673cae | 78 | return a.name < b.name; |
11fdf7f2 | 79 | return a.public_addrs.legacy_or_front_addr() < b.public_addrs.legacy_or_front_addr(); |
7c673cae FG |
80 | } |
81 | }; | |
82 | } | |
83 | ||
11fdf7f2 TL |
84 | void MonMap::calc_legacy_ranks() |
85 | { | |
7c673cae | 86 | ranks.resize(mon_info.size()); |
7c673cae FG |
87 | |
88 | // Used to order entries according to public_addr, because that's | |
89 | // how the ranks are expected to be ordered by. We may expand this | |
90 | // later on, according to some other criteria, by specifying a | |
91 | // different comparator. | |
92 | // | |
93 | // Please note that we use a 'set' here instead of resorting to | |
94 | // std::sort() because we need more info than that's available in | |
95 | // the vector. The vector will thus be ordered by, e.g., public_addr | |
96 | // while only containing the names of each individual monitor. | |
97 | // The only way of achieving this with std::sort() would be to first | |
98 | // insert every mon_info_t entry into a vector 'foo', std::sort() 'foo' | |
99 | // with custom comparison functions, and then copy each invidual entry | |
100 | // to a new vector. Unless there's a simpler way, we don't think the | |
101 | // added complexity makes up for the additional memory usage of a 'set'. | |
102 | set<mon_info_t, rank_cmp> tmp; | |
103 | ||
104 | for (map<string,mon_info_t>::iterator p = mon_info.begin(); | |
105 | p != mon_info.end(); | |
106 | ++p) { | |
107 | mon_info_t &m = p->second; | |
108 | tmp.insert(m); | |
7c673cae FG |
109 | } |
110 | ||
111 | // map the set to the actual ranks etc | |
112 | unsigned i = 0; | |
113 | for (set<mon_info_t>::iterator p = tmp.begin(); | |
114 | p != tmp.end(); | |
115 | ++p, ++i) { | |
116 | ranks[i] = p->name; | |
117 | } | |
118 | } | |
119 | ||
120 | void MonMap::encode(bufferlist& blist, uint64_t con_features) const | |
121 | { | |
7c673cae | 122 | if ((con_features & CEPH_FEATURE_MONNAMES) == 0) { |
11fdf7f2 | 123 | using ceph::encode; |
7c673cae | 124 | __u16 v = 1; |
11fdf7f2 TL |
125 | encode(v, blist); |
126 | encode_raw(fsid, blist); | |
127 | encode(epoch, blist); | |
128 | vector<entity_inst_t> mon_inst(ranks.size()); | |
129 | for (unsigned n = 0; n < ranks.size(); n++) { | |
130 | mon_inst[n].name = entity_name_t::MON(n); | |
131 | mon_inst[n].addr = get_addrs(n).legacy_addr(); | |
132 | } | |
133 | encode(mon_inst, blist, con_features); | |
134 | encode(last_changed, blist); | |
135 | encode(created, blist); | |
7c673cae FG |
136 | return; |
137 | } | |
138 | ||
11fdf7f2 TL |
139 | map<string,entity_addr_t> legacy_mon_addr; |
140 | if (!HAVE_FEATURE(con_features, MONENC) || | |
141 | !HAVE_FEATURE(con_features, SERVER_NAUTILUS)) { | |
142 | for (auto& [name, info] : mon_info) { | |
143 | legacy_mon_addr[name] = info.public_addrs.legacy_addr(); | |
144 | } | |
145 | } | |
146 | ||
147 | if (!HAVE_FEATURE(con_features, MONENC)) { | |
148 | /* we keep the mon_addr map when encoding to ensure compatibility | |
149 | * with clients and other monitors that do not yet support the 'mons' | |
150 | * map. This map keeps its original behavior, containing a mapping of | |
151 | * monitor id (i.e., 'foo' in 'mon.foo') to the monitor's public | |
152 | * address -- which is obtained from the public address of each entry | |
153 | * in the 'mons' map. | |
154 | */ | |
155 | using ceph::encode; | |
7c673cae | 156 | __u16 v = 2; |
11fdf7f2 TL |
157 | encode(v, blist); |
158 | encode_raw(fsid, blist); | |
159 | encode(epoch, blist); | |
160 | encode(legacy_mon_addr, blist, con_features); | |
161 | encode(last_changed, blist); | |
162 | encode(created, blist); | |
163 | return; | |
164 | } | |
165 | ||
166 | if (!HAVE_FEATURE(con_features, SERVER_NAUTILUS)) { | |
167 | ENCODE_START(5, 3, blist); | |
168 | encode_raw(fsid, blist); | |
169 | encode(epoch, blist); | |
170 | encode(legacy_mon_addr, blist, con_features); | |
171 | encode(last_changed, blist); | |
172 | encode(created, blist); | |
173 | encode(persistent_features, blist); | |
174 | encode(optional_features, blist); | |
175 | encode(mon_info, blist, con_features); | |
176 | ENCODE_FINISH(blist); | |
177 | return; | |
178 | } | |
179 | ||
180 | ENCODE_START(7, 6, blist); | |
181 | encode_raw(fsid, blist); | |
182 | encode(epoch, blist); | |
183 | encode(last_changed, blist); | |
184 | encode(created, blist); | |
185 | encode(persistent_features, blist); | |
186 | encode(optional_features, blist); | |
187 | encode(mon_info, blist, con_features); | |
188 | encode(ranks, blist); | |
189 | encode(min_mon_release, blist); | |
7c673cae FG |
190 | ENCODE_FINISH(blist); |
191 | } | |
192 | ||
11fdf7f2 | 193 | void MonMap::decode(bufferlist::const_iterator& p) |
7c673cae FG |
194 | { |
195 | map<string,entity_addr_t> mon_addr; | |
11fdf7f2 TL |
196 | DECODE_START_LEGACY_COMPAT_LEN_16(7, 3, 3, p); |
197 | decode_raw(fsid, p); | |
198 | decode(epoch, p); | |
7c673cae FG |
199 | if (struct_v == 1) { |
200 | vector<entity_inst_t> mon_inst; | |
11fdf7f2 | 201 | decode(mon_inst, p); |
7c673cae FG |
202 | for (unsigned i = 0; i < mon_inst.size(); i++) { |
203 | char n[2]; | |
204 | n[0] = '0' + i; | |
205 | n[1] = 0; | |
206 | string name = n; | |
207 | mon_addr[name] = mon_inst[i].addr; | |
208 | } | |
11fdf7f2 TL |
209 | } else if (struct_v < 6) { |
210 | decode(mon_addr, p); | |
7c673cae | 211 | } |
11fdf7f2 TL |
212 | decode(last_changed, p); |
213 | decode(created, p); | |
7c673cae | 214 | if (struct_v >= 4) { |
11fdf7f2 TL |
215 | decode(persistent_features, p); |
216 | decode(optional_features, p); | |
7c673cae | 217 | } |
11fdf7f2 TL |
218 | if (struct_v < 5) { |
219 | // generate mon_info from legacy mon_addr | |
220 | for (auto& [name, addr] : mon_addr) { | |
221 | mon_info_t &m = mon_info[name]; | |
222 | m.name = name; | |
223 | m.public_addrs = entity_addrvec_t(addr); | |
224 | } | |
225 | } else { | |
226 | decode(mon_info, p); | |
227 | } | |
228 | if (struct_v < 6) { | |
229 | calc_legacy_ranks(); | |
230 | } else { | |
231 | decode(ranks, p); | |
232 | } | |
233 | if (struct_v >= 7) { | |
234 | decode(min_mon_release, p); | |
7c673cae | 235 | } else { |
11fdf7f2 | 236 | min_mon_release = infer_ceph_release_from_mon_features(persistent_features); |
7c673cae | 237 | } |
11fdf7f2 | 238 | calc_addr_mons(); |
7c673cae | 239 | DECODE_FINISH(p); |
7c673cae FG |
240 | } |
241 | ||
242 | void MonMap::generate_test_instances(list<MonMap*>& o) | |
243 | { | |
244 | o.push_back(new MonMap); | |
245 | o.push_back(new MonMap); | |
246 | o.back()->epoch = 1; | |
247 | o.back()->last_changed = utime_t(123, 456); | |
248 | o.back()->created = utime_t(789, 101112); | |
11fdf7f2 | 249 | o.back()->add("one", entity_addrvec_t()); |
7c673cae FG |
250 | |
251 | MonMap *m = new MonMap; | |
252 | { | |
253 | m->epoch = 1; | |
254 | m->last_changed = utime_t(123, 456); | |
255 | ||
11fdf7f2 TL |
256 | entity_addrvec_t empty_addr_one = entity_addrvec_t(entity_addr_t()); |
257 | empty_addr_one.v[0].set_nonce(1); | |
7c673cae | 258 | m->add("empty_addr_one", empty_addr_one); |
11fdf7f2 TL |
259 | entity_addrvec_t empty_addr_two = entity_addrvec_t(entity_addr_t()); |
260 | empty_addr_two.v[0].set_nonce(2); | |
261 | m->add("empty_addr_two", empty_addr_two); | |
7c673cae FG |
262 | |
263 | const char *local_pub_addr_s = "127.0.1.2"; | |
264 | ||
265 | const char *end_p = local_pub_addr_s + strlen(local_pub_addr_s); | |
11fdf7f2 | 266 | entity_addrvec_t local_pub_addr; |
7c673cae FG |
267 | local_pub_addr.parse(local_pub_addr_s, &end_p); |
268 | ||
11fdf7f2 | 269 | m->add(mon_info_t("filled_pub_addr", entity_addrvec_t(local_pub_addr), 1)); |
7c673cae | 270 | |
11fdf7f2 | 271 | m->add("empty_addr_zero", entity_addrvec_t()); |
7c673cae FG |
272 | } |
273 | o.push_back(m); | |
274 | } | |
275 | ||
276 | // read from/write to a file | |
277 | int MonMap::write(const char *fn) | |
278 | { | |
279 | // encode | |
280 | bufferlist bl; | |
281 | encode(bl, CEPH_FEATURES_ALL); | |
282 | ||
283 | return bl.write_file(fn); | |
284 | } | |
285 | ||
286 | int MonMap::read(const char *fn) | |
287 | { | |
288 | // read | |
289 | bufferlist bl; | |
290 | std::string error; | |
291 | int r = bl.read_file(fn, &error); | |
292 | if (r < 0) | |
293 | return r; | |
294 | decode(bl); | |
295 | return 0; | |
296 | } | |
297 | ||
298 | void MonMap::print_summary(ostream& out) const | |
299 | { | |
300 | out << "e" << epoch << ": " | |
301 | << mon_info.size() << " mons at {"; | |
302 | // the map that we used to print, as it was, no longer | |
303 | // maps strings to the monitor's public address, but to | |
304 | // mon_info_t instead. As such, print the map in a way | |
305 | // that keeps the expected format. | |
306 | bool has_printed = false; | |
307 | for (map<string,mon_info_t>::const_iterator p = mon_info.begin(); | |
308 | p != mon_info.end(); | |
309 | ++p) { | |
310 | if (has_printed) | |
311 | out << ","; | |
11fdf7f2 | 312 | out << p->first << "=" << p->second.public_addrs; |
7c673cae FG |
313 | has_printed = true; |
314 | } | |
315 | out << "}"; | |
316 | } | |
317 | ||
318 | void MonMap::print(ostream& out) const | |
319 | { | |
320 | out << "epoch " << epoch << "\n"; | |
321 | out << "fsid " << fsid << "\n"; | |
322 | out << "last_changed " << last_changed << "\n"; | |
323 | out << "created " << created << "\n"; | |
11fdf7f2 TL |
324 | out << "min_mon_release " << (int)min_mon_release |
325 | << " (" << ceph_release_name(min_mon_release) << ")\n"; | |
7c673cae FG |
326 | unsigned i = 0; |
327 | for (vector<string>::const_iterator p = ranks.begin(); | |
328 | p != ranks.end(); | |
329 | ++p) { | |
11fdf7f2 | 330 | out << i++ << ": " << get_addrs(*p) << " mon." << *p << "\n"; |
7c673cae FG |
331 | } |
332 | } | |
333 | ||
334 | void MonMap::dump(Formatter *f) const | |
335 | { | |
336 | f->dump_unsigned("epoch", epoch); | |
337 | f->dump_stream("fsid") << fsid; | |
338 | f->dump_stream("modified") << last_changed; | |
339 | f->dump_stream("created") << created; | |
11fdf7f2 TL |
340 | f->dump_unsigned("min_mon_release", min_mon_release); |
341 | f->dump_string("min_mon_release_name", ceph_release_name(min_mon_release)); | |
7c673cae FG |
342 | f->open_object_section("features"); |
343 | persistent_features.dump(f, "persistent"); | |
344 | optional_features.dump(f, "optional"); | |
345 | f->close_section(); | |
346 | f->open_array_section("mons"); | |
347 | int i = 0; | |
348 | for (vector<string>::const_iterator p = ranks.begin(); | |
349 | p != ranks.end(); | |
350 | ++p, ++i) { | |
351 | f->open_object_section("mon"); | |
352 | f->dump_int("rank", i); | |
353 | f->dump_string("name", *p); | |
11fdf7f2 TL |
354 | f->dump_object("public_addrs", get_addrs(*p)); |
355 | // compat: make these look like pre-nautilus entity_addr_t | |
356 | f->dump_stream("addr") << get_addrs(*p).get_legacy_str(); | |
357 | f->dump_stream("public_addr") << get_addrs(*p).get_legacy_str(); | |
7c673cae FG |
358 | f->close_section(); |
359 | } | |
360 | f->close_section(); | |
361 | } | |
362 | ||
11fdf7f2 TL |
363 | // an ambiguous mon addr may be legacy or may be msgr2--we aren' sure. |
364 | // when that happens we need to try them both (unless we can | |
365 | // reasonably infer from the port number which it is). | |
366 | void MonMap::_add_ambiguous_addr(const string& name, | |
367 | entity_addr_t addr, | |
368 | int priority, | |
369 | bool for_mkfs) | |
370 | { | |
371 | if (addr.get_type() != entity_addr_t::TYPE_ANY) { | |
372 | // a v1: or v2: prefix was specified | |
373 | if (addr.get_port() == 0) { | |
374 | // use default port | |
375 | if (addr.get_type() == entity_addr_t::TYPE_ANY) { | |
376 | addr.set_port(CEPH_MON_PORT_IANA); | |
377 | } else if (addr.get_type() == entity_addr_t::TYPE_LEGACY) { | |
378 | addr.set_port(CEPH_MON_PORT_LEGACY); | |
379 | } else if (addr.get_type() == entity_addr_t::TYPE_MSGR2) { | |
380 | addr.set_port(CEPH_MON_PORT_IANA); | |
381 | } else { | |
382 | // wth | |
383 | return; | |
384 | } | |
385 | if (!contains(addr)) { | |
386 | add(name, entity_addrvec_t(addr)); | |
387 | } | |
388 | } else { | |
389 | if (!contains(addr)) { | |
390 | add(name, entity_addrvec_t(addr), priority); | |
391 | } | |
392 | } | |
393 | } else { | |
394 | // no v1: or v2: prefix specified | |
395 | if (addr.get_port() == CEPH_MON_PORT_LEGACY) { | |
396 | // legacy port implies legacy addr | |
397 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
398 | if (!contains(addr)) { | |
399 | if (!for_mkfs) { | |
400 | add(name + "-legacy", entity_addrvec_t(addr)); | |
401 | } else { | |
402 | add(name, entity_addrvec_t(addr)); | |
403 | } | |
404 | } | |
405 | } else if (addr.get_port() == CEPH_MON_PORT_IANA) { | |
406 | // iana port implies msgr2 addr | |
407 | addr.set_type(entity_addr_t::TYPE_MSGR2); | |
408 | if (!contains(addr)) { | |
409 | add(name, entity_addrvec_t(addr)); | |
410 | } | |
411 | } else if (addr.get_port() == 0) { | |
412 | // no port; include both msgr2 and legacy ports | |
413 | if (!for_mkfs) { | |
414 | addr.set_type(entity_addr_t::TYPE_MSGR2); | |
415 | addr.set_port(CEPH_MON_PORT_IANA); | |
416 | if (!contains(addr)) { | |
417 | add(name, entity_addrvec_t(addr)); | |
418 | } | |
419 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
420 | addr.set_port(CEPH_MON_PORT_LEGACY); | |
421 | if (!contains(addr)) { | |
422 | add(name + "-legacy", entity_addrvec_t(addr)); | |
423 | } | |
424 | } else { | |
425 | entity_addrvec_t av; | |
426 | addr.set_type(entity_addr_t::TYPE_MSGR2); | |
427 | addr.set_port(CEPH_MON_PORT_IANA); | |
428 | av.v.push_back(addr); | |
429 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
430 | addr.set_port(CEPH_MON_PORT_LEGACY); | |
431 | av.v.push_back(addr); | |
432 | if (!contains(av)) { | |
433 | add(name, av); | |
434 | } | |
435 | } | |
436 | } else { | |
437 | addr.set_type(entity_addr_t::TYPE_MSGR2); | |
438 | if (!contains(addr)) { | |
439 | add(name, entity_addrvec_t(addr), priority); | |
440 | } | |
441 | if (!for_mkfs) { | |
442 | // try legacy on same port too | |
443 | addr.set_type(entity_addr_t::TYPE_LEGACY); | |
444 | if (!contains(addr)) { | |
445 | add(name + "-legacy", entity_addrvec_t(addr), priority); | |
446 | } | |
447 | } | |
448 | } | |
449 | } | |
450 | } | |
7c673cae | 451 | |
11fdf7f2 TL |
452 | int MonMap::init_with_ips(const std::string& ips, |
453 | bool for_mkfs, | |
454 | const std::string &prefix) | |
7c673cae | 455 | { |
11fdf7f2 TL |
456 | vector<entity_addrvec_t> addrs; |
457 | if (!parse_ip_port_vec( | |
458 | ips.c_str(), addrs, | |
459 | entity_addr_t::TYPE_ANY)) { | |
460 | return -EINVAL; | |
461 | } | |
462 | if (addrs.empty()) | |
463 | return -ENOENT; | |
464 | for (unsigned i=0; i<addrs.size(); i++) { | |
465 | char n[2]; | |
466 | n[0] = 'a' + i; | |
467 | n[1] = 0; | |
468 | string name; | |
469 | name = prefix; | |
470 | name += n; | |
471 | if (addrs[i].v.size() == 1) { | |
472 | _add_ambiguous_addr(name, addrs[i].front(), 0, for_mkfs); | |
473 | } else { | |
474 | // they specified an addrvec, so let's assume they also specified | |
475 | // the addr *type* and *port*. (we could possibly improve this?) | |
476 | add(name, addrs[i], 0); | |
7c673cae | 477 | } |
7c673cae | 478 | } |
11fdf7f2 TL |
479 | return 0; |
480 | } | |
7c673cae | 481 | |
11fdf7f2 TL |
482 | int MonMap::init_with_hosts(const std::string& hostlist, |
483 | bool for_mkfs, | |
484 | const std::string& prefix) | |
485 | { | |
7c673cae | 486 | // maybe they passed us a DNS-resolvable name |
11fdf7f2 | 487 | char *hosts = resolve_addrs(hostlist.c_str()); |
7c673cae FG |
488 | if (!hosts) |
489 | return -EINVAL; | |
11fdf7f2 TL |
490 | |
491 | vector<entity_addrvec_t> addrs; | |
492 | bool success = parse_ip_port_vec( | |
493 | hosts, addrs, | |
494 | for_mkfs ? entity_addr_t::TYPE_MSGR2 : entity_addr_t::TYPE_ANY); | |
7c673cae FG |
495 | free(hosts); |
496 | if (!success) | |
497 | return -EINVAL; | |
7c673cae FG |
498 | if (addrs.empty()) |
499 | return -ENOENT; | |
7c673cae FG |
500 | for (unsigned i=0; i<addrs.size(); i++) { |
501 | char n[2]; | |
502 | n[0] = 'a' + i; | |
503 | n[1] = 0; | |
7c673cae FG |
504 | string name = prefix; |
505 | name += n; | |
11fdf7f2 TL |
506 | if (addrs[i].v.size() == 1) { |
507 | _add_ambiguous_addr(name, addrs[i].front(), 0); | |
508 | } else { | |
509 | add(name, addrs[i], 0); | |
510 | } | |
7c673cae | 511 | } |
11fdf7f2 | 512 | calc_legacy_ranks(); |
7c673cae FG |
513 | return 0; |
514 | } | |
515 | ||
516 | void MonMap::set_initial_members(CephContext *cct, | |
517 | list<std::string>& initial_members, | |
11fdf7f2 TL |
518 | string my_name, |
519 | const entity_addrvec_t& my_addrs, | |
520 | set<entity_addrvec_t> *removed) | |
7c673cae FG |
521 | { |
522 | // remove non-initial members | |
523 | unsigned i = 0; | |
524 | while (i < size()) { | |
525 | string n = get_name(i); | |
11fdf7f2 TL |
526 | if (std::find(initial_members.begin(), initial_members.end(), n) |
527 | != initial_members.end()) { | |
528 | lgeneric_dout(cct, 1) << " keeping " << n << " " << get_addrs(i) << dendl; | |
7c673cae FG |
529 | i++; |
530 | continue; | |
531 | } | |
532 | ||
11fdf7f2 TL |
533 | lgeneric_dout(cct, 1) << " removing " << get_name(i) << " " << get_addrs(i) |
534 | << dendl; | |
535 | if (removed) { | |
536 | removed->insert(get_addrs(i)); | |
537 | } | |
7c673cae | 538 | remove(n); |
11fdf7f2 | 539 | ceph_assert(!contains(n)); |
7c673cae FG |
540 | } |
541 | ||
542 | // add missing initial members | |
11fdf7f2 TL |
543 | for (auto& p : initial_members) { |
544 | if (!contains(p)) { | |
545 | if (p == my_name) { | |
546 | lgeneric_dout(cct, 1) << " adding self " << p << " " << my_addrs | |
547 | << dendl; | |
548 | add(p, my_addrs); | |
7c673cae FG |
549 | } else { |
550 | entity_addr_t a; | |
551 | a.set_type(entity_addr_t::TYPE_LEGACY); | |
552 | a.set_family(AF_INET); | |
553 | for (int n=1; ; n++) { | |
554 | a.set_nonce(n); | |
555 | if (!contains(a)) | |
556 | break; | |
557 | } | |
11fdf7f2 TL |
558 | lgeneric_dout(cct, 1) << " adding " << p << " " << a << dendl; |
559 | add(p, entity_addrvec_t(a)); | |
7c673cae | 560 | } |
11fdf7f2 | 561 | ceph_assert(contains(p)); |
7c673cae FG |
562 | } |
563 | } | |
11fdf7f2 | 564 | calc_legacy_ranks(); |
7c673cae FG |
565 | } |
566 | ||
11fdf7f2 TL |
567 | int MonMap::init_with_config_file(const ConfigProxy& conf, |
568 | std::ostream& errout) | |
7c673cae | 569 | { |
11fdf7f2 TL |
570 | std::vector<std::string> sections; |
571 | int ret = conf.get_all_sections(sections); | |
7c673cae FG |
572 | if (ret) { |
573 | errout << "Unable to find any monitors in the configuration " | |
574 | << "file, because there was an error listing the sections. error " | |
575 | << ret << std::endl; | |
576 | return -ENOENT; | |
577 | } | |
11fdf7f2 TL |
578 | std::vector<std::string> mon_names; |
579 | for (const auto& section : sections) { | |
580 | if (section.substr(0, 4) == "mon." && section.size() > 4) { | |
581 | mon_names.push_back(section.substr(4)); | |
7c673cae FG |
582 | } |
583 | } | |
584 | ||
585 | // Find an address for each monitor in the config file. | |
11fdf7f2 TL |
586 | for (const auto& mon_name : mon_names) { |
587 | std::vector<std::string> sections; | |
7c673cae FG |
588 | std::string m_name("mon"); |
589 | m_name += "."; | |
11fdf7f2 | 590 | m_name += mon_name; |
7c673cae FG |
591 | sections.push_back(m_name); |
592 | sections.push_back("mon"); | |
593 | sections.push_back("global"); | |
594 | std::string val; | |
11fdf7f2 | 595 | int res = conf.get_val_from_conf_file(sections, "mon addr", val, true); |
7c673cae | 596 | if (res) { |
11fdf7f2 TL |
597 | errout << "failed to get an address for mon." << mon_name |
598 | << ": error " << res << std::endl; | |
7c673cae FG |
599 | continue; |
600 | } | |
11fdf7f2 TL |
601 | // the 'mon addr' field is a legacy field, so assume anything |
602 | // there on a weird port is a v1 address, and do not handle | |
603 | // addrvecs. | |
7c673cae | 604 | entity_addr_t addr; |
11fdf7f2 TL |
605 | if (!addr.parse(val.c_str(), nullptr, entity_addr_t::TYPE_LEGACY)) { |
606 | errout << "unable to parse address for mon." << mon_name | |
607 | << ": addr='" << val << "'" << std::endl; | |
7c673cae FG |
608 | continue; |
609 | } | |
11fdf7f2 TL |
610 | if (addr.get_port() == 0) { |
611 | addr.set_port(CEPH_MON_PORT_LEGACY); | |
612 | } | |
224ce89b | 613 | uint16_t priority = 0; |
11fdf7f2 | 614 | if (!conf.get_val_from_conf_file(sections, "mon priority", val, false)) { |
224ce89b WB |
615 | try { |
616 | priority = std::stoul(val); | |
617 | } catch (std::logic_error&) { | |
11fdf7f2 | 618 | errout << "unable to parse priority for mon." << mon_name |
224ce89b WB |
619 | << ": priority='" << val << "'" << std::endl; |
620 | continue; | |
621 | } | |
622 | } | |
11fdf7f2 | 623 | |
7c673cae FG |
624 | // the make sure this mon isn't already in the map |
625 | if (contains(addr)) | |
626 | remove(get_name(addr)); | |
11fdf7f2 TL |
627 | if (contains(mon_name)) |
628 | remove(mon_name); | |
629 | _add_ambiguous_addr(mon_name, addr, priority); | |
630 | } | |
631 | return 0; | |
632 | } | |
633 | ||
634 | #ifdef WITH_SEASTAR | |
635 | ||
636 | using namespace seastar; | |
637 | ||
638 | future<> MonMap::read_monmap(const std::string& monmap) | |
639 | { | |
640 | return open_file_dma(monmap, open_flags::ro).then([this] (file f) { | |
641 | return f.size().then([this, f = std::move(f)](size_t s) { | |
642 | return do_with(make_file_input_stream(f), [this, s](input_stream<char>& in) { | |
643 | return in.read_exactly(s).then([this](temporary_buffer<char> buf) { | |
644 | bufferlist bl; | |
645 | bl.append(buffer::create(std::move(buf))); | |
646 | decode(bl); | |
647 | }); | |
648 | }); | |
649 | }); | |
650 | }); | |
651 | } | |
7c673cae | 652 | |
11fdf7f2 TL |
653 | future<> MonMap::init_with_dns_srv(bool for_mkfs, const std::string& name) |
654 | { | |
655 | string domain; | |
656 | string service = name; | |
657 | // check if domain is also provided and extract it from srv_name | |
658 | size_t idx = name.find("_"); | |
659 | if (idx != name.npos) { | |
660 | domain = name.substr(idx + 1); | |
661 | service = name.substr(0, idx); | |
7c673cae | 662 | } |
11fdf7f2 TL |
663 | return net::dns::get_srv_records( |
664 | net::dns_resolver::srv_proto::tcp, | |
665 | service, domain).then([this](net::dns_resolver::srv_records records) { | |
666 | return parallel_for_each(records, [this](auto record) { | |
667 | return net::dns::resolve_name(record.target).then( | |
668 | [record,this](net::inet_address a) { | |
669 | // the resolved address does not contain ceph specific info like nonce | |
670 | // nonce or msgr proto (legacy, msgr2), so set entity_addr_t manually | |
671 | entity_addr_t addr; | |
672 | addr.set_type(entity_addr_t::TYPE_ANY); | |
673 | addr.set_family(int(a.in_family())); | |
674 | addr.set_port(record.port); | |
675 | switch (a.in_family()) { | |
676 | case net::inet_address::family::INET: | |
677 | addr.in4_addr().sin_addr = a; | |
678 | break; | |
679 | case net::inet_address::family::INET6: | |
680 | addr.in6_addr().sin6_addr = a; | |
681 | break; | |
682 | } | |
683 | _add_ambiguous_addr(record.target, addr, record.priority); | |
684 | }); | |
685 | }); | |
686 | }).handle_exception_type([](const std::system_error& e) { | |
687 | // ignore DNS failures | |
688 | return seastar::make_ready_future<>(); | |
689 | }); | |
690 | } | |
7c673cae | 691 | |
11fdf7f2 TL |
692 | seastar::future<> MonMap::build_monmap(const ceph::common::ConfigProxy& conf, |
693 | bool for_mkfs) | |
694 | { | |
695 | // -m foo? | |
696 | if (const auto mon_host = conf.get_val<std::string>("mon_host"); | |
697 | !mon_host.empty()) { | |
698 | if (auto ret = init_with_ips(mon_host, for_mkfs, "noname-"); ret == 0) { | |
699 | return make_ready_future<>(); | |
7c673cae | 700 | } |
11fdf7f2 TL |
701 | // TODO: resolve_addrs() is a blocking call |
702 | if (auto ret = init_with_hosts(mon_host, for_mkfs, "noname-"); ret == 0) { | |
703 | return make_ready_future<>(); | |
704 | } else { | |
705 | throw std::runtime_error(cpp_strerror(ret)); | |
706 | } | |
707 | } | |
7c673cae | 708 | |
11fdf7f2 TL |
709 | // What monitors are in the config file? |
710 | ostringstream errout; | |
711 | if (auto ret = init_with_config_file(conf, errout); ret < 0) { | |
712 | throw std::runtime_error(errout.str()); | |
713 | } | |
714 | if (size() > 0) { | |
715 | return make_ready_future<>(); | |
716 | } | |
717 | // no info found from conf options lets try use DNS SRV records | |
718 | const string srv_name = conf.get_val<std::string>("mon_dns_srv_name"); | |
719 | return init_with_dns_srv(for_mkfs, srv_name).then([this] { | |
720 | if (size() == 0) { | |
721 | throw std::runtime_error("no monitors specified to connect to."); | |
722 | } | |
723 | }); | |
724 | } | |
7c673cae | 725 | |
11fdf7f2 TL |
726 | future<> MonMap::build_initial(const ceph::common::ConfigProxy& conf, bool for_mkfs) |
727 | { | |
728 | // file? | |
729 | if (const auto monmap = conf.get_val<std::string>("monmap"); | |
730 | !monmap.empty()) { | |
731 | return read_monmap(monmap); | |
732 | } else { | |
733 | // fsid from conf? | |
734 | if (const auto new_fsid = conf.get_val<uuid_d>("fsid"); | |
735 | !new_fsid.is_zero()) { | |
736 | fsid = new_fsid; | |
7c673cae | 737 | } |
11fdf7f2 TL |
738 | return build_monmap(conf, for_mkfs).then([this] { |
739 | created = ceph_clock_now(); | |
740 | last_changed = created; | |
741 | calc_legacy_ranks(); | |
742 | }); | |
743 | } | |
744 | } | |
745 | ||
746 | #else // WITH_SEASTAR | |
747 | ||
748 | int MonMap::init_with_monmap(const std::string& monmap, std::ostream& errout) | |
749 | { | |
750 | int r; | |
751 | try { | |
752 | r = read(monmap.c_str()); | |
753 | } catch (buffer::error&) { | |
754 | r = -EINVAL; | |
755 | } | |
756 | if (r >= 0) | |
757 | return 0; | |
758 | errout << "unable to read/decode monmap from " << monmap | |
759 | << ": " << cpp_strerror(-r) << std::endl; | |
760 | return r; | |
761 | } | |
762 | ||
763 | int MonMap::init_with_dns_srv(CephContext* cct, | |
764 | std::string srv_name, | |
765 | bool for_mkfs, | |
766 | std::ostream& errout) | |
767 | { | |
768 | string domain; | |
769 | // check if domain is also provided and extract it from srv_name | |
770 | size_t idx = srv_name.find("_"); | |
771 | if (idx != string::npos) { | |
772 | domain = srv_name.substr(idx + 1); | |
773 | srv_name = srv_name.substr(0, idx); | |
774 | } | |
775 | ||
776 | map<string, DNSResolver::Record> records; | |
777 | if (DNSResolver::get_instance()->resolve_srv_hosts(cct, srv_name, | |
778 | DNSResolver::SRV_Protocol::TCP, domain, &records) != 0) { | |
779 | ||
780 | errout << "unable to get monitor info from DNS SRV with service name: " | |
781 | << "ceph-mon" << std::endl; | |
782 | return -1; | |
783 | } else { | |
784 | for (auto& record : records) { | |
785 | record.second.addr.set_type(entity_addr_t::TYPE_ANY); | |
786 | _add_ambiguous_addr(record.first, record.second.addr, | |
787 | record.second.priority); | |
7c673cae | 788 | } |
11fdf7f2 TL |
789 | return 0; |
790 | } | |
791 | } | |
792 | ||
793 | int MonMap::build_initial(CephContext *cct, bool for_mkfs, ostream& errout) | |
794 | { | |
795 | const auto& conf = cct->_conf; | |
796 | // file? | |
797 | if (const auto monmap = conf.get_val<std::string>("monmap"); | |
798 | !monmap.empty()) { | |
799 | return init_with_monmap(monmap, errout); | |
7c673cae FG |
800 | } |
801 | ||
11fdf7f2 TL |
802 | // fsid from conf? |
803 | if (const auto new_fsid = conf.get_val<uuid_d>("fsid"); | |
804 | !new_fsid.is_zero()) { | |
805 | fsid = new_fsid; | |
806 | } | |
807 | // -m foo? | |
808 | if (const auto mon_host = conf.get_val<std::string>("mon_host"); | |
809 | !mon_host.empty()) { | |
810 | auto ret = init_with_ips(mon_host, for_mkfs, "noname-"); | |
811 | if (ret == -EINVAL) { | |
812 | ret = init_with_hosts(mon_host, for_mkfs, "noname-"); | |
813 | } | |
814 | if (ret < 0) { | |
815 | errout << "unable to parse addrs in '" << mon_host << "'" | |
816 | << std::endl; | |
817 | return ret; | |
818 | } | |
819 | } | |
820 | if (size() == 0) { | |
821 | // What monitors are in the config file? | |
822 | if (auto ret = init_with_config_file(conf, errout); ret < 0) { | |
823 | return ret; | |
824 | } | |
825 | } | |
826 | if (size() == 0) { | |
827 | // no info found from conf options lets try use DNS SRV records | |
828 | string srv_name = conf.get_val<std::string>("mon_dns_srv_name"); | |
829 | if (auto ret = init_with_dns_srv(cct, srv_name, for_mkfs, errout); ret < 0) { | |
830 | return -ENOENT; | |
831 | } | |
832 | } | |
7c673cae FG |
833 | if (size() == 0) { |
834 | errout << "no monitors specified to connect to." << std::endl; | |
835 | return -ENOENT; | |
836 | } | |
837 | created = ceph_clock_now(); | |
838 | last_changed = created; | |
11fdf7f2 | 839 | calc_legacy_ranks(); |
7c673cae FG |
840 | return 0; |
841 | } | |
11fdf7f2 | 842 | #endif // WITH_SEASTAR |