]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph_mon.cc
d/control: depend on python3-yaml for ceph-mgr
[ceph.git] / ceph / src / ceph_mon.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <fcntl.h>
18
19 #include <iostream>
20 #include <string>
21
22 #include "common/config.h"
23 #include "include/ceph_features.h"
24
25 #include "mon/MonMap.h"
26 #include "mon/Monitor.h"
27 #include "mon/MonitorDBStore.h"
28 #include "mon/MonClient.h"
29
30 #include "msg/Messenger.h"
31
32 #include "include/CompatSet.h"
33
34 #include "common/ceph_argparse.h"
35 #include "common/pick_address.h"
36 #include "common/Throttle.h"
37 #include "common/Timer.h"
38 #include "common/errno.h"
39 #include "common/Preforker.h"
40
41 #include "global/global_init.h"
42 #include "global/signal_handler.h"
43
44 #include "perfglue/heap_profiler.h"
45
46 #include "include/ceph_assert.h"
47
48 #define dout_subsys ceph_subsys_mon
49
50 Monitor *mon = NULL;
51
52 void handle_mon_signal(int signum)
53 {
54 if (mon)
55 mon->handle_signal(signum);
56 }
57
58
59 int obtain_monmap(MonitorDBStore &store, bufferlist &bl)
60 {
61 dout(10) << __func__ << dendl;
62 /*
63 * the monmap may be in one of three places:
64 * 'mon_sync:temp_newer_monmap' - stashed newer map for bootstrap
65 * 'monmap:<latest_version_no>' - the monmap we'd really like to have
66 * 'mon_sync:latest_monmap' - last monmap backed up for the last sync
67 * 'mkfs:monmap' - a monmap resulting from mkfs
68 */
69
70 if (store.exists("monmap", "last_committed")) {
71 version_t latest_ver = store.get("monmap", "last_committed");
72 if (store.exists("monmap", latest_ver)) {
73 int err = store.get("monmap", latest_ver, bl);
74 ceph_assert(err == 0);
75 ceph_assert(bl.length() > 0);
76 dout(10) << __func__ << " read last committed monmap ver "
77 << latest_ver << dendl;
78
79 // see if there is stashed newer map (see bootstrap())
80 if (store.exists("mon_sync", "temp_newer_monmap")) {
81 bufferlist bl2;
82 int err = store.get("mon_sync", "temp_newer_monmap", bl2);
83 ceph_assert(err == 0);
84 ceph_assert(bl2.length() > 0);
85 MonMap b;
86 b.decode(bl2);
87 if (b.get_epoch() > latest_ver) {
88 dout(10) << __func__ << " using stashed monmap " << b.get_epoch()
89 << " instead" << dendl;
90 bl.claim(bl2);
91 } else {
92 dout(10) << __func__ << " ignoring stashed monmap " << b.get_epoch()
93 << dendl;
94 }
95 }
96 return 0;
97 }
98 }
99
100 if (store.exists("mon_sync", "in_sync")
101 || store.exists("mon_sync", "force_sync")) {
102 dout(10) << __func__ << " detected aborted sync" << dendl;
103 if (store.exists("mon_sync", "latest_monmap")) {
104 int err = store.get("mon_sync", "latest_monmap", bl);
105 ceph_assert(err == 0);
106 ceph_assert(bl.length() > 0);
107 dout(10) << __func__ << " read backup monmap" << dendl;
108 return 0;
109 }
110 }
111
112 if (store.exists("mkfs", "monmap")) {
113 dout(10) << __func__ << " found mkfs monmap" << dendl;
114 int err = store.get("mkfs", "monmap", bl);
115 ceph_assert(err == 0);
116 ceph_assert(bl.length() > 0);
117 return 0;
118 }
119
120 derr << __func__ << " unable to find a monmap" << dendl;
121 return -ENOENT;
122 }
123
124 int check_mon_data_exists()
125 {
126 string mon_data = g_conf()->mon_data;
127 struct stat buf;
128 if (::stat(mon_data.c_str(), &buf)) {
129 if (errno != ENOENT) {
130 derr << "stat(" << mon_data << ") " << cpp_strerror(errno) << dendl;
131 }
132 return -errno;
133 }
134 return 0;
135 }
136
137 /** Check whether **mon data** is empty.
138 *
139 * Being empty means mkfs has not been run and there's no monitor setup
140 * at **g_conf()->mon_data**.
141 *
142 * If the directory g_conf()->mon_data is not empty we will return -ENOTEMPTY.
143 * Otherwise we will return 0. Any other negative returns will represent
144 * a failure to be handled by the caller.
145 *
146 * @return **0** on success, -ENOTEMPTY if not empty or **-errno** otherwise.
147 */
148 int check_mon_data_empty()
149 {
150 string mon_data = g_conf()->mon_data;
151
152 DIR *dir = ::opendir(mon_data.c_str());
153 if (!dir) {
154 derr << "opendir(" << mon_data << ") " << cpp_strerror(errno) << dendl;
155 return -errno;
156 }
157 int code = 0;
158 struct dirent *de = nullptr;
159 errno = 0;
160 while ((de = ::readdir(dir))) {
161 if (string(".") != de->d_name &&
162 string("..") != de->d_name &&
163 string("kv_backend") != de->d_name) {
164 code = -ENOTEMPTY;
165 break;
166 }
167 }
168 if (!de && errno) {
169 derr << "readdir(" << mon_data << ") " << cpp_strerror(errno) << dendl;
170 code = -errno;
171 }
172
173 ::closedir(dir);
174
175 return code;
176 }
177
178 static void usage()
179 {
180 cout << "usage: ceph-mon -i <ID> [flags]\n"
181 << " --debug_mon n\n"
182 << " debug monitor level (e.g. 10)\n"
183 << " --mkfs\n"
184 << " build fresh monitor fs\n"
185 << " --force-sync\n"
186 << " force a sync from another mon by wiping local data (BE CAREFUL)\n"
187 << " --yes-i-really-mean-it\n"
188 << " mandatory safeguard for --force-sync\n"
189 << " --compact\n"
190 << " compact the monitor store\n"
191 << " --osdmap <filename>\n"
192 << " only used when --mkfs is provided: load the osdmap from <filename>\n"
193 << " --inject-monmap <filename>\n"
194 << " write the <filename> monmap to the local monitor store and exit\n"
195 << " --extract-monmap <filename>\n"
196 << " extract the monmap from the local monitor store and exit\n"
197 << " --mon-data <directory>\n"
198 << " where the mon store and keyring are located\n"
199 << std::endl;
200 generic_server_usage();
201 }
202
203 entity_addrvec_t make_mon_addrs(entity_addr_t a)
204 {
205 entity_addrvec_t addrs;
206 if (a.get_port() == 0) {
207 a.set_type(entity_addr_t::TYPE_MSGR2);
208 a.set_port(CEPH_MON_PORT_IANA);
209 addrs.v.push_back(a);
210 a.set_type(entity_addr_t::TYPE_LEGACY);
211 a.set_port(CEPH_MON_PORT_LEGACY);
212 addrs.v.push_back(a);
213 } else if (a.get_port() == CEPH_MON_PORT_LEGACY) {
214 a.set_type(entity_addr_t::TYPE_LEGACY);
215 addrs.v.push_back(a);
216 } else if (a.get_type() == entity_addr_t::TYPE_ANY) {
217 a.set_type(entity_addr_t::TYPE_MSGR2);
218 addrs.v.push_back(a);
219 } else {
220 addrs.v.push_back(a);
221 }
222 return addrs;
223 }
224
225 int main(int argc, const char **argv)
226 {
227 // reset our process name, in case we did a respawn, so that it's not
228 // left as "exe".
229 ceph_pthread_setname(pthread_self(), "ceph-mon");
230
231 int err;
232
233 bool mkfs = false;
234 bool compact = false;
235 bool force_sync = false;
236 bool yes_really = false;
237 std::string osdmapfn, inject_monmap, extract_monmap;
238
239 vector<const char*> args;
240 argv_to_vec(argc, argv, args);
241 if (args.empty()) {
242 cerr << argv[0] << ": -h or --help for usage" << std::endl;
243 exit(1);
244 }
245 if (ceph_argparse_need_usage(args)) {
246 usage();
247 exit(0);
248 }
249
250 // We need to specify some default values that may be overridden by the
251 // user, that are specific to the monitor. The options we are overriding
252 // are also used on the OSD (or in any other component that uses leveldb),
253 // so changing the global defaults is not an option.
254 // This is not the prettiest way of doing this, especially since it has us
255 // having a different place defining default values, but it's not horribly
256 // wrong enough to prevent us from doing it :)
257 //
258 // NOTE: user-defined options will take precedence over ours.
259 //
260 // leveldb_write_buffer_size = 32*1024*1024 = 33554432 // 32MB
261 // leveldb_cache_size = 512*1024*1204 = 536870912 // 512MB
262 // leveldb_block_size = 64*1024 = 65536 // 64KB
263 // leveldb_compression = false
264 // leveldb_log = ""
265 map<string,string> defaults = {
266 { "leveldb_write_buffer_size", "33554432" },
267 { "leveldb_cache_size", "536870912" },
268 { "leveldb_block_size", "65536" },
269 { "leveldb_compression", "false"},
270 { "leveldb_log", "" },
271 { "keyring", "$mon_data/keyring" },
272 };
273
274 int flags = 0;
275 {
276 vector<const char*> args_copy = args;
277 std::string val;
278 for (std::vector<const char*>::iterator i = args_copy.begin();
279 i != args_copy.end(); ) {
280 if (ceph_argparse_double_dash(args_copy, i)) {
281 break;
282 } else if (ceph_argparse_flag(args_copy, i, "--mkfs", (char*)NULL)) {
283 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
284 } else if (ceph_argparse_witharg(args_copy, i, &val, "--inject_monmap", (char*)NULL)) {
285 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
286 } else if (ceph_argparse_witharg(args_copy, i, &val, "--extract-monmap", (char*)NULL)) {
287 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
288 } else {
289 ++i;
290 }
291 }
292 }
293
294 // don't try to get config from mon cluster during startup
295 flags |= CINIT_FLAG_NO_MON_CONFIG;
296
297 auto cct = global_init(&defaults, args,
298 CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON,
299 flags, "mon_data");
300 ceph_heap_profiler_init();
301
302 std::string val;
303 for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
304 if (ceph_argparse_double_dash(args, i)) {
305 break;
306 } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) {
307 mkfs = true;
308 } else if (ceph_argparse_flag(args, i, "--compact", (char*)NULL)) {
309 compact = true;
310 } else if (ceph_argparse_flag(args, i, "--force-sync", (char*)NULL)) {
311 force_sync = true;
312 } else if (ceph_argparse_flag(args, i, "--yes-i-really-mean-it", (char*)NULL)) {
313 yes_really = true;
314 } else if (ceph_argparse_witharg(args, i, &val, "--osdmap", (char*)NULL)) {
315 osdmapfn = val;
316 } else if (ceph_argparse_witharg(args, i, &val, "--inject_monmap", (char*)NULL)) {
317 inject_monmap = val;
318 } else if (ceph_argparse_witharg(args, i, &val, "--extract-monmap", (char*)NULL)) {
319 extract_monmap = val;
320 } else {
321 ++i;
322 }
323 }
324 if (!args.empty()) {
325 cerr << "too many arguments: " << args << std::endl;
326 exit(1);
327 }
328
329 if (force_sync && !yes_really) {
330 cerr << "are you SURE you want to force a sync? this will erase local data and may\n"
331 << "break your mon cluster. pass --yes-i-really-mean-it if you do." << std::endl;
332 exit(1);
333 }
334
335 if (g_conf()->mon_data.empty()) {
336 cerr << "must specify '--mon-data=foo' data path" << std::endl;
337 exit(1);
338 }
339
340 if (g_conf()->name.get_id().empty()) {
341 cerr << "must specify id (--id <id> or --name mon.<id>)" << std::endl;
342 exit(1);
343 }
344
345 // -- mkfs --
346 if (mkfs) {
347
348 int err = check_mon_data_exists();
349 if (err == -ENOENT) {
350 if (::mkdir(g_conf()->mon_data.c_str(), 0755)) {
351 derr << "mkdir(" << g_conf()->mon_data << ") : "
352 << cpp_strerror(errno) << dendl;
353 exit(1);
354 }
355 } else if (err < 0) {
356 derr << "error opening '" << g_conf()->mon_data << "': "
357 << cpp_strerror(-err) << dendl;
358 exit(-err);
359 }
360
361 err = check_mon_data_empty();
362 if (err == -ENOTEMPTY) {
363 // Mon may exist. Let the user know and exit gracefully.
364 derr << "'" << g_conf()->mon_data << "' already exists and is not empty"
365 << ": monitor may already exist" << dendl;
366 exit(0);
367 } else if (err < 0) {
368 derr << "error checking if '" << g_conf()->mon_data << "' is empty: "
369 << cpp_strerror(-err) << dendl;
370 exit(-err);
371 }
372
373 // resolve public_network -> public_addr
374 pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC);
375
376 dout(10) << "public_network " << g_conf()->public_network << dendl;
377 dout(10) << "public_addr " << g_conf()->public_addr << dendl;
378 dout(10) << "public_addrv " << g_conf()->public_addrv << dendl;
379
380 common_init_finish(g_ceph_context);
381
382 bufferlist monmapbl, osdmapbl;
383 std::string error;
384 MonMap monmap;
385
386 // load or generate monmap
387 const auto monmap_fn = g_conf().get_val<string>("monmap");
388 if (monmap_fn.length()) {
389 int err = monmapbl.read_file(monmap_fn.c_str(), &error);
390 if (err < 0) {
391 derr << argv[0] << ": error reading " << monmap_fn << ": " << error << dendl;
392 exit(1);
393 }
394 try {
395 monmap.decode(monmapbl);
396
397 // always mark seed/mkfs monmap as epoch 0
398 monmap.set_epoch(0);
399 } catch (const buffer::error& e) {
400 derr << argv[0] << ": error decoding monmap " << monmap_fn << ": " << e.what() << dendl;
401 exit(1);
402 }
403
404 dout(1) << "imported monmap:\n";
405 monmap.print(*_dout);
406 *_dout << dendl;
407
408 } else {
409 ostringstream oss;
410 int err = monmap.build_initial(g_ceph_context, true, oss);
411 if (oss.tellp())
412 derr << oss.str() << dendl;
413 if (err < 0) {
414 derr << argv[0] << ": warning: no initial monitors; must use admin socket to feed hints" << dendl;
415 }
416
417 dout(1) << "initial generated monmap:\n";
418 monmap.print(*_dout);
419 *_dout << dendl;
420
421 // am i part of the initial quorum?
422 if (monmap.contains(g_conf()->name.get_id())) {
423 // hmm, make sure the ip listed exists on the current host?
424 // maybe later.
425 } else if (!g_conf()->public_addrv.empty()) {
426 entity_addrvec_t av = g_conf()->public_addrv;
427 string name;
428 if (monmap.contains(av, &name)) {
429 monmap.rename(name, g_conf()->name.get_id());
430 dout(0) << argv[0] << ": renaming mon." << name << " " << av
431 << " to mon." << g_conf()->name.get_id() << dendl;
432 }
433 } else if (!g_conf()->public_addr.is_blank_ip()) {
434 entity_addrvec_t av = make_mon_addrs(g_conf()->public_addr);
435 string name;
436 if (monmap.contains(av, &name)) {
437 monmap.rename(name, g_conf()->name.get_id());
438 dout(0) << argv[0] << ": renaming mon." << name << " " << av
439 << " to mon." << g_conf()->name.get_id() << dendl;
440 }
441 } else {
442 // is a local address listed without a name? if so, name myself.
443 list<entity_addr_t> ls;
444 monmap.list_addrs(ls);
445 dout(0) << " monmap addrs are " << ls << ", checking if any are local"
446 << dendl;
447
448 entity_addr_t local;
449 if (have_local_addr(g_ceph_context, ls, &local)) {
450 dout(0) << " have local addr " << local << dendl;
451 string name;
452 local.set_type(entity_addr_t::TYPE_MSGR2);
453 if (!monmap.get_addr_name(local, name)) {
454 local.set_type(entity_addr_t::TYPE_LEGACY);
455 if (!monmap.get_addr_name(local, name)) {
456 dout(0) << "no local addresses appear in bootstrap monmap"
457 << dendl;
458 }
459 }
460 if (name.compare(0, 7, "noname-") == 0) {
461 dout(0) << argv[0] << ": mon." << name << " " << local
462 << " is local, renaming to mon." << g_conf()->name.get_id()
463 << dendl;
464 monmap.rename(name, g_conf()->name.get_id());
465 } else if (name.size()) {
466 dout(0) << argv[0] << ": mon." << name << " " << local
467 << " is local, but not 'noname-' + something; "
468 << "not assuming it's me" << dendl;
469 }
470 } else {
471 dout(0) << " no local addrs match monmap" << dendl;
472 }
473 }
474 }
475
476 const auto fsid = g_conf().get_val<uuid_d>("fsid");
477 if (!fsid.is_zero()) {
478 monmap.fsid = fsid;
479 dout(0) << argv[0] << ": set fsid to " << fsid << dendl;
480 }
481
482 if (monmap.fsid.is_zero()) {
483 derr << argv[0] << ": generated monmap has no fsid; use '--fsid <uuid>'" << dendl;
484 exit(10);
485 }
486
487 //monmap.print(cout);
488
489 // osdmap
490 if (osdmapfn.length()) {
491 err = osdmapbl.read_file(osdmapfn.c_str(), &error);
492 if (err < 0) {
493 derr << argv[0] << ": error reading " << osdmapfn << ": "
494 << error << dendl;
495 exit(1);
496 }
497 }
498
499 // go
500 MonitorDBStore store(g_conf()->mon_data);
501 ostringstream oss;
502 int r = store.create_and_open(oss);
503 if (oss.tellp())
504 derr << oss.str() << dendl;
505 if (r < 0) {
506 derr << argv[0] << ": error opening mon data directory at '"
507 << g_conf()->mon_data << "': " << cpp_strerror(r) << dendl;
508 exit(1);
509 }
510 ceph_assert(r == 0);
511
512 Monitor mon(g_ceph_context, g_conf()->name.get_id(), &store, 0, 0, &monmap);
513 r = mon.mkfs(osdmapbl);
514 if (r < 0) {
515 derr << argv[0] << ": error creating monfs: " << cpp_strerror(r) << dendl;
516 exit(1);
517 }
518 store.close();
519 dout(0) << argv[0] << ": created monfs at " << g_conf()->mon_data
520 << " for " << g_conf()->name << dendl;
521 return 0;
522 }
523
524 err = check_mon_data_exists();
525 if (err < 0 && err == -ENOENT) {
526 derr << "monitor data directory at '" << g_conf()->mon_data << "'"
527 << " does not exist: have you run 'mkfs'?" << dendl;
528 exit(1);
529 } else if (err < 0) {
530 derr << "error accessing monitor data directory at '"
531 << g_conf()->mon_data << "': " << cpp_strerror(-err) << dendl;
532 exit(1);
533 }
534
535 err = check_mon_data_empty();
536 if (err == 0) {
537 derr << "monitor data directory at '" << g_conf()->mon_data
538 << "' is empty: have you run 'mkfs'?" << dendl;
539 exit(1);
540 } else if (err < 0 && err != -ENOTEMPTY) {
541 // we don't want an empty data dir by now
542 derr << "error accessing '" << g_conf()->mon_data << "': "
543 << cpp_strerror(-err) << dendl;
544 exit(1);
545 }
546
547 {
548 // check fs stats. don't start if it's critically close to full.
549 ceph_data_stats_t stats;
550 int err = get_fs_stats(stats, g_conf()->mon_data.c_str());
551 if (err < 0) {
552 derr << "error checking monitor data's fs stats: " << cpp_strerror(err)
553 << dendl;
554 exit(-err);
555 }
556 if (stats.avail_percent <= g_conf()->mon_data_avail_crit) {
557 derr << "error: monitor data filesystem reached concerning levels of"
558 << " available storage space (available: "
559 << stats.avail_percent << "% " << byte_u_t(stats.byte_avail)
560 << ")\nyou may adjust 'mon data avail crit' to a lower value"
561 << " to make this go away (default: " << g_conf()->mon_data_avail_crit
562 << "%)\n" << dendl;
563 exit(ENOSPC);
564 }
565 }
566
567 // we fork early to prevent leveldb's environment static state from
568 // screwing us over
569 Preforker prefork;
570 if (!(flags & CINIT_FLAG_NO_DAEMON_ACTIONS)) {
571 if (global_init_prefork(g_ceph_context) >= 0) {
572 string err_msg;
573 err = prefork.prefork(err_msg);
574 if (err < 0) {
575 derr << err_msg << dendl;
576 prefork.exit(err);
577 }
578 if (prefork.is_parent()) {
579 err = prefork.parent_wait(err_msg);
580 if (err < 0)
581 derr << err_msg << dendl;
582 prefork.exit(err);
583 }
584 setsid();
585 global_init_postfork_start(g_ceph_context);
586 }
587 common_init_finish(g_ceph_context);
588 global_init_chdir(g_ceph_context);
589 if (global_init_preload_erasure_code(g_ceph_context) < 0)
590 prefork.exit(1);
591 }
592
593 // set up signal handlers, now that we've daemonized/forked.
594 init_async_signal_handler();
595 register_async_signal_handler(SIGHUP, sighup_handler);
596
597 MonitorDBStore *store = new MonitorDBStore(g_conf()->mon_data);
598
599 // make sure we aren't upgrading too fast
600 {
601 string val;
602 int r = store->read_meta("min_mon_release", &val);
603 if (r >= 0 && val.size()) {
604 ceph_release_t from_release = ceph_release_from_name(val);
605 ostringstream err;
606 if (!can_upgrade_from(from_release, "min_mon_release", err)) {
607 derr << err.str() << dendl;
608 prefork.exit(1);
609 }
610 }
611 }
612
613 {
614 ostringstream oss;
615 err = store->open(oss);
616 if (oss.tellp())
617 derr << oss.str() << dendl;
618 if (err < 0) {
619 derr << "error opening mon data directory at '"
620 << g_conf()->mon_data << "': " << cpp_strerror(err) << dendl;
621 prefork.exit(1);
622 }
623 }
624
625 bufferlist magicbl;
626 err = store->get(Monitor::MONITOR_NAME, "magic", magicbl);
627 if (err || !magicbl.length()) {
628 derr << "unable to read magic from mon data" << dendl;
629 prefork.exit(1);
630 }
631 string magic(magicbl.c_str(), magicbl.length()-1); // ignore trailing \n
632 if (strcmp(magic.c_str(), CEPH_MON_ONDISK_MAGIC)) {
633 derr << "mon fs magic '" << magic << "' != current '" << CEPH_MON_ONDISK_MAGIC << "'" << dendl;
634 prefork.exit(1);
635 }
636
637 err = Monitor::check_features(store);
638 if (err < 0) {
639 derr << "error checking features: " << cpp_strerror(err) << dendl;
640 prefork.exit(1);
641 }
642
643 // inject new monmap?
644 if (!inject_monmap.empty()) {
645 bufferlist bl;
646 std::string error;
647 int r = bl.read_file(inject_monmap.c_str(), &error);
648 if (r) {
649 derr << "unable to read monmap from " << inject_monmap << ": "
650 << error << dendl;
651 prefork.exit(1);
652 }
653
654 // get next version
655 version_t v = store->get("monmap", "last_committed");
656 dout(0) << "last committed monmap epoch is " << v << ", injected map will be " << (v+1)
657 << dendl;
658 v++;
659
660 // set the version
661 MonMap tmp;
662 tmp.decode(bl);
663 if (tmp.get_epoch() != v) {
664 dout(0) << "changing monmap epoch from " << tmp.get_epoch()
665 << " to " << v << dendl;
666 tmp.set_epoch(v);
667 }
668 bufferlist mapbl;
669 tmp.encode(mapbl, CEPH_FEATURES_ALL);
670 bufferlist final;
671 encode(v, final);
672 encode(mapbl, final);
673
674 auto t(std::make_shared<MonitorDBStore::Transaction>());
675 // save it
676 t->put("monmap", v, mapbl);
677 t->put("monmap", "latest", final);
678 t->put("monmap", "last_committed", v);
679 store->apply_transaction(t);
680
681 dout(0) << "done." << dendl;
682 prefork.exit(0);
683 }
684
685 // monmap?
686 MonMap monmap;
687 {
688 // note that even if we don't find a viable monmap, we should go ahead
689 // and try to build it up in the next if-else block.
690 bufferlist mapbl;
691 int err = obtain_monmap(*store, mapbl);
692 if (err >= 0) {
693 try {
694 monmap.decode(mapbl);
695 } catch (const buffer::error& e) {
696 derr << "can't decode monmap: " << e.what() << dendl;
697 }
698 } else {
699 derr << "unable to obtain a monmap: " << cpp_strerror(err) << dendl;
700 }
701
702 dout(10) << __func__ << " monmap:\n";
703 JSONFormatter jf(true);
704 jf.dump_object("monmap", monmap);
705 jf.flush(*_dout);
706 *_dout << dendl;
707
708 if (!extract_monmap.empty()) {
709 int r = mapbl.write_file(extract_monmap.c_str());
710 if (r < 0) {
711 r = -errno;
712 derr << "error writing monmap to " << extract_monmap << ": " << cpp_strerror(r) << dendl;
713 prefork.exit(1);
714 }
715 derr << "wrote monmap to " << extract_monmap << dendl;
716 prefork.exit(0);
717 }
718 }
719
720 // this is what i will bind to
721 entity_addrvec_t ipaddrs;
722
723 if (monmap.contains(g_conf()->name.get_id())) {
724 ipaddrs = monmap.get_addrs(g_conf()->name.get_id());
725
726 // print helpful warning if the conf file doesn't match
727 std::vector <std::string> my_sections;
728 g_conf().get_my_sections(my_sections);
729 std::string mon_addr_str;
730 if (g_conf().get_val_from_conf_file(my_sections, "mon addr",
731 mon_addr_str, true) == 0) {
732 entity_addr_t conf_addr;
733 if (conf_addr.parse(mon_addr_str.c_str())) {
734 entity_addrvec_t conf_addrs = make_mon_addrs(conf_addr);
735 if (ipaddrs != conf_addrs) {
736 derr << "WARNING: 'mon addr' config option " << conf_addrs
737 << " does not match monmap file" << std::endl
738 << " continuing with monmap configuration" << dendl;
739 }
740 } else
741 derr << "WARNING: invalid 'mon addr' config option" << std::endl
742 << " continuing with monmap configuration" << dendl;
743 }
744 } else {
745 dout(0) << g_conf()->name << " does not exist in monmap, will attempt to join an existing cluster" << dendl;
746
747 pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC);
748 if (!g_conf()->public_addrv.empty()) {
749 ipaddrs = g_conf()->public_addrv;
750 dout(0) << "using public_addrv " << ipaddrs << dendl;
751 } else if (!g_conf()->public_addr.is_blank_ip()) {
752 ipaddrs = make_mon_addrs(g_conf()->public_addr);
753 dout(0) << "using public_addr " << g_conf()->public_addr << " -> "
754 << ipaddrs << dendl;
755 } else {
756 MonMap tmpmap;
757 ostringstream oss;
758 int err = tmpmap.build_initial(g_ceph_context, true, oss);
759 if (oss.tellp())
760 derr << oss.str() << dendl;
761 if (err < 0) {
762 derr << argv[0] << ": error generating initial monmap: "
763 << cpp_strerror(err) << dendl;
764 prefork.exit(1);
765 }
766 if (tmpmap.contains(g_conf()->name.get_id())) {
767 ipaddrs = tmpmap.get_addrs(g_conf()->name.get_id());
768 } else {
769 derr << "no public_addr or public_network specified, and "
770 << g_conf()->name << " not present in monmap or ceph.conf" << dendl;
771 prefork.exit(1);
772 }
773 }
774 }
775
776 // bind
777 int rank = monmap.get_rank(g_conf()->name.get_id());
778 std::string public_msgr_type = g_conf()->ms_public_type.empty() ? g_conf().get_val<std::string>("ms_type") : g_conf()->ms_public_type;
779 Messenger *msgr = Messenger::create(g_ceph_context, public_msgr_type,
780 entity_name_t::MON(rank), "mon",
781 0, // zero nonce
782 Messenger::HAS_MANY_CONNECTIONS);
783 if (!msgr)
784 exit(1);
785 msgr->set_cluster_protocol(CEPH_MON_PROTOCOL);
786 msgr->set_default_send_priority(CEPH_MSG_PRIO_HIGH);
787
788 msgr->set_default_policy(Messenger::Policy::stateless_server(0));
789 msgr->set_policy(entity_name_t::TYPE_MON,
790 Messenger::Policy::lossless_peer_reuse(
791 CEPH_FEATURE_SERVER_LUMINOUS));
792 msgr->set_policy(entity_name_t::TYPE_OSD,
793 Messenger::Policy::stateless_server(
794 CEPH_FEATURE_SERVER_LUMINOUS));
795 msgr->set_policy(entity_name_t::TYPE_CLIENT,
796 Messenger::Policy::stateless_server(0));
797 msgr->set_policy(entity_name_t::TYPE_MDS,
798 Messenger::Policy::stateless_server(0));
799
800 // throttle client traffic
801 Throttle *client_throttler = new Throttle(g_ceph_context, "mon_client_bytes",
802 g_conf()->mon_client_bytes);
803 msgr->set_policy_throttlers(entity_name_t::TYPE_CLIENT,
804 client_throttler, NULL);
805
806 // throttle daemon traffic
807 // NOTE: actual usage on the leader may multiply by the number of
808 // monitors if they forward large update messages from daemons.
809 Throttle *daemon_throttler = new Throttle(g_ceph_context, "mon_daemon_bytes",
810 g_conf()->mon_daemon_bytes);
811 msgr->set_policy_throttlers(entity_name_t::TYPE_OSD, daemon_throttler,
812 NULL);
813 msgr->set_policy_throttlers(entity_name_t::TYPE_MDS, daemon_throttler,
814 NULL);
815
816 entity_addrvec_t bind_addrs = ipaddrs;
817 entity_addrvec_t public_addrs = ipaddrs;
818
819 // check if the public_bind_addr option is set
820 if (!g_conf()->public_bind_addr.is_blank_ip()) {
821 bind_addrs = make_mon_addrs(g_conf()->public_bind_addr);
822 }
823
824 dout(0) << "starting " << g_conf()->name << " rank " << rank
825 << " at public addrs " << public_addrs
826 << " at bind addrs " << bind_addrs
827 << " mon_data " << g_conf()->mon_data
828 << " fsid " << monmap.get_fsid()
829 << dendl;
830
831 Messenger *mgr_msgr = Messenger::create(g_ceph_context, public_msgr_type,
832 entity_name_t::MON(rank), "mon-mgrc",
833 Messenger::get_pid_nonce(),
834 0);
835 if (!mgr_msgr) {
836 derr << "unable to create mgr_msgr" << dendl;
837 prefork.exit(1);
838 }
839
840 mon = new Monitor(g_ceph_context, g_conf()->name.get_id(), store,
841 msgr, mgr_msgr, &monmap);
842
843 mon->orig_argc = argc;
844 mon->orig_argv = argv;
845
846 if (force_sync) {
847 derr << "flagging a forced sync ..." << dendl;
848 ostringstream oss;
849 JSONFormatter jf(true);
850 mon->sync_force(&jf);
851 derr << "out:\n";
852 jf.flush(*_dout);
853 *_dout << dendl;
854 }
855
856 err = mon->preinit();
857 if (err < 0) {
858 derr << "failed to initialize" << dendl;
859 prefork.exit(1);
860 }
861
862 if (compact || g_conf()->mon_compact_on_start) {
863 derr << "compacting monitor store ..." << dendl;
864 mon->store->compact();
865 derr << "done compacting" << dendl;
866 }
867
868 // bind
869 err = msgr->bindv(bind_addrs);
870 if (err < 0) {
871 derr << "unable to bind monitor to " << bind_addrs << dendl;
872 prefork.exit(1);
873 }
874
875 // if the public and bind addr are different set the msgr addr
876 // to the public one, now that the bind is complete.
877 if (public_addrs != bind_addrs) {
878 msgr->set_addrs(public_addrs);
879 }
880
881 if (g_conf()->daemonize) {
882 global_init_postfork_finish(g_ceph_context);
883 prefork.daemonize();
884 }
885
886 msgr->start();
887 mgr_msgr->start();
888
889 mon->init();
890
891 register_async_signal_handler_oneshot(SIGINT, handle_mon_signal);
892 register_async_signal_handler_oneshot(SIGTERM, handle_mon_signal);
893
894 if (g_conf()->inject_early_sigterm)
895 kill(getpid(), SIGTERM);
896
897 msgr->wait();
898 mgr_msgr->wait();
899
900 store->close();
901
902 unregister_async_signal_handler(SIGHUP, sighup_handler);
903 unregister_async_signal_handler(SIGINT, handle_mon_signal);
904 unregister_async_signal_handler(SIGTERM, handle_mon_signal);
905 shutdown_async_signal_handler();
906
907 delete mon;
908 delete store;
909 delete msgr;
910 delete mgr_msgr;
911 delete client_throttler;
912 delete daemon_throttler;
913
914 // cd on exit, so that gmon.out (if any) goes into a separate directory for each node.
915 char s[20];
916 snprintf(s, sizeof(s), "gmon/%d", getpid());
917 if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) {
918 dout(0) << "ceph-mon: gmon.out should be in " << s << dendl;
919 }
920
921 prefork.signal_exit(0);
922 return 0;
923 }