]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph_mon.cc
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / ceph_mon.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <sys/types.h>
16#include <sys/stat.h>
17#include <fcntl.h>
18
19#include <iostream>
20#include <string>
7c673cae
FG
21
22#include "common/config.h"
23#include "include/ceph_features.h"
24
25#include "mon/MonMap.h"
26#include "mon/Monitor.h"
27#include "mon/MonitorDBStore.h"
28#include "mon/MonClient.h"
29
30#include "msg/Messenger.h"
31
32#include "include/CompatSet.h"
33
34#include "common/ceph_argparse.h"
35#include "common/pick_address.h"
11fdf7f2 36#include "common/Throttle.h"
7c673cae
FG
37#include "common/Timer.h"
38#include "common/errno.h"
39#include "common/Preforker.h"
40
41#include "global/global_init.h"
42#include "global/signal_handler.h"
43
44#include "perfglue/heap_profiler.h"
45
11fdf7f2 46#include "include/ceph_assert.h"
7c673cae
FG
47
48#define dout_subsys ceph_subsys_mon
49
50Monitor *mon = NULL;
51
52void handle_mon_signal(int signum)
53{
54 if (mon)
55 mon->handle_signal(signum);
56}
57
58
59int obtain_monmap(MonitorDBStore &store, bufferlist &bl)
60{
61 dout(10) << __func__ << dendl;
62 /*
63 * the monmap may be in one of three places:
64 * 'monmap:<latest_version_no>' - the monmap we'd really like to have
65 * 'mon_sync:latest_monmap' - last monmap backed up for the last sync
66 * 'mkfs:monmap' - a monmap resulting from mkfs
67 */
68
69 if (store.exists("monmap", "last_committed")) {
70 version_t latest_ver = store.get("monmap", "last_committed");
71 if (store.exists("monmap", latest_ver)) {
72 int err = store.get("monmap", latest_ver, bl);
11fdf7f2
TL
73 ceph_assert(err == 0);
74 ceph_assert(bl.length() > 0);
7c673cae
FG
75 dout(10) << __func__ << " read last committed monmap ver "
76 << latest_ver << dendl;
77 return 0;
78 }
79 }
80
81 if (store.exists("mon_sync", "in_sync")
82 || store.exists("mon_sync", "force_sync")) {
83 dout(10) << __func__ << " detected aborted sync" << dendl;
84 if (store.exists("mon_sync", "latest_monmap")) {
85 int err = store.get("mon_sync", "latest_monmap", bl);
11fdf7f2
TL
86 ceph_assert(err == 0);
87 ceph_assert(bl.length() > 0);
7c673cae
FG
88 dout(10) << __func__ << " read backup monmap" << dendl;
89 return 0;
90 }
91 }
92
93 if (store.exists("mkfs", "monmap")) {
94 dout(10) << __func__ << " found mkfs monmap" << dendl;
95 int err = store.get("mkfs", "monmap", bl);
11fdf7f2
TL
96 ceph_assert(err == 0);
97 ceph_assert(bl.length() > 0);
7c673cae
FG
98 return 0;
99 }
100
101 derr << __func__ << " unable to find a monmap" << dendl;
102 return -ENOENT;
103}
104
105int check_mon_data_exists()
106{
11fdf7f2 107 string mon_data = g_conf()->mon_data;
7c673cae
FG
108 struct stat buf;
109 if (::stat(mon_data.c_str(), &buf)) {
110 if (errno != ENOENT) {
31f18b77 111 derr << "stat(" << mon_data << ") " << cpp_strerror(errno) << dendl;
7c673cae
FG
112 }
113 return -errno;
114 }
115 return 0;
116}
117
118/** Check whether **mon data** is empty.
119 *
120 * Being empty means mkfs has not been run and there's no monitor setup
11fdf7f2 121 * at **g_conf()->mon_data**.
7c673cae 122 *
11fdf7f2 123 * If the directory g_conf()->mon_data is not empty we will return -ENOTEMPTY.
7c673cae
FG
124 * Otherwise we will return 0. Any other negative returns will represent
125 * a failure to be handled by the caller.
126 *
127 * @return **0** on success, -ENOTEMPTY if not empty or **-errno** otherwise.
128 */
129int check_mon_data_empty()
130{
11fdf7f2 131 string mon_data = g_conf()->mon_data;
7c673cae
FG
132
133 DIR *dir = ::opendir(mon_data.c_str());
134 if (!dir) {
31f18b77 135 derr << "opendir(" << mon_data << ") " << cpp_strerror(errno) << dendl;
7c673cae
FG
136 return -errno;
137 }
138 int code = 0;
139 struct dirent *de = nullptr;
140 errno = 0;
141 while ((de = ::readdir(dir))) {
142 if (string(".") != de->d_name &&
143 string("..") != de->d_name &&
144 string("kv_backend") != de->d_name) {
145 code = -ENOTEMPTY;
146 break;
147 }
148 }
149 if (!de && errno) {
31f18b77 150 derr << "readdir(" << mon_data << ") " << cpp_strerror(errno) << dendl;
7c673cae
FG
151 code = -errno;
152 }
153
154 ::closedir(dir);
155
156 return code;
157}
158
159static void usage()
160{
31f18b77
FG
161 cout << "usage: ceph-mon -i <ID> [flags]\n"
162 << " --debug_mon n\n"
163 << " debug monitor level (e.g. 10)\n"
164 << " --mkfs\n"
165 << " build fresh monitor fs\n"
166 << " --force-sync\n"
167 << " force a sync from another mon by wiping local data (BE CAREFUL)\n"
168 << " --yes-i-really-mean-it\n"
169 << " mandatory safeguard for --force-sync\n"
170 << " --compact\n"
171 << " compact the monitor store\n"
172 << " --osdmap <filename>\n"
173 << " only used when --mkfs is provided: load the osdmap from <filename>\n"
174 << " --inject-monmap <filename>\n"
175 << " write the <filename> monmap to the local monitor store and exit\n"
176 << " --extract-monmap <filename>\n"
177 << " extract the monmap from the local monitor store and exit\n"
178 << " --mon-data <directory>\n"
179 << " where the mon store and keyring are located\n"
180 << std::endl;
7c673cae
FG
181 generic_server_usage();
182}
183
11fdf7f2
TL
184entity_addrvec_t make_mon_addrs(entity_addr_t a)
185{
186 entity_addrvec_t addrs;
187 if (a.get_port() == 0) {
188 a.set_type(entity_addr_t::TYPE_MSGR2);
189 a.set_port(CEPH_MON_PORT_IANA);
190 addrs.v.push_back(a);
191 a.set_type(entity_addr_t::TYPE_LEGACY);
192 a.set_port(CEPH_MON_PORT_LEGACY);
193 addrs.v.push_back(a);
194 } else if (a.get_port() == CEPH_MON_PORT_LEGACY) {
195 a.set_type(entity_addr_t::TYPE_LEGACY);
196 addrs.v.push_back(a);
92f5a8d4 197 } else if (a.get_type() == entity_addr_t::TYPE_ANY) {
11fdf7f2
TL
198 a.set_type(entity_addr_t::TYPE_MSGR2);
199 addrs.v.push_back(a);
92f5a8d4
TL
200 } else {
201 addrs.v.push_back(a);
11fdf7f2
TL
202 }
203 return addrs;
204}
205
7c673cae 206int main(int argc, const char **argv)
7c673cae 207{
11fdf7f2
TL
208 // reset our process name, in case we did a respawn, so that it's not
209 // left as "exe".
210 ceph_pthread_setname(pthread_self(), "ceph-mon");
211
7c673cae
FG
212 int err;
213
214 bool mkfs = false;
215 bool compact = false;
216 bool force_sync = false;
217 bool yes_really = false;
218 std::string osdmapfn, inject_monmap, extract_monmap;
219
220 vector<const char*> args;
221 argv_to_vec(argc, argv, args);
11fdf7f2
TL
222 if (args.empty()) {
223 cerr << argv[0] << ": -h or --help for usage" << std::endl;
224 exit(1);
225 }
226 if (ceph_argparse_need_usage(args)) {
227 usage();
228 exit(0);
229 }
7c673cae
FG
230
231 // We need to specify some default values that may be overridden by the
232 // user, that are specific to the monitor. The options we are overriding
233 // are also used on the OSD (or in any other component that uses leveldb),
c07f9fc5 234 // so changing the global defaults is not an option.
7c673cae 235 // This is not the prettiest way of doing this, especially since it has us
c07f9fc5
FG
236 // having a different place defining default values, but it's not horribly
237 // wrong enough to prevent us from doing it :)
7c673cae
FG
238 //
239 // NOTE: user-defined options will take precedence over ours.
240 //
241 // leveldb_write_buffer_size = 32*1024*1024 = 33554432 // 32MB
242 // leveldb_cache_size = 512*1024*1204 = 536870912 // 512MB
243 // leveldb_block_size = 64*1024 = 65536 // 64KB
244 // leveldb_compression = false
245 // leveldb_log = ""
11fdf7f2
TL
246 map<string,string> defaults = {
247 { "leveldb_write_buffer_size", "33554432" },
248 { "leveldb_cache_size", "536870912" },
249 { "leveldb_block_size", "65536" },
250 { "leveldb_compression", "false"},
251 { "leveldb_log", "" },
252 { "keyring", "$mon_data/keyring" },
253 };
7c673cae
FG
254
255 int flags = 0;
256 {
257 vector<const char*> args_copy = args;
258 std::string val;
259 for (std::vector<const char*>::iterator i = args_copy.begin();
260 i != args_copy.end(); ) {
261 if (ceph_argparse_double_dash(args_copy, i)) {
262 break;
263 } else if (ceph_argparse_flag(args_copy, i, "--mkfs", (char*)NULL)) {
264 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
265 } else if (ceph_argparse_witharg(args_copy, i, &val, "--inject_monmap", (char*)NULL)) {
266 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
267 } else if (ceph_argparse_witharg(args_copy, i, &val, "--extract-monmap", (char*)NULL)) {
268 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
269 } else {
270 ++i;
271 }
272 }
273 }
274
11fdf7f2
TL
275 // don't try to get config from mon cluster during startup
276 flags |= CINIT_FLAG_NO_MON_CONFIG;
277
278 auto cct = global_init(&defaults, args,
7c673cae
FG
279 CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON,
280 flags, "mon_data");
281 ceph_heap_profiler_init();
282
7c673cae
FG
283 std::string val;
284 for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
285 if (ceph_argparse_double_dash(args, i)) {
286 break;
7c673cae
FG
287 } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) {
288 mkfs = true;
289 } else if (ceph_argparse_flag(args, i, "--compact", (char*)NULL)) {
290 compact = true;
291 } else if (ceph_argparse_flag(args, i, "--force-sync", (char*)NULL)) {
292 force_sync = true;
293 } else if (ceph_argparse_flag(args, i, "--yes-i-really-mean-it", (char*)NULL)) {
294 yes_really = true;
295 } else if (ceph_argparse_witharg(args, i, &val, "--osdmap", (char*)NULL)) {
296 osdmapfn = val;
297 } else if (ceph_argparse_witharg(args, i, &val, "--inject_monmap", (char*)NULL)) {
298 inject_monmap = val;
299 } else if (ceph_argparse_witharg(args, i, &val, "--extract-monmap", (char*)NULL)) {
300 extract_monmap = val;
301 } else {
302 ++i;
303 }
304 }
305 if (!args.empty()) {
11fdf7f2
TL
306 cerr << "too many arguments: " << args << std::endl;
307 exit(1);
7c673cae
FG
308 }
309
310 if (force_sync && !yes_really) {
11fdf7f2
TL
311 cerr << "are you SURE you want to force a sync? this will erase local data and may\n"
312 << "break your mon cluster. pass --yes-i-really-mean-it if you do." << std::endl;
7c673cae
FG
313 exit(1);
314 }
315
11fdf7f2
TL
316 if (g_conf()->mon_data.empty()) {
317 cerr << "must specify '--mon-data=foo' data path" << std::endl;
318 exit(1);
7c673cae
FG
319 }
320
11fdf7f2
TL
321 if (g_conf()->name.get_id().empty()) {
322 cerr << "must specify id (--id <id> or --name mon.<id>)" << std::endl;
323 exit(1);
7c673cae
FG
324 }
325
326 // -- mkfs --
327 if (mkfs) {
328
329 int err = check_mon_data_exists();
330 if (err == -ENOENT) {
11fdf7f2
TL
331 if (::mkdir(g_conf()->mon_data.c_str(), 0755)) {
332 derr << "mkdir(" << g_conf()->mon_data << ") : "
31f18b77 333 << cpp_strerror(errno) << dendl;
7c673cae
FG
334 exit(1);
335 }
336 } else if (err < 0) {
11fdf7f2 337 derr << "error opening '" << g_conf()->mon_data << "': "
31f18b77 338 << cpp_strerror(-err) << dendl;
7c673cae
FG
339 exit(-err);
340 }
341
342 err = check_mon_data_empty();
343 if (err == -ENOTEMPTY) {
344 // Mon may exist. Let the user know and exit gracefully.
11fdf7f2 345 derr << "'" << g_conf()->mon_data << "' already exists and is not empty"
31f18b77 346 << ": monitor may already exist" << dendl;
7c673cae
FG
347 exit(0);
348 } else if (err < 0) {
11fdf7f2 349 derr << "error checking if '" << g_conf()->mon_data << "' is empty: "
31f18b77 350 << cpp_strerror(-err) << dendl;
7c673cae
FG
351 exit(-err);
352 }
353
354 // resolve public_network -> public_addr
355 pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC);
356
11fdf7f2
TL
357 dout(10) << "public_network " << g_conf()->public_network << dendl;
358 dout(10) << "public_addr " << g_conf()->public_network << dendl;
359
7c673cae
FG
360 common_init_finish(g_ceph_context);
361
362 bufferlist monmapbl, osdmapbl;
363 std::string error;
364 MonMap monmap;
365
366 // load or generate monmap
11fdf7f2 367 const auto monmap_fn = g_conf().get_val<string>("monmap");
3efd9988
FG
368 if (monmap_fn.length()) {
369 int err = monmapbl.read_file(monmap_fn.c_str(), &error);
7c673cae 370 if (err < 0) {
3efd9988 371 derr << argv[0] << ": error reading " << monmap_fn << ": " << error << dendl;
7c673cae
FG
372 exit(1);
373 }
374 try {
375 monmap.decode(monmapbl);
376
377 // always mark seed/mkfs monmap as epoch 0
378 monmap.set_epoch(0);
3efd9988
FG
379 } catch (const buffer::error& e) {
380 derr << argv[0] << ": error decoding monmap " << monmap_fn << ": " << e.what() << dendl;
7c673cae 381 exit(1);
11fdf7f2
TL
382 }
383
384 dout(1) << "imported monmap:\n";
385 monmap.print(*_dout);
386 *_dout << dendl;
387
7c673cae 388 } else {
31f18b77 389 ostringstream oss;
11fdf7f2 390 int err = monmap.build_initial(g_ceph_context, true, oss);
31f18b77
FG
391 if (oss.tellp())
392 derr << oss.str() << dendl;
7c673cae 393 if (err < 0) {
31f18b77 394 derr << argv[0] << ": warning: no initial monitors; must use admin socket to feed hints" << dendl;
7c673cae
FG
395 }
396
11fdf7f2
TL
397 dout(1) << "initial generated monmap:\n";
398 monmap.print(*_dout);
399 *_dout << dendl;
400
7c673cae 401 // am i part of the initial quorum?
11fdf7f2 402 if (monmap.contains(g_conf()->name.get_id())) {
7c673cae
FG
403 // hmm, make sure the ip listed exists on the current host?
404 // maybe later.
11fdf7f2
TL
405 } else if (!g_conf()->public_addr.is_blank_ip()) {
406 entity_addrvec_t av = make_mon_addrs(g_conf()->public_addr);
407 string name;
408 if (monmap.contains(av, &name)) {
409 monmap.rename(name, g_conf()->name.get_id());
410 dout(0) << argv[0] << ": renaming mon." << name << " " << av
411 << " to mon." << g_conf()->name.get_id() << dendl;
7c673cae
FG
412 }
413 } else {
414 // is a local address listed without a name? if so, name myself.
415 list<entity_addr_t> ls;
416 monmap.list_addrs(ls);
11fdf7f2
TL
417 dout(0) << " monmap addrs are " << ls << ", checking if any are local"
418 << dendl;
7c673cae 419
11fdf7f2 420 entity_addr_t local;
7c673cae 421 if (have_local_addr(g_ceph_context, ls, &local)) {
11fdf7f2 422 dout(0) << " have local addr " << local << dendl;
7c673cae 423 string name;
11fdf7f2
TL
424 local.set_type(entity_addr_t::TYPE_MSGR2);
425 if (!monmap.get_addr_name(local, name)) {
426 local.set_type(entity_addr_t::TYPE_LEGACY);
427 if (!monmap.get_addr_name(local, name)) {
428 dout(0) << "no local addresses appear in bootstrap monmap"
429 << dendl;
430 }
431 }
7c673cae 432 if (name.compare(0, 7, "noname-") == 0) {
224ce89b 433 dout(0) << argv[0] << ": mon." << name << " " << local
11fdf7f2
TL
434 << " is local, renaming to mon." << g_conf()->name.get_id()
435 << dendl;
436 monmap.rename(name, g_conf()->name.get_id());
437 } else if (name.size()) {
224ce89b 438 dout(0) << argv[0] << ": mon." << name << " " << local
11fdf7f2
TL
439 << " is local, but not 'noname-' + something; "
440 << "not assuming it's me" << dendl;
7c673cae 441 }
11fdf7f2
TL
442 } else {
443 dout(0) << " no local addrs match monmap" << dendl;
7c673cae
FG
444 }
445 }
446 }
447
11fdf7f2 448 const auto fsid = g_conf().get_val<uuid_d>("fsid");
3efd9988
FG
449 if (!fsid.is_zero()) {
450 monmap.fsid = fsid;
451 dout(0) << argv[0] << ": set fsid to " << fsid << dendl;
7c673cae
FG
452 }
453
454 if (monmap.fsid.is_zero()) {
31f18b77 455 derr << argv[0] << ": generated monmap has no fsid; use '--fsid <uuid>'" << dendl;
7c673cae
FG
456 exit(10);
457 }
458
459 //monmap.print(cout);
460
461 // osdmap
462 if (osdmapfn.length()) {
463 err = osdmapbl.read_file(osdmapfn.c_str(), &error);
464 if (err < 0) {
31f18b77
FG
465 derr << argv[0] << ": error reading " << osdmapfn << ": "
466 << error << dendl;
7c673cae
FG
467 exit(1);
468 }
469 }
470
471 // go
11fdf7f2 472 MonitorDBStore store(g_conf()->mon_data);
31f18b77
FG
473 ostringstream oss;
474 int r = store.create_and_open(oss);
475 if (oss.tellp())
476 derr << oss.str() << dendl;
7c673cae 477 if (r < 0) {
31f18b77 478 derr << argv[0] << ": error opening mon data directory at '"
11fdf7f2 479 << g_conf()->mon_data << "': " << cpp_strerror(r) << dendl;
7c673cae
FG
480 exit(1);
481 }
11fdf7f2 482 ceph_assert(r == 0);
7c673cae 483
11fdf7f2 484 Monitor mon(g_ceph_context, g_conf()->name.get_id(), &store, 0, 0, &monmap);
7c673cae
FG
485 r = mon.mkfs(osdmapbl);
486 if (r < 0) {
31f18b77 487 derr << argv[0] << ": error creating monfs: " << cpp_strerror(r) << dendl;
7c673cae
FG
488 exit(1);
489 }
490 store.close();
11fdf7f2
TL
491 dout(0) << argv[0] << ": created monfs at " << g_conf()->mon_data
492 << " for " << g_conf()->name << dendl;
7c673cae
FG
493 return 0;
494 }
495
496 err = check_mon_data_exists();
497 if (err < 0 && err == -ENOENT) {
11fdf7f2 498 derr << "monitor data directory at '" << g_conf()->mon_data << "'"
31f18b77 499 << " does not exist: have you run 'mkfs'?" << dendl;
7c673cae
FG
500 exit(1);
501 } else if (err < 0) {
31f18b77 502 derr << "error accessing monitor data directory at '"
11fdf7f2 503 << g_conf()->mon_data << "': " << cpp_strerror(-err) << dendl;
7c673cae
FG
504 exit(1);
505 }
506
507 err = check_mon_data_empty();
508 if (err == 0) {
11fdf7f2 509 derr << "monitor data directory at '" << g_conf()->mon_data
7c673cae
FG
510 << "' is empty: have you run 'mkfs'?" << dendl;
511 exit(1);
512 } else if (err < 0 && err != -ENOTEMPTY) {
513 // we don't want an empty data dir by now
11fdf7f2 514 derr << "error accessing '" << g_conf()->mon_data << "': "
31f18b77 515 << cpp_strerror(-err) << dendl;
7c673cae
FG
516 exit(1);
517 }
518
519 {
520 // check fs stats. don't start if it's critically close to full.
521 ceph_data_stats_t stats;
11fdf7f2 522 int err = get_fs_stats(stats, g_conf()->mon_data.c_str());
7c673cae 523 if (err < 0) {
31f18b77
FG
524 derr << "error checking monitor data's fs stats: " << cpp_strerror(err)
525 << dendl;
7c673cae
FG
526 exit(-err);
527 }
11fdf7f2 528 if (stats.avail_percent <= g_conf()->mon_data_avail_crit) {
31f18b77 529 derr << "error: monitor data filesystem reached concerning levels of"
7c673cae 530 << " available storage space (available: "
1adf2230 531 << stats.avail_percent << "% " << byte_u_t(stats.byte_avail)
7c673cae 532 << ")\nyou may adjust 'mon data avail crit' to a lower value"
11fdf7f2 533 << " to make this go away (default: " << g_conf()->mon_data_avail_crit
31f18b77 534 << "%)\n" << dendl;
7c673cae
FG
535 exit(ENOSPC);
536 }
537 }
538
539 // we fork early to prevent leveldb's environment static state from
540 // screwing us over
541 Preforker prefork;
542 if (!(flags & CINIT_FLAG_NO_DAEMON_ACTIONS)) {
543 if (global_init_prefork(g_ceph_context) >= 0) {
544 string err_msg;
545 err = prefork.prefork(err_msg);
546 if (err < 0) {
31f18b77 547 derr << err_msg << dendl;
7c673cae
FG
548 prefork.exit(err);
549 }
550 if (prefork.is_parent()) {
551 err = prefork.parent_wait(err_msg);
552 if (err < 0)
31f18b77 553 derr << err_msg << dendl;
7c673cae
FG
554 prefork.exit(err);
555 }
224ce89b 556 setsid();
7c673cae
FG
557 global_init_postfork_start(g_ceph_context);
558 }
559 common_init_finish(g_ceph_context);
560 global_init_chdir(g_ceph_context);
7c673cae
FG
561 if (global_init_preload_erasure_code(g_ceph_context) < 0)
562 prefork.exit(1);
7c673cae
FG
563 }
564
11fdf7f2
TL
565 // set up signal handlers, now that we've daemonized/forked.
566 init_async_signal_handler();
567 register_async_signal_handler(SIGHUP, sighup_handler);
568
569 MonitorDBStore *store = new MonitorDBStore(g_conf()->mon_data);
570
571 // make sure we aren't upgrading too fast
572 {
573 string val;
574 int r = store->read_meta("min_mon_release", &val);
575 if (r >= 0 && val.size()) {
576 int min = atoi(val.c_str());
577 if (min &&
578 min + 2 < (int)ceph_release()) {
579 derr << "recorded min_mon_release is " << min
580 << " (" << ceph_release_name(min)
581 << ") which is >2 releases older than installed "
582 << ceph_release() << " (" << ceph_release_name(ceph_release())
583 << "); you can only upgrade 2 releases at a time" << dendl;
584 derr << "you should first upgrade to "
585 << (min + 1) << " (" << ceph_release_name(min + 1) << ") or "
586 << (min + 2) << " (" << ceph_release_name(min + 2) << ")" << dendl;
587 prefork.exit(1);
588 }
589 }
590 }
591
31f18b77
FG
592 {
593 ostringstream oss;
594 err = store->open(oss);
595 if (oss.tellp())
596 derr << oss.str() << dendl;
597 if (err < 0) {
598 derr << "error opening mon data directory at '"
11fdf7f2 599 << g_conf()->mon_data << "': " << cpp_strerror(err) << dendl;
31f18b77
FG
600 prefork.exit(1);
601 }
7c673cae
FG
602 }
603
604 bufferlist magicbl;
605 err = store->get(Monitor::MONITOR_NAME, "magic", magicbl);
606 if (err || !magicbl.length()) {
607 derr << "unable to read magic from mon data" << dendl;
608 prefork.exit(1);
609 }
610 string magic(magicbl.c_str(), magicbl.length()-1); // ignore trailing \n
611 if (strcmp(magic.c_str(), CEPH_MON_ONDISK_MAGIC)) {
612 derr << "mon fs magic '" << magic << "' != current '" << CEPH_MON_ONDISK_MAGIC << "'" << dendl;
613 prefork.exit(1);
614 }
615
616 err = Monitor::check_features(store);
617 if (err < 0) {
618 derr << "error checking features: " << cpp_strerror(err) << dendl;
619 prefork.exit(1);
620 }
621
622 // inject new monmap?
623 if (!inject_monmap.empty()) {
624 bufferlist bl;
625 std::string error;
626 int r = bl.read_file(inject_monmap.c_str(), &error);
627 if (r) {
628 derr << "unable to read monmap from " << inject_monmap << ": "
629 << error << dendl;
630 prefork.exit(1);
631 }
632
633 // get next version
634 version_t v = store->get("monmap", "last_committed");
635 dout(0) << "last committed monmap epoch is " << v << ", injected map will be " << (v+1)
636 << dendl;
637 v++;
638
639 // set the version
640 MonMap tmp;
641 tmp.decode(bl);
642 if (tmp.get_epoch() != v) {
643 dout(0) << "changing monmap epoch from " << tmp.get_epoch()
644 << " to " << v << dendl;
645 tmp.set_epoch(v);
646 }
647 bufferlist mapbl;
648 tmp.encode(mapbl, CEPH_FEATURES_ALL);
649 bufferlist final;
11fdf7f2
TL
650 encode(v, final);
651 encode(mapbl, final);
7c673cae
FG
652
653 auto t(std::make_shared<MonitorDBStore::Transaction>());
654 // save it
655 t->put("monmap", v, mapbl);
656 t->put("monmap", "latest", final);
657 t->put("monmap", "last_committed", v);
658 store->apply_transaction(t);
659
660 dout(0) << "done." << dendl;
661 prefork.exit(0);
662 }
663
664 // monmap?
665 MonMap monmap;
666 {
667 // note that even if we don't find a viable monmap, we should go ahead
668 // and try to build it up in the next if-else block.
669 bufferlist mapbl;
670 int err = obtain_monmap(*store, mapbl);
671 if (err >= 0) {
672 try {
673 monmap.decode(mapbl);
674 } catch (const buffer::error& e) {
31f18b77 675 derr << "can't decode monmap: " << e.what() << dendl;
7c673cae
FG
676 }
677 } else {
678 derr << "unable to obtain a monmap: " << cpp_strerror(err) << dendl;
679 }
11fdf7f2
TL
680
681 dout(10) << __func__ << " monmap:\n";
682 JSONFormatter jf(true);
683 jf.dump_object("monmap", monmap);
684 jf.flush(*_dout);
685 *_dout << dendl;
686
7c673cae
FG
687 if (!extract_monmap.empty()) {
688 int r = mapbl.write_file(extract_monmap.c_str());
689 if (r < 0) {
690 r = -errno;
691 derr << "error writing monmap to " << extract_monmap << ": " << cpp_strerror(r) << dendl;
692 prefork.exit(1);
693 }
694 derr << "wrote monmap to " << extract_monmap << dendl;
695 prefork.exit(0);
696 }
697 }
698
699 // this is what i will bind to
11fdf7f2 700 entity_addrvec_t ipaddrs;
7c673cae 701
11fdf7f2
TL
702 if (monmap.contains(g_conf()->name.get_id())) {
703 ipaddrs = monmap.get_addrs(g_conf()->name.get_id());
7c673cae
FG
704
705 // print helpful warning if the conf file doesn't match
7c673cae 706 std::vector <std::string> my_sections;
11fdf7f2 707 g_conf().get_my_sections(my_sections);
7c673cae 708 std::string mon_addr_str;
11fdf7f2 709 if (g_conf().get_val_from_conf_file(my_sections, "mon addr",
7c673cae 710 mon_addr_str, true) == 0) {
11fdf7f2
TL
711 entity_addr_t conf_addr;
712 if (conf_addr.parse(mon_addr_str.c_str())) {
713 entity_addrvec_t conf_addrs = make_mon_addrs(conf_addr);
714 if (ipaddrs != conf_addrs) {
715 derr << "WARNING: 'mon addr' config option " << conf_addrs
716 << " does not match monmap file" << std::endl
717 << " continuing with monmap configuration" << dendl;
718 }
719 } else
720 derr << "WARNING: invalid 'mon addr' config option" << std::endl
7c673cae 721 << " continuing with monmap configuration" << dendl;
7c673cae
FG
722 }
723 } else {
11fdf7f2 724 dout(0) << g_conf()->name << " does not exist in monmap, will attempt to join an existing cluster" << dendl;
7c673cae
FG
725
726 pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC);
11fdf7f2
TL
727 if (!g_conf()->public_addr.is_blank_ip()) {
728 ipaddrs = make_mon_addrs(g_conf()->public_addr);
729 dout(0) << "using public_addr " << g_conf()->public_addr << " -> "
730 << ipaddrs << dendl;
7c673cae
FG
731 } else {
732 MonMap tmpmap;
31f18b77 733 ostringstream oss;
11fdf7f2 734 int err = tmpmap.build_initial(g_ceph_context, true, oss);
31f18b77
FG
735 if (oss.tellp())
736 derr << oss.str() << dendl;
7c673cae
FG
737 if (err < 0) {
738 derr << argv[0] << ": error generating initial monmap: "
739 << cpp_strerror(err) << dendl;
7c673cae
FG
740 prefork.exit(1);
741 }
11fdf7f2
TL
742 if (tmpmap.contains(g_conf()->name.get_id())) {
743 ipaddrs = tmpmap.get_addrs(g_conf()->name.get_id());
7c673cae 744 } else {
11fdf7f2
TL
745 derr << "no public_addr or public_network specified, and "
746 << g_conf()->name << " not present in monmap or ceph.conf" << dendl;
7c673cae
FG
747 prefork.exit(1);
748 }
749 }
750 }
751
752 // bind
11fdf7f2
TL
753 int rank = monmap.get_rank(g_conf()->name.get_id());
754 std::string public_msgr_type = g_conf()->ms_public_type.empty() ? g_conf().get_val<std::string>("ms_type") : g_conf()->ms_public_type;
7c673cae
FG
755 Messenger *msgr = Messenger::create(g_ceph_context, public_msgr_type,
756 entity_name_t::MON(rank), "mon",
757 0, Messenger::HAS_MANY_CONNECTIONS);
758 if (!msgr)
759 exit(1);
760 msgr->set_cluster_protocol(CEPH_MON_PROTOCOL);
761 msgr->set_default_send_priority(CEPH_MSG_PRIO_HIGH);
762
763 msgr->set_default_policy(Messenger::Policy::stateless_server(0));
764 msgr->set_policy(entity_name_t::TYPE_MON,
765 Messenger::Policy::lossless_peer_reuse(
11fdf7f2 766 CEPH_FEATURE_SERVER_LUMINOUS));
7c673cae
FG
767 msgr->set_policy(entity_name_t::TYPE_OSD,
768 Messenger::Policy::stateless_server(
11fdf7f2 769 CEPH_FEATURE_SERVER_LUMINOUS));
7c673cae
FG
770 msgr->set_policy(entity_name_t::TYPE_CLIENT,
771 Messenger::Policy::stateless_server(0));
772 msgr->set_policy(entity_name_t::TYPE_MDS,
773 Messenger::Policy::stateless_server(0));
774
775 // throttle client traffic
776 Throttle *client_throttler = new Throttle(g_ceph_context, "mon_client_bytes",
11fdf7f2 777 g_conf()->mon_client_bytes);
7c673cae
FG
778 msgr->set_policy_throttlers(entity_name_t::TYPE_CLIENT,
779 client_throttler, NULL);
780
781 // throttle daemon traffic
782 // NOTE: actual usage on the leader may multiply by the number of
783 // monitors if they forward large update messages from daemons.
784 Throttle *daemon_throttler = new Throttle(g_ceph_context, "mon_daemon_bytes",
11fdf7f2 785 g_conf()->mon_daemon_bytes);
7c673cae
FG
786 msgr->set_policy_throttlers(entity_name_t::TYPE_OSD, daemon_throttler,
787 NULL);
788 msgr->set_policy_throttlers(entity_name_t::TYPE_MDS, daemon_throttler,
789 NULL);
790
11fdf7f2
TL
791 entity_addrvec_t bind_addrs = ipaddrs;
792 entity_addrvec_t public_addrs = ipaddrs;
224ce89b
WB
793
794 // check if the public_bind_addr option is set
11fdf7f2
TL
795 if (!g_conf()->public_bind_addr.is_blank_ip()) {
796 bind_addrs = make_mon_addrs(g_conf()->public_bind_addr);
7c673cae
FG
797 }
798
11fdf7f2
TL
799 dout(0) << "starting " << g_conf()->name << " rank " << rank
800 << " at public addrs " << public_addrs
801 << " at bind addrs " << bind_addrs
802 << " mon_data " << g_conf()->mon_data
803 << " fsid " << monmap.get_fsid()
804 << dendl;
224ce89b 805
7c673cae
FG
806 Messenger *mgr_msgr = Messenger::create(g_ceph_context, public_msgr_type,
807 entity_name_t::MON(rank), "mon-mgrc",
808 getpid(), 0);
809 if (!mgr_msgr) {
810 derr << "unable to create mgr_msgr" << dendl;
811 prefork.exit(1);
812 }
813
11fdf7f2 814 mon = new Monitor(g_ceph_context, g_conf()->name.get_id(), store,
7c673cae
FG
815 msgr, mgr_msgr, &monmap);
816
11fdf7f2
TL
817 mon->orig_argc = argc;
818 mon->orig_argv = argv;
819
7c673cae
FG
820 if (force_sync) {
821 derr << "flagging a forced sync ..." << dendl;
31f18b77
FG
822 ostringstream oss;
823 mon->sync_force(NULL, oss);
824 if (oss.tellp())
825 derr << oss.str() << dendl;
7c673cae
FG
826 }
827
828 err = mon->preinit();
829 if (err < 0) {
830 derr << "failed to initialize" << dendl;
831 prefork.exit(1);
832 }
833
11fdf7f2 834 if (compact || g_conf()->mon_compact_on_start) {
7c673cae
FG
835 derr << "compacting monitor store ..." << dendl;
836 mon->store->compact();
837 derr << "done compacting" << dendl;
838 }
839
11fdf7f2
TL
840 // bind
841 err = msgr->bindv(bind_addrs);
842 if (err < 0) {
843 derr << "unable to bind monitor to " << bind_addrs << dendl;
844 prefork.exit(1);
845 }
846
847 // if the public and bind addr are different set the msgr addr
848 // to the public one, now that the bind is complete.
849 if (public_addrs != bind_addrs) {
850 msgr->set_addrs(public_addrs);
851 }
852
853 if (g_conf()->daemonize) {
7c673cae
FG
854 global_init_postfork_finish(g_ceph_context);
855 prefork.daemonize();
856 }
857
858 msgr->start();
859 mgr_msgr->start();
860
861 mon->init();
862
7c673cae
FG
863 register_async_signal_handler_oneshot(SIGINT, handle_mon_signal);
864 register_async_signal_handler_oneshot(SIGTERM, handle_mon_signal);
865
11fdf7f2 866 if (g_conf()->inject_early_sigterm)
7c673cae
FG
867 kill(getpid(), SIGTERM);
868
869 msgr->wait();
870 mgr_msgr->wait();
871
872 store->close();
873
874 unregister_async_signal_handler(SIGHUP, sighup_handler);
875 unregister_async_signal_handler(SIGINT, handle_mon_signal);
876 unregister_async_signal_handler(SIGTERM, handle_mon_signal);
877 shutdown_async_signal_handler();
878
879 delete mon;
880 delete store;
881 delete msgr;
882 delete mgr_msgr;
883 delete client_throttler;
884 delete daemon_throttler;
885
886 // cd on exit, so that gmon.out (if any) goes into a separate directory for each node.
887 char s[20];
888 snprintf(s, sizeof(s), "gmon/%d", getpid());
889 if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) {
890 dout(0) << "ceph-mon: gmon.out should be in " << s << dendl;
891 }
892
893 prefork.signal_exit(0);
894 return 0;
895}