]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph_mon.cc
update sources to 12.2.8
[ceph.git] / ceph / src / ceph_mon.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <sys/types.h>
16#include <sys/stat.h>
17#include <fcntl.h>
18
19#include <iostream>
20#include <string>
21using namespace std;
22
23#include "common/config.h"
24#include "include/ceph_features.h"
25
26#include "mon/MonMap.h"
27#include "mon/Monitor.h"
28#include "mon/MonitorDBStore.h"
29#include "mon/MonClient.h"
30
31#include "msg/Messenger.h"
32
33#include "include/CompatSet.h"
34
35#include "common/ceph_argparse.h"
36#include "common/pick_address.h"
37#include "common/Timer.h"
38#include "common/errno.h"
39#include "common/Preforker.h"
40
41#include "global/global_init.h"
42#include "global/signal_handler.h"
43
44#include "perfglue/heap_profiler.h"
45
46#include "include/assert.h"
47
48#define dout_subsys ceph_subsys_mon
49
50Monitor *mon = NULL;
51
52void handle_mon_signal(int signum)
53{
54 if (mon)
55 mon->handle_signal(signum);
56}
57
58
59int obtain_monmap(MonitorDBStore &store, bufferlist &bl)
60{
61 dout(10) << __func__ << dendl;
62 /*
63 * the monmap may be in one of three places:
64 * 'monmap:<latest_version_no>' - the monmap we'd really like to have
65 * 'mon_sync:latest_monmap' - last monmap backed up for the last sync
66 * 'mkfs:monmap' - a monmap resulting from mkfs
67 */
68
69 if (store.exists("monmap", "last_committed")) {
70 version_t latest_ver = store.get("monmap", "last_committed");
71 if (store.exists("monmap", latest_ver)) {
72 int err = store.get("monmap", latest_ver, bl);
73 assert(err == 0);
74 assert(bl.length() > 0);
75 dout(10) << __func__ << " read last committed monmap ver "
76 << latest_ver << dendl;
77 return 0;
78 }
79 }
80
81 if (store.exists("mon_sync", "in_sync")
82 || store.exists("mon_sync", "force_sync")) {
83 dout(10) << __func__ << " detected aborted sync" << dendl;
84 if (store.exists("mon_sync", "latest_monmap")) {
85 int err = store.get("mon_sync", "latest_monmap", bl);
86 assert(err == 0);
87 assert(bl.length() > 0);
88 dout(10) << __func__ << " read backup monmap" << dendl;
89 return 0;
90 }
91 }
92
93 if (store.exists("mkfs", "monmap")) {
94 dout(10) << __func__ << " found mkfs monmap" << dendl;
95 int err = store.get("mkfs", "monmap", bl);
96 assert(err == 0);
97 assert(bl.length() > 0);
98 return 0;
99 }
100
101 derr << __func__ << " unable to find a monmap" << dendl;
102 return -ENOENT;
103}
104
105int check_mon_data_exists()
106{
107 string mon_data = g_conf->mon_data;
108 struct stat buf;
109 if (::stat(mon_data.c_str(), &buf)) {
110 if (errno != ENOENT) {
31f18b77 111 derr << "stat(" << mon_data << ") " << cpp_strerror(errno) << dendl;
7c673cae
FG
112 }
113 return -errno;
114 }
115 return 0;
116}
117
118/** Check whether **mon data** is empty.
119 *
120 * Being empty means mkfs has not been run and there's no monitor setup
121 * at **g_conf->mon_data**.
122 *
123 * If the directory g_conf->mon_data is not empty we will return -ENOTEMPTY.
124 * Otherwise we will return 0. Any other negative returns will represent
125 * a failure to be handled by the caller.
126 *
127 * @return **0** on success, -ENOTEMPTY if not empty or **-errno** otherwise.
128 */
129int check_mon_data_empty()
130{
131 string mon_data = g_conf->mon_data;
132
133 DIR *dir = ::opendir(mon_data.c_str());
134 if (!dir) {
31f18b77 135 derr << "opendir(" << mon_data << ") " << cpp_strerror(errno) << dendl;
7c673cae
FG
136 return -errno;
137 }
138 int code = 0;
139 struct dirent *de = nullptr;
140 errno = 0;
141 while ((de = ::readdir(dir))) {
142 if (string(".") != de->d_name &&
143 string("..") != de->d_name &&
144 string("kv_backend") != de->d_name) {
145 code = -ENOTEMPTY;
146 break;
147 }
148 }
149 if (!de && errno) {
31f18b77 150 derr << "readdir(" << mon_data << ") " << cpp_strerror(errno) << dendl;
7c673cae
FG
151 code = -errno;
152 }
153
154 ::closedir(dir);
155
156 return code;
157}
158
159static void usage()
160{
31f18b77
FG
161 cout << "usage: ceph-mon -i <ID> [flags]\n"
162 << " --debug_mon n\n"
163 << " debug monitor level (e.g. 10)\n"
164 << " --mkfs\n"
165 << " build fresh monitor fs\n"
166 << " --force-sync\n"
167 << " force a sync from another mon by wiping local data (BE CAREFUL)\n"
168 << " --yes-i-really-mean-it\n"
169 << " mandatory safeguard for --force-sync\n"
170 << " --compact\n"
171 << " compact the monitor store\n"
172 << " --osdmap <filename>\n"
173 << " only used when --mkfs is provided: load the osdmap from <filename>\n"
174 << " --inject-monmap <filename>\n"
175 << " write the <filename> monmap to the local monitor store and exit\n"
176 << " --extract-monmap <filename>\n"
177 << " extract the monmap from the local monitor store and exit\n"
178 << " --mon-data <directory>\n"
179 << " where the mon store and keyring are located\n"
180 << std::endl;
7c673cae
FG
181 generic_server_usage();
182}
183
7c673cae 184int main(int argc, const char **argv)
7c673cae
FG
185{
186 int err;
187
188 bool mkfs = false;
189 bool compact = false;
190 bool force_sync = false;
191 bool yes_really = false;
192 std::string osdmapfn, inject_monmap, extract_monmap;
193
194 vector<const char*> args;
195 argv_to_vec(argc, argv, args);
196 env_to_vec(args);
197
198 // We need to specify some default values that may be overridden by the
199 // user, that are specific to the monitor. The options we are overriding
200 // are also used on the OSD (or in any other component that uses leveldb),
c07f9fc5 201 // so changing the global defaults is not an option.
7c673cae 202 // This is not the prettiest way of doing this, especially since it has us
c07f9fc5
FG
203 // having a different place defining default values, but it's not horribly
204 // wrong enough to prevent us from doing it :)
7c673cae
FG
205 //
206 // NOTE: user-defined options will take precedence over ours.
207 //
208 // leveldb_write_buffer_size = 32*1024*1024 = 33554432 // 32MB
209 // leveldb_cache_size = 512*1024*1204 = 536870912 // 512MB
210 // leveldb_block_size = 64*1024 = 65536 // 64KB
211 // leveldb_compression = false
212 // leveldb_log = ""
213 vector<const char*> def_args;
214 def_args.push_back("--leveldb-write-buffer-size=33554432");
215 def_args.push_back("--leveldb-cache-size=536870912");
216 def_args.push_back("--leveldb-block-size=65536");
217 def_args.push_back("--leveldb-compression=false");
218 def_args.push_back("--leveldb-log=");
219
220 int flags = 0;
221 {
222 vector<const char*> args_copy = args;
223 std::string val;
224 for (std::vector<const char*>::iterator i = args_copy.begin();
225 i != args_copy.end(); ) {
226 if (ceph_argparse_double_dash(args_copy, i)) {
227 break;
228 } else if (ceph_argparse_flag(args_copy, i, "--mkfs", (char*)NULL)) {
229 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
230 } else if (ceph_argparse_witharg(args_copy, i, &val, "--inject_monmap", (char*)NULL)) {
231 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
232 } else if (ceph_argparse_witharg(args_copy, i, &val, "--extract-monmap", (char*)NULL)) {
233 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
234 } else {
235 ++i;
236 }
237 }
238 }
239
240 auto cct = global_init(&def_args, args,
241 CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON,
242 flags, "mon_data");
243 ceph_heap_profiler_init();
244
7c673cae
FG
245 std::string val;
246 for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
247 if (ceph_argparse_double_dash(args, i)) {
248 break;
249 } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
250 usage();
251 } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) {
252 mkfs = true;
253 } else if (ceph_argparse_flag(args, i, "--compact", (char*)NULL)) {
254 compact = true;
255 } else if (ceph_argparse_flag(args, i, "--force-sync", (char*)NULL)) {
256 force_sync = true;
257 } else if (ceph_argparse_flag(args, i, "--yes-i-really-mean-it", (char*)NULL)) {
258 yes_really = true;
259 } else if (ceph_argparse_witharg(args, i, &val, "--osdmap", (char*)NULL)) {
260 osdmapfn = val;
261 } else if (ceph_argparse_witharg(args, i, &val, "--inject_monmap", (char*)NULL)) {
262 inject_monmap = val;
263 } else if (ceph_argparse_witharg(args, i, &val, "--extract-monmap", (char*)NULL)) {
264 extract_monmap = val;
265 } else {
266 ++i;
267 }
268 }
269 if (!args.empty()) {
31f18b77 270 derr << "too many arguments: " << args << dendl;
7c673cae
FG
271 usage();
272 }
273
274 if (force_sync && !yes_really) {
31f18b77
FG
275 derr << "are you SURE you want to force a sync? this will erase local data and may\n"
276 << "break your mon cluster. pass --yes-i-really-mean-it if you do." << dendl;
7c673cae
FG
277 exit(1);
278 }
279
280 if (g_conf->mon_data.empty()) {
31f18b77 281 derr << "must specify '--mon-data=foo' data path" << dendl;
7c673cae
FG
282 usage();
283 }
284
285 if (g_conf->name.get_id().empty()) {
31f18b77 286 derr << "must specify id (--id <id> or --name mon.<id>)" << dendl;
7c673cae
FG
287 usage();
288 }
289
290 // -- mkfs --
291 if (mkfs) {
292
293 int err = check_mon_data_exists();
294 if (err == -ENOENT) {
295 if (::mkdir(g_conf->mon_data.c_str(), 0755)) {
31f18b77
FG
296 derr << "mkdir(" << g_conf->mon_data << ") : "
297 << cpp_strerror(errno) << dendl;
7c673cae
FG
298 exit(1);
299 }
300 } else if (err < 0) {
31f18b77
FG
301 derr << "error opening '" << g_conf->mon_data << "': "
302 << cpp_strerror(-err) << dendl;
7c673cae
FG
303 exit(-err);
304 }
305
306 err = check_mon_data_empty();
307 if (err == -ENOTEMPTY) {
308 // Mon may exist. Let the user know and exit gracefully.
31f18b77
FG
309 derr << "'" << g_conf->mon_data << "' already exists and is not empty"
310 << ": monitor may already exist" << dendl;
7c673cae
FG
311 exit(0);
312 } else if (err < 0) {
31f18b77
FG
313 derr << "error checking if '" << g_conf->mon_data << "' is empty: "
314 << cpp_strerror(-err) << dendl;
7c673cae
FG
315 exit(-err);
316 }
317
318 // resolve public_network -> public_addr
319 pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC);
320
321 common_init_finish(g_ceph_context);
322
323 bufferlist monmapbl, osdmapbl;
324 std::string error;
325 MonMap monmap;
326
327 // load or generate monmap
3efd9988
FG
328 const auto monmap_fn = g_conf->get_val<string>("monmap");
329 if (monmap_fn.length()) {
330 int err = monmapbl.read_file(monmap_fn.c_str(), &error);
7c673cae 331 if (err < 0) {
3efd9988 332 derr << argv[0] << ": error reading " << monmap_fn << ": " << error << dendl;
7c673cae
FG
333 exit(1);
334 }
335 try {
336 monmap.decode(monmapbl);
337
338 // always mark seed/mkfs monmap as epoch 0
339 monmap.set_epoch(0);
3efd9988
FG
340 } catch (const buffer::error& e) {
341 derr << argv[0] << ": error decoding monmap " << monmap_fn << ": " << e.what() << dendl;
7c673cae
FG
342 exit(1);
343 }
344 } else {
31f18b77
FG
345 ostringstream oss;
346 int err = monmap.build_initial(g_ceph_context, oss);
347 if (oss.tellp())
348 derr << oss.str() << dendl;
7c673cae 349 if (err < 0) {
31f18b77 350 derr << argv[0] << ": warning: no initial monitors; must use admin socket to feed hints" << dendl;
7c673cae
FG
351 }
352
353 // am i part of the initial quorum?
354 if (monmap.contains(g_conf->name.get_id())) {
355 // hmm, make sure the ip listed exists on the current host?
356 // maybe later.
357 } else if (!g_conf->public_addr.is_blank_ip()) {
358 entity_addr_t a = g_conf->public_addr;
359 if (a.get_port() == 0)
360 a.set_port(CEPH_MON_PORT);
361 if (monmap.contains(a)) {
362 string name;
363 monmap.get_addr_name(a, name);
364 monmap.rename(name, g_conf->name.get_id());
224ce89b
WB
365 dout(0) << argv[0] << ": renaming mon." << name << " " << a
366 << " to mon." << g_conf->name.get_id() << dendl;
7c673cae
FG
367 }
368 } else {
369 // is a local address listed without a name? if so, name myself.
370 list<entity_addr_t> ls;
371 monmap.list_addrs(ls);
372 entity_addr_t local;
373
374 if (have_local_addr(g_ceph_context, ls, &local)) {
375 string name;
376 monmap.get_addr_name(local, name);
377
378 if (name.compare(0, 7, "noname-") == 0) {
224ce89b
WB
379 dout(0) << argv[0] << ": mon." << name << " " << local
380 << " is local, renaming to mon." << g_conf->name.get_id() << dendl;
7c673cae
FG
381 monmap.rename(name, g_conf->name.get_id());
382 } else {
224ce89b
WB
383 dout(0) << argv[0] << ": mon." << name << " " << local
384 << " is local, but not 'noname-' + something; not assuming it's me" << dendl;
7c673cae
FG
385 }
386 }
387 }
388 }
389
3efd9988
FG
390 const auto fsid = g_conf->get_val<uuid_d>("fsid");
391 if (!fsid.is_zero()) {
392 monmap.fsid = fsid;
393 dout(0) << argv[0] << ": set fsid to " << fsid << dendl;
7c673cae
FG
394 }
395
396 if (monmap.fsid.is_zero()) {
31f18b77 397 derr << argv[0] << ": generated monmap has no fsid; use '--fsid <uuid>'" << dendl;
7c673cae
FG
398 exit(10);
399 }
400
401 //monmap.print(cout);
402
403 // osdmap
404 if (osdmapfn.length()) {
405 err = osdmapbl.read_file(osdmapfn.c_str(), &error);
406 if (err < 0) {
31f18b77
FG
407 derr << argv[0] << ": error reading " << osdmapfn << ": "
408 << error << dendl;
7c673cae
FG
409 exit(1);
410 }
411 }
412
413 // go
414 MonitorDBStore store(g_conf->mon_data);
31f18b77
FG
415 ostringstream oss;
416 int r = store.create_and_open(oss);
417 if (oss.tellp())
418 derr << oss.str() << dendl;
7c673cae 419 if (r < 0) {
31f18b77
FG
420 derr << argv[0] << ": error opening mon data directory at '"
421 << g_conf->mon_data << "': " << cpp_strerror(r) << dendl;
7c673cae
FG
422 exit(1);
423 }
424 assert(r == 0);
425
426 Monitor mon(g_ceph_context, g_conf->name.get_id(), &store, 0, 0, &monmap);
427 r = mon.mkfs(osdmapbl);
428 if (r < 0) {
31f18b77 429 derr << argv[0] << ": error creating monfs: " << cpp_strerror(r) << dendl;
7c673cae
FG
430 exit(1);
431 }
432 store.close();
224ce89b
WB
433 dout(0) << argv[0] << ": created monfs at " << g_conf->mon_data
434 << " for " << g_conf->name << dendl;
7c673cae
FG
435 return 0;
436 }
437
438 err = check_mon_data_exists();
439 if (err < 0 && err == -ENOENT) {
31f18b77
FG
440 derr << "monitor data directory at '" << g_conf->mon_data << "'"
441 << " does not exist: have you run 'mkfs'?" << dendl;
7c673cae
FG
442 exit(1);
443 } else if (err < 0) {
31f18b77
FG
444 derr << "error accessing monitor data directory at '"
445 << g_conf->mon_data << "': " << cpp_strerror(-err) << dendl;
7c673cae
FG
446 exit(1);
447 }
448
449 err = check_mon_data_empty();
450 if (err == 0) {
451 derr << "monitor data directory at '" << g_conf->mon_data
452 << "' is empty: have you run 'mkfs'?" << dendl;
453 exit(1);
454 } else if (err < 0 && err != -ENOTEMPTY) {
455 // we don't want an empty data dir by now
31f18b77
FG
456 derr << "error accessing '" << g_conf->mon_data << "': "
457 << cpp_strerror(-err) << dendl;
7c673cae
FG
458 exit(1);
459 }
460
461 {
462 // check fs stats. don't start if it's critically close to full.
463 ceph_data_stats_t stats;
464 int err = get_fs_stats(stats, g_conf->mon_data.c_str());
465 if (err < 0) {
31f18b77
FG
466 derr << "error checking monitor data's fs stats: " << cpp_strerror(err)
467 << dendl;
7c673cae
FG
468 exit(-err);
469 }
470 if (stats.avail_percent <= g_conf->mon_data_avail_crit) {
31f18b77 471 derr << "error: monitor data filesystem reached concerning levels of"
7c673cae 472 << " available storage space (available: "
1adf2230 473 << stats.avail_percent << "% " << byte_u_t(stats.byte_avail)
7c673cae
FG
474 << ")\nyou may adjust 'mon data avail crit' to a lower value"
475 << " to make this go away (default: " << g_conf->mon_data_avail_crit
31f18b77 476 << "%)\n" << dendl;
7c673cae
FG
477 exit(ENOSPC);
478 }
479 }
480
481 // we fork early to prevent leveldb's environment static state from
482 // screwing us over
483 Preforker prefork;
484 if (!(flags & CINIT_FLAG_NO_DAEMON_ACTIONS)) {
485 if (global_init_prefork(g_ceph_context) >= 0) {
486 string err_msg;
487 err = prefork.prefork(err_msg);
488 if (err < 0) {
31f18b77 489 derr << err_msg << dendl;
7c673cae
FG
490 prefork.exit(err);
491 }
492 if (prefork.is_parent()) {
493 err = prefork.parent_wait(err_msg);
494 if (err < 0)
31f18b77 495 derr << err_msg << dendl;
7c673cae
FG
496 prefork.exit(err);
497 }
224ce89b 498 setsid();
7c673cae
FG
499 global_init_postfork_start(g_ceph_context);
500 }
501 common_init_finish(g_ceph_context);
502 global_init_chdir(g_ceph_context);
7c673cae
FG
503 if (global_init_preload_erasure_code(g_ceph_context) < 0)
504 prefork.exit(1);
7c673cae
FG
505 }
506
507 MonitorDBStore *store = new MonitorDBStore(g_conf->mon_data);
31f18b77
FG
508 {
509 ostringstream oss;
510 err = store->open(oss);
511 if (oss.tellp())
512 derr << oss.str() << dendl;
513 if (err < 0) {
514 derr << "error opening mon data directory at '"
515 << g_conf->mon_data << "': " << cpp_strerror(err) << dendl;
516 prefork.exit(1);
517 }
7c673cae
FG
518 }
519
520 bufferlist magicbl;
521 err = store->get(Monitor::MONITOR_NAME, "magic", magicbl);
522 if (err || !magicbl.length()) {
523 derr << "unable to read magic from mon data" << dendl;
524 prefork.exit(1);
525 }
526 string magic(magicbl.c_str(), magicbl.length()-1); // ignore trailing \n
527 if (strcmp(magic.c_str(), CEPH_MON_ONDISK_MAGIC)) {
528 derr << "mon fs magic '" << magic << "' != current '" << CEPH_MON_ONDISK_MAGIC << "'" << dendl;
529 prefork.exit(1);
530 }
531
532 err = Monitor::check_features(store);
533 if (err < 0) {
534 derr << "error checking features: " << cpp_strerror(err) << dendl;
535 prefork.exit(1);
536 }
537
538 // inject new monmap?
539 if (!inject_monmap.empty()) {
540 bufferlist bl;
541 std::string error;
542 int r = bl.read_file(inject_monmap.c_str(), &error);
543 if (r) {
544 derr << "unable to read monmap from " << inject_monmap << ": "
545 << error << dendl;
546 prefork.exit(1);
547 }
548
549 // get next version
550 version_t v = store->get("monmap", "last_committed");
551 dout(0) << "last committed monmap epoch is " << v << ", injected map will be " << (v+1)
552 << dendl;
553 v++;
554
555 // set the version
556 MonMap tmp;
557 tmp.decode(bl);
558 if (tmp.get_epoch() != v) {
559 dout(0) << "changing monmap epoch from " << tmp.get_epoch()
560 << " to " << v << dendl;
561 tmp.set_epoch(v);
562 }
563 bufferlist mapbl;
564 tmp.encode(mapbl, CEPH_FEATURES_ALL);
565 bufferlist final;
566 ::encode(v, final);
567 ::encode(mapbl, final);
568
569 auto t(std::make_shared<MonitorDBStore::Transaction>());
570 // save it
571 t->put("monmap", v, mapbl);
572 t->put("monmap", "latest", final);
573 t->put("monmap", "last_committed", v);
574 store->apply_transaction(t);
575
576 dout(0) << "done." << dendl;
577 prefork.exit(0);
578 }
579
580 // monmap?
581 MonMap monmap;
582 {
583 // note that even if we don't find a viable monmap, we should go ahead
584 // and try to build it up in the next if-else block.
585 bufferlist mapbl;
586 int err = obtain_monmap(*store, mapbl);
587 if (err >= 0) {
588 try {
589 monmap.decode(mapbl);
590 } catch (const buffer::error& e) {
31f18b77 591 derr << "can't decode monmap: " << e.what() << dendl;
7c673cae
FG
592 }
593 } else {
594 derr << "unable to obtain a monmap: " << cpp_strerror(err) << dendl;
595 }
596 if (!extract_monmap.empty()) {
597 int r = mapbl.write_file(extract_monmap.c_str());
598 if (r < 0) {
599 r = -errno;
600 derr << "error writing monmap to " << extract_monmap << ": " << cpp_strerror(r) << dendl;
601 prefork.exit(1);
602 }
603 derr << "wrote monmap to " << extract_monmap << dendl;
604 prefork.exit(0);
605 }
606 }
607
608 // this is what i will bind to
609 entity_addr_t ipaddr;
610
611 if (monmap.contains(g_conf->name.get_id())) {
612 ipaddr = monmap.get_addr(g_conf->name.get_id());
613
614 // print helpful warning if the conf file doesn't match
615 entity_addr_t conf_addr;
616 std::vector <std::string> my_sections;
617 g_conf->get_my_sections(my_sections);
618 std::string mon_addr_str;
619 if (g_conf->get_val_from_conf_file(my_sections, "mon addr",
620 mon_addr_str, true) == 0) {
621 if (conf_addr.parse(mon_addr_str.c_str()) && (ipaddr != conf_addr)) {
622 derr << "WARNING: 'mon addr' config option " << conf_addr
623 << " does not match monmap file" << std::endl
624 << " continuing with monmap configuration" << dendl;
625 }
626 }
627 } else {
628 dout(0) << g_conf->name << " does not exist in monmap, will attempt to join an existing cluster" << dendl;
629
630 pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC);
631 if (!g_conf->public_addr.is_blank_ip()) {
632 ipaddr = g_conf->public_addr;
633 if (ipaddr.get_port() == 0)
634 ipaddr.set_port(CEPH_MON_PORT);
635 dout(0) << "using public_addr " << g_conf->public_addr << " -> "
636 << ipaddr << dendl;
637 } else {
638 MonMap tmpmap;
31f18b77
FG
639 ostringstream oss;
640 int err = tmpmap.build_initial(g_ceph_context, oss);
641 if (oss.tellp())
642 derr << oss.str() << dendl;
7c673cae
FG
643 if (err < 0) {
644 derr << argv[0] << ": error generating initial monmap: "
645 << cpp_strerror(err) << dendl;
646 usage();
647 prefork.exit(1);
648 }
649 if (tmpmap.contains(g_conf->name.get_id())) {
650 ipaddr = tmpmap.get_addr(g_conf->name.get_id());
651 } else {
652 derr << "no public_addr or public_network specified, and " << g_conf->name
653 << " not present in monmap or ceph.conf" << dendl;
654 prefork.exit(1);
655 }
656 }
657 }
658
659 // bind
660 int rank = monmap.get_rank(g_conf->name.get_id());
661 std::string public_msgr_type = g_conf->ms_public_type.empty() ? g_conf->get_val<std::string>("ms_type") : g_conf->ms_public_type;
662 Messenger *msgr = Messenger::create(g_ceph_context, public_msgr_type,
663 entity_name_t::MON(rank), "mon",
664 0, Messenger::HAS_MANY_CONNECTIONS);
665 if (!msgr)
666 exit(1);
667 msgr->set_cluster_protocol(CEPH_MON_PROTOCOL);
668 msgr->set_default_send_priority(CEPH_MSG_PRIO_HIGH);
669
670 msgr->set_default_policy(Messenger::Policy::stateless_server(0));
671 msgr->set_policy(entity_name_t::TYPE_MON,
672 Messenger::Policy::lossless_peer_reuse(
673 CEPH_FEATURE_UID |
674 CEPH_FEATURE_PGID64 |
675 CEPH_FEATURE_MON_SINGLE_PAXOS));
676 msgr->set_policy(entity_name_t::TYPE_OSD,
677 Messenger::Policy::stateless_server(
678 CEPH_FEATURE_PGID64 |
679 CEPH_FEATURE_OSDENC));
680 msgr->set_policy(entity_name_t::TYPE_CLIENT,
681 Messenger::Policy::stateless_server(0));
682 msgr->set_policy(entity_name_t::TYPE_MDS,
683 Messenger::Policy::stateless_server(0));
684
685 // throttle client traffic
686 Throttle *client_throttler = new Throttle(g_ceph_context, "mon_client_bytes",
687 g_conf->mon_client_bytes);
688 msgr->set_policy_throttlers(entity_name_t::TYPE_CLIENT,
689 client_throttler, NULL);
690
691 // throttle daemon traffic
692 // NOTE: actual usage on the leader may multiply by the number of
693 // monitors if they forward large update messages from daemons.
694 Throttle *daemon_throttler = new Throttle(g_ceph_context, "mon_daemon_bytes",
695 g_conf->mon_daemon_bytes);
696 msgr->set_policy_throttlers(entity_name_t::TYPE_OSD, daemon_throttler,
697 NULL);
698 msgr->set_policy_throttlers(entity_name_t::TYPE_MDS, daemon_throttler,
699 NULL);
700
224ce89b
WB
701 entity_addr_t bind_addr = ipaddr;
702 entity_addr_t public_addr = ipaddr;
703
704 // check if the public_bind_addr option is set
705 if (!g_conf->public_bind_addr.is_blank_ip()) {
706 bind_addr = g_conf->public_bind_addr;
707
708 // set the default port if not already set
709 if (bind_addr.get_port() == 0) {
710 bind_addr.set_port(CEPH_MON_PORT);
711 }
712 }
713
7c673cae 714 dout(0) << "starting " << g_conf->name << " rank " << rank
224ce89b
WB
715 << " at public addr " << public_addr
716 << " at bind addr " << bind_addr
7c673cae
FG
717 << " mon_data " << g_conf->mon_data
718 << " fsid " << monmap.get_fsid()
719 << dendl;
720
224ce89b 721 err = msgr->bind(bind_addr);
7c673cae 722 if (err < 0) {
224ce89b 723 derr << "unable to bind monitor to " << bind_addr << dendl;
7c673cae
FG
724 prefork.exit(1);
725 }
726
224ce89b
WB
727 // if the public and bind addr are different set the msgr addr
728 // to the public one, now that the bind is complete.
729 if (public_addr != bind_addr) {
730 msgr->set_addr(public_addr);
731 }
732
7c673cae
FG
733 Messenger *mgr_msgr = Messenger::create(g_ceph_context, public_msgr_type,
734 entity_name_t::MON(rank), "mon-mgrc",
735 getpid(), 0);
736 if (!mgr_msgr) {
737 derr << "unable to create mgr_msgr" << dendl;
738 prefork.exit(1);
739 }
740
224ce89b 741 dout(0) << "starting " << g_conf->name << " rank " << rank
7c673cae
FG
742 << " at " << ipaddr
743 << " mon_data " << g_conf->mon_data
744 << " fsid " << monmap.get_fsid()
224ce89b 745 << dendl;
7c673cae
FG
746
747 // start monitor
748 mon = new Monitor(g_ceph_context, g_conf->name.get_id(), store,
749 msgr, mgr_msgr, &monmap);
750
751 if (force_sync) {
752 derr << "flagging a forced sync ..." << dendl;
31f18b77
FG
753 ostringstream oss;
754 mon->sync_force(NULL, oss);
755 if (oss.tellp())
756 derr << oss.str() << dendl;
7c673cae
FG
757 }
758
759 err = mon->preinit();
760 if (err < 0) {
761 derr << "failed to initialize" << dendl;
762 prefork.exit(1);
763 }
764
765 if (compact || g_conf->mon_compact_on_start) {
766 derr << "compacting monitor store ..." << dendl;
767 mon->store->compact();
768 derr << "done compacting" << dendl;
769 }
770
771 if (g_conf->daemonize) {
772 global_init_postfork_finish(g_ceph_context);
773 prefork.daemonize();
774 }
775
776 msgr->start();
777 mgr_msgr->start();
778
779 mon->init();
780
781 // set up signal handlers, now that we've daemonized/forked.
782 init_async_signal_handler();
783 register_async_signal_handler(SIGHUP, sighup_handler);
784 register_async_signal_handler_oneshot(SIGINT, handle_mon_signal);
785 register_async_signal_handler_oneshot(SIGTERM, handle_mon_signal);
786
787 if (g_conf->inject_early_sigterm)
788 kill(getpid(), SIGTERM);
789
790 msgr->wait();
791 mgr_msgr->wait();
792
793 store->close();
794
795 unregister_async_signal_handler(SIGHUP, sighup_handler);
796 unregister_async_signal_handler(SIGINT, handle_mon_signal);
797 unregister_async_signal_handler(SIGTERM, handle_mon_signal);
798 shutdown_async_signal_handler();
799
800 delete mon;
801 delete store;
802 delete msgr;
803 delete mgr_msgr;
804 delete client_throttler;
805 delete daemon_throttler;
806
807 // cd on exit, so that gmon.out (if any) goes into a separate directory for each node.
808 char s[20];
809 snprintf(s, sizeof(s), "gmon/%d", getpid());
810 if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) {
811 dout(0) << "ceph-mon: gmon.out should be in " << s << dendl;
812 }
813
814 prefork.signal_exit(0);
815 return 0;
816}