]> git.proxmox.com Git - ceph.git/blob - ceph/src/ceph_mon.cc
bump version to 12.0.3-pve3
[ceph.git] / ceph / src / ceph_mon.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <fcntl.h>
18
19 #include <iostream>
20 #include <string>
21 using namespace std;
22
23 #include "common/config.h"
24 #include "include/ceph_features.h"
25
26 #include "mon/MonMap.h"
27 #include "mon/Monitor.h"
28 #include "mon/MonitorDBStore.h"
29 #include "mon/MonClient.h"
30
31 #include "msg/Messenger.h"
32
33 #include "include/CompatSet.h"
34
35 #include "common/ceph_argparse.h"
36 #include "common/pick_address.h"
37 #include "common/Timer.h"
38 #include "common/errno.h"
39 #include "common/Preforker.h"
40
41 #include "global/global_init.h"
42 #include "global/signal_handler.h"
43
44 #include "perfglue/heap_profiler.h"
45
46 #include "include/assert.h"
47
48 #define dout_subsys ceph_subsys_mon
49
50 Monitor *mon = NULL;
51
52 void handle_mon_signal(int signum)
53 {
54 if (mon)
55 mon->handle_signal(signum);
56 }
57
58
59 int obtain_monmap(MonitorDBStore &store, bufferlist &bl)
60 {
61 dout(10) << __func__ << dendl;
62 /*
63 * the monmap may be in one of three places:
64 * 'monmap:<latest_version_no>' - the monmap we'd really like to have
65 * 'mon_sync:latest_monmap' - last monmap backed up for the last sync
66 * 'mkfs:monmap' - a monmap resulting from mkfs
67 */
68
69 if (store.exists("monmap", "last_committed")) {
70 version_t latest_ver = store.get("monmap", "last_committed");
71 if (store.exists("monmap", latest_ver)) {
72 int err = store.get("monmap", latest_ver, bl);
73 assert(err == 0);
74 assert(bl.length() > 0);
75 dout(10) << __func__ << " read last committed monmap ver "
76 << latest_ver << dendl;
77 return 0;
78 }
79 }
80
81 if (store.exists("mon_sync", "in_sync")
82 || store.exists("mon_sync", "force_sync")) {
83 dout(10) << __func__ << " detected aborted sync" << dendl;
84 if (store.exists("mon_sync", "latest_monmap")) {
85 int err = store.get("mon_sync", "latest_monmap", bl);
86 assert(err == 0);
87 assert(bl.length() > 0);
88 dout(10) << __func__ << " read backup monmap" << dendl;
89 return 0;
90 }
91 }
92
93 if (store.exists("mkfs", "monmap")) {
94 dout(10) << __func__ << " found mkfs monmap" << dendl;
95 int err = store.get("mkfs", "monmap", bl);
96 assert(err == 0);
97 assert(bl.length() > 0);
98 return 0;
99 }
100
101 derr << __func__ << " unable to find a monmap" << dendl;
102 return -ENOENT;
103 }
104
105 int check_mon_data_exists()
106 {
107 string mon_data = g_conf->mon_data;
108 struct stat buf;
109 if (::stat(mon_data.c_str(), &buf)) {
110 if (errno != ENOENT) {
111 cerr << "stat(" << mon_data << ") " << cpp_strerror(errno) << std::endl;
112 }
113 return -errno;
114 }
115 return 0;
116 }
117
118 /** Check whether **mon data** is empty.
119 *
120 * Being empty means mkfs has not been run and there's no monitor setup
121 * at **g_conf->mon_data**.
122 *
123 * If the directory g_conf->mon_data is not empty we will return -ENOTEMPTY.
124 * Otherwise we will return 0. Any other negative returns will represent
125 * a failure to be handled by the caller.
126 *
127 * @return **0** on success, -ENOTEMPTY if not empty or **-errno** otherwise.
128 */
129 int check_mon_data_empty()
130 {
131 string mon_data = g_conf->mon_data;
132
133 DIR *dir = ::opendir(mon_data.c_str());
134 if (!dir) {
135 cerr << "opendir(" << mon_data << ") " << cpp_strerror(errno) << std::endl;
136 return -errno;
137 }
138 int code = 0;
139 struct dirent *de = nullptr;
140 errno = 0;
141 while ((de = ::readdir(dir))) {
142 if (string(".") != de->d_name &&
143 string("..") != de->d_name &&
144 string("kv_backend") != de->d_name) {
145 code = -ENOTEMPTY;
146 break;
147 }
148 }
149 if (!de && errno) {
150 cerr << "readdir(" << mon_data << ") " << cpp_strerror(errno) << std::endl;
151 code = -errno;
152 }
153
154 ::closedir(dir);
155
156 return code;
157 }
158
159 static void usage()
160 {
161 cerr << "usage: ceph-mon -i monid [flags]" << std::endl;
162 cerr << " --debug_mon n\n";
163 cerr << " debug monitor level (e.g. 10)\n";
164 cerr << " --mkfs\n";
165 cerr << " build fresh monitor fs\n";
166 cerr << " --force-sync\n";
167 cerr << " force a sync from another mon by wiping local data (BE CAREFUL)\n";
168 cerr << " --yes-i-really-mean-it\n";
169 cerr << " mandatory safeguard for --force-sync\n";
170 cerr << " --compact\n";
171 cerr << " compact the monitor store\n";
172 cerr << " --osdmap <filename>\n";
173 cerr << " only used when --mkfs is provided: load the osdmap from <filename>\n";
174 cerr << " --inject-monmap <filename>\n";
175 cerr << " write the <filename> monmap to the local monitor store and exit\n";
176 cerr << " --extract-monmap <filename>\n";
177 cerr << " extract the monmap from the local monitor store and exit\n";
178 cerr << " --mon-data <directory>\n";
179 cerr << " where the mon store and keyring are located\n";
180 generic_server_usage();
181 }
182
183 #ifdef BUILDING_FOR_EMBEDDED
184 void cephd_preload_embedded_plugins();
185 extern "C" int cephd_mon(int argc, const char **argv)
186 #else
187 int main(int argc, const char **argv)
188 #endif
189 {
190 int err;
191
192 bool mkfs = false;
193 bool compact = false;
194 bool force_sync = false;
195 bool yes_really = false;
196 std::string osdmapfn, inject_monmap, extract_monmap;
197
198 vector<const char*> args;
199 argv_to_vec(argc, argv, args);
200 env_to_vec(args);
201
202 // We need to specify some default values that may be overridden by the
203 // user, that are specific to the monitor. The options we are overriding
204 // are also used on the OSD (or in any other component that uses leveldb),
205 // so changing them directly in common/config_opts.h is not an option.
206 // This is not the prettiest way of doing this, especially since it has us
207 // having a different place than common/config_opts.h defining default
208 // values, but it's not horribly wrong enough to prevent us from doing it :)
209 //
210 // NOTE: user-defined options will take precedence over ours.
211 //
212 // leveldb_write_buffer_size = 32*1024*1024 = 33554432 // 32MB
213 // leveldb_cache_size = 512*1024*1204 = 536870912 // 512MB
214 // leveldb_block_size = 64*1024 = 65536 // 64KB
215 // leveldb_compression = false
216 // leveldb_log = ""
217 vector<const char*> def_args;
218 def_args.push_back("--leveldb-write-buffer-size=33554432");
219 def_args.push_back("--leveldb-cache-size=536870912");
220 def_args.push_back("--leveldb-block-size=65536");
221 def_args.push_back("--leveldb-compression=false");
222 def_args.push_back("--leveldb-log=");
223
224 int flags = 0;
225 {
226 vector<const char*> args_copy = args;
227 std::string val;
228 for (std::vector<const char*>::iterator i = args_copy.begin();
229 i != args_copy.end(); ) {
230 if (ceph_argparse_double_dash(args_copy, i)) {
231 break;
232 } else if (ceph_argparse_flag(args_copy, i, "--mkfs", (char*)NULL)) {
233 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
234 } else if (ceph_argparse_witharg(args_copy, i, &val, "--inject_monmap", (char*)NULL)) {
235 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
236 } else if (ceph_argparse_witharg(args_copy, i, &val, "--extract-monmap", (char*)NULL)) {
237 flags |= CINIT_FLAG_NO_DAEMON_ACTIONS;
238 } else {
239 ++i;
240 }
241 }
242 }
243
244 auto cct = global_init(&def_args, args,
245 CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON,
246 flags, "mon_data");
247 ceph_heap_profiler_init();
248
249 uuid_d fsid;
250 std::string val;
251 for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
252 if (ceph_argparse_double_dash(args, i)) {
253 break;
254 } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
255 usage();
256 } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) {
257 mkfs = true;
258 } else if (ceph_argparse_flag(args, i, "--compact", (char*)NULL)) {
259 compact = true;
260 } else if (ceph_argparse_flag(args, i, "--force-sync", (char*)NULL)) {
261 force_sync = true;
262 } else if (ceph_argparse_flag(args, i, "--yes-i-really-mean-it", (char*)NULL)) {
263 yes_really = true;
264 } else if (ceph_argparse_witharg(args, i, &val, "--osdmap", (char*)NULL)) {
265 osdmapfn = val;
266 } else if (ceph_argparse_witharg(args, i, &val, "--inject_monmap", (char*)NULL)) {
267 inject_monmap = val;
268 } else if (ceph_argparse_witharg(args, i, &val, "--extract-monmap", (char*)NULL)) {
269 extract_monmap = val;
270 } else {
271 ++i;
272 }
273 }
274 if (!args.empty()) {
275 cerr << "too many arguments: " << args << std::endl;
276 usage();
277 }
278
279 if (force_sync && !yes_really) {
280 cerr << "are you SURE you want to force a sync? this will erase local data and may\n"
281 << "break your mon cluster. pass --yes-i-really-mean-it if you do." << std::endl;
282 exit(1);
283 }
284
285 if (g_conf->mon_data.empty()) {
286 cerr << "must specify '--mon-data=foo' data path" << std::endl;
287 usage();
288 }
289
290 if (g_conf->name.get_id().empty()) {
291 cerr << "must specify id (--id <id> or --name mon.<id>)" << std::endl;
292 usage();
293 }
294
295 // -- mkfs --
296 if (mkfs) {
297
298 int err = check_mon_data_exists();
299 if (err == -ENOENT) {
300 if (::mkdir(g_conf->mon_data.c_str(), 0755)) {
301 cerr << "mkdir(" << g_conf->mon_data << ") : "
302 << cpp_strerror(errno) << std::endl;
303 exit(1);
304 }
305 } else if (err < 0) {
306 cerr << "error opening '" << g_conf->mon_data << "': "
307 << cpp_strerror(-err) << std::endl;
308 exit(-err);
309 }
310
311 err = check_mon_data_empty();
312 if (err == -ENOTEMPTY) {
313 // Mon may exist. Let the user know and exit gracefully.
314 cerr << "'" << g_conf->mon_data << "' already exists and is not empty"
315 << ": monitor may already exist" << std::endl;
316 exit(0);
317 } else if (err < 0) {
318 cerr << "error checking if '" << g_conf->mon_data << "' is empty: "
319 << cpp_strerror(-err) << std::endl;
320 exit(-err);
321 }
322
323 // resolve public_network -> public_addr
324 pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC);
325
326 common_init_finish(g_ceph_context);
327
328 bufferlist monmapbl, osdmapbl;
329 std::string error;
330 MonMap monmap;
331
332 // load or generate monmap
333 if (g_conf->monmap.length()) {
334 int err = monmapbl.read_file(g_conf->monmap.c_str(), &error);
335 if (err < 0) {
336 cerr << argv[0] << ": error reading " << g_conf->monmap << ": " << error << std::endl;
337 exit(1);
338 }
339 try {
340 monmap.decode(monmapbl);
341
342 // always mark seed/mkfs monmap as epoch 0
343 monmap.set_epoch(0);
344 }
345 catch (const buffer::error& e) {
346 cerr << argv[0] << ": error decoding monmap " << g_conf->monmap << ": " << e.what() << std::endl;
347 exit(1);
348 }
349 } else {
350 int err = monmap.build_initial(g_ceph_context, cerr);
351 if (err < 0) {
352 cerr << argv[0] << ": warning: no initial monitors; must use admin socket to feed hints" << std::endl;
353 }
354
355 // am i part of the initial quorum?
356 if (monmap.contains(g_conf->name.get_id())) {
357 // hmm, make sure the ip listed exists on the current host?
358 // maybe later.
359 } else if (!g_conf->public_addr.is_blank_ip()) {
360 entity_addr_t a = g_conf->public_addr;
361 if (a.get_port() == 0)
362 a.set_port(CEPH_MON_PORT);
363 if (monmap.contains(a)) {
364 string name;
365 monmap.get_addr_name(a, name);
366 monmap.rename(name, g_conf->name.get_id());
367 cout << argv[0] << ": renaming mon." << name << " " << a
368 << " to mon." << g_conf->name.get_id() << std::endl;
369 }
370 } else {
371 // is a local address listed without a name? if so, name myself.
372 list<entity_addr_t> ls;
373 monmap.list_addrs(ls);
374 entity_addr_t local;
375
376 if (have_local_addr(g_ceph_context, ls, &local)) {
377 string name;
378 monmap.get_addr_name(local, name);
379
380 if (name.compare(0, 7, "noname-") == 0) {
381 cout << argv[0] << ": mon." << name << " " << local
382 << " is local, renaming to mon." << g_conf->name.get_id() << std::endl;
383 monmap.rename(name, g_conf->name.get_id());
384 } else {
385 cout << argv[0] << ": mon." << name << " " << local
386 << " is local, but not 'noname-' + something; not assuming it's me" << std::endl;
387 }
388 }
389 }
390 }
391
392 if (!g_conf->fsid.is_zero()) {
393 monmap.fsid = g_conf->fsid;
394 cout << argv[0] << ": set fsid to " << g_conf->fsid << std::endl;
395 }
396
397 if (monmap.fsid.is_zero()) {
398 cerr << argv[0] << ": generated monmap has no fsid; use '--fsid <uuid>'" << std::endl;
399 exit(10);
400 }
401
402 //monmap.print(cout);
403
404 // osdmap
405 if (osdmapfn.length()) {
406 err = osdmapbl.read_file(osdmapfn.c_str(), &error);
407 if (err < 0) {
408 cerr << argv[0] << ": error reading " << osdmapfn << ": "
409 << error << std::endl;
410 exit(1);
411 }
412 }
413
414 // go
415 MonitorDBStore store(g_conf->mon_data);
416 int r = store.create_and_open(cerr);
417 if (r < 0) {
418 cerr << argv[0] << ": error opening mon data directory at '"
419 << g_conf->mon_data << "': " << cpp_strerror(r) << std::endl;
420 exit(1);
421 }
422 assert(r == 0);
423
424 Monitor mon(g_ceph_context, g_conf->name.get_id(), &store, 0, 0, &monmap);
425 r = mon.mkfs(osdmapbl);
426 if (r < 0) {
427 cerr << argv[0] << ": error creating monfs: " << cpp_strerror(r) << std::endl;
428 exit(1);
429 }
430 store.close();
431 cout << argv[0] << ": created monfs at " << g_conf->mon_data
432 << " for " << g_conf->name << std::endl;
433 return 0;
434 }
435
436 err = check_mon_data_exists();
437 if (err < 0 && err == -ENOENT) {
438 cerr << "monitor data directory at '" << g_conf->mon_data << "'"
439 << " does not exist: have you run 'mkfs'?" << std::endl;
440 exit(1);
441 } else if (err < 0) {
442 cerr << "error accessing monitor data directory at '"
443 << g_conf->mon_data << "': " << cpp_strerror(-err) << std::endl;
444 exit(1);
445 }
446
447 err = check_mon_data_empty();
448 if (err == 0) {
449 derr << "monitor data directory at '" << g_conf->mon_data
450 << "' is empty: have you run 'mkfs'?" << dendl;
451 exit(1);
452 } else if (err < 0 && err != -ENOTEMPTY) {
453 // we don't want an empty data dir by now
454 cerr << "error accessing '" << g_conf->mon_data << "': "
455 << cpp_strerror(-err) << std::endl;
456 exit(1);
457 }
458
459 {
460 // check fs stats. don't start if it's critically close to full.
461 ceph_data_stats_t stats;
462 int err = get_fs_stats(stats, g_conf->mon_data.c_str());
463 if (err < 0) {
464 cerr << "error checking monitor data's fs stats: " << cpp_strerror(err)
465 << std::endl;
466 exit(-err);
467 }
468 if (stats.avail_percent <= g_conf->mon_data_avail_crit) {
469 cerr << "error: monitor data filesystem reached concerning levels of"
470 << " available storage space (available: "
471 << stats.avail_percent << "% " << prettybyte_t(stats.byte_avail)
472 << ")\nyou may adjust 'mon data avail crit' to a lower value"
473 << " to make this go away (default: " << g_conf->mon_data_avail_crit
474 << "%)\n" << std::endl;
475 exit(ENOSPC);
476 }
477 }
478
479 // we fork early to prevent leveldb's environment static state from
480 // screwing us over
481 Preforker prefork;
482 if (!(flags & CINIT_FLAG_NO_DAEMON_ACTIONS)) {
483 if (global_init_prefork(g_ceph_context) >= 0) {
484 string err_msg;
485 err = prefork.prefork(err_msg);
486 if (err < 0) {
487 cerr << err_msg << std::endl;
488 prefork.exit(err);
489 }
490 if (prefork.is_parent()) {
491 err = prefork.parent_wait(err_msg);
492 if (err < 0)
493 cerr << err_msg << std::endl;
494 prefork.exit(err);
495 }
496 global_init_postfork_start(g_ceph_context);
497 }
498 common_init_finish(g_ceph_context);
499 global_init_chdir(g_ceph_context);
500 #ifndef BUILDING_FOR_EMBEDDED
501 if (global_init_preload_erasure_code(g_ceph_context) < 0)
502 prefork.exit(1);
503 #else
504 cephd_preload_embedded_plugins();
505 #endif
506 }
507
508 MonitorDBStore *store = new MonitorDBStore(g_conf->mon_data);
509 err = store->open(std::cerr);
510 if (err < 0) {
511 derr << "error opening mon data directory at '"
512 << g_conf->mon_data << "': " << cpp_strerror(err) << dendl;
513 prefork.exit(1);
514 }
515
516 bufferlist magicbl;
517 err = store->get(Monitor::MONITOR_NAME, "magic", magicbl);
518 if (err || !magicbl.length()) {
519 derr << "unable to read magic from mon data" << dendl;
520 prefork.exit(1);
521 }
522 string magic(magicbl.c_str(), magicbl.length()-1); // ignore trailing \n
523 if (strcmp(magic.c_str(), CEPH_MON_ONDISK_MAGIC)) {
524 derr << "mon fs magic '" << magic << "' != current '" << CEPH_MON_ONDISK_MAGIC << "'" << dendl;
525 prefork.exit(1);
526 }
527
528 err = Monitor::check_features(store);
529 if (err < 0) {
530 derr << "error checking features: " << cpp_strerror(err) << dendl;
531 prefork.exit(1);
532 }
533
534 // inject new monmap?
535 if (!inject_monmap.empty()) {
536 bufferlist bl;
537 std::string error;
538 int r = bl.read_file(inject_monmap.c_str(), &error);
539 if (r) {
540 derr << "unable to read monmap from " << inject_monmap << ": "
541 << error << dendl;
542 prefork.exit(1);
543 }
544
545 // get next version
546 version_t v = store->get("monmap", "last_committed");
547 dout(0) << "last committed monmap epoch is " << v << ", injected map will be " << (v+1)
548 << dendl;
549 v++;
550
551 // set the version
552 MonMap tmp;
553 tmp.decode(bl);
554 if (tmp.get_epoch() != v) {
555 dout(0) << "changing monmap epoch from " << tmp.get_epoch()
556 << " to " << v << dendl;
557 tmp.set_epoch(v);
558 }
559 bufferlist mapbl;
560 tmp.encode(mapbl, CEPH_FEATURES_ALL);
561 bufferlist final;
562 ::encode(v, final);
563 ::encode(mapbl, final);
564
565 auto t(std::make_shared<MonitorDBStore::Transaction>());
566 // save it
567 t->put("monmap", v, mapbl);
568 t->put("monmap", "latest", final);
569 t->put("monmap", "last_committed", v);
570 store->apply_transaction(t);
571
572 dout(0) << "done." << dendl;
573 prefork.exit(0);
574 }
575
576 // monmap?
577 MonMap monmap;
578 {
579 // note that even if we don't find a viable monmap, we should go ahead
580 // and try to build it up in the next if-else block.
581 bufferlist mapbl;
582 int err = obtain_monmap(*store, mapbl);
583 if (err >= 0) {
584 try {
585 monmap.decode(mapbl);
586 } catch (const buffer::error& e) {
587 cerr << "can't decode monmap: " << e.what() << std::endl;
588 }
589 } else {
590 derr << "unable to obtain a monmap: " << cpp_strerror(err) << dendl;
591 }
592 if (!extract_monmap.empty()) {
593 int r = mapbl.write_file(extract_monmap.c_str());
594 if (r < 0) {
595 r = -errno;
596 derr << "error writing monmap to " << extract_monmap << ": " << cpp_strerror(r) << dendl;
597 prefork.exit(1);
598 }
599 derr << "wrote monmap to " << extract_monmap << dendl;
600 prefork.exit(0);
601 }
602 }
603
604 // this is what i will bind to
605 entity_addr_t ipaddr;
606
607 if (monmap.contains(g_conf->name.get_id())) {
608 ipaddr = monmap.get_addr(g_conf->name.get_id());
609
610 // print helpful warning if the conf file doesn't match
611 entity_addr_t conf_addr;
612 std::vector <std::string> my_sections;
613 g_conf->get_my_sections(my_sections);
614 std::string mon_addr_str;
615 if (g_conf->get_val_from_conf_file(my_sections, "mon addr",
616 mon_addr_str, true) == 0) {
617 if (conf_addr.parse(mon_addr_str.c_str()) && (ipaddr != conf_addr)) {
618 derr << "WARNING: 'mon addr' config option " << conf_addr
619 << " does not match monmap file" << std::endl
620 << " continuing with monmap configuration" << dendl;
621 }
622 }
623 } else {
624 dout(0) << g_conf->name << " does not exist in monmap, will attempt to join an existing cluster" << dendl;
625
626 pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC);
627 if (!g_conf->public_addr.is_blank_ip()) {
628 ipaddr = g_conf->public_addr;
629 if (ipaddr.get_port() == 0)
630 ipaddr.set_port(CEPH_MON_PORT);
631 dout(0) << "using public_addr " << g_conf->public_addr << " -> "
632 << ipaddr << dendl;
633 } else {
634 MonMap tmpmap;
635 int err = tmpmap.build_initial(g_ceph_context, cerr);
636 if (err < 0) {
637 derr << argv[0] << ": error generating initial monmap: "
638 << cpp_strerror(err) << dendl;
639 usage();
640 prefork.exit(1);
641 }
642 if (tmpmap.contains(g_conf->name.get_id())) {
643 ipaddr = tmpmap.get_addr(g_conf->name.get_id());
644 } else {
645 derr << "no public_addr or public_network specified, and " << g_conf->name
646 << " not present in monmap or ceph.conf" << dendl;
647 prefork.exit(1);
648 }
649 }
650 }
651
652 // bind
653 int rank = monmap.get_rank(g_conf->name.get_id());
654 std::string public_msgr_type = g_conf->ms_public_type.empty() ? g_conf->get_val<std::string>("ms_type") : g_conf->ms_public_type;
655 Messenger *msgr = Messenger::create(g_ceph_context, public_msgr_type,
656 entity_name_t::MON(rank), "mon",
657 0, Messenger::HAS_MANY_CONNECTIONS);
658 if (!msgr)
659 exit(1);
660 msgr->set_cluster_protocol(CEPH_MON_PROTOCOL);
661 msgr->set_default_send_priority(CEPH_MSG_PRIO_HIGH);
662
663 msgr->set_default_policy(Messenger::Policy::stateless_server(0));
664 msgr->set_policy(entity_name_t::TYPE_MON,
665 Messenger::Policy::lossless_peer_reuse(
666 CEPH_FEATURE_UID |
667 CEPH_FEATURE_PGID64 |
668 CEPH_FEATURE_MON_SINGLE_PAXOS));
669 msgr->set_policy(entity_name_t::TYPE_OSD,
670 Messenger::Policy::stateless_server(
671 CEPH_FEATURE_PGID64 |
672 CEPH_FEATURE_OSDENC));
673 msgr->set_policy(entity_name_t::TYPE_CLIENT,
674 Messenger::Policy::stateless_server(0));
675 msgr->set_policy(entity_name_t::TYPE_MDS,
676 Messenger::Policy::stateless_server(0));
677
678 // throttle client traffic
679 Throttle *client_throttler = new Throttle(g_ceph_context, "mon_client_bytes",
680 g_conf->mon_client_bytes);
681 msgr->set_policy_throttlers(entity_name_t::TYPE_CLIENT,
682 client_throttler, NULL);
683
684 // throttle daemon traffic
685 // NOTE: actual usage on the leader may multiply by the number of
686 // monitors if they forward large update messages from daemons.
687 Throttle *daemon_throttler = new Throttle(g_ceph_context, "mon_daemon_bytes",
688 g_conf->mon_daemon_bytes);
689 msgr->set_policy_throttlers(entity_name_t::TYPE_OSD, daemon_throttler,
690 NULL);
691 msgr->set_policy_throttlers(entity_name_t::TYPE_MDS, daemon_throttler,
692 NULL);
693
694 dout(0) << "starting " << g_conf->name << " rank " << rank
695 << " at " << ipaddr
696 << " mon_data " << g_conf->mon_data
697 << " fsid " << monmap.get_fsid()
698 << dendl;
699
700 err = msgr->bind(ipaddr);
701 if (err < 0) {
702 derr << "unable to bind monitor to " << ipaddr << dendl;
703 prefork.exit(1);
704 }
705
706 Messenger *mgr_msgr = Messenger::create(g_ceph_context, public_msgr_type,
707 entity_name_t::MON(rank), "mon-mgrc",
708 getpid(), 0);
709 if (!mgr_msgr) {
710 derr << "unable to create mgr_msgr" << dendl;
711 prefork.exit(1);
712 }
713
714 cout << "starting " << g_conf->name << " rank " << rank
715 << " at " << ipaddr
716 << " mon_data " << g_conf->mon_data
717 << " fsid " << monmap.get_fsid()
718 << std::endl;
719
720 // start monitor
721 mon = new Monitor(g_ceph_context, g_conf->name.get_id(), store,
722 msgr, mgr_msgr, &monmap);
723
724 if (force_sync) {
725 derr << "flagging a forced sync ..." << dendl;
726 mon->sync_force(NULL, cerr);
727 }
728
729 err = mon->preinit();
730 if (err < 0) {
731 derr << "failed to initialize" << dendl;
732 prefork.exit(1);
733 }
734
735 if (compact || g_conf->mon_compact_on_start) {
736 derr << "compacting monitor store ..." << dendl;
737 mon->store->compact();
738 derr << "done compacting" << dendl;
739 }
740
741 if (g_conf->daemonize) {
742 global_init_postfork_finish(g_ceph_context);
743 prefork.daemonize();
744 }
745
746 msgr->start();
747 mgr_msgr->start();
748
749 mon->init();
750
751 // set up signal handlers, now that we've daemonized/forked.
752 init_async_signal_handler();
753 register_async_signal_handler(SIGHUP, sighup_handler);
754 register_async_signal_handler_oneshot(SIGINT, handle_mon_signal);
755 register_async_signal_handler_oneshot(SIGTERM, handle_mon_signal);
756
757 if (g_conf->inject_early_sigterm)
758 kill(getpid(), SIGTERM);
759
760 msgr->wait();
761 mgr_msgr->wait();
762
763 store->close();
764
765 unregister_async_signal_handler(SIGHUP, sighup_handler);
766 unregister_async_signal_handler(SIGINT, handle_mon_signal);
767 unregister_async_signal_handler(SIGTERM, handle_mon_signal);
768 shutdown_async_signal_handler();
769
770 delete mon;
771 delete store;
772 delete msgr;
773 delete mgr_msgr;
774 delete client_throttler;
775 delete daemon_throttler;
776
777 // cd on exit, so that gmon.out (if any) goes into a separate directory for each node.
778 char s[20];
779 snprintf(s, sizeof(s), "gmon/%d", getpid());
780 if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) {
781 dout(0) << "ceph-mon: gmon.out should be in " << s << dendl;
782 }
783
784 prefork.signal_exit(0);
785 return 0;
786 }
787