]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include <sys/types.h> | |
16 | #include <sys/stat.h> | |
17 | #include <fcntl.h> | |
18 | ||
19 | #include <iostream> | |
20 | #include <string> | |
21 | using namespace std; | |
22 | ||
23 | #include "common/config.h" | |
24 | #include "include/ceph_features.h" | |
25 | ||
26 | #include "mon/MonMap.h" | |
27 | #include "mon/Monitor.h" | |
28 | #include "mon/MonitorDBStore.h" | |
29 | #include "mon/MonClient.h" | |
30 | ||
31 | #include "msg/Messenger.h" | |
32 | ||
33 | #include "include/CompatSet.h" | |
34 | ||
35 | #include "common/ceph_argparse.h" | |
36 | #include "common/pick_address.h" | |
37 | #include "common/Timer.h" | |
38 | #include "common/errno.h" | |
39 | #include "common/Preforker.h" | |
40 | ||
41 | #include "global/global_init.h" | |
42 | #include "global/signal_handler.h" | |
43 | ||
44 | #include "perfglue/heap_profiler.h" | |
45 | ||
46 | #include "include/assert.h" | |
47 | ||
48 | #define dout_subsys ceph_subsys_mon | |
49 | ||
50 | Monitor *mon = NULL; | |
51 | ||
52 | void handle_mon_signal(int signum) | |
53 | { | |
54 | if (mon) | |
55 | mon->handle_signal(signum); | |
56 | } | |
57 | ||
58 | ||
59 | int obtain_monmap(MonitorDBStore &store, bufferlist &bl) | |
60 | { | |
61 | dout(10) << __func__ << dendl; | |
62 | /* | |
63 | * the monmap may be in one of three places: | |
64 | * 'monmap:<latest_version_no>' - the monmap we'd really like to have | |
65 | * 'mon_sync:latest_monmap' - last monmap backed up for the last sync | |
66 | * 'mkfs:monmap' - a monmap resulting from mkfs | |
67 | */ | |
68 | ||
69 | if (store.exists("monmap", "last_committed")) { | |
70 | version_t latest_ver = store.get("monmap", "last_committed"); | |
71 | if (store.exists("monmap", latest_ver)) { | |
72 | int err = store.get("monmap", latest_ver, bl); | |
73 | assert(err == 0); | |
74 | assert(bl.length() > 0); | |
75 | dout(10) << __func__ << " read last committed monmap ver " | |
76 | << latest_ver << dendl; | |
77 | return 0; | |
78 | } | |
79 | } | |
80 | ||
81 | if (store.exists("mon_sync", "in_sync") | |
82 | || store.exists("mon_sync", "force_sync")) { | |
83 | dout(10) << __func__ << " detected aborted sync" << dendl; | |
84 | if (store.exists("mon_sync", "latest_monmap")) { | |
85 | int err = store.get("mon_sync", "latest_monmap", bl); | |
86 | assert(err == 0); | |
87 | assert(bl.length() > 0); | |
88 | dout(10) << __func__ << " read backup monmap" << dendl; | |
89 | return 0; | |
90 | } | |
91 | } | |
92 | ||
93 | if (store.exists("mkfs", "monmap")) { | |
94 | dout(10) << __func__ << " found mkfs monmap" << dendl; | |
95 | int err = store.get("mkfs", "monmap", bl); | |
96 | assert(err == 0); | |
97 | assert(bl.length() > 0); | |
98 | return 0; | |
99 | } | |
100 | ||
101 | derr << __func__ << " unable to find a monmap" << dendl; | |
102 | return -ENOENT; | |
103 | } | |
104 | ||
105 | int check_mon_data_exists() | |
106 | { | |
107 | string mon_data = g_conf->mon_data; | |
108 | struct stat buf; | |
109 | if (::stat(mon_data.c_str(), &buf)) { | |
110 | if (errno != ENOENT) { | |
31f18b77 | 111 | derr << "stat(" << mon_data << ") " << cpp_strerror(errno) << dendl; |
7c673cae FG |
112 | } |
113 | return -errno; | |
114 | } | |
115 | return 0; | |
116 | } | |
117 | ||
118 | /** Check whether **mon data** is empty. | |
119 | * | |
120 | * Being empty means mkfs has not been run and there's no monitor setup | |
121 | * at **g_conf->mon_data**. | |
122 | * | |
123 | * If the directory g_conf->mon_data is not empty we will return -ENOTEMPTY. | |
124 | * Otherwise we will return 0. Any other negative returns will represent | |
125 | * a failure to be handled by the caller. | |
126 | * | |
127 | * @return **0** on success, -ENOTEMPTY if not empty or **-errno** otherwise. | |
128 | */ | |
129 | int check_mon_data_empty() | |
130 | { | |
131 | string mon_data = g_conf->mon_data; | |
132 | ||
133 | DIR *dir = ::opendir(mon_data.c_str()); | |
134 | if (!dir) { | |
31f18b77 | 135 | derr << "opendir(" << mon_data << ") " << cpp_strerror(errno) << dendl; |
7c673cae FG |
136 | return -errno; |
137 | } | |
138 | int code = 0; | |
139 | struct dirent *de = nullptr; | |
140 | errno = 0; | |
141 | while ((de = ::readdir(dir))) { | |
142 | if (string(".") != de->d_name && | |
143 | string("..") != de->d_name && | |
144 | string("kv_backend") != de->d_name) { | |
145 | code = -ENOTEMPTY; | |
146 | break; | |
147 | } | |
148 | } | |
149 | if (!de && errno) { | |
31f18b77 | 150 | derr << "readdir(" << mon_data << ") " << cpp_strerror(errno) << dendl; |
7c673cae FG |
151 | code = -errno; |
152 | } | |
153 | ||
154 | ::closedir(dir); | |
155 | ||
156 | return code; | |
157 | } | |
158 | ||
159 | static void usage() | |
160 | { | |
31f18b77 FG |
161 | cout << "usage: ceph-mon -i <ID> [flags]\n" |
162 | << " --debug_mon n\n" | |
163 | << " debug monitor level (e.g. 10)\n" | |
164 | << " --mkfs\n" | |
165 | << " build fresh monitor fs\n" | |
166 | << " --force-sync\n" | |
167 | << " force a sync from another mon by wiping local data (BE CAREFUL)\n" | |
168 | << " --yes-i-really-mean-it\n" | |
169 | << " mandatory safeguard for --force-sync\n" | |
170 | << " --compact\n" | |
171 | << " compact the monitor store\n" | |
172 | << " --osdmap <filename>\n" | |
173 | << " only used when --mkfs is provided: load the osdmap from <filename>\n" | |
174 | << " --inject-monmap <filename>\n" | |
175 | << " write the <filename> monmap to the local monitor store and exit\n" | |
176 | << " --extract-monmap <filename>\n" | |
177 | << " extract the monmap from the local monitor store and exit\n" | |
178 | << " --mon-data <directory>\n" | |
179 | << " where the mon store and keyring are located\n" | |
180 | << std::endl; | |
7c673cae FG |
181 | generic_server_usage(); |
182 | } | |
183 | ||
184 | #ifdef BUILDING_FOR_EMBEDDED | |
185 | void cephd_preload_embedded_plugins(); | |
186 | extern "C" int cephd_mon(int argc, const char **argv) | |
187 | #else | |
188 | int main(int argc, const char **argv) | |
189 | #endif | |
190 | { | |
191 | int err; | |
192 | ||
193 | bool mkfs = false; | |
194 | bool compact = false; | |
195 | bool force_sync = false; | |
196 | bool yes_really = false; | |
197 | std::string osdmapfn, inject_monmap, extract_monmap; | |
198 | ||
199 | vector<const char*> args; | |
200 | argv_to_vec(argc, argv, args); | |
201 | env_to_vec(args); | |
202 | ||
203 | // We need to specify some default values that may be overridden by the | |
204 | // user, that are specific to the monitor. The options we are overriding | |
205 | // are also used on the OSD (or in any other component that uses leveldb), | |
206 | // so changing them directly in common/config_opts.h is not an option. | |
207 | // This is not the prettiest way of doing this, especially since it has us | |
208 | // having a different place than common/config_opts.h defining default | |
209 | // values, but it's not horribly wrong enough to prevent us from doing it :) | |
210 | // | |
211 | // NOTE: user-defined options will take precedence over ours. | |
212 | // | |
213 | // leveldb_write_buffer_size = 32*1024*1024 = 33554432 // 32MB | |
214 | // leveldb_cache_size = 512*1024*1204 = 536870912 // 512MB | |
215 | // leveldb_block_size = 64*1024 = 65536 // 64KB | |
216 | // leveldb_compression = false | |
217 | // leveldb_log = "" | |
218 | vector<const char*> def_args; | |
219 | def_args.push_back("--leveldb-write-buffer-size=33554432"); | |
220 | def_args.push_back("--leveldb-cache-size=536870912"); | |
221 | def_args.push_back("--leveldb-block-size=65536"); | |
222 | def_args.push_back("--leveldb-compression=false"); | |
223 | def_args.push_back("--leveldb-log="); | |
224 | ||
225 | int flags = 0; | |
226 | { | |
227 | vector<const char*> args_copy = args; | |
228 | std::string val; | |
229 | for (std::vector<const char*>::iterator i = args_copy.begin(); | |
230 | i != args_copy.end(); ) { | |
231 | if (ceph_argparse_double_dash(args_copy, i)) { | |
232 | break; | |
233 | } else if (ceph_argparse_flag(args_copy, i, "--mkfs", (char*)NULL)) { | |
234 | flags |= CINIT_FLAG_NO_DAEMON_ACTIONS; | |
235 | } else if (ceph_argparse_witharg(args_copy, i, &val, "--inject_monmap", (char*)NULL)) { | |
236 | flags |= CINIT_FLAG_NO_DAEMON_ACTIONS; | |
237 | } else if (ceph_argparse_witharg(args_copy, i, &val, "--extract-monmap", (char*)NULL)) { | |
238 | flags |= CINIT_FLAG_NO_DAEMON_ACTIONS; | |
239 | } else { | |
240 | ++i; | |
241 | } | |
242 | } | |
243 | } | |
244 | ||
245 | auto cct = global_init(&def_args, args, | |
246 | CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON, | |
247 | flags, "mon_data"); | |
248 | ceph_heap_profiler_init(); | |
249 | ||
250 | uuid_d fsid; | |
251 | std::string val; | |
252 | for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) { | |
253 | if (ceph_argparse_double_dash(args, i)) { | |
254 | break; | |
255 | } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { | |
256 | usage(); | |
257 | } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) { | |
258 | mkfs = true; | |
259 | } else if (ceph_argparse_flag(args, i, "--compact", (char*)NULL)) { | |
260 | compact = true; | |
261 | } else if (ceph_argparse_flag(args, i, "--force-sync", (char*)NULL)) { | |
262 | force_sync = true; | |
263 | } else if (ceph_argparse_flag(args, i, "--yes-i-really-mean-it", (char*)NULL)) { | |
264 | yes_really = true; | |
265 | } else if (ceph_argparse_witharg(args, i, &val, "--osdmap", (char*)NULL)) { | |
266 | osdmapfn = val; | |
267 | } else if (ceph_argparse_witharg(args, i, &val, "--inject_monmap", (char*)NULL)) { | |
268 | inject_monmap = val; | |
269 | } else if (ceph_argparse_witharg(args, i, &val, "--extract-monmap", (char*)NULL)) { | |
270 | extract_monmap = val; | |
271 | } else { | |
272 | ++i; | |
273 | } | |
274 | } | |
275 | if (!args.empty()) { | |
31f18b77 | 276 | derr << "too many arguments: " << args << dendl; |
7c673cae FG |
277 | usage(); |
278 | } | |
279 | ||
280 | if (force_sync && !yes_really) { | |
31f18b77 FG |
281 | derr << "are you SURE you want to force a sync? this will erase local data and may\n" |
282 | << "break your mon cluster. pass --yes-i-really-mean-it if you do." << dendl; | |
7c673cae FG |
283 | exit(1); |
284 | } | |
285 | ||
286 | if (g_conf->mon_data.empty()) { | |
31f18b77 | 287 | derr << "must specify '--mon-data=foo' data path" << dendl; |
7c673cae FG |
288 | usage(); |
289 | } | |
290 | ||
291 | if (g_conf->name.get_id().empty()) { | |
31f18b77 | 292 | derr << "must specify id (--id <id> or --name mon.<id>)" << dendl; |
7c673cae FG |
293 | usage(); |
294 | } | |
295 | ||
296 | // -- mkfs -- | |
297 | if (mkfs) { | |
298 | ||
299 | int err = check_mon_data_exists(); | |
300 | if (err == -ENOENT) { | |
301 | if (::mkdir(g_conf->mon_data.c_str(), 0755)) { | |
31f18b77 FG |
302 | derr << "mkdir(" << g_conf->mon_data << ") : " |
303 | << cpp_strerror(errno) << dendl; | |
7c673cae FG |
304 | exit(1); |
305 | } | |
306 | } else if (err < 0) { | |
31f18b77 FG |
307 | derr << "error opening '" << g_conf->mon_data << "': " |
308 | << cpp_strerror(-err) << dendl; | |
7c673cae FG |
309 | exit(-err); |
310 | } | |
311 | ||
312 | err = check_mon_data_empty(); | |
313 | if (err == -ENOTEMPTY) { | |
314 | // Mon may exist. Let the user know and exit gracefully. | |
31f18b77 FG |
315 | derr << "'" << g_conf->mon_data << "' already exists and is not empty" |
316 | << ": monitor may already exist" << dendl; | |
7c673cae FG |
317 | exit(0); |
318 | } else if (err < 0) { | |
31f18b77 FG |
319 | derr << "error checking if '" << g_conf->mon_data << "' is empty: " |
320 | << cpp_strerror(-err) << dendl; | |
7c673cae FG |
321 | exit(-err); |
322 | } | |
323 | ||
324 | // resolve public_network -> public_addr | |
325 | pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC); | |
326 | ||
327 | common_init_finish(g_ceph_context); | |
328 | ||
329 | bufferlist monmapbl, osdmapbl; | |
330 | std::string error; | |
331 | MonMap monmap; | |
332 | ||
333 | // load or generate monmap | |
334 | if (g_conf->monmap.length()) { | |
335 | int err = monmapbl.read_file(g_conf->monmap.c_str(), &error); | |
336 | if (err < 0) { | |
31f18b77 | 337 | derr << argv[0] << ": error reading " << g_conf->monmap << ": " << error << dendl; |
7c673cae FG |
338 | exit(1); |
339 | } | |
340 | try { | |
341 | monmap.decode(monmapbl); | |
342 | ||
343 | // always mark seed/mkfs monmap as epoch 0 | |
344 | monmap.set_epoch(0); | |
345 | } | |
346 | catch (const buffer::error& e) { | |
31f18b77 | 347 | derr << argv[0] << ": error decoding monmap " << g_conf->monmap << ": " << e.what() << dendl; |
7c673cae FG |
348 | exit(1); |
349 | } | |
350 | } else { | |
31f18b77 FG |
351 | ostringstream oss; |
352 | int err = monmap.build_initial(g_ceph_context, oss); | |
353 | if (oss.tellp()) | |
354 | derr << oss.str() << dendl; | |
7c673cae | 355 | if (err < 0) { |
31f18b77 | 356 | derr << argv[0] << ": warning: no initial monitors; must use admin socket to feed hints" << dendl; |
7c673cae FG |
357 | } |
358 | ||
359 | // am i part of the initial quorum? | |
360 | if (monmap.contains(g_conf->name.get_id())) { | |
361 | // hmm, make sure the ip listed exists on the current host? | |
362 | // maybe later. | |
363 | } else if (!g_conf->public_addr.is_blank_ip()) { | |
364 | entity_addr_t a = g_conf->public_addr; | |
365 | if (a.get_port() == 0) | |
366 | a.set_port(CEPH_MON_PORT); | |
367 | if (monmap.contains(a)) { | |
368 | string name; | |
369 | monmap.get_addr_name(a, name); | |
370 | monmap.rename(name, g_conf->name.get_id()); | |
371 | cout << argv[0] << ": renaming mon." << name << " " << a | |
372 | << " to mon." << g_conf->name.get_id() << std::endl; | |
373 | } | |
374 | } else { | |
375 | // is a local address listed without a name? if so, name myself. | |
376 | list<entity_addr_t> ls; | |
377 | monmap.list_addrs(ls); | |
378 | entity_addr_t local; | |
379 | ||
380 | if (have_local_addr(g_ceph_context, ls, &local)) { | |
381 | string name; | |
382 | monmap.get_addr_name(local, name); | |
383 | ||
384 | if (name.compare(0, 7, "noname-") == 0) { | |
385 | cout << argv[0] << ": mon." << name << " " << local | |
386 | << " is local, renaming to mon." << g_conf->name.get_id() << std::endl; | |
387 | monmap.rename(name, g_conf->name.get_id()); | |
388 | } else { | |
389 | cout << argv[0] << ": mon." << name << " " << local | |
390 | << " is local, but not 'noname-' + something; not assuming it's me" << std::endl; | |
391 | } | |
392 | } | |
393 | } | |
394 | } | |
395 | ||
396 | if (!g_conf->fsid.is_zero()) { | |
397 | monmap.fsid = g_conf->fsid; | |
398 | cout << argv[0] << ": set fsid to " << g_conf->fsid << std::endl; | |
399 | } | |
400 | ||
401 | if (monmap.fsid.is_zero()) { | |
31f18b77 | 402 | derr << argv[0] << ": generated monmap has no fsid; use '--fsid <uuid>'" << dendl; |
7c673cae FG |
403 | exit(10); |
404 | } | |
405 | ||
406 | //monmap.print(cout); | |
407 | ||
408 | // osdmap | |
409 | if (osdmapfn.length()) { | |
410 | err = osdmapbl.read_file(osdmapfn.c_str(), &error); | |
411 | if (err < 0) { | |
31f18b77 FG |
412 | derr << argv[0] << ": error reading " << osdmapfn << ": " |
413 | << error << dendl; | |
7c673cae FG |
414 | exit(1); |
415 | } | |
416 | } | |
417 | ||
418 | // go | |
419 | MonitorDBStore store(g_conf->mon_data); | |
31f18b77 FG |
420 | ostringstream oss; |
421 | int r = store.create_and_open(oss); | |
422 | if (oss.tellp()) | |
423 | derr << oss.str() << dendl; | |
7c673cae | 424 | if (r < 0) { |
31f18b77 FG |
425 | derr << argv[0] << ": error opening mon data directory at '" |
426 | << g_conf->mon_data << "': " << cpp_strerror(r) << dendl; | |
7c673cae FG |
427 | exit(1); |
428 | } | |
429 | assert(r == 0); | |
430 | ||
431 | Monitor mon(g_ceph_context, g_conf->name.get_id(), &store, 0, 0, &monmap); | |
432 | r = mon.mkfs(osdmapbl); | |
433 | if (r < 0) { | |
31f18b77 | 434 | derr << argv[0] << ": error creating monfs: " << cpp_strerror(r) << dendl; |
7c673cae FG |
435 | exit(1); |
436 | } | |
437 | store.close(); | |
438 | cout << argv[0] << ": created monfs at " << g_conf->mon_data | |
439 | << " for " << g_conf->name << std::endl; | |
440 | return 0; | |
441 | } | |
442 | ||
443 | err = check_mon_data_exists(); | |
444 | if (err < 0 && err == -ENOENT) { | |
31f18b77 FG |
445 | derr << "monitor data directory at '" << g_conf->mon_data << "'" |
446 | << " does not exist: have you run 'mkfs'?" << dendl; | |
7c673cae FG |
447 | exit(1); |
448 | } else if (err < 0) { | |
31f18b77 FG |
449 | derr << "error accessing monitor data directory at '" |
450 | << g_conf->mon_data << "': " << cpp_strerror(-err) << dendl; | |
7c673cae FG |
451 | exit(1); |
452 | } | |
453 | ||
454 | err = check_mon_data_empty(); | |
455 | if (err == 0) { | |
456 | derr << "monitor data directory at '" << g_conf->mon_data | |
457 | << "' is empty: have you run 'mkfs'?" << dendl; | |
458 | exit(1); | |
459 | } else if (err < 0 && err != -ENOTEMPTY) { | |
460 | // we don't want an empty data dir by now | |
31f18b77 FG |
461 | derr << "error accessing '" << g_conf->mon_data << "': " |
462 | << cpp_strerror(-err) << dendl; | |
7c673cae FG |
463 | exit(1); |
464 | } | |
465 | ||
466 | { | |
467 | // check fs stats. don't start if it's critically close to full. | |
468 | ceph_data_stats_t stats; | |
469 | int err = get_fs_stats(stats, g_conf->mon_data.c_str()); | |
470 | if (err < 0) { | |
31f18b77 FG |
471 | derr << "error checking monitor data's fs stats: " << cpp_strerror(err) |
472 | << dendl; | |
7c673cae FG |
473 | exit(-err); |
474 | } | |
475 | if (stats.avail_percent <= g_conf->mon_data_avail_crit) { | |
31f18b77 | 476 | derr << "error: monitor data filesystem reached concerning levels of" |
7c673cae FG |
477 | << " available storage space (available: " |
478 | << stats.avail_percent << "% " << prettybyte_t(stats.byte_avail) | |
479 | << ")\nyou may adjust 'mon data avail crit' to a lower value" | |
480 | << " to make this go away (default: " << g_conf->mon_data_avail_crit | |
31f18b77 | 481 | << "%)\n" << dendl; |
7c673cae FG |
482 | exit(ENOSPC); |
483 | } | |
484 | } | |
485 | ||
486 | // we fork early to prevent leveldb's environment static state from | |
487 | // screwing us over | |
488 | Preforker prefork; | |
489 | if (!(flags & CINIT_FLAG_NO_DAEMON_ACTIONS)) { | |
490 | if (global_init_prefork(g_ceph_context) >= 0) { | |
491 | string err_msg; | |
492 | err = prefork.prefork(err_msg); | |
493 | if (err < 0) { | |
31f18b77 | 494 | derr << err_msg << dendl; |
7c673cae FG |
495 | prefork.exit(err); |
496 | } | |
497 | if (prefork.is_parent()) { | |
498 | err = prefork.parent_wait(err_msg); | |
499 | if (err < 0) | |
31f18b77 | 500 | derr << err_msg << dendl; |
7c673cae FG |
501 | prefork.exit(err); |
502 | } | |
503 | global_init_postfork_start(g_ceph_context); | |
504 | } | |
505 | common_init_finish(g_ceph_context); | |
506 | global_init_chdir(g_ceph_context); | |
507 | #ifndef BUILDING_FOR_EMBEDDED | |
508 | if (global_init_preload_erasure_code(g_ceph_context) < 0) | |
509 | prefork.exit(1); | |
510 | #else | |
511 | cephd_preload_embedded_plugins(); | |
512 | #endif | |
513 | } | |
514 | ||
515 | MonitorDBStore *store = new MonitorDBStore(g_conf->mon_data); | |
31f18b77 FG |
516 | { |
517 | ostringstream oss; | |
518 | err = store->open(oss); | |
519 | if (oss.tellp()) | |
520 | derr << oss.str() << dendl; | |
521 | if (err < 0) { | |
522 | derr << "error opening mon data directory at '" | |
523 | << g_conf->mon_data << "': " << cpp_strerror(err) << dendl; | |
524 | prefork.exit(1); | |
525 | } | |
7c673cae FG |
526 | } |
527 | ||
528 | bufferlist magicbl; | |
529 | err = store->get(Monitor::MONITOR_NAME, "magic", magicbl); | |
530 | if (err || !magicbl.length()) { | |
531 | derr << "unable to read magic from mon data" << dendl; | |
532 | prefork.exit(1); | |
533 | } | |
534 | string magic(magicbl.c_str(), magicbl.length()-1); // ignore trailing \n | |
535 | if (strcmp(magic.c_str(), CEPH_MON_ONDISK_MAGIC)) { | |
536 | derr << "mon fs magic '" << magic << "' != current '" << CEPH_MON_ONDISK_MAGIC << "'" << dendl; | |
537 | prefork.exit(1); | |
538 | } | |
539 | ||
540 | err = Monitor::check_features(store); | |
541 | if (err < 0) { | |
542 | derr << "error checking features: " << cpp_strerror(err) << dendl; | |
543 | prefork.exit(1); | |
544 | } | |
545 | ||
546 | // inject new monmap? | |
547 | if (!inject_monmap.empty()) { | |
548 | bufferlist bl; | |
549 | std::string error; | |
550 | int r = bl.read_file(inject_monmap.c_str(), &error); | |
551 | if (r) { | |
552 | derr << "unable to read monmap from " << inject_monmap << ": " | |
553 | << error << dendl; | |
554 | prefork.exit(1); | |
555 | } | |
556 | ||
557 | // get next version | |
558 | version_t v = store->get("monmap", "last_committed"); | |
559 | dout(0) << "last committed monmap epoch is " << v << ", injected map will be " << (v+1) | |
560 | << dendl; | |
561 | v++; | |
562 | ||
563 | // set the version | |
564 | MonMap tmp; | |
565 | tmp.decode(bl); | |
566 | if (tmp.get_epoch() != v) { | |
567 | dout(0) << "changing monmap epoch from " << tmp.get_epoch() | |
568 | << " to " << v << dendl; | |
569 | tmp.set_epoch(v); | |
570 | } | |
571 | bufferlist mapbl; | |
572 | tmp.encode(mapbl, CEPH_FEATURES_ALL); | |
573 | bufferlist final; | |
574 | ::encode(v, final); | |
575 | ::encode(mapbl, final); | |
576 | ||
577 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
578 | // save it | |
579 | t->put("monmap", v, mapbl); | |
580 | t->put("monmap", "latest", final); | |
581 | t->put("monmap", "last_committed", v); | |
582 | store->apply_transaction(t); | |
583 | ||
584 | dout(0) << "done." << dendl; | |
585 | prefork.exit(0); | |
586 | } | |
587 | ||
588 | // monmap? | |
589 | MonMap monmap; | |
590 | { | |
591 | // note that even if we don't find a viable monmap, we should go ahead | |
592 | // and try to build it up in the next if-else block. | |
593 | bufferlist mapbl; | |
594 | int err = obtain_monmap(*store, mapbl); | |
595 | if (err >= 0) { | |
596 | try { | |
597 | monmap.decode(mapbl); | |
598 | } catch (const buffer::error& e) { | |
31f18b77 | 599 | derr << "can't decode monmap: " << e.what() << dendl; |
7c673cae FG |
600 | } |
601 | } else { | |
602 | derr << "unable to obtain a monmap: " << cpp_strerror(err) << dendl; | |
603 | } | |
604 | if (!extract_monmap.empty()) { | |
605 | int r = mapbl.write_file(extract_monmap.c_str()); | |
606 | if (r < 0) { | |
607 | r = -errno; | |
608 | derr << "error writing monmap to " << extract_monmap << ": " << cpp_strerror(r) << dendl; | |
609 | prefork.exit(1); | |
610 | } | |
611 | derr << "wrote monmap to " << extract_monmap << dendl; | |
612 | prefork.exit(0); | |
613 | } | |
614 | } | |
615 | ||
616 | // this is what i will bind to | |
617 | entity_addr_t ipaddr; | |
618 | ||
619 | if (monmap.contains(g_conf->name.get_id())) { | |
620 | ipaddr = monmap.get_addr(g_conf->name.get_id()); | |
621 | ||
622 | // print helpful warning if the conf file doesn't match | |
623 | entity_addr_t conf_addr; | |
624 | std::vector <std::string> my_sections; | |
625 | g_conf->get_my_sections(my_sections); | |
626 | std::string mon_addr_str; | |
627 | if (g_conf->get_val_from_conf_file(my_sections, "mon addr", | |
628 | mon_addr_str, true) == 0) { | |
629 | if (conf_addr.parse(mon_addr_str.c_str()) && (ipaddr != conf_addr)) { | |
630 | derr << "WARNING: 'mon addr' config option " << conf_addr | |
631 | << " does not match monmap file" << std::endl | |
632 | << " continuing with monmap configuration" << dendl; | |
633 | } | |
634 | } | |
635 | } else { | |
636 | dout(0) << g_conf->name << " does not exist in monmap, will attempt to join an existing cluster" << dendl; | |
637 | ||
638 | pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC); | |
639 | if (!g_conf->public_addr.is_blank_ip()) { | |
640 | ipaddr = g_conf->public_addr; | |
641 | if (ipaddr.get_port() == 0) | |
642 | ipaddr.set_port(CEPH_MON_PORT); | |
643 | dout(0) << "using public_addr " << g_conf->public_addr << " -> " | |
644 | << ipaddr << dendl; | |
645 | } else { | |
646 | MonMap tmpmap; | |
31f18b77 FG |
647 | ostringstream oss; |
648 | int err = tmpmap.build_initial(g_ceph_context, oss); | |
649 | if (oss.tellp()) | |
650 | derr << oss.str() << dendl; | |
7c673cae FG |
651 | if (err < 0) { |
652 | derr << argv[0] << ": error generating initial monmap: " | |
653 | << cpp_strerror(err) << dendl; | |
654 | usage(); | |
655 | prefork.exit(1); | |
656 | } | |
657 | if (tmpmap.contains(g_conf->name.get_id())) { | |
658 | ipaddr = tmpmap.get_addr(g_conf->name.get_id()); | |
659 | } else { | |
660 | derr << "no public_addr or public_network specified, and " << g_conf->name | |
661 | << " not present in monmap or ceph.conf" << dendl; | |
662 | prefork.exit(1); | |
663 | } | |
664 | } | |
665 | } | |
666 | ||
667 | // bind | |
668 | int rank = monmap.get_rank(g_conf->name.get_id()); | |
669 | std::string public_msgr_type = g_conf->ms_public_type.empty() ? g_conf->get_val<std::string>("ms_type") : g_conf->ms_public_type; | |
670 | Messenger *msgr = Messenger::create(g_ceph_context, public_msgr_type, | |
671 | entity_name_t::MON(rank), "mon", | |
672 | 0, Messenger::HAS_MANY_CONNECTIONS); | |
673 | if (!msgr) | |
674 | exit(1); | |
675 | msgr->set_cluster_protocol(CEPH_MON_PROTOCOL); | |
676 | msgr->set_default_send_priority(CEPH_MSG_PRIO_HIGH); | |
677 | ||
678 | msgr->set_default_policy(Messenger::Policy::stateless_server(0)); | |
679 | msgr->set_policy(entity_name_t::TYPE_MON, | |
680 | Messenger::Policy::lossless_peer_reuse( | |
681 | CEPH_FEATURE_UID | | |
682 | CEPH_FEATURE_PGID64 | | |
683 | CEPH_FEATURE_MON_SINGLE_PAXOS)); | |
684 | msgr->set_policy(entity_name_t::TYPE_OSD, | |
685 | Messenger::Policy::stateless_server( | |
686 | CEPH_FEATURE_PGID64 | | |
687 | CEPH_FEATURE_OSDENC)); | |
688 | msgr->set_policy(entity_name_t::TYPE_CLIENT, | |
689 | Messenger::Policy::stateless_server(0)); | |
690 | msgr->set_policy(entity_name_t::TYPE_MDS, | |
691 | Messenger::Policy::stateless_server(0)); | |
692 | ||
693 | // throttle client traffic | |
694 | Throttle *client_throttler = new Throttle(g_ceph_context, "mon_client_bytes", | |
695 | g_conf->mon_client_bytes); | |
696 | msgr->set_policy_throttlers(entity_name_t::TYPE_CLIENT, | |
697 | client_throttler, NULL); | |
698 | ||
699 | // throttle daemon traffic | |
700 | // NOTE: actual usage on the leader may multiply by the number of | |
701 | // monitors if they forward large update messages from daemons. | |
702 | Throttle *daemon_throttler = new Throttle(g_ceph_context, "mon_daemon_bytes", | |
703 | g_conf->mon_daemon_bytes); | |
704 | msgr->set_policy_throttlers(entity_name_t::TYPE_OSD, daemon_throttler, | |
705 | NULL); | |
706 | msgr->set_policy_throttlers(entity_name_t::TYPE_MDS, daemon_throttler, | |
707 | NULL); | |
708 | ||
709 | dout(0) << "starting " << g_conf->name << " rank " << rank | |
710 | << " at " << ipaddr | |
711 | << " mon_data " << g_conf->mon_data | |
712 | << " fsid " << monmap.get_fsid() | |
713 | << dendl; | |
714 | ||
715 | err = msgr->bind(ipaddr); | |
716 | if (err < 0) { | |
717 | derr << "unable to bind monitor to " << ipaddr << dendl; | |
718 | prefork.exit(1); | |
719 | } | |
720 | ||
721 | Messenger *mgr_msgr = Messenger::create(g_ceph_context, public_msgr_type, | |
722 | entity_name_t::MON(rank), "mon-mgrc", | |
723 | getpid(), 0); | |
724 | if (!mgr_msgr) { | |
725 | derr << "unable to create mgr_msgr" << dendl; | |
726 | prefork.exit(1); | |
727 | } | |
728 | ||
729 | cout << "starting " << g_conf->name << " rank " << rank | |
730 | << " at " << ipaddr | |
731 | << " mon_data " << g_conf->mon_data | |
732 | << " fsid " << monmap.get_fsid() | |
733 | << std::endl; | |
734 | ||
735 | // start monitor | |
736 | mon = new Monitor(g_ceph_context, g_conf->name.get_id(), store, | |
737 | msgr, mgr_msgr, &monmap); | |
738 | ||
739 | if (force_sync) { | |
740 | derr << "flagging a forced sync ..." << dendl; | |
31f18b77 FG |
741 | ostringstream oss; |
742 | mon->sync_force(NULL, oss); | |
743 | if (oss.tellp()) | |
744 | derr << oss.str() << dendl; | |
7c673cae FG |
745 | } |
746 | ||
747 | err = mon->preinit(); | |
748 | if (err < 0) { | |
749 | derr << "failed to initialize" << dendl; | |
750 | prefork.exit(1); | |
751 | } | |
752 | ||
753 | if (compact || g_conf->mon_compact_on_start) { | |
754 | derr << "compacting monitor store ..." << dendl; | |
755 | mon->store->compact(); | |
756 | derr << "done compacting" << dendl; | |
757 | } | |
758 | ||
759 | if (g_conf->daemonize) { | |
760 | global_init_postfork_finish(g_ceph_context); | |
761 | prefork.daemonize(); | |
762 | } | |
763 | ||
764 | msgr->start(); | |
765 | mgr_msgr->start(); | |
766 | ||
767 | mon->init(); | |
768 | ||
769 | // set up signal handlers, now that we've daemonized/forked. | |
770 | init_async_signal_handler(); | |
771 | register_async_signal_handler(SIGHUP, sighup_handler); | |
772 | register_async_signal_handler_oneshot(SIGINT, handle_mon_signal); | |
773 | register_async_signal_handler_oneshot(SIGTERM, handle_mon_signal); | |
774 | ||
775 | if (g_conf->inject_early_sigterm) | |
776 | kill(getpid(), SIGTERM); | |
777 | ||
778 | msgr->wait(); | |
779 | mgr_msgr->wait(); | |
780 | ||
781 | store->close(); | |
782 | ||
783 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
784 | unregister_async_signal_handler(SIGINT, handle_mon_signal); | |
785 | unregister_async_signal_handler(SIGTERM, handle_mon_signal); | |
786 | shutdown_async_signal_handler(); | |
787 | ||
788 | delete mon; | |
789 | delete store; | |
790 | delete msgr; | |
791 | delete mgr_msgr; | |
792 | delete client_throttler; | |
793 | delete daemon_throttler; | |
794 | ||
795 | // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. | |
796 | char s[20]; | |
797 | snprintf(s, sizeof(s), "gmon/%d", getpid()); | |
798 | if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) { | |
799 | dout(0) << "ceph-mon: gmon.out should be in " << s << dendl; | |
800 | } | |
801 | ||
802 | prefork.signal_exit(0); | |
803 | return 0; | |
804 | } | |
805 |