]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include <sys/types.h> | |
16 | #include <sys/stat.h> | |
17 | #include <fcntl.h> | |
18 | ||
19 | #include <iostream> | |
20 | #include <string> | |
21 | using namespace std; | |
22 | ||
23 | #include "common/config.h" | |
24 | #include "include/ceph_features.h" | |
25 | ||
26 | #include "mon/MonMap.h" | |
27 | #include "mon/Monitor.h" | |
28 | #include "mon/MonitorDBStore.h" | |
29 | #include "mon/MonClient.h" | |
30 | ||
31 | #include "msg/Messenger.h" | |
32 | ||
33 | #include "include/CompatSet.h" | |
34 | ||
35 | #include "common/ceph_argparse.h" | |
36 | #include "common/pick_address.h" | |
37 | #include "common/Timer.h" | |
38 | #include "common/errno.h" | |
39 | #include "common/Preforker.h" | |
40 | ||
41 | #include "global/global_init.h" | |
42 | #include "global/signal_handler.h" | |
43 | ||
44 | #include "perfglue/heap_profiler.h" | |
45 | ||
46 | #include "include/assert.h" | |
47 | ||
48 | #define dout_subsys ceph_subsys_mon | |
49 | ||
50 | Monitor *mon = NULL; | |
51 | ||
52 | void handle_mon_signal(int signum) | |
53 | { | |
54 | if (mon) | |
55 | mon->handle_signal(signum); | |
56 | } | |
57 | ||
58 | ||
59 | int obtain_monmap(MonitorDBStore &store, bufferlist &bl) | |
60 | { | |
61 | dout(10) << __func__ << dendl; | |
62 | /* | |
63 | * the monmap may be in one of three places: | |
64 | * 'monmap:<latest_version_no>' - the monmap we'd really like to have | |
65 | * 'mon_sync:latest_monmap' - last monmap backed up for the last sync | |
66 | * 'mkfs:monmap' - a monmap resulting from mkfs | |
67 | */ | |
68 | ||
69 | if (store.exists("monmap", "last_committed")) { | |
70 | version_t latest_ver = store.get("monmap", "last_committed"); | |
71 | if (store.exists("monmap", latest_ver)) { | |
72 | int err = store.get("monmap", latest_ver, bl); | |
73 | assert(err == 0); | |
74 | assert(bl.length() > 0); | |
75 | dout(10) << __func__ << " read last committed monmap ver " | |
76 | << latest_ver << dendl; | |
77 | return 0; | |
78 | } | |
79 | } | |
80 | ||
81 | if (store.exists("mon_sync", "in_sync") | |
82 | || store.exists("mon_sync", "force_sync")) { | |
83 | dout(10) << __func__ << " detected aborted sync" << dendl; | |
84 | if (store.exists("mon_sync", "latest_monmap")) { | |
85 | int err = store.get("mon_sync", "latest_monmap", bl); | |
86 | assert(err == 0); | |
87 | assert(bl.length() > 0); | |
88 | dout(10) << __func__ << " read backup monmap" << dendl; | |
89 | return 0; | |
90 | } | |
91 | } | |
92 | ||
93 | if (store.exists("mkfs", "monmap")) { | |
94 | dout(10) << __func__ << " found mkfs monmap" << dendl; | |
95 | int err = store.get("mkfs", "monmap", bl); | |
96 | assert(err == 0); | |
97 | assert(bl.length() > 0); | |
98 | return 0; | |
99 | } | |
100 | ||
101 | derr << __func__ << " unable to find a monmap" << dendl; | |
102 | return -ENOENT; | |
103 | } | |
104 | ||
105 | int check_mon_data_exists() | |
106 | { | |
107 | string mon_data = g_conf->mon_data; | |
108 | struct stat buf; | |
109 | if (::stat(mon_data.c_str(), &buf)) { | |
110 | if (errno != ENOENT) { | |
31f18b77 | 111 | derr << "stat(" << mon_data << ") " << cpp_strerror(errno) << dendl; |
7c673cae FG |
112 | } |
113 | return -errno; | |
114 | } | |
115 | return 0; | |
116 | } | |
117 | ||
118 | /** Check whether **mon data** is empty. | |
119 | * | |
120 | * Being empty means mkfs has not been run and there's no monitor setup | |
121 | * at **g_conf->mon_data**. | |
122 | * | |
123 | * If the directory g_conf->mon_data is not empty we will return -ENOTEMPTY. | |
124 | * Otherwise we will return 0. Any other negative returns will represent | |
125 | * a failure to be handled by the caller. | |
126 | * | |
127 | * @return **0** on success, -ENOTEMPTY if not empty or **-errno** otherwise. | |
128 | */ | |
129 | int check_mon_data_empty() | |
130 | { | |
131 | string mon_data = g_conf->mon_data; | |
132 | ||
133 | DIR *dir = ::opendir(mon_data.c_str()); | |
134 | if (!dir) { | |
31f18b77 | 135 | derr << "opendir(" << mon_data << ") " << cpp_strerror(errno) << dendl; |
7c673cae FG |
136 | return -errno; |
137 | } | |
138 | int code = 0; | |
139 | struct dirent *de = nullptr; | |
140 | errno = 0; | |
141 | while ((de = ::readdir(dir))) { | |
142 | if (string(".") != de->d_name && | |
143 | string("..") != de->d_name && | |
144 | string("kv_backend") != de->d_name) { | |
145 | code = -ENOTEMPTY; | |
146 | break; | |
147 | } | |
148 | } | |
149 | if (!de && errno) { | |
31f18b77 | 150 | derr << "readdir(" << mon_data << ") " << cpp_strerror(errno) << dendl; |
7c673cae FG |
151 | code = -errno; |
152 | } | |
153 | ||
154 | ::closedir(dir); | |
155 | ||
156 | return code; | |
157 | } | |
158 | ||
159 | static void usage() | |
160 | { | |
31f18b77 FG |
161 | cout << "usage: ceph-mon -i <ID> [flags]\n" |
162 | << " --debug_mon n\n" | |
163 | << " debug monitor level (e.g. 10)\n" | |
164 | << " --mkfs\n" | |
165 | << " build fresh monitor fs\n" | |
166 | << " --force-sync\n" | |
167 | << " force a sync from another mon by wiping local data (BE CAREFUL)\n" | |
168 | << " --yes-i-really-mean-it\n" | |
169 | << " mandatory safeguard for --force-sync\n" | |
170 | << " --compact\n" | |
171 | << " compact the monitor store\n" | |
172 | << " --osdmap <filename>\n" | |
173 | << " only used when --mkfs is provided: load the osdmap from <filename>\n" | |
174 | << " --inject-monmap <filename>\n" | |
175 | << " write the <filename> monmap to the local monitor store and exit\n" | |
176 | << " --extract-monmap <filename>\n" | |
177 | << " extract the monmap from the local monitor store and exit\n" | |
178 | << " --mon-data <directory>\n" | |
179 | << " where the mon store and keyring are located\n" | |
180 | << std::endl; | |
7c673cae FG |
181 | generic_server_usage(); |
182 | } | |
183 | ||
7c673cae | 184 | int main(int argc, const char **argv) |
7c673cae FG |
185 | { |
186 | int err; | |
187 | ||
188 | bool mkfs = false; | |
189 | bool compact = false; | |
190 | bool force_sync = false; | |
191 | bool yes_really = false; | |
192 | std::string osdmapfn, inject_monmap, extract_monmap; | |
193 | ||
194 | vector<const char*> args; | |
195 | argv_to_vec(argc, argv, args); | |
196 | env_to_vec(args); | |
197 | ||
198 | // We need to specify some default values that may be overridden by the | |
199 | // user, that are specific to the monitor. The options we are overriding | |
200 | // are also used on the OSD (or in any other component that uses leveldb), | |
c07f9fc5 | 201 | // so changing the global defaults is not an option. |
7c673cae | 202 | // This is not the prettiest way of doing this, especially since it has us |
c07f9fc5 FG |
203 | // having a different place defining default values, but it's not horribly |
204 | // wrong enough to prevent us from doing it :) | |
7c673cae FG |
205 | // |
206 | // NOTE: user-defined options will take precedence over ours. | |
207 | // | |
208 | // leveldb_write_buffer_size = 32*1024*1024 = 33554432 // 32MB | |
209 | // leveldb_cache_size = 512*1024*1204 = 536870912 // 512MB | |
210 | // leveldb_block_size = 64*1024 = 65536 // 64KB | |
211 | // leveldb_compression = false | |
212 | // leveldb_log = "" | |
213 | vector<const char*> def_args; | |
214 | def_args.push_back("--leveldb-write-buffer-size=33554432"); | |
215 | def_args.push_back("--leveldb-cache-size=536870912"); | |
216 | def_args.push_back("--leveldb-block-size=65536"); | |
217 | def_args.push_back("--leveldb-compression=false"); | |
218 | def_args.push_back("--leveldb-log="); | |
219 | ||
220 | int flags = 0; | |
221 | { | |
222 | vector<const char*> args_copy = args; | |
223 | std::string val; | |
224 | for (std::vector<const char*>::iterator i = args_copy.begin(); | |
225 | i != args_copy.end(); ) { | |
226 | if (ceph_argparse_double_dash(args_copy, i)) { | |
227 | break; | |
228 | } else if (ceph_argparse_flag(args_copy, i, "--mkfs", (char*)NULL)) { | |
229 | flags |= CINIT_FLAG_NO_DAEMON_ACTIONS; | |
230 | } else if (ceph_argparse_witharg(args_copy, i, &val, "--inject_monmap", (char*)NULL)) { | |
231 | flags |= CINIT_FLAG_NO_DAEMON_ACTIONS; | |
232 | } else if (ceph_argparse_witharg(args_copy, i, &val, "--extract-monmap", (char*)NULL)) { | |
233 | flags |= CINIT_FLAG_NO_DAEMON_ACTIONS; | |
234 | } else { | |
235 | ++i; | |
236 | } | |
237 | } | |
238 | } | |
239 | ||
240 | auto cct = global_init(&def_args, args, | |
241 | CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON, | |
242 | flags, "mon_data"); | |
243 | ceph_heap_profiler_init(); | |
244 | ||
7c673cae FG |
245 | std::string val; |
246 | for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) { | |
247 | if (ceph_argparse_double_dash(args, i)) { | |
248 | break; | |
249 | } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { | |
250 | usage(); | |
251 | } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) { | |
252 | mkfs = true; | |
253 | } else if (ceph_argparse_flag(args, i, "--compact", (char*)NULL)) { | |
254 | compact = true; | |
255 | } else if (ceph_argparse_flag(args, i, "--force-sync", (char*)NULL)) { | |
256 | force_sync = true; | |
257 | } else if (ceph_argparse_flag(args, i, "--yes-i-really-mean-it", (char*)NULL)) { | |
258 | yes_really = true; | |
259 | } else if (ceph_argparse_witharg(args, i, &val, "--osdmap", (char*)NULL)) { | |
260 | osdmapfn = val; | |
261 | } else if (ceph_argparse_witharg(args, i, &val, "--inject_monmap", (char*)NULL)) { | |
262 | inject_monmap = val; | |
263 | } else if (ceph_argparse_witharg(args, i, &val, "--extract-monmap", (char*)NULL)) { | |
264 | extract_monmap = val; | |
265 | } else { | |
266 | ++i; | |
267 | } | |
268 | } | |
269 | if (!args.empty()) { | |
31f18b77 | 270 | derr << "too many arguments: " << args << dendl; |
7c673cae FG |
271 | usage(); |
272 | } | |
273 | ||
274 | if (force_sync && !yes_really) { | |
31f18b77 FG |
275 | derr << "are you SURE you want to force a sync? this will erase local data and may\n" |
276 | << "break your mon cluster. pass --yes-i-really-mean-it if you do." << dendl; | |
7c673cae FG |
277 | exit(1); |
278 | } | |
279 | ||
280 | if (g_conf->mon_data.empty()) { | |
31f18b77 | 281 | derr << "must specify '--mon-data=foo' data path" << dendl; |
7c673cae FG |
282 | usage(); |
283 | } | |
284 | ||
285 | if (g_conf->name.get_id().empty()) { | |
31f18b77 | 286 | derr << "must specify id (--id <id> or --name mon.<id>)" << dendl; |
7c673cae FG |
287 | usage(); |
288 | } | |
289 | ||
290 | // -- mkfs -- | |
291 | if (mkfs) { | |
292 | ||
293 | int err = check_mon_data_exists(); | |
294 | if (err == -ENOENT) { | |
295 | if (::mkdir(g_conf->mon_data.c_str(), 0755)) { | |
31f18b77 FG |
296 | derr << "mkdir(" << g_conf->mon_data << ") : " |
297 | << cpp_strerror(errno) << dendl; | |
7c673cae FG |
298 | exit(1); |
299 | } | |
300 | } else if (err < 0) { | |
31f18b77 FG |
301 | derr << "error opening '" << g_conf->mon_data << "': " |
302 | << cpp_strerror(-err) << dendl; | |
7c673cae FG |
303 | exit(-err); |
304 | } | |
305 | ||
306 | err = check_mon_data_empty(); | |
307 | if (err == -ENOTEMPTY) { | |
308 | // Mon may exist. Let the user know and exit gracefully. | |
31f18b77 FG |
309 | derr << "'" << g_conf->mon_data << "' already exists and is not empty" |
310 | << ": monitor may already exist" << dendl; | |
7c673cae FG |
311 | exit(0); |
312 | } else if (err < 0) { | |
31f18b77 FG |
313 | derr << "error checking if '" << g_conf->mon_data << "' is empty: " |
314 | << cpp_strerror(-err) << dendl; | |
7c673cae FG |
315 | exit(-err); |
316 | } | |
317 | ||
318 | // resolve public_network -> public_addr | |
319 | pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC); | |
320 | ||
321 | common_init_finish(g_ceph_context); | |
322 | ||
323 | bufferlist monmapbl, osdmapbl; | |
324 | std::string error; | |
325 | MonMap monmap; | |
326 | ||
327 | // load or generate monmap | |
3efd9988 FG |
328 | const auto monmap_fn = g_conf->get_val<string>("monmap"); |
329 | if (monmap_fn.length()) { | |
330 | int err = monmapbl.read_file(monmap_fn.c_str(), &error); | |
7c673cae | 331 | if (err < 0) { |
3efd9988 | 332 | derr << argv[0] << ": error reading " << monmap_fn << ": " << error << dendl; |
7c673cae FG |
333 | exit(1); |
334 | } | |
335 | try { | |
336 | monmap.decode(monmapbl); | |
337 | ||
338 | // always mark seed/mkfs monmap as epoch 0 | |
339 | monmap.set_epoch(0); | |
3efd9988 FG |
340 | } catch (const buffer::error& e) { |
341 | derr << argv[0] << ": error decoding monmap " << monmap_fn << ": " << e.what() << dendl; | |
7c673cae FG |
342 | exit(1); |
343 | } | |
344 | } else { | |
31f18b77 FG |
345 | ostringstream oss; |
346 | int err = monmap.build_initial(g_ceph_context, oss); | |
347 | if (oss.tellp()) | |
348 | derr << oss.str() << dendl; | |
7c673cae | 349 | if (err < 0) { |
31f18b77 | 350 | derr << argv[0] << ": warning: no initial monitors; must use admin socket to feed hints" << dendl; |
7c673cae FG |
351 | } |
352 | ||
353 | // am i part of the initial quorum? | |
354 | if (monmap.contains(g_conf->name.get_id())) { | |
355 | // hmm, make sure the ip listed exists on the current host? | |
356 | // maybe later. | |
357 | } else if (!g_conf->public_addr.is_blank_ip()) { | |
358 | entity_addr_t a = g_conf->public_addr; | |
359 | if (a.get_port() == 0) | |
360 | a.set_port(CEPH_MON_PORT); | |
361 | if (monmap.contains(a)) { | |
362 | string name; | |
363 | monmap.get_addr_name(a, name); | |
364 | monmap.rename(name, g_conf->name.get_id()); | |
224ce89b WB |
365 | dout(0) << argv[0] << ": renaming mon." << name << " " << a |
366 | << " to mon." << g_conf->name.get_id() << dendl; | |
7c673cae FG |
367 | } |
368 | } else { | |
369 | // is a local address listed without a name? if so, name myself. | |
370 | list<entity_addr_t> ls; | |
371 | monmap.list_addrs(ls); | |
372 | entity_addr_t local; | |
373 | ||
374 | if (have_local_addr(g_ceph_context, ls, &local)) { | |
375 | string name; | |
376 | monmap.get_addr_name(local, name); | |
377 | ||
378 | if (name.compare(0, 7, "noname-") == 0) { | |
224ce89b WB |
379 | dout(0) << argv[0] << ": mon." << name << " " << local |
380 | << " is local, renaming to mon." << g_conf->name.get_id() << dendl; | |
7c673cae FG |
381 | monmap.rename(name, g_conf->name.get_id()); |
382 | } else { | |
224ce89b WB |
383 | dout(0) << argv[0] << ": mon." << name << " " << local |
384 | << " is local, but not 'noname-' + something; not assuming it's me" << dendl; | |
7c673cae FG |
385 | } |
386 | } | |
387 | } | |
388 | } | |
389 | ||
3efd9988 FG |
390 | const auto fsid = g_conf->get_val<uuid_d>("fsid"); |
391 | if (!fsid.is_zero()) { | |
392 | monmap.fsid = fsid; | |
393 | dout(0) << argv[0] << ": set fsid to " << fsid << dendl; | |
7c673cae FG |
394 | } |
395 | ||
396 | if (monmap.fsid.is_zero()) { | |
31f18b77 | 397 | derr << argv[0] << ": generated monmap has no fsid; use '--fsid <uuid>'" << dendl; |
7c673cae FG |
398 | exit(10); |
399 | } | |
400 | ||
401 | //monmap.print(cout); | |
402 | ||
403 | // osdmap | |
404 | if (osdmapfn.length()) { | |
405 | err = osdmapbl.read_file(osdmapfn.c_str(), &error); | |
406 | if (err < 0) { | |
31f18b77 FG |
407 | derr << argv[0] << ": error reading " << osdmapfn << ": " |
408 | << error << dendl; | |
7c673cae FG |
409 | exit(1); |
410 | } | |
411 | } | |
412 | ||
413 | // go | |
414 | MonitorDBStore store(g_conf->mon_data); | |
31f18b77 FG |
415 | ostringstream oss; |
416 | int r = store.create_and_open(oss); | |
417 | if (oss.tellp()) | |
418 | derr << oss.str() << dendl; | |
7c673cae | 419 | if (r < 0) { |
31f18b77 FG |
420 | derr << argv[0] << ": error opening mon data directory at '" |
421 | << g_conf->mon_data << "': " << cpp_strerror(r) << dendl; | |
7c673cae FG |
422 | exit(1); |
423 | } | |
424 | assert(r == 0); | |
425 | ||
426 | Monitor mon(g_ceph_context, g_conf->name.get_id(), &store, 0, 0, &monmap); | |
427 | r = mon.mkfs(osdmapbl); | |
428 | if (r < 0) { | |
31f18b77 | 429 | derr << argv[0] << ": error creating monfs: " << cpp_strerror(r) << dendl; |
7c673cae FG |
430 | exit(1); |
431 | } | |
432 | store.close(); | |
224ce89b WB |
433 | dout(0) << argv[0] << ": created monfs at " << g_conf->mon_data |
434 | << " for " << g_conf->name << dendl; | |
7c673cae FG |
435 | return 0; |
436 | } | |
437 | ||
438 | err = check_mon_data_exists(); | |
439 | if (err < 0 && err == -ENOENT) { | |
31f18b77 FG |
440 | derr << "monitor data directory at '" << g_conf->mon_data << "'" |
441 | << " does not exist: have you run 'mkfs'?" << dendl; | |
7c673cae FG |
442 | exit(1); |
443 | } else if (err < 0) { | |
31f18b77 FG |
444 | derr << "error accessing monitor data directory at '" |
445 | << g_conf->mon_data << "': " << cpp_strerror(-err) << dendl; | |
7c673cae FG |
446 | exit(1); |
447 | } | |
448 | ||
449 | err = check_mon_data_empty(); | |
450 | if (err == 0) { | |
451 | derr << "monitor data directory at '" << g_conf->mon_data | |
452 | << "' is empty: have you run 'mkfs'?" << dendl; | |
453 | exit(1); | |
454 | } else if (err < 0 && err != -ENOTEMPTY) { | |
455 | // we don't want an empty data dir by now | |
31f18b77 FG |
456 | derr << "error accessing '" << g_conf->mon_data << "': " |
457 | << cpp_strerror(-err) << dendl; | |
7c673cae FG |
458 | exit(1); |
459 | } | |
460 | ||
461 | { | |
462 | // check fs stats. don't start if it's critically close to full. | |
463 | ceph_data_stats_t stats; | |
464 | int err = get_fs_stats(stats, g_conf->mon_data.c_str()); | |
465 | if (err < 0) { | |
31f18b77 FG |
466 | derr << "error checking monitor data's fs stats: " << cpp_strerror(err) |
467 | << dendl; | |
7c673cae FG |
468 | exit(-err); |
469 | } | |
470 | if (stats.avail_percent <= g_conf->mon_data_avail_crit) { | |
31f18b77 | 471 | derr << "error: monitor data filesystem reached concerning levels of" |
7c673cae | 472 | << " available storage space (available: " |
1adf2230 | 473 | << stats.avail_percent << "% " << byte_u_t(stats.byte_avail) |
7c673cae FG |
474 | << ")\nyou may adjust 'mon data avail crit' to a lower value" |
475 | << " to make this go away (default: " << g_conf->mon_data_avail_crit | |
31f18b77 | 476 | << "%)\n" << dendl; |
7c673cae FG |
477 | exit(ENOSPC); |
478 | } | |
479 | } | |
480 | ||
481 | // we fork early to prevent leveldb's environment static state from | |
482 | // screwing us over | |
483 | Preforker prefork; | |
484 | if (!(flags & CINIT_FLAG_NO_DAEMON_ACTIONS)) { | |
485 | if (global_init_prefork(g_ceph_context) >= 0) { | |
486 | string err_msg; | |
487 | err = prefork.prefork(err_msg); | |
488 | if (err < 0) { | |
31f18b77 | 489 | derr << err_msg << dendl; |
7c673cae FG |
490 | prefork.exit(err); |
491 | } | |
492 | if (prefork.is_parent()) { | |
493 | err = prefork.parent_wait(err_msg); | |
494 | if (err < 0) | |
31f18b77 | 495 | derr << err_msg << dendl; |
7c673cae FG |
496 | prefork.exit(err); |
497 | } | |
224ce89b | 498 | setsid(); |
7c673cae FG |
499 | global_init_postfork_start(g_ceph_context); |
500 | } | |
501 | common_init_finish(g_ceph_context); | |
502 | global_init_chdir(g_ceph_context); | |
7c673cae FG |
503 | if (global_init_preload_erasure_code(g_ceph_context) < 0) |
504 | prefork.exit(1); | |
7c673cae FG |
505 | } |
506 | ||
507 | MonitorDBStore *store = new MonitorDBStore(g_conf->mon_data); | |
31f18b77 FG |
508 | { |
509 | ostringstream oss; | |
510 | err = store->open(oss); | |
511 | if (oss.tellp()) | |
512 | derr << oss.str() << dendl; | |
513 | if (err < 0) { | |
514 | derr << "error opening mon data directory at '" | |
515 | << g_conf->mon_data << "': " << cpp_strerror(err) << dendl; | |
516 | prefork.exit(1); | |
517 | } | |
7c673cae FG |
518 | } |
519 | ||
520 | bufferlist magicbl; | |
521 | err = store->get(Monitor::MONITOR_NAME, "magic", magicbl); | |
522 | if (err || !magicbl.length()) { | |
523 | derr << "unable to read magic from mon data" << dendl; | |
524 | prefork.exit(1); | |
525 | } | |
526 | string magic(magicbl.c_str(), magicbl.length()-1); // ignore trailing \n | |
527 | if (strcmp(magic.c_str(), CEPH_MON_ONDISK_MAGIC)) { | |
528 | derr << "mon fs magic '" << magic << "' != current '" << CEPH_MON_ONDISK_MAGIC << "'" << dendl; | |
529 | prefork.exit(1); | |
530 | } | |
531 | ||
532 | err = Monitor::check_features(store); | |
533 | if (err < 0) { | |
534 | derr << "error checking features: " << cpp_strerror(err) << dendl; | |
535 | prefork.exit(1); | |
536 | } | |
537 | ||
538 | // inject new monmap? | |
539 | if (!inject_monmap.empty()) { | |
540 | bufferlist bl; | |
541 | std::string error; | |
542 | int r = bl.read_file(inject_monmap.c_str(), &error); | |
543 | if (r) { | |
544 | derr << "unable to read monmap from " << inject_monmap << ": " | |
545 | << error << dendl; | |
546 | prefork.exit(1); | |
547 | } | |
548 | ||
549 | // get next version | |
550 | version_t v = store->get("monmap", "last_committed"); | |
551 | dout(0) << "last committed monmap epoch is " << v << ", injected map will be " << (v+1) | |
552 | << dendl; | |
553 | v++; | |
554 | ||
555 | // set the version | |
556 | MonMap tmp; | |
557 | tmp.decode(bl); | |
558 | if (tmp.get_epoch() != v) { | |
559 | dout(0) << "changing monmap epoch from " << tmp.get_epoch() | |
560 | << " to " << v << dendl; | |
561 | tmp.set_epoch(v); | |
562 | } | |
563 | bufferlist mapbl; | |
564 | tmp.encode(mapbl, CEPH_FEATURES_ALL); | |
565 | bufferlist final; | |
566 | ::encode(v, final); | |
567 | ::encode(mapbl, final); | |
568 | ||
569 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
570 | // save it | |
571 | t->put("monmap", v, mapbl); | |
572 | t->put("monmap", "latest", final); | |
573 | t->put("monmap", "last_committed", v); | |
574 | store->apply_transaction(t); | |
575 | ||
576 | dout(0) << "done." << dendl; | |
577 | prefork.exit(0); | |
578 | } | |
579 | ||
580 | // monmap? | |
581 | MonMap monmap; | |
582 | { | |
583 | // note that even if we don't find a viable monmap, we should go ahead | |
584 | // and try to build it up in the next if-else block. | |
585 | bufferlist mapbl; | |
586 | int err = obtain_monmap(*store, mapbl); | |
587 | if (err >= 0) { | |
588 | try { | |
589 | monmap.decode(mapbl); | |
590 | } catch (const buffer::error& e) { | |
31f18b77 | 591 | derr << "can't decode monmap: " << e.what() << dendl; |
7c673cae FG |
592 | } |
593 | } else { | |
594 | derr << "unable to obtain a monmap: " << cpp_strerror(err) << dendl; | |
595 | } | |
596 | if (!extract_monmap.empty()) { | |
597 | int r = mapbl.write_file(extract_monmap.c_str()); | |
598 | if (r < 0) { | |
599 | r = -errno; | |
600 | derr << "error writing monmap to " << extract_monmap << ": " << cpp_strerror(r) << dendl; | |
601 | prefork.exit(1); | |
602 | } | |
603 | derr << "wrote monmap to " << extract_monmap << dendl; | |
604 | prefork.exit(0); | |
605 | } | |
606 | } | |
607 | ||
608 | // this is what i will bind to | |
609 | entity_addr_t ipaddr; | |
610 | ||
611 | if (monmap.contains(g_conf->name.get_id())) { | |
612 | ipaddr = monmap.get_addr(g_conf->name.get_id()); | |
613 | ||
614 | // print helpful warning if the conf file doesn't match | |
615 | entity_addr_t conf_addr; | |
616 | std::vector <std::string> my_sections; | |
617 | g_conf->get_my_sections(my_sections); | |
618 | std::string mon_addr_str; | |
619 | if (g_conf->get_val_from_conf_file(my_sections, "mon addr", | |
620 | mon_addr_str, true) == 0) { | |
621 | if (conf_addr.parse(mon_addr_str.c_str()) && (ipaddr != conf_addr)) { | |
622 | derr << "WARNING: 'mon addr' config option " << conf_addr | |
623 | << " does not match monmap file" << std::endl | |
624 | << " continuing with monmap configuration" << dendl; | |
625 | } | |
626 | } | |
627 | } else { | |
628 | dout(0) << g_conf->name << " does not exist in monmap, will attempt to join an existing cluster" << dendl; | |
629 | ||
630 | pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC); | |
631 | if (!g_conf->public_addr.is_blank_ip()) { | |
632 | ipaddr = g_conf->public_addr; | |
633 | if (ipaddr.get_port() == 0) | |
634 | ipaddr.set_port(CEPH_MON_PORT); | |
635 | dout(0) << "using public_addr " << g_conf->public_addr << " -> " | |
636 | << ipaddr << dendl; | |
637 | } else { | |
638 | MonMap tmpmap; | |
31f18b77 FG |
639 | ostringstream oss; |
640 | int err = tmpmap.build_initial(g_ceph_context, oss); | |
641 | if (oss.tellp()) | |
642 | derr << oss.str() << dendl; | |
7c673cae FG |
643 | if (err < 0) { |
644 | derr << argv[0] << ": error generating initial monmap: " | |
645 | << cpp_strerror(err) << dendl; | |
646 | usage(); | |
647 | prefork.exit(1); | |
648 | } | |
649 | if (tmpmap.contains(g_conf->name.get_id())) { | |
650 | ipaddr = tmpmap.get_addr(g_conf->name.get_id()); | |
651 | } else { | |
652 | derr << "no public_addr or public_network specified, and " << g_conf->name | |
653 | << " not present in monmap or ceph.conf" << dendl; | |
654 | prefork.exit(1); | |
655 | } | |
656 | } | |
657 | } | |
658 | ||
659 | // bind | |
660 | int rank = monmap.get_rank(g_conf->name.get_id()); | |
661 | std::string public_msgr_type = g_conf->ms_public_type.empty() ? g_conf->get_val<std::string>("ms_type") : g_conf->ms_public_type; | |
662 | Messenger *msgr = Messenger::create(g_ceph_context, public_msgr_type, | |
663 | entity_name_t::MON(rank), "mon", | |
664 | 0, Messenger::HAS_MANY_CONNECTIONS); | |
665 | if (!msgr) | |
666 | exit(1); | |
667 | msgr->set_cluster_protocol(CEPH_MON_PROTOCOL); | |
668 | msgr->set_default_send_priority(CEPH_MSG_PRIO_HIGH); | |
669 | ||
670 | msgr->set_default_policy(Messenger::Policy::stateless_server(0)); | |
671 | msgr->set_policy(entity_name_t::TYPE_MON, | |
672 | Messenger::Policy::lossless_peer_reuse( | |
673 | CEPH_FEATURE_UID | | |
674 | CEPH_FEATURE_PGID64 | | |
675 | CEPH_FEATURE_MON_SINGLE_PAXOS)); | |
676 | msgr->set_policy(entity_name_t::TYPE_OSD, | |
677 | Messenger::Policy::stateless_server( | |
678 | CEPH_FEATURE_PGID64 | | |
679 | CEPH_FEATURE_OSDENC)); | |
680 | msgr->set_policy(entity_name_t::TYPE_CLIENT, | |
681 | Messenger::Policy::stateless_server(0)); | |
682 | msgr->set_policy(entity_name_t::TYPE_MDS, | |
683 | Messenger::Policy::stateless_server(0)); | |
684 | ||
685 | // throttle client traffic | |
686 | Throttle *client_throttler = new Throttle(g_ceph_context, "mon_client_bytes", | |
687 | g_conf->mon_client_bytes); | |
688 | msgr->set_policy_throttlers(entity_name_t::TYPE_CLIENT, | |
689 | client_throttler, NULL); | |
690 | ||
691 | // throttle daemon traffic | |
692 | // NOTE: actual usage on the leader may multiply by the number of | |
693 | // monitors if they forward large update messages from daemons. | |
694 | Throttle *daemon_throttler = new Throttle(g_ceph_context, "mon_daemon_bytes", | |
695 | g_conf->mon_daemon_bytes); | |
696 | msgr->set_policy_throttlers(entity_name_t::TYPE_OSD, daemon_throttler, | |
697 | NULL); | |
698 | msgr->set_policy_throttlers(entity_name_t::TYPE_MDS, daemon_throttler, | |
699 | NULL); | |
700 | ||
224ce89b WB |
701 | entity_addr_t bind_addr = ipaddr; |
702 | entity_addr_t public_addr = ipaddr; | |
703 | ||
704 | // check if the public_bind_addr option is set | |
705 | if (!g_conf->public_bind_addr.is_blank_ip()) { | |
706 | bind_addr = g_conf->public_bind_addr; | |
707 | ||
708 | // set the default port if not already set | |
709 | if (bind_addr.get_port() == 0) { | |
710 | bind_addr.set_port(CEPH_MON_PORT); | |
711 | } | |
712 | } | |
713 | ||
7c673cae | 714 | dout(0) << "starting " << g_conf->name << " rank " << rank |
224ce89b WB |
715 | << " at public addr " << public_addr |
716 | << " at bind addr " << bind_addr | |
7c673cae FG |
717 | << " mon_data " << g_conf->mon_data |
718 | << " fsid " << monmap.get_fsid() | |
719 | << dendl; | |
720 | ||
224ce89b | 721 | err = msgr->bind(bind_addr); |
7c673cae | 722 | if (err < 0) { |
224ce89b | 723 | derr << "unable to bind monitor to " << bind_addr << dendl; |
7c673cae FG |
724 | prefork.exit(1); |
725 | } | |
726 | ||
224ce89b WB |
727 | // if the public and bind addr are different set the msgr addr |
728 | // to the public one, now that the bind is complete. | |
729 | if (public_addr != bind_addr) { | |
730 | msgr->set_addr(public_addr); | |
731 | } | |
732 | ||
7c673cae FG |
733 | Messenger *mgr_msgr = Messenger::create(g_ceph_context, public_msgr_type, |
734 | entity_name_t::MON(rank), "mon-mgrc", | |
735 | getpid(), 0); | |
736 | if (!mgr_msgr) { | |
737 | derr << "unable to create mgr_msgr" << dendl; | |
738 | prefork.exit(1); | |
739 | } | |
740 | ||
224ce89b | 741 | dout(0) << "starting " << g_conf->name << " rank " << rank |
7c673cae FG |
742 | << " at " << ipaddr |
743 | << " mon_data " << g_conf->mon_data | |
744 | << " fsid " << monmap.get_fsid() | |
224ce89b | 745 | << dendl; |
7c673cae FG |
746 | |
747 | // start monitor | |
748 | mon = new Monitor(g_ceph_context, g_conf->name.get_id(), store, | |
749 | msgr, mgr_msgr, &monmap); | |
750 | ||
751 | if (force_sync) { | |
752 | derr << "flagging a forced sync ..." << dendl; | |
31f18b77 FG |
753 | ostringstream oss; |
754 | mon->sync_force(NULL, oss); | |
755 | if (oss.tellp()) | |
756 | derr << oss.str() << dendl; | |
7c673cae FG |
757 | } |
758 | ||
759 | err = mon->preinit(); | |
760 | if (err < 0) { | |
761 | derr << "failed to initialize" << dendl; | |
762 | prefork.exit(1); | |
763 | } | |
764 | ||
765 | if (compact || g_conf->mon_compact_on_start) { | |
766 | derr << "compacting monitor store ..." << dendl; | |
767 | mon->store->compact(); | |
768 | derr << "done compacting" << dendl; | |
769 | } | |
770 | ||
771 | if (g_conf->daemonize) { | |
772 | global_init_postfork_finish(g_ceph_context); | |
773 | prefork.daemonize(); | |
774 | } | |
775 | ||
776 | msgr->start(); | |
777 | mgr_msgr->start(); | |
778 | ||
779 | mon->init(); | |
780 | ||
781 | // set up signal handlers, now that we've daemonized/forked. | |
782 | init_async_signal_handler(); | |
783 | register_async_signal_handler(SIGHUP, sighup_handler); | |
784 | register_async_signal_handler_oneshot(SIGINT, handle_mon_signal); | |
785 | register_async_signal_handler_oneshot(SIGTERM, handle_mon_signal); | |
786 | ||
787 | if (g_conf->inject_early_sigterm) | |
788 | kill(getpid(), SIGTERM); | |
789 | ||
790 | msgr->wait(); | |
791 | mgr_msgr->wait(); | |
792 | ||
793 | store->close(); | |
794 | ||
795 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
796 | unregister_async_signal_handler(SIGINT, handle_mon_signal); | |
797 | unregister_async_signal_handler(SIGTERM, handle_mon_signal); | |
798 | shutdown_async_signal_handler(); | |
799 | ||
800 | delete mon; | |
801 | delete store; | |
802 | delete msgr; | |
803 | delete mgr_msgr; | |
804 | delete client_throttler; | |
805 | delete daemon_throttler; | |
806 | ||
807 | // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. | |
808 | char s[20]; | |
809 | snprintf(s, sizeof(s), "gmon/%d", getpid()); | |
810 | if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) { | |
811 | dout(0) << "ceph-mon: gmon.out should be in " << s << dendl; | |
812 | } | |
813 | ||
814 | prefork.signal_exit(0); | |
815 | return 0; | |
816 | } |