]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include <sys/types.h> | |
16 | #include <sys/stat.h> | |
17 | #include <fcntl.h> | |
18 | ||
19 | #include <iostream> | |
20 | #include <string> | |
7c673cae FG |
21 | |
22 | #include "common/config.h" | |
23 | #include "include/ceph_features.h" | |
24 | ||
25 | #include "mon/MonMap.h" | |
26 | #include "mon/Monitor.h" | |
27 | #include "mon/MonitorDBStore.h" | |
28 | #include "mon/MonClient.h" | |
29 | ||
30 | #include "msg/Messenger.h" | |
31 | ||
32 | #include "include/CompatSet.h" | |
33 | ||
34 | #include "common/ceph_argparse.h" | |
35 | #include "common/pick_address.h" | |
11fdf7f2 | 36 | #include "common/Throttle.h" |
7c673cae FG |
37 | #include "common/Timer.h" |
38 | #include "common/errno.h" | |
39 | #include "common/Preforker.h" | |
40 | ||
41 | #include "global/global_init.h" | |
42 | #include "global/signal_handler.h" | |
43 | ||
44 | #include "perfglue/heap_profiler.h" | |
45 | ||
11fdf7f2 | 46 | #include "include/ceph_assert.h" |
7c673cae FG |
47 | |
48 | #define dout_subsys ceph_subsys_mon | |
49 | ||
50 | Monitor *mon = NULL; | |
51 | ||
52 | void handle_mon_signal(int signum) | |
53 | { | |
54 | if (mon) | |
55 | mon->handle_signal(signum); | |
56 | } | |
57 | ||
58 | ||
59 | int obtain_monmap(MonitorDBStore &store, bufferlist &bl) | |
60 | { | |
61 | dout(10) << __func__ << dendl; | |
62 | /* | |
63 | * the monmap may be in one of three places: | |
64 | * 'monmap:<latest_version_no>' - the monmap we'd really like to have | |
65 | * 'mon_sync:latest_monmap' - last monmap backed up for the last sync | |
66 | * 'mkfs:monmap' - a monmap resulting from mkfs | |
67 | */ | |
68 | ||
69 | if (store.exists("monmap", "last_committed")) { | |
70 | version_t latest_ver = store.get("monmap", "last_committed"); | |
71 | if (store.exists("monmap", latest_ver)) { | |
72 | int err = store.get("monmap", latest_ver, bl); | |
11fdf7f2 TL |
73 | ceph_assert(err == 0); |
74 | ceph_assert(bl.length() > 0); | |
7c673cae FG |
75 | dout(10) << __func__ << " read last committed monmap ver " |
76 | << latest_ver << dendl; | |
77 | return 0; | |
78 | } | |
79 | } | |
80 | ||
81 | if (store.exists("mon_sync", "in_sync") | |
82 | || store.exists("mon_sync", "force_sync")) { | |
83 | dout(10) << __func__ << " detected aborted sync" << dendl; | |
84 | if (store.exists("mon_sync", "latest_monmap")) { | |
85 | int err = store.get("mon_sync", "latest_monmap", bl); | |
11fdf7f2 TL |
86 | ceph_assert(err == 0); |
87 | ceph_assert(bl.length() > 0); | |
7c673cae FG |
88 | dout(10) << __func__ << " read backup monmap" << dendl; |
89 | return 0; | |
90 | } | |
91 | } | |
92 | ||
93 | if (store.exists("mkfs", "monmap")) { | |
94 | dout(10) << __func__ << " found mkfs monmap" << dendl; | |
95 | int err = store.get("mkfs", "monmap", bl); | |
11fdf7f2 TL |
96 | ceph_assert(err == 0); |
97 | ceph_assert(bl.length() > 0); | |
7c673cae FG |
98 | return 0; |
99 | } | |
100 | ||
101 | derr << __func__ << " unable to find a monmap" << dendl; | |
102 | return -ENOENT; | |
103 | } | |
104 | ||
105 | int check_mon_data_exists() | |
106 | { | |
11fdf7f2 | 107 | string mon_data = g_conf()->mon_data; |
7c673cae FG |
108 | struct stat buf; |
109 | if (::stat(mon_data.c_str(), &buf)) { | |
110 | if (errno != ENOENT) { | |
31f18b77 | 111 | derr << "stat(" << mon_data << ") " << cpp_strerror(errno) << dendl; |
7c673cae FG |
112 | } |
113 | return -errno; | |
114 | } | |
115 | return 0; | |
116 | } | |
117 | ||
118 | /** Check whether **mon data** is empty. | |
119 | * | |
120 | * Being empty means mkfs has not been run and there's no monitor setup | |
11fdf7f2 | 121 | * at **g_conf()->mon_data**. |
7c673cae | 122 | * |
11fdf7f2 | 123 | * If the directory g_conf()->mon_data is not empty we will return -ENOTEMPTY. |
7c673cae FG |
124 | * Otherwise we will return 0. Any other negative returns will represent |
125 | * a failure to be handled by the caller. | |
126 | * | |
127 | * @return **0** on success, -ENOTEMPTY if not empty or **-errno** otherwise. | |
128 | */ | |
129 | int check_mon_data_empty() | |
130 | { | |
11fdf7f2 | 131 | string mon_data = g_conf()->mon_data; |
7c673cae FG |
132 | |
133 | DIR *dir = ::opendir(mon_data.c_str()); | |
134 | if (!dir) { | |
31f18b77 | 135 | derr << "opendir(" << mon_data << ") " << cpp_strerror(errno) << dendl; |
7c673cae FG |
136 | return -errno; |
137 | } | |
138 | int code = 0; | |
139 | struct dirent *de = nullptr; | |
140 | errno = 0; | |
141 | while ((de = ::readdir(dir))) { | |
142 | if (string(".") != de->d_name && | |
143 | string("..") != de->d_name && | |
144 | string("kv_backend") != de->d_name) { | |
145 | code = -ENOTEMPTY; | |
146 | break; | |
147 | } | |
148 | } | |
149 | if (!de && errno) { | |
31f18b77 | 150 | derr << "readdir(" << mon_data << ") " << cpp_strerror(errno) << dendl; |
7c673cae FG |
151 | code = -errno; |
152 | } | |
153 | ||
154 | ::closedir(dir); | |
155 | ||
156 | return code; | |
157 | } | |
158 | ||
159 | static void usage() | |
160 | { | |
31f18b77 FG |
161 | cout << "usage: ceph-mon -i <ID> [flags]\n" |
162 | << " --debug_mon n\n" | |
163 | << " debug monitor level (e.g. 10)\n" | |
164 | << " --mkfs\n" | |
165 | << " build fresh monitor fs\n" | |
166 | << " --force-sync\n" | |
167 | << " force a sync from another mon by wiping local data (BE CAREFUL)\n" | |
168 | << " --yes-i-really-mean-it\n" | |
169 | << " mandatory safeguard for --force-sync\n" | |
170 | << " --compact\n" | |
171 | << " compact the monitor store\n" | |
172 | << " --osdmap <filename>\n" | |
173 | << " only used when --mkfs is provided: load the osdmap from <filename>\n" | |
174 | << " --inject-monmap <filename>\n" | |
175 | << " write the <filename> monmap to the local monitor store and exit\n" | |
176 | << " --extract-monmap <filename>\n" | |
177 | << " extract the monmap from the local monitor store and exit\n" | |
178 | << " --mon-data <directory>\n" | |
179 | << " where the mon store and keyring are located\n" | |
180 | << std::endl; | |
7c673cae FG |
181 | generic_server_usage(); |
182 | } | |
183 | ||
11fdf7f2 TL |
184 | entity_addrvec_t make_mon_addrs(entity_addr_t a) |
185 | { | |
186 | entity_addrvec_t addrs; | |
187 | if (a.get_port() == 0) { | |
188 | a.set_type(entity_addr_t::TYPE_MSGR2); | |
189 | a.set_port(CEPH_MON_PORT_IANA); | |
190 | addrs.v.push_back(a); | |
191 | a.set_type(entity_addr_t::TYPE_LEGACY); | |
192 | a.set_port(CEPH_MON_PORT_LEGACY); | |
193 | addrs.v.push_back(a); | |
194 | } else if (a.get_port() == CEPH_MON_PORT_LEGACY) { | |
195 | a.set_type(entity_addr_t::TYPE_LEGACY); | |
196 | addrs.v.push_back(a); | |
92f5a8d4 | 197 | } else if (a.get_type() == entity_addr_t::TYPE_ANY) { |
11fdf7f2 TL |
198 | a.set_type(entity_addr_t::TYPE_MSGR2); |
199 | addrs.v.push_back(a); | |
92f5a8d4 TL |
200 | } else { |
201 | addrs.v.push_back(a); | |
11fdf7f2 TL |
202 | } |
203 | return addrs; | |
204 | } | |
205 | ||
7c673cae | 206 | int main(int argc, const char **argv) |
7c673cae | 207 | { |
11fdf7f2 TL |
208 | // reset our process name, in case we did a respawn, so that it's not |
209 | // left as "exe". | |
210 | ceph_pthread_setname(pthread_self(), "ceph-mon"); | |
211 | ||
7c673cae FG |
212 | int err; |
213 | ||
214 | bool mkfs = false; | |
215 | bool compact = false; | |
216 | bool force_sync = false; | |
217 | bool yes_really = false; | |
218 | std::string osdmapfn, inject_monmap, extract_monmap; | |
219 | ||
220 | vector<const char*> args; | |
221 | argv_to_vec(argc, argv, args); | |
11fdf7f2 TL |
222 | if (args.empty()) { |
223 | cerr << argv[0] << ": -h or --help for usage" << std::endl; | |
224 | exit(1); | |
225 | } | |
226 | if (ceph_argparse_need_usage(args)) { | |
227 | usage(); | |
228 | exit(0); | |
229 | } | |
7c673cae FG |
230 | |
231 | // We need to specify some default values that may be overridden by the | |
232 | // user, that are specific to the monitor. The options we are overriding | |
233 | // are also used on the OSD (or in any other component that uses leveldb), | |
c07f9fc5 | 234 | // so changing the global defaults is not an option. |
7c673cae | 235 | // This is not the prettiest way of doing this, especially since it has us |
c07f9fc5 FG |
236 | // having a different place defining default values, but it's not horribly |
237 | // wrong enough to prevent us from doing it :) | |
7c673cae FG |
238 | // |
239 | // NOTE: user-defined options will take precedence over ours. | |
240 | // | |
241 | // leveldb_write_buffer_size = 32*1024*1024 = 33554432 // 32MB | |
242 | // leveldb_cache_size = 512*1024*1204 = 536870912 // 512MB | |
243 | // leveldb_block_size = 64*1024 = 65536 // 64KB | |
244 | // leveldb_compression = false | |
245 | // leveldb_log = "" | |
11fdf7f2 TL |
246 | map<string,string> defaults = { |
247 | { "leveldb_write_buffer_size", "33554432" }, | |
248 | { "leveldb_cache_size", "536870912" }, | |
249 | { "leveldb_block_size", "65536" }, | |
250 | { "leveldb_compression", "false"}, | |
251 | { "leveldb_log", "" }, | |
252 | { "keyring", "$mon_data/keyring" }, | |
253 | }; | |
7c673cae FG |
254 | |
255 | int flags = 0; | |
256 | { | |
257 | vector<const char*> args_copy = args; | |
258 | std::string val; | |
259 | for (std::vector<const char*>::iterator i = args_copy.begin(); | |
260 | i != args_copy.end(); ) { | |
261 | if (ceph_argparse_double_dash(args_copy, i)) { | |
262 | break; | |
263 | } else if (ceph_argparse_flag(args_copy, i, "--mkfs", (char*)NULL)) { | |
264 | flags |= CINIT_FLAG_NO_DAEMON_ACTIONS; | |
265 | } else if (ceph_argparse_witharg(args_copy, i, &val, "--inject_monmap", (char*)NULL)) { | |
266 | flags |= CINIT_FLAG_NO_DAEMON_ACTIONS; | |
267 | } else if (ceph_argparse_witharg(args_copy, i, &val, "--extract-monmap", (char*)NULL)) { | |
268 | flags |= CINIT_FLAG_NO_DAEMON_ACTIONS; | |
269 | } else { | |
270 | ++i; | |
271 | } | |
272 | } | |
273 | } | |
274 | ||
11fdf7f2 TL |
275 | // don't try to get config from mon cluster during startup |
276 | flags |= CINIT_FLAG_NO_MON_CONFIG; | |
277 | ||
278 | auto cct = global_init(&defaults, args, | |
7c673cae FG |
279 | CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON, |
280 | flags, "mon_data"); | |
281 | ceph_heap_profiler_init(); | |
282 | ||
7c673cae FG |
283 | std::string val; |
284 | for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) { | |
285 | if (ceph_argparse_double_dash(args, i)) { | |
286 | break; | |
7c673cae FG |
287 | } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) { |
288 | mkfs = true; | |
289 | } else if (ceph_argparse_flag(args, i, "--compact", (char*)NULL)) { | |
290 | compact = true; | |
291 | } else if (ceph_argparse_flag(args, i, "--force-sync", (char*)NULL)) { | |
292 | force_sync = true; | |
293 | } else if (ceph_argparse_flag(args, i, "--yes-i-really-mean-it", (char*)NULL)) { | |
294 | yes_really = true; | |
295 | } else if (ceph_argparse_witharg(args, i, &val, "--osdmap", (char*)NULL)) { | |
296 | osdmapfn = val; | |
297 | } else if (ceph_argparse_witharg(args, i, &val, "--inject_monmap", (char*)NULL)) { | |
298 | inject_monmap = val; | |
299 | } else if (ceph_argparse_witharg(args, i, &val, "--extract-monmap", (char*)NULL)) { | |
300 | extract_monmap = val; | |
301 | } else { | |
302 | ++i; | |
303 | } | |
304 | } | |
305 | if (!args.empty()) { | |
11fdf7f2 TL |
306 | cerr << "too many arguments: " << args << std::endl; |
307 | exit(1); | |
7c673cae FG |
308 | } |
309 | ||
310 | if (force_sync && !yes_really) { | |
11fdf7f2 TL |
311 | cerr << "are you SURE you want to force a sync? this will erase local data and may\n" |
312 | << "break your mon cluster. pass --yes-i-really-mean-it if you do." << std::endl; | |
7c673cae FG |
313 | exit(1); |
314 | } | |
315 | ||
11fdf7f2 TL |
316 | if (g_conf()->mon_data.empty()) { |
317 | cerr << "must specify '--mon-data=foo' data path" << std::endl; | |
318 | exit(1); | |
7c673cae FG |
319 | } |
320 | ||
11fdf7f2 TL |
321 | if (g_conf()->name.get_id().empty()) { |
322 | cerr << "must specify id (--id <id> or --name mon.<id>)" << std::endl; | |
323 | exit(1); | |
7c673cae FG |
324 | } |
325 | ||
326 | // -- mkfs -- | |
327 | if (mkfs) { | |
328 | ||
329 | int err = check_mon_data_exists(); | |
330 | if (err == -ENOENT) { | |
11fdf7f2 TL |
331 | if (::mkdir(g_conf()->mon_data.c_str(), 0755)) { |
332 | derr << "mkdir(" << g_conf()->mon_data << ") : " | |
31f18b77 | 333 | << cpp_strerror(errno) << dendl; |
7c673cae FG |
334 | exit(1); |
335 | } | |
336 | } else if (err < 0) { | |
11fdf7f2 | 337 | derr << "error opening '" << g_conf()->mon_data << "': " |
31f18b77 | 338 | << cpp_strerror(-err) << dendl; |
7c673cae FG |
339 | exit(-err); |
340 | } | |
341 | ||
342 | err = check_mon_data_empty(); | |
343 | if (err == -ENOTEMPTY) { | |
344 | // Mon may exist. Let the user know and exit gracefully. | |
11fdf7f2 | 345 | derr << "'" << g_conf()->mon_data << "' already exists and is not empty" |
31f18b77 | 346 | << ": monitor may already exist" << dendl; |
7c673cae FG |
347 | exit(0); |
348 | } else if (err < 0) { | |
11fdf7f2 | 349 | derr << "error checking if '" << g_conf()->mon_data << "' is empty: " |
31f18b77 | 350 | << cpp_strerror(-err) << dendl; |
7c673cae FG |
351 | exit(-err); |
352 | } | |
353 | ||
354 | // resolve public_network -> public_addr | |
355 | pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC); | |
356 | ||
11fdf7f2 TL |
357 | dout(10) << "public_network " << g_conf()->public_network << dendl; |
358 | dout(10) << "public_addr " << g_conf()->public_network << dendl; | |
359 | ||
7c673cae FG |
360 | common_init_finish(g_ceph_context); |
361 | ||
362 | bufferlist monmapbl, osdmapbl; | |
363 | std::string error; | |
364 | MonMap monmap; | |
365 | ||
366 | // load or generate monmap | |
11fdf7f2 | 367 | const auto monmap_fn = g_conf().get_val<string>("monmap"); |
3efd9988 FG |
368 | if (monmap_fn.length()) { |
369 | int err = monmapbl.read_file(monmap_fn.c_str(), &error); | |
7c673cae | 370 | if (err < 0) { |
3efd9988 | 371 | derr << argv[0] << ": error reading " << monmap_fn << ": " << error << dendl; |
7c673cae FG |
372 | exit(1); |
373 | } | |
374 | try { | |
375 | monmap.decode(monmapbl); | |
376 | ||
377 | // always mark seed/mkfs monmap as epoch 0 | |
378 | monmap.set_epoch(0); | |
3efd9988 FG |
379 | } catch (const buffer::error& e) { |
380 | derr << argv[0] << ": error decoding monmap " << monmap_fn << ": " << e.what() << dendl; | |
7c673cae | 381 | exit(1); |
11fdf7f2 TL |
382 | } |
383 | ||
384 | dout(1) << "imported monmap:\n"; | |
385 | monmap.print(*_dout); | |
386 | *_dout << dendl; | |
387 | ||
7c673cae | 388 | } else { |
31f18b77 | 389 | ostringstream oss; |
11fdf7f2 | 390 | int err = monmap.build_initial(g_ceph_context, true, oss); |
31f18b77 FG |
391 | if (oss.tellp()) |
392 | derr << oss.str() << dendl; | |
7c673cae | 393 | if (err < 0) { |
31f18b77 | 394 | derr << argv[0] << ": warning: no initial monitors; must use admin socket to feed hints" << dendl; |
7c673cae FG |
395 | } |
396 | ||
11fdf7f2 TL |
397 | dout(1) << "initial generated monmap:\n"; |
398 | monmap.print(*_dout); | |
399 | *_dout << dendl; | |
400 | ||
7c673cae | 401 | // am i part of the initial quorum? |
11fdf7f2 | 402 | if (monmap.contains(g_conf()->name.get_id())) { |
7c673cae FG |
403 | // hmm, make sure the ip listed exists on the current host? |
404 | // maybe later. | |
11fdf7f2 TL |
405 | } else if (!g_conf()->public_addr.is_blank_ip()) { |
406 | entity_addrvec_t av = make_mon_addrs(g_conf()->public_addr); | |
407 | string name; | |
408 | if (monmap.contains(av, &name)) { | |
409 | monmap.rename(name, g_conf()->name.get_id()); | |
410 | dout(0) << argv[0] << ": renaming mon." << name << " " << av | |
411 | << " to mon." << g_conf()->name.get_id() << dendl; | |
7c673cae FG |
412 | } |
413 | } else { | |
414 | // is a local address listed without a name? if so, name myself. | |
415 | list<entity_addr_t> ls; | |
416 | monmap.list_addrs(ls); | |
11fdf7f2 TL |
417 | dout(0) << " monmap addrs are " << ls << ", checking if any are local" |
418 | << dendl; | |
7c673cae | 419 | |
11fdf7f2 | 420 | entity_addr_t local; |
7c673cae | 421 | if (have_local_addr(g_ceph_context, ls, &local)) { |
11fdf7f2 | 422 | dout(0) << " have local addr " << local << dendl; |
7c673cae | 423 | string name; |
11fdf7f2 TL |
424 | local.set_type(entity_addr_t::TYPE_MSGR2); |
425 | if (!monmap.get_addr_name(local, name)) { | |
426 | local.set_type(entity_addr_t::TYPE_LEGACY); | |
427 | if (!monmap.get_addr_name(local, name)) { | |
428 | dout(0) << "no local addresses appear in bootstrap monmap" | |
429 | << dendl; | |
430 | } | |
431 | } | |
7c673cae | 432 | if (name.compare(0, 7, "noname-") == 0) { |
224ce89b | 433 | dout(0) << argv[0] << ": mon." << name << " " << local |
11fdf7f2 TL |
434 | << " is local, renaming to mon." << g_conf()->name.get_id() |
435 | << dendl; | |
436 | monmap.rename(name, g_conf()->name.get_id()); | |
437 | } else if (name.size()) { | |
224ce89b | 438 | dout(0) << argv[0] << ": mon." << name << " " << local |
11fdf7f2 TL |
439 | << " is local, but not 'noname-' + something; " |
440 | << "not assuming it's me" << dendl; | |
7c673cae | 441 | } |
11fdf7f2 TL |
442 | } else { |
443 | dout(0) << " no local addrs match monmap" << dendl; | |
7c673cae FG |
444 | } |
445 | } | |
446 | } | |
447 | ||
11fdf7f2 | 448 | const auto fsid = g_conf().get_val<uuid_d>("fsid"); |
3efd9988 FG |
449 | if (!fsid.is_zero()) { |
450 | monmap.fsid = fsid; | |
451 | dout(0) << argv[0] << ": set fsid to " << fsid << dendl; | |
7c673cae FG |
452 | } |
453 | ||
454 | if (monmap.fsid.is_zero()) { | |
31f18b77 | 455 | derr << argv[0] << ": generated monmap has no fsid; use '--fsid <uuid>'" << dendl; |
7c673cae FG |
456 | exit(10); |
457 | } | |
458 | ||
459 | //monmap.print(cout); | |
460 | ||
461 | // osdmap | |
462 | if (osdmapfn.length()) { | |
463 | err = osdmapbl.read_file(osdmapfn.c_str(), &error); | |
464 | if (err < 0) { | |
31f18b77 FG |
465 | derr << argv[0] << ": error reading " << osdmapfn << ": " |
466 | << error << dendl; | |
7c673cae FG |
467 | exit(1); |
468 | } | |
469 | } | |
470 | ||
471 | // go | |
11fdf7f2 | 472 | MonitorDBStore store(g_conf()->mon_data); |
31f18b77 FG |
473 | ostringstream oss; |
474 | int r = store.create_and_open(oss); | |
475 | if (oss.tellp()) | |
476 | derr << oss.str() << dendl; | |
7c673cae | 477 | if (r < 0) { |
31f18b77 | 478 | derr << argv[0] << ": error opening mon data directory at '" |
11fdf7f2 | 479 | << g_conf()->mon_data << "': " << cpp_strerror(r) << dendl; |
7c673cae FG |
480 | exit(1); |
481 | } | |
11fdf7f2 | 482 | ceph_assert(r == 0); |
7c673cae | 483 | |
11fdf7f2 | 484 | Monitor mon(g_ceph_context, g_conf()->name.get_id(), &store, 0, 0, &monmap); |
7c673cae FG |
485 | r = mon.mkfs(osdmapbl); |
486 | if (r < 0) { | |
31f18b77 | 487 | derr << argv[0] << ": error creating monfs: " << cpp_strerror(r) << dendl; |
7c673cae FG |
488 | exit(1); |
489 | } | |
490 | store.close(); | |
11fdf7f2 TL |
491 | dout(0) << argv[0] << ": created monfs at " << g_conf()->mon_data |
492 | << " for " << g_conf()->name << dendl; | |
7c673cae FG |
493 | return 0; |
494 | } | |
495 | ||
496 | err = check_mon_data_exists(); | |
497 | if (err < 0 && err == -ENOENT) { | |
11fdf7f2 | 498 | derr << "monitor data directory at '" << g_conf()->mon_data << "'" |
31f18b77 | 499 | << " does not exist: have you run 'mkfs'?" << dendl; |
7c673cae FG |
500 | exit(1); |
501 | } else if (err < 0) { | |
31f18b77 | 502 | derr << "error accessing monitor data directory at '" |
11fdf7f2 | 503 | << g_conf()->mon_data << "': " << cpp_strerror(-err) << dendl; |
7c673cae FG |
504 | exit(1); |
505 | } | |
506 | ||
507 | err = check_mon_data_empty(); | |
508 | if (err == 0) { | |
11fdf7f2 | 509 | derr << "monitor data directory at '" << g_conf()->mon_data |
7c673cae FG |
510 | << "' is empty: have you run 'mkfs'?" << dendl; |
511 | exit(1); | |
512 | } else if (err < 0 && err != -ENOTEMPTY) { | |
513 | // we don't want an empty data dir by now | |
11fdf7f2 | 514 | derr << "error accessing '" << g_conf()->mon_data << "': " |
31f18b77 | 515 | << cpp_strerror(-err) << dendl; |
7c673cae FG |
516 | exit(1); |
517 | } | |
518 | ||
519 | { | |
520 | // check fs stats. don't start if it's critically close to full. | |
521 | ceph_data_stats_t stats; | |
11fdf7f2 | 522 | int err = get_fs_stats(stats, g_conf()->mon_data.c_str()); |
7c673cae | 523 | if (err < 0) { |
31f18b77 FG |
524 | derr << "error checking monitor data's fs stats: " << cpp_strerror(err) |
525 | << dendl; | |
7c673cae FG |
526 | exit(-err); |
527 | } | |
11fdf7f2 | 528 | if (stats.avail_percent <= g_conf()->mon_data_avail_crit) { |
31f18b77 | 529 | derr << "error: monitor data filesystem reached concerning levels of" |
7c673cae | 530 | << " available storage space (available: " |
1adf2230 | 531 | << stats.avail_percent << "% " << byte_u_t(stats.byte_avail) |
7c673cae | 532 | << ")\nyou may adjust 'mon data avail crit' to a lower value" |
11fdf7f2 | 533 | << " to make this go away (default: " << g_conf()->mon_data_avail_crit |
31f18b77 | 534 | << "%)\n" << dendl; |
7c673cae FG |
535 | exit(ENOSPC); |
536 | } | |
537 | } | |
538 | ||
539 | // we fork early to prevent leveldb's environment static state from | |
540 | // screwing us over | |
541 | Preforker prefork; | |
542 | if (!(flags & CINIT_FLAG_NO_DAEMON_ACTIONS)) { | |
543 | if (global_init_prefork(g_ceph_context) >= 0) { | |
544 | string err_msg; | |
545 | err = prefork.prefork(err_msg); | |
546 | if (err < 0) { | |
31f18b77 | 547 | derr << err_msg << dendl; |
7c673cae FG |
548 | prefork.exit(err); |
549 | } | |
550 | if (prefork.is_parent()) { | |
551 | err = prefork.parent_wait(err_msg); | |
552 | if (err < 0) | |
31f18b77 | 553 | derr << err_msg << dendl; |
7c673cae FG |
554 | prefork.exit(err); |
555 | } | |
224ce89b | 556 | setsid(); |
7c673cae FG |
557 | global_init_postfork_start(g_ceph_context); |
558 | } | |
559 | common_init_finish(g_ceph_context); | |
560 | global_init_chdir(g_ceph_context); | |
7c673cae FG |
561 | if (global_init_preload_erasure_code(g_ceph_context) < 0) |
562 | prefork.exit(1); | |
7c673cae FG |
563 | } |
564 | ||
11fdf7f2 TL |
565 | // set up signal handlers, now that we've daemonized/forked. |
566 | init_async_signal_handler(); | |
567 | register_async_signal_handler(SIGHUP, sighup_handler); | |
568 | ||
569 | MonitorDBStore *store = new MonitorDBStore(g_conf()->mon_data); | |
570 | ||
571 | // make sure we aren't upgrading too fast | |
572 | { | |
573 | string val; | |
574 | int r = store->read_meta("min_mon_release", &val); | |
575 | if (r >= 0 && val.size()) { | |
576 | int min = atoi(val.c_str()); | |
577 | if (min && | |
578 | min + 2 < (int)ceph_release()) { | |
579 | derr << "recorded min_mon_release is " << min | |
580 | << " (" << ceph_release_name(min) | |
581 | << ") which is >2 releases older than installed " | |
582 | << ceph_release() << " (" << ceph_release_name(ceph_release()) | |
583 | << "); you can only upgrade 2 releases at a time" << dendl; | |
584 | derr << "you should first upgrade to " | |
585 | << (min + 1) << " (" << ceph_release_name(min + 1) << ") or " | |
586 | << (min + 2) << " (" << ceph_release_name(min + 2) << ")" << dendl; | |
587 | prefork.exit(1); | |
588 | } | |
589 | } | |
590 | } | |
591 | ||
31f18b77 FG |
592 | { |
593 | ostringstream oss; | |
594 | err = store->open(oss); | |
595 | if (oss.tellp()) | |
596 | derr << oss.str() << dendl; | |
597 | if (err < 0) { | |
598 | derr << "error opening mon data directory at '" | |
11fdf7f2 | 599 | << g_conf()->mon_data << "': " << cpp_strerror(err) << dendl; |
31f18b77 FG |
600 | prefork.exit(1); |
601 | } | |
7c673cae FG |
602 | } |
603 | ||
604 | bufferlist magicbl; | |
605 | err = store->get(Monitor::MONITOR_NAME, "magic", magicbl); | |
606 | if (err || !magicbl.length()) { | |
607 | derr << "unable to read magic from mon data" << dendl; | |
608 | prefork.exit(1); | |
609 | } | |
610 | string magic(magicbl.c_str(), magicbl.length()-1); // ignore trailing \n | |
611 | if (strcmp(magic.c_str(), CEPH_MON_ONDISK_MAGIC)) { | |
612 | derr << "mon fs magic '" << magic << "' != current '" << CEPH_MON_ONDISK_MAGIC << "'" << dendl; | |
613 | prefork.exit(1); | |
614 | } | |
615 | ||
616 | err = Monitor::check_features(store); | |
617 | if (err < 0) { | |
618 | derr << "error checking features: " << cpp_strerror(err) << dendl; | |
619 | prefork.exit(1); | |
620 | } | |
621 | ||
622 | // inject new monmap? | |
623 | if (!inject_monmap.empty()) { | |
624 | bufferlist bl; | |
625 | std::string error; | |
626 | int r = bl.read_file(inject_monmap.c_str(), &error); | |
627 | if (r) { | |
628 | derr << "unable to read monmap from " << inject_monmap << ": " | |
629 | << error << dendl; | |
630 | prefork.exit(1); | |
631 | } | |
632 | ||
633 | // get next version | |
634 | version_t v = store->get("monmap", "last_committed"); | |
635 | dout(0) << "last committed monmap epoch is " << v << ", injected map will be " << (v+1) | |
636 | << dendl; | |
637 | v++; | |
638 | ||
639 | // set the version | |
640 | MonMap tmp; | |
641 | tmp.decode(bl); | |
642 | if (tmp.get_epoch() != v) { | |
643 | dout(0) << "changing monmap epoch from " << tmp.get_epoch() | |
644 | << " to " << v << dendl; | |
645 | tmp.set_epoch(v); | |
646 | } | |
647 | bufferlist mapbl; | |
648 | tmp.encode(mapbl, CEPH_FEATURES_ALL); | |
649 | bufferlist final; | |
11fdf7f2 TL |
650 | encode(v, final); |
651 | encode(mapbl, final); | |
7c673cae FG |
652 | |
653 | auto t(std::make_shared<MonitorDBStore::Transaction>()); | |
654 | // save it | |
655 | t->put("monmap", v, mapbl); | |
656 | t->put("monmap", "latest", final); | |
657 | t->put("monmap", "last_committed", v); | |
658 | store->apply_transaction(t); | |
659 | ||
660 | dout(0) << "done." << dendl; | |
661 | prefork.exit(0); | |
662 | } | |
663 | ||
664 | // monmap? | |
665 | MonMap monmap; | |
666 | { | |
667 | // note that even if we don't find a viable monmap, we should go ahead | |
668 | // and try to build it up in the next if-else block. | |
669 | bufferlist mapbl; | |
670 | int err = obtain_monmap(*store, mapbl); | |
671 | if (err >= 0) { | |
672 | try { | |
673 | monmap.decode(mapbl); | |
674 | } catch (const buffer::error& e) { | |
31f18b77 | 675 | derr << "can't decode monmap: " << e.what() << dendl; |
7c673cae FG |
676 | } |
677 | } else { | |
678 | derr << "unable to obtain a monmap: " << cpp_strerror(err) << dendl; | |
679 | } | |
11fdf7f2 TL |
680 | |
681 | dout(10) << __func__ << " monmap:\n"; | |
682 | JSONFormatter jf(true); | |
683 | jf.dump_object("monmap", monmap); | |
684 | jf.flush(*_dout); | |
685 | *_dout << dendl; | |
686 | ||
7c673cae FG |
687 | if (!extract_monmap.empty()) { |
688 | int r = mapbl.write_file(extract_monmap.c_str()); | |
689 | if (r < 0) { | |
690 | r = -errno; | |
691 | derr << "error writing monmap to " << extract_monmap << ": " << cpp_strerror(r) << dendl; | |
692 | prefork.exit(1); | |
693 | } | |
694 | derr << "wrote monmap to " << extract_monmap << dendl; | |
695 | prefork.exit(0); | |
696 | } | |
697 | } | |
698 | ||
699 | // this is what i will bind to | |
11fdf7f2 | 700 | entity_addrvec_t ipaddrs; |
7c673cae | 701 | |
11fdf7f2 TL |
702 | if (monmap.contains(g_conf()->name.get_id())) { |
703 | ipaddrs = monmap.get_addrs(g_conf()->name.get_id()); | |
7c673cae FG |
704 | |
705 | // print helpful warning if the conf file doesn't match | |
7c673cae | 706 | std::vector <std::string> my_sections; |
11fdf7f2 | 707 | g_conf().get_my_sections(my_sections); |
7c673cae | 708 | std::string mon_addr_str; |
11fdf7f2 | 709 | if (g_conf().get_val_from_conf_file(my_sections, "mon addr", |
7c673cae | 710 | mon_addr_str, true) == 0) { |
11fdf7f2 TL |
711 | entity_addr_t conf_addr; |
712 | if (conf_addr.parse(mon_addr_str.c_str())) { | |
713 | entity_addrvec_t conf_addrs = make_mon_addrs(conf_addr); | |
714 | if (ipaddrs != conf_addrs) { | |
715 | derr << "WARNING: 'mon addr' config option " << conf_addrs | |
716 | << " does not match monmap file" << std::endl | |
717 | << " continuing with monmap configuration" << dendl; | |
718 | } | |
719 | } else | |
720 | derr << "WARNING: invalid 'mon addr' config option" << std::endl | |
7c673cae | 721 | << " continuing with monmap configuration" << dendl; |
7c673cae FG |
722 | } |
723 | } else { | |
11fdf7f2 | 724 | dout(0) << g_conf()->name << " does not exist in monmap, will attempt to join an existing cluster" << dendl; |
7c673cae FG |
725 | |
726 | pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC); | |
11fdf7f2 TL |
727 | if (!g_conf()->public_addr.is_blank_ip()) { |
728 | ipaddrs = make_mon_addrs(g_conf()->public_addr); | |
729 | dout(0) << "using public_addr " << g_conf()->public_addr << " -> " | |
730 | << ipaddrs << dendl; | |
7c673cae FG |
731 | } else { |
732 | MonMap tmpmap; | |
31f18b77 | 733 | ostringstream oss; |
11fdf7f2 | 734 | int err = tmpmap.build_initial(g_ceph_context, true, oss); |
31f18b77 FG |
735 | if (oss.tellp()) |
736 | derr << oss.str() << dendl; | |
7c673cae FG |
737 | if (err < 0) { |
738 | derr << argv[0] << ": error generating initial monmap: " | |
739 | << cpp_strerror(err) << dendl; | |
7c673cae FG |
740 | prefork.exit(1); |
741 | } | |
11fdf7f2 TL |
742 | if (tmpmap.contains(g_conf()->name.get_id())) { |
743 | ipaddrs = tmpmap.get_addrs(g_conf()->name.get_id()); | |
7c673cae | 744 | } else { |
11fdf7f2 TL |
745 | derr << "no public_addr or public_network specified, and " |
746 | << g_conf()->name << " not present in monmap or ceph.conf" << dendl; | |
7c673cae FG |
747 | prefork.exit(1); |
748 | } | |
749 | } | |
750 | } | |
751 | ||
752 | // bind | |
11fdf7f2 TL |
753 | int rank = monmap.get_rank(g_conf()->name.get_id()); |
754 | std::string public_msgr_type = g_conf()->ms_public_type.empty() ? g_conf().get_val<std::string>("ms_type") : g_conf()->ms_public_type; | |
7c673cae FG |
755 | Messenger *msgr = Messenger::create(g_ceph_context, public_msgr_type, |
756 | entity_name_t::MON(rank), "mon", | |
757 | 0, Messenger::HAS_MANY_CONNECTIONS); | |
758 | if (!msgr) | |
759 | exit(1); | |
760 | msgr->set_cluster_protocol(CEPH_MON_PROTOCOL); | |
761 | msgr->set_default_send_priority(CEPH_MSG_PRIO_HIGH); | |
762 | ||
763 | msgr->set_default_policy(Messenger::Policy::stateless_server(0)); | |
764 | msgr->set_policy(entity_name_t::TYPE_MON, | |
765 | Messenger::Policy::lossless_peer_reuse( | |
11fdf7f2 | 766 | CEPH_FEATURE_SERVER_LUMINOUS)); |
7c673cae FG |
767 | msgr->set_policy(entity_name_t::TYPE_OSD, |
768 | Messenger::Policy::stateless_server( | |
11fdf7f2 | 769 | CEPH_FEATURE_SERVER_LUMINOUS)); |
7c673cae FG |
770 | msgr->set_policy(entity_name_t::TYPE_CLIENT, |
771 | Messenger::Policy::stateless_server(0)); | |
772 | msgr->set_policy(entity_name_t::TYPE_MDS, | |
773 | Messenger::Policy::stateless_server(0)); | |
774 | ||
775 | // throttle client traffic | |
776 | Throttle *client_throttler = new Throttle(g_ceph_context, "mon_client_bytes", | |
11fdf7f2 | 777 | g_conf()->mon_client_bytes); |
7c673cae FG |
778 | msgr->set_policy_throttlers(entity_name_t::TYPE_CLIENT, |
779 | client_throttler, NULL); | |
780 | ||
781 | // throttle daemon traffic | |
782 | // NOTE: actual usage on the leader may multiply by the number of | |
783 | // monitors if they forward large update messages from daemons. | |
784 | Throttle *daemon_throttler = new Throttle(g_ceph_context, "mon_daemon_bytes", | |
11fdf7f2 | 785 | g_conf()->mon_daemon_bytes); |
7c673cae FG |
786 | msgr->set_policy_throttlers(entity_name_t::TYPE_OSD, daemon_throttler, |
787 | NULL); | |
788 | msgr->set_policy_throttlers(entity_name_t::TYPE_MDS, daemon_throttler, | |
789 | NULL); | |
790 | ||
11fdf7f2 TL |
791 | entity_addrvec_t bind_addrs = ipaddrs; |
792 | entity_addrvec_t public_addrs = ipaddrs; | |
224ce89b WB |
793 | |
794 | // check if the public_bind_addr option is set | |
11fdf7f2 TL |
795 | if (!g_conf()->public_bind_addr.is_blank_ip()) { |
796 | bind_addrs = make_mon_addrs(g_conf()->public_bind_addr); | |
7c673cae FG |
797 | } |
798 | ||
11fdf7f2 TL |
799 | dout(0) << "starting " << g_conf()->name << " rank " << rank |
800 | << " at public addrs " << public_addrs | |
801 | << " at bind addrs " << bind_addrs | |
802 | << " mon_data " << g_conf()->mon_data | |
803 | << " fsid " << monmap.get_fsid() | |
804 | << dendl; | |
224ce89b | 805 | |
7c673cae FG |
806 | Messenger *mgr_msgr = Messenger::create(g_ceph_context, public_msgr_type, |
807 | entity_name_t::MON(rank), "mon-mgrc", | |
808 | getpid(), 0); | |
809 | if (!mgr_msgr) { | |
810 | derr << "unable to create mgr_msgr" << dendl; | |
811 | prefork.exit(1); | |
812 | } | |
813 | ||
11fdf7f2 | 814 | mon = new Monitor(g_ceph_context, g_conf()->name.get_id(), store, |
7c673cae FG |
815 | msgr, mgr_msgr, &monmap); |
816 | ||
11fdf7f2 TL |
817 | mon->orig_argc = argc; |
818 | mon->orig_argv = argv; | |
819 | ||
7c673cae FG |
820 | if (force_sync) { |
821 | derr << "flagging a forced sync ..." << dendl; | |
31f18b77 FG |
822 | ostringstream oss; |
823 | mon->sync_force(NULL, oss); | |
824 | if (oss.tellp()) | |
825 | derr << oss.str() << dendl; | |
7c673cae FG |
826 | } |
827 | ||
828 | err = mon->preinit(); | |
829 | if (err < 0) { | |
830 | derr << "failed to initialize" << dendl; | |
831 | prefork.exit(1); | |
832 | } | |
833 | ||
11fdf7f2 | 834 | if (compact || g_conf()->mon_compact_on_start) { |
7c673cae FG |
835 | derr << "compacting monitor store ..." << dendl; |
836 | mon->store->compact(); | |
837 | derr << "done compacting" << dendl; | |
838 | } | |
839 | ||
11fdf7f2 TL |
840 | // bind |
841 | err = msgr->bindv(bind_addrs); | |
842 | if (err < 0) { | |
843 | derr << "unable to bind monitor to " << bind_addrs << dendl; | |
844 | prefork.exit(1); | |
845 | } | |
846 | ||
847 | // if the public and bind addr are different set the msgr addr | |
848 | // to the public one, now that the bind is complete. | |
849 | if (public_addrs != bind_addrs) { | |
850 | msgr->set_addrs(public_addrs); | |
851 | } | |
852 | ||
853 | if (g_conf()->daemonize) { | |
7c673cae FG |
854 | global_init_postfork_finish(g_ceph_context); |
855 | prefork.daemonize(); | |
856 | } | |
857 | ||
858 | msgr->start(); | |
859 | mgr_msgr->start(); | |
860 | ||
861 | mon->init(); | |
862 | ||
7c673cae FG |
863 | register_async_signal_handler_oneshot(SIGINT, handle_mon_signal); |
864 | register_async_signal_handler_oneshot(SIGTERM, handle_mon_signal); | |
865 | ||
11fdf7f2 | 866 | if (g_conf()->inject_early_sigterm) |
7c673cae FG |
867 | kill(getpid(), SIGTERM); |
868 | ||
869 | msgr->wait(); | |
870 | mgr_msgr->wait(); | |
871 | ||
872 | store->close(); | |
873 | ||
874 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
875 | unregister_async_signal_handler(SIGINT, handle_mon_signal); | |
876 | unregister_async_signal_handler(SIGTERM, handle_mon_signal); | |
877 | shutdown_async_signal_handler(); | |
878 | ||
879 | delete mon; | |
880 | delete store; | |
881 | delete msgr; | |
882 | delete mgr_msgr; | |
883 | delete client_throttler; | |
884 | delete daemon_throttler; | |
885 | ||
886 | // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. | |
887 | char s[20]; | |
888 | snprintf(s, sizeof(s), "gmon/%d", getpid()); | |
889 | if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) { | |
890 | dout(0) << "ceph-mon: gmon.out should be in " << s << dendl; | |
891 | } | |
892 | ||
893 | prefork.signal_exit(0); | |
894 | return 0; | |
895 | } |