]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include <sys/types.h> | |
16 | #include <sys/stat.h> | |
17 | #include <fcntl.h> | |
18 | #include <boost/scoped_ptr.hpp> | |
19 | ||
20 | #include <iostream> | |
21 | #include <string> | |
7c673cae FG |
22 | |
23 | #include "osd/OSD.h" | |
24 | #include "os/ObjectStore.h" | |
25 | #include "mon/MonClient.h" | |
26 | #include "include/ceph_features.h" | |
27 | ||
28 | #include "common/config.h" | |
29 | ||
30 | #include "mon/MonMap.h" | |
31 | ||
32 | #include "msg/Messenger.h" | |
33 | ||
11fdf7f2 | 34 | #include "common/Throttle.h" |
7c673cae FG |
35 | #include "common/Timer.h" |
36 | #include "common/TracepointProvider.h" | |
37 | #include "common/ceph_argparse.h" | |
11fdf7f2 | 38 | #include "common/numa.h" |
7c673cae FG |
39 | |
40 | #include "global/global_init.h" | |
41 | #include "global/signal_handler.h" | |
42 | ||
43 | #include "include/color.h" | |
44 | #include "common/errno.h" | |
45 | #include "common/pick_address.h" | |
46 | ||
47 | #include "perfglue/heap_profiler.h" | |
48 | ||
11fdf7f2 TL |
49 | #include "include/ceph_assert.h" |
50 | ||
51 | #include "common/Preforker.h" | |
7c673cae FG |
52 | |
53 | #define dout_context g_ceph_context | |
54 | #define dout_subsys ceph_subsys_osd | |
55 | ||
56 | namespace { | |
57 | ||
58 | TracepointProvider::Traits osd_tracepoint_traits("libosd_tp.so", | |
59 | "osd_tracing"); | |
60 | TracepointProvider::Traits os_tracepoint_traits("libos_tp.so", | |
61 | "osd_objectstore_tracing"); | |
31f18b77 FG |
62 | #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS |
63 | TracepointProvider::Traits cyg_profile_traits("libcyg_profile_tp.so", | |
64 | "osd_function_tracing"); | |
65 | #endif | |
7c673cae FG |
66 | |
67 | } // anonymous namespace | |
68 | ||
11fdf7f2 | 69 | OSD *osd = nullptr; |
7c673cae FG |
70 | |
71 | void handle_osd_signal(int signum) | |
72 | { | |
73 | if (osd) | |
74 | osd->handle_signal(signum); | |
75 | } | |
76 | ||
77 | static void usage() | |
78 | { | |
31f18b77 | 79 | cout << "usage: ceph-osd -i <ID> [flags]\n" |
7c673cae FG |
80 | << " --osd-data PATH data directory\n" |
81 | << " --osd-journal PATH\n" | |
82 | << " journal file or block device\n" | |
83 | << " --mkfs create a [new] data directory\n" | |
31f18b77 | 84 | << " --mkkey generate a new secret key. This is normally used in combination with --mkfs\n" |
11fdf7f2 TL |
85 | << " --monmap specify the path to the monitor map. This is normally used in combination with --mkfs\n" |
86 | << " --osd-uuid specify the OSD's fsid. This is normally used in combination with --mkfs\n" | |
87 | << " --keyring specify a path to the osd keyring. This is normally used in combination with --mkfs\n" | |
7c673cae FG |
88 | << " --convert-filestore\n" |
89 | << " run any pending upgrade operations\n" | |
90 | << " --flush-journal flush all data out of journal\n" | |
11fdf7f2 | 91 | << " --dump-journal dump all data of journal\n" |
7c673cae FG |
92 | << " --mkjournal initialize a new journal\n" |
93 | << " --check-wants-journal\n" | |
94 | << " check whether a journal is desired\n" | |
95 | << " --check-allows-journal\n" | |
96 | << " check whether a journal is allowed\n" | |
97 | << " --check-needs-journal\n" | |
98 | << " check whether a journal is required\n" | |
99 | << " --debug_osd <N> set debug level (e.g. 10)\n" | |
100 | << " --get-device-fsid PATH\n" | |
101 | << " get OSD fsid for the given block device\n" | |
102 | << std::endl; | |
103 | generic_server_usage(); | |
104 | } | |
105 | ||
7c673cae | 106 | int main(int argc, const char **argv) |
7c673cae FG |
107 | { |
108 | vector<const char*> args; | |
109 | argv_to_vec(argc, argv, args); | |
11fdf7f2 TL |
110 | if (args.empty()) { |
111 | cerr << argv[0] << ": -h or --help for usage" << std::endl; | |
112 | exit(1); | |
113 | } | |
114 | if (ceph_argparse_need_usage(args)) { | |
115 | usage(); | |
116 | exit(0); | |
117 | } | |
7c673cae | 118 | |
11fdf7f2 TL |
119 | map<string,string> defaults = { |
120 | // We want to enable leveldb's log, while allowing users to override this | |
121 | // option, therefore we will pass it as a default argument to global_init(). | |
122 | { "leveldb_log", "" } | |
123 | }; | |
124 | auto cct = global_init( | |
125 | &defaults, | |
126 | args, CEPH_ENTITY_TYPE_OSD, | |
127 | CODE_ENVIRONMENT_DAEMON, | |
128 | 0, "osd_data"); | |
7c673cae FG |
129 | ceph_heap_profiler_init(); |
130 | ||
11fdf7f2 TL |
131 | Preforker forker; |
132 | ||
7c673cae FG |
133 | // osd specific args |
134 | bool mkfs = false; | |
135 | bool mkjournal = false; | |
136 | bool check_wants_journal = false; | |
137 | bool check_allows_journal = false; | |
138 | bool check_needs_journal = false; | |
139 | bool mkkey = false; | |
140 | bool flushjournal = false; | |
141 | bool dump_journal = false; | |
142 | bool convertfilestore = false; | |
143 | bool get_osd_fsid = false; | |
144 | bool get_cluster_fsid = false; | |
145 | bool get_journal_fsid = false; | |
146 | bool get_device_fsid = false; | |
147 | string device_path; | |
148 | std::string dump_pg_log; | |
149 | ||
150 | std::string val; | |
151 | for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) { | |
152 | if (ceph_argparse_double_dash(args, i)) { | |
153 | break; | |
7c673cae FG |
154 | } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) { |
155 | mkfs = true; | |
156 | } else if (ceph_argparse_flag(args, i, "--mkjournal", (char*)NULL)) { | |
157 | mkjournal = true; | |
158 | } else if (ceph_argparse_flag(args, i, "--check-allows-journal", (char*)NULL)) { | |
159 | check_allows_journal = true; | |
160 | } else if (ceph_argparse_flag(args, i, "--check-wants-journal", (char*)NULL)) { | |
161 | check_wants_journal = true; | |
162 | } else if (ceph_argparse_flag(args, i, "--check-needs-journal", (char*)NULL)) { | |
163 | check_needs_journal = true; | |
164 | } else if (ceph_argparse_flag(args, i, "--mkkey", (char*)NULL)) { | |
165 | mkkey = true; | |
166 | } else if (ceph_argparse_flag(args, i, "--flush-journal", (char*)NULL)) { | |
167 | flushjournal = true; | |
168 | } else if (ceph_argparse_flag(args, i, "--convert-filestore", (char*)NULL)) { | |
169 | convertfilestore = true; | |
170 | } else if (ceph_argparse_witharg(args, i, &val, "--dump-pg-log", (char*)NULL)) { | |
171 | dump_pg_log = val; | |
172 | } else if (ceph_argparse_flag(args, i, "--dump-journal", (char*)NULL)) { | |
173 | dump_journal = true; | |
174 | } else if (ceph_argparse_flag(args, i, "--get-cluster-fsid", (char*)NULL)) { | |
175 | get_cluster_fsid = true; | |
176 | } else if (ceph_argparse_flag(args, i, "--get-osd-fsid", "--get-osd-uuid", (char*)NULL)) { | |
177 | get_osd_fsid = true; | |
178 | } else if (ceph_argparse_flag(args, i, "--get-journal-fsid", "--get-journal-uuid", (char*)NULL)) { | |
179 | get_journal_fsid = true; | |
180 | } else if (ceph_argparse_witharg(args, i, &device_path, | |
181 | "--get-device-fsid", (char*)NULL)) { | |
182 | get_device_fsid = true; | |
183 | } else { | |
184 | ++i; | |
185 | } | |
186 | } | |
187 | if (!args.empty()) { | |
11fdf7f2 TL |
188 | cerr << "unrecognized arg " << args[0] << std::endl; |
189 | exit(1); | |
7c673cae FG |
190 | } |
191 | ||
11fdf7f2 TL |
192 | if (global_init_prefork(g_ceph_context) >= 0) { |
193 | std::string err; | |
194 | int r = forker.prefork(err); | |
195 | if (r < 0) { | |
196 | cerr << err << std::endl; | |
197 | return r; | |
198 | } | |
199 | if (forker.is_parent()) { | |
200 | g_ceph_context->_log->start(); | |
201 | if (forker.parent_wait(err) != 0) { | |
202 | return -ENXIO; | |
203 | } | |
204 | return 0; | |
205 | } | |
206 | setsid(); | |
207 | global_init_postfork_start(g_ceph_context); | |
208 | } | |
209 | common_init_finish(g_ceph_context); | |
210 | global_init_chdir(g_ceph_context); | |
211 | ||
7c673cae | 212 | if (get_journal_fsid) { |
11fdf7f2 | 213 | device_path = g_conf().get_val<std::string>("osd_journal"); |
7c673cae FG |
214 | get_device_fsid = true; |
215 | } | |
216 | if (get_device_fsid) { | |
217 | uuid_d uuid; | |
218 | int r = ObjectStore::probe_block_device_fsid(g_ceph_context, device_path, | |
219 | &uuid); | |
220 | if (r < 0) { | |
221 | cerr << "failed to get device fsid for " << device_path | |
222 | << ": " << cpp_strerror(r) << std::endl; | |
11fdf7f2 | 223 | forker.exit(1); |
7c673cae FG |
224 | } |
225 | cout << uuid << std::endl; | |
11fdf7f2 | 226 | forker.exit(0); |
7c673cae FG |
227 | } |
228 | ||
229 | if (!dump_pg_log.empty()) { | |
230 | common_init_finish(g_ceph_context); | |
231 | bufferlist bl; | |
232 | std::string error; | |
11fdf7f2 TL |
233 | |
234 | if (bl.read_file(dump_pg_log.c_str(), &error) >= 0) { | |
7c673cae | 235 | pg_log_entry_t e; |
11fdf7f2 | 236 | auto p = bl.cbegin(); |
7c673cae FG |
237 | while (!p.end()) { |
238 | uint64_t pos = p.get_off(); | |
239 | try { | |
11fdf7f2 | 240 | decode(e, p); |
7c673cae FG |
241 | } |
242 | catch (const buffer::error &e) { | |
243 | derr << "failed to decode LogEntry at offset " << pos << dendl; | |
11fdf7f2 | 244 | forker.exit(1); |
7c673cae FG |
245 | } |
246 | derr << pos << ":\t" << e << dendl; | |
247 | } | |
248 | } else { | |
249 | derr << "unable to open " << dump_pg_log << ": " << error << dendl; | |
250 | } | |
11fdf7f2 | 251 | forker.exit(0); |
7c673cae FG |
252 | } |
253 | ||
254 | // whoami | |
255 | char *end; | |
11fdf7f2 | 256 | const char *id = g_conf()->name.get_id().c_str(); |
7c673cae | 257 | int whoami = strtol(id, &end, 10); |
11fdf7f2 | 258 | std::string data_path = g_conf().get_val<std::string>("osd_data"); |
7c673cae FG |
259 | if (*end || end == id || whoami < 0) { |
260 | derr << "must specify '-i #' where # is the osd number" << dendl; | |
11fdf7f2 | 261 | forker.exit(1); |
7c673cae FG |
262 | } |
263 | ||
11fdf7f2 | 264 | if (data_path.empty()) { |
7c673cae | 265 | derr << "must specify '--osd-data=foo' data path" << dendl; |
11fdf7f2 | 266 | forker.exit(1); |
7c673cae FG |
267 | } |
268 | ||
269 | // the store | |
11fdf7f2 | 270 | std::string store_type; |
7c673cae FG |
271 | { |
272 | char fn[PATH_MAX]; | |
11fdf7f2 | 273 | snprintf(fn, sizeof(fn), "%s/type", data_path.c_str()); |
91327a77 | 274 | int fd = ::open(fn, O_RDONLY|O_CLOEXEC); |
7c673cae FG |
275 | if (fd >= 0) { |
276 | bufferlist bl; | |
277 | bl.read_fd(fd, 64); | |
278 | if (bl.length()) { | |
279 | store_type = string(bl.c_str(), bl.length() - 1); // drop \n | |
280 | dout(5) << "object store type is " << store_type << dendl; | |
281 | } | |
282 | ::close(fd); | |
11fdf7f2 TL |
283 | } else if (mkfs) { |
284 | store_type = g_conf().get_val<std::string>("osd_objectstore"); | |
285 | } else { | |
286 | // hrm, infer the type | |
287 | snprintf(fn, sizeof(fn), "%s/current", data_path.c_str()); | |
288 | struct stat st; | |
289 | if (::stat(fn, &st) == 0 && | |
290 | S_ISDIR(st.st_mode)) { | |
291 | derr << "missing 'type' file, inferring filestore from current/ dir" | |
292 | << dendl; | |
293 | store_type = "filestore"; | |
294 | } else { | |
295 | snprintf(fn, sizeof(fn), "%s/block", data_path.c_str()); | |
296 | if (::stat(fn, &st) == 0 && | |
297 | S_ISLNK(st.st_mode)) { | |
298 | derr << "missing 'type' file, inferring bluestore from block symlink" | |
299 | << dendl; | |
300 | store_type = "bluestore"; | |
301 | } else { | |
302 | derr << "missing 'type' file and unable to infer osd type" << dendl; | |
303 | forker.exit(1); | |
304 | } | |
305 | } | |
7c673cae FG |
306 | } |
307 | } | |
11fdf7f2 TL |
308 | |
309 | std::string journal_path = g_conf().get_val<std::string>("osd_journal"); | |
310 | uint32_t flags = g_conf().get_val<uint64_t>("osd_os_flags"); | |
7c673cae FG |
311 | ObjectStore *store = ObjectStore::create(g_ceph_context, |
312 | store_type, | |
11fdf7f2 TL |
313 | data_path, |
314 | journal_path, | |
315 | flags); | |
7c673cae FG |
316 | if (!store) { |
317 | derr << "unable to create object store" << dendl; | |
11fdf7f2 | 318 | forker.exit(-ENODEV); |
7c673cae FG |
319 | } |
320 | ||
7c673cae | 321 | |
7c673cae FG |
322 | if (mkkey) { |
323 | common_init_finish(g_ceph_context); | |
324 | KeyRing *keyring = KeyRing::create_empty(); | |
325 | if (!keyring) { | |
326 | derr << "Unable to get a Ceph keyring." << dendl; | |
11fdf7f2 | 327 | forker.exit(1); |
7c673cae FG |
328 | } |
329 | ||
11fdf7f2 | 330 | EntityName ename{g_conf()->name}; |
7c673cae FG |
331 | EntityAuth eauth; |
332 | ||
11fdf7f2 TL |
333 | std::string keyring_path = g_conf().get_val<std::string>("keyring"); |
334 | int ret = keyring->load(g_ceph_context, keyring_path); | |
7c673cae FG |
335 | if (ret == 0 && |
336 | keyring->get_auth(ename, eauth)) { | |
11fdf7f2 | 337 | derr << "already have key in keyring " << keyring_path << dendl; |
7c673cae FG |
338 | } else { |
339 | eauth.key.create(g_ceph_context, CEPH_CRYPTO_AES); | |
340 | keyring->add(ename, eauth); | |
341 | bufferlist bl; | |
342 | keyring->encode_plaintext(bl); | |
11fdf7f2 | 343 | int r = bl.write_file(keyring_path.c_str(), 0600); |
7c673cae | 344 | if (r) |
11fdf7f2 TL |
345 | derr << TEXT_RED << " ** ERROR: writing new keyring to " |
346 | << keyring_path << ": " << cpp_strerror(r) << TEXT_NORMAL | |
347 | << dendl; | |
7c673cae | 348 | else |
11fdf7f2 | 349 | derr << "created new key in keyring " << keyring_path << dendl; |
7c673cae FG |
350 | } |
351 | } | |
3efd9988 FG |
352 | if (mkfs) { |
353 | common_init_finish(g_ceph_context); | |
3efd9988 | 354 | |
11fdf7f2 | 355 | if (g_conf().get_val<uuid_d>("fsid").is_zero()) { |
3efd9988 | 356 | derr << "must specify cluster fsid" << dendl; |
11fdf7f2 | 357 | forker.exit(-EINVAL); |
3efd9988 FG |
358 | } |
359 | ||
11fdf7f2 TL |
360 | int err = OSD::mkfs(g_ceph_context, store, g_conf().get_val<uuid_d>("fsid"), |
361 | whoami); | |
3efd9988 FG |
362 | if (err < 0) { |
363 | derr << TEXT_RED << " ** ERROR: error creating empty object store in " | |
11fdf7f2 TL |
364 | << data_path << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
365 | forker.exit(1); | |
3efd9988 | 366 | } |
11fdf7f2 TL |
367 | dout(0) << "created object store " << data_path |
368 | << " for osd." << whoami | |
369 | << " fsid " << g_conf().get_val<uuid_d>("fsid") | |
370 | << dendl; | |
371 | } | |
372 | if (mkfs || mkkey) { | |
373 | forker.exit(0); | |
3efd9988 | 374 | } |
7c673cae FG |
375 | if (mkjournal) { |
376 | common_init_finish(g_ceph_context); | |
377 | int err = store->mkjournal(); | |
378 | if (err < 0) { | |
11fdf7f2 TL |
379 | derr << TEXT_RED << " ** ERROR: error creating fresh journal " |
380 | << journal_path << " for object store " << data_path << ": " | |
381 | << cpp_strerror(-err) << TEXT_NORMAL << dendl; | |
382 | forker.exit(1); | |
7c673cae | 383 | } |
11fdf7f2 TL |
384 | derr << "created new journal " << journal_path |
385 | << " for object store " << data_path << dendl; | |
386 | forker.exit(0); | |
7c673cae FG |
387 | } |
388 | if (check_wants_journal) { | |
389 | if (store->wants_journal()) { | |
d2e6a577 | 390 | cout << "wants journal: yes" << std::endl; |
11fdf7f2 | 391 | forker.exit(0); |
7c673cae | 392 | } else { |
d2e6a577 | 393 | cout << "wants journal: no" << std::endl; |
11fdf7f2 | 394 | forker.exit(1); |
7c673cae FG |
395 | } |
396 | } | |
397 | if (check_allows_journal) { | |
398 | if (store->allows_journal()) { | |
d2e6a577 | 399 | cout << "allows journal: yes" << std::endl; |
11fdf7f2 | 400 | forker.exit(0); |
7c673cae | 401 | } else { |
d2e6a577 | 402 | cout << "allows journal: no" << std::endl; |
11fdf7f2 | 403 | forker.exit(1); |
7c673cae FG |
404 | } |
405 | } | |
406 | if (check_needs_journal) { | |
407 | if (store->needs_journal()) { | |
d2e6a577 | 408 | cout << "needs journal: yes" << std::endl; |
11fdf7f2 | 409 | forker.exit(0); |
7c673cae | 410 | } else { |
d2e6a577 | 411 | cout << "needs journal: no" << std::endl; |
11fdf7f2 | 412 | forker.exit(1); |
7c673cae FG |
413 | } |
414 | } | |
415 | if (flushjournal) { | |
416 | common_init_finish(g_ceph_context); | |
417 | int err = store->mount(); | |
418 | if (err < 0) { | |
11fdf7f2 TL |
419 | derr << TEXT_RED << " ** ERROR: error flushing journal " << journal_path |
420 | << " for object store " << data_path | |
7c673cae FG |
421 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
422 | goto flushjournal_out; | |
423 | } | |
424 | store->umount(); | |
11fdf7f2 TL |
425 | derr << "flushed journal " << journal_path |
426 | << " for object store " << data_path | |
7c673cae FG |
427 | << dendl; |
428 | flushjournal_out: | |
429 | delete store; | |
11fdf7f2 | 430 | forker.exit(err < 0 ? 1 : 0); |
7c673cae FG |
431 | } |
432 | if (dump_journal) { | |
433 | common_init_finish(g_ceph_context); | |
434 | int err = store->dump_journal(cout); | |
435 | if (err < 0) { | |
11fdf7f2 TL |
436 | derr << TEXT_RED << " ** ERROR: error dumping journal " << journal_path |
437 | << " for object store " << data_path | |
7c673cae | 438 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 439 | forker.exit(1); |
7c673cae | 440 | } |
11fdf7f2 TL |
441 | derr << "dumped journal " << journal_path |
442 | << " for object store " << data_path | |
7c673cae | 443 | << dendl; |
11fdf7f2 | 444 | forker.exit(0); |
7c673cae FG |
445 | } |
446 | ||
447 | ||
448 | if (convertfilestore) { | |
449 | int err = store->mount(); | |
450 | if (err < 0) { | |
11fdf7f2 | 451 | derr << TEXT_RED << " ** ERROR: error mounting store " << data_path |
7c673cae | 452 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 453 | forker.exit(1); |
7c673cae FG |
454 | } |
455 | err = store->upgrade(); | |
456 | store->umount(); | |
457 | if (err < 0) { | |
11fdf7f2 | 458 | derr << TEXT_RED << " ** ERROR: error converting store " << data_path |
7c673cae | 459 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 460 | forker.exit(1); |
7c673cae | 461 | } |
11fdf7f2 | 462 | forker.exit(0); |
7c673cae FG |
463 | } |
464 | ||
465 | string magic; | |
466 | uuid_d cluster_fsid, osd_fsid; | |
11fdf7f2 | 467 | int require_osd_release = 0; |
7c673cae | 468 | int w; |
11fdf7f2 TL |
469 | int r = OSD::peek_meta(store, &magic, &cluster_fsid, &osd_fsid, &w, |
470 | &require_osd_release); | |
7c673cae FG |
471 | if (r < 0) { |
472 | derr << TEXT_RED << " ** ERROR: unable to open OSD superblock on " | |
11fdf7f2 | 473 | << data_path << ": " << cpp_strerror(-r) |
7c673cae FG |
474 | << TEXT_NORMAL << dendl; |
475 | if (r == -ENOTSUP) { | |
476 | derr << TEXT_RED << " ** please verify that underlying storage " | |
477 | << "supports xattrs" << TEXT_NORMAL << dendl; | |
478 | } | |
11fdf7f2 | 479 | forker.exit(1); |
7c673cae FG |
480 | } |
481 | if (w != whoami) { | |
482 | derr << "OSD id " << w << " != my id " << whoami << dendl; | |
11fdf7f2 | 483 | forker.exit(1); |
7c673cae FG |
484 | } |
485 | if (strcmp(magic.c_str(), CEPH_OSD_ONDISK_MAGIC)) { | |
486 | derr << "OSD magic " << magic << " != my " << CEPH_OSD_ONDISK_MAGIC | |
487 | << dendl; | |
11fdf7f2 | 488 | forker.exit(1); |
7c673cae FG |
489 | } |
490 | ||
491 | if (get_cluster_fsid) { | |
492 | cout << cluster_fsid << std::endl; | |
11fdf7f2 | 493 | forker.exit(0); |
7c673cae FG |
494 | } |
495 | if (get_osd_fsid) { | |
496 | cout << osd_fsid << std::endl; | |
11fdf7f2 | 497 | forker.exit(0); |
7c673cae FG |
498 | } |
499 | ||
11fdf7f2 TL |
500 | if (require_osd_release > 0 && |
501 | require_osd_release + 2 < (int)ceph_release()) { | |
502 | derr << "OSD's recorded require_osd_release " << require_osd_release | |
503 | << " (" << ceph_release_name(require_osd_release) | |
504 | << ") is >2 releases older than installed " << ceph_release() | |
505 | << " (" << ceph_release_name(ceph_release()) | |
506 | << "); you can only upgrade 2 releases at a time" << dendl; | |
507 | derr << "you should first upgrade to " | |
508 | << (require_osd_release + 1) | |
509 | << " (" << ceph_release_name(require_osd_release + 1) << ") or " | |
510 | << (require_osd_release + 2) | |
511 | << " (" << ceph_release_name(require_osd_release + 2) << ")" << dendl; | |
512 | forker.exit(1); | |
513 | } | |
7c673cae | 514 | |
11fdf7f2 TL |
515 | // consider objectstore numa node |
516 | int os_numa_node = -1; | |
517 | r = store->get_numa_node(&os_numa_node, nullptr, nullptr); | |
518 | if (r >= 0 && os_numa_node >= 0) { | |
519 | dout(1) << " objectstore numa_node " << os_numa_node << dendl; | |
520 | } | |
521 | int iface_preferred_numa_node = -1; | |
522 | if (g_conf().get_val<bool>("osd_numa_prefer_iface")) { | |
523 | iface_preferred_numa_node = os_numa_node; | |
7c673cae FG |
524 | } |
525 | ||
11fdf7f2 TL |
526 | // messengers |
527 | std::string msg_type = g_conf().get_val<std::string>("ms_type"); | |
528 | std::string public_msg_type = | |
529 | g_conf().get_val<std::string>("ms_public_type"); | |
530 | std::string cluster_msg_type = | |
531 | g_conf().get_val<std::string>("ms_cluster_type"); | |
532 | ||
533 | public_msg_type = public_msg_type.empty() ? msg_type : public_msg_type; | |
534 | cluster_msg_type = cluster_msg_type.empty() ? msg_type : cluster_msg_type; | |
535 | Messenger *ms_public = Messenger::create(g_ceph_context, public_msg_type, | |
7c673cae FG |
536 | entity_name_t::OSD(whoami), "client", |
537 | getpid(), | |
538 | Messenger::HAS_HEAVY_TRAFFIC | | |
539 | Messenger::HAS_MANY_CONNECTIONS); | |
11fdf7f2 | 540 | Messenger *ms_cluster = Messenger::create(g_ceph_context, cluster_msg_type, |
7c673cae FG |
541 | entity_name_t::OSD(whoami), "cluster", |
542 | getpid(), | |
543 | Messenger::HAS_HEAVY_TRAFFIC | | |
544 | Messenger::HAS_MANY_CONNECTIONS); | |
11fdf7f2 | 545 | Messenger *ms_hb_back_client = Messenger::create(g_ceph_context, cluster_msg_type, |
7c673cae FG |
546 | entity_name_t::OSD(whoami), "hb_back_client", |
547 | getpid(), Messenger::HEARTBEAT); | |
11fdf7f2 | 548 | Messenger *ms_hb_front_client = Messenger::create(g_ceph_context, public_msg_type, |
7c673cae FG |
549 | entity_name_t::OSD(whoami), "hb_front_client", |
550 | getpid(), Messenger::HEARTBEAT); | |
11fdf7f2 | 551 | Messenger *ms_hb_back_server = Messenger::create(g_ceph_context, cluster_msg_type, |
7c673cae FG |
552 | entity_name_t::OSD(whoami), "hb_back_server", |
553 | getpid(), Messenger::HEARTBEAT); | |
11fdf7f2 | 554 | Messenger *ms_hb_front_server = Messenger::create(g_ceph_context, public_msg_type, |
7c673cae FG |
555 | entity_name_t::OSD(whoami), "hb_front_server", |
556 | getpid(), Messenger::HEARTBEAT); | |
11fdf7f2 | 557 | Messenger *ms_objecter = Messenger::create(g_ceph_context, public_msg_type, |
7c673cae FG |
558 | entity_name_t::OSD(whoami), "ms_objecter", |
559 | getpid(), 0); | |
560 | if (!ms_public || !ms_cluster || !ms_hb_front_client || !ms_hb_back_client || !ms_hb_back_server || !ms_hb_front_server || !ms_objecter) | |
11fdf7f2 | 561 | forker.exit(1); |
7c673cae FG |
562 | ms_cluster->set_cluster_protocol(CEPH_OSD_PROTOCOL); |
563 | ms_hb_front_client->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
564 | ms_hb_back_client->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
565 | ms_hb_back_server->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
566 | ms_hb_front_server->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
567 | ||
11fdf7f2 TL |
568 | dout(0) << "starting osd." << whoami |
569 | << " osd_data " << data_path | |
570 | << " " << ((journal_path.empty()) ? | |
571 | "(no journal)" : journal_path) | |
572 | << dendl; | |
7c673cae | 573 | |
11fdf7f2 TL |
574 | uint64_t message_size = |
575 | g_conf().get_val<Option::size_t>("osd_client_message_size_cap"); | |
7c673cae | 576 | boost::scoped_ptr<Throttle> client_byte_throttler( |
11fdf7f2 | 577 | new Throttle(g_ceph_context, "osd_client_bytes", message_size)); |
7c673cae FG |
578 | |
579 | // All feature bits 0 - 34 should be present from dumpling v0.67 forward | |
580 | uint64_t osd_required = | |
581 | CEPH_FEATURE_UID | | |
582 | CEPH_FEATURE_PGID64 | | |
583 | CEPH_FEATURE_OSDENC; | |
584 | ||
585 | ms_public->set_default_policy(Messenger::Policy::stateless_server(0)); | |
586 | ms_public->set_policy_throttlers(entity_name_t::TYPE_CLIENT, | |
587 | client_byte_throttler.get(), | |
31f18b77 | 588 | nullptr); |
7c673cae | 589 | ms_public->set_policy(entity_name_t::TYPE_MON, |
11fdf7f2 | 590 | Messenger::Policy::lossy_client(osd_required)); |
7c673cae | 591 | ms_public->set_policy(entity_name_t::TYPE_MGR, |
11fdf7f2 | 592 | Messenger::Policy::lossy_client(osd_required)); |
7c673cae FG |
593 | |
594 | //try to poison pill any OSD connections on the wrong address | |
595 | ms_public->set_policy(entity_name_t::TYPE_OSD, | |
596 | Messenger::Policy::stateless_server(0)); | |
597 | ||
598 | ms_cluster->set_default_policy(Messenger::Policy::stateless_server(0)); | |
599 | ms_cluster->set_policy(entity_name_t::TYPE_MON, Messenger::Policy::lossy_client(0)); | |
600 | ms_cluster->set_policy(entity_name_t::TYPE_OSD, | |
601 | Messenger::Policy::lossless_peer(osd_required)); | |
602 | ms_cluster->set_policy(entity_name_t::TYPE_CLIENT, | |
603 | Messenger::Policy::stateless_server(0)); | |
604 | ||
605 | ms_hb_front_client->set_policy(entity_name_t::TYPE_OSD, | |
606 | Messenger::Policy::lossy_client(0)); | |
607 | ms_hb_back_client->set_policy(entity_name_t::TYPE_OSD, | |
608 | Messenger::Policy::lossy_client(0)); | |
609 | ms_hb_back_server->set_policy(entity_name_t::TYPE_OSD, | |
610 | Messenger::Policy::stateless_server(0)); | |
611 | ms_hb_front_server->set_policy(entity_name_t::TYPE_OSD, | |
612 | Messenger::Policy::stateless_server(0)); | |
613 | ||
614 | ms_objecter->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX)); | |
615 | ||
11fdf7f2 TL |
616 | entity_addrvec_t public_addrs, cluster_addrs; |
617 | r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC, &public_addrs, | |
618 | iface_preferred_numa_node); | |
619 | if (r < 0) { | |
620 | derr << "Failed to pick public address." << dendl; | |
621 | forker.exit(1); | |
622 | } | |
623 | r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_CLUSTER, &cluster_addrs, | |
624 | iface_preferred_numa_node); | |
625 | if (r < 0) { | |
626 | derr << "Failed to pick cluster address." << dendl; | |
627 | forker.exit(1); | |
628 | } | |
629 | ||
630 | if (ms_public->bindv(public_addrs) < 0) | |
631 | forker.exit(1); | |
7c673cae | 632 | |
11fdf7f2 TL |
633 | if (ms_cluster->bindv(cluster_addrs) < 0) |
634 | forker.exit(1); | |
635 | ||
636 | bool is_delay = g_conf().get_val<bool>("osd_heartbeat_use_min_delay_socket"); | |
637 | if (is_delay) { | |
7c673cae FG |
638 | ms_hb_front_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); |
639 | ms_hb_back_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
640 | ms_hb_back_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
641 | ms_hb_front_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
642 | } | |
643 | ||
11fdf7f2 TL |
644 | entity_addrvec_t hb_front_addrs = public_addrs; |
645 | for (auto& a : hb_front_addrs.v) { | |
646 | a.set_port(0); | |
7c673cae | 647 | } |
11fdf7f2 TL |
648 | if (ms_hb_front_server->bindv(hb_front_addrs) < 0) |
649 | forker.exit(1); | |
650 | if (ms_hb_front_client->client_bind(hb_front_addrs.front()) < 0) | |
651 | forker.exit(1); | |
652 | ||
653 | entity_addrvec_t hb_back_addrs = cluster_addrs; | |
654 | for (auto& a : hb_back_addrs.v) { | |
655 | a.set_port(0); | |
656 | } | |
657 | if (ms_hb_back_server->bindv(hb_back_addrs) < 0) | |
658 | forker.exit(1); | |
659 | if (ms_hb_back_client->client_bind(hb_back_addrs.front()) < 0) | |
660 | forker.exit(1); | |
7c673cae | 661 | |
11fdf7f2 TL |
662 | // install signal handlers |
663 | init_async_signal_handler(); | |
664 | register_async_signal_handler(SIGHUP, sighup_handler); | |
7c673cae FG |
665 | |
666 | TracepointProvider::initialize<osd_tracepoint_traits>(g_ceph_context); | |
667 | TracepointProvider::initialize<os_tracepoint_traits>(g_ceph_context); | |
31f18b77 FG |
668 | #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS |
669 | TracepointProvider::initialize<cyg_profile_traits>(g_ceph_context); | |
670 | #endif | |
7c673cae | 671 | |
11fdf7f2 TL |
672 | srand(time(NULL) + getpid()); |
673 | ||
7c673cae FG |
674 | MonClient mc(g_ceph_context); |
675 | if (mc.build_initial_monmap() < 0) | |
676 | return -1; | |
677 | global_init_chdir(g_ceph_context); | |
678 | ||
11fdf7f2 TL |
679 | if (global_init_preload_erasure_code(g_ceph_context) < 0) { |
680 | forker.exit(1); | |
681 | } | |
224ce89b | 682 | |
7c673cae FG |
683 | osd = new OSD(g_ceph_context, |
684 | store, | |
685 | whoami, | |
686 | ms_cluster, | |
687 | ms_public, | |
688 | ms_hb_front_client, | |
689 | ms_hb_back_client, | |
690 | ms_hb_front_server, | |
691 | ms_hb_back_server, | |
692 | ms_objecter, | |
693 | &mc, | |
11fdf7f2 TL |
694 | data_path, |
695 | journal_path); | |
7c673cae FG |
696 | |
697 | int err = osd->pre_init(); | |
698 | if (err < 0) { | |
699 | derr << TEXT_RED << " ** ERROR: osd pre_init failed: " << cpp_strerror(-err) | |
700 | << TEXT_NORMAL << dendl; | |
11fdf7f2 | 701 | forker.exit(1); |
7c673cae FG |
702 | } |
703 | ||
704 | ms_public->start(); | |
705 | ms_hb_front_client->start(); | |
706 | ms_hb_back_client->start(); | |
707 | ms_hb_front_server->start(); | |
708 | ms_hb_back_server->start(); | |
709 | ms_cluster->start(); | |
710 | ms_objecter->start(); | |
711 | ||
712 | // start osd | |
713 | err = osd->init(); | |
714 | if (err < 0) { | |
715 | derr << TEXT_RED << " ** ERROR: osd init failed: " << cpp_strerror(-err) | |
716 | << TEXT_NORMAL << dendl; | |
11fdf7f2 | 717 | forker.exit(1); |
7c673cae FG |
718 | } |
719 | ||
11fdf7f2 TL |
720 | // -- daemonize -- |
721 | ||
722 | if (g_conf()->daemonize) { | |
723 | global_init_postfork_finish(g_ceph_context); | |
724 | forker.daemonize(); | |
725 | } | |
726 | ||
727 | ||
7c673cae FG |
728 | register_async_signal_handler_oneshot(SIGINT, handle_osd_signal); |
729 | register_async_signal_handler_oneshot(SIGTERM, handle_osd_signal); | |
730 | ||
731 | osd->final_init(); | |
732 | ||
11fdf7f2 | 733 | if (g_conf().get_val<bool>("inject_early_sigterm")) |
7c673cae FG |
734 | kill(getpid(), SIGTERM); |
735 | ||
736 | ms_public->wait(); | |
737 | ms_hb_front_client->wait(); | |
738 | ms_hb_back_client->wait(); | |
739 | ms_hb_front_server->wait(); | |
740 | ms_hb_back_server->wait(); | |
741 | ms_cluster->wait(); | |
742 | ms_objecter->wait(); | |
743 | ||
744 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
745 | unregister_async_signal_handler(SIGINT, handle_osd_signal); | |
746 | unregister_async_signal_handler(SIGTERM, handle_osd_signal); | |
747 | shutdown_async_signal_handler(); | |
748 | ||
749 | // done | |
750 | delete osd; | |
751 | delete ms_public; | |
752 | delete ms_hb_front_client; | |
753 | delete ms_hb_back_client; | |
754 | delete ms_hb_front_server; | |
755 | delete ms_hb_back_server; | |
756 | delete ms_cluster; | |
757 | delete ms_objecter; | |
758 | ||
759 | client_byte_throttler.reset(); | |
7c673cae FG |
760 | |
761 | // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. | |
762 | char s[20]; | |
763 | snprintf(s, sizeof(s), "gmon/%d", getpid()); | |
764 | if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) { | |
765 | dout(0) << "ceph-osd: gmon.out should be in " << s << dendl; | |
766 | } | |
767 | ||
768 | return 0; | |
769 | } |