]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph_osd.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / ceph_osd.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <sys/types.h>
16#include <sys/stat.h>
17#include <fcntl.h>
18#include <boost/scoped_ptr.hpp>
19
20#include <iostream>
21#include <string>
7c673cae
FG
22
23#include "osd/OSD.h"
24#include "os/ObjectStore.h"
25#include "mon/MonClient.h"
26#include "include/ceph_features.h"
27
28#include "common/config.h"
29
30#include "mon/MonMap.h"
31
32#include "msg/Messenger.h"
33
11fdf7f2 34#include "common/Throttle.h"
7c673cae
FG
35#include "common/Timer.h"
36#include "common/TracepointProvider.h"
37#include "common/ceph_argparse.h"
11fdf7f2 38#include "common/numa.h"
7c673cae
FG
39
40#include "global/global_init.h"
41#include "global/signal_handler.h"
42
43#include "include/color.h"
44#include "common/errno.h"
45#include "common/pick_address.h"
46
47#include "perfglue/heap_profiler.h"
48
11fdf7f2
TL
49#include "include/ceph_assert.h"
50
51#include "common/Preforker.h"
7c673cae
FG
52
53#define dout_context g_ceph_context
54#define dout_subsys ceph_subsys_osd
55
56namespace {
57
58TracepointProvider::Traits osd_tracepoint_traits("libosd_tp.so",
59 "osd_tracing");
60TracepointProvider::Traits os_tracepoint_traits("libos_tp.so",
61 "osd_objectstore_tracing");
31f18b77
FG
62#ifdef WITH_OSD_INSTRUMENT_FUNCTIONS
63TracepointProvider::Traits cyg_profile_traits("libcyg_profile_tp.so",
64 "osd_function_tracing");
65#endif
7c673cae
FG
66
67} // anonymous namespace
68
11fdf7f2 69OSD *osd = nullptr;
7c673cae
FG
70
71void handle_osd_signal(int signum)
72{
73 if (osd)
74 osd->handle_signal(signum);
75}
76
77static void usage()
78{
31f18b77 79 cout << "usage: ceph-osd -i <ID> [flags]\n"
7c673cae
FG
80 << " --osd-data PATH data directory\n"
81 << " --osd-journal PATH\n"
82 << " journal file or block device\n"
83 << " --mkfs create a [new] data directory\n"
31f18b77 84 << " --mkkey generate a new secret key. This is normally used in combination with --mkfs\n"
11fdf7f2
TL
85 << " --monmap specify the path to the monitor map. This is normally used in combination with --mkfs\n"
86 << " --osd-uuid specify the OSD's fsid. This is normally used in combination with --mkfs\n"
87 << " --keyring specify a path to the osd keyring. This is normally used in combination with --mkfs\n"
7c673cae
FG
88 << " --convert-filestore\n"
89 << " run any pending upgrade operations\n"
90 << " --flush-journal flush all data out of journal\n"
11fdf7f2 91 << " --dump-journal dump all data of journal\n"
7c673cae
FG
92 << " --mkjournal initialize a new journal\n"
93 << " --check-wants-journal\n"
94 << " check whether a journal is desired\n"
95 << " --check-allows-journal\n"
96 << " check whether a journal is allowed\n"
97 << " --check-needs-journal\n"
98 << " check whether a journal is required\n"
99 << " --debug_osd <N> set debug level (e.g. 10)\n"
100 << " --get-device-fsid PATH\n"
101 << " get OSD fsid for the given block device\n"
102 << std::endl;
103 generic_server_usage();
104}
105
7c673cae 106int main(int argc, const char **argv)
7c673cae
FG
107{
108 vector<const char*> args;
109 argv_to_vec(argc, argv, args);
11fdf7f2
TL
110 if (args.empty()) {
111 cerr << argv[0] << ": -h or --help for usage" << std::endl;
112 exit(1);
113 }
114 if (ceph_argparse_need_usage(args)) {
115 usage();
116 exit(0);
117 }
7c673cae 118
11fdf7f2
TL
119 map<string,string> defaults = {
120 // We want to enable leveldb's log, while allowing users to override this
121 // option, therefore we will pass it as a default argument to global_init().
122 { "leveldb_log", "" }
123 };
124 auto cct = global_init(
125 &defaults,
126 args, CEPH_ENTITY_TYPE_OSD,
127 CODE_ENVIRONMENT_DAEMON,
128 0, "osd_data");
7c673cae
FG
129 ceph_heap_profiler_init();
130
11fdf7f2
TL
131 Preforker forker;
132
7c673cae
FG
133 // osd specific args
134 bool mkfs = false;
135 bool mkjournal = false;
136 bool check_wants_journal = false;
137 bool check_allows_journal = false;
138 bool check_needs_journal = false;
139 bool mkkey = false;
140 bool flushjournal = false;
141 bool dump_journal = false;
142 bool convertfilestore = false;
143 bool get_osd_fsid = false;
144 bool get_cluster_fsid = false;
145 bool get_journal_fsid = false;
146 bool get_device_fsid = false;
147 string device_path;
148 std::string dump_pg_log;
149
150 std::string val;
151 for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
152 if (ceph_argparse_double_dash(args, i)) {
153 break;
7c673cae
FG
154 } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) {
155 mkfs = true;
156 } else if (ceph_argparse_flag(args, i, "--mkjournal", (char*)NULL)) {
157 mkjournal = true;
158 } else if (ceph_argparse_flag(args, i, "--check-allows-journal", (char*)NULL)) {
159 check_allows_journal = true;
160 } else if (ceph_argparse_flag(args, i, "--check-wants-journal", (char*)NULL)) {
161 check_wants_journal = true;
162 } else if (ceph_argparse_flag(args, i, "--check-needs-journal", (char*)NULL)) {
163 check_needs_journal = true;
164 } else if (ceph_argparse_flag(args, i, "--mkkey", (char*)NULL)) {
165 mkkey = true;
166 } else if (ceph_argparse_flag(args, i, "--flush-journal", (char*)NULL)) {
167 flushjournal = true;
168 } else if (ceph_argparse_flag(args, i, "--convert-filestore", (char*)NULL)) {
169 convertfilestore = true;
170 } else if (ceph_argparse_witharg(args, i, &val, "--dump-pg-log", (char*)NULL)) {
171 dump_pg_log = val;
172 } else if (ceph_argparse_flag(args, i, "--dump-journal", (char*)NULL)) {
173 dump_journal = true;
174 } else if (ceph_argparse_flag(args, i, "--get-cluster-fsid", (char*)NULL)) {
175 get_cluster_fsid = true;
176 } else if (ceph_argparse_flag(args, i, "--get-osd-fsid", "--get-osd-uuid", (char*)NULL)) {
177 get_osd_fsid = true;
178 } else if (ceph_argparse_flag(args, i, "--get-journal-fsid", "--get-journal-uuid", (char*)NULL)) {
179 get_journal_fsid = true;
180 } else if (ceph_argparse_witharg(args, i, &device_path,
181 "--get-device-fsid", (char*)NULL)) {
182 get_device_fsid = true;
183 } else {
184 ++i;
185 }
186 }
187 if (!args.empty()) {
11fdf7f2
TL
188 cerr << "unrecognized arg " << args[0] << std::endl;
189 exit(1);
7c673cae
FG
190 }
191
11fdf7f2
TL
192 if (global_init_prefork(g_ceph_context) >= 0) {
193 std::string err;
194 int r = forker.prefork(err);
195 if (r < 0) {
196 cerr << err << std::endl;
197 return r;
198 }
199 if (forker.is_parent()) {
200 g_ceph_context->_log->start();
201 if (forker.parent_wait(err) != 0) {
202 return -ENXIO;
203 }
204 return 0;
205 }
206 setsid();
207 global_init_postfork_start(g_ceph_context);
208 }
209 common_init_finish(g_ceph_context);
210 global_init_chdir(g_ceph_context);
211
7c673cae 212 if (get_journal_fsid) {
11fdf7f2 213 device_path = g_conf().get_val<std::string>("osd_journal");
7c673cae
FG
214 get_device_fsid = true;
215 }
216 if (get_device_fsid) {
217 uuid_d uuid;
218 int r = ObjectStore::probe_block_device_fsid(g_ceph_context, device_path,
219 &uuid);
220 if (r < 0) {
221 cerr << "failed to get device fsid for " << device_path
222 << ": " << cpp_strerror(r) << std::endl;
11fdf7f2 223 forker.exit(1);
7c673cae
FG
224 }
225 cout << uuid << std::endl;
11fdf7f2 226 forker.exit(0);
7c673cae
FG
227 }
228
229 if (!dump_pg_log.empty()) {
230 common_init_finish(g_ceph_context);
231 bufferlist bl;
232 std::string error;
11fdf7f2
TL
233
234 if (bl.read_file(dump_pg_log.c_str(), &error) >= 0) {
7c673cae 235 pg_log_entry_t e;
11fdf7f2 236 auto p = bl.cbegin();
7c673cae
FG
237 while (!p.end()) {
238 uint64_t pos = p.get_off();
239 try {
11fdf7f2 240 decode(e, p);
7c673cae
FG
241 }
242 catch (const buffer::error &e) {
243 derr << "failed to decode LogEntry at offset " << pos << dendl;
11fdf7f2 244 forker.exit(1);
7c673cae
FG
245 }
246 derr << pos << ":\t" << e << dendl;
247 }
248 } else {
249 derr << "unable to open " << dump_pg_log << ": " << error << dendl;
250 }
11fdf7f2 251 forker.exit(0);
7c673cae
FG
252 }
253
254 // whoami
255 char *end;
11fdf7f2 256 const char *id = g_conf()->name.get_id().c_str();
7c673cae 257 int whoami = strtol(id, &end, 10);
11fdf7f2 258 std::string data_path = g_conf().get_val<std::string>("osd_data");
7c673cae
FG
259 if (*end || end == id || whoami < 0) {
260 derr << "must specify '-i #' where # is the osd number" << dendl;
11fdf7f2 261 forker.exit(1);
7c673cae
FG
262 }
263
11fdf7f2 264 if (data_path.empty()) {
7c673cae 265 derr << "must specify '--osd-data=foo' data path" << dendl;
11fdf7f2 266 forker.exit(1);
7c673cae
FG
267 }
268
269 // the store
11fdf7f2 270 std::string store_type;
7c673cae
FG
271 {
272 char fn[PATH_MAX];
11fdf7f2 273 snprintf(fn, sizeof(fn), "%s/type", data_path.c_str());
91327a77 274 int fd = ::open(fn, O_RDONLY|O_CLOEXEC);
7c673cae
FG
275 if (fd >= 0) {
276 bufferlist bl;
277 bl.read_fd(fd, 64);
278 if (bl.length()) {
279 store_type = string(bl.c_str(), bl.length() - 1); // drop \n
280 dout(5) << "object store type is " << store_type << dendl;
281 }
282 ::close(fd);
11fdf7f2
TL
283 } else if (mkfs) {
284 store_type = g_conf().get_val<std::string>("osd_objectstore");
285 } else {
286 // hrm, infer the type
287 snprintf(fn, sizeof(fn), "%s/current", data_path.c_str());
288 struct stat st;
289 if (::stat(fn, &st) == 0 &&
290 S_ISDIR(st.st_mode)) {
291 derr << "missing 'type' file, inferring filestore from current/ dir"
292 << dendl;
293 store_type = "filestore";
294 } else {
295 snprintf(fn, sizeof(fn), "%s/block", data_path.c_str());
296 if (::stat(fn, &st) == 0 &&
297 S_ISLNK(st.st_mode)) {
298 derr << "missing 'type' file, inferring bluestore from block symlink"
299 << dendl;
300 store_type = "bluestore";
301 } else {
302 derr << "missing 'type' file and unable to infer osd type" << dendl;
303 forker.exit(1);
304 }
305 }
7c673cae
FG
306 }
307 }
11fdf7f2
TL
308
309 std::string journal_path = g_conf().get_val<std::string>("osd_journal");
310 uint32_t flags = g_conf().get_val<uint64_t>("osd_os_flags");
7c673cae
FG
311 ObjectStore *store = ObjectStore::create(g_ceph_context,
312 store_type,
11fdf7f2
TL
313 data_path,
314 journal_path,
315 flags);
7c673cae
FG
316 if (!store) {
317 derr << "unable to create object store" << dendl;
11fdf7f2 318 forker.exit(-ENODEV);
7c673cae
FG
319 }
320
7c673cae 321
7c673cae
FG
322 if (mkkey) {
323 common_init_finish(g_ceph_context);
324 KeyRing *keyring = KeyRing::create_empty();
325 if (!keyring) {
326 derr << "Unable to get a Ceph keyring." << dendl;
11fdf7f2 327 forker.exit(1);
7c673cae
FG
328 }
329
11fdf7f2 330 EntityName ename{g_conf()->name};
7c673cae
FG
331 EntityAuth eauth;
332
11fdf7f2
TL
333 std::string keyring_path = g_conf().get_val<std::string>("keyring");
334 int ret = keyring->load(g_ceph_context, keyring_path);
7c673cae
FG
335 if (ret == 0 &&
336 keyring->get_auth(ename, eauth)) {
11fdf7f2 337 derr << "already have key in keyring " << keyring_path << dendl;
7c673cae
FG
338 } else {
339 eauth.key.create(g_ceph_context, CEPH_CRYPTO_AES);
340 keyring->add(ename, eauth);
341 bufferlist bl;
342 keyring->encode_plaintext(bl);
11fdf7f2 343 int r = bl.write_file(keyring_path.c_str(), 0600);
7c673cae 344 if (r)
11fdf7f2
TL
345 derr << TEXT_RED << " ** ERROR: writing new keyring to "
346 << keyring_path << ": " << cpp_strerror(r) << TEXT_NORMAL
347 << dendl;
7c673cae 348 else
11fdf7f2 349 derr << "created new key in keyring " << keyring_path << dendl;
7c673cae
FG
350 }
351 }
3efd9988
FG
352 if (mkfs) {
353 common_init_finish(g_ceph_context);
3efd9988 354
11fdf7f2 355 if (g_conf().get_val<uuid_d>("fsid").is_zero()) {
3efd9988 356 derr << "must specify cluster fsid" << dendl;
11fdf7f2 357 forker.exit(-EINVAL);
3efd9988
FG
358 }
359
11fdf7f2
TL
360 int err = OSD::mkfs(g_ceph_context, store, g_conf().get_val<uuid_d>("fsid"),
361 whoami);
3efd9988
FG
362 if (err < 0) {
363 derr << TEXT_RED << " ** ERROR: error creating empty object store in "
11fdf7f2
TL
364 << data_path << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
365 forker.exit(1);
3efd9988 366 }
11fdf7f2
TL
367 dout(0) << "created object store " << data_path
368 << " for osd." << whoami
369 << " fsid " << g_conf().get_val<uuid_d>("fsid")
370 << dendl;
371 }
372 if (mkfs || mkkey) {
373 forker.exit(0);
3efd9988 374 }
7c673cae
FG
375 if (mkjournal) {
376 common_init_finish(g_ceph_context);
377 int err = store->mkjournal();
378 if (err < 0) {
11fdf7f2
TL
379 derr << TEXT_RED << " ** ERROR: error creating fresh journal "
380 << journal_path << " for object store " << data_path << ": "
381 << cpp_strerror(-err) << TEXT_NORMAL << dendl;
382 forker.exit(1);
7c673cae 383 }
11fdf7f2
TL
384 derr << "created new journal " << journal_path
385 << " for object store " << data_path << dendl;
386 forker.exit(0);
7c673cae
FG
387 }
388 if (check_wants_journal) {
389 if (store->wants_journal()) {
d2e6a577 390 cout << "wants journal: yes" << std::endl;
11fdf7f2 391 forker.exit(0);
7c673cae 392 } else {
d2e6a577 393 cout << "wants journal: no" << std::endl;
11fdf7f2 394 forker.exit(1);
7c673cae
FG
395 }
396 }
397 if (check_allows_journal) {
398 if (store->allows_journal()) {
d2e6a577 399 cout << "allows journal: yes" << std::endl;
11fdf7f2 400 forker.exit(0);
7c673cae 401 } else {
d2e6a577 402 cout << "allows journal: no" << std::endl;
11fdf7f2 403 forker.exit(1);
7c673cae
FG
404 }
405 }
406 if (check_needs_journal) {
407 if (store->needs_journal()) {
d2e6a577 408 cout << "needs journal: yes" << std::endl;
11fdf7f2 409 forker.exit(0);
7c673cae 410 } else {
d2e6a577 411 cout << "needs journal: no" << std::endl;
11fdf7f2 412 forker.exit(1);
7c673cae
FG
413 }
414 }
415 if (flushjournal) {
416 common_init_finish(g_ceph_context);
417 int err = store->mount();
418 if (err < 0) {
11fdf7f2
TL
419 derr << TEXT_RED << " ** ERROR: error flushing journal " << journal_path
420 << " for object store " << data_path
7c673cae
FG
421 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
422 goto flushjournal_out;
423 }
424 store->umount();
11fdf7f2
TL
425 derr << "flushed journal " << journal_path
426 << " for object store " << data_path
7c673cae
FG
427 << dendl;
428flushjournal_out:
429 delete store;
11fdf7f2 430 forker.exit(err < 0 ? 1 : 0);
7c673cae
FG
431 }
432 if (dump_journal) {
433 common_init_finish(g_ceph_context);
434 int err = store->dump_journal(cout);
435 if (err < 0) {
11fdf7f2
TL
436 derr << TEXT_RED << " ** ERROR: error dumping journal " << journal_path
437 << " for object store " << data_path
7c673cae 438 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
11fdf7f2 439 forker.exit(1);
7c673cae 440 }
11fdf7f2
TL
441 derr << "dumped journal " << journal_path
442 << " for object store " << data_path
7c673cae 443 << dendl;
11fdf7f2 444 forker.exit(0);
7c673cae
FG
445 }
446
447
448 if (convertfilestore) {
449 int err = store->mount();
450 if (err < 0) {
11fdf7f2 451 derr << TEXT_RED << " ** ERROR: error mounting store " << data_path
7c673cae 452 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
11fdf7f2 453 forker.exit(1);
7c673cae
FG
454 }
455 err = store->upgrade();
456 store->umount();
457 if (err < 0) {
11fdf7f2 458 derr << TEXT_RED << " ** ERROR: error converting store " << data_path
7c673cae 459 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
11fdf7f2 460 forker.exit(1);
7c673cae 461 }
11fdf7f2 462 forker.exit(0);
7c673cae
FG
463 }
464
465 string magic;
466 uuid_d cluster_fsid, osd_fsid;
11fdf7f2 467 int require_osd_release = 0;
7c673cae 468 int w;
11fdf7f2
TL
469 int r = OSD::peek_meta(store, &magic, &cluster_fsid, &osd_fsid, &w,
470 &require_osd_release);
7c673cae
FG
471 if (r < 0) {
472 derr << TEXT_RED << " ** ERROR: unable to open OSD superblock on "
11fdf7f2 473 << data_path << ": " << cpp_strerror(-r)
7c673cae
FG
474 << TEXT_NORMAL << dendl;
475 if (r == -ENOTSUP) {
476 derr << TEXT_RED << " ** please verify that underlying storage "
477 << "supports xattrs" << TEXT_NORMAL << dendl;
478 }
11fdf7f2 479 forker.exit(1);
7c673cae
FG
480 }
481 if (w != whoami) {
482 derr << "OSD id " << w << " != my id " << whoami << dendl;
11fdf7f2 483 forker.exit(1);
7c673cae
FG
484 }
485 if (strcmp(magic.c_str(), CEPH_OSD_ONDISK_MAGIC)) {
486 derr << "OSD magic " << magic << " != my " << CEPH_OSD_ONDISK_MAGIC
487 << dendl;
11fdf7f2 488 forker.exit(1);
7c673cae
FG
489 }
490
491 if (get_cluster_fsid) {
492 cout << cluster_fsid << std::endl;
11fdf7f2 493 forker.exit(0);
7c673cae
FG
494 }
495 if (get_osd_fsid) {
496 cout << osd_fsid << std::endl;
11fdf7f2 497 forker.exit(0);
7c673cae
FG
498 }
499
11fdf7f2
TL
500 if (require_osd_release > 0 &&
501 require_osd_release + 2 < (int)ceph_release()) {
502 derr << "OSD's recorded require_osd_release " << require_osd_release
503 << " (" << ceph_release_name(require_osd_release)
504 << ") is >2 releases older than installed " << ceph_release()
505 << " (" << ceph_release_name(ceph_release())
506 << "); you can only upgrade 2 releases at a time" << dendl;
507 derr << "you should first upgrade to "
508 << (require_osd_release + 1)
509 << " (" << ceph_release_name(require_osd_release + 1) << ") or "
510 << (require_osd_release + 2)
511 << " (" << ceph_release_name(require_osd_release + 2) << ")" << dendl;
512 forker.exit(1);
513 }
7c673cae 514
11fdf7f2
TL
515 // consider objectstore numa node
516 int os_numa_node = -1;
517 r = store->get_numa_node(&os_numa_node, nullptr, nullptr);
518 if (r >= 0 && os_numa_node >= 0) {
519 dout(1) << " objectstore numa_node " << os_numa_node << dendl;
520 }
521 int iface_preferred_numa_node = -1;
522 if (g_conf().get_val<bool>("osd_numa_prefer_iface")) {
523 iface_preferred_numa_node = os_numa_node;
7c673cae
FG
524 }
525
11fdf7f2
TL
526 // messengers
527 std::string msg_type = g_conf().get_val<std::string>("ms_type");
528 std::string public_msg_type =
529 g_conf().get_val<std::string>("ms_public_type");
530 std::string cluster_msg_type =
531 g_conf().get_val<std::string>("ms_cluster_type");
532
533 public_msg_type = public_msg_type.empty() ? msg_type : public_msg_type;
534 cluster_msg_type = cluster_msg_type.empty() ? msg_type : cluster_msg_type;
535 Messenger *ms_public = Messenger::create(g_ceph_context, public_msg_type,
7c673cae
FG
536 entity_name_t::OSD(whoami), "client",
537 getpid(),
538 Messenger::HAS_HEAVY_TRAFFIC |
539 Messenger::HAS_MANY_CONNECTIONS);
11fdf7f2 540 Messenger *ms_cluster = Messenger::create(g_ceph_context, cluster_msg_type,
7c673cae
FG
541 entity_name_t::OSD(whoami), "cluster",
542 getpid(),
543 Messenger::HAS_HEAVY_TRAFFIC |
544 Messenger::HAS_MANY_CONNECTIONS);
11fdf7f2 545 Messenger *ms_hb_back_client = Messenger::create(g_ceph_context, cluster_msg_type,
7c673cae
FG
546 entity_name_t::OSD(whoami), "hb_back_client",
547 getpid(), Messenger::HEARTBEAT);
11fdf7f2 548 Messenger *ms_hb_front_client = Messenger::create(g_ceph_context, public_msg_type,
7c673cae
FG
549 entity_name_t::OSD(whoami), "hb_front_client",
550 getpid(), Messenger::HEARTBEAT);
11fdf7f2 551 Messenger *ms_hb_back_server = Messenger::create(g_ceph_context, cluster_msg_type,
7c673cae
FG
552 entity_name_t::OSD(whoami), "hb_back_server",
553 getpid(), Messenger::HEARTBEAT);
11fdf7f2 554 Messenger *ms_hb_front_server = Messenger::create(g_ceph_context, public_msg_type,
7c673cae
FG
555 entity_name_t::OSD(whoami), "hb_front_server",
556 getpid(), Messenger::HEARTBEAT);
11fdf7f2 557 Messenger *ms_objecter = Messenger::create(g_ceph_context, public_msg_type,
7c673cae
FG
558 entity_name_t::OSD(whoami), "ms_objecter",
559 getpid(), 0);
560 if (!ms_public || !ms_cluster || !ms_hb_front_client || !ms_hb_back_client || !ms_hb_back_server || !ms_hb_front_server || !ms_objecter)
11fdf7f2 561 forker.exit(1);
7c673cae
FG
562 ms_cluster->set_cluster_protocol(CEPH_OSD_PROTOCOL);
563 ms_hb_front_client->set_cluster_protocol(CEPH_OSD_PROTOCOL);
564 ms_hb_back_client->set_cluster_protocol(CEPH_OSD_PROTOCOL);
565 ms_hb_back_server->set_cluster_protocol(CEPH_OSD_PROTOCOL);
566 ms_hb_front_server->set_cluster_protocol(CEPH_OSD_PROTOCOL);
567
11fdf7f2
TL
568 dout(0) << "starting osd." << whoami
569 << " osd_data " << data_path
570 << " " << ((journal_path.empty()) ?
571 "(no journal)" : journal_path)
572 << dendl;
7c673cae 573
11fdf7f2
TL
574 uint64_t message_size =
575 g_conf().get_val<Option::size_t>("osd_client_message_size_cap");
7c673cae 576 boost::scoped_ptr<Throttle> client_byte_throttler(
11fdf7f2 577 new Throttle(g_ceph_context, "osd_client_bytes", message_size));
7c673cae
FG
578
579 // All feature bits 0 - 34 should be present from dumpling v0.67 forward
580 uint64_t osd_required =
581 CEPH_FEATURE_UID |
582 CEPH_FEATURE_PGID64 |
583 CEPH_FEATURE_OSDENC;
584
585 ms_public->set_default_policy(Messenger::Policy::stateless_server(0));
586 ms_public->set_policy_throttlers(entity_name_t::TYPE_CLIENT,
587 client_byte_throttler.get(),
31f18b77 588 nullptr);
7c673cae 589 ms_public->set_policy(entity_name_t::TYPE_MON,
11fdf7f2 590 Messenger::Policy::lossy_client(osd_required));
7c673cae 591 ms_public->set_policy(entity_name_t::TYPE_MGR,
11fdf7f2 592 Messenger::Policy::lossy_client(osd_required));
7c673cae
FG
593
594 //try to poison pill any OSD connections on the wrong address
595 ms_public->set_policy(entity_name_t::TYPE_OSD,
596 Messenger::Policy::stateless_server(0));
597
598 ms_cluster->set_default_policy(Messenger::Policy::stateless_server(0));
599 ms_cluster->set_policy(entity_name_t::TYPE_MON, Messenger::Policy::lossy_client(0));
600 ms_cluster->set_policy(entity_name_t::TYPE_OSD,
601 Messenger::Policy::lossless_peer(osd_required));
602 ms_cluster->set_policy(entity_name_t::TYPE_CLIENT,
603 Messenger::Policy::stateless_server(0));
604
605 ms_hb_front_client->set_policy(entity_name_t::TYPE_OSD,
606 Messenger::Policy::lossy_client(0));
607 ms_hb_back_client->set_policy(entity_name_t::TYPE_OSD,
608 Messenger::Policy::lossy_client(0));
609 ms_hb_back_server->set_policy(entity_name_t::TYPE_OSD,
610 Messenger::Policy::stateless_server(0));
611 ms_hb_front_server->set_policy(entity_name_t::TYPE_OSD,
612 Messenger::Policy::stateless_server(0));
613
614 ms_objecter->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX));
615
11fdf7f2
TL
616 entity_addrvec_t public_addrs, cluster_addrs;
617 r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC, &public_addrs,
618 iface_preferred_numa_node);
619 if (r < 0) {
620 derr << "Failed to pick public address." << dendl;
621 forker.exit(1);
622 }
623 r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_CLUSTER, &cluster_addrs,
624 iface_preferred_numa_node);
625 if (r < 0) {
626 derr << "Failed to pick cluster address." << dendl;
627 forker.exit(1);
628 }
629
630 if (ms_public->bindv(public_addrs) < 0)
631 forker.exit(1);
7c673cae 632
11fdf7f2
TL
633 if (ms_cluster->bindv(cluster_addrs) < 0)
634 forker.exit(1);
635
636 bool is_delay = g_conf().get_val<bool>("osd_heartbeat_use_min_delay_socket");
637 if (is_delay) {
7c673cae
FG
638 ms_hb_front_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
639 ms_hb_back_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
640 ms_hb_back_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
641 ms_hb_front_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
642 }
643
11fdf7f2
TL
644 entity_addrvec_t hb_front_addrs = public_addrs;
645 for (auto& a : hb_front_addrs.v) {
646 a.set_port(0);
7c673cae 647 }
11fdf7f2
TL
648 if (ms_hb_front_server->bindv(hb_front_addrs) < 0)
649 forker.exit(1);
650 if (ms_hb_front_client->client_bind(hb_front_addrs.front()) < 0)
651 forker.exit(1);
652
653 entity_addrvec_t hb_back_addrs = cluster_addrs;
654 for (auto& a : hb_back_addrs.v) {
655 a.set_port(0);
656 }
657 if (ms_hb_back_server->bindv(hb_back_addrs) < 0)
658 forker.exit(1);
659 if (ms_hb_back_client->client_bind(hb_back_addrs.front()) < 0)
660 forker.exit(1);
7c673cae 661
11fdf7f2
TL
662 // install signal handlers
663 init_async_signal_handler();
664 register_async_signal_handler(SIGHUP, sighup_handler);
7c673cae
FG
665
666 TracepointProvider::initialize<osd_tracepoint_traits>(g_ceph_context);
667 TracepointProvider::initialize<os_tracepoint_traits>(g_ceph_context);
31f18b77
FG
668#ifdef WITH_OSD_INSTRUMENT_FUNCTIONS
669 TracepointProvider::initialize<cyg_profile_traits>(g_ceph_context);
670#endif
7c673cae 671
11fdf7f2
TL
672 srand(time(NULL) + getpid());
673
7c673cae
FG
674 MonClient mc(g_ceph_context);
675 if (mc.build_initial_monmap() < 0)
676 return -1;
677 global_init_chdir(g_ceph_context);
678
11fdf7f2
TL
679 if (global_init_preload_erasure_code(g_ceph_context) < 0) {
680 forker.exit(1);
681 }
224ce89b 682
7c673cae
FG
683 osd = new OSD(g_ceph_context,
684 store,
685 whoami,
686 ms_cluster,
687 ms_public,
688 ms_hb_front_client,
689 ms_hb_back_client,
690 ms_hb_front_server,
691 ms_hb_back_server,
692 ms_objecter,
693 &mc,
11fdf7f2
TL
694 data_path,
695 journal_path);
7c673cae
FG
696
697 int err = osd->pre_init();
698 if (err < 0) {
699 derr << TEXT_RED << " ** ERROR: osd pre_init failed: " << cpp_strerror(-err)
700 << TEXT_NORMAL << dendl;
11fdf7f2 701 forker.exit(1);
7c673cae
FG
702 }
703
704 ms_public->start();
705 ms_hb_front_client->start();
706 ms_hb_back_client->start();
707 ms_hb_front_server->start();
708 ms_hb_back_server->start();
709 ms_cluster->start();
710 ms_objecter->start();
711
712 // start osd
713 err = osd->init();
714 if (err < 0) {
715 derr << TEXT_RED << " ** ERROR: osd init failed: " << cpp_strerror(-err)
716 << TEXT_NORMAL << dendl;
11fdf7f2 717 forker.exit(1);
7c673cae
FG
718 }
719
11fdf7f2
TL
720 // -- daemonize --
721
722 if (g_conf()->daemonize) {
723 global_init_postfork_finish(g_ceph_context);
724 forker.daemonize();
725 }
726
727
7c673cae
FG
728 register_async_signal_handler_oneshot(SIGINT, handle_osd_signal);
729 register_async_signal_handler_oneshot(SIGTERM, handle_osd_signal);
730
731 osd->final_init();
732
11fdf7f2 733 if (g_conf().get_val<bool>("inject_early_sigterm"))
7c673cae
FG
734 kill(getpid(), SIGTERM);
735
736 ms_public->wait();
737 ms_hb_front_client->wait();
738 ms_hb_back_client->wait();
739 ms_hb_front_server->wait();
740 ms_hb_back_server->wait();
741 ms_cluster->wait();
742 ms_objecter->wait();
743
744 unregister_async_signal_handler(SIGHUP, sighup_handler);
745 unregister_async_signal_handler(SIGINT, handle_osd_signal);
746 unregister_async_signal_handler(SIGTERM, handle_osd_signal);
747 shutdown_async_signal_handler();
748
749 // done
750 delete osd;
751 delete ms_public;
752 delete ms_hb_front_client;
753 delete ms_hb_back_client;
754 delete ms_hb_front_server;
755 delete ms_hb_back_server;
756 delete ms_cluster;
757 delete ms_objecter;
758
759 client_byte_throttler.reset();
7c673cae
FG
760
761 // cd on exit, so that gmon.out (if any) goes into a separate directory for each node.
762 char s[20];
763 snprintf(s, sizeof(s), "gmon/%d", getpid());
764 if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) {
765 dout(0) << "ceph-osd: gmon.out should be in " << s << dendl;
766 }
767
768 return 0;
769}