]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph_osd.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / ceph_osd.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <sys/types.h>
16#include <sys/stat.h>
17#include <fcntl.h>
18#include <boost/scoped_ptr.hpp>
19
20#include <iostream>
21#include <string>
7c673cae 22
f67539c2 23#include "auth/KeyRing.h"
7c673cae
FG
24#include "osd/OSD.h"
25#include "os/ObjectStore.h"
26#include "mon/MonClient.h"
27#include "include/ceph_features.h"
7c673cae
FG
28#include "common/config.h"
29
30#include "mon/MonMap.h"
31
32#include "msg/Messenger.h"
33
11fdf7f2 34#include "common/Throttle.h"
7c673cae
FG
35#include "common/Timer.h"
36#include "common/TracepointProvider.h"
37#include "common/ceph_argparse.h"
11fdf7f2 38#include "common/numa.h"
7c673cae
FG
39
40#include "global/global_init.h"
41#include "global/signal_handler.h"
42
43#include "include/color.h"
44#include "common/errno.h"
45#include "common/pick_address.h"
46
47#include "perfglue/heap_profiler.h"
48
11fdf7f2
TL
49#include "include/ceph_assert.h"
50
51#include "common/Preforker.h"
7c673cae
FG
52
53#define dout_context g_ceph_context
54#define dout_subsys ceph_subsys_osd
55
f67539c2
TL
56using std::cerr;
57using std::cout;
58using std::map;
59using std::ostringstream;
60using std::string;
61using std::vector;
62
63using ceph::bufferlist;
64
7c673cae
FG
65namespace {
66
67TracepointProvider::Traits osd_tracepoint_traits("libosd_tp.so",
68 "osd_tracing");
69TracepointProvider::Traits os_tracepoint_traits("libos_tp.so",
70 "osd_objectstore_tracing");
9f95a23c
TL
71TracepointProvider::Traits bluestore_tracepoint_traits("libbluestore_tp.so",
72 "bluestore_tracing");
31f18b77
FG
73#ifdef WITH_OSD_INSTRUMENT_FUNCTIONS
74TracepointProvider::Traits cyg_profile_traits("libcyg_profile_tp.so",
75 "osd_function_tracing");
76#endif
7c673cae
FG
77
78} // anonymous namespace
79
9f95a23c 80OSD *osdptr = nullptr;
7c673cae
FG
81
82void handle_osd_signal(int signum)
83{
9f95a23c
TL
84 if (osdptr)
85 osdptr->handle_signal(signum);
7c673cae
FG
86}
87
88static void usage()
89{
31f18b77 90 cout << "usage: ceph-osd -i <ID> [flags]\n"
7c673cae
FG
91 << " --osd-data PATH data directory\n"
92 << " --osd-journal PATH\n"
93 << " journal file or block device\n"
94 << " --mkfs create a [new] data directory\n"
31f18b77 95 << " --mkkey generate a new secret key. This is normally used in combination with --mkfs\n"
11fdf7f2
TL
96 << " --monmap specify the path to the monitor map. This is normally used in combination with --mkfs\n"
97 << " --osd-uuid specify the OSD's fsid. This is normally used in combination with --mkfs\n"
98 << " --keyring specify a path to the osd keyring. This is normally used in combination with --mkfs\n"
7c673cae
FG
99 << " --convert-filestore\n"
100 << " run any pending upgrade operations\n"
101 << " --flush-journal flush all data out of journal\n"
e306af50
TL
102 << " --osdspec-affinity\n"
103 << " set affinity to an osdspec\n"
11fdf7f2 104 << " --dump-journal dump all data of journal\n"
7c673cae
FG
105 << " --mkjournal initialize a new journal\n"
106 << " --check-wants-journal\n"
107 << " check whether a journal is desired\n"
108 << " --check-allows-journal\n"
109 << " check whether a journal is allowed\n"
110 << " --check-needs-journal\n"
111 << " check whether a journal is required\n"
112 << " --debug_osd <N> set debug level (e.g. 10)\n"
113 << " --get-device-fsid PATH\n"
114 << " get OSD fsid for the given block device\n"
115 << std::endl;
116 generic_server_usage();
117}
118
7c673cae 119int main(int argc, const char **argv)
7c673cae 120{
20effc67 121 auto args = argv_to_vec(argc, argv);
11fdf7f2
TL
122 if (args.empty()) {
123 cerr << argv[0] << ": -h or --help for usage" << std::endl;
124 exit(1);
125 }
126 if (ceph_argparse_need_usage(args)) {
127 usage();
128 exit(0);
129 }
7c673cae 130
11fdf7f2
TL
131 map<string,string> defaults = {
132 // We want to enable leveldb's log, while allowing users to override this
133 // option, therefore we will pass it as a default argument to global_init().
134 { "leveldb_log", "" }
135 };
136 auto cct = global_init(
137 &defaults,
138 args, CEPH_ENTITY_TYPE_OSD,
f67539c2 139 CODE_ENVIRONMENT_DAEMON, 0);
7c673cae
FG
140 ceph_heap_profiler_init();
141
11fdf7f2
TL
142 Preforker forker;
143
7c673cae
FG
144 // osd specific args
145 bool mkfs = false;
146 bool mkjournal = false;
147 bool check_wants_journal = false;
148 bool check_allows_journal = false;
149 bool check_needs_journal = false;
150 bool mkkey = false;
151 bool flushjournal = false;
152 bool dump_journal = false;
153 bool convertfilestore = false;
154 bool get_osd_fsid = false;
155 bool get_cluster_fsid = false;
156 bool get_journal_fsid = false;
157 bool get_device_fsid = false;
158 string device_path;
159 std::string dump_pg_log;
e306af50 160 std::string osdspec_affinity;
7c673cae
FG
161
162 std::string val;
163 for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
164 if (ceph_argparse_double_dash(args, i)) {
165 break;
7c673cae
FG
166 } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) {
167 mkfs = true;
e306af50
TL
168 } else if (ceph_argparse_witharg(args, i, &val, "--osdspec-affinity", (char*)NULL)) {
169 osdspec_affinity = val;
7c673cae
FG
170 } else if (ceph_argparse_flag(args, i, "--mkjournal", (char*)NULL)) {
171 mkjournal = true;
172 } else if (ceph_argparse_flag(args, i, "--check-allows-journal", (char*)NULL)) {
173 check_allows_journal = true;
174 } else if (ceph_argparse_flag(args, i, "--check-wants-journal", (char*)NULL)) {
175 check_wants_journal = true;
176 } else if (ceph_argparse_flag(args, i, "--check-needs-journal", (char*)NULL)) {
177 check_needs_journal = true;
178 } else if (ceph_argparse_flag(args, i, "--mkkey", (char*)NULL)) {
179 mkkey = true;
180 } else if (ceph_argparse_flag(args, i, "--flush-journal", (char*)NULL)) {
181 flushjournal = true;
182 } else if (ceph_argparse_flag(args, i, "--convert-filestore", (char*)NULL)) {
183 convertfilestore = true;
184 } else if (ceph_argparse_witharg(args, i, &val, "--dump-pg-log", (char*)NULL)) {
185 dump_pg_log = val;
186 } else if (ceph_argparse_flag(args, i, "--dump-journal", (char*)NULL)) {
187 dump_journal = true;
188 } else if (ceph_argparse_flag(args, i, "--get-cluster-fsid", (char*)NULL)) {
189 get_cluster_fsid = true;
190 } else if (ceph_argparse_flag(args, i, "--get-osd-fsid", "--get-osd-uuid", (char*)NULL)) {
191 get_osd_fsid = true;
192 } else if (ceph_argparse_flag(args, i, "--get-journal-fsid", "--get-journal-uuid", (char*)NULL)) {
193 get_journal_fsid = true;
194 } else if (ceph_argparse_witharg(args, i, &device_path,
195 "--get-device-fsid", (char*)NULL)) {
196 get_device_fsid = true;
197 } else {
198 ++i;
199 }
200 }
201 if (!args.empty()) {
11fdf7f2
TL
202 cerr << "unrecognized arg " << args[0] << std::endl;
203 exit(1);
7c673cae
FG
204 }
205
11fdf7f2
TL
206 if (global_init_prefork(g_ceph_context) >= 0) {
207 std::string err;
208 int r = forker.prefork(err);
209 if (r < 0) {
210 cerr << err << std::endl;
211 return r;
212 }
213 if (forker.is_parent()) {
214 g_ceph_context->_log->start();
215 if (forker.parent_wait(err) != 0) {
216 return -ENXIO;
217 }
218 return 0;
219 }
220 setsid();
221 global_init_postfork_start(g_ceph_context);
222 }
223 common_init_finish(g_ceph_context);
224 global_init_chdir(g_ceph_context);
225
7c673cae 226 if (get_journal_fsid) {
11fdf7f2 227 device_path = g_conf().get_val<std::string>("osd_journal");
7c673cae
FG
228 get_device_fsid = true;
229 }
230 if (get_device_fsid) {
231 uuid_d uuid;
232 int r = ObjectStore::probe_block_device_fsid(g_ceph_context, device_path,
233 &uuid);
234 if (r < 0) {
235 cerr << "failed to get device fsid for " << device_path
236 << ": " << cpp_strerror(r) << std::endl;
11fdf7f2 237 forker.exit(1);
7c673cae
FG
238 }
239 cout << uuid << std::endl;
11fdf7f2 240 forker.exit(0);
7c673cae
FG
241 }
242
243 if (!dump_pg_log.empty()) {
244 common_init_finish(g_ceph_context);
245 bufferlist bl;
246 std::string error;
11fdf7f2
TL
247
248 if (bl.read_file(dump_pg_log.c_str(), &error) >= 0) {
7c673cae 249 pg_log_entry_t e;
11fdf7f2 250 auto p = bl.cbegin();
7c673cae
FG
251 while (!p.end()) {
252 uint64_t pos = p.get_off();
253 try {
11fdf7f2 254 decode(e, p);
7c673cae 255 }
f67539c2 256 catch (const ceph::buffer::error &e) {
7c673cae 257 derr << "failed to decode LogEntry at offset " << pos << dendl;
11fdf7f2 258 forker.exit(1);
7c673cae
FG
259 }
260 derr << pos << ":\t" << e << dendl;
261 }
262 } else {
263 derr << "unable to open " << dump_pg_log << ": " << error << dendl;
264 }
11fdf7f2 265 forker.exit(0);
7c673cae
FG
266 }
267
268 // whoami
269 char *end;
11fdf7f2 270 const char *id = g_conf()->name.get_id().c_str();
7c673cae 271 int whoami = strtol(id, &end, 10);
11fdf7f2 272 std::string data_path = g_conf().get_val<std::string>("osd_data");
7c673cae
FG
273 if (*end || end == id || whoami < 0) {
274 derr << "must specify '-i #' where # is the osd number" << dendl;
11fdf7f2 275 forker.exit(1);
7c673cae
FG
276 }
277
11fdf7f2 278 if (data_path.empty()) {
7c673cae 279 derr << "must specify '--osd-data=foo' data path" << dendl;
11fdf7f2 280 forker.exit(1);
7c673cae
FG
281 }
282
283 // the store
11fdf7f2 284 std::string store_type;
7c673cae
FG
285 {
286 char fn[PATH_MAX];
11fdf7f2 287 snprintf(fn, sizeof(fn), "%s/type", data_path.c_str());
91327a77 288 int fd = ::open(fn, O_RDONLY|O_CLOEXEC);
7c673cae
FG
289 if (fd >= 0) {
290 bufferlist bl;
291 bl.read_fd(fd, 64);
292 if (bl.length()) {
293 store_type = string(bl.c_str(), bl.length() - 1); // drop \n
294 dout(5) << "object store type is " << store_type << dendl;
295 }
296 ::close(fd);
11fdf7f2
TL
297 } else if (mkfs) {
298 store_type = g_conf().get_val<std::string>("osd_objectstore");
299 } else {
300 // hrm, infer the type
301 snprintf(fn, sizeof(fn), "%s/current", data_path.c_str());
302 struct stat st;
303 if (::stat(fn, &st) == 0 &&
304 S_ISDIR(st.st_mode)) {
305 derr << "missing 'type' file, inferring filestore from current/ dir"
306 << dendl;
307 store_type = "filestore";
308 } else {
309 snprintf(fn, sizeof(fn), "%s/block", data_path.c_str());
310 if (::stat(fn, &st) == 0 &&
311 S_ISLNK(st.st_mode)) {
312 derr << "missing 'type' file, inferring bluestore from block symlink"
313 << dendl;
314 store_type = "bluestore";
315 } else {
316 derr << "missing 'type' file and unable to infer osd type" << dendl;
317 forker.exit(1);
318 }
319 }
7c673cae
FG
320 }
321 }
11fdf7f2
TL
322
323 std::string journal_path = g_conf().get_val<std::string>("osd_journal");
324 uint32_t flags = g_conf().get_val<uint64_t>("osd_os_flags");
20effc67
TL
325 std::unique_ptr<ObjectStore> store = ObjectStore::create(g_ceph_context,
326 store_type,
327 data_path,
328 journal_path,
329 flags);
7c673cae
FG
330 if (!store) {
331 derr << "unable to create object store" << dendl;
11fdf7f2 332 forker.exit(-ENODEV);
7c673cae
FG
333 }
334
7c673cae 335
7c673cae
FG
336 if (mkkey) {
337 common_init_finish(g_ceph_context);
9f95a23c 338 KeyRing keyring;
7c673cae 339
11fdf7f2 340 EntityName ename{g_conf()->name};
7c673cae
FG
341 EntityAuth eauth;
342
11fdf7f2 343 std::string keyring_path = g_conf().get_val<std::string>("keyring");
9f95a23c 344 int ret = keyring.load(g_ceph_context, keyring_path);
7c673cae 345 if (ret == 0 &&
9f95a23c 346 keyring.get_auth(ename, eauth)) {
11fdf7f2 347 derr << "already have key in keyring " << keyring_path << dendl;
7c673cae
FG
348 } else {
349 eauth.key.create(g_ceph_context, CEPH_CRYPTO_AES);
9f95a23c 350 keyring.add(ename, eauth);
7c673cae 351 bufferlist bl;
9f95a23c 352 keyring.encode_plaintext(bl);
11fdf7f2 353 int r = bl.write_file(keyring_path.c_str(), 0600);
7c673cae 354 if (r)
11fdf7f2
TL
355 derr << TEXT_RED << " ** ERROR: writing new keyring to "
356 << keyring_path << ": " << cpp_strerror(r) << TEXT_NORMAL
357 << dendl;
7c673cae 358 else
11fdf7f2 359 derr << "created new key in keyring " << keyring_path << dendl;
7c673cae
FG
360 }
361 }
e306af50 362
3efd9988
FG
363 if (mkfs) {
364 common_init_finish(g_ceph_context);
3efd9988 365
11fdf7f2 366 if (g_conf().get_val<uuid_d>("fsid").is_zero()) {
3efd9988 367 derr << "must specify cluster fsid" << dendl;
11fdf7f2 368 forker.exit(-EINVAL);
3efd9988
FG
369 }
370
20effc67 371 int err = OSD::mkfs(g_ceph_context, std::move(store), g_conf().get_val<uuid_d>("fsid"),
e306af50 372 whoami, osdspec_affinity);
3efd9988
FG
373 if (err < 0) {
374 derr << TEXT_RED << " ** ERROR: error creating empty object store in "
11fdf7f2
TL
375 << data_path << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
376 forker.exit(1);
3efd9988 377 }
11fdf7f2
TL
378 dout(0) << "created object store " << data_path
379 << " for osd." << whoami
380 << " fsid " << g_conf().get_val<uuid_d>("fsid")
381 << dendl;
382 }
383 if (mkfs || mkkey) {
384 forker.exit(0);
3efd9988 385 }
7c673cae
FG
386 if (mkjournal) {
387 common_init_finish(g_ceph_context);
388 int err = store->mkjournal();
389 if (err < 0) {
11fdf7f2
TL
390 derr << TEXT_RED << " ** ERROR: error creating fresh journal "
391 << journal_path << " for object store " << data_path << ": "
392 << cpp_strerror(-err) << TEXT_NORMAL << dendl;
393 forker.exit(1);
7c673cae 394 }
11fdf7f2
TL
395 derr << "created new journal " << journal_path
396 << " for object store " << data_path << dendl;
397 forker.exit(0);
7c673cae
FG
398 }
399 if (check_wants_journal) {
400 if (store->wants_journal()) {
d2e6a577 401 cout << "wants journal: yes" << std::endl;
11fdf7f2 402 forker.exit(0);
7c673cae 403 } else {
d2e6a577 404 cout << "wants journal: no" << std::endl;
11fdf7f2 405 forker.exit(1);
7c673cae
FG
406 }
407 }
408 if (check_allows_journal) {
409 if (store->allows_journal()) {
d2e6a577 410 cout << "allows journal: yes" << std::endl;
11fdf7f2 411 forker.exit(0);
7c673cae 412 } else {
d2e6a577 413 cout << "allows journal: no" << std::endl;
11fdf7f2 414 forker.exit(1);
7c673cae
FG
415 }
416 }
417 if (check_needs_journal) {
418 if (store->needs_journal()) {
d2e6a577 419 cout << "needs journal: yes" << std::endl;
11fdf7f2 420 forker.exit(0);
7c673cae 421 } else {
d2e6a577 422 cout << "needs journal: no" << std::endl;
11fdf7f2 423 forker.exit(1);
7c673cae
FG
424 }
425 }
426 if (flushjournal) {
427 common_init_finish(g_ceph_context);
428 int err = store->mount();
429 if (err < 0) {
11fdf7f2
TL
430 derr << TEXT_RED << " ** ERROR: error flushing journal " << journal_path
431 << " for object store " << data_path
7c673cae
FG
432 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
433 goto flushjournal_out;
434 }
435 store->umount();
11fdf7f2
TL
436 derr << "flushed journal " << journal_path
437 << " for object store " << data_path
7c673cae
FG
438 << dendl;
439flushjournal_out:
20effc67 440 store.reset();
11fdf7f2 441 forker.exit(err < 0 ? 1 : 0);
7c673cae
FG
442 }
443 if (dump_journal) {
444 common_init_finish(g_ceph_context);
445 int err = store->dump_journal(cout);
446 if (err < 0) {
11fdf7f2
TL
447 derr << TEXT_RED << " ** ERROR: error dumping journal " << journal_path
448 << " for object store " << data_path
7c673cae 449 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
11fdf7f2 450 forker.exit(1);
7c673cae 451 }
11fdf7f2
TL
452 derr << "dumped journal " << journal_path
453 << " for object store " << data_path
7c673cae 454 << dendl;
11fdf7f2 455 forker.exit(0);
7c673cae
FG
456 }
457
7c673cae
FG
458 if (convertfilestore) {
459 int err = store->mount();
460 if (err < 0) {
11fdf7f2 461 derr << TEXT_RED << " ** ERROR: error mounting store " << data_path
7c673cae 462 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
11fdf7f2 463 forker.exit(1);
7c673cae
FG
464 }
465 err = store->upgrade();
466 store->umount();
467 if (err < 0) {
11fdf7f2 468 derr << TEXT_RED << " ** ERROR: error converting store " << data_path
7c673cae 469 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
11fdf7f2 470 forker.exit(1);
7c673cae 471 }
11fdf7f2 472 forker.exit(0);
7c673cae
FG
473 }
474
475 string magic;
476 uuid_d cluster_fsid, osd_fsid;
9f95a23c 477 ceph_release_t require_osd_release = ceph_release_t::unknown;
7c673cae 478 int w;
20effc67 479 int r = OSD::peek_meta(store.get(), &magic, &cluster_fsid, &osd_fsid, &w,
11fdf7f2 480 &require_osd_release);
7c673cae
FG
481 if (r < 0) {
482 derr << TEXT_RED << " ** ERROR: unable to open OSD superblock on "
11fdf7f2 483 << data_path << ": " << cpp_strerror(-r)
7c673cae
FG
484 << TEXT_NORMAL << dendl;
485 if (r == -ENOTSUP) {
486 derr << TEXT_RED << " ** please verify that underlying storage "
487 << "supports xattrs" << TEXT_NORMAL << dendl;
488 }
11fdf7f2 489 forker.exit(1);
7c673cae
FG
490 }
491 if (w != whoami) {
492 derr << "OSD id " << w << " != my id " << whoami << dendl;
11fdf7f2 493 forker.exit(1);
7c673cae
FG
494 }
495 if (strcmp(magic.c_str(), CEPH_OSD_ONDISK_MAGIC)) {
496 derr << "OSD magic " << magic << " != my " << CEPH_OSD_ONDISK_MAGIC
497 << dendl;
11fdf7f2 498 forker.exit(1);
7c673cae
FG
499 }
500
501 if (get_cluster_fsid) {
502 cout << cluster_fsid << std::endl;
11fdf7f2 503 forker.exit(0);
7c673cae
FG
504 }
505 if (get_osd_fsid) {
506 cout << osd_fsid << std::endl;
11fdf7f2 507 forker.exit(0);
7c673cae
FG
508 }
509
9f95a23c 510 {
9f95a23c 511 ostringstream err;
f67539c2 512 if (!can_upgrade_from(require_osd_release, "require_osd_release", err)) {
9f95a23c
TL
513 derr << err.str() << dendl;
514 forker.exit(1);
515 }
11fdf7f2 516 }
7c673cae 517
11fdf7f2
TL
518 // consider objectstore numa node
519 int os_numa_node = -1;
520 r = store->get_numa_node(&os_numa_node, nullptr, nullptr);
521 if (r >= 0 && os_numa_node >= 0) {
522 dout(1) << " objectstore numa_node " << os_numa_node << dendl;
523 }
524 int iface_preferred_numa_node = -1;
525 if (g_conf().get_val<bool>("osd_numa_prefer_iface")) {
526 iface_preferred_numa_node = os_numa_node;
7c673cae
FG
527 }
528
11fdf7f2
TL
529 // messengers
530 std::string msg_type = g_conf().get_val<std::string>("ms_type");
531 std::string public_msg_type =
532 g_conf().get_val<std::string>("ms_public_type");
533 std::string cluster_msg_type =
534 g_conf().get_val<std::string>("ms_cluster_type");
535
536 public_msg_type = public_msg_type.empty() ? msg_type : public_msg_type;
537 cluster_msg_type = cluster_msg_type.empty() ? msg_type : cluster_msg_type;
9f95a23c 538 uint64_t nonce = Messenger::get_pid_nonce();
11fdf7f2 539 Messenger *ms_public = Messenger::create(g_ceph_context, public_msg_type,
f67539c2 540 entity_name_t::OSD(whoami), "client", nonce);
11fdf7f2 541 Messenger *ms_cluster = Messenger::create(g_ceph_context, cluster_msg_type,
f67539c2 542 entity_name_t::OSD(whoami), "cluster", nonce);
11fdf7f2 543 Messenger *ms_hb_back_client = Messenger::create(g_ceph_context, cluster_msg_type,
f67539c2 544 entity_name_t::OSD(whoami), "hb_back_client", nonce);
11fdf7f2 545 Messenger *ms_hb_front_client = Messenger::create(g_ceph_context, public_msg_type,
f67539c2 546 entity_name_t::OSD(whoami), "hb_front_client", nonce);
11fdf7f2 547 Messenger *ms_hb_back_server = Messenger::create(g_ceph_context, cluster_msg_type,
f67539c2 548 entity_name_t::OSD(whoami), "hb_back_server", nonce);
11fdf7f2 549 Messenger *ms_hb_front_server = Messenger::create(g_ceph_context, public_msg_type,
f67539c2 550 entity_name_t::OSD(whoami), "hb_front_server", nonce);
11fdf7f2 551 Messenger *ms_objecter = Messenger::create(g_ceph_context, public_msg_type,
f67539c2 552 entity_name_t::OSD(whoami), "ms_objecter", nonce);
7c673cae 553 if (!ms_public || !ms_cluster || !ms_hb_front_client || !ms_hb_back_client || !ms_hb_back_server || !ms_hb_front_server || !ms_objecter)
11fdf7f2 554 forker.exit(1);
7c673cae
FG
555 ms_cluster->set_cluster_protocol(CEPH_OSD_PROTOCOL);
556 ms_hb_front_client->set_cluster_protocol(CEPH_OSD_PROTOCOL);
557 ms_hb_back_client->set_cluster_protocol(CEPH_OSD_PROTOCOL);
558 ms_hb_back_server->set_cluster_protocol(CEPH_OSD_PROTOCOL);
559 ms_hb_front_server->set_cluster_protocol(CEPH_OSD_PROTOCOL);
560
11fdf7f2
TL
561 dout(0) << "starting osd." << whoami
562 << " osd_data " << data_path
563 << " " << ((journal_path.empty()) ?
564 "(no journal)" : journal_path)
565 << dendl;
7c673cae 566
11fdf7f2
TL
567 uint64_t message_size =
568 g_conf().get_val<Option::size_t>("osd_client_message_size_cap");
7c673cae 569 boost::scoped_ptr<Throttle> client_byte_throttler(
11fdf7f2 570 new Throttle(g_ceph_context, "osd_client_bytes", message_size));
f6b5b4d7
TL
571 uint64_t message_cap = g_conf().get_val<uint64_t>("osd_client_message_cap");
572 boost::scoped_ptr<Throttle> client_msg_throttler(
573 new Throttle(g_ceph_context, "osd_client_messages", message_cap));
7c673cae
FG
574
575 // All feature bits 0 - 34 should be present from dumpling v0.67 forward
576 uint64_t osd_required =
577 CEPH_FEATURE_UID |
578 CEPH_FEATURE_PGID64 |
579 CEPH_FEATURE_OSDENC;
580
9f95a23c 581 ms_public->set_default_policy(Messenger::Policy::stateless_registered_server(0));
7c673cae
FG
582 ms_public->set_policy_throttlers(entity_name_t::TYPE_CLIENT,
583 client_byte_throttler.get(),
f6b5b4d7 584 client_msg_throttler.get());
7c673cae 585 ms_public->set_policy(entity_name_t::TYPE_MON,
11fdf7f2 586 Messenger::Policy::lossy_client(osd_required));
7c673cae 587 ms_public->set_policy(entity_name_t::TYPE_MGR,
11fdf7f2 588 Messenger::Policy::lossy_client(osd_required));
7c673cae 589
7c673cae
FG
590 ms_cluster->set_default_policy(Messenger::Policy::stateless_server(0));
591 ms_cluster->set_policy(entity_name_t::TYPE_MON, Messenger::Policy::lossy_client(0));
592 ms_cluster->set_policy(entity_name_t::TYPE_OSD,
593 Messenger::Policy::lossless_peer(osd_required));
594 ms_cluster->set_policy(entity_name_t::TYPE_CLIENT,
595 Messenger::Policy::stateless_server(0));
596
597 ms_hb_front_client->set_policy(entity_name_t::TYPE_OSD,
598 Messenger::Policy::lossy_client(0));
599 ms_hb_back_client->set_policy(entity_name_t::TYPE_OSD,
600 Messenger::Policy::lossy_client(0));
601 ms_hb_back_server->set_policy(entity_name_t::TYPE_OSD,
602 Messenger::Policy::stateless_server(0));
603 ms_hb_front_server->set_policy(entity_name_t::TYPE_OSD,
604 Messenger::Policy::stateless_server(0));
605
606 ms_objecter->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX));
607
11fdf7f2
TL
608 entity_addrvec_t public_addrs, cluster_addrs;
609 r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC, &public_addrs,
610 iface_preferred_numa_node);
611 if (r < 0) {
612 derr << "Failed to pick public address." << dendl;
613 forker.exit(1);
614 }
615 r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_CLUSTER, &cluster_addrs,
616 iface_preferred_numa_node);
617 if (r < 0) {
618 derr << "Failed to pick cluster address." << dendl;
619 forker.exit(1);
620 }
621
622 if (ms_public->bindv(public_addrs) < 0)
623 forker.exit(1);
7c673cae 624
11fdf7f2
TL
625 if (ms_cluster->bindv(cluster_addrs) < 0)
626 forker.exit(1);
627
628 bool is_delay = g_conf().get_val<bool>("osd_heartbeat_use_min_delay_socket");
629 if (is_delay) {
7c673cae
FG
630 ms_hb_front_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
631 ms_hb_back_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
632 ms_hb_back_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
633 ms_hb_front_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
634 }
635
11fdf7f2
TL
636 entity_addrvec_t hb_front_addrs = public_addrs;
637 for (auto& a : hb_front_addrs.v) {
638 a.set_port(0);
7c673cae 639 }
11fdf7f2
TL
640 if (ms_hb_front_server->bindv(hb_front_addrs) < 0)
641 forker.exit(1);
642 if (ms_hb_front_client->client_bind(hb_front_addrs.front()) < 0)
643 forker.exit(1);
644
645 entity_addrvec_t hb_back_addrs = cluster_addrs;
646 for (auto& a : hb_back_addrs.v) {
647 a.set_port(0);
648 }
649 if (ms_hb_back_server->bindv(hb_back_addrs) < 0)
650 forker.exit(1);
651 if (ms_hb_back_client->client_bind(hb_back_addrs.front()) < 0)
652 forker.exit(1);
7c673cae 653
11fdf7f2
TL
654 // install signal handlers
655 init_async_signal_handler();
656 register_async_signal_handler(SIGHUP, sighup_handler);
7c673cae
FG
657
658 TracepointProvider::initialize<osd_tracepoint_traits>(g_ceph_context);
659 TracepointProvider::initialize<os_tracepoint_traits>(g_ceph_context);
9f95a23c 660 TracepointProvider::initialize<bluestore_tracepoint_traits>(g_ceph_context);
31f18b77
FG
661#ifdef WITH_OSD_INSTRUMENT_FUNCTIONS
662 TracepointProvider::initialize<cyg_profile_traits>(g_ceph_context);
663#endif
7c673cae 664
11fdf7f2
TL
665 srand(time(NULL) + getpid());
666
f67539c2
TL
667 ceph::async::io_context_pool poolctx(
668 cct->_conf.get_val<std::uint64_t>("osd_asio_thread_count"));
669
670 MonClient mc(g_ceph_context, poolctx);
7c673cae
FG
671 if (mc.build_initial_monmap() < 0)
672 return -1;
673 global_init_chdir(g_ceph_context);
674
11fdf7f2
TL
675 if (global_init_preload_erasure_code(g_ceph_context) < 0) {
676 forker.exit(1);
677 }
224ce89b 678
9f95a23c 679 osdptr = new OSD(g_ceph_context,
20effc67 680 std::move(store),
9f95a23c
TL
681 whoami,
682 ms_cluster,
683 ms_public,
684 ms_hb_front_client,
685 ms_hb_back_client,
686 ms_hb_front_server,
687 ms_hb_back_server,
688 ms_objecter,
689 &mc,
690 data_path,
f67539c2
TL
691 journal_path,
692 poolctx);
9f95a23c
TL
693
694 int err = osdptr->pre_init();
7c673cae
FG
695 if (err < 0) {
696 derr << TEXT_RED << " ** ERROR: osd pre_init failed: " << cpp_strerror(-err)
697 << TEXT_NORMAL << dendl;
11fdf7f2 698 forker.exit(1);
7c673cae
FG
699 }
700
701 ms_public->start();
702 ms_hb_front_client->start();
703 ms_hb_back_client->start();
704 ms_hb_front_server->start();
705 ms_hb_back_server->start();
706 ms_cluster->start();
707 ms_objecter->start();
708
709 // start osd
9f95a23c 710 err = osdptr->init();
7c673cae
FG
711 if (err < 0) {
712 derr << TEXT_RED << " ** ERROR: osd init failed: " << cpp_strerror(-err)
713 << TEXT_NORMAL << dendl;
11fdf7f2 714 forker.exit(1);
7c673cae
FG
715 }
716
11fdf7f2
TL
717 // -- daemonize --
718
719 if (g_conf()->daemonize) {
720 global_init_postfork_finish(g_ceph_context);
721 forker.daemonize();
722 }
723
724
7c673cae
FG
725 register_async_signal_handler_oneshot(SIGINT, handle_osd_signal);
726 register_async_signal_handler_oneshot(SIGTERM, handle_osd_signal);
727
9f95a23c 728 osdptr->final_init();
7c673cae 729
11fdf7f2 730 if (g_conf().get_val<bool>("inject_early_sigterm"))
7c673cae
FG
731 kill(getpid(), SIGTERM);
732
733 ms_public->wait();
734 ms_hb_front_client->wait();
735 ms_hb_back_client->wait();
736 ms_hb_front_server->wait();
737 ms_hb_back_server->wait();
738 ms_cluster->wait();
739 ms_objecter->wait();
740
741 unregister_async_signal_handler(SIGHUP, sighup_handler);
742 unregister_async_signal_handler(SIGINT, handle_osd_signal);
743 unregister_async_signal_handler(SIGTERM, handle_osd_signal);
744 shutdown_async_signal_handler();
745
746 // done
f67539c2 747 poolctx.stop();
9f95a23c 748 delete osdptr;
7c673cae
FG
749 delete ms_public;
750 delete ms_hb_front_client;
751 delete ms_hb_back_client;
752 delete ms_hb_front_server;
753 delete ms_hb_back_server;
754 delete ms_cluster;
755 delete ms_objecter;
756
757 client_byte_throttler.reset();
f6b5b4d7 758 client_msg_throttler.reset();
7c673cae
FG
759
760 // cd on exit, so that gmon.out (if any) goes into a separate directory for each node.
761 char s[20];
762 snprintf(s, sizeof(s), "gmon/%d", getpid());
763 if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) {
764 dout(0) << "ceph-osd: gmon.out should be in " << s << dendl;
765 }
766
767 return 0;
768}