]> git.proxmox.com Git - ceph.git/blame - ceph/src/ceph_osd.cc
update ceph source to reef 18.2.0
[ceph.git] / ceph / src / ceph_osd.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <sys/types.h>
16#include <sys/stat.h>
17#include <fcntl.h>
18#include <boost/scoped_ptr.hpp>
19
20#include <iostream>
21#include <string>
7c673cae 22
f67539c2 23#include "auth/KeyRing.h"
7c673cae
FG
24#include "osd/OSD.h"
25#include "os/ObjectStore.h"
26#include "mon/MonClient.h"
27#include "include/ceph_features.h"
7c673cae 28#include "common/config.h"
1e59de90 29#include "extblkdev/ExtBlkDevPlugin.h"
7c673cae
FG
30
31#include "mon/MonMap.h"
32
33#include "msg/Messenger.h"
34
11fdf7f2 35#include "common/Throttle.h"
7c673cae
FG
36#include "common/Timer.h"
37#include "common/TracepointProvider.h"
38#include "common/ceph_argparse.h"
11fdf7f2 39#include "common/numa.h"
7c673cae
FG
40
41#include "global/global_init.h"
42#include "global/signal_handler.h"
43
44#include "include/color.h"
45#include "common/errno.h"
46#include "common/pick_address.h"
47
48#include "perfglue/heap_profiler.h"
49
11fdf7f2
TL
50#include "include/ceph_assert.h"
51
52#include "common/Preforker.h"
7c673cae
FG
53
54#define dout_context g_ceph_context
55#define dout_subsys ceph_subsys_osd
56
f67539c2
TL
57using std::cerr;
58using std::cout;
59using std::map;
60using std::ostringstream;
61using std::string;
62using std::vector;
63
64using ceph::bufferlist;
65
7c673cae
FG
66namespace {
67
68TracepointProvider::Traits osd_tracepoint_traits("libosd_tp.so",
69 "osd_tracing");
70TracepointProvider::Traits os_tracepoint_traits("libos_tp.so",
71 "osd_objectstore_tracing");
9f95a23c
TL
72TracepointProvider::Traits bluestore_tracepoint_traits("libbluestore_tp.so",
73 "bluestore_tracing");
31f18b77
FG
74#ifdef WITH_OSD_INSTRUMENT_FUNCTIONS
75TracepointProvider::Traits cyg_profile_traits("libcyg_profile_tp.so",
76 "osd_function_tracing");
77#endif
7c673cae
FG
78
79} // anonymous namespace
80
9f95a23c 81OSD *osdptr = nullptr;
7c673cae
FG
82
83void handle_osd_signal(int signum)
84{
9f95a23c
TL
85 if (osdptr)
86 osdptr->handle_signal(signum);
7c673cae
FG
87}
88
89static void usage()
90{
31f18b77 91 cout << "usage: ceph-osd -i <ID> [flags]\n"
7c673cae
FG
92 << " --osd-data PATH data directory\n"
93 << " --osd-journal PATH\n"
94 << " journal file or block device\n"
95 << " --mkfs create a [new] data directory\n"
31f18b77 96 << " --mkkey generate a new secret key. This is normally used in combination with --mkfs\n"
11fdf7f2
TL
97 << " --monmap specify the path to the monitor map. This is normally used in combination with --mkfs\n"
98 << " --osd-uuid specify the OSD's fsid. This is normally used in combination with --mkfs\n"
99 << " --keyring specify a path to the osd keyring. This is normally used in combination with --mkfs\n"
7c673cae
FG
100 << " --convert-filestore\n"
101 << " run any pending upgrade operations\n"
102 << " --flush-journal flush all data out of journal\n"
e306af50
TL
103 << " --osdspec-affinity\n"
104 << " set affinity to an osdspec\n"
11fdf7f2 105 << " --dump-journal dump all data of journal\n"
7c673cae
FG
106 << " --mkjournal initialize a new journal\n"
107 << " --check-wants-journal\n"
108 << " check whether a journal is desired\n"
109 << " --check-allows-journal\n"
110 << " check whether a journal is allowed\n"
111 << " --check-needs-journal\n"
112 << " check whether a journal is required\n"
113 << " --debug_osd <N> set debug level (e.g. 10)\n"
114 << " --get-device-fsid PATH\n"
115 << " get OSD fsid for the given block device\n"
116 << std::endl;
117 generic_server_usage();
118}
119
7c673cae 120int main(int argc, const char **argv)
7c673cae 121{
20effc67 122 auto args = argv_to_vec(argc, argv);
11fdf7f2
TL
123 if (args.empty()) {
124 cerr << argv[0] << ": -h or --help for usage" << std::endl;
125 exit(1);
126 }
127 if (ceph_argparse_need_usage(args)) {
128 usage();
129 exit(0);
130 }
7c673cae 131
11fdf7f2
TL
132 map<string,string> defaults = {
133 // We want to enable leveldb's log, while allowing users to override this
134 // option, therefore we will pass it as a default argument to global_init().
135 { "leveldb_log", "" }
136 };
137 auto cct = global_init(
138 &defaults,
139 args, CEPH_ENTITY_TYPE_OSD,
f67539c2 140 CODE_ENVIRONMENT_DAEMON, 0);
7c673cae
FG
141 ceph_heap_profiler_init();
142
11fdf7f2
TL
143 Preforker forker;
144
7c673cae
FG
145 // osd specific args
146 bool mkfs = false;
147 bool mkjournal = false;
148 bool check_wants_journal = false;
149 bool check_allows_journal = false;
150 bool check_needs_journal = false;
151 bool mkkey = false;
152 bool flushjournal = false;
153 bool dump_journal = false;
154 bool convertfilestore = false;
155 bool get_osd_fsid = false;
156 bool get_cluster_fsid = false;
157 bool get_journal_fsid = false;
158 bool get_device_fsid = false;
159 string device_path;
160 std::string dump_pg_log;
e306af50 161 std::string osdspec_affinity;
7c673cae
FG
162
163 std::string val;
164 for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
165 if (ceph_argparse_double_dash(args, i)) {
166 break;
7c673cae
FG
167 } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) {
168 mkfs = true;
e306af50
TL
169 } else if (ceph_argparse_witharg(args, i, &val, "--osdspec-affinity", (char*)NULL)) {
170 osdspec_affinity = val;
7c673cae
FG
171 } else if (ceph_argparse_flag(args, i, "--mkjournal", (char*)NULL)) {
172 mkjournal = true;
173 } else if (ceph_argparse_flag(args, i, "--check-allows-journal", (char*)NULL)) {
174 check_allows_journal = true;
175 } else if (ceph_argparse_flag(args, i, "--check-wants-journal", (char*)NULL)) {
176 check_wants_journal = true;
177 } else if (ceph_argparse_flag(args, i, "--check-needs-journal", (char*)NULL)) {
178 check_needs_journal = true;
179 } else if (ceph_argparse_flag(args, i, "--mkkey", (char*)NULL)) {
180 mkkey = true;
181 } else if (ceph_argparse_flag(args, i, "--flush-journal", (char*)NULL)) {
182 flushjournal = true;
183 } else if (ceph_argparse_flag(args, i, "--convert-filestore", (char*)NULL)) {
184 convertfilestore = true;
185 } else if (ceph_argparse_witharg(args, i, &val, "--dump-pg-log", (char*)NULL)) {
186 dump_pg_log = val;
187 } else if (ceph_argparse_flag(args, i, "--dump-journal", (char*)NULL)) {
188 dump_journal = true;
189 } else if (ceph_argparse_flag(args, i, "--get-cluster-fsid", (char*)NULL)) {
190 get_cluster_fsid = true;
191 } else if (ceph_argparse_flag(args, i, "--get-osd-fsid", "--get-osd-uuid", (char*)NULL)) {
192 get_osd_fsid = true;
193 } else if (ceph_argparse_flag(args, i, "--get-journal-fsid", "--get-journal-uuid", (char*)NULL)) {
194 get_journal_fsid = true;
195 } else if (ceph_argparse_witharg(args, i, &device_path,
196 "--get-device-fsid", (char*)NULL)) {
197 get_device_fsid = true;
198 } else {
199 ++i;
200 }
201 }
202 if (!args.empty()) {
11fdf7f2
TL
203 cerr << "unrecognized arg " << args[0] << std::endl;
204 exit(1);
7c673cae
FG
205 }
206
11fdf7f2
TL
207 if (global_init_prefork(g_ceph_context) >= 0) {
208 std::string err;
209 int r = forker.prefork(err);
210 if (r < 0) {
211 cerr << err << std::endl;
212 return r;
213 }
214 if (forker.is_parent()) {
215 g_ceph_context->_log->start();
216 if (forker.parent_wait(err) != 0) {
217 return -ENXIO;
218 }
219 return 0;
220 }
221 setsid();
222 global_init_postfork_start(g_ceph_context);
223 }
224 common_init_finish(g_ceph_context);
225 global_init_chdir(g_ceph_context);
226
7c673cae 227 if (get_journal_fsid) {
11fdf7f2 228 device_path = g_conf().get_val<std::string>("osd_journal");
7c673cae
FG
229 get_device_fsid = true;
230 }
231 if (get_device_fsid) {
232 uuid_d uuid;
233 int r = ObjectStore::probe_block_device_fsid(g_ceph_context, device_path,
234 &uuid);
235 if (r < 0) {
236 cerr << "failed to get device fsid for " << device_path
237 << ": " << cpp_strerror(r) << std::endl;
11fdf7f2 238 forker.exit(1);
7c673cae
FG
239 }
240 cout << uuid << std::endl;
11fdf7f2 241 forker.exit(0);
7c673cae
FG
242 }
243
244 if (!dump_pg_log.empty()) {
245 common_init_finish(g_ceph_context);
246 bufferlist bl;
247 std::string error;
11fdf7f2
TL
248
249 if (bl.read_file(dump_pg_log.c_str(), &error) >= 0) {
7c673cae 250 pg_log_entry_t e;
11fdf7f2 251 auto p = bl.cbegin();
7c673cae
FG
252 while (!p.end()) {
253 uint64_t pos = p.get_off();
254 try {
11fdf7f2 255 decode(e, p);
7c673cae 256 }
f67539c2 257 catch (const ceph::buffer::error &e) {
7c673cae 258 derr << "failed to decode LogEntry at offset " << pos << dendl;
11fdf7f2 259 forker.exit(1);
7c673cae
FG
260 }
261 derr << pos << ":\t" << e << dendl;
262 }
263 } else {
264 derr << "unable to open " << dump_pg_log << ": " << error << dendl;
265 }
11fdf7f2 266 forker.exit(0);
7c673cae
FG
267 }
268
269 // whoami
270 char *end;
11fdf7f2 271 const char *id = g_conf()->name.get_id().c_str();
7c673cae 272 int whoami = strtol(id, &end, 10);
11fdf7f2 273 std::string data_path = g_conf().get_val<std::string>("osd_data");
7c673cae
FG
274 if (*end || end == id || whoami < 0) {
275 derr << "must specify '-i #' where # is the osd number" << dendl;
11fdf7f2 276 forker.exit(1);
7c673cae
FG
277 }
278
11fdf7f2 279 if (data_path.empty()) {
7c673cae 280 derr << "must specify '--osd-data=foo' data path" << dendl;
11fdf7f2 281 forker.exit(1);
7c673cae
FG
282 }
283
284 // the store
11fdf7f2 285 std::string store_type;
7c673cae
FG
286 {
287 char fn[PATH_MAX];
11fdf7f2 288 snprintf(fn, sizeof(fn), "%s/type", data_path.c_str());
91327a77 289 int fd = ::open(fn, O_RDONLY|O_CLOEXEC);
7c673cae
FG
290 if (fd >= 0) {
291 bufferlist bl;
292 bl.read_fd(fd, 64);
293 if (bl.length()) {
294 store_type = string(bl.c_str(), bl.length() - 1); // drop \n
295 dout(5) << "object store type is " << store_type << dendl;
296 }
297 ::close(fd);
11fdf7f2
TL
298 } else if (mkfs) {
299 store_type = g_conf().get_val<std::string>("osd_objectstore");
300 } else {
301 // hrm, infer the type
302 snprintf(fn, sizeof(fn), "%s/current", data_path.c_str());
303 struct stat st;
304 if (::stat(fn, &st) == 0 &&
305 S_ISDIR(st.st_mode)) {
306 derr << "missing 'type' file, inferring filestore from current/ dir"
307 << dendl;
308 store_type = "filestore";
309 } else {
310 snprintf(fn, sizeof(fn), "%s/block", data_path.c_str());
311 if (::stat(fn, &st) == 0 &&
312 S_ISLNK(st.st_mode)) {
313 derr << "missing 'type' file, inferring bluestore from block symlink"
314 << dendl;
315 store_type = "bluestore";
316 } else {
317 derr << "missing 'type' file and unable to infer osd type" << dendl;
318 forker.exit(1);
319 }
320 }
7c673cae
FG
321 }
322 }
11fdf7f2
TL
323
324 std::string journal_path = g_conf().get_val<std::string>("osd_journal");
325 uint32_t flags = g_conf().get_val<uint64_t>("osd_os_flags");
20effc67
TL
326 std::unique_ptr<ObjectStore> store = ObjectStore::create(g_ceph_context,
327 store_type,
328 data_path,
329 journal_path,
330 flags);
7c673cae
FG
331 if (!store) {
332 derr << "unable to create object store" << dendl;
11fdf7f2 333 forker.exit(-ENODEV);
7c673cae
FG
334 }
335
7c673cae 336
7c673cae
FG
337 if (mkkey) {
338 common_init_finish(g_ceph_context);
9f95a23c 339 KeyRing keyring;
7c673cae 340
11fdf7f2 341 EntityName ename{g_conf()->name};
7c673cae
FG
342 EntityAuth eauth;
343
11fdf7f2 344 std::string keyring_path = g_conf().get_val<std::string>("keyring");
9f95a23c 345 int ret = keyring.load(g_ceph_context, keyring_path);
7c673cae 346 if (ret == 0 &&
9f95a23c 347 keyring.get_auth(ename, eauth)) {
11fdf7f2 348 derr << "already have key in keyring " << keyring_path << dendl;
7c673cae
FG
349 } else {
350 eauth.key.create(g_ceph_context, CEPH_CRYPTO_AES);
9f95a23c 351 keyring.add(ename, eauth);
7c673cae 352 bufferlist bl;
9f95a23c 353 keyring.encode_plaintext(bl);
11fdf7f2 354 int r = bl.write_file(keyring_path.c_str(), 0600);
7c673cae 355 if (r)
11fdf7f2
TL
356 derr << TEXT_RED << " ** ERROR: writing new keyring to "
357 << keyring_path << ": " << cpp_strerror(r) << TEXT_NORMAL
358 << dendl;
7c673cae 359 else
11fdf7f2 360 derr << "created new key in keyring " << keyring_path << dendl;
7c673cae
FG
361 }
362 }
e306af50 363
3efd9988
FG
364 if (mkfs) {
365 common_init_finish(g_ceph_context);
3efd9988 366
11fdf7f2 367 if (g_conf().get_val<uuid_d>("fsid").is_zero()) {
3efd9988 368 derr << "must specify cluster fsid" << dendl;
11fdf7f2 369 forker.exit(-EINVAL);
3efd9988
FG
370 }
371
20effc67 372 int err = OSD::mkfs(g_ceph_context, std::move(store), g_conf().get_val<uuid_d>("fsid"),
e306af50 373 whoami, osdspec_affinity);
3efd9988
FG
374 if (err < 0) {
375 derr << TEXT_RED << " ** ERROR: error creating empty object store in "
11fdf7f2
TL
376 << data_path << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
377 forker.exit(1);
3efd9988 378 }
11fdf7f2
TL
379 dout(0) << "created object store " << data_path
380 << " for osd." << whoami
381 << " fsid " << g_conf().get_val<uuid_d>("fsid")
382 << dendl;
383 }
384 if (mkfs || mkkey) {
385 forker.exit(0);
3efd9988 386 }
7c673cae
FG
387 if (mkjournal) {
388 common_init_finish(g_ceph_context);
389 int err = store->mkjournal();
390 if (err < 0) {
11fdf7f2
TL
391 derr << TEXT_RED << " ** ERROR: error creating fresh journal "
392 << journal_path << " for object store " << data_path << ": "
393 << cpp_strerror(-err) << TEXT_NORMAL << dendl;
394 forker.exit(1);
7c673cae 395 }
11fdf7f2
TL
396 derr << "created new journal " << journal_path
397 << " for object store " << data_path << dendl;
398 forker.exit(0);
7c673cae
FG
399 }
400 if (check_wants_journal) {
401 if (store->wants_journal()) {
d2e6a577 402 cout << "wants journal: yes" << std::endl;
11fdf7f2 403 forker.exit(0);
7c673cae 404 } else {
d2e6a577 405 cout << "wants journal: no" << std::endl;
11fdf7f2 406 forker.exit(1);
7c673cae
FG
407 }
408 }
409 if (check_allows_journal) {
410 if (store->allows_journal()) {
d2e6a577 411 cout << "allows journal: yes" << std::endl;
11fdf7f2 412 forker.exit(0);
7c673cae 413 } else {
d2e6a577 414 cout << "allows journal: no" << std::endl;
11fdf7f2 415 forker.exit(1);
7c673cae
FG
416 }
417 }
418 if (check_needs_journal) {
419 if (store->needs_journal()) {
d2e6a577 420 cout << "needs journal: yes" << std::endl;
11fdf7f2 421 forker.exit(0);
7c673cae 422 } else {
d2e6a577 423 cout << "needs journal: no" << std::endl;
11fdf7f2 424 forker.exit(1);
7c673cae
FG
425 }
426 }
427 if (flushjournal) {
428 common_init_finish(g_ceph_context);
429 int err = store->mount();
430 if (err < 0) {
11fdf7f2
TL
431 derr << TEXT_RED << " ** ERROR: error flushing journal " << journal_path
432 << " for object store " << data_path
7c673cae
FG
433 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
434 goto flushjournal_out;
435 }
436 store->umount();
11fdf7f2
TL
437 derr << "flushed journal " << journal_path
438 << " for object store " << data_path
7c673cae
FG
439 << dendl;
440flushjournal_out:
20effc67 441 store.reset();
11fdf7f2 442 forker.exit(err < 0 ? 1 : 0);
7c673cae
FG
443 }
444 if (dump_journal) {
445 common_init_finish(g_ceph_context);
446 int err = store->dump_journal(cout);
447 if (err < 0) {
11fdf7f2
TL
448 derr << TEXT_RED << " ** ERROR: error dumping journal " << journal_path
449 << " for object store " << data_path
7c673cae 450 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
11fdf7f2 451 forker.exit(1);
7c673cae 452 }
11fdf7f2
TL
453 derr << "dumped journal " << journal_path
454 << " for object store " << data_path
7c673cae 455 << dendl;
11fdf7f2 456 forker.exit(0);
7c673cae
FG
457 }
458
7c673cae
FG
459 if (convertfilestore) {
460 int err = store->mount();
461 if (err < 0) {
11fdf7f2 462 derr << TEXT_RED << " ** ERROR: error mounting store " << data_path
7c673cae 463 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
11fdf7f2 464 forker.exit(1);
7c673cae
FG
465 }
466 err = store->upgrade();
467 store->umount();
468 if (err < 0) {
11fdf7f2 469 derr << TEXT_RED << " ** ERROR: error converting store " << data_path
7c673cae 470 << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl;
11fdf7f2 471 forker.exit(1);
7c673cae 472 }
11fdf7f2 473 forker.exit(0);
7c673cae
FG
474 }
475
1e59de90
TL
476 {
477 int r = extblkdev::preload(g_ceph_context);
478 if (r < 0) {
479 derr << "Failed preloading extblkdev plugins, error code: " << r << dendl;
480 forker.exit(1);
481 }
482 }
483
7c673cae
FG
484 string magic;
485 uuid_d cluster_fsid, osd_fsid;
9f95a23c 486 ceph_release_t require_osd_release = ceph_release_t::unknown;
7c673cae 487 int w;
20effc67 488 int r = OSD::peek_meta(store.get(), &magic, &cluster_fsid, &osd_fsid, &w,
11fdf7f2 489 &require_osd_release);
7c673cae
FG
490 if (r < 0) {
491 derr << TEXT_RED << " ** ERROR: unable to open OSD superblock on "
11fdf7f2 492 << data_path << ": " << cpp_strerror(-r)
7c673cae
FG
493 << TEXT_NORMAL << dendl;
494 if (r == -ENOTSUP) {
495 derr << TEXT_RED << " ** please verify that underlying storage "
496 << "supports xattrs" << TEXT_NORMAL << dendl;
497 }
11fdf7f2 498 forker.exit(1);
7c673cae
FG
499 }
500 if (w != whoami) {
501 derr << "OSD id " << w << " != my id " << whoami << dendl;
11fdf7f2 502 forker.exit(1);
7c673cae
FG
503 }
504 if (strcmp(magic.c_str(), CEPH_OSD_ONDISK_MAGIC)) {
505 derr << "OSD magic " << magic << " != my " << CEPH_OSD_ONDISK_MAGIC
506 << dendl;
11fdf7f2 507 forker.exit(1);
7c673cae
FG
508 }
509
510 if (get_cluster_fsid) {
511 cout << cluster_fsid << std::endl;
11fdf7f2 512 forker.exit(0);
7c673cae
FG
513 }
514 if (get_osd_fsid) {
515 cout << osd_fsid << std::endl;
11fdf7f2 516 forker.exit(0);
7c673cae
FG
517 }
518
9f95a23c 519 {
9f95a23c 520 ostringstream err;
f67539c2 521 if (!can_upgrade_from(require_osd_release, "require_osd_release", err)) {
9f95a23c
TL
522 derr << err.str() << dendl;
523 forker.exit(1);
524 }
11fdf7f2 525 }
7c673cae 526
11fdf7f2
TL
527 // consider objectstore numa node
528 int os_numa_node = -1;
529 r = store->get_numa_node(&os_numa_node, nullptr, nullptr);
530 if (r >= 0 && os_numa_node >= 0) {
531 dout(1) << " objectstore numa_node " << os_numa_node << dendl;
532 }
533 int iface_preferred_numa_node = -1;
534 if (g_conf().get_val<bool>("osd_numa_prefer_iface")) {
535 iface_preferred_numa_node = os_numa_node;
7c673cae
FG
536 }
537
11fdf7f2
TL
538 // messengers
539 std::string msg_type = g_conf().get_val<std::string>("ms_type");
540 std::string public_msg_type =
541 g_conf().get_val<std::string>("ms_public_type");
542 std::string cluster_msg_type =
543 g_conf().get_val<std::string>("ms_cluster_type");
544
545 public_msg_type = public_msg_type.empty() ? msg_type : public_msg_type;
546 cluster_msg_type = cluster_msg_type.empty() ? msg_type : cluster_msg_type;
9f95a23c 547 uint64_t nonce = Messenger::get_pid_nonce();
11fdf7f2 548 Messenger *ms_public = Messenger::create(g_ceph_context, public_msg_type,
f67539c2 549 entity_name_t::OSD(whoami), "client", nonce);
11fdf7f2 550 Messenger *ms_cluster = Messenger::create(g_ceph_context, cluster_msg_type,
f67539c2 551 entity_name_t::OSD(whoami), "cluster", nonce);
11fdf7f2 552 Messenger *ms_hb_back_client = Messenger::create(g_ceph_context, cluster_msg_type,
f67539c2 553 entity_name_t::OSD(whoami), "hb_back_client", nonce);
11fdf7f2 554 Messenger *ms_hb_front_client = Messenger::create(g_ceph_context, public_msg_type,
f67539c2 555 entity_name_t::OSD(whoami), "hb_front_client", nonce);
11fdf7f2 556 Messenger *ms_hb_back_server = Messenger::create(g_ceph_context, cluster_msg_type,
f67539c2 557 entity_name_t::OSD(whoami), "hb_back_server", nonce);
11fdf7f2 558 Messenger *ms_hb_front_server = Messenger::create(g_ceph_context, public_msg_type,
f67539c2 559 entity_name_t::OSD(whoami), "hb_front_server", nonce);
11fdf7f2 560 Messenger *ms_objecter = Messenger::create(g_ceph_context, public_msg_type,
f67539c2 561 entity_name_t::OSD(whoami), "ms_objecter", nonce);
7c673cae 562 if (!ms_public || !ms_cluster || !ms_hb_front_client || !ms_hb_back_client || !ms_hb_back_server || !ms_hb_front_server || !ms_objecter)
11fdf7f2 563 forker.exit(1);
7c673cae
FG
564 ms_cluster->set_cluster_protocol(CEPH_OSD_PROTOCOL);
565 ms_hb_front_client->set_cluster_protocol(CEPH_OSD_PROTOCOL);
566 ms_hb_back_client->set_cluster_protocol(CEPH_OSD_PROTOCOL);
567 ms_hb_back_server->set_cluster_protocol(CEPH_OSD_PROTOCOL);
568 ms_hb_front_server->set_cluster_protocol(CEPH_OSD_PROTOCOL);
569
11fdf7f2
TL
570 dout(0) << "starting osd." << whoami
571 << " osd_data " << data_path
572 << " " << ((journal_path.empty()) ?
573 "(no journal)" : journal_path)
574 << dendl;
7c673cae 575
11fdf7f2
TL
576 uint64_t message_size =
577 g_conf().get_val<Option::size_t>("osd_client_message_size_cap");
7c673cae 578 boost::scoped_ptr<Throttle> client_byte_throttler(
11fdf7f2 579 new Throttle(g_ceph_context, "osd_client_bytes", message_size));
f6b5b4d7
TL
580 uint64_t message_cap = g_conf().get_val<uint64_t>("osd_client_message_cap");
581 boost::scoped_ptr<Throttle> client_msg_throttler(
582 new Throttle(g_ceph_context, "osd_client_messages", message_cap));
7c673cae
FG
583
584 // All feature bits 0 - 34 should be present from dumpling v0.67 forward
585 uint64_t osd_required =
586 CEPH_FEATURE_UID |
587 CEPH_FEATURE_PGID64 |
588 CEPH_FEATURE_OSDENC;
589
9f95a23c 590 ms_public->set_default_policy(Messenger::Policy::stateless_registered_server(0));
7c673cae
FG
591 ms_public->set_policy_throttlers(entity_name_t::TYPE_CLIENT,
592 client_byte_throttler.get(),
f6b5b4d7 593 client_msg_throttler.get());
7c673cae 594 ms_public->set_policy(entity_name_t::TYPE_MON,
11fdf7f2 595 Messenger::Policy::lossy_client(osd_required));
7c673cae 596 ms_public->set_policy(entity_name_t::TYPE_MGR,
11fdf7f2 597 Messenger::Policy::lossy_client(osd_required));
7c673cae 598
7c673cae
FG
599 ms_cluster->set_default_policy(Messenger::Policy::stateless_server(0));
600 ms_cluster->set_policy(entity_name_t::TYPE_MON, Messenger::Policy::lossy_client(0));
601 ms_cluster->set_policy(entity_name_t::TYPE_OSD,
602 Messenger::Policy::lossless_peer(osd_required));
603 ms_cluster->set_policy(entity_name_t::TYPE_CLIENT,
604 Messenger::Policy::stateless_server(0));
605
606 ms_hb_front_client->set_policy(entity_name_t::TYPE_OSD,
607 Messenger::Policy::lossy_client(0));
608 ms_hb_back_client->set_policy(entity_name_t::TYPE_OSD,
609 Messenger::Policy::lossy_client(0));
610 ms_hb_back_server->set_policy(entity_name_t::TYPE_OSD,
611 Messenger::Policy::stateless_server(0));
612 ms_hb_front_server->set_policy(entity_name_t::TYPE_OSD,
613 Messenger::Policy::stateless_server(0));
614
615 ms_objecter->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX));
616
39ae355f 617 entity_addrvec_t public_addrs, public_bind_addrs, cluster_addrs;
11fdf7f2
TL
618 r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC, &public_addrs,
619 iface_preferred_numa_node);
620 if (r < 0) {
621 derr << "Failed to pick public address." << dendl;
622 forker.exit(1);
39ae355f
TL
623 } else {
624 dout(10) << "picked public_addrs " << public_addrs << dendl;
625 }
626 r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC_BIND,
627 &public_bind_addrs, iface_preferred_numa_node);
628 if (r == -ENOENT) {
629 dout(10) << "there is no public_bind_addrs, defaulting to public_addrs"
630 << dendl;
631 public_bind_addrs = public_addrs;
632 } else if (r < 0) {
633 derr << "Failed to pick public bind address." << dendl;
634 forker.exit(1);
635 } else {
636 dout(10) << "picked public_bind_addrs " << public_bind_addrs << dendl;
11fdf7f2
TL
637 }
638 r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_CLUSTER, &cluster_addrs,
639 iface_preferred_numa_node);
640 if (r < 0) {
641 derr << "Failed to pick cluster address." << dendl;
642 forker.exit(1);
643 }
644
39ae355f
TL
645 if (ms_public->bindv(public_bind_addrs, public_addrs) < 0) {
646 derr << "Failed to bind to " << public_bind_addrs << dendl;
11fdf7f2 647 forker.exit(1);
39ae355f 648 }
7c673cae 649
11fdf7f2
TL
650 if (ms_cluster->bindv(cluster_addrs) < 0)
651 forker.exit(1);
652
653 bool is_delay = g_conf().get_val<bool>("osd_heartbeat_use_min_delay_socket");
654 if (is_delay) {
7c673cae
FG
655 ms_hb_front_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
656 ms_hb_back_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
657 ms_hb_back_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
658 ms_hb_front_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY);
659 }
660
39ae355f 661 entity_addrvec_t hb_front_addrs = public_bind_addrs;
11fdf7f2
TL
662 for (auto& a : hb_front_addrs.v) {
663 a.set_port(0);
7c673cae 664 }
11fdf7f2
TL
665 if (ms_hb_front_server->bindv(hb_front_addrs) < 0)
666 forker.exit(1);
667 if (ms_hb_front_client->client_bind(hb_front_addrs.front()) < 0)
668 forker.exit(1);
669
670 entity_addrvec_t hb_back_addrs = cluster_addrs;
671 for (auto& a : hb_back_addrs.v) {
672 a.set_port(0);
673 }
674 if (ms_hb_back_server->bindv(hb_back_addrs) < 0)
675 forker.exit(1);
676 if (ms_hb_back_client->client_bind(hb_back_addrs.front()) < 0)
677 forker.exit(1);
7c673cae 678
11fdf7f2
TL
679 // install signal handlers
680 init_async_signal_handler();
681 register_async_signal_handler(SIGHUP, sighup_handler);
7c673cae
FG
682
683 TracepointProvider::initialize<osd_tracepoint_traits>(g_ceph_context);
684 TracepointProvider::initialize<os_tracepoint_traits>(g_ceph_context);
9f95a23c 685 TracepointProvider::initialize<bluestore_tracepoint_traits>(g_ceph_context);
31f18b77
FG
686#ifdef WITH_OSD_INSTRUMENT_FUNCTIONS
687 TracepointProvider::initialize<cyg_profile_traits>(g_ceph_context);
688#endif
7c673cae 689
11fdf7f2
TL
690 srand(time(NULL) + getpid());
691
f67539c2
TL
692 ceph::async::io_context_pool poolctx(
693 cct->_conf.get_val<std::uint64_t>("osd_asio_thread_count"));
694
695 MonClient mc(g_ceph_context, poolctx);
7c673cae
FG
696 if (mc.build_initial_monmap() < 0)
697 return -1;
698 global_init_chdir(g_ceph_context);
699
11fdf7f2
TL
700 if (global_init_preload_erasure_code(g_ceph_context) < 0) {
701 forker.exit(1);
702 }
224ce89b 703
9f95a23c 704 osdptr = new OSD(g_ceph_context,
20effc67 705 std::move(store),
9f95a23c
TL
706 whoami,
707 ms_cluster,
708 ms_public,
709 ms_hb_front_client,
710 ms_hb_back_client,
711 ms_hb_front_server,
712 ms_hb_back_server,
713 ms_objecter,
714 &mc,
715 data_path,
f67539c2
TL
716 journal_path,
717 poolctx);
9f95a23c
TL
718
719 int err = osdptr->pre_init();
7c673cae
FG
720 if (err < 0) {
721 derr << TEXT_RED << " ** ERROR: osd pre_init failed: " << cpp_strerror(-err)
722 << TEXT_NORMAL << dendl;
11fdf7f2 723 forker.exit(1);
7c673cae
FG
724 }
725
726 ms_public->start();
727 ms_hb_front_client->start();
728 ms_hb_back_client->start();
729 ms_hb_front_server->start();
730 ms_hb_back_server->start();
731 ms_cluster->start();
732 ms_objecter->start();
733
734 // start osd
9f95a23c 735 err = osdptr->init();
7c673cae
FG
736 if (err < 0) {
737 derr << TEXT_RED << " ** ERROR: osd init failed: " << cpp_strerror(-err)
738 << TEXT_NORMAL << dendl;
11fdf7f2 739 forker.exit(1);
7c673cae
FG
740 }
741
11fdf7f2
TL
742 // -- daemonize --
743
744 if (g_conf()->daemonize) {
745 global_init_postfork_finish(g_ceph_context);
746 forker.daemonize();
747 }
748
749
7c673cae
FG
750 register_async_signal_handler_oneshot(SIGINT, handle_osd_signal);
751 register_async_signal_handler_oneshot(SIGTERM, handle_osd_signal);
752
9f95a23c 753 osdptr->final_init();
7c673cae 754
11fdf7f2 755 if (g_conf().get_val<bool>("inject_early_sigterm"))
7c673cae
FG
756 kill(getpid(), SIGTERM);
757
758 ms_public->wait();
759 ms_hb_front_client->wait();
760 ms_hb_back_client->wait();
761 ms_hb_front_server->wait();
762 ms_hb_back_server->wait();
763 ms_cluster->wait();
764 ms_objecter->wait();
765
766 unregister_async_signal_handler(SIGHUP, sighup_handler);
767 unregister_async_signal_handler(SIGINT, handle_osd_signal);
768 unregister_async_signal_handler(SIGTERM, handle_osd_signal);
769 shutdown_async_signal_handler();
770
771 // done
f67539c2 772 poolctx.stop();
9f95a23c 773 delete osdptr;
7c673cae
FG
774 delete ms_public;
775 delete ms_hb_front_client;
776 delete ms_hb_back_client;
777 delete ms_hb_front_server;
778 delete ms_hb_back_server;
779 delete ms_cluster;
780 delete ms_objecter;
781
782 client_byte_throttler.reset();
f6b5b4d7 783 client_msg_throttler.reset();
7c673cae
FG
784
785 // cd on exit, so that gmon.out (if any) goes into a separate directory for each node.
786 char s[20];
787 snprintf(s, sizeof(s), "gmon/%d", getpid());
788 if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) {
789 dout(0) << "ceph-osd: gmon.out should be in " << s << dendl;
790 }
791
792 return 0;
793}