1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include <sys/types.h>
18 #include <boost/scoped_ptr.hpp>
25 #include "os/ObjectStore.h"
26 #include "mon/MonClient.h"
27 #include "include/ceph_features.h"
29 #include "common/config.h"
31 #include "mon/MonMap.h"
33 #include "msg/Messenger.h"
35 #include "common/Timer.h"
36 #include "common/TracepointProvider.h"
37 #include "common/ceph_argparse.h"
39 #include "global/global_init.h"
40 #include "global/signal_handler.h"
42 #include "include/color.h"
43 #include "common/errno.h"
44 #include "common/pick_address.h"
46 #include "perfglue/heap_profiler.h"
48 #include "include/assert.h"
50 #define dout_context g_ceph_context
51 #define dout_subsys ceph_subsys_osd
// Tracepoint provider descriptors. Each Traits object is constructed from a
// shared-library name; main() later passes these objects as template
// arguments to TracepointProvider::initialize<...>(g_ceph_context).
// NOTE(review): the second string argument looks like the name of the config
// option that gates loading of the library -- confirm against
// TracepointProvider::Traits.
55 TracepointProvider::Traits
osd_tracepoint_traits("libosd_tp.so",
57 TracepointProvider::Traits
os_tracepoint_traits("libos_tp.so",
58 "osd_objectstore_tracing");
// The cyg_profile descriptor is only defined when
// WITH_OSD_INSTRUMENT_FUNCTIONS is set at build time.
59 #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS
60 TracepointProvider::Traits
cyg_profile_traits("libcyg_profile_tp.so",
61 "osd_function_tracing");
64 } // anonymous namespace
// Signal callback: passes the received signal number on to the file-level
// `osd` object through its handle_signal() method. main() registers this
// function as a one-shot handler for SIGINT and SIGTERM and unregisters it
// again during shutdown.
//
// signum: the number of the signal that was delivered.
68 void handle_osd_signal(int signum
)
71 osd
->handle_signal(signum
);
// Help text: writes the ceph-osd command-line synopsis to stdout, one option
// per line, and then calls generic_server_usage().
// NOTE(review): the argument parser in main() also accepts --dump-journal,
// --dump-pg-log, --get-cluster-fsid, --get-osd-fsid/--get-osd-uuid and
// --get-journal-fsid/--get-journal-uuid, none of which are listed here --
// confirm whether the omission is intentional.
76 cout
<< "usage: ceph-osd -i <ID> [flags]\n"
77 << " --osd-data PATH data directory\n"
78 << " --osd-journal PATH\n"
79 << " journal file or block device\n"
80 << " --mkfs create a [new] data directory\n"
81 << " --mkkey generate a new secret key. This is normally used in combination with --mkfs\n"
82 << " --convert-filestore\n"
83 << " run any pending upgrade operations\n"
84 << " --flush-journal flush all data out of journal\n"
85 << " --mkjournal initialize a new journal\n"
86 << " --check-wants-journal\n"
87 << " check whether a journal is desired\n"
88 << " --check-allows-journal\n"
89 << " check whether a journal is allowed\n"
90 << " --check-needs-journal\n"
91 << " check whether a journal is required\n"
92 << " --debug_osd <N> set debug level (e.g. 10)\n"
93 << " --get-device-fsid PATH\n"
94 << " get OSD fsid for the given block device\n"
96 generic_server_usage();
99 int main(int argc
, const char **argv
)
101 vector
<const char*> args
;
102 argv_to_vec(argc
, argv
, args
);
105 vector
<const char*> def_args
;
106 // We want to enable leveldb's log, while allowing users to override this
107 // option, therefore we will pass it as a default argument to global_init().
108 def_args
.push_back("--leveldb-log=");
110 auto cct
= global_init(&def_args
, args
, CEPH_ENTITY_TYPE_OSD
,
111 CODE_ENVIRONMENT_DAEMON
,
113 ceph_heap_profiler_init();
117 bool mkjournal
= false;
118 bool check_wants_journal
= false;
119 bool check_allows_journal
= false;
120 bool check_needs_journal
= false;
122 bool flushjournal
= false;
123 bool dump_journal
= false;
124 bool convertfilestore
= false;
125 bool get_osd_fsid
= false;
126 bool get_cluster_fsid
= false;
127 bool get_journal_fsid
= false;
128 bool get_device_fsid
= false;
130 std::string dump_pg_log
;
133 for (std::vector
<const char*>::iterator i
= args
.begin(); i
!= args
.end(); ) {
134 if (ceph_argparse_double_dash(args
, i
)) {
136 } else if (ceph_argparse_flag(args
, i
, "-h", "--help", (char*)NULL
)) {
138 } else if (ceph_argparse_flag(args
, i
, "--mkfs", (char*)NULL
)) {
140 } else if (ceph_argparse_flag(args
, i
, "--mkjournal", (char*)NULL
)) {
142 } else if (ceph_argparse_flag(args
, i
, "--check-allows-journal", (char*)NULL
)) {
143 check_allows_journal
= true;
144 } else if (ceph_argparse_flag(args
, i
, "--check-wants-journal", (char*)NULL
)) {
145 check_wants_journal
= true;
146 } else if (ceph_argparse_flag(args
, i
, "--check-needs-journal", (char*)NULL
)) {
147 check_needs_journal
= true;
148 } else if (ceph_argparse_flag(args
, i
, "--mkkey", (char*)NULL
)) {
150 } else if (ceph_argparse_flag(args
, i
, "--flush-journal", (char*)NULL
)) {
152 } else if (ceph_argparse_flag(args
, i
, "--convert-filestore", (char*)NULL
)) {
153 convertfilestore
= true;
154 } else if (ceph_argparse_witharg(args
, i
, &val
, "--dump-pg-log", (char*)NULL
)) {
156 } else if (ceph_argparse_flag(args
, i
, "--dump-journal", (char*)NULL
)) {
158 } else if (ceph_argparse_flag(args
, i
, "--get-cluster-fsid", (char*)NULL
)) {
159 get_cluster_fsid
= true;
160 } else if (ceph_argparse_flag(args
, i
, "--get-osd-fsid", "--get-osd-uuid", (char*)NULL
)) {
162 } else if (ceph_argparse_flag(args
, i
, "--get-journal-fsid", "--get-journal-uuid", (char*)NULL
)) {
163 get_journal_fsid
= true;
164 } else if (ceph_argparse_witharg(args
, i
, &device_path
,
165 "--get-device-fsid", (char*)NULL
)) {
166 get_device_fsid
= true;
172 derr
<< "unrecognized arg " << args
[0] << dendl
;
176 if (get_journal_fsid
) {
177 device_path
= g_conf
->osd_journal
;
178 get_device_fsid
= true;
180 if (get_device_fsid
) {
182 int r
= ObjectStore::probe_block_device_fsid(g_ceph_context
, device_path
,
185 cerr
<< "failed to get device fsid for " << device_path
186 << ": " << cpp_strerror(r
) << std::endl
;
189 cout
<< uuid
<< std::endl
;
193 if (!dump_pg_log
.empty()) {
194 common_init_finish(g_ceph_context
);
197 int r
= bl
.read_file(dump_pg_log
.c_str(), &error
);
200 bufferlist::iterator p
= bl
.begin();
202 uint64_t pos
= p
.get_off();
206 catch (const buffer::error
&e
) {
207 derr
<< "failed to decode LogEntry at offset " << pos
<< dendl
;
210 derr
<< pos
<< ":\t" << e
<< dendl
;
213 derr
<< "unable to open " << dump_pg_log
<< ": " << error
<< dendl
;
220 const char *id
= g_conf
->name
.get_id().c_str();
221 int whoami
= strtol(id
, &end
, 10);
222 if (*end
|| end
== id
|| whoami
< 0) {
223 derr
<< "must specify '-i #' where # is the osd number" << dendl
;
227 if (g_conf
->osd_data
.empty()) {
228 derr
<< "must specify '--osd-data=foo' data path" << dendl
;
233 string store_type
= g_conf
->osd_objectstore
;
236 snprintf(fn
, sizeof(fn
), "%s/type", g_conf
->osd_data
.c_str());
237 int fd
= ::open(fn
, O_RDONLY
);
242 store_type
= string(bl
.c_str(), bl
.length() - 1); // drop \n
243 g_conf
->set_val("osd_objectstore", store_type
);
244 dout(5) << "object store type is " << store_type
<< dendl
;
249 ObjectStore
*store
= ObjectStore::create(g_ceph_context
,
253 g_conf
->osd_os_flags
);
255 derr
<< "unable to create object store" << dendl
;
261 common_init_finish(g_ceph_context
);
262 KeyRing
*keyring
= KeyRing::create_empty();
264 derr
<< "Unable to get a Ceph keyring." << dendl
;
268 EntityName
ename(g_conf
->name
);
271 int ret
= keyring
->load(g_ceph_context
, g_conf
->keyring
);
273 keyring
->get_auth(ename
, eauth
)) {
274 derr
<< "already have key in keyring " << g_conf
->keyring
<< dendl
;
276 eauth
.key
.create(g_ceph_context
, CEPH_CRYPTO_AES
);
277 keyring
->add(ename
, eauth
);
279 keyring
->encode_plaintext(bl
);
280 int r
= bl
.write_file(g_conf
->keyring
.c_str(), 0600);
282 derr
<< TEXT_RED
<< " ** ERROR: writing new keyring to " << g_conf
->keyring
283 << ": " << cpp_strerror(r
) << TEXT_NORMAL
<< dendl
;
285 derr
<< "created new key in keyring " << g_conf
->keyring
<< dendl
;
289 common_init_finish(g_ceph_context
);
290 MonClient
mc(g_ceph_context
);
291 if (mc
.build_initial_monmap() < 0)
293 if (mc
.get_monmap_privately() < 0)
296 if (mc
.monmap
.fsid
.is_zero()) {
297 derr
<< "must specify cluster fsid" << dendl
;
301 int err
= OSD::mkfs(g_ceph_context
, store
, g_conf
->osd_data
,
302 mc
.monmap
.fsid
, whoami
);
304 derr
<< TEXT_RED
<< " ** ERROR: error creating empty object store in "
305 << g_conf
->osd_data
<< ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
308 derr
<< "created object store " << g_conf
->osd_data
309 << " for osd." << whoami
<< " fsid " << mc
.monmap
.fsid
<< dendl
;
314 common_init_finish(g_ceph_context
);
315 int err
= store
->mkjournal();
317 derr
<< TEXT_RED
<< " ** ERROR: error creating fresh journal " << g_conf
->osd_journal
318 << " for object store " << g_conf
->osd_data
319 << ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
322 derr
<< "created new journal " << g_conf
->osd_journal
323 << " for object store " << g_conf
->osd_data
<< dendl
;
326 if (check_wants_journal
) {
327 if (store
->wants_journal()) {
328 cout
<< "wants journal: yes" << std::endl
;
331 cout
<< "wants journal: no" << std::endl
;
335 if (check_allows_journal
) {
336 if (store
->allows_journal()) {
337 cout
<< "allows journal: yes" << std::endl
;
340 cout
<< "allows journal: no" << std::endl
;
344 if (check_needs_journal
) {
345 if (store
->needs_journal()) {
346 cout
<< "needs journal: yes" << std::endl
;
349 cout
<< "needs journal: no" << std::endl
;
354 common_init_finish(g_ceph_context
);
355 int err
= store
->mount();
357 derr
<< TEXT_RED
<< " ** ERROR: error flushing journal " << g_conf
->osd_journal
358 << " for object store " << g_conf
->osd_data
359 << ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
360 goto flushjournal_out
;
363 derr
<< "flushed journal " << g_conf
->osd_journal
364 << " for object store " << g_conf
->osd_data
368 exit(err
< 0 ? 1 : 0);
371 common_init_finish(g_ceph_context
);
372 int err
= store
->dump_journal(cout
);
374 derr
<< TEXT_RED
<< " ** ERROR: error dumping journal " << g_conf
->osd_journal
375 << " for object store " << g_conf
->osd_data
376 << ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
379 derr
<< "dumped journal " << g_conf
->osd_journal
380 << " for object store " << g_conf
->osd_data
387 if (convertfilestore
) {
388 int err
= store
->mount();
390 derr
<< TEXT_RED
<< " ** ERROR: error mounting store " << g_conf
->osd_data
391 << ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
394 err
= store
->upgrade();
397 derr
<< TEXT_RED
<< " ** ERROR: error converting store " << g_conf
->osd_data
398 << ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
405 uuid_d cluster_fsid
, osd_fsid
;
407 int r
= OSD::peek_meta(store
, magic
, cluster_fsid
, osd_fsid
, w
);
409 derr
<< TEXT_RED
<< " ** ERROR: unable to open OSD superblock on "
410 << g_conf
->osd_data
<< ": " << cpp_strerror(-r
)
411 << TEXT_NORMAL
<< dendl
;
413 derr
<< TEXT_RED
<< " ** please verify that underlying storage "
414 << "supports xattrs" << TEXT_NORMAL
<< dendl
;
419 derr
<< "OSD id " << w
<< " != my id " << whoami
<< dendl
;
422 if (strcmp(magic
.c_str(), CEPH_OSD_ONDISK_MAGIC
)) {
423 derr
<< "OSD magic " << magic
<< " != my " << CEPH_OSD_ONDISK_MAGIC
428 if (get_cluster_fsid
) {
429 cout
<< cluster_fsid
<< std::endl
;
433 cout
<< osd_fsid
<< std::endl
;
437 pick_addresses(g_ceph_context
, CEPH_PICK_ADDRESS_PUBLIC
438 |CEPH_PICK_ADDRESS_CLUSTER
);
440 if (g_conf
->public_addr
.is_blank_ip() && !g_conf
->cluster_addr
.is_blank_ip()) {
442 << " ** WARNING: specified cluster addr but not public addr; we recommend **\n"
443 << " ** you specify neither or both. **"
444 << TEXT_NORMAL
<< dendl
;
447 std::string public_msgr_type
= g_conf
->ms_public_type
.empty() ? g_conf
->get_val
<std::string
>("ms_type") : g_conf
->ms_public_type
;
448 std::string cluster_msgr_type
= g_conf
->ms_cluster_type
.empty() ? g_conf
->get_val
<std::string
>("ms_type") : g_conf
->ms_cluster_type
;
449 Messenger
*ms_public
= Messenger::create(g_ceph_context
, public_msgr_type
,
450 entity_name_t::OSD(whoami
), "client",
452 Messenger::HAS_HEAVY_TRAFFIC
|
453 Messenger::HAS_MANY_CONNECTIONS
);
454 Messenger
*ms_cluster
= Messenger::create(g_ceph_context
, cluster_msgr_type
,
455 entity_name_t::OSD(whoami
), "cluster",
457 Messenger::HAS_HEAVY_TRAFFIC
|
458 Messenger::HAS_MANY_CONNECTIONS
);
459 Messenger
*ms_hb_back_client
= Messenger::create(g_ceph_context
, cluster_msgr_type
,
460 entity_name_t::OSD(whoami
), "hb_back_client",
461 getpid(), Messenger::HEARTBEAT
);
462 Messenger
*ms_hb_front_client
= Messenger::create(g_ceph_context
, public_msgr_type
,
463 entity_name_t::OSD(whoami
), "hb_front_client",
464 getpid(), Messenger::HEARTBEAT
);
465 Messenger
*ms_hb_back_server
= Messenger::create(g_ceph_context
, cluster_msgr_type
,
466 entity_name_t::OSD(whoami
), "hb_back_server",
467 getpid(), Messenger::HEARTBEAT
);
468 Messenger
*ms_hb_front_server
= Messenger::create(g_ceph_context
, public_msgr_type
,
469 entity_name_t::OSD(whoami
), "hb_front_server",
470 getpid(), Messenger::HEARTBEAT
);
471 Messenger
*ms_objecter
= Messenger::create(g_ceph_context
, public_msgr_type
,
472 entity_name_t::OSD(whoami
), "ms_objecter",
474 if (!ms_public
|| !ms_cluster
|| !ms_hb_front_client
|| !ms_hb_back_client
|| !ms_hb_back_server
|| !ms_hb_front_server
|| !ms_objecter
)
476 ms_cluster
->set_cluster_protocol(CEPH_OSD_PROTOCOL
);
477 ms_hb_front_client
->set_cluster_protocol(CEPH_OSD_PROTOCOL
);
478 ms_hb_back_client
->set_cluster_protocol(CEPH_OSD_PROTOCOL
);
479 ms_hb_back_server
->set_cluster_protocol(CEPH_OSD_PROTOCOL
);
480 ms_hb_front_server
->set_cluster_protocol(CEPH_OSD_PROTOCOL
);
482 cout
<< "starting osd." << whoami
483 << " at " << ms_public
->get_myaddr()
484 << " osd_data " << g_conf
->osd_data
485 << " " << ((g_conf
->osd_journal
.empty()) ?
486 "(no journal)" : g_conf
->osd_journal
)
489 boost::scoped_ptr
<Throttle
> client_byte_throttler(
490 new Throttle(g_ceph_context
, "osd_client_bytes",
491 g_conf
->osd_client_message_size_cap
));
493 // All feature bits 0 - 34 should be present from dumpling v0.67 forward
494 uint64_t osd_required
=
496 CEPH_FEATURE_PGID64
|
499 ms_public
->set_default_policy(Messenger::Policy::stateless_server(0));
500 ms_public
->set_policy_throttlers(entity_name_t::TYPE_CLIENT
,
501 client_byte_throttler
.get(),
503 ms_public
->set_policy(entity_name_t::TYPE_MON
,
504 Messenger::Policy::lossy_client(CEPH_FEATURE_UID
|
505 CEPH_FEATURE_PGID64
|
506 CEPH_FEATURE_OSDENC
));
507 ms_public
->set_policy(entity_name_t::TYPE_MGR
,
508 Messenger::Policy::lossy_client(CEPH_FEATURE_UID
|
509 CEPH_FEATURE_PGID64
|
510 CEPH_FEATURE_OSDENC
));
512 //try to poison pill any OSD connections on the wrong address
513 ms_public
->set_policy(entity_name_t::TYPE_OSD
,
514 Messenger::Policy::stateless_server(0));
516 ms_cluster
->set_default_policy(Messenger::Policy::stateless_server(0));
517 ms_cluster
->set_policy(entity_name_t::TYPE_MON
, Messenger::Policy::lossy_client(0));
518 ms_cluster
->set_policy(entity_name_t::TYPE_OSD
,
519 Messenger::Policy::lossless_peer(osd_required
));
520 ms_cluster
->set_policy(entity_name_t::TYPE_CLIENT
,
521 Messenger::Policy::stateless_server(0));
523 ms_hb_front_client
->set_policy(entity_name_t::TYPE_OSD
,
524 Messenger::Policy::lossy_client(0));
525 ms_hb_back_client
->set_policy(entity_name_t::TYPE_OSD
,
526 Messenger::Policy::lossy_client(0));
527 ms_hb_back_server
->set_policy(entity_name_t::TYPE_OSD
,
528 Messenger::Policy::stateless_server(0));
529 ms_hb_front_server
->set_policy(entity_name_t::TYPE_OSD
,
530 Messenger::Policy::stateless_server(0));
532 ms_objecter
->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX
));
534 r
= ms_public
->bind(g_conf
->public_addr
);
537 r
= ms_cluster
->bind(g_conf
->cluster_addr
);
541 if (g_conf
->osd_heartbeat_use_min_delay_socket
) {
542 ms_hb_front_client
->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY
);
543 ms_hb_back_client
->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY
);
544 ms_hb_back_server
->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY
);
545 ms_hb_front_server
->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY
);
548 // hb back should bind to same ip as cluster_addr (if specified)
549 entity_addr_t hb_back_addr
= g_conf
->osd_heartbeat_addr
;
550 if (hb_back_addr
.is_blank_ip()) {
551 hb_back_addr
= g_conf
->cluster_addr
;
552 if (hb_back_addr
.is_ip())
553 hb_back_addr
.set_port(0);
555 r
= ms_hb_back_server
->bind(hb_back_addr
);
558 r
= ms_hb_back_client
->client_bind(hb_back_addr
);
562 // hb front should bind to same ip as public_addr
563 entity_addr_t hb_front_addr
= g_conf
->public_addr
;
564 if (hb_front_addr
.is_ip())
565 hb_front_addr
.set_port(0);
566 r
= ms_hb_front_server
->bind(hb_front_addr
);
569 r
= ms_hb_front_client
->client_bind(hb_front_addr
);
573 // Set up crypto, daemonize, etc.
574 global_init_daemonize(g_ceph_context
);
575 common_init_finish(g_ceph_context
);
577 TracepointProvider::initialize
<osd_tracepoint_traits
>(g_ceph_context
);
578 TracepointProvider::initialize
<os_tracepoint_traits
>(g_ceph_context
);
579 #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS
580 TracepointProvider::initialize
<cyg_profile_traits
>(g_ceph_context
);
583 MonClient
mc(g_ceph_context
);
584 if (mc
.build_initial_monmap() < 0)
586 global_init_chdir(g_ceph_context
);
588 if (global_init_preload_erasure_code(g_ceph_context
) < 0)
591 srand(time(NULL
) + getpid());
593 osd
= new OSD(g_ceph_context
,
605 g_conf
->osd_journal
);
607 int err
= osd
->pre_init();
609 derr
<< TEXT_RED
<< " ** ERROR: osd pre_init failed: " << cpp_strerror(-err
)
610 << TEXT_NORMAL
<< dendl
;
615 ms_hb_front_client
->start();
616 ms_hb_back_client
->start();
617 ms_hb_front_server
->start();
618 ms_hb_back_server
->start();
620 ms_objecter
->start();
625 derr
<< TEXT_RED
<< " ** ERROR: osd init failed: " << cpp_strerror(-err
)
626 << TEXT_NORMAL
<< dendl
;
630 // install signal handlers
631 init_async_signal_handler();
632 register_async_signal_handler(SIGHUP
, sighup_handler
);
633 register_async_signal_handler_oneshot(SIGINT
, handle_osd_signal
);
634 register_async_signal_handler_oneshot(SIGTERM
, handle_osd_signal
);
638 if (g_conf
->inject_early_sigterm
)
639 kill(getpid(), SIGTERM
);
642 ms_hb_front_client
->wait();
643 ms_hb_back_client
->wait();
644 ms_hb_front_server
->wait();
645 ms_hb_back_server
->wait();
649 unregister_async_signal_handler(SIGHUP
, sighup_handler
);
650 unregister_async_signal_handler(SIGINT
, handle_osd_signal
);
651 unregister_async_signal_handler(SIGTERM
, handle_osd_signal
);
652 shutdown_async_signal_handler();
657 delete ms_hb_front_client
;
658 delete ms_hb_back_client
;
659 delete ms_hb_front_server
;
660 delete ms_hb_back_server
;
664 client_byte_throttler
.reset();
666 // cd on exit, so that gmon.out (if any) goes into a separate directory for each node.
668 snprintf(s
, sizeof(s
), "gmon/%d", getpid());
669 if ((mkdir(s
, 0755) == 0) && (chdir(s
) == 0)) {
670 dout(0) << "ceph-osd: gmon.out should be in " << s
<< dendl
;