1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include <sys/types.h>
18 #include <boost/scoped_ptr.hpp>
25 #include "os/ObjectStore.h"
26 #include "mon/MonClient.h"
27 #include "include/ceph_features.h"
29 #include "common/config.h"
31 #include "mon/MonMap.h"
33 #include "msg/Messenger.h"
35 #include "common/Timer.h"
36 #include "common/TracepointProvider.h"
37 #include "common/ceph_argparse.h"
39 #include "global/global_init.h"
40 #include "global/signal_handler.h"
42 #include "include/color.h"
43 #include "common/errno.h"
44 #include "common/pick_address.h"
46 #include "perfglue/heap_profiler.h"
48 #include "include/assert.h"
50 #define dout_context g_ceph_context
51 #define dout_subsys ceph_subsys_osd
// Tracepoint provider traits objects, one per tracepoint shared library
// (libosd_tp.so, libos_tp.so and, optionally, libcyg_profile_tp.so).
// main() later passes each of them as the template argument of
// TracepointProvider::initialize<>(g_ceph_context).
// NOTE(review): the second constructor argument looks like the name of the
// config option that enables the provider -- confirm against
// common/TracepointProvider.h.
55 TracepointProvider::Traits
osd_tracepoint_traits("libosd_tp.so",
57 TracepointProvider::Traits
os_tracepoint_traits("libos_tp.so",
58 "osd_objectstore_tracing");
59 #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS
// Only compiled in when WITH_OSD_INSTRUMENT_FUNCTIONS is defined.
60 TracepointProvider::Traits
cyg_profile_traits("libcyg_profile_tp.so",
61 "osd_function_tracing");
64 } // anonymous namespace
// Signal callback: forwards the received signal number to the OSD instance
// by calling osd->handle_signal(signum).
// main() registers this function as a one-shot async handler for SIGINT and
// SIGTERM, and assigns `osd` (osd = new OSD(...)) before doing so.
// NOTE(review): the declaration of `osd` is not visible in this view;
// presumably a file-scope OSD pointer -- confirm.
68 void handle_osd_signal(int signum
)
71 osd
->handle_signal(signum
);
// Command-line help: writes the ceph-osd specific option summary to cout and
// then calls generic_server_usage().
// NOTE(review): the argument loop in main() also accepts --dump-journal,
// --dump-pg-log, --get-cluster-fsid, --get-osd-fsid/--get-osd-uuid and
// --get-journal-fsid/--get-journal-uuid, none of which are listed here;
// confirm whether that omission is intentional.
76 cout
<< "usage: ceph-osd -i <ID> [flags]\n"
77 << " --osd-data PATH data directory\n"
78 << " --osd-journal PATH\n"
79 << " journal file or block device\n"
80 << " --mkfs create a [new] data directory\n"
81 << " --mkkey generate a new secret key. This is normally used in combination with --mkfs\n"
82 << " --convert-filestore\n"
83 << " run any pending upgrade operations\n"
84 << " --flush-journal flush all data out of journal\n"
85 << " --mkjournal initialize a new journal\n"
86 << " --check-wants-journal\n"
87 << " check whether a journal is desired\n"
88 << " --check-allows-journal\n"
89 << " check whether a journal is allowed\n"
90 << " --check-needs-journal\n"
91 << " check whether a journal is required\n"
92 << " --debug_osd <N> set debug level (e.g. 10)\n"
93 << " --get-device-fsid PATH\n"
94 << " get OSD fsid for the given block device\n"
96 generic_server_usage();
// Forward declarations used only when BUILDING_FOR_EMBEDDED is defined.
// Their definitions are not in this view.
99 #ifdef BUILDING_FOR_EMBEDDED
// Called from main() (under the same #ifdef) ahead of the keyring setup and
// the common_init_finish() call that precedes it.
100 void cephd_preload_embedded_plugins();
// Called from main() (under the same #ifdef) with the OSD instance, just
// before the async signal handlers are installed.
101 void cephd_preload_rados_classes(OSD
*osd
);
102 extern "C" int cephd_osd(int argc
, const char **argv
)
104 int main(int argc
, const char **argv
)
107 vector
<const char*> args
;
108 argv_to_vec(argc
, argv
, args
);
111 vector
<const char*> def_args
;
112 // We want to enable leveldb's log, while allowing users to override this
113 // option, therefore we will pass it as a default argument to global_init().
114 def_args
.push_back("--leveldb-log=");
116 auto cct
= global_init(&def_args
, args
, CEPH_ENTITY_TYPE_OSD
,
117 CODE_ENVIRONMENT_DAEMON
,
119 ceph_heap_profiler_init();
123 bool mkjournal
= false;
124 bool check_wants_journal
= false;
125 bool check_allows_journal
= false;
126 bool check_needs_journal
= false;
128 bool flushjournal
= false;
129 bool dump_journal
= false;
130 bool convertfilestore
= false;
131 bool get_osd_fsid
= false;
132 bool get_cluster_fsid
= false;
133 bool get_journal_fsid
= false;
134 bool get_device_fsid
= false;
136 std::string dump_pg_log
;
139 for (std::vector
<const char*>::iterator i
= args
.begin(); i
!= args
.end(); ) {
140 if (ceph_argparse_double_dash(args
, i
)) {
142 } else if (ceph_argparse_flag(args
, i
, "-h", "--help", (char*)NULL
)) {
144 } else if (ceph_argparse_flag(args
, i
, "--mkfs", (char*)NULL
)) {
146 } else if (ceph_argparse_flag(args
, i
, "--mkjournal", (char*)NULL
)) {
148 } else if (ceph_argparse_flag(args
, i
, "--check-allows-journal", (char*)NULL
)) {
149 check_allows_journal
= true;
150 } else if (ceph_argparse_flag(args
, i
, "--check-wants-journal", (char*)NULL
)) {
151 check_wants_journal
= true;
152 } else if (ceph_argparse_flag(args
, i
, "--check-needs-journal", (char*)NULL
)) {
153 check_needs_journal
= true;
154 } else if (ceph_argparse_flag(args
, i
, "--mkkey", (char*)NULL
)) {
156 } else if (ceph_argparse_flag(args
, i
, "--flush-journal", (char*)NULL
)) {
158 } else if (ceph_argparse_flag(args
, i
, "--convert-filestore", (char*)NULL
)) {
159 convertfilestore
= true;
160 } else if (ceph_argparse_witharg(args
, i
, &val
, "--dump-pg-log", (char*)NULL
)) {
162 } else if (ceph_argparse_flag(args
, i
, "--dump-journal", (char*)NULL
)) {
164 } else if (ceph_argparse_flag(args
, i
, "--get-cluster-fsid", (char*)NULL
)) {
165 get_cluster_fsid
= true;
166 } else if (ceph_argparse_flag(args
, i
, "--get-osd-fsid", "--get-osd-uuid", (char*)NULL
)) {
168 } else if (ceph_argparse_flag(args
, i
, "--get-journal-fsid", "--get-journal-uuid", (char*)NULL
)) {
169 get_journal_fsid
= true;
170 } else if (ceph_argparse_witharg(args
, i
, &device_path
,
171 "--get-device-fsid", (char*)NULL
)) {
172 get_device_fsid
= true;
178 derr
<< "unrecognized arg " << args
[0] << dendl
;
182 if (get_journal_fsid
) {
183 device_path
= g_conf
->osd_journal
;
184 get_device_fsid
= true;
186 if (get_device_fsid
) {
188 int r
= ObjectStore::probe_block_device_fsid(g_ceph_context
, device_path
,
191 cerr
<< "failed to get device fsid for " << device_path
192 << ": " << cpp_strerror(r
) << std::endl
;
195 cout
<< uuid
<< std::endl
;
199 if (!dump_pg_log
.empty()) {
200 common_init_finish(g_ceph_context
);
203 int r
= bl
.read_file(dump_pg_log
.c_str(), &error
);
206 bufferlist::iterator p
= bl
.begin();
208 uint64_t pos
= p
.get_off();
212 catch (const buffer::error
&e
) {
213 derr
<< "failed to decode LogEntry at offset " << pos
<< dendl
;
216 derr
<< pos
<< ":\t" << e
<< dendl
;
219 derr
<< "unable to open " << dump_pg_log
<< ": " << error
<< dendl
;
226 const char *id
= g_conf
->name
.get_id().c_str();
227 int whoami
= strtol(id
, &end
, 10);
228 if (*end
|| end
== id
|| whoami
< 0) {
229 derr
<< "must specify '-i #' where # is the osd number" << dendl
;
233 if (g_conf
->osd_data
.empty()) {
234 derr
<< "must specify '--osd-data=foo' data path" << dendl
;
239 string store_type
= g_conf
->osd_objectstore
;
242 snprintf(fn
, sizeof(fn
), "%s/type", g_conf
->osd_data
.c_str());
243 int fd
= ::open(fn
, O_RDONLY
);
248 store_type
= string(bl
.c_str(), bl
.length() - 1); // drop \n
249 g_conf
->set_val("osd_objectstore", store_type
);
250 dout(5) << "object store type is " << store_type
<< dendl
;
255 ObjectStore
*store
= ObjectStore::create(g_ceph_context
,
259 g_conf
->osd_os_flags
);
261 derr
<< "unable to create object store" << dendl
;
265 #ifdef BUILDING_FOR_EMBEDDED
266 cephd_preload_embedded_plugins();
270 common_init_finish(g_ceph_context
);
271 KeyRing
*keyring
= KeyRing::create_empty();
273 derr
<< "Unable to get a Ceph keyring." << dendl
;
277 EntityName
ename(g_conf
->name
);
280 int ret
= keyring
->load(g_ceph_context
, g_conf
->keyring
);
282 keyring
->get_auth(ename
, eauth
)) {
283 derr
<< "already have key in keyring " << g_conf
->keyring
<< dendl
;
285 eauth
.key
.create(g_ceph_context
, CEPH_CRYPTO_AES
);
286 keyring
->add(ename
, eauth
);
288 keyring
->encode_plaintext(bl
);
289 int r
= bl
.write_file(g_conf
->keyring
.c_str(), 0600);
291 derr
<< TEXT_RED
<< " ** ERROR: writing new keyring to " << g_conf
->keyring
292 << ": " << cpp_strerror(r
) << TEXT_NORMAL
<< dendl
;
294 derr
<< "created new key in keyring " << g_conf
->keyring
<< dendl
;
298 common_init_finish(g_ceph_context
);
299 MonClient
mc(g_ceph_context
);
300 if (mc
.build_initial_monmap() < 0)
302 if (mc
.get_monmap_privately() < 0)
305 if (mc
.monmap
.fsid
.is_zero()) {
306 derr
<< "must specify cluster fsid" << dendl
;
310 int err
= OSD::mkfs(g_ceph_context
, store
, g_conf
->osd_data
,
311 mc
.monmap
.fsid
, whoami
);
313 derr
<< TEXT_RED
<< " ** ERROR: error creating empty object store in "
314 << g_conf
->osd_data
<< ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
317 derr
<< "created object store " << g_conf
->osd_data
318 << " for osd." << whoami
<< " fsid " << mc
.monmap
.fsid
<< dendl
;
323 common_init_finish(g_ceph_context
);
324 int err
= store
->mkjournal();
326 derr
<< TEXT_RED
<< " ** ERROR: error creating fresh journal " << g_conf
->osd_journal
327 << " for object store " << g_conf
->osd_data
328 << ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
331 derr
<< "created new journal " << g_conf
->osd_journal
332 << " for object store " << g_conf
->osd_data
<< dendl
;
335 if (check_wants_journal
) {
336 if (store
->wants_journal()) {
337 cout
<< "wants journal: yes" << std::endl
;
340 cout
<< "wants journal: no" << std::endl
;
344 if (check_allows_journal
) {
345 if (store
->allows_journal()) {
346 cout
<< "allows journal: yes" << std::endl
;
349 cout
<< "allows journal: no" << std::endl
;
353 if (check_needs_journal
) {
354 if (store
->needs_journal()) {
355 cout
<< "needs journal: yes" << std::endl
;
358 cout
<< "needs journal: no" << std::endl
;
363 common_init_finish(g_ceph_context
);
364 int err
= store
->mount();
366 derr
<< TEXT_RED
<< " ** ERROR: error flushing journal " << g_conf
->osd_journal
367 << " for object store " << g_conf
->osd_data
368 << ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
369 goto flushjournal_out
;
372 derr
<< "flushed journal " << g_conf
->osd_journal
373 << " for object store " << g_conf
->osd_data
377 exit(err
< 0 ? 1 : 0);
380 common_init_finish(g_ceph_context
);
381 int err
= store
->dump_journal(cout
);
383 derr
<< TEXT_RED
<< " ** ERROR: error dumping journal " << g_conf
->osd_journal
384 << " for object store " << g_conf
->osd_data
385 << ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
388 derr
<< "dumped journal " << g_conf
->osd_journal
389 << " for object store " << g_conf
->osd_data
396 if (convertfilestore
) {
397 int err
= store
->mount();
399 derr
<< TEXT_RED
<< " ** ERROR: error mounting store " << g_conf
->osd_data
400 << ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
403 err
= store
->upgrade();
406 derr
<< TEXT_RED
<< " ** ERROR: error converting store " << g_conf
->osd_data
407 << ": " << cpp_strerror(-err
) << TEXT_NORMAL
<< dendl
;
414 uuid_d cluster_fsid
, osd_fsid
;
416 int r
= OSD::peek_meta(store
, magic
, cluster_fsid
, osd_fsid
, w
);
418 derr
<< TEXT_RED
<< " ** ERROR: unable to open OSD superblock on "
419 << g_conf
->osd_data
<< ": " << cpp_strerror(-r
)
420 << TEXT_NORMAL
<< dendl
;
422 derr
<< TEXT_RED
<< " ** please verify that underlying storage "
423 << "supports xattrs" << TEXT_NORMAL
<< dendl
;
428 derr
<< "OSD id " << w
<< " != my id " << whoami
<< dendl
;
431 if (strcmp(magic
.c_str(), CEPH_OSD_ONDISK_MAGIC
)) {
432 derr
<< "OSD magic " << magic
<< " != my " << CEPH_OSD_ONDISK_MAGIC
437 if (get_cluster_fsid
) {
438 cout
<< cluster_fsid
<< std::endl
;
442 cout
<< osd_fsid
<< std::endl
;
446 pick_addresses(g_ceph_context
, CEPH_PICK_ADDRESS_PUBLIC
447 |CEPH_PICK_ADDRESS_CLUSTER
);
449 if (g_conf
->public_addr
.is_blank_ip() && !g_conf
->cluster_addr
.is_blank_ip()) {
451 << " ** WARNING: specified cluster addr but not public addr; we recommend **\n"
452 << " ** you specify neither or both. **"
453 << TEXT_NORMAL
<< dendl
;
456 std::string public_msgr_type
= g_conf
->ms_public_type
.empty() ? g_conf
->get_val
<std::string
>("ms_type") : g_conf
->ms_public_type
;
457 std::string cluster_msgr_type
= g_conf
->ms_cluster_type
.empty() ? g_conf
->get_val
<std::string
>("ms_type") : g_conf
->ms_cluster_type
;
458 Messenger
*ms_public
= Messenger::create(g_ceph_context
, public_msgr_type
,
459 entity_name_t::OSD(whoami
), "client",
461 Messenger::HAS_HEAVY_TRAFFIC
|
462 Messenger::HAS_MANY_CONNECTIONS
);
463 Messenger
*ms_cluster
= Messenger::create(g_ceph_context
, cluster_msgr_type
,
464 entity_name_t::OSD(whoami
), "cluster",
466 Messenger::HAS_HEAVY_TRAFFIC
|
467 Messenger::HAS_MANY_CONNECTIONS
);
468 Messenger
*ms_hb_back_client
= Messenger::create(g_ceph_context
, cluster_msgr_type
,
469 entity_name_t::OSD(whoami
), "hb_back_client",
470 getpid(), Messenger::HEARTBEAT
);
471 Messenger
*ms_hb_front_client
= Messenger::create(g_ceph_context
, public_msgr_type
,
472 entity_name_t::OSD(whoami
), "hb_front_client",
473 getpid(), Messenger::HEARTBEAT
);
474 Messenger
*ms_hb_back_server
= Messenger::create(g_ceph_context
, cluster_msgr_type
,
475 entity_name_t::OSD(whoami
), "hb_back_server",
476 getpid(), Messenger::HEARTBEAT
);
477 Messenger
*ms_hb_front_server
= Messenger::create(g_ceph_context
, public_msgr_type
,
478 entity_name_t::OSD(whoami
), "hb_front_server",
479 getpid(), Messenger::HEARTBEAT
);
480 Messenger
*ms_objecter
= Messenger::create(g_ceph_context
, public_msgr_type
,
481 entity_name_t::OSD(whoami
), "ms_objecter",
483 if (!ms_public
|| !ms_cluster
|| !ms_hb_front_client
|| !ms_hb_back_client
|| !ms_hb_back_server
|| !ms_hb_front_server
|| !ms_objecter
)
485 ms_cluster
->set_cluster_protocol(CEPH_OSD_PROTOCOL
);
486 ms_hb_front_client
->set_cluster_protocol(CEPH_OSD_PROTOCOL
);
487 ms_hb_back_client
->set_cluster_protocol(CEPH_OSD_PROTOCOL
);
488 ms_hb_back_server
->set_cluster_protocol(CEPH_OSD_PROTOCOL
);
489 ms_hb_front_server
->set_cluster_protocol(CEPH_OSD_PROTOCOL
);
491 cout
<< "starting osd." << whoami
492 << " at " << ms_public
->get_myaddr()
493 << " osd_data " << g_conf
->osd_data
494 << " " << ((g_conf
->osd_journal
.empty()) ?
495 "(no journal)" : g_conf
->osd_journal
)
498 boost::scoped_ptr
<Throttle
> client_byte_throttler(
499 new Throttle(g_ceph_context
, "osd_client_bytes",
500 g_conf
->osd_client_message_size_cap
));
502 // All feature bits 0 - 34 should be present from dumpling v0.67 forward
503 uint64_t osd_required
=
505 CEPH_FEATURE_PGID64
|
508 ms_public
->set_default_policy(Messenger::Policy::stateless_server(0));
509 ms_public
->set_policy_throttlers(entity_name_t::TYPE_CLIENT
,
510 client_byte_throttler
.get(),
512 ms_public
->set_policy(entity_name_t::TYPE_MON
,
513 Messenger::Policy::lossy_client(CEPH_FEATURE_UID
|
514 CEPH_FEATURE_PGID64
|
515 CEPH_FEATURE_OSDENC
));
516 ms_public
->set_policy(entity_name_t::TYPE_MGR
,
517 Messenger::Policy::lossy_client(CEPH_FEATURE_UID
|
518 CEPH_FEATURE_PGID64
|
519 CEPH_FEATURE_OSDENC
));
521 //try to poison pill any OSD connections on the wrong address
522 ms_public
->set_policy(entity_name_t::TYPE_OSD
,
523 Messenger::Policy::stateless_server(0));
525 ms_cluster
->set_default_policy(Messenger::Policy::stateless_server(0));
526 ms_cluster
->set_policy(entity_name_t::TYPE_MON
, Messenger::Policy::lossy_client(0));
527 ms_cluster
->set_policy(entity_name_t::TYPE_OSD
,
528 Messenger::Policy::lossless_peer(osd_required
));
529 ms_cluster
->set_policy(entity_name_t::TYPE_CLIENT
,
530 Messenger::Policy::stateless_server(0));
532 ms_hb_front_client
->set_policy(entity_name_t::TYPE_OSD
,
533 Messenger::Policy::lossy_client(0));
534 ms_hb_back_client
->set_policy(entity_name_t::TYPE_OSD
,
535 Messenger::Policy::lossy_client(0));
536 ms_hb_back_server
->set_policy(entity_name_t::TYPE_OSD
,
537 Messenger::Policy::stateless_server(0));
538 ms_hb_front_server
->set_policy(entity_name_t::TYPE_OSD
,
539 Messenger::Policy::stateless_server(0));
541 ms_objecter
->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX
));
543 r
= ms_public
->bind(g_conf
->public_addr
);
546 r
= ms_cluster
->bind(g_conf
->cluster_addr
);
550 if (g_conf
->osd_heartbeat_use_min_delay_socket
) {
551 ms_hb_front_client
->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY
);
552 ms_hb_back_client
->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY
);
553 ms_hb_back_server
->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY
);
554 ms_hb_front_server
->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY
);
557 // hb back should bind to same ip as cluster_addr (if specified)
558 entity_addr_t hb_back_addr
= g_conf
->osd_heartbeat_addr
;
559 if (hb_back_addr
.is_blank_ip()) {
560 hb_back_addr
= g_conf
->cluster_addr
;
561 if (hb_back_addr
.is_ip())
562 hb_back_addr
.set_port(0);
564 r
= ms_hb_back_server
->bind(hb_back_addr
);
567 r
= ms_hb_back_client
->client_bind(hb_back_addr
);
571 // hb front should bind to same ip as public_addr
572 entity_addr_t hb_front_addr
= g_conf
->public_addr
;
573 if (hb_front_addr
.is_ip())
574 hb_front_addr
.set_port(0);
575 r
= ms_hb_front_server
->bind(hb_front_addr
);
578 r
= ms_hb_front_client
->client_bind(hb_front_addr
);
582 // Set up crypto, daemonize, etc.
583 global_init_daemonize(g_ceph_context
);
584 common_init_finish(g_ceph_context
);
586 TracepointProvider::initialize
<osd_tracepoint_traits
>(g_ceph_context
);
587 TracepointProvider::initialize
<os_tracepoint_traits
>(g_ceph_context
);
588 #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS
589 TracepointProvider::initialize
<cyg_profile_traits
>(g_ceph_context
);
592 MonClient
mc(g_ceph_context
);
593 if (mc
.build_initial_monmap() < 0)
595 global_init_chdir(g_ceph_context
);
597 #ifndef BUILDING_FOR_EMBEDDED
598 if (global_init_preload_erasure_code(g_ceph_context
) < 0)
602 srand(time(NULL
) + getpid());
604 osd
= new OSD(g_ceph_context
,
616 g_conf
->osd_journal
);
618 int err
= osd
->pre_init();
620 derr
<< TEXT_RED
<< " ** ERROR: osd pre_init failed: " << cpp_strerror(-err
)
621 << TEXT_NORMAL
<< dendl
;
626 ms_hb_front_client
->start();
627 ms_hb_back_client
->start();
628 ms_hb_front_server
->start();
629 ms_hb_back_server
->start();
631 ms_objecter
->start();
636 derr
<< TEXT_RED
<< " ** ERROR: osd init failed: " << cpp_strerror(-err
)
637 << TEXT_NORMAL
<< dendl
;
641 #ifdef BUILDING_FOR_EMBEDDED
642 cephd_preload_rados_classes(osd
);
645 // install signal handlers
646 init_async_signal_handler();
647 register_async_signal_handler(SIGHUP
, sighup_handler
);
648 register_async_signal_handler_oneshot(SIGINT
, handle_osd_signal
);
649 register_async_signal_handler_oneshot(SIGTERM
, handle_osd_signal
);
653 if (g_conf
->inject_early_sigterm
)
654 kill(getpid(), SIGTERM
);
657 ms_hb_front_client
->wait();
658 ms_hb_back_client
->wait();
659 ms_hb_front_server
->wait();
660 ms_hb_back_server
->wait();
664 unregister_async_signal_handler(SIGHUP
, sighup_handler
);
665 unregister_async_signal_handler(SIGINT
, handle_osd_signal
);
666 unregister_async_signal_handler(SIGTERM
, handle_osd_signal
);
667 shutdown_async_signal_handler();
672 delete ms_hb_front_client
;
673 delete ms_hb_back_client
;
674 delete ms_hb_front_server
;
675 delete ms_hb_back_server
;
679 client_byte_throttler
.reset();
681 // cd on exit, so that gmon.out (if any) goes into a separate directory for each node.
683 snprintf(s
, sizeof(s
), "gmon/%d", getpid());
684 if ((mkdir(s
, 0755) == 0) && (chdir(s
) == 0)) {
685 dout(0) << "ceph-osd: gmon.out should be in " << s
<< dendl
;