]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include <sys/types.h> | |
16 | #include <sys/stat.h> | |
17 | #include <fcntl.h> | |
18 | #include <boost/scoped_ptr.hpp> | |
19 | ||
20 | #include <iostream> | |
21 | #include <string> | |
7c673cae | 22 | |
f67539c2 | 23 | #include "auth/KeyRing.h" |
7c673cae FG |
24 | #include "osd/OSD.h" |
25 | #include "os/ObjectStore.h" | |
26 | #include "mon/MonClient.h" | |
27 | #include "include/ceph_features.h" | |
7c673cae | 28 | #include "common/config.h" |
1e59de90 | 29 | #include "extblkdev/ExtBlkDevPlugin.h" |
7c673cae FG |
30 | |
31 | #include "mon/MonMap.h" | |
32 | ||
33 | #include "msg/Messenger.h" | |
34 | ||
11fdf7f2 | 35 | #include "common/Throttle.h" |
7c673cae FG |
36 | #include "common/Timer.h" |
37 | #include "common/TracepointProvider.h" | |
38 | #include "common/ceph_argparse.h" | |
11fdf7f2 | 39 | #include "common/numa.h" |
7c673cae FG |
40 | |
41 | #include "global/global_init.h" | |
42 | #include "global/signal_handler.h" | |
43 | ||
44 | #include "include/color.h" | |
45 | #include "common/errno.h" | |
46 | #include "common/pick_address.h" | |
47 | ||
48 | #include "perfglue/heap_profiler.h" | |
49 | ||
11fdf7f2 TL |
50 | #include "include/ceph_assert.h" |
51 | ||
52 | #include "common/Preforker.h" | |
7c673cae FG |
53 | |
54 | #define dout_context g_ceph_context | |
55 | #define dout_subsys ceph_subsys_osd | |
56 | ||
f67539c2 TL |
57 | using std::cerr; |
58 | using std::cout; | |
59 | using std::map; | |
60 | using std::ostringstream; | |
61 | using std::string; | |
62 | using std::vector; | |
63 | ||
64 | using ceph::bufferlist; | |
65 | ||
7c673cae FG |
66 | namespace { |
67 | ||
68 | TracepointProvider::Traits osd_tracepoint_traits("libosd_tp.so", | |
69 | "osd_tracing"); | |
70 | TracepointProvider::Traits os_tracepoint_traits("libos_tp.so", | |
71 | "osd_objectstore_tracing"); | |
9f95a23c TL |
72 | TracepointProvider::Traits bluestore_tracepoint_traits("libbluestore_tp.so", |
73 | "bluestore_tracing"); | |
31f18b77 FG |
74 | #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS |
75 | TracepointProvider::Traits cyg_profile_traits("libcyg_profile_tp.so", | |
76 | "osd_function_tracing"); | |
77 | #endif | |
7c673cae FG |
78 | |
79 | } // anonymous namespace | |
80 | ||
9f95a23c | 81 | OSD *osdptr = nullptr; |
7c673cae FG |
82 | |
83 | void handle_osd_signal(int signum) | |
84 | { | |
9f95a23c TL |
85 | if (osdptr) |
86 | osdptr->handle_signal(signum); | |
7c673cae FG |
87 | } |
88 | ||
89 | static void usage() | |
90 | { | |
31f18b77 | 91 | cout << "usage: ceph-osd -i <ID> [flags]\n" |
7c673cae FG |
92 | << " --osd-data PATH data directory\n" |
93 | << " --osd-journal PATH\n" | |
94 | << " journal file or block device\n" | |
95 | << " --mkfs create a [new] data directory\n" | |
31f18b77 | 96 | << " --mkkey generate a new secret key. This is normally used in combination with --mkfs\n" |
11fdf7f2 TL |
97 | << " --monmap specify the path to the monitor map. This is normally used in combination with --mkfs\n" |
98 | << " --osd-uuid specify the OSD's fsid. This is normally used in combination with --mkfs\n" | |
99 | << " --keyring specify a path to the osd keyring. This is normally used in combination with --mkfs\n" | |
7c673cae FG |
100 | << " --convert-filestore\n" |
101 | << " run any pending upgrade operations\n" | |
102 | << " --flush-journal flush all data out of journal\n" | |
e306af50 TL |
103 | << " --osdspec-affinity\n" |
104 | << " set affinity to an osdspec\n" | |
11fdf7f2 | 105 | << " --dump-journal dump all data of journal\n" |
7c673cae FG |
106 | << " --mkjournal initialize a new journal\n" |
107 | << " --check-wants-journal\n" | |
108 | << " check whether a journal is desired\n" | |
109 | << " --check-allows-journal\n" | |
110 | << " check whether a journal is allowed\n" | |
111 | << " --check-needs-journal\n" | |
112 | << " check whether a journal is required\n" | |
113 | << " --debug_osd <N> set debug level (e.g. 10)\n" | |
114 | << " --get-device-fsid PATH\n" | |
115 | << " get OSD fsid for the given block device\n" | |
116 | << std::endl; | |
117 | generic_server_usage(); | |
118 | } | |
119 | ||
7c673cae | 120 | int main(int argc, const char **argv) |
7c673cae | 121 | { |
20effc67 | 122 | auto args = argv_to_vec(argc, argv); |
11fdf7f2 TL |
123 | if (args.empty()) { |
124 | cerr << argv[0] << ": -h or --help for usage" << std::endl; | |
125 | exit(1); | |
126 | } | |
127 | if (ceph_argparse_need_usage(args)) { | |
128 | usage(); | |
129 | exit(0); | |
130 | } | |
7c673cae | 131 | |
11fdf7f2 TL |
132 | map<string,string> defaults = { |
133 | // We want to enable leveldb's log, while allowing users to override this | |
134 | // option, therefore we will pass it as a default argument to global_init(). | |
135 | { "leveldb_log", "" } | |
136 | }; | |
137 | auto cct = global_init( | |
138 | &defaults, | |
139 | args, CEPH_ENTITY_TYPE_OSD, | |
f67539c2 | 140 | CODE_ENVIRONMENT_DAEMON, 0); |
7c673cae FG |
141 | ceph_heap_profiler_init(); |
142 | ||
11fdf7f2 TL |
143 | Preforker forker; |
144 | ||
7c673cae FG |
145 | // osd specific args |
146 | bool mkfs = false; | |
147 | bool mkjournal = false; | |
148 | bool check_wants_journal = false; | |
149 | bool check_allows_journal = false; | |
150 | bool check_needs_journal = false; | |
151 | bool mkkey = false; | |
152 | bool flushjournal = false; | |
153 | bool dump_journal = false; | |
154 | bool convertfilestore = false; | |
155 | bool get_osd_fsid = false; | |
156 | bool get_cluster_fsid = false; | |
157 | bool get_journal_fsid = false; | |
158 | bool get_device_fsid = false; | |
159 | string device_path; | |
160 | std::string dump_pg_log; | |
e306af50 | 161 | std::string osdspec_affinity; |
7c673cae FG |
162 | |
163 | std::string val; | |
164 | for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) { | |
165 | if (ceph_argparse_double_dash(args, i)) { | |
166 | break; | |
7c673cae FG |
167 | } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) { |
168 | mkfs = true; | |
e306af50 TL |
169 | } else if (ceph_argparse_witharg(args, i, &val, "--osdspec-affinity", (char*)NULL)) { |
170 | osdspec_affinity = val; | |
7c673cae FG |
171 | } else if (ceph_argparse_flag(args, i, "--mkjournal", (char*)NULL)) { |
172 | mkjournal = true; | |
173 | } else if (ceph_argparse_flag(args, i, "--check-allows-journal", (char*)NULL)) { | |
174 | check_allows_journal = true; | |
175 | } else if (ceph_argparse_flag(args, i, "--check-wants-journal", (char*)NULL)) { | |
176 | check_wants_journal = true; | |
177 | } else if (ceph_argparse_flag(args, i, "--check-needs-journal", (char*)NULL)) { | |
178 | check_needs_journal = true; | |
179 | } else if (ceph_argparse_flag(args, i, "--mkkey", (char*)NULL)) { | |
180 | mkkey = true; | |
181 | } else if (ceph_argparse_flag(args, i, "--flush-journal", (char*)NULL)) { | |
182 | flushjournal = true; | |
183 | } else if (ceph_argparse_flag(args, i, "--convert-filestore", (char*)NULL)) { | |
184 | convertfilestore = true; | |
185 | } else if (ceph_argparse_witharg(args, i, &val, "--dump-pg-log", (char*)NULL)) { | |
186 | dump_pg_log = val; | |
187 | } else if (ceph_argparse_flag(args, i, "--dump-journal", (char*)NULL)) { | |
188 | dump_journal = true; | |
189 | } else if (ceph_argparse_flag(args, i, "--get-cluster-fsid", (char*)NULL)) { | |
190 | get_cluster_fsid = true; | |
191 | } else if (ceph_argparse_flag(args, i, "--get-osd-fsid", "--get-osd-uuid", (char*)NULL)) { | |
192 | get_osd_fsid = true; | |
193 | } else if (ceph_argparse_flag(args, i, "--get-journal-fsid", "--get-journal-uuid", (char*)NULL)) { | |
194 | get_journal_fsid = true; | |
195 | } else if (ceph_argparse_witharg(args, i, &device_path, | |
196 | "--get-device-fsid", (char*)NULL)) { | |
197 | get_device_fsid = true; | |
198 | } else { | |
199 | ++i; | |
200 | } | |
201 | } | |
202 | if (!args.empty()) { | |
11fdf7f2 TL |
203 | cerr << "unrecognized arg " << args[0] << std::endl; |
204 | exit(1); | |
7c673cae FG |
205 | } |
206 | ||
11fdf7f2 TL |
207 | if (global_init_prefork(g_ceph_context) >= 0) { |
208 | std::string err; | |
209 | int r = forker.prefork(err); | |
210 | if (r < 0) { | |
211 | cerr << err << std::endl; | |
212 | return r; | |
213 | } | |
214 | if (forker.is_parent()) { | |
215 | g_ceph_context->_log->start(); | |
216 | if (forker.parent_wait(err) != 0) { | |
217 | return -ENXIO; | |
218 | } | |
219 | return 0; | |
220 | } | |
221 | setsid(); | |
222 | global_init_postfork_start(g_ceph_context); | |
223 | } | |
224 | common_init_finish(g_ceph_context); | |
225 | global_init_chdir(g_ceph_context); | |
226 | ||
7c673cae | 227 | if (get_journal_fsid) { |
11fdf7f2 | 228 | device_path = g_conf().get_val<std::string>("osd_journal"); |
7c673cae FG |
229 | get_device_fsid = true; |
230 | } | |
231 | if (get_device_fsid) { | |
232 | uuid_d uuid; | |
233 | int r = ObjectStore::probe_block_device_fsid(g_ceph_context, device_path, | |
234 | &uuid); | |
235 | if (r < 0) { | |
236 | cerr << "failed to get device fsid for " << device_path | |
237 | << ": " << cpp_strerror(r) << std::endl; | |
11fdf7f2 | 238 | forker.exit(1); |
7c673cae FG |
239 | } |
240 | cout << uuid << std::endl; | |
11fdf7f2 | 241 | forker.exit(0); |
7c673cae FG |
242 | } |
243 | ||
244 | if (!dump_pg_log.empty()) { | |
245 | common_init_finish(g_ceph_context); | |
246 | bufferlist bl; | |
247 | std::string error; | |
11fdf7f2 TL |
248 | |
249 | if (bl.read_file(dump_pg_log.c_str(), &error) >= 0) { | |
7c673cae | 250 | pg_log_entry_t e; |
11fdf7f2 | 251 | auto p = bl.cbegin(); |
7c673cae FG |
252 | while (!p.end()) { |
253 | uint64_t pos = p.get_off(); | |
254 | try { | |
11fdf7f2 | 255 | decode(e, p); |
7c673cae | 256 | } |
f67539c2 | 257 | catch (const ceph::buffer::error &e) { |
7c673cae | 258 | derr << "failed to decode LogEntry at offset " << pos << dendl; |
11fdf7f2 | 259 | forker.exit(1); |
7c673cae FG |
260 | } |
261 | derr << pos << ":\t" << e << dendl; | |
262 | } | |
263 | } else { | |
264 | derr << "unable to open " << dump_pg_log << ": " << error << dendl; | |
265 | } | |
11fdf7f2 | 266 | forker.exit(0); |
7c673cae FG |
267 | } |
268 | ||
269 | // whoami | |
270 | char *end; | |
11fdf7f2 | 271 | const char *id = g_conf()->name.get_id().c_str(); |
7c673cae | 272 | int whoami = strtol(id, &end, 10); |
11fdf7f2 | 273 | std::string data_path = g_conf().get_val<std::string>("osd_data"); |
7c673cae FG |
274 | if (*end || end == id || whoami < 0) { |
275 | derr << "must specify '-i #' where # is the osd number" << dendl; | |
11fdf7f2 | 276 | forker.exit(1); |
7c673cae FG |
277 | } |
278 | ||
11fdf7f2 | 279 | if (data_path.empty()) { |
7c673cae | 280 | derr << "must specify '--osd-data=foo' data path" << dendl; |
11fdf7f2 | 281 | forker.exit(1); |
7c673cae FG |
282 | } |
283 | ||
284 | // the store | |
11fdf7f2 | 285 | std::string store_type; |
7c673cae FG |
286 | { |
287 | char fn[PATH_MAX]; | |
11fdf7f2 | 288 | snprintf(fn, sizeof(fn), "%s/type", data_path.c_str()); |
91327a77 | 289 | int fd = ::open(fn, O_RDONLY|O_CLOEXEC); |
7c673cae FG |
290 | if (fd >= 0) { |
291 | bufferlist bl; | |
292 | bl.read_fd(fd, 64); | |
293 | if (bl.length()) { | |
294 | store_type = string(bl.c_str(), bl.length() - 1); // drop \n | |
295 | dout(5) << "object store type is " << store_type << dendl; | |
296 | } | |
297 | ::close(fd); | |
11fdf7f2 TL |
298 | } else if (mkfs) { |
299 | store_type = g_conf().get_val<std::string>("osd_objectstore"); | |
300 | } else { | |
301 | // hrm, infer the type | |
302 | snprintf(fn, sizeof(fn), "%s/current", data_path.c_str()); | |
303 | struct stat st; | |
304 | if (::stat(fn, &st) == 0 && | |
305 | S_ISDIR(st.st_mode)) { | |
306 | derr << "missing 'type' file, inferring filestore from current/ dir" | |
307 | << dendl; | |
308 | store_type = "filestore"; | |
309 | } else { | |
310 | snprintf(fn, sizeof(fn), "%s/block", data_path.c_str()); | |
311 | if (::stat(fn, &st) == 0 && | |
312 | S_ISLNK(st.st_mode)) { | |
313 | derr << "missing 'type' file, inferring bluestore from block symlink" | |
314 | << dendl; | |
315 | store_type = "bluestore"; | |
316 | } else { | |
317 | derr << "missing 'type' file and unable to infer osd type" << dendl; | |
318 | forker.exit(1); | |
319 | } | |
320 | } | |
7c673cae FG |
321 | } |
322 | } | |
11fdf7f2 TL |
323 | |
324 | std::string journal_path = g_conf().get_val<std::string>("osd_journal"); | |
325 | uint32_t flags = g_conf().get_val<uint64_t>("osd_os_flags"); | |
20effc67 TL |
326 | std::unique_ptr<ObjectStore> store = ObjectStore::create(g_ceph_context, |
327 | store_type, | |
328 | data_path, | |
329 | journal_path, | |
330 | flags); | |
7c673cae FG |
331 | if (!store) { |
332 | derr << "unable to create object store" << dendl; | |
11fdf7f2 | 333 | forker.exit(-ENODEV); |
7c673cae FG |
334 | } |
335 | ||
7c673cae | 336 | |
7c673cae FG |
337 | if (mkkey) { |
338 | common_init_finish(g_ceph_context); | |
9f95a23c | 339 | KeyRing keyring; |
7c673cae | 340 | |
11fdf7f2 | 341 | EntityName ename{g_conf()->name}; |
7c673cae FG |
342 | EntityAuth eauth; |
343 | ||
11fdf7f2 | 344 | std::string keyring_path = g_conf().get_val<std::string>("keyring"); |
9f95a23c | 345 | int ret = keyring.load(g_ceph_context, keyring_path); |
7c673cae | 346 | if (ret == 0 && |
9f95a23c | 347 | keyring.get_auth(ename, eauth)) { |
11fdf7f2 | 348 | derr << "already have key in keyring " << keyring_path << dendl; |
7c673cae FG |
349 | } else { |
350 | eauth.key.create(g_ceph_context, CEPH_CRYPTO_AES); | |
9f95a23c | 351 | keyring.add(ename, eauth); |
7c673cae | 352 | bufferlist bl; |
9f95a23c | 353 | keyring.encode_plaintext(bl); |
11fdf7f2 | 354 | int r = bl.write_file(keyring_path.c_str(), 0600); |
7c673cae | 355 | if (r) |
11fdf7f2 TL |
356 | derr << TEXT_RED << " ** ERROR: writing new keyring to " |
357 | << keyring_path << ": " << cpp_strerror(r) << TEXT_NORMAL | |
358 | << dendl; | |
7c673cae | 359 | else |
11fdf7f2 | 360 | derr << "created new key in keyring " << keyring_path << dendl; |
7c673cae FG |
361 | } |
362 | } | |
e306af50 | 363 | |
3efd9988 FG |
364 | if (mkfs) { |
365 | common_init_finish(g_ceph_context); | |
3efd9988 | 366 | |
11fdf7f2 | 367 | if (g_conf().get_val<uuid_d>("fsid").is_zero()) { |
3efd9988 | 368 | derr << "must specify cluster fsid" << dendl; |
11fdf7f2 | 369 | forker.exit(-EINVAL); |
3efd9988 FG |
370 | } |
371 | ||
20effc67 | 372 | int err = OSD::mkfs(g_ceph_context, std::move(store), g_conf().get_val<uuid_d>("fsid"), |
e306af50 | 373 | whoami, osdspec_affinity); |
3efd9988 FG |
374 | if (err < 0) { |
375 | derr << TEXT_RED << " ** ERROR: error creating empty object store in " | |
11fdf7f2 TL |
376 | << data_path << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
377 | forker.exit(1); | |
3efd9988 | 378 | } |
11fdf7f2 TL |
379 | dout(0) << "created object store " << data_path |
380 | << " for osd." << whoami | |
381 | << " fsid " << g_conf().get_val<uuid_d>("fsid") | |
382 | << dendl; | |
383 | } | |
384 | if (mkfs || mkkey) { | |
385 | forker.exit(0); | |
3efd9988 | 386 | } |
7c673cae FG |
387 | if (mkjournal) { |
388 | common_init_finish(g_ceph_context); | |
389 | int err = store->mkjournal(); | |
390 | if (err < 0) { | |
11fdf7f2 TL |
391 | derr << TEXT_RED << " ** ERROR: error creating fresh journal " |
392 | << journal_path << " for object store " << data_path << ": " | |
393 | << cpp_strerror(-err) << TEXT_NORMAL << dendl; | |
394 | forker.exit(1); | |
7c673cae | 395 | } |
11fdf7f2 TL |
396 | derr << "created new journal " << journal_path |
397 | << " for object store " << data_path << dendl; | |
398 | forker.exit(0); | |
7c673cae FG |
399 | } |
400 | if (check_wants_journal) { | |
401 | if (store->wants_journal()) { | |
d2e6a577 | 402 | cout << "wants journal: yes" << std::endl; |
11fdf7f2 | 403 | forker.exit(0); |
7c673cae | 404 | } else { |
d2e6a577 | 405 | cout << "wants journal: no" << std::endl; |
11fdf7f2 | 406 | forker.exit(1); |
7c673cae FG |
407 | } |
408 | } | |
409 | if (check_allows_journal) { | |
410 | if (store->allows_journal()) { | |
d2e6a577 | 411 | cout << "allows journal: yes" << std::endl; |
11fdf7f2 | 412 | forker.exit(0); |
7c673cae | 413 | } else { |
d2e6a577 | 414 | cout << "allows journal: no" << std::endl; |
11fdf7f2 | 415 | forker.exit(1); |
7c673cae FG |
416 | } |
417 | } | |
418 | if (check_needs_journal) { | |
419 | if (store->needs_journal()) { | |
d2e6a577 | 420 | cout << "needs journal: yes" << std::endl; |
11fdf7f2 | 421 | forker.exit(0); |
7c673cae | 422 | } else { |
d2e6a577 | 423 | cout << "needs journal: no" << std::endl; |
11fdf7f2 | 424 | forker.exit(1); |
7c673cae FG |
425 | } |
426 | } | |
427 | if (flushjournal) { | |
428 | common_init_finish(g_ceph_context); | |
429 | int err = store->mount(); | |
430 | if (err < 0) { | |
11fdf7f2 TL |
431 | derr << TEXT_RED << " ** ERROR: error flushing journal " << journal_path |
432 | << " for object store " << data_path | |
7c673cae FG |
433 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
434 | goto flushjournal_out; | |
435 | } | |
436 | store->umount(); | |
11fdf7f2 TL |
437 | derr << "flushed journal " << journal_path |
438 | << " for object store " << data_path | |
7c673cae FG |
439 | << dendl; |
440 | flushjournal_out: | |
20effc67 | 441 | store.reset(); |
11fdf7f2 | 442 | forker.exit(err < 0 ? 1 : 0); |
7c673cae FG |
443 | } |
444 | if (dump_journal) { | |
445 | common_init_finish(g_ceph_context); | |
446 | int err = store->dump_journal(cout); | |
447 | if (err < 0) { | |
11fdf7f2 TL |
448 | derr << TEXT_RED << " ** ERROR: error dumping journal " << journal_path |
449 | << " for object store " << data_path | |
7c673cae | 450 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 451 | forker.exit(1); |
7c673cae | 452 | } |
11fdf7f2 TL |
453 | derr << "dumped journal " << journal_path |
454 | << " for object store " << data_path | |
7c673cae | 455 | << dendl; |
11fdf7f2 | 456 | forker.exit(0); |
7c673cae FG |
457 | } |
458 | ||
7c673cae FG |
459 | if (convertfilestore) { |
460 | int err = store->mount(); | |
461 | if (err < 0) { | |
11fdf7f2 | 462 | derr << TEXT_RED << " ** ERROR: error mounting store " << data_path |
7c673cae | 463 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 464 | forker.exit(1); |
7c673cae FG |
465 | } |
466 | err = store->upgrade(); | |
467 | store->umount(); | |
468 | if (err < 0) { | |
11fdf7f2 | 469 | derr << TEXT_RED << " ** ERROR: error converting store " << data_path |
7c673cae | 470 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 471 | forker.exit(1); |
7c673cae | 472 | } |
11fdf7f2 | 473 | forker.exit(0); |
7c673cae FG |
474 | } |
475 | ||
1e59de90 TL |
476 | { |
477 | int r = extblkdev::preload(g_ceph_context); | |
478 | if (r < 0) { | |
479 | derr << "Failed preloading extblkdev plugins, error code: " << r << dendl; | |
480 | forker.exit(1); | |
481 | } | |
482 | } | |
483 | ||
7c673cae FG |
484 | string magic; |
485 | uuid_d cluster_fsid, osd_fsid; | |
9f95a23c | 486 | ceph_release_t require_osd_release = ceph_release_t::unknown; |
7c673cae | 487 | int w; |
20effc67 | 488 | int r = OSD::peek_meta(store.get(), &magic, &cluster_fsid, &osd_fsid, &w, |
11fdf7f2 | 489 | &require_osd_release); |
7c673cae FG |
490 | if (r < 0) { |
491 | derr << TEXT_RED << " ** ERROR: unable to open OSD superblock on " | |
11fdf7f2 | 492 | << data_path << ": " << cpp_strerror(-r) |
7c673cae FG |
493 | << TEXT_NORMAL << dendl; |
494 | if (r == -ENOTSUP) { | |
495 | derr << TEXT_RED << " ** please verify that underlying storage " | |
496 | << "supports xattrs" << TEXT_NORMAL << dendl; | |
497 | } | |
11fdf7f2 | 498 | forker.exit(1); |
7c673cae FG |
499 | } |
500 | if (w != whoami) { | |
501 | derr << "OSD id " << w << " != my id " << whoami << dendl; | |
11fdf7f2 | 502 | forker.exit(1); |
7c673cae FG |
503 | } |
504 | if (strcmp(magic.c_str(), CEPH_OSD_ONDISK_MAGIC)) { | |
505 | derr << "OSD magic " << magic << " != my " << CEPH_OSD_ONDISK_MAGIC | |
506 | << dendl; | |
11fdf7f2 | 507 | forker.exit(1); |
7c673cae FG |
508 | } |
509 | ||
510 | if (get_cluster_fsid) { | |
511 | cout << cluster_fsid << std::endl; | |
11fdf7f2 | 512 | forker.exit(0); |
7c673cae FG |
513 | } |
514 | if (get_osd_fsid) { | |
515 | cout << osd_fsid << std::endl; | |
11fdf7f2 | 516 | forker.exit(0); |
7c673cae FG |
517 | } |
518 | ||
9f95a23c | 519 | { |
9f95a23c | 520 | ostringstream err; |
f67539c2 | 521 | if (!can_upgrade_from(require_osd_release, "require_osd_release", err)) { |
9f95a23c TL |
522 | derr << err.str() << dendl; |
523 | forker.exit(1); | |
524 | } | |
11fdf7f2 | 525 | } |
7c673cae | 526 | |
11fdf7f2 TL |
527 | // consider objectstore numa node |
528 | int os_numa_node = -1; | |
529 | r = store->get_numa_node(&os_numa_node, nullptr, nullptr); | |
530 | if (r >= 0 && os_numa_node >= 0) { | |
531 | dout(1) << " objectstore numa_node " << os_numa_node << dendl; | |
532 | } | |
533 | int iface_preferred_numa_node = -1; | |
534 | if (g_conf().get_val<bool>("osd_numa_prefer_iface")) { | |
535 | iface_preferred_numa_node = os_numa_node; | |
7c673cae FG |
536 | } |
537 | ||
11fdf7f2 TL |
538 | // messengers |
539 | std::string msg_type = g_conf().get_val<std::string>("ms_type"); | |
540 | std::string public_msg_type = | |
541 | g_conf().get_val<std::string>("ms_public_type"); | |
542 | std::string cluster_msg_type = | |
543 | g_conf().get_val<std::string>("ms_cluster_type"); | |
544 | ||
545 | public_msg_type = public_msg_type.empty() ? msg_type : public_msg_type; | |
546 | cluster_msg_type = cluster_msg_type.empty() ? msg_type : cluster_msg_type; | |
9f95a23c | 547 | uint64_t nonce = Messenger::get_pid_nonce(); |
11fdf7f2 | 548 | Messenger *ms_public = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 549 | entity_name_t::OSD(whoami), "client", nonce); |
11fdf7f2 | 550 | Messenger *ms_cluster = Messenger::create(g_ceph_context, cluster_msg_type, |
f67539c2 | 551 | entity_name_t::OSD(whoami), "cluster", nonce); |
11fdf7f2 | 552 | Messenger *ms_hb_back_client = Messenger::create(g_ceph_context, cluster_msg_type, |
f67539c2 | 553 | entity_name_t::OSD(whoami), "hb_back_client", nonce); |
11fdf7f2 | 554 | Messenger *ms_hb_front_client = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 555 | entity_name_t::OSD(whoami), "hb_front_client", nonce); |
11fdf7f2 | 556 | Messenger *ms_hb_back_server = Messenger::create(g_ceph_context, cluster_msg_type, |
f67539c2 | 557 | entity_name_t::OSD(whoami), "hb_back_server", nonce); |
11fdf7f2 | 558 | Messenger *ms_hb_front_server = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 559 | entity_name_t::OSD(whoami), "hb_front_server", nonce); |
11fdf7f2 | 560 | Messenger *ms_objecter = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 561 | entity_name_t::OSD(whoami), "ms_objecter", nonce); |
7c673cae | 562 | if (!ms_public || !ms_cluster || !ms_hb_front_client || !ms_hb_back_client || !ms_hb_back_server || !ms_hb_front_server || !ms_objecter) |
11fdf7f2 | 563 | forker.exit(1); |
7c673cae FG |
564 | ms_cluster->set_cluster_protocol(CEPH_OSD_PROTOCOL); |
565 | ms_hb_front_client->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
566 | ms_hb_back_client->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
567 | ms_hb_back_server->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
568 | ms_hb_front_server->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
569 | ||
11fdf7f2 TL |
570 | dout(0) << "starting osd." << whoami |
571 | << " osd_data " << data_path | |
572 | << " " << ((journal_path.empty()) ? | |
573 | "(no journal)" : journal_path) | |
574 | << dendl; | |
7c673cae | 575 | |
11fdf7f2 TL |
576 | uint64_t message_size = |
577 | g_conf().get_val<Option::size_t>("osd_client_message_size_cap"); | |
7c673cae | 578 | boost::scoped_ptr<Throttle> client_byte_throttler( |
11fdf7f2 | 579 | new Throttle(g_ceph_context, "osd_client_bytes", message_size)); |
f6b5b4d7 TL |
580 | uint64_t message_cap = g_conf().get_val<uint64_t>("osd_client_message_cap"); |
581 | boost::scoped_ptr<Throttle> client_msg_throttler( | |
582 | new Throttle(g_ceph_context, "osd_client_messages", message_cap)); | |
7c673cae FG |
583 | |
584 | // All feature bits 0 - 34 should be present from dumpling v0.67 forward | |
585 | uint64_t osd_required = | |
586 | CEPH_FEATURE_UID | | |
587 | CEPH_FEATURE_PGID64 | | |
588 | CEPH_FEATURE_OSDENC; | |
589 | ||
9f95a23c | 590 | ms_public->set_default_policy(Messenger::Policy::stateless_registered_server(0)); |
7c673cae FG |
591 | ms_public->set_policy_throttlers(entity_name_t::TYPE_CLIENT, |
592 | client_byte_throttler.get(), | |
f6b5b4d7 | 593 | client_msg_throttler.get()); |
7c673cae | 594 | ms_public->set_policy(entity_name_t::TYPE_MON, |
11fdf7f2 | 595 | Messenger::Policy::lossy_client(osd_required)); |
7c673cae | 596 | ms_public->set_policy(entity_name_t::TYPE_MGR, |
11fdf7f2 | 597 | Messenger::Policy::lossy_client(osd_required)); |
7c673cae | 598 | |
7c673cae FG |
599 | ms_cluster->set_default_policy(Messenger::Policy::stateless_server(0)); |
600 | ms_cluster->set_policy(entity_name_t::TYPE_MON, Messenger::Policy::lossy_client(0)); | |
601 | ms_cluster->set_policy(entity_name_t::TYPE_OSD, | |
602 | Messenger::Policy::lossless_peer(osd_required)); | |
603 | ms_cluster->set_policy(entity_name_t::TYPE_CLIENT, | |
604 | Messenger::Policy::stateless_server(0)); | |
605 | ||
606 | ms_hb_front_client->set_policy(entity_name_t::TYPE_OSD, | |
607 | Messenger::Policy::lossy_client(0)); | |
608 | ms_hb_back_client->set_policy(entity_name_t::TYPE_OSD, | |
609 | Messenger::Policy::lossy_client(0)); | |
610 | ms_hb_back_server->set_policy(entity_name_t::TYPE_OSD, | |
611 | Messenger::Policy::stateless_server(0)); | |
612 | ms_hb_front_server->set_policy(entity_name_t::TYPE_OSD, | |
613 | Messenger::Policy::stateless_server(0)); | |
614 | ||
615 | ms_objecter->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX)); | |
616 | ||
39ae355f | 617 | entity_addrvec_t public_addrs, public_bind_addrs, cluster_addrs; |
11fdf7f2 TL |
618 | r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC, &public_addrs, |
619 | iface_preferred_numa_node); | |
620 | if (r < 0) { | |
621 | derr << "Failed to pick public address." << dendl; | |
622 | forker.exit(1); | |
39ae355f TL |
623 | } else { |
624 | dout(10) << "picked public_addrs " << public_addrs << dendl; | |
625 | } | |
626 | r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC_BIND, | |
627 | &public_bind_addrs, iface_preferred_numa_node); | |
628 | if (r == -ENOENT) { | |
629 | dout(10) << "there is no public_bind_addrs, defaulting to public_addrs" | |
630 | << dendl; | |
631 | public_bind_addrs = public_addrs; | |
632 | } else if (r < 0) { | |
633 | derr << "Failed to pick public bind address." << dendl; | |
634 | forker.exit(1); | |
635 | } else { | |
636 | dout(10) << "picked public_bind_addrs " << public_bind_addrs << dendl; | |
11fdf7f2 TL |
637 | } |
638 | r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_CLUSTER, &cluster_addrs, | |
639 | iface_preferred_numa_node); | |
640 | if (r < 0) { | |
641 | derr << "Failed to pick cluster address." << dendl; | |
642 | forker.exit(1); | |
643 | } | |
644 | ||
39ae355f TL |
645 | if (ms_public->bindv(public_bind_addrs, public_addrs) < 0) { |
646 | derr << "Failed to bind to " << public_bind_addrs << dendl; | |
11fdf7f2 | 647 | forker.exit(1); |
39ae355f | 648 | } |
7c673cae | 649 | |
11fdf7f2 TL |
650 | if (ms_cluster->bindv(cluster_addrs) < 0) |
651 | forker.exit(1); | |
652 | ||
653 | bool is_delay = g_conf().get_val<bool>("osd_heartbeat_use_min_delay_socket"); | |
654 | if (is_delay) { | |
7c673cae FG |
655 | ms_hb_front_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); |
656 | ms_hb_back_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
657 | ms_hb_back_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
658 | ms_hb_front_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
659 | } | |
660 | ||
39ae355f | 661 | entity_addrvec_t hb_front_addrs = public_bind_addrs; |
11fdf7f2 TL |
662 | for (auto& a : hb_front_addrs.v) { |
663 | a.set_port(0); | |
7c673cae | 664 | } |
11fdf7f2 TL |
665 | if (ms_hb_front_server->bindv(hb_front_addrs) < 0) |
666 | forker.exit(1); | |
667 | if (ms_hb_front_client->client_bind(hb_front_addrs.front()) < 0) | |
668 | forker.exit(1); | |
669 | ||
670 | entity_addrvec_t hb_back_addrs = cluster_addrs; | |
671 | for (auto& a : hb_back_addrs.v) { | |
672 | a.set_port(0); | |
673 | } | |
674 | if (ms_hb_back_server->bindv(hb_back_addrs) < 0) | |
675 | forker.exit(1); | |
676 | if (ms_hb_back_client->client_bind(hb_back_addrs.front()) < 0) | |
677 | forker.exit(1); | |
7c673cae | 678 | |
11fdf7f2 TL |
679 | // install signal handlers |
680 | init_async_signal_handler(); | |
681 | register_async_signal_handler(SIGHUP, sighup_handler); | |
7c673cae FG |
682 | |
683 | TracepointProvider::initialize<osd_tracepoint_traits>(g_ceph_context); | |
684 | TracepointProvider::initialize<os_tracepoint_traits>(g_ceph_context); | |
9f95a23c | 685 | TracepointProvider::initialize<bluestore_tracepoint_traits>(g_ceph_context); |
31f18b77 FG |
686 | #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS |
687 | TracepointProvider::initialize<cyg_profile_traits>(g_ceph_context); | |
688 | #endif | |
7c673cae | 689 | |
11fdf7f2 TL |
690 | srand(time(NULL) + getpid()); |
691 | ||
f67539c2 TL |
692 | ceph::async::io_context_pool poolctx( |
693 | cct->_conf.get_val<std::uint64_t>("osd_asio_thread_count")); | |
694 | ||
695 | MonClient mc(g_ceph_context, poolctx); | |
7c673cae FG |
696 | if (mc.build_initial_monmap() < 0) |
697 | return -1; | |
698 | global_init_chdir(g_ceph_context); | |
699 | ||
11fdf7f2 TL |
700 | if (global_init_preload_erasure_code(g_ceph_context) < 0) { |
701 | forker.exit(1); | |
702 | } | |
224ce89b | 703 | |
9f95a23c | 704 | osdptr = new OSD(g_ceph_context, |
20effc67 | 705 | std::move(store), |
9f95a23c TL |
706 | whoami, |
707 | ms_cluster, | |
708 | ms_public, | |
709 | ms_hb_front_client, | |
710 | ms_hb_back_client, | |
711 | ms_hb_front_server, | |
712 | ms_hb_back_server, | |
713 | ms_objecter, | |
714 | &mc, | |
715 | data_path, | |
f67539c2 TL |
716 | journal_path, |
717 | poolctx); | |
9f95a23c TL |
718 | |
719 | int err = osdptr->pre_init(); | |
7c673cae FG |
720 | if (err < 0) { |
721 | derr << TEXT_RED << " ** ERROR: osd pre_init failed: " << cpp_strerror(-err) | |
722 | << TEXT_NORMAL << dendl; | |
11fdf7f2 | 723 | forker.exit(1); |
7c673cae FG |
724 | } |
725 | ||
726 | ms_public->start(); | |
727 | ms_hb_front_client->start(); | |
728 | ms_hb_back_client->start(); | |
729 | ms_hb_front_server->start(); | |
730 | ms_hb_back_server->start(); | |
731 | ms_cluster->start(); | |
732 | ms_objecter->start(); | |
733 | ||
734 | // start osd | |
9f95a23c | 735 | err = osdptr->init(); |
7c673cae FG |
736 | if (err < 0) { |
737 | derr << TEXT_RED << " ** ERROR: osd init failed: " << cpp_strerror(-err) | |
738 | << TEXT_NORMAL << dendl; | |
11fdf7f2 | 739 | forker.exit(1); |
7c673cae FG |
740 | } |
741 | ||
11fdf7f2 TL |
742 | // -- daemonize -- |
743 | ||
744 | if (g_conf()->daemonize) { | |
745 | global_init_postfork_finish(g_ceph_context); | |
746 | forker.daemonize(); | |
747 | } | |
748 | ||
749 | ||
7c673cae FG |
750 | register_async_signal_handler_oneshot(SIGINT, handle_osd_signal); |
751 | register_async_signal_handler_oneshot(SIGTERM, handle_osd_signal); | |
752 | ||
9f95a23c | 753 | osdptr->final_init(); |
7c673cae | 754 | |
11fdf7f2 | 755 | if (g_conf().get_val<bool>("inject_early_sigterm")) |
7c673cae FG |
756 | kill(getpid(), SIGTERM); |
757 | ||
758 | ms_public->wait(); | |
759 | ms_hb_front_client->wait(); | |
760 | ms_hb_back_client->wait(); | |
761 | ms_hb_front_server->wait(); | |
762 | ms_hb_back_server->wait(); | |
763 | ms_cluster->wait(); | |
764 | ms_objecter->wait(); | |
765 | ||
766 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
767 | unregister_async_signal_handler(SIGINT, handle_osd_signal); | |
768 | unregister_async_signal_handler(SIGTERM, handle_osd_signal); | |
769 | shutdown_async_signal_handler(); | |
770 | ||
771 | // done | |
f67539c2 | 772 | poolctx.stop(); |
9f95a23c | 773 | delete osdptr; |
7c673cae FG |
774 | delete ms_public; |
775 | delete ms_hb_front_client; | |
776 | delete ms_hb_back_client; | |
777 | delete ms_hb_front_server; | |
778 | delete ms_hb_back_server; | |
779 | delete ms_cluster; | |
780 | delete ms_objecter; | |
781 | ||
782 | client_byte_throttler.reset(); | |
f6b5b4d7 | 783 | client_msg_throttler.reset(); |
7c673cae FG |
784 | |
785 | // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. | |
786 | char s[20]; | |
787 | snprintf(s, sizeof(s), "gmon/%d", getpid()); | |
788 | if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) { | |
789 | dout(0) << "ceph-osd: gmon.out should be in " << s << dendl; | |
790 | } | |
791 | ||
792 | return 0; | |
793 | } |