]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include <sys/types.h> | |
16 | #include <sys/stat.h> | |
17 | #include <fcntl.h> | |
18 | #include <boost/scoped_ptr.hpp> | |
19 | ||
20 | #include <iostream> | |
21 | #include <string> | |
7c673cae | 22 | |
f67539c2 | 23 | #include "auth/KeyRing.h" |
7c673cae FG |
24 | #include "osd/OSD.h" |
25 | #include "os/ObjectStore.h" | |
26 | #include "mon/MonClient.h" | |
27 | #include "include/ceph_features.h" | |
7c673cae FG |
28 | #include "common/config.h" |
29 | ||
30 | #include "mon/MonMap.h" | |
31 | ||
32 | #include "msg/Messenger.h" | |
33 | ||
11fdf7f2 | 34 | #include "common/Throttle.h" |
7c673cae FG |
35 | #include "common/Timer.h" |
36 | #include "common/TracepointProvider.h" | |
37 | #include "common/ceph_argparse.h" | |
11fdf7f2 | 38 | #include "common/numa.h" |
7c673cae FG |
39 | |
40 | #include "global/global_init.h" | |
41 | #include "global/signal_handler.h" | |
42 | ||
43 | #include "include/color.h" | |
44 | #include "common/errno.h" | |
45 | #include "common/pick_address.h" | |
46 | ||
47 | #include "perfglue/heap_profiler.h" | |
48 | ||
11fdf7f2 TL |
49 | #include "include/ceph_assert.h" |
50 | ||
51 | #include "common/Preforker.h" | |
7c673cae FG |
52 | |
53 | #define dout_context g_ceph_context | |
54 | #define dout_subsys ceph_subsys_osd | |
55 | ||
f67539c2 TL |
56 | using std::cerr; |
57 | using std::cout; | |
58 | using std::map; | |
59 | using std::ostringstream; | |
60 | using std::string; | |
61 | using std::vector; | |
62 | ||
63 | using ceph::bufferlist; | |
64 | ||
7c673cae FG |
65 | namespace { |
66 | ||
67 | TracepointProvider::Traits osd_tracepoint_traits("libosd_tp.so", | |
68 | "osd_tracing"); | |
69 | TracepointProvider::Traits os_tracepoint_traits("libos_tp.so", | |
70 | "osd_objectstore_tracing"); | |
9f95a23c TL |
71 | TracepointProvider::Traits bluestore_tracepoint_traits("libbluestore_tp.so", |
72 | "bluestore_tracing"); | |
31f18b77 FG |
73 | #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS |
74 | TracepointProvider::Traits cyg_profile_traits("libcyg_profile_tp.so", | |
75 | "osd_function_tracing"); | |
76 | #endif | |
7c673cae FG |
77 | |
78 | } // anonymous namespace | |
79 | ||
9f95a23c | 80 | OSD *osdptr = nullptr; |
7c673cae FG |
81 | |
82 | void handle_osd_signal(int signum) | |
83 | { | |
9f95a23c TL |
84 | if (osdptr) |
85 | osdptr->handle_signal(signum); | |
7c673cae FG |
86 | } |
87 | ||
88 | static void usage() | |
89 | { | |
31f18b77 | 90 | cout << "usage: ceph-osd -i <ID> [flags]\n" |
7c673cae FG |
91 | << " --osd-data PATH data directory\n" |
92 | << " --osd-journal PATH\n" | |
93 | << " journal file or block device\n" | |
94 | << " --mkfs create a [new] data directory\n" | |
31f18b77 | 95 | << " --mkkey generate a new secret key. This is normally used in combination with --mkfs\n" |
11fdf7f2 TL |
96 | << " --monmap specify the path to the monitor map. This is normally used in combination with --mkfs\n" |
97 | << " --osd-uuid specify the OSD's fsid. This is normally used in combination with --mkfs\n" | |
98 | << " --keyring specify a path to the osd keyring. This is normally used in combination with --mkfs\n" | |
7c673cae FG |
99 | << " --convert-filestore\n" |
100 | << " run any pending upgrade operations\n" | |
101 | << " --flush-journal flush all data out of journal\n" | |
e306af50 TL |
102 | << " --osdspec-affinity\n" |
103 | << " set affinity to an osdspec\n" | |
11fdf7f2 | 104 | << " --dump-journal dump all data of journal\n" |
7c673cae FG |
105 | << " --mkjournal initialize a new journal\n" |
106 | << " --check-wants-journal\n" | |
107 | << " check whether a journal is desired\n" | |
108 | << " --check-allows-journal\n" | |
109 | << " check whether a journal is allowed\n" | |
110 | << " --check-needs-journal\n" | |
111 | << " check whether a journal is required\n" | |
112 | << " --debug_osd <N> set debug level (e.g. 10)\n" | |
113 | << " --get-device-fsid PATH\n" | |
114 | << " get OSD fsid for the given block device\n" | |
115 | << std::endl; | |
116 | generic_server_usage(); | |
117 | } | |
118 | ||
7c673cae | 119 | int main(int argc, const char **argv) |
7c673cae FG |
120 | { |
121 | vector<const char*> args; | |
122 | argv_to_vec(argc, argv, args); | |
11fdf7f2 TL |
123 | if (args.empty()) { |
124 | cerr << argv[0] << ": -h or --help for usage" << std::endl; | |
125 | exit(1); | |
126 | } | |
127 | if (ceph_argparse_need_usage(args)) { | |
128 | usage(); | |
129 | exit(0); | |
130 | } | |
7c673cae | 131 | |
11fdf7f2 TL |
132 | map<string,string> defaults = { |
133 | // We want to enable leveldb's log, while allowing users to override this | |
134 | // option, therefore we will pass it as a default argument to global_init(). | |
135 | { "leveldb_log", "" } | |
136 | }; | |
137 | auto cct = global_init( | |
138 | &defaults, | |
139 | args, CEPH_ENTITY_TYPE_OSD, | |
f67539c2 | 140 | CODE_ENVIRONMENT_DAEMON, 0); |
7c673cae FG |
141 | ceph_heap_profiler_init(); |
142 | ||
11fdf7f2 TL |
143 | Preforker forker; |
144 | ||
7c673cae FG |
145 | // osd specific args |
146 | bool mkfs = false; | |
147 | bool mkjournal = false; | |
148 | bool check_wants_journal = false; | |
149 | bool check_allows_journal = false; | |
150 | bool check_needs_journal = false; | |
151 | bool mkkey = false; | |
152 | bool flushjournal = false; | |
153 | bool dump_journal = false; | |
154 | bool convertfilestore = false; | |
155 | bool get_osd_fsid = false; | |
156 | bool get_cluster_fsid = false; | |
157 | bool get_journal_fsid = false; | |
158 | bool get_device_fsid = false; | |
159 | string device_path; | |
160 | std::string dump_pg_log; | |
e306af50 | 161 | std::string osdspec_affinity; |
7c673cae FG |
162 | |
163 | std::string val; | |
164 | for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) { | |
165 | if (ceph_argparse_double_dash(args, i)) { | |
166 | break; | |
7c673cae FG |
167 | } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) { |
168 | mkfs = true; | |
e306af50 TL |
169 | } else if (ceph_argparse_witharg(args, i, &val, "--osdspec-affinity", (char*)NULL)) { |
170 | osdspec_affinity = val; | |
7c673cae FG |
171 | } else if (ceph_argparse_flag(args, i, "--mkjournal", (char*)NULL)) { |
172 | mkjournal = true; | |
173 | } else if (ceph_argparse_flag(args, i, "--check-allows-journal", (char*)NULL)) { | |
174 | check_allows_journal = true; | |
175 | } else if (ceph_argparse_flag(args, i, "--check-wants-journal", (char*)NULL)) { | |
176 | check_wants_journal = true; | |
177 | } else if (ceph_argparse_flag(args, i, "--check-needs-journal", (char*)NULL)) { | |
178 | check_needs_journal = true; | |
179 | } else if (ceph_argparse_flag(args, i, "--mkkey", (char*)NULL)) { | |
180 | mkkey = true; | |
181 | } else if (ceph_argparse_flag(args, i, "--flush-journal", (char*)NULL)) { | |
182 | flushjournal = true; | |
183 | } else if (ceph_argparse_flag(args, i, "--convert-filestore", (char*)NULL)) { | |
184 | convertfilestore = true; | |
185 | } else if (ceph_argparse_witharg(args, i, &val, "--dump-pg-log", (char*)NULL)) { | |
186 | dump_pg_log = val; | |
187 | } else if (ceph_argparse_flag(args, i, "--dump-journal", (char*)NULL)) { | |
188 | dump_journal = true; | |
189 | } else if (ceph_argparse_flag(args, i, "--get-cluster-fsid", (char*)NULL)) { | |
190 | get_cluster_fsid = true; | |
191 | } else if (ceph_argparse_flag(args, i, "--get-osd-fsid", "--get-osd-uuid", (char*)NULL)) { | |
192 | get_osd_fsid = true; | |
193 | } else if (ceph_argparse_flag(args, i, "--get-journal-fsid", "--get-journal-uuid", (char*)NULL)) { | |
194 | get_journal_fsid = true; | |
195 | } else if (ceph_argparse_witharg(args, i, &device_path, | |
196 | "--get-device-fsid", (char*)NULL)) { | |
197 | get_device_fsid = true; | |
198 | } else { | |
199 | ++i; | |
200 | } | |
201 | } | |
202 | if (!args.empty()) { | |
11fdf7f2 TL |
203 | cerr << "unrecognized arg " << args[0] << std::endl; |
204 | exit(1); | |
7c673cae FG |
205 | } |
206 | ||
11fdf7f2 TL |
207 | if (global_init_prefork(g_ceph_context) >= 0) { |
208 | std::string err; | |
209 | int r = forker.prefork(err); | |
210 | if (r < 0) { | |
211 | cerr << err << std::endl; | |
212 | return r; | |
213 | } | |
214 | if (forker.is_parent()) { | |
215 | g_ceph_context->_log->start(); | |
216 | if (forker.parent_wait(err) != 0) { | |
217 | return -ENXIO; | |
218 | } | |
219 | return 0; | |
220 | } | |
221 | setsid(); | |
222 | global_init_postfork_start(g_ceph_context); | |
223 | } | |
224 | common_init_finish(g_ceph_context); | |
225 | global_init_chdir(g_ceph_context); | |
226 | ||
7c673cae | 227 | if (get_journal_fsid) { |
11fdf7f2 | 228 | device_path = g_conf().get_val<std::string>("osd_journal"); |
7c673cae FG |
229 | get_device_fsid = true; |
230 | } | |
231 | if (get_device_fsid) { | |
232 | uuid_d uuid; | |
233 | int r = ObjectStore::probe_block_device_fsid(g_ceph_context, device_path, | |
234 | &uuid); | |
235 | if (r < 0) { | |
236 | cerr << "failed to get device fsid for " << device_path | |
237 | << ": " << cpp_strerror(r) << std::endl; | |
11fdf7f2 | 238 | forker.exit(1); |
7c673cae FG |
239 | } |
240 | cout << uuid << std::endl; | |
11fdf7f2 | 241 | forker.exit(0); |
7c673cae FG |
242 | } |
243 | ||
244 | if (!dump_pg_log.empty()) { | |
245 | common_init_finish(g_ceph_context); | |
246 | bufferlist bl; | |
247 | std::string error; | |
11fdf7f2 TL |
248 | |
249 | if (bl.read_file(dump_pg_log.c_str(), &error) >= 0) { | |
7c673cae | 250 | pg_log_entry_t e; |
11fdf7f2 | 251 | auto p = bl.cbegin(); |
7c673cae FG |
252 | while (!p.end()) { |
253 | uint64_t pos = p.get_off(); | |
254 | try { | |
11fdf7f2 | 255 | decode(e, p); |
7c673cae | 256 | } |
f67539c2 | 257 | catch (const ceph::buffer::error &e) { |
7c673cae | 258 | derr << "failed to decode LogEntry at offset " << pos << dendl; |
11fdf7f2 | 259 | forker.exit(1); |
7c673cae FG |
260 | } |
261 | derr << pos << ":\t" << e << dendl; | |
262 | } | |
263 | } else { | |
264 | derr << "unable to open " << dump_pg_log << ": " << error << dendl; | |
265 | } | |
11fdf7f2 | 266 | forker.exit(0); |
7c673cae FG |
267 | } |
268 | ||
269 | // whoami | |
270 | char *end; | |
11fdf7f2 | 271 | const char *id = g_conf()->name.get_id().c_str(); |
7c673cae | 272 | int whoami = strtol(id, &end, 10); |
11fdf7f2 | 273 | std::string data_path = g_conf().get_val<std::string>("osd_data"); |
7c673cae FG |
274 | if (*end || end == id || whoami < 0) { |
275 | derr << "must specify '-i #' where # is the osd number" << dendl; | |
11fdf7f2 | 276 | forker.exit(1); |
7c673cae FG |
277 | } |
278 | ||
11fdf7f2 | 279 | if (data_path.empty()) { |
7c673cae | 280 | derr << "must specify '--osd-data=foo' data path" << dendl; |
11fdf7f2 | 281 | forker.exit(1); |
7c673cae FG |
282 | } |
283 | ||
284 | // the store | |
11fdf7f2 | 285 | std::string store_type; |
7c673cae FG |
286 | { |
287 | char fn[PATH_MAX]; | |
11fdf7f2 | 288 | snprintf(fn, sizeof(fn), "%s/type", data_path.c_str()); |
91327a77 | 289 | int fd = ::open(fn, O_RDONLY|O_CLOEXEC); |
7c673cae FG |
290 | if (fd >= 0) { |
291 | bufferlist bl; | |
292 | bl.read_fd(fd, 64); | |
293 | if (bl.length()) { | |
294 | store_type = string(bl.c_str(), bl.length() - 1); // drop \n | |
295 | dout(5) << "object store type is " << store_type << dendl; | |
296 | } | |
297 | ::close(fd); | |
11fdf7f2 TL |
298 | } else if (mkfs) { |
299 | store_type = g_conf().get_val<std::string>("osd_objectstore"); | |
300 | } else { | |
301 | // hrm, infer the type | |
302 | snprintf(fn, sizeof(fn), "%s/current", data_path.c_str()); | |
303 | struct stat st; | |
304 | if (::stat(fn, &st) == 0 && | |
305 | S_ISDIR(st.st_mode)) { | |
306 | derr << "missing 'type' file, inferring filestore from current/ dir" | |
307 | << dendl; | |
308 | store_type = "filestore"; | |
309 | } else { | |
310 | snprintf(fn, sizeof(fn), "%s/block", data_path.c_str()); | |
311 | if (::stat(fn, &st) == 0 && | |
312 | S_ISLNK(st.st_mode)) { | |
313 | derr << "missing 'type' file, inferring bluestore from block symlink" | |
314 | << dendl; | |
315 | store_type = "bluestore"; | |
316 | } else { | |
317 | derr << "missing 'type' file and unable to infer osd type" << dendl; | |
318 | forker.exit(1); | |
319 | } | |
320 | } | |
7c673cae FG |
321 | } |
322 | } | |
11fdf7f2 TL |
323 | |
324 | std::string journal_path = g_conf().get_val<std::string>("osd_journal"); | |
325 | uint32_t flags = g_conf().get_val<uint64_t>("osd_os_flags"); | |
7c673cae FG |
326 | ObjectStore *store = ObjectStore::create(g_ceph_context, |
327 | store_type, | |
11fdf7f2 TL |
328 | data_path, |
329 | journal_path, | |
330 | flags); | |
7c673cae FG |
331 | if (!store) { |
332 | derr << "unable to create object store" << dendl; | |
11fdf7f2 | 333 | forker.exit(-ENODEV); |
7c673cae FG |
334 | } |
335 | ||
7c673cae | 336 | |
7c673cae FG |
337 | if (mkkey) { |
338 | common_init_finish(g_ceph_context); | |
9f95a23c | 339 | KeyRing keyring; |
7c673cae | 340 | |
11fdf7f2 | 341 | EntityName ename{g_conf()->name}; |
7c673cae FG |
342 | EntityAuth eauth; |
343 | ||
11fdf7f2 | 344 | std::string keyring_path = g_conf().get_val<std::string>("keyring"); |
9f95a23c | 345 | int ret = keyring.load(g_ceph_context, keyring_path); |
7c673cae | 346 | if (ret == 0 && |
9f95a23c | 347 | keyring.get_auth(ename, eauth)) { |
11fdf7f2 | 348 | derr << "already have key in keyring " << keyring_path << dendl; |
7c673cae FG |
349 | } else { |
350 | eauth.key.create(g_ceph_context, CEPH_CRYPTO_AES); | |
9f95a23c | 351 | keyring.add(ename, eauth); |
7c673cae | 352 | bufferlist bl; |
9f95a23c | 353 | keyring.encode_plaintext(bl); |
11fdf7f2 | 354 | int r = bl.write_file(keyring_path.c_str(), 0600); |
7c673cae | 355 | if (r) |
11fdf7f2 TL |
356 | derr << TEXT_RED << " ** ERROR: writing new keyring to " |
357 | << keyring_path << ": " << cpp_strerror(r) << TEXT_NORMAL | |
358 | << dendl; | |
7c673cae | 359 | else |
11fdf7f2 | 360 | derr << "created new key in keyring " << keyring_path << dendl; |
7c673cae FG |
361 | } |
362 | } | |
e306af50 | 363 | |
3efd9988 FG |
364 | if (mkfs) { |
365 | common_init_finish(g_ceph_context); | |
3efd9988 | 366 | |
11fdf7f2 | 367 | if (g_conf().get_val<uuid_d>("fsid").is_zero()) { |
3efd9988 | 368 | derr << "must specify cluster fsid" << dendl; |
11fdf7f2 | 369 | forker.exit(-EINVAL); |
3efd9988 FG |
370 | } |
371 | ||
11fdf7f2 | 372 | int err = OSD::mkfs(g_ceph_context, store, g_conf().get_val<uuid_d>("fsid"), |
e306af50 | 373 | whoami, osdspec_affinity); |
3efd9988 FG |
374 | if (err < 0) { |
375 | derr << TEXT_RED << " ** ERROR: error creating empty object store in " | |
11fdf7f2 TL |
376 | << data_path << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
377 | forker.exit(1); | |
3efd9988 | 378 | } |
11fdf7f2 TL |
379 | dout(0) << "created object store " << data_path |
380 | << " for osd." << whoami | |
381 | << " fsid " << g_conf().get_val<uuid_d>("fsid") | |
382 | << dendl; | |
383 | } | |
384 | if (mkfs || mkkey) { | |
385 | forker.exit(0); | |
3efd9988 | 386 | } |
7c673cae FG |
387 | if (mkjournal) { |
388 | common_init_finish(g_ceph_context); | |
389 | int err = store->mkjournal(); | |
390 | if (err < 0) { | |
11fdf7f2 TL |
391 | derr << TEXT_RED << " ** ERROR: error creating fresh journal " |
392 | << journal_path << " for object store " << data_path << ": " | |
393 | << cpp_strerror(-err) << TEXT_NORMAL << dendl; | |
394 | forker.exit(1); | |
7c673cae | 395 | } |
11fdf7f2 TL |
396 | derr << "created new journal " << journal_path |
397 | << " for object store " << data_path << dendl; | |
398 | forker.exit(0); | |
7c673cae FG |
399 | } |
400 | if (check_wants_journal) { | |
401 | if (store->wants_journal()) { | |
d2e6a577 | 402 | cout << "wants journal: yes" << std::endl; |
11fdf7f2 | 403 | forker.exit(0); |
7c673cae | 404 | } else { |
d2e6a577 | 405 | cout << "wants journal: no" << std::endl; |
11fdf7f2 | 406 | forker.exit(1); |
7c673cae FG |
407 | } |
408 | } | |
409 | if (check_allows_journal) { | |
410 | if (store->allows_journal()) { | |
d2e6a577 | 411 | cout << "allows journal: yes" << std::endl; |
11fdf7f2 | 412 | forker.exit(0); |
7c673cae | 413 | } else { |
d2e6a577 | 414 | cout << "allows journal: no" << std::endl; |
11fdf7f2 | 415 | forker.exit(1); |
7c673cae FG |
416 | } |
417 | } | |
418 | if (check_needs_journal) { | |
419 | if (store->needs_journal()) { | |
d2e6a577 | 420 | cout << "needs journal: yes" << std::endl; |
11fdf7f2 | 421 | forker.exit(0); |
7c673cae | 422 | } else { |
d2e6a577 | 423 | cout << "needs journal: no" << std::endl; |
11fdf7f2 | 424 | forker.exit(1); |
7c673cae FG |
425 | } |
426 | } | |
427 | if (flushjournal) { | |
428 | common_init_finish(g_ceph_context); | |
429 | int err = store->mount(); | |
430 | if (err < 0) { | |
11fdf7f2 TL |
431 | derr << TEXT_RED << " ** ERROR: error flushing journal " << journal_path |
432 | << " for object store " << data_path | |
7c673cae FG |
433 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
434 | goto flushjournal_out; | |
435 | } | |
436 | store->umount(); | |
11fdf7f2 TL |
437 | derr << "flushed journal " << journal_path |
438 | << " for object store " << data_path | |
7c673cae FG |
439 | << dendl; |
440 | flushjournal_out: | |
441 | delete store; | |
11fdf7f2 | 442 | forker.exit(err < 0 ? 1 : 0); |
7c673cae FG |
443 | } |
444 | if (dump_journal) { | |
445 | common_init_finish(g_ceph_context); | |
446 | int err = store->dump_journal(cout); | |
447 | if (err < 0) { | |
11fdf7f2 TL |
448 | derr << TEXT_RED << " ** ERROR: error dumping journal " << journal_path |
449 | << " for object store " << data_path | |
7c673cae | 450 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 451 | forker.exit(1); |
7c673cae | 452 | } |
11fdf7f2 TL |
453 | derr << "dumped journal " << journal_path |
454 | << " for object store " << data_path | |
7c673cae | 455 | << dendl; |
11fdf7f2 | 456 | forker.exit(0); |
7c673cae FG |
457 | } |
458 | ||
7c673cae FG |
459 | if (convertfilestore) { |
460 | int err = store->mount(); | |
461 | if (err < 0) { | |
11fdf7f2 | 462 | derr << TEXT_RED << " ** ERROR: error mounting store " << data_path |
7c673cae | 463 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 464 | forker.exit(1); |
7c673cae FG |
465 | } |
466 | err = store->upgrade(); | |
467 | store->umount(); | |
468 | if (err < 0) { | |
11fdf7f2 | 469 | derr << TEXT_RED << " ** ERROR: error converting store " << data_path |
7c673cae | 470 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 471 | forker.exit(1); |
7c673cae | 472 | } |
11fdf7f2 | 473 | forker.exit(0); |
7c673cae FG |
474 | } |
475 | ||
476 | string magic; | |
477 | uuid_d cluster_fsid, osd_fsid; | |
9f95a23c | 478 | ceph_release_t require_osd_release = ceph_release_t::unknown; |
7c673cae | 479 | int w; |
11fdf7f2 TL |
480 | int r = OSD::peek_meta(store, &magic, &cluster_fsid, &osd_fsid, &w, |
481 | &require_osd_release); | |
7c673cae FG |
482 | if (r < 0) { |
483 | derr << TEXT_RED << " ** ERROR: unable to open OSD superblock on " | |
11fdf7f2 | 484 | << data_path << ": " << cpp_strerror(-r) |
7c673cae FG |
485 | << TEXT_NORMAL << dendl; |
486 | if (r == -ENOTSUP) { | |
487 | derr << TEXT_RED << " ** please verify that underlying storage " | |
488 | << "supports xattrs" << TEXT_NORMAL << dendl; | |
489 | } | |
11fdf7f2 | 490 | forker.exit(1); |
7c673cae FG |
491 | } |
492 | if (w != whoami) { | |
493 | derr << "OSD id " << w << " != my id " << whoami << dendl; | |
11fdf7f2 | 494 | forker.exit(1); |
7c673cae FG |
495 | } |
496 | if (strcmp(magic.c_str(), CEPH_OSD_ONDISK_MAGIC)) { | |
497 | derr << "OSD magic " << magic << " != my " << CEPH_OSD_ONDISK_MAGIC | |
498 | << dendl; | |
11fdf7f2 | 499 | forker.exit(1); |
7c673cae FG |
500 | } |
501 | ||
502 | if (get_cluster_fsid) { | |
503 | cout << cluster_fsid << std::endl; | |
11fdf7f2 | 504 | forker.exit(0); |
7c673cae FG |
505 | } |
506 | if (get_osd_fsid) { | |
507 | cout << osd_fsid << std::endl; | |
11fdf7f2 | 508 | forker.exit(0); |
7c673cae FG |
509 | } |
510 | ||
9f95a23c | 511 | { |
9f95a23c | 512 | ostringstream err; |
f67539c2 | 513 | if (!can_upgrade_from(require_osd_release, "require_osd_release", err)) { |
9f95a23c TL |
514 | derr << err.str() << dendl; |
515 | forker.exit(1); | |
516 | } | |
11fdf7f2 | 517 | } |
7c673cae | 518 | |
11fdf7f2 TL |
519 | // consider objectstore numa node |
520 | int os_numa_node = -1; | |
521 | r = store->get_numa_node(&os_numa_node, nullptr, nullptr); | |
522 | if (r >= 0 && os_numa_node >= 0) { | |
523 | dout(1) << " objectstore numa_node " << os_numa_node << dendl; | |
524 | } | |
525 | int iface_preferred_numa_node = -1; | |
526 | if (g_conf().get_val<bool>("osd_numa_prefer_iface")) { | |
527 | iface_preferred_numa_node = os_numa_node; | |
7c673cae FG |
528 | } |
529 | ||
11fdf7f2 TL |
530 | // messengers |
531 | std::string msg_type = g_conf().get_val<std::string>("ms_type"); | |
532 | std::string public_msg_type = | |
533 | g_conf().get_val<std::string>("ms_public_type"); | |
534 | std::string cluster_msg_type = | |
535 | g_conf().get_val<std::string>("ms_cluster_type"); | |
536 | ||
537 | public_msg_type = public_msg_type.empty() ? msg_type : public_msg_type; | |
538 | cluster_msg_type = cluster_msg_type.empty() ? msg_type : cluster_msg_type; | |
9f95a23c | 539 | uint64_t nonce = Messenger::get_pid_nonce(); |
11fdf7f2 | 540 | Messenger *ms_public = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 541 | entity_name_t::OSD(whoami), "client", nonce); |
11fdf7f2 | 542 | Messenger *ms_cluster = Messenger::create(g_ceph_context, cluster_msg_type, |
f67539c2 | 543 | entity_name_t::OSD(whoami), "cluster", nonce); |
11fdf7f2 | 544 | Messenger *ms_hb_back_client = Messenger::create(g_ceph_context, cluster_msg_type, |
f67539c2 | 545 | entity_name_t::OSD(whoami), "hb_back_client", nonce); |
11fdf7f2 | 546 | Messenger *ms_hb_front_client = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 547 | entity_name_t::OSD(whoami), "hb_front_client", nonce); |
11fdf7f2 | 548 | Messenger *ms_hb_back_server = Messenger::create(g_ceph_context, cluster_msg_type, |
f67539c2 | 549 | entity_name_t::OSD(whoami), "hb_back_server", nonce); |
11fdf7f2 | 550 | Messenger *ms_hb_front_server = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 551 | entity_name_t::OSD(whoami), "hb_front_server", nonce); |
11fdf7f2 | 552 | Messenger *ms_objecter = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 553 | entity_name_t::OSD(whoami), "ms_objecter", nonce); |
7c673cae | 554 | if (!ms_public || !ms_cluster || !ms_hb_front_client || !ms_hb_back_client || !ms_hb_back_server || !ms_hb_front_server || !ms_objecter) |
11fdf7f2 | 555 | forker.exit(1); |
7c673cae FG |
556 | ms_cluster->set_cluster_protocol(CEPH_OSD_PROTOCOL); |
557 | ms_hb_front_client->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
558 | ms_hb_back_client->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
559 | ms_hb_back_server->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
560 | ms_hb_front_server->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
561 | ||
11fdf7f2 TL |
562 | dout(0) << "starting osd." << whoami |
563 | << " osd_data " << data_path | |
564 | << " " << ((journal_path.empty()) ? | |
565 | "(no journal)" : journal_path) | |
566 | << dendl; | |
7c673cae | 567 | |
11fdf7f2 TL |
568 | uint64_t message_size = |
569 | g_conf().get_val<Option::size_t>("osd_client_message_size_cap"); | |
7c673cae | 570 | boost::scoped_ptr<Throttle> client_byte_throttler( |
11fdf7f2 | 571 | new Throttle(g_ceph_context, "osd_client_bytes", message_size)); |
f6b5b4d7 TL |
572 | uint64_t message_cap = g_conf().get_val<uint64_t>("osd_client_message_cap"); |
573 | boost::scoped_ptr<Throttle> client_msg_throttler( | |
574 | new Throttle(g_ceph_context, "osd_client_messages", message_cap)); | |
7c673cae FG |
575 | |
576 | // All feature bits 0 - 34 should be present from dumpling v0.67 forward | |
577 | uint64_t osd_required = | |
578 | CEPH_FEATURE_UID | | |
579 | CEPH_FEATURE_PGID64 | | |
580 | CEPH_FEATURE_OSDENC; | |
581 | ||
9f95a23c | 582 | ms_public->set_default_policy(Messenger::Policy::stateless_registered_server(0)); |
7c673cae FG |
583 | ms_public->set_policy_throttlers(entity_name_t::TYPE_CLIENT, |
584 | client_byte_throttler.get(), | |
f6b5b4d7 | 585 | client_msg_throttler.get()); |
7c673cae | 586 | ms_public->set_policy(entity_name_t::TYPE_MON, |
11fdf7f2 | 587 | Messenger::Policy::lossy_client(osd_required)); |
7c673cae | 588 | ms_public->set_policy(entity_name_t::TYPE_MGR, |
11fdf7f2 | 589 | Messenger::Policy::lossy_client(osd_required)); |
7c673cae | 590 | |
7c673cae FG |
591 | ms_cluster->set_default_policy(Messenger::Policy::stateless_server(0)); |
592 | ms_cluster->set_policy(entity_name_t::TYPE_MON, Messenger::Policy::lossy_client(0)); | |
593 | ms_cluster->set_policy(entity_name_t::TYPE_OSD, | |
594 | Messenger::Policy::lossless_peer(osd_required)); | |
595 | ms_cluster->set_policy(entity_name_t::TYPE_CLIENT, | |
596 | Messenger::Policy::stateless_server(0)); | |
597 | ||
598 | ms_hb_front_client->set_policy(entity_name_t::TYPE_OSD, | |
599 | Messenger::Policy::lossy_client(0)); | |
600 | ms_hb_back_client->set_policy(entity_name_t::TYPE_OSD, | |
601 | Messenger::Policy::lossy_client(0)); | |
602 | ms_hb_back_server->set_policy(entity_name_t::TYPE_OSD, | |
603 | Messenger::Policy::stateless_server(0)); | |
604 | ms_hb_front_server->set_policy(entity_name_t::TYPE_OSD, | |
605 | Messenger::Policy::stateless_server(0)); | |
606 | ||
607 | ms_objecter->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX)); | |
608 | ||
11fdf7f2 TL |
609 | entity_addrvec_t public_addrs, cluster_addrs; |
610 | r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC, &public_addrs, | |
611 | iface_preferred_numa_node); | |
612 | if (r < 0) { | |
613 | derr << "Failed to pick public address." << dendl; | |
614 | forker.exit(1); | |
615 | } | |
616 | r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_CLUSTER, &cluster_addrs, | |
617 | iface_preferred_numa_node); | |
618 | if (r < 0) { | |
619 | derr << "Failed to pick cluster address." << dendl; | |
620 | forker.exit(1); | |
621 | } | |
622 | ||
623 | if (ms_public->bindv(public_addrs) < 0) | |
624 | forker.exit(1); | |
7c673cae | 625 | |
11fdf7f2 TL |
626 | if (ms_cluster->bindv(cluster_addrs) < 0) |
627 | forker.exit(1); | |
628 | ||
629 | bool is_delay = g_conf().get_val<bool>("osd_heartbeat_use_min_delay_socket"); | |
630 | if (is_delay) { | |
7c673cae FG |
631 | ms_hb_front_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); |
632 | ms_hb_back_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
633 | ms_hb_back_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
634 | ms_hb_front_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
635 | } | |
636 | ||
11fdf7f2 TL |
637 | entity_addrvec_t hb_front_addrs = public_addrs; |
638 | for (auto& a : hb_front_addrs.v) { | |
639 | a.set_port(0); | |
7c673cae | 640 | } |
11fdf7f2 TL |
641 | if (ms_hb_front_server->bindv(hb_front_addrs) < 0) |
642 | forker.exit(1); | |
643 | if (ms_hb_front_client->client_bind(hb_front_addrs.front()) < 0) | |
644 | forker.exit(1); | |
645 | ||
646 | entity_addrvec_t hb_back_addrs = cluster_addrs; | |
647 | for (auto& a : hb_back_addrs.v) { | |
648 | a.set_port(0); | |
649 | } | |
650 | if (ms_hb_back_server->bindv(hb_back_addrs) < 0) | |
651 | forker.exit(1); | |
652 | if (ms_hb_back_client->client_bind(hb_back_addrs.front()) < 0) | |
653 | forker.exit(1); | |
7c673cae | 654 | |
11fdf7f2 TL |
655 | // install signal handlers |
656 | init_async_signal_handler(); | |
657 | register_async_signal_handler(SIGHUP, sighup_handler); | |
7c673cae FG |
658 | |
659 | TracepointProvider::initialize<osd_tracepoint_traits>(g_ceph_context); | |
660 | TracepointProvider::initialize<os_tracepoint_traits>(g_ceph_context); | |
9f95a23c | 661 | TracepointProvider::initialize<bluestore_tracepoint_traits>(g_ceph_context); |
31f18b77 FG |
662 | #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS |
663 | TracepointProvider::initialize<cyg_profile_traits>(g_ceph_context); | |
664 | #endif | |
7c673cae | 665 | |
11fdf7f2 TL |
666 | srand(time(NULL) + getpid()); |
667 | ||
f67539c2 TL |
668 | ceph::async::io_context_pool poolctx( |
669 | cct->_conf.get_val<std::uint64_t>("osd_asio_thread_count")); | |
670 | ||
671 | MonClient mc(g_ceph_context, poolctx); | |
7c673cae FG |
672 | if (mc.build_initial_monmap() < 0) |
673 | return -1; | |
674 | global_init_chdir(g_ceph_context); | |
675 | ||
11fdf7f2 TL |
676 | if (global_init_preload_erasure_code(g_ceph_context) < 0) { |
677 | forker.exit(1); | |
678 | } | |
224ce89b | 679 | |
9f95a23c TL |
680 | osdptr = new OSD(g_ceph_context, |
681 | store, | |
682 | whoami, | |
683 | ms_cluster, | |
684 | ms_public, | |
685 | ms_hb_front_client, | |
686 | ms_hb_back_client, | |
687 | ms_hb_front_server, | |
688 | ms_hb_back_server, | |
689 | ms_objecter, | |
690 | &mc, | |
691 | data_path, | |
f67539c2 TL |
692 | journal_path, |
693 | poolctx); | |
9f95a23c TL |
694 | |
695 | int err = osdptr->pre_init(); | |
7c673cae FG |
696 | if (err < 0) { |
697 | derr << TEXT_RED << " ** ERROR: osd pre_init failed: " << cpp_strerror(-err) | |
698 | << TEXT_NORMAL << dendl; | |
11fdf7f2 | 699 | forker.exit(1); |
7c673cae FG |
700 | } |
701 | ||
702 | ms_public->start(); | |
703 | ms_hb_front_client->start(); | |
704 | ms_hb_back_client->start(); | |
705 | ms_hb_front_server->start(); | |
706 | ms_hb_back_server->start(); | |
707 | ms_cluster->start(); | |
708 | ms_objecter->start(); | |
709 | ||
710 | // start osd | |
9f95a23c | 711 | err = osdptr->init(); |
7c673cae FG |
712 | if (err < 0) { |
713 | derr << TEXT_RED << " ** ERROR: osd init failed: " << cpp_strerror(-err) | |
714 | << TEXT_NORMAL << dendl; | |
11fdf7f2 | 715 | forker.exit(1); |
7c673cae FG |
716 | } |
717 | ||
11fdf7f2 TL |
718 | // -- daemonize -- |
719 | ||
720 | if (g_conf()->daemonize) { | |
721 | global_init_postfork_finish(g_ceph_context); | |
722 | forker.daemonize(); | |
723 | } | |
724 | ||
725 | ||
7c673cae FG |
726 | register_async_signal_handler_oneshot(SIGINT, handle_osd_signal); |
727 | register_async_signal_handler_oneshot(SIGTERM, handle_osd_signal); | |
728 | ||
9f95a23c | 729 | osdptr->final_init(); |
7c673cae | 730 | |
11fdf7f2 | 731 | if (g_conf().get_val<bool>("inject_early_sigterm")) |
7c673cae FG |
732 | kill(getpid(), SIGTERM); |
733 | ||
734 | ms_public->wait(); | |
735 | ms_hb_front_client->wait(); | |
736 | ms_hb_back_client->wait(); | |
737 | ms_hb_front_server->wait(); | |
738 | ms_hb_back_server->wait(); | |
739 | ms_cluster->wait(); | |
740 | ms_objecter->wait(); | |
741 | ||
742 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
743 | unregister_async_signal_handler(SIGINT, handle_osd_signal); | |
744 | unregister_async_signal_handler(SIGTERM, handle_osd_signal); | |
745 | shutdown_async_signal_handler(); | |
746 | ||
747 | // done | |
f67539c2 | 748 | poolctx.stop(); |
9f95a23c | 749 | delete osdptr; |
7c673cae FG |
750 | delete ms_public; |
751 | delete ms_hb_front_client; | |
752 | delete ms_hb_back_client; | |
753 | delete ms_hb_front_server; | |
754 | delete ms_hb_back_server; | |
755 | delete ms_cluster; | |
756 | delete ms_objecter; | |
757 | ||
758 | client_byte_throttler.reset(); | |
f6b5b4d7 | 759 | client_msg_throttler.reset(); |
7c673cae FG |
760 | |
761 | // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. | |
762 | char s[20]; | |
763 | snprintf(s, sizeof(s), "gmon/%d", getpid()); | |
764 | if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) { | |
765 | dout(0) << "ceph-osd: gmon.out should be in " << s << dendl; | |
766 | } | |
767 | ||
768 | return 0; | |
769 | } |