]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include <sys/types.h> | |
16 | #include <sys/stat.h> | |
17 | #include <fcntl.h> | |
18 | #include <boost/scoped_ptr.hpp> | |
19 | ||
20 | #include <iostream> | |
21 | #include <string> | |
7c673cae | 22 | |
f67539c2 | 23 | #include "auth/KeyRing.h" |
7c673cae FG |
24 | #include "osd/OSD.h" |
25 | #include "os/ObjectStore.h" | |
26 | #include "mon/MonClient.h" | |
27 | #include "include/ceph_features.h" | |
7c673cae FG |
28 | #include "common/config.h" |
29 | ||
30 | #include "mon/MonMap.h" | |
31 | ||
32 | #include "msg/Messenger.h" | |
33 | ||
11fdf7f2 | 34 | #include "common/Throttle.h" |
7c673cae FG |
35 | #include "common/Timer.h" |
36 | #include "common/TracepointProvider.h" | |
37 | #include "common/ceph_argparse.h" | |
11fdf7f2 | 38 | #include "common/numa.h" |
7c673cae FG |
39 | |
40 | #include "global/global_init.h" | |
41 | #include "global/signal_handler.h" | |
42 | ||
43 | #include "include/color.h" | |
44 | #include "common/errno.h" | |
45 | #include "common/pick_address.h" | |
46 | ||
47 | #include "perfglue/heap_profiler.h" | |
48 | ||
11fdf7f2 TL |
49 | #include "include/ceph_assert.h" |
50 | ||
51 | #include "common/Preforker.h" | |
7c673cae FG |
52 | |
53 | #define dout_context g_ceph_context | |
54 | #define dout_subsys ceph_subsys_osd | |
55 | ||
f67539c2 TL |
56 | using std::cerr; |
57 | using std::cout; | |
58 | using std::map; | |
59 | using std::ostringstream; | |
60 | using std::string; | |
61 | using std::vector; | |
62 | ||
63 | using ceph::bufferlist; | |
64 | ||
7c673cae FG |
65 | namespace { |
66 | ||
67 | TracepointProvider::Traits osd_tracepoint_traits("libosd_tp.so", | |
68 | "osd_tracing"); | |
69 | TracepointProvider::Traits os_tracepoint_traits("libos_tp.so", | |
70 | "osd_objectstore_tracing"); | |
9f95a23c TL |
71 | TracepointProvider::Traits bluestore_tracepoint_traits("libbluestore_tp.so", |
72 | "bluestore_tracing"); | |
31f18b77 FG |
73 | #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS |
74 | TracepointProvider::Traits cyg_profile_traits("libcyg_profile_tp.so", | |
75 | "osd_function_tracing"); | |
76 | #endif | |
7c673cae FG |
77 | |
78 | } // anonymous namespace | |
79 | ||
9f95a23c | 80 | OSD *osdptr = nullptr; |
7c673cae FG |
81 | |
82 | void handle_osd_signal(int signum) | |
83 | { | |
9f95a23c TL |
84 | if (osdptr) |
85 | osdptr->handle_signal(signum); | |
7c673cae FG |
86 | } |
87 | ||
88 | static void usage() | |
89 | { | |
31f18b77 | 90 | cout << "usage: ceph-osd -i <ID> [flags]\n" |
7c673cae FG |
91 | << " --osd-data PATH data directory\n" |
92 | << " --osd-journal PATH\n" | |
93 | << " journal file or block device\n" | |
94 | << " --mkfs create a [new] data directory\n" | |
31f18b77 | 95 | << " --mkkey generate a new secret key. This is normally used in combination with --mkfs\n" |
11fdf7f2 TL |
96 | << " --monmap specify the path to the monitor map. This is normally used in combination with --mkfs\n" |
97 | << " --osd-uuid specify the OSD's fsid. This is normally used in combination with --mkfs\n" | |
98 | << " --keyring specify a path to the osd keyring. This is normally used in combination with --mkfs\n" | |
7c673cae FG |
99 | << " --convert-filestore\n" |
100 | << " run any pending upgrade operations\n" | |
101 | << " --flush-journal flush all data out of journal\n" | |
e306af50 TL |
102 | << " --osdspec-affinity\n" |
103 | << " set affinity to an osdspec\n" | |
11fdf7f2 | 104 | << " --dump-journal dump all data of journal\n" |
7c673cae FG |
105 | << " --mkjournal initialize a new journal\n" |
106 | << " --check-wants-journal\n" | |
107 | << " check whether a journal is desired\n" | |
108 | << " --check-allows-journal\n" | |
109 | << " check whether a journal is allowed\n" | |
110 | << " --check-needs-journal\n" | |
111 | << " check whether a journal is required\n" | |
112 | << " --debug_osd <N> set debug level (e.g. 10)\n" | |
113 | << " --get-device-fsid PATH\n" | |
114 | << " get OSD fsid for the given block device\n" | |
115 | << std::endl; | |
116 | generic_server_usage(); | |
117 | } | |
118 | ||
7c673cae | 119 | int main(int argc, const char **argv) |
7c673cae | 120 | { |
20effc67 | 121 | auto args = argv_to_vec(argc, argv); |
11fdf7f2 TL |
122 | if (args.empty()) { |
123 | cerr << argv[0] << ": -h or --help for usage" << std::endl; | |
124 | exit(1); | |
125 | } | |
126 | if (ceph_argparse_need_usage(args)) { | |
127 | usage(); | |
128 | exit(0); | |
129 | } | |
7c673cae | 130 | |
11fdf7f2 TL |
131 | map<string,string> defaults = { |
132 | // We want to enable leveldb's log, while allowing users to override this | |
133 | // option, therefore we will pass it as a default argument to global_init(). | |
134 | { "leveldb_log", "" } | |
135 | }; | |
136 | auto cct = global_init( | |
137 | &defaults, | |
138 | args, CEPH_ENTITY_TYPE_OSD, | |
f67539c2 | 139 | CODE_ENVIRONMENT_DAEMON, 0); |
7c673cae FG |
140 | ceph_heap_profiler_init(); |
141 | ||
11fdf7f2 TL |
142 | Preforker forker; |
143 | ||
7c673cae FG |
144 | // osd specific args |
145 | bool mkfs = false; | |
146 | bool mkjournal = false; | |
147 | bool check_wants_journal = false; | |
148 | bool check_allows_journal = false; | |
149 | bool check_needs_journal = false; | |
150 | bool mkkey = false; | |
151 | bool flushjournal = false; | |
152 | bool dump_journal = false; | |
153 | bool convertfilestore = false; | |
154 | bool get_osd_fsid = false; | |
155 | bool get_cluster_fsid = false; | |
156 | bool get_journal_fsid = false; | |
157 | bool get_device_fsid = false; | |
158 | string device_path; | |
159 | std::string dump_pg_log; | |
e306af50 | 160 | std::string osdspec_affinity; |
7c673cae FG |
161 | |
162 | std::string val; | |
163 | for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) { | |
164 | if (ceph_argparse_double_dash(args, i)) { | |
165 | break; | |
7c673cae FG |
166 | } else if (ceph_argparse_flag(args, i, "--mkfs", (char*)NULL)) { |
167 | mkfs = true; | |
e306af50 TL |
168 | } else if (ceph_argparse_witharg(args, i, &val, "--osdspec-affinity", (char*)NULL)) { |
169 | osdspec_affinity = val; | |
7c673cae FG |
170 | } else if (ceph_argparse_flag(args, i, "--mkjournal", (char*)NULL)) { |
171 | mkjournal = true; | |
172 | } else if (ceph_argparse_flag(args, i, "--check-allows-journal", (char*)NULL)) { | |
173 | check_allows_journal = true; | |
174 | } else if (ceph_argparse_flag(args, i, "--check-wants-journal", (char*)NULL)) { | |
175 | check_wants_journal = true; | |
176 | } else if (ceph_argparse_flag(args, i, "--check-needs-journal", (char*)NULL)) { | |
177 | check_needs_journal = true; | |
178 | } else if (ceph_argparse_flag(args, i, "--mkkey", (char*)NULL)) { | |
179 | mkkey = true; | |
180 | } else if (ceph_argparse_flag(args, i, "--flush-journal", (char*)NULL)) { | |
181 | flushjournal = true; | |
182 | } else if (ceph_argparse_flag(args, i, "--convert-filestore", (char*)NULL)) { | |
183 | convertfilestore = true; | |
184 | } else if (ceph_argparse_witharg(args, i, &val, "--dump-pg-log", (char*)NULL)) { | |
185 | dump_pg_log = val; | |
186 | } else if (ceph_argparse_flag(args, i, "--dump-journal", (char*)NULL)) { | |
187 | dump_journal = true; | |
188 | } else if (ceph_argparse_flag(args, i, "--get-cluster-fsid", (char*)NULL)) { | |
189 | get_cluster_fsid = true; | |
190 | } else if (ceph_argparse_flag(args, i, "--get-osd-fsid", "--get-osd-uuid", (char*)NULL)) { | |
191 | get_osd_fsid = true; | |
192 | } else if (ceph_argparse_flag(args, i, "--get-journal-fsid", "--get-journal-uuid", (char*)NULL)) { | |
193 | get_journal_fsid = true; | |
194 | } else if (ceph_argparse_witharg(args, i, &device_path, | |
195 | "--get-device-fsid", (char*)NULL)) { | |
196 | get_device_fsid = true; | |
197 | } else { | |
198 | ++i; | |
199 | } | |
200 | } | |
201 | if (!args.empty()) { | |
11fdf7f2 TL |
202 | cerr << "unrecognized arg " << args[0] << std::endl; |
203 | exit(1); | |
7c673cae FG |
204 | } |
205 | ||
11fdf7f2 TL |
206 | if (global_init_prefork(g_ceph_context) >= 0) { |
207 | std::string err; | |
208 | int r = forker.prefork(err); | |
209 | if (r < 0) { | |
210 | cerr << err << std::endl; | |
211 | return r; | |
212 | } | |
213 | if (forker.is_parent()) { | |
214 | g_ceph_context->_log->start(); | |
215 | if (forker.parent_wait(err) != 0) { | |
216 | return -ENXIO; | |
217 | } | |
218 | return 0; | |
219 | } | |
220 | setsid(); | |
221 | global_init_postfork_start(g_ceph_context); | |
222 | } | |
223 | common_init_finish(g_ceph_context); | |
224 | global_init_chdir(g_ceph_context); | |
225 | ||
7c673cae | 226 | if (get_journal_fsid) { |
11fdf7f2 | 227 | device_path = g_conf().get_val<std::string>("osd_journal"); |
7c673cae FG |
228 | get_device_fsid = true; |
229 | } | |
230 | if (get_device_fsid) { | |
231 | uuid_d uuid; | |
232 | int r = ObjectStore::probe_block_device_fsid(g_ceph_context, device_path, | |
233 | &uuid); | |
234 | if (r < 0) { | |
235 | cerr << "failed to get device fsid for " << device_path | |
236 | << ": " << cpp_strerror(r) << std::endl; | |
11fdf7f2 | 237 | forker.exit(1); |
7c673cae FG |
238 | } |
239 | cout << uuid << std::endl; | |
11fdf7f2 | 240 | forker.exit(0); |
7c673cae FG |
241 | } |
242 | ||
243 | if (!dump_pg_log.empty()) { | |
244 | common_init_finish(g_ceph_context); | |
245 | bufferlist bl; | |
246 | std::string error; | |
11fdf7f2 TL |
247 | |
248 | if (bl.read_file(dump_pg_log.c_str(), &error) >= 0) { | |
7c673cae | 249 | pg_log_entry_t e; |
11fdf7f2 | 250 | auto p = bl.cbegin(); |
7c673cae FG |
251 | while (!p.end()) { |
252 | uint64_t pos = p.get_off(); | |
253 | try { | |
11fdf7f2 | 254 | decode(e, p); |
7c673cae | 255 | } |
f67539c2 | 256 | catch (const ceph::buffer::error &e) { |
7c673cae | 257 | derr << "failed to decode LogEntry at offset " << pos << dendl; |
11fdf7f2 | 258 | forker.exit(1); |
7c673cae FG |
259 | } |
260 | derr << pos << ":\t" << e << dendl; | |
261 | } | |
262 | } else { | |
263 | derr << "unable to open " << dump_pg_log << ": " << error << dendl; | |
264 | } | |
11fdf7f2 | 265 | forker.exit(0); |
7c673cae FG |
266 | } |
267 | ||
268 | // whoami | |
269 | char *end; | |
11fdf7f2 | 270 | const char *id = g_conf()->name.get_id().c_str(); |
7c673cae | 271 | int whoami = strtol(id, &end, 10); |
11fdf7f2 | 272 | std::string data_path = g_conf().get_val<std::string>("osd_data"); |
7c673cae FG |
273 | if (*end || end == id || whoami < 0) { |
274 | derr << "must specify '-i #' where # is the osd number" << dendl; | |
11fdf7f2 | 275 | forker.exit(1); |
7c673cae FG |
276 | } |
277 | ||
11fdf7f2 | 278 | if (data_path.empty()) { |
7c673cae | 279 | derr << "must specify '--osd-data=foo' data path" << dendl; |
11fdf7f2 | 280 | forker.exit(1); |
7c673cae FG |
281 | } |
282 | ||
283 | // the store | |
11fdf7f2 | 284 | std::string store_type; |
7c673cae FG |
285 | { |
286 | char fn[PATH_MAX]; | |
11fdf7f2 | 287 | snprintf(fn, sizeof(fn), "%s/type", data_path.c_str()); |
91327a77 | 288 | int fd = ::open(fn, O_RDONLY|O_CLOEXEC); |
7c673cae FG |
289 | if (fd >= 0) { |
290 | bufferlist bl; | |
291 | bl.read_fd(fd, 64); | |
292 | if (bl.length()) { | |
293 | store_type = string(bl.c_str(), bl.length() - 1); // drop \n | |
294 | dout(5) << "object store type is " << store_type << dendl; | |
295 | } | |
296 | ::close(fd); | |
11fdf7f2 TL |
297 | } else if (mkfs) { |
298 | store_type = g_conf().get_val<std::string>("osd_objectstore"); | |
299 | } else { | |
300 | // hrm, infer the type | |
301 | snprintf(fn, sizeof(fn), "%s/current", data_path.c_str()); | |
302 | struct stat st; | |
303 | if (::stat(fn, &st) == 0 && | |
304 | S_ISDIR(st.st_mode)) { | |
305 | derr << "missing 'type' file, inferring filestore from current/ dir" | |
306 | << dendl; | |
307 | store_type = "filestore"; | |
308 | } else { | |
309 | snprintf(fn, sizeof(fn), "%s/block", data_path.c_str()); | |
310 | if (::stat(fn, &st) == 0 && | |
311 | S_ISLNK(st.st_mode)) { | |
312 | derr << "missing 'type' file, inferring bluestore from block symlink" | |
313 | << dendl; | |
314 | store_type = "bluestore"; | |
315 | } else { | |
316 | derr << "missing 'type' file and unable to infer osd type" << dendl; | |
317 | forker.exit(1); | |
318 | } | |
319 | } | |
7c673cae FG |
320 | } |
321 | } | |
11fdf7f2 TL |
322 | |
323 | std::string journal_path = g_conf().get_val<std::string>("osd_journal"); | |
324 | uint32_t flags = g_conf().get_val<uint64_t>("osd_os_flags"); | |
20effc67 TL |
325 | std::unique_ptr<ObjectStore> store = ObjectStore::create(g_ceph_context, |
326 | store_type, | |
327 | data_path, | |
328 | journal_path, | |
329 | flags); | |
7c673cae FG |
330 | if (!store) { |
331 | derr << "unable to create object store" << dendl; | |
11fdf7f2 | 332 | forker.exit(-ENODEV); |
7c673cae FG |
333 | } |
334 | ||
7c673cae | 335 | |
7c673cae FG |
336 | if (mkkey) { |
337 | common_init_finish(g_ceph_context); | |
9f95a23c | 338 | KeyRing keyring; |
7c673cae | 339 | |
11fdf7f2 | 340 | EntityName ename{g_conf()->name}; |
7c673cae FG |
341 | EntityAuth eauth; |
342 | ||
11fdf7f2 | 343 | std::string keyring_path = g_conf().get_val<std::string>("keyring"); |
9f95a23c | 344 | int ret = keyring.load(g_ceph_context, keyring_path); |
7c673cae | 345 | if (ret == 0 && |
9f95a23c | 346 | keyring.get_auth(ename, eauth)) { |
11fdf7f2 | 347 | derr << "already have key in keyring " << keyring_path << dendl; |
7c673cae FG |
348 | } else { |
349 | eauth.key.create(g_ceph_context, CEPH_CRYPTO_AES); | |
9f95a23c | 350 | keyring.add(ename, eauth); |
7c673cae | 351 | bufferlist bl; |
9f95a23c | 352 | keyring.encode_plaintext(bl); |
11fdf7f2 | 353 | int r = bl.write_file(keyring_path.c_str(), 0600); |
7c673cae | 354 | if (r) |
11fdf7f2 TL |
355 | derr << TEXT_RED << " ** ERROR: writing new keyring to " |
356 | << keyring_path << ": " << cpp_strerror(r) << TEXT_NORMAL | |
357 | << dendl; | |
7c673cae | 358 | else |
11fdf7f2 | 359 | derr << "created new key in keyring " << keyring_path << dendl; |
7c673cae FG |
360 | } |
361 | } | |
e306af50 | 362 | |
3efd9988 FG |
363 | if (mkfs) { |
364 | common_init_finish(g_ceph_context); | |
3efd9988 | 365 | |
11fdf7f2 | 366 | if (g_conf().get_val<uuid_d>("fsid").is_zero()) { |
3efd9988 | 367 | derr << "must specify cluster fsid" << dendl; |
11fdf7f2 | 368 | forker.exit(-EINVAL); |
3efd9988 FG |
369 | } |
370 | ||
20effc67 | 371 | int err = OSD::mkfs(g_ceph_context, std::move(store), g_conf().get_val<uuid_d>("fsid"), |
e306af50 | 372 | whoami, osdspec_affinity); |
3efd9988 FG |
373 | if (err < 0) { |
374 | derr << TEXT_RED << " ** ERROR: error creating empty object store in " | |
11fdf7f2 TL |
375 | << data_path << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
376 | forker.exit(1); | |
3efd9988 | 377 | } |
11fdf7f2 TL |
378 | dout(0) << "created object store " << data_path |
379 | << " for osd." << whoami | |
380 | << " fsid " << g_conf().get_val<uuid_d>("fsid") | |
381 | << dendl; | |
382 | } | |
383 | if (mkfs || mkkey) { | |
384 | forker.exit(0); | |
3efd9988 | 385 | } |
7c673cae FG |
386 | if (mkjournal) { |
387 | common_init_finish(g_ceph_context); | |
388 | int err = store->mkjournal(); | |
389 | if (err < 0) { | |
11fdf7f2 TL |
390 | derr << TEXT_RED << " ** ERROR: error creating fresh journal " |
391 | << journal_path << " for object store " << data_path << ": " | |
392 | << cpp_strerror(-err) << TEXT_NORMAL << dendl; | |
393 | forker.exit(1); | |
7c673cae | 394 | } |
11fdf7f2 TL |
395 | derr << "created new journal " << journal_path |
396 | << " for object store " << data_path << dendl; | |
397 | forker.exit(0); | |
7c673cae FG |
398 | } |
399 | if (check_wants_journal) { | |
400 | if (store->wants_journal()) { | |
d2e6a577 | 401 | cout << "wants journal: yes" << std::endl; |
11fdf7f2 | 402 | forker.exit(0); |
7c673cae | 403 | } else { |
d2e6a577 | 404 | cout << "wants journal: no" << std::endl; |
11fdf7f2 | 405 | forker.exit(1); |
7c673cae FG |
406 | } |
407 | } | |
408 | if (check_allows_journal) { | |
409 | if (store->allows_journal()) { | |
d2e6a577 | 410 | cout << "allows journal: yes" << std::endl; |
11fdf7f2 | 411 | forker.exit(0); |
7c673cae | 412 | } else { |
d2e6a577 | 413 | cout << "allows journal: no" << std::endl; |
11fdf7f2 | 414 | forker.exit(1); |
7c673cae FG |
415 | } |
416 | } | |
417 | if (check_needs_journal) { | |
418 | if (store->needs_journal()) { | |
d2e6a577 | 419 | cout << "needs journal: yes" << std::endl; |
11fdf7f2 | 420 | forker.exit(0); |
7c673cae | 421 | } else { |
d2e6a577 | 422 | cout << "needs journal: no" << std::endl; |
11fdf7f2 | 423 | forker.exit(1); |
7c673cae FG |
424 | } |
425 | } | |
426 | if (flushjournal) { | |
427 | common_init_finish(g_ceph_context); | |
428 | int err = store->mount(); | |
429 | if (err < 0) { | |
11fdf7f2 TL |
430 | derr << TEXT_RED << " ** ERROR: error flushing journal " << journal_path |
431 | << " for object store " << data_path | |
7c673cae FG |
432 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
433 | goto flushjournal_out; | |
434 | } | |
435 | store->umount(); | |
11fdf7f2 TL |
436 | derr << "flushed journal " << journal_path |
437 | << " for object store " << data_path | |
7c673cae FG |
438 | << dendl; |
439 | flushjournal_out: | |
20effc67 | 440 | store.reset(); |
11fdf7f2 | 441 | forker.exit(err < 0 ? 1 : 0); |
7c673cae FG |
442 | } |
443 | if (dump_journal) { | |
444 | common_init_finish(g_ceph_context); | |
445 | int err = store->dump_journal(cout); | |
446 | if (err < 0) { | |
11fdf7f2 TL |
447 | derr << TEXT_RED << " ** ERROR: error dumping journal " << journal_path |
448 | << " for object store " << data_path | |
7c673cae | 449 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 450 | forker.exit(1); |
7c673cae | 451 | } |
11fdf7f2 TL |
452 | derr << "dumped journal " << journal_path |
453 | << " for object store " << data_path | |
7c673cae | 454 | << dendl; |
11fdf7f2 | 455 | forker.exit(0); |
7c673cae FG |
456 | } |
457 | ||
7c673cae FG |
458 | if (convertfilestore) { |
459 | int err = store->mount(); | |
460 | if (err < 0) { | |
11fdf7f2 | 461 | derr << TEXT_RED << " ** ERROR: error mounting store " << data_path |
7c673cae | 462 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 463 | forker.exit(1); |
7c673cae FG |
464 | } |
465 | err = store->upgrade(); | |
466 | store->umount(); | |
467 | if (err < 0) { | |
11fdf7f2 | 468 | derr << TEXT_RED << " ** ERROR: error converting store " << data_path |
7c673cae | 469 | << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; |
11fdf7f2 | 470 | forker.exit(1); |
7c673cae | 471 | } |
11fdf7f2 | 472 | forker.exit(0); |
7c673cae FG |
473 | } |
474 | ||
475 | string magic; | |
476 | uuid_d cluster_fsid, osd_fsid; | |
9f95a23c | 477 | ceph_release_t require_osd_release = ceph_release_t::unknown; |
7c673cae | 478 | int w; |
20effc67 | 479 | int r = OSD::peek_meta(store.get(), &magic, &cluster_fsid, &osd_fsid, &w, |
11fdf7f2 | 480 | &require_osd_release); |
7c673cae FG |
481 | if (r < 0) { |
482 | derr << TEXT_RED << " ** ERROR: unable to open OSD superblock on " | |
11fdf7f2 | 483 | << data_path << ": " << cpp_strerror(-r) |
7c673cae FG |
484 | << TEXT_NORMAL << dendl; |
485 | if (r == -ENOTSUP) { | |
486 | derr << TEXT_RED << " ** please verify that underlying storage " | |
487 | << "supports xattrs" << TEXT_NORMAL << dendl; | |
488 | } | |
11fdf7f2 | 489 | forker.exit(1); |
7c673cae FG |
490 | } |
491 | if (w != whoami) { | |
492 | derr << "OSD id " << w << " != my id " << whoami << dendl; | |
11fdf7f2 | 493 | forker.exit(1); |
7c673cae FG |
494 | } |
495 | if (strcmp(magic.c_str(), CEPH_OSD_ONDISK_MAGIC)) { | |
496 | derr << "OSD magic " << magic << " != my " << CEPH_OSD_ONDISK_MAGIC | |
497 | << dendl; | |
11fdf7f2 | 498 | forker.exit(1); |
7c673cae FG |
499 | } |
500 | ||
501 | if (get_cluster_fsid) { | |
502 | cout << cluster_fsid << std::endl; | |
11fdf7f2 | 503 | forker.exit(0); |
7c673cae FG |
504 | } |
505 | if (get_osd_fsid) { | |
506 | cout << osd_fsid << std::endl; | |
11fdf7f2 | 507 | forker.exit(0); |
7c673cae FG |
508 | } |
509 | ||
9f95a23c | 510 | { |
9f95a23c | 511 | ostringstream err; |
f67539c2 | 512 | if (!can_upgrade_from(require_osd_release, "require_osd_release", err)) { |
9f95a23c TL |
513 | derr << err.str() << dendl; |
514 | forker.exit(1); | |
515 | } | |
11fdf7f2 | 516 | } |
7c673cae | 517 | |
11fdf7f2 TL |
518 | // consider objectstore numa node |
519 | int os_numa_node = -1; | |
520 | r = store->get_numa_node(&os_numa_node, nullptr, nullptr); | |
521 | if (r >= 0 && os_numa_node >= 0) { | |
522 | dout(1) << " objectstore numa_node " << os_numa_node << dendl; | |
523 | } | |
524 | int iface_preferred_numa_node = -1; | |
525 | if (g_conf().get_val<bool>("osd_numa_prefer_iface")) { | |
526 | iface_preferred_numa_node = os_numa_node; | |
7c673cae FG |
527 | } |
528 | ||
11fdf7f2 TL |
529 | // messengers |
530 | std::string msg_type = g_conf().get_val<std::string>("ms_type"); | |
531 | std::string public_msg_type = | |
532 | g_conf().get_val<std::string>("ms_public_type"); | |
533 | std::string cluster_msg_type = | |
534 | g_conf().get_val<std::string>("ms_cluster_type"); | |
535 | ||
536 | public_msg_type = public_msg_type.empty() ? msg_type : public_msg_type; | |
537 | cluster_msg_type = cluster_msg_type.empty() ? msg_type : cluster_msg_type; | |
9f95a23c | 538 | uint64_t nonce = Messenger::get_pid_nonce(); |
11fdf7f2 | 539 | Messenger *ms_public = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 540 | entity_name_t::OSD(whoami), "client", nonce); |
11fdf7f2 | 541 | Messenger *ms_cluster = Messenger::create(g_ceph_context, cluster_msg_type, |
f67539c2 | 542 | entity_name_t::OSD(whoami), "cluster", nonce); |
11fdf7f2 | 543 | Messenger *ms_hb_back_client = Messenger::create(g_ceph_context, cluster_msg_type, |
f67539c2 | 544 | entity_name_t::OSD(whoami), "hb_back_client", nonce); |
11fdf7f2 | 545 | Messenger *ms_hb_front_client = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 546 | entity_name_t::OSD(whoami), "hb_front_client", nonce); |
11fdf7f2 | 547 | Messenger *ms_hb_back_server = Messenger::create(g_ceph_context, cluster_msg_type, |
f67539c2 | 548 | entity_name_t::OSD(whoami), "hb_back_server", nonce); |
11fdf7f2 | 549 | Messenger *ms_hb_front_server = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 550 | entity_name_t::OSD(whoami), "hb_front_server", nonce); |
11fdf7f2 | 551 | Messenger *ms_objecter = Messenger::create(g_ceph_context, public_msg_type, |
f67539c2 | 552 | entity_name_t::OSD(whoami), "ms_objecter", nonce); |
7c673cae | 553 | if (!ms_public || !ms_cluster || !ms_hb_front_client || !ms_hb_back_client || !ms_hb_back_server || !ms_hb_front_server || !ms_objecter) |
11fdf7f2 | 554 | forker.exit(1); |
7c673cae FG |
555 | ms_cluster->set_cluster_protocol(CEPH_OSD_PROTOCOL); |
556 | ms_hb_front_client->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
557 | ms_hb_back_client->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
558 | ms_hb_back_server->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
559 | ms_hb_front_server->set_cluster_protocol(CEPH_OSD_PROTOCOL); | |
560 | ||
11fdf7f2 TL |
561 | dout(0) << "starting osd." << whoami |
562 | << " osd_data " << data_path | |
563 | << " " << ((journal_path.empty()) ? | |
564 | "(no journal)" : journal_path) | |
565 | << dendl; | |
7c673cae | 566 | |
11fdf7f2 TL |
567 | uint64_t message_size = |
568 | g_conf().get_val<Option::size_t>("osd_client_message_size_cap"); | |
7c673cae | 569 | boost::scoped_ptr<Throttle> client_byte_throttler( |
11fdf7f2 | 570 | new Throttle(g_ceph_context, "osd_client_bytes", message_size)); |
f6b5b4d7 TL |
571 | uint64_t message_cap = g_conf().get_val<uint64_t>("osd_client_message_cap"); |
572 | boost::scoped_ptr<Throttle> client_msg_throttler( | |
573 | new Throttle(g_ceph_context, "osd_client_messages", message_cap)); | |
7c673cae FG |
574 | |
575 | // All feature bits 0 - 34 should be present from dumpling v0.67 forward | |
576 | uint64_t osd_required = | |
577 | CEPH_FEATURE_UID | | |
578 | CEPH_FEATURE_PGID64 | | |
579 | CEPH_FEATURE_OSDENC; | |
580 | ||
9f95a23c | 581 | ms_public->set_default_policy(Messenger::Policy::stateless_registered_server(0)); |
7c673cae FG |
582 | ms_public->set_policy_throttlers(entity_name_t::TYPE_CLIENT, |
583 | client_byte_throttler.get(), | |
f6b5b4d7 | 584 | client_msg_throttler.get()); |
7c673cae | 585 | ms_public->set_policy(entity_name_t::TYPE_MON, |
11fdf7f2 | 586 | Messenger::Policy::lossy_client(osd_required)); |
7c673cae | 587 | ms_public->set_policy(entity_name_t::TYPE_MGR, |
11fdf7f2 | 588 | Messenger::Policy::lossy_client(osd_required)); |
7c673cae | 589 | |
7c673cae FG |
590 | ms_cluster->set_default_policy(Messenger::Policy::stateless_server(0)); |
591 | ms_cluster->set_policy(entity_name_t::TYPE_MON, Messenger::Policy::lossy_client(0)); | |
592 | ms_cluster->set_policy(entity_name_t::TYPE_OSD, | |
593 | Messenger::Policy::lossless_peer(osd_required)); | |
594 | ms_cluster->set_policy(entity_name_t::TYPE_CLIENT, | |
595 | Messenger::Policy::stateless_server(0)); | |
596 | ||
597 | ms_hb_front_client->set_policy(entity_name_t::TYPE_OSD, | |
598 | Messenger::Policy::lossy_client(0)); | |
599 | ms_hb_back_client->set_policy(entity_name_t::TYPE_OSD, | |
600 | Messenger::Policy::lossy_client(0)); | |
601 | ms_hb_back_server->set_policy(entity_name_t::TYPE_OSD, | |
602 | Messenger::Policy::stateless_server(0)); | |
603 | ms_hb_front_server->set_policy(entity_name_t::TYPE_OSD, | |
604 | Messenger::Policy::stateless_server(0)); | |
605 | ||
606 | ms_objecter->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX)); | |
607 | ||
11fdf7f2 TL |
608 | entity_addrvec_t public_addrs, cluster_addrs; |
609 | r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC, &public_addrs, | |
610 | iface_preferred_numa_node); | |
611 | if (r < 0) { | |
612 | derr << "Failed to pick public address." << dendl; | |
613 | forker.exit(1); | |
614 | } | |
615 | r = pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_CLUSTER, &cluster_addrs, | |
616 | iface_preferred_numa_node); | |
617 | if (r < 0) { | |
618 | derr << "Failed to pick cluster address." << dendl; | |
619 | forker.exit(1); | |
620 | } | |
621 | ||
622 | if (ms_public->bindv(public_addrs) < 0) | |
623 | forker.exit(1); | |
7c673cae | 624 | |
11fdf7f2 TL |
625 | if (ms_cluster->bindv(cluster_addrs) < 0) |
626 | forker.exit(1); | |
627 | ||
628 | bool is_delay = g_conf().get_val<bool>("osd_heartbeat_use_min_delay_socket"); | |
629 | if (is_delay) { | |
7c673cae FG |
630 | ms_hb_front_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); |
631 | ms_hb_back_client->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
632 | ms_hb_back_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
633 | ms_hb_front_server->set_socket_priority(SOCKET_PRIORITY_MIN_DELAY); | |
634 | } | |
635 | ||
11fdf7f2 TL |
636 | entity_addrvec_t hb_front_addrs = public_addrs; |
637 | for (auto& a : hb_front_addrs.v) { | |
638 | a.set_port(0); | |
7c673cae | 639 | } |
11fdf7f2 TL |
640 | if (ms_hb_front_server->bindv(hb_front_addrs) < 0) |
641 | forker.exit(1); | |
642 | if (ms_hb_front_client->client_bind(hb_front_addrs.front()) < 0) | |
643 | forker.exit(1); | |
644 | ||
645 | entity_addrvec_t hb_back_addrs = cluster_addrs; | |
646 | for (auto& a : hb_back_addrs.v) { | |
647 | a.set_port(0); | |
648 | } | |
649 | if (ms_hb_back_server->bindv(hb_back_addrs) < 0) | |
650 | forker.exit(1); | |
651 | if (ms_hb_back_client->client_bind(hb_back_addrs.front()) < 0) | |
652 | forker.exit(1); | |
7c673cae | 653 | |
11fdf7f2 TL |
654 | // install signal handlers |
655 | init_async_signal_handler(); | |
656 | register_async_signal_handler(SIGHUP, sighup_handler); | |
7c673cae FG |
657 | |
658 | TracepointProvider::initialize<osd_tracepoint_traits>(g_ceph_context); | |
659 | TracepointProvider::initialize<os_tracepoint_traits>(g_ceph_context); | |
9f95a23c | 660 | TracepointProvider::initialize<bluestore_tracepoint_traits>(g_ceph_context); |
31f18b77 FG |
661 | #ifdef WITH_OSD_INSTRUMENT_FUNCTIONS |
662 | TracepointProvider::initialize<cyg_profile_traits>(g_ceph_context); | |
663 | #endif | |
7c673cae | 664 | |
11fdf7f2 TL |
665 | srand(time(NULL) + getpid()); |
666 | ||
f67539c2 TL |
667 | ceph::async::io_context_pool poolctx( |
668 | cct->_conf.get_val<std::uint64_t>("osd_asio_thread_count")); | |
669 | ||
670 | MonClient mc(g_ceph_context, poolctx); | |
7c673cae FG |
671 | if (mc.build_initial_monmap() < 0) |
672 | return -1; | |
673 | global_init_chdir(g_ceph_context); | |
674 | ||
11fdf7f2 TL |
675 | if (global_init_preload_erasure_code(g_ceph_context) < 0) { |
676 | forker.exit(1); | |
677 | } | |
224ce89b | 678 | |
9f95a23c | 679 | osdptr = new OSD(g_ceph_context, |
20effc67 | 680 | std::move(store), |
9f95a23c TL |
681 | whoami, |
682 | ms_cluster, | |
683 | ms_public, | |
684 | ms_hb_front_client, | |
685 | ms_hb_back_client, | |
686 | ms_hb_front_server, | |
687 | ms_hb_back_server, | |
688 | ms_objecter, | |
689 | &mc, | |
690 | data_path, | |
f67539c2 TL |
691 | journal_path, |
692 | poolctx); | |
9f95a23c TL |
693 | |
694 | int err = osdptr->pre_init(); | |
7c673cae FG |
695 | if (err < 0) { |
696 | derr << TEXT_RED << " ** ERROR: osd pre_init failed: " << cpp_strerror(-err) | |
697 | << TEXT_NORMAL << dendl; | |
11fdf7f2 | 698 | forker.exit(1); |
7c673cae FG |
699 | } |
700 | ||
701 | ms_public->start(); | |
702 | ms_hb_front_client->start(); | |
703 | ms_hb_back_client->start(); | |
704 | ms_hb_front_server->start(); | |
705 | ms_hb_back_server->start(); | |
706 | ms_cluster->start(); | |
707 | ms_objecter->start(); | |
708 | ||
709 | // start osd | |
9f95a23c | 710 | err = osdptr->init(); |
7c673cae FG |
711 | if (err < 0) { |
712 | derr << TEXT_RED << " ** ERROR: osd init failed: " << cpp_strerror(-err) | |
713 | << TEXT_NORMAL << dendl; | |
11fdf7f2 | 714 | forker.exit(1); |
7c673cae FG |
715 | } |
716 | ||
11fdf7f2 TL |
717 | // -- daemonize -- |
718 | ||
719 | if (g_conf()->daemonize) { | |
720 | global_init_postfork_finish(g_ceph_context); | |
721 | forker.daemonize(); | |
722 | } | |
723 | ||
724 | ||
7c673cae FG |
725 | register_async_signal_handler_oneshot(SIGINT, handle_osd_signal); |
726 | register_async_signal_handler_oneshot(SIGTERM, handle_osd_signal); | |
727 | ||
9f95a23c | 728 | osdptr->final_init(); |
7c673cae | 729 | |
11fdf7f2 | 730 | if (g_conf().get_val<bool>("inject_early_sigterm")) |
7c673cae FG |
731 | kill(getpid(), SIGTERM); |
732 | ||
733 | ms_public->wait(); | |
734 | ms_hb_front_client->wait(); | |
735 | ms_hb_back_client->wait(); | |
736 | ms_hb_front_server->wait(); | |
737 | ms_hb_back_server->wait(); | |
738 | ms_cluster->wait(); | |
739 | ms_objecter->wait(); | |
740 | ||
741 | unregister_async_signal_handler(SIGHUP, sighup_handler); | |
742 | unregister_async_signal_handler(SIGINT, handle_osd_signal); | |
743 | unregister_async_signal_handler(SIGTERM, handle_osd_signal); | |
744 | shutdown_async_signal_handler(); | |
745 | ||
746 | // done | |
f67539c2 | 747 | poolctx.stop(); |
9f95a23c | 748 | delete osdptr; |
7c673cae FG |
749 | delete ms_public; |
750 | delete ms_hb_front_client; | |
751 | delete ms_hb_back_client; | |
752 | delete ms_hb_front_server; | |
753 | delete ms_hb_back_server; | |
754 | delete ms_cluster; | |
755 | delete ms_objecter; | |
756 | ||
757 | client_byte_throttler.reset(); | |
f6b5b4d7 | 758 | client_msg_throttler.reset(); |
7c673cae FG |
759 | |
760 | // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. | |
761 | char s[20]; | |
762 | snprintf(s, sizeof(s), "gmon/%d", getpid()); | |
763 | if ((mkdir(s, 0755) == 0) && (chdir(s) == 0)) { | |
764 | dout(0) << "ceph-osd: gmon.out should be in " << s << dendl; | |
765 | } | |
766 | ||
767 | return 0; | |
768 | } |