]> git.proxmox.com Git - ceph.git/blob - ceph/src/global/signal_handler.cc
0447f96e1b18138ddc0122a8b9bae9310c88b141
[ceph.git] / ceph / src / global / signal_handler.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 New Dream Network
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include <sys/utsname.h>
16
17 #include "include/compat.h"
18 #include "pthread.h"
19
20 #include "common/ceph_mutex.h"
21 #include "common/BackTrace.h"
22 #include "common/debug.h"
23 #include "common/safe_io.h"
24 #include "common/version.h"
25
26 #include "include/uuid.h"
27 #include "global/pidfile.h"
28 #include "global/signal_handler.h"
29
#include <poll.h>
#include <signal.h>
#include <atomic>
#include <mutex>
#include <sstream>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "common/errno.h"
37 #if defined(_AIX)
38 extern char *sys_siglist[];
39 #endif
40
41 #define dout_context g_ceph_context
42
43 using std::ostringstream;
44 using std::string;
45
46 using ceph::BackTrace;
47 using ceph::JSONFormatter;
48
49 void install_sighandler(int signum, signal_handler_t handler, int flags)
50 {
51 int ret;
52 struct sigaction oldact;
53 struct sigaction act;
54 memset(&act, 0, sizeof(act));
55
56 act.sa_handler = handler;
57 sigemptyset(&act.sa_mask);
58 act.sa_flags = flags;
59
60 ret = sigaction(signum, &act, &oldact);
61 if (ret != 0) {
62 char buf[1024];
63 #if defined(__sun)
64 char message[SIG2STR_MAX];
65 sig2str(signum,message);
66 snprintf(buf, sizeof(buf), "install_sighandler: sigaction returned "
67 "%d when trying to install a signal handler for %s\n",
68 ret, message);
69 #else
70 snprintf(buf, sizeof(buf), "install_sighandler: sigaction returned "
71 "%d when trying to install a signal handler for %s\n",
72 ret, sig_str(signum));
73 #endif
74 dout_emergency(buf);
75 exit(1);
76 }
77 }
78
79 void sighup_handler(int signum)
80 {
81 g_ceph_context->reopen_logs();
82 }
83
84 static void reraise_fatal(int signum)
85 {
86 // Use default handler to dump core
87 int ret = raise(signum);
88
89 // Normally, we won't get here. If we do, something is very weird.
90 char buf[1024];
91 if (ret) {
92 snprintf(buf, sizeof(buf), "reraise_fatal: failed to re-raise "
93 "signal %d\n", signum);
94 dout_emergency(buf);
95 }
96 else {
97 snprintf(buf, sizeof(buf), "reraise_fatal: default handler for "
98 "signal %d didn't terminate the process?\n", signum);
99 dout_emergency(buf);
100 }
101 exit(1);
102 }
103
104
// /etc/os-release looks like
//
//   NAME=Fedora
//   VERSION="28 (Server Edition)"
//   ID=fedora
//   VERSION_ID=28
//
// or
//
//   NAME="Ubuntu"
//   VERSION="16.04.3 LTS (Xenial Xerus)"
//   ID=ubuntu
//   ID_LIKE=debian
//
// parse_from_os_release("FOO=bar\nTHIS=\"that\"\n", "FOO=", ...) will
// write "bar\0" to out buffer, which is assumed to be as large as the input
// file.
//
// @param file  NUL-terminated contents of the os-release file
// @param key   key to look up, including the trailing '=' (e.g. "ID=")
// @param out   receives the NUL-terminated value with one pair of
//              surrounding double quotes removed
// @return 0 on success; -1 if the key is absent or its value is empty
//
// The key is only recognised at the beginning of a line, so "ID=" does not
// match inside "VERSION_ID=" and "NAME=" does not match inside
// "PRETTY_NAME=".  The last line may lack a terminating newline.
static int parse_from_os_release(
  const char *file, const char *key,
  char *out)
{
  // Find the first occurrence of key that starts a line.
  const char *p = file;
  while ((p = strstr(p, key)) != nullptr) {
    if (p == file || *(p - 1) == '\n') {
      break;
    }
    ++p;  // matched in the middle of another key; keep searching
  }
  if (!p) {
    return -1;
  }

  const char *start = p + strlen(key);
  const char *end = strchr(start, '\n');
  if (!end) {
    // final line without a trailing newline: value runs to end of input
    end = start + strlen(start);
  }
  if (*start == '"' && *(end - 1) == '"') {
    ++start;
    --end;
  }
  if (start >= end) {
    return -1;
  }
  memcpy(out, start, end - start);
  out[end - start] = 0;
  return 0;
}
146
147 static void handle_fatal_signal(int signum)
148 {
149 // This code may itself trigger a SIGSEGV if the heap is corrupt. In that
150 // case, SA_RESETHAND specifies that the default signal handler--
151 // presumably dump core-- will handle it.
152 char buf[1024];
153 char pthread_name[16] = {0}; //limited by 16B include terminating null byte.
154 int r = ceph_pthread_getname(pthread_self(), pthread_name, sizeof(pthread_name));
155 (void)r;
156 #if defined(__sun)
157 char message[SIG2STR_MAX];
158 sig2str(signum,message);
159 snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n "
160 "in thread %llx thread_name:%s\n", message, (unsigned long long)pthread_self(),
161 pthread_name);
162 #else
163 snprintf(buf, sizeof(buf), "*** Caught signal (%s) **\n "
164 "in thread %llx thread_name:%s\n", sig_str(signum), (unsigned long long)pthread_self(),
165 pthread_name);
166 #endif
167 dout_emergency(buf);
168 pidfile_remove();
169
170 // TODO: don't use an ostringstream here. It could call malloc(), which we
171 // don't want inside a signal handler.
172 // Also fix the backtrace code not to allocate memory.
173 BackTrace bt(1);
174 ostringstream oss;
175 bt.print(oss);
176 dout_emergency(oss.str());
177
178 char base[PATH_MAX] = { 0 };
179 if (g_ceph_context &&
180 g_ceph_context->_conf->crash_dir.size()) {
181 // -- crash dump --
182 // id
183 ostringstream idss;
184 utime_t now = ceph_clock_now();
185 now.gmtime(idss);
186 uuid_d uuid;
187 uuid.generate_random();
188 idss << "_" << uuid;
189 string id = idss.str();
190 std::replace(id.begin(), id.end(), ' ', '_');
191
192 snprintf(base, sizeof(base), "%s/%s",
193 g_ceph_context->_conf->crash_dir.c_str(),
194 id.c_str());
195 int r = ::mkdir(base, 0700);
196 if (r >= 0) {
197 char fn[PATH_MAX*2];
198 snprintf(fn, sizeof(fn)-1, "%s/meta", base);
199 int fd = ::open(fn, O_CREAT|O_WRONLY|O_CLOEXEC, 0600);
200 if (fd >= 0) {
201 JSONFormatter jf(true);
202 jf.open_object_section("crash");
203 jf.dump_string("crash_id", id);
204 now.gmtime(jf.dump_stream("timestamp"));
205 jf.dump_string("process_name", g_process_name);
206 jf.dump_string("entity_name", g_ceph_context->_conf->name.to_str());
207 jf.dump_string("ceph_version", ceph_version_to_str());
208
209 struct utsname u;
210 r = uname(&u);
211 if (r >= 0) {
212 jf.dump_string("utsname_hostname", u.nodename);
213 jf.dump_string("utsname_sysname", u.sysname);
214 jf.dump_string("utsname_release", u.release);
215 jf.dump_string("utsname_version", u.version);
216 jf.dump_string("utsname_machine", u.machine);
217 }
218 #if defined(__linux__)
219 // os-release
220 int in = ::open("/etc/os-release", O_RDONLY|O_CLOEXEC);
221 if (in >= 0) {
222 char buf[4096];
223 r = safe_read(in, buf, sizeof(buf)-1);
224 if (r >= 0) {
225 buf[r] = 0;
226 char v[4096];
227 if (parse_from_os_release(buf, "NAME=", v) >= 0) {
228 jf.dump_string("os_name", v);
229 }
230 if (parse_from_os_release(buf, "ID=", v) >= 0) {
231 jf.dump_string("os_id", v);
232 }
233 if (parse_from_os_release(buf, "VERSION_ID=", v) >= 0) {
234 jf.dump_string("os_version_id", v);
235 }
236 if (parse_from_os_release(buf, "VERSION=", v) >= 0) {
237 jf.dump_string("os_version", v);
238 }
239 }
240 ::close(in);
241 }
242 #endif
243
244 // assert?
245 if (g_assert_condition) {
246 jf.dump_string("assert_condition", g_assert_condition);
247 }
248 if (g_assert_func) {
249 jf.dump_string("assert_func", g_assert_func);
250 }
251 if (g_assert_file) {
252 jf.dump_string("assert_file", g_assert_file);
253 }
254 if (g_assert_line) {
255 jf.dump_unsigned("assert_line", g_assert_line);
256 }
257 if (g_assert_thread_name[0]) {
258 jf.dump_string("assert_thread_name", g_assert_thread_name);
259 }
260 if (g_assert_msg[0]) {
261 jf.dump_string("assert_msg", g_assert_msg);
262 }
263
264 // eio?
265 if (g_eio) {
266 jf.dump_bool("io_error", true);
267 if (g_eio_devname[0]) {
268 jf.dump_string("io_error_devname", g_eio_devname);
269 }
270 if (g_eio_path[0]) {
271 jf.dump_string("io_error_path", g_eio_path);
272 }
273 if (g_eio_error) {
274 jf.dump_int("io_error_code", g_eio_error);
275 }
276 if (g_eio_iotype) {
277 jf.dump_int("io_error_optype", g_eio_iotype);
278 }
279 if (g_eio_offset) {
280 jf.dump_unsigned("io_error_offset", g_eio_offset);
281 }
282 if (g_eio_length) {
283 jf.dump_unsigned("io_error_length", g_eio_length);
284 }
285 }
286
287 // backtrace
288 bt.dump(&jf);
289
290 jf.close_section();
291 ostringstream oss;
292 jf.flush(oss);
293 string s = oss.str();
294 r = safe_write(fd, s.c_str(), s.size());
295 (void)r;
296 ::close(fd);
297 }
298 snprintf(fn, sizeof(fn)-1, "%s/done", base);
299 ::creat(fn, 0444);
300 }
301 }
302
303 // avoid recursion back into logging code if that is where
304 // we got the SEGV.
305 if (g_ceph_context &&
306 g_ceph_context->_log &&
307 !g_ceph_context->_log->is_inside_log_lock()) {
308 // dump to log. this uses the heap extensively, but we're better
309 // off trying than not.
310 derr << buf << std::endl;
311 bt.print(*_dout);
312 *_dout << " NOTE: a copy of the executable, or `objdump -rdS <executable>` "
313 << "is needed to interpret this.\n"
314 << dendl;
315
316 g_ceph_context->_log->dump_recent();
317
318 if (base[0]) {
319 char fn[PATH_MAX*2];
320 snprintf(fn, sizeof(fn)-1, "%s/log", base);
321 g_ceph_context->_log->set_log_file(fn);
322 g_ceph_context->_log->reopen_log_file();
323 g_ceph_context->_log->dump_recent();
324 }
325 }
326
327 if (g_eio) {
328 // if this was an EIO crash, we don't need to trigger a core dump,
329 // since the problem is hardware, or some layer beneath us.
330 _exit(EIO);
331 } else {
332 reraise_fatal(signum);
333 }
334 }
335
336 void install_standard_sighandlers(void)
337 {
338 install_sighandler(SIGSEGV, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
339 install_sighandler(SIGABRT, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
340 install_sighandler(SIGBUS, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
341 install_sighandler(SIGILL, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
342 install_sighandler(SIGFPE, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
343 install_sighandler(SIGXCPU, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
344 install_sighandler(SIGXFSZ, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
345 install_sighandler(SIGSYS, handle_fatal_signal, SA_RESETHAND | SA_NODEFER);
346 }
347
348
349
350 /// --- safe handler ---
351
352 #include "common/Thread.h"
353 #include <errno.h>
354
355 #ifdef __APPLE__
356 #include <libproc.h>
357
358 string get_name_by_pid(pid_t pid)
359 {
360 char buf[PROC_PIDPATHINFO_MAXSIZE];
361 int ret = proc_pidpath(pid, buf, sizeof(buf));
362 if (ret == 0) {
363 derr << "Fail to proc_pidpath(" << pid << ")"
364 << " error = " << cpp_strerror(ret)
365 << dendl;
366 return "<unknown>";
367 }
368 return string(buf, ret);
369 }
370 #else
371 string get_name_by_pid(pid_t pid)
372 {
373 // If the PID is 0, its means the sender is the Kernel itself
374 if (pid == 0) {
375 return "Kernel";
376 }
377 char proc_pid_path[PATH_MAX] = {0};
378 snprintf(proc_pid_path, PATH_MAX, PROCPREFIX "/proc/%d/cmdline", pid);
379 int fd = open(proc_pid_path, O_RDONLY);
380
381 if (fd < 0) {
382 fd = -errno;
383 derr << "Fail to open '" << proc_pid_path
384 << "' error = " << cpp_strerror(fd)
385 << dendl;
386 return "<unknown>";
387 }
388 // assuming the cmdline length does not exceed PATH_MAX. if it
389 // really does, it's fine to return a truncated version.
390 char buf[PATH_MAX] = {0};
391 int ret = read(fd, buf, sizeof(buf));
392 close(fd);
393 if (ret < 0) {
394 ret = -errno;
395 derr << "Fail to read '" << proc_pid_path
396 << "' error = " << cpp_strerror(ret)
397 << dendl;
398 return "<unknown>";
399 }
400 std::replace(buf, buf + ret, '\0', ' ');
401 return string(buf, ret);
402 }
403 #endif
404
405 /**
406 * safe async signal handler / dispatcher
407 *
408 * This is an async unix signal handler based on the design from
409 *
410 * http://evbergen.home.xs4all.nl/unix-signals.html
411 *
412 * Features:
413 * - no unsafe work is done in the signal handler itself
414 * - callbacks are called from a regular thread
415 * - signals are not lost, unless multiple instances of the same signal
416 * are sent twice in quick succession.
417 */
418 struct SignalHandler : public Thread {
419 /// to kick the thread, for shutdown, new handlers, etc.
420 int pipefd[2]; // write to [1], read from [0]
421
422 /// to signal shutdown
423 bool stop = false;
424
425 /// for an individual signal
426 struct safe_handler {
427
428 safe_handler() {
429 memset(pipefd, 0, sizeof(pipefd));
430 memset(&handler, 0, sizeof(handler));
431 memset(&info_t, 0, sizeof(info_t));
432 }
433
434 siginfo_t info_t;
435 int pipefd[2]; // write to [1], read from [0]
436 signal_handler_t handler;
437 };
438
439 /// all handlers
440 safe_handler *handlers[32] = {nullptr};
441
442 /// to protect the handlers array
443 ceph::mutex lock = ceph::make_mutex("SignalHandler::lock");
444
445 SignalHandler() {
446 // create signal pipe
447 int r = pipe_cloexec(pipefd, 0);
448 ceph_assert(r == 0);
449 r = fcntl(pipefd[0], F_SETFL, O_NONBLOCK);
450 ceph_assert(r == 0);
451
452 // create thread
453 create("signal_handler");
454 }
455
456 ~SignalHandler() override {
457 shutdown();
458 }
459
460 void signal_thread() {
461 int r = write(pipefd[1], "\0", 1);
462 ceph_assert(r == 1);
463 }
464
465 void shutdown() {
466 stop = true;
467 signal_thread();
468 join();
469 }
470
471 // thread entry point
472 void *entry() override {
473 while (!stop) {
474 // build fd list
475 struct pollfd fds[33];
476
477 lock.lock();
478 int num_fds = 0;
479 fds[num_fds].fd = pipefd[0];
480 fds[num_fds].events = POLLIN | POLLERR;
481 fds[num_fds].revents = 0;
482 ++num_fds;
483 for (unsigned i=0; i<32; i++) {
484 if (handlers[i]) {
485 fds[num_fds].fd = handlers[i]->pipefd[0];
486 fds[num_fds].events = POLLIN | POLLERR;
487 fds[num_fds].revents = 0;
488 ++num_fds;
489 }
490 }
491 lock.unlock();
492
493 // wait for data on any of those pipes
494 int r = poll(fds, num_fds, -1);
495 if (stop)
496 break;
497 if (r > 0) {
498 char v;
499
500 // consume byte from signal socket, if any.
501 TEMP_FAILURE_RETRY(read(pipefd[0], &v, 1));
502
503 lock.lock();
504 for (unsigned signum=0; signum<32; signum++) {
505 if (handlers[signum]) {
506 r = read(handlers[signum]->pipefd[0], &v, 1);
507 if (r == 1) {
508 siginfo_t * siginfo = &handlers[signum]->info_t;
509 ostringstream message;
510 message << "received signal: " << sig_str(signum);
511 switch (siginfo->si_code) {
512 case SI_USER:
513 message << " from " << get_name_by_pid(siginfo->si_pid);
514 // If PID is undefined, it doesn't have a meaning to be displayed
515 if (siginfo->si_pid) {
516 message << " (PID: " << siginfo->si_pid << ")";
517 } else {
518 message << " ( Could be generated by pthread_kill(), raise(), abort(), alarm() )";
519 }
520 message << " UID: " << siginfo->si_uid;
521 break;
522 default:
523 /* As we have a not expected signal, let's report the structure to help debugging */
524 message << ", si_code : " << siginfo->si_code;
525 message << ", si_value (int): " << siginfo->si_value.sival_int;
526 message << ", si_value (ptr): " << siginfo->si_value.sival_ptr;
527 message << ", si_errno: " << siginfo->si_errno;
528 message << ", si_pid : " << siginfo->si_pid;
529 message << ", si_uid : " << siginfo->si_uid;
530 message << ", si_addr" << siginfo->si_addr;
531 message << ", si_status" << siginfo->si_status;
532 break;
533 }
534 derr << message.str() << dendl;
535 handlers[signum]->handler(signum);
536 }
537 }
538 }
539 lock.unlock();
540 }
541 }
542 return NULL;
543 }
544
545 void queue_signal(int signum) {
546 // If this signal handler is registered, the callback must be
547 // defined. We can do this without the lock because we will never
548 // have the signal handler defined without the handlers entry also
549 // being filled in.
550 ceph_assert(handlers[signum]);
551 int r = write(handlers[signum]->pipefd[1], " ", 1);
552 ceph_assert(r == 1);
553 }
554
555 void queue_signal_info(int signum, siginfo_t *siginfo, void * content) {
556 // If this signal handler is registered, the callback must be
557 // defined. We can do this without the lock because we will never
558 // have the signal handler defined without the handlers entry also
559 // being filled in.
560 ceph_assert(handlers[signum]);
561 memcpy(&handlers[signum]->info_t, siginfo, sizeof(siginfo_t));
562 int r = write(handlers[signum]->pipefd[1], " ", 1);
563 ceph_assert(r == 1);
564 }
565
566 void register_handler(int signum, signal_handler_t handler, bool oneshot);
567 void unregister_handler(int signum, signal_handler_t handler);
568 };
569
570 static SignalHandler *g_signal_handler = NULL;
571
572 static void handler_signal_hook(int signum, siginfo_t * siginfo, void * content) {
573 g_signal_handler->queue_signal_info(signum, siginfo, content);
574 }
575
576 void SignalHandler::register_handler(int signum, signal_handler_t handler, bool oneshot)
577 {
578 int r;
579
580 ceph_assert(signum >= 0 && signum < 32);
581
582 safe_handler *h = new safe_handler;
583
584 r = pipe_cloexec(h->pipefd, 0);
585 ceph_assert(r == 0);
586 r = fcntl(h->pipefd[0], F_SETFL, O_NONBLOCK);
587 ceph_assert(r == 0);
588
589 h->handler = handler;
590 lock.lock();
591 handlers[signum] = h;
592 lock.unlock();
593
594 // signal thread so that it sees our new handler
595 signal_thread();
596
597 // install our handler
598 struct sigaction oldact;
599 struct sigaction act;
600 memset(&act, 0, sizeof(act));
601
602 act.sa_handler = (signal_handler_t)handler_signal_hook;
603 sigfillset(&act.sa_mask); // mask all signals in the handler
604 act.sa_flags = SA_SIGINFO | (oneshot ? SA_RESETHAND : 0);
605 int ret = sigaction(signum, &act, &oldact);
606 ceph_assert(ret == 0);
607 }
608
609 void SignalHandler::unregister_handler(int signum, signal_handler_t handler)
610 {
611 ceph_assert(signum >= 0 && signum < 32);
612 safe_handler *h = handlers[signum];
613 ceph_assert(h);
614 ceph_assert(h->handler == handler);
615
616 // restore to default
617 signal(signum, SIG_DFL);
618
619 // _then_ remove our handlers entry
620 lock.lock();
621 handlers[signum] = NULL;
622 lock.unlock();
623
624 // this will wake up select() so that worker thread sees our handler is gone
625 close(h->pipefd[0]);
626 close(h->pipefd[1]);
627 delete h;
628 }
629
630
631 // -------
632
633 void init_async_signal_handler()
634 {
635 ceph_assert(!g_signal_handler);
636 g_signal_handler = new SignalHandler;
637 }
638
639 void shutdown_async_signal_handler()
640 {
641 ceph_assert(g_signal_handler);
642 delete g_signal_handler;
643 g_signal_handler = NULL;
644 }
645
646 void queue_async_signal(int signum)
647 {
648 ceph_assert(g_signal_handler);
649 g_signal_handler->queue_signal(signum);
650 }
651
652 void register_async_signal_handler(int signum, signal_handler_t handler)
653 {
654 ceph_assert(g_signal_handler);
655 g_signal_handler->register_handler(signum, handler, false);
656 }
657
658 void register_async_signal_handler_oneshot(int signum, signal_handler_t handler)
659 {
660 ceph_assert(g_signal_handler);
661 g_signal_handler->register_handler(signum, handler, true);
662 }
663
664 void unregister_async_signal_handler(int signum, signal_handler_t handler)
665 {
666 ceph_assert(g_signal_handler);
667 g_signal_handler->unregister_handler(signum, handler);
668 }
669
670
671