]>
git.proxmox.com Git - ceph.git/blob - ceph/src/global/signal_handler.cc
0447f96e1b18138ddc0122a8b9bae9310c88b141
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2011 New Dream Network
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include <sys/utsname.h>
17 #include "include/compat.h"
20 #include "common/ceph_mutex.h"
21 #include "common/BackTrace.h"
22 #include "common/debug.h"
23 #include "common/safe_io.h"
24 #include "common/version.h"
26 #include "include/uuid.h"
27 #include "global/pidfile.h"
28 #include "global/signal_handler.h"
35 #include <sys/types.h>
36 #include "common/errno.h"
38 extern char *sys_siglist
[];
41 #define dout_context g_ceph_context
43 using std::ostringstream
;
46 using ceph::BackTrace
;
47 using ceph::JSONFormatter
;
// Install `handler` as the action for signal `signum` using sigaction().
//
// The visible code zero-fills a `struct sigaction`, stores `handler` in
// sa_handler, clears sa_mask, and calls sigaction(), capturing the previous
// action in `oldact`. Two alternative diagnostics about the sigaction()
// result are formatted into `buf`: one names the signal via sig2str(), the
// other via sig_str().
//
// NOTE(review): several lines of this function are not present in this
// listing. Presumably `flags` is stored in act.sa_flags and the diagnostics
// are only produced when sigaction() fails -- confirm against the full file.
49 void install_sighandler(int signum
, signal_handler_t handler
, int flags
)
52 struct sigaction oldact
;
// Start from an all-zero sigaction before filling in individual fields.
54 memset(&act
, 0, sizeof(act
));
56 act
.sa_handler
= handler
;
// Empty mask: no additional signals are blocked while the handler runs.
57 sigemptyset(&act
.sa_mask
);
60 ret
= sigaction(signum
, &act
, &oldact
);
// Variant of the diagnostic that obtains the signal name with sig2str().
64 char message
[SIG2STR_MAX
];
65 sig2str(signum
,message
);
66 snprintf(buf
, sizeof(buf
), "install_sighandler: sigaction returned "
67 "%d when trying to install a signal handler for %s\n",
// Variant of the diagnostic that obtains the signal name with sig_str().
70 snprintf(buf
, sizeof(buf
), "install_sighandler: sigaction returned "
71 "%d when trying to install a signal handler for %s\n",
72 ret
, sig_str(signum
));
79 void sighup_handler(int signum
)
81 g_ceph_context
->reopen_logs();
// Re-raise `signum` with raise() so that, as the comment below states, the
// default handler can dump core.
//
// Two diagnostics are prepared in `buf`: one for raise() itself failing and
// one for the case where the default handler returned without terminating
// the process.
//
// NOTE(review): the branch selecting between the two messages, and what is
// done with `buf` afterwards, are not visible in this listing.
84 static void reraise_fatal(int signum
)
86 // Use default handler to dump core
87 int ret
= raise(signum
);
89 // Normally, we won't get here. If we do, something is very weird.
// Message for raise() reporting an error.
92 snprintf(buf
, sizeof(buf
), "reraise_fatal: failed to re-raise "
93 "signal %d\n", signum
);
// Message for raise() succeeding but the process still running.
97 snprintf(buf
, sizeof(buf
), "reraise_fatal: default handler for "
98 "signal %d didn't terminate the process?\n", signum
);
105 // /etc/os-release looks like
108 // VERSION="28 (Server Edition)"
115 // VERSION="16.04.3 LTS (Xenial Xerus)"
119 // parse_from_os_release("FOO=bar\nTHIS=\"that\"\n", "FOO=", ...) will
120 // write "bar\0" to out buffer, which is assumed to be as large as the input
//
// Parameters (as far as visible here):
//   file - NUL-terminated text in os-release format
//   key  - the key to look up, including the trailing '=' (e.g. "NAME=")
//   out  - destination buffer for the NUL-terminated value
// Callers in this file treat a return value >= 0 as success.
//
// NOTE(review): strstr() finds `key` anywhere in `file`, not only at the
// start of a line, so "ID=" could also match inside "VERSION_ID=" if that
// line comes first -- confirm whether anchoring is intended.
122 static int parse_from_os_release(
123 const char *file
, const char *key
,
// Locate the first occurrence of the key text.
126 const char *p
= strstr(file
, key
);
// The value spans from just after the key up to the next newline.
130 const char *start
= p
+ strlen(key
);
131 const char *end
= strchr(start
, '\n');
// Value wrapped in double quotes; the handling inside this branch is not
// visible in this listing (presumably the quotes are stripped).
135 if (*start
== '"' && *(end
- 1) == '"') {
// Copy the value and terminate it.
142 memcpy(out
, start
, end
- start
);
143 out
[end
- start
] = 0;
// Handler for fatal signals; it is the handler passed to install_sighandler()
// by install_standard_sighandlers().
//
// From the code visible here it:
//   1. formats a "*** Caught signal" banner with the thread id and name;
//   2. emits text through dout_emergency();
//   3. if g_ceph_context has a non-empty crash_dir, creates a per-crash
//      directory, opens "<dir>/meta" and writes a JSON "crash" record
//      (crash id, timestamp, process/entity name, ceph version, uname
//      fields, os-release fields, assert details, I/O-error details);
//   4. if the log is available and its lock is not held, writes the banner
//      and recent log entries, also redirecting the log to "<dir>/log";
//   5. calls reraise_fatal(signum).
//
// NOTE(review): many lines of this function are missing from this listing,
// so the conditions guarding individual steps are not all visible.
147 static void handle_fatal_signal(int signum
)
149 // This code may itself trigger a SIGSEGV if the heap is corrupt. In that
150 // case, SA_RESETHAND specifies that the default signal handler--
151 // presumably dump core-- will handle it.
153 char pthread_name
[16] = {0}; //limited by 16B include terminating null byte.
154 int r
= ceph_pthread_getname(pthread_self(), pthread_name
, sizeof(pthread_name
));
// Banner variant that names the signal via sig2str().
157 char message
[SIG2STR_MAX
];
158 sig2str(signum
,message
);
159 snprintf(buf
, sizeof(buf
), "*** Caught signal (%s) **\n "
160 "in thread %llx thread_name:%s\n", message
, (unsigned long long)pthread_self(),
// Banner variant that names the signal via sig_str().
163 snprintf(buf
, sizeof(buf
), "*** Caught signal (%s) **\n "
164 "in thread %llx thread_name:%s\n", sig_str(signum
), (unsigned long long)pthread_self(),
170 // TODO: don't use an ostringstream here. It could call malloc(), which we
171 // don't want inside a signal handler.
172 // Also fix the backtrace code not to allocate memory.
176 dout_emergency(oss
.str());
// `base` stays empty unless a crash directory is configured below.
178 char base
[PATH_MAX
] = { 0 };
179 if (g_ceph_context
&&
180 g_ceph_context
->_conf
->crash_dir
.size()) {
184 utime_t now
= ceph_clock_now();
187 uuid
.generate_random();
// Spaces in the id are replaced so it can be used as a directory name.
189 string id
= idss
.str();
190 std::replace(id
.begin(), id
.end(), ' ', '_');
// Per-crash directory: <crash_dir>/<...>; only the owner may access it.
192 snprintf(base
, sizeof(base
), "%s/%s",
193 g_ceph_context
->_conf
->crash_dir
.c_str(),
195 int r
= ::mkdir(base
, 0700);
// Metadata file for this crash.
198 snprintf(fn
, sizeof(fn
)-1, "%s/meta", base
);
199 int fd
= ::open(fn
, O_CREAT
|O_WRONLY
|O_CLOEXEC
, 0600);
201 JSONFormatter
jf(true);
202 jf
.open_object_section("crash");
203 jf
.dump_string("crash_id", id
);
204 now
.gmtime(jf
.dump_stream("timestamp"));
205 jf
.dump_string("process_name", g_process_name
);
206 jf
.dump_string("entity_name", g_ceph_context
->_conf
->name
.to_str());
207 jf
.dump_string("ceph_version", ceph_version_to_str());
// Host identification taken from the utsname structure `u`.
212 jf
.dump_string("utsname_hostname", u
.nodename
);
213 jf
.dump_string("utsname_sysname", u
.sysname
);
214 jf
.dump_string("utsname_release", u
.release
);
215 jf
.dump_string("utsname_version", u
.version
);
216 jf
.dump_string("utsname_machine", u
.machine
);
218 #if defined(__linux__)
// Distribution details are read from /etc/os-release.
220 int in
= ::open("/etc/os-release", O_RDONLY
|O_CLOEXEC
);
// One byte of `buf` is left unread-into, presumably for a terminating NUL.
223 r
= safe_read(in
, buf
, sizeof(buf
)-1);
227 if (parse_from_os_release(buf
, "NAME=", v
) >= 0) {
228 jf
.dump_string("os_name", v
);
230 if (parse_from_os_release(buf
, "ID=", v
) >= 0) {
231 jf
.dump_string("os_id", v
);
233 if (parse_from_os_release(buf
, "VERSION_ID=", v
) >= 0) {
234 jf
.dump_string("os_version_id", v
);
236 if (parse_from_os_release(buf
, "VERSION=", v
) >= 0) {
237 jf
.dump_string("os_version", v
);
// Details of a failed assertion, when one was recorded.
245 if (g_assert_condition
) {
246 jf
.dump_string("assert_condition", g_assert_condition
);
249 jf
.dump_string("assert_func", g_assert_func
);
252 jf
.dump_string("assert_file", g_assert_file
);
255 jf
.dump_unsigned("assert_line", g_assert_line
);
257 if (g_assert_thread_name
[0]) {
258 jf
.dump_string("assert_thread_name", g_assert_thread_name
);
260 if (g_assert_msg
[0]) {
261 jf
.dump_string("assert_msg", g_assert_msg
);
// Details of an I/O error (the guarding condition is not visible here).
266 jf
.dump_bool("io_error", true);
267 if (g_eio_devname
[0]) {
268 jf
.dump_string("io_error_devname", g_eio_devname
);
271 jf
.dump_string("io_error_path", g_eio_path
);
274 jf
.dump_int("io_error_code", g_eio_error
);
277 jf
.dump_int("io_error_optype", g_eio_iotype
);
280 jf
.dump_unsigned("io_error_offset", g_eio_offset
);
283 jf
.dump_unsigned("io_error_length", g_eio_length
);
// Write the accumulated text to the descriptor opened above.
293 string s
= oss
.str();
294 r
= safe_write(fd
, s
.c_str(), s
.size());
298 snprintf(fn
, sizeof(fn
)-1, "%s/done", base
);
303 // avoid recursion back into logging code if that is where
305 if (g_ceph_context
&&
306 g_ceph_context
->_log
&&
307 !g_ceph_context
->_log
->is_inside_log_lock()) {
308 // dump to log. this uses the heap extensively, but we're better
309 // off trying than not.
310 derr
<< buf
<< std::endl
;
312 *_dout
<< " NOTE: a copy of the executable, or `objdump -rdS <executable>` "
313 << "is needed to interpret this.\n"
316 g_ceph_context
->_log
->dump_recent();
// Point the log at "<base>/log", reopen it, and dump recent entries again.
320 snprintf(fn
, sizeof(fn
)-1, "%s/log", base
);
321 g_ceph_context
->_log
->set_log_file(fn
);
322 g_ceph_context
->_log
->reopen_log_file();
323 g_ceph_context
->_log
->dump_recent();
328 // if this was an EIO crash, we don't need to trigger a core dump,
329 // since the problem is hardware, or some layer beneath us.
332 reraise_fatal(signum
);
336 void install_standard_sighandlers(void)
338 install_sighandler(SIGSEGV
, handle_fatal_signal
, SA_RESETHAND
| SA_NODEFER
);
339 install_sighandler(SIGABRT
, handle_fatal_signal
, SA_RESETHAND
| SA_NODEFER
);
340 install_sighandler(SIGBUS
, handle_fatal_signal
, SA_RESETHAND
| SA_NODEFER
);
341 install_sighandler(SIGILL
, handle_fatal_signal
, SA_RESETHAND
| SA_NODEFER
);
342 install_sighandler(SIGFPE
, handle_fatal_signal
, SA_RESETHAND
| SA_NODEFER
);
343 install_sighandler(SIGXCPU
, handle_fatal_signal
, SA_RESETHAND
| SA_NODEFER
);
344 install_sighandler(SIGXFSZ
, handle_fatal_signal
, SA_RESETHAND
| SA_NODEFER
);
345 install_sighandler(SIGSYS
, handle_fatal_signal
, SA_RESETHAND
| SA_NODEFER
);
350 /// --- safe handler ---
352 #include "common/Thread.h"
// Return a name for process `pid`, obtained with proc_pidpath().
//
// The bytes written by proc_pidpath() into `buf` are returned as a string of
// length `ret`. A failure is reported through derr.
//
// NOTE(review): the preprocessor condition selecting this variant over the
// /proc-based one below, and the failure branch's return value, are not
// visible in this listing.
358 string
get_name_by_pid(pid_t pid
)
360 char buf
[PROC_PIDPATHINFO_MAXSIZE
];
361 int ret
= proc_pidpath(pid
, buf
, sizeof(buf
));
// Diagnostic for a failed lookup.
363 derr
<< "Fail to proc_pidpath(" << pid
<< ")"
364 << " error = " << cpp_strerror(ret
)
368 return string(buf
, ret
);
// Return the command line of process `pid`, read from
// PROCPREFIX "/proc/<pid>/cmdline".
//
// The NUL separators between arguments are replaced with spaces and the
// result is returned as a string of the number of bytes read. Failures to
// open or read the file are reported through derr.
//
// NOTE(review): the special-case for pid 0 and the values returned on the
// failure paths are not visible in this listing.
371 string
get_name_by_pid(pid_t pid
)
373 // If the PID is 0, it means the sender is the kernel itself
377 char proc_pid_path
[PATH_MAX
] = {0};
378 snprintf(proc_pid_path
, PATH_MAX
, PROCPREFIX
"/proc/%d/cmdline", pid
);
379 int fd
= open(proc_pid_path
, O_RDONLY
);
// Diagnostic for a failed open().
383 derr
<< "Fail to open '" << proc_pid_path
384 << "' error = " << cpp_strerror(fd
)
388 // assuming the cmdline length does not exceed PATH_MAX. if it
389 // really does, it's fine to return a truncated version.
390 char buf
[PATH_MAX
] = {0};
391 int ret
= read(fd
, buf
, sizeof(buf
));
// Diagnostic for a failed read().
395 derr
<< "Fail to read '" << proc_pid_path
396 << "' error = " << cpp_strerror(ret
)
// cmdline separates arguments with NUL bytes; make them spaces for display.
400 std::replace(buf
, buf
+ ret
, '\0', ' ');
401 return string(buf
, ret
);
406 * safe async signal handler / dispatcher
408 * This is an async unix signal handler based on the design from
410 * http://evbergen.home.xs4all.nl/unix-signals.html
413 * - no unsafe work is done in the signal handler itself
414 * - callbacks are called from a regular thread
415 * - signals are not lost, unless the same signal is sent more than
416 * once in quick succession.
// Dispatcher thread for asynchronously handled signals.
//
// As visible here: the object owns a control pipe (`pipefd`) used to wake the
// thread, plus one `safe_handler` per registered signal number (0..31), each
// with its own pipe, callback and stored siginfo. queue_signal() and
// queue_signal_info() write one byte to the per-signal pipe; entry() poll()s
// all pipes and, for signals whose pipe had data, logs a description of the
// sender and invokes the registered callback.
//
// NOTE(review): many lines of this struct are missing from this listing
// (e.g. the shutdown flag, the constructor/destructor headers and the
// conditions inside entry()), so the comments below describe only what is
// shown.
418 struct SignalHandler
: public Thread
{
419 /// to kick the thread, for shutdown, new handlers, etc.
420 int pipefd
[2]; // write to [1], read from [0]
422 /// to signal shutdown
425 /// for an individual signal
426 struct safe_handler
{
// Zero-initialise all members of a new entry.
429 memset(pipefd
, 0, sizeof(pipefd
));
430 memset(&handler
, 0, sizeof(handler
));
431 memset(&info_t
, 0, sizeof(info_t
));
435 int pipefd
[2]; // write to [1], read from [0]
436 signal_handler_t handler
;
// One slot per signal number; nullptr means no handler is registered.
440 safe_handler
*handlers
[32] = {nullptr};
442 /// to protect the handlers array
443 ceph::mutex lock
= ceph::make_mutex("SignalHandler::lock");
446 // create signal pipe
447 int r
= pipe_cloexec(pipefd
, 0);
// The read end is non-blocking so draining it cannot stall the thread.
449 r
= fcntl(pipefd
[0], F_SETFL
, O_NONBLOCK
);
// Start the dispatcher thread.
453 create("signal_handler");
456 ~SignalHandler() override
{
// Wake the dispatcher thread by writing one byte to the control pipe.
460 void signal_thread() {
461 int r
= write(pipefd
[1], "\0", 1);
471 // thread entry point
472 void *entry() override
{
// Room for the control pipe plus one pipe per possible signal (32).
475 struct pollfd fds
[33];
// First entry: the control pipe.
479 fds
[num_fds
].fd
= pipefd
[0];
480 fds
[num_fds
].events
= POLLIN
| POLLERR
;
481 fds
[num_fds
].revents
= 0;
// Remaining entries: the read end of each registered handler's pipe.
483 for (unsigned i
=0; i
<32; i
++) {
485 fds
[num_fds
].fd
= handlers
[i
]->pipefd
[0];
486 fds
[num_fds
].events
= POLLIN
| POLLERR
;
487 fds
[num_fds
].revents
= 0;
493 // wait for data on any of those pipes
494 int r
= poll(fds
, num_fds
, -1);
500 // consume byte from signal socket, if any.
501 TEMP_FAILURE_RETRY(read(pipefd
[0], &v
, 1));
// Check every registered signal's pipe for a queued byte.
504 for (unsigned signum
=0; signum
<32; signum
++) {
505 if (handlers
[signum
]) {
506 r
= read(handlers
[signum
]->pipefd
[0], &v
, 1);
// Build a log line describing the signal from the stored siginfo.
508 siginfo_t
* siginfo
= &handlers
[signum
]->info_t
;
509 ostringstream message
;
510 message
<< "received signal: " << sig_str(signum
);
511 switch (siginfo
->si_code
) {
513 message
<< " from " << get_name_by_pid(siginfo
->si_pid
);
514 // If PID is undefined, it doesn't have a meaning to be displayed
515 if (siginfo
->si_pid
) {
516 message
<< " (PID: " << siginfo
->si_pid
<< ")";
518 message
<< " ( Could be generated by pthread_kill(), raise(), abort(), alarm() )";
520 message
<< " UID: " << siginfo
->si_uid
;
523 /* As we have a not expected signal, let's report the structure to help debugging */
524 message
<< ", si_code : " << siginfo
->si_code
;
525 message
<< ", si_value (int): " << siginfo
->si_value
.sival_int
;
526 message
<< ", si_value (ptr): " << siginfo
->si_value
.sival_ptr
;
527 message
<< ", si_errno: " << siginfo
->si_errno
;
528 message
<< ", si_pid : " << siginfo
->si_pid
;
529 message
<< ", si_uid : " << siginfo
->si_uid
;
530 message
<< ", si_addr" << siginfo
->si_addr
;
531 message
<< ", si_status" << siginfo
->si_status
;
// Log the description, then run the registered callback.
534 derr
<< message
.str() << dendl
;
535 handlers
[signum
]->handler(signum
);
// Queue `signum` for the dispatcher thread by writing one byte to the
// handler's pipe; a handler for `signum` must already be registered.
545 void queue_signal(int signum
) {
546 // If this signal handler is registered, the callback must be
547 // defined. We can do this without the lock because we will never
548 // have the signal handler defined without the handlers entry also
550 ceph_assert(handlers
[signum
]);
551 int r
= write(handlers
[signum
]->pipefd
[1], " ", 1);
// Same as queue_signal(), but first stores a copy of `siginfo` in the
// handler entry so entry() can report who sent the signal. `content` is not
// used in the lines visible here.
555 void queue_signal_info(int signum
, siginfo_t
*siginfo
, void * content
) {
556 // If this signal handler is registered, the callback must be
557 // defined. We can do this without the lock because we will never
558 // have the signal handler defined without the handlers entry also
560 ceph_assert(handlers
[signum
]);
561 memcpy(&handlers
[signum
]->info_t
, siginfo
, sizeof(siginfo_t
));
562 int r
= write(handlers
[signum
]->pipefd
[1], " ", 1);
// Defined out of line further down in this file.
566 void register_handler(int signum
, signal_handler_t handler
, bool oneshot
);
567 void unregister_handler(int signum
, signal_handler_t handler
);
570 static SignalHandler
*g_signal_handler
= NULL
;
572 static void handler_signal_hook(int signum
, siginfo_t
* siginfo
, void * content
) {
573 g_signal_handler
->queue_signal_info(signum
, siginfo
, content
);
// Register `handler` as the asynchronous callback for `signum`.
//
// As visible here: a new safe_handler entry is allocated, given its own
// close-on-exec pipe with a non-blocking read end, stored in
// handlers[signum], and then handler_signal_hook is installed for the signal
// with sigaction(). With `oneshot` set, SA_RESETHAND is added so the
// disposition is reset after the first delivery.
//
// `signum` must be in [0, 32); sigaction() failing trips an assertion.
//
// NOTE(review): locking around the handlers[] update and the call that wakes
// the dispatcher thread are not visible in this listing.
576 void SignalHandler::register_handler(int signum
, signal_handler_t handler
, bool oneshot
)
580 ceph_assert(signum
>= 0 && signum
< 32);
582 safe_handler
*h
= new safe_handler
;
// Per-signal pipe used to pass the signal to the dispatcher thread.
584 r
= pipe_cloexec(h
->pipefd
, 0);
586 r
= fcntl(h
->pipefd
[0], F_SETFL
, O_NONBLOCK
);
589 h
->handler
= handler
;
591 handlers
[signum
] = h
;
594 // signal thread so that it sees our new handler
597 // install our handler
598 struct sigaction oldact
;
599 struct sigaction act
;
600 memset(&act
, 0, sizeof(act
));
// NOTE(review): SA_SIGINFO is set below, yet the three-argument hook is
// stored through sa_handler with a cast rather than through sa_sigaction;
// this relies on the two members sharing storage -- confirm for all targets.
602 act
.sa_handler
= (signal_handler_t
)handler_signal_hook
;
603 sigfillset(&act
.sa_mask
); // mask all signals in the handler
604 act
.sa_flags
= SA_SIGINFO
| (oneshot
? SA_RESETHAND
: 0);
605 int ret
= sigaction(signum
, &act
, &oldact
);
606 ceph_assert(ret
== 0);
// Remove the asynchronous callback registered for `signum`.
//
// As visible here: asserts that `signum` is in [0, 32) and that the entry's
// callback equals `handler`, restores the default disposition with
// signal(signum, SIG_DFL), and only afterwards clears handlers[signum].
//
// NOTE(review): locking, the wake-up of the dispatcher thread and the
// release of the entry's pipe and memory are not visible in this listing.
609 void SignalHandler::unregister_handler(int signum
, signal_handler_t handler
)
611 ceph_assert(signum
>= 0 && signum
< 32);
612 safe_handler
*h
= handlers
[signum
];
614 ceph_assert(h
->handler
== handler
);
616 // restore to default
617 signal(signum
, SIG_DFL
);
619 // _then_ remove our handlers entry
621 handlers
[signum
] = NULL
;
624 // this will wake up poll() so that worker thread sees our handler is gone
633 void init_async_signal_handler()
635 ceph_assert(!g_signal_handler
);
636 g_signal_handler
= new SignalHandler
;
639 void shutdown_async_signal_handler()
641 ceph_assert(g_signal_handler
);
642 delete g_signal_handler
;
643 g_signal_handler
= NULL
;
646 void queue_async_signal(int signum
)
648 ceph_assert(g_signal_handler
);
649 g_signal_handler
->queue_signal(signum
);
652 void register_async_signal_handler(int signum
, signal_handler_t handler
)
654 ceph_assert(g_signal_handler
);
655 g_signal_handler
->register_handler(signum
, handler
, false);
658 void register_async_signal_handler_oneshot(int signum
, signal_handler_t handler
)
660 ceph_assert(g_signal_handler
);
661 g_signal_handler
->register_handler(signum
, handler
, true);
664 void unregister_async_signal_handler(int signum
, signal_handler_t handler
)
666 ceph_assert(g_signal_handler
);
667 g_signal_handler
->unregister_handler(signum
, handler
);