]>
git.proxmox.com Git - qemu-server.git/blob - qmeventd/qmeventd.c
1 // SPDX-License-Identifier: AGPL-3.0-or-later
3 Copyright (C) 2018 - 2021 Proxmox Server Solutions GmbH
5 Author: Dominik Csapak <d.csapak@proxmox.com>
6 Author: Stefan Reiter <s.reiter@proxmox.com>
10 qmeventd listens on a given socket, and waits for qemu processes to
11 connect. After accepting a connection qmeventd waits for shutdown events
12 followed by the closing of the socket. Once that happens `qm cleanup` will
13 be executed with the following three arguments:
14 VMID <graceful> <guest>
15 Where `graceful` is `1` or `0` depending on whether a shutdown event was observed
16 before the socket got closed. The last argument, `guest`, is also a boolean
17 `1` or `0`, depending on whether the shutdown was requested from the guest OS
34 #include <sys/epoll.h>
35 #include <sys/socket.h>
36 #include <sys/types.h>
44 #define DEFAULT_KILL_TIMEOUT 60
46 static int verbose
= 0;
47 static int kill_timeout
= DEFAULT_KILL_TIMEOUT
;
48 static int epoll_fd
= 0;
49 static const char *progname
;
50 GHashTable
*vm_clients
; // key=vmid (freed on remove), value=*Client (free manually)
51 GSList
*forced_cleanups
;
52 static int needs_cleanup
= 0;
61 fprintf(stderr
, "Usage: %s [-f] [-v] PATH\n", progname
);
62 fprintf(stderr
, " -f run in foreground (default: false)\n");
63 fprintf(stderr
, " -v verbose (default: false)\n");
64 fprintf(stderr
, " -t <s> kill timeout (default: %ds)\n", DEFAULT_KILL_TIMEOUT
);
65 fprintf(stderr
, " PATH use PATH for socket\n");
69 get_pid_from_fd(int fd
)
71 struct ucred credentials
= { .pid
= 0, .uid
= 0, .gid
= 0 };
72 socklen_t len
= sizeof(struct ucred
);
73 log_neg(getsockopt(fd
, SOL_SOCKET
, SO_PEERCRED
, &credentials
, &len
), "getsockopt");
74 return credentials
.pid
;
78 * reads the vmid from /proc/<pid>/cmdline
79 * after the '-id' argument
82 get_vmid_from_pid(pid_t pid
)
84 char filename
[32] = { 0 };
85 int len
= snprintf(filename
, sizeof(filename
), "/proc/%d/cmdline", pid
);
87 fprintf(stderr
, "error during snprintf for %d: %s\n", pid
,
91 if ((size_t)len
>= sizeof(filename
)) {
92 fprintf(stderr
, "error: pid %d too long\n", pid
);
95 FILE *fp
= fopen(filename
, "re");
97 fprintf(stderr
, "error opening %s: %s\n", filename
, strerror(errno
));
101 unsigned long vmid
= 0;
105 while ((rc
= getdelim(&buf
, &buflen
, '\0', fp
)) >= 0) {
106 if (!strcmp(buf
, "-id")) {
115 if (getdelim(&buf
, &buflen
, '\0', fp
) >= 0) {
116 if (buf
[0] == '-' || buf
[0] == '\0') {
117 fprintf(stderr
, "invalid vmid %s\n", buf
);
123 vmid
= strtoul(buf
, &endptr
, 10);
127 } else if (*endptr
!= '\0') {
128 fprintf(stderr
, "invalid vmid %s\n", buf
);
136 fprintf(stderr
, "error parsing vmid for %d: %s\n", pid
, strerror(errno
));
145 must_write(int fd
, const char *buf
, size_t len
)
149 wlen
= write(fd
, buf
, len
);
150 } while (wlen
< 0 && errno
== EINTR
);
152 return (wlen
== (ssize_t
)len
);
156 * qmp handling functions
160 send_qmp_cmd(struct Client
*client
, const char *buf
, size_t len
)
162 if (!must_write(client
->fd
, buf
, len
- 1)) {
163 fprintf(stderr
, "%s: cannot send QMP message\n", client
->qemu
.vmid
);
164 cleanup_client(client
);
169 handle_qmp_handshake(struct Client
*client
)
171 VERBOSE_PRINT("pid%d: got QMP handshake, assuming QEMU client\n", client
->pid
);
173 // extract vmid from cmdline, now that we know it's a QEMU process
174 unsigned long vmid
= get_vmid_from_pid(client
->pid
);
175 int res
= snprintf(client
->qemu
.vmid
, sizeof(client
->qemu
.vmid
), "%lu", vmid
);
176 if (vmid
== 0 || res
< 0 || res
>= (int)sizeof(client
->qemu
.vmid
)) {
177 fprintf(stderr
, "could not get vmid from pid %d\n", client
->pid
);
178 cleanup_client(client
);
182 VERBOSE_PRINT("pid%d: assigned VMID: %s\n", client
->pid
, client
->qemu
.vmid
);
183 client
->type
= CLIENT_QEMU
;
184 if(!g_hash_table_insert(vm_clients
, strdup(client
->qemu
.vmid
), client
)) {
185 // not fatal, just means backup handling won't work
186 fprintf(stderr
, "%s: could not insert client into VMID->client table\n",
190 static const char qmp_answer
[] = "{\"execute\":\"qmp_capabilities\"}\n";
191 send_qmp_cmd(client
, qmp_answer
, sizeof(qmp_answer
));
195 handle_qmp_event(struct Client
*client
, struct json_object
*obj
)
197 struct json_object
*event
;
198 if (!json_object_object_get_ex(obj
, "event", &event
)) {
201 VERBOSE_PRINT("%s: got QMP event: %s\n", client
->qemu
.vmid
, json_object_get_string(event
));
203 if (client
->state
== STATE_TERMINATING
) {
204 // QEMU sometimes sends a second SHUTDOWN after SIGTERM, ignore
205 VERBOSE_PRINT("%s: event was after termination, ignoring\n", client
->qemu
.vmid
);
209 // event, check if shutdown and get guest parameter
210 if (!strcmp(json_object_get_string(event
), "SHUTDOWN")) {
211 client
->qemu
.graceful
= 1;
212 struct json_object
*data
;
213 struct json_object
*guest
;
214 if (json_object_object_get_ex(obj
, "data", &data
) &&
215 json_object_object_get_ex(data
, "guest", &guest
))
217 client
->qemu
.guest
= (unsigned short)json_object_get_boolean(guest
);
220 // check if a backup is running and kill QEMU process if not
221 terminate_check(client
);
226 terminate_check(struct Client
*client
)
228 if (client
->state
!= STATE_IDLE
) {
229 // if we're already in a request, queue this one until after
230 VERBOSE_PRINT("%s: terminate_check queued\n", client
->qemu
.vmid
);
231 client
->qemu
.term_check_queued
= true;
235 client
->qemu
.term_check_queued
= false;
237 VERBOSE_PRINT("%s: query-status\n", client
->qemu
.vmid
);
238 client
->state
= STATE_EXPECT_STATUS_RESP
;
239 static const char qmp_req
[] = "{\"execute\":\"query-status\"}\n";
240 send_qmp_cmd(client
, qmp_req
, sizeof(qmp_req
));
244 handle_qmp_return(struct Client
*client
, struct json_object
*data
, bool error
)
247 const char *msg
= "n/a";
248 struct json_object
*desc
;
249 if (json_object_object_get_ex(data
, "desc", &desc
)) {
250 msg
= json_object_get_string(desc
);
252 fprintf(stderr
, "%s: received error from QMP: %s\n",
253 client
->qemu
.vmid
, msg
);
254 client
->state
= STATE_IDLE
;
258 struct json_object
*status
;
259 json_bool has_status
= data
&&
260 json_object_object_get_ex(data
, "status", &status
);
264 const char *status_str
= json_object_get_string(status
);
265 active
= status_str
&&
266 (!strcmp(status_str
, "running") || !strcmp(status_str
, "paused"));
269 switch (client
->state
) {
270 case STATE_EXPECT_STATUS_RESP
:
271 client
->state
= STATE_IDLE
;
273 VERBOSE_PRINT("%s: got status: VM is active\n", client
->qemu
.vmid
);
274 } else if (!client
->qemu
.backup
) {
275 terminate_client(client
);
277 // if we're in a backup, don't do anything, vzdump will notify
278 // us when the backup finishes
279 VERBOSE_PRINT("%s: not active, but running backup - keep alive\n",
284 // this means we received the empty return from our handshake answer
285 case STATE_HANDSHAKE
:
286 client
->state
= STATE_IDLE
;
287 VERBOSE_PRINT("%s: QMP handshake complete\n", client
->qemu
.vmid
);
291 case STATE_TERMINATING
:
292 VERBOSE_PRINT("%s: spurious return value received\n",
298 if (client
->qemu
.term_check_queued
) {
299 terminate_check(client
);
304 * VZDump specific client functions
308 handle_vzdump_handshake(struct Client
*client
, struct json_object
*data
)
310 client
->state
= STATE_IDLE
;
312 struct json_object
*vmid_obj
;
313 json_bool has_vmid
= data
&& json_object_object_get_ex(data
, "vmid", &vmid_obj
);
316 VERBOSE_PRINT("pid%d: invalid vzdump handshake: no vmid\n", client
->pid
);
320 const char *vmid_str
= json_object_get_string(vmid_obj
);
323 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid is not a string\n", client
->pid
);
327 int res
= snprintf(client
->vzdump
.vmid
, sizeof(client
->vzdump
.vmid
), "%s", vmid_str
);
328 if (res
< 0 || res
>= (int)sizeof(client
->vzdump
.vmid
)) {
329 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid too long or invalid\n", client
->pid
);
333 struct Client
*vmc
= (struct Client
*) g_hash_table_lookup(vm_clients
, client
->vzdump
.vmid
);
335 vmc
->qemu
.backup
= true;
337 // only mark as VZDUMP once we have set everything up, otherwise 'cleanup'
338 // might try to access an invalid value
339 client
->type
= CLIENT_VZDUMP
;
340 VERBOSE_PRINT("%s: vzdump backup started\n", client
->vzdump
.vmid
);
342 VERBOSE_PRINT("%s: vzdump requested backup start for unregistered VM\n", client
->vzdump
.vmid
);
347 * client management functions
351 add_new_client(int client_fd
)
353 struct Client
*client
= calloc(sizeof(struct Client
), 1);
354 if (client
== NULL
) {
355 fprintf(stderr
, "could not add new client - allocation failed!\n");
359 client
->state
= STATE_HANDSHAKE
;
360 client
->type
= CLIENT_NONE
;
361 client
->fd
= client_fd
;
362 client
->pid
= get_pid_from_fd(client_fd
);
363 if (client
->pid
== 0) {
364 fprintf(stderr
, "could not get pid from client\n");
368 struct epoll_event ev
;
370 ev
.data
.ptr
= client
;
371 int res
= epoll_ctl(epoll_fd
, EPOLL_CTL_ADD
, client_fd
, &ev
);
373 perror("epoll_ctl client add");
377 VERBOSE_PRINT("added new client, pid: %d\n", client
->pid
);
381 (void)close(client_fd
);
386 cleanup_qemu_client(struct Client
*client
)
388 unsigned short graceful
= client
->qemu
.graceful
;
389 unsigned short guest
= client
->qemu
.guest
;
390 char vmid
[sizeof(client
->qemu
.vmid
)];
391 strncpy(vmid
, client
->qemu
.vmid
, sizeof(vmid
));
392 g_hash_table_remove(vm_clients
, &vmid
); // frees key, ignore errors
393 VERBOSE_PRINT("%s: executing cleanup (graceful: %d, guest: %d)\n",
394 vmid
, graceful
, guest
);
398 fprintf(stderr
, "fork failed: %s\n", strerror(errno
));
402 char *script
= "/usr/sbin/qm";
408 graceful
? "1" : "0",
413 execvp(script
, args
);
420 cleanup_client(struct Client
*client
)
422 log_neg(epoll_ctl(epoll_fd
, EPOLL_CTL_DEL
, client
->fd
, NULL
), "epoll del");
423 (void)close(client
->fd
);
426 switch (client
->type
) {
428 cleanup_qemu_client(client
);
432 vmc
= (struct Client
*) g_hash_table_lookup(vm_clients
, client
->vzdump
.vmid
);
434 VERBOSE_PRINT("%s: backup ended\n", client
->vzdump
.vmid
);
435 vmc
->qemu
.backup
= false;
436 terminate_check(vmc
);
441 // do nothing, only close socket
445 if (client
->pidfd
> 0) {
446 (void)close(client
->pidfd
);
448 VERBOSE_PRINT("removing %s from forced cleanups\n", client
->qemu
.vmid
);
449 forced_cleanups
= g_slist_remove(forced_cleanups
, client
);
454 terminate_client(struct Client
*client
)
456 VERBOSE_PRINT("%s: terminating client (pid %d)\n", client
->qemu
.vmid
, client
->pid
);
458 client
->state
= STATE_TERMINATING
;
460 // open a pidfd before kill for later cleanup
461 int pidfd
= pidfd_open(client
->pid
, 0);
465 // process already dead for some reason, cleanup done
466 VERBOSE_PRINT("%s: failed to open pidfd, process already dead (pid %d)\n",
467 client
->qemu
.vmid
, client
->pid
);
470 // otherwise fall back to just using the PID directly, but don't
471 // print if we only failed because we're running on an older kernel
475 perror("failed to open QEMU pidfd for cleanup");
480 int err
= kill(client
->pid
, SIGTERM
);
481 log_neg(err
, "kill");
483 time_t timeout
= time(NULL
) + kill_timeout
;
485 client
->pidfd
= pidfd
;
486 client
->timeout
= timeout
;
488 forced_cleanups
= g_slist_prepend(forced_cleanups
, (void *)client
);
493 handle_client(struct Client
*client
)
495 VERBOSE_PRINT("pid%d: entering handle\n", client
->pid
);
498 len
= read(client
->fd
, (client
->buf
+client
->buflen
),
499 sizeof(client
->buf
) - client
->buflen
);
500 } while (len
< 0 && errno
== EINTR
);
503 if (!(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)) {
504 log_neg((int)len
, "read");
505 cleanup_client(client
);
508 } else if (len
== 0) {
509 VERBOSE_PRINT("pid%d: got EOF\n", client
->pid
);
510 cleanup_client(client
);
514 VERBOSE_PRINT("pid%d: read %ld bytes\n", client
->pid
, len
);
515 client
->buflen
+= len
;
517 struct json_tokener
*tok
= json_tokener_new();
518 struct json_object
*jobj
= NULL
;
519 enum json_tokener_error jerr
= json_tokener_success
;
520 while (jerr
== json_tokener_success
&& client
->buflen
!= 0) {
521 jobj
= json_tokener_parse_ex(tok
, client
->buf
, (int)client
->buflen
);
522 jerr
= json_tokener_get_error(tok
);
523 unsigned int offset
= (unsigned int)tok
->char_offset
;
525 case json_tokener_success
:
526 // move rest from buffer to front
527 memmove(client
->buf
, client
->buf
+ offset
, client
->buflen
- offset
);
528 client
->buflen
-= offset
;
529 if (json_object_is_type(jobj
, json_type_object
)) {
530 struct json_object
*obj
;
531 if (json_object_object_get_ex(jobj
, "QMP", &obj
)) {
532 handle_qmp_handshake(client
);
533 } else if (json_object_object_get_ex(jobj
, "event", &obj
)) {
534 handle_qmp_event(client
, jobj
);
535 } else if (json_object_object_get_ex(jobj
, "return", &obj
)) {
536 handle_qmp_return(client
, obj
, false);
537 } else if (json_object_object_get_ex(jobj
, "error", &obj
)) {
538 handle_qmp_return(client
, obj
, true);
539 } else if (json_object_object_get_ex(jobj
, "vzdump", &obj
)) {
540 handle_vzdump_handshake(client
, obj
);
541 } // else ignore message
544 case json_tokener_continue
:
545 if (client
->buflen
>= sizeof(client
->buf
)) {
546 VERBOSE_PRINT("pid%d: msg too large, discarding buffer\n", client
->pid
);
547 memset(client
->buf
, 0, sizeof(client
->buf
));
549 } // else we have enough space try again after next read
552 VERBOSE_PRINT("pid%d: parse error: %d, discarding buffer\n", client
->pid
, jerr
);
553 memset(client
->buf
, 0, client
->buflen
);
557 json_object_put(jobj
);
559 json_tokener_free(tok
);
563 sigkill(void *ptr
, void *time_ptr
)
565 struct Client
*data
= ptr
;
568 if (data
->timeout
!= 0 && data
->timeout
> *(time_t *)time_ptr
) {
572 if (data
->pidfd
> 0) {
573 err
= pidfd_send_signal(data
->pidfd
, SIGKILL
, NULL
, 0);
574 (void)close(data
->pidfd
);
577 err
= kill(data
->pid
, SIGKILL
);
581 if (errno
!= ESRCH
) {
582 fprintf(stderr
, "SIGKILL cleanup of pid '%d' failed - %s\n",
583 data
->pid
, strerror(errno
));
586 fprintf(stderr
, "cleanup failed, terminating pid '%d' with SIGKILL\n",
592 // remove ourselves from the list
593 forced_cleanups
= g_slist_remove(forced_cleanups
, ptr
);
597 handle_forced_cleanup()
599 if (g_slist_length(forced_cleanups
) > 0) {
600 VERBOSE_PRINT("clearing forced cleanup backlog\n");
601 time_t cur_time
= time(NULL
);
602 g_slist_foreach(forced_cleanups
, sigkill
, &cur_time
);
604 needs_cleanup
= g_slist_length(forced_cleanups
) > 0;
608 main(int argc
, char *argv
[])
612 char *socket_path
= NULL
;
615 while ((opt
= getopt(argc
, argv
, "hfvt:")) != -1) {
626 kill_timeout
= strtoul(optarg
, &endptr
, 10);
627 if (errno
!= 0 || *endptr
!= '\0' || kill_timeout
== 0) {
642 if (optind
>= argc
) {
647 signal(SIGCHLD
, SIG_IGN
);
649 socket_path
= argv
[optind
];
651 int sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
652 bail_neg(sock
, "socket");
654 struct sockaddr_un addr
;
655 memset(&addr
, 0, sizeof(addr
));
656 addr
.sun_family
= AF_UNIX
;
657 strncpy(addr
.sun_path
, socket_path
, sizeof(addr
.sun_path
) - 1);
660 bail_neg(bind(sock
, (struct sockaddr
*)&addr
, sizeof(addr
)), "bind");
662 struct epoll_event ev
, events
[1];
663 epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
664 bail_neg(epoll_fd
, "epoll_create1");
668 bail_neg(epoll_ctl(epoll_fd
, EPOLL_CTL_ADD
, sock
, &ev
), "epoll_ctl");
670 bail_neg(listen(sock
, 10), "listen");
673 bail_neg(daemon(0, 1), "daemon");
676 vm_clients
= g_hash_table_new_full(g_str_hash
, g_str_equal
, free
, NULL
);
681 nevents
= epoll_wait(epoll_fd
, events
, 1, needs_cleanup
? 10*1000 : -1);
682 bail_neg(nevents
, "epoll_wait");
684 for (int n
= 0; n
< nevents
; n
++) {
685 if (events
[n
].data
.fd
== sock
) {
687 int conn_sock
= accept4(sock
, NULL
, NULL
, SOCK_NONBLOCK
| SOCK_CLOEXEC
);
688 log_neg(conn_sock
, "accept");
689 if (conn_sock
> -1) {
690 add_new_client(conn_sock
);
693 handle_client((struct Client
*)events
[n
].data
.ptr
);
696 handle_forced_cleanup();