]>
git.proxmox.com Git - qemu-server.git/blob - qmeventd/qmeventd.c
503bce6901cf4ba467ccb516f0ff3d56dabc5a4e
1 // SPDX-License-Identifier: AGPL-3.0-or-later
3 Copyright (C) 2018 - 2021 Proxmox Server Solutions GmbH
5 Author: Dominik Csapak <d.csapak@proxmox.com>
6 Author: Stefan Reiter <s.reiter@proxmox.com>
10 qmeventd listens on a given socket, and waits for qemu processes to
11 connect. After accepting a connection qmeventd waits for shutdown events
12 followed by the closing of the socket. Once that happens `qm cleanup` will
13 be executed with the following three arguments:
14 VMID <graceful> <guest>
15 Where `graceful` can be `1` or `0` depending on whether a shutdown event was observed
16 before the socket got closed. The second parameter `guest` is also a boolean,
17 `1` or `0` depending on whether the shutdown was requested from the guest OS
34 #include <sys/epoll.h>
35 #include <sys/socket.h>
36 #include <sys/types.h>
44 #define DEFAULT_KILL_TIMEOUT 60
46 static int verbose
= 0;
47 static int kill_timeout
= DEFAULT_KILL_TIMEOUT
;
48 static int epoll_fd
= 0;
49 static const char *progname
;
50 GHashTable
*vm_clients
; // key=vmid (freed on remove), value=*Client (free manually)
51 GSList
*forced_cleanups
;
52 static int needs_cleanup
= 0;
61 fprintf(stderr
, "Usage: %s [-f] [-v] PATH\n", progname
);
62 fprintf(stderr
, " -f run in foreground (default: false)\n");
63 fprintf(stderr
, " -v verbose (default: false)\n");
64 fprintf(stderr
, " -t <s> kill timeout (default: %ds)\n", DEFAULT_KILL_TIMEOUT
);
65 fprintf(stderr
, " PATH use PATH for socket\n");
69 get_pid_from_fd(int fd
)
71 struct ucred credentials
= { .pid
= 0, .uid
= 0, .gid
= 0 };
72 socklen_t len
= sizeof(struct ucred
);
73 log_neg(getsockopt(fd
, SOL_SOCKET
, SO_PEERCRED
, &credentials
, &len
), "getsockopt");
74 return credentials
.pid
;
78 * parses the vmid from the qemu.slice entry of /proc/<pid>/cgroup
81 get_vmid_from_pid(pid_t pid
)
83 char filename
[32] = { 0 };
84 int len
= snprintf(filename
, sizeof(filename
), "/proc/%d/cgroup", pid
);
86 fprintf(stderr
, "error during snprintf for %d: %s\n", pid
,
90 if ((size_t)len
>= sizeof(filename
)) {
91 fprintf(stderr
, "error: pid %d too long\n", pid
);
94 FILE *fp
= fopen(filename
, "re");
96 fprintf(stderr
, "error opening %s: %s\n", filename
, strerror(errno
));
100 unsigned long vmid
= 0;
104 while (getline(&buf
, &buflen
, fp
) >= 0) {
105 char *cgroup_path
= strrchr(buf
, ':');
107 fprintf(stderr
, "unexpected cgroup entry %s\n", buf
);
112 if (strncmp(cgroup_path
, "/qemu.slice/", 12)) {
116 char *vmid_start
= strrchr(buf
, '/');
118 fprintf(stderr
, "unexpected cgroup entry %s\n", buf
);
123 if (vmid_start
[0] == '-' || vmid_start
[0] == '\0') {
124 fprintf(stderr
, "invalid vmid in cgroup entry %s\n", buf
);
130 vmid
= strtoul(vmid_start
, &endptr
, 10);
132 fprintf(stderr
, "error parsing vmid for %d: %s\n", pid
, strerror(errno
));
134 } else if (*endptr
!= '.') {
135 fprintf(stderr
, "unexpected cgroup entry %s\n", buf
);
143 fprintf(stderr
, "error parsing vmid for %d: %s\n", pid
, strerror(errno
));
145 fprintf(stderr
, "error parsing vmid for %d: no matching qemu.slice cgroup entry\n", pid
);
/*
 * Writes exactly 'len' bytes from 'buf' to 'fd'.
 *
 * Interrupted calls (EINTR) are retried, and a short write is continued with
 * the remaining bytes instead of being reported as a failure.
 *
 * Returns true once all bytes were written, false if write() fails with any
 * other error (including EAGAIN on a non-blocking descriptor) or stops making
 * progress.
 */
bool
must_write(int fd, const char *buf, size_t len)
{
    size_t done = 0;

    for (;;) {
        ssize_t wlen = write(fd, buf + done, len - done);
        if (wlen < 0) {
            if (errno == EINTR) {
                continue; // interrupted before anything was written, retry
            }
            return false;
        }

        done += (size_t)wlen;
        if (done >= len) {
            return true;
        }
        if (wlen == 0) {
            return false; // no progress, avoid spinning forever
        }
    }
}
166 * qmp handling functions
170 send_qmp_cmd(struct Client
*client
, const char *buf
, size_t len
)
172 if (!must_write(client
->fd
, buf
, len
- 1)) {
173 fprintf(stderr
, "%s: cannot send QMP message\n", client
->qemu
.vmid
);
174 cleanup_client(client
);
179 handle_qmp_handshake(struct Client
*client
)
181 VERBOSE_PRINT("pid%d: got QMP handshake, assuming QEMU client\n", client
->pid
);
183 // extract vmid from the process' cgroup entry, now that we know it's a QEMU process
184 unsigned long vmid
= get_vmid_from_pid(client
->pid
);
185 int res
= snprintf(client
->qemu
.vmid
, sizeof(client
->qemu
.vmid
), "%lu", vmid
);
186 if (vmid
== 0 || res
< 0 || res
>= (int)sizeof(client
->qemu
.vmid
)) {
187 fprintf(stderr
, "could not get vmid from pid %d\n", client
->pid
);
188 cleanup_client(client
);
192 VERBOSE_PRINT("pid%d: assigned VMID: %s\n", client
->pid
, client
->qemu
.vmid
);
193 client
->type
= CLIENT_QEMU
;
194 if(!g_hash_table_insert(vm_clients
, strdup(client
->qemu
.vmid
), client
)) {
195 // not fatal, just means backup handling won't work
196 fprintf(stderr
, "%s: could not insert client into VMID->client table\n",
200 static const char qmp_answer
[] = "{\"execute\":\"qmp_capabilities\"}\n";
201 send_qmp_cmd(client
, qmp_answer
, sizeof(qmp_answer
));
205 handle_qmp_event(struct Client
*client
, struct json_object
*obj
)
207 struct json_object
*event
;
208 if (!json_object_object_get_ex(obj
, "event", &event
)) {
211 VERBOSE_PRINT("%s: got QMP event: %s\n", client
->qemu
.vmid
, json_object_get_string(event
));
213 if (client
->state
== STATE_TERMINATING
) {
214 // QEMU sometimes sends a second SHUTDOWN after SIGTERM, ignore
215 VERBOSE_PRINT("%s: event was after termination, ignoring\n", client
->qemu
.vmid
);
219 // event, check if shutdown and get guest parameter
220 if (!strcmp(json_object_get_string(event
), "SHUTDOWN")) {
221 client
->qemu
.graceful
= 1;
222 struct json_object
*data
;
223 struct json_object
*guest
;
224 if (json_object_object_get_ex(obj
, "data", &data
) &&
225 json_object_object_get_ex(data
, "guest", &guest
))
227 client
->qemu
.guest
= (unsigned short)json_object_get_boolean(guest
);
230 // check if a backup is running and kill QEMU process if not
231 terminate_check(client
);
236 terminate_check(struct Client
*client
)
238 if (client
->state
!= STATE_IDLE
) {
239 // if we're already in a request, queue this one until after
240 VERBOSE_PRINT("%s: terminate_check queued\n", client
->qemu
.vmid
);
241 client
->qemu
.term_check_queued
= true;
245 client
->qemu
.term_check_queued
= false;
247 VERBOSE_PRINT("%s: query-status\n", client
->qemu
.vmid
);
248 client
->state
= STATE_EXPECT_STATUS_RESP
;
249 static const char qmp_req
[] = "{\"execute\":\"query-status\"}\n";
250 send_qmp_cmd(client
, qmp_req
, sizeof(qmp_req
));
254 handle_qmp_return(struct Client
*client
, struct json_object
*data
, bool error
)
257 const char *msg
= "n/a";
258 struct json_object
*desc
;
259 if (json_object_object_get_ex(data
, "desc", &desc
)) {
260 msg
= json_object_get_string(desc
);
262 fprintf(stderr
, "%s: received error from QMP: %s\n",
263 client
->qemu
.vmid
, msg
);
264 client
->state
= STATE_IDLE
;
268 struct json_object
*status
;
269 json_bool has_status
= data
&&
270 json_object_object_get_ex(data
, "status", &status
);
274 const char *status_str
= json_object_get_string(status
);
275 active
= status_str
&&
276 (!strcmp(status_str
, "running") || !strcmp(status_str
, "paused"));
279 switch (client
->state
) {
280 case STATE_EXPECT_STATUS_RESP
:
281 client
->state
= STATE_IDLE
;
283 VERBOSE_PRINT("%s: got status: VM is active\n", client
->qemu
.vmid
);
284 } else if (!client
->qemu
.backup
) {
285 terminate_client(client
);
287 // if we're in a backup, don't do anything, vzdump will notify
288 // us when the backup finishes
289 VERBOSE_PRINT("%s: not active, but running backup - keep alive\n",
294 // this means we received the empty return from our handshake answer
295 case STATE_HANDSHAKE
:
296 client
->state
= STATE_IDLE
;
297 VERBOSE_PRINT("%s: QMP handshake complete\n", client
->qemu
.vmid
);
300 // we expect an empty return object after sending quit
301 case STATE_TERMINATING
:
304 VERBOSE_PRINT("%s: spurious return value received\n",
310 if (client
->qemu
.term_check_queued
) {
311 terminate_check(client
);
316 * VZDump specific client functions
320 handle_vzdump_handshake(struct Client
*client
, struct json_object
*data
)
322 client
->state
= STATE_IDLE
;
324 struct json_object
*vmid_obj
;
325 json_bool has_vmid
= data
&& json_object_object_get_ex(data
, "vmid", &vmid_obj
);
328 VERBOSE_PRINT("pid%d: invalid vzdump handshake: no vmid\n", client
->pid
);
332 const char *vmid_str
= json_object_get_string(vmid_obj
);
335 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid is not a string\n", client
->pid
);
339 int res
= snprintf(client
->vzdump
.vmid
, sizeof(client
->vzdump
.vmid
), "%s", vmid_str
);
340 if (res
< 0 || res
>= (int)sizeof(client
->vzdump
.vmid
)) {
341 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid too long or invalid\n", client
->pid
);
345 struct Client
*vmc
= (struct Client
*) g_hash_table_lookup(vm_clients
, client
->vzdump
.vmid
);
347 vmc
->qemu
.backup
= true;
349 // only mark as VZDUMP once we have set everything up, otherwise 'cleanup'
350 // might try to access an invalid value
351 client
->type
= CLIENT_VZDUMP
;
352 VERBOSE_PRINT("%s: vzdump backup started\n", client
->vzdump
.vmid
);
354 VERBOSE_PRINT("%s: vzdump requested backup start for unregistered VM\n", client
->vzdump
.vmid
);
359 * client management functions
363 add_new_client(int client_fd
)
365 struct Client
*client
= calloc(sizeof(struct Client
), 1);
366 if (client
== NULL
) {
367 fprintf(stderr
, "could not add new client - allocation failed!\n");
371 client
->state
= STATE_HANDSHAKE
;
372 client
->type
= CLIENT_NONE
;
373 client
->fd
= client_fd
;
374 client
->pid
= get_pid_from_fd(client_fd
);
375 if (client
->pid
== 0) {
376 fprintf(stderr
, "could not get pid from client\n");
380 struct epoll_event ev
;
382 ev
.data
.ptr
= client
;
383 int res
= epoll_ctl(epoll_fd
, EPOLL_CTL_ADD
, client_fd
, &ev
);
385 perror("epoll_ctl client add");
389 VERBOSE_PRINT("added new client, pid: %d\n", client
->pid
);
393 (void)close(client_fd
);
398 cleanup_qemu_client(struct Client
*client
)
400 unsigned short graceful
= client
->qemu
.graceful
;
401 unsigned short guest
= client
->qemu
.guest
;
402 char vmid
[sizeof(client
->qemu
.vmid
)];
403 strncpy(vmid
, client
->qemu
.vmid
, sizeof(vmid
));
404 g_hash_table_remove(vm_clients
, &vmid
); // frees key, ignore errors
405 VERBOSE_PRINT("%s: executing cleanup (graceful: %d, guest: %d)\n",
406 vmid
, graceful
, guest
);
410 fprintf(stderr
, "fork failed: %s\n", strerror(errno
));
414 char *script
= "/usr/sbin/qm";
420 graceful
? "1" : "0",
425 execvp(script
, args
);
432 cleanup_client(struct Client
*client
)
434 log_neg(epoll_ctl(epoll_fd
, EPOLL_CTL_DEL
, client
->fd
, NULL
), "epoll del");
435 (void)close(client
->fd
);
438 switch (client
->type
) {
440 cleanup_qemu_client(client
);
444 vmc
= (struct Client
*) g_hash_table_lookup(vm_clients
, client
->vzdump
.vmid
);
446 VERBOSE_PRINT("%s: backup ended\n", client
->vzdump
.vmid
);
447 vmc
->qemu
.backup
= false;
448 terminate_check(vmc
);
453 // do nothing, only close socket
457 if (client
->pidfd
> 0) {
458 (void)close(client
->pidfd
);
460 VERBOSE_PRINT("removing %s from forced cleanups\n", client
->qemu
.vmid
);
461 forced_cleanups
= g_slist_remove(forced_cleanups
, client
);
466 terminate_client(struct Client
*client
)
468 VERBOSE_PRINT("%s: terminating client (pid %d)\n", client
->qemu
.vmid
, client
->pid
);
470 client
->state
= STATE_TERMINATING
;
472 // open a pidfd before kill for later cleanup
473 int pidfd
= pidfd_open(client
->pid
, 0);
477 // process already dead for some reason, cleanup done
478 VERBOSE_PRINT("%s: failed to open pidfd, process already dead (pid %d)\n",
479 client
->qemu
.vmid
, client
->pid
);
482 // otherwise fall back to just using the PID directly, but don't
483 // print if we only failed because we're running on an older kernel
487 perror("failed to open QEMU pidfd for cleanup");
492 // try to send a 'quit' command first, fallback to SIGTERM of the pid
493 static const char qmp_quit_command
[] = "{\"execute\":\"quit\"}\n";
494 VERBOSE_PRINT("%s: sending 'quit' via QMP\n", client
->qemu
.vmid
);
495 if (!must_write(client
->fd
, qmp_quit_command
, sizeof(qmp_quit_command
) - 1)) {
496 VERBOSE_PRINT("%s: sending 'SIGTERM' to pid %d\n", client
->qemu
.vmid
, client
->pid
);
497 int err
= kill(client
->pid
, SIGTERM
);
498 log_neg(err
, "kill");
501 time_t timeout
= time(NULL
) + kill_timeout
;
503 client
->pidfd
= pidfd
;
504 client
->timeout
= timeout
;
506 forced_cleanups
= g_slist_prepend(forced_cleanups
, (void *)client
);
511 handle_client(struct Client
*client
)
513 VERBOSE_PRINT("pid%d: entering handle\n", client
->pid
);
516 len
= read(client
->fd
, (client
->buf
+client
->buflen
),
517 sizeof(client
->buf
) - client
->buflen
);
518 } while (len
< 0 && errno
== EINTR
);
521 if (!(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)) {
522 log_neg((int)len
, "read");
523 cleanup_client(client
);
526 } else if (len
== 0) {
527 VERBOSE_PRINT("pid%d: got EOF\n", client
->pid
);
528 cleanup_client(client
);
532 VERBOSE_PRINT("pid%d: read %ld bytes\n", client
->pid
, len
);
533 client
->buflen
+= len
;
535 struct json_tokener
*tok
= json_tokener_new();
536 struct json_object
*jobj
= NULL
;
537 enum json_tokener_error jerr
= json_tokener_success
;
538 while (jerr
== json_tokener_success
&& client
->buflen
!= 0) {
539 jobj
= json_tokener_parse_ex(tok
, client
->buf
, (int)client
->buflen
);
540 jerr
= json_tokener_get_error(tok
);
541 unsigned int offset
= (unsigned int)tok
->char_offset
;
543 case json_tokener_success
:
544 // move rest from buffer to front
545 memmove(client
->buf
, client
->buf
+ offset
, client
->buflen
- offset
);
546 client
->buflen
-= offset
;
547 if (json_object_is_type(jobj
, json_type_object
)) {
548 struct json_object
*obj
;
549 if (json_object_object_get_ex(jobj
, "QMP", &obj
)) {
550 handle_qmp_handshake(client
);
551 } else if (json_object_object_get_ex(jobj
, "event", &obj
)) {
552 handle_qmp_event(client
, jobj
);
553 } else if (json_object_object_get_ex(jobj
, "return", &obj
)) {
554 handle_qmp_return(client
, obj
, false);
555 } else if (json_object_object_get_ex(jobj
, "error", &obj
)) {
556 handle_qmp_return(client
, obj
, true);
557 } else if (json_object_object_get_ex(jobj
, "vzdump", &obj
)) {
558 handle_vzdump_handshake(client
, obj
);
559 } // else ignore message
562 case json_tokener_continue
:
563 if (client
->buflen
>= sizeof(client
->buf
)) {
564 VERBOSE_PRINT("pid%d: msg too large, discarding buffer\n", client
->pid
);
565 memset(client
->buf
, 0, sizeof(client
->buf
));
567 } // else we have enough space try again after next read
570 VERBOSE_PRINT("pid%d: parse error: %d, discarding buffer\n", client
->pid
, jerr
);
571 memset(client
->buf
, 0, client
->buflen
);
575 json_object_put(jobj
);
577 json_tokener_free(tok
);
581 sigkill(void *ptr
, void *time_ptr
)
583 struct Client
*data
= ptr
;
586 if (data
->timeout
!= 0 && data
->timeout
> *(time_t *)time_ptr
) {
590 if (data
->pidfd
> 0) {
591 err
= pidfd_send_signal(data
->pidfd
, SIGKILL
, NULL
, 0);
592 (void)close(data
->pidfd
);
595 err
= kill(data
->pid
, SIGKILL
);
599 if (errno
!= ESRCH
) {
600 fprintf(stderr
, "SIGKILL cleanup of pid '%d' failed - %s\n",
601 data
->pid
, strerror(errno
));
604 fprintf(stderr
, "cleanup failed, terminating pid '%d' with SIGKILL\n",
610 // remove ourselves from the list
611 forced_cleanups
= g_slist_remove(forced_cleanups
, ptr
);
615 handle_forced_cleanup()
617 if (g_slist_length(forced_cleanups
) > 0) {
618 VERBOSE_PRINT("clearing forced cleanup backlog\n");
619 time_t cur_time
= time(NULL
);
620 g_slist_foreach(forced_cleanups
, sigkill
, &cur_time
);
622 needs_cleanup
= g_slist_length(forced_cleanups
) > 0;
626 main(int argc
, char *argv
[])
630 char *socket_path
= NULL
;
633 while ((opt
= getopt(argc
, argv
, "hfvt:")) != -1) {
644 kill_timeout
= strtoul(optarg
, &endptr
, 10);
645 if (errno
!= 0 || *endptr
!= '\0' || kill_timeout
== 0) {
660 if (optind
>= argc
) {
665 signal(SIGCHLD
, SIG_IGN
);
667 socket_path
= argv
[optind
];
669 int sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
670 bail_neg(sock
, "socket");
672 struct sockaddr_un addr
;
673 memset(&addr
, 0, sizeof(addr
));
674 addr
.sun_family
= AF_UNIX
;
675 strncpy(addr
.sun_path
, socket_path
, sizeof(addr
.sun_path
) - 1);
678 bail_neg(bind(sock
, (struct sockaddr
*)&addr
, sizeof(addr
)), "bind");
680 struct epoll_event ev
, events
[1];
681 epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
682 bail_neg(epoll_fd
, "epoll_create1");
686 bail_neg(epoll_ctl(epoll_fd
, EPOLL_CTL_ADD
, sock
, &ev
), "epoll_ctl");
688 bail_neg(listen(sock
, 10), "listen");
691 bail_neg(daemon(0, 1), "daemon");
694 vm_clients
= g_hash_table_new_full(g_str_hash
, g_str_equal
, free
, NULL
);
699 nevents
= epoll_wait(epoll_fd
, events
, 1, needs_cleanup
? 10*1000 : -1);
700 if (nevents
< 0 && errno
== EINTR
) {
703 bail_neg(nevents
, "epoll_wait");
705 for (int n
= 0; n
< nevents
; n
++) {
706 if (events
[n
].data
.fd
== sock
) {
708 int conn_sock
= accept4(sock
, NULL
, NULL
, SOCK_NONBLOCK
| SOCK_CLOEXEC
);
709 log_neg(conn_sock
, "accept");
710 if (conn_sock
> -1) {
711 add_new_client(conn_sock
);
714 handle_client((struct Client
*)events
[n
].data
.ptr
);
717 handle_forced_cleanup();