]>
git.proxmox.com Git - qemu-server.git/blob - qmeventd/qmeventd.c
1 // SPDX-License-Identifier: AGPL-3.0-or-later
3 Copyright (C) 2018 - 2021 Proxmox Server Solutions GmbH
5 Author: Dominik Csapak <d.csapak@proxmox.com>
6 Author: Stefan Reiter <s.reiter@proxmox.com>
qmeventd listens on a given socket and waits for QEMU processes to
connect. After accepting a connection, qmeventd waits for shutdown events
followed by the closing of the socket. Once that happens, `qm cleanup` will
be executed with the following three arguments:
    VMID <graceful> <guest>
where `graceful` is `1` or `0` depending on whether a shutdown event was
observed before the socket got closed. The second parameter, `guest`, is also
a boolean `1` or `0`, depending on whether the shutdown was requested from the guest OS
34 #include <sys/epoll.h>
35 #include <sys/socket.h>
36 #include <sys/types.h>
44 #define DEFAULT_KILL_TIMEOUT 60
46 static int verbose
= 0;
47 static int kill_timeout
= DEFAULT_KILL_TIMEOUT
;
48 static int epoll_fd
= 0;
49 static const char *progname
;
50 GHashTable
*vm_clients
; // key=vmid (freed on remove), value=*Client (free manually)
51 GSList
*forced_cleanups
;
52 static int needs_cleanup
= 0;
61 fprintf(stderr
, "Usage: %s [-f] [-v] PATH\n", progname
);
62 fprintf(stderr
, " -f run in foreground (default: false)\n");
63 fprintf(stderr
, " -v verbose (default: false)\n");
64 fprintf(stderr
, " -t <s> kill timeout (default: %ds)\n", DEFAULT_KILL_TIMEOUT
);
65 fprintf(stderr
, " PATH use PATH for socket\n");
69 get_pid_from_fd(int fd
)
71 struct ucred credentials
= { .pid
= 0, .uid
= 0, .gid
= 0 };
72 socklen_t len
= sizeof(struct ucred
);
73 log_neg(getsockopt(fd
, SOL_SOCKET
, SO_PEERCRED
, &credentials
, &len
), "getsockopt");
74 return credentials
.pid
;
78 * parses the vmid from the qemu.slice entry of /proc/<pid>/cgroup
81 get_vmid_from_pid(pid_t pid
)
83 char filename
[32] = { 0 };
84 int len
= snprintf(filename
, sizeof(filename
), "/proc/%d/cgroup", pid
);
86 fprintf(stderr
, "error during snprintf for %d: %s\n", pid
,
90 if ((size_t)len
>= sizeof(filename
)) {
91 fprintf(stderr
, "error: pid %d too long\n", pid
);
94 FILE *fp
= fopen(filename
, "re");
96 fprintf(stderr
, "error opening %s: %s\n", filename
, strerror(errno
));
100 unsigned long vmid
= 0;
104 while (getline(&buf
, &buflen
, fp
) >= 0) {
105 char *cgroup_path
= strrchr(buf
, ':');
107 fprintf(stderr
, "unexpected cgroup entry %s\n", buf
);
112 if (strncmp(cgroup_path
, "/qemu.slice/", 12)) {
116 char *vmid_start
= strrchr(buf
, '/');
118 fprintf(stderr
, "unexpected cgroup entry %s\n", buf
);
123 if (vmid_start
[0] == '-' || vmid_start
[0] == '\0') {
124 fprintf(stderr
, "invalid vmid in cgroup entry %s\n", buf
);
130 vmid
= strtoul(vmid_start
, &endptr
, 10);
131 if (!endptr
|| strncmp(endptr
, ".scope", 6)) {
132 fprintf(stderr
, "unexpected cgroup entry %s\n", buf
);
137 fprintf(stderr
, "error parsing vmid for %d: %s\n", pid
, strerror(errno
));
145 fprintf(stderr
, "error parsing vmid for %d: %s\n", pid
, strerror(errno
));
147 fprintf(stderr
, "error parsing vmid for %d: no matching qemu.slice cgroup entry\n", pid
);
/*
 * Writes exactly `len` bytes from `buf` to `fd`.
 *
 * write() calls interrupted by a signal (EINTR) are retried, and a short
 * write is continued from where it stopped instead of being treated as a
 * failure, so a message is never left half-sent merely because the kernel
 * accepted it in pieces.
 *
 * Returns true only if all `len` bytes were written. Any other write error
 * (including EAGAIN/EWOULDBLOCK on a non-blocking fd) yields false; bytes
 * that were already written at that point are not taken back.
 */
static bool
must_write(int fd, const char *buf, size_t len)
{
    size_t done = 0;

    for (;;) {
        ssize_t wlen = write(fd, buf + done, len - done);
        if (wlen < 0) {
            if (errno == EINTR) {
                continue;
            }
            return false;
        }

        done += (size_t)wlen;
        if (done == len) {
            return true;
        }
        if (wlen == 0) {
            // no progress and no error: give up rather than spin forever
            return false;
        }
    }
}
168 * qmp handling functions
172 send_qmp_cmd(struct Client
*client
, const char *buf
, size_t len
)
174 if (!must_write(client
->fd
, buf
, len
- 1)) {
175 fprintf(stderr
, "%s: cannot send QMP message\n", client
->qemu
.vmid
);
176 cleanup_client(client
);
181 handle_qmp_handshake(struct Client
*client
)
183 VERBOSE_PRINT("pid%d: got QMP handshake, assuming QEMU client\n", client
->pid
);
185 // extract vmid from cmdline, now that we know it's a QEMU process
186 unsigned long vmid
= get_vmid_from_pid(client
->pid
);
187 int res
= snprintf(client
->qemu
.vmid
, sizeof(client
->qemu
.vmid
), "%lu", vmid
);
188 if (vmid
== 0 || res
< 0 || res
>= (int)sizeof(client
->qemu
.vmid
)) {
189 fprintf(stderr
, "could not get vmid from pid %d\n", client
->pid
);
190 cleanup_client(client
);
194 VERBOSE_PRINT("pid%d: assigned VMID: %s\n", client
->pid
, client
->qemu
.vmid
);
195 client
->type
= CLIENT_QEMU
;
196 if(!g_hash_table_insert(vm_clients
, strdup(client
->qemu
.vmid
), client
)) {
197 // not fatal, just means backup handling won't work
198 fprintf(stderr
, "%s: could not insert client into VMID->client table\n",
202 static const char qmp_answer
[] = "{\"execute\":\"qmp_capabilities\"}\n";
203 send_qmp_cmd(client
, qmp_answer
, sizeof(qmp_answer
));
207 handle_qmp_event(struct Client
*client
, struct json_object
*obj
)
209 struct json_object
*event
;
210 if (!json_object_object_get_ex(obj
, "event", &event
)) {
213 VERBOSE_PRINT("%s: got QMP event: %s\n", client
->qemu
.vmid
, json_object_get_string(event
));
215 if (client
->state
== STATE_TERMINATING
) {
216 // QEMU sometimes sends a second SHUTDOWN after SIGTERM, ignore
217 VERBOSE_PRINT("%s: event was after termination, ignoring\n", client
->qemu
.vmid
);
221 // event, check if shutdown and get guest parameter
222 if (!strcmp(json_object_get_string(event
), "SHUTDOWN")) {
223 client
->qemu
.graceful
= 1;
224 struct json_object
*data
;
225 struct json_object
*guest
;
226 if (json_object_object_get_ex(obj
, "data", &data
) &&
227 json_object_object_get_ex(data
, "guest", &guest
))
229 client
->qemu
.guest
= (unsigned short)json_object_get_boolean(guest
);
232 // check if a backup is running and kill QEMU process if not
233 terminate_check(client
);
238 terminate_check(struct Client
*client
)
240 if (client
->state
!= STATE_IDLE
) {
241 // if we're already in a request, queue this one until after
242 VERBOSE_PRINT("%s: terminate_check queued\n", client
->qemu
.vmid
);
243 client
->qemu
.term_check_queued
= true;
247 client
->qemu
.term_check_queued
= false;
249 VERBOSE_PRINT("%s: query-status\n", client
->qemu
.vmid
);
250 client
->state
= STATE_EXPECT_STATUS_RESP
;
251 static const char qmp_req
[] = "{\"execute\":\"query-status\"}\n";
252 send_qmp_cmd(client
, qmp_req
, sizeof(qmp_req
));
256 handle_qmp_return(struct Client
*client
, struct json_object
*data
, bool error
)
259 const char *msg
= "n/a";
260 struct json_object
*desc
;
261 if (json_object_object_get_ex(data
, "desc", &desc
)) {
262 msg
= json_object_get_string(desc
);
264 fprintf(stderr
, "%s: received error from QMP: %s\n",
265 client
->qemu
.vmid
, msg
);
266 client
->state
= STATE_IDLE
;
270 struct json_object
*status
;
271 json_bool has_status
= data
&&
272 json_object_object_get_ex(data
, "status", &status
);
276 const char *status_str
= json_object_get_string(status
);
277 active
= status_str
&&
278 (!strcmp(status_str
, "running") || !strcmp(status_str
, "paused"));
281 switch (client
->state
) {
282 case STATE_EXPECT_STATUS_RESP
:
283 client
->state
= STATE_IDLE
;
285 VERBOSE_PRINT("%s: got status: VM is active\n", client
->qemu
.vmid
);
286 } else if (!client
->qemu
.backup
) {
287 terminate_client(client
);
289 // if we're in a backup, don't do anything, vzdump will notify
290 // us when the backup finishes
291 VERBOSE_PRINT("%s: not active, but running backup - keep alive\n",
296 // this means we received the empty return from our handshake answer
297 case STATE_HANDSHAKE
:
298 client
->state
= STATE_IDLE
;
299 VERBOSE_PRINT("%s: QMP handshake complete\n", client
->qemu
.vmid
);
302 // we expect an empty return object after sending quit
303 case STATE_TERMINATING
:
306 VERBOSE_PRINT("%s: spurious return value received\n",
312 if (client
->qemu
.term_check_queued
) {
313 terminate_check(client
);
318 * VZDump specific client functions
322 handle_vzdump_handshake(struct Client
*client
, struct json_object
*data
)
324 client
->state
= STATE_IDLE
;
326 struct json_object
*vmid_obj
;
327 json_bool has_vmid
= data
&& json_object_object_get_ex(data
, "vmid", &vmid_obj
);
330 VERBOSE_PRINT("pid%d: invalid vzdump handshake: no vmid\n", client
->pid
);
334 const char *vmid_str
= json_object_get_string(vmid_obj
);
337 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid is not a string\n", client
->pid
);
341 int res
= snprintf(client
->vzdump
.vmid
, sizeof(client
->vzdump
.vmid
), "%s", vmid_str
);
342 if (res
< 0 || res
>= (int)sizeof(client
->vzdump
.vmid
)) {
343 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid too long or invalid\n", client
->pid
);
347 struct Client
*vmc
= (struct Client
*) g_hash_table_lookup(vm_clients
, client
->vzdump
.vmid
);
349 vmc
->qemu
.backup
= true;
351 // only mark as VZDUMP once we have set everything up, otherwise 'cleanup'
352 // might try to access an invalid value
353 client
->type
= CLIENT_VZDUMP
;
354 VERBOSE_PRINT("%s: vzdump backup started\n", client
->vzdump
.vmid
);
356 VERBOSE_PRINT("%s: vzdump requested backup start for unregistered VM\n", client
->vzdump
.vmid
);
361 * client management functions
365 add_new_client(int client_fd
)
367 struct Client
*client
= calloc(sizeof(struct Client
), 1);
368 if (client
== NULL
) {
369 fprintf(stderr
, "could not add new client - allocation failed!\n");
373 client
->state
= STATE_HANDSHAKE
;
374 client
->type
= CLIENT_NONE
;
375 client
->fd
= client_fd
;
376 client
->pid
= get_pid_from_fd(client_fd
);
377 if (client
->pid
== 0) {
378 fprintf(stderr
, "could not get pid from client\n");
382 struct epoll_event ev
;
384 ev
.data
.ptr
= client
;
385 int res
= epoll_ctl(epoll_fd
, EPOLL_CTL_ADD
, client_fd
, &ev
);
387 perror("epoll_ctl client add");
391 VERBOSE_PRINT("added new client, pid: %d\n", client
->pid
);
395 (void)close(client_fd
);
400 cleanup_qemu_client(struct Client
*client
)
402 unsigned short graceful
= client
->qemu
.graceful
;
403 unsigned short guest
= client
->qemu
.guest
;
404 char vmid
[sizeof(client
->qemu
.vmid
)];
405 strncpy(vmid
, client
->qemu
.vmid
, sizeof(vmid
));
406 g_hash_table_remove(vm_clients
, &vmid
); // frees key, ignore errors
407 VERBOSE_PRINT("%s: executing cleanup (graceful: %d, guest: %d)\n",
408 vmid
, graceful
, guest
);
412 fprintf(stderr
, "fork failed: %s\n", strerror(errno
));
416 char *script
= "/usr/sbin/qm";
422 graceful
? "1" : "0",
427 execvp(script
, args
);
434 cleanup_client(struct Client
*client
)
436 log_neg(epoll_ctl(epoll_fd
, EPOLL_CTL_DEL
, client
->fd
, NULL
), "epoll del");
437 (void)close(client
->fd
);
440 switch (client
->type
) {
442 cleanup_qemu_client(client
);
446 vmc
= (struct Client
*) g_hash_table_lookup(vm_clients
, client
->vzdump
.vmid
);
448 VERBOSE_PRINT("%s: backup ended\n", client
->vzdump
.vmid
);
449 vmc
->qemu
.backup
= false;
450 terminate_check(vmc
);
455 // do nothing, only close socket
459 if (client
->pidfd
> 0) {
460 (void)close(client
->pidfd
);
462 VERBOSE_PRINT("removing %s from forced cleanups\n", client
->qemu
.vmid
);
463 forced_cleanups
= g_slist_remove(forced_cleanups
, client
);
468 terminate_client(struct Client
*client
)
470 VERBOSE_PRINT("%s: terminating client (pid %d)\n", client
->qemu
.vmid
, client
->pid
);
472 client
->state
= STATE_TERMINATING
;
474 // open a pidfd before kill for later cleanup
475 int pidfd
= pidfd_open(client
->pid
, 0);
479 // process already dead for some reason, cleanup done
480 VERBOSE_PRINT("%s: failed to open pidfd, process already dead (pid %d)\n",
481 client
->qemu
.vmid
, client
->pid
);
484 // otherwise fall back to just using the PID directly, but don't
485 // print if we only failed because we're running on an older kernel
489 perror("failed to open QEMU pidfd for cleanup");
494 // try to send a 'quit' command first, fallback to SIGTERM of the pid
495 static const char qmp_quit_command
[] = "{\"execute\":\"quit\"}\n";
496 VERBOSE_PRINT("%s: sending 'quit' via QMP\n", client
->qemu
.vmid
);
497 if (!must_write(client
->fd
, qmp_quit_command
, sizeof(qmp_quit_command
) - 1)) {
498 VERBOSE_PRINT("%s: sending 'SIGTERM' to pid %d\n", client
->qemu
.vmid
, client
->pid
);
499 int err
= kill(client
->pid
, SIGTERM
);
500 log_neg(err
, "kill");
503 time_t timeout
= time(NULL
) + kill_timeout
;
505 client
->pidfd
= pidfd
;
506 client
->timeout
= timeout
;
508 forced_cleanups
= g_slist_prepend(forced_cleanups
, (void *)client
);
513 handle_client(struct Client
*client
)
515 VERBOSE_PRINT("pid%d: entering handle\n", client
->pid
);
518 len
= read(client
->fd
, (client
->buf
+client
->buflen
),
519 sizeof(client
->buf
) - client
->buflen
);
520 } while (len
< 0 && errno
== EINTR
);
523 if (!(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)) {
524 log_neg((int)len
, "read");
525 cleanup_client(client
);
528 } else if (len
== 0) {
529 VERBOSE_PRINT("pid%d: got EOF\n", client
->pid
);
530 cleanup_client(client
);
534 VERBOSE_PRINT("pid%d: read %ld bytes\n", client
->pid
, len
);
535 client
->buflen
+= len
;
537 struct json_tokener
*tok
= json_tokener_new();
538 struct json_object
*jobj
= NULL
;
539 enum json_tokener_error jerr
= json_tokener_success
;
540 while (jerr
== json_tokener_success
&& client
->buflen
!= 0) {
541 jobj
= json_tokener_parse_ex(tok
, client
->buf
, (int)client
->buflen
);
542 jerr
= json_tokener_get_error(tok
);
543 unsigned int offset
= (unsigned int)tok
->char_offset
;
545 case json_tokener_success
:
546 // move rest from buffer to front
547 memmove(client
->buf
, client
->buf
+ offset
, client
->buflen
- offset
);
548 client
->buflen
-= offset
;
549 if (json_object_is_type(jobj
, json_type_object
)) {
550 struct json_object
*obj
;
551 if (json_object_object_get_ex(jobj
, "QMP", &obj
)) {
552 handle_qmp_handshake(client
);
553 } else if (json_object_object_get_ex(jobj
, "event", &obj
)) {
554 handle_qmp_event(client
, jobj
);
555 } else if (json_object_object_get_ex(jobj
, "return", &obj
)) {
556 handle_qmp_return(client
, obj
, false);
557 } else if (json_object_object_get_ex(jobj
, "error", &obj
)) {
558 handle_qmp_return(client
, obj
, true);
559 } else if (json_object_object_get_ex(jobj
, "vzdump", &obj
)) {
560 handle_vzdump_handshake(client
, obj
);
561 } // else ignore message
564 case json_tokener_continue
:
565 if (client
->buflen
>= sizeof(client
->buf
)) {
566 VERBOSE_PRINT("pid%d: msg too large, discarding buffer\n", client
->pid
);
567 memset(client
->buf
, 0, sizeof(client
->buf
));
569 } // else we have enough space try again after next read
572 VERBOSE_PRINT("pid%d: parse error: %d, discarding buffer\n", client
->pid
, jerr
);
573 memset(client
->buf
, 0, client
->buflen
);
577 json_object_put(jobj
);
579 json_tokener_free(tok
);
583 sigkill(void *ptr
, void *time_ptr
)
585 struct Client
*data
= ptr
;
588 if (data
->timeout
!= 0 && data
->timeout
> *(time_t *)time_ptr
) {
592 if (data
->pidfd
> 0) {
593 err
= pidfd_send_signal(data
->pidfd
, SIGKILL
, NULL
, 0);
594 (void)close(data
->pidfd
);
597 err
= kill(data
->pid
, SIGKILL
);
601 if (errno
!= ESRCH
) {
602 fprintf(stderr
, "SIGKILL cleanup of pid '%d' failed - %s\n",
603 data
->pid
, strerror(errno
));
606 fprintf(stderr
, "cleanup failed, terminating pid '%d' with SIGKILL\n",
612 // remove ourselves from the list
613 forced_cleanups
= g_slist_remove(forced_cleanups
, ptr
);
617 handle_forced_cleanup()
619 if (g_slist_length(forced_cleanups
) > 0) {
620 VERBOSE_PRINT("clearing forced cleanup backlog\n");
621 time_t cur_time
= time(NULL
);
622 g_slist_foreach(forced_cleanups
, sigkill
, &cur_time
);
624 needs_cleanup
= g_slist_length(forced_cleanups
) > 0;
628 main(int argc
, char *argv
[])
632 char *socket_path
= NULL
;
635 while ((opt
= getopt(argc
, argv
, "hfvt:")) != -1) {
646 kill_timeout
= strtoul(optarg
, &endptr
, 10);
647 if (errno
!= 0 || *endptr
!= '\0' || kill_timeout
== 0) {
662 if (optind
>= argc
) {
667 signal(SIGCHLD
, SIG_IGN
);
669 socket_path
= argv
[optind
];
671 int sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
672 bail_neg(sock
, "socket");
674 struct sockaddr_un addr
;
675 memset(&addr
, 0, sizeof(addr
));
676 addr
.sun_family
= AF_UNIX
;
677 strncpy(addr
.sun_path
, socket_path
, sizeof(addr
.sun_path
) - 1);
680 bail_neg(bind(sock
, (struct sockaddr
*)&addr
, sizeof(addr
)), "bind");
682 struct epoll_event ev
, events
[1];
683 epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
684 bail_neg(epoll_fd
, "epoll_create1");
688 bail_neg(epoll_ctl(epoll_fd
, EPOLL_CTL_ADD
, sock
, &ev
), "epoll_ctl");
690 bail_neg(listen(sock
, 10), "listen");
693 bail_neg(daemon(0, 1), "daemon");
696 vm_clients
= g_hash_table_new_full(g_str_hash
, g_str_equal
, free
, NULL
);
701 nevents
= epoll_wait(epoll_fd
, events
, 1, needs_cleanup
? 10*1000 : -1);
702 if (nevents
< 0 && errno
== EINTR
) {
705 bail_neg(nevents
, "epoll_wait");
707 for (int n
= 0; n
< nevents
; n
++) {
708 if (events
[n
].data
.fd
== sock
) {
710 int conn_sock
= accept4(sock
, NULL
, NULL
, SOCK_NONBLOCK
| SOCK_CLOEXEC
);
711 log_neg(conn_sock
, "accept");
712 if (conn_sock
> -1) {
713 add_new_client(conn_sock
);
716 handle_client((struct Client
*)events
[n
].data
.ptr
);
719 handle_forced_cleanup();