]>
git.proxmox.com Git - qemu-server.git/blob - qmeventd/qmeventd.c
1 // SPDX-License-Identifier: AGPL-3.0-or-later
3 Copyright (C) 2018 - 2021 Proxmox Server Solutions GmbH
5 Author: Dominik Csapak <d.csapak@proxmox.com>
6 Author: Stefan Reiter <s.reiter@proxmox.com>
10 qmeventd listens on a given socket and waits for qemu processes to
11 connect. After accepting a connection, qmeventd waits for shutdown events
12 followed by the closing of the socket. Once that happens, `qm cleanup` will
13 be executed with the following three arguments:
14 VMID <graceful> <guest>
15 Where `graceful` is `1` or `0` depending on whether a shutdown event was
16 observed before the socket got closed. The third argument `guest` is also a
17 boolean `1` or `0`, depending on whether the shutdown was requested from the guest OS
34 #include <sys/epoll.h>
35 #include <sys/socket.h>
36 #include <sys/types.h>
44 #define DEFAULT_KILL_TIMEOUT 60
46 static int verbose
= 0;
47 static int kill_timeout
= DEFAULT_KILL_TIMEOUT
;
48 static int epoll_fd
= 0;
49 static const char *progname
;
50 GHashTable
*vm_clients
; // key=vmid (freed on remove), value=*Client (free manually)
51 GSList
*forced_cleanups
;
52 static int needs_cleanup
= 0;
61 fprintf(stderr
, "Usage: %s [-f] [-v] PATH\n", progname
);
62 fprintf(stderr
, " -f run in foreground (default: false)\n");
63 fprintf(stderr
, " -v verbose (default: false)\n");
64 fprintf(stderr
, " -t <s> kill timeout (default: %ds)\n", DEFAULT_KILL_TIMEOUT
);
65 fprintf(stderr
, " PATH use PATH for socket\n");
69 get_pid_from_fd(int fd
)
71 struct ucred credentials
= { .pid
= 0, .uid
= 0, .gid
= 0 };
72 socklen_t len
= sizeof(struct ucred
);
73 log_neg(getsockopt(fd
, SOL_SOCKET
, SO_PEERCRED
, &credentials
, &len
), "getsockopt");
74 return credentials
.pid
;
78 * parses the vmid from the qemu.slice entry of /proc/<pid>/cgroup
81 get_vmid_from_pid(pid_t pid
)
83 char filename
[32] = { 0 };
84 int len
= snprintf(filename
, sizeof(filename
), "/proc/%d/cgroup", pid
);
86 fprintf(stderr
, "error during snprintf for %d: %s\n", pid
,
90 if ((size_t)len
>= sizeof(filename
)) {
91 fprintf(stderr
, "error: pid %d too long\n", pid
);
94 FILE *fp
= fopen(filename
, "re");
96 fprintf(stderr
, "error opening %s: %s\n", filename
, strerror(errno
));
100 unsigned long vmid
= 0;
104 while (getline(&buf
, &buflen
, fp
) >= 0) {
105 char *cgroup_path
= strrchr(buf
, ':');
107 fprintf(stderr
, "unexpected cgroup entry %s\n", buf
);
112 if (strncmp(cgroup_path
, "/qemu.slice/", 12)) {
116 char *vmid_start
= strrchr(buf
, '/');
118 fprintf(stderr
, "unexpected cgroup entry %s\n", buf
);
123 if (vmid_start
[0] == '-' || vmid_start
[0] == '\0') {
124 fprintf(stderr
, "invalid vmid in cgroup entry %s\n", buf
);
130 vmid
= strtoul(vmid_start
, &endptr
, 10);
131 if (!endptr
|| strncmp(endptr
, ".scope", 6)) {
132 fprintf(stderr
, "unexpected cgroup entry %s\n", buf
);
144 fprintf(stderr
, "error parsing vmid for %d: %s\n", pid
, strerror(errno
));
146 fprintf(stderr
, "error parsing vmid for %d: no matching qemu.slice cgroup entry\n", pid
);
/*
 * Writes exactly `len` bytes from `buf` to `fd`.
 *
 * write() may transfer fewer bytes than requested; instead of reporting such
 * a short write as a failure (and leaving a truncated message on the wire),
 * the remainder is written in a loop. EINTR is retried transparently.
 *
 * Returns true once all bytes were written, false on any other error. This
 * includes EAGAIN/EWOULDBLOCK on a non-blocking descriptor, in which case a
 * part of the buffer may already have been sent.
 */
static bool
must_write(int fd, const char *buf, size_t len)
{
    size_t done = 0;

    do {
        ssize_t wlen = write(fd, buf + done, len - done);
        if (wlen < 0) {
            if (errno == EINTR) {
                continue;
            }
            return false;
        }
        if (wlen == 0 && done < len) {
            // no progress and no error: bail out rather than spin forever
            return false;
        }
        done += (size_t)wlen;
    } while (done < len);

    return true;
}
166 * qmp handling functions
170 send_qmp_cmd(struct Client
*client
, const char *buf
, size_t len
)
172 if (!must_write(client
->fd
, buf
, len
- 1)) {
173 fprintf(stderr
, "%s: cannot send QMP message\n", client
->qemu
.vmid
);
174 cleanup_client(client
);
179 handle_qmp_handshake(struct Client
*client
)
181 VERBOSE_PRINT("pid%d: got QMP handshake, assuming QEMU client\n", client
->pid
);
183 // extract vmid from cmdline, now that we know it's a QEMU process
184 unsigned long vmid
= get_vmid_from_pid(client
->pid
);
185 int res
= snprintf(client
->qemu
.vmid
, sizeof(client
->qemu
.vmid
), "%lu", vmid
);
186 if (vmid
== 0 || res
< 0 || res
>= (int)sizeof(client
->qemu
.vmid
)) {
187 fprintf(stderr
, "could not get vmid from pid %d\n", client
->pid
);
188 cleanup_client(client
);
192 VERBOSE_PRINT("pid%d: assigned VMID: %s\n", client
->pid
, client
->qemu
.vmid
);
193 client
->type
= CLIENT_QEMU
;
194 if(!g_hash_table_insert(vm_clients
, strdup(client
->qemu
.vmid
), client
)) {
195 // not fatal, just means backup handling won't work
196 fprintf(stderr
, "%s: could not insert client into VMID->client table\n",
200 static const char qmp_answer
[] = "{\"execute\":\"qmp_capabilities\"}\n";
201 send_qmp_cmd(client
, qmp_answer
, sizeof(qmp_answer
));
205 handle_qmp_event(struct Client
*client
, struct json_object
*obj
)
207 struct json_object
*event
;
208 if (!json_object_object_get_ex(obj
, "event", &event
)) {
211 VERBOSE_PRINT("%s: got QMP event: %s\n", client
->qemu
.vmid
, json_object_get_string(event
));
213 if (client
->state
== STATE_TERMINATING
) {
214 // QEMU sometimes sends a second SHUTDOWN after SIGTERM, ignore
215 VERBOSE_PRINT("%s: event was after termination, ignoring\n", client
->qemu
.vmid
);
219 // event, check if shutdown and get guest parameter
220 if (!strcmp(json_object_get_string(event
), "SHUTDOWN")) {
221 client
->qemu
.graceful
= 1;
222 struct json_object
*data
;
223 struct json_object
*guest
;
224 if (json_object_object_get_ex(obj
, "data", &data
) &&
225 json_object_object_get_ex(data
, "guest", &guest
))
227 client
->qemu
.guest
= (unsigned short)json_object_get_boolean(guest
);
230 // check if a backup is running and kill QEMU process if not
231 terminate_check(client
);
236 terminate_check(struct Client
*client
)
238 if (client
->state
!= STATE_IDLE
) {
239 // if we're already in a request, queue this one until after
240 VERBOSE_PRINT("%s: terminate_check queued\n", client
->qemu
.vmid
);
241 client
->qemu
.term_check_queued
= true;
245 client
->qemu
.term_check_queued
= false;
247 VERBOSE_PRINT("%s: query-status\n", client
->qemu
.vmid
);
248 client
->state
= STATE_EXPECT_STATUS_RESP
;
249 static const char qmp_req
[] = "{\"execute\":\"query-status\"}\n";
250 send_qmp_cmd(client
, qmp_req
, sizeof(qmp_req
));
254 handle_qmp_return(struct Client
*client
, struct json_object
*data
, bool error
)
257 const char *msg
= "n/a";
258 struct json_object
*desc
;
259 if (json_object_object_get_ex(data
, "desc", &desc
)) {
260 msg
= json_object_get_string(desc
);
262 fprintf(stderr
, "%s: received error from QMP: %s\n",
263 client
->qemu
.vmid
, msg
);
264 client
->state
= STATE_IDLE
;
268 struct json_object
*status
;
269 json_bool has_status
= data
&&
270 json_object_object_get_ex(data
, "status", &status
);
274 const char *status_str
= json_object_get_string(status
);
275 active
= status_str
&& (
276 !strcmp(status_str
, "running")
277 || !strcmp(status_str
, "paused")
278 || !strcmp(status_str
, "suspended")
279 || !strcmp(status_str
, "prelaunch")
283 switch (client
->state
) {
284 case STATE_EXPECT_STATUS_RESP
:
285 client
->state
= STATE_IDLE
;
287 VERBOSE_PRINT("%s: got status: VM is active\n", client
->qemu
.vmid
);
288 } else if (!client
->qemu
.backup
) {
289 terminate_client(client
);
291 // if we're in a backup, don't do anything, vzdump will notify
292 // us when the backup finishes
293 VERBOSE_PRINT("%s: not active, but running backup - keep alive\n",
298 // this means we received the empty return from our handshake answer
299 case STATE_HANDSHAKE
:
300 client
->state
= STATE_IDLE
;
301 VERBOSE_PRINT("%s: QMP handshake complete\n", client
->qemu
.vmid
);
304 // we expect an empty return object after sending quit
305 case STATE_TERMINATING
:
308 VERBOSE_PRINT("%s: spurious return value received\n",
314 if (client
->qemu
.term_check_queued
) {
315 terminate_check(client
);
320 * VZDump specific client functions
324 handle_vzdump_handshake(struct Client
*client
, struct json_object
*data
)
326 client
->state
= STATE_IDLE
;
328 struct json_object
*vmid_obj
;
329 json_bool has_vmid
= data
&& json_object_object_get_ex(data
, "vmid", &vmid_obj
);
332 VERBOSE_PRINT("pid%d: invalid vzdump handshake: no vmid\n", client
->pid
);
336 const char *vmid_str
= json_object_get_string(vmid_obj
);
339 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid is not a string\n", client
->pid
);
343 int res
= snprintf(client
->vzdump
.vmid
, sizeof(client
->vzdump
.vmid
), "%s", vmid_str
);
344 if (res
< 0 || res
>= (int)sizeof(client
->vzdump
.vmid
)) {
345 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid too long or invalid\n", client
->pid
);
349 struct Client
*vmc
= (struct Client
*) g_hash_table_lookup(vm_clients
, client
->vzdump
.vmid
);
351 vmc
->qemu
.backup
= true;
353 // only mark as VZDUMP once we have set everything up, otherwise 'cleanup'
354 // might try to access an invalid value
355 client
->type
= CLIENT_VZDUMP
;
356 VERBOSE_PRINT("%s: vzdump backup started\n", client
->vzdump
.vmid
);
358 VERBOSE_PRINT("%s: vzdump requested backup start for unregistered VM\n", client
->vzdump
.vmid
);
363 * client management functions
367 add_new_client(int client_fd
)
369 struct Client
*client
= calloc(sizeof(struct Client
), 1);
370 if (client
== NULL
) {
371 fprintf(stderr
, "could not add new client - allocation failed!\n");
375 client
->state
= STATE_HANDSHAKE
;
376 client
->type
= CLIENT_NONE
;
377 client
->fd
= client_fd
;
378 client
->pid
= get_pid_from_fd(client_fd
);
379 if (client
->pid
== 0) {
380 fprintf(stderr
, "could not get pid from client\n");
384 struct epoll_event ev
;
386 ev
.data
.ptr
= client
;
387 int res
= epoll_ctl(epoll_fd
, EPOLL_CTL_ADD
, client_fd
, &ev
);
389 perror("epoll_ctl client add");
393 VERBOSE_PRINT("added new client, pid: %d\n", client
->pid
);
397 (void)close(client_fd
);
402 cleanup_qemu_client(struct Client
*client
)
404 unsigned short graceful
= client
->qemu
.graceful
;
405 unsigned short guest
= client
->qemu
.guest
;
406 char vmid
[sizeof(client
->qemu
.vmid
)];
407 strncpy(vmid
, client
->qemu
.vmid
, sizeof(vmid
));
408 g_hash_table_remove(vm_clients
, &vmid
); // frees key, ignore errors
409 VERBOSE_PRINT("%s: executing cleanup (graceful: %d, guest: %d)\n",
410 vmid
, graceful
, guest
);
414 fprintf(stderr
, "fork failed: %s\n", strerror(errno
));
418 char *script
= "/usr/sbin/qm";
424 graceful
? "1" : "0",
429 execvp(script
, args
);
436 cleanup_client(struct Client
*client
)
438 log_neg(epoll_ctl(epoll_fd
, EPOLL_CTL_DEL
, client
->fd
, NULL
), "epoll del");
439 (void)close(client
->fd
);
442 switch (client
->type
) {
444 cleanup_qemu_client(client
);
448 vmc
= (struct Client
*) g_hash_table_lookup(vm_clients
, client
->vzdump
.vmid
);
450 VERBOSE_PRINT("%s: backup ended\n", client
->vzdump
.vmid
);
451 vmc
->qemu
.backup
= false;
452 terminate_check(vmc
);
457 // do nothing, only close socket
461 if (client
->pidfd
> 0) {
462 (void)close(client
->pidfd
);
464 VERBOSE_PRINT("removing %s from forced cleanups\n", client
->qemu
.vmid
);
465 forced_cleanups
= g_slist_remove(forced_cleanups
, client
);
470 terminate_client(struct Client
*client
)
472 VERBOSE_PRINT("%s: terminating client (pid %d)\n", client
->qemu
.vmid
, client
->pid
);
474 client
->state
= STATE_TERMINATING
;
476 // open a pidfd before kill for later cleanup
477 int pidfd
= pidfd_open(client
->pid
, 0);
481 // process already dead for some reason, cleanup done
482 VERBOSE_PRINT("%s: failed to open pidfd, process already dead (pid %d)\n",
483 client
->qemu
.vmid
, client
->pid
);
486 // otherwise fall back to just using the PID directly, but don't
487 // print if we only failed because we're running on an older kernel
491 perror("failed to open QEMU pidfd for cleanup");
496 // try to send a 'quit' command first, fallback to SIGTERM of the pid
497 static const char qmp_quit_command
[] = "{\"execute\":\"quit\"}\n";
498 VERBOSE_PRINT("%s: sending 'quit' via QMP\n", client
->qemu
.vmid
);
499 if (!must_write(client
->fd
, qmp_quit_command
, sizeof(qmp_quit_command
) - 1)) {
500 VERBOSE_PRINT("%s: sending 'SIGTERM' to pid %d\n", client
->qemu
.vmid
, client
->pid
);
501 int err
= kill(client
->pid
, SIGTERM
);
502 log_neg(err
, "kill");
505 time_t timeout
= time(NULL
) + kill_timeout
;
507 client
->pidfd
= pidfd
;
508 client
->timeout
= timeout
;
510 forced_cleanups
= g_slist_prepend(forced_cleanups
, (void *)client
);
515 handle_client(struct Client
*client
)
517 VERBOSE_PRINT("pid%d: entering handle\n", client
->pid
);
520 len
= read(client
->fd
, (client
->buf
+client
->buflen
),
521 sizeof(client
->buf
) - client
->buflen
);
522 } while (len
< 0 && errno
== EINTR
);
525 if (!(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)) {
526 log_neg((int)len
, "read");
527 cleanup_client(client
);
530 } else if (len
== 0) {
531 VERBOSE_PRINT("pid%d: got EOF\n", client
->pid
);
532 cleanup_client(client
);
536 VERBOSE_PRINT("pid%d: read %ld bytes\n", client
->pid
, len
);
537 client
->buflen
+= len
;
539 struct json_tokener
*tok
= json_tokener_new();
540 struct json_object
*jobj
= NULL
;
541 enum json_tokener_error jerr
= json_tokener_success
;
542 while (jerr
== json_tokener_success
&& client
->buflen
!= 0) {
543 jobj
= json_tokener_parse_ex(tok
, client
->buf
, (int)client
->buflen
);
544 jerr
= json_tokener_get_error(tok
);
545 unsigned int offset
= (unsigned int)tok
->char_offset
;
547 case json_tokener_success
:
548 // move rest from buffer to front
549 memmove(client
->buf
, client
->buf
+ offset
, client
->buflen
- offset
);
550 client
->buflen
-= offset
;
551 if (json_object_is_type(jobj
, json_type_object
)) {
552 struct json_object
*obj
;
553 if (json_object_object_get_ex(jobj
, "QMP", &obj
)) {
554 handle_qmp_handshake(client
);
555 } else if (json_object_object_get_ex(jobj
, "event", &obj
)) {
556 handle_qmp_event(client
, jobj
);
557 } else if (json_object_object_get_ex(jobj
, "return", &obj
)) {
558 handle_qmp_return(client
, obj
, false);
559 } else if (json_object_object_get_ex(jobj
, "error", &obj
)) {
560 handle_qmp_return(client
, obj
, true);
561 } else if (json_object_object_get_ex(jobj
, "vzdump", &obj
)) {
562 handle_vzdump_handshake(client
, obj
);
563 } // else ignore message
566 case json_tokener_continue
:
567 if (client
->buflen
>= sizeof(client
->buf
)) {
568 VERBOSE_PRINT("pid%d: msg too large, discarding buffer\n", client
->pid
);
569 memset(client
->buf
, 0, sizeof(client
->buf
));
571 } // else we have enough space try again after next read
574 VERBOSE_PRINT("pid%d: parse error: %d, discarding buffer\n", client
->pid
, jerr
);
575 memset(client
->buf
, 0, client
->buflen
);
579 json_object_put(jobj
);
581 json_tokener_free(tok
);
585 sigkill(void *ptr
, void *time_ptr
)
587 struct Client
*data
= ptr
;
590 if (data
->timeout
!= 0 && data
->timeout
> *(time_t *)time_ptr
) {
594 if (data
->pidfd
> 0) {
595 err
= pidfd_send_signal(data
->pidfd
, SIGKILL
, NULL
, 0);
596 (void)close(data
->pidfd
);
599 err
= kill(data
->pid
, SIGKILL
);
603 if (errno
!= ESRCH
) {
604 fprintf(stderr
, "SIGKILL cleanup of pid '%d' failed - %s\n",
605 data
->pid
, strerror(errno
));
608 fprintf(stderr
, "cleanup failed, terminating pid '%d' with SIGKILL\n",
614 // remove ourselves from the list
615 forced_cleanups
= g_slist_remove(forced_cleanups
, ptr
);
619 handle_forced_cleanup()
621 if (g_slist_length(forced_cleanups
) > 0) {
622 VERBOSE_PRINT("clearing forced cleanup backlog\n");
623 time_t cur_time
= time(NULL
);
624 g_slist_foreach(forced_cleanups
, sigkill
, &cur_time
);
626 needs_cleanup
= g_slist_length(forced_cleanups
) > 0;
630 main(int argc
, char *argv
[])
634 char *socket_path
= NULL
;
637 while ((opt
= getopt(argc
, argv
, "hfvt:")) != -1) {
648 kill_timeout
= strtoul(optarg
, &endptr
, 10);
649 if (errno
!= 0 || *endptr
!= '\0' || kill_timeout
== 0) {
664 if (optind
>= argc
) {
669 signal(SIGCHLD
, SIG_IGN
);
671 socket_path
= argv
[optind
];
673 int sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
674 bail_neg(sock
, "socket");
676 struct sockaddr_un addr
;
677 memset(&addr
, 0, sizeof(addr
));
678 addr
.sun_family
= AF_UNIX
;
679 strncpy(addr
.sun_path
, socket_path
, sizeof(addr
.sun_path
) - 1);
682 bail_neg(bind(sock
, (struct sockaddr
*)&addr
, sizeof(addr
)), "bind");
684 struct epoll_event ev
, events
[1];
685 epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
686 bail_neg(epoll_fd
, "epoll_create1");
690 bail_neg(epoll_ctl(epoll_fd
, EPOLL_CTL_ADD
, sock
, &ev
), "epoll_ctl");
692 bail_neg(listen(sock
, 10), "listen");
695 bail_neg(daemon(0, 1), "daemon");
698 vm_clients
= g_hash_table_new_full(g_str_hash
, g_str_equal
, free
, NULL
);
703 nevents
= epoll_wait(epoll_fd
, events
, 1, needs_cleanup
? 10*1000 : -1);
704 if (nevents
< 0 && errno
== EINTR
) {
707 bail_neg(nevents
, "epoll_wait");
709 for (int n
= 0; n
< nevents
; n
++) {
710 if (events
[n
].data
.fd
== sock
) {
712 int conn_sock
= accept4(sock
, NULL
, NULL
, SOCK_NONBLOCK
| SOCK_CLOEXEC
);
713 log_neg(conn_sock
, "accept");
714 if (conn_sock
> -1) {
715 add_new_client(conn_sock
);
718 handle_client((struct Client
*)events
[n
].data
.ptr
);
721 handle_forced_cleanup();