]>
git.proxmox.com Git - qemu-server.git/blob - qmeventd/qmeventd.c
1 // SPDX-License-Identifier: AGPL-3.0-or-later
3 Copyright (C) 2018 - 2021 Proxmox Server Solutions GmbH
5 Author: Dominik Csapak <d.csapak@proxmox.com>
6 Author: Stefan Reiter <s.reiter@proxmox.com>
10 qmeventd listens on a given socket, and waits for qemu processes to
11 connect. After accepting a connection qmeventd waits for shutdown events
12 followed by the closing of the socket. Once that happens `qm cleanup` will
13 be executed with the following three arguments:
14 VMID <graceful> <guest>
15 Where `graceful` is `1` or `0` depending on whether a shutdown event was observed
16 before the socket got closed. The last argument `guest` is also a boolean,
17 `1` or `0` depending on whether the shutdown was requested from the guest OS
33 #include <sys/epoll.h>
34 #include <sys/socket.h>
35 #include <sys/types.h>
42 static int verbose
= 0;
43 static int epoll_fd
= 0;
44 static const char *progname
;
45 GHashTable
*vm_clients
; // key=vmid (freed on remove), value=*Client (free manually)
46 GSList
*forced_cleanups
;
47 volatile sig_atomic_t alarm_triggered
= 0;
56 fprintf(stderr
, "Usage: %s [-f] [-v] PATH\n", progname
);
57 fprintf(stderr
, " -f run in foreground (default: false)\n");
58 fprintf(stderr
, " -v verbose (default: false)\n");
59 fprintf(stderr
, " PATH use PATH for socket\n");
63 get_pid_from_fd(int fd
)
65 struct ucred credentials
= { .pid
= 0, .uid
= 0, .gid
= 0 };
66 socklen_t len
= sizeof(struct ucred
);
67 log_neg(getsockopt(fd
, SOL_SOCKET
, SO_PEERCRED
, &credentials
, &len
), "getsockopt");
68 return credentials
.pid
;
72 * reads the vmid from /proc/<pid>/cmdline
73 * after the '-id' argument
76 get_vmid_from_pid(pid_t pid
)
78 char filename
[32] = { 0 };
79 int len
= snprintf(filename
, sizeof(filename
), "/proc/%d/cmdline", pid
);
81 fprintf(stderr
, "error during snprintf for %d: %s\n", pid
,
85 if ((size_t)len
>= sizeof(filename
)) {
86 fprintf(stderr
, "error: pid %d too long\n", pid
);
89 FILE *fp
= fopen(filename
, "re");
91 fprintf(stderr
, "error opening %s: %s\n", filename
, strerror(errno
));
95 unsigned long vmid
= 0;
99 while ((rc
= getdelim(&buf
, &buflen
, '\0', fp
)) >= 0) {
100 if (!strcmp(buf
, "-id")) {
109 if (getdelim(&buf
, &buflen
, '\0', fp
) >= 0) {
110 if (buf
[0] == '-' || buf
[0] == '\0') {
111 fprintf(stderr
, "invalid vmid %s\n", buf
);
117 vmid
= strtoul(buf
, &endptr
, 10);
121 } else if (*endptr
!= '\0') {
122 fprintf(stderr
, "invalid vmid %s\n", buf
);
130 fprintf(stderr
, "error parsing vmid for %d: %s\n", pid
, strerror(errno
));
139 must_write(int fd
, const char *buf
, size_t len
)
143 wlen
= write(fd
, buf
, len
);
144 } while (wlen
< 0 && errno
== EINTR
);
146 return (wlen
== (ssize_t
)len
);
150 * qmp handling functions
154 send_qmp_cmd(struct Client
*client
, const char *buf
, size_t len
)
156 if (!must_write(client
->fd
, buf
, len
- 1)) {
157 fprintf(stderr
, "%s: cannot send QMP message\n", client
->qemu
.vmid
);
158 cleanup_client(client
);
163 handle_qmp_handshake(struct Client
*client
)
165 VERBOSE_PRINT("pid%d: got QMP handshake, assuming QEMU client\n", client
->pid
);
167 // extract vmid from cmdline, now that we know it's a QEMU process
168 unsigned long vmid
= get_vmid_from_pid(client
->pid
);
169 int res
= snprintf(client
->qemu
.vmid
, sizeof(client
->qemu
.vmid
), "%lu", vmid
);
170 if (vmid
== 0 || res
< 0 || res
>= (int)sizeof(client
->qemu
.vmid
)) {
171 fprintf(stderr
, "could not get vmid from pid %d\n", client
->pid
);
172 cleanup_client(client
);
176 VERBOSE_PRINT("pid%d: assigned VMID: %s\n", client
->pid
, client
->qemu
.vmid
);
177 client
->type
= CLIENT_QEMU
;
178 if(!g_hash_table_insert(vm_clients
, strdup(client
->qemu
.vmid
), client
)) {
179 // not fatal, just means backup handling won't work
180 fprintf(stderr
, "%s: could not insert client into VMID->client table\n",
184 static const char qmp_answer
[] = "{\"execute\":\"qmp_capabilities\"}\n";
185 send_qmp_cmd(client
, qmp_answer
, sizeof(qmp_answer
));
189 handle_qmp_event(struct Client
*client
, struct json_object
*obj
)
191 struct json_object
*event
;
192 if (!json_object_object_get_ex(obj
, "event", &event
)) {
195 VERBOSE_PRINT("%s: got QMP event: %s\n", client
->qemu
.vmid
, json_object_get_string(event
));
197 if (client
->state
== STATE_TERMINATING
) {
198 // QEMU sometimes sends a second SHUTDOWN after SIGTERM, ignore
199 VERBOSE_PRINT("%s: event was after termination, ignoring\n", client
->qemu
.vmid
);
203 // event, check if shutdown and get guest parameter
204 if (!strcmp(json_object_get_string(event
), "SHUTDOWN")) {
205 client
->qemu
.graceful
= 1;
206 struct json_object
*data
;
207 struct json_object
*guest
;
208 if (json_object_object_get_ex(obj
, "data", &data
) &&
209 json_object_object_get_ex(data
, "guest", &guest
))
211 client
->qemu
.guest
= (unsigned short)json_object_get_boolean(guest
);
214 // check if a backup is running and kill QEMU process if not
215 terminate_check(client
);
220 terminate_check(struct Client
*client
)
222 if (client
->state
!= STATE_IDLE
) {
223 // if we're already in a request, queue this one until after
224 VERBOSE_PRINT("%s: terminate_check queued\n", client
->qemu
.vmid
);
225 client
->qemu
.term_check_queued
= true;
229 client
->qemu
.term_check_queued
= false;
231 VERBOSE_PRINT("%s: query-status\n", client
->qemu
.vmid
);
232 client
->state
= STATE_EXPECT_STATUS_RESP
;
233 static const char qmp_req
[] = "{\"execute\":\"query-status\"}\n";
234 send_qmp_cmd(client
, qmp_req
, sizeof(qmp_req
));
238 handle_qmp_return(struct Client
*client
, struct json_object
*data
, bool error
)
241 const char *msg
= "n/a";
242 struct json_object
*desc
;
243 if (json_object_object_get_ex(data
, "desc", &desc
)) {
244 msg
= json_object_get_string(desc
);
246 fprintf(stderr
, "%s: received error from QMP: %s\n",
247 client
->qemu
.vmid
, msg
);
248 client
->state
= STATE_IDLE
;
252 struct json_object
*status
;
253 json_bool has_status
= data
&&
254 json_object_object_get_ex(data
, "status", &status
);
258 const char *status_str
= json_object_get_string(status
);
259 active
= status_str
&&
260 (!strcmp(status_str
, "running") || !strcmp(status_str
, "paused"));
263 switch (client
->state
) {
264 case STATE_EXPECT_STATUS_RESP
:
265 client
->state
= STATE_IDLE
;
267 VERBOSE_PRINT("%s: got status: VM is active\n", client
->qemu
.vmid
);
268 } else if (!client
->qemu
.backup
) {
269 terminate_client(client
);
271 // if we're in a backup, don't do anything, vzdump will notify
272 // us when the backup finishes
273 VERBOSE_PRINT("%s: not active, but running backup - keep alive\n",
278 // this means we received the empty return from our handshake answer
279 case STATE_HANDSHAKE
:
280 client
->state
= STATE_IDLE
;
281 VERBOSE_PRINT("%s: QMP handshake complete\n", client
->qemu
.vmid
);
285 case STATE_TERMINATING
:
286 VERBOSE_PRINT("%s: spurious return value received\n",
292 if (client
->qemu
.term_check_queued
) {
293 terminate_check(client
);
298 * VZDump specific client functions
302 handle_vzdump_handshake(struct Client
*client
, struct json_object
*data
)
304 client
->state
= STATE_IDLE
;
306 struct json_object
*vmid_obj
;
307 json_bool has_vmid
= data
&& json_object_object_get_ex(data
, "vmid", &vmid_obj
);
310 VERBOSE_PRINT("pid%d: invalid vzdump handshake: no vmid\n", client
->pid
);
314 const char *vmid_str
= json_object_get_string(vmid_obj
);
317 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid is not a string\n", client
->pid
);
321 int res
= snprintf(client
->vzdump
.vmid
, sizeof(client
->vzdump
.vmid
), "%s", vmid_str
);
322 if (res
< 0 || res
>= (int)sizeof(client
->vzdump
.vmid
)) {
323 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid too long or invalid\n", client
->pid
);
327 struct Client
*vmc
= (struct Client
*) g_hash_table_lookup(vm_clients
, client
->vzdump
.vmid
);
329 vmc
->qemu
.backup
= true;
331 // only mark as VZDUMP once we have set everything up, otherwise 'cleanup'
332 // might try to access an invalid value
333 client
->type
= CLIENT_VZDUMP
;
334 VERBOSE_PRINT("%s: vzdump backup started\n", client
->vzdump
.vmid
);
336 VERBOSE_PRINT("%s: vzdump requested backup start for unregistered VM\n", client
->vzdump
.vmid
);
341 * client management functions
345 add_new_client(int client_fd
)
347 struct Client
*client
= calloc(sizeof(struct Client
), 1);
348 if (client
== NULL
) {
349 fprintf(stderr
, "could not add new client - allocation failed!\n");
353 client
->state
= STATE_HANDSHAKE
;
354 client
->type
= CLIENT_NONE
;
355 client
->fd
= client_fd
;
356 client
->pid
= get_pid_from_fd(client_fd
);
357 if (client
->pid
== 0) {
358 fprintf(stderr
, "could not get pid from client\n");
362 struct epoll_event ev
;
364 ev
.data
.ptr
= client
;
365 int res
= epoll_ctl(epoll_fd
, EPOLL_CTL_ADD
, client_fd
, &ev
);
367 perror("epoll_ctl client add");
371 VERBOSE_PRINT("added new client, pid: %d\n", client
->pid
);
375 (void)close(client_fd
);
380 cleanup_qemu_client(struct Client
*client
)
382 unsigned short graceful
= client
->qemu
.graceful
;
383 unsigned short guest
= client
->qemu
.guest
;
384 char vmid
[sizeof(client
->qemu
.vmid
)];
385 strncpy(vmid
, client
->qemu
.vmid
, sizeof(vmid
));
386 g_hash_table_remove(vm_clients
, &vmid
); // frees key, ignore errors
387 VERBOSE_PRINT("%s: executing cleanup (graceful: %d, guest: %d)\n",
388 vmid
, graceful
, guest
);
392 fprintf(stderr
, "fork failed: %s\n", strerror(errno
));
396 char *script
= "/usr/sbin/qm";
402 graceful
? "1" : "0",
407 execvp(script
, args
);
414 cleanup_client(struct Client
*client
)
416 log_neg(epoll_ctl(epoll_fd
, EPOLL_CTL_DEL
, client
->fd
, NULL
), "epoll del");
417 (void)close(client
->fd
);
420 switch (client
->type
) {
422 cleanup_qemu_client(client
);
426 vmc
= (struct Client
*) g_hash_table_lookup(vm_clients
, client
->vzdump
.vmid
);
428 VERBOSE_PRINT("%s: backup ended\n", client
->vzdump
.vmid
);
429 vmc
->qemu
.backup
= false;
430 terminate_check(vmc
);
435 // do nothing, only close socket
443 terminate_client(struct Client
*client
)
445 VERBOSE_PRINT("%s: terminating client (pid %d)\n", client
->qemu
.vmid
, client
->pid
);
447 client
->state
= STATE_TERMINATING
;
449 // open a pidfd before kill for later cleanup
450 int pidfd
= pidfd_open(client
->pid
, 0);
454 // process already dead for some reason, cleanup done
455 VERBOSE_PRINT("%s: failed to open pidfd, process already dead (pid %d)\n",
456 client
->qemu
.vmid
, client
->pid
);
459 // otherwise fall back to just using the PID directly, but don't
460 // print if we only failed because we're running on an older kernel
464 perror("failed to open QEMU pidfd for cleanup");
469 int err
= kill(client
->pid
, SIGTERM
);
470 log_neg(err
, "kill");
472 struct CleanupData
*data_ptr
= malloc(sizeof(struct CleanupData
));
473 struct CleanupData data
= {
478 forced_cleanups
= g_slist_prepend(forced_cleanups
, (void *)data_ptr
);
480 // resets any other alarms, but will fire eventually and cleanup all
485 handle_client(struct Client
*client
)
487 VERBOSE_PRINT("pid%d: entering handle\n", client
->pid
);
490 len
= read(client
->fd
, (client
->buf
+client
->buflen
),
491 sizeof(client
->buf
) - client
->buflen
);
492 } while (len
< 0 && errno
== EINTR
);
495 if (!(errno
== EAGAIN
|| errno
== EWOULDBLOCK
)) {
496 log_neg((int)len
, "read");
497 cleanup_client(client
);
500 } else if (len
== 0) {
501 VERBOSE_PRINT("pid%d: got EOF\n", client
->pid
);
502 cleanup_client(client
);
506 VERBOSE_PRINT("pid%d: read %ld bytes\n", client
->pid
, len
);
507 client
->buflen
+= len
;
509 struct json_tokener
*tok
= json_tokener_new();
510 struct json_object
*jobj
= NULL
;
511 enum json_tokener_error jerr
= json_tokener_success
;
512 while (jerr
== json_tokener_success
&& client
->buflen
!= 0) {
513 jobj
= json_tokener_parse_ex(tok
, client
->buf
, (int)client
->buflen
);
514 jerr
= json_tokener_get_error(tok
);
515 unsigned int offset
= (unsigned int)tok
->char_offset
;
517 case json_tokener_success
:
518 // move rest from buffer to front
519 memmove(client
->buf
, client
->buf
+ offset
, client
->buflen
- offset
);
520 client
->buflen
-= offset
;
521 if (json_object_is_type(jobj
, json_type_object
)) {
522 struct json_object
*obj
;
523 if (json_object_object_get_ex(jobj
, "QMP", &obj
)) {
524 handle_qmp_handshake(client
);
525 } else if (json_object_object_get_ex(jobj
, "event", &obj
)) {
526 handle_qmp_event(client
, jobj
);
527 } else if (json_object_object_get_ex(jobj
, "return", &obj
)) {
528 handle_qmp_return(client
, obj
, false);
529 } else if (json_object_object_get_ex(jobj
, "error", &obj
)) {
530 handle_qmp_return(client
, obj
, true);
531 } else if (json_object_object_get_ex(jobj
, "vzdump", &obj
)) {
532 handle_vzdump_handshake(client
, obj
);
533 } // else ignore message
536 case json_tokener_continue
:
537 if (client
->buflen
>= sizeof(client
->buf
)) {
538 VERBOSE_PRINT("pid%d: msg too large, discarding buffer\n", client
->pid
);
539 memset(client
->buf
, 0, sizeof(client
->buf
));
541 } // else we have enough space try again after next read
544 VERBOSE_PRINT("pid%d: parse error: %d, discarding buffer\n", client
->pid
, jerr
);
545 memset(client
->buf
, 0, client
->buflen
);
549 json_object_put(jobj
);
551 json_tokener_free(tok
);
556 * SIGALRM and cleanup handling
558 * terminate_client will set an alarm for 5 seconds and add its client's PID to
559 * the forced_cleanups list - when the timer expires, we iterate the list and
560 * attempt to issue SIGKILL to all processes which haven't yet stopped.
564 alarm_handler(__attribute__((unused
)) int signum
)
570 sigkill(void *ptr
, __attribute__((unused
)) void *unused
)
572 struct CleanupData data
= *((struct CleanupData
*)ptr
);
575 if (data
.pidfd
> 0) {
576 err
= pidfd_send_signal(data
.pidfd
, SIGKILL
, NULL
, 0);
577 (void)close(data
.pidfd
);
579 err
= kill(data
.pid
, SIGKILL
);
583 if (errno
!= ESRCH
) {
584 fprintf(stderr
, "SIGKILL cleanup of pid '%d' failed - %s\n",
585 data
.pid
, strerror(errno
));
588 fprintf(stderr
, "cleanup failed, terminating pid '%d' with SIGKILL\n",
594 handle_forced_cleanup()
596 if (alarm_triggered
) {
597 VERBOSE_PRINT("clearing forced cleanup backlog\n");
599 g_slist_foreach(forced_cleanups
, sigkill
, NULL
);
600 g_slist_free_full(forced_cleanups
, free
);
601 forced_cleanups
= NULL
;
607 main(int argc
, char *argv
[])
611 char *socket_path
= NULL
;
614 while ((opt
= getopt(argc
, argv
, "hfv")) != -1) {
632 if (optind
>= argc
) {
637 signal(SIGCHLD
, SIG_IGN
);
638 signal(SIGALRM
, alarm_handler
);
640 socket_path
= argv
[optind
];
642 int sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
643 bail_neg(sock
, "socket");
645 struct sockaddr_un addr
;
646 memset(&addr
, 0, sizeof(addr
));
647 addr
.sun_family
= AF_UNIX
;
648 strncpy(addr
.sun_path
, socket_path
, sizeof(addr
.sun_path
) - 1);
651 bail_neg(bind(sock
, (struct sockaddr
*)&addr
, sizeof(addr
)), "bind");
653 struct epoll_event ev
, events
[1];
654 epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
655 bail_neg(epoll_fd
, "epoll_create1");
659 bail_neg(epoll_ctl(epoll_fd
, EPOLL_CTL_ADD
, sock
, &ev
), "epoll_ctl");
661 bail_neg(listen(sock
, 10), "listen");
664 bail_neg(daemon(0, 1), "daemon");
667 vm_clients
= g_hash_table_new_full(g_str_hash
, g_str_equal
, free
, NULL
);
672 nevents
= epoll_wait(epoll_fd
, events
, 1, -1);
673 if (nevents
< 0 && errno
== EINTR
) {
674 handle_forced_cleanup();
677 bail_neg(nevents
, "epoll_wait");
679 for (int n
= 0; n
< nevents
; n
++) {
680 if (events
[n
].data
.fd
== sock
) {
682 int conn_sock
= accept4(sock
, NULL
, NULL
, SOCK_NONBLOCK
| SOCK_CLOEXEC
);
683 log_neg(conn_sock
, "accept");
684 if (conn_sock
> -1) {
685 add_new_client(conn_sock
);
688 handle_client((struct Client
*)events
[n
].data
.ptr
);
692 handle_forced_cleanup();