]> git.proxmox.com Git - qemu-server.git/blame - qmeventd/qmeventd.c
qmeventd: rework description, mention s.reiter as author
[qemu-server.git] / qmeventd / qmeventd.c
CommitLineData
649dbf42 1// SPDX-License-Identifier: AGPL-3.0-or-later
4c17b2e3 2/*
649dbf42 3 Copyright (C) 2018 - 2021 Proxmox Server Solutions GmbH
4c17b2e3
DC
4
5 Author: Dominik Csapak <d.csapak@proxmox.com>
aedf8208 6 Author: Stefan Reiter <s.reiter@proxmox.com>
4c17b2e3 7
aedf8208 8 Description:
4c17b2e3 9
aedf8208
TL
    qmeventd listens on a given socket and waits for qemu processes to
    connect. After accepting a connection, qmeventd waits for shutdown events
    followed by the closing of the socket. Once that happens, `qm cleanup` will
    be executed with the following three arguments:
        VMID <graceful> <guest>
    Where `graceful` is `1` or `0`, depending on whether a shutdown event was
    observed before the socket got closed. The third argument `guest` is also
    a boolean `1` or `0`, depending on whether the shutdown was requested from
    the guest OS (i.e., the "inside").
4c17b2e3
DC
19*/
20
21#ifndef _GNU_SOURCE
22#define _GNU_SOURCE
23#endif
24
25#include <errno.h>
26#include <fcntl.h>
aedf8208 27#include <gmodule.h>
4c17b2e3
DC
28#include <json.h>
29#include <signal.h>
30#include <stdbool.h>
31#include <stdio.h>
32#include <string.h>
33#include <sys/epoll.h>
34#include <sys/socket.h>
35#include <sys/types.h>
36#include <sys/un.h>
37#include <sys/wait.h>
38#include <unistd.h>
39
40#include "qmeventd.h"
41
42static int verbose = 0;
43static int epoll_fd = 0;
44static const char *progname;
3ff85001 45GHashTable *vm_clients; // key=vmid (freed on remove), value=*Client (free manually)
4c500f16
SR
46GSList *forced_cleanups;
47volatile sig_atomic_t alarm_triggered = 0;
3ff85001 48
4c17b2e3
DC
49/*
50 * Helper functions
51 */
52
53static void
54usage()
55{
56 fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
57 fprintf(stderr, " -f run in foreground (default: false)\n");
58 fprintf(stderr, " -v verbose (default: false)\n");
59 fprintf(stderr, " PATH use PATH for socket\n");
60}
61
/*
 * Query the peer credentials (SO_PEERCRED) of the socket 'fd' and return the
 * PID stored in them.
 *
 * The credentials start out zeroed and a getsockopt() failure is only handed
 * to log_neg(), so callers should treat a returned PID of 0 as "unknown".
 */
static pid_t
get_pid_from_fd(int fd)
{
    struct ucred peer = { .pid = 0, .uid = 0, .gid = 0 };
    socklen_t peer_len = sizeof(peer);

    log_neg(getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &peer, &peer_len), "getsockopt");

    return peer.pid;
}
70
/*
 * Determine the VMID of the process 'pid' by walking the NUL-separated
 * arguments in /proc/<pid>/cmdline: the argument following '-id' is parsed as
 * a decimal number.
 *
 * Returns the parsed VMID, or 0 if the file cannot be read, no '-id' argument
 * with a value is present, or the value is not a plain decimal number.
 */
static unsigned long
get_vmid_from_pid(pid_t pid)
{
    char path[32] = { 0 };
    int written = snprintf(path, sizeof(path), "/proc/%d/cmdline", pid);
    if (written < 0) {
        fprintf(stderr, "error during snprintf for %d: %s\n", pid,
            strerror(errno));
        return 0;
    }
    if ((size_t)written >= sizeof(path)) {
        fprintf(stderr, "error: pid %d too long\n", pid);
        return 0;
    }

    FILE *cmdline = fopen(path, "re");
    if (cmdline == NULL) {
        fprintf(stderr, "error opening %s: %s\n", path, strerror(errno));
        return 0;
    }

    unsigned long vmid = 0;
    char *arg = NULL; // (re)allocated by getdelim, released below
    size_t arg_cap = 0;

    // skip forward until the '-id' switch itself has been consumed
    bool found_switch = false;
    while (!found_switch && getdelim(&arg, &arg_cap, '\0', cmdline) >= 0) {
        found_switch = (strcmp(arg, "-id") == 0);
    }

    // stays set for every failure that is reported together with errno
    bool report_errno = true;

    if (found_switch && getdelim(&arg, &arg_cap, '\0', cmdline) >= 0) {
        report_errno = false;

        if (arg[0] == '-' || arg[0] == '\0') {
            // another switch or an empty argument instead of a number
            fprintf(stderr, "invalid vmid %s\n", arg);
        } else {
            char *end = NULL;
            errno = 0;
            vmid = strtoul(arg, &end, 10);
            if (errno != 0) {
                vmid = 0;
                report_errno = true;
            } else if (*end != '\0') {
                // trailing characters after the digits
                fprintf(stderr, "invalid vmid %s\n", arg);
                vmid = 0;
            }
        }
    }

    if (report_errno) {
        fprintf(stderr, "error parsing vmid for %d: %s\n", pid, strerror(errno));
    }

    free(arg);
    fclose(cmdline);
    return vmid;
}
137
/*
 * Write exactly 'len' bytes from 'buf' to 'fd'.
 *
 * write() may be interrupted (EINTR) or accept only part of the data; both
 * cases are retried with the remaining bytes, so a short write is no longer
 * reported as a failure.
 *
 * Returns true once everything has been written, false if write() fails with
 * any other error or stops making progress.
 */
static bool
must_write(int fd, const char *buf, size_t len)
{
    size_t done = 0;

    for (;;) {
        ssize_t wlen = write(fd, buf + done, len - done);
        if (wlen < 0) {
            if (errno == EINTR) {
                continue;
            }
            return false;
        }

        done += (size_t)wlen;
        if (done >= len) {
            return true;
        }
        if (wlen == 0) {
            // nothing was accepted although data is pending - do not spin
            return false;
        }
    }
}
148
149/*
150 * qmp handling functions
151 */
152
3ff85001
SR
153static void
154send_qmp_cmd(struct Client *client, const char *buf, size_t len)
155{
156 if (!must_write(client->fd, buf, len - 1)) {
157 fprintf(stderr, "%s: cannot send QMP message\n", client->qemu.vmid);
158 cleanup_client(client);
159 }
160}
161
4c17b2e3
DC
162void
163handle_qmp_handshake(struct Client *client)
164{
3ff85001
SR
165 VERBOSE_PRINT("pid%d: got QMP handshake, assuming QEMU client\n", client->pid);
166
167 // extract vmid from cmdline, now that we know it's a QEMU process
168 unsigned long vmid = get_vmid_from_pid(client->pid);
169 int res = snprintf(client->qemu.vmid, sizeof(client->qemu.vmid), "%lu", vmid);
170 if (vmid == 0 || res < 0 || res >= (int)sizeof(client->qemu.vmid)) {
171 fprintf(stderr, "could not get vmid from pid %d\n", client->pid);
4c17b2e3 172 cleanup_client(client);
3ff85001
SR
173 return;
174 }
175
176 VERBOSE_PRINT("pid%d: assigned VMID: %s\n", client->pid, client->qemu.vmid);
177 client->type = CLIENT_QEMU;
178 if(!g_hash_table_insert(vm_clients, strdup(client->qemu.vmid), client)) {
179 // not fatal, just means backup handling won't work
180 fprintf(stderr, "%s: could not insert client into VMID->client table\n",
181 client->qemu.vmid);
4c17b2e3 182 }
3ff85001
SR
183
184 static const char qmp_answer[] = "{\"execute\":\"qmp_capabilities\"}\n";
185 send_qmp_cmd(client, qmp_answer, sizeof(qmp_answer));
4c17b2e3
DC
186}
187
188void
189handle_qmp_event(struct Client *client, struct json_object *obj)
190{
191 struct json_object *event;
192 if (!json_object_object_get_ex(obj, "event", &event)) {
193 return;
194 }
3ff85001 195 VERBOSE_PRINT("%s: got QMP event: %s\n", client->qemu.vmid,
4c17b2e3 196 json_object_get_string(event));
3ff85001
SR
197
198 if (client->state == STATE_TERMINATING) {
199 // QEMU sometimes sends a second SHUTDOWN after SIGTERM, ignore
200 VERBOSE_PRINT("%s: event was after termination, ignoring\n",
201 client->qemu.vmid);
202 return;
203 }
204
4c17b2e3
DC
205 // event, check if shutdown and get guest parameter
206 if (!strcmp(json_object_get_string(event), "SHUTDOWN")) {
3ff85001 207 client->qemu.graceful = 1;
4c17b2e3
DC
208 struct json_object *data;
209 struct json_object *guest;
210 if (json_object_object_get_ex(obj, "data", &data) &&
211 json_object_object_get_ex(data, "guest", &guest))
212 {
3ff85001 213 client->qemu.guest = (unsigned short)json_object_get_boolean(guest);
4c17b2e3 214 }
3ff85001
SR
215
216 // check if a backup is running and kill QEMU process if not
217 terminate_check(client);
218 }
219}
220
221void
222terminate_check(struct Client *client)
223{
224 if (client->state != STATE_IDLE) {
225 // if we're already in a request, queue this one until after
226 VERBOSE_PRINT("%s: terminate_check queued\n", client->qemu.vmid);
227 client->qemu.term_check_queued = true;
228 return;
229 }
230
231 client->qemu.term_check_queued = false;
232
233 VERBOSE_PRINT("%s: query-status\n", client->qemu.vmid);
234 client->state = STATE_EXPECT_STATUS_RESP;
235 static const char qmp_req[] = "{\"execute\":\"query-status\"}\n";
236 send_qmp_cmd(client, qmp_req, sizeof(qmp_req));
237}
238
239void
240handle_qmp_return(struct Client *client, struct json_object *data, bool error)
241{
242 if (error) {
243 const char *msg = "n/a";
244 struct json_object *desc;
245 if (json_object_object_get_ex(data, "desc", &desc)) {
246 msg = json_object_get_string(desc);
247 }
248 fprintf(stderr, "%s: received error from QMP: %s\n",
249 client->qemu.vmid, msg);
250 client->state = STATE_IDLE;
251 goto out;
252 }
253
254 struct json_object *status;
255 json_bool has_status = data &&
256 json_object_object_get_ex(data, "status", &status);
257
258 bool active = false;
259 if (has_status) {
260 const char *status_str = json_object_get_string(status);
261 active = status_str &&
262 (!strcmp(status_str, "running") || !strcmp(status_str, "paused"));
263 }
264
265 switch (client->state) {
266 case STATE_EXPECT_STATUS_RESP:
267 client->state = STATE_IDLE;
268 if (active) {
269 VERBOSE_PRINT("%s: got status: VM is active\n", client->qemu.vmid);
270 } else if (!client->qemu.backup) {
271 terminate_client(client);
272 } else {
273 // if we're in a backup, don't do anything, vzdump will notify
274 // us when the backup finishes
275 VERBOSE_PRINT("%s: not active, but running backup - keep alive\n",
276 client->qemu.vmid);
277 }
278 break;
279
280 // this means we received the empty return from our handshake answer
281 case STATE_HANDSHAKE:
282 client->state = STATE_IDLE;
283 VERBOSE_PRINT("%s: QMP handshake complete\n", client->qemu.vmid);
284 break;
285
286 case STATE_IDLE:
287 case STATE_TERMINATING:
288 VERBOSE_PRINT("%s: spurious return value received\n",
289 client->qemu.vmid);
290 break;
291 }
292
293out:
294 if (client->qemu.term_check_queued) {
295 terminate_check(client);
296 }
297}
298
299/*
300 * VZDump specific client functions
301 */
302
303void
304handle_vzdump_handshake(struct Client *client, struct json_object *data)
305{
306 client->state = STATE_IDLE;
307
308 struct json_object *vmid_obj;
309 json_bool has_vmid = data && json_object_object_get_ex(data, "vmid", &vmid_obj);
310
311 if (!has_vmid) {
312 VERBOSE_PRINT("pid%d: invalid vzdump handshake: no vmid\n",
313 client->pid);
314 return;
315 }
316
317 const char *vmid_str = json_object_get_string(vmid_obj);
318
319 if (!vmid_str) {
320 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid is not a string\n",
321 client->pid);
322 return;
323 }
324
325 int res = snprintf(client->vzdump.vmid, sizeof(client->vzdump.vmid), "%s", vmid_str);
326 if (res < 0 || res >= (int)sizeof(client->vzdump.vmid)) {
327 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid too long or invalid\n",
328 client->pid);
329 return;
330 }
331
332 struct Client *vmc =
333 (struct Client*) g_hash_table_lookup(vm_clients, client->vzdump.vmid);
334 if (vmc) {
335 vmc->qemu.backup = true;
336
337 // only mark as VZDUMP once we have set everything up, otherwise 'cleanup'
338 // might try to access an invalid value
339 client->type = CLIENT_VZDUMP;
340 VERBOSE_PRINT("%s: vzdump backup started\n",
341 client->vzdump.vmid);
342 } else {
343 VERBOSE_PRINT("%s: vzdump requested backup start for unregistered VM\n",
344 client->vzdump.vmid);
4c17b2e3
DC
345 }
346}
347
348/*
349 * client management functions
350 */
351
352void
353add_new_client(int client_fd)
354{
355 struct Client *client = calloc(sizeof(struct Client), 1);
3ff85001
SR
356 client->state = STATE_HANDSHAKE;
357 client->type = CLIENT_NONE;
4c17b2e3
DC
358 client->fd = client_fd;
359 client->pid = get_pid_from_fd(client_fd);
360 if (client->pid == 0) {
361 fprintf(stderr, "could not get pid from client\n");
362 goto err;
363 }
4c17b2e3
DC
364
365 struct epoll_event ev;
366 ev.events = EPOLLIN;
367 ev.data.ptr = client;
3ff85001 368 int res = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client_fd, &ev);
4c17b2e3
DC
369 if (res < 0) {
370 perror("epoll_ctl client add");
371 goto err;
372 }
373
3ff85001 374 VERBOSE_PRINT("added new client, pid: %d\n", client->pid);
4c17b2e3
DC
375
376 return;
377err:
378 (void)close(client_fd);
379 free(client);
380}
381
3ff85001
SR
382static void
383cleanup_qemu_client(struct Client *client)
4c17b2e3 384{
3ff85001
SR
385 unsigned short graceful = client->qemu.graceful;
386 unsigned short guest = client->qemu.guest;
387 char vmid[sizeof(client->qemu.vmid)];
388 strncpy(vmid, client->qemu.vmid, sizeof(vmid));
389 g_hash_table_remove(vm_clients, &vmid); // frees key, ignore errors
390 VERBOSE_PRINT("%s: executing cleanup (graceful: %d, guest: %d)\n",
391 vmid, graceful, guest);
4c17b2e3
DC
392
393 int pid = fork();
394 if (pid < 0) {
395 fprintf(stderr, "fork failed: %s\n", strerror(errno));
396 return;
397 }
398 if (pid == 0) {
399 char *script = "/usr/sbin/qm";
400
401 char *args[] = {
402 script,
403 "cleanup",
404 vmid,
405 graceful ? "1" : "0",
406 guest ? "1" : "0",
407 NULL
408 };
409
410 execvp(script, args);
411 perror("execvp");
412 _exit(1);
413 }
414}
415
3ff85001
SR
416void
417cleanup_client(struct Client *client)
418{
419 log_neg(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client->fd, NULL), "epoll del");
420 (void)close(client->fd);
421
422 struct Client *vmc;
423 switch (client->type) {
424 case CLIENT_QEMU:
425 cleanup_qemu_client(client);
426 break;
427
428 case CLIENT_VZDUMP:
429 vmc = (struct Client*) g_hash_table_lookup(vm_clients, client->vzdump.vmid);
430 if (vmc) {
431 VERBOSE_PRINT("%s: backup ended\n", client->vzdump.vmid);
432 vmc->qemu.backup = false;
433 terminate_check(vmc);
434 }
435 break;
436
437 case CLIENT_NONE:
438 // do nothing, only close socket
439 break;
440 }
441
442 free(client);
443}
444
445void
446terminate_client(struct Client *client)
447{
448 VERBOSE_PRINT("%s: terminating client (pid %d)\n",
449 client->qemu.vmid, client->pid);
450
451 client->state = STATE_TERMINATING;
452
4c500f16
SR
453 // open a pidfd before kill for later cleanup
454 int pidfd = pidfd_open(client->pid, 0);
455 if (pidfd < 0) {
456 switch (errno) {
457 case ESRCH:
458 // process already dead for some reason, cleanup done
459 VERBOSE_PRINT("%s: failed to open pidfd, process already dead (pid %d)\n",
460 client->qemu.vmid, client->pid);
461 return;
462
463 // otherwise fall back to just using the PID directly, but don't
464 // print if we only failed because we're running on an older kernel
465 case ENOSYS:
466 break;
467 default:
468 perror("failed to open QEMU pidfd for cleanup");
469 break;
470 }
471 }
472
3ff85001
SR
473 int err = kill(client->pid, SIGTERM);
474 log_neg(err, "kill");
4c500f16
SR
475
476 struct CleanupData *data_ptr = malloc(sizeof(struct CleanupData));
477 struct CleanupData data = {
478 .pid = client->pid,
479 .pidfd = pidfd
480 };
481 *data_ptr = data;
482 forced_cleanups = g_slist_prepend(forced_cleanups, (void *)data_ptr);
483
484 // resets any other alarms, but will fire eventually and cleanup all
485 alarm(5);
3ff85001
SR
486}
487
4c17b2e3
DC
488void
489handle_client(struct Client *client)
490{
3ff85001 491 VERBOSE_PRINT("pid%d: entering handle\n", client->pid);
4c17b2e3
DC
492 ssize_t len;
493 do {
494 len = read(client->fd, (client->buf+client->buflen),
495 sizeof(client->buf) - client->buflen);
496 } while (len < 0 && errno == EINTR);
497
498 if (len < 0) {
499 if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
500 log_neg((int)len, "read");
501 cleanup_client(client);
502 }
503 return;
504 } else if (len == 0) {
3ff85001 505 VERBOSE_PRINT("pid%d: got EOF\n", client->pid);
4c17b2e3
DC
506 cleanup_client(client);
507 return;
508 }
509
3ff85001 510 VERBOSE_PRINT("pid%d: read %ld bytes\n", client->pid, len);
4c17b2e3
DC
511 client->buflen += len;
512
513 struct json_tokener *tok = json_tokener_new();
514 struct json_object *jobj = NULL;
515 enum json_tokener_error jerr = json_tokener_success;
516 while (jerr == json_tokener_success && client->buflen != 0) {
517 jobj = json_tokener_parse_ex(tok, client->buf, (int)client->buflen);
518 jerr = json_tokener_get_error(tok);
519 unsigned int offset = (unsigned int)tok->char_offset;
520 switch (jerr) {
521 case json_tokener_success:
522 // move rest from buffer to front
523 memmove(client->buf, client->buf + offset, client->buflen - offset);
524 client->buflen -= offset;
525 if (json_object_is_type(jobj, json_type_object)) {
526 struct json_object *obj;
527 if (json_object_object_get_ex(jobj, "QMP", &obj)) {
528 handle_qmp_handshake(client);
529 } else if (json_object_object_get_ex(jobj, "event", &obj)) {
530 handle_qmp_event(client, jobj);
3ff85001
SR
531 } else if (json_object_object_get_ex(jobj, "return", &obj)) {
532 handle_qmp_return(client, obj, false);
533 } else if (json_object_object_get_ex(jobj, "error", &obj)) {
534 handle_qmp_return(client, obj, true);
535 } else if (json_object_object_get_ex(jobj, "vzdump", &obj)) {
536 handle_vzdump_handshake(client, obj);
4c17b2e3
DC
537 } // else ignore message
538 }
539 break;
540 case json_tokener_continue:
541 if (client->buflen >= sizeof(client->buf)) {
3ff85001
SR
542 VERBOSE_PRINT("pid%d: msg too large, discarding buffer\n",
543 client->pid);
4c17b2e3
DC
544 memset(client->buf, 0, sizeof(client->buf));
545 client->buflen = 0;
546 } // else we have enough space try again after next read
547 break;
548 default:
3ff85001
SR
549 VERBOSE_PRINT("pid%d: parse error: %d, discarding buffer\n",
550 client->pid, jerr);
4c17b2e3
DC
551 memset(client->buf, 0, client->buflen);
552 client->buflen = 0;
553 break;
554 }
555 json_object_put(jobj);
556 }
557 json_tokener_free(tok);
558}
559
560
4c500f16
SR
561/*
562 * SIGALRM and cleanup handling
563 *
564 * terminate_client will set an alarm for 5 seconds and add its client's PID to
565 * the forced_cleanups list - when the timer expires, we iterate the list and
566 * attempt to issue SIGKILL to all processes which haven't yet stopped.
567 */
568
569static void
570alarm_handler(__attribute__((unused)) int signum)
571{
572 alarm_triggered = 1;
573}
574
575static void
576sigkill(void *ptr, __attribute__((unused)) void *unused)
577{
578 struct CleanupData data = *((struct CleanupData *)ptr);
579 int err;
580
581 if (data.pidfd > 0) {
582 err = pidfd_send_signal(data.pidfd, SIGKILL, NULL, 0);
0a279963 583 (void)close(data.pidfd);
4c500f16
SR
584 } else {
585 err = kill(data.pid, SIGKILL);
586 }
587
588 if (err < 0) {
589 if (errno != ESRCH) {
590 fprintf(stderr, "SIGKILL cleanup of pid '%d' failed - %s\n",
591 data.pid, strerror(errno));
592 }
593 } else {
594 fprintf(stderr, "cleanup failed, terminating pid '%d' with SIGKILL\n",
595 data.pid);
596 }
597}
598
599static void
600handle_forced_cleanup()
601{
602 if (alarm_triggered) {
0a279963 603 VERBOSE_PRINT("clearing forced cleanup backlog\n");
4c500f16
SR
604 alarm_triggered = 0;
605 g_slist_foreach(forced_cleanups, sigkill, NULL);
606 g_slist_free_full(forced_cleanups, free);
607 forced_cleanups = NULL;
608 }
609}
610
611
4c17b2e3
DC
612int
613main(int argc, char *argv[])
614{
615 int opt;
616 int daemonize = 1;
617 char *socket_path = NULL;
618 progname = argv[0];
619
620 while ((opt = getopt(argc, argv, "hfv")) != -1) {
621 switch (opt) {
622 case 'f':
623 daemonize = 0;
624 break;
625 case 'v':
626 verbose = 1;
627 break;
628 case 'h':
629 usage();
630 exit(EXIT_SUCCESS);
631 break;
632 default:
633 usage();
634 exit(EXIT_FAILURE);
635 }
636 }
637
638 if (optind >= argc) {
639 usage();
640 exit(EXIT_FAILURE);
641 }
642
643 signal(SIGCHLD, SIG_IGN);
4c500f16 644 signal(SIGALRM, alarm_handler);
4c17b2e3
DC
645
646 socket_path = argv[optind];
647
648 int sock = socket(AF_UNIX, SOCK_STREAM, 0);
649 bail_neg(sock, "socket");
650
651 struct sockaddr_un addr;
652 memset(&addr, 0, sizeof(addr));
653 addr.sun_family = AF_UNIX;
654 strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1);
655
656 unlink(socket_path);
657 bail_neg(bind(sock, (struct sockaddr*)&addr, sizeof(addr)), "bind");
658
659 struct epoll_event ev, events[1];
660 epoll_fd = epoll_create1(EPOLL_CLOEXEC);
661 bail_neg(epoll_fd, "epoll_create1");
662
663 ev.events = EPOLLIN;
664 ev.data.fd = sock;
665 bail_neg(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev), "epoll_ctl");
666
667 bail_neg(listen(sock, 10), "listen");
668
669 if (daemonize) {
670 bail_neg(daemon(0, 1), "daemon");
671 }
672
3ff85001
SR
673 vm_clients = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
674
4c17b2e3
DC
675 int nevents;
676
677 for(;;) {
678 nevents = epoll_wait(epoll_fd, events, 1, -1);
679 if (nevents < 0 && errno == EINTR) {
4c500f16 680 handle_forced_cleanup();
4c17b2e3
DC
681 continue;
682 }
683 bail_neg(nevents, "epoll_wait");
684
685 for (int n = 0; n < nevents; n++) {
686 if (events[n].data.fd == sock) {
687
688 int conn_sock = accept4(sock, NULL, NULL,
689 SOCK_NONBLOCK | SOCK_CLOEXEC);
690 log_neg(conn_sock, "accept");
691 if (conn_sock > -1) {
692 add_new_client(conn_sock);
693 }
694 } else {
695 handle_client((struct Client *)events[n].data.ptr);
696 }
697 }
4c500f16
SR
698
699 handle_forced_cleanup();
4c17b2e3
DC
700 }
701}