]> git.proxmox.com Git - qemu-server.git/blame - qmeventd/qmeventd.c
qmeventd: fix linker flags order
[qemu-server.git] / qmeventd / qmeventd.c
CommitLineData
649dbf42 1// SPDX-License-Identifier: AGPL-3.0-or-later
4c17b2e3 2/*
649dbf42 3 Copyright (C) 2018 - 2021 Proxmox Server Solutions GmbH
4c17b2e3
DC
4
5 Author: Dominik Csapak <d.csapak@proxmox.com>
aedf8208 6 Author: Stefan Reiter <s.reiter@proxmox.com>
4c17b2e3 7
aedf8208 8 Description:
4c17b2e3 9
aedf8208
TL
    qmeventd listens on a given socket and waits for QEMU processes to
    connect. After accepting a connection, qmeventd waits for shutdown events
    followed by the closing of the socket. Once that happens, `qm cleanup` is
    executed with the following three arguments:
        VMID <graceful> <guest>
    `graceful` is `1` or `0` depending on whether a shutdown event was observed
    before the socket got closed. `guest` is likewise a boolean `1` or `0`,
    depending on whether the shutdown was requested from within the guest OS
    (i.e., from the "inside").
4c17b2e3
DC
19*/
20
21#ifndef _GNU_SOURCE
22#define _GNU_SOURCE
23#endif
24
25#include <errno.h>
26#include <fcntl.h>
aedf8208 27#include <gmodule.h>
4c17b2e3
DC
28#include <json.h>
29#include <signal.h>
30#include <stdbool.h>
31#include <stdio.h>
32#include <string.h>
33#include <sys/epoll.h>
34#include <sys/socket.h>
35#include <sys/types.h>
36#include <sys/un.h>
37#include <sys/wait.h>
38#include <unistd.h>
39
40#include "qmeventd.h"
41
42static int verbose = 0;
43static int epoll_fd = 0;
44static const char *progname;
3ff85001 45GHashTable *vm_clients; // key=vmid (freed on remove), value=*Client (free manually)
4c500f16
SR
46GSList *forced_cleanups;
47volatile sig_atomic_t alarm_triggered = 0;
3ff85001 48
4c17b2e3
DC
49/*
50 * Helper functions
51 */
52
53static void
54usage()
55{
56 fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
57 fprintf(stderr, " -f run in foreground (default: false)\n");
58 fprintf(stderr, " -v verbose (default: false)\n");
59 fprintf(stderr, " PATH use PATH for socket\n");
60}
61
62static pid_t
63get_pid_from_fd(int fd)
64{
65 struct ucred credentials = { .pid = 0, .uid = 0, .gid = 0 };
66 socklen_t len = sizeof(struct ucred);
67 log_neg(getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &credentials, &len), "getsockopt");
68 return credentials.pid;
69}
70
71/*
72 * reads the vmid from /proc/<pid>/cmdline
73 * after the '-id' argument
74 */
/*
 * Extract the VMID of the process `pid` from /proc/<pid>/cmdline.
 *
 * The NUL-separated arguments are scanned for "-id"; the argument following
 * it must be a plain decimal number. Returns the parsed VMID, or 0 if the
 * cmdline could not be read, no "-id" argument was found, or its value is not
 * a valid number. All failures are reported on stderr.
 */
static unsigned long
get_vmid_from_pid(pid_t pid)
{
    char path[32] = { 0 };
    int written = snprintf(path, sizeof(path), "/proc/%d/cmdline", pid);
    if (written < 0) {
        fprintf(stderr, "error during snprintf for %d: %s\n", pid,
            strerror(errno));
        return 0;
    }
    if ((size_t)written >= sizeof(path)) {
        fprintf(stderr, "error: pid %d too long\n", pid);
        return 0;
    }

    FILE *cmdline = fopen(path, "re");
    if (cmdline == NULL) {
        fprintf(stderr, "error opening %s: %s\n", path, strerror(errno));
        return 0;
    }

    unsigned long vmid = 0;
    char *arg = NULL;
    size_t arg_cap = 0;

    // skip forward until the "-id" argument (or until reading fails)
    bool found_id = false;
    while (!found_id && getdelim(&arg, &arg_cap, '\0', cmdline) >= 0) {
        found_id = (strcmp(arg, "-id") == 0);
    }

    // stays true on every path that has not already printed a specific
    // message or produced a valid vmid
    bool generic_error = true;

    if (found_id && getdelim(&arg, &arg_cap, '\0', cmdline) >= 0) {
        generic_error = false;

        if (arg[0] == '-' || arg[0] == '\0') {
            fprintf(stderr, "invalid vmid %s\n", arg);
        } else {
            errno = 0;
            char *end = NULL;
            unsigned long parsed = strtoul(arg, &end, 10);

            if (errno != 0) {
                // conversion failed, report it via errno below
                generic_error = true;
            } else if (*end != '\0') {
                fprintf(stderr, "invalid vmid %s\n", arg);
            } else {
                vmid = parsed;
            }
        }
    }

    if (generic_error) {
        fprintf(stderr, "error parsing vmid for %d: %s\n", pid, strerror(errno));
    }

    free(arg);
    fclose(cmdline);
    return vmid;
}
137
/*
 * Write exactly `len` bytes from `buf` to `fd`.
 *
 * Retries when interrupted by a signal (EINTR) and continues after short
 * writes, so that a partially accepted message is not reported as a failure
 * while the descriptor is still writable.
 *
 * Returns true if all bytes were written, false on any other error or if the
 * descriptor stops accepting data.
 */
static bool
must_write(int fd, const char *buf, size_t len)
{
    size_t done = 0;

    for (;;) {
        ssize_t wlen = write(fd, buf + done, len - done);
        if (wlen < 0) {
            if (errno == EINTR) {
                continue;
            }
            return false;
        }

        done += (size_t)wlen;
        if (done >= len) {
            return true;
        }
        if (wlen == 0) {
            // no progress and no error - bail out instead of spinning
            return false;
        }
    }
}
148
149/*
150 * qmp handling functions
151 */
152
3ff85001
SR
153static void
154send_qmp_cmd(struct Client *client, const char *buf, size_t len)
155{
156 if (!must_write(client->fd, buf, len - 1)) {
157 fprintf(stderr, "%s: cannot send QMP message\n", client->qemu.vmid);
158 cleanup_client(client);
159 }
160}
161
4c17b2e3
DC
162void
163handle_qmp_handshake(struct Client *client)
164{
3ff85001
SR
165 VERBOSE_PRINT("pid%d: got QMP handshake, assuming QEMU client\n", client->pid);
166
167 // extract vmid from cmdline, now that we know it's a QEMU process
168 unsigned long vmid = get_vmid_from_pid(client->pid);
169 int res = snprintf(client->qemu.vmid, sizeof(client->qemu.vmid), "%lu", vmid);
170 if (vmid == 0 || res < 0 || res >= (int)sizeof(client->qemu.vmid)) {
171 fprintf(stderr, "could not get vmid from pid %d\n", client->pid);
4c17b2e3 172 cleanup_client(client);
3ff85001
SR
173 return;
174 }
175
176 VERBOSE_PRINT("pid%d: assigned VMID: %s\n", client->pid, client->qemu.vmid);
177 client->type = CLIENT_QEMU;
178 if(!g_hash_table_insert(vm_clients, strdup(client->qemu.vmid), client)) {
179 // not fatal, just means backup handling won't work
180 fprintf(stderr, "%s: could not insert client into VMID->client table\n",
181 client->qemu.vmid);
4c17b2e3 182 }
3ff85001
SR
183
184 static const char qmp_answer[] = "{\"execute\":\"qmp_capabilities\"}\n";
185 send_qmp_cmd(client, qmp_answer, sizeof(qmp_answer));
4c17b2e3
DC
186}
187
188void
189handle_qmp_event(struct Client *client, struct json_object *obj)
190{
191 struct json_object *event;
192 if (!json_object_object_get_ex(obj, "event", &event)) {
193 return;
194 }
a2488e4c 195 VERBOSE_PRINT("%s: got QMP event: %s\n", client->qemu.vmid, json_object_get_string(event));
3ff85001
SR
196
197 if (client->state == STATE_TERMINATING) {
198 // QEMU sometimes sends a second SHUTDOWN after SIGTERM, ignore
a2488e4c 199 VERBOSE_PRINT("%s: event was after termination, ignoring\n", client->qemu.vmid);
3ff85001
SR
200 return;
201 }
202
4c17b2e3
DC
203 // event, check if shutdown and get guest parameter
204 if (!strcmp(json_object_get_string(event), "SHUTDOWN")) {
3ff85001 205 client->qemu.graceful = 1;
4c17b2e3
DC
206 struct json_object *data;
207 struct json_object *guest;
208 if (json_object_object_get_ex(obj, "data", &data) &&
209 json_object_object_get_ex(data, "guest", &guest))
210 {
3ff85001 211 client->qemu.guest = (unsigned short)json_object_get_boolean(guest);
4c17b2e3 212 }
3ff85001
SR
213
214 // check if a backup is running and kill QEMU process if not
215 terminate_check(client);
216 }
217}
218
219void
220terminate_check(struct Client *client)
221{
222 if (client->state != STATE_IDLE) {
223 // if we're already in a request, queue this one until after
224 VERBOSE_PRINT("%s: terminate_check queued\n", client->qemu.vmid);
225 client->qemu.term_check_queued = true;
226 return;
227 }
228
229 client->qemu.term_check_queued = false;
230
231 VERBOSE_PRINT("%s: query-status\n", client->qemu.vmid);
232 client->state = STATE_EXPECT_STATUS_RESP;
233 static const char qmp_req[] = "{\"execute\":\"query-status\"}\n";
234 send_qmp_cmd(client, qmp_req, sizeof(qmp_req));
235}
236
237void
238handle_qmp_return(struct Client *client, struct json_object *data, bool error)
239{
240 if (error) {
241 const char *msg = "n/a";
242 struct json_object *desc;
243 if (json_object_object_get_ex(data, "desc", &desc)) {
244 msg = json_object_get_string(desc);
245 }
246 fprintf(stderr, "%s: received error from QMP: %s\n",
247 client->qemu.vmid, msg);
248 client->state = STATE_IDLE;
249 goto out;
250 }
251
252 struct json_object *status;
253 json_bool has_status = data &&
254 json_object_object_get_ex(data, "status", &status);
255
256 bool active = false;
257 if (has_status) {
258 const char *status_str = json_object_get_string(status);
259 active = status_str &&
260 (!strcmp(status_str, "running") || !strcmp(status_str, "paused"));
261 }
262
263 switch (client->state) {
264 case STATE_EXPECT_STATUS_RESP:
265 client->state = STATE_IDLE;
266 if (active) {
267 VERBOSE_PRINT("%s: got status: VM is active\n", client->qemu.vmid);
268 } else if (!client->qemu.backup) {
269 terminate_client(client);
270 } else {
271 // if we're in a backup, don't do anything, vzdump will notify
272 // us when the backup finishes
273 VERBOSE_PRINT("%s: not active, but running backup - keep alive\n",
274 client->qemu.vmid);
275 }
276 break;
277
278 // this means we received the empty return from our handshake answer
279 case STATE_HANDSHAKE:
280 client->state = STATE_IDLE;
281 VERBOSE_PRINT("%s: QMP handshake complete\n", client->qemu.vmid);
282 break;
283
284 case STATE_IDLE:
285 case STATE_TERMINATING:
286 VERBOSE_PRINT("%s: spurious return value received\n",
287 client->qemu.vmid);
288 break;
289 }
290
291out:
292 if (client->qemu.term_check_queued) {
293 terminate_check(client);
294 }
295}
296
297/*
298 * VZDump specific client functions
299 */
300
301void
302handle_vzdump_handshake(struct Client *client, struct json_object *data)
303{
304 client->state = STATE_IDLE;
305
306 struct json_object *vmid_obj;
307 json_bool has_vmid = data && json_object_object_get_ex(data, "vmid", &vmid_obj);
308
309 if (!has_vmid) {
310 VERBOSE_PRINT("pid%d: invalid vzdump handshake: no vmid\n",
311 client->pid);
312 return;
313 }
314
315 const char *vmid_str = json_object_get_string(vmid_obj);
316
317 if (!vmid_str) {
318 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid is not a string\n",
319 client->pid);
320 return;
321 }
322
323 int res = snprintf(client->vzdump.vmid, sizeof(client->vzdump.vmid), "%s", vmid_str);
324 if (res < 0 || res >= (int)sizeof(client->vzdump.vmid)) {
325 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid too long or invalid\n",
326 client->pid);
327 return;
328 }
329
330 struct Client *vmc =
331 (struct Client*) g_hash_table_lookup(vm_clients, client->vzdump.vmid);
332 if (vmc) {
333 vmc->qemu.backup = true;
334
335 // only mark as VZDUMP once we have set everything up, otherwise 'cleanup'
336 // might try to access an invalid value
337 client->type = CLIENT_VZDUMP;
338 VERBOSE_PRINT("%s: vzdump backup started\n",
339 client->vzdump.vmid);
340 } else {
341 VERBOSE_PRINT("%s: vzdump requested backup start for unregistered VM\n",
342 client->vzdump.vmid);
4c17b2e3
DC
343 }
344}
345
346/*
347 * client management functions
348 */
349
350void
351add_new_client(int client_fd)
352{
353 struct Client *client = calloc(sizeof(struct Client), 1);
6d4f89b6
TL
354 if (client == NULL) {
355 fprintf(stderr, "could not add new client - allocation failed!\n");
356 fflush(stderr);
357 return;
358 }
3ff85001
SR
359 client->state = STATE_HANDSHAKE;
360 client->type = CLIENT_NONE;
4c17b2e3
DC
361 client->fd = client_fd;
362 client->pid = get_pid_from_fd(client_fd);
363 if (client->pid == 0) {
364 fprintf(stderr, "could not get pid from client\n");
365 goto err;
366 }
4c17b2e3
DC
367
368 struct epoll_event ev;
369 ev.events = EPOLLIN;
370 ev.data.ptr = client;
3ff85001 371 int res = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client_fd, &ev);
4c17b2e3
DC
372 if (res < 0) {
373 perror("epoll_ctl client add");
374 goto err;
375 }
376
3ff85001 377 VERBOSE_PRINT("added new client, pid: %d\n", client->pid);
4c17b2e3
DC
378
379 return;
380err:
381 (void)close(client_fd);
382 free(client);
383}
384
3ff85001
SR
385static void
386cleanup_qemu_client(struct Client *client)
4c17b2e3 387{
3ff85001
SR
388 unsigned short graceful = client->qemu.graceful;
389 unsigned short guest = client->qemu.guest;
390 char vmid[sizeof(client->qemu.vmid)];
391 strncpy(vmid, client->qemu.vmid, sizeof(vmid));
392 g_hash_table_remove(vm_clients, &vmid); // frees key, ignore errors
393 VERBOSE_PRINT("%s: executing cleanup (graceful: %d, guest: %d)\n",
394 vmid, graceful, guest);
4c17b2e3
DC
395
396 int pid = fork();
397 if (pid < 0) {
398 fprintf(stderr, "fork failed: %s\n", strerror(errno));
399 return;
400 }
401 if (pid == 0) {
402 char *script = "/usr/sbin/qm";
403
404 char *args[] = {
405 script,
406 "cleanup",
407 vmid,
408 graceful ? "1" : "0",
409 guest ? "1" : "0",
410 NULL
411 };
412
413 execvp(script, args);
414 perror("execvp");
415 _exit(1);
416 }
417}
418
3ff85001
SR
419void
420cleanup_client(struct Client *client)
421{
422 log_neg(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client->fd, NULL), "epoll del");
423 (void)close(client->fd);
424
425 struct Client *vmc;
426 switch (client->type) {
427 case CLIENT_QEMU:
428 cleanup_qemu_client(client);
429 break;
430
431 case CLIENT_VZDUMP:
432 vmc = (struct Client*) g_hash_table_lookup(vm_clients, client->vzdump.vmid);
433 if (vmc) {
434 VERBOSE_PRINT("%s: backup ended\n", client->vzdump.vmid);
435 vmc->qemu.backup = false;
436 terminate_check(vmc);
437 }
438 break;
439
440 case CLIENT_NONE:
441 // do nothing, only close socket
442 break;
443 }
444
445 free(client);
446}
447
448void
449terminate_client(struct Client *client)
450{
451 VERBOSE_PRINT("%s: terminating client (pid %d)\n",
452 client->qemu.vmid, client->pid);
453
454 client->state = STATE_TERMINATING;
455
4c500f16
SR
456 // open a pidfd before kill for later cleanup
457 int pidfd = pidfd_open(client->pid, 0);
458 if (pidfd < 0) {
459 switch (errno) {
460 case ESRCH:
461 // process already dead for some reason, cleanup done
462 VERBOSE_PRINT("%s: failed to open pidfd, process already dead (pid %d)\n",
463 client->qemu.vmid, client->pid);
464 return;
465
466 // otherwise fall back to just using the PID directly, but don't
467 // print if we only failed because we're running on an older kernel
468 case ENOSYS:
469 break;
470 default:
471 perror("failed to open QEMU pidfd for cleanup");
472 break;
473 }
474 }
475
3ff85001
SR
476 int err = kill(client->pid, SIGTERM);
477 log_neg(err, "kill");
4c500f16
SR
478
479 struct CleanupData *data_ptr = malloc(sizeof(struct CleanupData));
480 struct CleanupData data = {
481 .pid = client->pid,
482 .pidfd = pidfd
483 };
484 *data_ptr = data;
485 forced_cleanups = g_slist_prepend(forced_cleanups, (void *)data_ptr);
486
487 // resets any other alarms, but will fire eventually and cleanup all
488 alarm(5);
3ff85001
SR
489}
490
4c17b2e3
DC
491void
492handle_client(struct Client *client)
493{
3ff85001 494 VERBOSE_PRINT("pid%d: entering handle\n", client->pid);
4c17b2e3
DC
495 ssize_t len;
496 do {
497 len = read(client->fd, (client->buf+client->buflen),
498 sizeof(client->buf) - client->buflen);
499 } while (len < 0 && errno == EINTR);
500
501 if (len < 0) {
502 if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
503 log_neg((int)len, "read");
504 cleanup_client(client);
505 }
506 return;
507 } else if (len == 0) {
3ff85001 508 VERBOSE_PRINT("pid%d: got EOF\n", client->pid);
4c17b2e3
DC
509 cleanup_client(client);
510 return;
511 }
512
3ff85001 513 VERBOSE_PRINT("pid%d: read %ld bytes\n", client->pid, len);
4c17b2e3
DC
514 client->buflen += len;
515
516 struct json_tokener *tok = json_tokener_new();
517 struct json_object *jobj = NULL;
518 enum json_tokener_error jerr = json_tokener_success;
519 while (jerr == json_tokener_success && client->buflen != 0) {
520 jobj = json_tokener_parse_ex(tok, client->buf, (int)client->buflen);
521 jerr = json_tokener_get_error(tok);
522 unsigned int offset = (unsigned int)tok->char_offset;
523 switch (jerr) {
524 case json_tokener_success:
525 // move rest from buffer to front
526 memmove(client->buf, client->buf + offset, client->buflen - offset);
527 client->buflen -= offset;
528 if (json_object_is_type(jobj, json_type_object)) {
529 struct json_object *obj;
530 if (json_object_object_get_ex(jobj, "QMP", &obj)) {
531 handle_qmp_handshake(client);
532 } else if (json_object_object_get_ex(jobj, "event", &obj)) {
533 handle_qmp_event(client, jobj);
3ff85001
SR
534 } else if (json_object_object_get_ex(jobj, "return", &obj)) {
535 handle_qmp_return(client, obj, false);
536 } else if (json_object_object_get_ex(jobj, "error", &obj)) {
537 handle_qmp_return(client, obj, true);
538 } else if (json_object_object_get_ex(jobj, "vzdump", &obj)) {
539 handle_vzdump_handshake(client, obj);
4c17b2e3
DC
540 } // else ignore message
541 }
542 break;
543 case json_tokener_continue:
544 if (client->buflen >= sizeof(client->buf)) {
a2488e4c 545 VERBOSE_PRINT("pid%d: msg too large, discarding buffer\n", client->pid);
4c17b2e3
DC
546 memset(client->buf, 0, sizeof(client->buf));
547 client->buflen = 0;
548 } // else we have enough space try again after next read
549 break;
550 default:
a2488e4c 551 VERBOSE_PRINT("pid%d: parse error: %d, discarding buffer\n", client->pid, jerr);
4c17b2e3
DC
552 memset(client->buf, 0, client->buflen);
553 client->buflen = 0;
554 break;
555 }
556 json_object_put(jobj);
557 }
558 json_tokener_free(tok);
559}
560
561
4c500f16
SR
562/*
563 * SIGALRM and cleanup handling
564 *
565 * terminate_client will set an alarm for 5 seconds and add its client's PID to
566 * the forced_cleanups list - when the timer expires, we iterate the list and
567 * attempt to issue SIGKILL to all processes which haven't yet stopped.
568 */
569
570static void
571alarm_handler(__attribute__((unused)) int signum)
572{
573 alarm_triggered = 1;
574}
575
576static void
577sigkill(void *ptr, __attribute__((unused)) void *unused)
578{
579 struct CleanupData data = *((struct CleanupData *)ptr);
580 int err;
581
582 if (data.pidfd > 0) {
583 err = pidfd_send_signal(data.pidfd, SIGKILL, NULL, 0);
0a279963 584 (void)close(data.pidfd);
4c500f16
SR
585 } else {
586 err = kill(data.pid, SIGKILL);
587 }
588
589 if (err < 0) {
590 if (errno != ESRCH) {
591 fprintf(stderr, "SIGKILL cleanup of pid '%d' failed - %s\n",
592 data.pid, strerror(errno));
593 }
594 } else {
595 fprintf(stderr, "cleanup failed, terminating pid '%d' with SIGKILL\n",
596 data.pid);
597 }
598}
599
600static void
601handle_forced_cleanup()
602{
603 if (alarm_triggered) {
0a279963 604 VERBOSE_PRINT("clearing forced cleanup backlog\n");
4c500f16
SR
605 alarm_triggered = 0;
606 g_slist_foreach(forced_cleanups, sigkill, NULL);
607 g_slist_free_full(forced_cleanups, free);
608 forced_cleanups = NULL;
609 }
610}
611
612
4c17b2e3
DC
613int
614main(int argc, char *argv[])
615{
616 int opt;
617 int daemonize = 1;
618 char *socket_path = NULL;
619 progname = argv[0];
620
621 while ((opt = getopt(argc, argv, "hfv")) != -1) {
622 switch (opt) {
623 case 'f':
624 daemonize = 0;
625 break;
626 case 'v':
627 verbose = 1;
628 break;
629 case 'h':
630 usage();
631 exit(EXIT_SUCCESS);
632 break;
633 default:
634 usage();
635 exit(EXIT_FAILURE);
636 }
637 }
638
639 if (optind >= argc) {
640 usage();
641 exit(EXIT_FAILURE);
642 }
643
644 signal(SIGCHLD, SIG_IGN);
4c500f16 645 signal(SIGALRM, alarm_handler);
4c17b2e3
DC
646
647 socket_path = argv[optind];
648
649 int sock = socket(AF_UNIX, SOCK_STREAM, 0);
650 bail_neg(sock, "socket");
651
652 struct sockaddr_un addr;
653 memset(&addr, 0, sizeof(addr));
654 addr.sun_family = AF_UNIX;
655 strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1);
656
657 unlink(socket_path);
658 bail_neg(bind(sock, (struct sockaddr*)&addr, sizeof(addr)), "bind");
659
660 struct epoll_event ev, events[1];
661 epoll_fd = epoll_create1(EPOLL_CLOEXEC);
662 bail_neg(epoll_fd, "epoll_create1");
663
664 ev.events = EPOLLIN;
665 ev.data.fd = sock;
666 bail_neg(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev), "epoll_ctl");
667
668 bail_neg(listen(sock, 10), "listen");
669
670 if (daemonize) {
671 bail_neg(daemon(0, 1), "daemon");
672 }
673
3ff85001
SR
674 vm_clients = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
675
4c17b2e3
DC
676 int nevents;
677
678 for(;;) {
679 nevents = epoll_wait(epoll_fd, events, 1, -1);
680 if (nevents < 0 && errno == EINTR) {
4c500f16 681 handle_forced_cleanup();
4c17b2e3
DC
682 continue;
683 }
684 bail_neg(nevents, "epoll_wait");
685
686 for (int n = 0; n < nevents; n++) {
687 if (events[n].data.fd == sock) {
688
689 int conn_sock = accept4(sock, NULL, NULL,
690 SOCK_NONBLOCK | SOCK_CLOEXEC);
691 log_neg(conn_sock, "accept");
692 if (conn_sock > -1) {
693 add_new_client(conn_sock);
694 }
695 } else {
696 handle_client((struct Client *)events[n].data.ptr);
697 }
698 }
4c500f16
SR
699
700 handle_forced_cleanup();
4c17b2e3
DC
701 }
702}