]> git.proxmox.com Git - qemu-server.git/blob - qmeventd/qmeventd.c
a843da5f73ce68058dd7019ec7bd12641b6c8656
[qemu-server.git] / qmeventd / qmeventd.c
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2 /*
3 Copyright (C) 2018 - 2021 Proxmox Server Solutions GmbH
4
5 Author: Dominik Csapak <d.csapak@proxmox.com>
6 Author: Stefan Reiter <s.reiter@proxmox.com>
7
8 Description:
9
10 qmeventd listens on a given socket, and waits for qemu processes to
11 connect. After accepting a connection qmeventd waits for shutdown events
followed by the closing of the socket. Once that happens, `qm cleanup` will
be executed with the following three arguments:
VMID <graceful> <guest>
where `graceful` is `1` or `0` depending on whether a shutdown event was
observed before the socket got closed. The third argument, `guest`, is also a
boolean `1` or `0`, depending on whether the shutdown was requested from the
guest OS (i.e., from the "inside").
19 */
20
21 #ifndef _GNU_SOURCE
22 #define _GNU_SOURCE
23 #endif
24
#include <errno.h>
#include <fcntl.h>
#include <gmodule.h>
#include <json.h>
#include <limits.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
41
42 #include "qmeventd.h"
43
44 #define DEFAULT_KILL_TIMEOUT 60
45
46 static int verbose = 0;
47 static int kill_timeout = DEFAULT_KILL_TIMEOUT;
48 static int epoll_fd = 0;
49 static const char *progname;
50 GHashTable *vm_clients; // key=vmid (freed on remove), value=*Client (free manually)
51 GSList *forced_cleanups;
52 static int needs_cleanup = 0;
53
54 /*
55 * Helper functions
56 */
57
58 static void
59 usage()
60 {
61 fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
62 fprintf(stderr, " -f run in foreground (default: false)\n");
63 fprintf(stderr, " -v verbose (default: false)\n");
64 fprintf(stderr, " -t <s> kill timeout (default: %ds)\n", DEFAULT_KILL_TIMEOUT);
65 fprintf(stderr, " PATH use PATH for socket\n");
66 }
67
/*
 * Query the peer credentials of a connected socket and return the peer's
 * PID. If getsockopt() fails the error is logged and 0 is returned, since
 * the zeroed credentials are left untouched.
 */
static pid_t
get_pid_from_fd(int fd)
{
    struct ucred peer;
    memset(&peer, 0, sizeof(peer));

    socklen_t peer_len = sizeof(peer);
    int rc = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &peer, &peer_len);
    log_neg(rc, "getsockopt");

    return peer.pid;
}
76
/*
 * Reads the vmid from /proc/<pid>/cmdline: the NUL-separated arguments are
 * scanned for '-id' and the argument following it is parsed as a decimal
 * number.
 *
 * Returns the vmid, or 0 if the file cannot be opened, no '-id' argument
 * with a value exists, or the value is not a plain decimal number.
 */
static unsigned long
get_vmid_from_pid(pid_t pid)
{
    char path[32] = { 0 };
    int written = snprintf(path, sizeof(path), "/proc/%d/cmdline", pid);
    if (written < 0) {
        fprintf(stderr, "error during snprintf for %d: %s\n", pid,
            strerror(errno));
        return 0;
    }
    if ((size_t)written >= sizeof(path)) {
        fprintf(stderr, "error: pid %d too long\n", pid);
        return 0;
    }

    FILE *cmdline = fopen(path, "re");
    if (cmdline == NULL) {
        fprintf(stderr, "error opening %s: %s\n", path, strerror(errno));
        return 0;
    }

    unsigned long result = 0;
    char *arg = NULL;
    size_t arg_cap = 0;

    // skip arguments until we are positioned right after '-id'
    bool found_flag = false;
    while (getdelim(&arg, &arg_cap, '\0', cmdline) >= 0) {
        if (strcmp(arg, "-id") == 0) {
            found_flag = true;
            break;
        }
    }

    // stays true for read/conversion errors, which are reported via errno
    bool parse_failed = true;

    if (found_flag && getdelim(&arg, &arg_cap, '\0', cmdline) >= 0) {
        if (arg[0] == '-' || arg[0] == '\0') {
            // next option or empty string instead of a value
            fprintf(stderr, "invalid vmid %s\n", arg);
            parse_failed = false;
        } else {
            char *rest = NULL;
            errno = 0;
            unsigned long parsed = strtoul(arg, &rest, 10);
            if (errno == 0) {
                parse_failed = false;
                if (*rest == '\0') {
                    result = parsed;
                } else {
                    // trailing garbage after the number
                    fprintf(stderr, "invalid vmid %s\n", arg);
                }
            }
        }
    }

    if (parse_failed) {
        fprintf(stderr, "error parsing vmid for %d: %s\n", pid, strerror(errno));
    }

    free(arg);
    fclose(cmdline);
    return result;
}
143
/*
 * Write all `len` bytes of `buf` to `fd`.
 *
 * write() may transfer fewer bytes than requested, so keep writing the
 * remainder until everything is out. Interrupted calls (EINTR) are retried.
 *
 * Returns true only if the complete buffer was written. Any other error
 * (including EAGAIN on a non-blocking fd) returns false; in that case a
 * prefix of the buffer may already have been sent.
 */
static bool
must_write(int fd, const char *buf, size_t len)
{
    size_t done = 0;

    while (done < len) {
        ssize_t wlen = write(fd, buf + done, len - done);
        if (wlen < 0) {
            if (errno == EINTR) {
                continue;
            }
            return false;
        }
        if (wlen == 0) {
            // no progress possible, avoid spinning forever
            return false;
        }
        done += (size_t)wlen;
    }

    return true;
}
154
155 /*
156 * qmp handling functions
157 */
158
159 static void
160 send_qmp_cmd(struct Client *client, const char *buf, size_t len)
161 {
162 if (!must_write(client->fd, buf, len - 1)) {
163 fprintf(stderr, "%s: cannot send QMP message\n", client->qemu.vmid);
164 cleanup_client(client);
165 }
166 }
167
168 void
169 handle_qmp_handshake(struct Client *client)
170 {
171 VERBOSE_PRINT("pid%d: got QMP handshake, assuming QEMU client\n", client->pid);
172
173 // extract vmid from cmdline, now that we know it's a QEMU process
174 unsigned long vmid = get_vmid_from_pid(client->pid);
175 int res = snprintf(client->qemu.vmid, sizeof(client->qemu.vmid), "%lu", vmid);
176 if (vmid == 0 || res < 0 || res >= (int)sizeof(client->qemu.vmid)) {
177 fprintf(stderr, "could not get vmid from pid %d\n", client->pid);
178 cleanup_client(client);
179 return;
180 }
181
182 VERBOSE_PRINT("pid%d: assigned VMID: %s\n", client->pid, client->qemu.vmid);
183 client->type = CLIENT_QEMU;
184 if(!g_hash_table_insert(vm_clients, strdup(client->qemu.vmid), client)) {
185 // not fatal, just means backup handling won't work
186 fprintf(stderr, "%s: could not insert client into VMID->client table\n",
187 client->qemu.vmid);
188 }
189
190 static const char qmp_answer[] = "{\"execute\":\"qmp_capabilities\"}\n";
191 send_qmp_cmd(client, qmp_answer, sizeof(qmp_answer));
192 }
193
194 void
195 handle_qmp_event(struct Client *client, struct json_object *obj)
196 {
197 struct json_object *event;
198 if (!json_object_object_get_ex(obj, "event", &event)) {
199 return;
200 }
201 VERBOSE_PRINT("%s: got QMP event: %s\n", client->qemu.vmid, json_object_get_string(event));
202
203 if (client->state == STATE_TERMINATING) {
204 // QEMU sometimes sends a second SHUTDOWN after SIGTERM, ignore
205 VERBOSE_PRINT("%s: event was after termination, ignoring\n", client->qemu.vmid);
206 return;
207 }
208
209 // event, check if shutdown and get guest parameter
210 if (!strcmp(json_object_get_string(event), "SHUTDOWN")) {
211 client->qemu.graceful = 1;
212 struct json_object *data;
213 struct json_object *guest;
214 if (json_object_object_get_ex(obj, "data", &data) &&
215 json_object_object_get_ex(data, "guest", &guest))
216 {
217 client->qemu.guest = (unsigned short)json_object_get_boolean(guest);
218 }
219
220 // check if a backup is running and kill QEMU process if not
221 terminate_check(client);
222 }
223 }
224
225 void
226 terminate_check(struct Client *client)
227 {
228 if (client->state != STATE_IDLE) {
229 // if we're already in a request, queue this one until after
230 VERBOSE_PRINT("%s: terminate_check queued\n", client->qemu.vmid);
231 client->qemu.term_check_queued = true;
232 return;
233 }
234
235 client->qemu.term_check_queued = false;
236
237 VERBOSE_PRINT("%s: query-status\n", client->qemu.vmid);
238 client->state = STATE_EXPECT_STATUS_RESP;
239 static const char qmp_req[] = "{\"execute\":\"query-status\"}\n";
240 send_qmp_cmd(client, qmp_req, sizeof(qmp_req));
241 }
242
243 void
244 handle_qmp_return(struct Client *client, struct json_object *data, bool error)
245 {
246 if (error) {
247 const char *msg = "n/a";
248 struct json_object *desc;
249 if (json_object_object_get_ex(data, "desc", &desc)) {
250 msg = json_object_get_string(desc);
251 }
252 fprintf(stderr, "%s: received error from QMP: %s\n",
253 client->qemu.vmid, msg);
254 client->state = STATE_IDLE;
255 goto out;
256 }
257
258 struct json_object *status;
259 json_bool has_status = data &&
260 json_object_object_get_ex(data, "status", &status);
261
262 bool active = false;
263 if (has_status) {
264 const char *status_str = json_object_get_string(status);
265 active = status_str &&
266 (!strcmp(status_str, "running") || !strcmp(status_str, "paused"));
267 }
268
269 switch (client->state) {
270 case STATE_EXPECT_STATUS_RESP:
271 client->state = STATE_IDLE;
272 if (active) {
273 VERBOSE_PRINT("%s: got status: VM is active\n", client->qemu.vmid);
274 } else if (!client->qemu.backup) {
275 terminate_client(client);
276 } else {
277 // if we're in a backup, don't do anything, vzdump will notify
278 // us when the backup finishes
279 VERBOSE_PRINT("%s: not active, but running backup - keep alive\n",
280 client->qemu.vmid);
281 }
282 break;
283
284 // this means we received the empty return from our handshake answer
285 case STATE_HANDSHAKE:
286 client->state = STATE_IDLE;
287 VERBOSE_PRINT("%s: QMP handshake complete\n", client->qemu.vmid);
288 break;
289
290 // we expect an empty return object after sending quit
291 case STATE_TERMINATING:
292 break;
293 case STATE_IDLE:
294 VERBOSE_PRINT("%s: spurious return value received\n",
295 client->qemu.vmid);
296 break;
297 }
298
299 out:
300 if (client->qemu.term_check_queued) {
301 terminate_check(client);
302 }
303 }
304
305 /*
306 * VZDump specific client functions
307 */
308
309 void
310 handle_vzdump_handshake(struct Client *client, struct json_object *data)
311 {
312 client->state = STATE_IDLE;
313
314 struct json_object *vmid_obj;
315 json_bool has_vmid = data && json_object_object_get_ex(data, "vmid", &vmid_obj);
316
317 if (!has_vmid) {
318 VERBOSE_PRINT("pid%d: invalid vzdump handshake: no vmid\n", client->pid);
319 return;
320 }
321
322 const char *vmid_str = json_object_get_string(vmid_obj);
323
324 if (!vmid_str) {
325 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid is not a string\n", client->pid);
326 return;
327 }
328
329 int res = snprintf(client->vzdump.vmid, sizeof(client->vzdump.vmid), "%s", vmid_str);
330 if (res < 0 || res >= (int)sizeof(client->vzdump.vmid)) {
331 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid too long or invalid\n", client->pid);
332 return;
333 }
334
335 struct Client *vmc = (struct Client*) g_hash_table_lookup(vm_clients, client->vzdump.vmid);
336 if (vmc) {
337 vmc->qemu.backup = true;
338
339 // only mark as VZDUMP once we have set everything up, otherwise 'cleanup'
340 // might try to access an invalid value
341 client->type = CLIENT_VZDUMP;
342 VERBOSE_PRINT("%s: vzdump backup started\n", client->vzdump.vmid);
343 } else {
344 VERBOSE_PRINT("%s: vzdump requested backup start for unregistered VM\n", client->vzdump.vmid);
345 }
346 }
347
348 /*
349 * client management functions
350 */
351
352 void
353 add_new_client(int client_fd)
354 {
355 struct Client *client = calloc(sizeof(struct Client), 1);
356 if (client == NULL) {
357 fprintf(stderr, "could not add new client - allocation failed!\n");
358 fflush(stderr);
359 return;
360 }
361 client->state = STATE_HANDSHAKE;
362 client->type = CLIENT_NONE;
363 client->fd = client_fd;
364 client->pid = get_pid_from_fd(client_fd);
365 if (client->pid == 0) {
366 fprintf(stderr, "could not get pid from client\n");
367 goto err;
368 }
369
370 struct epoll_event ev;
371 ev.events = EPOLLIN;
372 ev.data.ptr = client;
373 int res = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client_fd, &ev);
374 if (res < 0) {
375 perror("epoll_ctl client add");
376 goto err;
377 }
378
379 VERBOSE_PRINT("added new client, pid: %d\n", client->pid);
380
381 return;
382 err:
383 (void)close(client_fd);
384 free(client);
385 }
386
387 static void
388 cleanup_qemu_client(struct Client *client)
389 {
390 unsigned short graceful = client->qemu.graceful;
391 unsigned short guest = client->qemu.guest;
392 char vmid[sizeof(client->qemu.vmid)];
393 strncpy(vmid, client->qemu.vmid, sizeof(vmid));
394 g_hash_table_remove(vm_clients, &vmid); // frees key, ignore errors
395 VERBOSE_PRINT("%s: executing cleanup (graceful: %d, guest: %d)\n",
396 vmid, graceful, guest);
397
398 int pid = fork();
399 if (pid < 0) {
400 fprintf(stderr, "fork failed: %s\n", strerror(errno));
401 return;
402 }
403 if (pid == 0) {
404 char *script = "/usr/sbin/qm";
405
406 char *args[] = {
407 script,
408 "cleanup",
409 vmid,
410 graceful ? "1" : "0",
411 guest ? "1" : "0",
412 NULL
413 };
414
415 execvp(script, args);
416 perror("execvp");
417 _exit(1);
418 }
419 }
420
421 void
422 cleanup_client(struct Client *client)
423 {
424 log_neg(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client->fd, NULL), "epoll del");
425 (void)close(client->fd);
426
427 struct Client *vmc;
428 switch (client->type) {
429 case CLIENT_QEMU:
430 cleanup_qemu_client(client);
431 break;
432
433 case CLIENT_VZDUMP:
434 vmc = (struct Client*) g_hash_table_lookup(vm_clients, client->vzdump.vmid);
435 if (vmc) {
436 VERBOSE_PRINT("%s: backup ended\n", client->vzdump.vmid);
437 vmc->qemu.backup = false;
438 terminate_check(vmc);
439 }
440 break;
441
442 case CLIENT_NONE:
443 // do nothing, only close socket
444 break;
445 }
446
447 if (client->pidfd > 0) {
448 (void)close(client->pidfd);
449 }
450 VERBOSE_PRINT("removing %s from forced cleanups\n", client->qemu.vmid);
451 forced_cleanups = g_slist_remove(forced_cleanups, client);
452 free(client);
453 }
454
455 void
456 terminate_client(struct Client *client)
457 {
458 VERBOSE_PRINT("%s: terminating client (pid %d)\n", client->qemu.vmid, client->pid);
459
460 client->state = STATE_TERMINATING;
461
462 // open a pidfd before kill for later cleanup
463 int pidfd = pidfd_open(client->pid, 0);
464 if (pidfd < 0) {
465 switch (errno) {
466 case ESRCH:
467 // process already dead for some reason, cleanup done
468 VERBOSE_PRINT("%s: failed to open pidfd, process already dead (pid %d)\n",
469 client->qemu.vmid, client->pid);
470 return;
471
472 // otherwise fall back to just using the PID directly, but don't
473 // print if we only failed because we're running on an older kernel
474 case ENOSYS:
475 break;
476 default:
477 perror("failed to open QEMU pidfd for cleanup");
478 break;
479 }
480 }
481
482 // try to send a 'quit' command first, fallback to SIGTERM of the pid
483 static const char qmp_quit_command[] = "{\"execute\":\"quit\"}\n";
484 VERBOSE_PRINT("%s: sending 'quit' via QMP\n", client->qemu.vmid);
485 if (!must_write(client->fd, qmp_quit_command, sizeof(qmp_quit_command) - 1)) {
486 VERBOSE_PRINT("%s: sending 'SIGTERM' to pid %d\n", client->qemu.vmid, client->pid);
487 int err = kill(client->pid, SIGTERM);
488 log_neg(err, "kill");
489 }
490
491 time_t timeout = time(NULL) + kill_timeout;
492
493 client->pidfd = pidfd;
494 client->timeout = timeout;
495
496 forced_cleanups = g_slist_prepend(forced_cleanups, (void *)client);
497 needs_cleanup = 1;
498 }
499
500 void
501 handle_client(struct Client *client)
502 {
503 VERBOSE_PRINT("pid%d: entering handle\n", client->pid);
504 ssize_t len;
505 do {
506 len = read(client->fd, (client->buf+client->buflen),
507 sizeof(client->buf) - client->buflen);
508 } while (len < 0 && errno == EINTR);
509
510 if (len < 0) {
511 if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
512 log_neg((int)len, "read");
513 cleanup_client(client);
514 }
515 return;
516 } else if (len == 0) {
517 VERBOSE_PRINT("pid%d: got EOF\n", client->pid);
518 cleanup_client(client);
519 return;
520 }
521
522 VERBOSE_PRINT("pid%d: read %ld bytes\n", client->pid, len);
523 client->buflen += len;
524
525 struct json_tokener *tok = json_tokener_new();
526 struct json_object *jobj = NULL;
527 enum json_tokener_error jerr = json_tokener_success;
528 while (jerr == json_tokener_success && client->buflen != 0) {
529 jobj = json_tokener_parse_ex(tok, client->buf, (int)client->buflen);
530 jerr = json_tokener_get_error(tok);
531 unsigned int offset = (unsigned int)tok->char_offset;
532 switch (jerr) {
533 case json_tokener_success:
534 // move rest from buffer to front
535 memmove(client->buf, client->buf + offset, client->buflen - offset);
536 client->buflen -= offset;
537 if (json_object_is_type(jobj, json_type_object)) {
538 struct json_object *obj;
539 if (json_object_object_get_ex(jobj, "QMP", &obj)) {
540 handle_qmp_handshake(client);
541 } else if (json_object_object_get_ex(jobj, "event", &obj)) {
542 handle_qmp_event(client, jobj);
543 } else if (json_object_object_get_ex(jobj, "return", &obj)) {
544 handle_qmp_return(client, obj, false);
545 } else if (json_object_object_get_ex(jobj, "error", &obj)) {
546 handle_qmp_return(client, obj, true);
547 } else if (json_object_object_get_ex(jobj, "vzdump", &obj)) {
548 handle_vzdump_handshake(client, obj);
549 } // else ignore message
550 }
551 break;
552 case json_tokener_continue:
553 if (client->buflen >= sizeof(client->buf)) {
554 VERBOSE_PRINT("pid%d: msg too large, discarding buffer\n", client->pid);
555 memset(client->buf, 0, sizeof(client->buf));
556 client->buflen = 0;
557 } // else we have enough space try again after next read
558 break;
559 default:
560 VERBOSE_PRINT("pid%d: parse error: %d, discarding buffer\n", client->pid, jerr);
561 memset(client->buf, 0, client->buflen);
562 client->buflen = 0;
563 break;
564 }
565 json_object_put(jobj);
566 }
567 json_tokener_free(tok);
568 }
569
570 static void
571 sigkill(void *ptr, void *time_ptr)
572 {
573 struct Client *data = ptr;
574 int err;
575
576 if (data->timeout != 0 && data->timeout > *(time_t *)time_ptr) {
577 return;
578 }
579
580 if (data->pidfd > 0) {
581 err = pidfd_send_signal(data->pidfd, SIGKILL, NULL, 0);
582 (void)close(data->pidfd);
583 data->pidfd = -1;
584 } else {
585 err = kill(data->pid, SIGKILL);
586 }
587
588 if (err < 0) {
589 if (errno != ESRCH) {
590 fprintf(stderr, "SIGKILL cleanup of pid '%d' failed - %s\n",
591 data->pid, strerror(errno));
592 }
593 } else {
594 fprintf(stderr, "cleanup failed, terminating pid '%d' with SIGKILL\n",
595 data->pid);
596 }
597
598 data->timeout = 0;
599
600 // remove ourselves from the list
601 forced_cleanups = g_slist_remove(forced_cleanups, ptr);
602 }
603
604 static void
605 handle_forced_cleanup()
606 {
607 if (g_slist_length(forced_cleanups) > 0) {
608 VERBOSE_PRINT("clearing forced cleanup backlog\n");
609 time_t cur_time = time(NULL);
610 g_slist_foreach(forced_cleanups, sigkill, &cur_time);
611 }
612 needs_cleanup = g_slist_length(forced_cleanups) > 0;
613 }
614
615 int
616 main(int argc, char *argv[])
617 {
618 int opt;
619 int daemonize = 1;
620 char *socket_path = NULL;
621 progname = argv[0];
622
623 while ((opt = getopt(argc, argv, "hfvt:")) != -1) {
624 switch (opt) {
625 case 'f':
626 daemonize = 0;
627 break;
628 case 'v':
629 verbose = 1;
630 break;
631 case 't':
632 errno = 0;
633 char *endptr = NULL;
634 kill_timeout = strtoul(optarg, &endptr, 10);
635 if (errno != 0 || *endptr != '\0' || kill_timeout == 0) {
636 usage();
637 exit(EXIT_FAILURE);
638 }
639 break;
640 case 'h':
641 usage();
642 exit(EXIT_SUCCESS);
643 break;
644 default:
645 usage();
646 exit(EXIT_FAILURE);
647 }
648 }
649
650 if (optind >= argc) {
651 usage();
652 exit(EXIT_FAILURE);
653 }
654
655 signal(SIGCHLD, SIG_IGN);
656
657 socket_path = argv[optind];
658
659 int sock = socket(AF_UNIX, SOCK_STREAM, 0);
660 bail_neg(sock, "socket");
661
662 struct sockaddr_un addr;
663 memset(&addr, 0, sizeof(addr));
664 addr.sun_family = AF_UNIX;
665 strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1);
666
667 unlink(socket_path);
668 bail_neg(bind(sock, (struct sockaddr*)&addr, sizeof(addr)), "bind");
669
670 struct epoll_event ev, events[1];
671 epoll_fd = epoll_create1(EPOLL_CLOEXEC);
672 bail_neg(epoll_fd, "epoll_create1");
673
674 ev.events = EPOLLIN;
675 ev.data.fd = sock;
676 bail_neg(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev), "epoll_ctl");
677
678 bail_neg(listen(sock, 10), "listen");
679
680 if (daemonize) {
681 bail_neg(daemon(0, 1), "daemon");
682 }
683
684 vm_clients = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
685
686 int nevents;
687
688 for(;;) {
689 nevents = epoll_wait(epoll_fd, events, 1, needs_cleanup ? 10*1000 : -1);
690 if (nevents < 0 && errno == EINTR) {
691 continue;
692 }
693 bail_neg(nevents, "epoll_wait");
694
695 for (int n = 0; n < nevents; n++) {
696 if (events[n].data.fd == sock) {
697
698 int conn_sock = accept4(sock, NULL, NULL, SOCK_NONBLOCK | SOCK_CLOEXEC);
699 log_neg(conn_sock, "accept");
700 if (conn_sock > -1) {
701 add_new_client(conn_sock);
702 }
703 } else {
704 handle_client((struct Client *)events[n].data.ptr);
705 }
706 }
707 handle_forced_cleanup();
708 }
709 }