]> git.proxmox.com Git - qemu-server.git/blob - qmeventd/qmeventd.c
api: create_vm: check serial and usb permissions
[qemu-server.git] / qmeventd / qmeventd.c
1 // SPDX-License-Identifier: AGPL-3.0-or-later
2 /*
3 Copyright (C) 2018 - 2021 Proxmox Server Solutions GmbH
4
5 Author: Dominik Csapak <d.csapak@proxmox.com>
6 Author: Stefan Reiter <s.reiter@proxmox.com>
7
8 Description:
9
10 qmeventd listens on a given socket, and waits for qemu processes to
11 connect. After accepting a connection qmeventd waits for shutdown events
12 followed by the closing of the socket. Once that happens `qm cleanup` will
13 be executed with the following three arguments:
14 VMID <graceful> <guest>
15 Where `graceful` is `1` or `0` depending on whether a shutdown event was
16 observed before the socket got closed. The third argument `guest` is also a
17 boolean, `1` or `0`, depending on whether the shutdown was requested from the
18 guest OS (i.e., the "inside").
19 */
20
21 #ifndef _GNU_SOURCE
22 #define _GNU_SOURCE
23 #endif
24
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <gmodule.h>
28 #include <json.h>
29 #include <signal.h>
30 #include <stdbool.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <sys/epoll.h>
34 #include <sys/socket.h>
35 #include <sys/types.h>
36 #include <sys/un.h>
37 #include <sys/wait.h>
38 #include <unistd.h>
39
40 #include "qmeventd.h"
41
42 static int verbose = 0;
43 static int epoll_fd = 0;
44 static const char *progname;
45 GHashTable *vm_clients; // key=vmid (freed on remove), value=*Client (free manually)
46 GSList *forced_cleanups;
47 volatile sig_atomic_t alarm_triggered = 0;
48
49 /*
50 * Helper functions
51 */
52
53 static void
54 usage()
55 {
56 fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
57 fprintf(stderr, " -f run in foreground (default: false)\n");
58 fprintf(stderr, " -v verbose (default: false)\n");
59 fprintf(stderr, " PATH use PATH for socket\n");
60 }
61
62 static pid_t
63 get_pid_from_fd(int fd)
64 {
65 struct ucred credentials = { .pid = 0, .uid = 0, .gid = 0 };
66 socklen_t len = sizeof(struct ucred);
67 log_neg(getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &credentials, &len), "getsockopt");
68 return credentials.pid;
69 }
70
/*
 * Reads the vmid from /proc/<pid>/cmdline, i.e. the argument that follows
 * '-id'.
 *
 * Returns the parsed vmid, or 0 if the cmdline cannot be opened, contains no
 * '-id' argument followed by a value, or the value is not a plain decimal
 * number. Every failure is reported on stderr.
 */
static unsigned long
get_vmid_from_pid(pid_t pid)
{
    char filename[32] = { 0 };
    int len = snprintf(filename, sizeof(filename), "/proc/%d/cmdline", pid);
    if (len < 0) {
        fprintf(stderr, "error during snprintf for %d: %s\n", pid,
            strerror(errno));
        return 0;
    }
    if ((size_t)len >= sizeof(filename)) {
        fprintf(stderr, "error: pid %d too long\n", pid);
        return 0;
    }

    FILE *fp = fopen(filename, "re");
    if (fp == NULL) {
        fprintf(stderr, "error opening %s: %s\n", filename, strerror(errno));
        return 0;
    }

    unsigned long vmid = 0;
    char *buf = NULL;
    size_t buflen = 0;
    bool found_id = false;

    // The arguments in cmdline are NUL separated. Reaching the end of the
    // file is reported like an error by getdelim(), so clear errno up front
    // to be able to tell a real read error from a plain "not found".
    errno = 0;
    while (getdelim(&buf, &buflen, '\0', fp) >= 0) {
        if (strcmp(buf, "-id") == 0) {
            found_id = true;
            break;
        }
    }

    // the value is the argument directly after '-id'
    if (!found_id || getdelim(&buf, &buflen, '\0', fp) < 0) {
        if (errno != 0) {
            fprintf(stderr, "error parsing vmid for %d: %s\n", pid,
                strerror(errno));
        } else {
            fprintf(stderr, "error parsing vmid for %d: no '-id' argument with a value\n",
                pid);
        }
        goto out;
    }

    // strtoul() would silently accept a sign, an empty value is no vmid either
    if (buf[0] == '-' || buf[0] == '\0') {
        fprintf(stderr, "invalid vmid %s\n", buf);
        goto out;
    }

    errno = 0;
    char *endptr = NULL;
    vmid = strtoul(buf, &endptr, 10);
    if (errno != 0) {
        fprintf(stderr, "error parsing vmid for %d: %s\n", pid, strerror(errno));
        vmid = 0;
    } else if (*endptr != '\0') {
        // trailing garbage after the number
        fprintf(stderr, "invalid vmid %s\n", buf);
        vmid = 0;
    }

out:
    free(buf);
    fclose(fp);
    return vmid;
}
137
/*
 * Write all `len` bytes of `buf` to `fd`.
 *
 * Interrupted writes (EINTR) are retried and short writes are continued with
 * the remaining data, so a partially accepted message is not reported as a
 * failure as long as the rest can be written too.
 *
 * Returns true if everything was written, false on any other write error or
 * if write() makes no progress.
 */
static bool
must_write(int fd, const char *buf, size_t len)
{
    size_t done = 0;

    for (;;) {
        ssize_t wlen = write(fd, buf + done, len - done);
        if (wlen < 0) {
            if (errno == EINTR) {
                continue;
            }
            return false;
        }

        done += (size_t)wlen;
        if (done >= len) {
            return true;
        }
        if (wlen == 0) {
            // nothing was accepted, do not spin forever
            return false;
        }
    }
}
148
149 /*
150 * qmp handling functions
151 */
152
153 static void
154 send_qmp_cmd(struct Client *client, const char *buf, size_t len)
155 {
156 if (!must_write(client->fd, buf, len - 1)) {
157 fprintf(stderr, "%s: cannot send QMP message\n", client->qemu.vmid);
158 cleanup_client(client);
159 }
160 }
161
162 void
163 handle_qmp_handshake(struct Client *client)
164 {
165 VERBOSE_PRINT("pid%d: got QMP handshake, assuming QEMU client\n", client->pid);
166
167 // extract vmid from cmdline, now that we know it's a QEMU process
168 unsigned long vmid = get_vmid_from_pid(client->pid);
169 int res = snprintf(client->qemu.vmid, sizeof(client->qemu.vmid), "%lu", vmid);
170 if (vmid == 0 || res < 0 || res >= (int)sizeof(client->qemu.vmid)) {
171 fprintf(stderr, "could not get vmid from pid %d\n", client->pid);
172 cleanup_client(client);
173 return;
174 }
175
176 VERBOSE_PRINT("pid%d: assigned VMID: %s\n", client->pid, client->qemu.vmid);
177 client->type = CLIENT_QEMU;
178 if(!g_hash_table_insert(vm_clients, strdup(client->qemu.vmid), client)) {
179 // not fatal, just means backup handling won't work
180 fprintf(stderr, "%s: could not insert client into VMID->client table\n",
181 client->qemu.vmid);
182 }
183
184 static const char qmp_answer[] = "{\"execute\":\"qmp_capabilities\"}\n";
185 send_qmp_cmd(client, qmp_answer, sizeof(qmp_answer));
186 }
187
188 void
189 handle_qmp_event(struct Client *client, struct json_object *obj)
190 {
191 struct json_object *event;
192 if (!json_object_object_get_ex(obj, "event", &event)) {
193 return;
194 }
195 VERBOSE_PRINT("%s: got QMP event: %s\n", client->qemu.vmid, json_object_get_string(event));
196
197 if (client->state == STATE_TERMINATING) {
198 // QEMU sometimes sends a second SHUTDOWN after SIGTERM, ignore
199 VERBOSE_PRINT("%s: event was after termination, ignoring\n", client->qemu.vmid);
200 return;
201 }
202
203 // event, check if shutdown and get guest parameter
204 if (!strcmp(json_object_get_string(event), "SHUTDOWN")) {
205 client->qemu.graceful = 1;
206 struct json_object *data;
207 struct json_object *guest;
208 if (json_object_object_get_ex(obj, "data", &data) &&
209 json_object_object_get_ex(data, "guest", &guest))
210 {
211 client->qemu.guest = (unsigned short)json_object_get_boolean(guest);
212 }
213
214 // check if a backup is running and kill QEMU process if not
215 terminate_check(client);
216 }
217 }
218
219 void
220 terminate_check(struct Client *client)
221 {
222 if (client->state != STATE_IDLE) {
223 // if we're already in a request, queue this one until after
224 VERBOSE_PRINT("%s: terminate_check queued\n", client->qemu.vmid);
225 client->qemu.term_check_queued = true;
226 return;
227 }
228
229 client->qemu.term_check_queued = false;
230
231 VERBOSE_PRINT("%s: query-status\n", client->qemu.vmid);
232 client->state = STATE_EXPECT_STATUS_RESP;
233 static const char qmp_req[] = "{\"execute\":\"query-status\"}\n";
234 send_qmp_cmd(client, qmp_req, sizeof(qmp_req));
235 }
236
237 void
238 handle_qmp_return(struct Client *client, struct json_object *data, bool error)
239 {
240 if (error) {
241 const char *msg = "n/a";
242 struct json_object *desc;
243 if (json_object_object_get_ex(data, "desc", &desc)) {
244 msg = json_object_get_string(desc);
245 }
246 fprintf(stderr, "%s: received error from QMP: %s\n",
247 client->qemu.vmid, msg);
248 client->state = STATE_IDLE;
249 goto out;
250 }
251
252 struct json_object *status;
253 json_bool has_status = data &&
254 json_object_object_get_ex(data, "status", &status);
255
256 bool active = false;
257 if (has_status) {
258 const char *status_str = json_object_get_string(status);
259 active = status_str &&
260 (!strcmp(status_str, "running") || !strcmp(status_str, "paused"));
261 }
262
263 switch (client->state) {
264 case STATE_EXPECT_STATUS_RESP:
265 client->state = STATE_IDLE;
266 if (active) {
267 VERBOSE_PRINT("%s: got status: VM is active\n", client->qemu.vmid);
268 } else if (!client->qemu.backup) {
269 terminate_client(client);
270 } else {
271 // if we're in a backup, don't do anything, vzdump will notify
272 // us when the backup finishes
273 VERBOSE_PRINT("%s: not active, but running backup - keep alive\n",
274 client->qemu.vmid);
275 }
276 break;
277
278 // this means we received the empty return from our handshake answer
279 case STATE_HANDSHAKE:
280 client->state = STATE_IDLE;
281 VERBOSE_PRINT("%s: QMP handshake complete\n", client->qemu.vmid);
282 break;
283
284 case STATE_IDLE:
285 case STATE_TERMINATING:
286 VERBOSE_PRINT("%s: spurious return value received\n",
287 client->qemu.vmid);
288 break;
289 }
290
291 out:
292 if (client->qemu.term_check_queued) {
293 terminate_check(client);
294 }
295 }
296
297 /*
298 * VZDump specific client functions
299 */
300
301 void
302 handle_vzdump_handshake(struct Client *client, struct json_object *data)
303 {
304 client->state = STATE_IDLE;
305
306 struct json_object *vmid_obj;
307 json_bool has_vmid = data && json_object_object_get_ex(data, "vmid", &vmid_obj);
308
309 if (!has_vmid) {
310 VERBOSE_PRINT("pid%d: invalid vzdump handshake: no vmid\n", client->pid);
311 return;
312 }
313
314 const char *vmid_str = json_object_get_string(vmid_obj);
315
316 if (!vmid_str) {
317 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid is not a string\n", client->pid);
318 return;
319 }
320
321 int res = snprintf(client->vzdump.vmid, sizeof(client->vzdump.vmid), "%s", vmid_str);
322 if (res < 0 || res >= (int)sizeof(client->vzdump.vmid)) {
323 VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid too long or invalid\n", client->pid);
324 return;
325 }
326
327 struct Client *vmc = (struct Client*) g_hash_table_lookup(vm_clients, client->vzdump.vmid);
328 if (vmc) {
329 vmc->qemu.backup = true;
330
331 // only mark as VZDUMP once we have set everything up, otherwise 'cleanup'
332 // might try to access an invalid value
333 client->type = CLIENT_VZDUMP;
334 VERBOSE_PRINT("%s: vzdump backup started\n", client->vzdump.vmid);
335 } else {
336 VERBOSE_PRINT("%s: vzdump requested backup start for unregistered VM\n", client->vzdump.vmid);
337 }
338 }
339
340 /*
341 * client management functions
342 */
343
344 void
345 add_new_client(int client_fd)
346 {
347 struct Client *client = calloc(sizeof(struct Client), 1);
348 if (client == NULL) {
349 fprintf(stderr, "could not add new client - allocation failed!\n");
350 fflush(stderr);
351 return;
352 }
353 client->state = STATE_HANDSHAKE;
354 client->type = CLIENT_NONE;
355 client->fd = client_fd;
356 client->pid = get_pid_from_fd(client_fd);
357 if (client->pid == 0) {
358 fprintf(stderr, "could not get pid from client\n");
359 goto err;
360 }
361
362 struct epoll_event ev;
363 ev.events = EPOLLIN;
364 ev.data.ptr = client;
365 int res = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client_fd, &ev);
366 if (res < 0) {
367 perror("epoll_ctl client add");
368 goto err;
369 }
370
371 VERBOSE_PRINT("added new client, pid: %d\n", client->pid);
372
373 return;
374 err:
375 (void)close(client_fd);
376 free(client);
377 }
378
379 static void
380 cleanup_qemu_client(struct Client *client)
381 {
382 unsigned short graceful = client->qemu.graceful;
383 unsigned short guest = client->qemu.guest;
384 char vmid[sizeof(client->qemu.vmid)];
385 strncpy(vmid, client->qemu.vmid, sizeof(vmid));
386 g_hash_table_remove(vm_clients, &vmid); // frees key, ignore errors
387 VERBOSE_PRINT("%s: executing cleanup (graceful: %d, guest: %d)\n",
388 vmid, graceful, guest);
389
390 int pid = fork();
391 if (pid < 0) {
392 fprintf(stderr, "fork failed: %s\n", strerror(errno));
393 return;
394 }
395 if (pid == 0) {
396 char *script = "/usr/sbin/qm";
397
398 char *args[] = {
399 script,
400 "cleanup",
401 vmid,
402 graceful ? "1" : "0",
403 guest ? "1" : "0",
404 NULL
405 };
406
407 execvp(script, args);
408 perror("execvp");
409 _exit(1);
410 }
411 }
412
413 void
414 cleanup_client(struct Client *client)
415 {
416 log_neg(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client->fd, NULL), "epoll del");
417 (void)close(client->fd);
418
419 struct Client *vmc;
420 switch (client->type) {
421 case CLIENT_QEMU:
422 cleanup_qemu_client(client);
423 break;
424
425 case CLIENT_VZDUMP:
426 vmc = (struct Client*) g_hash_table_lookup(vm_clients, client->vzdump.vmid);
427 if (vmc) {
428 VERBOSE_PRINT("%s: backup ended\n", client->vzdump.vmid);
429 vmc->qemu.backup = false;
430 terminate_check(vmc);
431 }
432 break;
433
434 case CLIENT_NONE:
435 // do nothing, only close socket
436 break;
437 }
438
439 free(client);
440 }
441
442 void
443 terminate_client(struct Client *client)
444 {
445 VERBOSE_PRINT("%s: terminating client (pid %d)\n", client->qemu.vmid, client->pid);
446
447 client->state = STATE_TERMINATING;
448
449 // open a pidfd before kill for later cleanup
450 int pidfd = pidfd_open(client->pid, 0);
451 if (pidfd < 0) {
452 switch (errno) {
453 case ESRCH:
454 // process already dead for some reason, cleanup done
455 VERBOSE_PRINT("%s: failed to open pidfd, process already dead (pid %d)\n",
456 client->qemu.vmid, client->pid);
457 return;
458
459 // otherwise fall back to just using the PID directly, but don't
460 // print if we only failed because we're running on an older kernel
461 case ENOSYS:
462 break;
463 default:
464 perror("failed to open QEMU pidfd for cleanup");
465 break;
466 }
467 }
468
469 int err = kill(client->pid, SIGTERM);
470 log_neg(err, "kill");
471
472 struct CleanupData *data_ptr = malloc(sizeof(struct CleanupData));
473 struct CleanupData data = {
474 .pid = client->pid,
475 .pidfd = pidfd
476 };
477 *data_ptr = data;
478 forced_cleanups = g_slist_prepend(forced_cleanups, (void *)data_ptr);
479
480 // resets any other alarms, but will fire eventually and cleanup all
481 alarm(5);
482 }
483
484 void
485 handle_client(struct Client *client)
486 {
487 VERBOSE_PRINT("pid%d: entering handle\n", client->pid);
488 ssize_t len;
489 do {
490 len = read(client->fd, (client->buf+client->buflen),
491 sizeof(client->buf) - client->buflen);
492 } while (len < 0 && errno == EINTR);
493
494 if (len < 0) {
495 if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
496 log_neg((int)len, "read");
497 cleanup_client(client);
498 }
499 return;
500 } else if (len == 0) {
501 VERBOSE_PRINT("pid%d: got EOF\n", client->pid);
502 cleanup_client(client);
503 return;
504 }
505
506 VERBOSE_PRINT("pid%d: read %ld bytes\n", client->pid, len);
507 client->buflen += len;
508
509 struct json_tokener *tok = json_tokener_new();
510 struct json_object *jobj = NULL;
511 enum json_tokener_error jerr = json_tokener_success;
512 while (jerr == json_tokener_success && client->buflen != 0) {
513 jobj = json_tokener_parse_ex(tok, client->buf, (int)client->buflen);
514 jerr = json_tokener_get_error(tok);
515 unsigned int offset = (unsigned int)tok->char_offset;
516 switch (jerr) {
517 case json_tokener_success:
518 // move rest from buffer to front
519 memmove(client->buf, client->buf + offset, client->buflen - offset);
520 client->buflen -= offset;
521 if (json_object_is_type(jobj, json_type_object)) {
522 struct json_object *obj;
523 if (json_object_object_get_ex(jobj, "QMP", &obj)) {
524 handle_qmp_handshake(client);
525 } else if (json_object_object_get_ex(jobj, "event", &obj)) {
526 handle_qmp_event(client, jobj);
527 } else if (json_object_object_get_ex(jobj, "return", &obj)) {
528 handle_qmp_return(client, obj, false);
529 } else if (json_object_object_get_ex(jobj, "error", &obj)) {
530 handle_qmp_return(client, obj, true);
531 } else if (json_object_object_get_ex(jobj, "vzdump", &obj)) {
532 handle_vzdump_handshake(client, obj);
533 } // else ignore message
534 }
535 break;
536 case json_tokener_continue:
537 if (client->buflen >= sizeof(client->buf)) {
538 VERBOSE_PRINT("pid%d: msg too large, discarding buffer\n", client->pid);
539 memset(client->buf, 0, sizeof(client->buf));
540 client->buflen = 0;
541 } // else we have enough space try again after next read
542 break;
543 default:
544 VERBOSE_PRINT("pid%d: parse error: %d, discarding buffer\n", client->pid, jerr);
545 memset(client->buf, 0, client->buflen);
546 client->buflen = 0;
547 break;
548 }
549 json_object_put(jobj);
550 }
551 json_tokener_free(tok);
552 }
553
554
555 /*
556 * SIGALRM and cleanup handling
557 *
558 * terminate_client will set an alarm for 5 seconds and add its client's PID to
559 * the forced_cleanups list - when the timer expires, we iterate the list and
560 * attempt to issue SIGKILL to all processes which haven't yet stopped.
561 */
562
563 static void
564 alarm_handler(__attribute__((unused)) int signum)
565 {
566 alarm_triggered = 1;
567 }
568
569 static void
570 sigkill(void *ptr, __attribute__((unused)) void *unused)
571 {
572 struct CleanupData data = *((struct CleanupData *)ptr);
573 int err;
574
575 if (data.pidfd > 0) {
576 err = pidfd_send_signal(data.pidfd, SIGKILL, NULL, 0);
577 (void)close(data.pidfd);
578 } else {
579 err = kill(data.pid, SIGKILL);
580 }
581
582 if (err < 0) {
583 if (errno != ESRCH) {
584 fprintf(stderr, "SIGKILL cleanup of pid '%d' failed - %s\n",
585 data.pid, strerror(errno));
586 }
587 } else {
588 fprintf(stderr, "cleanup failed, terminating pid '%d' with SIGKILL\n",
589 data.pid);
590 }
591 }
592
593 static void
594 handle_forced_cleanup()
595 {
596 if (alarm_triggered) {
597 VERBOSE_PRINT("clearing forced cleanup backlog\n");
598 alarm_triggered = 0;
599 g_slist_foreach(forced_cleanups, sigkill, NULL);
600 g_slist_free_full(forced_cleanups, free);
601 forced_cleanups = NULL;
602 }
603 }
604
605
606 int
607 main(int argc, char *argv[])
608 {
609 int opt;
610 int daemonize = 1;
611 char *socket_path = NULL;
612 progname = argv[0];
613
614 while ((opt = getopt(argc, argv, "hfv")) != -1) {
615 switch (opt) {
616 case 'f':
617 daemonize = 0;
618 break;
619 case 'v':
620 verbose = 1;
621 break;
622 case 'h':
623 usage();
624 exit(EXIT_SUCCESS);
625 break;
626 default:
627 usage();
628 exit(EXIT_FAILURE);
629 }
630 }
631
632 if (optind >= argc) {
633 usage();
634 exit(EXIT_FAILURE);
635 }
636
637 signal(SIGCHLD, SIG_IGN);
638 signal(SIGALRM, alarm_handler);
639
640 socket_path = argv[optind];
641
642 int sock = socket(AF_UNIX, SOCK_STREAM, 0);
643 bail_neg(sock, "socket");
644
645 struct sockaddr_un addr;
646 memset(&addr, 0, sizeof(addr));
647 addr.sun_family = AF_UNIX;
648 strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1);
649
650 unlink(socket_path);
651 bail_neg(bind(sock, (struct sockaddr*)&addr, sizeof(addr)), "bind");
652
653 struct epoll_event ev, events[1];
654 epoll_fd = epoll_create1(EPOLL_CLOEXEC);
655 bail_neg(epoll_fd, "epoll_create1");
656
657 ev.events = EPOLLIN;
658 ev.data.fd = sock;
659 bail_neg(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev), "epoll_ctl");
660
661 bail_neg(listen(sock, 10), "listen");
662
663 if (daemonize) {
664 bail_neg(daemon(0, 1), "daemon");
665 }
666
667 vm_clients = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
668
669 int nevents;
670
671 for(;;) {
672 nevents = epoll_wait(epoll_fd, events, 1, -1);
673 if (nevents < 0 && errno == EINTR) {
674 handle_forced_cleanup();
675 continue;
676 }
677 bail_neg(nevents, "epoll_wait");
678
679 for (int n = 0; n < nevents; n++) {
680 if (events[n].data.fd == sock) {
681
682 int conn_sock = accept4(sock, NULL, NULL, SOCK_NONBLOCK | SOCK_CLOEXEC);
683 log_neg(conn_sock, "accept");
684 if (conn_sock > -1) {
685 add_new_client(conn_sock);
686 }
687 } else {
688 handle_client((struct Client *)events[n].data.ptr);
689 }
690 }
691
692 handle_forced_cleanup();
693 }
694 }