]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
fix #2429: allow to specify cloud-init vendor snippet via cicustom
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use MIME::Base64;
23 use POSIX;
24 use Storable qw(dclone);
25 use Time::HiRes qw(gettimeofday usleep);
26 use URI::Escape;
27 use UUID;
28
29 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30 use PVE::CGroup;
31 use PVE::DataCenterConfig;
32 use PVE::Exception qw(raise raise_param_exc);
33 use PVE::Format qw(render_duration render_bytes);
34 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
35 use PVE::INotify;
36 use PVE::JSONSchema qw(get_standard_option parse_property_string);
37 use PVE::ProcFSTools;
38 use PVE::PBSClient;
39 use PVE::RPCEnvironment;
40 use PVE::Storage;
41 use PVE::SysFSTools;
42 use PVE::Systemd;
43 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
44
45 use PVE::QMPClient;
46 use PVE::QemuConfig;
47 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
48 use PVE::QemuServer::Cloudinit;
49 use PVE::QemuServer::CGroup;
50 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
51 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
52 use PVE::QemuServer::Machine;
53 use PVE::QemuServer::Memory;
54 use PVE::QemuServer::Monitor qw(mon_cmd);
55 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
56 use PVE::QemuServer::USB qw(parse_usb_device);
57
58 my $have_sdn;
59 eval {
60 require PVE::Network::SDN::Zones;
61 $have_sdn = 1;
62 };
63
64 my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
65 my $OVMF = {
66 x86_64 => {
67 '4m-no-smm' => [
68 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
69 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
70 ],
71 '4m-no-smm-ms' => [
72 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
73 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
74 ],
75 '4m' => [
76 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
77 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
78 ],
79 '4m-ms' => [
80 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
81 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
82 ],
83 default => [
84 "$EDK2_FW_BASE/OVMF_CODE.fd",
85 "$EDK2_FW_BASE/OVMF_VARS.fd",
86 ],
87 },
88 aarch64 => {
89 default => [
90 "$EDK2_FW_BASE/AAVMF_CODE.fd",
91 "$EDK2_FW_BASE/AAVMF_VARS.fd",
92 ],
93 },
94 };
95
96 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
97
# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
102
103 cfs_register_file('/qemu-server/',
104 \&parse_vm_config,
105 \&write_vm_config);
106
107 PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
108 description => "Some command save/restore state from this location.",
109 type => 'string',
110 maxLength => 128,
111 optional => 1,
112 });
113
114 PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
115 description => "Specifies the Qemu machine type.",
116 type => 'string',
117 pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
118 maxLength => 40,
119 optional => 1,
120 });
121
122
# Resolve the target storage ID for $source using an optional storage map.
#
# $map    - undef, or a hash reference that may carry 'entries' (hash of source => target)
#           and 'default' (fallback target used when no entry matches).
# $source - storage ID to translate.
#
# Returns the explicit entry for $source if one is defined, else the map's default if it is
# set to a true value, else $source itself.
sub map_storage {
    my ($map, $source) = @_;

    # no map at all: every storage maps to itself
    if (!defined($map)) {
        return $source;
    }

    # explicit per-storage entry wins
    my $entries = $map->{entries};
    if ($entries) {
        my $target = $entries->{$source};
        return $target if defined($target);
    }

    # otherwise use the default target, falling back to identity
    my $fallback = $map->{default};
    return $fallback ? $fallback : $source;
}
136
137 PVE::JSONSchema::register_standard_option('pve-targetstorage', {
138 description => "Mapping from source to target storages. Providing only a single storage ID maps all source storages to that storage. Providing the special value '1' will map each source storage to itself.",
139 type => 'string',
140 format => 'storagepair-list',
141 optional => 1,
142 });
143
144 #no warnings 'redefine';
145
# File-scoped cache for the local node name; stays undef until nodename() is first called.
my $nodename_cache;

# Return the local node name. PVE::INotify::nodename() is only queried while the cache is
# still undefined; afterwards the cached value is returned.
sub nodename {
    if (!defined($nodename_cache)) {
        $nodename_cache = PVE::INotify::nodename();
    }
    return $nodename_cache;
}
151
152 my $watchdog_fmt = {
153 model => {
154 default_key => 1,
155 type => 'string',
156 enum => [qw(i6300esb ib700)],
157 description => "Watchdog type to emulate.",
158 default => 'i6300esb',
159 optional => 1,
160 },
161 action => {
162 type => 'string',
163 enum => [qw(reset shutdown poweroff pause debug none)],
164 description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
165 optional => 1,
166 },
167 };
168 PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
169
170 my $agent_fmt = {
171 enabled => {
172 description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
173 type => 'boolean',
174 default => 0,
175 default_key => 1,
176 },
177 fstrim_cloned_disks => {
178 description => "Run fstrim after moving a disk or migrating the VM.",
179 type => 'boolean',
180 optional => 1,
181 default => 0
182 },
183 type => {
184 description => "Select the agent type",
185 type => 'string',
186 default => 'virtio',
187 optional => 1,
188 enum => [qw(virtio isa)],
189 },
190 };
191
192 my $vga_fmt = {
193 type => {
194 description => "Select the VGA type.",
195 type => 'string',
196 default => 'std',
197 optional => 1,
198 default_key => 1,
199 enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio vmware)],
200 },
201 memory => {
202 description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
203 type => 'integer',
204 optional => 1,
205 minimum => 4,
206 maximum => 512,
207 },
208 };
209
210 my $ivshmem_fmt = {
211 size => {
212 type => 'integer',
213 minimum => 1,
214 description => "The size of the file in MB.",
215 },
216 name => {
217 type => 'string',
218 pattern => '[a-zA-Z0-9\-]+',
219 optional => 1,
220 format_description => 'string',
221 description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
222 },
223 };
224
225 my $audio_fmt = {
226 device => {
227 type => 'string',
228 enum => [qw(ich9-intel-hda intel-hda AC97)],
229 description => "Configure an audio device."
230 },
231 driver => {
232 type => 'string',
233 enum => ['spice', 'none'],
234 default => 'spice',
235 optional => 1,
236 description => "Driver backend for the audio device."
237 },
238 };
239
240 my $spice_enhancements_fmt = {
241 foldersharing => {
242 type => 'boolean',
243 optional => 1,
244 default => '0',
245 description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM."
246 },
247 videostreaming => {
248 type => 'string',
249 enum => ['off', 'all', 'filter'],
250 default => 'off',
251 optional => 1,
252 description => "Enable video streaming. Uses compression for detected video streams."
253 },
254 };
255
256 my $rng_fmt = {
257 source => {
258 type => 'string',
259 enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
260 default_key => 1,
261 description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
262 ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
263 ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
264 ." still seeded from real entropy, and the bytes provided will most likely be mixed"
265 ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
266 ." a hardware RNG from the host.",
267 },
268 max_bytes => {
269 type => 'integer',
270 description => "Maximum bytes of entropy allowed to get injected into the guest every"
271 ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
272 ." `0` to disable limiting (potentially dangerous!).",
273 optional => 1,
274
275 # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
276 # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
277 # reading from /dev/urandom
278 default => 1024,
279 },
280 period => {
281 type => 'integer',
282 description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
283 ." the guest to retrieve another 'max_bytes' of entropy.",
284 optional => 1,
285 default => 1000,
286 },
287 };
288
289 my $confdesc = {
290 onboot => {
291 optional => 1,
292 type => 'boolean',
293 description => "Specifies whether a VM will be started during system bootup.",
294 default => 0,
295 },
296 autostart => {
297 optional => 1,
298 type => 'boolean',
299 description => "Automatic restart after crash (currently ignored).",
300 default => 0,
301 },
302 hotplug => {
303 optional => 1,
304 type => 'string', format => 'pve-hotplug-features',
305 description => "Selectively enable hotplug features. This is a comma separated list of"
306 ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
307 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
308 default => 'network,disk,usb',
309 },
310 reboot => {
311 optional => 1,
312 type => 'boolean',
313 description => "Allow reboot. If set to '0' the VM exit on reboot.",
314 default => 1,
315 },
316 lock => {
317 optional => 1,
318 type => 'string',
319 description => "Lock/unlock the VM.",
320 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
321 },
322 cpulimit => {
323 optional => 1,
324 type => 'number',
325 description => "Limit of CPU usage.",
326 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
327 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
328 minimum => 0,
329 maximum => 128,
330 default => 0,
331 },
332 cpuunits => {
333 optional => 1,
334 type => 'integer',
335 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
336 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
337 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
338 ." weights of all the other running VMs.",
339 minimum => 2,
340 maximum => 262144,
341 default => 'cgroup v1: 1024, cgroup v2: 100',
342 },
343 memory => {
344 optional => 1,
345 type => 'integer',
346 description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
347 ." you use the balloon device.",
348 minimum => 16,
349 default => 512,
350 },
351 balloon => {
352 optional => 1,
353 type => 'integer',
354 description => "Amount of target RAM for the VM in MB. Using zero disables the ballon driver.",
355 minimum => 0,
356 },
357 shares => {
358 optional => 1,
359 type => 'integer',
360 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
361 ." more memory this VM gets. Number is relative to weights of all other running VMs."
362 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
363 minimum => 0,
364 maximum => 50000,
365 default => 1000,
366 },
367 keyboard => {
368 optional => 1,
369 type => 'string',
370 description => "Keyboard layout for VNC server. The default is read from the"
371 ."'/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
372 enum => PVE::Tools::kvmkeymaplist(),
373 default => undef,
374 },
375 name => {
376 optional => 1,
377 type => 'string', format => 'dns-name',
378 description => "Set a name for the VM. Only used on the configuration web interface.",
379 },
380 scsihw => {
381 optional => 1,
382 type => 'string',
383 description => "SCSI controller model",
384 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
385 default => 'lsi',
386 },
387 description => {
388 optional => 1,
389 type => 'string',
390 description => "Description for the VM. Shown in the web-interface VM's summary."
391 ." This is saved as comment inside the configuration file.",
392 maxLength => 1024 * 8,
393 },
394 ostype => {
395 optional => 1,
396 type => 'string',
397 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
398 description => "Specify guest operating system.",
399 verbose_description => <<EODESC,
400 Specify guest operating system. This is used to enable special
401 optimization/features for specific operating systems:
402
403 [horizontal]
404 other;; unspecified OS
405 wxp;; Microsoft Windows XP
406 w2k;; Microsoft Windows 2000
407 w2k3;; Microsoft Windows 2003
408 w2k8;; Microsoft Windows 2008
409 wvista;; Microsoft Windows Vista
410 win7;; Microsoft Windows 7
411 win8;; Microsoft Windows 8/2012/2012r2
412 win10;; Microsoft Windows 10/2016/2019
413 win11;; Microsoft Windows 11/2022
414 l24;; Linux 2.4 Kernel
415 l26;; Linux 2.6 - 5.X Kernel
416 solaris;; Solaris/OpenSolaris/OpenIndiania kernel
417 EODESC
418 },
419 boot => {
420 optional => 1,
421 type => 'string', format => 'pve-qm-boot',
422 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
423 ." key or 'legacy=' is deprecated.",
424 },
425 bootdisk => {
426 optional => 1,
427 type => 'string', format => 'pve-qm-bootdisk',
428 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
429 pattern => '(ide|sata|scsi|virtio)\d+',
430 },
431 smp => {
432 optional => 1,
433 type => 'integer',
434 description => "The number of CPUs. Please use option -sockets instead.",
435 minimum => 1,
436 default => 1,
437 },
438 sockets => {
439 optional => 1,
440 type => 'integer',
441 description => "The number of CPU sockets.",
442 minimum => 1,
443 default => 1,
444 },
445 cores => {
446 optional => 1,
447 type => 'integer',
448 description => "The number of cores per socket.",
449 minimum => 1,
450 default => 1,
451 },
452 numa => {
453 optional => 1,
454 type => 'boolean',
455 description => "Enable/disable NUMA.",
456 default => 0,
457 },
458 hugepages => {
459 optional => 1,
460 type => 'string',
461 description => "Enable/disable hugepages memory.",
462 enum => [qw(any 2 1024)],
463 },
464 keephugepages => {
465 optional => 1,
466 type => 'boolean',
467 default => 0,
468 description => "Use together with hugepages. If enabled, hugepages will not not be deleted"
469 ." after VM shutdown and can be used for subsequent starts.",
470 },
471 vcpus => {
472 optional => 1,
473 type => 'integer',
474 description => "Number of hotplugged vcpus.",
475 minimum => 1,
476 default => 0,
477 },
478 acpi => {
479 optional => 1,
480 type => 'boolean',
481 description => "Enable/disable ACPI.",
482 default => 1,
483 },
484 agent => {
485 optional => 1,
486 description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
487 type => 'string',
488 format => $agent_fmt,
489 },
490 kvm => {
491 optional => 1,
492 type => 'boolean',
493 description => "Enable/disable KVM hardware virtualization.",
494 default => 1,
495 },
496 tdf => {
497 optional => 1,
498 type => 'boolean',
499 description => "Enable/disable time drift fix.",
500 default => 0,
501 },
502 localtime => {
503 optional => 1,
504 type => 'boolean',
505 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
506 ." the `ostype` indicates a Microsoft Windows OS.",
507 },
508 freeze => {
509 optional => 1,
510 type => 'boolean',
511 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
512 },
513 vga => {
514 optional => 1,
515 type => 'string', format => $vga_fmt,
516 description => "Configure the VGA hardware.",
517 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
518 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
519 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
520 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
521 ." display server. For win* OS you can select how many independent displays you want,"
522 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
523 ." using a serial device as terminal.",
524 },
525 watchdog => {
526 optional => 1,
527 type => 'string', format => 'pve-qm-watchdog',
528 description => "Create a virtual hardware watchdog device.",
529 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
530 ." action), the watchdog must be periodically polled by an agent inside the guest or"
531 ." else the watchdog will reset the guest (or execute the respective action specified)",
532 },
533 startdate => {
534 optional => 1,
535 type => 'string',
536 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
537 description => "Set the initial date of the real time clock. Valid format for date are:"
538 ."'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
539 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
540 default => 'now',
541 },
542 startup => get_standard_option('pve-startup-order'),
543 template => {
544 optional => 1,
545 type => 'boolean',
546 description => "Enable/disable Template.",
547 default => 0,
548 },
549 args => {
550 optional => 1,
551 type => 'string',
552 description => "Arbitrary arguments passed to kvm.",
553 verbose_description => <<EODESCR,
554 Arbitrary arguments passed to kvm, for example:
555
556 args: -no-reboot -no-hpet
557
558 NOTE: this option is for experts only.
559 EODESCR
560 },
561 tablet => {
562 optional => 1,
563 type => 'boolean',
564 default => 1,
565 description => "Enable/disable the USB tablet device.",
566 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
567 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
568 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
569 ." may consider disabling this to save some context switches. This is turned off by"
570 ." default if you use spice (`qm set <vmid> --vga qxl`).",
571 },
572 migrate_speed => {
573 optional => 1,
574 type => 'integer',
575 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
576 minimum => 0,
577 default => 0,
578 },
579 migrate_downtime => {
580 optional => 1,
581 type => 'number',
582 description => "Set maximum tolerated downtime (in seconds) for migrations.",
583 minimum => 0,
584 default => 0.1,
585 },
586 cdrom => {
587 optional => 1,
588 type => 'string', format => 'pve-qm-ide',
589 typetext => '<volume>',
590 description => "This is an alias for option -ide2",
591 },
592 cpu => {
593 optional => 1,
594 description => "Emulated CPU type.",
595 type => 'string',
596 format => 'pve-vm-cpu-conf',
597 },
598 parent => get_standard_option('pve-snapshot-name', {
599 optional => 1,
600 description => "Parent snapshot name. This is used internally, and should not be modified.",
601 }),
602 snaptime => {
603 optional => 1,
604 description => "Timestamp for snapshots.",
605 type => 'integer',
606 minimum => 0,
607 },
608 vmstate => {
609 optional => 1,
610 type => 'string', format => 'pve-volume-id',
611 description => "Reference to a volume which stores the VM state. This is used internally"
612 ." for snapshots.",
613 },
614 vmstatestorage => get_standard_option('pve-storage-id', {
615 description => "Default storage for VM state volumes/files.",
616 optional => 1,
617 }),
618 runningmachine => get_standard_option('pve-qemu-machine', {
619 description => "Specifies the QEMU machine type of the running vm. This is used internally"
620 ." for snapshots.",
621 }),
622 runningcpu => {
623 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
624 ." internally for snapshots.",
625 optional => 1,
626 type => 'string',
627 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
628 format_description => 'QEMU -cpu parameter'
629 },
630 machine => get_standard_option('pve-qemu-machine'),
631 arch => {
632 description => "Virtual processor architecture. Defaults to the host.",
633 optional => 1,
634 type => 'string',
635 enum => [qw(x86_64 aarch64)],
636 },
637 smbios1 => {
638 description => "Specify SMBIOS type 1 fields.",
639 type => 'string', format => 'pve-qm-smbios1',
640 maxLength => 512,
641 optional => 1,
642 },
643 protection => {
644 optional => 1,
645 type => 'boolean',
646 description => "Sets the protection flag of the VM. This will disable the remove VM and"
647 ." remove disk operations.",
648 default => 0,
649 },
650 bios => {
651 optional => 1,
652 type => 'string',
653 enum => [ qw(seabios ovmf) ],
654 description => "Select BIOS implementation.",
655 default => 'seabios',
656 },
657 vmgenid => {
658 type => 'string',
659 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
660 format_description => 'UUID',
661 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
662 ." to disable explicitly.",
663 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
664 ." value identifier to the guest OS. This allows to notify the guest operating system"
665 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
666 ." execution or creation from a template). The guest operating system notices the"
667 ." change, and is then able to react as appropriate by marking its copies of"
668 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
669 ."Note that auto-creation only works when done through API/CLI create or update methods"
670 .", but not when manually editing the config file.",
671 default => "1 (autogenerated)",
672 optional => 1,
673 },
674 hookscript => {
675 type => 'string',
676 format => 'pve-volume-id',
677 optional => 1,
678 description => "Script that will be executed during various steps in the vms lifetime.",
679 },
680 ivshmem => {
681 type => 'string',
682 format => $ivshmem_fmt,
683 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
684 ." the host.",
685 optional => 1,
686 },
687 audio0 => {
688 type => 'string',
689 format => $audio_fmt,
690 description => "Configure a audio device, useful in combination with QXL/Spice.",
691 optional => 1
692 },
693 spice_enhancements => {
694 type => 'string',
695 format => $spice_enhancements_fmt,
696 description => "Configure additional enhancements for SPICE.",
697 optional => 1
698 },
699 tags => {
700 type => 'string', format => 'pve-tag-list',
701 description => 'Tags of the VM. This is only meta information.',
702 optional => 1,
703 },
704 rng0 => {
705 type => 'string',
706 format => $rng_fmt,
707 description => "Configure a VirtIO-based Random Number Generator.",
708 optional => 1,
709 },
710 };
711
# Property-string schema for the 'cicustom' option. Each key names one cloud-init data
# section (meta, network, user, vendor) and takes the volume ID of a custom file for it.
#
# NOTE: the 'meta' description previously opened with a single quote but continued with
# double quotes, which left a literal '"', a newline and '."' inside the help text. All
# descriptions below consistently use single-quoted, dot-concatenated pieces.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all vendor data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
# Make the schema available under the format name referenced by the 'cicustom' option.
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
747
# Schema of the cloud-init related VM config options. The per-interface 'ipconfigN' entries
# are added to this hash further down in the file, and the whole hash is later merged into
# $confdesc.
#
# NOTE: the 'searchdomain' and 'nameserver' descriptions previously opened with a double
# quote but were continued with single quotes, so the help text contained literal "'",
# newlines and ".'" sequences. They are now proper concatenations. The 'citype' description
# was also missing the closing parenthesis after `ostype`.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
798
799 # what about other qemu settings ?
800 #cpu => 'string',
801 #machine => 'string',
802 #fda => 'file',
803 #fdb => 'file',
804 #mtdblock => 'file',
805 #sd => 'file',
806 #pflash => 'file',
807 #snapshot => 'bool',
808 #bootp => 'file',
809 ##tftp => 'dir',
810 ##smb => 'dir',
811 #kernel => 'file',
812 #append => 'string',
813 #initrd => 'file',
814 ##soundhw => 'string',
815
# Register every option that is in $confdesc at this point as a standard option named
# "pve-qm-<key>". Keys added to $confdesc after this loop are not covered by it.
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
819
820 my $MAX_USB_DEVICES = 5;
821 my $MAX_NETS = 32;
822 my $MAX_SERIAL_PORTS = 4;
823 my $MAX_PARALLEL_PORTS = 3;
824 my $MAX_NUMA = 8;
825
826 my $numa_fmt = {
827 cpus => {
828 type => "string",
829 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
830 description => "CPUs accessing this NUMA node.",
831 format_description => "id[-id];...",
832 },
833 memory => {
834 type => "number",
835 description => "Amount of memory this NUMA node provides.",
836 optional => 1,
837 },
838 hostnodes => {
839 type => "string",
840 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
841 description => "Host NUMA nodes to use.",
842 format_description => "id[-id];...",
843 optional => 1,
844 },
845 policy => {
846 type => 'string',
847 enum => [qw(preferred bind interleave)],
848 description => "NUMA allocation policy.",
849 optional => 1,
850 },
851 };
852 PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
853 my $numadesc = {
854 optional => 1,
855 type => 'string', format => $numa_fmt,
856 description => "NUMA topology.",
857 };
858 PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
859
# Add the options numa0 .. numa<$MAX_NUMA - 1> to $confdesc; all share the same description.
for my $idx (0 .. $MAX_NUMA - 1) {
    $confdesc->{"numa$idx"} = $numadesc;
}
863
864 my $nic_model_list = [
865 'e1000',
866 'e1000-82540em',
867 'e1000-82544gc',
868 'e1000-82545em',
869 'e1000e',
870 'i82551',
871 'i82557b',
872 'i82559er',
873 'ne2k_isa',
874 'ne2k_pci',
875 'pcnet',
876 'rtl8139',
877 'virtio',
878 'vmxnet3',
879 ];
880 my $nic_model_list_txt = join(' ', sort @$nic_model_list);
881
882 my $net_fmt_bridge_descr = <<__EOD__;
883 Bridge to attach the network device to. The Proxmox VE standard bridge
884 is called 'vmbr0'.
885
886 If you do not specify a bridge, we create a kvm user (NATed) network
887 device, which provides DHCP and DNS services. The following addresses
888 are used:
889
890 10.0.2.2 Gateway
891 10.0.2.3 DNS Server
892 10.0.2.4 SMB Server
893
894 The DHCP server assign addresses to the guest starting from 10.0.2.15.
895 __EOD__
896
897 my $net_fmt = {
898 macaddr => get_standard_option('mac-addr', {
899 description => "MAC address. That address must be unique withing your network. This is"
900 ." automatically generated if not specified.",
901 }),
902 model => {
903 type => 'string',
904 description => "Network Card Model. The 'virtio' model provides the best performance with"
905 ." very low CPU overhead. If your guest does not support this driver, it is usually"
906 ." best to use 'e1000'.",
907 enum => $nic_model_list,
908 default_key => 1,
909 },
910 (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
911 bridge => {
912 type => 'string',
913 description => $net_fmt_bridge_descr,
914 format_description => 'bridge',
915 pattern => '[-_.\w\d]+',
916 optional => 1,
917 },
918 queues => {
919 type => 'integer',
920 minimum => 0, maximum => 16,
921 description => 'Number of packet queues to be used on the device.',
922 optional => 1,
923 },
924 rate => {
925 type => 'number',
926 minimum => 0,
927 description => "Rate limit in mbps (megabytes per second) as floating point number.",
928 optional => 1,
929 },
930 tag => {
931 type => 'integer',
932 minimum => 1, maximum => 4094,
933 description => 'VLAN tag to apply to packets on this interface.',
934 optional => 1,
935 },
936 trunks => {
937 type => 'string',
938 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
939 description => 'VLAN trunks to pass through this interface.',
940 format_description => 'vlanid[;vlanid...]',
941 optional => 1,
942 },
943 firewall => {
944 type => 'boolean',
945 description => 'Whether this interface should be protected by the firewall.',
946 optional => 1,
947 },
948 link_down => {
949 type => 'boolean',
950 description => 'Whether this interface should be disconnected (like pulling the plug).',
951 optional => 1,
952 },
953 mtu => {
954 type => 'integer',
955 minimum => 1, maximum => 65520,
956 description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
957 optional => 1,
958 },
959 };
960
961 my $netdesc = {
962 optional => 1,
963 type => 'string', format => $net_fmt,
964 description => "Specify network devices.",
965 };
966
967 PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
968
# Property-string format of the cloud-init `ipconfigN` options
# (ip=cidr,gw=ip,ip6=cidr,gw6=ip). Each gateway requires the address of the
# same family to be given as well.
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
        requires => 'ip',
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
        requires => 'ip6',
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);

# Schema entry shared by every `ipconfigN` option.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option must carry the ipconfig description; registering the
# network-device description under this name would hand consumers of
# 'pve-qm-ipconfig' the wrong schema.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1022
# One `netN` option and one cloud-init `ipconfigN` option per possible NIC.
for my $idx (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$idx"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$idx"} = $ipconfigdesc;
}

# Every cloud-init option is also a regular VM config option.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1031
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier: accepts the literals 'none' and 'cdrom', any absolute
# path, or anything that passes the 'pve-volume-id' format check.
#
# Returns the accepted value. On failure it dies with the format checker's
# error, or returns nothing when $noerr is set.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid eq 'none' || $volid eq 'cdrom' || $volid =~ m|^/|;

    # neither a special keyword nor a path, so it has to be a volume ID
    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $checked;
}
1048
# Property-string format of the `usbN` options: the host device (or 'spice')
# plus an optional USB3 flag.
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers) or
'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection devices for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port.",
        default => 0,
    },
};

# Schema entry shared by every `usbN` option.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1083
# Schema entry shared by every `serialN` option: either a host character
# device below /dev or the literal 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => 'Create a serial device inside the VM (n is 0 to 3)',
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1100
# Schema entry shared by every `parallelN` option: a host parallel port or
# USB printer device node.
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => 'Map host parallel devices (n is 0 to 2).',
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1115
# Register the numbered device options (parallelN, serialN, hostpciN, usbN)
# and all drive keys in the VM config schema.
for my $idx (0 .. $MAX_PARALLEL_PORTS - 1) {
    $confdesc->{"parallel$idx"} = $paralleldesc;
}

for my $idx (0 .. $MAX_SERIAL_PORTS - 1) {
    $confdesc->{"serial$idx"} = $serialdesc;
}

for my $idx (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$idx"} = $PVE::QemuServer::PCI::hostpcidesc;
}

$confdesc->{$_} = $PVE::QemuServer::Drive::drivedesc_hash->{$_}
    for keys %{$PVE::QemuServer::Drive::drivedesc_hash};

for my $idx (0 .. $MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$idx"} = $usbdesc;
}
1135
# Property-string format of the `boot` option: the deprecated single-letter
# `legacy` list and the explicit device `order`.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => '[acdn]{1,4}',
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            ." Deprecated, use 'order=' instead.",
        optional => 1,
        default_key => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => 'device[;device...]',
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1170
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name.
#
# Accepts any valid drive name except efidisk*/tpmstate*, and any netN, usbN
# or hostpciN key that exists in the config schema. Returns the name on
# success; otherwise dies, or returns nothing when $noerr is set.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $is_special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_special;

    # non-drive devices: the key must look like "<prefix><number>" and be known
    for my $prefix (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$prefix\d+$/ && $confdesc->{$dev};
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1192
# Render a list of boot devices as a `boot` property string ("order=a;b;c").
# An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" unless @$devs;

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1199
# Cached result of the KVM_GET_API_VERSION ioctl (0 = not queried yet).
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm, querying it only once per
# process. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1213
# Per-binary caches: the detected QEMU version string and the mtime of the
# binary at the time the version was detected.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of the QEMU binary $binary (defaults to the binary for
# the host architecture), e.g. "6.0.0", or 'unknown' if the `--version`
# output could not be parsed.
#
# The result is cached per binary and re-detected when the binary's mtime
# changes. Dies if the binary cannot be stat()ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # File::stat's stat() yields undef on failure; fail with a useful message
    # instead of a method call on an undefined value.
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";
    my $mtime = $st->mtime;

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary}
        if $kvm_user_version->{$binary} && $cachedmtime == $mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    # best effort: a failing binary leaves the cached value at 'unknown'
    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version() that falls
# back to the installed QEMU version when no explicit version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1248
# True if the vhost-net character device is present on the host.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1252
# True if $key is a known VM config option, i.e. has an entry in $confdesc.
sub option_exists {
    my ($key) = @_;
    return defined $confdesc->{$key};
}
1257
# Cached path of the host CD-ROM device symlink, once one has been found.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 that exists as
# a symlink, caching a positive result. Returns nothing if none is present.
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    for my $candidate (qw(/dev/cdrom /dev/cdrom1 /dev/cdrom2)) {
        return $cdrom_path = $candidate if -l $candidate;
    }

    # make the "no device found" result explicit instead of leaking the
    # value of the last file test
    return;
}
1267
# Resolve the `file` value of a CD-ROM drive to a path:
#   'cdrom'       -> host CD-ROM device (see get_cdrom_path)
#   'none'        -> empty string
#   absolute path -> returned unchanged
#   anything else -> treated as volume ID and resolved via PVE::Storage
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1281
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/ paths and anything already in "storage:volume" form
# are returned unchanged. A bare file name is mapped onto the 'local'
# storage (iso/ for CD-ROM media, the VM's directory otherwise). Any other
# name containing a slash cannot be converted and yields nothing.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1300
# Check that the storage content type $vtype fits the drive's $media setting
# ('disk' expects 'images', 'cdrom' expects 'iso'). Does nothing when no
# media is given; raises a parameter exception for $opt on a mismatch.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_type_for{$media} // die "internal error";

    return if $vtype eq $etype;

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1319
# Normalise $drive->{file} in place: a plain filesystem path is converted to
# the volume ID of the storage it belongs to (raising a parameter exception
# for $opt if no storage matches), and `media` defaults to 'cdrom' for ISO
# volumes as well as for the special values 'cdrom' and 'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs; special keywords, /dev/
    # nodes, existing volume IDs and plain numbers are left alone
    my $is_plain_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_plain_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        raise_param_exc({ $opt => "unable to associate path '$file' to any storage"})
            if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1339
# Parse the `hotplug` option into a hash of enabled features.
#
# '0' disables everything (empty hash), '1' selects the schema default list.
# Any other value is split into a list of feature names. Dies on an unknown
# feature name.
sub parse_hotplug_features {
    my ($data) = @_;

    my %enabled;

    return \%enabled if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    for my $feature (PVE::Tools::split_list($data)) {
        my ($name) = $feature =~ m/^(network|disk|cpu|memory|usb)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled{$name} = 1;
    }

    return \%enabled;
}
1358
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for the `hotplug` option.
#
# Returns $value if it parses. Otherwise returns nothing when $noerr is set,
# or dies with the parser's error message.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() signals bad input by dying, so it has to be
    # guarded for $noerr to have any effect
    my $parsed = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if $parsed;

    return if $noerr;

    die $err if $err;
    die "unable to parse hotplug option\n";
}
1369
# Send a SCSI INQUIRY to the device behind the open file handle $fh using
# the Linux SG_IO ioctl.
#
# Returns a hash with the keys type, removable, vendor, product and
# revision. On any failure it dies, or returns nothing when $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    # 36 byte response buffer, 8 byte sense buffer, 6 byte INQUIRY (0x12)
    # command asking for 36 bytes
    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # only the leading header fields are supplied; fields the template has no
    # value for are packed as zero
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 of the header - presumably host_status and
    # driver_status per sg.h - must both be zero
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # removable is bit 7 of the second byte, the device type the low 5 bits
    # of the first
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1419
# Open $path read/write and run a SCSI INQUIRY on it. Returns the inquiry
# result hash, or nothing if the path cannot be opened or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    my $handle = IO::File->new("+<$path") or return;

    my $inquiry = scsi_inquiry($handle, 1);
    close($handle);

    return $inquiry;
}
1429
# Build the QEMU `-device` argument for the USB tablet. q35 machines and
# aarch64 guests attach it to the EHCI bus, everything else to UHCI.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1445
# Build the QEMU `-device` argument for the USB keyboard. Only aarch64
# guests get one; every other architecture yields nothing.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    return "usb-kbd,id=keyboard,bus=ehci.0,port=2" if $arch eq 'aarch64';

    return;
}
1453
# Drive identifier as used in QEMU ids: interface name followed by index,
# e.g. "scsi0".
my sub get_drive_id {
    my ($drive) = @_;
    return $drive->{interface} . $drive->{index};
}
1458
# Build the QEMU `-device` argument for the guest-visible side of a drive.
#
# Handles virtio-blk, SCSI (hd/cd/block/generic, depending on the backing
# device and machine version) and IDE/SATA drives. Dies for usb drives and
# for unknown interfaces. bootindex and serial are appended for all types.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # host device: ask it what it is to pick the right frontend
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi and pvscsi controllers address drives by scsi-id; the other
        # controller types use a fixed scsi-id and the drive index as LUN
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1554
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
# Returns nothing if the file cannot be opened, and undef if it contains no
# InitiatorName line.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') or return;

    my $initiator;
    while (my $line = <$fh>) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1568
# Build the QEMU `-drive` argument (host/backend side) for $drive.
#
# $pbs_name, when set, is the node name of a PBS blockdev used as backing
# for a live-restore; the drive is then wrapped in an alloc-track node.
# $io_uring tells whether io_uring may be used as default AIO backend.
#
# Dies if a CD-ROM drive is combined with $pbs_name, or if the format of a
# PBS-backed drive cannot be determined.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    # $path may be undefined for a 'cdrom' drive when the host has no CD-ROM
    # device, so guard the match
    my $is_rbd = defined($path) && $path =~ m/^rbd:/;

    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # bandwidth/IOPS limits: config keys carry a ''/_rd/_wr suffix, the QEMU
    # throttling options a -total/-read/-write suffix
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1706
# Build the QEMU `-blockdev` argument for a read-only PBS node named
# $pbs_name from the repository/snapshot/archive (and optional keyfile)
# given in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        'driver=pbs',
        "node-name=$pbs_name",
        'read-only=on',
        "repository=$pbs_conf->{repository}",
        "snapshot=$pbs_conf->{snapshot}",
        "archive=$pbs_conf->{archive}",
    );
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1716
# Build the QEMU `-device` argument for the guest-visible NIC $netid.
#
# Adds multiqueue vectors for virtio NICs with more than one queue, the boot
# index, a host MTU (virtio on a bridge only; dies if the MTU is below 576
# or above the bridge MTU) and, when $use_old_bios_files is set, the legacy
# PXE ROM file matching the NIC model.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $device = $net->{model} eq 'virtio' ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my @params = ($device, "mac=$net->{macaddr}", "netdev=$netid$pciaddr", "id=$netid");

    if ($net->{queues} && $net->{queues} > 1 && $net->{model} eq 'virtio') {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        push @params, "vectors=$vectors", 'mq=on';
    }
    push @params, "bootindex=$net->{bootindex}" if $net->{bootindex};

    if (my $mtu = $net->{mtu}) {
        if ($net->{model} eq 'virtio' && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means "inherit from the bridge"
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            push @params, "host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $romfile_for{$device};
        push @params, "romfile=$romfile" if $romfile;
    }

    return join(',', @params);
}
1771
# Build the QEMU `-netdev` argument (host side) for NIC $netid.
#
# Bridged NICs become tap devices named "tap<vmid>i<N>" wired to the
# pve-bridge scripts (the hotplug variant if $hotplug is set); NICs without
# a bridge use user-mode networking. Dies if $netid is not a valid "netN"
# id within $MAX_NETS or the resulting interface name is too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # reject ids that are not of the form netN outright instead of carrying
    # on with an empty index
    die "got strange net id '$netid'\n" if $netid !~ m/^net(\d+)$/;
    my $i = int($1);

    die "got strange net id '$i'\n" if $i >= $MAX_NETS;

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1809
# Maps the `vga` config type to the QEMU display device name.
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
};

# Build the QEMU `-device` argument for a display adapter.
#
# $vga is the parsed `vga` option. $qxlnum, when set, selects a qxl device
# (the primary 'qxl-vga' if $id is false, a secondary 'qxl' otherwise).
# $id is also appended to the device id ("vga", "vga1", ...). Dies if no
# device type can be determined for $vga->{type}.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # the alternation is grouped so that both anchors apply to each
        # OS type
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} eq 'virtio') {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vgamem_mb";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vgamem_mb";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # std VGA on Windows guests gets EDID switched off unless OVMF is used
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1875
# Parse a ';'-separated list of numbers and ranges ("0;2-4") into a list of
# [start, end] pairs; `end` is undef for a single number. Dies on malformed
# parts and on ranges whose end is below their start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($first, $last) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";
        die "invalid range: $part ($last < $first)\n" if defined($last) && $last < $first;
        push @ranges, [ $first, $last ];
    }

    return \@ranges;
}
1889
# Parse a `numaN` property string. The `cpus` and `hostnodes` values, when
# present, are expanded into lists of [start, end] pairs.
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        $res->{$key} = parse_number_sets($res->{$key}) if defined($res->{$key});
    }

    return $res;
}
1898
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a `netN` property string. On a parse error the error is emitted as
# a warning and nothing is returned. A missing MAC address is filled in with
# a random one using the datacenter's mac_prefix.
sub parse_net {
    my ($data) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    $res->{macaddr} //= do {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        PVE::Tools::random_ether_addr($dc->{mac_prefix});
    };

    return $res;
}
1914
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an `ipconfigN` property string. Parse errors and inconsistent
# gateway/address combinations are reported as warnings and yield nothing.
# If neither an IPv4 nor an IPv6 address is given, both default to 'dhcp'.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # a gateway needs an address of the same family ...
    if ($res->{gw} && !$res->{ip}) {
        warn 'gateway specified without specifying an IP address';
        return;
    } elsif ($res->{gw6} && !$res->{ip6}) {
        warn 'IPv6 gateway specified without specifying an IPv6 address';
        return;
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        # ... and that address has to be a static one
        warn 'gateway specified together with DHCP';
        return;
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        warn "IPv6 gateway specified together with $res->{ip6} address";
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$res->{ip} && !$res->{ip6};

    return $res;
}
1949
# Render a parsed network device hash back into a `netN` property string.
sub print_net {
    my ($net) = @_;
    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1955
# Re-serialise every parsable `netN` entry of $settings in place, so that
# entries without a MAC address get the one generated by parse_net().
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1966
# Returns 1 if the storage layer reports VM $vmid as owner of volume $volid.
# Absolute paths and volumes that cannot be resolved yield nothing.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    # errors while resolving the volume simply mean "not the owner"
    my (undef, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && $owner == $vmid;

    return;
}
1980
# Handle a drive that is being detached from the VM config: cloud-init
# images are freed right away (errors are only warned about), CD-ROMs are
# ignored, and any other volume owned by the VM is recorded as unused.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        warn $@ if $@;
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
1994
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = do {
    # All free-text SMBIOS fields share the same schema and differ only in
    # their description.
    my $text_field = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    +{
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $text_field->("Set SMBIOS1 version."),
        serial => $text_field->("Set SMBIOS1 serial number."),
        manufacturer => $text_field->("Set SMBIOS1 manufacturer."),
        product => $text_field->("Set SMBIOS1 product ID."),
        sku => $text_field->("Set SMBIOS1 SKU string."),
        family => $text_field->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };
};
2052
# Parse an smbios1 property string against $smbios1_fmt.
# A parse error is only reported as a warning; in that case undef is returned.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2060
# Turn a parsed smbios1 hash back into its property-string form.
sub print_smbios1 {
    my ($smbios1) = @_;

    my $property_string = PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
    return $property_string;
}

# make the smbios1 schema available under its format name
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2067
# Parse a watchdog property string against $watchdog_fmt.
# Returns nothing for an empty/false value; parse errors only warn and
# result in undef.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2077
# Parse the 'agent' option of a VM config against $agent_fmt.
# Always returns a hash reference: an empty one if the option is not set,
# cannot be parsed (a warning is emitted) or the agent is not enabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_str = $conf->{agent};
    return {} if !defined($agent_str);

    my $agent = eval { parse_property_string($agent_fmt, $agent_str) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2090
# Look up a single property of the parsed guest-agent option.
# Returns an explicit undef (also in list context) when no agent is configured.
sub get_qga_key {
    my ($conf, $key) = @_;

    if (!defined($conf->{agent})) {
        return undef;
    }

    my $agent_props = parse_guest_agent($conf);
    return $agent_props->{$key};
}
2098
# Parse a vga property string against $vga_fmt.
# An empty/false value yields an empty hash; a parse error only warns and
# yields undef.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2107
# Parse an rng property string against $rng_fmt.
# Returns nothing for an empty/false value; parse errors only warn and
# result in undef.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2117
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);
# Format verifier for 'pve-qm-usb-device'.
# Returns $value if parse_usb_device() accepts it. Otherwise returns nothing
# when $noerr is set and dies when it is not.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
2128
# add JSON properties for create and set function
#
# Copies every option schema from $confdesc into $prop, except for a fixed set
# of keys that is skipped here, and returns $prop.
sub json_config_properties {
    my $prop = shift;

    my %skip_json_config_opts = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
    );

    for my $opt (grep { !$skip_json_config_opts{$_} } keys %$confdesc) {
        $prop->{$opt} = $confdesc->{$opt};
    }

    return $prop;
}
2148
# return copy of $confdesc_cloudinit to generate documentation
# (a deep copy, so callers cannot modify the original schema)
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2154
# Validate $value for the config option $key according to the type declared in
# $confdesc and return the normalized value:
#  - boolean: 1 or 0 (accepts 1/on/yes/true and 0/off/no/false)
#  - integer: the numeric value (digits only)
#  - number:  the value as given (digits with optional fraction)
#  - string:  the value, checked against the declared format if there is one,
#             otherwise with one pair of surrounding double quotes removed
# Dies on unknown keys, undefined values, embedded line breaks and failed checks.
sub check_type {
    my ($key, $value) = @_;

    die "unknown setting '$key'\n" if !$confdesc->{$key};

    my $type = $confdesc->{$key}->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        if ($value =~ m/^(\d+)$/) {
            return int($1);
        }
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        if ($value =~ m/^(\d+)(\.\d+)?$/) {
            return $value;
        }
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $confdesc->{$key}->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    die "internal error"
}
2191
# Destroy the VM $vmid: free the volumes it owns, then remove its config.
#
# $skiplock           - skip the config lock check
# $replacement_conf   - if defined, written as the new config instead of
#                       destroying the config
# $purge_unreferenced - additionally free every image volume the storage layer
#                       lists for $vmid
#
# Dies if the VM is a template whose base volumes are still used by a linked
# clone. Failures to free single volumes only produce warnings.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    if (!$skiplock) {
        PVE::QemuConfig->check_lock($conf);
    }

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            if (PVE::Storage::volume_is_base_and_used($storecfg, $volid)) {
                die "base volume '$volid' is still in use by linked cloned\n";
            }
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes already handled, so that each one is freed at most once
    my $volids = {};
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid;
        return if $volid =~ m|^/|;
        return if $volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        if (my $err = $@) {
            warn "Could not remove disk '$volid', check manually: $err";
        }
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # saved VM state volumes of the snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next if !defined($drive);

        $remove_owned_drive->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    if (defined($replacement_conf)) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2262
# Parse the raw text of a VM config file.
#
# $filename must end in '/qemu-server/<vmid>.conf'; the VMID is only used for
# warning messages and for resolving drive file names.
#
# Returns nothing if $raw is undefined, otherwise a hash reference holding the
# current config keys plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - keys of the [PENDING] section
#   snapshots - one hash per [<snapshot name>] section
#
# Lines starting with '#' and 'description:' lines are collected into the
# 'description' of the section they belong to. Lines that cannot be parsed only
# produce a warning and are skipped.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # the section currently being filled
    my $descr;
    my $section = '';

    # store the description collected so far in the current section and start over
    my $flush_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $flush_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $flush_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the start of the line: otherwise any other line that
            # merely ends in "snapstate: <state>" (e.g. an 'args' value) would
            # be swallowed here
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # stored verbatim, no type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
            } else {
                # legacy alias
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the drive's file name to a volume ID
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        warn "vm $vmid - unable to parse value of '$key'\n";
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $flush_description->();

    delete $res->{snapstate}; # just to be sure

    return $res;
}
2365
# Serialize a VM config hash into the raw config file text.
#
# Besides generating the text this normalizes $conf in place:
#  - 'snapstate' is removed from the current config,
#  - the legacy 'cdrom' key is turned into 'ide2' (dies if both are set),
#  - the legacy 'smp' key is folded into 'sockets',
#  - all values are run through check_type() (dies on invalid values),
#  - 'unusedX' entries whose volume is referenced by a drive again are dropped.
#
# The current config comes first, followed by a [PENDING] section if there are
# pending changes and one [<name>] section per snapshot. Returns the raw text.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volumes referenced by a drive of the current or pending config
    my $used_volids = {};

    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            # drives of snapshots do not count as "in use"
            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    my $generate_raw_config = sub {
        my ($conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $conf->{description})) {
            # test the length, not the truth value: a description of "0" is
            # false in Perl but must still be written out
            if (length($descr)) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty pending description is written as a bare comment line
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$conf) {
            next if $key =~ /^(digest|description|pending|snapshots)$/;
            $raw .= "$key: $conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2467
# Collect the static default values declared in the $confdesc schema.
# Returns a hash reference mapping option name to its default; options
# without a defined default are left out.
sub load_defaults {
    my %defaults;

    # we use static defaults from our JSON schema configuration
    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        next if !defined($default);
        $defaults{$key} = $default;
    }

    return \%defaults;
}
2481
# List the qemu guests of the cluster VM list that are located on this node.
# Returns a hash reference of the form { <vmid> => { exists => 1 }, ... }.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $res = {};

    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return $res if !$ids;

    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        my $is_local = $entry->{node} && $entry->{node} eq $nodename;
        next if !$is_local;

        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';
        next if !$is_qemu;

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2497
# test if VM uses local resources (to prevent migration)
#
# Returns an array reference with the names of all config keys that refer to
# local resources. Dies if there are any, unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' without an index are the old syntax
    my @loc_res = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $key (keys %$conf) {
        # spice USB redirection is not a local resource
        next if $key =~ m/^usb/ && ($conf->{$key} =~ m/^spice(?![^,])/);
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $key =~ m/^serial/ && ($conf->{$key} eq 'socket');

        next if $key !~ m/^(usb|hostpci|serial|parallel)\d+$/;
        push @loc_res, $key;
    }

    if (scalar(@loc_res) && !$noerr) {
        die "VM uses local resources\n";
    }

    return \@loc_res;
}
2520
# check if used storages are available on all nodes (use by migrate)
#
# For every volume of $conf that lives on a storage, verifies that the storage
# is enabled locally and on $node, and that the storage allows the volume's
# content type. Dies on the first problem found.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2544
# list nodes where all VM images are available (used by has_feature API)
#
# Starts with all cluster nodes and narrows the set down per volume:
#  - a disabled storage rules out every node,
#  - a storage restricted to certain nodes rules out all others,
#  - a storage that is not shared rules out all nodes but the local one.
# Returns a hash reference { <node> => 1, ... }.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $restrict_by_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            $nodehash = {};
            return;
        }

        if (my $avail = $scfg->{nodes}) {
            for my $node (keys %$nodehash) {
                delete $nodehash->{$node} if !$avail->{$node};
            }
            return;
        }

        if (!$scfg->{shared}) {
            for my $node (keys %$nodehash) {
                delete $nodehash->{$node} if $node ne $nodename;
            }
        }
        return;
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_by_volume);

    return $nodehash
}
2578
# Determine, per cluster node, which storages used by the volumes of $conf are
# not available there (storage disabled, or node not in the storage's node list).
#
# Returns a hash reference keyed by node name. A node with missing storages
# gets an 'unavailable_storages' entry holding the sorted list of storage IDs;
# all other nodes map to an empty hash.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $record_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            for my $node (keys %$nodehash) {
                $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
            }
        } elsif (my $avail = $scfg->{nodes}) {
            for my $node (keys %$nodehash) {
                next if $avail->{$node};
                $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
            }
        }
        return;
    };

    PVE::QemuConfig->foreach_volume($conf, $record_unavailable);

    # turn the collected sets into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages};
        next if !$unavail;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash
}
2617
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Unless $nocheck is set, first asserts that the config of $vmid exists on
# $node, then returns the result of the local running check.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    unless ($nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2625
# Return the list of local VMs (see config_list) and add the 'pid' of every VM
# that has a '<vmid>.pid' file in the run directory and is found running.
sub vzlist {
    my $vzlist = config_list();

    my $dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/;
        next if !defined($vmid);
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2643
# Schema of the per-VM status properties. Everything except 'vmid' and
# 'status' is optional, so those entries are built by a small helper.
our $vmstatus_return_properties = do {
    my $optional = sub {
        my ($type, $description, %extra) = @_;
        return {
            description => $description,
            type => $type,
            optional => 1,
            %extra,
        };
    };

    my $props = {
        vmid => get_standard_option('pve-vmid'),
        status => {
            description => "Qemu process status.",
            type => 'string',
            enum => ['stopped', 'running'],
        },
        maxmem => $optional->('integer', "Maximum memory in bytes.", renderer => 'bytes'),
        maxdisk => $optional->('integer', "Root disk size in bytes.", renderer => 'bytes'),
        name => $optional->('string', "VM name."),
        qmpstatus => $optional->('string', "Qemu QMP agent status."),
        pid => $optional->('integer', "PID of running qemu process."),
        uptime => $optional->('integer', "Uptime.", renderer => 'duration'),
        cpus => $optional->('number', "Maximum usable CPUs."),
        lock => $optional->('string', "The current config lock, if any."),
        tags => $optional->('string', "The current configured tags, if any"),
        'running-machine' => $optional->('string', "The currently running machine type (if running)."),
        'running-qemu' => $optional->('string', "The currently running QEMU version (if running)."),
    };

    $props;
};
2710
# CPU accounting sample of the previous vmstatus() call, keyed by PID.
# Each entry holds { time, used, cpu }.
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# $opt_vmid - restrict the result to this VM (optional)
# $full     - additionally query running VMs via QMP
#
# Returns a hash reference keyed by VMID.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $mib = 1024*1024;

    # static information from the config, counters start at zero
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);

        my $pid = $list->{$vmid}->{pid};

        my $d = {
            vmid => int($vmid),
            # fixme: better status?
            status => $pid ? 'running' : 'stopped',
            disk => 0, # no info available
            uptime => 0,
            cpu => 0,
            mem => 0,
            netout => 0,
            netin => 0,
            diskread => 0,
            diskwrite => 0,
        };
        $d->{pid} = int($pid) if $pid;

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{maxdisk} = $size // 0;

        my $sockets = $conf->{sockets} || $defaults->{sockets};
        my $cores = $conf->{cores} || $defaults->{cores};
        $d->{cpus} = $sockets * $cores;
        $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
        $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $mib;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon} * $mib;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network counters of the tap devices; what the host receives on a tap
    # device is accounted as the guest's 'netout' and vice versa
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        my $counters = $netdev->{$dev};

        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    my $ctime = gettimeofday;

    # uptime, memory and CPU usage of the running processes
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this process, no usage can be computed yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime <= 1000) {
            # too little time since the last sample, report the previous value
            $d->{cpu} = $old->{cpu};
            next;
        }

        my $dutime = $used - $old->{used};

        $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
        $last_proc_pid_stat->{$pid} = {
            time => $ctime,
            used => $used,
            cpu => $d->{cpu},
        };
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};
            $totalrdbytes += $stats->{rd_bytes};
            $totalwrbytes += $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }
        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = join('.', $v->{major}, $v->{minor}, $v->{micro});
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where no QMP status was obtained
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        my $d = $res->{$vmid};
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
2951
# Return 1 if any of the 'serialN' options (N below $MAX_SERIAL_PORTS) is set
# in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
2963
# Describe the audio device 'audio$id' of $conf ($id defaults to 0).
#
# Returns nothing if no such device is configured, otherwise a hash reference
# with the device model ('dev'), its ID ('dev_id'), the backend driver
# ('backend', 'spice' if not configured) and the backend's ID ('backend_id').
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio = $conf->{"audio$id"};
    return if !defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my $backend = $props->{driver} // 'spice';

    my %info = (
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $backend,
        backend_id => "$backend-backend${id}",
    );

    return \%info;
}
2981
# Build the QEMU command line arguments for an audio device as described by
# conf_has_audio(). The 'audiodev=' property is only added for machine
# versions of at least 4.2. Dies for unsupported device models.
# Returns an array reference of arguments.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $dev = $audio->{dev};

    my $audiodev = "";
    $audiodev = ",audiodev=$audio->{backend_id}" if min_version($machine_version, 4, 2);

    my @devs;
    if ($dev eq 'AC97') {
        @devs = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($dev =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        @devs = (
            '-device', "$dev,id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$dev', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@devs;
}
3007
# Return the paths of the swtpm control socket ('socket') and PID file ('pid')
# used for the VM $vmid.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $base = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $base,
        pid => "$base.pid",
    };
}
3015
# Append the QEMU arguments for the emulated TPM to @$devices.
# Does nothing if the VM has no 'tpmstate0' configured.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3027
# Start the swtpm daemon for the VM's TPM state volume.
#
# $tpmdrive  - value of the 'tpmstate0' option; nothing is done if it is unset
# $migration - if set, swtpm_setup is skipped
#
# Unless $migration is set, swtpm_setup is run first; it is called with
# --not-overwrite. Afterwards the swtpm daemon is started and its PID file is
# awaited.
#
# Returns the untainted PID of the daemon so it can be killed on error, or
# undef if no PID could be read from the PID file. Dies if a command fails or
# the PID file does not show up in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # an unset version is treated like before (not v2.0), just without the
    # 'uninitialized value' warning
    my $is_tpm2 = defined($tpm->{version}) && $tpm->{version} eq 'v2.0';

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $is_tpm2;
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $is_tpm2;

        run_command($setup_cmd, outfunc => sub {
            # the output line is passed as argument; $1 would be a stale
            # capture of some unrelated regex match
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        # newline added like in the swtpm_setup callback above
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error
    my $pid_line = file_read_firstline($paths->{pid}) // '';
    if ($pid_line =~ m/(\d+)/) {
        return $1;
    }

    # never hand out a stale capture as PID, the caller might kill it
    warn "unable to read PID of swtpm from '$paths->{pid}'\n";
    return undef;
}
3097
# Check whether a vga option string selects a qxl display type.
# Returns 0 if not; otherwise the digit following 'qxl' (2, 3 or 4), or 1 for
# plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $vgatype = $vgaconf->{type};
    return 0 if !$vgatype;

    if ($vgatype =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3107
# True if $arch is the architecture of the host.
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();
    return $host_arch eq $arch;
}
3112
# Architecture of the VM: the configured 'arch', or the host's architecture if
# none is configured.
sub get_vm_arch {
    my ($conf) = @_;

    my $arch = $conf->{arch};
    return $arch if defined($arch);

    return get_host_arch();
}
3117
# machine type used per architecture when the config does not name one
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
3122
# Return the '<major>.<minor>' part of a QEMU version string.
#
# $kvmversion - version string; queried via kvm_user_version() if undefined
#
# Returns undef if the string does not start with '<digits>.<digits>'.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # capture via list assignment: using $1 after an unchecked match would
    # return the capture of some earlier, unrelated match on failure
    my ($major_minor) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3129
# Turn an unversioned machine type ('pc', 'q35', 'virt' or none) into a
# version-pinned one.
#
# $base_version is used as version if it is defined and can be run according
# to can_run_pve_machine_version(); otherwise the installed machine version is
# used. Unknown machine types are returned unchanged, with a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $can_use_base = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_version;
    $pin_version = get_installed_machine_version($kvmversion) if !$can_use_base;

    return "pc-i440fx-$pin_version" if !$machine || $machine eq 'pc';
    return "pc-q35-$pin_version" if $machine eq 'q35';
    return "virt-$pin_version" if $machine eq 'virt';

    warn "unknown machine type '$machine', not touching that!\n";

    return $machine;
}
3151
# Determine the QEMU machine type for a VM.
#
# $forcemachine    - takes precedence over the 'machine' option of $conf
# $arch            - used to pick the default machine (defaults to 'x86_64')
# $add_pve_version - append a '+pveN' suffix if the type has none yet
# $kvmversion      - QEMU version; queried if needed and not given
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();
        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        if (windows_version($conf->{ostype})) {
            $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion);
        }
        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};
        if ($add_pve_version) {
            $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion);
        }
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # for version-pinned machines that do not include a pve-version (e.g.
        # pc-q35-4.1), we assume 0 to keep them stable in case we bump
        #
        # a trailing '.pxe' has to stay at the very end
        if ($machine =~ m/^(.*?)\.pxe$/) {
            $machine = "$1+pve0.pxe";
        } else {
            $machine .= '+pve0';
        }
    }

    return $machine;
}
3186
# Look up the OVMF image list for an architecture / EFI disk combination.
#
# The image set is 'default' unless the EFI disk has efitype '4m'. In that case
# it is '4m' with $smm set and '4m-no-smm' without, with '-ms' appended when
# the disk has 'pre-enrolled-keys' set.
#
# Returns the list stored for that set in $OVMF; dies for unknown architectures.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$types;

    my $efitype = $efidisk->{efitype};

    my $type;
    if (defined($efitype) && $efitype eq '4m') {
        $type = $smm ? '4m' : '4m-no-smm';
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    } else {
        $type = 'default';
    }

    return @{ $types->{$type} };
}
3201
# Emulator binaries used for guests whose architecture differs from the host.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Path of the QEMU binary to run a guest of architecture $arch: '/usr/bin/kvm'
# for the native architecture, the matching qemu-system-* binary otherwise.
# Dies if no emulator is known for $arch.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
3214
# The results of query_supported_cpu_flags and query_understood_cpu_flags are
# not directly usable as QEMU command line flags (-cpu element). Intersect the
# result of query_supported_ with the one of query_understood_ first, because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ has to start up to 2 temporary VMs, which makes it rather
# expensive. The same value is available much cheaper from pmxcfs, using
# PVE::Cluster::get_node_kv('cpuflags-$accel') with $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    my $flags = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Starts a temporary (frozen) VM with vmid -1, asks it via QMP for the
    # expanded 'host' CPU model and stops it again.
    my $query_supported_run_qemu = sub {
        my ($kvm) = @_;

        my $enabled = {};

        my @accel_args = $kvm ? () : ('-accel', 'tcg');
        my $cmd = [
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
            @accel_args,
        ];

        my $rc = run_command($cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $normalized = $name) =~ tr/.\-/_/;
                $enabled->{$normalized} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        my @sorted = sort keys %$enabled;
        return \@sorted;
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        for my $accel (qw(tcg kvm)) {
            my $use_kvm = $accel eq 'kvm' ? 1 : 0;
            next if $use_kvm && !$kvm_supported;

            $flags->{$accel} = eval { $query_supported_run_qemu->($use_kvm) };
            warn "warning: failed querying supported $accel flags: $@\n" if $@;
        }
    });

    return $flags;
}
3316
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the whitespace separated list of CPU flags from the file belonging to
# the host architecture and return it as array reference. Dies if that file
# does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($filepath);

    # trim both ends so that split() cannot produce an empty leading element
    $content =~ s/^\s+//;
    $content =~ s/\s+$//;

    return [ split(/\s+/, $content) ];
}
3332
# CPU weight of a guest: the configured 'cpuunits' if defined, otherwise 100
# when PVE::CGroup::cgroup_mode() reports 2 and 1024 in every other case.
my sub get_cpuunits {
    my ($conf) = @_;

    my $cpuunits = $conf->{cpuunits};
    return $cpuunits if defined($cpuunits);

    return PVE::CGroup::cgroup_mode() == 2 ? 100 : 1024;
}
# Build the complete QEMU command line for a guest.
#
# Parameters:
#   $storecfg     - storage configuration, handed on to the PVE::Storage helpers
#   $vmid         - ID of the guest
#   $conf         - guest configuration hash
#   $defaults     - fallback values; 'tablet', 'tdf' and 'scsihw' are read here and
#                   the hash is also passed on to PVE::QemuServer::Memory::config()
#   $forcemachine - machine type taking precedence over $conf->{machine}
#   $forcecpu     - if set, used verbatim as the '-cpu' argument
#   $pbs_backing  - optional hash (drive name => PBS config); for every drive listed
#                   a '-blockdev' named "drive-<name>-pbs" is added
#
# Returns ($cmd, $vollist, $spice_port) in list context and only $cmd in scalar
# context. $cmd is an array reference holding the binary and all arguments,
# $vollist lists the volume IDs referenced by the command and $spice_port is only
# defined for guests with a qxl display.
#
# Dies (among other reasons) if the installed QEMU is older than 3.0 or too old for
# the machine type, if KVM is requested but not available, or if a configured
# serial/parallel device does not exist.
sub config_to_command {
    my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
        $pbs_backing) = @_;

    my $cmd = [];
    my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
    my $devices = [];
    my $bridges = {};
    my $ostype = $conf->{ostype};
    my $winversion = windows_version($ostype);
    my $kvm = $conf->{kvm};
    my $nodename = nodename();

    my $arch = get_vm_arch($conf);
    my $kvm_binary = get_command_for_arch($arch);
    my $kvmver = kvm_user_version($kvm_binary);

    if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
        $kvmver //= "undefined";
        die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
    }

    my $add_pve_version = min_version($kvmver, 4, 1);

    my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
    my $machine_version = extract_version($machine_type, $kvmver);
    # KVM is only the default when guest and host architecture match
    $kvm //= 1 if is_native($arch);

    $machine_version =~ m/(\d+)\.(\d+)/;
    my ($machine_major, $machine_minor) = ($1, $2);

    if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
        warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
    } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
        die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
            ." please upgrade node '$nodename'\n"
    } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
        my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
        die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
            ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
            ." node '$nodename'\n";
    }

    # if a specific +pve version is required for a feature, use $version_guard
    # instead of min_version to allow machines to be run with the minimum
    # required version
    my $required_pve_version = 0;
    my $version_guard = sub {
        my ($major, $minor, $pve) = @_;
        return 0 if !min_version($machine_version, $major, $minor, $pve);
        my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
        return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
        # remember the highest +pve level any used feature asked for, it ends
        # up in the '-machine type=' argument below
        $required_pve_version = $pve if $pve && $pve > $required_pve_version;
        return 1;
    };

    if ($kvm && !defined kvm_version()) {
        die "KVM virtualisation configured, but not available. Either disable in VM configuration"
            ." or enable in BIOS.\n";
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
    my $use_old_bios_files = undef;
    ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

    my $cpuunits = get_cpuunits($conf);

    push @$cmd, $kvm_binary;

    push @$cmd, '-id', $vmid;

    my $vmname = $conf->{name} || "vm$vmid";

    push @$cmd, '-name', $vmname;

    push @$cmd, '-no-shutdown';

    my $use_virtio = 0;

    my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
    push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
    push @$cmd, '-mon', "chardev=qmp,mode=control";

    if (min_version($machine_version, 2, 12)) {
        push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
        push @$cmd, '-mon', "chardev=qmp-event,mode=control";
    }

    push @$cmd, '-pidfile', PVE::QemuServer::Helpers::pidfile_name($vmid);

    push @$cmd, '-daemonize';

    if ($conf->{smbios1}) {
        my $smbios_conf = parse_smbios1($conf->{smbios1});
        if ($smbios_conf->{base64}) {
            # Do not pass base64 flag to qemu
            delete $smbios_conf->{base64};
            my $smbios_string = "";
            # sorted, so that the same config always yields the same command line
            foreach my $key (sort keys %$smbios_conf) {
                my $value;
                if ($key eq "uuid") {
                    $value = $smbios_conf->{uuid}
                } else {
                    $value = decode_base64($smbios_conf->{$key});
                }
                # qemu accepts any binary data, only commas need escaping by double comma
                $value =~ s/,/,,/g;
                # skip empty values only - a plain truth test would also drop "0"
                $smbios_string .= "," . $key . "=" . $value
                    if defined($value) && $value ne '';
            }
            push @$cmd, '-smbios', "type=1" . $smbios_string;
        } else {
            push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
        }
    }

    if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
        my $d;
        if (my $efidisk = $conf->{efidisk0}) {
            $d = parse_drive('efidisk0', $efidisk);
        }

        my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
        die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;

        my ($path, $format);
        my $read_only_str = '';
        if ($d) {
            my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
            $format = $d->{format};
            if ($storeid) {
                $path = PVE::Storage::path($storecfg, $d->{file});
                if (!defined($format)) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    $format = qemu_img_format($scfg, $volname);
                }
            } else {
                $path = $d->{file};
                die "efidisk format must be specified\n"
                    if !defined($format);
            }

            $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
        } else {
            warn "no efidisk configured! Using temporary efivars disk.\n";
            $path = "/tmp/$vmid-ovmf.fd";
            PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
            $format = 'raw';
        }

        my $size_str = "";

        if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
            $size_str = ",size=" . (-s $ovmf_vars);
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        my $cache = "";
        if ($path =~ m/^rbd:/) {
            $cache = ',cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }

        push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
        push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
    }

    if ($q35) { # tell QEMU to load q35 config early
        # we use different pcie-port hardware for qemu >= 4.0 for passthrough
        if (min_version($machine_version, 4, 0)) {
            push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
        } else {
            push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
        }
    }

    if ($conf->{vmgenid}) {
        push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
    }

    # add usb controllers
    my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
        $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
    push @$devices, @usbcontrollers if @usbcontrollers;
    my $vga = parse_vga($conf->{vga});

    my $qxlnum = vga_conf_has_spice($conf->{vga});
    $vga->{type} = 'qxl' if $qxlnum;

    if (!$vga->{type}) {
        if ($arch eq 'aarch64') {
            $vga->{type} = 'virtio';
        } elsif (min_version($machine_version, 2, 9)) {
            $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
        } else {
            $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
        }
    }

    # enable absolute mouse coordinates (needed by vnc)
    my $tablet = $conf->{tablet};
    if (!defined($tablet)) {
        $tablet = $defaults->{tablet};
        $tablet = 0 if $qxlnum; # disable for spice because it is not needed
        $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
    }

    if ($tablet) {
        push @$devices, '-device', print_tabletdevice_full($conf, $arch);
        my $kbd = print_keyboarddevice_full($conf, $arch);
        push @$devices, '-device', $kbd if defined($kbd);
    }

    my $bootorder = device_bootorder($conf);

    # host pci device passthrough
    my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
        $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);

    # usb devices
    my $usb_dev_features = {};
    $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);

    my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
        $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
    push @$devices, @usbdevices if @usbdevices;

    # serial devices
    for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
        my $path = $conf->{"serial$i"} or next;
        if ($path eq 'socket') {
            my $socket = "/var/run/qemu-server/${vmid}.serial$i";
            push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
            # On aarch64, serial0 is the UART device. Qemu only allows
            # connecting UART devices via the '-serial' command line, as
            # the device has a fixed slot on the hardware...
            if ($arch eq 'aarch64' && $i == 0) {
                push @$devices, '-serial', "chardev:serial$i";
            } else {
                push @$devices, '-device', "isa-serial,chardev=serial$i";
            }
        } else {
            die "no such serial device\n" if ! -c $path;
            push @$devices, '-chardev', "tty,id=serial$i,path=$path";
            push @$devices, '-device', "isa-serial,chardev=serial$i";
        }
    }

    # parallel devices
    for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
        if (my $path = $conf->{"parallel$i"}) {
            die "no such parallel device\n" if ! -c $path;
            my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
            push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
            push @$devices, '-device', "isa-parallel,chardev=parallel$i";
        }
    }

    if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
        my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
        my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
        push @$devices, @$audio_devs;
    }

    add_tpm_device($vmid, $devices, $conf);

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};

    my $cores = $conf->{cores} || 1;

    my $maxcpus = $sockets * $cores;

    my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;

    my $allowed_vcpus = $cpuinfo->{cpus};

    die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);

    if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
        # start with a single CPU, all further ones are added as separate devices
        push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
        for (my $i = 2; $i <= $vcpus; $i++) {
            my $cpustr = print_cpu_device($conf, $i);
            push @$cmd, '-device', $cpustr;
        }
    } else {
        push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
    }
    push @$cmd, '-nodefaults';

    push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";

    push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;

    push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;

    if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none') {
        push @$devices, '-device', print_vga_device(
            $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
        my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
        push @$cmd, '-vnc', "unix:$socket,password=on";
    } else {
        push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
        push @$cmd, '-nographic';
    }

    # time drift fix
    my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
    my $useLocaltime = $conf->{localtime};

    if ($winversion >= 5) { # windows
        $useLocaltime = 1 if !defined($conf->{localtime});

        # use time drift fix when acpi is enabled
        if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
            $tdf = 1 if !defined($conf->{tdf});
        }
    }

    if ($winversion >= 6) {
        push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
        push @$cmd, '-no-hpet';
    }

    push @$rtcFlags, 'driftfix=slew' if $tdf;

    if ($conf->{startdate} && $conf->{startdate} ne 'now') {
        push @$rtcFlags, "base=$conf->{startdate}";
    } elsif ($useLocaltime) {
        push @$rtcFlags, 'base=localtime';
    }

    if ($forcecpu) {
        push @$cmd, '-cpu', $forcecpu;
    } else {
        push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
    }

    PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);

    push @$cmd, '-S' if $conf->{freeze};

    push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});

    my $guest_agent = parse_guest_agent($conf);

    if ($guest_agent->{enabled}) {
        my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
        push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";

        if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
            my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
            push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
            push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
        } elsif ($guest_agent->{type} eq 'isa') {
            push @$devices, '-device', "isa-serial,chardev=qga0";
        }
    }

    my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
    if ($rng && $version_guard->(4, 1, 2)) {
        check_rng_source($rng->{source});

        my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
        my $period = $rng->{period} // $rng_fmt->{period}->{default};
        my $limiter_str = "";
        if ($max_bytes) {
            $limiter_str = ",max-bytes=$max_bytes,period=$period";
        }

        my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
        push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
        push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
    }

    my $spice_port;

    if ($qxlnum) {
        if ($qxlnum > 1) {
            if ($winversion) {
                # one additional display device per extra monitor
                for (my $i = 1; $i < $qxlnum; $i++) {
                    push @$devices, '-device', print_vga_device(
                        $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
                }
            } else {
                # assume other OS works like Linux
                my ($ram, $vram) = ("134217728", "67108864");
                if ($vga->{memory}) {
                    $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
                    $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
                }
                push @$cmd, '-global', "qxl-vga.ram_size=$ram";
                push @$cmd, '-global', "qxl-vga.vram_size=$vram";
            }
        }

        my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);

        my $pfamily = PVE::Tools::get_host_address_family($nodename);
        my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
        die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;

        push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
        push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
        push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";

        my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
        $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);

        my $spice_enhancement_str = $conf->{spice_enhancements} // '';
        my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
        if ($spice_enhancement->{foldersharing}) {
            push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
            push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
        }

        my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
        $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
            if $spice_enhancement->{videostreaming};

        push @$devices, '-spice', "$spice_opts";
    }

    # enable balloon by default, unless explicitly disabled
    if (!defined($conf->{balloon}) || $conf->{balloon}) {
        my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
        push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
    }

    if ($conf->{watchdog}) {
        my $wdopts = parse_watchdog($conf->{watchdog});
        my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
        my $watchdog = $wdopts->{model} || 'i6300esb';
        push @$devices, '-device', "$watchdog$pciaddr";
        push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
    }

    my $vollist = [];
    my $scsicontroller = {};
    my $ahcicontroller = {};
    my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};

    # Add iscsi initiator name if available
    if (my $initiator = get_initiator_name()) {
        push @$devices, '-iscsi', "initiator-name=$initiator";
    }

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
            check_volume_storage_type($storecfg, $drive->{file});
            push @$vollist, $drive->{file};
        }

        # ignore efidisk here, already added in bios/fw handling code above
        return if $drive->{interface} eq 'efidisk';
        # similar for TPM
        return if $drive->{interface} eq 'tpmstate';

        $use_virtio = 1 if $ds =~ m/^virtio/;

        $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};

        if ($drive->{interface} eq 'virtio') {
            push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
        }

        if ($drive->{interface} eq 'scsi') {

            my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);

            die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
                if $drive->{index} > 13 && !$version_guard->(4, 1, 2);

            my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
            my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;

            my $iothread = '';
            if ($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}) {
                $iothread .= ",iothread=iothread-$controller_prefix$controller";
                push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
            } elsif ($drive->{iothread}) {
                warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
            }

            my $queues = '';
            if ($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}) {
                $queues = ",num_queues=$drive->{queues}";
            }

            # several disks can share a controller, add each controller only once
            push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
                if !$scsicontroller->{$controller};
            $scsicontroller->{$controller} = 1;
        }

        if ($drive->{interface} eq 'sata') {
            my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
            my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
            push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
                if !$ahcicontroller->{$controller};
            $ahcicontroller->{$controller} = 1;
        }

        my $pbs_conf = $pbs_backing->{$ds};
        my $pbs_name = undef;
        if ($pbs_conf) {
            $pbs_name = "drive-$ds-pbs";
            push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
        }

        my $drive_cmd = print_drive_commandline_full(
            $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));

        # extra protection for templates, but SATA and IDE don't support it..
        $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);

        push @$devices, '-drive', $drive_cmd;
        push @$devices, '-device', print_drivedevice_full(
            $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
    });

    for (my $i = 0; $i < $MAX_NETS; $i++) {
        my $netname = "net$i";

        next if !$conf->{$netname};
        my $d = parse_net($conf->{$netname});
        next if !$d;

        $use_virtio = 1 if $d->{model} eq 'virtio';

        $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};

        my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
        push @$devices, '-netdev', $netdevfull;

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);

        push @$devices, '-device', $netdevicefull;
    }

    if ($conf->{ivshmem}) {
        my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});

        my $bus;
        if ($q35) {
            $bus = print_pcie_addr("ivshmem");
        } else {
            $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
        }

        my $ivshmem_name = $ivshmem->{name} // $vmid;
        my $path = '/dev/shm/pve-shm-' . $ivshmem_name;

        push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
        push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
            .",size=$ivshmem->{size}M";
    }

    # pci.4 is nested in pci.1
    $bridges->{1} = 1 if $bridges->{4};

    if (!$q35) { # add pci bridges
        if (min_version($machine_version, 2, 3)) {
            $bridges->{1} = 1;
            $bridges->{2} = 1;
        }
        $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
    }

    # walk the bridges from the highest number down, each one is inserted in
    # front of the previously inserted ones
    for my $k (sort {$b cmp $a} keys %$bridges) {
        next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3

        my $k_name = $k;
        if ($k == 2 && $legacy_igd) {
            $k_name = "$k-igd";
        }
        my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
        my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";

        if ($q35) { # add after -readconfig pve-q35.cfg
            splice @$devices, 2, 0, '-device', $devstr;
        } else {
            unshift @$devices, '-device', $devstr if $k > 0;
        }
    }

    if (!$kvm) {
        push @$machineFlags, 'accel=tcg';
    }

    # only request the +pve level the used features actually need
    my $machine_type_min = $machine_type;
    if ($add_pve_version) {
        $machine_type_min =~ s/\+pve\d+$//;
        $machine_type_min .= "+pve$required_pve_version";
    }
    push @$machineFlags, "type=${machine_type_min}";

    push @$cmd, @$devices;
    push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
    push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
    push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);

    if (my $vmstate = $conf->{vmstate}) {
        my $statepath = PVE::Storage::path($storecfg, $vmstate);
        push @$vollist, $vmstate;
        push @$cmd, '-loadstate', $statepath;
        print "activating and using '$vmstate' as vmstate\n";
    }

    if (PVE::QemuConfig->is_template($conf)) {
        # needed to workaround base volumes being read-only
        push @$cmd, '-snapshot';
    }

    # add custom args
    if ($conf->{args}) {
        my $aa = PVE::Tools::split_args($conf->{args});
        push @$cmd, @$aa;
    }

    return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
}
3964
# Verify that $source can be used as backend of a VirtIO RNG device.
#
# Dies if the file does not exist, or if it is '/dev/hwrng' while the
# 'rng_current' sysfs attribute reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    unless (-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    if ($source eq '/dev/hwrng' && file_read_firstline($rng_current) eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        my $msg = "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n";
        die $msg;
    }
}
3981
# Ask the running guest for its SPICE port via 'query-spice'. The TLS port is
# preferred over the plain one; dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $info = mon_cmd($vmid, 'query-spice');

    for my $key ('tls-port', 'port') {
        return $info->{$key} if $info->{$key};
    }

    die "no spice port\n";
}
3989
# Collect the IDs of the devices currently present in a running guest.
#
# Queries the guest via QMP ('query-pci', 'query-block', 'query-mice' and
# 'qom-list' on /machine/peripheral) and returns a hash reference keyed by
# device ID:
#   - PCI devices by their qdev_id; for bridges the value is 1 plus the number
#     of devices behind the bridge, for all other devices it is 1
#   - block devices by the part following the 'drive-' prefix
#   - 'tablet' if a 'QEMU HID Tablet' is listed
#   - USB devices ('usbN') found below /machine/peripheral
sub vm_devices_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    my $devices = {};
    foreach my $pcibus (@$res) {
        push @$devices_to_check, @{$pcibus->{devices}};
    }

    # walk the PCI topology level by level, descending into bridges
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            my $id = $d->{'qdev_id'};
            $devices->{$id} = 1 if $id;

            # the list of devices behind a bridge is optional, and dereferencing
            # a missing one would die
            my $bridge = $d->{'pci_bridge'};
            my $children = $bridge ? $bridge->{devices} : undef;
            next if !$children;

            # only count for bridges with an ID, to not create a bogus '' entry
            $devices->{$id} += scalar(@$children) if $id;
            push @$to_check, @$children;
        }
        $devices_to_check = $to_check;
    }

    my $resblock = mon_cmd($vmid, 'query-block');
    foreach my $block (@$resblock) {
        if ($block->{device} =~ m/^drive-(\S+)/) {
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    foreach my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    foreach my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb\d+$/) {
            $devices->{$per->{name}} = 1;
        }
    }

    return $devices;
}
4039
# Hot-plug the device $deviceid into the running VM $vmid.
#
# Parameters:
#   $storecfg     - storage configuration, passed on to the drive helpers
#   $conf         - VM configuration hash
#   $vmid         - ID of the running VM
#   $deviceid     - device to plug: 'tablet', 'keyboard', 'virtioN',
#                   'virtioscsiN'/'scsihwN', 'scsiN', 'netN' or (non-q35 only)
#                   'pci.N'
#   $device       - parsed device configuration (drive or net hash); not used
#                   for tablet, keyboard and PCI bridges
#   $arch         - VM architecture
#   $machine_type - VM machine type
#
# Returns 1 if the device was plugged or is already present, and an empty
# value if qemu_netdevadd() reports failure. Dies for 'usbN' (hotplug is
# disabled), for unsupported device IDs and on any hotplug error.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        die "usb hotplug currently not reliable\n";
        # since we can't reliably hot unplug all added usb devices and usb
        # passthrough breaks live migration we disable usb hotplugging for now
        #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # device_add itself is covered by the cleanup too, so a failing
        # device_add does not leave the freshly added drive behind
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # deliberately shadows the parameter: the net device is built for the
        # machine type returned by qemu_machine_pxe()
        my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type);

        # device_add itself is covered by the cleanup too, so a failing
        # device_add does not leave the netdev backend behind
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4134
# fixme: this should raise exceptions on error!
#
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 once the device is unplugged, or right away if it is not present.
# Dies if the device is a boot disk, for 'usbN' (hotplug is disabled) and for
# unsupported device IDs.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    my $is_bootdisk = grep { $_ eq $deviceid } @$bootdisks;
    die "can't unplug bootdisk '$deviceid'\n" if $is_bootdisk;

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
        qemu_devicedel($vmid, $deviceid);
        return 1;
    }

    # when unplugging usb devices this way, there may be remaining usb
    # controllers/hubs so we disable it for now
    #qemu_devicedel($vmid, $deviceid);
    #qemu_devicedelverify($vmid, $deviceid);
    die "usb hotplug currently not reliable\n" if $deviceid =~ m/^usb\d+$/;

    if ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
        return 1;
    }

    if ($deviceid =~ m/^(?:virtioscsi|scsihw)\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        return 1;
    }

    if ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with 'virtio-scsi-single' each disk has its own controller iothread
        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive) if $single_controller;
        return 1;
    }

    if ($deviceid =~ m/^net\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
        return 1;
    }

    die "can't unplug device '$deviceid'\n";
}
4182
# Issue 'device_add' on the monitor of VM $vmid.
#
# $devicefull is a "<driver>,key=value,..." string. It is prefixed with
# "driver=" and split on '=' and ',' into the option hash that is passed to
# the monitor command.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    return mon_cmd($vmid, "device_add", %options);
}
4191
# Issue 'device_del' for $deviceid on the monitor of VM $vmid and return
# the monitor reply.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $reply = mon_cmd($vmid, "device_del", id => $deviceid);

    return $reply;
}
4197
# Create the iothread object "iothread-$deviceid" in VM $vmid if $device has
# 'iothread' set and no such object is listed yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $known_iothreads = vm_iothreads_list($vmid);

    qemu_objectadd($vmid, $object_id, "iothread") if !$known_iothreads->{$object_id};
}
4206
# Remove the iothread object "iothread-$deviceid" from VM $vmid if $device has
# 'iothread' set and such an object is currently listed.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $known_iothreads = vm_iothreads_list($vmid);

    qemu_objectdel($vmid, $object_id) if $known_iothreads->{$object_id};
}
4215
# Issue 'object-add' on the monitor of VM $vmid, creating an object of QOM
# type $qomtype with id $objectid. Returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %arguments = (
        id => $objectid,
        "qom-type" => $qomtype,
    );
    mon_cmd($vmid, "object-add", %arguments);

    return 1;
}
4223
# Issue 'object-del' for the object $objectid on the monitor of VM $vmid.
# Returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %arguments = (id => $objectid);
    mon_cmd($vmid, "object-del", %arguments);

    return 1;
}
4231
# Add the drive described by $device to the running VM $vmid using the HMP
# command 'drive_add'. io_uring is requested from the drive command line
# helper if the running QEMU is at least version 6.0.
#
# Returns 1 on success, dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $use_io_uring = min_version($running_version, 6, 0);

    my $drive_spec = print_drive_commandline_full($storecfg, $vmid, $device, undef, $use_io_uring);
    # backslashes have to be doubled inside the quoted HMP argument
    $drive_spec =~ s/\\/\\\\/g;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_spec\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $reply\n" if $reply !~ m/OK/s;

    return 1;
}
4246
# Remove the drive "drive-$deviceid" from the running VM $vmid using the HMP
# command 'drive_del'.
#
# Returns 1 if the monitor prints nothing (ignoring leading whitespace) or
# reports the device as not found; dies with the monitor output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    my $deleted = $reply eq "";
    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $already_gone = $reply =~ m/Device \'.*?\' not found/s;

    die "deleting drive $deviceid failed : $reply\n" if !$deleted && !$already_gone;

    return 1;
}
4260
# Poll the device list of VM $vmid until $deviceid shows up.
#
# Checks up to six times, sleeping one second after each unsuccessful check.
# Returns 1 once the device is listed, dies otherwise.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4272
4273
# Poll the device list of VM $vmid until $deviceid has disappeared.
#
# need to verify that the device is correctly removed as device_del
# is async and empty return is not reliable
#
# Checks up to six times, sleeping one second after each unsuccessful check.
# Returns 1 once the device is gone, dies otherwise.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4288
# Make sure the SCSI controller that the drive $device belongs to exists in
# the running VM $vmid, hot-plugging it if it is not listed yet. The
# controller id is built from the prefix and index returned by
# scsihw_infos(). Returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = $controller_prefix . $controller;

    my $present = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type)
        if !defined($present->{$scsihwid});

    return 1;
}
4303
# Unplug the SCSI controller belonging to the drive option $opt, if possible.
#
# With 'virtio-scsi-single' the controller "virtioscsi<index>" of the drive
# is unplugged unconditionally. Otherwise "scsihw<controller>" is only
# unplugged if no currently present scsi drive has an index below the limit
# computed from the scsihw_infos() result. Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $scsihw = $conf->{scsihw};
    if ($scsihw && ($scsihw eq 'virtio-scsi-single')) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    my $present = vm_devices_list($vmid);
    for my $name (keys %$present) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        # controller is still in use by another disk - keep it
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4332
# Hot-plug the PCI bridge needed by $device (a device id), if any.
#
# print_pci_addr() is called with a hash reference in which it records the
# bridge(s) required for the device. If a bridge with an id of at least 1 is
# recorded and "pci.<id>" is not present in the running VM, it is plugged via
# vm_deviceplug(). Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    my $bridgeid;

    print_pci_addr($device, $bridges, $arch, $machine_type);

    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # a bridge has no device configuration, but the slot must still be
        # filled so that $arch and $machine_type reach the right parameters
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, {}, $arch, $machine_type);
    }

    return 1;
}
4356
# Issue 'set_link' on the monitor of VM $vmid for the network device
# $device; the link is set up if $up is true and down otherwise.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    return mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4363
# Issue 'netdev_add' on the monitor of VM $vmid for the network device
# $deviceid.
#
# The netdev string from print_netdev_full() is split on '=' and ',' into
# the option hash. 'vhost' is converted to a JSON boolean and 'queues' to a
# number before they are sent. Returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    # force numeric context so the value is not sent as a string
    $options{queues} += 0 if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4381
# Issue 'netdev_del' for $deviceid on the monitor of VM $vmid.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my %arguments = (id => $deviceid);

    return mon_cmd($vmid, "netdev_del", %arguments);
}
4387
# Replace the USB device $deviceid of the running VM $vmid with the one
# described by $device. Does nothing if $device is false.
#
# The old device is unplugged first. If the new device has 'usb3' set and no
# 'xhci' device is present, a 'nec-usb-xhci' controller is added before the
# device parsed from $device->{host} is plugged.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    if ($device->{usb3} && !vm_devices_list($vmid)->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr");
    }

    my $usb_device = parse_usb_device($device->{host});
    $usb_device->{usb3} = $device->{usb3};

    # add the new one
    return vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $usb_device, $arch, $machine_type);
}
4412
# Change the number of vCPUs of the running VM $vmid to $vcpus.
#
# A false $vcpus means "all CPUs" (sockets * cores). Dies if $vcpus exceeds
# that maximum, if the running vCPU count does not match the configuration
# before plugging, or if unplugging is requested on a machine version older
# than 2.7. After every single successful plug/unplug on machine versions
# >= 2.7, $conf->{vcpus} is updated and the configuration is written.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count - no longer used
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # Poll 'query-cpus-fast' once per second until $expected vCPUs are
    # reported; raises a parameter exception once more than $max_retries
    # retries were needed. Returns the last query result.
    my $wait_for_cpu_count = sub {
        my ($expected, $max_retries, $error) = @_;

        my $retry = 0;
        while (1) {
            my $running_cpus = mon_cmd($vmid, "query-cpus-fast");
            return $running_cpus if scalar(@{$running_cpus}) == $expected;
            raise_param_exc({ vcpus => $error }) if $retry > $max_retries;
            $retry++;
            sleep 1;
        }
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        for my $i (reverse($vcpus + 1 .. $currentvcpus)) {
            qemu_devicedel($vmid, "cpu$i");
            my $running_cpus = $wait_for_cpu_count->($i - 1, 5, "error unplugging cpu$i");

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_before = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_before}) != $currentvcpus;

    if (!PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        # older machine versions only know the 'cpu-add' command
        for my $i ($currentvcpus .. $vcpus - 1) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
        return;
    }

    for my $i ($currentvcpus + 1 .. $vcpus) {
        my $cpustr = print_cpu_device($conf, $i);
        qemu_deviceadd($vmid, $cpustr);
        my $running_cpus = $wait_for_cpu_count->($i, 10, "error hotplugging cpu$i");

        # update conf after each successful cpu hotplug
        $conf->{vcpus} = scalar(@{$running_cpus});
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    return;
}
4487
# Apply I/O throttle limits to the block device $deviceid of VM $vmid via the
# 'block_set_io_throttle' monitor command. Does nothing if the VM is not
# running.
#
# The limits are positional arguments following $vmid and $deviceid, in the
# order listed in @limit_names below; each one is converted with int()
# before it is sent.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my %throttle = map { ($limit_names[$_], int($limits[$_])) } 0 .. $#limit_names;

    return mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, %throttle);
}
4519
# Resize the volume $volid to $size and, if the VM is running, issue
# 'block_resize' for $deviceid on its monitor.
#
# If PVE::Storage::volume_resize() returns a false value, a size of 0 is sent
# to the monitor instead. The size sent is rounded up to a multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resized;

    return if !$running;

    # round up to the next multiple of 1024
    my $remainder = $size % 1024;
    $size += 1024 - $remainder if $remainder;

    my %arguments = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );

    return mon_cmd($vmid, "block_resize", %arguments);
}
4540
# Create the snapshot $snap of the volume $volid.
#
# If the VM is running and do_snapshots_with_qemu() says so, the snapshot is
# taken by QEMU ('blockdev-snapshot-internal-sync' on $deviceid); otherwise
# it is taken by the storage layer.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $via_qemu = check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4552
# Delete the snapshot $snap of the volume $volid.
#
# The volume only counts as "in use by a running VM" if the VM is running
# and the volume is referenced by a drive in its current configuration. In
# that case, and if do_snapshots_with_qemu() says so, QEMU deletes the
# snapshot; otherwise the storage layer does, and is told whether the volume
# is in use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # running VM - but is this volume actually attached to it?
        $in_use = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $mark_if_attached = sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($conf, $mark_if_attached);
    }

    my $via_qemu = $in_use && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4574
# Set the migration capabilities of the running VM $vmid.
#
# Every capability reported by 'query-migrate-capabilities' is explicitly
# switched on or off according to the table below. 'dirty-bitmaps' is only
# enabled if 'query-proxmox-support' reports the matching property
# ('pbs-dirty-bitmap-savevm' when $savevm is true, else
# 'pbs-dirty-bitmap-migration'); errors of that query are ignored.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = 'pbs-dirty-bitmap-migration';
    $bitmap_prop = 'pbs-dirty-bitmap-savevm' if $savevm;

    my $wanted = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $qemu_support->{$bitmap_prop} ? 1 : 0,
    };

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my $capabilities = [];
    for my $entry (@$supported) {
        my $name = $entry->{capability};
        my $state = $wanted->{$name} ? JSON::true : JSON::false;
        push @$capabilities, { capability => $name, state => $state };
    }

    return mon_cmd($vmid, "migrate-set-capabilities", capabilities => $capabilities);
}
4605
# Call $func once for every distinct volume referenced by $conf or by one of
# its snapshots (including the 'vmstate' key and unused volumes).
#
# $func is called as $func->($volid, $attributes, @param), where $attributes
# is a hash with the keys: cdrom, replicate, shared, referenced_in_config,
# referenced_in_snapshot (hash keyed by snapshot name, only if referenced),
# size (first non-zero size seen), is_vmstate, is_tpmstate, is_unused and
# drivename (only for valid drive names).
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $collect = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        # a volume only counts as cdrom if every reference to it is a cdrom
        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        # replication is on by default
        $info->{replicate} //= 0;
        $info->{replicate} = 1 if $drive->{replicate} // 1;

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $info->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $info->{referenced_in_config} = 1;
        }

        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} //= 0;
        $info->{is_vmstate} = 1 if $key eq 'vmstate';

        $info->{is_tpmstate} //= 0;
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{is_unused} //= 0;
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);

    my $snapshots = $conf->{snapshots};
    for my $snapname (keys %{$snapshots}) {
        PVE::QemuConfig->foreach_volume_full($snapshots->{$snapname}, $include_opts, $collect, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4663
# Options that vmconfig_hotplug_pending() copies from the pending section
# straight into the active configuration, without any hotplug action.
my $fast_plug_option = {
    map { ($_ => 1) } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};
4676
# hotplug changes in [PENDING]
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages
#
# Processing order:
#   1. options listed in $fast_plug_option are moved from pending into the
#      active config and written out
#   2. pending deletions are hot-unplugged/reset where possible
#   3. pending additions/changes are hot-plugged where possible
# An option whose handler dies with "skip\n" stays pending without an error;
# any other failure is recorded in $errors->{$opt}. The configuration is
# written again at the end.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            $conf->{$opt} = $conf->{pending}->{$opt};
            delete $conf->{pending}->{$opt};
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                # guard against an unset value to avoid an 'uninitialized' warning
                die "skip\n" if (($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($defaults->{tablet}) {
                    # tablet/keyboard have no device config, but the $device
                    # slot of vm_deviceplug() must still be filled
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we are disabling it
                #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
                #vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef, 1024);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            &$add_error($opt, $err) if $err ne "skip\n";
        } else {
            delete $conf->{$opt};
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # Applies all pending cloud-init options (additions and deletions) to
    # $conf and regenerates the cloud-init config with the given drive
    # ($key => $value) set. Only the first call has an effect.
    my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
    $apply_pending_cloudinit = sub {
        return if $apply_pending_cloudinit_done; # once is enough
        $apply_pending_cloudinit_done = 1; # once is enough

        my ($key, $value) = @_;

        my @cloudinit_opts = keys %$confdesc_cloudinit;
        foreach my $opt (keys %{$conf->{pending}}) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }

        my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
        foreach my $opt (sort keys %$pending_delete_hash) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }

        my $new_conf = { %$conf };
        $new_conf->{$key} = $value;
        PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
    };

    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                # guard against an unset value to avoid an 'uninitialized' warning
                my $old_hotplug = $conf->{hotplug} // '';
                die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $old_hotplug =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    # tablet/keyboard have no device config, but the $device
                    # slot of vm_deviceplug() must still be filled
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+$/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we disable it for now
                #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
                #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                #die "skip\n" if !$d;
                #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    &$apply_pending_cloudinit($opt, $value);
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                                     $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            &$add_error($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);
}
4871
# Try to free the volume behind the drive $drive (config key $key).
#
# Nothing happens (empty return) unless $force is set or $key is an unused
# disk, and the drive is not a cdrom. If the VM owns the volume, the user
# needs 'Datastore.AllocateSpace' on its storage and the volume must not be
# in use any more; it is then freed. Returns 1 when the entry may be removed
# from the configuration, including the case where the VM is not the owner.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    my $removal_requested = $force || $key =~ /^unused/;
    return if !$removal_requested || drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    # called in scalar context on purpose, exactly as before
    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    my $still_used = PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid);
    die "unable to delete '$volid' - volume is still in use (snapshot?)\n" if $still_used;

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
4894
# Handle the removal of the drive option $opt from $conf.
#
# With $force the current user needs 'VM.Config.Disk' on the VM and the
# volume is deallocated via try_deallocate_drive(); without it the drive is
# registered as an unused disk.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # plain detach: keep the volume around as unused disk
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);

    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
4910
4911
4912
# Apply all pending changes of $conf to the active configuration (cold plug)
# and write the configuration once at the end.
#
# Pending deletions are handled first: drives are deleted or detached, then
# the option is removed. Afterwards every pending value replaces the active
# one; a drive that gets replaced is registered as unused first. Failures
# are recorded in $errors->{$opt} and printed as warning; the affected option
# stays pending.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    my $report_failure = sub {
        my ($opt, $msg) = @_;

        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};

        eval {
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $report_failure->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure

        eval {
            # the drive being replaced is kept as unused disk
            vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $report_failure->($opt, $err);
            next;
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
}
4962
# Apply the new value $value of the network option $opt to the running VM.
#
# If model, MAC address or queues differ from the active configuration, or
# either the old or the new device has no bridge, the device is unplugged
# and plugged again (dies with "skip\n" if $hotplug is false). Otherwise the
# change is applied in place: the tap device is re-plugged on bridge, tag,
# trunks or firewall changes, the rate limit is updated on its own, and the
# link status is adjusted; 1 is returned in that case. A device without
# active configuration is simply hot-plugged.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !$newnet->{bridge}
            || !$oldnet->{bridge}; # bridge/nat mode change

        if (!$needs_replug) {
            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            my $tap_settings_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($tap_settings_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5015
# Check whether a change of the guest agent option can be applied to a running VM.
#
# Dies with "skip\n" if $conf->{$opt} is not set, or if any sub-option other than
# the ones listed as hotpluggable differs between the active and the new value.
# Returns nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotpluggable = { fstrim_cloned_disks => 1 };

    my $current = parse_guest_agent($conf);
    my $requested = parse_guest_agent({$opt => $value});

    # first pass: added/changed options, second pass: removed options
    for my $pair ([$requested, $current], [$current, $requested]) {
        my ($left, $right) = @$pair;
        for my $key (keys %$left) {
            next if defined($hotpluggable->{$key});
            die "skip\n" if safe_string_ne($left->{$key}, $right->{$key});
        }
    }

    # either no actual change (e.g., format string reordered) or just hotpluggable changes
    return;
}
5038
# Apply a new or changed drive option (e.g. "scsi0", "ide2") to a VM.
#
# Parameters:
#   $storecfg, $arch, $machine_type - forwarded to the storage/plug helpers
#   $conf    - VM config hash; $conf->{$opt} is the currently active value, if any
#   $hotplug - true if disks may be hot-(un)plugged
#   $vmid    - VM ID
#   $opt     - config key of the drive
#   $value   - new property string for $opt
#
# Returns 1 if the change was applied to the existing drive (throttle update or
# cdrom media change). Dies with "skip\n" if the change cannot be applied this
# way, and with an explicit message when the media type would change.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);
    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # same volume: update the existing disk

            # skip non hotpluggable value
            for my $prop (qw(discard iothread queues cache ssd)) {
                die "skip\n" if safe_string_ne($drive->{$prop}, $old_drive->{$prop});
            }

            # apply throttle if any of the limits changed
            my @throttle_props = qw(
                mbps mbps_rd mbps_wr
                iops iops_rd iops_wr
                mbps_max mbps_rd_max mbps_wr_max
                iops_max iops_rd_max iops_wr_max
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );
            my $throttle_changed = 0;
            for my $prop (@throttle_props) {
                next if !safe_num_ne($drive->{$prop}, $old_drive->{$prop});
                $throttle_changed = 1;
                last;
            }

            if ($throttle_changed) {
                # mbps* values are multiplied by 1024*1024, unset limits are passed
                # as 0 and unset *_length values as 1
                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } qw(mbps mbps_rd mbps_wr)),
                    (map { $drive->{$_} || 0 } qw(iops iops_rd iops_wr)),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } qw(mbps_max mbps_rd_max mbps_wr_max)),
                    (map { $drive->{$_} || 0 } qw(iops_max iops_rd_max iops_wr_max)),
                    (map { $drive->{$_} || 1 } qw(
                        bps_max_length bps_rd_max_length bps_wr_max_length
                        iops_max_length iops_rd_max_length iops_wr_max_length
                    )),
                );
            }

            return 1;
        }

        # different volume: needs a replug
        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}])
        if $drive->{file} !~ m|^/dev/.+|;
    vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5146
# called in locked context by incoming migration
#
# Collects the volumes of $conf that are neither cdrom drives nor located on a
# storage marked as shared.
#
# Returns a hash ref: drive key => [$volid, $storeid, $volname, $drive, $use_existing],
# where $use_existing is 1 if $replicated_volumes has a true entry for the volid.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local_volumes;
}
5172
# called in locked context by incoming migration
#
# Allocates target volumes for the entries of $source_volumes (as produced by
# vm_migrate_get_nbd_disks). Entries flagged as re-usable are not allocated again.
#
# Returns a hash ref: drive key => { drivestr, volid [, replicated => 1] }.
#
# NOTE: for newly allocated volumes the drive hash stored in $source_volumes is
# modified in place (its 'format' and 'file' entries are overwritten).
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};

    for my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing) = $source_volumes->{$opt}->@*;

        if ($use_existing) {
            my $drivestr = print_drive($drive);
            $nbd->{$opt}->{drivestr} = $drivestr;
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # If a remote storage is specified and the format of the original
        # volume is not available there, fall back to the default format.
        # Otherwise use the same format as the original.
        my $format;
        if ($storagemap->{identity}) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        } else {
            $storeid = map_storage($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $fileFormat = qemu_img_format($scfg, $volname);
            my $is_valid = grep { $fileFormat eq $_ } @{$validFormats};
            $format = $is_valid ? $fileFormat : $defFormat;
        }

        # presumably converts bytes to KiB for vdisk_alloc - TODO confirm units
        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # point the (shared) drive hash at the freshly allocated volume
        $drive->{format} = $format;
        $drive->{file} = $newvolid;

        $nbd->{$opt}->{drivestr} = print_drive($drive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5216
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Takes the config lock of the VM, performs the pre-start checks and then hands
# over to vm_start_nolock. Returns whatever the locked code returns.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $locked_start = sub {
        my $migratedfrom = $migrate_opts->{migratedfrom};
        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migratedfrom);

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        my $storagemap = $migrate_opts->{storagemap};
        if ($storagemap) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the drive strings of the target volumes
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $locked_start);
}
5260
5261
# Start the QEMU process for a VM using an already loaded config. This sub does
# not take the config lock itself; vm_start is the locking front end.
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes = which volids should be re-used with bitmaps for nbd migration
#
# Returns a hash ref which may contain: migrate_uri, migrate_storage_uri,
# drives (per NBD-exported drive, including nbd_uri) and spice_port.
# Dies with "start failed: ..." if the QEMU process could not be started.
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # lazily determined and cached local IP to use for migration traffic
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = $ips->[0];
        }

        # no migration network configured: fall back to the node's IP
        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be default for secure migrations as a ssh TCP forward
            # tunnel is not deterministically ready and regularly fails
            # to set up in time, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            # state file given as existing path
            push @$cmd, '-loadstate', $statefile;
        } else {
            # state file given as volume ID
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } values $pci_devices->%* ];
    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
            }
        }
    };
    if (my $err = $@) {
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    # best effort: errors of stopping the scope are deliberately ignored
    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);

    my $cpuunits = get_cpuunits($conf);

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            # 'noerr' is set in %run_params, so a failure is reported via the exit code
            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    # NOTE: $@ must be read right here, before any other eval clobbers it
    if (my $err = $@) {
        # deactivate volumes if start fails; cleanup problems are only warned
        # about so that the original start error is the one that gets raised
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    # state was loaded from a file/volume (not an incoming migration): continue the VM
    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    #start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        # ($migration_type is only defaulted for statefile 'tcp', so it may be undef here)
        if ($nbd_protocol_version > 0 && ($migration_type // '') eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
    }

    # enable guest statistics polling unless ballooning is explicitly disabled (balloon: 0)
    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5636
# Build the QEMU command line for a VM and return it as a single string.
#
# If $snapname is given, the command line is generated from that snapshot's
# config section instead of the current config; dies if the snapshot is unknown.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname}
            // die "snapshot '$snapname' does not exist\n";

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = $snapshot->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $defaults = load_defaults();
    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5662
# Send a 'system_reset' monitor command to the VM while holding its config lock.
# Unless $skiplock is set, the config's lock is checked first.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
5675
# Return an array ref with the volume IDs referenced by $conf. Entries starting
# with '/' and entries that do not parse as a storage volume ID are left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        # absolute paths are not storage volumes
        return if $volid =~ m|^/|;

        # second argument: do not die on unparsable IDs (presumably 'noerr' - confirm)
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
5693
# Clean up after a VM was stopped: deactivate volumes (unless $keepActive),
# remove runtime files, release host PCI/mdev resources and optionally apply
# pending config changes. Errors are only warned about, never raised.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        if (!$keepActive) {
            my $vollist = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $vollist);

            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        my @runtime_files = map { "/var/run/qemu-server/${vmid}.$_" } qw(mon qmp pid vnc qga);
        unlink @runtime_files;

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # Just delete it for now. VMs which already have it open are not
            # affected, but new VMs will get a separate one. If this becomes an
            # issue we either add some sort of ref-counting or just add a
            # "don't delete on stop" flag to the ivshmem format.
            unlink '/dev/shm/pve-shm-' . ($ivshmem->{name} // $vmid);
        }

        my $ids = [];
        for my $key (keys %$conf) {
            next if $key !~ m/^hostpci(\d+)$/;
            my $hostpciindex = $1; # save before other code can clobber $1

            my $d = parse_hostpci($conf->{$key});
            my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);

            for my $pciid (map { $_->{id} } @{$d->{pciid}}) {
                push @$ids, $pciid;
                PVE::SysFSTools::pci_cleanup_mdev_device($pciid, $uuid);
            }
        }
        PVE::QemuServer::PCI::remove_pci_reservation($ids);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
5744
# call only in locked context
#
# Stop or shut down a running VM and clean up afterwards.
#
#   $shutdown - request a guest shutdown (via guest agent if enabled in the
#               config, else 'system_powerdown') instead of 'quit'
#   $timeout  - seconds to wait for the VM to stop; defaults to the 'down' value
#               of the startup option (shutdown only) or 60
#   $force    - escalate to SIGTERM, then SIGKILL, instead of dying on failure
#   $nocheck  - do not load the config (no lock check, no hookscript, no cleanup)
#
# Returns early if the VM is not running.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # polls once per second until the VM is gone or $limit polls were done;
    # returns true if the limit was reached
    my $wait_timed_out = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }
        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout = 60 if !defined($timeout);

        if (!$wait_timed_out->($timeout)) {
            # stopped in time
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($wait_timed_out->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
5826
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# With $migratedfrom set, the process is sent SIGTERM directly and the cleanup
# runs without taking the config lock; otherwise _do_vm_stop runs under the lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop is forceful unless the caller explicitly said otherwise
    $force //= 1 if !$shutdown;

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
5847
# Reboot a running VM: create a reboot request and shut the VM down under the
# config lock. A VM that is not running is left alone. If anything fails, the
# reboot request is cleared again before the error is re-raised.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
5870
# note: if using the statestorage parameter, the caller has to check privileges
#
# Suspend a VM. Without $includestate the VM is only paused via the 'stop'
# monitor command. With $includestate the VM state is saved to a state volume
# and QEMU is quit afterwards; this happens in two locked phases with the
# actual state saving in between, guarded by the 'suspending' config lock.
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # shared between the two locked phases below
    my ($conf, $path, $storecfg, $vmstate);

    # phase 1: checks, then either pause or prepare the state volume
    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        if (!$skiplock && !$is_backing_up) {
            PVE::QemuConfig->check_lock($conf);
        }

        die "cannot suspend to disk during backup\n"
            if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);

            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    });

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll once per second until saving completed or failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $err = $@;

        # phase 2: finish (or roll back) under the lock again
        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            die "lock changed unexpectedly\n"
                if !PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
5965
# Resumes the VM $vmid under the config lock.
#
# The monitor command depends on the status reported by 'query-status':
# nothing is done for 'running', 'system_wakeup' is used for 'suspended' and
# 'cont' otherwise. A 'shutdown' status triggers a 'system_reset' first.
#
# $skiplock: skip the config lock check
# $nocheck:  do not load the config / check its lock at all
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $status = $res->{status};

        my ($resume_cmd, $reset) = ('cont', 0);
        if ($status) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $reset = 1 if $status eq 'shutdown';
        }

        if (!$nocheck) {
            my $conf = PVE::QemuConfig->load_config($vmid);

            # a 'backup' lock does not prevent resuming
            if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
                PVE::QemuConfig->check_lock($conf);
            }
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        mon_cmd($vmid, $resume_cmd);
    });
}
5996
# Sends the key (combination) $key to the VM $vmid while holding the config
# lock. Dies with the monitor output if the command printed anything.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send = sub {
        # the returned config is not used any further in this function
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send);
}
6009
# vzdump restore implementation
6011
# Returns the name of the first member listed by 'tar tf' for $archive.
#
# Dies if $archive is not a regular file, if the 'tar' process cannot be
# started, or if the listing yields no entry.
sub tar_archive_read_firstfile {
    my $archive = shift;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive' - $!\n";
    my $firstfile = <$fh>;
    # only the first entry is needed, so stop tar instead of reading the whole listing
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contains no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
6029
# Removes the temporary volumes recorded in the restore stat file $statfile.
#
# Every line is expected to end in 'vzdump:<name>:<volid>'. A volid starting
# with '/' is unlinked as a plain file, anything else is freed through
# PVE::Storage::vdisk_free(). Failures are reported on STDERR and do not stop
# the cleanup of the remaining volumes. Nothing but the start message is
# printed if the stat file cannot be opened.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # 'or' (not '||') so that the die applies to unlink's
                        # result and not to its argument
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' successfully removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6055
# Dispatches a restore to the matching archive handler.
#
# An $archive of '-' goes straight to restore_vma_archive(). Otherwise the
# format is taken from $opts->{format}, falling back to what
# PVE::Storage::archive_info() reports: 'tar' is handled by
# restore_tar_archive(), everything else by restore_vma_archive(), which also
# gets the detected compression.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};
    my $comp = $info->{compression};

    # try to detect archive format
    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $comp);
}
6073
# helper to remove disks that will not be used after restore
#
# $storecfg:     Storage configuration
# $vmid:         ID of the VM that gets overwritten
# $oldconf:      config of the VM before the restore
# $virtdev_hash: devices that will be restored, keyed by config key
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        # absolute paths are not storage-managed volumes
        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        # never touch volumes that are not owned by this VM
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        return if !$virtdev_hash->{$ds};

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    });

    # delete vmstate files, after the restore we have no snapshots anymore
    foreach my $snapname (keys %{$oldconf->{snapshots}}) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        next if !$vmstate;

        eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
        warn $@ if $@;
    }
};
6110
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
#
# Dies if a '#qmdump#map' line references a device without an entry in
# $devinfo. Also dies, via $rpcenv->check(), if $user (other than root@pam)
# lacks 'Datastore.AllocateSpace' on a target storage.
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    my $virtdev_hash = {};

    # every storage a volume gets allocated on needs the same permission
    # check, no matter whether it holds a restored disk or a cloud-init drive
    my $check_storage_perm = sub {
        my ($storeid) = @_;
        return if $user eq 'root@pam';
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
    };

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
            die "archive does not contain data for drive '$virtdev'\n"
                if !$devinfo->{$devname};

            # an explicitly requested storage overrides the hint from the backup
            if (defined($options->{storage})) {
                $storeid = $options->{storage} || 'local';
            } elsif (!$storeid) {
                $storeid = 'local';
            }
            $format = 'raw' if !$format;
            $devinfo->{$devname}->{devname} = $devname;
            $devinfo->{$devname}->{virtdev} = $virtdev;
            $devinfo->{$devname}->{format} = $format;
            $devinfo->{$devname}->{storeid} = $storeid;

            # check permission on storage
            $check_storage_perm->($storeid);

            $virtdev_hash->{$virtdev} = $devinfo->{$devname};
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);
            if (drive_is_cloudinit($drive)) {
                my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
                $storeid = $options->{storage} if defined ($options->{storage});
                my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

                # the cloud-init image gets allocated on this storage as well
                $check_storage_perm->($storeid);

                $virtdev_hash->{$virtdev} = {
                    format => $format,
                    storeid => $storeid,
                    size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                    is_cloudinit => 1,
                };
            }
        }
    }

    return $virtdev_hash;
};
6171
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash is updated in place: 'format' falls back to the
# storage default if the requested one is not supported, and 'volid' is set to
# the newly allocated volume.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # size is divided by 1024, rounding up
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloud-init images get an explicit name
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as file extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6215
# Translates one line of a backed-up VM config into the text that goes into
# the restored config.
#
# $cookie: state shared across calls; 'netcount' numbers converted net devices
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the config line, including its trailing newline
# $unique: if set, MAC addresses and the smbios1 UUID are regenerated
#
# Returns the replacement text, which is '' for lines that get dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused volumes and the snapshot parent are dropped
    for my $dropped (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $dropped;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);
        my $converted = '';
        foreach my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;
            my $net = {
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            };
            my $netstr = print_net($net);

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        # only replace a MAC address that was actually configured
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
        $netstr = print_net($net);
        return "$id: $netstr\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives excluded from the backup are kept as a comment
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume are copied verbatim
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        $value = print_drive($di);
        return "$virtdev: $value\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($key, $value) = ($1, $2);
        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);
        my $smbios1 = parse_smbios1($value);
        $smbios1->{uuid} = $uuid_str;
        return $key.print_smbios1($smbios1)."\n";
    }

    return $line;
}
6284
# Deactivates every volume that was recorded as 'volid' in the entries of
# $devinfo.
#
# $storecfg: Storage configuration
# $devinfo:  device info hash, keyed by device name
my $restore_deactivate_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    # entries without an allocated volume are left out
    my $vollist = [ grep { $_ } map { $_->{volid} } values %$devinfo ];

    PVE::Storage::deactivate_volumes($storecfg, $vollist);
};
6296
# Removes every volume that was recorded as 'volid' in the entries of
# $devinfo.
#
# A volid starting with '/' is unlinked as a plain file, anything else is
# freed through PVE::Storage::vdisk_free(). Failures are reported on STDERR
# and do not stop the cleanup of the remaining volumes.
#
# $storecfg: Storage configuration
# $devinfo:  device info hash, keyed by device name
my $restore_destroy_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    foreach my $devname (keys %$devinfo) {
        my $volid = $devinfo->{$devname}->{volid};
        next if !$volid;
        eval {
            if ($volid =~ m|^/|) {
                # 'or' (not '||') so that the die applies to unlink's result
                # and not to its argument
                unlink($volid) or die "unlink failed - $!\n";
            } else {
                PVE::Storage::vdisk_free($storecfg, $volid);
            }
            print STDERR "temporary volume '$volid' successfully removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6314
# Lists the 'images' volumes reported by PVE::Storage::vdisk_list() for $vmid
# and returns them as a hash keyed by volume ID. Each entry is the list item
# itself, extended by its 'path'. Items without a volid or size are skipped.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $items (values %$info) {
        for my $item (@$items) {
            next if !$item->{volid} || !$item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $volid_hash->{$item->{volid}} = $item;
        }
    }

    return $volid_hash;
}
6331
# Brings the disk entries of $conf in line with the volumes in $volid_hash.
#
# Three passes over the config:
#  1. update the size of every configured (non-CD-ROM) drive,
#  2. drop 'unusedX' entries whose volume is referenced elsewhere,
#  3. add volumes that are not referenced at all as new unused entries.
#
# $conf is modified in place and a message is printed for every change.
# Returns a true value if anything was changed.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # used and unused disks
    my $seen_volids = {};

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my $seen_paths = {};

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volids->{$volid} = 1;
        if ($volume && (my $path = $volume->{path})) {
            $seen_paths->{$path} = 1;
        }

        return if drive_is_cdrom($drive);
        return if !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($seen_volids->{$volid} || ($path && $seen_paths->{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volids->{$volid} = 1;
        $seen_paths->{$path} = 1 if $path;
    });

    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volids->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_paths->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_paths->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6405
# Rescans the storages and updates the disk configuration of one VM, or of
# all VMs returned by config_list() if $vmid is undefined.
#
# $nolock: run the update without taking the config lock
# $dryrun: report changes only, do not write the config
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # only pass on the volumes that belong to this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next if !($info->{vmid} && $info->{vmid} == $vmid);
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($vmid, $conf, $vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    # runs the update for a single VM, locked unless $nolock is set
    my $update_vm = sub {
        my ($id) = @_;
        if ($nolock) {
            $updatefn->($id);
        } else {
            PVE::QemuConfig->lock_config($id, $updatefn, $id);
        }
    };

    if (defined($vmid)) {
        $update_vm->($vmid);
    } else {
        my $vmlist = config_list();
        for my $id (keys %$vmlist) {
            $update_vm->($id);
        }
    }
}
6449
# Restores the VM $vmid from the Proxmox Backup Server snapshot $archive
# (a volume ID on a PBS storage).
#
# $user:    user the restore runs for, used for storage permission checks
# $options: restore options; this function reads 'live' and 'pool' and passes
#           'unique' on per config line. The whole hash is also handed to the
#           backup hint parser.
#
# The index and the VM config (plus firewall config, if present) are fetched
# into a temporary directory. Then the target volumes are allocated, filled
# via 'pbs-restore' and the new VM config is written. With $options->{live}
# only the EFI disk and TPM state are restored up front, the other disks are
# streamed in by pbs_live_restore() while the VM is running.
#
# On error, all volumes recorded in $devinfo are destroyed before dying.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {};

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        # collect the size of every disk image listed in the index
        foreach my $info (@{$index->{files}}) {
            # the dots are escaped so that only a literal '.img.fidx' suffix matches
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the config: build the new config text
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes have to stay active
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $devinfo);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $devinfo);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
6652
# Starts the VM $vmid with its disks backed by the PBS snapshot $snap and
# streams the data into the target volumes while the guest is running.
#
# $conf:           VM config
# $storecfg:       Storage configuration
# $restored_disks: hash keyed by 'drive-<confname>' of the disks to stream
# $repo:           PBS repository
# $keyfile:        encryption key file, only used if it exists
# $snap:           name of the PBS snapshot
#
# Dies if a key of $restored_disks does not have the 'drive-<confname>' form.
# If anything fails after that, the VM is stopped and 'live-restore failed'
# is raised.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$repo', snapshot: '$snap'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale capture from an earlier match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' - expected 'drive-<name>'\n";

        $pbs_backing->{$confname} = {
            repository => $repo,
            snapshot => $snap,
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $keyfile if -e $keyfile;

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    my $drives_streamed = 0;
    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
6722
# Restores the VM $vmid from the VMA archive $archive ('-' reads from STDIN).
#
# $user: user the restore runs for, used for storage permission checks
# $opts: restore options; this function reads 'bwlimit' and 'pool' and passes
#        'unique' on per config line. The whole hash is also handed to the
#        backup hint parser.
# $comp: compression of the archive, if any
#
# A pipeline of optional rate limiter ('cstream'), optional decompressor and
# 'vma extract' is run. Once 'vma' has printed the device list and the CTIME
# line, the target volumes are allocated and the device map is written to a
# fifo that 'vma' reads. On error, all volumes recorded in $devinfo are
# destroyed before dying.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # appends a stage to the pipeline; every later stage reads from STDIN
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $cmd = $info->{decompressor};
        push @$cmd, $readfrom;
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {};

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocates the target volumes, writes the device map to the fifo and
    # builds the new config text
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # look up the restore rate limit once per target storage
        for my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        for my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        for my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            # zeros are only written when the volume lacks the 'sparseinit' feature
            my $write_zeros = PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid) ? 0 : 1;

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        # second pass over the config: build the new config text
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        # handles one line of 'vma extract' output
        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $devinfo);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $devinfo);
        die $err;
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
6938
# Restore a VM from a tar-based vzdump archive.
#
# Parameters:
#   $archive - path of the tar archive, or '-' to read the archive from STDIN
#   $vmid    - ID of the VM to restore into
#   $user    - exported to the extraction helper as VZDUMP_USER
#   $opts    - hash reference; the keys read here are 'storage', 'pool',
#              'prealloc', 'info' and 'unique'
#
# The archive is unpacked through qmextract (run via tar's --to-command), the
# volume mapping from the stat file is read back, and every line of the
# archived qemu-server.conf is passed through restore_update_config_line()
# before the result is written to the VM's config file.
#
# Dies if a regular archive does not start with 'qemu-server.conf', or if
# extraction or config processing fails. On failure tar_restore_cleanup() is
# called (unless 'info' is set) and the temporary directory is removed before
# the error is re-raised.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # NOTE(review): an older comment here mentioned skiplock=1, but the call
    # passes { lock => 'restore' } - confirm against destroy_vm's signature
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block; the code after the
        # eval (config write, rescan) still runs with an empty
        # $new_conf_raw - confirm this is intended for 'info'
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # tar_restore_cleanup() has been handed the stat file by now, so
        # remove the temporary directory instead of leaking it on every
        # failed restore
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    # a failing rescan is only reported, it does not fail the restore
    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7046
# Call $func once for every storage ID referenced by a non-CD-ROM volume of
# $conf. Each storage ID is reported only once, in sorted order.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %storage_seen;

    my $remember_storage = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $storage_seen{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $remember_storage);

    for my $sid (sort keys %storage_seen) {
        $func->($sid);
    }
}
7066
# Storage types that make do_snapshots_with_qemu() return 1, as long as the
# storage is not configured with 'krbd'.
my $qemu_snap_storage = {
    rbd => 1,
};

# Returns 1 if the volume lives on a storage type listed in
# $qemu_snap_storage (without 'krbd' set) or if its volume ID ends in
# .qcow2 or .qed. Returns nothing otherwise, and always returns nothing for a
# device ID containing 'tpmstate0'.
#
# Dies if the storage of $volid is not present in $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    # scalar context on purpose: only the storage name is wanted here
    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    my $listed_type_without_krbd = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $listed_type_without_krbd;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7089
# Send a 'guest-ping' (3 second timeout) to the VM's guest agent.
# Returns 1 if the command succeeded and 0 if it raised an error; in the error
# case a warning is emitted unless $nowarn is true.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $ping_error = $@;

    return 1 if !$ping_error;

    warn "Qemu Guest Agent is not running - $ping_error" if !$nowarn;
    return 0;
}
7100
# Convert the volumes of a VM into base volumes.
#
# Every non-CD-ROM volume of $conf (or only the drive named $disk, if given)
# whose storage reports the 'template' feature is converted with
# vdisk_create_base(). The config entry is updated and the config is written
# after each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_volume);
}
7121
# Translate an 'iscsi://<portal>/<target>/<lun>' path into the key=value
# option string used with qemu-img's --image-opts / --target-image-opts.
# The initiator name is taken from get_initiator_name().
#
# Dies if $path does not have the expected iscsi:// layout.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my ($portal, $target, $lun) = ($1, $2, $3);

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7138
# Copy a disk image with 'qemu-img convert' into an existing target volume
# (-n is passed, so qemu-img does not create the target).
#
# Parameters:
#   $src_volid           - source volume ID, or the path of an existing file
#   $dst_volid           - destination volume ID (a plain path is rejected)
#   $size                - only used to render the progress output
#   $snapname            - optional snapshot to read from; passed to storage
#                          activation/path lookup and, for qcow2 sources, to
#                          qemu-img via '-l snapshot.name=...'
#   $is_zero_initialized - if true and the target is not iSCSI, the target is
#                          addressed through the 'zeroinit:' prefix
#
# Dies if source or destination cannot be resolved, or with
# "copy failed: ..." if qemu-img fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid) {
        # plain file: the format is only known if the extension gives it away
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);

    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn qemu-img's "(NN.NN/100%)" progress lines into a byte-based report
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7220
# Determine the image format of a volume.
# If the storage config has a 'path' and the volume name ends in an extension
# matched by QEMU_FORMAT_RE, that extension is returned; otherwise "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7230
# Start a 'drive-mirror' job for "drive-$drive" of VM $vmid and then hand over
# to qemu_drive_mirror_monitor().
#
# $dst_volid is either an 'nbd:' target (used verbatim, format "nbd") or a
# volume ID resolved through the storage layer. With $is_zero_initialized the
# resolved path is prefixed with 'zeroinit:'. $bwlimit is multiplied by 1024
# for the job's 'speed' option. A defined $src_bitmap switches the job to
# 'incremental' sync using that bitmap.
#
# If starting the job fails, the jobs in $jobs are cancelled and the function
# dies with "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        $qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (defined($bwlimit)) {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for $device with bandwidth limit: ${bwlimit} KB/s\n";
    } else {
        print "drive mirror is starting for $device\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $start_error = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $start_error\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7282
# Poll the block jobs of a VM once per second until they are finished or
# ready, then finish them according to $completion.
#
# Parameters:
#   $vmid       - VM whose monitor is queried ('query-block-jobs')
#   $vmiddst    - if set and different from $vmid, ready jobs are cancelled
#                 instead of completed (the disk is not switched); the cancel
#                 is wrapped in a guest fs-freeze or a VM suspend/resume
#   $jobs       - hash reference keyed by job (device) ID; entries are deleted
#                 as jobs finish, and their 'ready'/'complete' flags are
#                 updated in place
#   $completion - see below, defaults to 'complete'
#   $qga        - if true, the guest agent is probed and used for freezing
#   $op         - job type to look at, defaults to "mirror"
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# On any error the remaining jobs are cancelled (best effort) and the function
# dies with "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts failed completion attempts, acts as the timeout budget
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that disappeared without us completing it was cancelled
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # stop repeating the status line once the job was seen ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # named differently from $op (the job type) on purpose
                        my $completion_cmd;
                        if ($completion eq 'complete') {
                            $completion_cmd = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_cmd = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }

                        eval { mon_cmd($vmid, $completion_cmd, device => $job_id) };
                        my $completion_err = $@;
                        if ($completion_err && $completion_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($completion_err) {
                            # any other failure must not be reported as success
                            chomp $completion_err;
                            die "$job_id: '$completion_cmd' failed - $completion_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best effort: errors while cancelling are ignored, the original error wins
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7420
# Send 'block-job-cancel' for every job in $jobs, then poll
# 'query-block-jobs' once per second until none of them is reported any more.
# Finished jobs are removed from $jobs. Errors from the cancel command itself
# are ignored; errors from the status query propagate.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        $still_running{$_->{device}} = $_ for @$stats;

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
7451
# Clone one drive of a VM, either as linked clone or as full copy.
#
# Linked clone (!$full): vdisk_clone() of the source volume.
# Full clone: a new volume is allocated on $storage (or the source storage)
# and filled depending on the drive:
#   - cloud-init drives get no data copied (only pending jobs in $jobs are
#     completed when $completion is 'complete'),
#   - offline or snapshot sources are copied with qemu-img ('dd' for
#     efidisk0, 'convert' otherwise),
#   - running VMs use qemu_drive_mirror().
#
# Every newly created volume ID is pushed onto @$newvollist. Note that $drive
# itself is updated (format cleared, file and size set) and returned.
sub clone_disk {
    my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
        $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_;

    my $newvolid;

    if ($full) {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        print "create full clone of drive $drivename ($drive->{file})\n";
        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $name .= ".$dst_format" if $scfg->{path};
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($drivename eq 'efidisk0') {
            $size = get_efivars_size($conf);
        } elsif ($drivename eq 'tpmstate0') {
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
            # no data is copied for cloud-init drives
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if (!$running || $snapname) {
                # TODO: handle bwlimits
                if ($drivename eq 'efidisk0') {
                    # the relevant data on the efidisk may be smaller than the source
                    # e.g. on RBD/ZFS, so we use dd to copy only the amount
                    # that is given by the OVMF_VARS.fd
                    my $src_path = PVE::Storage::path($storecfg, $drive->{file});
                    my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                    # better for Ceph if block size is not too small, see bug #3324
                    my $bs = 1024*1024;

                    run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=$bs", "osize=$size",
                        "if=$src_path", "of=$dst_path"]);
                } else {
                    qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
                }
            } else {
                die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';

                my $kvmver = get_running_qemu_version($vmid);
                if (!min_version($kvmver, 2, 7) && $drive->{iothread}) {
                    die "drive-mirror with iothread requires qemu version 2.7 or higher\n";
                }

                qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            }
        }
    } else {
        print "create linked clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    }

    # a failing size lookup is tolerated, the size is simply not recorded then
    my ($new_size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = $drive;
    $disk->{format} = undef;
    $disk->{file} = $newvolid;
    $disk->{size} = $new_size if defined($new_size);

    return $disk;
}
7547
# Ask the running VM for its QEMU version ('query-version') and return it as
# "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $reply = mon_cmd($vmid, "query-version");
    my ($major, $minor) = ($reply->{qemu}->{major}, $reply->{qemu}->{minor});

    return "$major.$minor";
}
7553
# Decide whether the old BIOS files have to be used for a machine type.
#
# Returns nothing for an empty machine type, otherwise the list
# ($use_old_bios_files, $machine_type). A '.pxe' suffix forces the old files
# and is stripped from the returned machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $base_type = $1;
        return (1, $base_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $needs_old_files = !min_version($version, 2, 4);

    return ($needs_old_files, $machine_type);
}
7575
# Return the size in bytes (-s) of the OVMF vars image that get_ovmf_files()
# selects for this VM config. Dies if that image file does not exist.
sub get_efivars_size {
    my ($conf) = @_;

    my $arch = get_vm_arch($conf);

    my $efidisk;
    $efidisk = parse_drive('efidisk0', $conf->{efidisk0}) if $conf->{efidisk0};

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my (undef, $vars_image) = get_ovmf_files($arch, $efidisk, $smm);
    die "uefi vars image '$vars_image' not found\n" if ! -f $vars_image;

    return -s $vars_image;
}
7585
# Rewrite the 'efidisk0' entry of $conf so its size equals
# get_efivars_size($conf). Does nothing if no efidisk0 is configured.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
7597
# Rewrite the 'tpmstate0' entry of $conf so its size equals
# TPMSTATE_DISK_SIZE. Does nothing if no tpmstate0 is configured, mirroring
# update_efidisk_size(); without this guard a config lacking the key would get
# a drive entry created from nothing.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
7605
# Allocate a new EFI vars volume on $storeid and fill it from the OVMF vars
# image selected by get_ovmf_files().
#
# Returns the list ($volid, $size), where $size is the first value reported by
# volume_size_info() for the new volume, divided by 1024.
# Dies if the vars image does not exist.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my (undef, $vars_image) = get_ovmf_files($arch, $efidisk, $smm);
    die "EFI vars default image not found\n" if ! -f $vars_image;

    my $image_bytes = -s $vars_image;
    my $image_kb = PVE::Tools::convert_size($image_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $image_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($vars_image, $volid, $image_bytes, undef, 0);

    my ($volume_bytes) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $volume_bytes/1024);
}
7622
# Query the iothreads of a running VM ('query-iothreads') and return a hash
# reference mapping each iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $reply = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    $thread_id_of{ $_->{id} } = $_->{"thread-id"} for @$reply;

    return \%thread_id_of;
}
7635
# Work out the SCSI controller a drive belongs to.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - devices per controller: 7 for the default/lsi*
#                        types, 1 for 'virtio-scsi-single', 256 otherwise
#   $controller        - int(drive index / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', else "scsihw"
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    } else {
        $maxdev = 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
7656
# Map an ostype string to a Windows version number.
# wxp/w2k3/w2k give 5, w2k8/wvista give 6, 'win<N>' gives N, and anything
# else (including an empty ostype) gives 0.
sub windows_version {
    my ($ostype) = @_;

    return 0 if !$ostype;

    my %legacy_version = (
        wxp => 5,
        w2k3 => 5,
        w2k => 5,
        w2k8 => 6,
        wvista => 6,
    );
    return $legacy_version{$ostype} if exists $legacy_version{$ostype};

    return $1 if $ostype =~ m/^win(\d+)$/;

    return 0;
}
7674
# Pick the disk format for a new volume on storage $storeid.
#
# With an explicit $format that format is used if the storage supports it.
# Without one, the format derived from $src_volname serves as hint; if there
# is no source volume name either, the storage default is returned directly.
# An unsupported format always falls back to the storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $is_supported = scalar(grep { $_ eq $format } @$validFormats);

    return $is_supported ? $format : $defFormat;
}
7694
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Choose the storage for the VM state, in this order of preference:
#   1. the 'vmstatestorage' set in the config
#   2. a shared storage on which the VM has at least one disk
#   3. a non-shared storage on which the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3., a storage with a 'path' (file based) replaces a previously
# found candidate.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my %candidate; # keys: 'shared' and 'local'

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';
        # prefer file based storage
        $candidate{$kind} = $sid if !$candidate{$kind} || $scfg->{path};
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
7718
# Generate a new UUID with the UUID module and return the string produced by
# UUID::unparse().
sub generate_uuid {
    my $raw_uuid;
    UUID::generate($raw_uuid);

    my $uuid_text;
    UUID::unparse($raw_uuid, $uuid_text);

    return $uuid_text;
}
7725
# Return a freshly generated UUID in the form "uuid=<uuid>".
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
7729
# Send 'nbd-server-stop' to the monitor of VM $vmid and pass on its result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
7735
# Create (or truncate) the reboot trigger file
# /run/qemu-server/<vmid>.reboot. Dies if the file cannot be opened.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
7742
# Remove the reboot trigger file /run/qemu-server/<vmid>.reboot.
# Returns the result of unlink(), i.e. a true value if a file was removed.
# A missing file (ENOENT) is not an error; any other failure dies.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
7754
# Build a boot order hash (device name => boot index) from a legacy boot
# string made of single letters.
#
# The letter at position N (1-based) reserves the index range starting at
# N*100. 'd' is applied to CD-ROM drives, 'c' to the drive named in
# $conf->{bootdisk}, and 'n' to the configured net devices. The legacy string
# comes from $bootcfg->{legacy} or, failing that, from the default in
# $boot_fmt.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    # next free boot index per legacy letter
    my %next_index;
    my @letters = split(//, $legacy);
    for my $pos (0 .. $#letters) {
        $next_index{$letters[$pos]} = ($pos + 1) * 100;
    }

    my $bootorder = {};

    my $assign_volume = sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            if ($next_index{d}) {
                $bootorder->{$ds} = $next_index{d};
                $next_index{d} += 1;
            }
        } elsif ($next_index{c}) {
            # only the configured bootdisk gets an entry, but every disk
            # consumes an index
            $bootorder->{$ds} = $next_index{c}
                if $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $next_index{c} += 1;
        }
    };
    PVE::QemuConfig->foreach_volume($conf, $assign_volume);

    if ($next_index{n}) {
        for my $i (0 .. $MAX_NETS - 1) {
            my $netname = "net$i";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n};
            $next_index{n} += 1;
        }
    }

    return $bootorder;
}
7794
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference holding, as far as they exist: the first hard
# disk, the first CD-ROM and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk first (0), then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $first if $first;
    }

    # network: only the first configured device
    for my $i (0 .. $MAX_NETS - 1) {
        my $netname = "net$i";
        next if !$conf->{$netname};
        push @devices, $netname;
        last;
    }

    return \@devices;
}
7822
# Compute the boot order hash (device name => boot index) for a VM config.
#
# Without a 'boot' property, or with one in legacy format, the result comes
# from bootorder_from_legacy(). With 'order=', the listed devices are numbered
# consecutively. A 'boot' property with neither yields an empty hash.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$dev} = $index++;
        }
    }

    return $bootorder;
}
7842
# Connect to the qmeventd socket and send the vzdump handshake for $vmid.
#
# The connect is retried (25ms apart) while it fails with EINTR or EAGAIN, and
# given up after more than four attempts. Returns the open socket handle; the
# caller closes it once the inhibit is no longer required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $fh;
    my $attempts = 0;

    while (1) {
        $attempts++;
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        # anything but a transient error is fatal right away
        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $attempts retries\n"
            if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
7870
7871 # bash completion helper
7872
# Completion helper: list the volume IDs of backup archives whose format is
# 'vma.<compressor>'. If the value typed so far starts with '<storage>:', only
# that storage is queried.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my $storeid;
    $storeid = $1 if $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $id (keys %$data) {
        for my $item (@{$data->{$id}}) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @$res, $item->{volid};
        }
    }

    return $res;
}
7896
# Completion helper: return an array reference of VM IDs taken from
# vmstatus().
#   $running undefined - all VMs
#   $running true      - only running VMs, templates excluded
#   $running false     - only non-running VMs, templates excluded
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $wanted = sub {
        my ($d) = @_;

        return 1 if !defined($running);
        return 0 if $d->{template};

        my $is_running = $d->{status} eq 'running';
        return $running ? $is_running : !$is_running;
    };

    my $res = [];
    for my $id (keys %$idlist) {
        push @$res, $id if $wanted->($idlist->{$id});
    }

    return $res;
};
7916
# Completion helper: all VM IDs, regardless of state.
sub complete_vmid {
    return $complete_vmid_full->();
}
7920
# Completion helper: IDs of VMs that are not running (templates excluded).
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
7924
# Completion helper: IDs of running VMs (templates excluded).
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
7928
# Completion helper: IDs of all storages that pass storage_check_enabled()
# and have the 'images' content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable;
    for my $sid (keys %$ids) {
        my $enabled = PVE::Storage::storage_check_enabled($cfg, $sid, undef, 1);
        push @usable, $sid if $enabled && $ids->{$sid}->{content}->{images};
    }

    return \@usable;
}
7943
# Completion helper: IDs of all storages that pass storage_check_enabled()
# for the target node and have the 'images' content type configured.
# The target node is taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable;
    for my $sid (keys %$ids) {
        my $enabled = PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        push @usable, $sid if $enabled && $ids->{$sid}->{content}->{images};
    }

    return \@usable;
}
7961
# Check via 'query-status' whether the VM is in the "paused" state.
# If the config check or the monitor command fails, a warning is emitted and
# a false value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $status_reply = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if ($@) {
        warn "$@\n";
    }

    # hand back the false value itself when there is no usable reply
    return $status_reply if !$status_reply;

    return $status_reply->{status} eq "paused";
}
7971
# Verify that the storage holding $vol has the volume's content type enabled.
# Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
7984
7985 1;