]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
drives: expose 'readonly' flag of qemu for scsi/virtio
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use MIME::Base64;
23 use POSIX;
24 use Storable qw(dclone);
25 use Time::HiRes qw(gettimeofday usleep);
26 use URI::Escape;
27 use UUID;
28
29 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
30 use PVE::CGroup;
31 use PVE::DataCenterConfig;
32 use PVE::Exception qw(raise raise_param_exc);
33 use PVE::Format qw(render_duration render_bytes);
34 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
35 use PVE::INotify;
36 use PVE::JSONSchema qw(get_standard_option parse_property_string);
37 use PVE::ProcFSTools;
38 use PVE::PBSClient;
39 use PVE::RPCEnvironment;
40 use PVE::Storage;
41 use PVE::SysFSTools;
42 use PVE::Systemd;
43 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
44
45 use PVE::QMPClient;
46 use PVE::QemuConfig;
47 use PVE::QemuServer::Helpers qw(min_version config_aware_timeout);
48 use PVE::QemuServer::Cloudinit;
49 use PVE::QemuServer::CGroup;
50 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
51 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
52 use PVE::QemuServer::Machine;
53 use PVE::QemuServer::Memory;
54 use PVE::QemuServer::Monitor qw(mon_cmd);
55 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
56 use PVE::QemuServer::USB qw(parse_usb_device);
57
# Optional SDN integration: the flag is 1 when the SDN zones module could be
# loaded and stays undef otherwise (the load error is deliberately ignored).
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
63
# Directory holding the EDK2 firmware images.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Firmware lookup table: architecture => variant => [ CODE image, VARS image ].
my $OVMF = do {
    # Turn bare image file names into a [ code, vars ] pair below the base dir.
    my $images = sub { return [ map { "$EDK2_FW_BASE/$_" } @_ ] };

    my $table = {
        x86_64 => {
            '4m-no-smm'    => $images->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.fd'),
            '4m-no-smm-ms' => $images->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.ms.fd'),
            '4m'           => $images->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.fd'),
            '4m-ms'        => $images->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.ms.fd'),
            default        => $images->('OVMF_CODE.fd', 'OVMF_VARS.fd'),
        },
        aarch64 => {
            default => $images->('AAVMF_CODE.fd', 'AAVMF_VARS.fd'),
        },
    };
    $table;
};
95
96 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
97
98 # Note about locking: we use flock on the config file to protect against concurrent actions.
99 # Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
100 # 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
101 # But you can ignore this kind of lock with the --skiplock flag.
102
# Register the VM config parser and writer for '/qemu-server/' with the
# cluster file system layer.
cfs_register_file(
    '/qemu-server/',
    \&parse_vm_config,
    \&write_vm_config,
);
106
# Standard option: location used to save/restore VM state.
PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        type => 'string',
        description => "Some command save/restore state from this location.",
        maxLength => 128,
        optional => 1,
    },
);

# Standard option: QEMU machine type (pc/i440fx, q35 or virt, optionally
# versioned and with a '+pveN' suffix).
PVE::JSONSchema::register_standard_option(
    'pve-qemu-machine',
    {
        type => 'string',
        description => "Specifies the Qemu machine type.",
        pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
        maxLength => 40,
        optional => 1,
    },
);
121
122
# Resolve the target storage for $source according to $map.
#
# $map is either undef (no mapping, $source is returned unchanged) or a hash
# with an optional 'entries' hash (source => target) and an optional 'default'
# target. Lookup order: explicit entry, then default, then $source itself.
sub map_storage {
    my ($map, $source) = @_;

    # no mapping at all: keep the source storage
    return $source if !defined($map);

    my $entries = $map->{entries};
    if ($entries) {
        my $target = $entries->{$source};
        return $target if defined($target);
    }

    # fall back to the default target, or to identity if there is none
    return $map->{default} || $source;
}
136
# Standard option: source-to-target storage mapping, given as a
# 'storagepair-list' property string.
PVE::JSONSchema::register_standard_option(
    'pve-targetstorage',
    {
        type => 'string',
        format => 'storagepair-list',
        description => "Mapping from source to target storages. Providing only a single storage ID maps all source storages to that storage. Providing the special value '1' will map each source storage to itself.",
        optional => 1,
    },
);
143
144 #no warnings 'redefine';
145
# Name of the local node. PVE::INotify::nodename() is only asked while the
# cache is still undefined; every later call returns the cached value.
my $nodename_cache;
sub nodename {
    if (!defined($nodename_cache)) {
        $nodename_cache = PVE::INotify::nodename();
    }
    return $nodename_cache;
}
151
# Property-string format for the 'watchdog' option.
my $watchdog_fmt = {
    # emulated watchdog hardware (default key of the property string)
    model => {
        type => 'string',
        description => "Watchdog type to emulate.",
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
    },
    # what happens when the guest stops polling the watchdog
    action => {
        type => 'string',
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
169
# Property-string format for the 'agent' option (QEMU guest agent).
my $agent_fmt = {
    # default key: plain '0'/'1' toggles the agent
    enabled => {
        type => 'boolean',
        description => "Enable/disable communication with a Qemu Guest Agent (QGA) running in the VM.",
        default_key => 1,
        default => 0,
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        description => "Run fstrim after moving a disk or migrating the VM.",
        default => 0,
        optional => 1,
    },
    # device type used for the agent channel
    type => {
        type => 'string',
        description => "Select the agent type",
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
    },
};
191
# Property-string format for the 'vga' option.
my $vga_fmt = {
    # display adapter model (default key); serialN selects a serial terminal
    type => {
        type => 'string',
        description => "Select the VGA type.",
        enum => [
            qw(cirrus qxl qxl2 qxl3 qxl4 none),
            (map { "serial$_" } 0 .. 3),
            qw(std virtio vmware),
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
        minimum => 4,
        maximum => 512,
        optional => 1,
    },
};
209
# Property-string format for the 'ivshmem' option (inter-VM shared memory).
my $ivshmem_fmt = {
    # mandatory size of the backing file
    size => {
        type => 'integer',
        description => "The size of the file in MB.",
        minimum => 1,
    },
    # optional name of the backing file
    name => {
        type => 'string',
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
        format_description => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        optional => 1,
    },
};
224
# Property-string format for the 'audio0' option.
my $audio_fmt = {
    # emulated sound hardware (mandatory)
    device => {
        type => 'string',
        description => "Configure an audio device.",
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
    },
    # host-side backend the audio is routed to
    driver => {
        type => 'string',
        description => "Driver backend for the audio device.",
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
    },
};
239
# Property-string format for the 'spice_enhancements' option.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
        default => '0',
        optional => 1,
    },
    videostreaming => {
        type => 'string',
        description => "Enable video streaming. Uses compression for detected video streams.",
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
    },
};
255
# Property-string format for the 'rng0' option (VirtIO random number generator).
my $rng_fmt = {
    # host entropy source (default key)
    source => {
        type => 'string',
        description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
            ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
            ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
            ." still seeded from real entropy, and the bytes provided will most likely be mixed"
            ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
            ." a hardware RNG from the host.",
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
    },
    max_bytes => {
        type => 'integer',
        description => "Maximum bytes of entropy allowed to get injected into the guest every"
            ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
            ." `0` to disable limiting (potentially dangerous!).",
        # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
        # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
        # reading from /dev/urandom
        default => 1024,
        optional => 1,
    },
    period => {
        type => 'integer',
        description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
            ." the guest to retrieve another 'max_bytes' of entropy.",
        default => 1000,
        optional => 1,
    },
};
288
# Schema of the generic VM configuration options, one entry per config key.
# Each entry is a JSON-schema style property description; further entries
# (numaN, netN, cloud-init options) are added to this hash further down.
#
# Fixes compared to the previous revision (description texts only):
#  - 'keyboard' and 'startdate': the concatenated description parts were
#    joined without a separating space
#  - spelling: "balloon", "OpenIndiana", "themselves", "an audio device" and a
#    duplicated "not" in 'keephugepages'
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory' and 'usb'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 2,
        maximum => 262144,
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MB. This is the maximum available memory when"
            ." you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. The default is read from the"
            ." '/etc/pve/datacenter.cfg' configuration file. It should not be necessary to set it.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 5.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string', format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the Qemu Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string', format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays themselves.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -no-hpet

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter'
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        description => "Configure an audio device, useful in combination with QXL/Spice.",
        optional => 1
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1
    },
    tags => {
        type => 'string', format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
};
711
# Property-string format for the 'cicustom' option: volumes holding custom
# cloud-init meta, network and user data.
#
# The 'meta' description used to open with a single quote but break the line
# with double quotes, which embedded a stray '"', a newline and '."' in the
# text; all parts are single-quoted now.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all network data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'Specify a custom file containing all user data passed to the VM via'
            .' cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
739
# Schema of the cloud-init related VM options; the per-interface 'ipconfigN'
# entries are added further down and the whole hash is merged into $confdesc.
#
# Fixes compared to the previous revision (description texts only):
#  - 'searchdomain' and 'nameserver' opened with a double quote but broke
#    the line with single quotes, which embedded stray quotes, dots and
#    newlines in the text
#  - 'citype' was missing the closing parenthesis after `ostype`
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => "cloud-init: Sets DNS search domains for a container. Create will"
            ." automatically use the setting from the host if neither searchdomain nor nameserver"
            ." are set.",
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => "cloud-init: Sets DNS server IP address for a container. Create will"
            ." automatically use the setting from the host if neither searchdomain nor nameserver"
            ." are set.",
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
790
791 # what about other qemu settings ?
792 #cpu => 'string',
793 #machine => 'string',
794 #fda => 'file',
795 #fdb => 'file',
796 #mtdblock => 'file',
797 #sd => 'file',
798 #pflash => 'file',
799 #snapshot => 'bool',
800 #bootp => 'file',
801 ##tftp => 'dir',
802 ##smb => 'dir',
803 #kernel => 'file',
804 #append => 'string',
805 #initrd => 'file',
806 ##soundhw => 'string',
807
# Publish every option described so far as standard option "pve-qm-<option>".
foreach my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
811
812 my $MAX_USB_DEVICES = 5;
813 my $MAX_NETS = 32;
814 my $MAX_SERIAL_PORTS = 4;
815 my $MAX_PARALLEL_PORTS = 3;
816 my $MAX_NUMA = 8;
817
# Property-string format of a single 'numaN' entry.
my $numa_fmt = {
    # guest CPUs belonging to this node, e.g. "0-3;8"
    cpus => {
        type => "string",
        description => "CPUs accessing this NUMA node.",
        format_description => "id[-id];...",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
    },
    memory => {
        type => "number",
        description => "Amount of memory this NUMA node provides.",
        optional => 1,
    },
    # host nodes backing this guest node, same list syntax as 'cpus'
    hostnodes => {
        type => "string",
        description => "Host NUMA nodes to use.",
        format_description => "id[-id];...",
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        optional => 1,
    },
    policy => {
        type => 'string',
        description => "NUMA allocation policy.",
        enum => ['preferred', 'bind', 'interleave'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);

# Option description shared by all 'numaN' config keys.
my $numadesc = {
    type => 'string',
    format => $numa_fmt,
    description => "NUMA topology.",
    optional => 1,
};
PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);

# numa0 .. numa<MAX_NUMA-1> all point to the same description
$confdesc->{"numa$_"} = $numadesc for 0 .. $MAX_NUMA - 1;
855
# Network card models accepted for 'netN'.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];

# The same models as one sorted, space separated string.
my $nic_model_list_txt = join(' ', sort @{$nic_model_list});
873
874 my $net_fmt_bridge_descr = <<__EOD__;
875 Bridge to attach the network device to. The Proxmox VE standard bridge
876 is called 'vmbr0'.
877
878 If you do not specify a bridge, we create a kvm user (NATed) network
879 device, which provides DHCP and DNS services. The following addresses
880 are used:
881
882 10.0.2.2 Gateway
883 10.0.2.3 DNS Server
884 10.0.2.4 SMB Server
885
886 The DHCP server assign addresses to the guest starting from 10.0.2.15.
887 __EOD__
888
# Property-string format of a single 'netN' entry.
#
# Fix compared to the previous revision: typo "withing" in the MAC address
# description.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # every model name is also accepted as a key: '<model>=<mac>' is an alias
    # for 'model=<model>,macaddr=<mac>'
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
    bridge => {
        type => 'string',
        description => $net_fmt_bridge_descr,
        format_description => 'bridge',
        pattern => '[-_.\w\d]+',
        optional => 1,
    },
    queues => {
        type => 'integer',
        minimum => 0, maximum => 16,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};

# Option description shared by all 'netN' config keys.
my $netdesc = {
    optional => 1,
    type => 'string', format => $net_fmt,
    description => "Specify network devices.",
};

PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
960
# Property-string format of a single cloud-init 'ipconfigN' entry. A gateway
# can only be given together with an address of the same family ('requires').
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        default => 'dhcp',
        optional => 1,
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        requires => 'ip',
        optional => 1,
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        default => 'dhcp',
        optional => 1,
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        requires => 'ip6',
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Option description shared by all cloud-init 'ipconfigN' config keys.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# Register the ipconfig description under its own name. This used to pass
# $netdesc, so the 'pve-qm-ipconfig' standard option described a network
# device instead of an IP configuration.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1014
# every network slot gets a 'net<N>' option plus a matching cloud-init 'ipconfig<N>' option
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# make all cloud-init options available through the main config schema as well
for my $key (keys %$confdesc_cloudinit) {
    $confdesc->{$key} = $confdesc_cloudinit->{$key};
}

PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
# Format verifier for 'pve-volume-id-or-qm-path'.
#
# Accepts the literals 'none' and 'cdrom', anything starting with '/', or a value that
# passes the 'pve-volume-id' format check. Returns the accepted value. On failure it
# re-throws the check error, or returns nothing when $noerr is set.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    my $is_keyword = $volid eq 'none' || $volid eq 'cdrom';
    return $volid if $is_keyword || $volid =~ m|^/|;

    # neither a keyword nor a path, so it has to be a volume ID
    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        die $err if !$noerr;
        return;
    }
    return $checked;
}
1040
# Property-string schema for the 'usb<N>' options: the host device (or 'spice') as default
# key, plus an optional USB3 flag.
my $usb_fmt = {
    host => {
        default_key => 1,
        type => 'string', format => 'pve-qm-usb-device',
        format_description => 'HOSTUSBDEVICE|spice',
        description => <<EODESCR,
The Host USB device or port or the value 'spice'. HOSTUSBDEVICE syntax is:

'bus-port(.port)*' (decimal numbers) or
'vendor_id:product_id' (hexadecimal numbers) or
'spice'

You can use the 'lsusb -t' command to list existing usb devices.

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

The value 'spice' can be used to add a usb redirection device for spice.
EODESCR
    },
    usb3 => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether the given host option is a USB3 device or port.",
        default => 0,
    },
};

# Schema entry used for every 'usb<N>' config option.
my $usbdesc = {
    optional => 1,
    type => 'string', format => $usb_fmt,
    description => "Configure an USB device (n is 0 to 4).",
};
PVE::JSONSchema::register_standard_option("pve-qm-usb", $usbdesc);
1075
# Schema entry used for every 'serial<N>' config option: either a host device below /dev/
# or the keyword 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};

# Schema entry used for every 'parallel<N>' config option: a host parport or USB printer
# device node.
my $paralleldesc= {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1107
# register one config option per available device slot

for my $i (0 .. $MAX_PARALLEL_PORTS - 1) {
    $confdesc->{"parallel$i"} = $paralleldesc;
}

for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
    $confdesc->{"serial$i"} = $serialdesc;
}

for my $i (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$i"} = $PVE::QemuServer::PCI::hostpcidesc;
}

# the drive module already provides a ready-made description per drive key
my $drivedesc_hash = $PVE::QemuServer::Drive::drivedesc_hash;
for my $key (keys %$drivedesc_hash) {
    $confdesc->{$key} = $drivedesc_hash->{$key};
}

for my $i (0 .. $MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$i"} = $usbdesc;
}
1127
# Property-string schema for the 'boot' option: the deprecated 'legacy' letter list
# (default key) and the newer explicit device 'order' list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);

PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
# Format verifier for 'pve-qm-bootdev'.
#
# A boot device is either a valid drive name (except efidisk*/tpmstate*), or a
# net<N>/usb<N>/hostpci<N> key that exists in the config schema. Returns the device name
# when valid; otherwise dies, or returns nothing when $noerr is set.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # efidisk and tpmstate are excluded from the drive-name shortcut below
    my $is_special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_special) {
        return $dev;
    }

    for my $base (qw(net usb hostpci)) {
        next if $dev !~ m/^$base\d+$/;
        next if !$confdesc->{$dev};
        return $dev;
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1184
# Serialize a list of boot devices into a 'boot' property string ("order=dev1;dev2;...").
# An empty device list results in an empty string.
sub print_bootorder {
    my ($devs) = @_;

    if (!@$devs) {
        return "";
    }

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1191
# cached result of the KVM_GET_API_VERSION ioctl, 0 means 'not queried yet'
my $kvm_api_version = 0;

# Query the KVM API version from /dev/kvm. A true result is cached for later calls.
# Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1205
# per-binary cache of the detected QEMU version and of the binary's mtime at detection time
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version reported by '<binary> --version' (e.g. '6.1.0'), or 'unknown' if the
# output could not be parsed or the command failed (the failure is only warned about).
#
# $binary defaults to the QEMU binary for the host architecture. The result is cached per
# binary and re-detected when the binary's mtime changes.
#
# Dies if the binary cannot be stat'ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # without this check a missing binary would only surface as an obscure
    # "Can't call method mtime on an undefined value" error below
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary} if $kvm_user_version->{$binary} &&
        $cachedmtime == $st->mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $code = sub {
        my $line = shift;
        # a fourth version component is matched but not part of the returned version
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version() that falls back to the
# version of the installed QEMU binary when no explicit $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1240
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $is_char_device = -c '/dev/vhost-net';
    return $is_char_device;
}
1244
# Check whether $key is a known VM config option, i.e. has a defined entry in $confdesc.
sub option_exists {
    my ($key) = @_;

    my $schema_entry = $confdesc->{$key};
    return defined($schema_entry);
}
1249
# cached result of get_cdrom_path()
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink. A found path
# is cached; when none matches, a false value is returned and nothing is cached.
sub get_cdrom_path {
    return $cdrom_path if $cdrom_path;

    for my $candidate ("/dev/cdrom", "/dev/cdrom1") {
        return $cdrom_path = $candidate if -l $candidate;
    }
    return $cdrom_path = "/dev/cdrom2" if -l "/dev/cdrom2";
}
1259
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'       -> the host CD-ROM device (see get_cdrom_path)
#   'none'        -> empty string
#   absolute path -> returned unchanged
#   anything else -> treated as volume ID and resolved via PVE::Storage::path
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1273
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/ paths and anything that already looks like 'storage:volume' are
# returned unchanged. A bare file name is mapped onto the 'local' storage (below 'iso/' for
# cdrom media, below the VMID otherwise). Names containing a '/' cannot be converted, in
# which case nothing is returned.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1292
# Check that the storage content type $vtype fits the drive's $media setting
# ('disk' expects 'images', 'cdrom' expects 'iso'). Nothing is checked without $media.
# Raises a parameter exception for $opt on mismatch; dies with 'internal error' for any
# other $media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_vtype = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_vtype{$media};
    die "internal error" if !defined($etype);

    if ($vtype ne $etype) {
        raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
    }
    return;
}
1311
# Normalize a parsed drive in place: a 'file' given as filesystem path is converted to the
# volume ID of the storage it belongs to, and 'media' is set to 'cdrom' for ISO volumes and
# for the 'cdrom'/'none' keywords when it was not set explicitly.
#
# Raises a parameter exception for $opt if the path belongs to no storage or the content
# type does not match the media setting.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # keywords, host devices, volume IDs and plain numbers are left alone, everything else
    # is treated as a filesystem path
    my $is_fs_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_fs_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1331
# Parse the 'hotplug' option into a hash { feature => 1 }.
#
# '0' disables everything (empty hash), '1' selects the schema default, anything else is a
# list of features out of network, disk, cpu, memory and usb. Dies on unknown features.
sub parse_hotplug_features {
    my ($data) = @_;

    my $enabled = {};

    return $enabled if $data eq '0';

    if ($data eq '1') {
        $data = $confdesc->{hotplug}->{default};
    }

    for my $feature (PVE::Tools::split_list($data)) {
        my ($known) = $feature =~ m/^(network|disk|cpu|memory|usb)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled->{$known} = 1;
    }

    return $enabled;
}
1350
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for 'pve-hotplug-features'.
#
# Returns $value if parse_hotplug_features() accepts it. Otherwise it dies with the parser's
# error, or returns nothing when $noerr is set.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # the parser dies on invalid input, so it has to be wrapped for $noerr to have any effect
    my $features = eval { parse_hotplug_features($value) };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $value if $features;

    return if $noerr;

    die "unable to parse hotplug option\n";
}
1361
# Issue a SCSI command (opcode 0x12, 36 byte response) through the SG_IO ioctl on the open
# file handle $fh and decode the response.
#
# Returns a hash with 'vendor', 'product', 'revision', 'removable' (0/1) and 'type'
# (lower 5 bits of the first response byte). On any failure it dies, or returns nothing
# when $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # only the leading fields are filled in, the remaining ones are packed as zero/null
    # NOTE(review): -3 presumably is SG_DXFER_FROM_DEV and 6000 the timeout in ms - confirm
    # against sg.h
    my $packet = pack($sg_io_hdr_t, ord('S'), -3, length($cmd),
        length($sensebuf), 0, length($buf), $buf,
        $cmd, $sensebuf, 6000);

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    my @res = unpack($sg_io_hdr_t, $packet);
    # fields 17 and 18 of the header are the two 'S' (unsigned short) status fields
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    (my $byte0, my $byte1, $res->{vendor},
        $res->{product}, $res->{revision}) = unpack("C C x6 A8 A16 A4", $buf);

    $res->{removable} = $byte1 & 128 ? 1 : 0;
    $res->{type} = $byte0 & 31;

    return $res;
}
1412
# Open $path read/write and run a SCSI inquiry on it.
#
# Returns the scsi_inquiry() result hash, or nothing if the path cannot be opened or the
# inquiry fails (the inquiry is run in no-error mode).
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately, so the path is never subject to the whitespace/mode magic
    # of two-argument open
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1422
# Build the QEMU '-device' string for the USB tablet. q35 machines and the aarch64
# architecture attach it to the 'ehci' bus, everything else to 'uhci'.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1438
# Build the QEMU '-device' string for the USB keyboard. Only the aarch64 architecture gets
# one, for any other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch, $machine) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1446
# Drive ID as used in device/drive names: interface name directly followed by the index,
# e.g. 'scsi0'.
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
1451
# Build the QEMU '-device' string for the guest-visible side of a drive.
#
# The device model depends on the drive's interface:
#   virtio   -> virtio-blk-pci on a PCI address
#   scsi     -> scsi-{hd,cd,block,generic} on the controller reported by scsihw_infos()
#   ide/sata -> ide-{hd,cd} on the matching ide/ahci bus
# Optional properties (iothread, ssd rotation rate, wwn, model, bootindex, serial) are
# appended when set on the drive. Dies for 'usb' (not implemented) and unknown interfaces.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr($drive_id, $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # host path: ask the device itself what it is
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi/pvscsi controllers (and the default) address drives by scsi-id, all others
        # by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                # the model is stored URI-escaped in the config
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        # the serial is stored URI-escaped in the config
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1547
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
#
# Returns the value of the first 'InitiatorName = ...' line, undef if the file has no such
# line, and nothing if the file cannot be opened.
sub get_initiator_name {
    open(my $fh, '<', '/etc/iscsi/initiatorname.iscsi') or return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    close($fh);

    return $initiator;
}
1561
# Build the QEMU '-drive' string for the host side (backend) of a drive.
#
# Parameters:
#   $storecfg - storage configuration, used to resolve volume IDs
#   $vmid     - VM ID (passed on to the ISO path lookup)
#   $drive    - parsed drive hash
#   $pbs_name - if set, name of the PBS block node the drive is backed by; the drive is
#               then set up with the 'alloc-track' format on top of it
#   $io_uring - whether 'aio=io_uring' may be chosen as default AIO mode
#
# Dies if a CD-ROM drive should be backed by a PBS snapshot, or if no format is known for
# a PBS backed drive.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options which are passed through to QEMU under the same name
    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    # ro is 'readonly', and only accepts on|off
    if (defined($drive->{ro})) {
        my $v = $drive->{ro} ? 'on' : 'off';
        $opts .= ",readonly=$v";
    }

    # throttling: our mbps* values are converted to bytes per second for QEMU
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = 0;

    if (my $cache = $drive->{cache}) {
        $cache_direct = $cache =~ /^(?:off|none|directsync)$/;
    } elsif (!drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow})) {
        $opts .= ",cache=none";
        $cache_direct = 1;
    }

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    my $rbd_no_io_uring = $scfg && $scfg->{type} eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    my $lvm_no_io_uring = $scfg && $scfg->{type} eq 'lvm';

    if (!$drive->{aio}) {
        if ($io_uring && !$rbd_no_io_uring && !$lvm_no_io_uring) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive_id$opts";
}
1701
# Build the QEMU '-blockdev' string for a read-only 'pbs' block node named $pbs_name.
# $pbs_conf provides 'repository', 'snapshot', 'archive' and optionally 'keyfile'.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
        "snapshot=$pbs_conf->{snapshot}",
        "archive=$pbs_conf->{archive}",
    );
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1711
# Build the QEMU '-device' string for the guest-visible side of a network device.
#
# The 'virtio' model is mapped to 'virtio-net-pci', all other models are used as device name
# directly. Multiqueue, bootindex, a forced MTU (VirtIO on a bridge only) and - when
# $use_old_bios_files is set - a PXE ROM file are appended as needed.
#
# Dies if the configured MTU is below 576 or above the bridge MTU.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
    }

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means 'use the bridge MTU'
                $mtu = $bridge_mtu;
            } else {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $romfile_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1766
# Build the QEMU '-netdev' string for the host side (backend) of a network device.
#
# With a bridge configured a tap device named 'tap<vmid>i<N>' is used, together with the
# pve-bridge (or pve-bridge-hotplug when $hotplug is set) up script and the pve-bridgedown
# down script. Without a bridge, user mode networking is used.
#
# Dies if $netid is not of the form 'net<N>' with N below $MAX_NETS, or if the resulting
# interface name is too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # reject anything but 'net<N>' - previously such IDs slipped through the range check
    # below and resulted in an interface name without index
    die "got strange net id '$netid'\n" if $netid !~ m/^net(\d+)$/;
    my $i = int($1);

    die "got strange net id '$i'\n" if $i >= $MAX_NETS;

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1804
# maps our display types to the QEMU device name
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
};

# Build the QEMU '-device' string for a display adapter.
#
# The device type is taken from $vga_map ('virtio-gpu' instead of 'virtio-vga' on aarch64),
# or is 'qxl-vga'/'qxl' (first/additional display, selected by $id) when $qxlnum is set.
# Memory settings, max_outputs, the PCI(e) address and 'edid=off' are appended as needed.
#
# Dies if there is no device type for the configured display type.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # both alternatives have to be anchored on both ends
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} eq 'virtio') {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vgamem_mb";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vgamem_mb";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1870
# Parse a ';' separated list of numbers and ranges ("0-3;5") into an array of
# [ $first, $last ] pairs, where $last is undef for single numbers.
# Dies on malformed parts and on ranges whose end is smaller than their start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($first, $last) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";

        die "invalid range: $part ($last < $first)\n" if defined($last) && $last < $first;

        push @ranges, [ $first, $last ];
    }

    return \@ranges;
}
1884
# Parse a 'numa<N>' property string. The 'cpus' and 'hostnodes' values, when present, are
# replaced by their parsed form (see parse_number_sets).
sub parse_numa {
    my ($data) = @_;

    my $numa = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        next if !defined($numa->{$key});
        $numa->{$key} = parse_number_sets($numa->{$key});
    }

    return $numa;
}
1893
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'net<N>' property string. Parse errors are only warned about, nothing is returned
# in that case. A missing MAC address is filled in with a random one, using the
# 'mac_prefix' from datacenter.cfg.
sub parse_net {
    my ($data) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    $net->{macaddr} //= do {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        PVE::Tools::random_ether_addr($dc->{mac_prefix});
    };

    return $net;
}
1909
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfig<N>' property string. Parse errors and inconsistent gateway settings
# are only warned about, nothing is returned in that case. If neither an IPv4 nor an IPv6
# address is set, both are returned as 'dhcp'.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # a gateway needs a static address of the same family
    my $problem;
    if ($res->{gw} && !$res->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $res->{ip6} address";
    }
    if (defined($problem)) {
        warn $problem;
        return;
    }

    if ($res->{ip} || $res->{ip6}) {
        return $res;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' };
}
1944
# Serialize a parsed network device hash back into a 'net<N>' property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1950
# Re-serialize every 'net<N>' entry of $settings in place. As parse_net() fills in missing
# MAC addresses, all network devices have one afterwards. Entries which cannot be parsed
# are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt});
        $settings->{$opt} = print_net($net) if $net;
    }
}
1961
# Returns 1 if the storage layer reports $vmid as owner of the volume $volid. Absolute paths
# are never owned; errors from the storage lookup are ignored and count as 'not owned'.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    # the owner is the second element of the list returned by PVE::Storage::path
    my $owner = eval { (PVE::Storage::path($storecfg, $volid))[1] };

    return 1 if $owner && ($owner == $vmid);
    return;
}
1975
# Handle a drive that is no longer referenced by the config: cloud-init volumes are freed
# right away (errors are only warned about), other non-CD-ROM volumes owned by the VM are
# added to the config as unused volume.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
1989
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property-string schema for the 'smbios1' option.
my $smbios1_fmt = do {
    my $fmt = {
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };

    # all remaining fields share the same schema and only differ in their description
    my %text_field_description = (
        version => "Set SMBIOS1 version.",
        serial => "Set SMBIOS1 serial number.",
        manufacturer => "Set SMBIOS1 manufacturer.",
        product => "Set SMBIOS1 product ID.",
        sku => "Set SMBIOS1 SKU string.",
        family => "Set SMBIOS1 family string.",
    );
    for my $field (sort keys %text_field_description) {
        $fmt->{$field} = {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $text_field_description{$field},
            optional => 1,
        };
    }

    $fmt;
};
2047
# Parse an 'smbios1' property string against $smbios1_fmt.
#
# Returns the parsed hash. If parsing fails the error is emitted as a warning
# and undef is returned.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed;
    eval { $parsed = parse_property_string($smbios1_fmt, $data); };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2055
# Serialize a parsed smbios1 hash back into its property-string form.
sub print_smbios1 {
    my $smbios1 = shift;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}

# make the smbios1 schema available under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2062
# Parse a 'watchdog' property string against $watchdog_fmt.
#
# Returns nothing for an empty/false value. A parse error is emitted as a
# warning and results in undef.
sub parse_watchdog {
    my ($value) = @_;

    return if !$value;

    my $watchdog;
    eval { $watchdog = parse_property_string($watchdog_fmt, $value); };
    warn $@ if $@;

    return $watchdog;
}
2072
# Parse the 'agent' option of a VM config against $agent_fmt.
#
# Always returns a hash reference: an empty one if the option is not set, if
# it cannot be parsed (the error is warned about), or if the agent is not
# enabled - in the latter case all other agent properties are ignored.
sub parse_guest_agent {
    my ($conf) = @_;

    my $setting = $conf->{agent};
    return {} if !defined($setting);

    my $agent = eval { parse_property_string($agent_fmt, $setting) };
    warn $@ if $@;

    return $agent->{enabled} ? $agent : {};
}
2085
# Look up a single property of the guest agent configuration.
#
# Returns undef if the 'agent' option is not set at all; otherwise the value
# of $key in the hash produced by parse_guest_agent().
sub get_qga_key {
    my ($conf, $key) = @_;

    if (defined($conf->{agent})) {
        my $agent = parse_guest_agent($conf);
        return $agent->{$key};
    }

    # explicit undef (also in list context), as callers got before
    return undef;
}
2093
# Parse a 'vga' property string against $vga_fmt.
#
# An empty/false value yields an empty hash reference. A parse error is
# emitted as a warning and results in undef.
sub parse_vga {
    my ($value) = @_;

    return {} if !$value;

    my $vga;
    eval { $vga = parse_property_string($vga_fmt, $value); };
    if ($@) {
        warn $@;
    }

    return $vga;
}
2102
# Parse an 'rng' property string against $rng_fmt.
#
# Returns nothing for an empty/false value. A parse error is emitted as a
# warning and results in undef.
sub parse_rng {
    my ($value) = @_;

    return if !$value;

    my $rng;
    eval { $rng = parse_property_string($rng_fmt, $value); };
    warn $@ if $@;

    return $rng;
}
2112
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);

# Format verifier registered as 'pve-qm-usb-device'.
#
# Returns $value if parse_usb_device() accepts it. Otherwise it either returns
# nothing (when $noerr is set) or dies.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        die "unable to parse usb device\n" if !$noerr;
        return;
    }

    return $value;
}
2123
# Add the JSON schema properties used by the create and set functions.
#
# Copies every entry of $confdesc into $prop, except for a fixed set of
# options that is left out, and returns $prop.
sub json_config_properties {
    my ($prop) = @_;

    my %excluded = map { ($_ => 1) } qw(parent snaptime vmstate runningmachine runningcpu);

    for my $name (grep { !$excluded{$_} } keys %$confdesc) {
        $prop->{$name} = $confdesc->{$name};
    }

    return $prop;
}
2143
# Return a deep copy of $confdesc_cloudinit (used to generate documentation),
# so that callers cannot modify the original schema.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2149
# Validate and normalize the value of a config option according to the type
# declared for $key in $confdesc.
#
# Returns the normalized value:
# - boolean: 1 or 0 (accepts 1/on/yes/true and 0/off/no/false)
# - integer: the value passed through int() (digits only)
# - number:  the value itself (digits with an optional fractional part)
# - string:  the value itself; if the option declares a format the value is
#            checked with PVE::JSONSchema::check_format(), otherwise one pair
#            of surrounding double quotes is stripped
#
# Dies for unknown keys, undefined values, values containing CR/LF and values
# that do not pass the type check.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    my $type = $schema->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        if ($value =~ m/^(\d+)$/) {
            return int($1);
        }
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        if ($value =~ m/^(\d+)(\.\d+)?$/) {
            return $value;
        }
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $schema->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    die "internal error"
}
2186
# Remove the volumes owned by VM $vmid and afterwards its configuration.
#
# Steps, in this order:
# 1. load the config and (unless $skiplock) check its lock
# 2. for templates: die if a base volume is still used by a linked clone
# 3. free every volume referenced by the config (including unused ones and
#    the vmstate), by snapshot vmstates and by the pending section - but only
#    if PVE::Storage::path() reports $vmid as the owner
# 4. with $purge_unreferenced: also free all 'images' volumes that
#    vdisk_list() returns for $vmid
# 5. write $replacement_conf if one is given, otherwise destroy the config
#
# Failures to free a single volume only produce a warning.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf) unless $skiplock;

    # empty volume IDs and IDs starting with '/' are never touched
    my $is_candidate = sub {
        my ($volid) = @_;
        return $volid && $volid !~ m|^/|;
    };

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$is_candidate->($volid);

            if (PVE::Storage::volume_is_base_and_used($storecfg, $volid)) {
                die "base volume '$volid' is still in use by linked cloned\n";
            }
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes we already tried to free, so that each one is handled only once
    my %handled;
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$is_candidate->($volid) || $handled{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && $owner == $vmid;

        $handled{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # saved VM state of snapshots
    for my $snapshot (values %{$conf->{snapshots}}) {
        my $vmstate = $snapshot->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        $remove_owned_drive->('vmstate', $drive) if defined($drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2257
# Parse the raw text of a VM configuration file.
#
# $filename must end in '/qemu-server/<vmid>.conf'; the VMID taken from it is
# used in warnings and for resolving drive file names. $raw is the file
# content; nothing is returned if it is undefined.
#
# Returns a hash with the settings of the main section plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - settings of the [PENDING] section
#   snapshots - one hash per '[<name>]' section
#
# Comment lines ('#...') and 'description:' lines are collected as the
# description of the section they appear in. Lines that cannot be parsed or
# whose value fails check_type() are skipped with a warning.
sub parse_vm_config {
    my ($filename, $raw) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # hash of the section currently being filled
    my $descr;
    my $section = '';

    # store the description collected so far in the current section
    my $store_description = sub {
        return if !defined($descr);
        $descr =~ s/\s+$//;
        $conf->{description} = $descr;
    };

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $store_description->();
            $descr = undef;
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $store_description->();
            $descr = undef;
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored like all other keys: without the '^' a line of another
            # option that merely ends in 'snapstate: <state>' (e.g. an 'args:'
            # line, which is only checked further down) would be swallowed here
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                warn "vm $vmid - propertry 'delete' is only allowed in [PENDING]\n";
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            eval { $value = check_type($key, $value); };
            if ($@) {
                warn "vm $vmid - unable to parse value of '$key' - $@";
            } else {
                # 'cdrom' is stored as 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the file of a drive to a volume ID
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        warn "vm $vmid - unable to parse value of '$key'\n";
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            warn "vm $vmid - unable to parse config: $line\n";
        }
    }

    $store_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2360
# Serialize a VM configuration hash into the text of its config file.
#
# $conf is normalized in place first:
# - 'snapstate' of the main section is dropped
# - 'cdrom' is turned into 'ide2' (dies if both are set)
# - the legacy 'smp' option is dropped, or converted to 'sockets' (which also
#   drops 'cores') if 'sockets' is not set
# - every value of the main, pending and snapshot sections is passed through
#   check_type() and replaced by the result (dies on failure)
# - 'unusedX' entries whose volume is used by a drive of the main or pending
#   section are removed
#
# Returns the raw text: main section, then [PENDING] if it is not empty, then
# the snapshot sections sorted by name. $filename is not used.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = delete $conf->{smp};
        delete $conf->{cores};
    }

    my $used_volids = {};

    # keys that are not regular settings and thus not type-checked
    my %not_a_setting = map { ($_ => 1) } qw(digest description snapshots snapstate pending);

    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        for my $key (keys %$cref) {
            next if $not_a_setting{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n" if !$pending;
                # fixme: check syntax?
                next;
            }

            my $checked = eval { check_type($key, $cref->{$key}) };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $checked;

            # remember volumes used by drives, but not for snapshots
            next if $snapname || !is_valid_drivename($key);
            my $drive = parse_drive($key, $checked);
            $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $used_volids->{$conf->{$key}};
    }

    my $generate_raw_config = sub {
        my ($section, $pending) = @_;

        my $text = '';

        # add description as comment to top of file
        my $descr = $section->{description};
        if (defined($descr)) {
            if ($descr) {
                $text .= join('', map { '#' . PVE::Tools::encode_text($_) . "\n" } split(/\n/, $descr));
            } elsif ($pending) {
                $text .= "#\n";
            }
        }

        for my $key (sort keys %$section) {
            next if $key =~ /^(digest|description|pending|snapshots)$/;
            $text .= "$key: $section->{$key}\n";
        }

        return $text;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2462
# Return a hash with the static default value of every option in $confdesc
# that declares one (we use the defaults from our JSON schema configuration).
sub load_defaults {
    my %defaults =
        map { ($_ => $confdesc->{$_}->{default}) }
        grep { defined($confdesc->{$_}->{default}) }
        keys %$confdesc;

    return \%defaults;
}
2476
# List the VMs of type 'qemu' that the cluster VM list assigns to this node.
#
# Returns a hash keyed by VMID, each entry being { exists => 1 }. The hash is
# empty if no VM list (or no 'ids' in it) is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $guests = $vmlist ? $vmlist->{ids} : undef;
    return {} if !$guests;

    my $local_node = nodename();

    my %local_vms;
    for my $vmid (keys %$guests) {
        my $entry = $guests->{$vmid};
        my $on_this_node = $entry->{node} && $entry->{node} eq $local_node;
        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';
        $local_vms{$vmid} = { exists => 1 } if $on_this_node && $is_qemu;
    }

    return \%local_vms;
}
2492
# Test if a VM uses local resources (to prevent migration).
#
# Collects the names of all config keys that count as local resources: the
# old-syntax 'hostusb'/'hostpci' options, 'ivshmem', and any usbN, hostpciN,
# serialN or parallelN entry - except USB entries configured as 'spice' and
# serial ports configured as 'socket'.
#
# Returns a reference to the list of those keys. Dies if the list is not
# empty, unless $noerr is set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    # 'hostusb' and 'hostpci' are the old syntax
    my @local = grep { $conf->{$_} } qw(hostusb hostpci ivshmem);

    for my $key (keys %$conf) {
        my $setting = $conf->{$key};

        next if $key =~ m/^usb/ && $setting =~ m/^spice(?![^,])/;
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $key =~ m/^serial/ && $setting eq 'socket';

        if ($key =~ m/^(usb|hostpci|serial|parallel)\d+$/) {
            push @local, $key;
        }
    }

    if (scalar(@local) && !$noerr) {
        die "VM uses local resources\n";
    }

    return \@local;
}
2515
# Check if the storages used by the VM are available on all nodes involved
# (used by migrate).
#
# For every volume of $conf that has a storage ID, the storage must be enabled
# on the local node and on $node (storage_check_enabled() is called for both),
# and the local storage config must list the content type of the volume.
# Dies with a message naming the volume if the content type is missing.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $storeid);
        PVE::Storage::storage_check_enabled($storecfg, $storeid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$storeid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2539
# List the nodes where all VM images are available (used by has_feature API).
#
# Starts with all cluster nodes and narrows the set down for every volume of
# $conf that has a storage ID:
# - storage disabled: no node remains
# - storage restricted to certain nodes: only those remain
# - storage not shared: only the local node remains
#
# Returns a hash reference whose keys are the remaining node names.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my %candidates = map { ($_ => 1) } @$nodelist;
    my $local_node = nodename();

    my $restrict_by_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            %candidates = ();
        } elsif (my $allowed = $scfg->{nodes}) {
            delete @candidates{ grep { !$allowed->{$_} } keys %candidates };
        } elsif (!$scfg->{shared}) {
            delete @candidates{ grep { $_ ne $local_node } keys %candidates };
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_by_volume);

    return \%candidates;
}
2573
# Determine, per cluster node, which storages used by the VM are unavailable.
#
# For every volume of $conf that has a storage ID, the storage counts as
# unavailable on all nodes if it is disabled, or on the nodes missing from its
# 'nodes' restriction if it has one.
#
# Returns a hash keyed by node name. The entry of a node is an empty hash, or
# { unavailable_storages => [ sorted storage IDs ] } if anything is missing.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { ($_ => {}) } @$nodelist };

    my $record_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected_nodes;
        if ($scfg->{disable}) {
            @affected_nodes = keys %$nodehash;
        } elsif (my $allowed = $scfg->{nodes}) {
            @affected_nodes = grep { !$allowed->{$_} } keys %$nodehash;
        }

        # collected as hash keys first, so that each storage is listed once
        for my $node (@affected_nodes) {
            $nodehash->{$node}->{unavailable_storages}->{$storeid} = 1;
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $record_unavailable);

    # turn the collected sets into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages} or next;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2612
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Unless $nocheck is set, first asserts that the config of $vmid exists on
# $node. Then returns the result of vm_running_locally() for $vmid.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    unless ($nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2620
# Return the VM list of config_list(), with a 'pid' entry added for each VM
# that has a '<vmid>.pid' file in the runtime directory and for which
# check_running() returns a PID.
#
# If the runtime directory cannot be opened, the plain list is returned.
sub vzlist {
    my $vms = config_list();

    my $dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vms if !$dir;

    while (defined(my $entry = $dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/ or next;

        # ignore PID files of VMs that are not in our list
        next if !defined($vms->{$vmid});

        my $pid = check_running($vmid);
        $vms->{$vmid}->{pid} = $pid if $pid;
    }

    return $vms;
}
2638
# JSON schema of the properties describing the status of a VM. Only 'vmid'
# and 'status' are mandatory, everything else is declared as optional.
our $vmstatus_return_properties = do {
    # build the schema of an optional property
    my $optional = sub {
        my ($type, $description, %extra) = @_;
        return {
            description => $description,
            type => $type,
            optional => 1,
            %extra,
        };
    };

    my $properties = {
        vmid => get_standard_option('pve-vmid'),
        status => {
            description => "Qemu process status.",
            type => 'string',
            enum => ['stopped', 'running'],
        },
        maxmem => $optional->('integer', "Maximum memory in bytes.", renderer => 'bytes'),
        maxdisk => $optional->('integer', "Root disk size in bytes.", renderer => 'bytes'),
        name => $optional->('string', "VM name."),
        qmpstatus => $optional->('string', "Qemu QMP agent status."),
        pid => $optional->('integer', "PID of running qemu process."),
        uptime => $optional->('integer', "Uptime.", renderer => 'duration'),
        cpus => $optional->('number', "Maximum usable CPUs."),
        lock => $optional->('string', "The current config lock, if any."),
        tags => $optional->('string', "The current configured tags, if any"),
        'running-machine' => $optional->(
            'string', "The currently running machine type (if running)."),
        'running-qemu' => $optional->(
            'string', "The currently running QEMU version (if running)."),
    };

    $properties;
};
2705
# CPU accounting sample (time, used ticks, computed cpu value) per PID, kept
# between calls of vmstatus() to compute the CPU usage since the last sample
my $last_proc_pid_stat;

# Get VM status information.
#
# This must be fast and should not block ($full == false).
# We only query KVM using QMP if $full == true (this can be slow).
#
# Returns a hash keyed by VMID, restricted to $opt_vmid if that is set. Each
# entry holds values derived from the config (name, cpus, maxmem, maxdisk, ...)
# plus, for running VMs, data read from /proc (uptime, mem, cpu) and network
# counters of the VM's tap devices. With $full, the per-NIC counters and the
# results of several QMP queries (status, block statistics, machine, version,
# balloon, proxmox-support) are added as well.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $mib = 1024 * 1024;

    # part 1: values known from the configuration and the PID files
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $bootdisk_size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);

        my $cpus = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};

        my $d = {
            vmid => int($vmid),
            # fixme: better status?
            status => $pid ? 'running' : 'stopped',
            disk => 0, # no info available
            maxdisk => $bootdisk_size // 0,
            cpus => $cpus,
            name => $conf->{name} || "VM $vmid",
            maxmem => ($conf->{memory} || $defaults->{memory}) * $mib,
            uptime => 0,
            cpu => 0,
            mem => 0,
            netout => 0,
            netin => 0,
            diskread => 0,
            diskwrite => 0,
        };

        $d->{pid} = int($pid) if $pid;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon} * $mib;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # part 2: network counters of the tap devices, named 'tap<vmid>i...'
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/ or next;
        my $d = $res->{$vmid} or next;

        my $counters = $netdev->{$dev};

        # the device's 'receive' counter is accounted as the VM's 'netout'
        # and 'transmit' as 'netin'
        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    # part 3: uptime, memory and CPU usage of the running processes
    my $ctime = gettimeofday;

    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $previous = $last_proc_pid_stat->{$pid};
        if (!$previous) {
            # first sample for this PID, nothing to compare against yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $previous->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $previous->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # too little time passed since the last sample, reuse its value
            $d->{cpu} = $previous->{cpu};
        }
    }

    return $res if !$full;

    # part 4: query the running VMs via QMP
    my $qmpclient = PVE::QMPClient->new();

    my $on_balloon = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $on_blockstats = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];
        my ($read_total, $write_total) = (0, 0);

        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};
            $read_total = $read_total + $stats->{rd_bytes};
            $write_total = $write_total + $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $stats;
        }

        $res->{$vmid}->{diskread} = $read_total;
        $res->{$vmid}->{diskwrite} = $write_total;
    };

    my $on_machines = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $on_version = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $on_status = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $on_blockstats, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $on_machines, 'query-machines');
        $qmpclient->queue_cmd($vmid, $on_version, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $on_balloon, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    # the VMs this call reports on, i.e. exactly those present in $res
    my @selected = grep { !$opt_vmid || ($_ eq $opt_vmid) } keys %$list;

    for my $vmid (@selected) {
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $on_status, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (@selected) {
        next if !$res->{$vmid}->{pid}; # not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where no QMP status was obtained
    for my $vmid (@selected) {
        my $d = $res->{$vmid};
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
2946
# Return 1 if any of the serial0 .. serial($MAX_SERIAL_PORTS - 1) options is
# set to a true value in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $index (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$index"};
    }

    return 0;
}
2958
# Describe the audio device 'audio$id' ($id defaults to 0) of a VM config.
#
# Returns nothing if the option is not set. Otherwise the option is parsed
# against $audio_fmt and a hash is returned with:
#   dev        - the configured device
#   dev_id     - "audiodev$id"
#   backend    - the configured driver, 'spice' if none is given
#   backend_id - "<driver>-backend$id"
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $setting = $conf->{"audio$id"};
    return if !defined($setting);

    my $props = parse_property_string($audio_fmt, $setting);
    my $driver = $props->{driver} // 'spice';

    my $audio = {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };

    return $audio;
}
2976
# Build the QEMU command line arguments for an audio device.
#
# $audio is a hash as returned by conf_has_audio(), $audiopciaddr the PCI
# address suffix that is appended to the device string. The devices are only
# linked to the audio backend (',audiodev=...') if min_version() reports at
# least version 4.2 for $machine_version.
#
# Returns an array reference with the '-device' arguments followed by the
# '-audiodev' backend. Dies for devices other than 'AC97' and '*intel-hda'.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $model = $audio->{dev};

    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @args;
    if ($model eq 'AC97') {
        @args = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($model =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        @args = (
            '-device', "$model,id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    push @args, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@args;
}
3002
# Return the runtime paths used for the swtpm instance of a VM:
#   socket - "/var/run/qemu-server/<vmid>.swtpm"
#   pid    - the same path with '.pid' appended
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $socket,
        pid => "$socket.pid",
    };
}
3010
# Append the QEMU arguments for the TPM device to @$devices, if the VM has a
# 'tpmstate0' option: a socket chardev at the path from get_tpm_paths(), an
# emulator tpmdev using that chardev, and the tpm-tis device itself.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3022
# Start the swtpm daemon providing the TPM of a VM.
#
# $tpmdrive is the value of the 'tpmstate0' option; nothing is done (and
# nothing returned) if it is empty. The state location is the mapped volume if
# the drive's file is a volume ID, otherwise the file itself.
#
# Unless $migration is set, swtpm_setup is run first with '--not-overwrite',
# i.e. it creates a new TPM state only if none exists yet. Then the swtpm
# daemon is started and we wait up to 5s for its PID file to appear.
#
# Returns the (untainted) PID read from the PID file, so the daemon can be
# killed on error, or undef if the file does not contain a number.
# Dies if one of the commands fails or the PID file is not created in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    my $is_tpm2 = defined($tpm->{version}) && $tpm->{version} eq 'v2.0';

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $is_tpm2;
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $is_tpm2;

        # NOTE: use the line passed to the callback. The previous code printed
        # $1, which only worked as long as a regex capture of run_command()
        # happened to still be visible inside the callback.
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error; take
    # it from the match result directly, so that a failed match yields undef
    # instead of a stale $1 from an earlier match
    my $first_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $first_line =~ m/(\d+)/;
    return $pid;
}
3092
# Check whether a 'vga' setting selects a QXL (i.e. SPICE) display.
#
# Returns 0 if the parsed type is not 'qxl', 'qxl2', 'qxl3' or 'qxl4'.
# Otherwise returns the digit following 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $type = $vgaconf->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3102
# Return true if $arch equals the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();
    return $host_arch eq $arch;
}
3107
# Return the architecture of a VM: its 'arch' setting if defined, otherwise
# the architecture reported by get_host_arch().
sub get_vm_arch {
    my ($conf) = @_;

    my $arch = $conf->{arch};
    return defined($arch) ? $arch : get_host_arch();
}

# machine type used per architecture if the VM config does not set one
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
3117
# Return the '<major>.<minor>' part of a QEMU version string.
#
# $kvmversion defaults to the result of kvm_user_version(). Returns undef if
# the version does not start with '<digits>.<digits>'.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # take the capture from the match result: with an unchecked match followed
    # by 'return $1', a failed match would return a stale capture left over
    # from an earlier, unrelated match
    my ($major_minor) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3124
# Turn a generic machine type into a version-pinned one.
#
# The version is $base_version if it is defined and
# can_run_pve_machine_version() accepts it for $kvmversion; otherwise the
# result of get_installed_machine_version() is used.
#
# 'pc' (or no machine at all), 'q35' and 'virt' become 'pc-i440fx-<version>',
# 'pc-q35-<version>' and 'virt-<version>'. Any other machine type is returned
# unchanged after a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my %versioned_prefix = (
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    );

    # no machine configured is the same as 'pc'
    my $prefix = $versioned_prefix{ $machine || 'pc' };
    if (!$prefix) {
        warn "unknown machine type '$machine', not touching that!\n";
        return $machine;
    }

    return "${prefix}-${pin_version}";
}
3146
# Determine the machine type to use for a VM.
#
# The type is $forcemachine if set, otherwise the 'machine' option of $conf.
# If that yields nothing or one of the generic types 'pc', 'q35' or 'virt':
# - the type is pinned via windows_get_pinned_machine_version() (base version
#   5.1) if windows_version() returns true for the VM's ostype
# - a still missing type is taken from $default_machines for $arch (which
#   defaults to 'x86_64')
# - with $add_pve_version, '+pve<N>' is appended, with N from get_pve_version()
#
# With $add_pve_version, any type that still lacks a '+pve<N>' suffix gets
# '+pve0', inserted before a trailing '.pxe' if there is one.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_generic = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_generic) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        if ($add_pve_version) {
            my $pvever = PVE::QemuServer::Machine::get_pve_version($kvmversion);
            $machine .= "+pve$pvever";
        }
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # for version-pinned machines that do not include a pve-version (e.g.
        # pc-q35-4.1), we assume 0 to keep them stable in case we bump
        my ($without_pxe) = $machine =~ m/^(.*?)\.pxe$/;
        $machine = defined($without_pxe)
            ? $without_pxe . '+pve0' . '.pxe'
            : $machine . '+pve0';
    }

    return $machine;
}
3181
# Looks up the OVMF firmware images to use for a VM.
#
# $arch    - guest architecture, used as key into $OVMF
# $efidisk - parsed efidisk drive (may be undef); 'efitype' and
#            'pre-enrolled-keys' are read
# $smm     - if true, the SMM-enabled 4m images are selected for efitype '4m'
#
# Returns the image list stored in $OVMF for the selected type; the caller in
# config_to_command unpacks it as ($ovmf_code, $ovmf_vars).
#
# Dies if no images are known for the architecture or for the selected image
# type on that architecture.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # Fail with a clear message instead of dereferencing undef when the
    # architecture has no images of the requested type.
    my $files = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    return $files->@*;
}
3196
# Emulator binaries for guest architectures that differ from the host's.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Returns the path of the QEMU binary to use for the given guest architecture:
# '/usr/bin/kvm' when it is the host architecture, otherwise the matching
# entry from $Arch2Qemu. Dies for architectures without such an entry.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $binary = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$binary;

    return $binary;
}
3209
# The results of query_supported_cpu_flags and query_understood_cpu_flags must
# be combined (array_intersect) before they are used for a QEMU '-cpu'
# argument, because:
#
# a) query_understood_ also lists flags the host cannot use, and
# b) query_supported_ (or rather the underlying QMP call) returns CPU
#    settings, not just flags - most are flags, but those settings (and,
#    curiously, some flags) cannot be passed to "-cpu".
#
# query_supported_ has to start up to 2 temporary VMs, which makes it rather
# expensive. The same value is available much cheaper from pmxcfs via
# PVE::Cluster::get_node_kv('cpuflags-$accel'), with $accel being 'kvm' or
# 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    my %flags_by_accel;

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    my $query_flags_for_accel = sub {
        my ($use_kvm) = @_;

        my @qemu_args = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_args, '-accel', 'tcg' if !$use_kvm;

        my $exit_code = run_command(\@qemu_args, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $exit_code if $exit_code;

        my %seen_flags;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' },
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $flag = $name) =~ s/\.|-/_/g;
                $seen_flags{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %seen_flags ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags_by_accel{tcg} = eval { $query_flags_for_accel->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags_by_accel{kvm} = eval { $query_flags_for_accel->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return \%flags_by_accel;
}
3311
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Reads the list of CPU flags recognized by QEMU for the host architecture
# from the flag file below $understood_cpu_flag_dir.
#
# Returns an array reference of the whitespace-separated flag names.
# Dies if the file for the host architecture does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($filepath);

    # strip surrounding whitespace first, so no empty leading field is produced
    $content =~ s/^\s+|\s+$//g;

    return [ split(/\s+/, $content) ];
}
3327
# Returns the configured 'cpuunits' value, or - if unset - a default that
# depends on the cgroup mode: 100 for mode 2, 1024 otherwise.
my sub get_cpuunits {
    my ($conf) = @_;

    return $conf->{cpuunits} if defined($conf->{cpuunits});

    my $is_cgroup_v2 = PVE::CGroup::cgroup_mode() == 2;

    return $is_cgroup_v2 ? 100 : 1024;
}
# Builds the QEMU command line for the VM described by $conf.
#
# Parameters:
#   $storecfg     - storage configuration, handed to the PVE::Storage helpers
#   $vmid         - VM id; used for '-id', socket/pid file names and the
#                   default VM name
#   $conf         - VM configuration hash
#   $defaults     - fallback values; 'tablet', 'tdf' and 'scsihw' are read
#                   here, and the hash is passed on to the memory config helper
#   $forcemachine - passed to get_vm_machine(), where it takes precedence over
#                   the configured machine type
#   $forcecpu     - if set, used verbatim as the '-cpu' argument
#   $pbs_backing  - hash keyed by drive name; for drives with an entry an extra
#                   '-blockdev' is generated via print_pbs_blockdev()
#
# Returns ($cmd, $vollist, $spice_port) in list context and only $cmd (array
# reference with the command and its arguments) in scalar context.
#
# Dies (among other cases) if the QEMU binary is older than 3.0 or too old for
# the machine type, if KVM is requested but not available, if the UEFI base
# image or a configured serial/parallel device is missing, or if more CPUs
# (sockets * cores) are configured than $cpuinfo->{cpus} allows.
3332 sub config_to_command {
3333 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3334 $pbs_backing) = @_;
3335
3336 my $cmd = [];
3337 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3338 my $devices = [];
3339 my $bridges = {};
3340 my $ostype = $conf->{ostype};
3341 my $winversion = windows_version($ostype);
3342 my $kvm = $conf->{kvm};
3343 my $nodename = nodename();
3344
3345 my $arch = get_vm_arch($conf);
3346 my $kvm_binary = get_command_for_arch($arch);
3347 my $kvmver = kvm_user_version($kvm_binary);
3348
3349 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3350 $kvmver //= "undefined";
3351 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3352 }
3353
# the '+pveN' machine suffix is only used with QEMU 4.1 or newer
3354 my $add_pve_version = min_version($kvmver, 4, 1);
3355
3356 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3357 my $machine_version = extract_version($machine_type, $kvmver);
# KVM defaults to enabled when the guest architecture is the host's
3358 $kvm //= 1 if is_native($arch);
3359
3360 $machine_version =~ m/(\d+)\.(\d+)/;
3361 my ($machine_major, $machine_minor) = ($1, $2);
3362
# a third version component of 90 or higher is treated as release candidate
3363 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3364 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3365 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3366 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3367 ." please upgrade node '$nodename'\n"
3368 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3369 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3370 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3371 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3372 ." node '$nodename'\n";
3373 }
3374
3375 # if a specific +pve version is required for a feature, use $version_guard
3376 # instead of min_version to allow machines to be run with the minimum
3377 # required version
3378 my $required_pve_version = 0;
3379 my $version_guard = sub {
3380 my ($major, $minor, $pve) = @_;
3381 return 0 if !min_version($machine_version, $major, $minor, $pve);
3382 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3383 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
# remember the highest pve version any guarded feature asked for; it ends up
# in the machine type passed to QEMU at the end of this function
3384 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3385 return 1;
3386 };
3387
3388 if ($kvm && !defined kvm_version()) {
3389 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3390 ." or enable in BIOS.\n";
3391 }
3392
3393 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3394 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3395 my $use_old_bios_files = undef;
3396 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3397
3398 my $cpuunits = get_cpuunits($conf);
3399
3400 push @$cmd, $kvm_binary;
3401
3402 push @$cmd, '-id', $vmid;
3403
3404 my $vmname = $conf->{name} || "vm$vmid";
3405
3406 push @$cmd, '-name', $vmname;
3407
3408 push @$cmd, '-no-shutdown';
3409
3410 my $use_virtio = 0;
3411
3412 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3413 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3414 push @$cmd, '-mon', "chardev=qmp,mode=control";
3415
3416 if (min_version($machine_version, 2, 12)) {
3417 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3418 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3419 }
3420
3421 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3422
3423 push @$cmd, '-daemonize';
3424
3425 if ($conf->{smbios1}) {
3426 my $smbios_conf = parse_smbios1($conf->{smbios1});
3427 if ($smbios_conf->{base64}) {
3428 # Do not pass base64 flag to qemu
3429 delete $smbios_conf->{base64};
3430 my $smbios_string = "";
3431 foreach my $key (keys %$smbios_conf) {
3432 my $value;
3433 if ($key eq "uuid") {
3434 $value = $smbios_conf->{uuid}
3435 } else {
3436 $value = decode_base64($smbios_conf->{$key});
3437 }
3438 # qemu accepts any binary data, only commas need escaping by double comma
3439 $value =~ s/,/,,/g;
3440 $smbios_string .= "," . $key . "=" . $value if $value;
3441 }
3442 push @$cmd, '-smbios', "type=1" . $smbios_string;
3443 } else {
3444 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3445 }
3446 }
3447
# UEFI firmware: the OVMF code image becomes read-only pflash unit 0, the EFI
# vars (efidisk0 or a temporary copy of the vars template) pflash unit 1
3448 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3449 my $d;
3450 if (my $efidisk = $conf->{efidisk0}) {
3451 $d = parse_drive('efidisk0', $efidisk);
3452 }
3453
3454 my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);
3455 die "uefi base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
3456
3457 my ($path, $format);
3458 my $read_only_str = '';
3459 if ($d) {
3460 my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
3461 $format = $d->{format};
3462 if ($storeid) {
3463 $path = PVE::Storage::path($storecfg, $d->{file});
3464 if (!defined($format)) {
3465 my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
3466 $format = qemu_img_format($scfg, $volname);
3467 }
3468 } else {
3469 $path = $d->{file};
3470 die "efidisk format must be specified\n"
3471 if !defined($format);
3472 }
3473
3474 $read_only_str = ',readonly=on' if drive_is_read_only($conf, $d);
3475 } else {
3476 warn "no efidisk configured! Using temporary efivars disk.\n";
3477 $path = "/tmp/$vmid-ovmf.fd";
3478 PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
3479 $format = 'raw';
3480 }
3481
3482 my $size_str = "";
3483
3484 if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
3485 $size_str = ",size=" . (-s $ovmf_vars);
3486 }
3487
3488 # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
3489 my $cache = "";
3490 if ($path =~ m/^rbd:/) {
3491 $cache = ',cache=writeback';
3492 $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
3493 }
3494
3495 push @$cmd, '-drive', "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
3496 push @$cmd, '-drive', "if=pflash,unit=1$cache,format=$format,id=drive-efidisk0$size_str,file=${path}${read_only_str}";
3497 }
3498
3499 if ($q35) { # tell QEMU to load q35 config early
3500 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3501 if (min_version($machine_version, 4, 0)) {
3502 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3503 } else {
3504 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3505 }
3506 }
3507
3508 if ($conf->{vmgenid}) {
3509 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3510 }
3511
3512 # add usb controllers
3513 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3514 $conf, $bridges, $arch, $machine_type, $usbdesc->{format}, $MAX_USB_DEVICES);
3515 push @$devices, @usbcontrollers if @usbcontrollers;
3516 my $vga = parse_vga($conf->{vga});
3517
3518 my $qxlnum = vga_conf_has_spice($conf->{vga});
3519 $vga->{type} = 'qxl' if $qxlnum;
3520
# pick a default display type if none is configured
3521 if (!$vga->{type}) {
3522 if ($arch eq 'aarch64') {
3523 $vga->{type} = 'virtio';
3524 } elsif (min_version($machine_version, 2, 9)) {
3525 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3526 } else {
3527 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3528 }
3529 }
3530
3531 # enable absolute mouse coordinates (needed by vnc)
3532 my $tablet = $conf->{tablet};
3533 if (!defined($tablet)) {
3534 $tablet = $defaults->{tablet};
3535 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3536 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3537 }
3538
3539 if ($tablet) {
3540 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3541 my $kbd = print_keyboarddevice_full($conf, $arch);
3542 push @$devices, '-device', $kbd if defined($kbd);
3543 }
3544
3545 my $bootorder = device_bootorder($conf);
3546
3547 # host pci device passthrough
3548 my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices(
3549 $vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder);
3550
3551 # usb devices
3552 my $usb_dev_features = {};
3553 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3554
3555 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3556 $conf, $usbdesc->{format}, $MAX_USB_DEVICES, $usb_dev_features, $bootorder);
3557 push @$devices, @usbdevices if @usbdevices;
3558
3559 # serial devices
3560 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3561 my $path = $conf->{"serial$i"} or next;
3562 if ($path eq 'socket') {
3563 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3564 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3565 # On aarch64, serial0 is the UART device. Qemu only allows
3566 # connecting UART devices via the '-serial' command line, as
3567 # the device has a fixed slot on the hardware...
3568 if ($arch eq 'aarch64' && $i == 0) {
3569 push @$devices, '-serial', "chardev:serial$i";
3570 } else {
3571 push @$devices, '-device', "isa-serial,chardev=serial$i";
3572 }
3573 } else {
3574 die "no such serial device\n" if ! -c $path;
3575 push @$devices, '-chardev', "tty,id=serial$i,path=$path";
3576 push @$devices, '-device', "isa-serial,chardev=serial$i";
3577 }
3578 }
3579
3580 # parallel devices
3581 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3582 if (my $path = $conf->{"parallel$i"}) {
3583 die "no such parallel device\n" if ! -c $path;
3584 my $devtype = $path =~ m!^/dev/usb/lp! ? 'tty' : 'parport';
3585 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3586 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3587 }
3588 }
3589
3590 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3591 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3592 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3593 push @$devices, @$audio_devs;
3594 }
3595
3596 add_tpm_device($vmid, $devices, $conf);
3597
# CPU topology
3598 my $sockets = 1;
3599 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
3600 $sockets = $conf->{sockets} if $conf->{sockets};
3601
3602 my $cores = $conf->{cores} || 1;
3603
3604 my $maxcpus = $sockets * $cores;
3605
3606 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3607
3608 my $allowed_vcpus = $cpuinfo->{cpus};
3609
3610 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3611
# with CPU hotplug (machine version >= 2.7) start with a single vCPU and add
# vCPUs 2..$vcpus as separate devices
3612 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3613 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3614 for (my $i = 2; $i <= $vcpus; $i++) {
3615 my $cpustr = print_cpu_device($conf,$i);
3616 push @$cmd, '-device', $cpustr;
3617 }
3618
3619 } else {
3620
3621 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3622 }
3623 push @$cmd, '-nodefaults';
3624
3625 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3626
3627 push @$cmd, '-no-acpi' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3628
3629 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3630
# a real display device gets a VNC unix socket; serial terminal or 'none'
# runs with -nographic
3631 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3632 push @$devices, '-device', print_vga_device(
3633 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3634 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3635 push @$cmd, '-vnc', "unix:$socket,password=on";
3636 } else {
3637 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3638 push @$cmd, '-nographic';
3639 }
3640
3641 # time drift fix
3642 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3643 my $useLocaltime = $conf->{localtime};
3644
3645 if ($winversion >= 5) { # windows
3646 $useLocaltime = 1 if !defined($conf->{localtime});
3647
3648 # use time drift fix when acpi is enabled
3649 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3650 $tdf = 1 if !defined($conf->{tdf});
3651 }
3652 }
3653
3654 if ($winversion >= 6) {
3655 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3656 push @$cmd, '-no-hpet';
3657 }
3658
3659 push @$rtcFlags, 'driftfix=slew' if $tdf;
3660
3661 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3662 push @$rtcFlags, "base=$conf->{startdate}";
3663 } elsif ($useLocaltime) {
3664 push @$rtcFlags, 'base=localtime';
3665 }
3666
3667 if ($forcecpu) {
3668 push @$cmd, '-cpu', $forcecpu;
3669 } else {
3670 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3671 }
3672
3673 PVE::QemuServer::Memory::config($conf, $vmid, $sockets, $cores, $defaults, $hotplug_features, $cmd);
3674
3675 push @$cmd, '-S' if $conf->{freeze};
3676
3677 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3678
3679 my $guest_agent = parse_guest_agent($conf);
3680
3681 if ($guest_agent->{enabled}) {
3682 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3683 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3684
3685 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3686 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3687 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3688 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3689 } elsif ($guest_agent->{type} eq 'isa') {
3690 push @$devices, '-device', "isa-serial,chardev=qga0";
3691 }
3692 }
3693
3694 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3695 if ($rng && $version_guard->(4, 1, 2)) {
3696 check_rng_source($rng->{source});
3697
3698 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3699 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3700 my $limiter_str = "";
3701 if ($max_bytes) {
3702 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3703 }
3704
3705 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3706 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3707 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3708 }
3709
3710 my $spice_port;
3711
# SPICE setup; $qxlnum is whatever vga_conf_has_spice() returned above
3712 if ($qxlnum) {
3713 if ($qxlnum > 1) {
3714 if ($winversion){
3715 for (my $i = 1; $i < $qxlnum; $i++){
3716 push @$devices, '-device', print_vga_device(
3717 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3718 }
3719 } else {
3720 # assume other OS works like Linux
3721 my ($ram, $vram) = ("134217728", "67108864");
3722 if ($vga->{memory}) {
3723 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3724 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3725 }
3726 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3727 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3728 }
3729 }
3730
3731 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3732
3733 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3734 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3735 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3736
3737 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3738 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3739 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3740
3741 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3742 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3743
3744 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3745 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3746 if ($spice_enhancement->{foldersharing}) {
3747 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3748 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3749 }
3750
3751 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3752 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3753 if $spice_enhancement->{videostreaming};
3754
3755 push @$devices, '-spice', "$spice_opts";
3756 }
3757
3758 # enable balloon by default, unless explicitly disabled
3759 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3760 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3761 push @$devices, '-device', "virtio-balloon-pci,id=balloon0$pciaddr";
3762 }
3763
3764 if ($conf->{watchdog}) {
3765 my $wdopts = parse_watchdog($conf->{watchdog});
3766 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3767 my $watchdog = $wdopts->{model} || 'i6300esb';
3768 push @$devices, '-device', "$watchdog$pciaddr";
3769 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3770 }
3771
# $vollist collects the storage-managed volumes referenced by the VM and is
# returned to the caller; the controller hashes track which SCSI/AHCI
# controllers were already added
3772 my $vollist = [];
3773 my $scsicontroller = {};
3774 my $ahcicontroller = {};
3775 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3776
3777 # Add iscsi initiator name if available
3778 if (my $initiator = get_initiator_name()) {
3779 push @$devices, '-iscsi', "initiator-name=$initiator";
3780 }
3781
3782 PVE::QemuConfig->foreach_volume($conf, sub {
3783 my ($ds, $drive) = @_;
3784
3785 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3786 check_volume_storage_type($storecfg, $drive->{file});
3787 push @$vollist, $drive->{file};
3788 }
3789
3790 # ignore efidisk here, already added in bios/fw handling code above
3791 return if $drive->{interface} eq 'efidisk';
3792 # similar for TPM
3793 return if $drive->{interface} eq 'tpmstate';
3794
3795 $use_virtio = 1 if $ds =~ m/^virtio/;
3796
3797 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3798
3799 if ($drive->{interface} eq 'virtio'){
3800 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
3801 }
3802
3803 if ($drive->{interface} eq 'scsi') {
3804
3805 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
3806
3807 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
3808 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
3809
3810 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
3811 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
3812
3813 my $iothread = '';
3814 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
3815 $iothread .= ",iothread=iothread-$controller_prefix$controller";
3816 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
3817 } elsif ($drive->{iothread}) {
3818 warn "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n";
3819 }
3820
3821 my $queues = '';
3822 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
3823 $queues = ",num_queues=$drive->{queues}";
3824 }
3825
3826 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
3827 if !$scsicontroller->{$controller};
3828 $scsicontroller->{$controller}=1;
3829 }
3830
3831 if ($drive->{interface} eq 'sata') {
3832 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3833 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
3834 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
3835 if !$ahcicontroller->{$controller};
3836 $ahcicontroller->{$controller}=1;
3837 }
3838
3839 my $pbs_conf = $pbs_backing->{$ds};
3840 my $pbs_name = undef;
3841 if ($pbs_conf) {
3842 $pbs_name = "drive-$ds-pbs";
3843 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
3844 }
3845
3846 my $drive_cmd = print_drive_commandline_full(
3847 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
3848
3849 # extra protection for templates, but SATA and IDE don't support it..
3850 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
3851
3852 push @$devices, '-drive',$drive_cmd;
3853 push @$devices, '-device', print_drivedevice_full(
3854 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
3855 });
3856
# network devices: one -netdev/-device pair per configured netN entry
3857 for (my $i = 0; $i < $MAX_NETS; $i++) {
3858 my $netname = "net$i";
3859
3860 next if !$conf->{$netname};
3861 my $d = parse_net($conf->{$netname});
3862 next if !$d;
3863
3864 $use_virtio = 1 if $d->{model} eq 'virtio';
3865
3866 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
3867
3868 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
3869 push @$devices, '-netdev', $netdevfull;
3870
3871 my $netdevicefull = print_netdevice_full(
3872 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type);
3873
3874 push @$devices, '-device', $netdevicefull;
3875 }
3876
3877 if ($conf->{ivshmem}) {
3878 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
3879
3880 my $bus;
3881 if ($q35) {
3882 $bus = print_pcie_addr("ivshmem");
3883 } else {
3884 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
3885 }
3886
3887 my $ivshmem_name = $ivshmem->{name} // $vmid;
3888 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
3889
3890 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
3891 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
3892 .",size=$ivshmem->{size}M";
3893 }
3894
3895 # pci.4 is nested in pci.1
3896 $bridges->{1} = 1 if $bridges->{4};
3897
3898 if (!$q35) { # add pci bridges
3899 if (min_version($machine_version, 2, 3)) {
3900 $bridges->{1} = 1;
3901 $bridges->{2} = 1;
3902 }
3903 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
3904 }
3905
# bridge devices are inserted in front of the devices collected so far (for
# q35 right after the leading -readconfig pair)
3906 for my $k (sort {$b cmp $a} keys %$bridges) {
3907 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
3908
3909 my $k_name = $k;
3910 if ($k == 2 && $legacy_igd) {
3911 $k_name = "$k-igd";
3912 }
3913 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
3914 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
3915
3916 if ($q35) { # add after -readconfig pve-q35.cfg
3917 splice @$devices, 2, 0, '-device', $devstr;
3918 } else {
3919 unshift @$devices, '-device', $devstr if $k > 0;
3920 }
3921 }
3922
3923 if (!$kvm) {
3924 push @$machineFlags, 'accel=tcg';
3925 }
3926
# replace the pve version of the machine type with the highest one requested
# through $version_guard (0 if none was)
3927 my $machine_type_min = $machine_type;
3928 if ($add_pve_version) {
3929 $machine_type_min =~ s/\+pve\d+$//;
3930 $machine_type_min .= "+pve$required_pve_version";
3931 }
3932 push @$machineFlags, "type=${machine_type_min}";
3933
# final assembly: collected devices first, then -rtc, -machine and -global
3934 push @$cmd, @$devices;
3935 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
3936 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
3937 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
3938
3939 if (my $vmstate = $conf->{vmstate}) {
3940 my $statepath = PVE::Storage::path($storecfg, $vmstate);
3941 push @$vollist, $vmstate;
3942 push @$cmd, '-loadstate', $statepath;
3943 print "activating and using '$vmstate' as vmstate\n";
3944 }
3945
3946 if (PVE::QemuConfig->is_template($conf)) {
3947 # needed to workaround base volumes being read-only
3948 push @$cmd, '-snapshot';
3949 }
3950
3951 # add custom args
3952 if ($conf->{args}) {
3953 my $aa = PVE::Tools::split_args($conf->{args});
3954 push @$cmd, @$aa;
3955 }
3956
# list context also gets the volume list and the allocated SPICE port
3957 return wantarray ? ($cmd, $vollist, $spice_port) : $cmd;
3958 }
3959
# Sanity-checks the source file of a VirtIO RNG device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the
# kernel's current hardware RNG (rng_current in sysfs) reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (! -e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $is_hwrng = $source eq '/dev/hwrng';

    if ($is_hwrng && file_read_firstline($rng_current) eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n";
    }
}
3976
# Asks the running VM (QMP 'query-spice') for its SPICE port.
#
# Returns the 'tls-port' if set, otherwise the plain 'port'.
# Dies with "no spice port" if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'};
    $port = $spice_info->{'port'} if !$port;
    die "no spice port\n" if !$port;

    return $port;
}
3984
# Collects the ids of the devices currently present in a running VM.
#
# Queries the VM over QMP for PCI devices (descending into PCI bridges), block
# devices named 'drive-<id>', the 'QEMU HID Tablet' mouse and the 'usb<N>'
# entries below /machine/peripheral.
#
# Returns a hash reference keyed by device id. Values are 1, except for PCI
# bridges with an id, whose value is 1 plus the number of devices directly
# behind the bridge.
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    my $res = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    for my $pcibus (@$res) {
        push @$devices_to_check, @{ $pcibus->{devices} // [] };
    }

    # breadth-first walk: each round handles one nesting level of bridges
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            my $id = $d->{'qdev_id'};
            $devices->{$id} = 1 if $id;

            my $bridge = $d->{'pci_bridge'} or next;

            # 'devices' may be absent for a bridge without children
            my $children = $bridge->{devices} // [];

            # only count children for bridges that have an id, otherwise an
            # undefined/empty hash key would be created
            $devices->{$id} += scalar(@$children) if $id;

            push @$to_check, @$children;
        }
        $devices_to_check = $to_check;
    }

    my $resblock = mon_cmd($vmid, 'query-block');
    for my $block (@$resblock) {
        if (defined($block->{device}) && $block->{device} =~ m/^drive-(\S+)/) {
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    for my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb\d+$/) {
            $devices->{$per->{name}} = 1;
        }
    }

    return $devices;
}
4034
# Hotplug the device $deviceid into the running VM $vmid.
#
# $device is the parsed configuration of the device (drive or net hash); it
# is not read for 'tablet', 'keyboard' and 'pci.N' bridges.
# Returns 1 on success or if the device is already present, returns nothing
# if the netdev backend for a 'netN' device could not be added, and dies for
# unsupported device IDs or when a monitor command fails.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # already plugged - nothing to do
    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    # roll back an already added drive backend (best effort), then re-throw
    my $drop_drive_and_die = sub {
        my ($err) = @_;
        eval { qemu_drivedel($vmid, $deviceid); };
        warn $@ if $@;
        die $err;
    };

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        die "usb hotplug currently not reliable\n";
        # since we can't reliably hot unplug all added usb devices and usb
        # passthrough breaks live migration we disable usb hotplugging for now
        #qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $frontend = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, $frontend);
        eval { qemu_deviceaddverify($vmid, $deviceid); };
        $drop_drive_and_die->($@) if $@;
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = $conf->{scsihw} // "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);

        # 'virtio-scsi-single' is a PVE-level name, QEMU only knows the pci device
        my $driver = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;
        my $controller = "$driver,id=$deviceid$pciaddr";

        # iothread and queues are only passed on for virtioscsiN controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/) {
            if ($device->{iothread}) {
                qemu_iothread_add($vmid, $deviceid, $device);
                $controller .= ",iothread=iothread-$deviceid";
            }
            if ($device->{queues}) {
                $controller .= ",num_queues=$device->{queues}";
            }
        }

        qemu_deviceadd($vmid, $controller);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $frontend = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $frontend); };
        $drop_drive_and_die->($@) if $@;
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the NIC is built for the PXE machine type, not the one passed in
        my $pxe_machine = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($pxe_machine);

        my $nic = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $net_machine_type);
        qemu_deviceadd($vmid, $nic);
        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # remove the netdev backend again (best effort) before re-throwing
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr");
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4129
# fixme: this should raise exceptions on error!
#
# Hot-unplug the device $deviceid from the running VM $vmid.
# Returns 1 on success or if the device is not present; dies for boot disks,
# for unsupported device IDs and when a monitor command fails.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    # already gone - nothing to do
    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep { $deviceid eq $_ } @$bootdisks;

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard') {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb\d+$/) {
        die "usb hotplug currently not reliable\n";
        # when unplugging usb devices this way, there may be remaining usb
        # controllers/hubs so we disable it for now
        #qemu_devicedel($vmid, $deviceid);
        #qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        # frontend first, then its drive backend, then the iothread (if any)
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single every disk has its own controller/iothread
        my $single = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive) if $single;
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4177
# Send a QMP 'device_add' for a device given in command line notation
# ("<driver>,key=value,..."). The string is split on '=' and ',' into the
# option hash passed to the monitor, with the leading driver name stored
# under the 'driver' key.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=$devicefull");

    mon_cmd($vmid, "device_add" , %options);
}
4186
# Send a QMP 'device_del' for device $deviceid and return the monitor's answer.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $answer = mon_cmd($vmid, "device_del", id => $deviceid);
    return $answer;
}
4192
# Create the 'iothread-<deviceid>' object for a device that has the
# 'iothread' option set, unless the VM already has an object of that name.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);
    qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
}
4201
# Remove the 'iothread-<deviceid>' object of a device that has the
# 'iothread' option set, if the VM currently has an object of that name.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);
    qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
}
4210
# Create a QOM object of type $qomtype with the ID $objectid (QMP 'object-add').
# Always returns 1; errors are raised by mon_cmd.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %object = (id => $objectid, "qom-type" => $qomtype);
    mon_cmd($vmid, "object-add", %object);

    return 1;
}
4218
# Delete the QOM object with the ID $objectid (QMP 'object-del').
# Always returns 1; errors are raised by mon_cmd.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %object = (id => $objectid);
    mon_cmd($vmid, "object-del", %object);

    return 1;
}
4226
# Add the drive backend for $device to the running VM through the human
# monitor command 'drive_add'. io_uring is requested for running QEMU
# versions >= 6.0. Returns 1 on success, dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $use_io_uring = min_version($running_version, 6, 0);

    my $drive_spec = print_drive_commandline_full($storecfg, $vmid, $device, undef, $use_io_uring);
    # the spec is embedded in a double-quoted HMP argument, so double all backslashes
    $drive_spec =~ s/\\/\\\\/g;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_spec\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $output\n" if $output !~ m/OK/s;

    return 1;
}
4241
# Remove the drive backend 'drive-<deviceid>' through the human monitor
# command 'drive_del'. Returns 1 on success (empty output) and also when the
# monitor reports the device as not found; dies with the output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $succeeded = $output eq "" || $output =~ m/Device \'.*?\' not found/s;
    die "deleting drive $deviceid failed : $output\n" if !$succeeded;

    return 1;
}
4255
# Wait until $deviceid shows up in the VM's device list. The list is polled
# up to six times with one second of sleep after each miss; dies if the
# device never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4267
4268
# Wait until $deviceid has vanished from the VM's device list. The list is
# polled up to six times with one second of sleep after each miss; dies if
# the device is still there afterwards.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4283
# Make sure the SCSI controller that the drive $device belongs to exists in
# the running VM, hotplugging it if it is missing. Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # the controller ID is "<prefix><controller number>" as reported by scsihw_infos
    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $controller_id = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$controller_id});

    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type);

    return 1;
}
4298
# Remove the SCSI controller belonging to the drive option $opt after that
# drive was unplugged.
#
# With 'virtio-scsi-single' the per-disk controller 'virtioscsi<index>' is
# always unplugged. Otherwise 'scsihw<controller>' is only unplugged when no
# present SCSI drive has an index below the limit computed from the values
# returned by scsihw_infos. Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);

    # keep the controller while a SCSI drive below the index limit is still present
    my $present = vm_devices_list($vmid);
    for my $name (keys %{$present}) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        return 1
            if $drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1));
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4327
# Hotplug the PCI bridge that the device ID $device needs, if it needs one
# and the bridge is not present in the running VM yet.
#
# print_pci_addr is called with an empty $bridges hash and the hash is read
# back afterwards - presumably it registers the required bridge number(s)
# there (verify against PVE::QemuServer::PCI). Bridge numbers below 1 need
# no hotplug. Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    my $bridgeid;

    print_pci_addr($device, $bridges, $arch, $machine_type);

    # picks the last key in hash iteration order
    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug takes ($storecfg, $conf, $vmid, $deviceid, $device,
        # $arch, $machine_type). A bridge has no device config, but the slot
        # must still be filled, otherwise $arch and $machine_type are shifted
        # by one position and the bridge address is computed for the wrong
        # architecture/machine type.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4351
# Set the link state of the network device $device via QMP 'set_link';
# a true $up brings the link up, a false one takes it down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4358
# Add the netdev backend for the network device $deviceid via QMP
# 'netdev_add'. The command line style string from print_netdev_full is
# split on '=' and ',' into the option hash. Always returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev_spec = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev_spec);

    # QMP wants typed values: 'vhost' as JSON boolean, 'queues' as number
    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});
    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);
    return 1;
}
4376
# Remove the netdev backend with the ID $deviceid via QMP 'netdev_del'.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my %target = (id => $deviceid);
    mon_cmd($vmid, "netdev_del", %target);
}
4382
# Replace the USB device $deviceid of the running VM: unplug the old one,
# add an xhci controller first if the new device is USB3 and the VM has
# none, then plug the device parsed from $device->{host}.
# Returns nothing if $device is false.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    if ($device->{usb3} && !vm_devices_list($vmid)->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, "nec-usb-xhci,id=xhci$pciaddr");
    }

    my $usb = parse_usb_device($device->{host});
    $usb->{usb3} = $device->{usb3};

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $usb, $arch, $machine_type);
}
4407
# Change the number of vcpus of the running VM $vmid to $vcpus.
#
# A false $vcpus means "all" (sockets * cores). With machine version >= 2.7
# cpus are added/removed one by one as 'cpuN' devices and the config is
# written after every successful step; older machine versions can only add
# cpus (QMP 'cpu-add') and do not touch the config here.
# Dies if $vcpus exceeds the maximum, if the running vcpu count does not
# match the config before adding, or if unplug is requested on an old
# machine version; raises a parameter exception for 'vcpus' when QEMU does
# not reach the expected count in time.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count - no longer used, 'sockets' wins
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus = $maxcpus if !$vcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # poll 'query-cpus-fast' once per second until QEMU reports $expected
    # vcpus; gives up with a parameter exception after more than
    # $max_retries misses. Returns the last query result.
    my $wait_for_vcpu_count = sub {
        my ($expected, $max_retries, $errmsg) = @_;

        my $misses = 0;
        while (1) {
            my $running = mon_cmd($vmid, "query-cpus-fast");
            return $running if scalar(@{$running}) == $expected;
            raise_param_exc({ vcpus => $errmsg }) if $misses > $max_retries;
            $misses++;
            sleep 1;
        }
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # remove the highest numbered cpu first
        my $i = $currentvcpus;
        while ($i > $vcpus) {
            qemu_devicedel($vmid, "cpu$i");
            my $running = $wait_for_vcpu_count->($i - 1, 5, "error unplugging cpu$i");

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running});
            PVE::QemuConfig->write_config($vmid, $conf);
            $i--;
        }

        return;
    }

    my $running_before = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_before}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        my $i = $currentvcpus + 1;
        while ($i <= $vcpus) {
            qemu_deviceadd($vmid, print_cpu_device($conf, $i));
            my $running = $wait_for_vcpu_count->($i, 10, "error hotplugging cpu$i");

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$running});
            PVE::QemuConfig->write_config($vmid, $conf);
            $i++;
        }
    } else {
        # legacy interface, cpu ids start at $currentvcpus here
        my $id = $currentvcpus;
        while ($id < $vcpus) {
            mon_cmd($vmid, "cpu-add", id => int($id));
            $id++;
        }
    }
}
4482
# Apply I/O throttling limits to the block device $deviceid of VM $vmid via
# QMP 'block_set_io_throttle'. Does nothing if the VM is not running.
#
# After $vmid and $deviceid the positional arguments are, in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every value is passed to QEMU as an integer.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid) ;

    # QMP argument names, in the order of the positional parameters
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle_args = map { ($limit_names[$_], int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4514
# Resize the volume $volid on the storage and, if the VM is running, tell
# QEMU about the new size of $deviceid via QMP 'block_resize'.
#
# If the storage layer returns false from volume_resize, size 0 is passed on
# to QEMU. The size sent to QEMU is rounded up to a multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resized;

    return if !$running;

    # round up to the next multiple of 1024
    my $remainder = $size % 1024;
    $size += (1024 - $remainder) % 1024;

    mon_cmd(
        $vmid,
        "block_resize",
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );
}
4535
# Create the snapshot $snap of volume $volid. If the VM is running and
# do_snapshots_with_qemu says so, QEMU takes an internal snapshot of
# $deviceid; otherwise the storage layer does it.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $via_qemu = check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4547
# Delete the snapshot $snap of volume $volid.
#
# The volume only counts as "in use" when the VM is running AND the volume
# is referenced by a volume of the current config. An in-use volume that
# do_snapshots_with_qemu accepts is handled by QEMU; everything else goes
# to the storage layer, which also receives the in-use flag.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # running alone is not enough - the volume must be attached as well
        $in_use = undef;
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($conf, sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        });
    }

    my $via_qemu = $in_use && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$via_qemu) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4569
# Set the state of every migration capability the running QEMU reports.
#
# Capabilities missing from the table below are switched off. 'dirty-bitmaps'
# is enabled only if 'query-proxmox-support' reports the matching feature:
# 'pbs-dirty-bitmap-savevm' when $savevm is true, 'pbs-dirty-bitmap-migration'
# otherwise. A failing support query is treated as "not supported".
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my $wanted = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    };

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my $capabilities = [];
    for my $entry (@$supported) {
        my $name = $entry->{capability};
        my $state = $wanted->{$name} ? JSON::true : JSON::false;
        push @$capabilities, { capability => $name, state => $state };
    }

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => $capabilities);
}
4600
# Call $func->($volid, $attr, @param) once for every distinct volume ID
# referenced by the config $conf or by one of its snapshots (including the
# 'vmstate' key and unused volumes).
#
# $attr accumulates over all references of the volume:
#   cdrom                  - 1 only if every reference is a cdrom drive
#   replicate              - 1 if any reference has replication enabled (default on)
#   shared                 - 1 if any reference is marked shared
#   referenced_in_config   - 1 if referenced by the current config
#   referenced_in_snapshot - hash of snapshot names referencing the volume
#   size                   - first non-zero size seen
#   is_vmstate/is_tpmstate/is_unused - 1 if referenced under such a key
#   drivename              - key of the last seen reference that is a valid drive name
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $collect = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $attr = ($volhash->{$volid} //= {});

        $attr->{cdrom} //= 1;
        $attr->{cdrom} = 0 if !drive_is_cdrom($drive);

        $attr->{replicate} //= 0;
        $attr->{replicate} = 1 if $drive->{replicate} // 1;

        $attr->{shared} //= 0;
        $attr->{shared} = 1 if $drive->{shared};

        $attr->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $attr->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $attr->{referenced_in_config} = 1;
        }

        my $size = $drive->{size};
        $attr->{size} //= $size if $size;

        $attr->{is_vmstate} //= 0;
        $attr->{is_vmstate} = 1 if $key eq 'vmstate';

        $attr->{is_tpmstate} //= 0;
        $attr->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $attr->{is_unused} //= 0;
        $attr->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $attr->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    # current config first, then every snapshot section
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);
    for my $snapname (keys %{$conf->{snapshots}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{snapshots}->{$snapname}, $include_opts, $collect, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4658
# Options that vmconfig_hotplug_pending commits from the pending section
# right away, without any interaction with the running VM.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};
4671
# hotplug changes in [PENDING]
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages
#
# Works in three passes over $conf->{pending}:
#   1. options listed in $fast_plug_option are committed unconditionally
#   2. pending deletions are hot-unplugged / reset
#   3. pending additions and changes are hotplugged
# Inside the per-option eval blocks, die "skip\n" means "cannot be applied to
# the running VM, leave it pending"; any other error is stored in
# $errors->{$opt}. Options applied successfully are moved from the pending
# section into $conf, and the config is written at the end.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            $conf->{$opt} = $conf->{pending}->{$opt};
            delete $conf->{pending}->{$opt};
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                # 'hotplug' may be unset in the current config
                die "skip\n" if (($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                # vm_deviceplug expects a $device argument before $arch; the
                # tablet/keyboard have no device config, so pass an empty one
                # to keep $arch and $machine_type in their positions
                if ($defaults->{tablet}) {
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we are disabling it
                #die "skip\n" if !$hotplug_features->{usb} || $conf->{$opt} =~ m/spice/i;
                #vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef, 1024);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            &$add_error($opt, $err) if $err ne "skip\n";
        } else {
            delete $conf->{$opt};
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # Commits all pending cloud-init options (additions and deletions) and
    # regenerates the cloud-init config with the drive option $key set to
    # $value. Only the first call does anything.
    my ($apply_pending_cloudinit, $apply_pending_cloudinit_done);
    $apply_pending_cloudinit = sub {
        return if $apply_pending_cloudinit_done; # once is enough
        $apply_pending_cloudinit_done = 1; # once is enough

        my ($key, $value) = @_;

        my @cloudinit_opts = keys %$confdesc_cloudinit;
        foreach my $opt (keys %{$conf->{pending}}) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }

        my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
        foreach my $opt (sort keys %$pending_delete_hash) {
            next if !grep { $_ eq $opt } @cloudinit_opts;
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }

        my $new_conf = { %$conf };
        $new_conf->{$key} = $value;
        PVE::QemuServer::Cloudinit::generate_cloudinitconfig($new_conf, $vmid);
    };

    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                # switching memory hotplug on or off cannot be done live;
                # 'hotplug' may be unset in the current config
                die "skip\n" if ($value =~ /memory/)
                    || ($value !~ /memory/ && ($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                # see above: the empty hash fills vm_deviceplug's $device slot
                if ($value == 1) {
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb\d+$/) {
                die "skip\n";
                # since we cannot reliably hot unplug usb devices we disable it for now
                #die "skip\n" if !$hotplug_features->{usb} || $value =~ m/spice/i;
                #my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                #die "skip\n" if !$d;
                #qemu_usb_hotplug($storecfg, $conf, $vmid, $opt, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    &$apply_pending_cloudinit($opt, $value);
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $opt, $value);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares($conf->{pending}->{$opt}, 1024);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            &$add_error($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);
}
4864
# Free the volume behind the drive $drive (config key $key) if that is allowed.
#
# Only acts when $force is set or $key is an 'unused' entry, and never on
# cdrom drives. Returns 1 if the entry can be dropped from the config - the
# volume was freed, or it is not owned by this VM - and nothing otherwise.
# Dies if the user lacks 'Datastore.AllocateSpace' on the storage or the
# volume is still in use.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !($force || $key =~ /^unused/) || drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    my $in_use = PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid);
    die "unable to delete '$volid' - volume is still in use (snapshot?)\n" if $in_use;

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
4887
# Handle the removal of the drive option $opt from the config: with $force
# the volume is deallocated (requires 'VM.Config.Disk' on the VM), without
# it the volume is kept and registered as an unused drive.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    if (!$force) {
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive);
    } else {
        $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
        try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
    }
}
4903
4904
4905
# Apply all pending changes of $conf to the config itself (cold plug).
#
# Pending deletions are processed first, then pending additions/changes;
# drives that get deleted or replaced are detached/registered as unused
# through the drive helpers. A failing option is reported in
# $errors->{$opt} (and warned about) and stays pending. The config is
# written once at the end.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors) = @_;

    my $record_failure = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "unable to apply pending change $opt : $msg";
        warn $errors->{$opt};
    };

    # cold plug

    my $pending_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$pending_delete) {
        my $force = $pending_delete->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }
        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure
        eval {
            # a drive that is about to be replaced is kept as unused volume
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                my $old_drive = parse_drive($opt, $conf->{$opt});
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
            }
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }
        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
}
4955
# Apply the new value $value of the network option $opt to the running VM.
#
# If the device already exists and only bridge, tag, trunks, firewall, rate
# or link state differ, the tap interface / link is updated in place and 1
# is returned. A change of model, MAC address or queues, or a config where
# old and new do not both have a bridge, needs an unplug followed by a
# replug. New devices are simply plugged.
# Dies with "skip\n" whenever a (re)plug is needed but $hotplug is false.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            my ($index) = $opt =~ m/net(\d+)/ or die "internal error";
            my $iface = "tap${vmid}i$index";

            my $tap_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($tap_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = ($iface, @{$newnet}{qw(bridge tag firewall trunks rate)});
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5008
# Apply a changed or newly added drive option ($opt => $value) to a running VM.
#
# Behaviour, depending on what differs from the current config entry:
#   - media type changed: dies, this is never allowed
#   - disk backed by a different volume: unplug the old disk, register it as
#     unused and fall through to hot-plugging the new one
#   - same volume, only IO throttle limits changed: apply them live, return 1
#   - same volume, an option that cannot be changed on a running device
#     changed: die with "skip\n" so the change stays pending
#   - CD-ROM: eject and, if a new medium is given, insert it; return 1
#   - no previous entry: hot-plug the new drive
#
# Dies with "skip\n" whenever a hot-(un)plug would be needed but $hotplug is
# not set, or when the new drive is on an ide/sata bus.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (!drive_is_cdrom($old_drive)) {

            if ($drive->{file} ne $old_drive->{file}) {

                die "skip\n" if !$hotplug;

                # unplug and register as unused
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);

            } else {
                # update existing disk

                # Skip non hotpluggable values. Without this check a change
                # would fall through to 'return 1' below and be reported as
                # applied although the running device was never touched.
                # NOTE(review): 'ro' (read-only flag) and 'aio' are assumed to
                # be the keys parse_drive() uses for those options - confirm
                # against PVE::QemuServer::Drive. An unknown key is harmless
                # here, both sides would simply compare as undef.
                my @non_hotpluggable = qw(aio discard iothread queues cache ssd ro);
                die "skip\n"
                    if grep { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @non_hotpluggable;

                # apply throttle
                my @throttle_opts = qw(
                    mbps mbps_rd mbps_wr
                    iops iops_rd iops_wr
                    mbps_max mbps_rd_max mbps_wr_max
                    iops_max iops_rd_max iops_wr_max
                    bps_max_length bps_rd_max_length bps_wr_max_length
                    iops_max_length iops_rd_max_length iops_wr_max_length
                );
                if (grep { safe_num_ne($drive->{$_}, $old_drive->{$_}) } @throttle_opts) {

                    # bandwidth limits are configured in MiB/s but passed on in bytes/s
                    qemu_block_set_io_throttle(
                        $vmid,"drive-$opt",
                        ($drive->{mbps} || 0)*1024*1024,
                        ($drive->{mbps_rd} || 0)*1024*1024,
                        ($drive->{mbps_wr} || 0)*1024*1024,
                        $drive->{iops} || 0,
                        $drive->{iops_rd} || 0,
                        $drive->{iops_wr} || 0,
                        ($drive->{mbps_max} || 0)*1024*1024,
                        ($drive->{mbps_rd_max} || 0)*1024*1024,
                        ($drive->{mbps_wr_max} || 0)*1024*1024,
                        $drive->{iops_max} || 0,
                        $drive->{iops_rd_max} || 0,
                        $drive->{iops_wr_max} || 0,
                        $drive->{bps_max_length} || 1,
                        $drive->{bps_rd_max_length} || 1,
                        $drive->{bps_wr_max_length} || 1,
                        $drive->{iops_max_length} || 1,
                        $drive->{iops_rd_max_length} || 1,
                        $drive->{iops_wr_max_length} || 1,
                    );

                }

                return 1;
            }

        } else { # cdrom

            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                if (drive_is_cloudinit($old_drive)) {
                    vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
                }
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                if ($path) {
                    mon_cmd($vmid, "blockdev-change-medium",
                        id => "$opt", filename => "$path");
                }
            }

            return 1;
        }
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;
    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5116
# called in locked context by incoming migration
#
# Collects the non-CD-ROM volumes of $conf that live on non-shared storage.
# Returns a hash reference mapping the drive key to
# [$volid, $storeid, $volname, $drive, $use_existing], where $use_existing is
# 1 if the volid is listed in $replicated_volumes and 0 otherwise.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect_local = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        # volumes on shared storage do not have to be transferred
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = 0;
        $use_existing = 1 if $replicated_volumes->{$volid};

        $local_volumes{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_local);

    return \%local_volumes;
}
5142
# called in locked context by incoming migration
#
# Prepares the target volumes for an NBD storage migration. $source_volumes
# has the layout returned by vm_migrate_get_nbd_disks. Volumes flagged as
# re-usable are taken as they are and marked 'replicated'; for all others a
# new volume is allocated. Returns a hash reference mapping the drive key to
# { drivestr, volid [, replicated] }.
#
# Note: the drive hashes inside $source_volumes are updated in place with the
# new 'format' and 'file' values.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    for my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt} = {
                drivestr => print_drive($drive),
                volid => $volid,
                replicated => 1,
            };
            next;
        }

        my $format;
        if ($storagemap->{identity}) {
            # same storage on both sides: keep the format of the original
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        } else {
            # A remote storage is specified: keep the original format if it is
            # available there, otherwise fall back to the default format.
            $storeid = map_storage($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $fileFormat = qemu_img_format($scfg, $volname);
            if (grep { $fileFormat eq $_ } @{$validFormats}) {
                $format = $fileFormat;
            } else {
                $format = $defFormat;
            }
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        $drive->{format} = $format;
        $drive->{file} = $newvolid;

        $nbd->{$opt}->{drivestr} = print_drive($drive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5186
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Takes the config lock of $vmid, runs the pre-start checks and then hands
# over to vm_start_nolock. If the VM is already running while a 'backup' lock
# is held, the guest is resumed inside the existing QEMU instance instead.
# Sets $params->{resume} as a side effect.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        my $storagemap = $migrate_opts->{storagemap};
        if ($storagemap) {
            my $disks = vm_migrate_get_nbd_disks(
                $storecfg, $conf, $migrate_opts->{replicated_volumes});
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the freshly allocated/re-used target volumes
            $conf->{$_} = $nbd->{$_}->{drivestr} for keys %$nbd;
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5230
5231
# Start the QEMU process for $vmid using the already loaded $conf. As the name
# says, no config lock is taken here.
#
# Returns a hash reference which may contain:
#   migrate_uri         => URI the incoming migration listens on (undef if none)
#   migrate_storage_uri => base NBD URI for storage migration
#   drives              => per-drive NBD export info (includes nbd_uri)
#   spice_port          => spice port, only set for incoming migrations
#
# Dies with "start failed: ..." if launching QEMU fails; volumes are
# deactivated and the PCI reservation is removed in that case.
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force Qemu machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes = which volids should be re-used with bitmaps for nbd migration
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    # pending changes are only applied when not starting from a state file
    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    PVE::QemuServer::Cloudinit::generate_cloudinitconfig($conf, $vmid) if !$migratedfrom;

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if $migratedfrom;

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # Lazily determine the local IP to use for migration traffic. The result is
    # cached in $migration_ip. Lookup order: migrate_opts->{network}, then the
    # migration network from datacenter.cfg, then remote_node_ip() for the
    # given node name.
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            # exactly one local address inside the CIDR is required
            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = @$ips[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    # extend the QEMU command line depending on how the VM state is provided
    my $migrate_uri;
    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $localip = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            # migration type: explicit option, then datacenter.cfg, then 'secure'
            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            # only insecure migration listens on a non-localhost address
            if ($migration_type eq 'insecure') {
                $localip = $get_migration_ip->($nodename);
                $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $migrate_port = PVE::Tools::next_migrate_port($pfamily);
            $migrate_uri = "tcp:${localip}:${migrate_port}";
            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be default for secure migrations as a ssh TCP forward
            # tunnel is not deterministically ready in time and regularly
            # fails to set up, so use UNIX socket forwards
            my $socket_addr = "/run/qemu-server/$vmid.migrate";
            unlink $socket_addr;

            $migrate_uri = "unix:$socket_addr";

            push @$cmd, '-incoming', $migrate_uri;
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            # state file given as an existing path
            push @$cmd, '-loadstate', $statefile;
        } else {
            # otherwise treat it as a volume ID; it is added to $vollist so it
            # gets activated together with the other volumes
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_devices = {}; # host pci devices
    for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
        my $dev = $conf->{"hostpci$i"} or next;
        $pci_devices->{$i} = parse_hostpci($dev);
    }

    # flat list of the PCI IDs of all configured hostpci entries
    my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } values $pci_devices->%* ];
    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout);

    eval {
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            for my $dev ($d->{pciid}->@*) {
                PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
            }
        }
    };
    if (my $err = $@) {
        # give the reservation back before propagating the original error
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    # stop a possibly left-over scope of the same name; errors are ignored
    eval {
        run_command(['/bin/systemctl', 'stop', "$vmid.scope"], outfunc => sub{}, errfunc => sub{});
    };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 5);

    my $cpuunits = get_cpuunits($conf);

    # no run_command timeout when starting from a state file or for a migration
    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $cpuunits = 10000 if $cpuunits >= 10000; # else we get an error
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    # Enter the systemd scope, start swtpm if a TPM state is configured and
    # finally launch QEMU, all via PVE::Tools::run_fork.
    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            # 'noerr' is set in %run_params, so the exit code is returned
            # here and checked manually
            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {

        # allocate hugepages, start QEMU and reset the allocation again if the
        # start fails (unless 'keephugepages' is set)
        my $code = sub {
            my $hugepages_topology = PVE::QemuServer::Memory::hugepages_topology($conf);
            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    # $@ stems from whichever eval of the two branches above was executed
    if (my $err = $@) {
        # deactivate volumes if start fails
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        eval { PVE::QemuServer::PCI::remove_pci_reservation($pci_id_list) };

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) };
    warn $@ if $@;

    print "migration listens on $migrate_uri\n" if $migrate_uri;
    $res->{migrate_uri} = $migrate_uri;

    # started with '-loadstate' (neither 'tcp' nor 'unix'): continue the guest
    if ($statefile && $statefile ne 'tcp' && $statefile ne 'unix') {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    #start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        # NOTE(review): $migration_type can still be undef here when
        # migrate_opts->{type} was not passed and $statefile is not 'tcp' -
        # the 'eq' below would then emit an "uninitialized" warning.
        if ($nbd_protocol_version > 0 && $migration_type eq 'secure') {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            # brackets are only added for the URI, not for the QMP command above
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        $res->{migrate_storage_uri} = $migrate_storage_uri;

        # export every drive writable and report its URI
        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};
            mon_cmd($vmid, "nbd-server-add", device => "drive-$opt", writable => JSON::true );
            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        # regular start: apply balloon target (configured in MiB) and link state
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
    }

    # skipped only when 'balloon' is explicitly set to 0
    mon_cmd($vmid, 'qom-set',
        path => "machine/peripheral/balloon0",
        property => "guest-stats-polling-interval",
        value => 2) if (!defined($conf->{balloon}) || $conf->{balloon});

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        # drop everything related to the suspended state from the config
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
5606
# Returns the QEMU command line for $vmid as a single string. If $snapname is
# given, the command line is built from that snapshot's config, honouring the
# machine/CPU overrides stored in it. Dies if the snapshot does not exist.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);
    my ($forcemachine, $forcecpu);

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname};
        if (!defined($snapshot)) {
            die "snapshot '$snapname' does not exist\n";
        }

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = @$snapshot{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $defaults = load_defaults();

    # deliberately called in scalar context, as in the code being replaced
    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
5634
# Sends 'system_reset' to the VM while holding its config lock. The lock
# stored in the config is checked first unless $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
5647
# Returns an array reference with all volume IDs referenced by $conf. Entries
# given as absolute path and entries without a parseable storage ID are left
# out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        # absolute paths are not storage volumes
        return if $volid =~ m|^/|;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        push @volids, $volid if $sid;

        return;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
5665
# Best-effort cleanup after a VM has stopped: deactivate its volumes (unless
# $keepActive), remove the runtime files, the ivshmem file and mediated
# devices, drop the PCI reservation and optionally apply pending config
# changes. Errors are only reported as warnings.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            my $volumes = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $volumes);

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # runtime files of the QEMU instance
        for my $ext (qw(mon qmp pid vnc qga)) {
            unlink "/var/run/qemu-server/${vmid}.$ext";
        }

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # Simply delete it for now: VMs which already have it open are not
            # affected, new VMs will get a separate one. Should this become a
            # problem, either add some sort of ref-counting or a "don't delete
            # on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        my @reserved_ids;
        for my $key (keys %$conf) {
            next if $key !~ m/^hostpci(\d+)$/;
            my $index = $1;

            my $hostpci = parse_hostpci($conf->{$key});
            my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);

            for my $pci (@{$hostpci->{pciid}}) {
                push @reserved_ids, $pci->{id};
                PVE::SysFSTools::pci_cleanup_mdev_device($pci->{id}, $uuid);
            }
        }
        PVE::QemuServer::PCI::remove_pci_reservation(\@reserved_ids);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
5716
# call only in locked context
#
# Stops or shuts down a running VM and cleans up afterwards. Returns right
# away if the VM is not running.
#
# With $shutdown the guest is asked to power down (via the guest agent if it
# is enabled in the config, via ACPI otherwise), without it QEMU is told to
# quit. If the VM is still running after $timeout seconds (default 60), or the
# command itself failed, the process receives SIGTERM when $force is set -
# otherwise the function dies. 10 seconds after SIGTERM a SIGKILL follows.
# With $nocheck the config is not loaded, so no lock check, hookscript or
# cleanup takes place.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            # fall back to the shutdown timeout of the startup order setting
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # polls once per second, returns true if the VM was still running after
    # $limit rounds
    my $timed_out_waiting = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }

        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout = 60 if !defined($timeout);

        if (!$timed_out_waiting->($timeout)) {
            # stopped in time
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again
    if ($timed_out_waiting->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
5798
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# Stops the VM. $force defaults to 1 for a hard stop (no $shutdown). With
# $migratedfrom the process is simply sent SIGTERM and cleaned up using the
# config of the source node, without taking the config lock; otherwise the
# stop is done by _do_vm_stop under the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force //= 1 if !$shutdown;

    if (!$migratedfrom) {
        my $stop_locked = sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        };
        return PVE::QemuConfig->lock_config($vmid, $stop_locked);
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
5819
# Reboots a running VM: creates a reboot request and then shuts the VM down
# under the config lock. Does nothing if the VM is not running. On any error
# the reboot request is removed again and the error is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
5842
# Suspend a VM.
#
# Without $includestate the VM is only paused via the QMP 'stop' command.
# With $includestate the VM state is saved to a newly allocated state volume
# and QEMU is quit afterwards; the config lock goes from 'suspending' to
# 'suspended' along the way.
#
# Parameters:
#   $vmid         - ID of the VM
#   $skiplock     - skip the check of the lock stored in the config
#   $includestate - also save the VM state to storage
#   $statestorage - storage for the state volume; if not given, one is chosen
#                   with find_vmstate_storage and - except for CLI callers -
#                   'Datastore.AllocateSpace' is checked on it
#
# note: if using the statestorage parameter, the caller has to check privileges
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # declared out here because they are shared between the locked sections
    # and the unlocked state saving in between
    my $conf;
    my $path;
    my $storecfg;
    my $vmstate;

    # phase 1 (locked): either pause the VM, or prepare the state volume
    PVE::QemuConfig->lock_config($vmid, sub {

        $conf = PVE::QemuConfig->load_config($vmid);

        # a 'backup' lock does not prevent pausing the VM
        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf)
            if !($skiplock || $is_backing_up);

        die "cannot suspend to disk during backup\n"
            if $is_backing_up && $includestate;

        if ($includestate) {
            $conf->{lock} = 'suspending';
            my $date = strftime("%Y-%m-%d", localtime(time()));
            $storecfg = PVE::Storage::config();
            if (!$statestorage) {
                $statestorage = find_vmstate_storage($conf, $storecfg);
                # check permissions for the storage
                my $rpcenv = PVE::RPCEnvironment::get();
                if ($rpcenv->{type} ne 'cli') {
                    my $authuser = $rpcenv->get_user();
                    $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
                }
            }


            $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
                $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
            $path = PVE::Storage::path($storecfg, $vmstate);
            # persists the 'suspending' lock set above
            PVE::QemuConfig->write_config($vmid, $conf);
        } else {
            mon_cmd($vmid, "stop");
        }
    });

    if ($includestate) {
        # save vm state
        # phase 2: this part runs without holding the config lock
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);
            # poll once per second until saving the state completed or failed
            for(;;) {
                my $state = mon_cmd($vmid, "query-savevm");
                if (!$state->{status}) {
                    die "savevm not active\n";
                } elsif ($state->{status} eq 'active') {
                    sleep(1);
                    next;
                } elsif ($state->{status} eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                } elsif ($state->{status} eq 'failed' && $state->{error}) {
                    die "query-savevm failed with error '$state->{error}'\n"
                } else {
                    die "query-savevm returned status '$state->{status}'\n";
                }
            }
        };
        # remember the error, it is handled below while holding the lock again
        my $err = $@;

        # phase 3 (locked): clean up after an error, or finish the suspend
        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);
            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            # the config was reloaded, so make sure nobody changed the lock
            # while the state was being saved
            die "lock changed unexpectedly\n"
                if !PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
5937
# Resumes a VM under the config lock. Nothing is done if QEMU reports the
# status 'running'. A guest in status 'suspended' is woken up with
# 'system_wakeup', everything else is continued with 'cont'; a guest in status
# 'shutdown' is reset first. Unless $nocheck is set, the config is loaded and
# its lock is checked - a 'backup' lock or $skiplock bypasses that check.
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    my $resume_locked = sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $status = $res->{status};

        # job done, go home
        return if $status && $status eq 'running';

        my $resume_cmd = ($status && $status eq 'suspended') ? 'system_wakeup' : 'cont';
        my $needs_reset = ($status && $status eq 'shutdown') ? 1 : 0;

        if (!$nocheck) {
            my $conf = PVE::QemuConfig->load_config($vmid);

            PVE::QemuConfig->check_lock($conf)
                if !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup');
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $needs_reset;

        mon_cmd($vmid, $resume_cmd);
    };

    PVE::QemuConfig->lock_config($vmid, $resume_locked);
}
5968
# Send a key event to a VM while holding its config lock.
#
# $vmid:     ID of the VM
# $skiplock: accepted for interface compatibility, not used here
# $key:      key name, passed verbatim to the monitor 'sendkey' command
#
# Dies with the monitor output if the command returned any.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_locked = sub {
        # the loaded config itself is not used; NOTE(review): presumably kept so
        # that the call fails early for a VM without config - confirm
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_locked);
}
5981
# vzdump restore implementation
5983
# Return the name of the first entry of a tar archive.
#
# $archive: path to the archive; must be an existing plain file
#
# Dies if the file does not exist, if 'tar' cannot be started or if the listing
# is empty.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    if (!-f $archive) {
        die "ERROR: file '$archive' does not exist\n";
    }

    # list the archive through tar; only the first line of output is needed
    my $tar_pid = open(my $listing, '-|', 'tar', 'tf', $archive);
    die "unable to open file '$archive'\n" if !$tar_pid;

    my $first_entry = readline($listing);

    # terminate tar right away instead of waiting for the complete listing
    kill(15, $tar_pid);
    close($listing);

    die "ERROR: archive contaions no data\n" if !$first_entry;

    chomp($first_entry);

    return $first_entry;
}
6001
# Remove all volumes recorded in the stat file of a failed tar restore.
#
# $storecfg: storage configuration, used to free volumes referenced by volume ID
# $statfile: path of the stat file; lines of the form 'vzdump:<dev>:<volid>' are
#            expected, where <volid> is either an absolute path or a volume ID
#
# Best effort: every problem is reported on STDERR and never propagated. A stat
# file that cannot be opened is silently skipped.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # 'or' (not '||') so that the check applies to the result
                        # of unlink and not to its argument
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6027
# Restore a VM from a file archive, dispatching on the archive format.
#
# $archive: path of the archive, or '-' to read a vma stream from STDIN
# $vmid:    ID of the VM to restore to
# $user:    user ID, forwarded to the format specific restore function
# $opts:    restore options; 'format' overrides the detected archive format
#
# Returns whatever the selected restore function returns.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    # STDIN is always handled by the vma restore
    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $archive_info = PVE::Storage::archive_info($archive);

    # an explicitly requested format wins over the detected one
    my $format = $opts->{format} // $archive_info->{format};

    return restore_tar_archive($archive, $vmid, $user, $opts)
        if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $archive_info->{compression});
}
6045
# Helper to remove disks that will not be used after restore
#
# $storecfg:     Storage configuration
# $vmid:         ID of the VM that gets overwritten
# $oldconf:      the configuration of the VM before the restore
# $virtdev_hash: devices contained in the backup, keyed by config key
#
# Errors while freeing a volume are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my $free_replaced_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        # nothing to do without a volume, absolute paths are never freed
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        # only touch volumes owned by this VM
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        return if !$virtdev_hash->{$ds};

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    };
    PVE::QemuConfig->foreach_volume($oldconf, $free_replaced_volume);

    # delete vmstate files, after the restore we have no snapshots anymore
    for my $snap (values %{$oldconf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !$vmstate;

        eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
        warn $@ if $@;
    }
};
6082
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    my $virtdev_hash = {};
    my $forced_storage = $options->{storage};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $hint_storeid, $format) = ($1, $2, $3, $4);

            my $entry = $devinfo->{$devname};
            die "archive does not contain data for drive '$virtdev'\n"
                if !$entry;

            # an explicitly requested storage wins over the hint, 'local' is the fallback
            my $storeid = defined($forced_storage) ? $forced_storage : $hint_storeid;
            $storeid ||= 'local';
            $format ||= 'raw';

            @{$entry}{qw(devname virtdev format storeid)} = ($devname, $virtdev, $format, $storeid);

            # check permission on storage
            $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
                if $user ne 'root@pam';

            $virtdev_hash->{$virtdev} = $entry;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my ($virtdev, $drive_str) = ($1, $2);

            # of the plain drive lines, only cloud-init drives get an entry
            my $drive = parse_drive($virtdev, $drive_str);
            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $forced_storage if defined($forced_storage);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

            $virtdev_hash->{$virtdev} = {
                format => qemu_img_format($scfg, $volname), # has 'raw' fallback
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
6143
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Every entry of $virtdev_hash gets its 'volid' set; 'format' is replaced by the
# default format of the storage if the requested one is not supported there.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # round the size up to a multiple of 1024
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # cloud-init volumes get a fixed name, with the format as suffix if the
        # storage configuration has a 'path'; all other names stay undefined
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6187
# Translate one line of a backed-up VM config into the restored config.
#
# $cookie: state shared between calls; 'netcount' is the index of the next
#          'netX' entry generated from an old 'vlanX' line
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the config line, including its trailing newline
# $unique: if true, MAC addresses and the smbios1 UUID are regenerated
#
# Returns the text to append to the new config, which may be empty.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # lines that are never carried over into the restored config
    my @dropped = (
        qr/^\#qmdump\#/,
        qr/^\#vzdump\#/,
        qr/^lock:/,
        qr/^unused\d+:/,
        qr/^parent:/,
    );
    for my $re (@dropped) {
        return '' if $line =~ $re;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            if (!$macaddr || $unique) {
                $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix});
            }

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);

        my $net = parse_net($netstr);
        # only an existing MAC address gets replaced
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};

        return "$id: " . print_net($net) . "\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are kept as a comment only
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume are passed through unchanged
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($key, $value) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($value);
        $smbios1->{uuid} = $uuid_str;
        return $key . print_smbios1($smbios1) . "\n";
    }

    # everything else is taken over verbatim
    return $line;
}
6256
# Helper to deactivate all volumes recorded in $devinfo
#
# $storecfg: Storage configuration
# $devinfo: per-device info; entries without a 'volid' are ignored
my $restore_deactivate_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values %$devinfo;

    PVE::Storage::deactivate_volumes($storecfg, \@volids);
};
6268
# Helper to remove all volumes recorded in $devinfo after a failed restore
#
# $storecfg: Storage configuration
# $devinfo: per-device info; entries without a 'volid' are skipped, a 'volid'
#           starting with '/' is treated as a plain file path
#
# Best effort: every problem is reported on STDERR and never propagated.
my $restore_destroy_volumes = sub {
    my ($storecfg, $devinfo) = @_;

    foreach my $devname (keys %$devinfo) {
        my $volid = $devinfo->{$devname}->{volid};
        next if !$volid;
        eval {
            if ($volid =~ m|^/|) {
                # 'or' (not '||') so that the check applies to the result of
                # unlink and not to its argument
                unlink($volid) or die "unlink failed - $!\n";
            } else {
                PVE::Storage::vdisk_free($storecfg, $volid);
            }
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6286
# Collect the image volumes known to the storage layer.
#
# $cfg:  storage configuration
# $vmid: passed on to the volume listing; may be undef
#
# Returns a hash ref { $volid => $item }. Every item additionally gets its
# 'path' filled in; items lacking a 'volid' or a 'size' are left out.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $per_storage = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $items (values %$per_storage) {
        for my $item (@$items) {
            my $volid = $item->{volid};
            next if !($volid && $item->{size});

            $item->{path} = PVE::Storage::path($cfg, $volid);
            $volid_hash->{$volid} = $item;
        }
    }

    return $volid_hash;
}
6303
# Bring the disk entries of a VM config in line with the volumes that exist.
#
# $vmid:       ID of the VM, used for messages and to recognize state volumes
# $conf:       the VM config, modified in place
# $volid_hash: { $volid => $info } of existing volumes, $info having 'path' and 'size'
#
# Three passes: refresh the size of configured drives, drop 'unusedX' entries
# whose volume is referenced elsewhere, add unreferenced volumes as unused.
#
# Returns 1 if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Volumes already accounted for, used as well as unused ones.
    #
    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my %seen_volid;
    my %seen_path;

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file} or return;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($volume) {
            my $path = $volume->{path};
            $seen_path{$path} = 1 if $path;
        }

        return if drive_is_cdrom($drive);
        return if !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file} or return;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($seen_volid{$volid} || ($path && $seen_path{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        # also catches a second unused entry for the same volume
        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    # add whatever is left over as unused volume
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6377
# Rescan the storages and update the disk entries of one or all VMs.
#
# $vmid:   ID of the VM to update; all VMs from config_list() if undef
# $nolock: if true, the config is updated without taking the config lock
# $dryrun: if true, changes are printed but the config is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);

        PVE::QemuConfig->check_lock($conf);

        # only consider the volumes owned by this VM
        my @own = grep {
            my $owner = $volid_hash->{$_}->{vmid};
            $owner && $owner == $id;
        } keys %$volid_hash;

        my $vm_volids = {};
        @{$vm_volids}{@own} = @{$volid_hash}{@own};

        my $changes = update_disk_config($id, $conf, $vm_volids);

        PVE::QemuConfig->write_config($id, $conf) if $changes && !$dryrun;
    };

    my @targets;
    if (defined($vmid)) {
        @targets = ($vmid);
    } else {
        my $vmlist = config_list();
        @targets = keys %$vmlist;
    }

    for my $id (@targets) {
        if ($nolock) {
            $updatefn->($id);
        } else {
            PVE::QemuConfig->lock_config($id, $updatefn, $id);
        }
    }
}
6421
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive: volume ID of the backup; must parse as vtype 'backup', format 'pbs-vm'
# $vmid:    ID of the VM to restore to
# $user:    user ID, used for the storage permission checks
# $options: hash ref; 'live', 'unique' and 'pool' are read here, the whole hash
#           is also handed to the backup hint parser
#
# Dies on any error. Volumes recorded in $devinfo up to that point are
# deactivated and destroyed again, the temporary directory is always removed.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    # start from an empty temporary directory
    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {};

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        # collect the sizes of all drive images listed in the index
        foreach my $info (@{$index->{files}}) {
            # the dots are escaped so that only a literal '.img.fidx' suffix matches
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the config: build the new config text
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes have to stay active
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $devinfo);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $devinfo);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
6624
# Start a VM on top of a PBS snapshot and stream the data into its disks.
#
# $vmid:           ID of the VM
# $conf:           the VM config
# $storecfg:       storage configuration
# $restored_disks: hash ref keyed by device name ('drive-<config key>'); only
#                  the keys are used here
# $repo:           PBS repository
# $keyfile:        path of the encryption key file, only used if it exists
# $snap:           name of the PBS snapshot
#
# Dies with "live-restore failed" after stopping the VM if anything goes wrong
# once the restore has been started.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $repo, $keyfile, $snap) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$repo', snapshot: '$snap'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale or undefined capture
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' - expected 'drive-<name>'\n";

        $pbs_backing->{$confname} = {
            repository => $repo,
            snapshot => $snap,
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $keyfile if -e $keyfile;

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occured during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
6694
# Restore a VM from a vma archive.
#
# $archive: path of the archive, or '-' to read it from STDIN
# $vmid:    ID of the VM to restore to
# $user:    user ID, used for the storage permission checks
# $opts:    restore options; 'bwlimit', 'unique' and 'pool' are read here, the
#           whole hash is also handed to the backup hint parser
# $comp:    compression of the archive, if any
#
# The archive is fed through a pipeline (optional rate limiter, optional
# decompressor, 'vma extract'). The device mapping is sent to 'vma extract'
# through a fifo as soon as the embedded config has been received.
#
# Dies on any error; volumes recorded in $devinfo are destroyed again then.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $cfg = PVE::Storage::config();
    my $bwlimit = $opts->{bwlimit};

    # every stage after the first one reads from STDIN, so $readfrom switches
    # to '-' as soon as one stage has been queued
    my $readfrom = $archive;
    my $commands = [];
    my $dbg_cmdstring = '';
    my $add_pipe = sub {
        my ($stage) = @_;
        push @$commands, $stage;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($stage);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                $add_pipe->(['cstream', '-t', $readlimit*1024, '--', $readfrom]);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $decompress = $info->{decompressor};
        push @$decompress, $readfrom;
        $add_pipe->($decompress);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    # the device map is handed to 'vma extract' through this fifo; its write end
    # is opened by run_command's 'afterfork' hook
    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {};

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    # per storage rate limit in bytes, 0 meaning unlimited
    my %storage_limits;

    # allocates the target volumes, writes the device map to the fifo and builds
    # the new config text
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # look up the rate limit once per target storage
        for my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        for my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        for my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $limit = $storage_limits{$storeid};
            my $map_opts = $limit ? "throttling.bps=$limit:throttling.group=$storeid:" : '';

            # zeros need not be written if the volume has the 'sparseinit' feature
            my $write_zeros = PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid) ? 0 : 1;

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        my $parser = sub {
            my ($line) = @_;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
                return;
            }

            return if $line !~ m/^CTIME: /;

            # we correctly received the vma config, so we can disable
            # the timeout now for disk allocation (set to 10 minutes, so
            # that we always timeout if something goes wrong)
            alarm(600);
            $print_devmap->();
            print $fifofh "done\n";

            # hand back the alarm that was pending when we started
            my $previous = $oldtimeout || 0;
            $oldtimeout = undef;
            alarm($previous);

            close($fifofh);
            $fifofh = undef;
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $devinfo);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $devinfo);
        die $err;
    }

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
6910
# Restore a VM from a tar based vzdump archive.
#
# $archive: path of the archive, or '-' to read it from STDIN
# $vmid:    ID of the VM to restore to
# $user:    user ID, exported to the extract helper as VZDUMP_USER
# $opts:    restore options; 'storage', 'pool', 'prealloc' and 'info' are turned
#           into command line switches of the extract helper, 'unique' is used
#           when the config is rewritten
#
# Dies on any error. Unless 'info' is set, the volumes recorded in the stat
# file are removed again then. The temporary directory is removed in any case.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    # handed to the extract helper through the environment
    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # note: this only leaves the eval block, not the function
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file lives inside $tmpdir, so remove the directory only now
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7018
# Call $func with the storage ID of every storage that backs at least one
# non-CD-ROM volume of $conf. Each storage is reported once, in sorted order.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $collect_storage = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # values that do not parse as a volume ID yield no storage ID and are
        # skipped by the truthiness check
        my ($storeid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$storeid} = $storeid if $storeid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect_storage);

    for my $storeid (sort keys %used_storage) {
        $func->($storeid);
    }
}
7038
# Storage types for which do_snapshots_with_qemu() answers "yes", unless the
# storage has 'krbd' set.
my $qemu_snap_storage = {
    rbd => 1,
};

# Returns 1 if snapshots of $volid are to be done with QEMU, nothing otherwise.
#
# That is the case for volumes on a storage type listed in $qemu_snap_storage
# (without 'krbd') and for volume IDs ending in '.qcow2' or '.qed'. Devices
# whose ID contains 'tpmstate0' are always answered with "no".
# Dies if the storage of $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    return 1 if $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7061
# Ping the guest agent of VM $vmid (3 second timeout).
# Returns 1 if the ping succeeded, 0 otherwise. The failure is reported with a
# warning unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $ping_error = $@;
    return 1 if !$ping_error;

    warn "Qemu Guest Agent is not running - $ping_error" if !$nowarn;
    return 0;
}
7072
# Convert the volumes of VM $vmid into base volumes.
#
# Every non-CD-ROM volume of $conf (or only the drive named $disk, if given)
# whose storage reports the 'template' feature is replaced by the volume
# returned from vdisk_create_base(). The config is written after each converted
# volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7093
# Translate an 'iscsi://<portal>/<target>/<lun>' URL into the option string
# that qemu-img expects together with --image-opts / --target-image-opts.
#
# The initiator name is taken from get_initiator_name().
# Dies if $path does not have the expected URL form.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my ($portal, $target, $lun) = ($1, $2, $3);

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7110
# Copy the content of $src_volid into the already existing volume $dst_volid
# by running 'qemu-img convert'.
#
# Parameters:
#   $src_volid           - source volume ID, or the path of an existing file
#   $dst_volid           - destination volume ID (must be a valid volume ID)
#   $size                - total size, only used to render the progress output
#   $snapname            - optional snapshot of the source to copy from
#   $is_zero_initialized - if true, a non-iSCSI destination is opened through
#                          the 'zeroinit:' prefix
#
# Dies if source or destination cannot be resolved or if qemu-img fails
# ("copy failed: ...").
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid) {
        # plain file: the format can only be guessed from the file extension
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);

    # iSCSI URLs are passed as option strings, everything else as path + format
    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn the percentage lines printed by 'qemu-img -p' into a byte based progress line
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7192
# Returns the image format to use for $volname on the storage described by $scfg.
#
# For storages with a 'path' the format is the file extension of $volname, if
# it is one of the formats in $PVE::QemuServer::Drive::QEMU_FORMAT_RE. In all
# other cases the result is "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};
    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    return "raw";
}
7202
# Start a 'drive-mirror' block job for drive $drive of VM $vmid and hand over to
# qemu_drive_mirror_monitor().
#
# $dst_volid is either an 'nbd:' target, which is passed on unchanged, or a
# volume ID that gets resolved to a path (prefixed with 'zeroinit:' if
# $is_zero_initialized is set). The job is registered in $jobs under the key
# "drive-$drive". With $src_bitmap an incremental sync using that bitmap is
# requested, with $bwlimit a speed limit of $bwlimit * 1024.
#
# If the monitor command fails, all jobs in $jobs are cancelled and the sub dies
# with "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;

    $jobs->{"drive-$drive"} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);
        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        $qemu_target = $dst_path;
        $qemu_target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my $opts = {
        timeout => 10,
        device => "drive-$drive",
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $mirror_error = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $mirror_error\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7254
# Poll the block jobs listed in $jobs (keyed by device ID) of VM $vmid once per
# second, print their progress and finish them according to $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# Further parameters:
#   $vmiddst - if set and different from $vmid, the jobs are cancelled instead of
#              completed once they are ready, while the guest filesystems are
#              frozen (if $qga is set and the agent responds) or the VM is suspended
#   $op      - job type to look at in 'query-block-jobs', defaults to "mirror"
#
# Finished jobs are removed from $jobs. On any error all remaining jobs are
# cancelled and the sub dies with "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # number of completion attempts rejected with "cannot be completed"
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                # a job that is gone is only fine if we completed it ourselves
                # or if it is expected to finish on its own ('auto')
                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # stop repeating the status line once the job was seen ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    # quiesce the guest while the jobs get cancelled; failing to
                    # do so is reported, but does not abort the operation
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job...\n";

                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $err = $@;
                        if ($err && $err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($err) {
                            # any other failure must not be mistaken for success,
                            # else the job would be retried and "completed" forever
                            die "$job_id: block job cannot be completed - $err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7392
# Cancel all block jobs listed in $jobs (keyed by device ID) of VM $vmid and
# wait, polling once per second, until none of them is reported by
# 'query-block-jobs' anymore. Jobs that are gone are removed from $jobs.
#
# Errors of the cancel command itself are ignored; a failing status query is
# propagated to the caller.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running = map { ($_->{device} => $_) } @$stats;

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if !keys %$jobs;

        sleep 1;
    }
}
7423
# Clone drive $drivename ($drive) of VM $vmid for VM $newvmid and return the
# drive description of the clone.
#
# Without $full a linked clone is created via vdisk_clone(). With $full a new
# volume is allocated on the source storage (or on $storage, if given) and the
# data is copied: with qemu-img while the VM is not $running or when cloning
# from $snapname, with a drive-mirror block job otherwise. Cloud-init drives
# only get a fresh volume, no data is copied for them.
#
# Every newly created volume ID is pushed onto $newvollist. $jobs, $completion,
# $qga and $bwlimit are passed on to the drive-mirror code, $conf is only used
# to determine the size of 'efidisk0'.
#
# NOTE: the returned hash is the same reference as $drive, so the caller's
# $drive is modified ('format', 'file' and possibly 'size').
sub clone_disk {
    my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
        $newvmid, $storage, $format, $full, $newvollist, $jobs, $completion, $qga, $bwlimit, $conf) = @_;

    my $newvolid;

    if (!$full) {
        print "create linked clone of drive $drivename ($drive->{file})\n";
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        # an explicitly requested target storage overrides the source storage
        $storeid = $storage if $storage;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        print "create full clone of drive $drivename ($drive->{file})\n";
        my $name = undef;
        my $size = undef;
        # special drives have a fixed size, all others use the size of the source volume
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($drivename eq 'efidisk0') {
            $size = get_efivars_size($conf);
        } elsif ($drivename eq 'tpmstate0') {
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        # presumably $size is in bytes and vdisk_alloc expects KiB - TODO confirm
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
            # nothing to copy for cloud-init drives, skip the data clone below
            goto no_data_clone;
        }

        my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
        if (!$running || $snapname) {
            # TODO: handle bwlimits
            if ($drivename eq 'efidisk0') {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file});
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                run_command(['qemu-img', 'dd', '-n', '-O', $dst_format, "bs=$bs", "osize=$size",
                    "if=$src_path", "of=$dst_path"]);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
            }
        } else {

            die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';

            my $kvmver = get_running_qemu_version ($vmid);
            if (!min_version($kvmver, 2, 7)) {
                die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
                    if $drive->{iothread};
            }

            qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                $completion, $qga, $bwlimit);
        }
    }

no_data_clone:
    # the size is only recorded if it can be queried, errors are ignored here
    my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    # no copy is made: $disk and $drive refer to the same hash
    my $disk = $drive;
    $disk->{format} = undef;
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
7519
# Returns the version of the QEMU process running VM $vmid as "<major>.<minor>",
# as reported by the 'query-version' monitor command.
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my $qemu = $res->{qemu};

    return sprintf('%s.%s', $qemu->{major}, $qemu->{minor});
}
7525
# Decide whether the old (non-EFI) PXE BIOS files have to be used for
# $machine_type.
#
# Returns the list ($use_old_bios_files, $machine_type), where the returned
# machine type has a trailing '.pxe' removed. Returns nothing if no machine
# type was given.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $base_type = $1;
        return (1, $base_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
7547
# Returns the size in bytes of the OVMF vars image that get_ovmf_files()
# selects for the VM config $conf. Dies if that image file does not exist.
sub get_efivars_size {
    my ($conf) = @_;

    my $arch = get_vm_arch($conf);

    my $efidisk;
    $efidisk = parse_drive('efidisk0', $conf->{efidisk0}) if $conf->{efidisk0};

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
    die "uefi vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;

    return -s $ovmf_vars;
}
7557
# Set the 'size' of the 'efidisk0' entry in $conf to the value returned by
# get_efivars_size(). Does nothing if $conf has no 'efidisk0'. Returns nothing.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
7569
# Set the 'size' of the 'tpmstate0' entry in $conf to
# PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE.
#
# Does nothing (and returns nothing) if $conf has no 'tpmstate0', consistent
# with update_efidisk_size(); otherwise returns the updated property string.
sub update_tpmstate_size {
    my ($conf) = @_;

    # without this guard a bogus drive entry would be built from an undefined value
    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);

    return $conf->{tpmstate0};
}
7577
# Allocate a new EFI vars volume for VM $vmid on storage $storeid (format $fmt)
# and fill it with the OVMF vars image selected by get_ovmf_files().
#
# Returns the list ($volid, $size / 1024), with $size as reported by
# volume_size_info() for the new volume. Dies if the vars image does not exist.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);
    die "EFI vars default image not found\n" if ! -f $ovmf_vars;

    # the image size is needed in bytes for the copy and in kb for the allocation
    my $image_bytes = -s $ovmf_vars;
    my $image_kb = PVE::Tools::convert_size($image_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $image_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $image_bytes, undef, 0);

    my ($allocated_size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $allocated_size/1024);
}
7594
# Returns a hash reference mapping the 'id' of every entry reported by the
# 'query-iothreads' monitor command of VM $vmid to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of = map { ($_->{id} => $_->{"thread-id"}) } @$res;

    return \%thread_id_of;
}
7607
# Returns ($maxdev, $controller, $controller_prefix) for a SCSI $drive.
#
# $maxdev depends on the 'scsihw' setting of $conf: 7 if unset or starting with
# 'lsi', 1 for 'virtio-scsi-single' and 256 for everything else. $controller is
# the drive index divided by $maxdev (rounded down), $controller_prefix is
# "virtioscsi" for 'virtio-scsi-single' and "scsihw" otherwise.
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev =
        (!$scsihw || $scsihw =~ m/^lsi/) ? 7
        : $is_single                     ? 1
        :                                  256;

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
7628
# Map an 'ostype' value to a Windows version number.
#
# 'wxp', 'w2k3' and 'w2k' yield 5, 'w2k8' and 'wvista' yield 6 and 'win<N>'
# yields <N>. Everything else, including an empty or undefined value, yields 0.
sub windows_version {
    my ($ostype) = @_;

    return 0 if !$ostype;

    my %version_of = (
        wxp => 5,
        w2k3 => 5,
        w2k => 5,
        w2k8 => 6,
        wvista => 6,
    );

    return $version_of{$ostype} if exists $version_of{$ostype};
    return $1 if $ostype =~ m/^win(\d+)$/;
    return 0;
}
7646
# Determine the image format for a new volume on storage $storeid.
#
# An explicitly requested $format wins; without one the format derived from
# $src_volname is used as hint. If neither is given, or if the resulting format
# is not among the valid formats of the storage, the default format of the
# storage is returned.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($default_format, $valid_formats) =
        PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $default_format if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $supported = grep { $_ eq $format } @$valid_formats;

    return $supported ? $format : $default_format;
}
7666
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Returns the ID of the storage to use for the VM state of $conf, picked in
# this order:
#   1. the 'vmstatestorage' set in the config
#   2. a shared storage on which the VM has at least one disk
#   3. a non-shared storage on which the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3. storages with a 'path' (file based) are preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my %candidate;

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';
        # prefer file based storage
        $candidate{$kind} = $sid if !$candidate{$kind} || $scfg->{path};
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
7690
# Returns a newly generated UUID in its textual form.
sub generate_uuid {
    my $raw_uuid;
    UUID::generate($raw_uuid);

    my $uuid_text;
    UUID::unparse($raw_uuid, $uuid_text);

    return $uuid_text;
}
7697
# Returns a 'uuid=<uuid>' string with a newly generated UUID.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
7701
# Send the 'nbd-server-stop' monitor command to VM $vmid and return its result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
7707
# Create the (empty) reboot trigger file for VM $vmid below /run/qemu-server.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
7714
# Remove the reboot trigger file of VM $vmid.
#
# Returns the result of unlink(), i.e. a true value if a trigger file was
# removed and 0 if there was none. Dies on any error other than ENOENT.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
7726
# Build a boot order hash (device name => boot index) from the legacy boot
# setting, a string of single-letter device classes.
#
# The n-th letter gets the base index n*100. As used below, 'd' stands for
# CD-ROM drives, 'c' for the disk named by 'bootdisk' and 'n' for network
# devices. The legacy string is taken from $bootcfg->{legacy}, falling back to
# the default of the 'legacy' boot format property.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    # next free boot index per device class letter
    my $next_index = {};
    my $position = 0;
    for my $class (split(//, $boot)) {
        $position++;
        $next_index->{$class} = $position * 100;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            return if !$next_index->{d};

            $bootorder->{$ds} = $next_index->{d};
            $next_index->{d} += 1;
        } elsif ($next_index->{c}) {
            # only the configured boot disk gets an entry, but the index is
            # advanced for every disk
            $bootorder->{$ds} = $next_index->{c}
                if $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $next_index->{c} += 1;
        }
    });

    if ($next_index->{n}) {
        for my $net_id (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_id";
            next if !$conf->{$netname};

            $bootorder->{$netname} = $next_index->{n};
            $next_index->{n} += 1;
        }
    }

    return $bootorder;
}
7766
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference holding, in this order and as far as present: the
# first hard disk, the first CD-ROM drive and the first configured network
# device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk (0) first, then cdrom (1)
    for my $cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $cdrom);
        push @devices, $first if $first;
    }

    # network
    for my $net_id (0 .. $MAX_NETS - 1) {
        my $netname = "net$net_id";
        next if !$conf->{$netname};

        push @devices, $netname;
        last;
    }

    return \@devices;
}
7794
# Returns the boot order hash (device name => boot index) for the VM config
# $conf.
#
# Without a 'boot' property, or with one in the legacy format, the result
# comes from bootorder_from_legacy(). With an 'order' list the devices are
# numbered in list order starting at 100. Otherwise the hash is empty.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my $bootorder = {};
    if ($boot->{order}) {
        my $bootindex = 100; # start at 100 to allow user to insert devices before us with -args
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$dev} = $bootindex;
            $bootindex++;
        }
    }

    return $bootorder;
}
7814
# Connect to the qmeventd socket and send the handshake that marks VM $vmid as
# being backed up.
#
# The connection attempt is repeated after 25ms when it fails with EINTR or
# EAGAIN, up to five attempts in total. Any other error, or running out of
# attempts, is fatal. Returns the socket handle; the caller has to close it
# once the inhibit is no longer required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $fh;

    for (my $count = 1; ; $count++) {
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $count retries\n"
            if $count > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
7842
# bash completion helper

# Completion callback: returns an array reference with the volume IDs of all
# backup archives whose format is 'vma.<compressor>'. If the value typed so far
# ($cvalue) starts with '<storage>:', only that storage is queried.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my $storeid;
    $storeid = $1 if $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    foreach my $id (keys %$data) {
        push @$res,
            map { $_->{volid} }
            grep { defined($_->{volid}) }
            grep { $_->{format} =~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/ }
            @{$data->{$id}};
    }

    return $res;
}
7868
# Returns an array reference with the IDs of the VMs reported by vmstatus().
#
# With an undefined $running all VMs are listed. Otherwise templates are left
# out and only VMs whose status is 'running' ($running true) respectively not
# 'running' ($running false) are listed.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $wanted = sub {
        my ($d) = @_;

        return 1 if !defined($running);
        return 0 if $d->{template};

        my $is_running = $d->{status} eq 'running';
        return $running ? $is_running : !$is_running;
    };

    my $res = [ grep { $wanted->($idlist->{$_}) } keys %$idlist ];

    return $res;
};
7888
# Completion callback: IDs of all VMs, regardless of their state.
sub complete_vmid {
    return $complete_vmid_full->();
}
7892
# Completion callback: IDs of all non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
7896
# Completion callback: IDs of all non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
7900
# Completion callback: returns an array reference with the IDs of all storages
# that pass storage_check_enabled() and have the 'images' content type
# configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
7915
# Completion callback: like complete_storage(), but storage_check_enabled() is
# asked about the node given as second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = @$all_args[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, $targetnode, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
7933
# Returns a true value if the 'query-status' monitor command reports VM $vmid
# as "paused".
#
# If the config does not exist on this node or the query fails, the error is
# printed as a warning and a false value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus;
    eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        $qmpstatus = mon_cmd($vmid, "query-status");
    };
    warn "$@\n" if $@;

    return $qmpstatus if !$qmpstatus;
    return $qmpstatus->{status} eq "paused";
}
7943
# Assert that the storage of volume $vol has the content type of that volume
# configured. Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    return 1 if $scfg->{content}->{$vtype};

    die "storage '$storeid' does not support content-type '$vtype'\n";
}
7956
7957 1;