]> git.proxmox.com Git - qemu-server.git/blame - PVE/QemuServer.pm
qemuserver: foreach_volid: include pending volumes
[qemu-server.git] / PVE / QemuServer.pm
CommitLineData
baa4f62d 1package PVE::QemuServer;
1e3baf05
DM
2
3use strict;
990fc5e2 4use warnings;
3ff84d6f 5
5da072fb
TL
6use Cwd 'abs_path';
7use Digest::SHA;
8use Fcntl ':flock';
9use Fcntl;
1e3baf05 10use File::Basename;
5da072fb 11use File::Copy qw(copy);
1e3baf05
DM
12use File::Path;
13use File::stat;
14use Getopt::Long;
5da072fb
TL
15use IO::Dir;
16use IO::File;
17use IO::Handle;
18use IO::Select;
19use IO::Socket::UNIX;
1e3baf05 20use IPC::Open3;
c971c4f2 21use JSON;
c3d15108 22use List::Util qw(first);
1f30ac3a 23use MIME::Base64;
5da072fb
TL
24use POSIX;
25use Storable qw(dclone);
f85951dc 26use Time::HiRes qw(gettimeofday usleep);
5da072fb 27use URI::Escape;
425441e6 28use UUID;
5da072fb 29
82841214 30use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
5b65b00d 31use PVE::CGroup;
83870398 32use PVE::CpuSet;
48cf040f 33use PVE::DataCenterConfig;
5da072fb 34use PVE::Exception qw(raise raise_param_exc);
3b56383b 35use PVE::Format qw(render_duration render_bytes);
81d6e4e1 36use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
a52eb3c4
DC
37use PVE::Mapping::PCI;
38use PVE::Mapping::USB;
1e3baf05 39use PVE::INotify;
4df98f2f 40use PVE::JSONSchema qw(get_standard_option parse_property_string);
1e3baf05 41use PVE::ProcFSTools;
fbec3f89 42use PVE::PBSClient;
34e82fa2 43use PVE::RESTEnvironment qw(log_warn);
91bd6c90 44use PVE::RPCEnvironment;
5da072fb 45use PVE::Storage;
b71351a7 46use PVE::SysFSTools;
d04d6af1 47use PVE::Systemd;
82841214 48use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
5da072fb
TL
49
50use PVE::QMPClient;
51use PVE::QemuConfig;
238af88e 52use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
5da072fb 53use PVE::QemuServer::Cloudinit;
5b65b00d 54use PVE::QemuServer::CGroup;
d786a274 55use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
75748d44 56use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
2ea5fb7e 57use PVE::QemuServer::Machine;
5da072fb 58use PVE::QemuServer::Memory;
0a13e08e 59use PVE::QemuServer::Monitor qw(mon_cmd);
74c17b7a 60use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
0cf8d56c 61use PVE::QemuServer::USB;
1e3baf05 62
28e129cc
AD
63my $have_sdn;
64eval {
65 require PVE::Network::SDN::Zones;
66 $have_sdn = 1;
67};
68
# Base directory of the EDK2 firmware images. It already ends in a slash and the table
# below joins with another '/', so every resulting path contains a double slash.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# OVMF firmware images, keyed by guest architecture and then by firmware flavour.
# Every value is a two-element list: [ CODE image path, VARS image path ].
my $OVMF = {
    aarch64 => {
        default => [ map { $EDK2_FW_BASE . '/' . $_ } qw(AAVMF_CODE.fd AAVMF_VARS.fd) ],
    },
    x86_64 => {
        default => [ map { $EDK2_FW_BASE . '/' . $_ } qw(OVMF_CODE.fd OVMF_VARS.fd) ],
        '4m' => [
            map { $EDK2_FW_BASE . '/' . $_ } qw(OVMF_CODE_4M.secboot.fd OVMF_VARS_4M.fd)
        ],
        '4m-ms' => [
            map { $EDK2_FW_BASE . '/' . $_ } qw(OVMF_CODE_4M.secboot.fd OVMF_VARS_4M.ms.fd)
        ],
        '4m-no-smm' => [
            map { $EDK2_FW_BASE . '/' . $_ } qw(OVMF_CODE_4M.fd OVMF_VARS_4M.fd)
        ],
        '4m-no-smm-ms' => [
            map { $EDK2_FW_BASE . '/' . $_ } qw(OVMF_CODE_4M.fd OVMF_VARS_4M.ms.fd)
        ],
    },
};
2ddc0a5c 100
7f0b5beb 101my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
1e3baf05 102
8d88a594
TL
103# Note about locking: we use flock on the config file to protect against concurrent actions.
104# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
105# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such lock is set.
106# But you can ignore this kind of lock with the --skiplock flag.
1e3baf05 107
cf364f95
TL
108cfs_register_file(
109 '/qemu-server/',
110 \&parse_vm_config,
111 \&write_vm_config
112);
1e3baf05 113
3ea94c60
DM
114PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
115 description => "Some command save/restore state from this location.",
116 type => 'string',
117 maxLength => 128,
118 optional => 1,
119});
120
c6737ef1 121PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
7bd9abd2 122 description => "Specifies the QEMU machine type.",
c6737ef1 123 type => 'string',
9471e48b 124 pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
c6737ef1
DC
125 maxLength => 40,
126 optional => 1,
127});
128
1a67f999 129# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
38277afc
TL
my $nodename_cache;

# Return the local node name. PVE::INotify::nodename() is only consulted while no
# defined value has been memoized yet; later calls are served from $nodename_cache.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
1e3baf05 135
ec3582b5
WB
# Property-string schema for the virtual watchdog device; registered below as the
# 'pve-qm-watchdog' format.
my $watchdog_fmt = {
    model => {
        type => 'string',
        description => 'Watchdog type to emulate.',
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
    },
    action => {
        type => 'string',
        description => 'The action to perform if after activation the guest fails to poll the watchdog in time.',
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
    },
};
152PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
153
9d66b397
SI
# Property-string schema used as the format of the 'agent' option.
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        description => 'Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.',
        default_key => 1,
        default => 0,
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        description => 'Run fstrim after moving a disk or migrating the VM.',
        default => 0,
        optional => 1,
    },
    'freeze-fs-on-backup' => {
        type => 'boolean',
        description => 'Freeze/thaw guest filesystems on backup for consistency.',
        default => 1,
        optional => 1,
    },
    type => {
        type => 'string',
        description => 'Select the agent type',
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
    },
};
181
55655ebc
DC
# Property-string schema used as the format of the 'vga' option.
my $vga_fmt = {
    type => {
        type => 'string',
        description => 'Select the VGA type.',
        enum => [
            'cirrus',
            'qxl',
            'qxl2',
            'qxl3',
            'qxl4',
            'none',
            'serial0',
            'serial1',
            'serial2',
            'serial3',
            'std',
            'virtio',
            'virtio-gl',
            'vmware',
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => 'Sets the VGA memory (in MiB). Has no effect with serial display.',
        minimum => 4,
        maximum => 512,
        optional => 1,
    },
};
199
6dbcb073
DC
# Property-string schema used as the format of the 'ivshmem' option.
my $ivshmem_fmt = {
    name => {
        type => 'string',
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
        format_description => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        optional => 1,
    },
    size => {
        type => 'integer',
        description => 'The size of the file in MB.',
        minimum => 1,
    },
};
214
1448547f
AL
# Property-string schema used as the format of the 'audio0' option.
my $audio_fmt = {
    device => {
        type => 'string',
        description => 'Configure an audio device.',
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
    },
    driver => {
        type => 'string',
        description => 'Driver backend for the audio device.',
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
    },
};
229
c4df18db
AL
# Property-string schema used as the format of the 'spice_enhancements' option.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        description => 'Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.',
        default => '0', # kept as the string '0', exactly as declared originally
        optional => 1,
    },
    videostreaming => {
        type => 'string',
        description => 'Enable video streaming. Uses compression for detected video streams.',
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
    },
};
245
2cf61f33
SR
# Property-string schema used as the format of the 'rng0' option.
my $rng_fmt = {
    source => {
        type => 'string',
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
        description => join('',
            "The file on the host to gather entropy from. In most cases '/dev/urandom'",
            " should be preferred over '/dev/random' to avoid entropy-starvation issues on the",
            " host. Using urandom does *not* decrease security in any meaningful way, as it's",
            " still seeded from real entropy, and the bytes provided will most likely be mixed",
            " with real entropy on the guest as well. '/dev/hwrng' can be used to pass through",
            " a hardware RNG from the host.",
        ),
    },
    max_bytes => {
        type => 'integer',
        description => join('',
            "Maximum bytes of entropy allowed to get injected into the guest every",
            " 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use",
            " `0` to disable limiting (potentially dangerous!).",
        ),
        # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
        # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
        # reading from /dev/urandom
        default => 1024,
        optional => 1,
    },
    period => {
        type => 'integer',
        description => join('',
            "Every 'period' milliseconds the entropy-injection quota is reset, allowing",
            " the guest to retrieve another 'max_bytes' of entropy.",
        ),
        default => 1000,
        optional => 1,
    },
};
278
26b443c8
TL
# Property-string schema used as the format of the 'meta' option.
my $meta_info_fmt = {
    ctime => {
        type => 'integer',
        description => 'The guest creation timestamp as UNIX epoch time',
        minimum => 0,
        optional => 1,
    },
    'creation-qemu' => {
        type => 'string',
        description => 'The QEMU (machine) version from the time this VM was created.',
        pattern => '\d+(\.\d+)+',
        optional => 1,
    },
};
293
1e3baf05
DM
294my $confdesc = {
295 onboot => {
296 optional => 1,
297 type => 'boolean',
298 description => "Specifies whether a VM will be started during system bootup.",
299 default => 0,
300 },
301 autostart => {
302 optional => 1,
303 type => 'boolean',
304 description => "Automatic restart after crash (currently ignored).",
305 default => 0,
306 },
2ff09f52 307 hotplug => {
483ceeab
TL
308 optional => 1,
309 type => 'string', format => 'pve-hotplug-features',
310 description => "Selectively enable hotplug features. This is a comma separated list of"
94ec5e7c 311 ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
c60cad61
DC
312 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
313 ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
314 ." windows > 7.",
b3c2bdd1 315 default => 'network,disk,usb',
2ff09f52 316 },
1e3baf05
DM
317 reboot => {
318 optional => 1,
319 type => 'boolean',
320 description => "Allow reboot. If set to '0' the VM exit on reboot.",
321 default => 1,
322 },
323 lock => {
324 optional => 1,
325 type => 'string',
326 description => "Lock/unlock the VM.",
159719e5 327 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
1e3baf05
DM
328 },
329 cpulimit => {
330 optional => 1,
c6f773b8 331 type => 'number',
52261945 332 description => "Limit of CPU usage.",
8d88a594
TL
333 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
334 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
1e3baf05 335 minimum => 0,
c6f773b8 336 maximum => 128,
483ceeab 337 default => 0,
1e3baf05
DM
338 },
339 cpuunits => {
340 optional => 1,
341 type => 'integer',
483ceeab 342 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
67498860
TL
343 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
344 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
345 ." weights of all the other running VMs.",
e65e35ca 346 minimum => 1,
237239bf 347 maximum => 262144,
67498860 348 default => 'cgroup v1: 1024, cgroup v2: 100',
1e3baf05
DM
349 },
350 memory => {
351 optional => 1,
352 type => 'integer',
252e2624 353 description => "Amount of RAM for the VM in MiB. This is the maximum available memory when"
8d88a594 354 ." you use the balloon device.",
1e3baf05
DM
355 minimum => 16,
356 default => 512,
357 },
13a48620 358 balloon => {
483ceeab
TL
359 optional => 1,
360 type => 'integer',
252e2624 361 description => "Amount of target RAM for the VM in MiB. Using zero disables the ballon driver.",
8b1accf7
DM
362 minimum => 0,
363 },
364 shares => {
483ceeab
TL
365 optional => 1,
366 type => 'integer',
367 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
8d88a594
TL
368 ." more memory this VM gets. Number is relative to weights of all other running VMs."
369 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
8b1accf7
DM
370 minimum => 0,
371 maximum => 50000,
372 default => 1000,
13a48620 373 },
1e3baf05
DM
374 keyboard => {
375 optional => 1,
376 type => 'string',
233fb336
DW
377 description => "Keyboard layout for VNC server. This option is generally not required and"
378 ." is often better handled from within the guest OS.",
e95fe75f 379 enum => PVE::Tools::kvmkeymaplist(),
aea47dd6 380 default => undef,
1e3baf05
DM
381 },
382 name => {
383 optional => 1,
7fabe17d 384 type => 'string', format => 'dns-name',
1e3baf05
DM
385 description => "Set a name for the VM. Only used on the configuration web interface.",
386 },
cdd20088
AD
387 scsihw => {
388 optional => 1,
389 type => 'string',
52261945 390 description => "SCSI controller model",
6731a4cf 391 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
cdd20088
AD
392 default => 'lsi',
393 },
1e3baf05
DM
394 description => {
395 optional => 1,
396 type => 'string',
a200af10
TL
397 description => "Description for the VM. Shown in the web-interface VM's summary."
398 ." This is saved as comment inside the configuration file.",
399 maxLength => 1024 * 8,
1e3baf05
DM
400 },
401 ostype => {
402 optional => 1,
403 type => 'string',
483ceeab 404 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
52261945
DM
405 description => "Specify guest operating system.",
406 verbose_description => <<EODESC,
407Specify guest operating system. This is used to enable special
408optimization/features for specific operating systems:
409
410[horizontal]
411other;; unspecified OS
412wxp;; Microsoft Windows XP
413w2k;; Microsoft Windows 2000
414w2k3;; Microsoft Windows 2003
415w2k8;; Microsoft Windows 2008
416wvista;; Microsoft Windows Vista
417win7;; Microsoft Windows 7
44c2a647 418win8;; Microsoft Windows 8/2012/2012r2
1f5828f2 419win10;; Microsoft Windows 10/2016/2019
179b9f1b 420win11;; Microsoft Windows 11/2022
52261945 421l24;; Linux 2.4 Kernel
ea0bc514 422l26;; Linux 2.6 - 6.X Kernel
52261945 423solaris;; Solaris/OpenSolaris/OpenIndiania kernel
1e3baf05
DM
424EODESC
425 },
426 boot => {
427 optional => 1,
2141a802 428 type => 'string', format => 'pve-qm-boot',
483ceeab
TL
429 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
430 ." key or 'legacy=' is deprecated.",
1e3baf05
DM
431 },
432 bootdisk => {
433 optional => 1,
434 type => 'string', format => 'pve-qm-bootdisk',
2141a802 435 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
03e480fc 436 pattern => '(ide|sata|scsi|virtio)\d+',
1e3baf05
DM
437 },
438 smp => {
439 optional => 1,
440 type => 'integer',
441 description => "The number of CPUs. Please use option -sockets instead.",
442 minimum => 1,
443 default => 1,
444 },
445 sockets => {
446 optional => 1,
447 type => 'integer',
448 description => "The number of CPU sockets.",
449 minimum => 1,
450 default => 1,
451 },
452 cores => {
453 optional => 1,
454 type => 'integer',
455 description => "The number of cores per socket.",
456 minimum => 1,
457 default => 1,
458 },
8a010eae
AD
459 numa => {
460 optional => 1,
461 type => 'boolean',
1917695c 462 description => "Enable/disable NUMA.",
8a010eae
AD
463 default => 0,
464 },
7023f3ea
AD
465 hugepages => {
466 optional => 1,
467 type => 'string',
468 description => "Enable/disable hugepages memory.",
469 enum => [qw(any 2 1024)],
470 },
f36e9894
SR
471 keephugepages => {
472 optional => 1,
473 type => 'boolean',
474 default => 0,
4df98f2f
TL
475 description => "Use together with hugepages. If enabled, hugepages will not not be deleted"
476 ." after VM shutdown and can be used for subsequent starts.",
f36e9894 477 },
de9d1e55 478 vcpus => {
3bd18e48
AD
479 optional => 1,
480 type => 'integer',
de9d1e55 481 description => "Number of hotplugged vcpus.",
3bd18e48 482 minimum => 1,
de9d1e55 483 default => 0,
3bd18e48 484 },
1e3baf05
DM
485 acpi => {
486 optional => 1,
487 type => 'boolean',
488 description => "Enable/disable ACPI.",
489 default => 1,
490 },
bc84dcca 491 agent => {
ab6a046f 492 optional => 1,
7bd9abd2 493 description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
9d66b397
SI
494 type => 'string',
495 format => $agent_fmt,
ab6a046f 496 },
1e3baf05
DM
497 kvm => {
498 optional => 1,
499 type => 'boolean',
500 description => "Enable/disable KVM hardware virtualization.",
501 default => 1,
502 },
503 tdf => {
504 optional => 1,
505 type => 'boolean',
8c559505
DM
506 description => "Enable/disable time drift fix.",
507 default => 0,
1e3baf05 508 },
19672434 509 localtime => {
1e3baf05
DM
510 optional => 1,
511 type => 'boolean',
8d88a594
TL
512 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
513 ." the `ostype` indicates a Microsoft Windows OS.",
1e3baf05
DM
514 },
515 freeze => {
516 optional => 1,
517 type => 'boolean',
518 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
519 },
520 vga => {
521 optional => 1,
55655ebc
DC
522 type => 'string', format => $vga_fmt,
523 description => "Configure the VGA hardware.",
4df98f2f
TL
524 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
525 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
526 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
527 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
528 ." display server. For win* OS you can select how many independent displays you want,"
529 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
530 ." using a serial device as terminal.",
1e3baf05 531 },
0ea9541d
DM
532 watchdog => {
533 optional => 1,
534 type => 'string', format => 'pve-qm-watchdog',
52261945 535 description => "Create a virtual hardware watchdog device.",
4df98f2f
TL
536 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
537 ." action), the watchdog must be periodically polled by an agent inside the guest or"
538 ." else the watchdog will reset the guest (or execute the respective action specified)",
0ea9541d 539 },
1e3baf05
DM
540 startdate => {
541 optional => 1,
19672434 542 type => 'string',
1e3baf05 543 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
4df98f2f
TL
544 description => "Set the initial date of the real time clock. Valid format for date are:"
545 ."'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
1e3baf05
DM
546 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
547 default => 'now',
548 },
43574f73 549 startup => get_standard_option('pve-startup-order'),
68eda3ab
AD
550 template => {
551 optional => 1,
552 type => 'boolean',
553 description => "Enable/disable Template.",
554 default => 0,
555 },
1e3baf05
DM
556 args => {
557 optional => 1,
558 type => 'string',
52261945
DM
559 description => "Arbitrary arguments passed to kvm.",
560 verbose_description => <<EODESCR,
c7a8aad6 561Arbitrary arguments passed to kvm, for example:
1e3baf05 562
bda7ccb1 563args: -no-reboot -smbios 'type=0,vendor=FOO'
c7a8aad6
FG
564
565NOTE: this option is for experts only.
1e3baf05
DM
566EODESCR
567 },
568 tablet => {
569 optional => 1,
570 type => 'boolean',
571 default => 1,
52261945 572 description => "Enable/disable the USB tablet device.",
4df98f2f
TL
573 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
574 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
575 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
576 ." may consider disabling this to save some context switches. This is turned off by"
577 ." default if you use spice (`qm set <vmid> --vga qxl`).",
1e3baf05
DM
578 },
579 migrate_speed => {
580 optional => 1,
581 type => 'integer',
582 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
583 minimum => 0,
584 default => 0,
585 },
586 migrate_downtime => {
587 optional => 1,
04432191 588 type => 'number',
1e3baf05
DM
589 description => "Set maximum tolerated downtime (in seconds) for migrations.",
590 minimum => 0,
04432191 591 default => 0.1,
1e3baf05
DM
592 },
593 cdrom => {
594 optional => 1,
b799312f 595 type => 'string', format => 'pve-qm-ide',
8485b9ba 596 typetext => '<volume>',
1e3baf05
DM
597 description => "This is an alias for option -ide2",
598 },
599 cpu => {
600 optional => 1,
601 description => "Emulated CPU type.",
602 type => 'string',
5d008ad3 603 format => 'pve-vm-cpu-conf',
1e3baf05 604 },
b7ba6b79
DM
605 parent => get_standard_option('pve-snapshot-name', {
606 optional => 1,
607 description => "Parent snapshot name. This is used internally, and should not be modified.",
608 }),
982c7f12
DM
609 snaptime => {
610 optional => 1,
611 description => "Timestamp for snapshots.",
612 type => 'integer',
613 minimum => 0,
614 },
18bfb361
DM
615 vmstate => {
616 optional => 1,
617 type => 'string', format => 'pve-volume-id',
4df98f2f
TL
618 description => "Reference to a volume which stores the VM state. This is used internally"
619 ." for snapshots.",
18bfb361 620 },
253624c7
FG
621 vmstatestorage => get_standard_option('pve-storage-id', {
622 description => "Default storage for VM state volumes/files.",
623 optional => 1,
624 }),
c6737ef1 625 runningmachine => get_standard_option('pve-qemu-machine', {
4df98f2f
TL
626 description => "Specifies the QEMU machine type of the running vm. This is used internally"
627 ." for snapshots.",
c6737ef1 628 }),
ea1c2110 629 runningcpu => {
4df98f2f
TL
630 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
631 ." internally for snapshots.",
ea1c2110
SR
632 optional => 1,
633 type => 'string',
634 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
635 format_description => 'QEMU -cpu parameter'
636 },
c6737ef1 637 machine => get_standard_option('pve-qemu-machine'),
d731ecbe
WB
638 arch => {
639 description => "Virtual processor architecture. Defaults to the host.",
640 optional => 1,
641 type => 'string',
642 enum => [qw(x86_64 aarch64)],
643 },
2796e7d5
DM
644 smbios1 => {
645 description => "Specify SMBIOS type 1 fields.",
646 type => 'string', format => 'pve-qm-smbios1',
5d004b00 647 maxLength => 512,
2796e7d5
DM
648 optional => 1,
649 },
cb0e4540
AG
650 protection => {
651 optional => 1,
652 type => 'boolean',
4df98f2f
TL
653 description => "Sets the protection flag of the VM. This will disable the remove VM and"
654 ." remove disk operations.",
cb0e4540
AG
655 default => 0,
656 },
3edb45e7 657 bios => {
a783c78e 658 optional => 1,
3edb45e7
DM
659 type => 'string',
660 enum => [ qw(seabios ovmf) ],
661 description => "Select BIOS implementation.",
662 default => 'seabios',
a783c78e 663 },
6ee499ff
DC
664 vmgenid => {
665 type => 'string',
666 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
667 format_description => 'UUID',
4df98f2f
TL
668 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
669 ." to disable explicitly.",
670 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
671 ." value identifier to the guest OS. This allows to notify the guest operating system"
672 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
673 ." execution or creation from a template). The guest operating system notices the"
674 ." change, and is then able to react as appropriate by marking its copies of"
675 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
676 ."Note that auto-creation only works when done through API/CLI create or update methods"
677 .", but not when manually editing the config file.",
f7ed64e7 678 default => "1 (autogenerated)",
6ee499ff
DC
679 optional => 1,
680 },
9e784b11
DC
681 hookscript => {
682 type => 'string',
683 format => 'pve-volume-id',
684 optional => 1,
685 description => "Script that will be executed during various steps in the vms lifetime.",
686 },
6dbcb073
DC
687 ivshmem => {
688 type => 'string',
689 format => $ivshmem_fmt,
4df98f2f
TL
690 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
691 ." the host.",
6dbcb073 692 optional => 1,
2e7b5925
AL
693 },
694 audio0 => {
695 type => 'string',
1448547f 696 format => $audio_fmt,
194b65f1 697 description => "Configure a audio device, useful in combination with QXL/Spice.",
2e7b5925
AL
698 optional => 1
699 },
c4df18db
AL
700 spice_enhancements => {
701 type => 'string',
702 format => $spice_enhancements_fmt,
703 description => "Configure additional enhancements for SPICE.",
704 optional => 1
705 },
b8e7068a
DC
706 tags => {
707 type => 'string', format => 'pve-tag-list',
708 description => 'Tags of the VM. This is only meta information.',
709 optional => 1,
710 },
2cf61f33
SR
711 rng0 => {
712 type => 'string',
713 format => $rng_fmt,
714 description => "Configure a VirtIO-based Random Number Generator.",
715 optional => 1,
716 },
26b443c8
TL
717 meta => {
718 type => 'string',
719 format => $meta_info_fmt,
720 description => "Some (read-only) meta-information about this guest.",
721 optional => 1,
722 },
83870398
DB
723 affinity => {
724 type => 'string', format => 'pve-cpuset',
326704e7 725 description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
83870398
DB
726 optional => 1,
727 },
9ed7a77c
WB
728};
729
cb702ebe
DL
# Property-string schema for the 'cicustom' option; each property names a volume holding a
# custom cloud-init file. Registered below as the 'pve-qm-cicustom' format.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # This literal used to open with a single quote but continue with `"` + `."`, so the
        # quote, newline, indentation and `."` all ended up inside the description text.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
761PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
762
# Schema of the cloud-init related VM configuration options.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        # The parenthesis around `ostype` was never closed in the original text.
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    ciupgrade => {
        optional => 1,
        type => 'boolean',
        description => 'cloud-init: do an automatic package upgrade after the first boot.',
        default => 1,
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
819
820# what about other qemu settings ?
821#cpu => 'string',
822#machine => 'string',
823#fda => 'file',
824#fdb => 'file',
825#mtdblock => 'file',
826#sd => 'file',
827#pflash => 'file',
828#snapshot => 'bool',
829#bootp => 'file',
830##tftp => 'dir',
831##smb => 'dir',
832#kernel => 'file',
833#append => 'string',
834#initrd => 'file',
835##soundhw => 'string',
836
# Publish every option currently in $confdesc as the standard option "pve-qm-<name>".
# Keys added to $confdesc after this point are not covered by this loop.
for my $opt_name (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt_name", $confdesc->{$opt_name});
}
840
5bdcf937 841my $MAX_NETS = 32;
bae179aa 842my $MAX_SERIAL_PORTS = 4;
1989a89c 843my $MAX_PARALLEL_PORTS = 3;
2ed5d572
AD
844my $MAX_NUMA = 8;
845
ffc0d8c7
WB
# Property-string schema for one guest NUMA node; registered below as the
# 'pve-qm-numanode' format.
my $numa_fmt = {
    cpus => {
        type => 'string',
        description => 'CPUs accessing this NUMA node.',
        format_description => 'id[-id];...',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
    },
    memory => {
        type => 'number',
        description => 'Amount of memory this NUMA node provides.',
        optional => 1,
    },
    hostnodes => {
        type => 'string',
        description => 'Host NUMA nodes to use.',
        format_description => 'id[-id];...',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        optional => 1,
    },
    policy => {
        type => 'string',
        description => 'NUMA allocation policy.',
        enum => ['preferred', 'bind', 'interleave'],
        optional => 1,
    },
};
872PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
2ed5d572
AD
# Option schema shared by all numaN config keys, also published as the standard option
# 'pve-qm-numanode'.
my $numadesc = {
    type => 'string',
    format => $numa_fmt,
    description => 'NUMA topology.',
    optional => 1,
};
PVE::JSONSchema::register_standard_option('pve-qm-numanode', $numadesc);

# numa0 .. numa<$MAX_NUMA - 1> all point at the very same schema hash.
$confdesc->{"numa$_"} = $numadesc for 0 .. $MAX_NUMA - 1;
1e3baf05 883
f7bc17ca
TL
884my $nic_model_list = [
885 'e1000',
886 'e1000-82540em',
887 'e1000-82544gc',
888 'e1000-82545em',
e83dd50a 889 'e1000e',
f7bc17ca
TL
890 'i82551',
891 'i82557b',
892 'i82559er',
893 'ne2k_isa',
894 'ne2k_pci',
895 'pcnet',
896 'rtl8139',
897 'virtio',
898 'vmxnet3',
899];
6b64503e 900my $nic_model_list_txt = join(' ', sort @$nic_model_list);
1e3baf05 901
52261945
DM
902my $net_fmt_bridge_descr = <<__EOD__;
903Bridge to attach the network device to. The Proxmox VE standard bridge
904is called 'vmbr0'.
905
906If you do not specify a bridge, we create a kvm user (NATed) network
907device, which provides DHCP and DNS services. The following addresses
908are used:
909
910 10.0.2.2 Gateway
911 10.0.2.3 DNS Server
912 10.0.2.4 SMB Server
913
914The DHCP server assign addresses to the guest starting from 10.0.2.15.
915__EOD__
916
cd9c34d1 917my $net_fmt = {
399d96db 918 macaddr => get_standard_option('mac-addr', {
4df98f2f
TL
919 description => "MAC address. That address must be unique withing your network. This is"
920 ." automatically generated if not specified.",
399d96db 921 }),
7f694a71
DM
922 model => {
923 type => 'string',
4df98f2f
TL
924 description => "Network Card Model. The 'virtio' model provides the best performance with"
925 ." very low CPU overhead. If your guest does not support this driver, it is usually"
926 ." best to use 'e1000'.",
7f694a71
DM
927 enum => $nic_model_list,
928 default_key => 1,
929 },
930 (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
dbdcc5cd 931 bridge => get_standard_option('pve-bridge-id', {
52261945 932 description => $net_fmt_bridge_descr,
cd9c34d1 933 optional => 1,
dbdcc5cd 934 }),
cd9c34d1
WB
935 queues => {
936 type => 'integer',
66222265 937 minimum => 0, maximum => 64,
cd9c34d1 938 description => 'Number of packet queues to be used on the device.',
cd9c34d1
WB
939 optional => 1,
940 },
941 rate => {
942 type => 'number',
943 minimum => 0,
52261945 944 description => "Rate limit in mbps (megabytes per second) as floating point number.",
cd9c34d1
WB
945 optional => 1,
946 },
947 tag => {
948 type => 'integer',
9f41a659 949 minimum => 1, maximum => 4094,
cd9c34d1 950 description => 'VLAN tag to apply to packets on this interface.',
cd9c34d1
WB
951 optional => 1,
952 },
953 trunks => {
954 type => 'string',
955 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
956 description => 'VLAN trunks to pass through this interface.',
7f694a71 957 format_description => 'vlanid[;vlanid...]',
cd9c34d1
WB
958 optional => 1,
959 },
960 firewall => {
961 type => 'boolean',
962 description => 'Whether this interface should be protected by the firewall.',
cd9c34d1
WB
963 optional => 1,
964 },
965 link_down => {
966 type => 'boolean',
52261945 967 description => 'Whether this interface should be disconnected (like pulling the plug).',
cd9c34d1
WB
968 optional => 1,
969 },
61a14cde
AD
970 mtu => {
971 type => 'integer',
972 minimum => 1, maximum => 65520,
0530177b 973 description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
61a14cde
AD
974 optional => 1,
975 },
cd9c34d1 976};
52261945 977
1e3baf05
DM
978my $netdesc = {
979 optional => 1,
7f694a71 980 type => 'string', format => $net_fmt,
52261945 981 description => "Specify network devices.",
1e3baf05 982};
52261945 983
1e3baf05
DM
984PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
985
0c9a7596
AD
986my $ipconfig_fmt = {
987 ip => {
988 type => 'string',
989 format => 'pve-ipv4-config',
990 format_description => 'IPv4Format/CIDR',
991 description => 'IPv4 address in CIDR format.',
992 optional => 1,
993 default => 'dhcp',
994 },
995 gw => {
996 type => 'string',
997 format => 'ipv4',
998 format_description => 'GatewayIPv4',
999 description => 'Default gateway for IPv4 traffic.',
1000 optional => 1,
1001 requires => 'ip',
1002 },
1003 ip6 => {
1004 type => 'string',
1005 format => 'pve-ipv6-config',
1006 format_description => 'IPv6Format/CIDR',
1007 description => 'IPv6 address in CIDR format.',
1008 optional => 1,
1009 default => 'dhcp',
1010 },
1011 gw6 => {
1012 type => 'string',
1013 format => 'ipv6',
1014 format_description => 'GatewayIPv6',
1015 description => 'Default gateway for IPv6 traffic.',
1016 optional => 1,
1017 requires => 'ip6',
1018 },
1019};
1020PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry used for the cloud-init `ipconfigN` options. The value is a
# property string checked against the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# Register the ipconfig description under its own name; the network device
# description ($netdesc) must not be used for the 'pve-qm-ipconfig' option.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1039
1e3baf05
DM
1040for (my $i = 0; $i < $MAX_NETS; $i++) {
1041 $confdesc->{"net$i"} = $netdesc;
9ed7a77c
WB
1042 $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
1043}
1044
1045foreach my $key (keys %$confdesc_cloudinit) {
1046 $confdesc->{$key} = $confdesc_cloudinit->{$key};
1e3baf05
DM
1047}
1048
83870398
DB
1049PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);
# Format verifier for cpuset strings.
#
# Parses $set_text via PVE::CpuSet::parse_cpuset() and returns the
# short_string() representation of the parsed members. If parsing fails,
# returns nothing when $noerr is set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    my @parsed = eval { PVE::CpuSet::parse_cpuset($set_text) };
    if ($@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    # parse_cpuset() yields (count, members); only the members are needed
    my $members = $parsed[1];

    return PVE::CpuSet->new($members)->short_string();
}
1062
ffa42b86
DC
1063PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
# Format verifier accepting the special values 'none' and 'cdrom' in addition
# to everything verify_volume_id_or_absolute_path() accepts.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword (qw(none cdrom)) {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1071
1072PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);
# Format verifier accepting either an absolute path (anything starting with
# '/') or a value that passes the 'pve-volume-id' format check.
#
# Returns the accepted value. On a failed format check it returns nothing
# when $noerr is set and re-throws the check's error otherwise.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    if ($volid =~ m|^/|) {
        return $volid;
    }

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        die $err if !$noerr;
        return;
    }

    return $checked;
}
1085
bae179aa
DA
1086my $serialdesc = {
1087 optional => 1,
ca0cef26 1088 type => 'string',
1b0b51ed 1089 pattern => '(/dev/.+|socket)',
52261945
DM
1090 description => "Create a serial device inside the VM (n is 0 to 3)",
1091 verbose_description => <<EODESCR,
52261945
DM
1092Create a serial device inside the VM (n is 0 to 3), and pass through a
1093host serial device (i.e. /dev/ttyS0), or create a unix socket on the
1094host side (use 'qm terminal' to open a terminal connection).
bae179aa 1095
4df98f2f
TL
1096NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
1097use with special care.
bae179aa 1098
52261945 1099CAUTION: Experimental! User reported problems with this option.
bae179aa
DA
1100EODESCR
1101};
bae179aa 1102
1989a89c
DA
1103my $paralleldesc= {
1104 optional => 1,
ca0cef26 1105 type => 'string',
9ecc8431 1106 pattern => '/dev/parport\d+|/dev/usb/lp\d+',
52261945
DM
1107 description => "Map host parallel devices (n is 0 to 2).",
1108 verbose_description => <<EODESCR,
19672434 1109Map host parallel devices (n is 0 to 2).
1989a89c 1110
4df98f2f
TL
1111NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
1112machines - use with special care.
1989a89c 1113
52261945 1114CAUTION: Experimental! User reported problems with this option.
1989a89c
DA
1115EODESCR
1116};
1989a89c
DA
1117
1118for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
1119 $confdesc->{"parallel$i"} = $paralleldesc;
1120}
1121
bae179aa
DA
1122for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
1123 $confdesc->{"serial$i"} = $serialdesc;
1124}
1125
74c17b7a
SR
1126for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) {
1127 $confdesc->{"hostpci$i"} = $PVE::QemuServer::PCI::hostpcidesc;
040b06b7 1128}
1e3baf05 1129
e0fd2b2f
FE
1130for my $key (keys %{$PVE::QemuServer::Drive::drivedesc_hash}) {
1131 $confdesc->{$key} = $PVE::QemuServer::Drive::drivedesc_hash->{$key};
cdb0931f
DA
1132}
1133
0cf8d56c
DC
1134for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
1135 $confdesc->{"usb$i"} = $PVE::QemuServer::USB::usbdesc;
1e3baf05
DM
1136}
1137
5cfa9f5f
SR
1138my $boot_fmt = {
1139 legacy => {
1140 optional => 1,
1141 default_key => 1,
1142 type => 'string',
1143 description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
1144 . " Deprecated, use 'order=' instead.",
1145 pattern => '[acdn]{1,4}',
1146 format_description => "[acdn]{1,4}",
1147
1148 # note: this is also the fallback if boot: is not given at all
1149 default => 'cdn',
1150 },
1151 order => {
1152 optional => 1,
1153 type => 'string',
1154 format => 'pve-qm-bootdev-list',
1155 format_description => "device[;device...]",
1156 description => <<EODESC,
1157The guest will attempt to boot from devices in the order they appear here.
1158
1159Disks, optical drives and passed-through storage USB devices will be directly
1160booted from, NICs will load PXE, and PCIe devices will either behave like disks
1161(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).
1162
1163Note that only devices in this list will be marked as bootable and thus loaded
1164by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
1165(e.g. software-raid), you need to specify all of them here.
1166
1167Overrides the deprecated 'legacy=[acdn]*' value when given.
1168EODESC
1169 },
1170};
1171PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1172
1173PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
# Format verifier for a single boot device name.
#
# Accepts valid drive names except efidisk*/tpmstate*, and net<N>, usb<N> or
# hostpci<N> keys that exist in $confdesc. Returns the device name on
# success; otherwise returns nothing when $noerr is set, or dies.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $is_special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_special;

    for my $base (qw(net usb hostpci)) {
        next if $dev !~ m/^$base\d+$/;
        next if !$confdesc->{$dev};
        return $dev;
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1194
# Render a list of boot devices as a boot property string ('order=a;b;c').
# An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    if (!@$devs) {
        return "";
    }

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1201
1e3baf05
DM
# cached result of the KVM_GET_API_VERSION ioctl (0 => not queried yet)
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. The value is cached once a
# true result was obtained. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1215
1476b99f
DC
# per-binary caches: detected version string and the binary's mtime at detection time
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the QEMU user space version of $binary as reported by
# `$binary --version`, or 'unknown' if the output could not be parsed.
#
# $binary defaults to the command for the host architecture. Results are
# cached per binary and re-detected whenever the binary's mtime changes.
# Dies if the binary cannot be stat'ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # without this check a missing binary fails later with an opaque
    # "can't call method mtime on an undefined value" error
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary} if $kvm_user_version->{$binary} &&
        $cachedmtime == $st->mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    # best effort: a failing command only produces a warning, the result stays 'unknown'
    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};

}
4df98f2f
TL
# Wrapper around PVE::QemuServer::Machine::extract_version() that falls back
# to kvm_user_version() when no version is passed in.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1e3baf05 1250
db70021b
TL
# True if the character device /dev/vhost-net exists.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1e3baf05 1254
1e3baf05
DM
# True if $key has a defined entry in the VM config schema ($confdesc).
sub option_exists {
    my ($key) = @_;

    my $desc = $confdesc->{$key};
    return defined($desc);
}
1e3baf05 1259
# cached result of get_cdrom_path(); '' means "looked, nothing found"
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# If none is, log a warning and return ''. The result is cached.
sub get_cdrom_path {

    return $cdrom_path if defined($cdrom_path);

    for my $suffix ('', '1', '2') {
        my $candidate = "/dev/cdrom$suffix";
        next if !-l $candidate;
        $cdrom_path = $candidate;
        last;
    }

    if (!defined($cdrom_path)) {
        log_warn("no physical CD-ROM available, ignoring");
        $cdrom_path = '';
    }

    return $cdrom_path;
}
1274
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'         -> result of get_cdrom_path()
#   'none'          -> ''
#   absolute path   -> returned unchanged
#   anything else   -> resolved via PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1288
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/ paths and values that already look like
# '<storage>:<volume>' are returned unchanged. Other names containing a '/'
# cannot be converted (returns nothing). Plain file names are mapped to the
# 'local' storage: 'local:iso/<file>' for cdrom media, 'local:<vmid>/<file>'
# otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';

    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1307
# Check that the storage content type $vtype fits the drive's $media setting
# ('disk' expects 'images', 'cdrom' expects 'iso').
#
# Does nothing when $media is unset. Raises a parameter exception for $opt on
# a mismatch and dies with an internal error for any other media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_type_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_type_for{$media} // die "internal error";

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1326
# Normalize the 'file' and 'media' properties of $drive in place.
#
# A file value that is neither 'cdrom'/'none', a /dev/ path, a
# '<storage>:<volume>' ID nor purely numeric is treated as a filesystem path
# and converted to a volume ID via PVE::Storage::path_to_volume_id(); a
# parameter exception for $opt is raised if no storage matches. Media
# defaults to 'cdrom' for ISO volumes and for 'cdrom'/'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $is_plain_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_plain_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1346
b3c2bdd1
DM
# Parse the 'hotplug' option into a hash { feature => 1, ... }.
#
# '0' yields an empty hash, '1' is replaced by the schema default of the
# hotplug option. Otherwise the value is split into a list of features; any
# feature other than network, disk, cpu, memory, usb or cloudinit is fatal.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    if ($data eq '1') {
        $data = $confdesc->{hotplug}->{default};
    }

    my %enabled;
    for my $feature (PVE::Tools::split_list($data)) {
        die "invalid hotplug feature '$feature'\n"
            if $feature !~ m/^(?:network|disk|cpu|memory|usb|cloudinit)$/;
        $enabled{$feature} = 1;
    }

    return \%enabled;
}
1365
1366PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);
# Format verifier for the 'hotplug' option.
#
# Returns $value if parse_hotplug_features() accepts it. On failure it
# returns nothing when $noerr is set; otherwise the parser's error is
# re-thrown.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() signals invalid input by dying, so the error
    # has to be caught here for $noerr to have any effect
    my $features = eval { parse_hotplug_features($value) };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $value if $features;

    return if $noerr;

    die "unable to parse hotplug option\n";
}
1376
28ef82d3
DM
# Query a device through the SCSI generic (sg) ioctl interface.
#
# $fh    - open file handle of the device
# $noerr - if true, return nothing on failure instead of dying
#
# Returns a hash reference with the keys type, removable, vendor, product and
# revision, decoded from the 36 byte response buffer.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # command block: opcode 0x12 and an allocation length of 36 bytes
    # NOTE(review): 0x12 is presumably the SCSI INQUIRY opcode - confirm
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # only the leading header fields are filled in; 'P' packs pointers to
    # $buf, $cmd and $sensebuf, so these must stay alive during the ioctl
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    my @res = unpack($sg_io_hdr_t, $packet);
    # NOTE(review): by template position these look like host_status and
    # driver_status of sg_io_hdr_t - confirm against sg.h
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # bit 7 of the second byte is the removable flag, the low 5 bits of the
    # first byte are the device type
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1426
# Open $path read/write and run scsi_inquiry() on it in non-fatal mode.
#
# Returns the inquiry result hash, or nothing if the path cannot be opened
# or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately so that no part of $path can be interpreted
    # as open mode or stripped as whitespace
    my $fh = IO::File->new($path, '+<') or return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1436
# Build the device string for the USB tablet. The tablet is attached to the
# 'ehci' bus for q35 machines and on aarch64, and to 'uhci' otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1452
# Build the device string for the USB keyboard. Only aarch64 guests get one;
# for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1460
a183df68
TL
# Drive ID as used in device/drive names: interface followed by index, e.g. 'scsi0'.
my sub get_drive_id {
    my ($drive) = @_;

    my ($interface, $index) = ($drive->{interface}, $drive->{index});
    return "${interface}${index}";
}
1465
# Build the device property string for a single drive.
#
# The device type and bus depend on $drive->{interface}:
#   virtio   - 'virtio-blk-pci' on a PCI address from print_pci_addr()
#   scsi     - 'scsi-hd', 'scsi-cd', 'scsi-block' or 'scsi-generic' on the
#              controller determined by scsihw_infos()
#   ide/sata - 'ide-hd' or 'ide-cd' on an IDE or AHCI bus
# Any other interface is fatal ('usb' is explicitly not implemented).
# Optional bootindex and serial properties are appended for all interfaces.
#
# Returns the assembled string.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';
    # NOTE(review): this outer $maxdev is never read, both branches below
    # declare their own
    my $maxdev = 0;

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                # absolute path: probe the device to pick a matching device type
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # no scsihw, lsi* and pvscsi address the drive by scsi-id, all other
        # controller types by LUN on a fixed channel/scsi-id
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        # IDE has two units per controller, SATA up to MAX_SATA_DISKS
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                # the model is stored URL-encoded in the config
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        # the serial is stored URL-encoded in the config
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }


    return $device;
}
1561
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
#
# Returns the value of the first 'InitiatorName=' line, undef if the file
# has no such line, and nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1575
eec9f9fe
FE
# Decide whether io_uring may be used as default aio mode for a drive on the
# storage described by $scfg (may be undef). Returns 1 if allowed, nothing
# otherwise.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    if ($scfg) {
        my $type = $scfg->{type};

        # io_uring with cache mode writeback or writethrough on krbd will hang...
        return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

        # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
        # sometimes, just plain disable...
        return if $type eq 'lvm';

        # io_uring causes problems when used with CIFS since kernel 5.15
        # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
        return if $type eq 'cifs';
    }

    return 1;
}
1592
b7071d6c
FE
# Determine whether a drive ends up with a direct (O_DIRECT style) cache mode.
#
# An explicit cache setting decides on its own: off, none and directsync
# count as direct. Without one, every drive is direct except CD-ROMs and
# drives on a btrfs storage that does not have 'nocow' set.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache = $drive->{cache}) {
        return scalar($cache =~ /^(?:off|none|directsync)$/);
    }

    return 0 if drive_is_cdrom($drive);

    my $is_cow_btrfs = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};

    return $is_cow_btrfs ? 0 : 1;
}
1606
# Build the drive option string ("file=...,if=none,id=drive-<id>,...") for a
# single drive.
#
# $storecfg - storage configuration, used to resolve volume IDs to paths
# $vmid     - VM ID, only passed on to get_iso_path()
# $drive    - parsed drive hash (file, format, interface, index, cache, ...)
# $pbs_name - optional name of a backing node; if set, the drive is set up
#             with the 'alloc-track' format on top of that backing node
# $io_uring - if true, io_uring is used as default aio mode where
#             storage_allows_io_uring_default() permits it
#
# Dies if a CD-ROM drive is combined with $pbs_name, or if $pbs_name is set
# and no format could be determined.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    # $storeid is only set if the file is a '<storage>:<volume>' ID
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            # not a volume ID: use the value as path and default to raw
            $path = $volid;
            $format //= "raw";
        }
    }

    my $is_rbd = $path =~ m/^rbd:/;

    my $opts = '';
    # options that are passed through with the same name and value
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # bandwidth/IOPS limits: total, read and write variants of each setting;
    # mbps values are converted from MiB/s to bytes/s
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        # with a backing node the real format moves down to the 'file' node
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;

    # only choose an aio mode if none was configured explicitly
    if (!$drive->{aio}) {
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    # my $file_param = $pbs_name ? "file.file.filename" : "file";
    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    # an empty path (e.g. CD-ROM drive without medium) yields no file parameter
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
1732
5921764c
SR
# Build the blockdev option string for a read-only 'pbs' driver node named
# $pbs_name from $pbs_conf (repository, snapshot and archive; namespace and
# keyfile are only added when set).
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @parts, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @parts, "snapshot=$pbs_conf->{snapshot}";
    push @parts, "archive=$pbs_conf->{archive}";
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1743
# Build the device property string for the network device $netid.
#
# The device type is $net->{model}, with 'virtio' mapped to 'virtio-net-pci'.
# Multiqueue vectors, queue sizes, bootindex, host MTU and (when
# $use_old_bios_files is set) a ROM file are appended as applicable.
#
# Dies if an explicit MTU is below 576 or above the bridge MTU.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $device = $net->{model};
    if ($net->{model} eq 'virtio') {
         $device = 'virtio-net-pci';
    };

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";
    if ($net->{queues} && $net->{queues} > 1 && $net->{model} eq 'virtio'){
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        if (min_version($machine_version, 7, 1)) {
            $tmpstr .= ",packed=on";
        }
    }

    # larger queue sizes are only set for machine versions >= 7.1
    if (min_version($machine_version, 7, 1) && $net->{model} eq 'virtio'){
        $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024";
    }

    $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex} ;

    if (my $mtu = $net->{mtu}) {
        if ($net->{model} eq 'virtio' && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means "use the bridge MTU"
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my $romfile;
        if ($device eq 'virtio-net-pci') {
            $romfile = 'pxe-virtio.rom';
        } elsif ($device eq 'e1000') {
            $romfile = 'pxe-e1000.rom';
        } elsif ($device eq 'e1000e') {
            $romfile = 'pxe-e1000e.rom';
        # NOTE(review): the NIC model list in this file only contains
        # 'ne2k_isa' and 'ne2k_pci', so this branch looks unreachable - confirm
        # before changing, the emitted command line may matter for migration
        } elsif ($device eq 'ne2k') {
            $romfile = 'pxe-ne2k_pci.rom';
        } elsif ($device eq 'pcnet') {
            $romfile = 'pxe-pcnet.rom';
        } elsif ($device eq 'rtl8139') {
            $romfile = 'pxe-rtl8139.rom';
        }
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1806
# Build the QEMU netdev option string for one network interface of a VM.
#
# $net is the parsed netX property hash, $netid its config key. A bridged
# interface becomes a tap device wired to the pve-bridge scripts (the hotplug
# variant when $hotplug is set); without a bridge, user-mode networking is
# used with the VM name as hostname.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # numeric interface index, stays '' when $netid is not of the form netN
    my $i = $netid =~ m/^net(\d+)$/ ? int($1) : '';

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    if (length($ifname) >= 16) {
        die "interface name '$ifname' is too long (max 15 character)\n";
    }

    # vhost is only requested for virtio NICs on a native arch with kernel support
    my $vhostparam =
        (is_native($arch) && kernel_has_vhost_net() && $net->{model} eq 'virtio')
        ? ',vhost=on'
        : '';

    my $vmname = $conf->{name} || "vm$vmid";

    my $script = "pve-bridge";
    $script = "pve-bridge-hotplug" if $hotplug;

    my $netdev;
    if (!$net->{bridge}) {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    } else {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    }

    if ($net->{queues} && $net->{model} eq 'virtio') {
        $netdev .= ",queues=$net->{queues}";
    }

    return $netdev;
}
1e3baf05 1844
55655ebc
DC
# Maps the configured display type to the QEMU device name used for it.
my $vga_map = {
    cirrus      => 'cirrus-vga',
    std         => 'VGA',
    vmware      => 'vmware-svga',
    virtio      => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};
1852
# Build the QEMU device string for a display adapter.
#
# $vga is the parsed vga property hash (type, memory), $id the index of an
# additional display (undef/0 for the primary one) and $qxlnum is true when
# QXL (SPICE) displays are configured. Dies if no device type can be derived
# for $vga->{type} or if prerequisites for 'virtio-gl' are missing.
# Returns "<type>,id=<vgaid><memory><max_outputs><pciaddr><edid>".
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # NOTE: both alternatives must live inside one group, otherwise '^' only
        # anchors the first and '$' only the second alternative.
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the std VGA on Windows guests unless OVMF is used
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1920
ffc0d8c7
WB
# Parse a ';'-separated list of numbers and ranges ("0-3;5") into an array ref
# of [start, end] pairs; end is undef for a single number. Dies on entries
# that are not a number/range or whose end is smaller than the start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($from, $to) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";

        if (defined($to) && $to < $from) {
            die "invalid range: $part ($to < $from)\n";
        }
        push @ranges, [ $from, $to ];
    }

    return \@ranges;
}
2ed5d572 1934
ffc0d8c7
WB
# Parse a numaX property string; the 'cpus' and 'hostnodes' entries, when
# present, are expanded into range lists via parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $numa = parse_property_string($numa_fmt, $data);

    for my $field (qw(cpus hostnodes)) {
        next if !defined($numa->{$field});
        $numa->{$field} = parse_number_sets($numa->{$field});
    }

    return $numa;
}
1943
1e3baf05
DM
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a netX property string. On a parse error the error is emitted as a
# warning and nothing is returned. Unless $disable_mac_autogen is set, a
# missing MAC address is filled with a random one using the datacenter's
# configured mac_prefix.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $net if defined($net->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $net;
}
1959
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an ipconfigX property string. Parse errors and inconsistent
# gateway/address combinations are reported as warnings and nothing is
# returned. If neither an IPv4 nor an IPv6 address is configured, DHCP is
# assumed for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # the first inconsistency found is reported, then parsing is given up
    if ($res->{gw} && !$res->{ip}) {
        warn 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        warn 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        warn 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        warn "IPv6 gateway specified together with $res->{ip6} address";
    } else {
        return { ip => 'dhcp', ip6 => 'dhcp' } if !$res->{ip} && !$res->{ip6};
        return $res;
    }

    return;
}
1994
# Serialize a parsed network hash back into its netX property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
2000
# Re-serialize every parsable netX entry of $settings in place; parse_net()
# fills in a MAC address where none is set.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt});
        $settings->{$opt} = print_net($net) if $net;
    }
}
2011
055d554d
DM
# Returns 1 if the storage layer reports $vmid as owner of $volid, nothing
# otherwise. Absolute paths are never owned; errors while resolving the
# volume are ignored and treated as "not owned".
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my (undef, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && $owner == $vmid;
    return;
}
2025
055d554d
DM
# Handle a drive that got detached from the config: cloud-init volumes are
# freed right away (failures only warn) and the 'cloudinit' config entry is
# dropped; other non-CD-ROM volumes owned by the VM are added as unused volume.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2040
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = {
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};

# the remaining string fields share one schema and only differ in their description
for my $field (
    [ version => "Set SMBIOS1 version." ],
    [ serial => "Set SMBIOS1 serial number." ],
    [ manufacturer => "Set SMBIOS1 manufacturer." ],
    [ product => "Set SMBIOS1 product ID." ],
    [ sku => "Set SMBIOS1 SKU string." ],
    [ family => "Set SMBIOS1 family string." ],
) {
    my ($name, $description) = @$field;
    $smbios1_fmt->{$name} = {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => $description,
        optional => 1,
    };
}
2098
2796e7d5
DM
# Parse an smbios1 property string; a parse error is only warned about and
# leads to an undefined result.
sub parse_smbios1 {
    my ($data) = @_;

    my $smbios1 = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $smbios1;
}
2106
cd11416f
DM
# Serialize a parsed smbios1 hash back into its property string.
sub print_smbios1 {
    my ($smbios1) = @_;

    my $text = PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
    return $text;
}

# make the smbios1 schema available under the 'pve-qm-smbios1' format name
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2796e7d5 2113
0ea9541d
DM
# Parse a watchdog property string. Returns nothing for an empty value; a
# parse error is only warned about and leads to an undefined result.
sub parse_watchdog {
    my ($value) = @_;

    return if !$value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2123
# Parse the 'agent' property of a VM config. Always returns a hash reference:
# an empty one if no agent is configured, parsing failed (warned about) or
# the agent is not enabled.
sub parse_guest_agent {
    my ($conf) = @_;

    return {} if !defined($conf->{agent});

    my $agent = eval { parse_property_string($agent_fmt, $conf->{agent}) };
    if (my $err = $@) {
        warn $err;
    }

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2136
a2af1bbe
TL
# Look up a single key of the parsed guest agent settings; undef when no
# agent is configured at all.
sub get_qga_key {
    my ($conf, $key) = @_;

    if (defined($conf->{agent})) {
        my $settings = parse_guest_agent($conf);
        return $settings->{$key};
    }

    return undef;
}
2144
55655ebc
DC
# Parse a vga property string. An empty value yields an empty hash; a parse
# error is only warned about and leads to an undefined result.
sub parse_vga {
    my ($value) = @_;

    return {} if !$value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2153
2cf61f33
SR
# Parse an rng property string. Returns nothing for an empty value; a parse
# error is only warned about and leads to an undefined result.
sub parse_rng {
    my ($value) = @_;

    return if !$value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2163
26b443c8
TL
# Parse a 'meta' property string. Returns nothing for an empty value; a parse
# error is only warned about and leads to an undefined result.
sub parse_meta_info {
    my ($value) = @_;

    return if !$value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2173
# Create the 'meta' property string for a new VM: records the QEMU version
# reported by kvm_user_version() and the current time as creation time.
# Takes no arguments; for now no value can be overridden.
sub new_meta_info_string {
    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2185
cc181036
TL
# Returns extra QEMU command line arguments (array ref) needed for VMs that
# always use the latest machine version but were affected by a HW-visible
# change in a machine version transition; returns nothing if no fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # check if we need to apply some handling for VMs that always use the latest machine version but
    # had a machine version transition happen that affected HW such that, e.g., an OS config change
    # would be required (we do not want to pin machine version for non-windows OS type)

    # only non-versioned machine types are affected
    return if defined($conf->{machine}) && $conf->{machine} !~ m/^(?:pc|q35|virt)$/;
    # VMs created with 6.1 or newer already started out with the new behavior
    return if defined($meta->{'creation-qemu'}) && min_version($meta->{'creation-qemu'}, 6, 1);
    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);
    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    return if !($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26');

    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2210
1e3baf05
DM
# add JSON properties for create and set function
#
# Copies the schema of every config option into $prop, except for a fixed set
# of internal options. With $with_disk_alloc set, drive options use the schema
# variant from drivedesc_hash_with_alloc instead. Returns $prop.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    my %is_internal = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (grep { !$is_internal{$_} } keys %$confdesc) {
        $prop->{$opt} = ($with_disk_alloc && is_valid_drivename($opt))
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2236
39051ac0
FE
# Properties that we can read from an OVF file
#
# Returns a schema hash with one optional disk image entry per valid drive
# name plus the optional 'cores', 'memory' and 'name' entries.
sub json_ovf_properties {
    my $prop = {
        map {
            my $device = $_;
            (
                $device => {
                    type => 'string',
                    format => 'pve-volume-id-or-absolute-path',
                    description => "Disk image that gets imported to $device",
                    optional => 1,
                }
            );
        } PVE::QemuServer::Drive::valid_drive_names()
    };

    $prop->{cores} = {
        type => 'integer',
        description => "The number of CPU cores.",
        optional => 1,
    };
    $prop->{memory} = {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        optional => 1,
    };
    $prop->{name} = {
        type => 'string',
        description => "Name of the VM.",
        optional => 1,
    };

    return $prop;
}
2268
d41121fd
DM
# return copy of $confdesc_cloudinit to generate documentation
sub cloudinit_config_properties {
    # deep copy, so callers cannot modify the original schema
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2274
f16cf6c3
WB
# Returns a hash (option name => 1) containing all cloud-init config options,
# 'name' and every possible netX key.
sub cloudinit_pending_properties {
    my $p = {
        # the parentheses are required: without them map would also consume the
        # following "name => 1" pair as input and add a bogus '1' key
        (map { $_ => 1 } keys $confdesc_cloudinit->%*),
        name => 1,
    };
    $p->{"net$_"} = 1 for 0..($MAX_NETS-1);
    return $p;
}
2283
1e3baf05
DM
# Validate $value against the schema type of config option $key and return
# the normalized value: booleans become 1/0, integers are numified, strings
# with a format are checked against it, other strings lose one pair of
# enclosing double quotes. Dies for unknown options, undefined values, values
# containing a line break or values that do not match the type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
        } else {
            # strip one pair of enclosing double quotes
            $value =~ s/^\"(.*)\"$/$1/;
        }
        return $value;
    }

    die "internal error";
}
2320
# Destroy a VM: free all volumes owned by it and referenced in the current
# config, the pending section or as snapshot vmstate, optionally
# ($purge_unreferenced) also every other image the storage layer lists for
# $vmid, and finally either delete the config or, if $replacement_conf is
# defined, write that as new config.
#
# Unless $skiplock is set or the config has the 'suspended' lock, config locks
# are checked first. Dies for templates whose base volumes are still used by
# linked clones. Failing to free a single volume only results in a warning.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf)
        if !($skiplock || PVE::QemuConfig->has_lock($conf, 'suspended'));

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes seen already, so each one is only freed once
    my $handled = {};
    my $free_if_owned = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/| || $handled->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && ($owner == $vmid);

        $handled->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $volume_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $volume_opts, $free_if_owned);

    # saved VM state of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        if (defined($snap->{vmstate})) {
            my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
            $free_if_owned->('vmstate', $drive) if defined($drive);
        }
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $volume_opts, $free_if_owned);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (!defined($replacement_conf)) {
        PVE::QemuConfig->destroy_config($vmid);
    } else {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    }
}
2393
# Parse the raw content of a VM config file.
#
# $filename must end in /qemu-server/<vmid>.conf (dies otherwise), $raw is the
# file content (nothing is returned if it is undefined). With $strict set,
# parse problems are fatal, otherwise they are only warned about.
#
# Returns a hash with the options of the current config plus the keys
# 'digest' (SHA1 of $raw), 'pending', 'cloudinit' and 'snapshots' (one sub
# hash per snapshot section).
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points to the section currently being filled
    my $conf = $res;
    my $descr;
    # store the description collected so far in the current section
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        # comment lines make up the description
        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at line start, so other options whose value merely ends
            # in "snapstate: ..." are not mistaken for a snapstate line
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # 'cdrom' is stored as 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2513
1858638f
DM
# Generate the raw config file content for $conf.
#
# Normalizes legacy options ('cdrom' -> 'ide2', 'smp' -> 'sockets'),
# type-checks every option of the current config, the pending section and all
# snapshots (values are replaced by their normalized form, dies on invalid
# ones), drops 'unusedX' entries whose volume is referenced by a drive again
# and returns the serialized text. $conf is modified in place.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volumes referenced by a drive in the current config or the pending section
    my $used_volids = {};

    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # serialize one section: description as leading comment lines, then all
    # options sorted by key
    my $generate_raw_config = sub {
        my ($conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written out in the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $raw .= "$key: $conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)){
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
1e3baf05 2620
# Returns a hash with the default value of every config option that defines
# one in the schema.
sub load_defaults {
    my $defaults = {};

    # we use static defaults from our JSON schema configuration
    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        next if !defined($default);
        $defaults->{$key} = $default;
    }

    return $defaults;
}
2634
# Returns a hash { vmid => { exists => 1 } } for all guests of type 'qemu'
# that the cluster VM list assigns to the local node.
sub config_list {
    my $res = {};

    my $vmlist = PVE::Cluster::get_vmlist();
    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return $res if !$ids;

    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $d = $ids->{$vmid};
        my $is_local = $d->{node} && $d->{node} eq $nodename;
        next if !$is_local;
        my $is_qemu = $d->{type} && $d->{type} eq 'qemu';
        next if !$is_qemu;

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2650
64e13401
DM
# test if VM uses local resources (to prevent migration)
#
# Dies with "VM uses local resources" if any are found, unless $noerr is set.
# In list context returns (\@local_resources, \@mapped_resources,
# \%missing_mappings_by_node), where the last one lists, per cluster node, the
# config keys whose USB/PCI mapping has no entry for that node. In scalar
# context only the reference to the local resource list is returned.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };

    # record $key for every node that has no mapping entry for $id
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;
        for my $node (@$nodelist) {
            my $entry;
            if ($type eq 'pci') {
                $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
            } elsif ($type eq 'usb') {
                $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
            }
            if (!scalar($entry->@*)) {
                push @{$missing_mappings_by_node->{$node}}, $key;
            }
        }
    };

    push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
    push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    foreach my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # 'host' is not set for entries that use a mapping, so guard the
            # match to avoid an 'uninitialized value' warning
            next if defined($entry->{host}) && $entry->{host} =~ m/^spice$/i;
            if ($entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        if ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if ($entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2709
# check if used storages are available on the local node and on $node (used by migrate)
#
# Dies (through the storage layer) if a storage is not enabled on either node,
# or if the content type of a volume is not configured on its storage.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        # volumes without a storage ID are not managed by a storage
        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2733
719893a9
DM
# list nodes where all VM images are available (used by has_feature API)
#
# Returns a hash { node => 1 }. Starting with all cluster nodes, each volume
# narrows the set: a disabled storage removes every node, a storage restricted
# to certain nodes keeps only those, and a non-shared storage keeps only the
# local node.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $restrict_by_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            delete $nodehash->{$_} for grep { !$avail->{$_} } keys %$nodehash;
        } elsif (!$scfg->{shared}) {
            delete $nodehash->{$_} for grep { $_ ne $nodename } keys %$nodehash;
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_by_volume);

    return $nodehash;
}
2767
# For every cluster node, collect the storages used by the VM's volumes that
# are not available there (storage disabled, or node missing from the storage's
# node restriction).
# Returns { $node => { unavailable_storages => [ sorted storage IDs ] } }, where
# the 'unavailable_storages' key is only present for nodes with at least one hit.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $info_by_node = { map { $_ => {} } @{ PVE::Cluster::get_nodelist() } };

    my $record_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected_nodes;
        if ($scfg->{disable}) {
            @affected_nodes = keys %$info_by_node;
        } elsif (my $allowed = $scfg->{nodes}) {
            @affected_nodes = grep { !$allowed->{$_} } keys %$info_by_node;
        }

        for my $node (@affected_nodes) {
            $info_by_node->{$node}->{unavailable_storages}->{$storeid} = 1;
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $record_unavailable);

    # turn the per-node storage sets into sorted lists
    for my $info (values %$info_by_node) {
        my $unavail = $info->{unavailable_storages};
        next if !$unavail;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $info_by_node;
}
2806
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Returns whatever vm_running_locally() reports for $vmid. Unless $nocheck is
# set, first asserts that the VM config exists on $node.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't
    #   processed it yet
    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2820
# List all configured VMs and annotate the running ones with their PID.
# Returns the hash from config_list(), with {pid} set for every VM that has a
# '<vmid>.pid' entry in the run directory and is reported as running.
sub vzlist {
    my $vms = config_list();

    my $run_dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vms if !$run_dir;

    while (defined(my $entry = $run_dir->read)) {
        next unless $entry =~ m/^(\d+)\.pid$/;
        my $vmid = $1;

        # ignore pid files of VMs without a config
        next unless defined($vms->{$vmid});

        my $pid = check_running($vmid);
        $vms->{$vmid}->{pid} = $pid if $pid;
    }

    return $vms;
}
2838
# Property schema (description/type/optional/renderer per key) describing VM
# status fields. Declared with 'our' so it is reachable from outside this file.
# NOTE(review): presumably the API return schema for vmstatus() entries - the
# keys match fields set there, but the consumers are not visible here.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        description => "QEMU process status.",
        type => 'string',
        enum => ['stopped', 'running'],
    },
    maxmem => {
        description => "Maximum memory in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    maxdisk => {
        description => "Root disk size in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    name => {
        description => "VM name.",
        type => 'string',
        optional => 1,
    },
    qmpstatus => {
        description => "VM run state from the 'query-status' QMP monitor command.",
        type => 'string',
        optional => 1,
    },
    pid => {
        description => "PID of running qemu process.",
        type => 'integer',
        optional => 1,
    },
    uptime => {
        description => "Uptime.",
        type => 'integer',
        optional => 1,
        renderer => 'duration',
    },
    cpus => {
        description => "Maximum usable CPUs.",
        type => 'number',
        optional => 1,
    },
    lock => {
        description => "The current config lock, if any.",
        type => 'string',
        optional => 1,
    },
    tags => {
        description => "The current configured tags, if any",
        type => 'string',
        optional => 1,
    },
    'running-machine' => {
        description => "The currently running machine type (if running).",
        type => 'string',
        optional => 1,
    },
    'running-qemu' => {
        description => "The currently running QEMU version (if running).",
        type => 'string',
        optional => 1,
    },
};
2905
# Cache used by vmstatus(): maps a QEMU PID to the last sample
# { time, used, cpu } so CPU usage can be computed as a delta between calls.
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# Parameters:
#   $opt_vmid - if set, only this VM is reported
#   $full     - if true, additionally query running VMs via QMP
#
# Returns a hash ref { $vmid => { ...status fields... } }.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    # $cpuinfo is defined elsewhere in this file; fall back to 1 CPU if unknown
    my $cpucount = $cpuinfo->{cpus} || 1;

    # Pass 1: static information from the VM configuration
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        if (defined($size)) {
            $d->{disk} = 0; # no info available
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        # sockets * cores, capped at the host CPU count; an explicit 'vcpus'
        # setting overrides the computed value
        $d->{cpus} = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
        $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};

        $d->{name} = $conf->{name} || "VM $vmid";
        # config value is scaled by 1024*1024 (presumably MiB -> bytes)
        $d->{maxmem} = $conf->{memory} ? $conf->{memory}*(1024*1024)
            : $defaults->{memory}*(1024*1024);

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*(1024*1024);
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares}
                : $defaults->{shares};
        }

        # dynamic counters start at zero and are filled in below for running VMs
        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # Pass 2: network counters from the tap<vmid>i<N> devices
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        # the device's 'receive' counter is accounted as the VM's netout and
        # 'transmit' as netin (presumably because the counters are taken from
        # the host side of the tap device)
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
            $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
        }

    }

    my $ctime = gettimeofday;

    # Pass 3: uptime, memory and CPU usage from /proc for running VMs
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        # estimate used memory by scaling maxmem with the process' rss/vsize ratio
        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID: nothing to diff against yet, CPU stays 0
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only take a new sample once $dtime exceeds 1000, otherwise report the
        # previously computed value
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    # Everything below talks to the running QEMU processes via QMP
    my $qmpclient = PVE::QMPClient->new();

    # 'query-balloon' response: memory figures reported by the balloon device
    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    # 'query-blockstats' response: per-device stats plus read/write totals
    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            $totalrdbytes = $totalrdbytes + $blockstat->{stats}->{rd_bytes};
            $totalwrbytes = $totalwrbytes + $blockstat->{stats}->{wr_bytes};

            # strip the 'drive-' part so the stats are keyed by the plain drive name
            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }
        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    # 'query-machines' response: determine the currently running machine type
    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    # 'query-version' response: "major.minor.micro" or 'unknown'
    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    # 'query-status' response: queues the follow-up queries and records qmpstatus
    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = 'unknown';
        if (!defined($status = $resp->{'return'}->{status})) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $resp->{'return'}->{status};
    };

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where no QMP status could be obtained
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
    }

    return $res;
}
3146
# Returns 1 if any of the serial0 .. serial<$MAX_SERIAL_PORTS - 1> options is
# set (to a true value) in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    foreach my $port (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$port"};
    }

    return 0;
}
3158
# Look up the audio device with index $id (default 0) in $conf.
# Returns nothing if 'audio<id>' is not configured, otherwise a hash ref with
# the device model (dev), its device ID (dev_id), the backend driver
# (backend, 'spice' unless a driver is configured) and the backend ID (backend_id).
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id = 0 if !defined($id);

    my $raw = $conf->{"audio$id"};
    return if !defined($raw);

    my $props = parse_property_string($audio_fmt, $raw);
    my $backend = defined($props->{driver}) ? $props->{driver} : 'spice';

    my $info = {};
    $info->{dev} = $props->{device};
    $info->{dev_id} = "audiodev$id";
    $info->{backend} = $backend;
    $info->{backend_id} = "$backend-backend${id}";

    return $info;
}
3176
# Build the QEMU command line arguments for an audio device.
#   $audio           - hash ref with dev, dev_id, backend and backend_id
#   $audiopciaddr    - PCI address suffix appended to the device definition
#   $machine_version - machine version; from 4.2 on, devices reference the
#                      backend explicitly via 'audiodev='
# Returns an array ref of arguments; dies for unsupported device models.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @device_args;
    if ($audio->{dev} eq 'AC97') {
        @device_args = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # HDA controller plus two codecs attached to its bus
        @device_args = (
            '-device', "$audio->{dev},id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    return [
        @device_args,
        '-audiodev', "$audio->{backend},id=$audio->{backend_id}",
    ];
}
3202
# Returns the paths of the swtpm control socket ('socket') and PID file ('pid')
# for the given VM as a hash ref.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qemu-server/$vmid.swtpm";

    my $paths = {};
    $paths->{socket} = $socket_path;
    $paths->{pid} = "$socket_path.pid";

    return $paths;
}
3210
# Append the QEMU arguments for an emulated TPM (chardev on the swtpm socket,
# tpmdev and tpm-tis device) to @$devices. Does nothing if the VM has no
# 'tpmstate0' configured.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return unless $conf->{tpmstate0};

    my $socket_path = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket_path",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3222
# Prepare the TPM state (unless migrating) and start the swtpm emulator daemon
# for a VM.
#
# Parameters:
#   $storecfg  - storage configuration, used to map a storage-managed state volume
#   $vmid      - VM ID, determines the socket, PID file and log file names
#   $tpmdrive  - raw 'tpmstate0' property string; nothing is done if it is false
#   $migration - if true, swtpm_setup is skipped
#
# Returns the untainted PID of the swtpm daemon so it can be killed on error
# (undef if the PID file contains no number), or nothing if $tpmdrive is not
# set. Dies if the PID file was not created after 100 polls of 50ms each.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # avoid an 'uninitialized' warning if no version is present in the drive
    my $is_tpm2 = ($tpm->{version} // '') eq 'v2.0';

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $is_tpm2;
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $is_tpm2;

        # the output callback gets each line as its argument; do not rely on
        # $1, which merely leaks out of run_command's internal line splitting
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error; the
    # list assignment yields undef on a failed match instead of a stale $1
    my ($pid) = (file_read_firstline($paths->{pid}) // '') =~ m/(\d+)/;
    return $pid;
}
3297
# Check whether the given 'vga' property string selects a qxl display type.
# Returns 0 if it does not; otherwise the digit following 'qxl' (2, 3 or 4),
# or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $type = parse_vga($vga)->{type};
    return 0 unless $type;

    if ($type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3307
# True if $arch is the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3312
# Returns the architecture configured for the VM ('arch' option), falling back
# to the host architecture if none is configured.
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};

    return defined($configured) ? $configured : get_host_arch();
}
3317
# Machine type used per architecture when none is configured explicitly.
my $default_machines = {
    x86_64 => 'pc',
    aarch64 => 'virt',
};
3322
# Extract the "major.minor" part of a QEMU version string.
#   $kvmversion - version string; if undef, kvm_user_version() is queried
# Returns the "major.minor" prefix, or undef if the string does not start with
# one (never a stale capture from an unrelated earlier match).
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # list assignment: undef on a failed match, unlike reading $1 afterwards
    my ($major_minor) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3329
# Turn an unversioned machine type into a version-pinned one.
#   $machine      - 'pc' (or false), 'q35' or 'virt'; anything else is left
#                   untouched with a warning
#   $base_version - version to pin to, if the installed setup can run it
#   $kvmversion   - QEMU version passed on to the helper functions
# Falls back to the installed machine version if $base_version is undefined or
# cannot be run. Returns the resulting machine type.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my $versioned_prefix = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    # no machine configured means 'pc'
    my $prefix = $versioned_prefix->{ $machine || 'pc' };
    if (defined($prefix)) {
        $machine = "$prefix-$pin_version";
    } else {
        warn "unknown machine type '$machine', not touching that!\n";
    }

    return $machine;
}
3351
# Determine the machine type to use for a VM.
#   $conf            - VM configuration ('machine' and 'ostype' are read)
#   $forcemachine    - overrides the configured machine if true
#   $arch            - architecture used to pick a default machine (default 'x86_64')
#   $add_pve_version - if true, make sure the result carries a '+pveN' suffix
#   $kvmversion      - QEMU version (queried via kvm_user_version() if needed)
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine ? $forcemachine : $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion = kvm_user_version() if !defined($kvmversion);

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch = 'x86_64' if !defined($arch);
        $machine = $default_machines->{$arch} if !$machine;

        $machine .= "+pve" . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # for version-pinned machines that do not include a pve-version (e.g.
        # pc-q35-4.1), we assume 0 to keep them stable in case we bump
        if ($machine =~ m/^(.*?)\.pxe$/) {
            # the pve version goes in front of a trailing '.pxe'
            $machine = "$1+pve0.pxe";
        } else {
            $machine .= '+pve0';
        }
    }

    return $machine;
}
3386
# Select the OVMF firmware images for a VM.
#   $arch    - architecture, must have an entry in $OVMF
#   $efidisk - parsed efidisk (may be undef); 'efitype' and 'pre-enrolled-keys' are read
#   $smm     - whether the SMM variant is wanted for '4m' images
# Returns ($ovmf_code, $ovmf_vars); dies if the architecture is unknown or one
# of the image files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$types;

    my $wants_4m = $arch ne "aarch64"
        && defined($efidisk->{efitype})
        && $efidisk->{efitype} eq '4m';

    my $type = 'default';
    if ($wants_4m) {
        my @parts = ('4m');
        push @parts, 'no-smm' if !$smm;
        push @parts, 'ms' if $efidisk->{'pre-enrolled-keys'};
        $type = join('-', @parts);
    }

    my ($ovmf_code, $ovmf_vars) = @{ $types->{$type} };

    -f $ovmf_code or die "EFI base image '$ovmf_code' not found\n";
    -f $ovmf_vars or die "EFI vars image '$ovmf_vars' not found\n";

    return ($ovmf_code, $ovmf_vars);
}
3405
# Emulator binaries used for architectures that differ from the host's.
my $Arch2Qemu = {
    aarch64 => '/usr/bin/qemu-system-aarch64',
    x86_64 => '/usr/bin/qemu-system-x86_64',
};

# Returns the binary to run a VM of the given architecture: '/usr/bin/kvm' for
# the native architecture, otherwise the matching emulator. Dies for
# architectures without a known emulator.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
3418
# To use query_supported_cpu_flags and query_understood_cpu_flags to get flags
# to use in a QEMU command line (-cpu element), first array_intersect the result
# of query_supported_ with query_understood_. This is necessary because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ needs to start up to 2 temporary VMs and is therefore rather
# expensive. If you need the value returned from this, you can get it much
# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
# $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Parameters:
#   $arch - architecture to query, defaults to the host architecture; dies for
#           'aarch64'
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags. The 'kvm' key is only
# set if kvm_version() is defined; a failed query leaves its value undef and
# only emits a warning.
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    my $flags = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n" if
        $arch eq "aarch64";

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    # $kvm selects the accelerator: false adds '-accel tcg'. Returns a sorted
    # array ref of flag names.
    my $query_supported_run_qemu = sub {
        my ($kvm) = @_;

        # per-run result set; intentionally shadows the outer $flags
        my $flags = {};
        my $cmd = [
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize'
        ];

        if (!$kvm) {
            push @$cmd, '-accel', 'tcg';
        }

        my $rc = run_command($cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        # errors are captured so the temporary VM is stopped in any case
        eval {
            my $cmd_result = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $cmd_result->{model}->{props};
            foreach my $prop (keys %$props) {
                # only keep properties whose value stringifies to '1'
                next if $props->{$prop} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                $prop =~ s/\.|-/_/g;
                $flags->{$prop} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %$flags ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    # Both runs happen under the config lock of the fake VM ID.
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $query_supported_run_qemu->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags->{kvm} = eval { $query_supported_run_qemu->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $flags;
}
3520
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the whitespace-separated list of CPU flags from the flag file of the
# host architecture. Returns an array ref of flag names; dies if the file
# does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $flag_file = join('/', $understood_cpu_flag_dir, "recognized-CPUID-flags-$arch");

    unless (-e $flag_file) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($flag_file);

    # split ' ' skips leading whitespace and drops trailing empty fields, so
    # no explicit trimming is needed
    return [ split(' ', $content) ];
}
3536
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# Returns a true value if the VM uses SeaBIOS (explicitly or because no BIOS is
# configured) together with a 'serialN' or 'none' display type.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    return if $machine =~ m/^virt/; # there is no smm flag that could be disabled

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3547
# Build the two '-drive' option strings needed for OVMF: the read-only firmware
# code image and the EFI vars drive. The vars drive is the configured efidisk0
# or, if none is configured, a temporary copy of the vars template in /tmp
# (with a warning).
# $version_guard is a callback taking (major, minor, pve) that decides whether
# the 'size=' option may be emitted.
# Returns ($code_drive_str, $var_drive_str).
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $d = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);

    my @var_opts = ('if=pflash', 'unit=1', 'id=drive-efidisk0');

    if ($d) {
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
        my $path = $d->{file};
        my $format = $d->{format};

        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $d->{file});
            if (!defined($format)) {
                my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                $format = qemu_img_format($scfg, $volname);
            }
        } elsif (!defined($format)) {
            die "efidisk format must be specified\n";
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        if ($path =~ m/^rbd:/) {
            push @var_opts, 'cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }
        push @var_opts, "format=$format", "file=$path";

        push @var_opts, "size=" . (-s $ovmf_vars)
            if $format eq 'raw' && $version_guard->(4, 1, 2);
        push @var_opts, 'readonly=on' if drive_is_read_only($conf, $d);
    } else {
        log_warn("no efidisk configured! Using temporary efivars disk.");

        my $path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);

        push @var_opts, "format=raw", "file=$path";
        push @var_opts, "size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);
    }

    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";

    return ($code_drive_str, join(',', @var_opts));
}
3587
1e3baf05 3588sub config_to_command {
5921764c
SR
3589 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3590 $pbs_backing) = @_;
1e3baf05 3591
3326ae19 3592 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
5bdcf937 3593 my $devices = [];
5bdcf937 3594 my $bridges = {};
b42d3cf9 3595 my $ostype = $conf->{ostype};
4317f69f 3596 my $winversion = windows_version($ostype);
d731ecbe 3597 my $kvm = $conf->{kvm};
38277afc 3598 my $nodename = nodename();
d731ecbe 3599
045749f2 3600 my $arch = get_vm_arch($conf);
1476b99f
DC
3601 my $kvm_binary = get_command_for_arch($arch);
3602 my $kvmver = kvm_user_version($kvm_binary);
045749f2 3603
a04dd5c4
SR
3604 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3605 $kvmver //= "undefined";
3606 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3607 }
3608
9471e48b
TL
3609 my $add_pve_version = min_version($kvmver, 4, 1);
3610
3611 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
4df98f2f 3612 my $machine_version = extract_version($machine_type, $kvmver);
d731ecbe 3613 $kvm //= 1 if is_native($arch);
4317f69f 3614
a77a53ae 3615 $machine_version =~ m/(\d+)\.(\d+)/;
ac0077cc 3616 my ($machine_major, $machine_minor) = ($1, $2);
ac0077cc 3617
b516c848
SR
3618 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3619 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3620 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
4df98f2f
TL
3621 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3622 ." please upgrade node '$nodename'\n"
b516c848 3623 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
ac0077cc 3624 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
4df98f2f
TL
3625 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3626 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3627 ." node '$nodename'\n";
ac0077cc
SR
3628 }
3629
3630 # if a specific +pve version is required for a feature, use $version_guard
3631 # instead of min_version to allow machines to be run with the minimum
3632 # required version
3633 my $required_pve_version = 0;
3634 my $version_guard = sub {
3635 my ($major, $minor, $pve) = @_;
3636 return 0 if !min_version($machine_version, $major, $minor, $pve);
47f35977
SR
3637 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3638 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
ac0077cc
SR
3639 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3640 return 1;
3641 };
a77a53ae 3642
4df98f2f
TL
3643 if ($kvm && !defined kvm_version()) {
3644 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3645 ." or enable in BIOS.\n";
d731ecbe 3646 }
bfcd9b7e 3647
3392d6ca 3648 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
4d3f29ed 3649 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
249c4a6c
AD
3650 my $use_old_bios_files = undef;
3651 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
db656e5f 3652
74fe3d9a 3653 my $cmd = [];
83870398 3654 if ($conf->{affinity}) {
74fe3d9a 3655 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
83870398
DB
3656 }
3657
1476b99f 3658 push @$cmd, $kvm_binary;
1e3baf05
DM
3659
3660 push @$cmd, '-id', $vmid;
3661
e4d4cda1
HR
3662 my $vmname = $conf->{name} || "vm$vmid";
3663
6884a7d7 3664 push @$cmd, '-name', "$vmname,debug-threads=on";
e4d4cda1 3665
27b25d03
SR
3666 push @$cmd, '-no-shutdown';
3667
1e3baf05
DM
3668 my $use_virtio = 0;
3669
d036e418 3670 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
378ad769 3671 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
c971c4f2
AD
3672 push @$cmd, '-mon', "chardev=qmp,mode=control";
3673
2ea5fb7e 3674 if (min_version($machine_version, 2, 12)) {
b4496b9e 3675 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
71bd73b5
DC
3676 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3677 }
1e3baf05 3678
d036e418 3679 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
19672434 3680
1e3baf05
DM
3681 push @$cmd, '-daemonize';
3682
2796e7d5 3683 if ($conf->{smbios1}) {
1f30ac3a
CE
3684 my $smbios_conf = parse_smbios1($conf->{smbios1});
3685 if ($smbios_conf->{base64}) {
3686 # Do not pass base64 flag to qemu
3687 delete $smbios_conf->{base64};
3688 my $smbios_string = "";
3689 foreach my $key (keys %$smbios_conf) {
3690 my $value;
3691 if ($key eq "uuid") {
3692 $value = $smbios_conf->{uuid}
3693 } else {
3694 $value = decode_base64($smbios_conf->{$key});
3695 }
3696 # qemu accepts any binary data, only commas need escaping by double comma
3697 $value =~ s/,/,,/g;
3698 $smbios_string .= "," . $key . "=" . $value if $value;
3699 }
3700 push @$cmd, '-smbios', "type=1" . $smbios_string;
3701 } else {
3702 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3703 }
2796e7d5
DM
3704 }
3705
3edb45e7 3706 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
b7d80c79
FE
3707 my ($code_drive_str, $var_drive_str) =
3708 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3709 push $cmd->@*, '-drive', $code_drive_str;
3710 push $cmd->@*, '-drive', $var_drive_str;
a783c78e
AD
3711 }
3712
483ceeab 3713 if ($q35) { # tell QEMU to load q35 config early
7583d156 3714 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
2ea5fb7e 3715 if (min_version($machine_version, 4, 0)) {
7583d156
DC
3716 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3717 } else {
3718 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3719 }
3720 }
da8b4189 3721
cc181036
TL
3722 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3723 push @$cmd, $fixups->@*;
3724 }
3725
844d8fa6
DC
3726 if ($conf->{vmgenid}) {
3727 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3728 }
3729
d40e5e18 3730 # add usb controllers
4df98f2f 3731 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
0cf8d56c 3732 $conf, $bridges, $arch, $machine_type, $machine_version);
d40e5e18 3733 push @$devices, @usbcontrollers if @usbcontrollers;
55655ebc 3734 my $vga = parse_vga($conf->{vga});
2fa3151e 3735
55655ebc
DC
3736 my $qxlnum = vga_conf_has_spice($conf->{vga});
3737 $vga->{type} = 'qxl' if $qxlnum;
2fa3151e 3738
55655ebc 3739 if (!$vga->{type}) {
869ad4a7
WB
3740 if ($arch eq 'aarch64') {
3741 $vga->{type} = 'virtio';
2ea5fb7e 3742 } elsif (min_version($machine_version, 2, 9)) {
55655ebc 3743 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
a2a5cd64 3744 } else {
55655ebc 3745 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
a2a5cd64 3746 }
5acbfe9e
DM
3747 }
3748
1e3baf05 3749 # enable absolute mouse coordinates (needed by vnc)
fa3b3ce0
TL
3750 my $tablet = $conf->{tablet};
3751 if (!defined($tablet)) {
5acbfe9e 3752 $tablet = $defaults->{tablet};
590e698c 3753 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
55655ebc 3754 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
5acbfe9e
DM
3755 }
3756
d559309f
WB
3757 if ($tablet) {
3758 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3759 my $kbd = print_keyboarddevice_full($conf, $arch);
3760 push @$devices, '-device', $kbd if defined($kbd);
3761 }
b467f79a 3762
e5d611c3 3763 my $bootorder = device_bootorder($conf);
2141a802 3764
74c17b7a 3765 # host pci device passthrough
9b71c34d
DC
3766 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3767 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
1e3baf05
DM
3768
3769 # usb devices
ae36393d 3770 my $usb_dev_features = {};
2ea5fb7e 3771 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
ae36393d 3772
4df98f2f 3773 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
0cf8d56c 3774 $conf, $usb_dev_features, $bootorder, $machine_version);
d40e5e18 3775 push @$devices, @usbdevices if @usbdevices;
2141a802 3776
1e3baf05 3777 # serial devices
bae179aa 3778 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
fa3b3ce0
TL
3779 my $path = $conf->{"serial$i"} or next;
3780 if ($path eq 'socket') {
3781 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3782 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
7bd9abd2 3783 # On aarch64, serial0 is the UART device. QEMU only allows
fa3b3ce0
TL
3784 # connecting UART devices via the '-serial' command line, as
3785 # the device has a fixed slot on the hardware...
3786 if ($arch eq 'aarch64' && $i == 0) {
3787 push @$devices, '-serial', "chardev:serial$i";
9f9d2fb2 3788 } else {
9f9d2fb2
DM
3789 push @$devices, '-device', "isa-serial,chardev=serial$i";
3790 }
fa3b3ce0
TL
3791 } else {
3792 die "no such serial device\n" if ! -c $path;
e35eb876 3793 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
fa3b3ce0 3794 push @$devices, '-device', "isa-serial,chardev=serial$i";
34978be3 3795 }
1e3baf05
DM
3796 }
3797
3798 # parallel devices
1989a89c 3799 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
34978be3 3800 if (my $path = $conf->{"parallel$i"}) {
19672434 3801 die "no such parallel device\n" if ! -c $path;
e35eb876 3802 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
4c5dbaf6 3803 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
5bdcf937 3804 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
34978be3 3805 }
1e3baf05
DM
3806 }
3807
b01de199 3808 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
2e7b5925 3809 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
1cc5ed1b 3810 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
b01de199 3811 push @$devices, @$audio_devs;
2e7b5925 3812 }
19672434 3813
f9dde219
SR
3814 add_tpm_device($vmid, $devices, $conf);
3815
1e3baf05
DM
3816 my $sockets = 1;
3817 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3818 $sockets = $conf->{sockets} if $conf->{sockets};
3819
3820 my $cores = $conf->{cores} || 1;
3bd18e48 3821
de9d1e55 3822 my $maxcpus = $sockets * $cores;
76267728 3823
de9d1e55 3824 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
76267728 3825
de9d1e55
AD
3826 my $allowed_vcpus = $cpuinfo->{cpus};
3827
483ceeab 3828 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
1e3baf05 3829
483ceeab 3830 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
69c81430
AD
3831 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3832 for (my $i = 2; $i <= $vcpus; $i++) {
3833 my $cpustr = print_cpu_device($conf,$i);
3834 push @$cmd, '-device', $cpustr;
3835 }
3836
3837 } else {
3838
3839 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3840 }
1e3baf05
DM
3841 push @$cmd, '-nodefaults';
3842
dbea4415 3843 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
1e3baf05 3844
0f704640 3845 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
1e3baf05 3846
6b64503e 3847 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
1e3baf05 3848
84902837 3849 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
4df98f2f
TL
3850 push @$devices, '-device', print_vga_device(
3851 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
6f070e39
TL
3852
3853 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3854
d036e418 3855 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
378ad769 3856 push @$cmd, '-vnc', "unix:$socket,password=on";
b7be4ba9 3857 } else {
55655ebc 3858 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
b7be4ba9
AD
3859 push @$cmd, '-nographic';
3860 }
3861
1e3baf05 3862 # time drift fix
6b64503e 3863 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
8c559505 3864 my $useLocaltime = $conf->{localtime};
1e3baf05 3865
4317f69f
AD
3866 if ($winversion >= 5) { # windows
3867 $useLocaltime = 1 if !defined($conf->{localtime});
7a131888 3868
4317f69f
AD
3869 # use time drift fix when acpi is enabled
3870 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3871 $tdf = 1 if !defined($conf->{tdf});
462e8d19 3872 }
4317f69f 3873 }
462e8d19 3874
4317f69f
AD
3875 if ($winversion >= 6) {
3876 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
17bacc21 3877 push @$machineFlags, 'hpet=off';
1e3baf05
DM
3878 }
3879
8c559505
DM
3880 push @$rtcFlags, 'driftfix=slew' if $tdf;
3881
2f6f002c 3882 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
8c559505
DM
3883 push @$rtcFlags, "base=$conf->{startdate}";
3884 } elsif ($useLocaltime) {
3885 push @$rtcFlags, 'base=localtime';
3886 }
1e3baf05 3887
58c64ad5
SR
3888 if ($forcecpu) {
3889 push @$cmd, '-cpu', $forcecpu;
3890 } else {
2f6f002c 3891 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
58c64ad5 3892 }
519ed28c 3893
dafb728c
AD
3894 PVE::QemuServer::Memory::config(
3895 $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd);
370b05e7 3896
1e3baf05
DM
3897 push @$cmd, '-S' if $conf->{freeze};
3898
b20df606 3899 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
1e3baf05 3900
48657158
MD
3901 my $guest_agent = parse_guest_agent($conf);
3902
3903 if ($guest_agent->{enabled}) {
d036e418 3904 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
378ad769 3905 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
48657158 3906
60f03a11 3907 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
48657158
MD
3908 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3909 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3910 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3911 } elsif ($guest_agent->{type} eq 'isa') {
3912 push @$devices, '-device', "isa-serial,chardev=qga0";
3913 }
ab6a046f
AD
3914 }
3915
e5d611c3
TL
3916 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3917 if ($rng && $version_guard->(4, 1, 2)) {
05853188
SR
3918 check_rng_source($rng->{source});
3919
2cf61f33
SR
3920 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3921 my $period = $rng->{period} // $rng_fmt->{period}->{default};
2cf61f33
SR
3922 my $limiter_str = "";
3923 if ($max_bytes) {
3924 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3925 }
3926
2cf61f33 3927 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
2cf61f33
SR
3928 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3929 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3930 }
3931
1d794448 3932 my $spice_port;
2fa3151e 3933
f8ea1b30 3934 if ($qxlnum || $vga->{type} =~ /^virtio/) {
590e698c 3935 if ($qxlnum > 1) {
ac087616 3936 if ($winversion){
2f6f002c 3937 for (my $i = 1; $i < $qxlnum; $i++){
4df98f2f
TL
3938 push @$devices, '-device', print_vga_device(
3939 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
590e698c
DM
3940 }
3941 } else {
3942 # assume other OS works like Linux
55655ebc
DC
3943 my ($ram, $vram) = ("134217728", "67108864");
3944 if ($vga->{memory}) {
3945 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3946 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3947 }
3948 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3949 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
2fa3151e
AD
3950 }
3951 }
3952
d559309f 3953 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
95a4b4a9 3954
af0eba7e 3955 my $pfamily = PVE::Tools::get_host_address_family($nodename);
91152441
WB
3956 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3957 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
4d316a63
AL
3958
3959 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3960 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3961 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3962
91152441
WB
3963 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3964 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
943340a6 3965
4df98f2f
TL
3966 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3967 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
caab114a
TL
3968 if ($spice_enhancement->{foldersharing}) {
3969 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3970 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3971 }
c4df18db 3972
caab114a 3973 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
4df98f2f
TL
3974 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3975 if $spice_enhancement->{videostreaming};
3976
caab114a 3977 push @$devices, '-spice', "$spice_opts";
1011b570
DM
3978 }
3979
8d9ae0d2
DM
3980 # enable balloon by default, unless explicitly disabled
3981 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3326ae19 3982 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
c70e4ec3
AD
3983 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3984 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3985 push @$devices, '-device', $ballooncmd;
8d9ae0d2 3986 }
1e3baf05 3987
0ea9541d
DM
3988 if ($conf->{watchdog}) {
3989 my $wdopts = parse_watchdog($conf->{watchdog});
3326ae19 3990 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
0a40e8ea 3991 my $watchdog = $wdopts->{model} || 'i6300esb';
5bdcf937
AD
3992 push @$devices, '-device', "$watchdog$pciaddr";
3993 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
0ea9541d
DM
3994 }
3995
1e3baf05 3996 my $vollist = [];
941e0c42 3997 my $scsicontroller = {};
26ee04b6 3998 my $ahcicontroller = {};
cdd20088 3999 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
1e3baf05 4000
5881b913
DM
4001 # Add iscsi initiator name if available
4002 if (my $initiator = get_initiator_name()) {
4003 push @$devices, '-iscsi', "initiator-name=$initiator";
4004 }
4005
912792e2 4006 PVE::QemuConfig->foreach_volume($conf, sub {
1e3baf05
DM
4007 my ($ds, $drive) = @_;
4008
ff1a2432 4009 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3f11f0d7 4010 check_volume_storage_type($storecfg, $drive->{file});
1e3baf05 4011 push @$vollist, $drive->{file};
ff1a2432 4012 }
afdb31d5 4013
4dcce9ee
TL
4014 # ignore efidisk here, already added in bios/fw handling code above
4015 return if $drive->{interface} eq 'efidisk';
f9dde219
SR
4016 # similar for TPM
4017 return if $drive->{interface} eq 'tpmstate';
4dcce9ee 4018
1e3baf05 4019 $use_virtio = 1 if $ds =~ m/^virtio/;
3b408e82 4020
2141a802 4021 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3b408e82 4022
2f6f002c 4023 if ($drive->{interface} eq 'virtio'){
51f492cd
AD
4024 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
4025 }
4026
2f6f002c 4027 if ($drive->{interface} eq 'scsi') {
cdd20088 4028
ee034f5c 4029 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
6731a4cf 4030
b8fb1c03
SR
4031 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
4032 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
4033
3326ae19 4034 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
a1b7d579 4035 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
fc8b40fd
AD
4036
4037 my $iothread = '';
4038 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
4039 $iothread .= ",iothread=iothread-$controller_prefix$controller";
4040 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
e7a5104d 4041 } elsif ($drive->{iothread}) {
d80ad18c
MH
4042 log_warn(
4043 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
4044 );
fc8b40fd
AD
4045 }
4046
6e11f143
AD
4047 my $queues = '';
4048 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4049 $queues = ",num_queues=$drive->{queues}";
370b05e7 4050 }
6e11f143 4051
4df98f2f
TL
4052 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4053 if !$scsicontroller->{$controller};
cdd20088 4054 $scsicontroller->{$controller}=1;
2f6f002c 4055 }
3b408e82 4056
26ee04b6 4057 if ($drive->{interface} eq 'sata') {
2f6f002c 4058 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3326ae19 4059 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4df98f2f
TL
4060 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4061 if !$ahcicontroller->{$controller};
2f6f002c 4062 $ahcicontroller->{$controller}=1;
26ee04b6 4063 }
46f58b5f 4064
5921764c
SR
4065 my $pbs_conf = $pbs_backing->{$ds};
4066 my $pbs_name = undef;
4067 if ($pbs_conf) {
4068 $pbs_name = "drive-$ds-pbs";
4069 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4070 }
4071
6d5673c3
SR
4072 my $drive_cmd = print_drive_commandline_full(
4073 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
3dc33a72
FG
4074
4075 # extra protection for templates, but SATA and IDE don't support it..
75748d44 4076 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4ef13a7f 4077
15b21acc 4078 push @$devices, '-drive',$drive_cmd;
4df98f2f
TL
4079 push @$devices, '-device', print_drivedevice_full(
4080 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
1e3baf05
DM
4081 });
4082
cc4d6182 4083 for (my $i = 0; $i < $MAX_NETS; $i++) {
2141a802
SR
4084 my $netname = "net$i";
4085
4086 next if !$conf->{$netname};
4087 my $d = parse_net($conf->{$netname});
d0a86b24 4088 next if !$d;
4ddd2ca2 4089 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
1e3baf05 4090
d0a86b24 4091 $use_virtio = 1 if $d->{model} eq 'virtio';
1e3baf05 4092
2141a802 4093 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
1e3baf05 4094
2141a802 4095 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
d0a86b24 4096 push @$devices, '-netdev', $netdevfull;
5bdcf937 4097
d0a86b24 4098 my $netdevicefull = print_netdevice_full(
0c03a390 4099 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4df98f2f 4100
d0a86b24 4101 push @$devices, '-device', $netdevicefull;
5bdcf937 4102 }
1e3baf05 4103
6dbcb073 4104 if ($conf->{ivshmem}) {
4df98f2f 4105 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
e3c27a6a 4106
6dbcb073
DC
4107 my $bus;
4108 if ($q35) {
4109 $bus = print_pcie_addr("ivshmem");
4110 } else {
4111 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4112 }
e3c27a6a
TL
4113
4114 my $ivshmem_name = $ivshmem->{name} // $vmid;
4115 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4116
6dbcb073 4117 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4df98f2f
TL
4118 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4119 .",size=$ivshmem->{size}M";
6dbcb073
DC
4120 }
4121
2513b862
DC
4122 # pci.4 is nested in pci.1
4123 $bridges->{1} = 1 if $bridges->{4};
4124
3326ae19
TL
4125 if (!$q35) { # add pci bridges
4126 if (min_version($machine_version, 2, 3)) {
fc79e813
AD
4127 $bridges->{1} = 1;
4128 $bridges->{2} = 1;
4129 }
6731a4cf 4130 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
2513b862
DC
4131 }
4132
4133 for my $k (sort {$b cmp $a} keys %$bridges) {
4134 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
13d68979
SR
4135
4136 my $k_name = $k;
4137 if ($k == 2 && $legacy_igd) {
4138 $k_name = "$k-igd";
4139 }
3326ae19 4140 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
2513b862 4141 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
3326ae19
TL
4142
4143 if ($q35) { # add after -readconfig pve-q35.cfg
2513b862
DC
4144 splice @$devices, 2, 0, '-device', $devstr;
4145 } else {
4146 unshift @$devices, '-device', $devstr if $k > 0;
f8e83f05 4147 }
19672434
DM
4148 }
4149
ac0077cc
SR
4150 if (!$kvm) {
4151 push @$machineFlags, 'accel=tcg';
4152 }
4153
e4263214 4154 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
e5a6919c 4155
ac0077cc
SR
4156 my $machine_type_min = $machine_type;
4157 if ($add_pve_version) {
4158 $machine_type_min =~ s/\+pve\d+$//;
4159 $machine_type_min .= "+pve$required_pve_version";
4160 }
4161 push @$machineFlags, "type=${machine_type_min}";
4162
5bdcf937 4163 push @$cmd, @$devices;
2f6f002c
TL
4164 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4165 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4166 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
8c559505 4167
7ceade4c
DC
4168 if (my $vmstate = $conf->{vmstate}) {
4169 my $statepath = PVE::Storage::path($storecfg, $vmstate);
24d1f93a 4170 push @$vollist, $vmstate;
7ceade4c 4171 push @$cmd, '-loadstate', $statepath;
b85666cf 4172 print "activating and using '$vmstate' as vmstate\n";
7ceade4c
DC
4173 }
4174
85fcf79e
FG
4175 if (PVE::QemuConfig->is_template($conf)) {
4176 # needed to workaround base volumes being read-only
4177 push @$cmd, '-snapshot';
4178 }
4179
76350670
DC
4180 # add custom args
4181 if ($conf->{args}) {
4182 my $aa = PVE::Tools::split_args($conf->{args});
4183 push @$cmd, @$aa;
4184 }
4185
9b71c34d 4186 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
1e3baf05 4187}
19672434 4188
05853188
SR
# Sanity-check the source file configured for a VirtIO RNG device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the first
# line of the hw_random 'rng_current' sysfs file reads 'none'. Returns normally
# otherwise.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    # the remaining check only concerns the hardware RNG pass-through
    return if $source ne '/dev/hwrng';

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    return if file_read_firstline($rng_current) ne 'none';

    # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
    # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
    die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
        ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
        ." to the host.\n";
}
4205
# Look up the SPICE port of the running VM $vmid via the 'query-spice' monitor
# command. The 'tls-port' entry is preferred over 'port'; dies if neither holds
# a true value.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4213
86fdcfb2
DA
# Build a lookup table of the devices currently present in the running VM $vmid.
#
# Returns a hash reference keyed by device ID. The value is 1, except for PCI
# devices that are bridges with child devices: there it is 1 plus the number of
# devices sitting directly behind the bridge.
#
# Sources consulted through mon_cmd():
#   - 'query-pci':   qdev IDs of all PCI devices, descending into bridges
#   - 'query-block': block devices named 'drive-<id>', recorded as '<id>'
#   - 'query-mice':  records 'tablet' if a 'QEMU HID Tablet' is present
#   - 'qom-list' on /machine/peripheral: 'usbN' and 'usbredirdevN' entries
sub vm_devices_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    my $devices = {};
    for my $pcibus (@$res) {
        push @$devices_to_check, @{$pcibus->{devices}};
    }

    # breadth-first walk: every round handles one nesting level of PCI bridges
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            my $qdev_id = $d->{'qdev_id'};
            $devices->{$qdev_id} = 1 if $qdev_id;

            my $bridge = $d->{'pci_bridge'};
            next if !$bridge || !$bridge->{devices};

            my $children = $bridge->{devices};
            # Only count children for bridges that carry an ID; a bridge without
            # one would otherwise create a bogus '' entry (and an 'uninitialized'
            # warning). Its children are still visited below.
            $devices->{$qdev_id} += scalar(@$children) if $qdev_id;
            push @$to_check, @$children;
        }
        $devices_to_check = $to_check;
    }

    my $resblock = mon_cmd($vmid, 'query-block');
    for my $block (@$resblock) {
        if ($block->{device} =~ m/^drive-(\S+)/) {
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    for my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb(?:redirdev)?\d+$/) {
            $devices->{$per->{name}} = 1;
        }
    }

    return $devices;
}
4263
# Hot-plug the device $deviceid (described by $device) into the running VM $vmid.
#
# Returns 1 when the device is already present or was plugged. Returns an empty
# value when qemu_netdevadd() reports failure for a network device. Dies with
# "can't hotplug device ..." for IDs that match none of the handled patterns;
# errors raised by the called helpers propagate to the caller.
#
# For virtio, scsi and net devices the backend that was added first (drive or
# netdev) is removed again if a later step fails.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # nothing to do when the guest already has this device
    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        my $port = $1 + 1;
        qemu_deviceadd(
            $vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $port));
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $drive_device = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, $drive_device);
        eval { qemu_deviceaddverify($vmid, $deviceid); };
        if (my $err = $@) {
            # roll back the drive, but report the original failure
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(?:virtioscsi|scsihw)\d+$/) {
        my $scsihw = $conf->{scsihw} // "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $controller = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings only apply to per-disk 'virtioscsiN' controllers
        my $is_virtioscsi = $deviceid =~ m/^virtioscsi\d+$/;

        if ($is_virtioscsi && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $controller .= ",iothread=iothread-$deviceid";
        }

        if ($is_virtioscsi && $device->{queues}) {
            $controller .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $controller);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $drive_device = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $drive_device); };
        if (my $err = $@) {
            # roll back the drive, but report the original failure
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^net\d+$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the network device string is built from the machine type reported by
        # qemu_machine_pxe(), not from the $machine_type argument
        my $pxe_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($pxe_machine_type);
        my ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($pxe_machine_type);

        my $net_device = print_netdevice_full(
            $vmid,
            $conf,
            $device,
            $deviceid,
            undef,
            $use_old_bios_files,
            $arch,
            $net_machine_type,
            $machine_version,
        );
        qemu_deviceadd($vmid, $net_device);
        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # roll back the netdev, but report the original failure
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^pci\.(\d+)$/) {
        my $bridgeid = $1;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $bridge_device = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $bridge_device);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4360
3eec5767 4361# fixme: this should raise exceptions on error!
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 when the device is not present (nothing to do) or after the unplug
# sequence ran. Dies if $deviceid is one of the boot disks reported by
# get_bootdisks() or if it matches none of the handled patterns; errors raised
# by the called helpers propagate to the caller.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    my $is_bootdisk = grep { $_ eq $deviceid } @$bootdisks;
    die "can't unplug bootdisk '$deviceid'\n" if $is_bootdisk;

    # devices that are removed without waiting for the removal to be confirmed
    my %unverified = map { $_ => 1 } qw(tablet keyboard xhci);

    if ($unverified{$deviceid}) {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(?:usb(?:redirdev)?|virtioscsi|scsihw)\d+$/) {
        # plain devices: remove and wait for confirmation
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the disk's controller
        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive) if $single_controller;
    } elsif ($deviceid =~ m/^net\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4409
c60cad61
DC
# Add a 'spicevmc' character device of type "usbredir" with the given $id to
# the VM via the "chardev-add" monitor command.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => {
            type => "usbredir",
        },
    };

    return mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4423
5e5dcb73
DA
4424sub qemu_deviceadd {
4425 my ($vmid, $devicefull) = @_;
873c2d69 4426
d695b5b7
AD
4427 $devicefull = "driver=".$devicefull;
4428 my %options = split(/[=,]/, $devicefull);
f19d1c47 4429
0a13e08e 4430 mon_cmd($vmid, "device_add" , %options);
5e5dcb73 4431}
afdb31d5 4432
# Request removal of the device $deviceid through the "device_del" monitor
# command and hand back the command's (scalar) result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    return scalar(mon_cmd($vmid, "device_del", id => $deviceid));
}
4438
# Create the "iothread-$deviceid" object for a device that has the iothread
# option set, unless vm_iothreads_list() already reports it.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
    }
}
4447
# Remove the "iothread-$deviceid" object of a device that has the iothread
# option set, provided vm_iothreads_list() still reports it.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
4456
# Create a QOM object of type $qomtype with id $objectid via the
# "object-add" monitor command. Always returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %object = (id => $objectid, "qom-type" => $qomtype);
    mon_cmd($vmid, "object-add", %object);

    return 1;
}
4464
# Delete the QOM object $objectid via the "object-del" monitor command.
# Always returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %object = (id => $objectid);
    mon_cmd($vmid, "object-del", %object);

    return 1;
}
4472
# Attach the drive described by $device to the running VM using the HMP
# command "drive_add".
#
# Returns 1 when the monitor answers with "OK", dies with the monitor's
# reply otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    # the io_uring flag handed to the drive command line depends on the
    # running QEMU being version 6.0 or newer
    my $running_version = get_running_qemu_version($vmid);
    my $io_uring = min_version($running_version, 6, 0);

    my $drive_spec = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # double every backslash, as the spec is embedded in a quoted HMP argument
    $drive_spec =~ s/\\/\\\\/g;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_spec\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $reply\n" if $reply !~ m/OK/s;

    return 1;
}
afdb31d5 4487
# Remove the block backend "drive-$deviceid" using the HMP command
# "drive_del".
#
# Returns 1 on an empty reply or when the monitor reports the device as not
# found; dies with the monitor's reply otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $already_gone = $reply =~ m/Device \'.*?\' not found/s;
    return 1 if $reply eq "" || $already_gone;

    die "deleting drive $deviceid failed : $reply\n";
}
f19d1c47 4501
# Poll the VM's device list until $deviceid shows up.
#
# Checks six times with a one second pause after every unsuccessful check;
# returns 1 as soon as the device is listed and dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        return 1 if defined($plugged->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
afdb31d5 4513
5e5dcb73
DA
4514
# Poll the VM's device list until $deviceid has disappeared.
#
# device_del is asynchronous and its empty return value is not a reliable
# success indicator, so the removal has to be verified explicitly. Checks six
# times with a one second pause after every unsuccessful check; returns 1
# once the device is gone and dies if it is still present afterwards.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        return 1 if !defined($plugged->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4529
# Make sure the SCSI controller that $device belongs to is plugged into the
# running VM, hot-plugging it when it is missing. Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # only the controller number and the id prefix are needed here
    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = "$controller_prefix$controller";

    my $plugged = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type)
        if !defined($plugged->{$scsihwid});

    return 1;
}
4544
8ce30dde
AD
# Unplug the SCSI controller that served the drive $opt once it is no longer
# needed. Always returns 1.
#
# With 'virtio-scsi-single' the controller "virtioscsi<index>" of the drive is
# unplugged unconditionally. Otherwise the controller "scsihw<N>" is only
# unplugged when no plugged SCSI drive satisfies the index check below.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);

    my $devices_list = vm_devices_list($vmid);
    # use a dedicated loop variable instead of shadowing the $opt parameter
    for my $plugged_id (keys %{$devices_list}) {
        next if !is_valid_drivename($plugged_id);

        my $drive = parse_drive($plugged_id, $conf->{$plugged_id});
        # a plugged drive without a parseable config entry cannot be matched;
        # skipping it avoids comparing undefined values
        next if !$drive || !defined($drive->{interface}) || !defined($drive->{index});

        # keep the controller while a SCSI drive below this index bound is plugged
        return 1
            if $drive->{interface} eq 'scsi'
            && $drive->{index} < (($maxdev - 1) * ($controller + 1));
    }

    my $scsihwid = "scsihw$controller";

    vm_deviceunplug($vmid, $conf, $scsihwid);

    return 1;
}
4573
# Hot-plug the PCI bridge that $device needs, if there is one and it is not
# plugged yet. Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # print_pci_addr() is called for its side effect of filling $bridges
    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # remember the last bridge id found in the hash
    my $bridgeid;
    for my $id (keys %$bridges) {
        $bridgeid = $id;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $plugged = vm_devices_list($vmid);

    # NOTE(review): other callers in this file pass a device argument between
    # the device id and $arch when calling vm_deviceplug() - confirm against
    # its signature whether an explicit placeholder is missing here.
    vm_deviceplug($storecfg, $conf, $vmid, $bridge, $arch, $machine_type)
        if !defined($plugged->{$bridge});

    return 1;
}
4597
25088687
DM
# Set the link state of the network device $device via the "set_link"
# monitor command; a true $up means link up, anything else link down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    return mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4604
# Add the network backend for $deviceid via the "netdev_add" monitor command.
#
# The backend is generated in command line notation and split on '=' and ','
# into the option hash; 'vhost' and 'queues' are converted to a JSON boolean
# and a number respectively before sending. Always returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    # force numeric context so the value is not sent as a string
    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4622
# Remove the network backend $deviceid via the "netdev_del" monitor command.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my %backend = (id => $deviceid);

    return mon_cmd($vmid, "netdev_del", %backend);
}
4628
# Replace the USB device $deviceid of a running VM: unplug the currently
# plugged one, add the xhci controller if the VM has none and plug the new
# device. Does nothing when $device is false.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # the xhci controller is added on demand
    my $plugged = vm_devices_list($vmid);
    if (!$plugged->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        my $controller = PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr);
        qemu_deviceadd($vmid, $controller);
    }

    # add the new one
    return vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4648
# Bring the number of vCPUs of the running VM to $vcpus (all configured CPUs
# when $vcpus is false) by plugging or unplugging CPUs one at a time.
#
# After every successful step the new count is stored in $conf->{vcpus} and
# the configuration is written. Dies when more vCPUs than sockets * cores are
# requested, when the running count does not match the configuration or when
# unplugging is requested for a machine version older than 2.7; raises a
# parameter exception for 'vcpus' when a step does not complete in time.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count - no longer used
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # unplug from the highest CPU downwards
        for my $cpu_index (reverse(($vcpus + 1) .. $currentvcpus)) {
            qemu_devicedel($vmid, "cpu$cpu_index");

            my $retry = 0;
            my $running_cpus;
            while (1) {
                $running_cpus = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$running_cpus}) == $cpu_index - 1;
                raise_param_exc({ vcpus => "error unplugging cpu$cpu_index" }) if $retry > 5;
                $retry++;
                sleep 1;
            }

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $initial_cpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$initial_cpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for my $cpu_index (($currentvcpus + 1) .. $vcpus) {
            my $cpustr = print_cpu_device($conf, $cpu_index);
            qemu_deviceadd($vmid, $cpustr);

            my $retry = 0;
            my $running_cpus;
            while (1) {
                $running_cpus = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$running_cpus}) == $cpu_index;
                raise_param_exc({ vcpus => "error hotplugging cpu$cpu_index" }) if $retry > 10;
                sleep 1;
                $retry++;
            }

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # machine versions before 2.7 use the "cpu-add" command instead
        for my $cpu_id ($currentvcpus .. ($vcpus - 1)) {
            mon_cmd($vmid, "cpu-add", id => int($cpu_id));
        }
    }
}
4723
# Apply I/O throttling limits to the block device $deviceid of a running VM
# via the "block_set_io_throttle" monitor command; does nothing when the VM
# is not running.
#
# After $vmid and $deviceid the limits are expected positionally, in the
# order listed in @limit_names below. Every limit is sent as an integer.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    # pair each name with the integer value of the argument at the same position
    my @throttle = map { ($limit_names[$_] => int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle);
}
4755
c1175c92
AD
# Resize the volume $volid to $size on the storage layer and, if the VM is
# running, tell it about the new size via the "block_resize" monitor command.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running);

    return if !$running;

    # the size reported to the VM is rounded up to the next multiple of 1024
    my $remainder = $size % 1024;
    $size += (1024 - $remainder) % 1024;

    my %resize = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );

    mon_cmd($vmid, "block_resize", %resize);
}
4776
1ab0057c
AD
# Create the snapshot $snap of volume $volid.
#
# For a running VM whose volume qualifies according to
# do_snapshots_with_qemu() the snapshot is taken through the monitor;
# in every other case the storage layer takes it.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);
    my $via_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap)
        if $via_qemu;

    return PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
}
4788
fc46aff9
AD
# Delete the snapshot $snap of volume $volid.
#
# The volume only counts as "running" when the VM is running AND the volume
# is referenced by the VM's current configuration. In that case, and if
# do_snapshots_with_qemu() agrees, the deletion goes through the monitor;
# otherwise the storage layer deletes it.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if ($running) {
        # re-evaluate: is the volume actually attached to the running VM?
        $running = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $mark_if_attached = sub {
            my ($ds, $drive) = @_;
            $running = 1 if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($conf, $mark_if_attached);
    }

    my $via_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap)
        if $via_qemu;

    return PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
}
4810
# Set the migration capabilities of the running VM.
#
# Every capability reported by "query-migrate-capabilities" is sent back with
# an explicit state: enabled when listed with a true value in %wanted below,
# disabled otherwise. "dirty-bitmaps" follows what "query-proxmox-support"
# reports for the savevm ($savevm true) or migration flavour of the feature.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # a failing support query simply leaves dirty bitmaps disabled
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my %wanted = (
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    );

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        +{
            capability => $_->{capability},
            state => $wanted{$_->{capability}} ? JSON::true : JSON::false,
        }
    } @$supported;

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
a89fded1 4841
# Call $func once for every distinct volume referenced by $conf.
#
# Volumes are collected from the current configuration, from the pending
# section (only if $include_pending is set and there is one) and from all
# snapshots. $func receives the volume id, a hash with the attributes
# gathered for it and any extra @param:
#   cdrom, replicate, shared, referenced_in_config, referenced_in_snapshot,
#   referenced_in_pending, size, is_vmstate, is_tpmstate, is_unused, drivename
sub foreach_volid {
    my ($conf, $include_pending, $func, @param) = @_;

    my $volhash = {};

    my $record_volume = sub {
        my ($key, $drive, $snapname, $pending) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        # only a volume that is a cdrom in every reference stays marked as cdrom
        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        # a single replicated or shared reference is enough to set the flag
        $info->{replicate} //= 0;
        $info->{replicate} = 1 if $drive->{replicate} // 1;

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $info->{referenced_in_snapshot}->{$snapname} = 1;
        } elsif (!defined($pending)) {
            $info->{referenced_in_config} = 1;
        }
        $info->{referenced_in_pending} = 1 if defined($pending);

        # the first known size wins
        $info->{size} //= $drive->{size} if $drive->{size};

        my %key_flags = (
            is_vmstate => $key eq 'vmstate',
            is_tpmstate => $key eq 'tpmstate0',
            is_unused => scalar($key =~ /^unused\d+$/),
        );
        for my $flag (sort keys %key_flags) {
            $info->{$flag} //= 0;
            $info->{$flag} = 1 if $key_flags{$flag};
        }

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $record_volume);

    my $pending = $conf->{pending};
    if ($include_pending && defined($pending) && %$pending) {
        PVE::QemuConfig->foreach_volume_full($pending, $include_opts, $record_volume, undef, 1);
    }

    for my $snapname (keys %{$conf->{snapshots}}) {
        my $snapshot = $conf->{snapshots}->{$snapname};
        PVE::QemuConfig->foreach_volume_full($snapshot, $include_opts, $record_volume, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4906
# Options that are committed from the pending section straight away when
# hot-plugging pending changes, without touching the running VM.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        description
        hookscript
        lock
        migrate_downtime
        migrate_speed
        name
        onboot
        protection
        shares
        startup
        tags
        vmstatestorage
    )
};

# all cloud-init options are handled the same way
$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4925
3a11fadb
DM
4926# hotplug changes in [PENDING]
4927# $selection hash can be used to only apply specified options, for
4928# example: { cores => 1 } (only apply changed 'cores')
4929# $errors ref is used to return error messages
# Apply the pending changes of $conf to the running VM $vmid where possible.
#
# Parameters:
#   $vmid      - id of the running VM
#   $conf      - VM configuration, modified in place and written back
#   $storecfg  - storage configuration
#   $selection - optional hash; when given, only the options present in it are
#                hot-plugged (fast-plug options are always applied)
#   $errors    - hash ref that receives "hotplug problem - ..." messages,
#                keyed by option name
#
# Options that cannot be hot-plugged stay in the pending section; they are
# skipped silently by dying with "skip\n" inside the per-option eval.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();

    # Keeps the 'cloudinit' section of the config up to date: it records the
    # old value of changed/removed cloud-init properties and the list of added
    # ones ('added', comma separated).
    my $cloudinit_record_changed = sub {
        my ($conf, $opt, $old, $new) = @_;
        return if !$cloudinit_pending_properties->{$opt};

        my $ci = ($conf->{cloudinit} //= {});

        my $recorded = $ci->{$opt};
        my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');

        if (defined($new)) {
            if (defined($old)) {
                # an existing value is being modified
                if (defined($recorded)) {
                    # the value was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value was changed multiple times, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added and is being changed, do nothing
                } else {
                    # the value is new, record it:
                    $ci->{$opt} = $old;
                }
            } else {
                # a new value is being added
                if (defined($recorded)) {
                    # it was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value had temporarily been removed, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added already, do nothing
                } else {
                    # the value is new, add it
                    $added{$opt} = 1;
                }
            }
        } elsif (!defined($old)) {
            # a non-existent value is being removed? ignore...
        } else {
            # a value is being deleted
            if (defined($recorded)) {
                # a value was already recorded, just keep it
            } elsif ($added{$opt}) {
                # the value was marked as added, remove it
                delete $added{$opt};
            } else {
                # a previously unrecorded value is being removed, record the old value:
                $ci->{$opt} = $old;
            }
        }

        my $added = join(',', sort keys %added);
        $ci->{added} = $added if length($added);
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            my $new = delete $conf->{pending}->{$opt};
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
            $conf->{$opt} = $new;
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $ostype = $conf->{ostype};
    my $version = extract_version($machine_type, get_running_qemu_version($vmid));
    my $hotplug_features = parse_hotplug_features($conf->{hotplug} // '1');
    my $usb_hotplug = $hotplug_features->{usb}
        && min_version($version, 7, 1)
        && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);

    # the 'hotplug' option may be unset in the config; use an empty string for
    # pattern matches so no undefined value is matched against
    my $current_hotplug = $conf->{hotplug} // '';

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});

    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($current_hotplug =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($defaults->{tablet}) {
                    # NOTE(review): these calls pass no device argument between
                    # the device id and $arch, unlike other vm_deviceplug()
                    # callers in this file - confirm against its signature.
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            my $old = delete $conf->{$opt};
            $cloudinit_record_changed->($conf, $opt, $old, undef);
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    my $cloudinit_opt;
    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $current_hotplug =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                my $d = eval { parse_property_string('pve-qm-usb', $value) };
                my $id = $opt;
                # an entry without a 'host' key must not be matched as undef
                if (($d->{host} // '') =~ m/^spice$/i) {
                    $id = "usbredirdev$index";
                }
                qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $cloudinit_opt = [$opt, $drive];
                    # apply all the other changes first, then generate the cloudinit disk
                    die "skip\n";
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value);
            } elsif ($opt eq 'cpuunits') {
                my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
                $cgroup->change_cpu_shares($new_cpuunits);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    # the cloud-init drive was deferred above so that it is generated from a
    # configuration that already contains all the other applied changes
    if (defined($cloudinit_opt)) {
        my ($opt, $drive) = @$cloudinit_opt;
        my $value = $conf->{pending}->{$opt};
        eval {
            my $temp = {%$conf, $opt => $value};
            PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
            vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                $vmid, $opt, $value, $arch, $machine_type);
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    # unplug xhci controller if no usb device is left
    if ($usb_hotplug) {
        my $has_usb = 0;
        for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
            next if !defined($conf->{"usb$i"});
            $has_usb = 1;
            last;
        }
        if (!$has_usb) {
            vm_deviceunplug($vmid, $conf, 'xhci');
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);

    if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
        PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
    }
}
055d554d 5210
3dc38fbb
WB
# Free the volume behind $drive if the caller is allowed to and the volume
# is not in use anymore.
#
# Only acts on non-cdrom drives that are either removed with $force or stored
# under an 'unused' key; returns nothing for everything else. Returns 1 when
# the entry can be dropped from the configuration: either the volume was
# freed, or the VM does not own it. Dies when the volume is still in use; the
# 'Datastore.AllocateSpace' check is delegated to $rpcenv->check().
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !$force && $key !~ /^unused/;
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    my $in_use = PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid);
    die "unable to delete '$volid' - volume is still in use (snapshot?)\n" if $in_use;

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
5233
# Handle the removal of drive $opt from the configuration: with $force the
# volume is deallocated (requires 'VM.Config.Disk'), without it the volume is
# registered as an unused drive instead.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # detach only: keep the volume around as unused disk
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5249
98bc3aeb
OB
5250
5251
# Apply all pending changes of a (stopped) VM to its config ("cold plug").
#
# Pending deletions are processed first, then pending additions/changes. A failure for a single
# option is recorded in the $errors hash (keyed by option name) and emitted as a warning, while
# the remaining options are still processed. The config is written once at the end; if a
# cloud-init drive was among the applied changes (and $skip_cloud_init is not set), the
# cloud-init config is applied afterwards as well.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $record_failure = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            # unused volumes are never queued as pending deletions
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # 0 => never regenerate, undef => not decided yet, 1 => a cloud-init drive got applied
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure

        eval {
            # a drive that gets replaced is kept around as unused volume
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}));
            }
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $drive = parse_drive($opt, $conf->{pending}->{$opt});
            $generate_cloudinit //= 1 if drive_is_cloudinit($drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);

    if ($generate_cloudinit && PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
        # After successful generation and if there were changes to be applied, update the
        # config to drop the {cloudinit} entry.
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
5318
# Apply a changed network device option ($opt => $value) to a running VM.
#
# Changes to model, MAC address, queues, MTU or the bridge/NAT mode require the device to be
# unplugged and plugged again; everything else (bridge, tag, trunks, firewall, rate, link state)
# is applied to the existing tap device and 1 is returned. Dies with "skip\n" whenever the change
# would need hotplug but $hotplug is not set.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || safe_num_ne($oldnet->{mtu}, $newnet->{mtu})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            my $needs_tap_replug =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($needs_tap_replug) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    # plug the new (or re-plug the changed) device
    die "skip\n" if !$hotplug;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5372
74ea2c65
AD
# Decide whether a changed guest agent option can be applied to a running VM.
#
# Dies with "skip\n" if the agent is currently not configured, or if any sub-option other than the
# hotpluggable ones differs between the current and the new value. Returns nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotpluggable = { fstrim_cloned_disks => 1 };

    my $current = parse_guest_agent($conf);
    my $wanted = parse_guest_agent({$opt => $value});

    # compares every option present in $lhs against its counterpart in $rhs
    my $skip_on_cold_change = sub {
        my ($lhs, $rhs) = @_;
        for my $option (keys %$lhs) {
            next if defined($hotpluggable->{$option});
            die "skip\n" if safe_string_ne($lhs->{$option}, $rhs->{$option});
        }
    };

    $skip_on_cold_change->($wanted, $current); # added/changed options
    $skip_on_cold_change->($current, $wanted); # removed options

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5395
# Apply a changed drive option ($opt => $value) to a running VM.
#
# - CD-ROM drives: the medium is ejected and, unless the new file is 'none', replaced.
# - Disks whose volume stays the same: I/O throttle settings are updated live; a change to one of
#   the non-hotpluggable properties dies with "skip\n".
# - Disks whose volume changes: the old one is unplugged and registered as unused, then the new
#   one is hotplugged (same path as for a newly added disk).
#
# Returns 1 if the change was applied to the existing device. Dies with "skip\n" if hotplug would
# be needed but is disabled, or if the drive key matches ide/sata.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            my @cold_props = qw(aio discard iothread queues cache ssd ro);
            die "skip\n"
                if first { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @cold_props;

            # apply throttle
            my @throttle_props = qw(
                mbps mbps_rd mbps_wr
                iops iops_rd iops_wr
                mbps_max mbps_rd_max mbps_wr_max
                iops_max iops_rd_max iops_wr_max
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );
            if (first { safe_num_ne($drive->{$_}, $old_drive->{$_}) } @throttle_props) {
                my $mib = 1024 * 1024; # mbps* settings are passed on multiplied by this factor

                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    ($drive->{mbps} || 0) * $mib,
                    ($drive->{mbps_rd} || 0) * $mib,
                    ($drive->{mbps_wr} || 0) * $mib,
                    $drive->{iops} || 0,
                    $drive->{iops_rd} || 0,
                    $drive->{iops_wr} || 0,
                    ($drive->{mbps_max} || 0) * $mib,
                    ($drive->{mbps_rd_max} || 0) * $mib,
                    ($drive->{mbps_wr_max} || 0) * $mib,
                    $drive->{iops_max} || 0,
                    $drive->{iops_rd_max} || 0,
                    $drive->{iops_wr_max} || 0,
                    $drive->{bps_max_length} || 1,
                    $drive->{bps_rd_max_length} || 1,
                    $drive->{bps_wr_max_length} || 1,
                    $drive->{iops_max_length} || 1,
                    $drive->{iops_rd_max_length} || 1,
                    $drive->{iops_wr_max_length} || 1,
                );
            }

            return 1;
        }

        # the volume itself changed
        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5505
9687287b
AD
# Apply the cloud-init config of a VM and, if the VM is running, swap the medium of its
# cloud-init drive (eject + blockdev-change-medium) so the guest sees the current image.
#
# Does nothing if the config has no cloud-init drive.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    # if more than one cloud-init drive is found, the last one visited is used
    my ($cloudinit_ds, $cloudinit_drive);
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($drive);
        ($cloudinit_ds, $cloudinit_drive) = ($ds, $drive);
    });

    return if !$cloudinit_drive;

    # persist the config if applying the cloud-init config reported changes
    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    my $running = PVE::QemuServer::check_running($vmid);
    return if !$running;

    my $path = PVE::Storage::path($storecfg, $cloudinit_drive->{file});
    return if !$path;

    mon_cmd($vmid, "eject", force => JSON::true, id => "$cloudinit_ds");
    mon_cmd($vmid, "blockdev-change-medium", id => "$cloudinit_ds", filename => "$path");
}
5536
# called in locked context by incoming migration
#
# Collects the volumes of $conf that need an NBD export: CD-ROM drives, 'tpmstate0', drives
# without a volume and volumes on shared storages are skipped.
#
# Returns a hash: drive key => [$volid, $storeid, $volname, $drive, $use_existing], where
# $use_existing is 1 if the volid is listed in $replicated_volumes and 0 otherwise.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return $local_volumes;
}
5563
# called in locked context by incoming migration
#
# Allocates the target volumes for an NBD based storage migration.
#
# $source_volumes maps drive keys to
#   [$volid, $storeid, $volname, $drive, $use_existing, $format]
# (see vm_migrate_get_nbd_disks; $format is optional). $storagemap is the parsed storage map.
#
# Returns a hash: drive key => { drivestr => ..., volid => ..., replicated => 1 (only for re-used
# volumes) }. Note that the passed $drive hashes of newly allocated volumes are updated in place
# with the new 'file' and 'format'.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            # replicated volume, nothing to allocate
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            if (!$format || !grep { $format eq $_ } @$validFormats) {
                # The requested format is missing or not supported by the target storage. Drop it,
                # as otherwise the '//=' fallback below would never replace an unsupported value.
                $format = undef;

                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat
                        if grep { $fileFormat eq $_ } @$validFormats;
                }
                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);
        my $newdrive = $drive;
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;
        my $drivestr = print_drive($newdrive);
        $nbd->{$opt}->{drivestr} = $drivestr;
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5615
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Locks the VM config, performs the pre-start checks (template, config lock, already running) and
# then calls vm_start_nolock. If the VM holds a 'backup' lock and is already running, it is
# resumed in the existing QEMU instance instead.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        if (!($params->{skiplock} || $has_suspended_lock)) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start the VM with the freshly allocated target volumes
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5659
5660
0c498cca
FG
# Start a VM; the caller is expected to hold the config lock (see vm_start).
#
# Returns a hash that may contain:
#   migrate => { proto, addr, port, uri, unix_sockets } for incoming migrations
#   drives => per-drive NBD export info (entries of migrate_opts->{nbd} plus 'nbd_uri')
#   spice_port => SPICE port, only set for incoming migrations
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force QEMU machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#     sata0 => {
#       repository
#       snapshot
#       keyfile
#       archive
#     },
#     virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   remote_node => if set, PVE_MIGRATED_FROM is not exported to the environment
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
#   offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
#       contained in config
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    if (!$migratedfrom && PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
        # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
        # $conf->{cloudinit}, so we could just not do this?
        # But we do it above, so for now let's be consistent.
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # override offline migrated volumes, conf is out of date still
    if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
        for my $key (sort keys %$offline_volumes) {
            my $parsed = parse_drive($key, $conf->{$key});
            $parsed->{file} = $offline_volumes->{$key};
            $conf->{$key} = print_drive($parsed);
        }
    }

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    # for remote migration the config is available on the target node!
    $ENV{PVE_MIGRATED_FROM} = $migratedfrom if !$migrate_opts->{remote_node};

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command(
        $storecfg,
        $vmid,
        $conf,
        $defaults,
        $forcemachine,
        $forcecpu,
        $params->{'pbs-backing'},
    );

    # Determines the local IP used for migration traffic; the result is cached in $migration_ip.
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        # an explicitly passed network wins over the datacenter.cfg setting
        my $cidr = $migrate_opts->{network};
        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local node for network '$cidr'\n"
                if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local node for network '$cidr'\n"
                if scalar(@$ips) > 1;

            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1) if !defined($migration_ip);

        return $migration_ip;
    };

    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $migrate = $res->{migrate} = { proto => 'tcp' };
            $migrate->{addr} = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            if (!defined($migration_type)) {
                $migration_type = $datacenterconf->{migration}->{type} // 'secure';
            }

            if ($migration_type eq 'insecure') {
                $migrate->{addr} = $get_migration_ip->($nodename);
                $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
            $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
            push @$cmd, '-incoming', $migrate->{uri};
            push @$cmd, '-S';
        } elsif ($statefile eq 'unix') {
            # should be the default for secure migrations, as an SSH TCP forward tunnel is not
            # reliably ready in time and regularly fails to set up, so use UNIX socket forwards
            my $migrate = $res->{migrate} = { proto => 'unix' };
            $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
            unlink $migrate->{addr};

            $migrate->{uri} = "unix:$migrate->{addr}";
            push @$cmd, '-incoming', $migrate->{uri};
            push @$cmd, '-S';
        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            # not an existing path, treat it as a volume ID
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_reserve_list = [];
    for my $device (values $pci_devices->%*) {
        next if $device->{mdev}; # we don't reserve for mdev devices
        push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
    }

    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);

    eval {
        my $uuid;
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            my ($index) = ($id =~ m/^hostpci(\d+)$/);

            my $chosen_mdev;
            for my $dev ($d->{ids}->@*) {
                my $info = eval {
                    PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev});
                };
                if ($d->{mdev}) {
                    # for mediated devices a failure is not fatal, the next ID is tried
                    warn $@ if $@;
                    $chosen_mdev = $info;
                    last if $chosen_mdev; # if successful, we're done
                } else {
                    die $@ if $@;
                }
            }

            next if !$d->{mdev};
            die "could not create mediated device\n" if !defined($chosen_mdev);

            # nvidia grid needs the uuid of the mdev as qemu parameter
            if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
                if (defined($conf->{smbios1})) {
                    my $smbios_conf = parse_smbios1($conf->{smbios1});
                    $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
                }
                $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
            }
        }
        push @$cmd, '-uuid', $uuid if defined($uuid);
    };
    if (my $err = $@) {
        eval { cleanup_pci_devices($vmid, $conf) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    # get rid of a possibly left-over scope unit of a previous instance (best effort)
    my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
    eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
    eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);

    my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    # enters the systemd scope and starts swtpm (if configured) and QEMU from a forked child
    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {
        my $code = sub {
            my $hotplug_features = parse_hotplug_features($conf->{hotplug} // '1');
            my $hugepages_topology =
                PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});

            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
    } else {
        eval { $run_qemu->() };
    }

    if (my $err = $@) {
        # deactivate volumes if start fails
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { cleanup_pci_devices($vmid, $conf) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
    warn $@ if $@;

    if (defined($res->{migrate})) {
        print "migration listens on $res->{migrate}->{uri}\n";
    } elsif ($statefile) {
        # state was loaded from a file/volume, let the guest continue
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    # start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        if (
            $nbd_protocol_version > 0
            && ($migration_type eq 'secure' || $migration_type eq 'websocket')
        ) {
            my $socket_path = "/run/qemu-server/${vmid}_nbd.migrate";
            mon_cmd(
                $vmid,
                "nbd-server-start",
                addr => { type => 'unix', data => { path => $socket_path } },
            );
            $migrate_storage_uri = "nbd:unix:$socket_path";
            $res->{migrate}->{unix_sockets} = [$socket_path];
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        # index the block devices by their device name to look up the node names below
        my $block_info = mon_cmd($vmid, "query-block");
        $block_info = { map { $_->{device} => $_ } $block_info->@* };

        for my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};

            my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};

            mon_cmd(
                $vmid,
                "block-export-add",
                id => "drive-$opt",
                'node-name' => $block_node,
                writable => JSON::true,
                type => "nbd",
                name => "drive-$opt", # NBD export name
            );

            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval { set_migration_caps($vmid); };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd(
                    $vmid,
                    "set_password",
                    protocol => 'spice',
                    password => $migrate_opts->{spice_ticket},
                );
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }
    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        for my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
        add_nets_bridge_fdb($conf, $vmid);
    }

    # only when ballooning is not explicitly disabled
    if (!defined($conf->{balloon}) || $conf->{balloon}) {
        eval {
            mon_cmd(
                $vmid,
                'qom-set',
                path => "machine/peripheral/balloon0",
                property => "guest-stats-polling-interval",
                value => 2
            );
        };
        log_warn("could not set polling interval for ballooning - $@") if $@;
    }

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
6107
# Build the QEMU command line for a VM (or for one of its snapshots) and
# return it as a single string.
#
# $storecfg - storage configuration, handed on to config_to_command()
# $vmid     - ID of the VM whose config is loaded
# $snapname - optional snapshot name; dies if that snapshot does not exist
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);
    my ($forcemachine, $forcecpu);

    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname};
        defined($snap_conf) or die "snapshot '$snapname' does not exist\n";

        # machine / CPU overrides recorded in the snapshot take precedence
        ($forcemachine, $forcecpu) = $snap_conf->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snap_conf->{digest} = $conf->{digest};

        $conf = $snap_conf;
    }

    my $defaults = load_defaults();
    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
6133
# Send a 'system_reset' monitor command to the VM while holding its config
# lock. The config lock check is skipped when $skiplock is set.
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) unless $skiplock;
        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6146
# Collect the volume IDs referenced by $conf, as visited by foreach_volid().
# Absolute paths and IDs that parse_volume_id() cannot split into a storage
# ID are left out. Returns an array reference.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;
    foreach_volid($conf, 1, sub {
        my ($volid, $attr) = @_;

        return if $volid =~ m|^/|;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    });

    return \@volids;
}
6164
1b189121
DC
# Remove the mediated devices belonging to the VM's hostpciN entries and drop
# the VM's PCI reservation.
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys %$conf) {
        next unless $opt =~ m/^hostpci(\d+)$/;
        my $index = $1;

        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$opt});
        next if !$device->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires the PCI ID and
        # we don't want to break ABI just for this two-liner
        my $mdev_dir = "/sys/bus/mdev/devices/$uuid";

        # some nvidia vgpu driver versions want to clean the mdevs up themselves and error
        # out when we do it first, so wait for 10 seconds before trying
        sleep 10 if $device->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/;

        PVE::SysFSTools::file_write("$mdev_dir/remove", "1") if -e $mdev_dir;
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6189
# Release what a stopped VM leaves behind: volumes (unless $keepActive),
# runtime files below /var/run/qemu-server, the ivshmem file, mediated PCI
# devices, and optionally apply pending config changes.
# Any error is downgraded to a warning.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            my $volids = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $volids);

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        for my $suffix (qw(mon qmp pid vnc qga)) {
            unlink "/var/run/qemu-server/${vmid}.$suffix";
        }

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            my $shm_name = $ivshmem->{name} // $vmid;
            # Just delete it for now: VMs which already have it open are not
            # affected, but new VMs will get a separate one. If this becomes
            # an issue we either add some sort of ref-counting or a "don't
            # delete on stop" flag to the ivshmem format.
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
6227
575d19da
DC
6228# call only in locked context
# Stop or shut down a running VM and clean up afterwards.
#
# Sequence: send the monitor command ('quit', or a shutdown request when
# $shutdown is set), wait up to $timeout seconds (default 60), escalate to
# SIGTERM when $force is set (otherwise die), wait another 10 seconds, then
# SIGKILL. Cleanup only runs when a config was loaded, i.e. without $nocheck.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    unless ($nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) unless $skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # poll once per second; true if the VM was still running after $limit polls
    my $outlives = sub {
        my ($limit) = @_;
        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }
        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;
        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout = 60 if !defined($timeout);

        if (!$outlives->($timeout)) {
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;
        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again
    if ($outlives->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
6309
6310# Note: use $nocheck to skip tests if VM configuration file exists.
6311# We need that when migrating VMs to other nodes (files already moved)
6312# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
# Public entry point for stopping a VM.
#
# With $migratedfrom set, the process is sent SIGTERM and cleaned up using
# the config loaded for that node, without taking the config lock. Otherwise
# the work is delegated to _do_vm_stop() under the config lock.
# $force defaults to 1 for a plain stop (no $shutdown).
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force //= 1 if !$shutdown;

    if ($migratedfrom) {
        my $pid = check_running($vmid, $nocheck, $migratedfrom);
        kill 15, $pid if $pid;

        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);
        return;
    }

    my $stop_locked = sub {
        _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
    };
    PVE::QemuConfig->lock_config($vmid, $stop_locked);
}
6330
165411f0
DC
# Reboot a running VM: file a reboot request, then shut the VM down under
# the config lock. If anything fails, the reboot request is cleared again
# and the error is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            return if !check_running($vmid);

            create_reboot_request($vmid);

            my $storecfg = PVE::Storage::config();
            _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6353
75c24bba 6354# note: if using the statestorage parameter, the caller has to check privileges
# Suspend a VM.
#
# Without $includestate the VM is only paused via the 'stop' monitor command.
# With $includestate the VM state is saved to a state volume, the VM is quit
# and the config lock is set to 'suspended'.
# If no $statestorage is passed, one is picked via find_vmstate_storage() and
# the permission check is done here for non-CLI environments.
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # shared between the two locked sections below
    my ($conf, $path, $storecfg, $vmstate);

    my $prepare = sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $is_backing_up;

        die "cannot suspend to disk during backup\n" if $is_backing_up && $includestate;

        if ($includestate) {
            $conf->{lock} = 'suspending';
            my $date = strftime("%Y-%m-%d", localtime(time()));
            $storecfg = PVE::Storage::config();

            if (!$statestorage) {
                $statestorage = find_vmstate_storage($conf, $storecfg);
                # check permissions for the storage
                my $rpcenv = PVE::RPCEnvironment::get();
                if ($rpcenv->{type} ne 'cli') {
                    my $authuser = $rpcenv->get_user();
                    $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
                }
            }

            $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
                $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
            $path = PVE::Storage::path($storecfg, $vmstate);
            PVE::QemuConfig->write_config($vmid, $conf);
        } else {
            mon_cmd($vmid, "stop");
        }
    };
    PVE::QemuConfig->lock_config($vmid, $prepare);

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$status'\n";
            }
        };
        my $err = $@;

        my $finish = sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            die "lock changed unexpectedly\n"
                if !PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        };
        PVE::QemuConfig->lock_config($vmid, $finish);
    }
}
6448
a20dc58a
FG
6449# $nocheck is set when called as part of a migration - in this context the
6450# location of the config file (source or target node) is not deterministic,
6451# since migration cannot wait for pmxcfs to process the rename
# Resume a paused or suspended VM under the config lock.
#
# The monitor command depends on 'query-status': 'cont' by default,
# 'system_wakeup' for status 'suspended'; status 'shutdown' additionally
# triggers a 'system_reset' first. Nothing is done if the VM already runs.
# With $nocheck the config lock check is skipped and the config is looked up
# on the local node first, then on the node listed in the cluster VM list.
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    my $resume_locked = sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $resume_cmd = 'cont';
        my $reset = 0;

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                if (exists($vmlist->{ids}->{$vmid})) {
                    my $node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        my $status = $res->{status};
        if ($status) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $reset = 1 if $status eq 'shutdown';
        }

        if (!$nocheck) {
            PVE::QemuConfig->check_lock($conf)
                unless $skiplock || PVE::QemuConfig->has_lock($conf, 'backup');
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    };

    PVE::QemuConfig->lock_config($vmid, $resume_locked);
}
6499
5fdbe4f0
DM
# Send a key event to the VM through the human monitor, under the config
# lock. Dies with the monitor's output if it is non-empty.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_locked = sub {
        # NOTE(review): the loaded config is not used below; presumably it is
        # loaded so that a missing config aborts the call - confirm
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_locked);
}
6512
d6deb7f6
TL
# Run check_vnet_access() for the bridge/tag/trunks of every netN entry in
# $conf. 'root@pam' bypasses all checks. Returns 1 when no check died.
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_opts = grep { m/^net\d+$/ } sort keys $conf->%*;
    for my $opt (@net_opts) {
        my $net = parse_net($conf->{$opt});
        my ($bridge, $tag, $trunks) = $net->@{'bridge', 'tag', 'trunks'};
        PVE::GuestHelpers::check_vnet_access($rpcenv, $authuser, $bridge, $tag, $trunks);
    }

    return 1;
}
6526
e3971865
DC
# Check that $user may use the USB and PCI devices configured in $conf.
#
# Raw 'host' devices are root-only (except USB 'spice'); mapped devices
# require 'Mapping.Use' on the mapping's ACL path. Dies if an entry has
# neither 'host' nor 'mapping'.
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    for my $opt (keys $conf->%*) {
        if ($opt =~ m/^usb\d+$/) {
            my $usb = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});
            my $host = $usb->{host};
            if ($host) {
                die "only root can set '$opt' config for real devices\n"
                    if $host !~ m/^spice$/i && $user ne 'root@pam';
            } elsif ($usb->{mapping}) {
                $rpcenv->check_full($user, "/mapping/usb/$usb->{mapping}", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $pci = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});
            if ($pci->{host}) {
                die "only root can set '$opt' config for non-mapped devices\n"
                    if $user ne 'root@pam';
            } elsif ($pci->{mapping}) {
                $rpcenv->check_full($user, "/mapping/pci/$pci->{mapping}", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        }
    }
}
6553
e3971865
DC
# Permission checks for restoring $conf as $user: bridge access first, then
# USB/PCI device access.
sub check_restore_permissions {
    my ($rpcenv, $user, $conf) = @_;

    my @checker_args = ($rpcenv, $user, $conf);
    check_bridge_access(@checker_args);
    check_mapping_access(@checker_args);
}
3e16d5fc
DM
6560# vzdump restore implementation
6561
# Return the first member name that 'tar tf' prints for $archive.
# Dies if $archive is not a plain file, if tar cannot be started, or if the
# listing yields nothing.
sub tar_archive_read_firstfile {
    my $archive = shift;

    -f $archive or die "ERROR: file '$archive' does not exist\n";

    # try to detect archive type first
    my $pid = open(my $listing, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";
    my $firstfile = readline($listing);

    # only the first entry is needed, so stop tar right away
    kill 15, $pid;
    close $listing;

    die "ERROR: archive contaions no data\n" unless $firstfile;
    chomp $firstfile;

    return $firstfile;
}
6579
ed221350
DM
# Remove the temporary volumes recorded in $statfile after a failed tar
# restore.
#
# Each line is expected to end in 'vzdump:<name>:<volid>'. Absolute paths are
# unlinked directly, anything else is freed via PVE::Storage::vdisk_free().
# Failures are reported on STDERR per volume and never abort the cleanup.
# If $statfile cannot be opened, nothing is done.
#
# $storecfg - storage configuration, only passed to vdisk_free()
# $statfile - path of the file listing the allocated volumes
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # 'or' (not '||') so the check applies to unlink's
                        # result rather than to $volid; otherwise a failed
                        # unlink would be reported as success
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6605
# Dispatch a restore to the tar or vma implementation.
#
# '-' as $archive always goes to restore_vma_archive(). Otherwise the format
# comes from $opts->{format}, falling back to what archive_info() detects,
# and the detected compression is passed on to the vma restore.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};
    my $comp = $info->{compression};

    # try to detect archive format
    return $format eq 'tar'
        ? restore_tar_archive($archive, $vmid, $user, $opts)
        : restore_vma_archive($archive, $vmid, $user, $opts, $comp);
}
6623
d1e92cf6
DM
6624# helper to remove disks that will not be used after restore
# Free the volumes of the old config that the restore replaces, plus the
# vmstate volumes of all old snapshots. Volumes that are kept have their
# snapshots deleted instead. All storage errors are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my %kept_volid;

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disks we want to restore,
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_volid{$volid} = 1;
        } else {
            eval { PVE::Storage::vdisk_free($storecfg, $volid); };
            warn $@ if $@;
        }
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        if ($vmstate) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        for my $volid (keys %kept_volid) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6669
9f3d73bc
DM
6670# Helper to parse vzdump backup device hints
6671#
6672# $rpcenv: Environment, used to check storage permissions
6673# $user: User ID, to check storage permissions
6674# $storecfg: Storage configuration
6675# $fh: the file handle for reading the configuration
6676# $devinfo: should contain device sizes for all backed-up devices
6677# $options: backup options (pool, default storage)
6678#
6679# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
# Read the backed-up config from $fh and work out which volumes the restore
# has to allocate.
#
# '#qmdump#map:' lines describe backed-up drives; their target storage and
# format are recorded in the matching $devinfo entry. ide/sata/scsi lines
# that turn out to be cloud-init drives get an entry of their own.
# Every target storage is checked for the 'images' content type and, for
# non-root users, for 'Datastore.AllocateSpace'.
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the storage
    my $assert_allocatable = sub {
        my ($storeid, $scfg) = @_;

        die "Content type 'images' is not available on storage '$storeid'\n"
            if !$scfg->{content}->{images};

        return if $user eq 'root@pam';
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
    };

    my $virtdev_hash = {};
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname};
            die "archive does not contain data for drive '$virtdev'\n" if !$info;

            # an explicitly requested storage wins; 'local' is the last resort
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $assert_allocatable->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $info;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my ($virtdev, $drivestr) = ($1, $2);
            my $drive = parse_drive($virtdev, $drivestr);
            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined($options->{storage});
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $assert_allocatable->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
d1e92cf6 6737
9f3d73bc
DM
6738# Helper to allocate and activate all volumes required for a restore
6739#
6740# $storecfg: Storage configuration
6741# $virtdev_hash: as returned by parse_backup_hints()
6742#
6743# Returns: { $virtdev => $volid }
# Allocate and activate one volume per entry of $virtdev_hash.
#
# $storecfg     - storage configuration
# $virtdev_hash - { $virtdev => { size, storeid, format, [is_cloudinit] } };
#                 each entry gets its 'volid' set, and its 'format' replaced
#                 by the storage default when the storage does not list it
# $vmid         - owner of the new volumes
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # vdisk_alloc gets the size divided by 1024, rounded up
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # reuse the storage config fetched above rather than shadowing
            # $scfg with a second, identical lookup
            if ($scfg->{path}) {
                $name .= ".$d->{format}";
            }
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
d1e92cf6 6781
# Translate one line of a backed-up config into what is written to the
# restored config. Returns '' for lines that are dropped.
#
# $cookie - state shared across calls; {netcount} numbers converted 'vlanN' NICs
# $map    - { $virtdev => $volid } of restored volumes
# $line   - the raw config line
# $unique - if set, MAC addresses and the smbios1 UUID are regenerated
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # never carried over into the restored config
    for my $dropped (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $dropped;
    }

    my $out = '';

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;
            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $out .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
    } elsif (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
        $netstr = print_net($net);
        $out .= "$id: $netstr\n";
    } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $drive = parse_drive($virtdev, $value);
        if (defined($drive->{backup}) && !$drive->{backup}) {
            # drive was excluded from the backup: keep the line as a comment
            $out .= "#$line";
        } elsif ($map->{$virtdev}) {
            delete $drive->{format}; # format can change on restore
            $drive->{file} = $map->{$virtdev};
            $value = print_drive($drive);
            $out .= "$virtdev: $value\n";
        } else {
            $out .= $line;
        }
    } elsif ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        $out .= "vmgenid: $vmgenid\n";
    } elsif (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($key, $raw) = ($1, $2);
        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);
        my $smbios1 = parse_smbios1($raw);
        $smbios1->{uuid} = $uuid_str;
        $out .= $key . print_smbios1($smbios1) . "\n";
    } else {
        $out .= $line;
    }

    return $out;
}
9f3d73bc
DM
6850
# Deactivate every volume recorded under 'volid' in $virtdev_hash.
# Errors are printed to STDERR and otherwise ignored.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids;
    for my $dev (values $virtdev_hash->%*) {
        my $volid = $dev->{volid} or next;
        push @volids, $volid;
    }

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    print STDERR $@ if $@;
};
6862
# Free every volume recorded under 'volid' in $virtdev_hash, reporting the
# outcome per volume on STDERR. A failure does not stop the loop.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    for my $dev (values $virtdev_hash->%*) {
        my $volid = $dev->{volid};
        next if !$volid;

        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
91bd6c90 6875
# Parse the raw backup config and overlay it with $override_conf: every key
# present in $override_conf replaces the parsed value. Returns the merged
# config.
sub restore_merge_config {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);
    $merged->{$_} = $override_conf->{$_} for keys $override_conf->%*;

    return $merged;
}
202a2a0b 6886
# List the 'images' volumes that vdisk_list() reports for $vmid and index
# them by volume ID. Entries without a volid or size are skipped; each kept
# entry gets its 'path' filled in.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $listing = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my %by_volid;
    for my $storeid (keys %$listing) {
        for my $item ($listing->{$storeid}->@*) {
            next unless $item->{volid} && $item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $by_volid{$item->{volid}} = $item;
        }
    }

    return \%by_volid;
}
6903
# Bring the disk entries of $conf in line with the volumes in $volid_hash:
#   1. refresh the size of configured drives,
#   2. drop 'unusedN' entries whose volume is referenced elsewhere,
#   3. add volumes that nothing references as new unused entries.
# $conf is modified in place. Returns 1 if anything changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # used and unused disks
    my %seen_volid;

    # Note: it is allowed to define multiple storages with the same path
    # (alias), so we need to check both 'volid' and the real 'path' (two
    # different volids can point to the same path).
    my %seen_path;

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($volume && (my $path = $volume->{path})) {
            $seen_path{$path} = 1;
        }

        return if drive_is_cdrom($drive);
        return if !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($seen_volid{$volid} || ($path && $seen_path{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6977
# Rescan the storages for volumes and bring the disk related part of the VM
# configuration(s) in sync with what was found.
#
# $vmid   - VM to update; when undefined every VM returned by config_list()
#           is processed
# $nolock - when true the config is updated directly, otherwise the update
#           runs through PVE::QemuConfig->lock_config()
# $dryrun - when true changes are only reported, the config is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $storecfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $all_volumes = scan_volids($storecfg, $vmid);

    # updates the config of a single VM
    my $sync_config = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);
        PVE::QemuConfig->check_lock($conf);

        # only hand over the volumes which are owned by this VM
        my $owned = {};
        for my $volid (keys %$all_volumes) {
            my $info = $all_volumes->{$volid};
            next if !$info->{vmid} || $info->{vmid} != $id;
            $owned->{$volid} = $info;
        }

        my $changed = update_disk_config($id, $conf, $owned);
        return if !$changed || $dryrun;

        PVE::QemuConfig->write_config($id, $conf);
    };

    # config_list() is only consulted when no explicit VMID was given
    my @targets = defined($vmid) ? ($vmid) : keys %{ config_list() };

    for my $id (@targets) {
        if ($nolock) {
            $sync_config->($id);
        } else {
            PVE::QemuConfig->lock_config($id, $sync_config, $id);
        }
    }
}
7021
9f3d73bc
DM
# Restore VM $vmid from the Proxmox Backup Server snapshot referenced by the
# volume ID $archive.
#
# The index, the guest config and (if present) the firewall config are fetched
# into a temporary directory, the target volumes are allocated and then filled
# by running `pbs-restore` once per drive. Afterwards the restored config is
# merged with $options->{override_conf} and written.
#
# $user    - user the restore is done for; used for the backup hint parsing
#            and the permission check of the resulting config
# $options - hash reference; keys read in this sub: live, unique,
#            override_conf and pool (it is also handed on to
#            $parse_backup_hints)
#
# With $options->{live} only 'efidisk0' and 'tpmstate0' are restored up
# front, the remaining drives are handed to pbs_live_restore() and the
# 'create' lock is kept until that returned.
#
# Dies on any error; in that case $restore_destroy_volumes is called for the
# volumes allocated so far before the error is re-raised.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    # the namespace does not change between drives, so build the argument once
    my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are literal parts of the archive name, so escape them
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backed up config, this time to rewrite it
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # a successful live-restore still needs the volumes for the VM start below
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7238
# Start VM $vmid paused with its drives backed by a Proxmox Backup Server
# snapshot and stream the data into the target volumes while the guest runs.
#
# $conf           - the VM configuration, used to look up the target drives
# $storecfg       - storage configuration
# $restored_disks - hash reference keyed by device name ('drive-<confname>'),
#                   one entry per drive that has to be live-restored
# $opts           - hash reference with the keys repo, snapshot, keyfile and
#                   namespace (keyfile is only used if the file exists,
#                   namespace only if defined)
#
# Dies with "live-restore failed" after stopping the VM if starting the VM or
# any of the block jobs fails. Also dies if a key of $restored_disks does not
# have the 'drive-' prefix.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale capture from an earlier match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile}
            if defined($opts->{keyfile}) && -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7309
91bd6c90
DM
# Restore VM $vmid from the VMA archive $archive ('-' means read from STDIN).
#
# A command pipeline is assembled (an optional `cstream` read rate limiter, an
# optional decompressor when $comp is set, and finally `vma extract`) and run
# via run_command(). The output is parsed line by line; once the 'CTIME:' line
# shows up, the target volumes are allocated and the device map is written to
# the fifo that was passed to `vma extract` with '-r'.
#
# $opts keys read in this sub: bwlimit, unique, override_conf and pool ($opts
# is also handed on to $parse_backup_hints).
#
# On error $restore_destroy_volumes is called and the error is re-raised;
# otherwise the merged config is written and the volumes are rescanned.
7310sub restore_vma_archive {
7311 my ($archive, $vmid, $user, $opts, $comp) = @_;
7312
91bd6c90
DM
7313 my $readfrom = $archive;
7314
7c536e11
WB
7315 my $cfg = PVE::Storage::config();
7316 my $commands = [];
7317 my $bwlimit = $opts->{bwlimit};
7318
7319 my $dbg_cmdstring = '';
    # Appends one stage to the pipeline. Every stage added afterwards is told
    # to read from '-' instead of the archive path.
7320 my $add_pipe = sub {
7321 my ($cmd) = @_;
7322 push @$commands, $cmd;
7323 $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
7324 $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
91bd6c90 7325 $readfrom = '-';
7c536e11
WB
7326 };
7327
7328 my $input = undef;
7329 if ($archive eq '-') {
7330 $input = '<&STDIN';
7331 } else {
7332 # If we use a backup from a PVE defined storage we also consider that
7333 # storage's rate limit:
7334 my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
7335 if (defined($volid)) {
7336 my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
7337 my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
7338 if ($readlimit) {
7339 print STDERR "applying read rate limit: $readlimit\n";
9444c6e4 7340 my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
7c536e11
WB
7341 $add_pipe->($cstream);
7342 }
7343 }
7344 }
7345
7346 if ($comp) {
c6d51783
AA
7347 my $info = PVE::Storage::decompressor_info('vma', $comp);
7348 my $cmd = $info->{decompressor};
7349 push @$cmd, $readfrom;
7c536e11 7350 $add_pipe->($cmd);
91bd6c90
DM
7351 }
7352
7353 my $tmpdir = "/var/tmp/vzdumptmp$$";
7354 rmtree $tmpdir;
7355
7356 # disable interrupts (always do cleanups)
5b97ef24
TL
7357 local $SIG{INT} =
7358 local $SIG{TERM} =
7359 local $SIG{QUIT} =
7360 local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };
91bd6c90
DM
7361
7362 my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
7363 POSIX::mkfifo($mapfifo, 0600);
7364 my $fifofh;
    # Passed to run_command() as 'afterfork' callback below.
    # NOTE(review): presumably the fifo is opened only then because opening
    # it for writing blocks until `vma extract` has it open - confirm.
808a65b5 7365 my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };
91bd6c90 7366
7c536e11 7367 $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);
91bd6c90
DM
7368
7369 my $oldtimeout;
    # seconds armed via alarm() in the eval block below, before the pipeline
    # is started
7370 my $timeout = 5;
7371
e8b07b29
FE
7372 my $devinfo = {}; # info about drives included in backup
7373 my $virtdev_hash = {}; # info about allocated drives
91bd6c90
DM
7374
7375 my $rpcenv = PVE::RPCEnvironment::get();
7376
ffda963f 7377 my $conffile = PVE::QemuConfig->config_file($vmid);
91bd6c90 7378
ae200950 7379 # Note: $oldconf is undef if VM does not exist
ffda963f
FG
7380 my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
7381 my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
98a4b3fb 7382 my $new_conf_raw = '';
ed221350 7383
7c536e11
WB
7384 my %storage_limits;
7385
    # Called from the output parser once the 'CTIME:' line was seen: reads the
    # extracted config, allocates the target volumes, writes one mapping line
    # per drive to $fifofh and collects the rewritten config in $new_conf_raw.
91bd6c90 7386 my $print_devmap = sub {
91bd6c90
DM
7387 my $cfgfn = "$tmpdir/qemu-server.conf";
7388
7389 # we can read the config - that is already extracted
7390 my $fh = IO::File->new($cfgfn, "r") ||
a1cbe55c 7391 die "unable to read qemu-server.conf - $!\n";
91bd6c90 7392
6738ab9c 7393 my $fwcfgfn = "$tmpdir/qemu-server.fw";
3457d090
WL
7394 if (-f $fwcfgfn) {
7395 my $pve_firewall_dir = '/etc/pve/firewall';
7396 mkdir $pve_firewall_dir; # make sure the dir exists
7397 PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
7398 }
6738ab9c 7399
e8b07b29 7400 $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);
91bd6c90 7401
c8964278
FE
7402 foreach my $info (values %{$virtdev_hash}) {
7403 my $storeid = $info->{storeid};
7404 next if defined($storage_limits{$storeid});
7405
7406 my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
7407 print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
    # the limit is reported in KiB/s above and stored multiplied by 1024 for
    # the 'throttling.bps' map option below
7408 $storage_limits{$storeid} = $limit * 1024;
7c536e11
WB
7409 }
7410
91bd6c90 7411 foreach my $devname (keys %$devinfo) {
be190583
DM
7412 die "found no device mapping information for device '$devname'\n"
7413 if !$devinfo->{$devname}->{virtdev};
91bd6c90
DM
7414 }
7415
ed221350 7416 # create empty/temp config
be190583 7417 if ($oldconf) {
ed221350 7418 PVE::Tools::file_set_contents($conffile, "memory: 128\n");
d1e92cf6 7419 $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
ed221350
DM
7420 }
7421
9f3d73bc
DM
7422 # allocate volumes
7423 my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);
7424
7425 # print restore information to $fifofh
91bd6c90
DM
7426 foreach my $virtdev (sort keys %$virtdev_hash) {
7427 my $d = $virtdev_hash->{$virtdev};
9f3d73bc
DM
7428 next if $d->{is_cloudinit}; # no need to restore cloudinit
7429
7c536e11 7430 my $storeid = $d->{storeid};
9f3d73bc 7431 my $volid = $d->{volid};
7c536e11
WB
7432
7433 my $map_opts = '';
7434 if (my $limit = $storage_limits{$storeid}) {
7435 $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
7436 }
8fd57431 7437
91bd6c90 7438 my $write_zeros = 1;
88240a83 7439 if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
91bd6c90
DM
7440 $write_zeros = 0;
7441 }
7442
9f3d73bc 7443 my $path = PVE::Storage::path($cfg, $volid);
87056e18 7444
9f3d73bc 7445 print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";
91bd6c90 7446
9f3d73bc 7447 print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
91bd6c90
DM
7448 }
7449
7450 $fh->seek(0, 0) || die "seek failed - $!\n";
7451
91bd6c90
DM
7452 my $cookie = { netcount => 0 };
7453 while (defined(my $line = <$fh>)) {
c62d7cf5 7454 $new_conf_raw .= restore_update_config_line(
98a4b3fb 7455 $cookie,
98a4b3fb
FE
7456 $map,
7457 $line,
7458 $opts->{unique},
7459 );
91bd6c90
DM
7460 }
7461
7462 $fh->close();
91bd6c90
DM
7463 };
7464
7465 eval {
7466 # enable interrupts
6cb0144a
EK
7467 local $SIG{INT} =
7468 local $SIG{TERM} =
7469 local $SIG{QUIT} =
7470 local $SIG{HUP} =
7471 local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
91bd6c90
DM
7472 local $SIG{ALRM} = sub { die "got timeout\n"; };
7473
    # remember a previously armed alarm, it is re-armed once the device map
    # was written (or after the eval block if that point is never reached)
7474 $oldtimeout = alarm($timeout);
7475
    # receives every output line of the pipeline via 'outfunc'
7476 my $parser = sub {
7477 my $line = shift;
7478
7479 print "$line\n";
7480
7481 if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
7482 my ($dev_id, $size, $devname) = ($1, $2, $3);
7483 $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
7484 } elsif ($line =~ m/^CTIME: /) {
46f58b5f 7485 # we correctly received the vma config, so we can disable
3cf90d7a
DM
7486 # the timeout now for disk allocation (set to 10 minutes, so
7487 # that we always timeout if something goes wrong)
7488 alarm(600);
91bd6c90
DM
7489 &$print_devmap();
7490 print $fifofh "done\n";
7491 my $tmp = $oldtimeout || 0;
7492 $oldtimeout = undef;
7493 alarm($tmp);
7494 close($fifofh);
808a65b5 7495 $fifofh = undef;
91bd6c90
DM
7496 }
7497 };
be190583 7498
7c536e11
WB
7499 print "restore vma archive: $dbg_cmdstring\n";
7500 run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
91bd6c90
DM
7501 };
7502 my $err = $@;
7503
    # only still set if the parser never got to the 'CTIME:' line
7504 alarm($oldtimeout) if $oldtimeout;
7505
e8b07b29 7506 $restore_deactivate_volumes->($cfg, $virtdev_hash);
5f96f4df 7507
808a65b5 7508 close($fifofh) if $fifofh;
91bd6c90 7509 unlink $mapfifo;
9f3d73bc 7510 rmtree $tmpdir;
91bd6c90
DM
7511
7512 if ($err) {
e8b07b29 7513 $restore_destroy_volumes->($cfg, $virtdev_hash);
91bd6c90
DM
7514 die $err;
7515 }
7516
621edb2b 7517 my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
e3971865 7518 check_restore_permissions($rpcenv, $user, $new_conf);
202a2a0b 7519 PVE::QemuConfig->write_config($vmid, $new_conf);
ed221350 7520
91bd6c90
DM
7521 eval { rescan($vmid, 1); };
7522 warn $@ if $@;
26731a3c
SR
7523
7524 PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
91bd6c90
DM
7525}
7526
# Restore VM $vmid from a legacy tar based vzdump archive ('-' means read the
# archive from STDIN).
#
# The archive is unpacked with `tar`, using `qmextract` as --to-command; the
# volume mapping it writes to "$tmpdir/qmrestore.stat" is then used to rewrite
# the backed up config line by line.
#
# $user - exported to the extraction command as VZDUMP_USER
# $opts - hash reference; keys read in this sub: override_conf (must be
#         empty), storage, pool, prealloc, info and unique
#
# Dies if options to override are passed, if the archive does not start with
# 'qemu-server.conf' or if the extraction fails. The temporary directory is
# removed on the error path as well.
#
# NOTE(review): with $opts->{info} the eval block returns early and an empty
# config is written afterwards - confirm whether that mode is still in use.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        # the cleanup needs the stat file, so the directory is removed after it
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};

        # do not leave the temporary directory behind; a failure to remove it
        # must not hide the original error
        eval { rmtree $tmpdir; };
        warn $@ if $@;

        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7639
# Calls $func once for every storage ID referenced by a non-CD-ROM volume of
# $conf. The storage IDs are passed in sorted order and each one only once.
# Volumes whose ID cannot be parsed into a storage ID are ignored.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %seen;

    my $collect = sub {
        my (undef, $drive) = @_;
        return if drive_is_cdrom($drive);

        # second argument set: no storage ID simply yields nothing to record
        my ($storeid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $seen{$storeid} = $storeid if $storeid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    for my $storeid (sort keys %seen) {
        $func->($storeid);
    }
}
7659
6c9f59c1
TL
# storage types for which the snapshot is done by QEMU (unless 'krbd' is set)
my $qemu_snap_storage = {
    rbd => 1,
};

# Returns 1 if snapshots of $volid have to be done by QEMU, and nothing
# otherwise. That is the case for storage types listed in $qemu_snap_storage
# without the 'krbd' option and for volumes ending in '.qcow2' or '.qed'.
# Devices matching 'tpmstate0' never qualify.
#
# Dies if the storage of $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    my $by_storage_type = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $by_storage_type;

    my $by_image_format = $volid =~ m/\.(qcow2|qed)$/;
    return 1 if $by_image_format;

    return;
}
7682
# Pings the QEMU Guest Agent of VM $vmid (3 second timeout).
#
# Returns 1 if the 'guest-ping' command succeeded and 0 otherwise. In the
# latter case a warning is emitted unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    return 1 if !$@;

    warn "QEMU Guest Agent is not running - $@" if !$nowarn;
    return 0;
}
7693
04a69bb4
AD
# Converts the volumes of VM $vmid into base volumes.
#
# $conf - the VM configuration; updated in place and written after every
#         converted volume
# $disk - optional config key; when set only that drive is converted
#
# CD-ROM drives and volumes whose storage lacks the 'template' feature are
# left untouched.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);

        # persist right away so the config always refers to existing volumes
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7714
92bdc3f0
DC
# Converts an 'iscsi://<portal>/<target>/<lun>' path into the option string
# form (file.driver=iscsi,...) that is used together with qemu-img's
# '--image-opts' / '--target-image-opts'.
#
# Dies if $path does not have the expected format.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my ($portal, $target, $lun) = ($1, $2, $3);

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7731
# Copies a disk image with `qemu-img convert` into an already existing
# destination volume ('-n' is passed, so no target is created).
#
# $src_volid           - source volume ID, or a path to a file / block device
# $dst_volid           - destination volume ID (must be a valid volume ID)
# $size                - size of the image, only used for the progress output
# $snapname            - optional snapshot of the source to copy from
# $is_zero_initialized - when true (and the destination is not iSCSI) the
#                        destination path is prefixed with 'zeroinit:'
# $bwlimit             - optional rate limit, passed as '<bwlimit>K' via '-r'
#
# Dies if the source or destination cannot be resolved or if the copy fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # plain path: the format can only be taken from the file extension
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);
    push @$cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turns the "(<percent>/100%)" progress lines into a human readable form
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7814
# Returns the image format QEMU has to use for $volname on the storage
# described by $scfg.
#
# Only storages with a 'path' property can provide another format than "raw";
# for those the format is taken from the file extension if it matches
# $PVE::QemuServer::Drive::QEMU_FORMAT_RE.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7824
# Starts a 'drive-mirror' block job for "drive-$drive" of VM $vmid onto
# $dst_volid and then waits for it via qemu_drive_mirror_monitor().
#
# $dst_volid           - either an 'nbd:' URI or a volume ID
# $vmiddst, $completion, $qga - passed on to qemu_drive_mirror_monitor()
# $is_zero_initialized - when true a volume target is prefixed with 'zeroinit:'
# $jobs                - hash reference of the block jobs tracked so far, the
#                        new job is added to it (a new hash is used if unset)
# $bwlimit             - optional limit in KB/s
# $src_bitmap          - optional dirty bitmap, switches to incremental sync
#
# If the job cannot be started, all jobs in $jobs are cancelled and the sub
# dies with a "mirroring error".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;
    $jobs->{"drive-$drive"} = {};

    my ($target, $target_format);
    if ($dst_volid =~ /^nbd:/) {
        ($target, $target_format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $target_format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $target = $dst_path;
        $target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my %mirror_args = (
        timeout => 10,
        device => "drive-$drive",
        mode => "existing",
        sync => "full",
        target => $target,
    );
    $mirror_args{format} = $target_format if $target_format;

    if (defined($src_bitmap)) {
        $mirror_args{sync} = 'incremental';
        $mirror_args{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $mirror_args{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %mirror_args); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7876
db1f8b39
FG
# Poll the block jobs listed in $jobs (hash keyed by job/device ID) of VM $vmid
# once per second until they are finished according to $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $op is the block job type to look for in 'query-block-jobs' (default "mirror").
# If $vmiddst is set and differs from $vmid, the jobs are cancelled once ready
# (the disk is not switched over), with the guest filesystem frozen via the
# guest agent (if $qga is set and the agent is running) or the VM suspended
# while doing so.
#
# Finished jobs are removed from $jobs. On any error all remaining jobs are
# cancelled and the function dies with "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only print progress until the job was seen as ready once
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {
                    # QMP command used to finish the jobs; named distinctly so it does not
                    # shadow $op, which is still needed for the messages of this function
                    my $qmp_cmd;
                    if ($completion eq 'complete') {
                        $qmp_cmd = 'block-job-complete';
                    } elsif ($completion eq 'cancel') {
                        $qmp_cmd = 'block-job-cancel';
                    } else {
                        die "invalid completion value: $completion\n";
                    }

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job...\n";

                        eval { mon_cmd($vmid, $qmp_cmd, device => $job_id) };
                        my $cmd_err = $@;
                        if ($cmd_err && $cmd_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($cmd_err) {
                            # any other failure is not retryable, do not report it as success
                            chomp $cmd_err;
                            die "$job_id: block job cannot be completed - $cmd_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@; # cleanup is best-effort, but do not hide its failure
        die "block job ($op) error: $err";
    }
}
8018
# Send 'block-job-cancel' for every job in $jobs (hash keyed by device/job ID),
# ignoring errors of the individual cancel commands, then poll
# 'query-block-jobs' once per second until none of them is listed anymore.
# Jobs that vanished are removed from $jobs.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        for my $stat (@$stats) {
            $still_running{$stat->{device}} = $stat;
        }

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
8049
8fbae1dc
FE
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Dies if the source drive uses io_uring (explicitly via 'aio', or by storage default) while
# the target storage does not allow io_uring by default. Does nothing unless drive-mirror is
# used and the storage actually changes.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    return if !$use_drive_mirror;

    # Don't complain when not changing storage.
    # Assume if it works for the source, it'll work for the target too.
    return if $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    my $explicit_aio = $src_drive->{aio};
    my $src_uses_io_uring = $explicit_aio
        ? $explicit_aio eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    return if !$src_uses_io_uring;
    return if storage_allows_io_uring_default($dst_scfg, $cache_direct);

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n";
}
8076
# Clone a single drive and return the drive hash for the new volume.
#
# $source - hash with keys: vmid, running, drivename, drive, snapname
# $dest   - hash with keys: vmid, drivename, efisize, storage, format
# $full   - create a full copy if true, else a linked clone (vdisk_clone)
# $newvollist - array ref, every newly allocated volume ID is pushed onto it
# $jobs, $completion, $qga, $bwlimit - passed on to the drive-mirror helpers
#
# A full clone of a running VM's drive (without snapshot) is done via
# drive-mirror, otherwise via qemu-img (convert, or dd for an EFI disk target).
# Cloud-init drives only get a new volume allocated, no data is copied.
#
# Returns a deep copy of $source->{drive} with 'file' set to the new volume,
# 'format' removed and 'size' updated if it could be determined.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    # the destination drive name is optional, so never compare it while undefined
    my $dst_is_efidisk = defined($dst_drivename) && $dst_drivename eq 'efidisk0';
    my $dst_is_tpmstate = defined($dst_drivename) && $dst_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $is_cloudinit = drive_is_cloudinit($drive);

        my $name = undef;
        my $size = undef;
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_is_efidisk) {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_is_tpmstate) {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # No data is copied for cloud-init drives, but:
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (defined($completion) && $completion eq 'complete' && scalar(keys %$jobs) > 0) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if ($use_drive_mirror) {
                qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($dst_is_efidisk) {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

    # best-effort: the size is only recorded if it can be queried
    my $size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
8196
98cfd8b6
AD
# Query the QEMU version of the running VM $vmid via QMP 'query-version'.
# Returns it as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $version_info = mon_cmd($vmid, "query-version");
    my $qemu = $version_info->{qemu};

    return join('.', $qemu->{major}, $qemu->{minor});
}
8202
249c4a6c
AD
# Decide whether the old (non-EFI) PXE BIOS files have to be used for a machine type.
#
# Returns the list ($use_old_bios_files, $machine_type), where a trailing ".pxe"
# is stripped from the machine type. Returns nothing if no machine type is given.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit ".pxe" suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        return (1, $1);
    }

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $version = extract_version($machine_type, kvm_user_version());

    return (!min_version($version, 2, 4), $machine_type);
}
8224
# Return the file size (-s) of the OVMF variables file selected by
# get_ovmf_files() for the given VM config. $efidisk is optional and defaults
# to the parsed 'efidisk0' entry of $conf, if there is one.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my (undef, $vars_file) = get_ovmf_files($arch, $efidisk, $smm);

    return -s $vars_file;
}
8234
# Set the 'size' property of the 'efidisk0' entry in $conf to the size
# reported by get_efivars_size(). Does nothing if there is no 'efidisk0'.
sub update_efidisk_size {
    my ($conf) = @_;

    my $efidisk_str = $conf->{efidisk0};
    return if !defined($efidisk_str);

    my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $efidisk_str);
    $efidisk->{size} = get_efivars_size($conf);
    $conf->{efidisk0} = print_drive($efidisk);

    return;
}
8246
f9dde219
SR
# Set the 'size' property of the 'tpmstate0' entry in $conf to the fixed TPM
# state disk size. Does nothing if there is no 'tpmstate0' entry, mirroring
# update_efidisk_size(), so a missing entry is never parsed or created.
#
# Returns the updated 'tpmstate0' property string.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE();
    $conf->{tpmstate0} = print_drive($disk);

    return $conf->{tpmstate0};
}
8254
90b20b15
DC
# Allocate a new EFI vars volume on storage $storeid for VM $vmid and fill it
# with the OVMF variables template selected by get_ovmf_files().
#
# Returns the list ($volid, $size / 1024), with $size as reported by
# PVE::Storage::volume_size_info() for the new volume.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my $vars_template = (get_ovmf_files($arch, $efidisk, $smm))[1];

    my $template_bytes = -s $vars_template;
    my $template_kb = PVE::Tools::convert_size($template_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $template_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($vars_template, $volid, $template_bytes, undef, 0);

    my $volume_bytes = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $volume_bytes/1024);
}
8270
22de899a
AD
# Query the iothreads of VM $vmid via QMP 'query-iothreads'.
# Returns a hash ref mapping each iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $reply = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$reply) {
        $thread_id_of{ $entry->{id} } = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
8283
ee034f5c
AD
# Compute SCSI controller placement for a drive.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - devices per controller: 7 if 'scsihw' is unset or
#                        starts with "lsi", 1 for 'virtio-scsi-single', else 256
#   $controller        - controller number, int(drive index / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', else "scsihw"
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev =
        (!$scsihw || $scsihw =~ m/^lsi/) ? 7
        : $is_single ? 1
        : 256;

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
a1511b3c 8304
44549149
EK
# Pick the image format for a new volume on storage $storeid.
#
# If $format is not given, the format derived from $src_volname via
# qemu_img_format() is used as hint; without a source volume name the
# storage's default format is returned right away. A format that is not in the
# storage's list of valid formats is replaced by the storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($default_format, $valid_formats) =
        PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $default_format if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $is_supported = grep { $_ eq $format } @$valid_formats;

    return $is_supported ? $format : $default_format;
}
8324
66cebc46
DC
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage to use for VM state volumes, in this order:
#   1. 'vmstatestorage' from the config, if set
#   2. a shared storage where the VM has at least one disk
#   3. a local storage where the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' (file based) is preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my %candidate; # keys: 'shared' and 'local'

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';
        # prefer file based storage
        $candidate{$kind} = $sid if !$candidate{$kind} || $scfg->{path};
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
8348
# Generate a new UUID with the UUID module and return its string form.
sub generate_uuid {
    my $binary_uuid;
    UUID::generate($binary_uuid);

    my $uuid_text;
    UUID::unparse($binary_uuid, $uuid_text);

    return $uuid_text;
}
8355
# Return a freshly generated UUID as "uuid=<UUID>" string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
8359
9c152e87
TL
# Send the QMP command 'nbd-server-stop' to VM $vmid and return its result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8365
dae98db9
DC
# Create the (empty) reboot trigger file "/run/qemu-server/$vmid.reboot".
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";

    return close($fh);
}
8372
# Remove the reboot trigger file "/run/qemu-server/$vmid.reboot".
#
# Returns the result of unlink(), i.e. a false value if there was no such file.
# Dies if removing failed for any reason other than ENOENT.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8384
5cfa9f5f
SR
# Translate the legacy boot setting (a string of single-letter device classes,
# taken from $bootcfg->{legacy} or the schema default) into a hash ref mapping
# config keys (drives, netN) to boot index values.
#
# The n-th letter gets the base index n*100. 'd' is applied to CD-ROM drives,
# 'c' to the drive named by 'bootdisk' and 'n' to all configured network
# devices; the index of a class is incremented for each device visited.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my %next_index;
    my $position = 0;
    for my $letter (split(//, $legacy)) {
        $position++;
        $next_index{$letter} = $position * 100;
    }

    my %bootorder;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            if ($next_index{d}) {
                $bootorder{$ds} = $next_index{d}++;
            }
        } elsif ($next_index{c}) {
            # the index advances for every non-CD-ROM volume, not only the boot disk
            my $index = $next_index{c}++;
            $bootorder{$ds} = $index
                if $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
        }
    });

    if ($next_index{n}) {
        for my $net_id (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_id";
            next if !$conf->{$netname};
            $bootorder{$netname} = $next_index{n}++;
        }
    }

    return \%bootorder;
}
8424
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array ref with, in this order and as far as present: the first
# hard disk, the first CD-ROM and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk (0) first, then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $disk if $disk;
    }

    # network: only the first configured device
    for my $net_id (0 .. $MAX_NETS - 1) {
        my $netname = "net$net_id";
        next if !$conf->{$netname};
        push @devices, $netname;
        last;
    }

    return \@devices;
}
8452
e5d611c3
TL
# Compute the boot index of each device from the 'boot' property of $conf.
#
# Without a 'boot' property, or with a legacy one, the result comes from
# bootorder_from_legacy(). With 'order=', the listed devices get consecutive
# indices starting at 100. Returns a hash ref (device name => boot index).
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot)
        if !defined($boot) || $boot->{legacy};

    my %bootorder;
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        $bootorder{$_} = $index++ for PVE::Tools::split_list($boot->{order});
    }

    return \%bootorder;
}
8472
65911545
SR
# Connect to the qmeventd socket and send the vzdump handshake for $vmid.
#
# Connecting is retried (with a 25ms pause) while the error is EINTR or EAGAIN,
# giving up after more than 4 attempts; any other error is fatal immediately.
# Returns the socket handle, which the caller has to close later.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $attempts = 0;
    my $fh;

    while (1) {
        $attempts++;
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        my $retryable = $! == EINTR || $! == EAGAIN;
        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n" if !$retryable;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $attempts retries\n" if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
8500
65e866e5
DM
8501# bash completion helper
8502
# Completion helper: list the volume IDs of backup archives whose format is
# "vma.<compressor>". If the current value starts with "<storage>:", only that
# storage is queried.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $backups = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $sid (keys %$backups) {
        for my $archive (@{$backups->{$sid}}) {
            next unless $archive->{format} =~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next unless defined($archive->{volid});
            push @volids, $archive->{volid};
        }
    }

    return \@volids;
}
8526
# Completion helper: return an array ref of VM IDs from vmstatus().
#
# $running undefined: all VMs
# $running true:      only running non-template VMs
# $running false:     only non-running non-template VMs
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $is_candidate = sub {
        my ($d) = @_;

        return 1 if !defined($running);
        return 0 if $d->{template};

        my $is_running = $d->{status} eq 'running';
        return $running ? $is_running : !$is_running;
    };

    return [ grep { $is_candidate->($idlist->{$_}) } keys %$idlist ];
};

# Complete with all VM IDs.
sub complete_vmid {
    return $complete_vmid_full->();
}

# Complete with the IDs of stopped VMs only.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}

# Complete with the IDs of running VMs only.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8558
335af808
DM
# Completion helper: return an array ref with the IDs of all storages that pass
# PVE::Storage::storage_check_enabled() and have the 'images' content type.
sub complete_storage {

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @storages = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@storages;
}
8573
255e9c54
AL
# Completion helper for migration target storages: return an array ref with the
# IDs of all storages that pass PVE::Storage::storage_check_enabled() for the
# target node and have the 'images' content type.
#
# The target node is taken from the second element of $all_args. It is read
# with a plain element access, so a missing argument list yields an undefined
# node instead of a fatal dereference error during completion.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    foreach my $sid (keys %$ids) {
        next if !PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        next if !$ids->{$sid}->{content}->{images};
        push @$res, $sid;
    }

    return $res;
}
8591
b08c37c3
DC
# Check whether VM $vmid reports the QMP status "paused".
#
# Errors (config not on this node, QMP failure) are only warned about; in that
# case a false value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    return $qmpstatus if !$qmpstatus;
    return $qmpstatus->{status} eq "paused";
}
8601
3f11f0d7
LS
# Assert that the storage of volume $vol is configured for the volume's content
# type. Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8614
21947fea
AD
# Add the MAC address of every network device in $conf to the forwarding DB of
# its bridge, using tap interface names of the form "tap<vmid>i<N>".
#
# Devices without a MAC or without a bridge are skipped with a warning (for the
# MAC case only if the bridge port's 'learning' flag reads as false).
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        my ($net_id) = $opt =~ m/^net(\d+)$/;
        next if !defined($net_id);

        my $iface = "tap${vmid}i${net_id}";
        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1) or next;

        my $mac = $net->{macaddr};
        if (!$mac) {
            my $learning = file_read_firstline("/sys/class/net/$iface/brport/learning");
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !$learning;
            next;
        }

        my $bridge = $net->{bridge};
        if (!$bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
            next;
        }

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
1b5ba4dd 8643
73ed6496
AD
# Remove the MAC address of every network device in $conf from the forwarding
# DB of its bridge, using tap interface names of the form "tap<vmid>i<N>".
#
# Devices that cannot be parsed, have no MAC or are not attached to a bridge
# are skipped; add_nets_bridge_fdb() never adds an entry for those either.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        # without a bridge there is no forwarding DB to clean up, and the path
        # check below must not interpolate an undefined value
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8662
1e3baf05 86631;