]> git.proxmox.com Git - qemu-server.git/blame - PVE/QemuServer.pm
restore vma: inline one timeout variable and move other closer to usage
[qemu-server.git] / PVE / QemuServer.pm
CommitLineData
baa4f62d 1package PVE::QemuServer;
1e3baf05
DM
2
3use strict;
990fc5e2 4use warnings;
3ff84d6f 5
5da072fb
TL
6use Cwd 'abs_path';
7use Digest::SHA;
8use Fcntl ':flock';
9use Fcntl;
1e3baf05 10use File::Basename;
5da072fb 11use File::Copy qw(copy);
1e3baf05
DM
12use File::Path;
13use File::stat;
14use Getopt::Long;
5da072fb
TL
15use IO::Dir;
16use IO::File;
17use IO::Handle;
18use IO::Select;
19use IO::Socket::UNIX;
1e3baf05 20use IPC::Open3;
c971c4f2 21use JSON;
c3d15108 22use List::Util qw(first);
1f30ac3a 23use MIME::Base64;
5da072fb
TL
24use POSIX;
25use Storable qw(dclone);
f85951dc 26use Time::HiRes qw(gettimeofday usleep);
5da072fb 27use URI::Escape;
425441e6 28use UUID;
5da072fb 29
82841214 30use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
5b65b00d 31use PVE::CGroup;
83870398 32use PVE::CpuSet;
48cf040f 33use PVE::DataCenterConfig;
5da072fb 34use PVE::Exception qw(raise raise_param_exc);
3b56383b 35use PVE::Format qw(render_duration render_bytes);
81d6e4e1 36use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
a52eb3c4
DC
37use PVE::Mapping::PCI;
38use PVE::Mapping::USB;
1e3baf05 39use PVE::INotify;
4df98f2f 40use PVE::JSONSchema qw(get_standard_option parse_property_string);
1e3baf05 41use PVE::ProcFSTools;
fbec3f89 42use PVE::PBSClient;
34e82fa2 43use PVE::RESTEnvironment qw(log_warn);
91bd6c90 44use PVE::RPCEnvironment;
5da072fb 45use PVE::Storage;
b71351a7 46use PVE::SysFSTools;
d04d6af1 47use PVE::Systemd;
82841214 48use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
5da072fb
TL
49
50use PVE::QMPClient;
51use PVE::QemuConfig;
238af88e 52use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
5da072fb 53use PVE::QemuServer::Cloudinit;
5b65b00d 54use PVE::QemuServer::CGroup;
d786a274 55use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
75748d44 56use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
2ea5fb7e 57use PVE::QemuServer::Machine;
5da072fb 58use PVE::QemuServer::Memory;
0a13e08e 59use PVE::QemuServer::Monitor qw(mon_cmd);
74c17b7a 60use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
0cf8d56c 61use PVE::QemuServer::USB;
1e3baf05 62
28e129cc
AD
# Optional SDN integration: $have_sdn is 1 when PVE::Network::SDN::Zones could
# be loaded and stays undefined otherwise (the load error is swallowed by eval).
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
68
# Directory holding the packaged EDK2 firmware images. The trailing slash is
# intentional to keep here: the paths built below therefore contain '//'.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Firmware image pairs, keyed by architecture and then by variant name.
# Every entry is [ <*_CODE* image path>, <*_VARS* image path> ].
my $OVMF = do {
    my $fw = sub { return "$EDK2_FW_BASE/$_[0]" };
    +{
        x86_64 => {
            '4m-no-smm' => [
                $fw->('OVMF_CODE_4M.fd'),
                $fw->('OVMF_VARS_4M.fd'),
            ],
            '4m-no-smm-ms' => [
                $fw->('OVMF_CODE_4M.fd'),
                $fw->('OVMF_VARS_4M.ms.fd'),
            ],
            '4m' => [
                $fw->('OVMF_CODE_4M.secboot.fd'),
                $fw->('OVMF_VARS_4M.fd'),
            ],
            '4m-ms' => [
                $fw->('OVMF_CODE_4M.secboot.fd'),
                $fw->('OVMF_VARS_4M.ms.fd'),
            ],
            default => [
                $fw->('OVMF_CODE.fd'),
                $fw->('OVMF_VARS.fd'),
            ],
        },
        aarch64 => {
            default => [
                $fw->('AAVMF_CODE.fd'),
                $fw->('AAVMF_VARS.fd'),
            ],
        },
    };
};
2ddc0a5c 100
# Result of PVE::ProcFSTools::read_cpuinfo(), fetched once when this file is loaded.
7f0b5beb 101my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
1e3baf05 102
8d88a594
TL
# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
1e3baf05 107
cf364f95
TL
# Hook parse_vm_config/write_vm_config up for '/qemu-server/' via cfs_register_file.
cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);
1e3baf05 113
3ea94c60
DM
# Standard option describing a location used to save/restore VM state.
PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        type => 'string',
        description => "Some command save/restore state from this location.",
        optional => 1,
        maxLength => 128,
    },
);
120
# Standard option for the QEMU machine type; the pattern accepts the pc/i440fx,
# q35 and virt families with optional version, '+pveN' and '.pxe' suffixes.
PVE::JSONSchema::register_standard_option(
    'pve-qemu-machine',
    {
        type => 'string',
        description => "Specifies the QEMU machine type.",
        optional => 1,
        maxLength => 40,
        pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
    },
);
128
1a67f999 129# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
38277afc
TL
my $nodename_cache;

# Return the node name, asking PVE::INotify::nodename() only while no defined
# value has been cached in $nodename_cache yet.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
1e3baf05 135
ec3582b5
WB
# Property-string format of a watchdog entry; registered as 'pve-qm-watchdog'.
my $watchdog_fmt = {
    model => {
        type => 'string',
        description => 'Watchdog type to emulate.',
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
    },
    action => {
        type => 'string',
        description => 'The action to perform if after activation the guest fails to poll the watchdog in time.',
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
153
9d66b397
SI
# Property-string format describing the guest agent settings of a VM.
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        description => 'Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.',
        default_key => 1,
        default => 0,
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        description => 'Run fstrim after moving a disk or migrating the VM.',
        default => 0,
        optional => 1,
    },
    'freeze-fs-on-backup' => {
        type => 'boolean',
        description => 'Freeze/thaw guest filesystems on backup for consistency.',
        default => 1,
        optional => 1,
    },
    type => {
        type => 'string',
        description => 'Select the agent type',
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
    },
};
181
55655ebc
DC
# Property-string format of the 'vga' option: display type plus optional memory size.
my $vga_fmt = {
    type => {
        type => 'string',
        description => 'Select the VGA type.',
        enum => [
            'cirrus',
            'qxl',
            (map { "qxl$_" } 2 .. 4),
            'none',
            (map { "serial$_" } 0 .. 3),
            'std',
            'virtio',
            'virtio-gl',
            'vmware',
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => 'Sets the VGA memory (in MiB). Has no effect with serial display.',
        minimum => 4,
        maximum => 512,
        optional => 1,
    },
};
199
6dbcb073
DC
# Property-string format of the 'ivshmem' option (size and optional name).
my $ivshmem_fmt = {
    size => {
        type => 'integer',
        description => 'The size of the file in MB.',
        minimum => 1,
    },
    name => {
        type => 'string',
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
        format_description => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        optional => 1,
    },
};
214
1448547f
AL
# Property-string format of an audio device entry (emulated device and backend driver).
my $audio_fmt = {
    device => {
        type => 'string',
        description => 'Configure an audio device.',
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
    },
    driver => {
        type => 'string',
        description => 'Driver backend for the audio device.',
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
    },
};
229
c4df18db
AL
# Property-string format of the 'spice_enhancements' option.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        description => 'Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.',
        default => '0',
        optional => 1,
    },
    videostreaming => {
        type => 'string',
        description => 'Enable video streaming. Uses compression for detected video streams.',
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
    },
};
245
2cf61f33
SR
# Property-string format of the 'rng0' option: entropy source on the host plus
# an optional rate limit (max_bytes per period).
my $rng_fmt = {
    source => {
        type => 'string',
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
        description => join(' ',
            "The file on the host to gather entropy from. In most cases '/dev/urandom'",
            "should be preferred over '/dev/random' to avoid entropy-starvation issues on the",
            "host. Using urandom does *not* decrease security in any meaningful way, as it's",
            "still seeded from real entropy, and the bytes provided will most likely be mixed",
            "with real entropy on the guest as well. '/dev/hwrng' can be used to pass through",
            "a hardware RNG from the host.",
        ),
    },
    max_bytes => {
        type => 'integer',
        description => join(' ',
            "Maximum bytes of entropy allowed to get injected into the guest every",
            "'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use",
            "`0` to disable limiting (potentially dangerous!).",
        ),
        optional => 1,

        # The default of 1 KiB/s gives the guest enough entropy to avoid boot-starvation
        # issues (e.g. systemd etc...) while leaving no chance of overwhelming the host,
        # provided we're reading from /dev/urandom.
        default => 1024,
    },
    period => {
        type => 'integer',
        description => join(' ',
            "Every 'period' milliseconds the entropy-injection quota is reset, allowing",
            "the guest to retrieve another 'max_bytes' of entropy.",
        ),
        optional => 1,
        default => 1000,
    },
};
278
26b443c8
TL
# Property-string format of the 'meta' option (guest meta-information).
my $meta_info_fmt = {
    ctime => {
        type => 'integer',
        description => 'The guest creation timestamp as UNIX epoch time',
        minimum => 0,
        optional => 1,
    },
    'creation-qemu' => {
        type => 'string',
        description => 'The QEMU (machine) version from the time this VM was created.',
        pattern => '\d+(\.\d+)+',
        optional => 1,
    },
};
293
1e3baf05
DM
# Schema of the plain VM configuration options, keyed by option name. Each
# value is a JSON-schema style property definition (type, description,
# default, limits, ...). Further down in this file every entry is also
# registered as standard option "pve-qm-<name>", and the numaN entries are
# added to this hash.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
            ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
            ." windows > 7.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 1,
        maximum => 262144,
        # descriptive text rather than a number, as the value depends on the cgroup version
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'integer',
        description => "Amount of RAM for the VM in MiB. This is the maximum available memory when"
            ." you use the balloon device.",
        minimum => 16,
        default => 512,
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MiB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. This option is generally not required and"
            ." is often better handled from within the guest OS.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string', format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        # NOTE(review): minimum is 1 while default is 0 - confirm the default is informational only
        minimum => 1,
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string', format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -smbios 'type=0,vendor=FOO'

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter'
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        # either a UUID, or '1' (autogenerate) / '0' (disable), see description
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        description => "Configure an audio device, useful in combination with QXL/Spice.",
        optional => 1
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1
    },
    tags => {
        type => 'string', format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
    meta => {
        type => 'string',
        format => $meta_info_fmt,
        description => "Some (read-only) meta-information about this guest.",
        optional => 1,
    },
    affinity => {
        type => 'string', format => 'pve-cpuset',
        description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
        optional => 1,
    },
};
729
cb702ebe
DL
# Property-string format of the 'cicustom' option: one optional volume
# reference per cloud-init data kind (meta, network, user, vendor).
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # both parts must use the same quote style, otherwise the quote, newline and
        # concatenation operator end up inside the description text itself
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
# Make $cicustom_fmt available under the format name 'pve-qm-cicustom'.
761PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
762
92c02f6c 763# any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu
# Schema of the cloud-init related VM configuration options, keyed by option name.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    ciupgrade => {
        optional => 1,
        type => 'boolean',
        description => 'cloud-init: do an automatic package upgrade after the first boot.',
        default => 1,
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        # the value is stored URL-encoded, see 'format'
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
820
821# what about other qemu settings ?
822#cpu => 'string',
823#machine => 'string',
824#fda => 'file',
825#fdb => 'file',
826#mtdblock => 'file',
827#sd => 'file',
828#pflash => 'file',
829#snapshot => 'bool',
830#bootp => 'file',
831##tftp => 'dir',
832##smb => 'dir',
833#kernel => 'file',
834#append => 'string',
835#initrd => 'file',
836##soundhw => 'string',
837
# Register every entry of $confdesc as standard option "pve-qm-<option name>".
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
841
# Upper bounds for the number of indexed devices/nodes of each kind.
my ($MAX_NETS, $MAX_SERIAL_PORTS, $MAX_PARALLEL_PORTS, $MAX_NUMA) = (32, 4, 3, 8);
846
ffc0d8c7
WB
# Property-string format of a single NUMA node definition; registered as
# format 'pve-qm-numanode'.
my $numa_fmt = {
    cpus => {
        type => 'string',
        description => 'CPUs accessing this NUMA node.',
        format_description => 'id[-id];...',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
    },
    memory => {
        type => 'number',
        description => 'Amount of memory this NUMA node provides.',
        optional => 1,
    },
    hostnodes => {
        type => 'string',
        description => 'Host NUMA nodes to use.',
        format_description => 'id[-id];...',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        optional => 1,
    },
    policy => {
        type => 'string',
        description => 'NUMA allocation policy.',
        enum => ['preferred', 'bind', 'interleave'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);

# Option schema shared by all numaN config keys; also registered as standard
# option 'pve-qm-numanode'.
my $numadesc = {
    type => 'string', format => $numa_fmt,
    description => 'NUMA topology.',
    optional => 1,
};
PVE::JSONSchema::register_standard_option('pve-qm-numanode', $numadesc);

# numa0 .. numa<$MAX_NUMA - 1> all point to the same schema hash.
$confdesc->{"numa$_"} = $numadesc for (0 .. $MAX_NUMA - 1);
1e3baf05 884
f7bc17ca
TL
# NIC models accepted for the 'model' property of a network device.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];
# Same list as one space-separated, alphabetically sorted string.
my $nic_model_list_txt = join(' ', sort @{$nic_model_list});
1e3baf05 902
52261945
DM
# Long description of the 'bridge' property of a network device. It also documents the
# user-mode (NATed) network that is used when no bridge is given.
my $net_fmt_bridge_descr = <<__EOD__;
Bridge to attach the network device to. The Proxmox VE standard bridge
is called 'vmbr0'.

If you do not specify a bridge, we create a kvm user (NATed) network
device, which provides DHCP and DNS services. The following addresses
are used:

 10.0.2.2 Gateway
 10.0.2.3 DNS Server
 10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.
__EOD__
917
# Property-string schema of a 'netN' config entry.
#
# 'model' is the default key. In addition, every NIC model name is accepted as a key that
# aliases 'model' and carries the MAC address as its value (e.g. "virtio=AA:BB:...").
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 64,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};
52261945 978
1e3baf05
DM
# Config-level description shared by all netN keys.
my $netdesc = {
    type => 'string',
    format => $net_fmt,
    optional => 1,
    description => 'Specify network devices.',
};

PVE::JSONSchema::register_standard_option('pve-qm-net', $netdesc);
986
0c9a7596
AD
# Property-string schema of a cloud-init 'ipconfigN' entry. Each gateway requires the
# address key of the same IP family to be present.
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        optional => 1,
        default => 'dhcp',
        description => 'IPv4 address in CIDR format.',
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        optional => 1,
        requires => 'ip',
        description => 'Default gateway for IPv4 traffic.',
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        optional => 1,
        default => 'dhcp',
        description => 'IPv6 address in CIDR format.',
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        optional => 1,
        requires => 'ip6',
        description => 'Default gateway for IPv6 traffic.',
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Config-level description shared by all cloud-init ipconfigN keys.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# Register the ipconfig description itself; this used to register $netdesc by mistake, so
# the "pve-qm-ipconfig" standard option described a network device instead.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1040
1e3baf05
DM
# Generate netN (VM config) and ipconfigN (cloud-init config) for every NIC slot.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# Fold all cloud-init options into the main config description.
$confdesc->@{keys %$confdesc_cloudinit} = values %$confdesc_cloudinit;
1049
83870398
DB
1050PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);
# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset and returns the short string form of the
# resulting set. On a parse error it returns nothing if $noerr is set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    my (undef, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };
    if ($@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($members);
    return $cpuset->short_string();
}
1063
ffa42b86
DC
1064PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
# Format verifier for 'pve-volume-id-or-qm-path'.
#
# Accepts the literal placeholders 'none' and 'cdrom' unchanged; everything else is
# delegated to verify_volume_id_or_absolute_path().
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $placeholder (qw(none cdrom)) {
        return $volid if $volid eq $placeholder;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1072
1073PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);
# Format verifier for 'pve-volume-id-or-absolute-path'.
#
# A value starting with '/' is returned as is. Anything else must pass the 'pve-volume-id'
# format check, whose result is returned. On failure: nothing is returned if $noerr is
# set, otherwise the check's error is re-thrown.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if index($volid, '/') == 0;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    return $checked if !$@;

    return if $noerr;
    die $@;
}
1086
bae179aa
DA
# Config-level description shared by all serialN keys: either a host device below /dev/
# or the literal 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => 'Create a serial device inside the VM (n is 0 to 3)',
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
bae179aa 1103
1989a89c
DA
# Config-level description shared by all parallelN keys: a host parport or USB printer
# device node.
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => 'Map host parallel devices (n is 0 to 2).',
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1989a89c
DA
1118
# Generate the indexed passthrough/device keys of the VM config description.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

# Drive keys come ready-made from the Drive module; copy them over one by one.
while (my ($key, $desc) = each %{$PVE::QemuServer::Drive::drivedesc_hash}) {
    $confdesc->{$key} = $desc;
}

$confdesc->{"usb$_"} = $PVE::QemuServer::USB::usbdesc
    for 0 .. $PVE::QemuServer::USB::MAX_USB_DEVICES - 1;
1138
5cfa9f5f
SR
# Property-string schema of the 'boot' option: the deprecated 'legacy' letter list
# (default key) and the explicit device 'order'.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => '[acdn]{1,4}',
        optional => 1,
        default_key => 1,
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => 'device[;device...]',
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1173
1174PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
# Format verifier for 'pve-qm-bootdev'.
#
# Valid boot devices are drive names other than efidisk*/tpmstate*, plus any netN, usbN or
# hostpciN key that exists in $confdesc. Returns $dev when valid. Otherwise returns nothing
# if $noerr is set and dies if not.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $not_bootable = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$not_bootable;

    for my $base (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$base\d+$/ && $confdesc->{$dev};
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1195
# Render a list of boot devices as a 'boot' property string ("order=dev1;dev2;...").
# An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    if (scalar(@$devs) == 0) {
        return "";
    }

    return PVE::JSONSchema::print_property_string({ order => join(';', @$devs) }, $boot_fmt);
}
1202
1e3baf05
DM
# Cached result of the KVM_GET_API_VERSION ioctl; 0 means "not queried yet".
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm, querying it only until a true value
# has been cached. Returns nothing when /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1216
1476b99f
DC
# Per-binary caches: the parsed QEMU version string and the binary's mtime at parse time.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of the given QEMU binary (the native-arch one by default), as parsed
# from its '--version' output. The result is cached per binary and refreshed when the
# binary's mtime changes; 'unknown' is returned when no version line could be parsed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default
    my $st = stat($binary);

    my $cached = $kvm_user_version->{$binary};
    if ($cached && ($kvm_mtime->{$binary} // -1) == $st->mtime) {
        return $cached;
    }

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $parse_line = sub {
        my ($line) = @_;
        return if $line !~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/;
        $kvm_user_version->{$binary} = $2;
    };

    # A failing '--version' call only produces a warning; the cache then keeps 'unknown'.
    eval { run_command([$binary, '--version'], outfunc => $parse_line); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
4df98f2f
TL
# Wrapper around PVE::QemuServer::Machine::extract_version() that falls back to the
# installed QEMU binary's version when no $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1e3baf05 1251
db70021b
TL
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $is_chardev = -c '/dev/vhost-net';
    return $is_chardev;
}
1e3baf05 1255
1e3baf05
DM
# True if $key is a known VM config option, i.e. has a defined entry in $confdesc.
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
1e3baf05 1260
# Cached path of the host's physical CD-ROM device; '' once detection found none.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink. The result
# is cached. If none is found a warning is logged and '' is cached and returned.
sub get_cdrom_path {
    if (!defined($cdrom_path)) {
        my @candidates = ('/dev/cdrom', '/dev/cdrom1', '/dev/cdrom2');
        $cdrom_path = first { -l $_ } @candidates;

        if (!defined($cdrom_path)) {
            log_warn("no physical CD-ROM available, ignoring");
            $cdrom_path = '';
        }
    }

    return $cdrom_path;
}
1275
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'       -> the host's physical CD-ROM device (see get_cdrom_path)
#   'none'        -> '' (empty drive)
#   absolute path -> returned unchanged
#   anything else -> treated as a volume ID and resolved via PVE::Storage::path
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1289
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/... paths and anything that already looks like "storage:volume"
# are returned unchanged. A bare file name is mapped onto the 'local' storage, below
# 'iso/' for cdrom media and below '<vmid>/' otherwise. Any other value containing a '/'
# cannot be converted and yields nothing.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1308
# Check that a volume's content type $vtype matches the drive's $media setting
# ('disk' expects 'images', 'cdrom' expects 'iso'). Does nothing when $media is unset;
# raises a parameter exception keyed by $opt on a mismatch.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_vtype = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_vtype{$media} // die "internal error";

    if ($vtype ne $etype) {
        raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
    }

    return;
}
1327
# Normalize $drive in place: a plain filesystem path in 'file' is converted to a volume ID
# (raising a parameter exception keyed by $opt if no storage owns the path), and 'media'
# defaults to 'cdrom' for ISO volumes and for the 'cdrom'/'none' placeholders.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # Only plain filesystem paths need converting: not the placeholders, not /dev/ nodes,
    # not "storage:volume" IDs and not purely numeric values.
    my $is_plain_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($is_plain_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        raise_param_exc({ $opt => "unable to associate path '$file' to any storage"})
            if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1347
b3c2bdd1
DM
# Parse the 'hotplug' option into a hash { feature => 1, ... }.
#
# '0' disables everything (empty hash) and '1' stands for the option's schema default.
# Otherwise $data is a list of feature names; an unknown name makes the function die.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    my $enabled = {};
    for my $feature (PVE::Tools::split_list($data)) {
        die "invalid hotplug feature '$feature'\n"
            if $feature !~ m/^(network|disk|cpu|memory|usb|cloudinit)$/;
        $enabled->{$1} = 1;
    }

    return $enabled;
}
1366
1367PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);
# Format verifier for 'pve-hotplug-features'.
#
# Returns $value when it parses as a hotplug feature list. On invalid input it returns
# nothing if $noerr is set; otherwise it dies with the parser's error message.
#
# parse_hotplug_features() dies on invalid input, so it has to run inside eval for the
# $noerr contract to be honoured.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    my $features = eval { parse_hotplug_features($value) };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $value if $features;

    return if $noerr;

    die "unable to parse hotplug option\n";
}
1377
28ef82d3
DM
# Issue a SCSI INQUIRY through the Linux SCSI generic (sg) ioctl interface on the open
# handle $fh.
#
# Returns a hash with the keys type, removable, vendor, product and revision, unpacked
# from the 36 byte response buffer. On any failure the function dies, or returns nothing
# if $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # command block: opcode 0x12, allocation length 36 (the size of $buf)
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # The 'P' fields embed pointers to $buf, $cmd and $sensebuf, so those scalars must stay
    # alive and unmodified until the ioctl has returned.
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    my @res = unpack($sg_io_hdr_t, $packet);
    # NOTE(review): by position in the template these look like host_status and
    # driver_status - confirm against sg.h.
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # reduce 'removable' to a boolean (bit 7) and 'type' to its low 5 bits
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1427
# Probe $path with a SCSI INQUIRY.
#
# Returns the inquiry result hash of scsi_inquiry(), or nothing if the path cannot be
# opened read/write or does not answer the inquiry.
sub path_is_scsi {
    my ($path) = @_;

    # Pass the mode separately so the path is never parsed as part of a 2-arg open
    # expression (which would e.g. strip surrounding whitespace from the file name).
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1437
# Build the -device argument for the USB tablet. q35 machines and aarch64 guests attach
# it to the EHCI bus, all others to UHCI.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($is_q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1453
# Build the -device argument for a USB keyboard. Only aarch64 guests get one; for any
# other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1461
a183df68
TL
# Drive identifier as used in QEMU ids: interface name directly followed by the index.
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
1466
# Build the QEMU -device argument for a disk or CD-ROM drive.
#
# Supported interfaces are virtio, scsi, ide and sata; 'usb' and anything else die. The
# returned string references the backing drive as "drive-<interface><index>" and appends
# the optional bootindex and serial properties.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';
    my $interface = $drive->{interface};
    my $drive_id = get_drive_id($drive);

    if ($interface eq 'virtio') {
        my $pciaddr = print_pci_addr($drive_id, $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($interface eq 'scsi') {
        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;

        my $devicetype = 'hd';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            my $path;
            if ($drive->{file} =~ m|^/|) {
                $path = $drive->{file};
                # host paths are probed to select block/generic passthrough
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// && !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # No scsihw, lsi* and pvscsi address disks by scsi-id; the remaining controller
        # types use scsi-id 0 and put the drive index into the LUN.
        my $addressed_by_unit =
            !$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi';

        $device = "scsi-$devicetype,bus=$controller_prefix$controller.0";
        if ($addressed_by_unit) {
            $device .= ",scsi-id=$unit";
        } else {
            $device .= ",channel=0,scsi-id=0,lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($interface eq 'ide' || $interface eq 'sata') {
        my $maxdev = $interface eq 'sata' ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled
        # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than
        # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to
        # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they
        # were before. Move odd ones up by 2 where they don't clash.
        if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $interface eq 'ide') {
            $controller += 2 * ($unit % 2);
            $unit = 0;
        }

        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        my $bus = $interface eq 'ide'
            ? "bus=ide.$controller,unit=$unit"
            : "bus=ahci$controller.$unit";
        $device = "ide-$devicetype,$bus,drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            $device .= ",rotation_rate=1" if $drive->{ssd};
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($interface eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1573
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
#
# Returns the value of the first "InitiatorName=" line, undef if the file has no such
# line, and nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1587
eec9f9fe
FE
# Decide whether io_uring may be the default AIO mode for a drive on the storage $scfg.
# Returns 1 if allowed (also when no storage config is given), nothing otherwise.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    if ($scfg) {
        my $type = $scfg->{type};

        # io_uring with cache mode writeback or writethrough on krbd will hang...
        return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

        # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
        # sometimes, just plain disable...
        return if $type eq 'lvm';

        # io_uring causes problems when used with CIFS since kernel 5.15
        # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
        return if $type eq 'cifs';
    }

    return 1;
}
1604
b7071d6c
FE
# Tell whether a drive ends up with a direct (O_DIRECT style) cache mode.
#
# An explicit 'cache' setting decides on its own (off, none and directsync are direct).
# Without one, every non-CD-ROM drive is direct unless it lives on a btrfs storage that
# does not have 'nocow' set.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache = $drive->{cache}) {
        my $is_direct = $cache =~ /^(?:off|none|directsync)$/;
        return $is_direct;
    }

    return 1
        if !drive_is_cdrom($drive) && !($scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow});

    return 0;
}
1618
# Build the QEMU -drive argument for $drive.
#
# $pbs_name, when set, names a Proxmox Backup Server block node that backs the drive via
# the alloc-track driver (not allowed for CD-ROMs). $io_uring tells whether io_uring may
# be chosen as default AIO mode when the drive has no explicit 'aio' setting.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    my $path;
    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } elsif ($storeid) {
        $path = PVE::Storage::path($storecfg, $volid);
        $format //= qemu_img_format($scfg, $volname);
    } else {
        $path = $volid;
        $format //= "raw";
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options that are passed through to QEMU under the same name
    my $opts = '';
    for my $o (qw(heads secs cyls trans media cache rerror werror aio discard)) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        $opts .= ",snapshot=" . ($drive->{snapshot} ? 'on' : 'off');
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # throttling: config key suffix => QEMU option suffix; mbps values are given in MiB/s
    for my $limit (['', '-total'], ['_rd', '-read'], ['_wr', '-write']) {
        my ($dir, $qmpname) = @$limit;
        my $bps = "throttling.bps$qmpname";
        my $iops = "throttling.iops$qmpname";

        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",$bps=" . int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",$bps-max=" . int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",$bps-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",$iops=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",$iops-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",$iops-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;

    if (!$drive->{aio}) {
        my $aio;
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $aio = 'io_uring';
        } elsif ($cache_direct) {
            # aio native works only with O_DIRECT
            $aio = 'native';
        } else {
            $aio = 'threads';
        }
        $opts .= ",aio=$aio";
    }

    if (!drive_is_cdrom($drive)) {
        # This used to be our default with discard not being specified:
        my $detectzeroes = 'on';
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes";
    }

    my $file_param = "file";
    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";

        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1744
5921764c
SR
# Build the -blockdev argument for a read-only Proxmox Backup Server node named $pbs_name.
# 'namespace' and 'keyfile' are only emitted when set in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @props, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @props, "snapshot=$pbs_conf->{snapshot}", "archive=$pbs_conf->{archive}";
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1755
# Build the QEMU -device argument for the NIC $netid described by $net.
#
# VirtIO NICs get multiqueue, queue size and MTU tuning depending on the configuration
# and on $machine_version. With $use_old_bios_files an explicit PXE romfile is selected
# for the models that have one in the table below.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        $tmpstr .= ",packed=on" if min_version($machine_version, 7, 1);
    }

    if (min_version($machine_version, 7, 1) && $is_virtio) {
        $tmpstr .= ",rx_queue_size=1024,tx_queue_size=256";
    }

    $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex};

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            # the special value 1 means "inherit the bridge MTU"
            if ($mtu == 1) {
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        # NOTE(review): the NIC model list in this file has 'ne2k_pci' and 'ne2k_isa' but no
        # plain 'ne2k', so that entry looks unreachable - confirm before changing it.
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        if (my $romfile = $romfile_for{$device}) {
            $tmpstr .= ",romfile=$romfile";
        }
    }

    return $tmpstr;
}
1818
# Build the netdev backend property string for the network device $netid.
#
# With a bridge configured this is a tap backend using the interface "tap${vmid}i<N>" and
# the pve-bridge scripts (the hotplug variant if $hotplug is set), otherwise a user backend
# with the VM name as hostname. For the 'virtio' model the queue count is appended.
#
# Dies if $netid is not of the form 'net<N>', if <N> is not below $MAX_NETS, or if the
# resulting interface name is too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # An ID without a numeric index must not reach the range check below: an empty index
    # compares as 0 there and would result in an interface name without index.
    die "got strange net id '$netid'\n" if $netid !~ m/^net(\d+)$/;
    my $i = int($1);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1e3baf05 1856
55655ebc
DC
# Device type used by print_vga_device() for each configured display type.
my $vga_map = {
    cirrus => 'cirrus-vga',
    std => 'VGA',
    vmware => 'vmware-svga',
    virtio => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};
1864
# Build the device property string for a display device.
#
# $vga is the parsed 'vga' property (type, memory). $id and $qxlnum select the qxl
# variants: with $qxlnum set the device type is 'qxl' (if $id is set) or 'qxl-vga'.
# The result has the form "<type>,id=vga<id><memory><max_outputs><pciaddr><edidoff>".
#
# Dies if no device type is known for $vga->{type}, or - for 'virtio-gl' - if the GL/EGL
# libraries or a DRM render node are missing on the host.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # The alternation must be grouped as a whole, otherwise '^' only binds to the first
        # and '$' only to the second alternative.
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vgamem_mb";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vgamem_mb";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the 'VGA' type on Windows guests unless OVMF is used
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1932
ffc0d8c7
WB
# Parse a ';' separated list of numbers and ranges, e.g. "0-3;5".
#
# Returns an array reference with one [ $start, $end ] pair per part, where $end is undef
# for a single number. Dies on a malformed part or if the end of a range is smaller than
# its start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        die "invalid range: $part\n" if $part !~ /^\s*(\d+)(?:-(\d+))?\s*$/;
        die "invalid range: $part ($2 < $1)\n" if defined($2) && $2 < $1;
        push @ranges, [ $1, $2 ];
    }

    return \@ranges;
}
2ed5d572 1946
ffc0d8c7
WB
# Parse a 'numa' property string. The 'cpus' and 'hostnodes' values, if present, are
# replaced by the range lists returned by parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $numa = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        next if !defined($numa->{$key});
        $numa->{$key} = parse_number_sets($numa->{$key});
    }

    return $numa;
}
1955
1e3baf05
DM
1956# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
# Parse a 'netX' property string.
#
# Returns the parsed hash, or nothing (after warning) if the string is invalid. Unless
# $disable_mac_autogen is set, a missing MAC address is filled in with a random one that
# honours the 'mac_prefix' from datacenter.cfg.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $net if defined($net->{macaddr}) || $disable_mac_autogen;

    my $datacenter_cfg = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $net->{macaddr} = PVE::Tools::random_ether_addr($datacenter_cfg->{mac_prefix});

    return $net;
}
1971
1972# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
# Parse an 'ipconfigX' property string.
#
# Returns the parsed hash, or { ip => 'dhcp', ip6 => 'dhcp' } if neither an IPv4 nor an
# IPv6 address is configured. Returns nothing (after warning) if the string is invalid or
# a gateway is combined with a missing or dynamic address of the same family.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my $problem;
    if ($res->{gw} && !$res->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $res->{ip6} address";
    }

    if (defined($problem)) {
        warn $problem;
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$res->{ip} && !$res->{ip6};

    return $res;
}
2006
# Serialize a parsed network device hash back into its 'netX' property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
2012
# Re-serialize every 'netX' entry of $settings in place after running it through
# parse_net(), which generates a MAC address where none is set. Entries that fail to parse
# are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
2023
055d554d
DM
# Returns 1 if the storage layer reports $vmid as owner of the volume $volid, nothing
# otherwise. Volumes given as absolute path never count as owned, and errors while
# resolving the volume are ignored.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my $owner;
    eval { (undef, $owner) = PVE::Storage::path($storecfg, $volid); };

    return 1 if $owner && ($owner == $vmid);
    return;
}
2037
055d554d
DM
# Handle a drive that got removed from the configuration $conf:
#  - a cloud-init drive is freed right away and the 'cloudinit' section is dropped
#  - a CD-ROM is ignored
#  - any other volume owned by the VM is registered as unused volume
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        warn $@ if $@;
        delete $conf->{cloudinit};
        return;
    }

    return if drive_is_cdrom($drive);

    my $volid = $drive->{file};
    PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
        if vm_is_volid_owner($storecfg, $vmid, $volid);
}
2052
1f30ac3a 2053# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = do {
    # All text fields share the same schema and only differ in their description.
    my $text_field = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    +{
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $text_field->("Set SMBIOS1 version."),
        serial => $text_field->("Set SMBIOS1 serial number."),
        manufacturer => $text_field->("Set SMBIOS1 manufacturer."),
        product => $text_field->("Set SMBIOS1 product ID."),
        sku => $text_field->("Set SMBIOS1 SKU string."),
        family => $text_field->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };
};
2110
2796e7d5
DM
# Parse a 'smbios1' property string. An invalid string results in a warning and an
# undefined return value.
sub parse_smbios1 {
    my ($data) = @_;

    my $smbios1 = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $smbios1;
}
2118
cd11416f
DM
# Serialize a parsed smbios1 hash back into its property string.
sub print_smbios1 {
    my $smbios1 = shift;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}
2123
# make the smbios1 schema available under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1' => $smbios1_fmt);
2796e7d5 2125
0ea9541d
DM
# Parse a 'watchdog' property string. Returns nothing for an empty value; an invalid
# string results in a warning and an undefined return value.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2135
# Parse the 'agent' property of the VM configuration $conf.
#
# Returns an empty hash if no agent is configured, if the property cannot be parsed (a
# warning is emitted in that case) or if the agent is not enabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_prop = $conf->{agent};
    return {} if !defined($agent_prop);

    my $agent = eval { parse_property_string($agent_fmt, $agent_prop) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2148
a2af1bbe
TL
# Look up a single key of the parsed guest agent settings of $conf.
sub get_qga_key {
    my ($conf, $key) = @_;

    # explicit undef, so that callers get exactly one value in list context, too
    return undef if !defined($conf->{agent});

    return parse_guest_agent($conf)->{$key};
}
2156
55655ebc
DC
# Parse a 'vga' property string. Returns an empty hash for an empty value; an invalid
# string results in a warning and an undefined return value.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2165
2cf61f33
SR
# Parse a 'rng' property string. Returns nothing for an empty value; an invalid string
# results in a warning and an undefined return value.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2175
26b443c8
TL
# Parse a 'meta' property string. Returns nothing for an empty value; an invalid string
# results in a warning and an undefined return value.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2185
# Create the 'meta' property string for a new VM: the QEMU version reported by
# kvm_user_version() and the current time as creation time.
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2197
cc181036
TL
# Returns extra QEMU arguments (array reference) needed for VMs that always use the latest
# machine version but had a machine version transition happen that affected HW such that,
# e.g., an OS config change would be required (we do not want to pin machine version for
# non-windows OS type). Returns nothing if no fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are affected
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 are affected
    my $creation_qemu = $meta->{'creation-qemu'};
    return if defined($creation_qemu) && min_version($creation_qemu, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    return if !($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26');

    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2222
1e3baf05
DM
2223# add JSON properties for create and set function
# Add the schema of all user-settable config options to $prop and return it. With
# $with_disk_alloc set, drive options use the schema variant from
# $PVE::QemuServer::Drive::drivedesc_hash_with_alloc instead.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    # options that are never exposed as create/set parameter
    my %skip = map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu meta);

    for my $opt (keys %$confdesc) {
        next if $skip{$opt};

        $prop->{$opt} = ($with_disk_alloc && is_valid_drivename($opt))
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2248
39051ac0
FE
2249# Properties that we can read from an OVF file
# Returns the schema of the properties that can be read from an OVF file: one optional
# disk image per valid drive name plus 'cores', 'memory' and 'name'.
sub json_ovf_properties {
    my %prop;

    for my $device (PVE::QemuServer::Drive::valid_drive_names()) {
        $prop{$device} = {
            type => 'string',
            format => 'pve-volume-id-or-absolute-path',
            description => "Disk image that gets imported to $device",
            optional => 1,
        };
    }

    %prop = (
        %prop,
        cores => {
            type => 'integer',
            description => "The number of CPU cores.",
            optional => 1,
        },
        memory => {
            type => 'integer',
            description => "Amount of RAM for the VM in MB.",
            optional => 1,
        },
        name => {
            type => 'string',
            description => "Name of the VM.",
            optional => 1,
        },
    );

    return \%prop;
}
2280
d41121fd
DM
2281# return copy of $confdesc_cloudinit to generate documentation
sub cloudinit_config_properties {
    # hand out a deep copy, the module-level description stays untouched by callers
    return dclone($confdesc_cloudinit);
}
2286
f16cf6c3
WB
# Returns a lookup hash (name => 1) with all cloud-init options, 'name' and every possible
# 'netX' key.
sub cloudinit_pending_properties {
    my %pending = (name => 1);

    $pending{$_} = 1 for keys %$confdesc_cloudinit;
    $pending{"net$_"} = 1 for 0 .. ($MAX_NETS - 1);

    return \%pending;
}
2295
1e3baf05
DM
# Validate $value against the type of the config option $key and return the normalized
# value: 1/0 for booleans, an integer for integers, the unchanged value for numbers and
# formatted strings, and the value without enclosing double quotes for plain strings.
#
# Dies if the option is unknown, the value is undefined, contains a line break or does not
# match the type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    if (my $fmt = $desc->{format}) {
        PVE::JSONSchema::check_format($fmt, $value);
        return $value;
    }

    # plain strings may be enclosed in double quotes
    $value =~ s/^\"(.*)\"$/$1/;
    return $value;
}
2332
# Destroy the VM $vmid: free all volumes owned by it and remove its configuration.
#
# Parameters:
#   $storecfg           - storage configuration
#   $vmid               - ID of the VM
#   $skiplock           - do not check the config lock
#   $replacement_conf   - if defined, written as new config instead of destroying the
#                         config file
#   $purge_unreferenced - also free all images listed for $vmid on the storages, even if
#                         the config does not reference them
#
# Dies if the config is locked (a 'suspended' lock is accepted), or if the VM is a template
# whose base volumes are still used by linked clones. Failures to free a single volume only
# result in a warning.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended')) {
        PVE::QemuConfig->check_lock($conf);
    }

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked clones\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);

        });
    }

    # volumes already freed, so that a volume referenced more than once is handled only once
    my $volids = {};
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # saved VM states of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        next if !defined($snap->{vmstate});
        my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
        next if !defined($drive);
        $remove_owned_drive->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2405
# Parse the raw content $raw of the VM config file $filename.
#
# Returns nothing if $raw is undefined, otherwise a hash with the options of the current
# configuration plus:
#   digest    - SHA1 hex digest of $raw
#   snapshots - hash of snapshot name => snapshot config
#   pending   - options from the [PENDING] section
#   cloudinit - options from the [special:cloudinit] section (not validated)
#
# Comment lines and 'description' lines are collected into the 'description' of the section
# they belong to. Problems with single lines are reported through warn, or through die if
# $strict is set. Dies if the VM ID cannot be extracted from $filename.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points to the section that is currently being filled
    my $conf = $res;
    my $descr;
    # store the description collected so far in the current section, must be called before
    # $conf is switched to the next section
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # every other section is a snapshot
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the line start, so that the value of another option (e.g. 'args')
            # ending in 'snapstate: ...' is not mistaken for a snapstate
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # 'cdrom' is stored as 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2525
1858638f
DM
# Generate the raw content of the VM config file from the config hash $conf.
#
# $conf is normalized in place: 'cdrom' is turned into 'ide2', the obsolete 'smp' option
# is dropped (or converted to 'sockets'), all values are replaced by their type-checked
# form, and 'unusedX' entries of volumes that are in use again are removed.
#
# Returns the file content: the current configuration followed by the [PENDING],
# [special:cloudinit] and snapshot sections. Dies if a value does not pass check_type(),
# if 'delete' is used outside of the pending section, if 'cdrom' conflicts with 'ide2' or
# if a snapshot is named 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volumes referenced by a drive of the current or the pending configuration
    my $used_volids = {};

    # type-check all options of one section in place and record the volumes it uses
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # render one section: description as comment lines, followed by the sorted options
    my $generate_raw_config = sub {
        my ($conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written for the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $raw .= "$key: $conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)){
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
1e3baf05 2632
# Returns a hash with the default value of every config option that defines one.
sub load_defaults {
    my %defaults;

    # we use static defaults from our JSON schema configuration
    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults{$key} = $default if defined($default);
    }

    return \%defaults;
}
2646
# Returns a hash { $vmid => { exists => 1 } } with all guests of type 'qemu' that the
# cluster VM list assigns to the local node.
sub config_list {
    my %local_vms;

    my $vmlist = PVE::Cluster::get_vmlist();
    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return \%local_vms if !$ids;

    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        my $node = $entry->{node};
        next unless $node && $node eq $nodename;

        my $type = $entry->{type};
        next unless $type && $type eq 'qemu';

        $local_vms{$vmid}->{exists} = 1;
    }

    return \%local_vms;
}
2662
64e13401
DM
2663# test if VM uses local resources (to prevent migration)
# Collect the local resources used by the VM configuration $conf.
#
# Local resources are the old-style 'hostusb'/'hostpci' options, 'ivshmem' and all
# usbN/hostpciN/serialN/parallelN entries, except for spice USB devices and serial sockets.
# Entries using a cluster-wide mapping are additionally recorded as mapped resources, and
# for every cluster node the entries without a mapping on that node are collected.
#
# Dies with "VM uses local resources" if any local resource was found and $noerr is unset.
#
# Returns, in list context, (\@loc_res, $mapped_res, $missing_mappings_by_node), and only
# \@loc_res in scalar context.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };

    # record $key for every node that has no entry for the mapping $id
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;
        for my $node (@$nodelist) {
            my $entry;
            if ($type eq 'pci') {
                $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
            } elsif ($type eq 'usb') {
                $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
            }
            if (!scalar($entry->@*)) {
                push @{$missing_mappings_by_node->{$node}}, $key;
            }
        }
    };

    push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
    push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    foreach my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # entries using a mapping have no 'host' property
            next if defined($entry->{host}) && $entry->{host} =~ m/^spice$/i;
            if ($entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        if ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if ($entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2721
# check if used storages are available on all nodes (used by migrate)
47152e2e
DM
# Verify for every volume of $conf that its storage is enabled on the local node and on
# $node, and that the storage allows the content type of the volume. Volumes without a
# storage ID are skipped. Dies on the first violation.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2745
719893a9
DM
2746# list nodes where all VM images are available (used by has_feature API)
# Returns a hash (node name => 1) of the cluster nodes on which all volumes of $conf are
# available: a disabled storage rules out every node, a storage restricted to certain
# nodes rules out all others, and a non-shared storage rules out all but the local node.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodehash = { map { $_ => 1 } @{ PVE::Cluster::get_nodelist() } };
    my $nodename = nodename();

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
            return;
        }

        # decides whether a node can still access the volume
        my $is_available;
        if (my $avail = $scfg->{nodes}) {
            $is_available = sub { $avail->{$_[0]} };
        } elsif (!$scfg->{shared}) {
            $is_available = sub { $_[0] eq $nodename };
        } else {
            return;
        }

        my @unavailable = grep { !$is_available->($_) } keys %$nodehash;
        delete $nodehash->{$_} for @unavailable;
    });

    return $nodehash;
}
2779
f25852c2
TM
# Determine, for every cluster node, which storages used by the VM's volumes are not
# usable there (storage disabled, or restricted to a node list not containing the node).
#
# Returns a hash reference keyed by node name. A node with problems gets an
# 'unavailable_storages' entry holding the sorted list of storage IDs; all other nodes
# map to an empty hash.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $cluster_nodes = PVE::Cluster::get_nodelist();
    my $per_node = {};
    $per_node->{$_} = {} for @$cluster_nodes;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected;
        if ($scfg->{disable}) {
            @affected = keys %$per_node;
        } elsif (my $allowed = $scfg->{nodes}) {
            @affected = grep { !$allowed->{$_} } keys %$per_node;
        }

        $per_node->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected;
    });

    # turn the collected sets into sorted lists
    for my $info (values %$per_node) {
        my $unavail = $info->{unavailable_storages} or next;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $per_node;
}
2818
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible.
#
# Returns the result of PVE::QemuServer::Helpers::vm_running_locally($vmid). Unless
# $nocheck is set, PVE::QemuConfig::assert_config_exists_on_node() is called first.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't
    #   processed it yet
    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2832
# Return the VM list from config_list(), with a 'pid' entry added for each VM that has
# a '<vmid>.pid' file in the run directory and for which check_running() returns a PID.
# If the run directory cannot be opened, the list is returned without any PIDs.
sub vzlist {
    my $vms = config_list();

    my $rundir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vms if !$rundir;

    while (defined(my $entry = $rundir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/ or next;
        next if !defined($vms->{$vmid});

        my $pid = check_running($vmid);
        $vms->{$vmid}->{pid} = $pid if $pid;
    }

    return $vms;
}
2850
b1a70cab
DM
# Schema-style property definitions (type, description, optional, renderer) describing
# the fields of a VM status entry.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        enum => ['stopped', 'running'],
        description => "QEMU process status.",
    },
    maxmem => {
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
        description => "Maximum memory in bytes.",
    },
    maxdisk => {
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
        description => "Root disk size in bytes.",
    },
    name => { type => 'string', optional => 1, description => "VM name." },
    qmpstatus => {
        type => 'string',
        optional => 1,
        description => "VM run state from the 'query-status' QMP monitor command.",
    },
    pid => { type => 'integer', optional => 1, description => "PID of running qemu process." },
    uptime => {
        type => 'integer',
        optional => 1,
        renderer => 'duration',
        description => "Uptime.",
    },
    cpus => { type => 'number', optional => 1, description => "Maximum usable CPUs." },
    lock => {
        type => 'string',
        optional => 1,
        description => "The current config lock, if any.",
    },
    tags => {
        type => 'string',
        optional => 1,
        description => "The current configured tags, if any",
    },
    'running-machine' => {
        type => 'string',
        optional => 1,
        description => "The currently running machine type (if running).",
    },
    'running-qemu' => {
        type => 'string',
        optional => 1,
        description => "The currently running QEMU version (if running).",
    },
};
2917
1e3baf05
DM
2918my $last_proc_pid_stat;
2919
03a33f30
DM
# Get VM status information for all VMs returned by vzlist(), or only for $opt_vmid.
#
# This must be fast and should not block when $full is false. KVM is only queried
# using QMP when $full is true (this can be slow).
#
# Returns a hash reference mapping each VMID to a hash with the collected values
# (vmid, status, pid, cpus, maxmem, maxdisk, cpu, mem, netin/netout, ...). With $full,
# per-NIC counters and the results of several QMP queries are added as well.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $status_by_vmid = {};

    my $storecfg = PVE::Storage::config();

    my $vms = vzlist();
    my $defaults = load_defaults();

    my ($host_uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $host_cpus = $cpuinfo->{cpus} || 1;
    my $MiB = 1024 * 1024;

    # true for VMs that are excluded by the optional single-VM filter
    my $is_filtered = sub { $opt_vmid && ($_[0] ne $opt_vmid) };

    # pass 1: static information from the configuration
    for my $vmid (keys %$vms) {
        next if $is_filtered->($vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $vms->{$vmid}->{pid};

        my $entry = { vmid => int($vmid) };
        $entry->{pid} = int($pid) if $pid;

        # fixme: better status?
        $entry->{status} = $pid ? 'running' : 'stopped';

        # no usage information is available for the boot disk, only its size (if known)
        $entry->{disk} = 0;
        $entry->{maxdisk} = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf) // 0;

        # sockets * cores, capped at the host CPU count; an explicit vcpus setting wins
        my $sockets = $conf->{sockets} || $defaults->{sockets};
        my $cores = $conf->{cores} || $defaults->{cores};
        my $cpus = $sockets * $cores;
        $cpus = $host_cpus if $cpus > $host_cpus;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $entry->{cpus} = $cpus;

        $entry->{name} = $conf->{name} || "VM $vmid";
        $entry->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $MiB;

        if (my $balloon = $conf->{balloon}) {
            $entry->{balloon_min} = $balloon * $MiB;
            $entry->{shares} = $conf->{shares} // $defaults->{shares};
        }

        # runtime counters, filled in below for running VMs
        $entry->{$_} = 0 for qw(uptime cpu mem netout netin diskread diskwrite);

        $entry->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $entry->{serial} = 1 if conf_has_serial($conf);
        $entry->{lock} = $conf->{lock} if $conf->{lock};
        $entry->{tags} = $conf->{tags} if defined($conf->{tags});

        $status_by_vmid->{$vmid} = $entry;
    }

    # pass 2: network counters of the tap<vmid>i<N> devices
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/ or next;
        my $entry = $status_by_vmid->{$vmid} or next;

        my ($received, $transmitted) = $netdev->{$dev}->@{qw(receive transmit)};

        # note: the device's receive counter is accounted as the VM's netout and vice
        # versa (presumably because the tap counters are from the host's perspective)
        $entry->{netout} += $received;
        $entry->{netin} += $transmitted;

        if ($full) {
            $entry->{nics}->{$dev} = {
                netout => int($received),
                netin => int($transmitted),
            };
        }
    }

    # pass 3: uptime, memory and CPU usage from /proc for running VMs
    my $now = gettimeofday();

    for my $vmid (keys %$vms) {
        my $entry = $status_by_vmid->{$vmid} or next;
        my $pid = $entry->{pid} or next;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid) or next; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $entry->{uptime} = int(($host_uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $entry->{mem} = int(($pstat->{rss} / $pstat->{vsize}) * $entry->{maxmem});
        }

        my $sample = { time => $now, used => $used, cpu => 0 };

        my $previous = $last_proc_pid_stat->{$pid};
        if (!$previous) {
            # first time this PID is seen: just remember the sample
            $last_proc_pid_stat->{$pid} = $sample;
            next;
        }

        my $dtime = ($now - $previous->{time}) * $host_cpus * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $previous->{used};

            $entry->{cpu} = (($dutime / $dtime) * $host_cpus) / $entry->{cpus};
            $sample->{cpu} = $entry->{cpu};
            $last_proc_pid_stat->{$pid} = $sample;
        } else {
            # interval too short, reuse the previously computed value
            $entry->{cpu} = $previous->{cpu};
        }
    }

    return $status_by_vmid if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $entry = $status_by_vmid->{$vmid};

        # use memory assigned to VM
        $entry->{maxmem} = $info->{max_mem};
        $entry->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $entry->{mem} = $info->{total_mem} - $info->{free_mem};
            $entry->{freemem} = $info->{free_mem};
        }

        $entry->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $blockstats = $resp->{'return'} || [];
        my ($read_total, $write_total) = (0, 0);

        for my $blockstat (@$blockstats) {
            my $stats = $blockstat->{stats};
            $read_total += $stats->{rd_bytes};
            $write_total += $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $status_by_vmid->{$vmid}->{blockstat}->{$blockstat->{device}} = $stats;
        }

        $status_by_vmid->{$vmid}->{diskread} = $read_total;
        $status_by_vmid->{$vmid}->{diskwrite} = $write_total;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $machines = $resp->{'return'} || [];

        $status_by_vmid->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($machines);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};

        my $version = 'unknown';
        if (my $v = $data->{qemu}) {
            $version = join('.', $v->@{qw(major minor micro)});
        }

        $status_by_vmid->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $status_by_vmid->{$vmid}->{qmpstatus} = $status;
    };

    for my $vmid (keys %$vms) {
        next if $is_filtered->($vmid);
        next if !$status_by_vmid->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (keys %$vms) {
        next if $is_filtered->($vmid);
        my $entry = $status_by_vmid->{$vmid};

        if ($entry->{pid}) {
            # we can't use the $qmpclient since it might have already aborted on
            # 'query-balloon', but this might also fail for older versions...
            my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
            $entry->{'proxmox-support'} = $qemu_support // {};
        }

        # fall back to the process status if QMP did not provide one
        $entry->{qmpstatus} = $entry->{status} if !$entry->{qmpstatus};
    }

    return $status_by_vmid;
}
3158
8107b378
DC
# Return 1 if any of the options serial0 .. serial<$MAX_SERIAL_PORTS - 1> has a true
# value in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $port (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$port"};
    }

    return 0;
}
3170
d5535a00
TL
# Describe the audio device configured as audio<$id> ($id defaults to 0).
#
# Returns nothing if the option is not defined. Otherwise returns a hash reference with
# the device model (dev), the device ID (dev_id), the backend driver (backend, 'spice'
# if none is configured) and the backend ID (backend_id).
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $raw = $conf->{"audio$id"};
    return if !defined($raw);

    my $props = parse_property_string($audio_fmt, $raw);
    my $backend = $props->{driver} // 'spice';

    my $info = {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $backend,
        backend_id => "$backend-backend${id}",
    };

    return $info;
}
3188
# Build the QEMU command line arguments for an audio device.
#
# $audio is a hash with the keys dev, dev_id, backend and backend_id (the shape returned
# by conf_has_audio), $audiopciaddr is appended verbatim to the device string.
# Returns an array reference with the '-device' and '-audiodev' arguments.
# Dies for device models other than 'AC97' and those ending in 'intel-hda'.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $dev = $audio->{dev};

    # the audiodev property is only added for machine versions >= 4.2
    my $audiodev = "";
    if (min_version($machine_version, 4, 2)) {
        $audiodev = ",audiodev=$audio->{backend_id}";
    }

    my $devs = [];

    if ($dev eq 'AC97') {
        push @$devs, '-device', "AC97,id=${id}${audiopciaddr}$audiodev";
    } elsif ($dev =~ /intel\-hda$/) {
        # HDA controller plus two codecs attached to its bus
        push @$devs, '-device', "$dev,id=${id}${audiopciaddr}";
        push @$devs, '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev";
        push @$devs, '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev";
    } else {
        die "unknown audio device '$dev', implement me!";
    }

    push @$devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return $devs;
}
3214
f9dde219
SR
# Return the paths of the swtpm control socket ('socket') and pid file ('pid') of a VM.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $paths = {};
    $paths->{socket} = "/var/run/qemu-server/$vmid.swtpm";
    $paths->{pid} = "/var/run/qemu-server/$vmid.swtpm.pid";

    return $paths;
}
3222
# Append the QEMU arguments for the emulated TPM (chardev on the swtpm socket, tpmdev
# and tpm-tis device) to @$devices. Does nothing if the VM has no tpmstate0.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3234
# Start the swtpm daemon for the VM's TPM state drive.
#
# Returns nothing if $tpmdrive is not set. Otherwise returns the untainted PID read from
# the daemon's pid file, or undef if the file contains no number. Unless $migration is
# set, swtpm_setup is run first (with --not-overwrite) to create missing TPM state.
# Dies if the pid file does not appear within roughly 5 seconds.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $tpm = parse_drive("tpmstate0", $tpmdrive);

    # volumes on a storage need to be mapped, anything else is used as given
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    my $state = $storeid
        ? PVE::Storage::map_volume($storecfg, $tpm->{file})
        : $tpm->{file};

    my $is_v2 = ($tpm->{version} // '') eq 'v2.0';

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_v2;

        # use the line handed to the callback instead of relying on a capture
        # variable that happens to be set by the caller
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $is_v2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error; only use the
    # capture of this very match, so an empty pid file cannot yield a stale value
    my $first_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $first_line =~ m/(\d+)/;
    return $pid;
}
3309
86b8228b
DM
# Check whether the given vga option selects a qxl display type.
#
# Returns 0 if the parsed type is missing or not qxl/qxl2/qxl3/qxl4. Otherwise returns
# the digit following 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $type = $vgaconf->{type};
    return 0 if !$type;

    if ($type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3319
d731ecbe
WB
# Return true if $arch equals the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();
    return $host_arch eq $arch;
}
3324
045749f2
TL
# Return the architecture configured for the VM, or the host architecture if the
# 'arch' option is not defined.
sub get_vm_arch {
    my ($conf) = @_;

    return defined($conf->{arch}) ? $conf->{arch} : get_host_arch();
}
3329
d731ecbe
WB
# Machine type used for an architecture when the VM does not configure one.
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
3334
0761e619
TL
# Return the "major.minor" prefix of a QEMU version string.
#
# $kvmversion defaults to the result of kvm_user_version(). Returns undef if the
# version does not start with "<digits>.<digits>".
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # take the capture from this match only: returning $1 unconditionally would hand
    # back the capture of an unrelated earlier match whenever this one fails
    my ($major_minor) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3341
# Turn an unversioned machine type into a version-pinned one.
#
# The pinned version is $base_version if it is defined and accepted by
# PVE::QemuServer::Machine::can_run_pve_machine_version(), otherwise the installed
# machine version. An empty machine or 'pc' becomes 'pc-i440fx-<ver>', 'q35' becomes
# 'pc-q35-<ver>' and 'virt' becomes 'virt-<ver>'. Any other machine type is returned
# unchanged after a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);
    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my %prefix_for = (
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    );

    my $prefix = $prefix_for{ $machine || 'pc' };
    if (!defined($prefix)) {
        warn "unknown machine type '$machine', not touching that!\n";
        return $machine;
    }

    return "$prefix-$pin_version";
}
3363
# Determine the machine type string for a VM.
#
# $forcemachine takes precedence over the configured machine. Unversioned types (none,
# 'pc', 'q35', 'virt') are resolved here: Windows guests get a version-pinned type, an
# unset machine falls back to the default for $arch (x86_64 if not given), and with
# $add_pve_version the '+pve<N>' suffix for $kvmversion is appended. Version-pinned
# types without a '+pve<N>' suffix get '+pve0' when $add_pve_version is set.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # for version-pinned machines that do not include a pve-version (e.g.
        # pc-q35-4.1), we assume 0 to keep them stable in case we bump
        # (a trailing '.pxe' has to stay at the very end)
        my ($without_pxe) = $machine =~ m/^(.*?)\.pxe$/;
        $machine = defined($without_pxe) ? "$without_pxe+pve0.pxe" : "$machine+pve0";
    }

    return $machine;
}
3398
90b20b15
DC
# Select the OVMF code and vars images for an architecture and EFI disk.
#
# The 'default' image type is used unless $efidisk has efitype '4m' on a non-aarch64
# architecture; then '4m' (with $smm) or '4m-no-smm' is used, with '-ms' appended if
# 'pre-enrolled-keys' is set on the EFI disk.
#
# Returns the list ($ovmf_code, $ovmf_vars). Dies if the architecture or image type is
# not known in $OVMF, or if one of the image files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if ($arch ne "aarch64" && defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # fail with a clear message instead of dereferencing undef for an unknown type
    my $images = $types->{$type}
        or die "no OVMF images known for type '$type' on architecture '$arch'\n";

    my ($ovmf_code, $ovmf_vars) = $images->@*;
    die "EFI base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
    die "EFI vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;

    return ($ovmf_code, $ovmf_vars);
}
3417
6908fd9b
WB
# QEMU binaries used to emulate a non-native architecture.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Return the binary to run for $arch: '/usr/bin/kvm' if it is the host architecture,
# otherwise the matching qemu-system binary. Dies for architectures without an entry.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $binary = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$binary;

    return $binary;
}
3430
05a4c550
SR
3431# To use query_supported_cpu_flags and query_understood_cpu_flags to get flags
3432# to use in a QEMU command line (-cpu element), first array_intersect the result
3433# of query_supported_ with query_understood_. This is necessary because:
3434#
3435# a) query_understood_ returns flags the host cannot use and
3436# b) query_supported_ (rather the QMP call) doesn't actually return CPU
3437# flags, but CPU settings - with most of them being flags. Those settings
3438# (and some flags, curiously) cannot be specified as a "-cpu" argument.
3439#
3440# query_supported_ needs to start up to 2 temporary VMs and is therefore rather
3441# expensive. If you need the value returned from this, you can get it much
3442# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
3443# $accel being 'kvm' or 'tcg'.
3444#
3445# pvestatd calls this function on startup and whenever the QEMU/KVM version
3446# changes, automatically populating pmxcfs.
3447#
3448# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
3449# since kvm and tcg machines support different flags
3450#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command.
    # Returns the sorted list of flags reported as enabled for the 'host' CPU model.
    my $query_with_accel = sub {
        my ($kvm) = @_;

        my @cmd = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @cmd, '-accel', 'tcg' if !$kvm;

        my $rc = run_command(\@cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        my %enabled;
        eval {
            my $cmd_result = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $cmd_result->{model}->{props};
            for my $prop (keys %$props) {
                next if $props->{$prop} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $flag = $prop) =~ s/\.|-/_/g;
                $enabled{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %enabled ];
    };

    my $flags = {};

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $query_with_accel->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags->{kvm} = eval { $query_with_accel->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $flags;
}
3532
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the whitespace-separated list of CPU flags for the host architecture from the
# 'recognized-CPUID-flags-<arch>' file. Returns an array reference; dies if the file
# does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    # splitting on ' ' ignores leading whitespace and never yields empty trailing fields
    my @flags = split(' ', file_get_contents($filepath));

    return \@flags;
}
3548
e5a6919c
FE
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# Returns nothing for 'virt' machines, otherwise a true value if the VM uses SeaBIOS
# (bios unset or 'seabios') together with a vga type of 'serial<N>' or 'none'.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    return if $machine =~ m/^virt/; # there is no smm flag that could be disabled

    my $uses_seabios = !defined($conf->{bios}) || $conf->{bios} eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3559
b7d80c79
FE
# Build the two pflash drive strings used for OVMF: the read-only code image (unit 0)
# and the EFI vars drive (unit 1).
#
# With a configured efidisk0 the vars drive points at that volume. Without one, the
# vars template is copied to a temporary file under /tmp and a warning is logged.
# Returns the list ($code_drive_str, $var_drive_str).
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $q35);

    my $code_drive = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
    my $vars_drive = "if=pflash,unit=1,id=drive-efidisk0";

    if (!$efidisk) {
        log_warn("no efidisk configured! Using temporary efivars disk.");
        my $tmp_path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $tmp_path, -s $ovmf_vars);
        $vars_drive .= ",format=raw,file=$tmp_path";
        $vars_drive .= ",size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);

        return ($code_drive, $vars_drive);
    }

    my ($path, $format) = $efidisk->@{'file', 'format'};

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($efidisk->{file}, 1);
    if ($storeid) {
        $path = PVE::Storage::path($storecfg, $efidisk->{file});
        if (!defined($format)) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }
    } elsif (!defined($format)) {
        die "efidisk format must be specified\n";
    }

    # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
    if ($path =~ m/^rbd:/) {
        $vars_drive .= ',cache=writeback';
        $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
    }
    $vars_drive .= ",format=$format,file=$path";

    $vars_drive .= ",size=" . (-s $ovmf_vars) if $format eq 'raw' && $version_guard->(4, 1, 2);
    $vars_drive .= ',readonly=on' if drive_is_read_only($conf, $efidisk);

    return ($code_drive, $vars_drive);
}
3599
1e3baf05 3600sub config_to_command {
5921764c
SR
3601 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3602 $pbs_backing) = @_;
1e3baf05 3603
3326ae19 3604 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
5bdcf937 3605 my $devices = [];
5bdcf937 3606 my $bridges = {};
b42d3cf9 3607 my $ostype = $conf->{ostype};
4317f69f 3608 my $winversion = windows_version($ostype);
d731ecbe 3609 my $kvm = $conf->{kvm};
38277afc 3610 my $nodename = nodename();
d731ecbe 3611
045749f2 3612 my $arch = get_vm_arch($conf);
1476b99f
DC
3613 my $kvm_binary = get_command_for_arch($arch);
3614 my $kvmver = kvm_user_version($kvm_binary);
045749f2 3615
a04dd5c4
SR
3616 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3617 $kvmver //= "undefined";
3618 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3619 }
3620
9471e48b
TL
3621 my $add_pve_version = min_version($kvmver, 4, 1);
3622
3623 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
4df98f2f 3624 my $machine_version = extract_version($machine_type, $kvmver);
d731ecbe 3625 $kvm //= 1 if is_native($arch);
4317f69f 3626
a77a53ae 3627 $machine_version =~ m/(\d+)\.(\d+)/;
ac0077cc 3628 my ($machine_major, $machine_minor) = ($1, $2);
ac0077cc 3629
b516c848
SR
3630 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3631 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3632 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
4df98f2f
TL
3633 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3634 ." please upgrade node '$nodename'\n"
b516c848 3635 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
ac0077cc 3636 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
4df98f2f
TL
3637 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3638 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3639 ." node '$nodename'\n";
ac0077cc
SR
3640 }
3641
3642 # if a specific +pve version is required for a feature, use $version_guard
3643 # instead of min_version to allow machines to be run with the minimum
3644 # required version
3645 my $required_pve_version = 0;
3646 my $version_guard = sub {
3647 my ($major, $minor, $pve) = @_;
3648 return 0 if !min_version($machine_version, $major, $minor, $pve);
47f35977
SR
3649 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3650 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
ac0077cc
SR
3651 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3652 return 1;
3653 };
a77a53ae 3654
4df98f2f
TL
3655 if ($kvm && !defined kvm_version()) {
3656 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3657 ." or enable in BIOS.\n";
d731ecbe 3658 }
bfcd9b7e 3659
3392d6ca 3660 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
4d3f29ed 3661 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
249c4a6c
AD
3662 my $use_old_bios_files = undef;
3663 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
db656e5f 3664
74fe3d9a 3665 my $cmd = [];
83870398 3666 if ($conf->{affinity}) {
74fe3d9a 3667 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
83870398
DB
3668 }
3669
1476b99f 3670 push @$cmd, $kvm_binary;
1e3baf05
DM
3671
3672 push @$cmd, '-id', $vmid;
3673
e4d4cda1
HR
3674 my $vmname = $conf->{name} || "vm$vmid";
3675
6884a7d7 3676 push @$cmd, '-name', "$vmname,debug-threads=on";
e4d4cda1 3677
27b25d03
SR
3678 push @$cmd, '-no-shutdown';
3679
1e3baf05
DM
3680 my $use_virtio = 0;
3681
d036e418 3682 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
378ad769 3683 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
c971c4f2
AD
3684 push @$cmd, '-mon', "chardev=qmp,mode=control";
3685
2ea5fb7e 3686 if (min_version($machine_version, 2, 12)) {
b4496b9e 3687 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
71bd73b5
DC
3688 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3689 }
1e3baf05 3690
d036e418 3691 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
19672434 3692
1e3baf05
DM
3693 push @$cmd, '-daemonize';
3694
2796e7d5 3695 if ($conf->{smbios1}) {
1f30ac3a
CE
3696 my $smbios_conf = parse_smbios1($conf->{smbios1});
3697 if ($smbios_conf->{base64}) {
3698 # Do not pass base64 flag to qemu
3699 delete $smbios_conf->{base64};
3700 my $smbios_string = "";
3701 foreach my $key (keys %$smbios_conf) {
3702 my $value;
3703 if ($key eq "uuid") {
3704 $value = $smbios_conf->{uuid}
3705 } else {
3706 $value = decode_base64($smbios_conf->{$key});
3707 }
3708 # qemu accepts any binary data, only commas need escaping by double comma
3709 $value =~ s/,/,,/g;
3710 $smbios_string .= "," . $key . "=" . $value if $value;
3711 }
3712 push @$cmd, '-smbios', "type=1" . $smbios_string;
3713 } else {
3714 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3715 }
2796e7d5
DM
3716 }
3717
3edb45e7 3718 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
b7d80c79
FE
3719 my ($code_drive_str, $var_drive_str) =
3720 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3721 push $cmd->@*, '-drive', $code_drive_str;
3722 push $cmd->@*, '-drive', $var_drive_str;
a783c78e
AD
3723 }
3724
483ceeab 3725 if ($q35) { # tell QEMU to load q35 config early
7583d156 3726 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
2ea5fb7e 3727 if (min_version($machine_version, 4, 0)) {
7583d156
DC
3728 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3729 } else {
3730 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3731 }
3732 }
da8b4189 3733
cc181036
TL
3734 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3735 push @$cmd, $fixups->@*;
3736 }
3737
844d8fa6
DC
3738 if ($conf->{vmgenid}) {
3739 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3740 }
3741
d40e5e18 3742 # add usb controllers
4df98f2f 3743 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
0cf8d56c 3744 $conf, $bridges, $arch, $machine_type, $machine_version);
d40e5e18 3745 push @$devices, @usbcontrollers if @usbcontrollers;
55655ebc 3746 my $vga = parse_vga($conf->{vga});
2fa3151e 3747
55655ebc
DC
3748 my $qxlnum = vga_conf_has_spice($conf->{vga});
3749 $vga->{type} = 'qxl' if $qxlnum;
2fa3151e 3750
55655ebc 3751 if (!$vga->{type}) {
869ad4a7
WB
3752 if ($arch eq 'aarch64') {
3753 $vga->{type} = 'virtio';
2ea5fb7e 3754 } elsif (min_version($machine_version, 2, 9)) {
55655ebc 3755 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
a2a5cd64 3756 } else {
55655ebc 3757 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
a2a5cd64 3758 }
5acbfe9e
DM
3759 }
3760
1e3baf05 3761 # enable absolute mouse coordinates (needed by vnc)
fa3b3ce0
TL
3762 my $tablet = $conf->{tablet};
3763 if (!defined($tablet)) {
5acbfe9e 3764 $tablet = $defaults->{tablet};
590e698c 3765 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
55655ebc 3766 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
5acbfe9e
DM
3767 }
3768
d559309f
WB
3769 if ($tablet) {
3770 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3771 my $kbd = print_keyboarddevice_full($conf, $arch);
3772 push @$devices, '-device', $kbd if defined($kbd);
3773 }
b467f79a 3774
e5d611c3 3775 my $bootorder = device_bootorder($conf);
2141a802 3776
74c17b7a 3777 # host pci device passthrough
9b71c34d
DC
3778 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3779 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
1e3baf05
DM
3780
3781 # usb devices
ae36393d 3782 my $usb_dev_features = {};
2ea5fb7e 3783 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
ae36393d 3784
4df98f2f 3785 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
0cf8d56c 3786 $conf, $usb_dev_features, $bootorder, $machine_version);
d40e5e18 3787 push @$devices, @usbdevices if @usbdevices;
2141a802 3788
1e3baf05 3789 # serial devices
bae179aa 3790 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
fa3b3ce0
TL
3791 my $path = $conf->{"serial$i"} or next;
3792 if ($path eq 'socket') {
3793 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3794 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
7bd9abd2 3795 # On aarch64, serial0 is the UART device. QEMU only allows
fa3b3ce0
TL
3796 # connecting UART devices via the '-serial' command line, as
3797 # the device has a fixed slot on the hardware...
3798 if ($arch eq 'aarch64' && $i == 0) {
3799 push @$devices, '-serial', "chardev:serial$i";
9f9d2fb2 3800 } else {
9f9d2fb2
DM
3801 push @$devices, '-device', "isa-serial,chardev=serial$i";
3802 }
fa3b3ce0
TL
3803 } else {
3804 die "no such serial device\n" if ! -c $path;
e35eb876 3805 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
fa3b3ce0 3806 push @$devices, '-device', "isa-serial,chardev=serial$i";
34978be3 3807 }
1e3baf05
DM
3808 }
3809
3810 # parallel devices
1989a89c 3811 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
34978be3 3812 if (my $path = $conf->{"parallel$i"}) {
19672434 3813 die "no such parallel device\n" if ! -c $path;
e35eb876 3814 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
4c5dbaf6 3815 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
5bdcf937 3816 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
34978be3 3817 }
1e3baf05
DM
3818 }
3819
b01de199 3820 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
2e7b5925 3821 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
1cc5ed1b 3822 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
b01de199 3823 push @$devices, @$audio_devs;
2e7b5925 3824 }
19672434 3825
a55d0f71
FS
3826 # Add a TPM only if the VM is not a template,
3827 # to support backing up template VMs even if the TPM disk is write-protected.
3828 add_tpm_device($vmid, $devices, $conf) if (!PVE::QemuConfig->is_template($conf));
f9dde219 3829
1e3baf05
DM
3830 my $sockets = 1;
3831 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3832 $sockets = $conf->{sockets} if $conf->{sockets};
3833
3834 my $cores = $conf->{cores} || 1;
3bd18e48 3835
de9d1e55 3836 my $maxcpus = $sockets * $cores;
76267728 3837
de9d1e55 3838 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
76267728 3839
de9d1e55
AD
3840 my $allowed_vcpus = $cpuinfo->{cpus};
3841
483ceeab 3842 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
1e3baf05 3843
483ceeab 3844 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
69c81430
AD
3845 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3846 for (my $i = 2; $i <= $vcpus; $i++) {
3847 my $cpustr = print_cpu_device($conf,$i);
3848 push @$cmd, '-device', $cpustr;
3849 }
3850
3851 } else {
3852
3853 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3854 }
1e3baf05
DM
3855 push @$cmd, '-nodefaults';
3856
dbea4415 3857 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
1e3baf05 3858
0f704640 3859 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
1e3baf05 3860
6b64503e 3861 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
1e3baf05 3862
84902837 3863 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
4df98f2f
TL
3864 push @$devices, '-device', print_vga_device(
3865 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
6f070e39
TL
3866
3867 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3868
d036e418 3869 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
378ad769 3870 push @$cmd, '-vnc', "unix:$socket,password=on";
b7be4ba9 3871 } else {
55655ebc 3872 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
b7be4ba9
AD
3873 push @$cmd, '-nographic';
3874 }
3875
1e3baf05 3876 # time drift fix
6b64503e 3877 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
8c559505 3878 my $useLocaltime = $conf->{localtime};
1e3baf05 3879
4317f69f
AD
3880 if ($winversion >= 5) { # windows
3881 $useLocaltime = 1 if !defined($conf->{localtime});
7a131888 3882
4317f69f
AD
3883 # use time drift fix when acpi is enabled
3884 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3885 $tdf = 1 if !defined($conf->{tdf});
462e8d19 3886 }
4317f69f 3887 }
462e8d19 3888
4317f69f
AD
3889 if ($winversion >= 6) {
3890 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
17bacc21 3891 push @$machineFlags, 'hpet=off';
1e3baf05
DM
3892 }
3893
8c559505
DM
3894 push @$rtcFlags, 'driftfix=slew' if $tdf;
3895
2f6f002c 3896 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
8c559505
DM
3897 push @$rtcFlags, "base=$conf->{startdate}";
3898 } elsif ($useLocaltime) {
3899 push @$rtcFlags, 'base=localtime';
3900 }
1e3baf05 3901
58c64ad5
SR
3902 if ($forcecpu) {
3903 push @$cmd, '-cpu', $forcecpu;
3904 } else {
2f6f002c 3905 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
58c64ad5 3906 }
519ed28c 3907
dafb728c
AD
3908 PVE::QemuServer::Memory::config(
3909 $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd);
370b05e7 3910
1e3baf05
DM
3911 push @$cmd, '-S' if $conf->{freeze};
3912
b20df606 3913 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
1e3baf05 3914
48657158
MD
3915 my $guest_agent = parse_guest_agent($conf);
3916
3917 if ($guest_agent->{enabled}) {
d036e418 3918 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
378ad769 3919 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
48657158 3920
60f03a11 3921 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
48657158
MD
3922 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3923 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3924 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3925 } elsif ($guest_agent->{type} eq 'isa') {
3926 push @$devices, '-device', "isa-serial,chardev=qga0";
3927 }
ab6a046f
AD
3928 }
3929
e5d611c3
TL
3930 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3931 if ($rng && $version_guard->(4, 1, 2)) {
05853188
SR
3932 check_rng_source($rng->{source});
3933
2cf61f33
SR
3934 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3935 my $period = $rng->{period} // $rng_fmt->{period}->{default};
2cf61f33
SR
3936 my $limiter_str = "";
3937 if ($max_bytes) {
3938 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3939 }
3940
2cf61f33 3941 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
2cf61f33
SR
3942 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3943 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3944 }
3945
1d794448 3946 my $spice_port;
2fa3151e 3947
f8ea1b30 3948 if ($qxlnum || $vga->{type} =~ /^virtio/) {
590e698c 3949 if ($qxlnum > 1) {
ac087616 3950 if ($winversion){
2f6f002c 3951 for (my $i = 1; $i < $qxlnum; $i++){
4df98f2f
TL
3952 push @$devices, '-device', print_vga_device(
3953 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
590e698c
DM
3954 }
3955 } else {
3956 # assume other OS works like Linux
55655ebc
DC
3957 my ($ram, $vram) = ("134217728", "67108864");
3958 if ($vga->{memory}) {
3959 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3960 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3961 }
3962 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3963 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
2fa3151e
AD
3964 }
3965 }
3966
d559309f 3967 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
95a4b4a9 3968
af0eba7e 3969 my $pfamily = PVE::Tools::get_host_address_family($nodename);
91152441
WB
3970 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3971 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
4d316a63
AL
3972
3973 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3974 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3975 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3976
91152441
WB
3977 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3978 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
943340a6 3979
4df98f2f
TL
3980 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3981 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
caab114a
TL
3982 if ($spice_enhancement->{foldersharing}) {
3983 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3984 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3985 }
c4df18db 3986
caab114a 3987 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
4df98f2f
TL
3988 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3989 if $spice_enhancement->{videostreaming};
3990
caab114a 3991 push @$devices, '-spice', "$spice_opts";
1011b570
DM
3992 }
3993
8d9ae0d2
DM
3994 # enable balloon by default, unless explicitly disabled
3995 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3326ae19 3996 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
c70e4ec3
AD
3997 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3998 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3999 push @$devices, '-device', $ballooncmd;
8d9ae0d2 4000 }
1e3baf05 4001
0ea9541d
DM
4002 if ($conf->{watchdog}) {
4003 my $wdopts = parse_watchdog($conf->{watchdog});
3326ae19 4004 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
0a40e8ea 4005 my $watchdog = $wdopts->{model} || 'i6300esb';
5bdcf937
AD
4006 push @$devices, '-device', "$watchdog$pciaddr";
4007 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
0ea9541d
DM
4008 }
4009
1e3baf05 4010 my $vollist = [];
941e0c42 4011 my $scsicontroller = {};
26ee04b6 4012 my $ahcicontroller = {};
cdd20088 4013 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
1e3baf05 4014
5881b913
DM
4015 # Add iscsi initiator name if available
4016 if (my $initiator = get_initiator_name()) {
4017 push @$devices, '-iscsi', "initiator-name=$initiator";
4018 }
4019
912792e2 4020 PVE::QemuConfig->foreach_volume($conf, sub {
1e3baf05
DM
4021 my ($ds, $drive) = @_;
4022
ff1a2432 4023 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3f11f0d7 4024 check_volume_storage_type($storecfg, $drive->{file});
1e3baf05 4025 push @$vollist, $drive->{file};
ff1a2432 4026 }
afdb31d5 4027
4dcce9ee
TL
4028 # ignore efidisk here, already added in bios/fw handling code above
4029 return if $drive->{interface} eq 'efidisk';
f9dde219
SR
4030 # similar for TPM
4031 return if $drive->{interface} eq 'tpmstate';
4dcce9ee 4032
1e3baf05 4033 $use_virtio = 1 if $ds =~ m/^virtio/;
3b408e82 4034
2141a802 4035 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3b408e82 4036
2f6f002c 4037 if ($drive->{interface} eq 'virtio'){
51f492cd
AD
4038 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
4039 }
4040
2f6f002c 4041 if ($drive->{interface} eq 'scsi') {
cdd20088 4042
ee034f5c 4043 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
6731a4cf 4044
b8fb1c03
SR
4045 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
4046 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
4047
3326ae19 4048 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
a1b7d579 4049 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
fc8b40fd
AD
4050
4051 my $iothread = '';
4052 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
4053 $iothread .= ",iothread=iothread-$controller_prefix$controller";
4054 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
e7a5104d 4055 } elsif ($drive->{iothread}) {
d80ad18c
MH
4056 log_warn(
4057 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
4058 );
fc8b40fd
AD
4059 }
4060
6e11f143
AD
4061 my $queues = '';
4062 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4063 $queues = ",num_queues=$drive->{queues}";
370b05e7 4064 }
6e11f143 4065
4df98f2f
TL
4066 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4067 if !$scsicontroller->{$controller};
cdd20088 4068 $scsicontroller->{$controller}=1;
2f6f002c 4069 }
3b408e82 4070
26ee04b6 4071 if ($drive->{interface} eq 'sata') {
2f6f002c 4072 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3326ae19 4073 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4df98f2f
TL
4074 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4075 if !$ahcicontroller->{$controller};
2f6f002c 4076 $ahcicontroller->{$controller}=1;
26ee04b6 4077 }
46f58b5f 4078
5921764c
SR
4079 my $pbs_conf = $pbs_backing->{$ds};
4080 my $pbs_name = undef;
4081 if ($pbs_conf) {
4082 $pbs_name = "drive-$ds-pbs";
4083 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4084 }
4085
6d5673c3
SR
4086 my $drive_cmd = print_drive_commandline_full(
4087 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
3dc33a72
FG
4088
4089 # extra protection for templates, but SATA and IDE don't support it..
75748d44 4090 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4ef13a7f 4091
15b21acc 4092 push @$devices, '-drive',$drive_cmd;
4df98f2f
TL
4093 push @$devices, '-device', print_drivedevice_full(
4094 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
1e3baf05
DM
4095 });
4096
cc4d6182 4097 for (my $i = 0; $i < $MAX_NETS; $i++) {
2141a802
SR
4098 my $netname = "net$i";
4099
4100 next if !$conf->{$netname};
4101 my $d = parse_net($conf->{$netname});
d0a86b24 4102 next if !$d;
4ddd2ca2 4103 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
1e3baf05 4104
d0a86b24 4105 $use_virtio = 1 if $d->{model} eq 'virtio';
1e3baf05 4106
2141a802 4107 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
1e3baf05 4108
2141a802 4109 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
d0a86b24 4110 push @$devices, '-netdev', $netdevfull;
5bdcf937 4111
d0a86b24 4112 my $netdevicefull = print_netdevice_full(
0c03a390 4113 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4df98f2f 4114
d0a86b24 4115 push @$devices, '-device', $netdevicefull;
5bdcf937 4116 }
1e3baf05 4117
6dbcb073 4118 if ($conf->{ivshmem}) {
4df98f2f 4119 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
e3c27a6a 4120
6dbcb073
DC
4121 my $bus;
4122 if ($q35) {
4123 $bus = print_pcie_addr("ivshmem");
4124 } else {
4125 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4126 }
e3c27a6a
TL
4127
4128 my $ivshmem_name = $ivshmem->{name} // $vmid;
4129 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4130
6dbcb073 4131 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4df98f2f
TL
4132 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4133 .",size=$ivshmem->{size}M";
6dbcb073
DC
4134 }
4135
2513b862
DC
4136 # pci.4 is nested in pci.1
4137 $bridges->{1} = 1 if $bridges->{4};
4138
3326ae19
TL
4139 if (!$q35) { # add pci bridges
4140 if (min_version($machine_version, 2, 3)) {
fc79e813
AD
4141 $bridges->{1} = 1;
4142 $bridges->{2} = 1;
4143 }
6731a4cf 4144 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
2513b862
DC
4145 }
4146
4147 for my $k (sort {$b cmp $a} keys %$bridges) {
4148 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
13d68979
SR
4149
4150 my $k_name = $k;
4151 if ($k == 2 && $legacy_igd) {
4152 $k_name = "$k-igd";
4153 }
3326ae19 4154 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
2513b862 4155 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
3326ae19
TL
4156
4157 if ($q35) { # add after -readconfig pve-q35.cfg
2513b862
DC
4158 splice @$devices, 2, 0, '-device', $devstr;
4159 } else {
4160 unshift @$devices, '-device', $devstr if $k > 0;
f8e83f05 4161 }
19672434
DM
4162 }
4163
ac0077cc
SR
4164 if (!$kvm) {
4165 push @$machineFlags, 'accel=tcg';
4166 }
4167
e4263214 4168 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
e5a6919c 4169
ac0077cc
SR
4170 my $machine_type_min = $machine_type;
4171 if ($add_pve_version) {
4172 $machine_type_min =~ s/\+pve\d+$//;
4173 $machine_type_min .= "+pve$required_pve_version";
4174 }
4175 push @$machineFlags, "type=${machine_type_min}";
4176
5bdcf937 4177 push @$cmd, @$devices;
2f6f002c
TL
4178 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4179 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4180 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
8c559505 4181
7ceade4c
DC
4182 if (my $vmstate = $conf->{vmstate}) {
4183 my $statepath = PVE::Storage::path($storecfg, $vmstate);
24d1f93a 4184 push @$vollist, $vmstate;
7ceade4c 4185 push @$cmd, '-loadstate', $statepath;
b85666cf 4186 print "activating and using '$vmstate' as vmstate\n";
7ceade4c
DC
4187 }
4188
85fcf79e
FG
4189 if (PVE::QemuConfig->is_template($conf)) {
4190 # needed to workaround base volumes being read-only
4191 push @$cmd, '-snapshot';
4192 }
4193
76350670
DC
4194 # add custom args
4195 if ($conf->{args}) {
4196 my $aa = PVE::Tools::split_args($conf->{args});
4197 push @$cmd, @$aa;
4198 }
4199
9b71c34d 4200 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
1e3baf05 4201}
19672434 4202
05853188
SR
# Sanity-check the host-side entropy source before a VirtIO RNG device is created.
#
# Parameters:
#   $source - path of the host file backing the RNG device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the first line of the
# kernel's 'rng_current' sysfs file reads 'none'. The return value is not meaningful.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    # the remaining check only concerns the hardware RNG pass-through
    return if $source ne '/dev/hwrng';

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    return if file_read_firstline($rng_current) ne 'none';

    # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
    # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
    die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
        ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
        ." to the host.\n";
}
4219
# Ask the running VM for the port of its SPICE server.
#
# Parameters:
#   $vmid - ID of the VM to query.
#
# Issues the 'query-spice' monitor command and returns the 'tls-port' entry of the reply,
# falling back to 'port' if 'tls-port' is false. Dies with "no spice port" if both are false.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4227
86fdcfb2
DA
# Collect the IDs of devices currently present in a running VM.
#
# Parameters:
#   $vmid - ID of the VM to query.
#
# Returns a hash reference keyed by device ID, built from four monitor queries:
#   * 'query-pci':   every PCI device with a true 'qdev_id' (bridges are descended into;
#                    for a bridge the value is 1 plus the number of devices behind it)
#   * 'query-block': block devices named 'drive-<id>', recorded as <id>
#   * 'query-mice':  the key 'tablet' if a 'QEMU HID Tablet' is listed
#   * 'qom-list' on '/machine/peripheral': entries named usb<N> or usbredirdev<N>
# All values other than bridge counts are 1.
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    # seed the work queue with the devices of every top-level PCI bus
    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my @queue;
    for my $bus (@$pci_buses) {
        push @queue, @{$bus->{devices}};
    }

    # first-in-first-out processing, so nested bridges are handled level by level
    while (@queue) {
        my $dev = shift @queue;

        my $id = $dev->{'qdev_id'};
        $devices->{$id} = 1 if $id;

        my $bridge = $dev->{'pci_bridge'};
        next if !$bridge || !$bridge->{devices};

        my $children = $bridge->{devices};
        $devices->{$id} += scalar(@$children);
        push @queue, @$children;
    }

    my $block_devs = mon_cmd($vmid, 'query-block');
    for my $block_dev (@$block_devs) {
        if (my ($drive_id) = $block_dev->{device} =~ m/^drive-(\S+)/) {
            $devices->{$drive_id} = 1;
        }
    }

    my $pointer_devs = mon_cmd($vmid, 'query-mice');
    for my $pointer (@$pointer_devs) {
        next if $pointer->{name} ne 'QEMU HID Tablet';
        $devices->{tablet} = 1;
        last;
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $peripheral (@$peripherals) {
        my $name = $peripheral->{name};
        $devices->{$name} = 1 if $name =~ m/^usb(?:redirdev)?\d+$/;
    }

    return $devices;
}
4277
# Hot-plug a single device into a running VM.
#
# Parameters:
#   $storecfg     - storage configuration, handed on to the drive helpers
#   $conf         - VM configuration hash
#   $vmid         - ID of the running VM
#   $deviceid     - ID of the device to add; selects the code path below:
#                   'tablet', 'keyboard', usbredirdev<N>, usb<N>, virtio<N>,
#                   virtioscsi<N>/scsihw<N>, scsi<N>, net<N>, pci.<N> (non-q35 only)
#   $device       - parsed device description (drive, net or usb entry, depending on $deviceid)
#   $arch         - guest architecture, handed on to the print_* helpers
#   $machine_type - machine type, handed on to the print_* helpers
#
# Returns 1 if the device was added or is already listed by vm_devices_list(). Returns
# nothing (bare return) for a net device when qemu_netdevadd() yields a false value.
# Dies for unknown device IDs and re-raises errors of the underlying helpers. Where the
# original error came from a step after the backend (drive/netdev) was created, the backend
# is removed again on a best-effort basis before the error is re-raised.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    # run the cleanup callback (a failure there is only warned about), then re-raise $err
    my $cleanup_and_rethrow = sub {
        my ($err, $cleanup) = @_;
        eval { $cleanup->(); };
        warn $@ if $@;
        die $err;
    };

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $index = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$index");
        qemu_deviceadd(
            $vmid, PVE::QemuServer::USB::print_spice_usbdevice($index, "xhci", $index + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        my $port = $1 + 1;
        qemu_deviceadd(
            $vmid,
            PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $port),
        );
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, $devicefull);
        eval { qemu_deviceaddverify($vmid, $deviceid); };
        if (my $err = $@) {
            $cleanup_and_rethrow->($err, sub { qemu_drivedel($vmid, $deviceid); });
        }
    } elsif ($deviceid =~ m/^(?:virtioscsi|scsihw)\d+$/) {
        my $scsihw = $conf->{scsihw} // "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings are only applied to virtioscsi<N> controllers
        my $is_virtioscsi = $deviceid =~ m/^virtioscsi\d+$/;

        if ($is_virtioscsi && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($is_virtioscsi && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            $cleanup_and_rethrow->($err, sub { qemu_drivedel($vmid, $deviceid); });
        }
    } elsif ($deviceid =~ m/^net\d+$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the NIC device string is built from the machine type returned by qemu_machine_pxe(),
        # not from the $machine_type argument
        my $net_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($net_machine_type);
        (my $use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($net_machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid,
            $conf,
            $device,
            $deviceid,
            undef,
            $use_old_bios_files,
            $arch,
            $net_machine_type,
            $machine_version,
        );
        qemu_deviceadd($vmid, $netdevicefull);
        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            $cleanup_and_rethrow->($err, sub { qemu_netdevdel($vmid, $deviceid); });
        }
    } elsif (!$q35 && $deviceid =~ m/^pci\.(\d+)$/) {
        my $bridgeid = $1;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4374
3eec5767 4375# fixme: this should raise exceptions on error!
# Hot-unplug a single device from a running VM.
#
# Parameters:
#   $vmid     - ID of the running VM
#   $conf     - VM configuration hash (used to look up boot disks, drive entries and scsihw)
#   $deviceid - ID of the device to remove
#
# Returns 1 if the device was removed or is not listed by vm_devices_list() in the first
# place. Dies if $deviceid is one of the configured boot disks or has no known unplug
# procedure; errors of the underlying helpers propagate unchanged.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    my $is_bootdisk = grep { $_ eq $deviceid } @$bootdisks;
    die "can't unplug bootdisk '$deviceid'\n" if $is_bootdisk;

    if (grep { $deviceid eq $_ } qw(tablet keyboard xhci)) {
        # removed without waiting for the device to actually disappear
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(?:usbredirdev|usb|virtioscsi|scsihw)\d+$/) {
        # plain devices without a separate backend: remove and verify
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the per-disk controller
        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive) if $single_controller;
    } elsif ($deviceid =~ m/^net\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4423
c60cad61
DC
# Add a 'spicevmc' character device of type 'usbredir' with the given $id to
# VM $vmid via the 'chardev-add' monitor command.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => {
            type => "usbredir",
        },
    };

    return mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4437
5e5dcb73
DA
# Hot-add a device to VM $vmid via the 'device_add' monitor command.
#
# $devicefull is a device string of the form "<driver>,key=value,...". It is
# prefixed with "driver=" and split on every '=' and ',' into the option hash
# passed to the monitor, so values containing either character are not
# supported by this helper.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %device_opts = split(/[=,]/, "driver=" . $devicefull);

    return mon_cmd($vmid, "device_add", %device_opts);
}
afdb31d5 4446
# Request removal of device $deviceid from VM $vmid via the 'device_del'
# monitor command and return the monitor's (scalar) response.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $response = mon_cmd($vmid, "device_del", id => $deviceid);
    return $response;
}
4452
# Create the iothread object "iothread-$deviceid" in VM $vmid, but only when
# $device has its 'iothread' flag set and the object does not exist yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
    }
}
4461
# Remove the iothread object "iothread-$deviceid" from VM $vmid, but only when
# $device has its 'iothread' flag set and the object currently exists.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
4470
# Create a QOM object of type $qomtype with id $objectid in VM $vmid via the
# 'object-add' monitor command. Returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my @object_spec = (id => $objectid, "qom-type" => $qomtype);
    mon_cmd($vmid, "object-add", @object_spec);

    return 1;
}
4478
# Delete the QOM object with id $objectid from VM $vmid via the 'object-del'
# monitor command. Returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my @object_spec = (id => $objectid);
    mon_cmd($vmid, "object-del", @object_spec);

    return 1;
}
4486
# Add the block backend for drive $device to VM $vmid using the HMP
# 'drive_add' command.
#
# io_uring is enabled for the generated drive string when the running QEMU is
# at least version 6.0. Returns 1 on success, dies with the monitor output
# otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $use_io_uring = min_version($running_version, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $use_io_uring);

    # backslashes have to be doubled inside the quoted HMP argument
    my $escaped = $drive =~ s/\\/\\\\/gr;
    my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$escaped\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $ret\n" if $ret !~ m/OK/s;

    return 1;
}
afdb31d5 4501
# Remove the block backend "drive-$deviceid" from VM $vmid using the HMP
# 'drive_del' command. Returns 1 on success, dies with the monitor output
# otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    # an empty reply means success
    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    if ($output eq "" || $output =~ m/Device \'.*?\' not found/s) {
        return 1;
    }

    die "deleting drive $deviceid failed : $output\n";
}
f19d1c47 4515
# Wait until device $deviceid shows up in the device list of VM $vmid.
#
# The list is polled up to six times with a one second pause after every
# unsuccessful check. Returns 1 once the device is listed, dies otherwise.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        return 1 if defined($plugged->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
afdb31d5 4527
5e5dcb73
DA
4528
# Wait until device $deviceid has disappeared from the device list of VM $vmid.
#
# need to verify that the device is correctly removed as device_del
# is async and empty return is not reliable
#
# The list is polled up to six times with a one second pause after every
# unsuccessful check. Returns 1 once the device is gone, dies otherwise.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        return 1 if !defined($plugged->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4543
# Make sure the SCSI controller that drive $device attaches to exists in VM
# $vmid, hot-plugging it when it is missing from the device list. Returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # only the controller number and its id prefix are needed here
    my (undef, $controller, $prefix) = scsihw_infos($conf, $device);
    my $controller_id = "$prefix$controller";

    my $plugged = vm_devices_list($vmid);

    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type)
        if !defined($plugged->{$controller_id});

    return 1;
}
4558
8ce30dde
AD
# Unplug the SCSI controller used by drive $opt from VM $vmid if no other
# drive needs it any more. Returns 1.
#
# With 'virtio-scsi-single' every drive has its own controller
# ("virtioscsi<index>"), which is unplugged unconditionally. Otherwise the
# shared controller "scsihw<N>" is only unplugged when no plugged SCSI drive
# has an index below the limit computed from scsihw_infos().
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    my $plugged = vm_devices_list($vmid);
    for my $name (keys %$plugged) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        next if $drive->{interface} ne 'scsi';

        # controller is still in use by another drive - keep it
        return 1 if $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4587
# Hot-plug the PCI bridge that $device is attached to, if it is not plugged yet.
#
# $device is the id of the device about to be plugged. print_pci_addr() is
# called only for its side effect of recording the required bridge id(s) in
# $bridges. Nothing is done when no bridge is recorded or its id is below 1.
# Returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    my $bridgeid;

    print_pci_addr($device, $bridges, $arch, $machine_type);

    # picks whichever bridge id the hash iteration yields last
    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() takes ($storecfg, $conf, $vmid, $deviceid, $device,
        # $arch, $machine_type), as its other callers in this file show. A
        # bridge has no device config, so pass undef explicitly; leaving the
        # slot out shifts $arch and $machine_type into the wrong parameters.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4611
25088687
DM
# Set the link state of network device $device in VM $vmid via the 'set_link'
# monitor command; a true $up brings the link up, a false one takes it down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    return mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4618
# Add the network backend for $deviceid to VM $vmid via the 'netdev_add'
# monitor command. Returns 1.
#
# The netdev string from print_netdev_full() is split on '=' and ',' into
# the option hash; 'vhost' is converted to a JSON boolean and 'queues' to a
# number before the options are sent to the monitor.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %netdev_opts = split(/[=,]/, $netdev);

    if (defined($netdev_opts{vhost})) {
        my $enabled = PVE::JSONSchema::parse_boolean($netdev_opts{vhost});
        $netdev_opts{vhost} = JSON::boolean($enabled);
    }

    # force numeric context so the value is not sent as a string
    $netdev_opts{queues} = $netdev_opts{queues} + 0 if defined($netdev_opts{queues});

    mon_cmd($vmid, "netdev_add", %netdev_opts);

    return 1;
}
4636
# Remove the network backend with id $deviceid from VM $vmid via the
# 'netdev_del' monitor command.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my @backend = (id => $deviceid);
    return mon_cmd($vmid, "netdev_del", @backend);
}
4642
# Replace the USB device $deviceid of VM $vmid with the one described by
# $device. Does nothing when $device is false.
#
# The old device is unplugged first, the xhci controller is added when it is
# missing from the device list, and then the new device is plugged.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    my $plugged = vm_devices_list($vmid);

    if (!$plugged->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        my $controller = PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr);
        qemu_deviceadd($vmid, $controller);
    }

    # add the new one
    return vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4662
# Hot-plug or hot-unplug vCPUs so that VM $vmid ends up with $vcpus of them.
#
# $vcpus defaults to the maximum (sockets * cores) when false. Dies when more
# vCPUs than the maximum are requested, when the running VM's vCPU count does
# not match the configuration before adding, or when unplugging is requested
# on a machine version older than 2.7. After every single successful step
# $conf->{vcpus} is updated and the configuration is written back.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count - no longer used; 'sockets' wins
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $maxcpus = $sockets * ($conf->{cores} || 1);

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # Poll 'query-cpus-fast' once per second until the VM reports $expected
    # vCPUs and return that list; raise a parameter exception with $failure
    # once more than $max_retries unsuccessful polls have happened.
    my $await_cpu_count = sub {
        my ($expected, $max_retries, $failure) = @_;

        for (my $attempt = 0; ; $attempt++) {
            my $reported = mon_cmd($vmid, "query-cpus-fast");
            return $reported if scalar(@{$reported}) == $expected;
            raise_param_exc({ vcpus => $failure }) if $attempt > $max_retries;
            sleep 1;
        }
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # remove the highest numbered vCPU first
        for my $cpu (reverse(($vcpus + 1) .. $currentvcpus)) {
            qemu_devicedel($vmid, "cpu$cpu");

            my $reported = $await_cpu_count->($cpu - 1, 5, "error unplugging cpu$cpu");

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$reported});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_cpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_cpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for my $cpu (($currentvcpus + 1) .. $vcpus) {
            my $cpustr = print_cpu_device($conf, $cpu);
            qemu_deviceadd($vmid, $cpustr);

            my $reported = $await_cpu_count->($cpu, 10, "error hotplugging cpu$cpu");

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$reported});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # older machine versions use the 'cpu-add' command with zero based ids
        for my $cpu_id ($currentvcpus .. ($vcpus - 1)) {
            mon_cmd($vmid, "cpu-add", id => int($cpu_id));
        }
    }
}
4737
# Apply I/O throttling limits to block device $deviceid of VM $vmid via the
# 'block_set_io_throttle' monitor command. Does nothing when the VM is not
# running.
#
# After ($vmid, $deviceid) the limits are positional, in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every value is truncated to an integer before it is sent.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # monitor argument names, in the same order as the positional parameters
    my @limit_names = qw(
        bps bps_rd bps_wr
        iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max
        iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle_args = map { $limit_names[$_] => int($limits[$_]) } (0 .. $#limit_names);

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4769
c1175c92
AD
# Resize volume $volid to $size bytes on the storage layer and, when VM $vmid
# is running, tell QEMU about the new size of block device $deviceid.
#
# The size reported to QEMU is rounded up to the next multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running);

    return if !$running;

    # pad up to a full KiB
    $size += (1024 - $size % 1024) % 1024;

    my @resize_args = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );

    mon_cmd($vmid, "block_resize", @resize_args);
}
4790
1ab0057c
AD
# Create snapshot $snap of volume $volid.
#
# QEMU takes an internal snapshot of block device $deviceid when VM $vmid is
# running and do_snapshots_with_qemu() says so; in every other case the
# storage layer creates the snapshot.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if (!$running || !do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4802
fc46aff9
AD
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as "running" when VM $vmid is running AND its
# current configuration references $volid. In that case, and if
# do_snapshots_with_qemu() agrees, QEMU deletes the internal snapshot;
# otherwise the storage layer does, and is told whether the volume is in use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # VM is running - but is this volume actually part of its config?
        $in_use = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $mark_if_used = sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($conf, $mark_if_used);
    }

    if (!$in_use || !do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4824
# Set the migration capabilities of VM $vmid.
#
# Every capability reported by 'query-migrate-capabilities' is explicitly
# switched on or off according to the table below; capabilities missing from
# the table are switched off. 'dirty-bitmaps' is enabled only if
# 'query-proxmox-support' reports the matching PBS dirty-bitmap feature
# ('...-savevm' when $savevm is true, '...-migration' otherwise); a failure
# of that query is ignored and treated as "not supported".
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = 'pbs-dirty-bitmap-migration';
    $bitmap_prop = 'pbs-dirty-bitmap-savevm' if $savevm;

    my $wanted = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => ($qemu_support->{$bitmap_prop} ? 1 : 0),
    };

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => ($wanted->{$name} ? JSON::true : JSON::false),
        };
    } @$supported;

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
a89fded1 4855
# Call $func->($volid, $attributes, @param) once for every volume referenced
# by $conf - in the current config, its pending section or any snapshot.
#
# $attributes is a hash accumulated over all references to the volume:
#   cdrom                  - 1 only if every reference is a CD-ROM drive
#   replicate              - 1 if any reference has replication enabled
#                            (a missing 'replicate' property counts as enabled)
#   shared                 - 1 if any reference is marked shared
#   is_unused              - 1 if referenced by an 'unusedN' key
#   is_attached            - 1 if referenced by the current config (neither
#                            snapshot nor pending) while not yet marked unused
#   referenced_in_snapshot - hash of snapshot names referencing the volume
#   referenced_in_pending  - 1 if referenced in the pending section
#   size                   - first non-zero size seen
#   is_vmstate             - 1 if referenced by the 'vmstate' key
#   is_tpmstate            - 1 if referenced by the 'tpmstate0' key
#   drivename              - last valid drive name referencing the volume
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $record_volume = sub {
        my ($key, $drive, $snapname, $pending) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        $info->{replicate} //= 0;
        $info->{replicate} = 1 if $drive->{replicate} // 1;

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{is_unused} //= 0;
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $info->{is_attached} //= 0;
        $info->{is_attached} = 1
            if !$info->{is_unused} && !defined($snapname) && !$pending;

        $info->{referenced_in_snapshot}->{$snapname} = 1 if defined($snapname);

        $info->{referenced_in_pending} = 1 if $pending;

        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} //= 0;
        $info->{is_vmstate} = 1 if $key eq 'vmstate';

        $info->{is_tpmstate} //= 0;
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    # current configuration
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $record_volume);

    # pending changes, if there are any
    if (defined($conf->{pending}) && $conf->{pending}->%*) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{pending}, $include_opts, $record_volume, undef, 1);
    }

    # all snapshots
    for my $snapname (keys %{$conf->{snapshots}}) {
        my $snapshot = $conf->{snapshots}->{$snapname};
        PVE::QemuConfig->foreach_volume_full($snapshot, $include_opts, $record_volume, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4920
# Options that vmconfig_hotplug_pending() moves from the pending section
# straight into the configuration, before and independent of any hotplug
# action.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        description
        hookscript
        lock
        migrate_downtime
        migrate_speed
        name
        onboot
        protection
        shares
        startup
        tags
        vmstatestorage
    )
};

# every cloud-init option is fast-pluggable as well
$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4939
3a11fadb
DM
# hotplug changes in [PENDING]
#
# Applies as many pending changes as possible to the running VM $vmid and
# moves every successfully applied option from $conf->{pending} into $conf.
#
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages; it is filled with
# "<option> => hotplug problem - <message>" entries. Options that cannot be
# hot-plugged are silently left pending (internally signalled by dying with
# "skip\n").
#
# The configuration is written back before the function returns, and also
# right after the fast-plug options were committed if there were any.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();

    # Keep track, in $conf->{cloudinit}, of how the config diverges from the
    # state the cloud-init drive was last generated from: changed or removed
    # options store their old value there, newly added options are listed in
    # the comma separated 'added' entry.
    my $cloudinit_record_changed = sub {
        my ($conf, $opt, $old, $new) = @_;
        return if !$cloudinit_pending_properties->{$opt};

        my $ci = ($conf->{cloudinit} //= {});

        my $recorded = $ci->{$opt};
        my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');

        if (defined($new)) {
            if (defined($old)) {
                # an existing value is being modified
                if (defined($recorded)) {
                    # the value was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value was changed multiple times, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added and is being changed, do nothing
                } else {
                    # the value is new, record it:
                    $ci->{$opt} = $old;
                }
            } else {
                # a new value is being added
                if (defined($recorded)) {
                    # it was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value had temporarily been removed, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added already, do nothing
                } else {
                    # the value is new, add it
                    $added{$opt} = 1;
                }
            }
        } elsif (!defined($old)) {
            # a non-existent value is being removed? ignore...
        } else {
            # a value is being deleted
            if (defined($recorded)) {
                # a value was already recorded, just keep it
            } elsif ($added{$opt}) {
                # the value was marked as added, remove it
                delete $added{$opt};
            } else {
                # a previously unrecorded value is being removed, record the old value:
                $ci->{$opt} = $old;
            }
        }

        my $added = join(',', sort keys %added);
        $ci->{added} = $added if length($added);
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            my $new = delete $conf->{pending}->{$opt};
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
            $conf->{$opt} = $new;
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $ostype = $conf->{ostype};
    my $version = extract_version($machine_type, get_running_qemu_version($vmid));
    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
    my $usb_hotplug = $hotplug_features->{usb}
        && min_version($version, 7, 1)
        && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});

    # first pass: options that are pending for deletion
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                # 'hotplug' may be unset, guard the match against undef
                die "skip\n" if (($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($defaults->{tablet}) {
                    # tablet/keyboard have no device config: pass undef for
                    # vm_deviceplug()'s $device parameter so that $arch and
                    # $machine_type end up in their own slots
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', undef, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', undef, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            my $old = delete $conf->{$opt};
            $cloudinit_record_changed->($conf, $opt, $old, undef);
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # second pass: options that are pending for addition or change
    my $cloudinit_opt;
    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                # 'hotplug' may be unset in the current config, guard against undef
                die "skip\n" if ($value =~ /memory/)
                    || ($value !~ /memory/ && ($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    # see the deletion pass above: undef fills the $device slot
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', undef, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', undef, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                # NOTE(review): a parse error is swallowed by this eval and
                # its result is not checked before use - confirm that pending
                # usb values are always validated before they get here
                my $d = eval { parse_property_string('pve-qm-usb', $value) };
                my $id = $opt;
                if ($d->{host} =~ m/^spice$/i) {
                    $id = "usbredirdev$index";
                }
                qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $cloudinit_opt = [$opt, $drive];
                    # apply all the other changes first, then generate the cloudinit disk
                    die "skip\n";
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value);
            } elsif ($opt eq 'cpuunits') {
                my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
                $cgroup->change_cpu_shares($new_cpuunits);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    # the cloud-init drive was deferred above so that it is generated from a
    # config that already contains all other applied changes
    if (defined($cloudinit_opt)) {
        my ($opt, $drive) = @$cloudinit_opt;
        my $value = $conf->{pending}->{$opt};
        eval {
            my $temp = {%$conf, $opt => $value};
            PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
            vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                $vmid, $opt, $value, $arch, $machine_type);
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    # unplug xhci controller if no usb device is left
    if ($usb_hotplug) {
        my $has_usb = 0;
        for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
            next if !defined($conf->{"usb$i"});
            $has_usb = 1;
            last;
        }
        if (!$has_usb) {
            vm_deviceunplug($vmid, $conf, 'xhci');
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);

    if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
        PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
    }
}
055d554d 5224
3dc38fbb
WB
# Free the volume behind drive $key if that is allowed.
#
# Only acts when $force is set or $key is an 'unused' entry, and the drive is
# not a CD-ROM. Returns 1 when the volume was freed or when VM $vmid does not
# own it (the caller may then drop it from the config), and nothing when the
# drive was not eligible. Dies when the permission check fails or the volume
# is still in use.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    my $eligible = ($force || $key =~ /^unused/) && !drive_is_cdrom($drive, 1);
    return if !$eligible;

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
5247
# Handle removal of the drive option $opt from $conf.
#
# Without $force the drive is only detached: it gets registered as an unused drive.
# With $force the caller needs 'VM.Config.Disk' on the guest and the volume is handed to
# try_deallocate_drive().
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # plain detach - keep the volume around as unusedN
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5263
98bc3aeb
OB
5264
5265
# Apply all pending changes of a (stopped) guest to its config ("cold plug").
#
# Parameters:
#   $vmid            - guest ID
#   $conf            - guest config hash; modified in place and written back
#   $storecfg        - storage configuration
#   $errors          - optional hash ref; receives one message per option that failed to apply
#   $skip_cloud_init - if true, never regenerate the cloud-init config here
#
# Pending deletions are processed first, then additions/changes. A failure for one option is
# recorded (and warned about) and does not abort processing of the remaining options.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $add_apply_error = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt =~ m/^unused/) {
                die "internal error";
            } elsif (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            }
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
        } else {
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # undef means "not decided yet", 0 means "explicitly disabled by the caller"
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    # add/change
    # Sorted like the delete loop above: replaced drives get registered as unused drives, so a
    # stable order keeps that registration and the order of warnings reproducible instead of
    # depending on Perl's randomized hash order.
    foreach my $opt (sort keys %{$conf->{pending}}) {
        next if $opt eq 'delete'; # just to be sure
        eval {
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                # the currently configured volume is being replaced, keep it as unused
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
            }
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
        } else {

            if (is_valid_drivename($opt)) {
                my $drive = parse_drive($opt, $conf->{pending}->{$opt});
                $generate_cloudinit //= 1 if drive_is_cloudinit($drive);
            }

            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
    if ($generate_cloudinit) {
        if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
            # After successful generation and if there were changes to be applied, update the
            # config to drop the {cloudinit} entry.
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    }
}
5332
# Apply a changed/added network device option ($opt => $value) to a running guest.
#
# If only bridge-level settings (bridge, tag, trunks, firewall, rate, link state) differ from
# the current config, they are applied to the existing tap device and 1 is returned.
# Otherwise the NIC is unplugged (if present) and plugged again with the new settings, which
# requires $hotplug; without it the function dies with "skip\n".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        # changes that cannot be applied to an existing device
        my $needs_replug = safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || safe_num_ne($oldnet->{mtu}, $newnet->{mtu})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            my ($nic_index) = $opt =~ m/net(\d+)/;
            die "internal error" if !defined($nic_index);
            my $iface = "tap${vmid}i${nic_index}";

            my $tap_changed = safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($tap_changed) {
                PVE::Network::tap_unplug($iface);

                my ($bridge, $tag, $firewall, $trunks, $rate) =
                    $newnet->@{qw(bridge tag firewall trunks rate)};

                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug($iface, $bridge, $tag, $firewall, $trunks, $rate);
                } else {
                    PVE::Network::tap_plug($iface, $bridge, $tag, $firewall, $trunks, $rate);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5386
74ea2c65
AD
# Decide whether a change of the guest agent option can be applied to a running guest.
#
# Dies with "skip\n" if the option is not set in the current config, or if any property other
# than the hot-pluggable ones differs between the old and the new value. Returns nothing
# otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotplug_options = { fstrim_cloned_disks => 1 };

    my $old_agent = parse_guest_agent($conf);
    my $agent = parse_guest_agent({$opt => $value});

    # first direction catches added/changed options, second one removed options
    for my $direction ([$agent, $old_agent], [$old_agent, $agent]) {
        my ($from, $to) = $direction->@*;

        for my $option (keys %$from) {
            next if defined($hotplug_options->{$option});
            die "skip\n" if safe_string_ne($from->{$option}, $to->{$option});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5409
# Apply a changed/added drive option ($opt => $value) to a running guest.
#
# For an existing CD-ROM drive the medium is ejected/changed via the monitor. For an existing
# disk whose volume stays the same, only throttle settings are applied live; a change of any
# non-hotpluggable property dies with "skip\n". If the volume changes, the old disk is unplugged
# and registered as unused, then the new one is plugged. New disks are plugged if $hotplug is
# set and the bus is neither ide nor sata.
# Returns 1 after an in-place update.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            for my $prop (qw(aio discard iothread queues cache ssd ro)) {
                die "skip\n" if safe_string_ne($drive->{$prop}, $old_drive->{$prop});
            }

            my @rate_props = qw(mbps mbps_rd mbps_wr);
            my @iops_props = qw(iops iops_rd iops_wr);
            my @rate_max_props = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @iops_max_props = qw(iops_max iops_rd_max iops_wr_max);
            my @length_props = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed = 0;
            for my $prop (@rate_props, @iops_props, @rate_max_props, @iops_max_props, @length_props) {
                next if !safe_num_ne($drive->{$prop}, $old_drive->{$prop});
                $throttle_changed = 1;
                last;
            }

            # apply throttle
            if ($throttle_changed) {
                # the config stores MiB/s values, these get scaled by 1024*1024
                my $to_bytes = sub { return ($drive->{$_[0]} || 0)*1024*1024; };

                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { $to_bytes->($_) } @rate_props),
                    (map { $drive->{$_} || 0 } @iops_props),
                    (map { $to_bytes->($_) } @rate_max_props),
                    (map { $drive->{$_} || 0 } @iops_max_props),
                    (map { $drive->{$_} || 1 } @length_props),
                );
            }

            return 1;
        }

        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;
    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5519
9687287b
AD
# Regenerate the cloud-init config of a guest and, if the guest is running, re-insert the
# resulting medium into its cloud-init drive via the monitor.
#
# Does nothing if the config has no cloud-init drive. If several volumes qualify, the last one
# visited by foreach_volume is used.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($cloudinit_ds, $cloudinit_drive);

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($drive);
        ($cloudinit_ds, $cloudinit_drive) = ($ds, $drive);
    });

    return if !$cloudinit_drive;

    # persist the config only if applying the cloud-init config reported success
    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $path = PVE::Storage::path($storecfg, $cloudinit_drive->{file});
        if ($path) {
            mon_cmd($vmid, "eject", force => JSON::true, id => "$cloudinit_ds");
            mon_cmd($vmid, "blockdev-change-medium", id => "$cloudinit_ds", filename => "$path");
        }
    }
}
5550
# called in locked context by incoming migration
#
# Collects the volumes of $conf that live on non-shared storage. CD-ROM drives, 'tpmstate0' and
# drives without a volume are skipped. Returns a hash ref mapping the drive key to
# [$volid, $storeid, $drive, $use_existing, $format], where $use_existing is 1 for volumes
# listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    my $collect_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        my $format = qemu_img_format($scfg, $volname);

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes->{$ds} = [$volid, $storeid, $drive, $use_existing, $format];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_volume);

    return $local_volumes;
}
5579
# called in locked context by incoming migration
#
# Allocates target volumes for the entries of $source_volumes (as produced by
# vm_migrate_get_nbd_disks). Entries flagged as re-usable are taken over as they are and marked
# 'replicated'. For all others the storage ID is mapped through $storagemap and a new volume is
# allocated.
#
# Returns a hash ref: drive key => { drivestr, volid, [replicated] }.
# The drive hashes inside $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);

        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. default format of storage
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        $format = $defFormat if !$format || !grep { $format eq $_ } $validFormats->@*;

        # vdisk_alloc gets the size divided by 1024 - presumably bytes -> KiB, TODO confirm
        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # shallow copy, so that the caller's drive hash keeps pointing at the source volume
        my $newdrive = { %$drive, format => $format, file => $newvolid };

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5615
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Takes the config lock of the guest, runs the pre-start checks and hands over to
# vm_start_nolock, whose result is returned. Sets $params->{resume} as a side effect.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $migratedfrom = $migrate_opts->{migratedfrom};
        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migratedfrom);

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the freshly allocated target volumes
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5659
5660
0c498cca
FG
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force QEMU machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#       sata0 => {
#           repository
#           snapshot
#           keyfile
#           archive
#       },
#       virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
#   offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
#       contained in config
#
# Returns a hash ref that may contain:
#   migrate => { proto, addr, port, uri, unix_sockets } for incoming migrations
#   drives => per-drive NBD export information (including nbd_uri)
#   spice_port => port SPICE listens on (only set for migrated guests)
#
# Dies with "start failed: ..." if QEMU could not be started; volumes get deactivated and PCI
# devices cleaned up in that case.
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    if (!$migratedfrom) {
        if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
            # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
            # $conf->{cloudinit}, so we could just not do this?
            # But we do it above, so for now let's be consistent.
            $conf = PVE::QemuConfig->load_config($vmid); # update/reload
        }
    }

    # override offline migrated volumes, conf is out of date still
    if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
        for my $key (sort keys $offline_volumes->%*) {
            my $parsed = parse_drive($key, $conf->{$key});
            $parsed->{file} = $offline_volumes->{$key};
            $conf->{$key} = print_drive($parsed);
        }
    }

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    # for remote migration the config is available on the target node!
    if (!$migrate_opts->{remote_node}) {
        $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
        $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # lazily determined and cached local address used for migration traffic
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $migrate = $res->{migrate} = { proto => 'tcp' };
            $migrate->{addr} = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            if (!defined($migration_type)) {
                if (defined($datacenterconf->{migration}->{type})) {
                    $migration_type = $datacenterconf->{migration}->{type};
                } else {
                    $migration_type = 'secure';
                }
            }

            if ($migration_type eq 'insecure') {
                $migrate->{addr} = $get_migration_ip->($nodename);
                $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
            $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
            push @$cmd, '-incoming', $migrate->{uri};
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be the default for secure migrations, as an SSH TCP forward tunnel is not
            # reliably ready in time and regularly fails to set up, so use UNIX socket forwards
            my $migrate = $res->{migrate} = { proto => 'unix' };
            $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
            unlink $migrate->{addr};

            $migrate->{uri} = "unix:$migrate->{addr}";
            push @$cmd, '-incoming', $migrate->{uri};
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_reserve_list = [];
    for my $device (values $pci_devices->%*) {
        next if $device->{mdev}; # we don't reserve for mdev devices
        push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
    }

    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);

    eval {
        my $uuid;
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            my ($index) = ($id =~ m/^hostpci(\d+)$/);

            my $chosen_mdev;
            for my $dev ($d->{ids}->@*) {
                my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
                if ($d->{mdev}) {
                    # for mediated devices a failure is not fatal, the next candidate is tried
                    warn $@ if $@;
                    $chosen_mdev = $info;
                    last if $chosen_mdev; # if successful, we're done
                } else {
                    die $@ if $@;
                }
            }

            next if !$d->{mdev};
            die "could not create mediated device\n" if !defined($chosen_mdev);

            # nvidia grid needs the uuid of the mdev as qemu parameter
            if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
                if (defined($conf->{smbios1})) {
                    my $smbios_conf = parse_smbios1($conf->{smbios1});
                    $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
                }
                $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
            }
        }
        push @$cmd, '-uuid', $uuid if defined($uuid);
    };
    if (my $err = $@) {
        eval { cleanup_pci_devices($vmid, $conf) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);


    my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
    eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
    eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);

    my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    my $run_qemu = sub {
        PVE::Tools::run_fork sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        };
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hotplug_features =
                parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
            my $hugepages_topology =
                PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});

            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    # $@ stems from whichever of the two evals above was executed
    if (my $err = $@) {
        # deactivate volumes if start fails
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { cleanup_pci_devices($vmid, $conf) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
    warn $@ if $@;

    if (defined($res->{migrate})) {
        print "migration listens on $res->{migrate}->{uri}\n";
    } elsif ($statefile) {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    #start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        # The migration type is only defaulted for 'tcp' state files above, so it can still be
        # undefined here. Treat that like any other non-tunneled type, without comparing undef.
        my $tunneled = defined($migration_type)
            && ($migration_type eq 'secure' || $migration_type eq 'websocket');

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        if ($nbd_protocol_version > 0 && $tunneled) {
            my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
            $res->{migrate}->{unix_sockets} = [$socket_path];
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        # index the block devices reported by QEMU by their device name
        my $block_info = mon_cmd($vmid, "query-block");
        $block_info = { map { $_->{device} => $_ } $block_info->@* };

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};

            my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};

            mon_cmd(
                $vmid,
                "block-export-add",
                id => "drive-$opt",
                'node-name' => $block_node,
                writable => JSON::true,
                type => "nbd",
                name => "drive-$opt", # NBD export name
            );

            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
        add_nets_bridge_fdb($conf, $vmid);
    }

    if (!defined($conf->{balloon}) || $conf->{balloon}) {
        eval {
            mon_cmd(
                $vmid,
                'qom-set',
                path => "machine/peripheral/balloon0",
                property => "guest-stats-polling-interval",
                value => 2
            );
        };
        log_warn("could not set polling interval for ballooning - $@") if $@;
    }

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
6107
# Return the QEMU command line of a VM as a single string.
#
# $storecfg: storage configuration, passed through to config_to_command()
# $vmid:     ID of the VM whose configuration gets loaded
# $snapname: optional snapshot name; if set, the command line is generated from that
#            snapshot's configuration instead. Dies if the snapshot does not exist.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);
    my ($forcemachine, $forcecpu);

    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname};
        if (!defined($snap_conf)) {
            die "snapshot '$snapname' does not exist\n";
        }

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = ($snap_conf->{runningmachine}, $snap_conf->{runningcpu});

        # keep file digest for API
        $snap_conf->{digest} = $conf->{digest};

        # from here on the snapshot section is used as the configuration
        $conf = $snap_conf;
    }

    my $defaults = load_defaults();
    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
6133
# Send a "system_reset" monitor command to the VM while holding its config lock.
#
# $vmid:     ID of the VM
# $skiplock: if true, the check for a lock entry in the configuration is skipped
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        if (!$skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6146
# Collect the volume IDs referenced by a VM configuration.
#
# Absolute paths (starting with '/') and volume IDs that parse_volume_id() cannot map to a
# storage ID are left out.
#
# Returns: array reference with the collected volume IDs
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid) = @_;

        return if $volid =~ m|^/|;

        # second argument: do not die on unparsable IDs
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        push @volids, $volid;
    };
    foreach_volid($conf, $collect);

    return \@volids;
}
6164
1b189121
DC
# Remove the mediated devices belonging to the VM's hostpciN entries and drop the PCI
# reservation of the VM.
#
# $vmid: ID of the VM
# $conf: VM configuration; only keys matching hostpci<N> are looked at
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys %$conf) {
        next unless $opt =~ m/^hostpci(\d+)$/;
        my $index = $1;

        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$opt});
        next if !$device->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $mdev_dir = "/sys/bus/mdev/devices/$uuid";

        # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
        # out when we do it first. so wait for 10 seconds and then try it
        sleep 10 if $device->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/;

        if (-e $mdev_dir) {
            PVE::SysFSTools::file_write("$mdev_dir/remove", "1");
        }
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6189
# Clean up after a VM was stopped. All steps run inside one eval; an error is only warned
# about and never propagated to the caller.
#
# $storecfg:              storage configuration
# $vmid:                  ID of the VM
# $conf:                  VM configuration
# $keepActive:            if true, volumes are not deactivated and the TPM state is not unmapped
# $apply_pending_changes: if true, vmconfig_apply_pending() is called at the end
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        unlink map { "/var/run/qemu-server/${vmid}.$_" } qw(mon qmp pid vnc qga);

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # just delete it for now, VMs which have this already open are not affected, but
            # new VMs will get a separate one. If this becomes an issue we either add some
            # sort of ref-counting or just add a "don't delete on stop" flag to the ivshmem
            # format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        if ($apply_pending_changes) {
            vmconfig_apply_pending($vmid, $conf, $storecfg);
        }
    };
    warn $@ if $@; # avoid errors - just warn
}
6227
575d19da
DC
# Stop or shut down a running VM - call only in locked context.
#
# $storecfg:   storage configuration, handed to vm_stop_cleanup()
# $vmid:       ID of the VM
# $skiplock:   skip the check for a lock entry in the configuration
# $nocheck:    do not load the configuration (no lock check, no hookscript, no cleanup)
# $timeout:    seconds to wait for the VM to go away. If undefined, the 'down' value of the
#              'startup' option is used for shutdowns, falling back to 60.
# $shutdown:   request a guest shutdown/powerdown instead of sending "quit"
# $force:      on failure or timeout, terminate with SIGTERM (and SIGKILL after another 10
#              seconds) instead of dying
# $keepActive: handed to vm_stop_cleanup()
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    unless ($nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;

        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }

        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # polls once per second until the VM is gone or $limit polls were made; returns the
    # number of polls during which the VM was still running
    my $wait_for_exit = sub {
        my ($limit) = @_;

        my $polls = 0;
        while (($polls < $limit) && check_running($vmid, $nocheck)) {
            $polls++;
            sleep 1;
        }
        return $polls;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout = 60 if !defined($timeout);

        my $waited = $wait_for_exit->($timeout);

        if ($waited < $timeout) {
            # the VM went away in time
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again
    $timeout = 10;

    my $waited = $wait_for_exit->($timeout);

    if ($waited >= $timeout) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
6309
# Stop a VM.
#
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# If $migratedfrom is set, the process is sent SIGTERM directly and the cleanup runs with the
# configuration loaded for that node, without taking the config lock. Otherwise the work is
# done by _do_vm_stop() under the config lock. $force defaults to true for a plain stop
# (no $shutdown).
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    if (!defined($force) && !$shutdown) {
        $force = 1;
    }

    if ($migratedfrom) {
        my $pid = check_running($vmid, $nocheck, $migratedfrom);
        kill 15, $pid if $pid;

        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);
        return;
    }

    my $stop_locked = sub {
        _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
    };
    PVE::QemuConfig->lock_config($vmid, $stop_locked);
}
6330
165411f0
DC
# Reboot a running VM: create a reboot request and shut the VM down under the config lock.
#
# $vmid:    ID of the VM
# $timeout: shutdown timeout, passed on to _do_vm_stop()
#
# On any error the reboot request is cleared again and the error is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            return if !check_running($vmid);

            create_reboot_request($vmid);

            my $storecfg = PVE::Storage::config();
            _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6353
# Suspend a VM, either by just stopping the vCPUs or - with $includestate - by saving the
# VM state to a volume and quitting QEMU.
#
# note: if using the statestorage parameter, the caller has to check privileges
#
# $vmid:         ID of the VM
# $skiplock:     skip the check for a lock entry in the configuration
# $includestate: save the VM state to disk; not possible while a backup lock is held
# $statestorage: storage for the state volume; if not set, find_vmstate_storage() picks one
#                and the 'Datastore.AllocateSpace' permission is checked for non-CLI callers
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    my ($conf, $path, $storecfg, $vmstate);

    # step 1 (locked): check locks and either pause the VM or allocate the state volume
    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        if (!$skiplock && !$is_backing_up) {
            PVE::QemuConfig->check_lock($conf);
        }

        die "cannot suspend to disk during backup\n" if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
        } else {
            $conf->{lock} = 'suspending';
            my $today = strftime("%Y-%m-%d", localtime(time()));
            $storecfg = PVE::Storage::config();

            if (!$statestorage) {
                $statestorage = find_vmstate_storage($conf, $storecfg);
                # check permissions for the storage
                my $rpcenv = PVE::RPCEnvironment::get();
                if ($rpcenv->{type} ne 'cli') {
                    my $authuser = $rpcenv->get_user();
                    $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
                }
            }

            $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
                $vmid, $conf, "suspend-$today", $storecfg, $statestorage, 1);
            $path = PVE::Storage::path($storecfg, $vmstate);
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    });

    if ($includestate) {
        # step 2 (unlocked): save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll once per second until the state is saved or saving failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }

                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};

                die "query-savevm returned status '$status'\n";
            }
        };
        my $err = $@;

        # step 3 (locked): clean up on error, else quit QEMU and mark the VM as suspended
        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            if (!PVE::QemuConfig->has_lock($conf, 'suspending')) {
                die "lock changed unexpectedly\n";
            }

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6448
a20dc58a
FG
# Resume a paused or suspended VM under its config lock.
#
# $nocheck is set when called as part of a migration - in this context the
# location of the config file (source or target node) is not deterministic,
# since migration cannot wait for pmxcfs to process the rename
#
# $vmid:     ID of the VM
# $skiplock: skip the check for a lock entry in the configuration
# $nocheck:  see above; also disables the lock check
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                if (exists($vmlist->{ids}->{$vmid})) {
                    my $node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        my $resume_cmd = 'cont';
        my $reset = 0;
        if (my $status = $res->{status}) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $reset = 1 if $status eq 'shutdown';
        }

        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
6499
5fdbe4f0
DM
# Send a key event to the VM under its config lock. Dies with the monitor's output if the
# "sendkey" command returns anything but an empty string.
#
# $vmid:     ID of the VM
# $skiplock: accepted but not used by this function
# $key:      key name, inserted verbatim into the monitor command
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $sendkey_locked = sub {
        # the loaded configuration itself is not used below
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $sendkey_locked);
}
6512
d6deb7f6
TL
# Check that $authuser may use the bridge, VLAN tag and trunks of every net<N> entry in
# $conf, via PVE::GuestHelpers::check_vnet_access(). 'root@pam' is always allowed.
#
# Returns 1 when no check raised an error.
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_opts = grep { m/^net\d+$/ } sort keys $conf->%*;

    for my $opt (@net_opts) {
        my $net = parse_net($conf->{$opt});
        PVE::GuestHelpers::check_vnet_access(
            $rpcenv, $authuser, $net->@{'bridge', 'tag', 'trunks'});
    }

    return 1;
}
6526
e3971865
DC
# Check that $user may use the USB and PCI devices configured in $conf.
#
# usb<N>:     a 'host' device other than 'spice' requires root@pam; a 'mapping' requires
#             'Mapping.Use' on /mapping/usb/<mapping>
# hostpci<N>: a 'host' device requires root@pam; a 'mapping' requires 'Mapping.Use' on
#             /mapping/pci/<mapping>
#
# Dies if a check fails or if an entry has neither 'host' nor 'mapping' set.
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    for my $opt (keys $conf->%*) {
        if ($opt =~ m/^usb\d+$/) {
            my $usb = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});
            my $host = $usb->{host};
            if ($host) {
                if ($host !~ m/^spice$/i && $user ne 'root@pam') {
                    die "only root can set '$opt' config for real devices\n";
                }
            } elsif ($usb->{mapping}) {
                $rpcenv->check_full($user, "/mapping/usb/$usb->{mapping}", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $pci = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});
            if ($pci->{host}) {
                if ($user ne 'root@pam') {
                    die "only root can set '$opt' config for non-mapped devices\n";
                }
            } elsif ($pci->{mapping}) {
                $rpcenv->check_full($user, "/mapping/pci/$pci->{mapping}", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        }
    }
}
6553
e3971865
DC
# Run the permission checks needed to restore the given configuration as $user: first the
# bridge access check, then the USB/PCI mapping check.
sub check_restore_permissions {
    my ($rpcenv, $user, $conf) = @_;

    my @check_args = ($rpcenv, $user, $conf);

    check_bridge_access(@check_args);
    check_mapping_access(@check_args);
}
3e16d5fc
DM
6560# vzdump restore implementation
6561
# Return the name of the first member of a tar archive, as listed by 'tar tf'.
#
# Dies if $archive is not a regular file, if the tar process cannot be started, or if the
# listing yields no (or a false) first line.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    if (!-f $archive) {
        die "ERROR: file '$archive' does not exist\n";
    }

    # try to detect archive type first
    my $pid = open(my $listing, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";

    # only the first line is needed, so stop tar right after reading it
    my $firstfile = <$listing>;
    kill 15, $pid;
    close $listing;

    die "ERROR: archive contaions no data\n" if !$firstfile;

    chomp $firstfile;
    return $firstfile;
}
6579
ed221350
DM
# Remove the temporary volumes recorded in a restore stat file.
#
# $storecfg: storage configuration, used to free storage-managed volumes
# $statfile: path of the stat file; lines matching 'vzdump:<id>:<volid>' name a volume
#
# Volumes given as absolute paths are unlinked, everything else is freed through
# PVE::Storage::vdisk_free(). Progress and problems are reported on STDERR; a failure for
# one volume does not stop the cleanup of the others. If the stat file cannot be opened,
# nothing is cleaned up.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # use 'or' with explicit parentheses: with '||' the die would bind
                        # to $volid and never trigger on a failed unlink
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6605
# Restore a VM from a backup archive, dispatching on the archive format.
#
# $archive: path of the archive, or '-' which is always handled as a VMA archive
# $vmid:    ID of the VM to restore to
# $user:    user the restore is done for
# $opts:    restore options; $opts->{format} overrides the format detected by
#           PVE::Storage::archive_info()
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);
    my $comp = $info->{compression};

    # try to detect archive format
    my $format = $opts->{format} // $info->{format};

    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $comp);
}
6623
d1e92cf6
DM
# Helper to remove disks that will not be used after restore
#
# $storecfg:     Storage configuration
# $vmid:         ID of the VM; only volumes owned by this VM are touched
# $oldconf:      the configuration that gets replaced by the restore
# $virtdev_hash: the devices which get restored, keyed by drive name
#
# Errors while freeing volumes or deleting snapshots are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my %kept_disks;

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && $owner == $vmid;

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_disks{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $snap = $oldconf->{snapshots}->{$snapname};

        if (my $vmstate = $snap->{vmstate}) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        # disks that stay around must lose their snapshots too
        for my $volid (keys %kept_disks) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6669
9f3d73bc
DM
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated: content type and (for non-root) permission check
    my $assert_allocatable = sub {
        my ($storeid, $scfg) = @_;

        if (!$scfg->{content}->{images}) {
            die "Content type 'images' is not available on storage '$storeid'\n";
        }
        if ($user ne 'root@pam') {
            $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
        }
    };

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname};
            die "archive does not contain data for drive '$virtdev'\n" if !$info;

            # an explicitly requested storage wins over the one recorded in the backup
            if (defined($options->{storage})) {
                $storeid = $options->{storage} || 'local';
            } else {
                $storeid ||= 'local';
            }
            $format ||= 'raw';

            $info->{devname} = $devname;
            $info->{virtdev} = $virtdev;
            $info->{format} = $format;
            $info->{storeid} = $storeid;

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $assert_allocatable->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $devinfo->{$devname};
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);

            # of the plain drive lines, only cloudinit drives are recorded here
            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined($options->{storage});

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $assert_allocatable->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
d1e92cf6 6737
9f3d73bc
DM
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes get allocated for
#
# Each entry of $virtdev_hash gets its 'volid' set to the newly allocated volume, and its
# 'format' replaced by the storage's default format if the requested one is not supported.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # round the size up to a multiple of 1024 and convert it to that unit
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        # looked up once per device; also reused for the cloudinit naming below
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # cloudinit disks get a fixed name, all others let the storage pick one
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' setting get the format appended as file extension
            if ($scfg->{path}) {
                $name .= ".$d->{format}";
            }
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
d1e92cf6 6781
# Translate one line of a backed-up VM configuration into the line(s) to write to the
# restored configuration.
#
# $cookie: hash reference with state kept between calls; 'netcount' is the index used for
#          (and incremented by) converted old-style vlan<N> entries
# $map:    { $virtdev => $volid } with the newly allocated volumes
# $line:   the configuration line, including its trailing newline
# $unique: if true, MAC addresses and the smbios1 UUID are regenerated
#
# Returns the resulting text. It is empty for lines that are dropped: backup hint
# comments, and the 'lock', 'unused<N>' and 'parent' entries.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    for my $dropped (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $dropped;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            if (!$macaddr || $unique) {
                $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix});
            }
            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        # only replace an existing MAC address, do not add one
        if ($net->{macaddr}) {
            $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
        }
        $netstr = print_net($net);
        return "$id: $netstr\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are kept as a comment
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume are passed through unchanged
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        $value = print_drive($di);
        return "$virtdev: $value\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($key_prefix, $smbios_str) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($smbios_str);
        $smbios1->{uuid} = $uuid_str;
        return $key_prefix . print_smbios1($smbios1) . "\n";
    }

    # everything else is taken over as is
    return $line;
}
9f3d73bc
DM
6850
# Deactivate all volumes that were allocated for a restore, i.e. all entries of
# $virtdev_hash with a 'volid'. Errors are printed to STDERR and not propagated.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = map { $_->{volid} ? ($_->{volid}) : () } values $virtdev_hash->%*;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    if ($@) {
        print STDERR $@;
    }
};
6862
# Free all volumes that were allocated for a restore, i.e. all entries of $virtdev_hash
# with a 'volid'. The result for each volume is reported on STDERR; a failure does not stop
# the cleanup of the remaining volumes.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    for my $dev (values $virtdev_hash->%*) {
        my $volid = $dev->{volid};
        next if !$volid;

        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        if ($@) {
            print STDERR "unable to cleanup '$volid' - $@";
        }
    }
};
91bd6c90 6875
# Parse the raw configuration from a backup and apply overrides on top of it.
#
# $filename:        passed to parse_vm_config() together with the raw configuration
# $backup_conf_raw: the configuration as stored in the backup
# $override_conf:   hash reference; each of its keys replaces the value from the backup
#
# Returns the merged configuration.
sub restore_merge_config {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);

    $merged->{$_} = $override_conf->{$_} for keys $override_conf->%*;

    return $merged;
}
202a2a0b 6886
# List the 'images' volumes known to the storage layer, optionally restricted to one VM.
#
# $cfg:  storage configuration
# $vmid: VM ID passed to PVE::Storage::vdisk_list() as filter (may be undef)
#
# Returns a hash reference { $volid => $item }, where each item is the entry from
# vdisk_list() with its 'path' added. Entries without a volid or size are skipped.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $storeid (keys %$info) {
        for my $item ($info->{$storeid}->@*) {
            next unless $item->{volid} && $item->{size};

            my $volid = $item->{volid};
            $item->{path} = PVE::Storage::path($cfg, $volid);
            $volid_hash->{$volid} = $item;
        }
    }

    return $volid_hash;
}
6903
# Bring the disk entries of a VM configuration in line with the volumes found on storage.
#
# $vmid:       ID of the VM, used for messages and to skip its state volumes
# $conf:       VM configuration, modified in place
# $volid_hash: { $volid => { size => ..., path => ... } } of the volumes found
#
# Three things happen, each reported on STDOUT:
# - drive sizes are updated via PVE::QemuServer::Drive::update_disksize()
# - unused<N> entries whose volume is referenced by an earlier entry are removed
# - volumes not referenced at all are added as unused<N> entries
#
# Returns 1 if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # used and unused disks
    #
    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my $seen_volid = {};
    my $seen_path = {};

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid->{$volid} = 1;
        if ($volume) {
            my $path = $volume->{path};
            $seen_path->{$path} = 1 if $path;
        }

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        my $in_use = $seen_volid->{$volid} || ($path && $seen_path->{$path});
        if ($in_use) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        # also catches a later unused entry pointing to the same volume
        $seen_volid->{$volid} = 1;
        $seen_path->{$path} = 1 if $path;
    });

    # add everything that is still unreferenced as unused volume
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6977
# Rescan storage volumes and update the disk entries of one VM or of all VMs.
#
# Parameters:
#   $vmid   - VM to update; if undefined, every VM returned by config_list() is processed
#   $nolock - if true, update the config directly instead of going through lock_config()
#   $dryrun - if true, changes are computed (and printed) but the config is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # only hand over the volumes that are owned by this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next if !($info->{vmid} && $info->{vmid} == $vmid);
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($vmid, $conf, $vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    # run the update for a single VM, honoring the caller's locking preference
    my $process_vm = sub {
        my ($id) = @_;
        return $nolock
            ? $updatefn->($id)
            : PVE::QemuConfig->lock_config($id, $updatefn, $id);
    };

    if (defined($vmid)) {
        $process_vm->($vmid);
    } else {
        my $vmlist = config_list();
        for my $id (keys %$vmlist) {
            $process_vm->($id);
        }
    }
}
7021
9f3d73bc
DM
# Restore a VM from a Proxmox Backup Server snapshot.
#
# Parameters:
#   $archive - volume ID of the backup; must parse as vtype 'backup' with format 'pbs-vm'
#   $vmid    - ID of the VM to restore to
#   $user    - user the restore is performed for (passed to the permission checks)
#   $options - hash; the keys read here are 'live', 'unique', 'override_conf' and 'pool'
#              (the whole hash is also handed to the backup hint parser)
#
# Flow: fetch index and config from the backup, allocate the target volumes, restore the
# images with 'pbs-restore' (for a live-restore only efidisk0/tpmstate0), write the new
# config and finally, for a live-restore, start the VM via pbs_live_restore().
# Dies on error after deactivating and destroying the volumes allocated so far.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if the VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    # the namespace argument is the same for every restored image
    my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # dots are escaped so that only real '.img.fidx' archives are picked up
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backed up config: rewrite it for the restored VM
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes must stay active for the VM start below
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7238
# Start a VM whose disks are backed by a PBS snapshot and stream the data to the targets.
#
# Parameters:
#   $vmid           - ID of the VM to start
#   $conf           - VM config hash
#   $storecfg       - storage configuration
#   $restored_disks - hash keyed by 'drive-<confname>'; only the keys are used here
#   $opts           - hash with 'repo', 'snapshot', 'keyfile' and optional 'namespace'
#
# On failure the error is printed as a warning, the VM is stopped and the function dies
# with "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale $1 from an earlier match if the name is malformed
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected name '$ds' for restored disk - expected 'drive-<name>'\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile}
            if defined($opts->{keyfile}) && -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace}
            if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7309
91bd6c90
DM
# Restore a VM from a VMA archive.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to restore to
#   $user    - user the restore is performed for (passed to the permission checks)
#   $opts    - hash; the keys read here are 'bwlimit', 'unique', 'override_conf' and 'pool'
#              (the whole hash is also handed to the backup hint parser)
#   $comp    - compression of the archive, if any (looked up via decompressor_info)
#
# A pipeline [cstream |] [decompressor |] 'vma extract' is run. Once the VMA header was
# parsed the target volumes are allocated and the device mapping is handed to 'vma' through
# a fifo. Dies on error after deactivating and destroying the volumes allocated so far.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    # append a command to the pipeline; everything after it reads from the pipe
    my $dbg_cmdstring = '';
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        # build a new list instead of pushing onto the array owned by the helper's result
        my $cmd = [ @{ $info->{decompressor} // [] }, $readfrom ];
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    if (!POSIX::mkfifo($mapfifo, 0600)) {
        my $mkfifo_err = $!;
        # An already existing fifo is still usable. Anything else must not be opened for
        # writing below, as that would silently create/truncate a regular file.
        die "unable to create fifo '$mapfifo' - $mkfifo_err\n" if !-p $mapfifo;
    }
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # Called once the config was extracted: allocates the volumes, writes the device map
    # to $fifofh and collects the rewritten config lines in $new_conf_raw.
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    my $oldtimeout;

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm(5); # for reading the VMA header - might hang with a corrupted one

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation
                alarm($oldtimeout || 0);
                $oldtimeout = undef;
                $print_devmap->();
                print $fifofh "done\n";
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    # restore the caller's alarm if the header was never fully read
    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7522
# Restore a VM from a (legacy) tar based vzdump archive.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to restore to
#   $user    - exported to the extraction helper as VZDUMP_USER
#   $opts    - hash; the keys read here are 'override_conf' (must be empty), 'storage',
#              'pool', 'prealloc', 'info' and 'unique'
#
# The archive is unpacked via 'tar --to-command=qmextract'; the resulting volume mapping
# is read from 'qmrestore.stat' in the temporary directory and applied to the config.
# Dies on error; the temporary directory is removed in both the success and error case.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block, the code after the eval (writing
        # the then still empty config, rescan) runs in 'info' mode too - confirm intent.
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        # the cleanup needs the stat file, so the temporary directory is removed afterwards
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        rmtree $tmpdir; # do not leave the extracted files behind on failure
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7635
# Call $func once for every storage that is used by a (non-CD-ROM) volume of $conf.
#
# The storage IDs are passed to $func one at a time, in sorted order and without
# duplicates. Volumes whose ID yields no storage ID are ignored.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $collect_storage = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect_storage);

    for my $sid (sort keys %used_storage) {
        $func->($sid);
    }
}
7655
6c9f59c1
TL
# storage types whose snapshots are handled by QEMU (unless 'krbd' is set, see below)
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether snapshots of a volume have to be done by QEMU.
#
# Returns 1 if the storage type is listed in $qemu_snap_storage and 'krbd' is not set for
# the storage, or if the volume ID ends in '.qcow2' or '.qed'. Returns nothing otherwise,
# and always for a device ID containing 'tpmstate0'. Dies if the storage is not configured.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    my $qemu_handled_type = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $qemu_handled_type;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7678
# Check whether the QEMU guest agent of a VM answers a 'guest-ping' (3 second timeout).
#
# Returns 1 if the ping succeeded and 0 otherwise. On failure the error is printed as a
# warning unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@;
    return 1 if !$err;

    warn "QEMU Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7689
04a69bb4
AD
# Convert the volumes of a VM to base volumes, as needed for a template.
#
# Parameters:
#   $vmid - ID of the VM, used to write the config
#   $conf - VM config hash, modified in place
#   $disk - optional; if set, only the drive with this config key is converted
#
# CD-ROM drives and volumes without the 'template' storage feature are skipped. The config
# is written after each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        # point the drive at the new base volume and persist that right away
        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7710
92bdc3f0
DC
# Convert an 'iscsi://<portal>/<target>/<lun>' path to the equivalent option string using
# the 'file.driver=iscsi,...,driver=raw' syntax. The initiator name is taken from
# get_initiator_name(). Dies if the path does not have the expected form.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    return join('',
        "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,",
        "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw",
    );
}
7727
# Copy a volume (or a plain image file / block device) to a volume using 'qemu-img convert'.
#
# Parameters:
#   $src_volid           - source volume ID, or path of an existing file or block device
#   $dst_volid           - destination volume ID; must be a valid volume ID
#   $size                - size used to render the progress messages
#   $snapname            - optional snapshot of the source to copy from
#   $is_zero_initialized - if true, the (non-iSCSI) target is accessed via 'zeroinit:'
#   $bwlimit             - optional rate limit, passed to qemu-img as '<limit>K'
#
# Dies if source or destination are invalid or if the copy fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid form qemu-img convert\n" if !$dst_storeid;

    my ($cachemode, $src_path, $src_format);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        # source is a storage volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # source is a plain file or block device, guess the format from the extension
        $src_path = $src_volid;
        $src_format = $1 if $src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my @cmd = ('/usr/bin/qemu-img', 'convert', '-p', '-n');

    my $use_internal_snapshot = $snapname && $src_format && $src_format eq "qcow2";
    push @cmd, '-l', "snapshot.name=$snapname" if $use_internal_snapshot;
    push @cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @cmd, '-T', $cachemode if defined($cachemode);
    push @cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    # iSCSI paths are passed as option strings instead of a format plus path
    if ($src_is_iscsi) {
        push @cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @cmd, '-O', $dst_format;
    }

    my $target = (!$dst_is_iscsi && $is_zero_initialized) ? "zeroinit:$dst_path" : $dst_path;
    push @cmd, $src_path, $target;

    # turn the percentage printed by 'qemu-img -p' into a human readable progress line
    my $parser = sub {
        my ($line) = @_;
        return if $line !~ m/\((\S+)\/100\%\)/;

        my $percent = $1;
        my $transferred = int($size * $percent / 100);
        my $total_h = render_bytes($size, 1);
        my $transferred_h = render_bytes($transferred, 1);

        print "transferred $transferred_h of $total_h ($percent%)\n";
    };

    eval { run_command(\@cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7810
# Determine the image format of a volume.
#
# If the storage config has a 'path' and the volume name ends in an extension matching
# $PVE::QemuServer::Drive::QEMU_FORMAT_RE, that extension is returned, otherwise "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return $1
        if $scfg->{path} && $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7820
# Start a 'drive-mirror' block job for a drive and monitor it.
#
# Parameters:
#   $vmid                - ID of the VM the drive belongs to
#   $drive               - config key of the drive; the QEMU device is "drive-$drive"
#   $dst_volid           - target volume ID, or an 'nbd:' URI which is used as is
#   $vmiddst             - passed through to qemu_drive_mirror_monitor()
#   $is_zero_initialized - if true, a volume target is accessed via 'zeroinit:'
#   $jobs                - hash of block jobs; the new job is added to it
#   $completion, $qga    - passed through to qemu_drive_mirror_monitor()
#   $bwlimit             - optional limit; sent to QEMU as 'speed' multiplied by 1024
#   $src_bitmap          - optional dirty bitmap; switches the job to an incremental sync
#
# If starting the job fails, the jobs in $jobs are cancelled and the function dies.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        $qemu_target = $dst_volid;
        $format = "nbd";
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $qemu_target = $dst_path;
        $qemu_target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my %mirror_args = (
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    );
    $mirror_args{format} = $format if $format;

    if (defined($src_bitmap)) {
        $mirror_args{sync} = 'incremental';
        $mirror_args{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $mirror_args{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %mirror_args); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7872
db1f8b39
FG
# Watch the block jobs of type $op (default 'mirror') listed in $jobs - a hash keyed by the QEMU
# job id, e.g. 'drive-scsi0' - by polling 'query-block-jobs' once per second. Progress is printed
# per job and entries are removed from $jobs once the corresponding job is finished.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# If $vmiddst is set and differs from $vmid, the jobs are cancelled (not completed) once they
# are all ready. While doing so, the guest file systems are frozen if $qga is set and the agent
# is running, otherwise the VM is suspended.
#
# On any error the remaining jobs in $jobs are cancelled and the error is raised again.
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts failed completion attempts, used as timeout
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # job is gone although nobody asked it to finish
                die "$job_id: '$op' has been cancelled\n" if $vanished;

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $percent = sprintf("%.2f", $transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    # pass the values as arguments so they can never be taken as format directives
                    my $status = sprintf(
                        "transferred %s of %s (%s%%) in %s",
                        $transferred_h,
                        $total_h,
                        $percent,
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }

                    # stop logging progress once the job was seen as ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {
                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job...\n";

                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }

                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $cmd_err = $@;
                        if ($cmd_err && $cmd_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($cmd_err) {
                            # any other failure must not be mistaken for a successful completion
                            die "$job_id: block job cannot be completed - $cmd_err";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@; # do not hide cleanup problems, but report the original error below
        die "block job ($op) error: $err";
    }
}
8014
# Send 'block-job-cancel' for every job in $jobs (hash keyed by job id) and then poll
# 'query-block-jobs' once per second until none of them is listed anymore. Jobs that are gone
# get removed from $jobs. Errors of the cancel command itself are ignored.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        $still_running{ $_->{device} } = $_ for @$stats;

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if !keys %$jobs;

        sleep 1;
    }
}
8045
8fbae1dc
FE
# Guard against bug #4525: drive-mirror opens the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Dies if the source drive (explicitly or by storage default) uses io_uring and the target
# storage does not allow io_uring by default. Only relevant when drive-mirror is used.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    # Nothing to check without drive-mirror. When the storage stays the same, assume that what
    # works for the source also works for the target.
    return if !$use_drive_mirror || $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    # an explicit aio setting on the drive wins over the storage default
    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n"
        if $src_uses_io_uring && !storage_allows_io_uring_default($dst_scfg, $cache_direct);
}
8072
# Create a linked or full clone of a single drive and return the drive hash of the new volume.
#
# $source: hash with the keys 'vmid', 'running', 'drivename', 'drive' (parsed drive), 'snapname'
# $dest: hash with the keys 'vmid', 'drivename', 'efisize', 'storage', 'format'
# $full: create a full copy instead of a linked clone
# $newvollist: array reference, the ID of each newly created volume is appended
# $jobs, $completion, $qga, $bwlimit: handed over to the drive-mirror / convert helpers
#
# A full clone of a running VM's drive without snapshot uses drive-mirror, all other full clones
# are copied offline. Cloud-init drives only get a new volume allocated, no data is copied.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    # the drive names are optional, so never compare them unguarded
    my $dst_is_efidisk = defined($dst_drivename) && $dst_drivename eq 'efidisk0';
    my $dst_is_tpmstate = defined($dst_drivename) && $dst_drivename eq 'tpmstate0';

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_is_efidisk;
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_is_tpmstate;

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    # $use_drive_mirror implies that $src_drivename is set
    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $is_cloudinit = drive_is_cloudinit($drive);

        my $name = undef;
        my $size = undef;
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_is_efidisk) {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_is_tpmstate) {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # No data is copied for cloud-init drives.
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (defined($completion) && $completion eq 'complete' && scalar(keys %$jobs) > 0) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if ($use_drive_mirror) {
                qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($dst_is_efidisk) {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

    # best effort, the size property is simply left out if it cannot be determined
    my $new_size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $new_size if defined($new_size);

    return $disk;
}
8192
98cfd8b6
AD
# Query the QEMU version of the running VM via QMP and return it as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my $version = $res->{qemu};

    return sprintf("%s.%s", $version->{major}, $version->{minor});
}
8198
249c4a6c
AD
# Decide whether the old (non-EFI) PXE ROM files have to be used for the given machine type.
#
# Returns an empty list if no machine type is given, else the list
# ($use_old_bios_files, $machine_type) where a trailing '.pxe' is stripped from the machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if (my ($base_type) = $machine_type =~ m/^(\S+)\.pxe$/) {
        return (1, $base_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $needs_old_files = !min_version($version, 2, 4);

    return ($needs_old_files, $machine_type);
}
8220
# Return the size in bytes of the OVMF variables template file which applies to the VM config.
# $efidisk is optional, by default the parsed 'efidisk0' property of $conf is used (if any).
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # the second element returned by get_ovmf_files() is the variables file
    my $vars_file = (get_ovmf_files($arch, $efidisk, $smm))[1];

    return -s $vars_file;
}
8230
# Set the size of the 'efidisk0' drive in $conf to the size reported by get_efivars_size().
# Does nothing if the config has no EFI disk.
sub update_efidisk_size {
    my ($conf) = @_;

    my $efidisk_property = $conf->{efidisk0};
    return if !defined($efidisk_property);

    my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $efidisk_property);
    $efidisk->{size} = get_efivars_size($conf);

    $conf->{efidisk0} = print_drive($efidisk);

    return;
}
8242
f9dde219
SR
# Set the size of the 'tpmstate0' drive in $conf to the fixed TPM state size.
# Does nothing if the config has no TPM state, like update_efidisk_size() does for EFI disks.
sub update_tpmstate_size {
    my ($conf) = @_;

    # without this guard a 'tpmstate0' entry could get created for VMs that have none
    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
8250
90b20b15
DC
# Allocate a new EFI disk volume on $storeid for $vmid and fill it with the OVMF variables
# template matching $arch, $efidisk and $smm.
# Returns the list ($volid, $size) where $size is the volume size divided by 1024.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    # the second element returned by get_ovmf_files() is the variables file
    my $vars_template = (get_ovmf_files($arch, $efidisk, $smm))[1];

    my $template_bytes = -s $vars_template;
    my $template_kb = PVE::Tools::convert_size($template_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc(
        $storecfg, $storeid, $vmid, $fmt, undef, $template_kb,
    );
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($vars_template, $volid, $template_bytes, undef, 0);

    my $volume_bytes = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $volume_bytes/1024);
}
8266
22de899a
AD
# Query the IO threads of the running VM via QMP.
# Returns a hash reference mapping each IO thread's 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $iothreads = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    $thread_id_of{ $_->{id} } = $_->{"thread-id"} for @$iothreads;

    return \%thread_id_of;
}
8279
ee034f5c
AD
# Compute SCSI controller information for a drive, based on the 'scsihw' setting of the VM.
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev: number of devices per controller (7 for lsi* or unset, 1 for virtio-scsi-single,
#     256 otherwise)
#   $controller: index of the controller the drive belongs to
#   $controller_prefix: 'virtioscsi' for virtio-scsi-single, 'scsihw' otherwise
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev = 256;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
a1511b3c 8300
44549149
EK
# Pick the image format for a new volume on storage $storeid.
#
# An explicitly requested $format is used if the storage supports it. Without a requested format
# the format derived from $src_volname serves as hint. The storage's default format is returned
# when there is neither a format nor a source volume name, or when the format is not supported.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($default_format, $valid_formats) =
        PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $default_format if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $is_supported = grep { $_ eq $format } @$valid_formats;

    return $is_supported ? $format : $default_format;
}
8320
66cebc46
DC
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage for the VM state, in order of preference:
# 1. the 'vmstatestorage' set in the config
# 2. a shared storage where the VM has at least one disk
# 3. a non-shared storage where the VM has at least one disk
# 4. the storage named 'local'
# Within 2. and 3. storages with a path (file based) are preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    my $configured = $conf->{vmstatestorage};
    return $configured if $configured;

    my %candidate;

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';

        # prefer file based storage
        $candidate{$kind} = $sid if !$candidate{$kind} || $scfg->{path};
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
8344
# Generate a new UUID with the UUID module and return its string representation.
sub generate_uuid {
    my $binary_uuid;
    UUID::generate($binary_uuid);

    my $uuid_string;
    UUID::unparse($binary_uuid, $uuid_string);

    return $uuid_string;
}
8351
# Return a freshly generated UUID as 'uuid=<UUID>' string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
8355
9c152e87
TL
# Send the 'nbd-server-stop' QMP command to the VM.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8361
dae98db9
DC
# Create the (empty) reboot trigger file /run/qemu-server/<vmid>.reboot, dies if that fails.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file) or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
8368
# Remove the reboot trigger file /run/qemu-server/<vmid>.reboot.
# Returns the result of unlink(), i.e. a true value if a file was removed. A missing file is not
# an error, any other failure to remove it is fatal.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8380
5cfa9f5f
SR
# Translate a legacy boot specification (a string of letters, where 'c' is used for disks, 'd'
# for CD-ROM drives and 'n' for network devices) into a hash mapping device names to a boot
# index. The n-th letter of the string gets the base index n*100.
#
# All CD-ROM drives and all configured network devices get consecutive indices, but of the disks
# only the one set as 'bootdisk' in the config is included.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    # next free boot index per letter
    my @letters = split(//, $legacy);
    my %next_index = map { ($letters[$_] => ($_ + 1) * 100) } 0 .. $#letters;

    my %bootorder;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $letter = drive_is_cdrom($drive, 1) ? 'd' : 'c';
        return if !$next_index{$letter};

        # disks consume an index even if they are not the boot disk
        my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
        $bootorder{$ds} = $next_index{$letter} if $letter eq 'd' || $is_bootdisk;

        $next_index{$letter} += 1;
    });

    if ($next_index{n}) {
        for my $net_index (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_index";
            next if !$conf->{$netname};

            $bootorder{$netname} = $next_index{n};
            $next_index{n} += 1;
        }
    }

    return \%bootorder;
}
8420
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference with (in this order, each only if present) the first hard disk,
# the first CD-ROM drive and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my $devices = [];

    # first the hard disk (0), then the cdrom (1)
    for my $want_cdrom (0, 1) {
        my $disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @$devices, $disk if $disk;
    }

    # network
    my $first_net = first { $conf->{$_} } map { "net$_" } 0 .. $MAX_NETS - 1;
    push @$devices, $first_net if defined($first_net);

    return $devices;
}
8448
e5d611c3
TL
# Return a hash mapping device names to their boot index for the given VM config.
# The legacy translation is used when there is no 'boot' property, when it cannot be parsed or
# when it contains a 'legacy' value. Devices from 'order' are numbered consecutively.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my %bootorder;
    if ($boot->{order}) {
        # start at 100 to allow user to insert devices before us with -args
        my $index = 100;
        $bootorder{$_} = $index++ for PVE::Tools::split_list($boot->{order});
    }

    return \%bootorder;
}
8468
65911545
SR
# Connect to the qmeventd socket and send the 'vzdump' handshake for $vmid.
# Connecting is retried (with a short pause) as long as it fails with EINTR or EAGAIN, up to
# five attempts in total. Returns the socket handle, which the caller has to close.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";

    my $socket;
    my $attempts = 0;
    until ($socket = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1)) {
        $attempts++;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $attempts retries\n"
            if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print {$socket} to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $socket;
}
8496
65e866e5
DM
8497# bash completion helper
8498
# Completion helper: return the volume IDs of all VMA backup archives. If the current value
# already starts with '<storage>:', only that storage is listed.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # undef (no restriction) unless a storage prefix was typed already
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});

            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
8522
# Completion helper: return the IDs of the VMs listed by vmstatus().
# $running undefined: all VMs; true: only running VMs; false (but defined): only VMs that are
# not running. Templates are excluded whenever $running is defined.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $vmlist = vmstatus();

    my @vmids = keys %$vmlist;

    if (defined($running)) {
        @vmids = grep {
            my $info = $vmlist->{$_};
            !$info->{template}
                && ($running ? $info->{status} eq 'running' : $info->{status} ne 'running');
        } @vmids;
    }

    return \@vmids;
};
8542
# Completion helper: IDs of all VMs.
sub complete_vmid {
    return $complete_vmid_full->();
}
8546
# Completion helper: IDs of all VMs which are not running (templates excluded).
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8550
# Completion helper: IDs of all running VMs (templates excluded).
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8554
335af808
DM
# Completion helper: return the IDs of all storages which pass storage_check_enabled() and have
# the 'images' content type.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @image_storages = grep {
        my $sid = $_;
        PVE::Storage::storage_check_enabled($cfg, $sid, undef, 1)
            && $ids->{$sid}->{content}->{images};
    } keys %$ids;

    return \@image_storages;
}
8569
255e9c54
AL
# Completion helper: return the IDs of all storages which pass storage_check_enabled() for the
# target node and have the 'images' content type. The target node is taken from the second
# element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    # plain element access instead of a one-element array slice
    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    for my $sid (keys %$ids) {
        next if !PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        next if !$ids->{$sid}->{content}->{images};
        push @$res, $sid;
    }

    return $res;
}
8587
b08c37c3
DC
# Check via QMP 'query-status' whether the VM is in state 'paused'.
# Errors (e.g. from the config check or the QMP command) are only warned about; a false value is
# returned in that case.
sub vm_is_paused {
    my ($vmid) = @_;

    my $status = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    return $status if !$status;
    return $status->{status} eq "paused";
}
8597
3f11f0d7
LS
# Assert that the storage of volume $vol has the volume's content type enabled.
# Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    # the first element returned by parse_volname() is the volume (content) type
    my $vtype = (PVE::Storage::parse_volname($storecfg, $vol))[0];

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8610
21947fea
AD
# Add the MAC address of each network device in the VM config to the forwarding DB of its bridge.
# Devices without MAC address or without bridge are skipped (with a warning, see below).
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        my ($net_index) = $opt =~ m/^net(\d+)$/;
        next if !defined($net_index);

        my $iface = "tap${vmid}i${net_index}";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1);
        next if !$net;

        my ($mac, $bridge) = $net->@{qw(macaddr bridge)};

        if (!$mac) {
            # only worth a warning if the 'learning' flag of the bridge port reads as false
            my $learning = file_read_firstline("/sys/class/net/$iface/brport/learning");
            if (!$learning) {
                log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!");
            }
            next;
        }

        if (!$bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
            next;
        }

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
1b5ba4dd 8639
73ed6496
AD
# Remove the MAC address of each network device in the VM config from the forwarding DB of its
# bridge. Devices which cannot be parsed, have no MAC address or have no bridge are skipped.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        # add_nets_bridge_fdb() adds no entry for interfaces without bridge, so there is nothing
        # to remove - and an undefined bridge must not end up in the sysfs path below
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8658
1e3baf05 86591;