]> git.proxmox.com Git - qemu-server.git/blame - PVE/QemuServer.pm
vm start: factor out silencing systemd stop-scope command
[qemu-server.git] / PVE / QemuServer.pm
CommitLineData
baa4f62d 1package PVE::QemuServer;
1e3baf05
DM
2
3use strict;
990fc5e2 4use warnings;
3ff84d6f 5
5da072fb
TL
6use Cwd 'abs_path';
7use Digest::SHA;
8use Fcntl ':flock';
9use Fcntl;
1e3baf05 10use File::Basename;
5da072fb 11use File::Copy qw(copy);
1e3baf05
DM
12use File::Path;
13use File::stat;
14use Getopt::Long;
5da072fb
TL
15use IO::Dir;
16use IO::File;
17use IO::Handle;
18use IO::Select;
19use IO::Socket::UNIX;
1e3baf05 20use IPC::Open3;
c971c4f2 21use JSON;
c3d15108 22use List::Util qw(first);
1f30ac3a 23use MIME::Base64;
5da072fb
TL
24use POSIX;
25use Storable qw(dclone);
f85951dc 26use Time::HiRes qw(gettimeofday usleep);
5da072fb 27use URI::Escape;
425441e6 28use UUID;
5da072fb 29
82841214 30use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
5b65b00d 31use PVE::CGroup;
83870398 32use PVE::CpuSet;
48cf040f 33use PVE::DataCenterConfig;
5da072fb 34use PVE::Exception qw(raise raise_param_exc);
3b56383b 35use PVE::Format qw(render_duration render_bytes);
81d6e4e1 36use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
a52eb3c4
DC
37use PVE::Mapping::PCI;
38use PVE::Mapping::USB;
1e3baf05 39use PVE::INotify;
4df98f2f 40use PVE::JSONSchema qw(get_standard_option parse_property_string);
1e3baf05 41use PVE::ProcFSTools;
fbec3f89 42use PVE::PBSClient;
34e82fa2 43use PVE::RESTEnvironment qw(log_warn);
91bd6c90 44use PVE::RPCEnvironment;
5da072fb 45use PVE::Storage;
b71351a7 46use PVE::SysFSTools;
d04d6af1 47use PVE::Systemd;
82841214 48use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
5da072fb
TL
49
50use PVE::QMPClient;
51use PVE::QemuConfig;
238af88e 52use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
5da072fb 53use PVE::QemuServer::Cloudinit;
5b65b00d 54use PVE::QemuServer::CGroup;
d786a274 55use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
75748d44 56use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
2ea5fb7e 57use PVE::QemuServer::Machine;
5da072fb 58use PVE::QemuServer::Memory;
0a13e08e 59use PVE::QemuServer::Monitor qw(mon_cmd);
74c17b7a 60use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
0cf8d56c 61use PVE::QemuServer::USB;
1e3baf05 62
28e129cc
AD
63my $have_sdn;
64eval {
65 require PVE::Network::SDN::Zones;
66 $have_sdn = 1;
67};
68
102cf9d8 69my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
96ed3574 70my $OVMF = {
b5099b4f 71 x86_64 => {
90b20b15
DC
72 '4m-no-smm' => [
73 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
74 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
75 ],
76 '4m-no-smm-ms' => [
77 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
78 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
79 ],
b5099b4f
SR
80 '4m' => [
81 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
82 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
83 ],
84 '4m-ms' => [
85 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
86 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
87 ],
88 default => [
89 "$EDK2_FW_BASE/OVMF_CODE.fd",
90 "$EDK2_FW_BASE/OVMF_VARS.fd",
91 ],
92 },
93 aarch64 => {
94 default => [
95 "$EDK2_FW_BASE/AAVMF_CODE.fd",
96 "$EDK2_FW_BASE/AAVMF_VARS.fd",
97 ],
98 },
96ed3574 99};
2ddc0a5c 100
7f0b5beb 101my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
1e3baf05 102
8d88a594
TL
103# Note about locking: we use flock on the config file to protect against concurrent actions.
104# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
105# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
106# But you can ignore this kind of lock with the --skiplock flag.
1e3baf05 107
cf364f95
TL
108cfs_register_file(
109 '/qemu-server/',
110 \&parse_vm_config,
111 \&write_vm_config
112);
1e3baf05 113
3ea94c60
DM
114PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
115 description => "Some command save/restore state from this location.",
116 type => 'string',
117 maxLength => 128,
118 optional => 1,
119});
120
c6737ef1 121PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
7bd9abd2 122 description => "Specifies the QEMU machine type.",
c6737ef1 123 type => 'string',
9471e48b 124 pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
c6737ef1
DC
125 maxLength => 40,
126 optional => 1,
127});
128
1a67f999 129# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
38277afc
TL
# Node name remembered by nodename(); stays undef until a defined value was obtained.
my $nodename_cache;

# Return the local node name.
#
# The value comes from PVE::INotify::nodename() and is remembered in
# $nodename_cache, so INotify is only consulted while no defined value has been
# stored yet.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
1e3baf05 135
ec3582b5
WB
136my $watchdog_fmt = {
137 model => {
138 default_key => 1,
139 type => 'string',
140 enum => [qw(i6300esb ib700)],
141 description => "Watchdog type to emulate.",
142 default => 'i6300esb',
143 optional => 1,
144 },
145 action => {
146 type => 'string',
147 enum => [qw(reset shutdown poweroff pause debug none)],
148 description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
149 optional => 1,
150 },
151};
152PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
153
9d66b397
SI
154my $agent_fmt = {
155 enabled => {
7bd9abd2 156 description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
9d66b397
SI
157 type => 'boolean',
158 default => 0,
159 default_key => 1,
160 },
161 fstrim_cloned_disks => {
0a4aff09 162 description => "Run fstrim after moving a disk or migrating the VM.",
9d66b397
SI
163 type => 'boolean',
164 optional => 1,
93e21fd2
CH
165 default => 0,
166 },
167 'freeze-fs-on-backup' => {
168 description => "Freeze/thaw guest filesystems on backup for consistency.",
169 type => 'boolean',
170 optional => 1,
171 default => 1,
9d66b397 172 },
48657158
MD
173 type => {
174 description => "Select the agent type",
175 type => 'string',
176 default => 'virtio',
177 optional => 1,
178 enum => [qw(virtio isa)],
179 },
9d66b397
SI
180};
181
55655ebc
DC
182my $vga_fmt = {
183 type => {
184 description => "Select the VGA type.",
185 type => 'string',
186 default => 'std',
187 optional => 1,
188 default_key => 1,
6f070e39 189 enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio virtio-gl vmware)],
55655ebc
DC
190 },
191 memory => {
192 description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
193 type => 'integer',
194 optional => 1,
195 minimum => 4,
196 maximum => 512,
197 },
198};
199
6dbcb073
DC
200my $ivshmem_fmt = {
201 size => {
202 type => 'integer',
203 minimum => 1,
204 description => "The size of the file in MB.",
205 },
206 name => {
207 type => 'string',
208 pattern => '[a-zA-Z0-9\-]+',
209 optional => 1,
210 format_description => 'string',
211 description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
212 },
213};
214
1448547f
AL
215my $audio_fmt = {
216 device => {
217 type => 'string',
218 enum => [qw(ich9-intel-hda intel-hda AC97)],
219 description => "Configure an audio device."
220 },
221 driver => {
222 type => 'string',
211785ee 223 enum => ['spice', 'none'],
1448547f
AL
224 default => 'spice',
225 optional => 1,
226 description => "Driver backend for the audio device."
227 },
228};
229
c4df18db
AL
230my $spice_enhancements_fmt = {
231 foldersharing => {
232 type => 'boolean',
233 optional => 1,
d282a24d 234 default => '0',
c4df18db
AL
235 description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM."
236 },
237 videostreaming => {
238 type => 'string',
239 enum => ['off', 'all', 'filter'],
d282a24d 240 default => 'off',
c4df18db
AL
241 optional => 1,
242 description => "Enable video streaming. Uses compression for detected video streams."
243 },
244};
245
2cf61f33
SR
246my $rng_fmt = {
247 source => {
248 type => 'string',
249 enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
250 default_key => 1,
8d88a594
TL
251 description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
252 ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
253 ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
254 ." still seeded from real entropy, and the bytes provided will most likely be mixed"
255 ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
256 ." a hardware RNG from the host.",
2cf61f33
SR
257 },
258 max_bytes => {
259 type => 'integer',
8d88a594
TL
260 description => "Maximum bytes of entropy allowed to get injected into the guest every"
261 ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
262 ." `0` to disable limiting (potentially dangerous!).",
2cf61f33
SR
263 optional => 1,
264
8d88a594
TL
265 # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
266 # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
267 # reading from /dev/urandom
2cf61f33
SR
268 default => 1024,
269 },
270 period => {
271 type => 'integer',
8d88a594
TL
272 description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
273 ." the guest to retrieve another 'max_bytes' of entropy.",
2cf61f33
SR
274 optional => 1,
275 default => 1000,
276 },
277};
278
26b443c8
TL
279my $meta_info_fmt = {
280 'ctime' => {
281 type => 'integer',
282 description => "The guest creation timestamp as UNIX epoch time",
283 minimum => 0,
284 optional => 1,
285 },
af2a1a1c
TL
286 'creation-qemu' => {
287 type => 'string',
288 description => "The QEMU (machine) version from the time this VM was created.",
289 pattern => '\d+(\.\d+)+',
290 optional => 1,
291 },
26b443c8
TL
292};
293
1e3baf05
DM
294my $confdesc = {
295 onboot => {
296 optional => 1,
297 type => 'boolean',
298 description => "Specifies whether a VM will be started during system bootup.",
299 default => 0,
300 },
301 autostart => {
302 optional => 1,
303 type => 'boolean',
304 description => "Automatic restart after crash (currently ignored).",
305 default => 0,
306 },
2ff09f52 307 hotplug => {
483ceeab
TL
308 optional => 1,
309 type => 'string', format => 'pve-hotplug-features',
310 description => "Selectively enable hotplug features. This is a comma separated list of"
94ec5e7c 311 ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
c60cad61
DC
312 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
313 ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
314 ." windows > 7.",
b3c2bdd1 315 default => 'network,disk,usb',
2ff09f52 316 },
1e3baf05
DM
317 reboot => {
318 optional => 1,
319 type => 'boolean',
320 description => "Allow reboot. If set to '0' the VM exit on reboot.",
321 default => 1,
322 },
323 lock => {
324 optional => 1,
325 type => 'string',
326 description => "Lock/unlock the VM.",
159719e5 327 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
1e3baf05
DM
328 },
329 cpulimit => {
330 optional => 1,
c6f773b8 331 type => 'number',
52261945 332 description => "Limit of CPU usage.",
8d88a594
TL
333 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
334 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
1e3baf05 335 minimum => 0,
c6f773b8 336 maximum => 128,
483ceeab 337 default => 0,
1e3baf05
DM
338 },
339 cpuunits => {
340 optional => 1,
341 type => 'integer',
483ceeab 342 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
67498860
TL
343 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
344 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
345 ." weights of all the other running VMs.",
e65e35ca 346 minimum => 1,
237239bf 347 maximum => 262144,
67498860 348 default => 'cgroup v1: 1024, cgroup v2: 100',
1e3baf05
DM
349 },
350 memory => {
351 optional => 1,
352 type => 'integer',
252e2624 353 description => "Amount of RAM for the VM in MiB. This is the maximum available memory when"
8d88a594 354 ." you use the balloon device.",
1e3baf05
DM
355 minimum => 16,
356 default => 512,
357 },
# Target amount of RAM (MiB) for the balloon device; 0 turns the balloon driver off.
balloon => {
    optional => 1,
    type => 'integer',
    description => "Amount of target RAM for the VM in MiB. Using zero disables the balloon driver.",
    minimum => 0,
},
364 shares => {
483ceeab
TL
365 optional => 1,
366 type => 'integer',
367 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
8d88a594
TL
368 ." more memory this VM gets. Number is relative to weights of all other running VMs."
369 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
8b1accf7
DM
370 minimum => 0,
371 maximum => 50000,
372 default => 1000,
13a48620 373 },
1e3baf05
DM
374 keyboard => {
375 optional => 1,
376 type => 'string',
233fb336
DW
377 description => "Keyboard layout for VNC server. This option is generally not required and"
378 ." is often better handled from within the guest OS.",
e95fe75f 379 enum => PVE::Tools::kvmkeymaplist(),
aea47dd6 380 default => undef,
1e3baf05
DM
381 },
382 name => {
383 optional => 1,
7fabe17d 384 type => 'string', format => 'dns-name',
1e3baf05
DM
385 description => "Set a name for the VM. Only used on the configuration web interface.",
386 },
cdd20088
AD
387 scsihw => {
388 optional => 1,
389 type => 'string',
52261945 390 description => "SCSI controller model",
6731a4cf 391 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
cdd20088
AD
392 default => 'lsi',
393 },
1e3baf05
DM
394 description => {
395 optional => 1,
396 type => 'string',
a200af10
TL
397 description => "Description for the VM. Shown in the web-interface VM's summary."
398 ." This is saved as comment inside the configuration file.",
399 maxLength => 1024 * 8,
1e3baf05
DM
400 },
# Guest OS selector. The heredoc below is the long (asciidoc-style) help text,
# listing one entry per value of the enum.
ostype => {
    optional => 1,
    type => 'string',
    enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
    description => "Specify guest operating system.",
    verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
},
426 boot => {
427 optional => 1,
2141a802 428 type => 'string', format => 'pve-qm-boot',
483ceeab
TL
429 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
430 ." key or 'legacy=' is deprecated.",
1e3baf05
DM
431 },
432 bootdisk => {
433 optional => 1,
434 type => 'string', format => 'pve-qm-bootdisk',
2141a802 435 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
03e480fc 436 pattern => '(ide|sata|scsi|virtio)\d+',
1e3baf05
DM
437 },
438 smp => {
439 optional => 1,
440 type => 'integer',
441 description => "The number of CPUs. Please use option -sockets instead.",
442 minimum => 1,
443 default => 1,
444 },
445 sockets => {
446 optional => 1,
447 type => 'integer',
448 description => "The number of CPU sockets.",
449 minimum => 1,
450 default => 1,
451 },
452 cores => {
453 optional => 1,
454 type => 'integer',
455 description => "The number of cores per socket.",
456 minimum => 1,
457 default => 1,
458 },
8a010eae
AD
459 numa => {
460 optional => 1,
461 type => 'boolean',
1917695c 462 description => "Enable/disable NUMA.",
8a010eae
AD
463 default => 0,
464 },
7023f3ea
AD
465 hugepages => {
466 optional => 1,
467 type => 'string',
468 description => "Enable/disable hugepages memory.",
469 enum => [qw(any 2 1024)],
470 },
f36e9894
SR
# Companion switch to 'hugepages': keep the allocated hugepages after shutdown.
keephugepages => {
    optional => 1,
    type => 'boolean',
    default => 0,
    description => "Use together with hugepages. If enabled, hugepages will not be deleted"
        ." after VM shutdown and can be used for subsequent starts.",
},
de9d1e55 478 vcpus => {
3bd18e48
AD
479 optional => 1,
480 type => 'integer',
de9d1e55 481 description => "Number of hotplugged vcpus.",
3bd18e48 482 minimum => 1,
de9d1e55 483 default => 0,
3bd18e48 484 },
1e3baf05
DM
485 acpi => {
486 optional => 1,
487 type => 'boolean',
488 description => "Enable/disable ACPI.",
489 default => 1,
490 },
bc84dcca 491 agent => {
ab6a046f 492 optional => 1,
7bd9abd2 493 description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
9d66b397
SI
494 type => 'string',
495 format => $agent_fmt,
ab6a046f 496 },
1e3baf05
DM
497 kvm => {
498 optional => 1,
499 type => 'boolean',
500 description => "Enable/disable KVM hardware virtualization.",
501 default => 1,
502 },
503 tdf => {
504 optional => 1,
505 type => 'boolean',
8c559505
DM
506 description => "Enable/disable time drift fix.",
507 default => 0,
1e3baf05 508 },
19672434 509 localtime => {
1e3baf05
DM
510 optional => 1,
511 type => 'boolean',
8d88a594
TL
512 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
513 ." the `ostype` indicates a Microsoft Windows OS.",
1e3baf05
DM
514 },
515 freeze => {
516 optional => 1,
517 type => 'boolean',
518 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
519 },
# Display adapter configuration; the value is a property string validated against $vga_fmt.
vga => {
    optional => 1,
    type => 'string', format => $vga_fmt,
    description => "Configure the VGA hardware.",
    verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
        ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
        ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
        ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
        ." display server. For win* OS you can select how many independent displays you want,"
        ." Linux guests can add displays themselves.\nYou can also run without any graphic card,"
        ." using a serial device as terminal.",
},
0ea9541d
DM
532 watchdog => {
533 optional => 1,
534 type => 'string', format => 'pve-qm-watchdog',
52261945 535 description => "Create a virtual hardware watchdog device.",
4df98f2f
TL
536 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
537 ." action), the watchdog must be periodically polled by an agent inside the guest or"
538 ." else the watchdog will reset the guest (or execute the respective action specified)",
0ea9541d 539 },
1e3baf05
DM
# Initial RTC date: either the literal 'now' or a date with optional time, as
# enforced by 'pattern'.
startdate => {
    optional => 1,
    type => 'string',
    typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
    # the continuation line starts with a blank so the sentence does not run into the example
    description => "Set the initial date of the real time clock. Valid format for date are:"
        ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
    pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
    default => 'now',
},
43574f73 549 startup => get_standard_option('pve-startup-order'),
68eda3ab
AD
550 template => {
551 optional => 1,
552 type => 'boolean',
553 description => "Enable/disable Template.",
554 default => 0,
555 },
1e3baf05
DM
556 args => {
557 optional => 1,
558 type => 'string',
52261945
DM
559 description => "Arbitrary arguments passed to kvm.",
560 verbose_description => <<EODESCR,
c7a8aad6 561Arbitrary arguments passed to kvm, for example:
1e3baf05 562
bda7ccb1 563args: -no-reboot -smbios 'type=0,vendor=FOO'
c7a8aad6
FG
564
565NOTE: this option is for experts only.
1e3baf05
DM
566EODESCR
567 },
568 tablet => {
569 optional => 1,
570 type => 'boolean',
571 default => 1,
52261945 572 description => "Enable/disable the USB tablet device.",
4df98f2f
TL
573 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
574 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
575 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
576 ." may consider disabling this to save some context switches. This is turned off by"
577 ." default if you use spice (`qm set <vmid> --vga qxl`).",
1e3baf05
DM
578 },
579 migrate_speed => {
580 optional => 1,
581 type => 'integer',
582 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
583 minimum => 0,
584 default => 0,
585 },
586 migrate_downtime => {
587 optional => 1,
04432191 588 type => 'number',
1e3baf05
DM
589 description => "Set maximum tolerated downtime (in seconds) for migrations.",
590 minimum => 0,
04432191 591 default => 0.1,
1e3baf05
DM
592 },
593 cdrom => {
594 optional => 1,
b799312f 595 type => 'string', format => 'pve-qm-ide',
8485b9ba 596 typetext => '<volume>',
1e3baf05
DM
597 description => "This is an alias for option -ide2",
598 },
599 cpu => {
600 optional => 1,
601 description => "Emulated CPU type.",
602 type => 'string',
5d008ad3 603 format => 'pve-vm-cpu-conf',
1e3baf05 604 },
b7ba6b79
DM
605 parent => get_standard_option('pve-snapshot-name', {
606 optional => 1,
607 description => "Parent snapshot name. This is used internally, and should not be modified.",
608 }),
982c7f12
DM
609 snaptime => {
610 optional => 1,
611 description => "Timestamp for snapshots.",
612 type => 'integer',
613 minimum => 0,
614 },
18bfb361
DM
615 vmstate => {
616 optional => 1,
617 type => 'string', format => 'pve-volume-id',
4df98f2f
TL
618 description => "Reference to a volume which stores the VM state. This is used internally"
619 ." for snapshots.",
18bfb361 620 },
253624c7
FG
621 vmstatestorage => get_standard_option('pve-storage-id', {
622 description => "Default storage for VM state volumes/files.",
623 optional => 1,
624 }),
c6737ef1 625 runningmachine => get_standard_option('pve-qemu-machine', {
4df98f2f
TL
626 description => "Specifies the QEMU machine type of the running vm. This is used internally"
627 ." for snapshots.",
c6737ef1 628 }),
ea1c2110 629 runningcpu => {
4df98f2f
TL
630 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
631 ." internally for snapshots.",
ea1c2110
SR
632 optional => 1,
633 type => 'string',
634 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
635 format_description => 'QEMU -cpu parameter'
636 },
c6737ef1 637 machine => get_standard_option('pve-qemu-machine'),
d731ecbe
WB
638 arch => {
639 description => "Virtual processor architecture. Defaults to the host.",
640 optional => 1,
641 type => 'string',
642 enum => [qw(x86_64 aarch64)],
643 },
2796e7d5
DM
644 smbios1 => {
645 description => "Specify SMBIOS type 1 fields.",
646 type => 'string', format => 'pve-qm-smbios1',
5d004b00 647 maxLength => 512,
2796e7d5
DM
648 optional => 1,
649 },
cb0e4540
AG
650 protection => {
651 optional => 1,
652 type => 'boolean',
4df98f2f
TL
653 description => "Sets the protection flag of the VM. This will disable the remove VM and"
654 ." remove disk operations.",
cb0e4540
AG
655 default => 0,
656 },
3edb45e7 657 bios => {
a783c78e 658 optional => 1,
3edb45e7
DM
659 type => 'string',
660 enum => [ qw(seabios ovmf) ],
661 description => "Select BIOS implementation.",
662 default => 'seabios',
a783c78e 663 },
6ee499ff
DC
664 vmgenid => {
665 type => 'string',
666 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
667 format_description => 'UUID',
4df98f2f
TL
668 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
669 ." to disable explicitly.",
670 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
671 ." value identifier to the guest OS. This allows to notify the guest operating system"
672 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
673 ." execution or creation from a template). The guest operating system notices the"
674 ." change, and is then able to react as appropriate by marking its copies of"
675 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
676 ."Note that auto-creation only works when done through API/CLI create or update methods"
677 .", but not when manually editing the config file.",
f7ed64e7 678 default => "1 (autogenerated)",
6ee499ff
DC
679 optional => 1,
680 },
9e784b11
DC
681 hookscript => {
682 type => 'string',
683 format => 'pve-volume-id',
684 optional => 1,
685 description => "Script that will be executed during various steps in the vms lifetime.",
686 },
6dbcb073
DC
687 ivshmem => {
688 type => 'string',
689 format => $ivshmem_fmt,
4df98f2f
TL
690 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
691 ." the host.",
6dbcb073 692 optional => 1,
2e7b5925
AL
693 },
# Audio device configuration; the value is a property string validated against $audio_fmt.
audio0 => {
    type => 'string',
    format => $audio_fmt,
    description => "Configure an audio device, useful in combination with QXL/Spice.",
    optional => 1
},
c4df18db
AL
700 spice_enhancements => {
701 type => 'string',
702 format => $spice_enhancements_fmt,
703 description => "Configure additional enhancements for SPICE.",
704 optional => 1
705 },
b8e7068a
DC
706 tags => {
707 type => 'string', format => 'pve-tag-list',
708 description => 'Tags of the VM. This is only meta information.',
709 optional => 1,
710 },
2cf61f33
SR
711 rng0 => {
712 type => 'string',
713 format => $rng_fmt,
714 description => "Configure a VirtIO-based Random Number Generator.",
715 optional => 1,
716 },
26b443c8
TL
717 meta => {
718 type => 'string',
719 format => $meta_info_fmt,
720 description => "Some (read-only) meta-information about this guest.",
721 optional => 1,
722 },
83870398
DB
723 affinity => {
724 type => 'string', format => 'pve-cpuset',
326704e7 725 description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
83870398
DB
726 optional => 1,
727 },
9ed7a77c
WB
728};
729
cb702ebe
DL
# Property-string format for the 'cicustom' option. Each key (meta, network,
# user, vendor) is optional and takes a volume ID pointing at a custom
# cloud-init file of that kind.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Two single-quoted parts joined with '.'; the previous version put the
        # concatenation inside one literal, so '"', a newline and '."' were
        # emitted as part of the description.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
761PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
762
# Schema of the cloud-init related VM options (all optional). Descriptions
# prefixed with 'cloud-init:' mark settings that are handed to cloud-init.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    ciupgrade => {
        optional => 1,
        type => 'boolean',
        description => 'cloud-init: do an automatic package upgrade after the first boot.'
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    # These options configure VMs, so the help text says "VM" rather than "container".
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a VM. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a VM. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
818
819# what about other qemu settings ?
820#cpu => 'string',
821#machine => 'string',
822#fda => 'file',
823#fdb => 'file',
824#mtdblock => 'file',
825#sd => 'file',
826#pflash => 'file',
827#snapshot => 'bool',
828#bootp => 'file',
829##tftp => 'dir',
830##smb => 'dir',
831#kernel => 'file',
832#append => 'string',
833#initrd => 'file',
834##soundhw => 'string',
835
# Publish every option currently in $confdesc as a standard option named "pve-qm-<key>".
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}
839
5bdcf937 840my $MAX_NETS = 32;
bae179aa 841my $MAX_SERIAL_PORTS = 4;
1989a89c 842my $MAX_PARALLEL_PORTS = 3;
2ed5d572
AD
843my $MAX_NUMA = 8;
844
ffc0d8c7
WB
845my $numa_fmt = {
846 cpus => {
847 type => "string",
848 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
52261945 849 description => "CPUs accessing this NUMA node.",
ffc0d8c7
WB
850 format_description => "id[-id];...",
851 },
852 memory => {
853 type => "number",
52261945 854 description => "Amount of memory this NUMA node provides.",
ffc0d8c7
WB
855 optional => 1,
856 },
857 hostnodes => {
858 type => "string",
859 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
52261945 860 description => "Host NUMA nodes to use.",
ffc0d8c7
WB
861 format_description => "id[-id];...",
862 optional => 1,
863 },
864 policy => {
865 type => 'string',
866 enum => [qw(preferred bind interleave)],
52261945 867 description => "NUMA allocation policy.",
ffc0d8c7
WB
868 optional => 1,
869 },
870};
871PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
2ed5d572
AD
872my $numadesc = {
873 optional => 1,
ffc0d8c7 874 type => 'string', format => $numa_fmt,
52261945 875 description => "NUMA topology.",
2ed5d572
AD
876};
877PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
878
# Add the config keys numa0 .. numa<$MAX_NUMA - 1>; they all share the single $numadesc schema.
for my $node_idx (0 .. $MAX_NUMA - 1) {
    $confdesc->{"numa$node_idx"} = $numadesc;
}
1e3baf05 882
f7bc17ca
TL
883my $nic_model_list = [
884 'e1000',
885 'e1000-82540em',
886 'e1000-82544gc',
887 'e1000-82545em',
e83dd50a 888 'e1000e',
f7bc17ca
TL
889 'i82551',
890 'i82557b',
891 'i82559er',
892 'ne2k_isa',
893 'ne2k_pci',
894 'pcnet',
895 'rtl8139',
896 'virtio',
897 'vmxnet3',
898];
6b64503e 899my $nic_model_list_txt = join(' ', sort @$nic_model_list);
1e3baf05 900
52261945
DM
901my $net_fmt_bridge_descr = <<__EOD__;
902Bridge to attach the network device to. The Proxmox VE standard bridge
903is called 'vmbr0'.
904
905If you do not specify a bridge, we create a kvm user (NATed) network
906device, which provides DHCP and DNS services. The following addresses
907are used:
908
909 10.0.2.2 Gateway
910 10.0.2.3 DNS Server
911 10.0.2.4 SMB Server
912
913The DHCP server assign addresses to the guest starting from 10.0.2.15.
914__EOD__
915
# Property-string schema describing a single virtual network device ('net[n]' options).
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # one alias entry per NIC model name
    # NOTE(review): presumably enables the '<model>=<macaddr>' short syntax - confirm against
    # the keyAlias/alias handling in PVE::JSONSchema
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 64,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};
52261945 976
1e3baf05
DM
977my $netdesc = {
978 optional => 1,
7f694a71 979 type => 'string', format => $net_fmt,
52261945 980 description => "Specify network devices.",
1e3baf05 981};
52261945 982
1e3baf05
DM
983PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
984
0c9a7596
AD
985my $ipconfig_fmt = {
986 ip => {
987 type => 'string',
988 format => 'pve-ipv4-config',
989 format_description => 'IPv4Format/CIDR',
990 description => 'IPv4 address in CIDR format.',
991 optional => 1,
992 default => 'dhcp',
993 },
994 gw => {
995 type => 'string',
996 format => 'ipv4',
997 format_description => 'GatewayIPv4',
998 description => 'Default gateway for IPv4 traffic.',
999 optional => 1,
1000 requires => 'ip',
1001 },
1002 ip6 => {
1003 type => 'string',
1004 format => 'pve-ipv6-config',
1005 format_description => 'IPv6Format/CIDR',
1006 description => 'IPv6 address in CIDR format.',
1007 optional => 1,
1008 default => 'dhcp',
1009 },
1010 gw6 => {
1011 type => 'string',
1012 format => 'ipv6',
1013 format_description => 'GatewayIPv6',
1014 description => 'Default gateway for IPv6 traffic.',
1015 optional => 1,
1016 requires => 'ip6',
1017 },
1018};
1019PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
1020my $ipconfigdesc = {
1021 optional => 1,
1022 type => 'string', format => 'pve-qm-ipconfig',
1023 description => <<'EODESCR',
1024cloud-init: Specify IP addresses and gateways for the corresponding interface.
1025
1026IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.
1027
4df98f2f
TL
1028The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
1029gateway should be provided.
988be8d0
ML
1030For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
1031cloud-init 19.4 or newer.
0c9a7596 1032
4df98f2f
TL
1033If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
1034dhcp on IPv4.
0c9a7596
AD
1035EODESCR
1036};
# The 'pve-qm-ipconfig' standard option must describe the cloud-init IP configuration, not the
# NIC definition ($netdesc is already registered as 'pve-qm-net').
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1038
1e3baf05
DM
# Create the indexed NIC options and the matching cloud-init IP configuration options.
for my $idx (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$idx"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$idx"} = $ipconfigdesc;
}

# Every cloud-init option is also a regular VM config option.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1047
83870398
DB
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);

# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset() and returns the short string form of the
# resulting set. On a parse error it returns nothing if $noerr is set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    # only the member list (second return value) is needed
    my (undef, $cpu_ids) = eval { PVE::CpuSet::parse_cpuset($set_text) };

    if ($@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    return PVE::CpuSet->new($cpu_ids)->short_string();
}
1061
ffa42b86
DC
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier accepting the keywords 'none' and 'cdrom' in addition to everything that
# verify_volume_id_or_absolute_path() accepts.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    if ($volid eq 'none' || $volid eq 'cdrom') {
        return $volid;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1070
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);

# Format verifier: values starting with '/' are returned unchanged, anything else has to pass
# the 'pve-volume-id' format check (whose result is returned).
#
# If that check fails, returns nothing when $noerr is set and re-throws the error otherwise.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid =~ m{^/};

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        die $err if !$noerr;
        return;
    }

    return $checked;
}
1084
bae179aa
DA
1085my $serialdesc = {
1086 optional => 1,
ca0cef26 1087 type => 'string',
1b0b51ed 1088 pattern => '(/dev/.+|socket)',
52261945
DM
1089 description => "Create a serial device inside the VM (n is 0 to 3)",
1090 verbose_description => <<EODESCR,
52261945
DM
1091Create a serial device inside the VM (n is 0 to 3), and pass through a
1092host serial device (i.e. /dev/ttyS0), or create a unix socket on the
1093host side (use 'qm terminal' to open a terminal connection).
bae179aa 1094
4df98f2f
TL
1095NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
1096use with special care.
bae179aa 1097
52261945 1098CAUTION: Experimental! User reported problems with this option.
bae179aa
DA
1099EODESCR
1100};
bae179aa 1101
1989a89c
DA
1102my $paralleldesc= {
1103 optional => 1,
ca0cef26 1104 type => 'string',
9ecc8431 1105 pattern => '/dev/parport\d+|/dev/usb/lp\d+',
52261945
DM
1106 description => "Map host parallel devices (n is 0 to 2).",
1107 verbose_description => <<EODESCR,
19672434 1108Map host parallel devices (n is 0 to 2).
1989a89c 1109
4df98f2f
TL
1110NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
1111machines - use with special care.
1989a89c 1112
52261945 1113CAUTION: Experimental! User reported problems with this option.
1989a89c
DA
1114EODESCR
1115};
1989a89c
DA
1116
# Add the indexed device options (parallelN, serialN, hostpciN, usbN) and all drive keys to the
# VM config schema.
for my $idx (0 .. $MAX_PARALLEL_PORTS - 1) {
    $confdesc->{"parallel$idx"} = $paralleldesc;
}

for my $idx (0 .. $MAX_SERIAL_PORTS - 1) {
    $confdesc->{"serial$idx"} = $serialdesc;
}

for my $idx (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$idx"} = $PVE::QemuServer::PCI::hostpcidesc;
}

for my $drive_key (keys %{$PVE::QemuServer::Drive::drivedesc_hash}) {
    $confdesc->{$drive_key} = $PVE::QemuServer::Drive::drivedesc_hash->{$drive_key};
}

for my $idx (0 .. $PVE::QemuServer::USB::MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$idx"} = $PVE::QemuServer::USB::usbdesc;
}
1136
5cfa9f5f
SR
1137my $boot_fmt = {
1138 legacy => {
1139 optional => 1,
1140 default_key => 1,
1141 type => 'string',
1142 description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
1143 . " Deprecated, use 'order=' instead.",
1144 pattern => '[acdn]{1,4}',
1145 format_description => "[acdn]{1,4}",
1146
1147 # note: this is also the fallback if boot: is not given at all
1148 default => 'cdn',
1149 },
1150 order => {
1151 optional => 1,
1152 type => 'string',
1153 format => 'pve-qm-bootdev-list',
1154 format_description => "device[;device...]",
1155 description => <<EODESC,
1156The guest will attempt to boot from devices in the order they appear here.
1157
1158Disks, optical drives and passed-through storage USB devices will be directly
1159booted from, NICs will load PXE, and PCIe devices will either behave like disks
1160(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).
1161
1162Note that only devices in this list will be marked as bootable and thus loaded
1163by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
1164(e.g. software-raid), you need to specify all of them here.
1165
1166Overrides the deprecated 'legacy=[acdn]*' value when given.
1167EODESC
1168 },
1169};
1170PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1171
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single entry of the boot order list.
#
# Accepts valid drive names (except efidisk*/tpmstate*) and net<N>/usb<N>/hostpci<N> keys that
# exist in the config schema. Returns $dev on success; otherwise returns nothing if $noerr is
# set and dies if not.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $excluded_drive = $dev =~ m/^(?:efidisk|tpmstate)/;
    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$excluded_drive;

    for my $base (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$base\d+$/ && $confdesc->{$dev};
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1193
# Render a list of boot devices (array ref) as a 'boot' property string using the 'order' key.
# An empty list yields an empty string.
sub print_bootorder {
    my ($devs) = @_;

    if (!@$devs) {
        return "";
    }

    my $boot = { order => join(';', @$devs) };
    return PVE::JSONSchema::print_property_string($boot, $boot_fmt);
}
1200
1e3baf05
DM
# cached result of the KVM_GET_API_VERSION ioctl
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. A true result is cached for later calls.
# Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1214
1476b99f
DC
# per-binary caches: parsed version string and the binary's mtime when it was parsed
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version printed by `$binary --version` (e.g. "8.1.2"), or 'unknown' if the output
# could not be parsed or the command failed (a failure is only warned about).
#
# $binary defaults to the QEMU command for the host architecture. Results are cached per binary
# and re-read whenever the binary's mtime changes.
#
# Dies if $binary cannot be stat()ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # File::stat's stat() returns undef on failure - fail with a useful message instead of
    # "Can't call method mtime on an undefined value"
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";
    my $mtime = $st->mtime;

    my $cached = $kvm_user_version->{$binary};
    return $cached if $cached && ($kvm_mtime->{$binary} // -1) == $mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $mtime;

    my $parse_line = sub {
        my ($line) = @_;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $parse_line); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
4df98f2f
TL
# Thin wrapper around PVE::QemuServer::Machine::extract_version() that falls back to the
# version of the installed QEMU binary when no $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1e3baf05 1249
db70021b
TL
# True if the character device /dev/vhost-net exists.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1e3baf05 1253
1e3baf05
DM
# True if $key is a known VM config option, i.e. has an entry in the config schema.
sub option_exists {
    my ($key) = @_;
    return defined($confdesc->{$key});
}
1e3baf05 1258
# cached result of get_cdrom_path(); '' means "looked, but found nothing"
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 that is a symlink, or '' (with a
# logged warning) if none is. The result is cached.
sub get_cdrom_path {
    if (!defined($cdrom_path)) {
        $cdrom_path = first { -l $_ } ('/dev/cdrom', '/dev/cdrom1', '/dev/cdrom2');

        if (!defined($cdrom_path)) {
            log_warn("no physical CD-ROM available, ignoring");
            $cdrom_path = '';
        }
    }

    return $cdrom_path;
}
1273
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'        -> path of the physical drive (see get_cdrom_path)
#   'none'         -> empty string
#   absolute path  -> returned unchanged
#   anything else  -> resolved as a volume ID via PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m{^/};

    return PVE::Storage::path($storecfg, $cdrom);
}
1287
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/... paths and values that already look like 'storage:volume' are
# returned unchanged. Other values containing a '/' are rejected (returns nothing). Plain file
# names are mapped to the 'local' storage: 'local:iso/<file>' for cdrom media,
# 'local:<vmid>/<file>' otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m{^/dev/.+} || $file =~ m/^[^:]+:.+$/;

    return if $file =~ m{/};

    my $subdir = ($media && $media eq 'cdrom') ? 'iso' : $vmid;
    return "local:$subdir/$file";
}
1306
# Check that the storage content type $vtype fits the drive's $media setting ('disk' expects
# 'images', 'cdrom' expects 'iso'). Does nothing if $media is not set.
#
# Raises a parameter exception for option $opt on a mismatch and dies with an internal error
# for any other $media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my $etype = { disk => 'images', cdrom => 'iso' }->{$media};
    die "internal error" if !defined($etype);

    return if $vtype eq $etype;

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1325
# Normalize $drive->{file} and $drive->{media} in place.
#
# A file value that is not 'cdrom'/'none', a /dev/... path, a 'storage:volume' style ID or a
# plain number is treated as a file system path and converted to a volume ID (raising a
# parameter exception for $opt if no storage matches). The media type defaults to 'cdrom' for
# ISO volumes and for the 'cdrom'/'none' keywords.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $keep_as_is = $file =~ m/^(cdrom|none)$/
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/
        || $file =~ m/^\d+$/;

    if (!$keep_as_is) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        raise_param_exc({ $opt => "unable to associate path '$file' to any storage"})
            if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1345
b3c2bdd1
DM
# Parse the 'hotplug' option into a hash ref { <feature> => 1, ... }.
#
# '0' disables everything (empty hash), '1' selects the schema default list. Dies on unknown
# feature names.
sub parse_hotplug_features {
    my ($data) = @_;

    my $enabled = {};

    return $enabled if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    for my $name (PVE::Tools::split_list($data)) {
        if (my ($known) = $name =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/) {
            $enabled->{$known} = 1;
            next;
        }
        die "invalid hotplug feature '$name'\n";
    }

    return $enabled;
}
1364
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for the 'hotplug' option.
#
# Returns $value if parse_hotplug_features() accepts it. For invalid values it returns nothing
# when $noerr is set; otherwise the parser's error is propagated unchanged.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # the parser reports invalid input by dying, so it has to be caught to honor $noerr
    eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if !$err;

    return if $noerr;

    die $err;
}
1375
28ef82d3
DM
# Send a SCSI INQUIRY through the SG_IO ioctl of the open file handle $fh.
#
# Returns a hash ref with the keys type, removable, vendor, product and revision. On any
# failure it dies, or returns nothing if $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    # 36 byte response buffer, 8 byte sense buffer, 6 byte command (opcode 0x12, length 36)
    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # the 'P' fields pack pointers to $buf, $cmd and $sensebuf, so those have to stay alive
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 - NOTE(review): host_status/driver_status per the sg.h layout, confirm
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    $res->{removable} = $res->{removable} & 128 ? 1 : 0; # only bit 7 of that byte is the flag
    $res->{type} &= 0x1F; # lower 5 bits hold the device type

    return $res;
}
1425
# Open $path read/write and run a SCSI inquiry on it. Returns the inquiry result hash, or
# nothing if the path cannot be opened or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    my $fh = IO::File->new("+<$path");
    return if !$fh;

    my $info = scsi_inquiry($fh, 1);
    close($fh);

    return $info;
}
1435
# Build the QEMU device string for the USB tablet. It is attached to the 'ehci' bus for q35
# machines and aarch64 guests, and to 'uhci' otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($is_q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1451
# Build the QEMU device string for a USB keyboard. Only aarch64 guests get one; for every other
# architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1459
a183df68
TL
# Drive ID as used in device/drive names: interface name directly followed by the index,
# e.g. 'scsi0'.
my sub get_drive_id {
    my ($drive) = @_;
    return $drive->{interface} . $drive->{index};
}
1464
# Build the QEMU '-device' argument for the guest-visible side of a drive.
#
# Supports the virtio, scsi, ide and sata interfaces; dies for anything else ('usb' is not
# implemented). The device refers to its backend as 'drive-<id>' and uses '<id>' as its own ID,
# where <id> is the interface name followed by the drive index.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    my $interface = $drive->{interface};

    if ($interface eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($interface eq 'scsi') {
        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;

        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            my $file = $drive->{file};
            if ($file =~ m{^/}) {
                # absolute path: ask the device itself what it is
                $path = $file;
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $file);
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m{^iscsi://} && !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        my $scsihw = $conf->{scsihw};
        my $bus = "bus=$controller_prefix$controller.0";
        if (!$scsihw || $scsihw =~ m/^lsi/ || $scsihw eq 'pvscsi') {
            $device = "scsi-$devicetype,$bus,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,$bus,channel=0,scsi-id=0,lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($interface eq 'ide' || $interface eq 'sata') {
        my $is_sata = $interface eq 'sata';
        my $maxdev = $is_sata ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        $device .= $is_sata ? ",bus=ahci$controller.$unit" : ",bus=ide.$controller,unit=$unit";
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                # the model is stored URL-encoded
                $device .= ",model=" . URI::Escape::uri_unescape($model);
            }
            $device .= ",rotation_rate=1" if $drive->{ssd};
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($interface eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        # the serial is stored URL-encoded
        $device .= ",serial=" . URI::Escape::uri_unescape($serial);
    }

    return $device;
}
1560
# Read the iSCSI initiator name from the first 'InitiatorName=' line of
# /etc/iscsi/initiatorname.iscsi. Returns nothing if the file cannot be opened and undef if it
# contains no such line.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1574
eec9f9fe
FE
# Decide whether io_uring may be used as default AIO mode for a drive on the storage described
# by $scfg (may be undef). Returns 1 if so, nothing otherwise.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    if ($scfg) {
        my $type = $scfg->{type};

        # io_uring with cache mode writeback or writethrough on krbd will hang...
        return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

        # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
        # sometimes, just plain disable...
        return if $type eq 'lvm';

        # io_uring causes problems when used with CIFS since kernel 5.15
        # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
        return if $type eq 'cifs';
    }

    return 1;
}
1591
b7071d6c
FE
# Tell whether a drive is run with a direct (O_DIRECT style) cache mode.
#
# With an explicit 'cache' setting this is true for 'off', 'none' and 'directsync'. Without
# one it is true for every non-CD-ROM drive, except on btrfs storages that do not have 'nocow'
# set.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache = $drive->{cache}) {
        return scalar($cache =~ /^(?:off|none|directsync)$/);
    }

    return 0 if drive_is_cdrom($drive);

    my $btrfs_with_cow = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};
    return $btrfs_with_cow ? 0 : 1;
}
1605
# Build the QEMU '-drive' argument (backend side) for a drive.
#
# $pbs_name, if set, is the node name of a PBS block device the drive is backed by; the drive
# is then set up with the 'alloc-track' format on top of it. $io_uring enables io_uring as
# default AIO mode where the storage allows it.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    my $path;
    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } elsif ($storeid) {
        $path = PVE::Storage::path($storecfg, $volid);
        $format //= qemu_img_format($scfg, $volname);
    } else {
        $path = $volid;
        $format //= "raw";
    }

    my $is_rbd = $path =~ m/^rbd:/;

    my $opts = '';

    # options passed through to QEMU as they are
    for my $name (qw(heads secs cyls trans media cache rerror werror aio discard)) {
        next if !defined($drive->{$name});
        $opts .= ",$name=$drive->{$name}";
    }

    # snapshot only accepts on|off
    $opts .= ",snapshot=" . ($drive->{snapshot} ? 'on' : 'off') if defined($drive->{snapshot});

    # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
    $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off') if defined($drive->{ro});

    for my $direction (['', '-total'], ['_rd', '-read'], ['_wr', '-write']) {
        my ($suffix, $qmp) = @$direction;

        # [ drive option, QEMU throttling option, value needs scaling by 1024*1024 ]
        my @limits = (
            ["mbps$suffix", "bps$qmp", 1],
            ["mbps${suffix}_max", "bps$qmp-max", 1],
            ["bps${suffix}_max_length", "bps$qmp-max-length", 0],
            ["iops$suffix", "iops$qmp", 0],
            ["iops${suffix}_max", "iops$qmp-max", 0],
            ["iops${suffix}_max_length", "iops$qmp-max-length", 0],
        );

        for my $limit (@limits) {
            my ($key, $qemu_opt, $scale) = @$limit;
            my $value = $drive->{$key} or next;
            $value = int($value * 1024 * 1024) if $scale;
            $opts .= ",throttling.$qemu_opt=$value";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;

    if (!$drive->{aio}) {
        my $aio;
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $aio = 'io_uring';
        } else {
            # aio native works only with O_DIRECT
            $aio = $cache_direct ? 'native' : 'threads';
        }
        $opts .= ",aio=$aio";
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes";
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive_id$opts";
}
1731
5921764c
SR
# Build the QEMU block device definition for a read-only PBS backed node named $pbs_name.
# 'namespace' and 'keyfile' are only added if set in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @parts, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @parts, "snapshot=$pbs_conf->{snapshot}", "archive=$pbs_conf->{archive}";
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1742
# Build the QEMU '-device' argument for the guest-visible side of a network device.
#
# $net is the parsed net<N> property string, $netid the config key ('net<N>'). With
# $use_old_bios_files set, an explicit PXE ROM file is selected for the models that have one.
# Dies if a forced MTU is below 576 or above the bridge MTU.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($is_virtio && $net->{queues} && $net->{queues} > 1) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        $tmpstr .= ",packed=on" if min_version($machine_version, 7, 1);
    }

    if ($is_virtio && min_version($machine_version, 7, 1)) {
        $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024";
    }

    $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex};

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # special value: inherit the MTU of the bridge
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        # Keyed by QEMU device name. The NE2000 PCI card is named 'ne2k_pci' (there is no
        # 'ne2k' model), so that is the key which has to be used for its ROM.
        my $romfile_for = {
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k_pci' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        };
        if (my $romfile = $romfile_for->{$device}) {
            $tmpstr .= ",romfile=$romfile";
        }
    }

    return $tmpstr;
}
1805
# Build the QEMU netdev option string (the backend side) for one network device.
#
# Parameters:
#   $vmid    - VM id, used for the tap interface name and the fallback hostname
#   $conf    - VM config hash; only 'name' is read here
#   $arch    - guest architecture, passed to is_native() to decide about vhost
#   $net     - parsed network device hash (model, bridge, queues are read)
#   $netid   - device id, must have the form 'netN' with N < $MAX_NETS
#   $hotplug - if true, the hotplug variant of the bridge script is used
#
# Returns a 'type=tap,...' string if a bridge is configured, else a 'type=user,...' string.
# Dies if $netid is malformed or out of range, or if the tap interface name gets too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # reject ids that are not of the form netN up front; otherwise the index would stay empty,
    # pass the numeric range check below and yield an interface name without index
    die "got strange net id '$netid'\n" if $netid !~ m/^net(\d+)$/;
    my $i = int($1);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    # vhost is only requested for virtio devices on a native-arch guest with kernel support
    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    # multiqueue is only passed through for virtio
    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1e3baf05 1843
55655ebc
DC
# Maps the configured display type (the 'type' of a parsed vga property) to the QEMU device
# name used on the command line. Types without an entry here yield no device type from this
# table (print_vga_device dies in that case unless a QXL device is selected).
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};
1851
# Build the QEMU device string for a display adapter.
#
# Parameters:
#   $conf            - VM config hash (ostype and bios are read, and it is passed on to the
#                      q35 machine type check)
#   $vga             - parsed vga property hash (type, memory)
#   $arch            - guest architecture; on 'aarch64' virtio-vga is replaced by virtio-gpu
#   $machine_version - machine version, checked with min_version()
#   $machine         - machine type, forwarded to the PCI(e) address helpers
#   $id              - optional display index; appended to the 'vga' device id
#   $qxlnum          - if true, a QXL device is used ('qxl' with $id, 'qxl-vga' without)
#   $bridges         - forwarded to the PCI(e) address helpers
#
# Returns the device string "<type>,id=<vgaid>...". Dies if no device type can be determined
# or, for 'virtio-gl', if the GL/EGL libraries or a DRM render node are missing.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # the alternation must be grouped as a whole, otherwise '^' and '$' each anchor only
        # one of the two alternatives
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        # secondary QXL device without explicit memory setting: fixed sizes in bytes
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the standard VGA device on Windows guests unless OVMF is used
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1919
ffc0d8c7
WB
# Parse a ';'-separated list of numbers and number ranges, e.g. "0-3;5".
#
# Returns an array reference with one [ $first, $last ] pair per part; $last is undef for a
# single number. Dies on a malformed part or on a range whose end is smaller than its start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        die "invalid range: $part\n" if $part !~ /^\s*(\d+)(?:-(\d+))?\s*$/;

        my ($first, $last) = ($1, $2);
        die "invalid range: $part ($last < $first)\n" if defined($last) && $last < $first;

        push @ranges, [ $first, $last ];
    }

    return \@ranges;
}
2ed5d572 1933
ffc0d8c7
WB
# Parse a NUMA property string according to $numa_fmt.
#
# If present, the 'cpus' and 'hostnodes' values are additionally converted into lists of
# ranges via parse_number_sets(). Errors are not caught here.
sub parse_numa {
    my ($data) = @_;

    my $numa = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        next if !defined($numa->{$key});
        $numa->{$key} = parse_number_sets($numa->{$key});
    }

    return $numa;
}
1942
1e3baf05
DM
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a network device property string according to $net_fmt.
#
# On a parse error the error is emitted as a warning and nothing is returned. If the parsed
# value has no MAC address and $disable_mac_autogen is not set, a random one is generated
# using the 'mac_prefix' from datacenter.cfg.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $net if defined($net->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $net;
}
1958
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an IP configuration property string according to $ipconfig_fmt and check that the
# gateway settings are consistent with the address settings.
#
# Returns the parsed hash, or { ip => 'dhcp', ip6 => 'dhcp' } if neither an IPv4 nor an IPv6
# address is set. On a parse error or an inconsistent combination a warning is emitted and
# nothing is returned.
sub parse_ipconfig {
    my ($data) = @_;

    my $ipcfg = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # the checks are ordered: the later ones rely on the earlier ones having passed
    my $complaint;
    if ($ipcfg->{gw} && !$ipcfg->{ip}) {
        $complaint = 'gateway specified without specifying an IP address';
    } elsif ($ipcfg->{gw6} && !$ipcfg->{ip6}) {
        $complaint = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($ipcfg->{gw} && $ipcfg->{ip} eq 'dhcp') {
        $complaint = 'gateway specified together with DHCP';
    } elsif ($ipcfg->{gw6} && $ipcfg->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $complaint = "IPv6 gateway specified together with $ipcfg->{ip6} address";
    }

    if (defined($complaint)) {
        warn $complaint;
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$ipcfg->{ip} && !$ipcfg->{ip6};

    return $ipcfg;
}
1993
# Serialize a parsed network device hash back into a property string using $net_fmt.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1999
# Re-serialize all 'netN' entries of $settings in place, after passing them through
# parse_net() (which fills in a MAC address when none is set). Entries that fail to parse
# are left unchanged.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt});
        next if !$net;

        $settings->{$opt} = print_net($net);
    }
}
2010
055d554d
DM
# Check whether the volume $volid is owned by the VM $vmid according to the storage layer.
#
# Returns 1 if PVE::Storage::path() reports $vmid as owner. Returns nothing for values
# starting with '/', when the lookup fails (errors are deliberately ignored) or when the
# owner differs.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    # values starting with '/' are not looked up in the storage layer
    return if $volid =~ m|^/|;

    # only the second element (the owner) of the returned list is of interest
    my $owner = eval { (PVE::Storage::path($storecfg, $volid))[1] };

    return 1 if $owner && ($owner == $vmid);
    return;
}
2024
055d554d
DM
# Handle a drive that is no longer referenced by its config key.
#
# A cloud-init drive is freed on the storage (a failure only produces a warning) and the
# 'cloudinit' entry is removed from $conf. Any other non-CD-ROM drive is registered as an
# unused volume in $conf, provided the VM owns the volume.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    my $volid = $drive->{file};

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn $@ if $@;
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2039
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property-string schema for the SMBIOS type 1 settings. All free-text fields share the same
# pattern and format description and only differ in their description.
my $smbios1_fmt = do {
    # schema entry for one optional free-text field
    my $text_field = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    +{
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $text_field->("Set SMBIOS1 version."),
        serial => $text_field->("Set SMBIOS1 serial number."),
        manufacturer => $text_field->("Set SMBIOS1 manufacturer."),
        product => $text_field->("Set SMBIOS1 product ID."),
        sku => $text_field->("Set SMBIOS1 SKU string."),
        family => $text_field->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };
};
2097
2796e7d5
DM
# Parse an SMBIOS1 property string according to $smbios1_fmt.
#
# A parse error is turned into a warning; in that case the returned value is undefined.
sub parse_smbios1 {
    my ($data) = @_;

    my $smbios1 = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $smbios1;
}
2105
cd11416f
DM
# Serialize a parsed SMBIOS1 hash back into a property string using $smbios1_fmt.
sub print_smbios1 {
    my ($smbios1) = @_;

    my $format = $smbios1_fmt;
    return PVE::JSONSchema::print_property_string($smbios1, $format);
}
2110
ff6ffe20 2111PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2796e7d5 2112
0ea9541d
DM
# Parse a watchdog property string according to $watchdog_fmt.
#
# Returns nothing for an empty/false value. A parse error is turned into a warning; in that
# case the returned value is undefined.
sub parse_watchdog {
    my ($value) = @_;

    return if !$value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2122
# Parse the 'agent' option of a VM config according to $agent_fmt.
#
# Always returns a hash reference: the parsed settings if the agent is enabled, otherwise an
# empty hash (also when the option is not set or cannot be parsed; a parse error is
# additionally emitted as a warning).
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_prop = $conf->{agent};
    return {} if !defined($agent_prop);

    my $agent = eval { parse_property_string($agent_fmt, $agent_prop) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2135
a2af1bbe
TL
# Look up a single key of the parsed guest agent settings.
#
# Returns an explicit undef if the VM config has no 'agent' option, otherwise the value
# stored under $key in the result of parse_guest_agent().
sub get_qga_key {
    my ($conf, $key) = @_;

    # explicit undef is kept on purpose, so the result is a single value in list context too
    return undef if !defined($conf->{agent});

    return parse_guest_agent($conf)->{$key};
}
2143
55655ebc
DC
# Parse a vga property string according to $vga_fmt.
#
# Returns an empty hash for an empty/false value. A parse error is turned into a warning; in
# that case the returned value is undefined.
sub parse_vga {
    my ($value) = @_;

    return {} if !$value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2152
2cf61f33
SR
# Parse an rng property string according to $rng_fmt.
#
# Returns nothing for an empty/false value. A parse error is turned into a warning; in that
# case the returned value is undefined.
sub parse_rng {
    my ($value) = @_;

    return if !$value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2162
26b443c8
TL
# Parse a 'meta' property string according to $meta_info_fmt.
#
# Returns nothing for an empty/false value. A parse error is turned into a warning; in that
# case the returned value is undefined.
sub parse_meta_info {
    my ($value) = @_;

    return if !$value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2172
# Create the 'meta' property string for a new VM: it records the result of
# kvm_user_version() as 'creation-qemu' and the current time as 'ctime'.
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2184
cc181036
TL
# Return extra QEMU command line arguments needed to keep the hardware layout stable for VMs
# which always use the latest machine version but were created before a machine version
# transition that affected HW such that, e.g., an OS config change would be required (we do
# not want to pin the machine version for non-windows OS types).
#
# Returns an array reference with the arguments, or nothing if no fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are affected
    return if defined($conf->{machine}) && $conf->{machine} !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or without creation info) are affected
    my $created_with = $meta->{'creation-qemu'};
    return if defined($created_with) && min_version($created_with, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    if ($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26') {
        # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
        # and thus with the predictable interface naming of systemd
        return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
    }

    return;
}
2209
1e3baf05
DM
# add JSON properties for create and set function
#
# Copies the schema entries of all options in $confdesc into $prop, except for a fixed set
# of options which are skipped. With $with_disk_alloc set, drive options use the schema
# variant from $PVE::QemuServer::Drive::drivedesc_hash_with_alloc instead.
# Returns $prop.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    my %skipped = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (grep { !$skipped{$_} } keys %$confdesc) {
        $prop->{$opt} = ($with_disk_alloc && is_valid_drivename($opt))
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2235
39051ac0
FE
# Properties that we can read from an OVF file
#
# Returns a schema hash with one optional entry per valid drive name plus the optional
# 'cores', 'memory' and 'name' entries.
sub json_ovf_properties {
    my $prop = {
        map {
            my $device = $_;
            ($device => {
                type => 'string',
                format => 'pve-volume-id-or-absolute-path',
                description => "Disk image that gets imported to $device",
                optional => 1,
            })
        } PVE::QemuServer::Drive::valid_drive_names()
    };

    $prop->{cores} = {
        type => 'integer',
        description => "The number of CPU cores.",
        optional => 1,
    };
    $prop->{memory} = {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        optional => 1,
    };
    $prop->{name} = {
        type => 'string',
        description => "Name of the VM.",
        optional => 1,
    };

    return $prop;
}
2267
d41121fd
DM
# return copy of $confdesc_cloudinit to generate documentation
#
# The copy is a deep clone, so callers may modify it freely.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);

    return $copy;
}
2273
f16cf6c3
WB
# Return a lookup hash (option name => 1) containing all options of $confdesc_cloudinit,
# 'name' and every 'netN' option for N below $MAX_NETS.
sub cloudinit_pending_properties {
    my %pending = (name => 1);

    $pending{$_} = 1 for keys %$confdesc_cloudinit;
    $pending{"net$_"} = 1 for 0 .. ($MAX_NETS - 1);

    return \%pending;
}
2282
1e3baf05
DM
# Validate $value against the schema type of the option $key in $confdesc.
#
# Returns the normalized value: 1/0 for booleans, an integer for integers, the unchanged
# value for numbers and for strings with a format (after the format check), and strings
# without a format with one pair of enclosing double quotes removed.
# Dies for unknown options, undefined values, values containing a line break and values
# which do not match the type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        if (my $fmt = $desc->{format}) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        # strip one pair of enclosing double quotes, if any
        return $value =~ s/^\"(.*)\"$/$1/r;
    }

    die "internal error"
}
2319
# Remove the volumes of a VM and then remove or replace its configuration.
#
# Parameters:
#   $storecfg           - storage configuration
#   $vmid               - id of the VM to destroy
#   $skiplock           - if true, the config lock is not checked
#   $replacement_conf   - if defined, written as new config instead of destroying the config
#   $purge_unreferenced - if true, also free all 'images' volumes the storage layer lists
#                         for $vmid, even if they are not referenced in the config
#
# Dies if the lock check fails or if the VM is a template whose base volume is still in use.
# Failures to free single volumes only produce warnings.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    # a 'suspended' lock does not block destruction
    if (!$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended')) {
        PVE::QemuConfig->check_lock($conf);
    }

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);

        });
    }

    # volumes already handled, so each one is freed at most once
    my $volids = {};
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $volids->{$volid};

        # only volumes which the storage layer reports as owned by this VM are removed
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # vmstate volumes referenced by snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        next if !defined($snap->{vmstate});
        my $drive = PVE::QemuConfig->parse_volume('vmstate', $snap->{vmstate}, 1);
        next if !defined($drive);
        $remove_owned_drive->('vmstate', $drive);
    }

    # volumes referenced by pending changes
    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2392
# Parse the raw content of a VM configuration file.
#
# Parameters:
#   $filename - path of the config file; must end in '/qemu-server/<vmid>.conf', the VM id
#               is taken from it
#   $raw      - file content; nothing is returned if it is undefined
#   $strict   - if true, parse errors are fatal, otherwise they are only warned about
#
# Returns a hash reference with the options of the main section plus the keys 'digest'
# (SHA1 hex digest of $raw), 'pending' (the [PENDING] section), 'cloudinit' (the
# [special:cloudinit] section) and 'snapshots' (one hash per other section, keyed by
# section name). Comment lines are collected as the description of the section that
# follows them.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points to the section currently being filled
    my $conf = $res;
    my $descr;
    # store the description collected so far in the current section and start a new one
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # any other section is a snapshot
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the line start like all other keys, so that a line of another
            # option merely ending in such text is not mistaken for a snapstate
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # stored as-is, without type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # 'cdrom' is stored under 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the drive's file to a volume id and re-serialize it
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2512
1858638f
DM
# Serialize a VM configuration hash into the raw config file format.
#
# The config is normalized in place first: a top-level 'snapstate' is dropped, 'cdrom' is
# moved to 'ide2', the legacy 'smp' option is replaced by 'sockets', all option values are
# type-checked, and 'unusedX' entries whose volume is referenced by a drive again are
# removed. Dies if a value fails the type check, if 'cdrom' and 'ide2' are both set, if
# 'delete' is used outside of the pending section or if a snapshot is named 'pending'.
#
# Returns the raw text: main section, then [PENDING], [special:cloudinit] and the snapshot
# sections (sorted by name), each only if applicable.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = delete $conf->{smp};
        delete $conf->{cores};
    }

    # volumes referenced by drives outside of snapshots
    my %volid_in_use;

    # keys which are no options and therefore not type-checked
    my %not_an_option = map { $_ => 1 } qw(
        digest
        description
        snapshots
        snapstate
        pending
        cloudinit
    );

    my $validate_section = sub {
        my ($section, $is_pending, $snapname) = @_;

        for my $key (keys %$section) {
            next if $not_an_option{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$is_pending;
                # fixme: check syntax?
                next;
            }

            my $checked = $section->{$key};
            eval { $checked = check_type($key, $checked); };
            die "unable to parse value of '$key' - $@" if $@;

            $section->{$key} = $checked;

            next if $snapname || !is_valid_drivename($key);

            my $drive = parse_drive($key, $checked);
            $volid_in_use{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    };

    $validate_section->($conf);

    $validate_section->($conf->{pending}, 1);

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $validate_section->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $volid_in_use{$conf->{$key}};
    }

    my $render_section = sub {
        my ($section, $is_pending) = @_;

        my $text = '';

        # add description as comment to top of file
        if (defined(my $descr = $section->{description})) {
            if ($descr) {
                for my $comment_line (split(/\n/, $descr)) {
                    $text .= '#' . PVE::Tools::encode_text($comment_line) . "\n";
                }
            } elsif ($is_pending) {
                $text .= "#\n";
            }
        }

        for my $key (sort keys %$section) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $text .= "$key: $section->{$key}\n";
        }

        return $text;
    };

    my $raw = $render_section->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $render_section->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)) {
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $render_section->($conf->{cloudinit});
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $render_section->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
1e3baf05 2619
# Return a hash reference with the default value of every option in $confdesc that defines
# one.
sub load_defaults {
    my %defaults;

    # we use static defaults from our JSON schema configuration
    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults{$key} = $default if defined($default);
    }

    return \%defaults;
}
2633
# Return a hash reference ($vmid => { exists => 1 }) for all guests of type 'qemu' which
# the cluster VM list assigns to the local node. The hash is empty if no VM list is
# available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    return {} if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    my $local_node = nodename();

    my @local_vmids = grep {
        my $entry = $ids->{$_};
        $entry->{node} && $entry->{node} eq $local_node
            && $entry->{type} && $entry->{type} eq 'qemu'
    } keys %$ids;

    return { map { ($_ => { exists => 1 }) } @local_vmids };
}
2649
64e13401
DM
# test if VM uses local resources (to prevent migration)
#
# Parameters:
#   $conf  - VM config hash
#   $noerr - if true, do not die when local resources are found
#
# Returns, in list context, ($loc_res, $mapped_res, $missing_mappings_by_node):
#   $loc_res                  - array ref with the config keys of all local resources
#   $mapped_res               - array ref with the keys of usb/hostpci entries using a mapping
#   $missing_mappings_by_node - hash ref: node name => array ref with the keys whose mapping
#                               has no entry for that node
# In scalar context only $loc_res is returned.
# Dies with "VM uses local resources" if any local resource is found and $noerr is not set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };

    # record $key for every node on which the mapping $id has no entry
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;
        for my $node (@$nodelist) {
            my $entry;
            if ($type eq 'pci') {
                $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
            } elsif ($type eq 'usb') {
                $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
            }
            if (!scalar($entry->@*)) {
                push @{$missing_mappings_by_node->{$node}}, $key;
            }
        }
    };

    push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
    push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    foreach my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # 'host' is not set for entries using a mapping, so guard against undef here
            next if $entry->{host} && $entry->{host} =~ m/^spice$/i;
            if ($entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        if ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if ($entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        # sockets are safe: they will recreated be on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2708
# check if used storages are available on all nodes (use by migrate)
#
# For every volume of $conf that resides on a storage, the storage has to be enabled both
# locally and on $node, and it has to allow the content type of the volume. Dies otherwise.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$sid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2732
719893a9
DM
# list nodes where all VM images are available (used by has_feature API)
#
# Returns a hash reference (node name => 1). It starts with all cluster nodes and is
# narrowed down per volume: a disabled storage removes all nodes, a storage restricted to
# certain nodes removes all others, and a non-shared storage removes all but the local node.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodehash = { map { $_ => 1 } PVE::Cluster::get_nodelist()->@* };
    my $local_node = nodename();

    # drop every node for which the given predicate is false
    my $keep_only = sub {
        my ($is_kept) = @_;
        for my $node (keys %$nodehash) {
            delete $nodehash->{$node} if !$is_kept->($node);
        }
    };

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            $keep_only->(sub { $avail->{$_[0]} });
        } elsif (!$scfg->{shared}) {
            $keep_only->(sub { $_[0] eq $local_node });
        }
    });

    return $nodehash
}
2766
f25852c2
TM
# Report, per cluster node, which storages used by the VM's volumes are not
# available there: a disabled storage counts as unavailable on every node, a
# storage restricted via 'nodes' as unavailable on all nodes not listed.
#
# Returns a hash reference { $node => { unavailable_storages => [ ids ] } };
# the 'unavailable_storages' key is only present for affected nodes and holds
# a sorted list of storage IDs.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    # record $storeid as unavailable on each of the given nodes
    my $mark_unavailable = sub {
        my ($storeid, @nodes) = @_;
        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @nodes;
    };

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            $mark_unavailable->($storeid, keys %$nodehash);
        } elsif (my $avail = $scfg->{nodes}) {
            $mark_unavailable->($storeid, grep { !$avail->{$_} } keys %$nodehash);
        }
    });

    # turn the per-node bookkeeping hash into a sorted list of storage IDs
    for my $node_info (values %$nodehash) {
        my $unavail = $node_info->{unavailable_storages} or next;
        $node_info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2805
# Compat only, use assert_config_exists_on_node and vm_running_locally where
# possible.
#
# Unless $nocheck is set, asserts that the config of $vmid exists on $node,
# then returns whatever vm_running_locally($vmid) returns.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't
    #   processed it yet
    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2819
# Return the hash from config_list(), with a 'pid' entry added for every
# listed VM that has a '<vmid>.pid' file in the run directory and for which
# check_running() returns a PID. If the run directory cannot be opened, the
# plain config list is returned.
sub vzlist {
    my $vzlist = config_list();

    my $run_dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$run_dir;

    while (defined(my $entry = $run_dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/ or next;
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid) or next;
        $vzlist->{$vmid}->{pid} = $pid;
    }

    return $vzlist;
}
2837
b1a70cab
DM
# Schema of the per-VM status properties. Only 'vmid' and 'status' are
# mandatory, every other property is optional.
our $vmstatus_return_properties = do {
    # build the schema entry for an optional property
    my $optional = sub {
        my ($type, $description, %extra) = @_;
        return {
            description => $description,
            type => $type,
            optional => 1,
            %extra,
        };
    };

    my %properties = (
        vmid => get_standard_option('pve-vmid'),
        status => {
            description => "QEMU process status.",
            type => 'string',
            enum => ['stopped', 'running'],
        },
        maxmem => $optional->('integer', "Maximum memory in bytes.", renderer => 'bytes'),
        maxdisk => $optional->('integer', "Root disk size in bytes.", renderer => 'bytes'),
        name => $optional->('string', "VM name."),
        qmpstatus => $optional->(
            'string', "VM run state from the 'query-status' QMP monitor command."),
        pid => $optional->('integer', "PID of running qemu process."),
        uptime => $optional->('integer', "Uptime.", renderer => 'duration'),
        cpus => $optional->('number', "Maximum usable CPUs."),
        lock => $optional->('string', "The current config lock, if any."),
        tags => $optional->('string', "The current configured tags, if any"),
        'running-machine' => $optional->(
            'string', "The currently running machine type (if running)."),
        'running-qemu' => $optional->(
            'string', "The currently running QEMU version (if running)."),
    );

    \%properties;
};
2904
1e3baf05
DM
# Last CPU usage sample per QEMU PID ({ time, used, cpu }), kept between
# calls of vmstatus() to derive the CPU utilisation.
my $last_proc_pid_stat;

# Get VM status information.
#
# $opt_vmid - optional, restrict the result to this VMID
# $full     - if true, additionally query the running QEMU processes via QMP
#
# This must be fast and should not block when $full is false. QEMU is only
# queried using QMP when $full is true (this can be slow).
#
# Returns a hash reference mapping VMID to a hash of status values.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $MiB = 1024 * 1024;

    # the VMIDs selected by $opt_vmid (all known ones if it is not set)
    my @vmids = grep { !$opt_vmid || $_ eq $opt_vmid } keys %$list;

    # static information from the config plus zeroed counters
    for my $vmid (@vmids) {
        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($pid) if $pid;

        # fixme: better status?
        $d->{status} = $pid ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{disk} = 0; # no info available
        $d->{maxdisk} = $size // 0;

        my $cpus = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $MiB;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon} * $MiB;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        $d->{$_} = 0 for qw(uptime cpu mem netout netin diskread diskwrite);

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network counters, taken from the VM's tap devices
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/ or next;
        my $d = $res->{$vmid} or next;

        # note the swap: the device's 'receive' counter is accounted as the
        # VM's netout and 'transmit' as its netin
        my ($received, $transmitted) = @{ $netdev->{$dev} }{qw(receive transmit)};

        $d->{netout} += $received;
        $d->{netin} += $transmitted;

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($received);
            $d->{nics}->{$dev}->{netin} = int($transmitted);
        }
    }

    my $ctime = gettimeofday;

    # uptime, memory and CPU usage from /proc for running VMs
    for my $vmid (@vmids) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid} or next;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss} / $pstat->{vsize}) * $d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID, no usage can be derived yet
            $last_proc_pid_stat->{$pid} = { time => $ctime, used => $used, cpu => 0 };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime <= 1000) {
            # too little time passed, reuse the previously computed value
            $d->{cpu} = $old->{cpu};
            next;
        }

        my $dutime = $used - $old->{used};

        $d->{cpu} = (($dutime / $dtime) * $cpucount) / $d->{cpus};
        $last_proc_pid_stat->{$pid} = { time => $ctime, used => $used, cpu => $d->{cpu} };
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];
        my ($read_total, $write_total) = (0, 0);

        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};
            $read_total += $stats->{rd_bytes};
            $write_total += $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $stats;
        }

        $res->{$vmid}->{diskread} = $read_total;
        $res->{$vmid}->{diskwrite} = $write_total;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};

        my $version = 'unknown';
        if (my $v = $data->{qemu}) {
            $version = "$v->{major}.$v->{minor}.$v->{micro}";
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    for my $vmid (@vmids) {
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (@vmids) {
        my $d = $res->{$vmid};

        if ($d->{pid}) {
            # we can't use the $qmpclient since it might have already aborted on
            # 'query-balloon', but this might also fail for older versions...
            my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
            $d->{'proxmox-support'} = $qemu_support // {};
        }

        # fall back to the process status if QMP did not provide one
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
3145
8107b378
DC
# Returns 1 if any of the 'serial0' .. 'serial<N-1>' keys (N being
# $MAX_SERIAL_PORTS) has a true value in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $index (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$index"};
    }

    return 0;
}
3157
d5535a00
TL
# Look up the 'audio<id>' entry of $conf ($id defaults to 0).
#
# Returns nothing if the entry is not defined. Otherwise returns a hash
# reference with the parsed device ('dev'), the device ID ('dev_id'), the
# backend driver ('backend', 'spice' unless a driver is configured) and the
# backend ID ('backend_id').
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio = $conf->{"audio$id"};
    return if !defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my $driver = $props->{driver} // 'spice';

    my $info = {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };
    return $info;
}
3175
# Build the QEMU command line arguments for an audio device.
#
# $audio           - hash with 'dev', 'dev_id', 'backend' and 'backend_id'
# $audiopciaddr    - PCI address suffix appended to the device definition
# $machine_version - machine version; from 4.2 on, the devices are linked
#                    to the backend with an 'audiodev=' property
#
# Returns an array reference of arguments; dies for unsupported devices.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @device_defs;
    if ($audio->{dev} eq 'AC97') {
        @device_defs = ("AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        @device_defs = (
            "$audio->{dev},id=${id}${audiopciaddr}",
            "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    my $devs = [ map { ('-device', $_) } @device_defs ];
    push @$devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return $devs;
}
3201
f9dde219
SR
# Returns a hash reference with the paths of the swtpm control socket
# ('socket') and pid file ('pid') used for the given VM.
sub get_tpm_paths {
    my ($vmid) = @_;

    my %paths = (
        socket => "/var/run/qemu-server/$vmid.swtpm",
        pid => "/var/run/qemu-server/$vmid.swtpm.pid",
    );

    return \%paths;
}
3209
# Append the QEMU arguments for the emulated TPM to @$devices: a chardev
# connected to the VM's swtpm socket, the TPM backend using it and the
# tpm-tis device. Does nothing if $conf has no 'tpmstate0' entry.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3221
# Set up (unless migrating) and start the swtpm emulator for a VM.
#
# $storecfg  - storage configuration, used to map a storage-backed state volume
# $vmid      - ID of the VM the swtpm instance is started for
# $tpmdrive  - 'tpmstate0' drive string; nothing is done if it is not set
# $migration - if true, swtpm_setup is skipped
#
# Returns the PID read from the swtpm pid file, so that the caller can kill
# the daemon on error, or undef (with a warning) if the file contains no PID.
# Dies if the pid file is not created in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $is_tpm2 = $tpm->{version} eq 'v2.0';

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_tpm2;

        # The output line is passed to the callback as its argument. Do not
        # rely on $1, which is not set by anything in the callback itself.
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;

    # NOTE(review): a newline is added on the assumption that run_command
    # passes lines without their terminator - confirm against PVE::Tools.
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # Return the untainted PID of the swtpm daemon so it can be killed on
    # error. Capture into a lexical: returning $1 after an unchecked match
    # would hand out a stale capture if the pid file holds no digits.
    my $pid_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $pid_line =~ m/(\d+)/;
    warn "could not read PID of swtpm from '$paths->{pid}'\n" if !defined($pid);

    return $pid;
}
3296
86b8228b
DM
# Check whether the given 'vga' setting selects a QXL display.
#
# Returns 0 if the parsed type is unset or not qxl/qxl2/qxl3/qxl4. Otherwise
# returns the digit following 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $vgatype = $vgaconf->{type};

    if ($vgatype && $vgatype =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3306
d731ecbe
WB
# Returns true if $arch equals the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3311
045749f2
TL
# Returns the architecture configured for the VM ('arch' in $conf), falling
# back to the host architecture if none is defined.
sub get_vm_arch {
    my ($conf) = @_;

    return defined($conf->{arch}) ? $conf->{arch} : get_host_arch();
}
3316
d731ecbe
WB
# Machine type used per architecture when none is configured.
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
3321
0761e619
TL
# Extract the leading "major.minor" part of a QEMU version string.
#
# $kvmversion - version string; if not defined, kvm_user_version() is used
#
# Returns the "major.minor" string, or undef if the version does not start
# with such a pattern.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Capture into a lexical instead of returning $1: after a failed match $1
    # still holds the capture of some earlier, unrelated successful match.
    my ($major_minor) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3328
# Turn an unversioned machine type into a version-pinned one.
#
# $machine      - 'pc' (or unset), 'q35' or 'virt'; other values are returned
#                 unchanged after a warning
# $base_version - version to pin to, used if it is defined and
#                 can_run_pve_machine_version() accepts it for $kvmversion
# $kvmversion   - passed on; the fallback pin version is derived from it via
#                 get_installed_machine_version()
#
# Returns the resulting machine type string.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    return "pc-i440fx-$pin_version" if !$machine || $machine eq 'pc';
    return "pc-q35-$pin_version" if $machine eq 'q35';
    return "virt-$pin_version" if $machine eq 'virt';

    warn "unknown machine type '$machine', not touching that!\n";

    return $machine;
}
3350
# Determine the machine type to use for a VM.
#
# $conf            - VM config ('machine' and 'ostype' are read)
# $forcemachine    - if true, used instead of the configured machine
# $arch            - architecture for the default lookup, 'x86_64' if undefined
# $add_pve_version - if true, make sure the result carries a '+pveN' suffix
# $kvmversion      - QEMU version, kvm_user_version() is used if needed and
#                    not given
#
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        $machine .= "+pve" . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    if ($add_pve_version && $machine !~ m/\+pve\d+?(?:\.pxe)?$/) {
        # a trailing '.pxe' has to stay at the very end
        my ($without_pxe) = $machine =~ m/^(.*?)\.pxe$/;
        $machine = $without_pxe if defined($without_pxe);

        # for version-pinned machines that do not include a pve-version (e.g.
        # pc-q35-4.1), we assume 0 to keep them stable in case we bump
        $machine .= '+pve0';

        $machine .= '.pxe' if defined($without_pxe);
    }

    return $machine;
}
3385
90b20b15
DC
# Select the OVMF code and vars images for a VM.
#
# $arch    - architecture, must have an entry in $OVMF
# $efidisk - parsed efidisk ('efitype' and 'pre-enrolled-keys' are read)
# $smm     - selects between the '4m' and '4m-no-smm' images
#
# The 'default' images are used unless $arch is not aarch64 and the efidisk
# has efitype '4m'; '-ms' is appended to the type for pre-enrolled keys.
#
# Returns the list ($ovmf_code, $ovmf_vars); dies if the architecture is
# unknown or one of the files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$types;

    my $type = 'default';
    if ($arch ne "aarch64" && ($efidisk->{efitype} // '') eq '4m') {
        my $base = $smm ? "4m" : "4m-no-smm";
        my $suffix = $efidisk->{'pre-enrolled-keys'} ? '-ms' : '';
        $type = $base . $suffix;
    }

    my ($ovmf_code, $ovmf_vars) = @{ $types->{$type} };

    -f $ovmf_code or die "EFI base image '$ovmf_code' not found\n";
    -f $ovmf_vars or die "EFI vars image '$ovmf_vars' not found\n";

    return ($ovmf_code, $ovmf_vars);
}
3404
6908fd9b
WB
# QEMU system emulator binary per (non-native) architecture.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Returns the binary to run a VM of the given architecture: '/usr/bin/kvm'
# for the native architecture, the matching emulator from $Arch2Qemu
# otherwise. Dies if no emulator is known for $arch.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $cmd = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$cmd;

    return $cmd;
}
3417
05a4c550
SR
3418# To use query_supported_cpu_flags and query_understood_cpu_flags to get flags
3419# to use in a QEMU command line (-cpu element), first array_intersect the result
3420# of query_supported_ with query_understood_. This is necessary because:
3421#
3422# a) query_understood_ returns flags the host cannot use and
3423# b) query_supported_ (rather the QMP call) doesn't actually return CPU
3424# flags, but CPU settings - with most of them being flags. Those settings
3425# (and some flags, curiously) cannot be specified as a "-cpu" argument.
3426#
3427# query_supported_ needs to start up to 2 temporary VMs and is therefore rather
3428# expensive. If you need the value returned from this, you can get it much
3429# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
3430# $accel being 'kvm' or 'tcg'.
3431#
3432# pvestatd calls this function on startup and whenever the QEMU/KVM version
3433# changes, automatically populating pmxcfs.
3434#
3435# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
3436# since kvm and tcg machines support different flags
3437#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP
    # command, and return the sorted list of flags it reports for the 'host'
    # model. $kvm selects between the default accelerator and TCG.
    my $query_flags = sub {
        my ($kvm) = @_;

        my @cmd = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @cmd, '-accel', 'tcg' if !$kvm;

        my $rc = run_command(\@cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        my %seen;
        eval {
            my $cmd_result = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' },
            );

            my $props = $cmd_result->{model}->{props};
            for my $prop (keys %$props) {
                next if $props->{$prop} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $flag = $prop) =~ s/\.|-/_/g;
                $seen{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %seen ];
    };

    my $flags = {};

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $query_flags->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        return if !$kvm_supported;

        $flags->{kvm} = eval { $query_flags->(1) };
        warn "warning: failed querying supported kvm flags: $@\n" if $@;
    });

    return $flags;
}
3519
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the whitespace-separated list of CPU flags recorded for the host
# architecture. Returns an array reference; dies if the file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $raw = file_get_contents($filepath);
    $raw =~ s/^\s+|\s+$//g; # trim, so that split yields no empty leading field

    return [ split(/\s+/, $raw) ];
}
3535
e5a6919c
FE
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# Returns a true value if the VM uses SeaBIOS (explicitly or because no bios
# is configured) and its vga type is 'serial<N>' or 'none'. Returns nothing
# for 'virt' machines.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    return if $machine =~ m/^virt/; # there is no smm flag that could be disabled

    my $uses_seabios = !defined($conf->{bios}) || $conf->{bios} eq 'seabios';

    return $uses_seabios
        && $vga->{type}
        && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3546
b7d80c79
FE
# Build the two '-drive' option strings for OVMF: the read-only code image
# and the EFI vars drive.
#
# With an 'efidisk0' in $conf, the vars drive points to that volume;
# otherwise a warning is logged and the vars image is copied to a temporary
# file that is used instead.
#
# $version_guard is a code reference called with (major, minor, pve) to
# decide whether the 'size=' option may be added.
#
# Returns the list ($code_drive_str, $var_drive_str).
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $d = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);

    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
    my $var_drive_str = "if=pflash,unit=1,id=drive-efidisk0";

    if (!$d) {
        log_warn("no efidisk configured! Using temporary efivars disk.");

        my $vars_size = -s $ovmf_vars;
        my $path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $path, $vars_size);

        $var_drive_str .= ",format=raw,file=$path";
        $var_drive_str .= ",size=" . $vars_size if $version_guard->(4, 1, 2);

        return ($code_drive_str, $var_drive_str);
    }

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
    my $path = $d->{file};
    my $format = $d->{format};

    if ($storeid) {
        $path = PVE::Storage::path($storecfg, $d->{file});
        if (!defined($format)) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }
    } else {
        die "efidisk format must be specified\n" if !defined($format);
    }

    # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
    if ($path =~ m/^rbd:/) {
        $var_drive_str .= ',cache=writeback';
        $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
    }
    $var_drive_str .= ",format=$format,file=$path";

    if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
        $var_drive_str .= ",size=" . (-s $ovmf_vars);
    }
    if (drive_is_read_only($conf, $d)) {
        $var_drive_str .= ',readonly=on';
    }

    return ($code_drive_str, $var_drive_str);
}
3586
1e3baf05 3587sub config_to_command {
5921764c
SR
3588 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3589 $pbs_backing) = @_;
1e3baf05 3590
3326ae19 3591 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
5bdcf937 3592 my $devices = [];
5bdcf937 3593 my $bridges = {};
b42d3cf9 3594 my $ostype = $conf->{ostype};
4317f69f 3595 my $winversion = windows_version($ostype);
d731ecbe 3596 my $kvm = $conf->{kvm};
38277afc 3597 my $nodename = nodename();
d731ecbe 3598
045749f2 3599 my $arch = get_vm_arch($conf);
1476b99f
DC
3600 my $kvm_binary = get_command_for_arch($arch);
3601 my $kvmver = kvm_user_version($kvm_binary);
045749f2 3602
a04dd5c4
SR
3603 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3604 $kvmver //= "undefined";
3605 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3606 }
3607
9471e48b
TL
3608 my $add_pve_version = min_version($kvmver, 4, 1);
3609
3610 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
4df98f2f 3611 my $machine_version = extract_version($machine_type, $kvmver);
d731ecbe 3612 $kvm //= 1 if is_native($arch);
4317f69f 3613
a77a53ae 3614 $machine_version =~ m/(\d+)\.(\d+)/;
ac0077cc 3615 my ($machine_major, $machine_minor) = ($1, $2);
ac0077cc 3616
b516c848
SR
3617 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3618 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3619 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
4df98f2f
TL
3620 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3621 ." please upgrade node '$nodename'\n"
b516c848 3622 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
ac0077cc 3623 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
4df98f2f
TL
3624 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3625 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3626 ." node '$nodename'\n";
ac0077cc
SR
3627 }
3628
3629 # if a specific +pve version is required for a feature, use $version_guard
3630 # instead of min_version to allow machines to be run with the minimum
3631 # required version
3632 my $required_pve_version = 0;
3633 my $version_guard = sub {
3634 my ($major, $minor, $pve) = @_;
3635 return 0 if !min_version($machine_version, $major, $minor, $pve);
47f35977
SR
3636 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3637 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
ac0077cc
SR
3638 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3639 return 1;
3640 };
a77a53ae 3641
4df98f2f
TL
3642 if ($kvm && !defined kvm_version()) {
3643 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3644 ." or enable in BIOS.\n";
d731ecbe 3645 }
bfcd9b7e 3646
3392d6ca 3647 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
4d3f29ed 3648 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
249c4a6c
AD
3649 my $use_old_bios_files = undef;
3650 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
db656e5f 3651
74fe3d9a 3652 my $cmd = [];
83870398 3653 if ($conf->{affinity}) {
74fe3d9a 3654 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
83870398
DB
3655 }
3656
1476b99f 3657 push @$cmd, $kvm_binary;
1e3baf05
DM
3658
3659 push @$cmd, '-id', $vmid;
3660
e4d4cda1
HR
3661 my $vmname = $conf->{name} || "vm$vmid";
3662
6884a7d7 3663 push @$cmd, '-name', "$vmname,debug-threads=on";
e4d4cda1 3664
27b25d03
SR
3665 push @$cmd, '-no-shutdown';
3666
1e3baf05
DM
3667 my $use_virtio = 0;
3668
d036e418 3669 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
378ad769 3670 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
c971c4f2
AD
3671 push @$cmd, '-mon', "chardev=qmp,mode=control";
3672
2ea5fb7e 3673 if (min_version($machine_version, 2, 12)) {
b4496b9e 3674 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
71bd73b5
DC
3675 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3676 }
1e3baf05 3677
d036e418 3678 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
19672434 3679
1e3baf05
DM
3680 push @$cmd, '-daemonize';
3681
2796e7d5 3682 if ($conf->{smbios1}) {
1f30ac3a
CE
3683 my $smbios_conf = parse_smbios1($conf->{smbios1});
3684 if ($smbios_conf->{base64}) {
3685 # Do not pass base64 flag to qemu
3686 delete $smbios_conf->{base64};
3687 my $smbios_string = "";
3688 foreach my $key (keys %$smbios_conf) {
3689 my $value;
3690 if ($key eq "uuid") {
3691 $value = $smbios_conf->{uuid}
3692 } else {
3693 $value = decode_base64($smbios_conf->{$key});
3694 }
3695 # qemu accepts any binary data, only commas need escaping by double comma
3696 $value =~ s/,/,,/g;
3697 $smbios_string .= "," . $key . "=" . $value if $value;
3698 }
3699 push @$cmd, '-smbios', "type=1" . $smbios_string;
3700 } else {
3701 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3702 }
2796e7d5
DM
3703 }
3704
3edb45e7 3705 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
b7d80c79
FE
3706 my ($code_drive_str, $var_drive_str) =
3707 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3708 push $cmd->@*, '-drive', $code_drive_str;
3709 push $cmd->@*, '-drive', $var_drive_str;
a783c78e
AD
3710 }
3711
483ceeab 3712 if ($q35) { # tell QEMU to load q35 config early
7583d156 3713 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
2ea5fb7e 3714 if (min_version($machine_version, 4, 0)) {
7583d156
DC
3715 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3716 } else {
3717 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3718 }
3719 }
da8b4189 3720
cc181036
TL
3721 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3722 push @$cmd, $fixups->@*;
3723 }
3724
844d8fa6
DC
3725 if ($conf->{vmgenid}) {
3726 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3727 }
3728
d40e5e18 3729 # add usb controllers
4df98f2f 3730 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
0cf8d56c 3731 $conf, $bridges, $arch, $machine_type, $machine_version);
d40e5e18 3732 push @$devices, @usbcontrollers if @usbcontrollers;
55655ebc 3733 my $vga = parse_vga($conf->{vga});
2fa3151e 3734
55655ebc
DC
3735 my $qxlnum = vga_conf_has_spice($conf->{vga});
3736 $vga->{type} = 'qxl' if $qxlnum;
2fa3151e 3737
55655ebc 3738 if (!$vga->{type}) {
869ad4a7
WB
3739 if ($arch eq 'aarch64') {
3740 $vga->{type} = 'virtio';
2ea5fb7e 3741 } elsif (min_version($machine_version, 2, 9)) {
55655ebc 3742 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
a2a5cd64 3743 } else {
55655ebc 3744 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
a2a5cd64 3745 }
5acbfe9e
DM
3746 }
3747
1e3baf05 3748 # enable absolute mouse coordinates (needed by vnc)
fa3b3ce0
TL
3749 my $tablet = $conf->{tablet};
3750 if (!defined($tablet)) {
5acbfe9e 3751 $tablet = $defaults->{tablet};
590e698c 3752 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
55655ebc 3753 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
5acbfe9e
DM
3754 }
3755
d559309f
WB
3756 if ($tablet) {
3757 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3758 my $kbd = print_keyboarddevice_full($conf, $arch);
3759 push @$devices, '-device', $kbd if defined($kbd);
3760 }
b467f79a 3761
e5d611c3 3762 my $bootorder = device_bootorder($conf);
2141a802 3763
74c17b7a 3764 # host pci device passthrough
9b71c34d
DC
3765 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3766 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
1e3baf05
DM
3767
3768 # usb devices
ae36393d 3769 my $usb_dev_features = {};
2ea5fb7e 3770 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
ae36393d 3771
4df98f2f 3772 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
0cf8d56c 3773 $conf, $usb_dev_features, $bootorder, $machine_version);
d40e5e18 3774 push @$devices, @usbdevices if @usbdevices;
2141a802 3775
1e3baf05 3776 # serial devices
bae179aa 3777 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
fa3b3ce0
TL
3778 my $path = $conf->{"serial$i"} or next;
3779 if ($path eq 'socket') {
3780 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3781 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
7bd9abd2 3782 # On aarch64, serial0 is the UART device. QEMU only allows
fa3b3ce0
TL
3783 # connecting UART devices via the '-serial' command line, as
3784 # the device has a fixed slot on the hardware...
3785 if ($arch eq 'aarch64' && $i == 0) {
3786 push @$devices, '-serial', "chardev:serial$i";
9f9d2fb2 3787 } else {
9f9d2fb2
DM
3788 push @$devices, '-device', "isa-serial,chardev=serial$i";
3789 }
fa3b3ce0
TL
3790 } else {
3791 die "no such serial device\n" if ! -c $path;
e35eb876 3792 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
fa3b3ce0 3793 push @$devices, '-device', "isa-serial,chardev=serial$i";
34978be3 3794 }
1e3baf05
DM
3795 }
3796
3797 # parallel devices
1989a89c 3798 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
34978be3 3799 if (my $path = $conf->{"parallel$i"}) {
19672434 3800 die "no such parallel device\n" if ! -c $path;
e35eb876 3801 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
4c5dbaf6 3802 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
5bdcf937 3803 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
34978be3 3804 }
1e3baf05
DM
3805 }
3806
b01de199 3807 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
2e7b5925 3808 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
1cc5ed1b 3809 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
b01de199 3810 push @$devices, @$audio_devs;
2e7b5925 3811 }
19672434 3812
f9dde219
SR
3813 add_tpm_device($vmid, $devices, $conf);
3814
1e3baf05
DM
3815 my $sockets = 1;
3816 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3817 $sockets = $conf->{sockets} if $conf->{sockets};
3818
3819 my $cores = $conf->{cores} || 1;
3bd18e48 3820
de9d1e55 3821 my $maxcpus = $sockets * $cores;
76267728 3822
de9d1e55 3823 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
76267728 3824
de9d1e55
AD
3825 my $allowed_vcpus = $cpuinfo->{cpus};
3826
483ceeab 3827 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
1e3baf05 3828
483ceeab 3829 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
69c81430
AD
3830 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3831 for (my $i = 2; $i <= $vcpus; $i++) {
3832 my $cpustr = print_cpu_device($conf,$i);
3833 push @$cmd, '-device', $cpustr;
3834 }
3835
3836 } else {
3837
3838 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3839 }
1e3baf05
DM
3840 push @$cmd, '-nodefaults';
3841
dbea4415 3842 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
1e3baf05 3843
0f704640 3844 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
1e3baf05 3845
6b64503e 3846 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
1e3baf05 3847
84902837 3848 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
4df98f2f
TL
3849 push @$devices, '-device', print_vga_device(
3850 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
6f070e39
TL
3851
3852 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3853
d036e418 3854 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
378ad769 3855 push @$cmd, '-vnc', "unix:$socket,password=on";
b7be4ba9 3856 } else {
55655ebc 3857 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
b7be4ba9
AD
3858 push @$cmd, '-nographic';
3859 }
3860
1e3baf05 3861 # time drift fix
6b64503e 3862 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
8c559505 3863 my $useLocaltime = $conf->{localtime};
1e3baf05 3864
4317f69f
AD
3865 if ($winversion >= 5) { # windows
3866 $useLocaltime = 1 if !defined($conf->{localtime});
7a131888 3867
4317f69f
AD
3868 # use time drift fix when acpi is enabled
3869 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3870 $tdf = 1 if !defined($conf->{tdf});
462e8d19 3871 }
4317f69f 3872 }
462e8d19 3873
4317f69f
AD
3874 if ($winversion >= 6) {
3875 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
17bacc21 3876 push @$machineFlags, 'hpet=off';
1e3baf05
DM
3877 }
3878
8c559505
DM
3879 push @$rtcFlags, 'driftfix=slew' if $tdf;
3880
2f6f002c 3881 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
8c559505
DM
3882 push @$rtcFlags, "base=$conf->{startdate}";
3883 } elsif ($useLocaltime) {
3884 push @$rtcFlags, 'base=localtime';
3885 }
1e3baf05 3886
58c64ad5
SR
3887 if ($forcecpu) {
3888 push @$cmd, '-cpu', $forcecpu;
3889 } else {
2f6f002c 3890 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
58c64ad5 3891 }
519ed28c 3892
dafb728c
AD
3893 PVE::QemuServer::Memory::config(
3894 $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd);
370b05e7 3895
1e3baf05
DM
3896 push @$cmd, '-S' if $conf->{freeze};
3897
b20df606 3898 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
1e3baf05 3899
48657158
MD
3900 my $guest_agent = parse_guest_agent($conf);
3901
3902 if ($guest_agent->{enabled}) {
d036e418 3903 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
378ad769 3904 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
48657158 3905
60f03a11 3906 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
48657158
MD
3907 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3908 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3909 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3910 } elsif ($guest_agent->{type} eq 'isa') {
3911 push @$devices, '-device', "isa-serial,chardev=qga0";
3912 }
ab6a046f
AD
3913 }
3914
e5d611c3
TL
3915 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3916 if ($rng && $version_guard->(4, 1, 2)) {
05853188
SR
3917 check_rng_source($rng->{source});
3918
2cf61f33
SR
3919 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3920 my $period = $rng->{period} // $rng_fmt->{period}->{default};
2cf61f33
SR
3921 my $limiter_str = "";
3922 if ($max_bytes) {
3923 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3924 }
3925
2cf61f33 3926 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
2cf61f33
SR
3927 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3928 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3929 }
3930
1d794448 3931 my $spice_port;
2fa3151e 3932
f8ea1b30 3933 if ($qxlnum || $vga->{type} =~ /^virtio/) {
590e698c 3934 if ($qxlnum > 1) {
ac087616 3935 if ($winversion){
2f6f002c 3936 for (my $i = 1; $i < $qxlnum; $i++){
4df98f2f
TL
3937 push @$devices, '-device', print_vga_device(
3938 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
590e698c
DM
3939 }
3940 } else {
3941 # assume other OS works like Linux
55655ebc
DC
3942 my ($ram, $vram) = ("134217728", "67108864");
3943 if ($vga->{memory}) {
3944 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3945 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3946 }
3947 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3948 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
2fa3151e
AD
3949 }
3950 }
3951
d559309f 3952 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
95a4b4a9 3953
af0eba7e 3954 my $pfamily = PVE::Tools::get_host_address_family($nodename);
91152441
WB
3955 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3956 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
4d316a63
AL
3957
3958 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3959 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3960 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3961
91152441
WB
3962 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3963 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
943340a6 3964
4df98f2f
TL
3965 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3966 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
caab114a
TL
3967 if ($spice_enhancement->{foldersharing}) {
3968 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3969 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3970 }
c4df18db 3971
caab114a 3972 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
4df98f2f
TL
3973 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3974 if $spice_enhancement->{videostreaming};
3975
caab114a 3976 push @$devices, '-spice', "$spice_opts";
1011b570
DM
3977 }
3978
8d9ae0d2
DM
3979 # enable balloon by default, unless explicitly disabled
3980 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3326ae19 3981 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
c70e4ec3
AD
3982 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3983 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3984 push @$devices, '-device', $ballooncmd;
8d9ae0d2 3985 }
1e3baf05 3986
0ea9541d
DM
3987 if ($conf->{watchdog}) {
3988 my $wdopts = parse_watchdog($conf->{watchdog});
3326ae19 3989 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
0a40e8ea 3990 my $watchdog = $wdopts->{model} || 'i6300esb';
5bdcf937
AD
3991 push @$devices, '-device', "$watchdog$pciaddr";
3992 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
0ea9541d
DM
3993 }
3994
1e3baf05 3995 my $vollist = [];
941e0c42 3996 my $scsicontroller = {};
26ee04b6 3997 my $ahcicontroller = {};
cdd20088 3998 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
1e3baf05 3999
5881b913
DM
4000 # Add iscsi initiator name if available
4001 if (my $initiator = get_initiator_name()) {
4002 push @$devices, '-iscsi', "initiator-name=$initiator";
4003 }
4004
912792e2 4005 PVE::QemuConfig->foreach_volume($conf, sub {
1e3baf05
DM
4006 my ($ds, $drive) = @_;
4007
ff1a2432 4008 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3f11f0d7 4009 check_volume_storage_type($storecfg, $drive->{file});
1e3baf05 4010 push @$vollist, $drive->{file};
ff1a2432 4011 }
afdb31d5 4012
4dcce9ee
TL
4013 # ignore efidisk here, already added in bios/fw handling code above
4014 return if $drive->{interface} eq 'efidisk';
f9dde219
SR
4015 # similar for TPM
4016 return if $drive->{interface} eq 'tpmstate';
4dcce9ee 4017
1e3baf05 4018 $use_virtio = 1 if $ds =~ m/^virtio/;
3b408e82 4019
2141a802 4020 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3b408e82 4021
2f6f002c 4022 if ($drive->{interface} eq 'virtio'){
51f492cd
AD
4023 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
4024 }
4025
2f6f002c 4026 if ($drive->{interface} eq 'scsi') {
cdd20088 4027
ee034f5c 4028 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
6731a4cf 4029
b8fb1c03
SR
4030 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
4031 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
4032
3326ae19 4033 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
a1b7d579 4034 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
fc8b40fd
AD
4035
4036 my $iothread = '';
4037 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
4038 $iothread .= ",iothread=iothread-$controller_prefix$controller";
4039 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
e7a5104d 4040 } elsif ($drive->{iothread}) {
d80ad18c
MH
4041 log_warn(
4042 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
4043 );
fc8b40fd
AD
4044 }
4045
6e11f143
AD
4046 my $queues = '';
4047 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4048 $queues = ",num_queues=$drive->{queues}";
370b05e7 4049 }
6e11f143 4050
4df98f2f
TL
4051 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4052 if !$scsicontroller->{$controller};
cdd20088 4053 $scsicontroller->{$controller}=1;
2f6f002c 4054 }
3b408e82 4055
26ee04b6 4056 if ($drive->{interface} eq 'sata') {
2f6f002c 4057 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3326ae19 4058 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4df98f2f
TL
4059 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4060 if !$ahcicontroller->{$controller};
2f6f002c 4061 $ahcicontroller->{$controller}=1;
26ee04b6 4062 }
46f58b5f 4063
5921764c
SR
4064 my $pbs_conf = $pbs_backing->{$ds};
4065 my $pbs_name = undef;
4066 if ($pbs_conf) {
4067 $pbs_name = "drive-$ds-pbs";
4068 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4069 }
4070
6d5673c3
SR
4071 my $drive_cmd = print_drive_commandline_full(
4072 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
3dc33a72
FG
4073
4074 # extra protection for templates, but SATA and IDE don't support it..
75748d44 4075 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4ef13a7f 4076
15b21acc 4077 push @$devices, '-drive',$drive_cmd;
4df98f2f
TL
4078 push @$devices, '-device', print_drivedevice_full(
4079 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
1e3baf05
DM
4080 });
4081
cc4d6182 4082 for (my $i = 0; $i < $MAX_NETS; $i++) {
2141a802
SR
4083 my $netname = "net$i";
4084
4085 next if !$conf->{$netname};
4086 my $d = parse_net($conf->{$netname});
d0a86b24 4087 next if !$d;
4ddd2ca2 4088 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
1e3baf05 4089
d0a86b24 4090 $use_virtio = 1 if $d->{model} eq 'virtio';
1e3baf05 4091
2141a802 4092 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
1e3baf05 4093
2141a802 4094 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
d0a86b24 4095 push @$devices, '-netdev', $netdevfull;
5bdcf937 4096
d0a86b24 4097 my $netdevicefull = print_netdevice_full(
0c03a390 4098 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4df98f2f 4099
d0a86b24 4100 push @$devices, '-device', $netdevicefull;
5bdcf937 4101 }
1e3baf05 4102
6dbcb073 4103 if ($conf->{ivshmem}) {
4df98f2f 4104 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
e3c27a6a 4105
6dbcb073
DC
4106 my $bus;
4107 if ($q35) {
4108 $bus = print_pcie_addr("ivshmem");
4109 } else {
4110 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4111 }
e3c27a6a
TL
4112
4113 my $ivshmem_name = $ivshmem->{name} // $vmid;
4114 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4115
6dbcb073 4116 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4df98f2f
TL
4117 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4118 .",size=$ivshmem->{size}M";
6dbcb073
DC
4119 }
4120
2513b862
DC
4121 # pci.4 is nested in pci.1
4122 $bridges->{1} = 1 if $bridges->{4};
4123
3326ae19
TL
4124 if (!$q35) { # add pci bridges
4125 if (min_version($machine_version, 2, 3)) {
fc79e813
AD
4126 $bridges->{1} = 1;
4127 $bridges->{2} = 1;
4128 }
6731a4cf 4129 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
2513b862
DC
4130 }
4131
4132 for my $k (sort {$b cmp $a} keys %$bridges) {
4133 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
13d68979
SR
4134
4135 my $k_name = $k;
4136 if ($k == 2 && $legacy_igd) {
4137 $k_name = "$k-igd";
4138 }
3326ae19 4139 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
2513b862 4140 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
3326ae19
TL
4141
4142 if ($q35) { # add after -readconfig pve-q35.cfg
2513b862
DC
4143 splice @$devices, 2, 0, '-device', $devstr;
4144 } else {
4145 unshift @$devices, '-device', $devstr if $k > 0;
f8e83f05 4146 }
19672434
DM
4147 }
4148
ac0077cc
SR
4149 if (!$kvm) {
4150 push @$machineFlags, 'accel=tcg';
4151 }
4152
e4263214 4153 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
e5a6919c 4154
ac0077cc
SR
4155 my $machine_type_min = $machine_type;
4156 if ($add_pve_version) {
4157 $machine_type_min =~ s/\+pve\d+$//;
4158 $machine_type_min .= "+pve$required_pve_version";
4159 }
4160 push @$machineFlags, "type=${machine_type_min}";
4161
5bdcf937 4162 push @$cmd, @$devices;
2f6f002c
TL
4163 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4164 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4165 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
8c559505 4166
7ceade4c
DC
4167 if (my $vmstate = $conf->{vmstate}) {
4168 my $statepath = PVE::Storage::path($storecfg, $vmstate);
24d1f93a 4169 push @$vollist, $vmstate;
7ceade4c 4170 push @$cmd, '-loadstate', $statepath;
b85666cf 4171 print "activating and using '$vmstate' as vmstate\n";
7ceade4c
DC
4172 }
4173
85fcf79e
FG
4174 if (PVE::QemuConfig->is_template($conf)) {
4175 # needed to workaround base volumes being read-only
4176 push @$cmd, '-snapshot';
4177 }
4178
76350670
DC
4179 # add custom args
4180 if ($conf->{args}) {
4181 my $aa = PVE::Tools::split_args($conf->{args});
4182 push @$cmd, @$aa;
4183 }
4184
9b71c34d 4185 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
1e3baf05 4186}
19672434 4187
05853188
SR
# Sanity-check the host file that is about to be used as backing source for a VirtIO RNG
# device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the kernel's
# 'rng_current' sysfs entry reads 'none'. Does nothing otherwise.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    # only the hardware RNG device needs the additional sysfs check below
    return if $source ne '/dev/hwrng';

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $active_rng = file_read_firstline($rng_current);
    return if $active_rng ne 'none';

    # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
    # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
    die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
        ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
        ." to the host.\n";
}
4204
# Ask the running VM $vmid for its SPICE port via the 'query-spice' monitor command.
#
# Returns the 'tls-port' value if it is set, else the 'port' value.
# Dies with "no spice port" if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    # prefer the TLS port, fall back to the plain one
    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4212
86fdcfb2
DA
# Collect the IDs of the devices currently present in the running VM $vmid.
#
# Issues the monitor commands 'query-pci', 'query-block', 'query-mice' and 'qom-list' (on
# /machine/peripheral) and returns a hash reference keyed by device ID with true values:
#   - every PCI device that reports a 'qdev_id', including devices behind PCI bridges
#   - block devices named 'drive-<id>', stored as '<id>'
#   - 'tablet', if a mouse named 'QEMU HID Tablet' is reported
#   - peripherals named usbN or usbredirdevN
sub vm_devices_list {
    my ($vmid) = @_;

    my $found = {};

    # seed the work queue with the devices sitting directly on each PCI bus
    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my @queue;
    for my $bus (@$pci_buses) {
        push @queue, @{$bus->{devices}};
    }

    # walk the queue, descending into the devices behind PCI bridges as they are found
    while (@queue) {
        my $dev = shift @queue;

        my $qdev_id = $dev->{'qdev_id'};
        $found->{$qdev_id} = 1 if $qdev_id;

        my $bridge = $dev->{'pci_bridge'};
        next if !$bridge || !$bridge->{devices};

        # the entry of a bridge is additionally increased by the number of devices behind it
        $found->{$qdev_id} += scalar(@{$bridge->{devices}});
        push @queue, @{$bridge->{devices}};
    }

    my $block_devices = mon_cmd($vmid, 'query-block');
    for my $block (@$block_devices) {
        next if $block->{device} !~ m/^drive-(\S+)/;
        $found->{$1} = 1;
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    for my $mouse (@$mice) {
        next if $mouse->{name} ne 'QEMU HID Tablet';
        $found->{tablet} = 1;
        last;
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $peripheral (@$peripherals) {
        my $name = $peripheral->{name};
        $found->{$name} = 1 if $name =~ m/^usb(?:redirdev)?\d+$/;
    }

    return $found;
}
4262
# Hot-plug the device identified by $deviceid into the running VM $vmid.
#
# Parameters:
#   $storecfg     - storage configuration, handed on to the drive helpers
#   $conf         - VM configuration hash
#   $vmid         - ID of the target VM
#   $deviceid     - device to plug: 'tablet', 'keyboard', usbN, usbredirdevN, virtioN,
#                   virtioscsiN/scsihwN, scsiN, netN or (on non-q35 machines only) pci.N
#   $device       - parsed settings of the device (drive, net or usb options)
#   $arch         - guest architecture, handed on to the print_* helpers
#   $machine_type - machine type, handed on to the print_* helpers
#
# Returns 1 on success, and also when vm_devices_list() already reports the device. Returns
# nothing if qemu_netdevadd() reports failure. Dies for device IDs that cannot be hot-plugged
# and re-throws errors raised by the helpers.
#
# For drives and network devices the backend (drive/netdev) is added first. If adding or
# verifying the frontend device fails afterwards, the backend is removed again before the
# error is re-thrown, so that no half-plugged device is left behind.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        my $index = $1;
        qemu_deviceadd(
            $vmid,
            PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $index + 1),
        );
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # device_add has to run inside the eval as well: if it fails, the drive added above must
        # be removed again, else vm_devices_list() keeps reporting the ID as already plugged
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings are only applied to virtioscsiN controllers
        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            # roll back the drive added above
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # network devices use the machine type returned by qemu_machine_pxe() instead of the
        # one passed in by the caller
        my $net_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($net_machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($net_machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid,
            $conf,
            $device,
            $deviceid,
            undef,
            $use_old_bios_files,
            $arch,
            $net_machine_type,
            $machine_version,
        );

        # device_add runs inside the eval so that the netdev added above is removed again if
        # adding the frontend device fails
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4359
# fixme: this should raise exceptions on error!
#
# Hot-unplug the device identified by $deviceid from the running VM $vmid.
#
# Returns 1 once the device is removed, and also when vm_devices_list() does not report the
# device in the first place. Dies if $deviceid is one of the boot disks of $conf or if the
# device ID is of a kind that cannot be unplugged.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    # nothing to do if the VM does not report the device
    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    for my $bootdisk (@$bootdisks) {
        die "can't unplug bootdisk '$deviceid'\n" if $bootdisk eq $deviceid;
    }

    # devices that are deleted without waiting for the removal to be verified
    my %delete_only = map { $_ => 1 } qw(tablet keyboard xhci);

    if ($delete_only{$deviceid}) {
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(?:usbredirdev|usb|virtioscsi|scsihw)\d+$/) {
        # plain frontend devices: delete and wait until they are gone
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        # frontend device first, then the drive behind it and finally its iothread object
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread object is named after the per-disk controller
        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        if ($single_controller) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^net\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4408
c60cad61
DC
# Add a 'spicevmc' character device of type 'usbredir' with the given $id to
# the VM via the "chardev-add" monitor command.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => {
            type => "usbredir",
        },
    };

    return mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4422
5e5dcb73
DA
# Issue a "device_add" monitor command for the device described by
# $devicefull, a string of the form "<driver>,key=value,...".
#
# The string is prefixed with "driver=" and then split on '=' and ',' into the
# option hash passed to the monitor.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my %options = split(/[=,]/, "driver=" . $devicefull);

    return mon_cmd($vmid, "device_add", %options);
}
afdb31d5 4431
# Issue a "device_del" monitor command for $deviceid and return its result.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    return scalar(mon_cmd($vmid, "device_del", id => $deviceid));
}
4437
# Create the "iothread-$deviceid" object for a drive that has 'iothread'
# enabled, unless vm_iothreads_list() already reports it.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
    }
}
4446
# Remove the "iothread-$deviceid" object of a drive that has 'iothread'
# enabled, if vm_iothreads_list() reports it.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
4455
# Create a QOM object of type $qomtype with id $objectid via "object-add".
# Always returns 1 (errors are raised by mon_cmd).
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my %args = (
        id => $objectid,
        "qom-type" => $qomtype,
    );
    mon_cmd($vmid, "object-add", %args);

    return 1;
}
4463
# Delete the QOM object with id $objectid via "object-del".
# Always returns 1 (errors are raised by mon_cmd).
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my %args = (id => $objectid);
    mon_cmd($vmid, "object-del", %args);

    return 1;
}
4471
# Add the drive described by $device to the running VM using the HMP
# "drive_add" command.
#
# Returns 1 on success, dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    # io_uring is only enabled when the running QEMU is at least version 6.0
    my $running_version = get_running_qemu_version($vmid);
    my $io_uring = min_version($running_version, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # double every backslash, the string is embedded in a quoted HMP argument
    $drive =~ s/\\/\\\\/g;

    my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $ret\n" if $ret !~ m/OK/s;

    return 1;
}
afdb31d5 4486
# Remove the block backend "drive-$deviceid" from the running VM using the
# HMP "drive_del" command.
#
# Returns 1 when the monitor output is empty or reports the device as not
# found, dies with the monitor output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $output =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $already_gone = $output =~ m/Device \'.*?\' not found/s;

    if ($output eq "" || $already_gone) {
        return 1;
    }

    die "deleting drive $deviceid failed : $output\n";
}
f19d1c47 4500
# Wait until $deviceid shows up in vm_devices_list().
#
# Polls up to six times with a one second pause after each unsuccessful
# check. Returns 1 once the device is listed, dies otherwise.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        if (defined($plugged->{$deviceid})) {
            return 1;
        }
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
afdb31d5 4512
5e5dcb73
DA
4513
# Wait until $deviceid no longer shows up in vm_devices_list().
#
# device_del is asynchronous and its empty return value is not a reliable
# success indicator, so the device list is polled up to six times with a one
# second pause after each unsuccessful check. Returns 1 once the device is
# gone, dies otherwise.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        if (!defined($plugged->{$deviceid})) {
            return 1;
        }
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4528
# Make sure the SCSI controller needed by $device exists in the running VM,
# hot-plugging it when vm_devices_list() does not report it. Returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = $controller_prefix . $controller;

    my $plugged = vm_devices_list($vmid);
    return 1 if defined($plugged->{$scsihwid});

    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type);

    return 1;
}
4543
8ce30dde
AD
# Unplug the SCSI controller that served the drive $opt, if it is no longer
# needed. Always returns 1.
#
# With 'virtio-scsi-single' the per-drive controller "virtioscsi<index>" is
# always unplugged. Otherwise "scsihw<controller>" is only unplugged when no
# plugged SCSI drive with an index below ($maxdev-1)*($controller+1) is left.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);

    my $plugged = vm_devices_list($vmid);
    for my $name (keys %$plugged) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        next if $drive->{interface} ne 'scsi';

        # controller is still in use, keep it
        return 1 if $drive->{index} < (($maxdev-1)*($controller+1));
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4572
# Make sure the PCI bridge needed by $device exists in the running VM and
# hot-plug it if necessary. Always returns 1.
#
# print_pci_addr() records the bridge(s) required for $device in $bridges;
# nothing is done when no bridge, or only bridge 0, is required.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # keep the last key in hash iteration order, like the original each() loop
    my $bridgeid;
    for my $id (keys %$bridges) {
        $bridgeid = $id;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # Every other caller passes vm_deviceplug() a device description as
        # fifth argument, followed by $arch and $machine_type. The bridge has
        # none, so pass an empty placeholder to keep $arch and $machine_type
        # in their proper positions instead of shifting them by one.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, {}, $arch, $machine_type);
    }

    return 1;
}
4596
25088687
DM
# Set the link state of network device $device via the "set_link" monitor
# command; a true $up means link up, anything else link down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    return mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4603
# Add the network backend for $deviceid to the running VM via "netdev_add".
# Returns 1.
#
# The option string produced by print_netdev_full() is split on '=' and ','
# into a hash; 'vhost' is converted to a JSON boolean and 'queues' to a
# number before the options are sent to the monitor.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4621
# Remove the network backend $deviceid from the running VM via "netdev_del".
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    return mon_cmd($vmid, "netdev_del", id => $deviceid);
}
4627
# Replace the USB device $deviceid of the running VM with the one described
# by $device. Does nothing when $device is false.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    my $plugged = vm_devices_list($vmid);
    unless ($plugged->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        my $controller = PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr);
        qemu_deviceadd($vmid, $controller);
    }

    # add the new one
    return vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4647
# Hot-plug or hot-unplug vCPUs so that the running VM ends up with $vcpus
# vCPUs. A false $vcpus means "all" (sockets * cores).
#
# Dies when more vCPUs than sockets * cores are requested, when unplugging is
# requested on a machine version older than 2.7, or when the number of
# running vCPUs does not match the configuration before plugging. On machine
# versions >= 2.7 $conf->{vcpus} is updated and written after every single
# successful plug/unplug.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket count and no longer used; 'sockets' wins
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # number of vCPUs the running QEMU process currently reports
    my $count_running_vcpus = sub {
        my $cpus = mon_cmd($vmid, "query-cpus-fast");
        return scalar(@{$cpus});
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        for (my $i = $currentvcpus; $i > $vcpus; $i--) {
            qemu_devicedel($vmid, "cpu$i");

            # wait for the guest to release the cpu
            my $running;
            for (my $retry = 0; ; $retry++) {
                $running = $count_running_vcpus->();
                last if $running == $i-1;
                raise_param_exc({ vcpus => "error unplugging cpu$i" }) if $retry > 5;
                sleep 1;
            }

            # update conf after each successful cpu unplug
            $conf->{vcpus} = $running;
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    die "vcpus in running vm does not match its configuration\n"
        if $count_running_vcpus->() != $currentvcpus;

    if (!PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        # legacy path for old machine versions, uses the "cpu-add" command
        for (my $i = $currentvcpus; $i < $vcpus; $i++) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
        return;
    }

    for (my $i = $currentvcpus+1; $i <= $vcpus; $i++) {
        my $cpustr = print_cpu_device($conf, $i);
        qemu_deviceadd($vmid, $cpustr);

        # wait for the new cpu to show up
        my $running;
        for (my $retry = 0; ; $retry++) {
            $running = $count_running_vcpus->();
            last if $running == $i;
            raise_param_exc({ vcpus => "error hotplugging cpu$i" }) if $retry > 10;
            sleep 1;
        }

        # update conf after each successful cpu hotplug
        $conf->{vcpus} = $running;
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    return;
}
4722
# Apply I/O throttling limits to block device $deviceid of a running VM via
# the "block_set_io_throttle" monitor command. Does nothing when the VM is
# not running. All limit values are truncated to integers.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid,
        $bps, $bps_rd, $bps_wr, $iops, $iops_rd, $iops_wr,
        $bps_max, $bps_rd_max, $bps_wr_max, $iops_max, $iops_rd_max, $iops_wr_max,
        $bps_max_length, $bps_rd_max_length, $bps_wr_max_length,
        $iops_max_length, $iops_rd_max_length, $iops_wr_max_length) = @_;

    return if !check_running($vmid);

    my %limits = (
        bps => $bps,
        bps_rd => $bps_rd,
        bps_wr => $bps_wr,
        iops => $iops,
        iops_rd => $iops_rd,
        iops_wr => $iops_wr,
        bps_max => $bps_max,
        bps_rd_max => $bps_rd_max,
        bps_wr_max => $bps_wr_max,
        iops_max => $iops_max,
        iops_rd_max => $iops_rd_max,
        iops_wr_max => $iops_wr_max,
        bps_max_length => $bps_max_length,
        bps_rd_max_length => $bps_rd_max_length,
        bps_wr_max_length => $bps_wr_max_length,
        iops_max_length => $iops_max_length,
        iops_rd_max_length => $iops_rd_max_length,
        iops_wr_max_length => $iops_wr_max_length,
    );

    # the monitor expects integer values
    for my $name (keys %limits) {
        $limits{$name} = int($limits{$name});
    }

    return mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, %limits);
}
4754
c1175c92
AD
# Resize volume $volid to $size on the storage layer and, if the VM is
# running, tell QEMU about the new size of block device $deviceid.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running);

    return if !$running;

    # round the size reported to QEMU up to the next multiple of 1024
    my $padding = (1024 - $size % 1024) % 1024;
    $size += $padding;

    my %args = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );

    return mon_cmd($vmid, "block_resize", %args);
}
4775
1ab0057c
AD
# Create snapshot $snap of volume $volid.
#
# For a running VM whose volume is snapshotted by QEMU (see
# do_snapshots_with_qemu) the snapshot is taken through the monitor,
# otherwise through the storage layer.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);
    my $via_qemu = $running && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap)
        if $via_qemu;

    return PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
}
4787
fc46aff9
AD
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as "running" when the VM is running and its current
# configuration references $volid. In that case, and if QEMU handles
# snapshots for the volume (see do_snapshots_with_qemu), the snapshot is
# deleted through the monitor, otherwise through the storage layer.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if ($running) {
        # only treat the volume as in use if the current config references it
        $running = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $mark_if_attached = sub {
            my ($ds, $drive) = @_;
            $running = 1 if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($conf, $mark_if_attached);
    }

    if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    } else {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
    }
}
4809
# Set the migration capabilities of the running VM.
#
# Every capability reported by "query-migrate-capabilities" is explicitly
# switched on or off: on if it is enabled in the table below, off otherwise
# (including capabilities not listed in the table). 'dirty-bitmaps' is
# enabled only when "query-proxmox-support" reports the matching property
# ('pbs-dirty-bitmap-savevm' if $savevm is true, 'pbs-dirty-bitmap-migration'
# otherwise).
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # failures of this query are ignored, dirty bitmaps are then left disabled
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my $enabled_cap = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    };

    my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");

    my @requested = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $enabled_cap->{$name} ? JSON::true : JSON::false,
        };
    } @$supported_capabilities;

    return mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@requested);
}
a89fded1 4840
912792e2
FE
# Call $func->($volid, $attributes, @param) once for every volume id that is
# referenced by $conf or by one of its snapshots.
#
# $attributes is a hash with the keys cdrom, replicate, shared,
# referenced_in_config, referenced_in_snapshot (hash keyed by snapshot name),
# size, is_vmstate, is_tpmstate, is_unused and drivename, aggregated over all
# references to the volume.
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $collect = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        # a volume counts as cdrom only if all of its references are cdroms
        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        # replicated/shared as soon as one reference says so
        $info->{replicate} //= 0;
        $info->{replicate} = 1 if $drive->{replicate} // 1;

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $info->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $info->{referenced_in_config} = 1;
        }

        # first non-zero size wins
        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} //= 0;
        $info->{is_vmstate} = 1 if $key eq 'vmstate';

        $info->{is_tpmstate} //= 0;
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{is_unused} //= 0;
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);

    my $snapshots = $conf->{snapshots};
    for my $snapname (keys %{$snapshots}) {
        PVE::QemuConfig->foreach_volume_full(
            $snapshots->{$snapname}, $include_opts, $collect, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4898
# Options that vmconfig_hotplug_pending() commits directly to the config,
# as they have no impact on the running VM.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        description
        hookscript
        lock
        migrate_downtime
        migrate_speed
        name
        onboot
        protection
        shares
        startup
        tags
        vmstatestorage
    )
};

# every option described in $confdesc_cloudinit is handled the same way
$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4917
3a11fadb
DM
# hotplug changes in [PENDING]
# $selection hash can be used to only apply specified options, for
# example: { cores => 1 } (only apply changed 'cores')
# $errors ref is used to return error messages
#
# Processing order:
#  1. fast-plug options (see $fast_plug_option) are committed unconditionally
#  2. pending deletions are applied
#  3. pending additions/changes are applied, a cloud-init drive change is
#     deferred until all other changes are done
#  4. the xhci controller is unplugged when no usbN option is left
#  5. the config is written and, if enabled, the cloud-init drive is updated
#
# Inside the per-option eval blocks, dying with "skip\n" means "cannot be
# hot-plugged, leave it pending" and is not reported as an error.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();

    # Keeps $conf->{cloudinit} up to date: it records the old value of every
    # changed cloud-init relevant option and the list of newly added ones
    # (in the 'added' key).
    my $cloudinit_record_changed = sub {
        my ($conf, $opt, $old, $new) = @_;
        return if !$cloudinit_pending_properties->{$opt};

        my $ci = ($conf->{cloudinit} //= {});

        my $recorded = $ci->{$opt};
        my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');

        if (defined($new)) {
            if (defined($old)) {
                # an existing value is being modified
                if (defined($recorded)) {
                    # the value was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value was changed multiple times, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added and is being changed, do nothing
                } else {
                    # the value is new, record it:
                    $ci->{$opt} = $old;
                }
            } else {
                # a new value is being added
                if (defined($recorded)) {
                    # it was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value had temporarily been removed, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added already, do nothing
                } else {
                    # the value is new, add it
                    $added{$opt} = 1;
                }
            }
        } elsif (!defined($old)) {
            # a non-existent value is being removed? ignore...
        } else {
            # a value is being deleted
            if (defined($recorded)) {
                # a value was already recorded, just keep it
            } elsif ($added{$opt}) {
                # the value was marked as added, remove it
                delete $added{$opt};
            } else {
                # a previously unrecorded value is being removed, record the old value:
                $ci->{$opt} = $old;
            }
        }

        my $added = join(',', sort keys %added);
        $ci->{added} = $added if length($added);
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            my $new = delete $conf->{pending}->{$opt};
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
            $conf->{$opt} = $new;
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $ostype = $conf->{ostype};
    my $version = extract_version($machine_type, get_running_qemu_version($vmid));
    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
    my $usb_hotplug = $hotplug_features->{usb}
        && min_version($version, 7, 1)
        && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});

    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                # 'hotplug' may be unset here, avoid matching against undef
                die "skip\n" if (($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($defaults->{tablet}) {
                    # tablet/keyboard have no device description; pass an
                    # empty placeholder so that $arch and $machine_type end
                    # up in the same argument positions as in all other
                    # vm_deviceplug() calls
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            my $old = delete $conf->{$opt};
            $cloudinit_record_changed->($conf, $opt, $old, undef);
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    my $cloudinit_opt;
    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                # switching memory hotplug on or off cannot be done live;
                # 'hotplug' may be unset in the current config
                die "skip\n" if ($value =~ /memory/)
                    || ($value !~ /memory/ && ($conf->{hotplug} // '') =~ /memory/);
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    # see above: empty placeholder for the device description
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', {}, $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', {}, $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                my $d = eval { parse_property_string('pve-qm-usb', $value) };
                my $id = $opt;
                # 'host' may be missing from the parsed entry, do not match
                # against undef
                if (($d->{host} // '') =~ m/^spice$/i) {
                    $id = "usbredirdev$index";
                }
                qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $cloudinit_opt = [$opt, $drive];
                    # apply all the other changes first, then generate the cloudinit disk
                    die "skip\n";
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value);
            } elsif ($opt eq 'cpuunits') {
                my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
                $cgroup->change_cpu_shares($new_cpuunits);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    if (defined($cloudinit_opt)) {
        my ($opt, $drive) = @$cloudinit_opt;
        my $value = $conf->{pending}->{$opt};
        eval {
            # generate the image from a copy of the config that already
            # contains the pending cloud-init drive
            my $temp = {%$conf, $opt => $value};
            PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
            vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                $vmid, $opt, $value, $arch, $machine_type);
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    # unplug xhci controller if no usb device is left
    if ($usb_hotplug) {
        my $has_usb = 0;
        for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
            next if !defined($conf->{"usb$i"});
            $has_usb = 1;
            last;
        }
        if (!$has_usb) {
            vm_deviceunplug($vmid, $conf, 'xhci');
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);

    if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
        PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
    }
}
3dc38fbb
WB
# Try to free the volume behind the drive $key.
#
# Only acts when $force is set or $key is an 'unused' entry, and the drive is
# not a cdrom. Returns 1 when the drive can be dropped from the config: either
# the volume was freed, or the VM does not own it. Returns nothing when the
# drive was not eligible. Dies when the volume is still in use; the
# 'Datastore.AllocateSpace' check on the storage is done through $rpcenv.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
5225
# Handle the removal of drive $opt from the config: with $force the volume is
# deallocated (requires 'VM.Config.Disk'), otherwise it is registered as an
# unused drive.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    if (!$force) {
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive);
    } else {
        $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
        try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
    }
}
5241
98bc3aeb
OB
5242
5243
# Apply all pending changes of $conf (cold plug) and write the config.
#
# Failures for single options are stored in $errors->{$opt} and emitted as
# warnings, the affected option stays pending. Unless $skip_cloud_init is
# set, the cloud-init config is regenerated when a cloud-init drive is among
# the applied changes.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $report_failure = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$pending_delete_hash) {
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $report_failure->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # 0: never generate, undef: generate once a cloud-init drive is seen
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure

        eval {
            # the drive being replaced becomes an unused drive
            vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        if (my $err = $@) {
            $report_failure->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $drive = parse_drive($opt, $conf->{pending}->{$opt});
            $generate_cloudinit //= 1 if drive_is_cloudinit($drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);

    if ($generate_cloudinit) {
        # After successful generation and if there were changes to be applied, update the
        # config to drop the {cloudinit} entry.
        PVE::QemuConfig->write_config($vmid, $conf)
            if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);
    }
}
5310
# Apply a changed or new network device option ($opt, e.g. 'net0') to a VM.
#
# If $opt already exists in $conf and only bridge, tag, trunks, firewall, rate
# or link state differ, the existing tap device is updated in place and 1 is
# returned. A change of model, MAC address, queues or MTU, or a missing bridge
# on either side, needs an unplug of the old device followed by a plug of the
# new one.
#
# Dies with "skip\n" whenever a (re)plug would be needed but $hotplug is not set.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        # properties that cannot be changed on an existing device
        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || safe_num_ne($oldnet->{mtu}, $newnet->{mtu})
            || !$newnet->{bridge} # bridge/nat mode change
            || !$oldnet->{bridge};

        if (!$needs_replug) {
            my ($net_index) = $opt =~ m/net(\d+)/;
            die "internal error" if !defined($net_index);
            my $iface = "tap${vmid}i${net_index}";

            my $bridge_setup_changed =
                safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($bridge_setup_changed) {
                PVE::Network::tap_unplug($iface);

                my $tap_plug = $have_sdn
                    ? \&PVE::Network::SDN::Zones::tap_plug
                    : \&PVE::Network::tap_plug;

                $tap_plug->(
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5364
74ea2c65
AD
# Check whether a changed guest agent option can be applied to a running VM.
#
# Dies with "skip\n" if no agent is configured yet, or if any sub-option other
# than the hot-pluggable ones (currently only 'fstrim_cloned_disks') differs
# between the current and the new value. Returns nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotplug_options = { fstrim_cloned_disks => 1 };

    my $old_agent = parse_guest_agent($conf);
    my $agent = parse_guest_agent({$opt => $value});

    # first direction catches added/changed options, second one removed options
    for my $direction ([$agent, $old_agent], [$old_agent, $agent]) {
        my ($from, $to) = @$direction;

        for my $option (keys %$from) {
            next if defined($hotplug_options->{$option});
            die "skip\n" if safe_string_ne($from->{$option}, $to->{$option});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5387
# Apply a changed or new drive option ($opt, e.g. 'scsi0') to a VM.
#
# Existing drive, CD-ROM: the medium is ejected and, unless the new file is
#   'none', the new medium is inserted. Returns 1.
# Existing drive, same volume: only the I/O throttle limits are updated live;
#   a change of aio, discard, iothread, queues, cache, ssd or ro dies with
#   "skip\n". Returns 1.
# Existing drive, different volume: the old device is unplugged and registered
#   as unused, then the new one is plugged like a new drive.
# New drive: the volume is activated and the device is plugged.
#
# Dies with "skip\n" if plugging is needed but $hotplug is not set, or if the
# drive to plug is an ide/sata one. Changing the media type is always an error.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);
    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            my $ejecting = $drive->{file} eq 'none';

            # resolve the new medium before touching the currently inserted one
            my $path = $ejecting ? undef : get_iso_path($storecfg, $vmid, $drive->{file});

            # force eject if locked
            mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

            if ($ejecting) {
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } elsif ($path) {
                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path");
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            my @cold_only = qw(aio discard iothread queues cache ssd ro);
            die "skip\n"
                if grep { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @cold_only;

            # throttle properties, grouped in the order the monitor helper expects them
            my @bandwidth = qw(mbps mbps_rd mbps_wr);
            my @iops = qw(iops iops_rd iops_wr);
            my @bandwidth_max = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @iops_max = qw(iops_max iops_rd_max iops_wr_max);
            my @burst_length = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed = grep {
                safe_num_ne($drive->{$_}, $old_drive->{$_})
            } (@bandwidth, @iops, @bandwidth_max, @iops_max, @burst_length);

            # apply throttle
            if ($throttle_changed) {
                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @bandwidth),
                    (map { $drive->{$_} || 0 } @iops),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @bandwidth_max),
                    (map { $drive->{$_} || 0 } @iops_max),
                    (map { $drive->{$_} || 1 } @burst_length),
                );
            }

            return 1;
        }

        # different volume: unplug and register as unused
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}])
        if $drive->{file} !~ m|^/dev/.+|;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5497
9687287b
AD
# Apply the cloud-init config of a VM and, if the VM is running, re-insert the
# cloud-init medium through the monitor.
#
# Does nothing if the config has no cloud-init drive. If several volumes are
# cloud-init drives, the last one visited by foreach_volume is used.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($cloudinit_ds, $cloudinit_drive);

    my $remember_cloudinit = sub {
        my ($ds, $drive) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($drive);
        ($cloudinit_ds, $cloudinit_drive) = ($ds, $drive);
    };
    PVE::QemuConfig->foreach_volume($conf, $remember_cloudinit);

    return if !$cloudinit_drive;

    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    my $running = PVE::QemuServer::check_running($vmid);

    if ($running) {
        my $path = PVE::Storage::path($storecfg, $cloudinit_drive->{file});
        if ($path) {
            # swap the medium in the running guest
            mon_cmd($vmid, "eject", force => JSON::true, id => "$cloudinit_ds");
            mon_cmd($vmid, "blockdev-change-medium", id => "$cloudinit_ds", filename => "$path");
        }
    }
}
5528
# called in locked context by incoming migration
#
# Collect the volumes of $conf that sit on non-shared storage. CD-ROM drives,
# 'tpmstate0' and drives without a volume are left out.
#
# Returns a hash ref: drive key => [$volid, $storeid, $volname, $drive, $use_existing],
# where $use_existing is 1 if the volid is listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    my $collect_local_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        # volumes on shared storage are not collected
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes->{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_local_volume);

    return $local_volumes;
}
5555
# called in locked context by incoming migration
#
# Allocate the target volumes for an NBD based storage migration.
#
# Parameters:
#   $storecfg       - storage configuration
#   $vmid           - ID of the guest the new volumes are allocated for
#   $source_volumes - hash ref: drive key =>
#                     [$volid, $storeid, $volname, $drive, $use_existing, $format]
#                     ($format is optional)
#   $storagemap     - parsed storage map; with {identity} set, storage and
#                     format of the source volume are kept
#
# Returns a hash ref: drive key => { drivestr, volid } plus 'replicated => 1'
# for volumes that are re-used instead of being allocated.
#
# The drive hashes passed in via $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            if (!$format || !grep { $format eq $_ } @$validFormats) {
                # Drop an empty or unsupported request, otherwise the '//='
                # fallback below could never apply and the unsupported format
                # would be passed on to the allocation.
                $format = undef;

                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat
                        if grep { $fileFormat eq $_ } @$validFormats;
                }
                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # shallow copy, so that the caller's drive hash keeps describing the source volume
        my $newdrive = { %$drive };
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5607
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Start a VM while holding its config lock and return whatever the locked code
# returns: the result of vm_start_nolock, or of vm_resume if a backup currently
# keeps the VM's QEMU instance running.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        PVE::QemuConfig->check_lock($conf)
            if !$params->{skiplock} && !$has_suspended_lock;

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # let the config point at the freshly allocated target volumes
            $conf->{$_} = $nbd->{$_}->{drivestr} for keys %$nbd;
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5651
5652
0c498cca
FG
# Start the QEMU process for a VM; the caller is expected to hold the config lock.
#
# params:
#   statefile => 'tcp', 'unix' for migration or path/volid for RAM state
#   skiplock => 0/1, skip checking for config lock
#   skiptemplate => 0/1, skip checking whether VM is template
#   forcemachine => to force QEMU machine (rollback/migration)
#   forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
#   timeout => in seconds
#   paused => start VM in paused state (backup)
#   resume => resume from hibernation
#   pbs-backing => {
#     sata0 => {
#       repository
#       snapshot
#       keyfile
#       archive
#     },
#     virtio2 => ...
#   }
# migrate_opts:
#   nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
#   migratedfrom => source node
#   spice_ticket => used for spice migration, passed via tunnel/stdin
#   network => CIDR of migration network
#   type => secure/insecure - tunnel over encrypted connection or plain-text
#   nbd_proto_version => int, 0 for TCP, 1 for UNIX
#   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
#   offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
#       contained in config
#   remote_node => if set, PVE_MIGRATED_FROM is not exported to the environment
#
# Returns a hash ref that may contain:
#   migrate => { proto, addr, uri, port (tcp only), unix_sockets (NBD over UNIX socket) }
#   drives => per drive key the NBD export info (entry of migrate_opts->{nbd} plus nbd_uri)
#   spice_port => SPICE port, only set for incoming migrations
sub vm_start_nolock {
    my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;

    my $statefile = $params->{statefile};
    my $resume = $params->{resume};

    my $migratedfrom = $migrate_opts->{migratedfrom};
    my $migration_type = $migrate_opts->{type};

    my $res = {};

    # clean up leftover reboot request files
    eval { clear_reboot_request($vmid); };
    warn $@ if $@;

    if (!$statefile && scalar(keys %{$conf->{pending}})) {
        vmconfig_apply_pending($vmid, $conf, $storecfg);
        $conf = PVE::QemuConfig->load_config($vmid); # update/reload
    }

    # don't regenerate the ISO if the VM is started as part of a live migration
    # this way we can reuse the old ISO with the correct config
    if (!$migratedfrom) {
        if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
            # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
            # $conf->{cloudinit}, so we could just not do this?
            # But we do it above, so for now let's be consistent.
            $conf = PVE::QemuConfig->load_config($vmid); # update/reload
        }
    }

    # override offline migrated volumes, conf is out of date still
    if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
        for my $key (sort keys $offline_volumes->%*) {
            my $parsed = parse_drive($key, $conf->{$key});
            $parsed->{file} = $offline_volumes->{$key};
            $conf->{$key} = print_drive($parsed);
        }
    }

    my $defaults = load_defaults();

    # set environment variable useful inside network script
    # for remote migration the config is available on the target node!
    if (!$migrate_opts->{remote_node}) {
        $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);

    my $forcemachine = $params->{forcemachine};
    my $forcecpu = $params->{forcecpu};
    if ($resume) {
        # enforce machine and CPU type on suspended vm to ensure HW compatibility
        $forcemachine = $conf->{runningmachine};
        $forcecpu = $conf->{runningcpu};
        print "Resuming suspended VM\n";
    }

    my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});

    # resolved lazily and cached, as it is needed by both the migration and the NBD setup
    my $migration_ip;
    my $get_migration_ip = sub {
        my ($nodename) = @_;

        return $migration_ip if defined($migration_ip);

        my $cidr = $migrate_opts->{network};

        if (!defined($cidr)) {
            my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            $cidr = $dc_conf->{migration}->{network};
        }

        if (defined($cidr)) {
            my $ips = PVE::Network::get_local_ip_from_cidr($cidr);

            die "could not get IP: no address configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) == 0;

            die "could not get IP: multiple addresses configured on local " .
                "node for network '$cidr'\n" if scalar(@$ips) > 1;

            $migration_ip = $ips->[0];
        }

        $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
            if !defined($migration_ip);

        return $migration_ip;
    };

    if ($statefile) {
        if ($statefile eq 'tcp') {
            my $migrate = $res->{migrate} = { proto => 'tcp' };
            $migrate->{addr} = "localhost";
            my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
            my $nodename = nodename();

            # explicit type wins, then the datacenter wide setting, then 'secure'
            $migration_type //= $datacenterconf->{migration}->{type} // 'secure';

            if ($migration_type eq 'insecure') {
                $migrate->{addr} = $get_migration_ip->($nodename);
                $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
            }

            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
            $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
            push @$cmd, '-incoming', $migrate->{uri};
            push @$cmd, '-S';

        } elsif ($statefile eq 'unix') {
            # should be default for secure migrations as a ssh TCP forward
            # tunnel is not deterministically ready in time and regularly fails
            # to set up in time, so use UNIX socket forwards
            my $migrate = $res->{migrate} = { proto => 'unix' };
            $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
            unlink $migrate->{addr};

            $migrate->{uri} = "unix:$migrate->{addr}";
            push @$cmd, '-incoming', $migrate->{uri};
            push @$cmd, '-S';

        } elsif (-e $statefile) {
            push @$cmd, '-loadstate', $statefile;
        } else {
            my $statepath = PVE::Storage::path($storecfg, $statefile);
            push @$vollist, $statefile;
            push @$cmd, '-loadstate', $statepath;
        }
    } elsif ($params->{paused}) {
        push @$cmd, '-S';
    }

    my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);

    my $pci_reserve_list = [];
    for my $device (values $pci_devices->%*) {
        next if $device->{mdev}; # we don't reserve for mdev devices
        push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
    }

    # reserve all PCI IDs before actually doing anything with them
    PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);

    eval {
        my $uuid;
        for my $id (sort keys %$pci_devices) {
            my $d = $pci_devices->{$id};
            my ($index) = ($id =~ m/^hostpci(\d+)$/);

            my $chosen_mdev;
            for my $dev ($d->{ids}->@*) {
                my $info = eval {
                    PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev});
                };
                if ($d->{mdev}) {
                    # for mediated devices a failure only means: try the next candidate
                    warn $@ if $@;
                    $chosen_mdev = $info;
                    last if $chosen_mdev; # if successful, we're done
                } else {
                    die $@ if $@;
                }
            }

            next if !$d->{mdev};
            die "could not create mediated device\n" if !defined($chosen_mdev);

            # nvidia grid needs the uuid of the mdev as qemu parameter
            if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
                if (defined($conf->{smbios1})) {
                    my $smbios_conf = parse_smbios1($conf->{smbios1});
                    $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
                }
                $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
            }
        }
        push @$cmd, '-uuid', $uuid if defined($uuid);
    };
    if (my $err = $@) {
        eval { cleanup_pci_devices($vmid, $conf) };
        warn $@ if $@;
        die $err;
    }

    PVE::Storage::activate_volumes($storecfg, $vollist);

    # best effort: stop a scope left over from a previous run, errors are ignored on purpose
    my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
    eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
    # Issues with the above 'stop' not being fully completed are extremely rare, a very low
    # timeout should be more than enough here...
    PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);

    my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});

    my %run_params = (
        timeout => $statefile ? undef : $start_timeout,
        umask => 0077,
        noerr => 1,
    );

    # when migrating, prefix QEMU output so other side can pick up any
    # errors that might occur and show the user
    if ($migratedfrom) {
        $run_params{quiet} = 1;
        $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
    }

    my %systemd_properties = (
        Slice => 'qemu.slice',
        KillMode => 'process',
        SendSIGKILL => 0,
        TimeoutStopUSec => ULONG_MAX, # infinity
    );

    if (PVE::CGroup::cgroup_mode() == 2) {
        $systemd_properties{CPUWeight} = $cpuunits;
    } else {
        $systemd_properties{CPUShares} = $cpuunits;
    }

    if (my $cpulimit = $conf->{cpulimit}) {
        $systemd_properties{CPUQuota} = int($cpulimit * 100);
    }
    $systemd_properties{timeout} = 10 if $statefile; # setting up the scope should be quick

    my $run_qemu = sub {
        PVE::Tools::run_fork(sub {
            PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);

            my $tpmpid;
            if (my $tpm = $conf->{tpmstate0}) {
                # start the TPM emulator so QEMU can connect on start
                $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
            }

            my $exitcode = run_command($cmd, %run_params);
            if ($exitcode) {
                if ($tpmpid) {
                    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
                    kill 'TERM', $tpmpid;
                }
                die "QEMU exited with code $exitcode\n";
            }
        });
    };

    if ($conf->{hugepages}) {

        my $code = sub {
            my $hotplug_features = parse_hotplug_features($conf->{hotplug} // '1');
            my $hugepages_topology =
                PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});

            my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();

            PVE::QemuServer::Memory::hugepages_mount();
            PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);

            eval { $run_qemu->() };
            if (my $err = $@) {
                PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
                    if !$conf->{keephugepages};
                die $err;
            }

            PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
                if !$conf->{keephugepages};
        };
        eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };

    } else {
        eval { $run_qemu->() };
    }

    # $@ stems from whichever of the two evals above was executed
    if (my $err = $@) {
        # deactivate volumes if start fails
        eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
        warn $@ if $@;
        eval { cleanup_pci_devices($vmid, $conf) };
        warn $@ if $@;

        die "start failed: $err";
    }

    # re-reserve all PCI IDs now that we can know the actual VM PID
    my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
    eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
    warn $@ if $@;

    if (defined($res->{migrate})) {
        print "migration listens on $res->{migrate}->{uri}\n";
    } elsif ($statefile) {
        eval { mon_cmd($vmid, "cont"); };
        warn $@ if $@;
    }

    # start nbd server for storage migration
    if (my $nbd = $migrate_opts->{nbd}) {
        my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;

        # the type is only defaulted in the 'tcp' statefile branch above, so it may
        # still be undefined here; treat that like "not a tunneled migration"
        my $tunnel_type = $migration_type // '';

        my $migrate_storage_uri;
        # nbd_protocol_version > 0 for unix socket support
        if ($nbd_protocol_version > 0 && ($tunnel_type eq 'secure' || $tunnel_type eq 'websocket')) {
            my $socket_path = "/run/qemu-server/${vmid}_nbd.migrate";
            mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
            $migrate_storage_uri = "nbd:unix:$socket_path";
            $res->{migrate}->{unix_sockets} = [$socket_path];
        } else {
            my $nodename = nodename();
            my $localip = $get_migration_ip->($nodename);
            my $pfamily = PVE::Tools::get_host_address_family($nodename);
            my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);

            mon_cmd($vmid, "nbd-server-start", addr => {
                type => 'inet',
                data => {
                    host => "${localip}",
                    port => "${storage_migrate_port}",
                },
            });
            $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
            $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
        }

        # index the block devices by name to look up the node to export per drive
        my $block_info = mon_cmd($vmid, "query-block");
        $block_info = { map { $_->{device} => $_ } $block_info->@* };

        foreach my $opt (sort keys %$nbd) {
            my $drivestr = $nbd->{$opt}->{drivestr};
            my $volid = $nbd->{$opt}->{volid};

            my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};

            mon_cmd(
                $vmid,
                "block-export-add",
                id => "drive-$opt",
                'node-name' => $block_node,
                writable => JSON::true,
                type => "nbd",
                name => "drive-$opt", # NBD export name
            );

            my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
            print "storage migration listens on $nbd_uri volume:$drivestr\n";
            print "re-using replicated volume: $opt - $volid\n"
                if $nbd->{$opt}->{replicated};

            $res->{drives}->{$opt} = $nbd->{$opt};
            $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
        }
    }

    if ($migratedfrom) {
        eval {
            set_migration_caps($vmid);
        };
        warn $@ if $@;

        if ($spice_port) {
            print "spice listens on port $spice_port\n";
            $res->{spice_port} = $spice_port;
            if ($migrate_opts->{spice_ticket}) {
                mon_cmd($vmid, "set_password", protocol => 'spice', password =>
                    $migrate_opts->{spice_ticket});
                mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
            }
        }

    } else {
        mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
            if !$statefile && $conf->{balloon};

        foreach my $opt (keys %$conf) {
            next if $opt !~ m/^net\d+$/;
            my $nicconf = parse_net($conf->{$opt});
            qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
        }
        add_nets_bridge_fdb($conf, $vmid);
    }

    # ballooning counts as enabled unless it is explicitly configured as 0
    if (!defined($conf->{balloon}) || $conf->{balloon}) {
        eval {
            mon_cmd(
                $vmid,
                'qom-set',
                path => "machine/peripheral/balloon0",
                property => "guest-stats-polling-interval",
                value => 2
            );
        };
        log_warn("could not set polling interval for ballooning - $@") if $@;
    }

    if ($resume) {
        print "Resumed VM, removing state\n";
        if (my $vmstate = $conf->{vmstate}) {
            PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
            PVE::Storage::vdisk_free($storecfg, $vmstate);
        }
        delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');

    return $res;
}
6098
# Build the QEMU command line for a VM and return it as a single string.
#
# $storecfg: storage configuration, handed through to config_to_command()
# $vmid:     ID of the VM whose configuration is loaded
# $snapname: optional snapshot name; when given, the snapshot's config section
#            is used instead of the current config (dies if it does not exist)
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;
    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n" if !defined($snap_conf);

        # check for machine or CPU overrides in snapshot
        ($forcemachine, $forcecpu) = $snap_conf->@{qw(runningmachine runningcpu)};

        $snap_conf->{digest} = $conf->{digest}; # keep file digest for API

        $conf = $snap_conf;
    }

    my $defaults = load_defaults();

    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
6124
# Send "system_reset" to the VM's monitor while holding the VM config lock.
#
# $vmid:     ID of the VM to reset
# $skiplock: when true, the config 'lock' property is not checked
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf) if !$skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6137
# Collect the volume IDs referenced by a VM config.
#
# Volumes given as absolute paths, and volume IDs that parse_volume_id() cannot
# split into a storage ID, are left out.
#
# Returns: array reference of volume IDs
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;
    foreach_volid($conf, sub {
        my ($volid) = @_;

        # absolute paths are skipped
        return if $volid =~ m|^/|;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volids, $volid;
    });

    return \@volids;
}
6155
1b189121
DC
# Remove the mediated devices belonging to a VM's hostpciN entries and drop the
# VM's PCI reservation.
#
# $vmid: ID of the VM
# $conf: VM configuration hash; only keys matching hostpci<N> are looked at
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $key (keys %$conf) {
        my ($index) = $key =~ m/^hostpci(\d+)$/ or next;

        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$key});
        next if !$device->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $dev_sysfs_dir = "/sys/bus/mdev/devices/$uuid";

        # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
        # out when we do it first. so wait for 10 seconds and then try it
        sleep 10 if $device->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/;

        PVE::SysFSTools::file_write("$dev_sysfs_dir/remove", "1") if -e $dev_sysfs_dir;
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6180
# Best-effort cleanup after a VM has stopped. Any error raised along the way is
# turned into a warning instead of being propagated.
#
# $storecfg:              storage configuration
# $vmid:                  ID of the stopped VM
# $conf:                  VM configuration hash
# $keepActive:            when true, volumes are neither deactivated nor unmapped
# $apply_pending_changes: when true, pending config changes are applied at the end
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        if (!$keepActive) {
            my $vollist = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $vollist);

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        unlink "/var/run/qemu-server/${vmid}.$_" for qw(mon qmp pid vnc qga);

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # just delete it for now, VMs which have this already open are not
            # affected, but new VMs will get a separate one. If this becomes
            # an issue we either add some sort of ref-counting or just add a
            # "don't delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
6218
575d19da
DC
6219# call only in locked context
# Stop or shut down a running VM; returns early when the VM is not running.
#
# $storecfg:   storage configuration, used for the cleanup after the stop
# $vmid:       ID of the VM
# $skiplock:   when true, the config 'lock' property is not checked
# $nocheck:    when true, the config is not loaded (no lock check, no hookscript,
#              no cleanup afterwards)
# $timeout:    seconds to wait for the VM to exit; defaults to the 'down' value of
#              the startup option (shutdown only) and otherwise to 60
# $shutdown:   when true, request a guest shutdown/powerdown instead of "quit"
# $force:      when true, fall back to SIGTERM (and finally SIGKILL) instead of dying
# $keepActive: passed through to vm_stop_cleanup()
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # poll once per second until the VM is gone or $limit seconds have passed;
    # returns true when the limit was reached
    my $timed_out_waiting = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }
        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$timed_out_waiting->($timeout)) {
            # VM exited in time
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($timed_out_waiting->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
6300
6301# Note: use $nocheck to skip tests if VM configuration file exists.
6302# We need that when migrating VMs to other nodes (files already moved)
6303# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
# Public entry point for stopping a VM.
#
# With $migratedfrom set, the process is sent SIGTERM directly, the config is
# loaded from that node and cleanup runs without applying pending changes.
# Otherwise the stop is delegated to _do_vm_stop() under the VM config lock.
# $force defaults to 1 for a hard stop (no $shutdown) when it is not given.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force = 1 if !defined($force) && !$shutdown;

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop(
                $storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
6321
165411f0
DC
# Reboot a running VM: register a reboot request and shut the guest down under
# the VM config lock. If anything fails, the reboot request is cleared again
# and the error is re-thrown.
#
# $vmid:    ID of the VM
# $timeout: shutdown timeout, passed on to _do_vm_stop()
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            return if !check_running($vmid);

            create_reboot_request($vmid);

            my $storecfg = PVE::Storage::config();
            _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6344
75c24bba 6345# note: if using the statestorage parameter, the caller has to check privileges
# Suspend a VM, either by pausing it ("stop" monitor command) or, with
# $includestate, by saving its state to a volume and quitting QEMU.
#
# $vmid:         ID of the VM
# $skiplock:     when true, the config 'lock' property is not checked
# $includestate: when true, suspend to disk
# $statestorage: storage for the state volume; when unset one is picked via
#                find_vmstate_storage() and, outside of the CLI, checked for
#                the 'Datastore.AllocateSpace' privilege
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) if !$skiplock && !$is_backing_up;

        die "cannot suspend to disk during backup\n" if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();
        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);
            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    });

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll until the state save has either completed or failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};
                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }
                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$status'\n";
            }
        };
        my $err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            die "lock changed unexpectedly\n"
                if !PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6439
a20dc58a
FG
6440# $nocheck is set when called as part of a migration - in this context the
6441# location of the config file (source or target node) is not deterministic,
6442# since migration cannot wait for pmxcfs to process the rename
# Resume a paused or suspended VM under the VM config lock.
#
# The monitor command depends on the status reported by "query-status":
# nothing is done for 'running', 'system_wakeup' is used for 'suspended', and
# 'cont' otherwise (preceded by a 'system_reset' when the status is 'shutdown').
#
# $vmid:     ID of the VM
# $skiplock: when true, the config 'lock' property is not checked
# $nocheck:  when true, no lock check is done and the config is looked up
#            leniently (local node, then the node named in the VM list, then
#            the local node again after a cfs_update())
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                if (exists($vmlist->{ids}->{$vmid})) {
                    my $node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        my $resume_cmd = 'cont';
        my $reset = 0;
        if (my $status = $res->{status}) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $reset = 1 if $status eq 'shutdown';
        }

        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
6490
5fdbe4f0
DM
# Send a key (combination) to the guest via the human monitor "sendkey"
# command, holding the VM config lock. Dies with the monitor's reply if that
# reply is not empty.
#
# $vmid:     ID of the VM
# $skiplock: accepted but not used by this function
# $key:      key specification interpolated into the "sendkey" command
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_locked = sub {
        # the loaded config itself is not used afterwards
        # NOTE(review): presumably kept so a missing VM config fails early - confirm
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $reply if $reply ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_locked);
}
6503
d6deb7f6
TL
# Check that $authuser may use the bridge/VLAN settings of every netN entry in
# $conf. 'root@pam' passes without any check. The actual check (and any
# failure) comes from PVE::GuestHelpers::check_vnet_access().
#
# Returns: 1
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_opts = grep { m/^net\d+$/ } sort keys $conf->%*;
    for my $opt (@net_opts) {
        my $net = parse_net($conf->{$opt});
        my ($bridge, $tag, $trunks) = $net->@{qw(bridge tag trunks)};
        PVE::GuestHelpers::check_vnet_access($rpcenv, $authuser, $bridge, $tag, $trunks);
    }

    return 1;
}
6517
e3971865
DC
# Check that $user may use the USB and PCI passthrough entries of $conf.
#
# usbN:     a 'host' value other than "spice" requires root@pam; a 'mapping'
#           requires 'Mapping.Use' on /mapping/usb/<mapping>
# hostpciN: any 'host' value requires root@pam; a 'mapping' requires
#           'Mapping.Use' on /mapping/pci/<mapping>
#
# Dies when an entry has neither 'host' nor 'mapping' set.
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    for my $opt (keys $conf->%*) {
        if ($opt =~ m/^usb\d+$/) {
            my $usb = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});

            if (my $host = $usb->{host}) {
                next if $host =~ m/^spice$/i || $user eq 'root@pam';
                die "only root can set '$opt' config for real devices\n";
            }

            die "either 'host' or 'mapping' must be set.\n" if !$usb->{mapping};
            $rpcenv->check_full($user, "/mapping/usb/$usb->{mapping}", ['Mapping.Use']);
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $pci = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});

            if ($pci->{host}) {
                next if $user eq 'root@pam';
                die "only root can set '$opt' config for non-mapped devices\n";
            }

            die "either 'host' or 'mapping' must be set.\n" if !$pci->{mapping};
            $rpcenv->check_full($user, "/mapping/pci/$pci->{mapping}", ['Mapping.Use']);
        }
    }
}
6544
e3971865
DC
# Run the permission checks needed before restoring $conf for $user: first the
# bridge access check, then the USB/PCI mapping access check.
sub check_restore_permissions {
    my ($rpcenv, $user, $conf) = @_;

    my @check_args = ($rpcenv, $user, $conf);

    check_bridge_access(@check_args);
    check_mapping_access(@check_args);
}
3e16d5fc
DM
6551# vzdump restore implementation
6552
# Return the first entry name listed by `tar tf` for the given archive.
#
# Dies if the archive is not a regular file, if tar cannot be started, or if
# the listing yields no first line.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    my $pid = open(my $tar_out, '-|', 'tar', 'tf', $archive);
    die "unable to open file '$archive'\n" if !$pid;

    # only the first line is needed, so tar is terminated right after reading it
    my $firstfile = <$tar_out>;
    kill 15, $pid;
    close $tar_out;

    die "ERROR: archive contaions no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
6570
ed221350
DM
# Remove the temporary volumes recorded in a restore stat file.
#
# Each line of $statfile is expected to end in "vzdump:<name>:<volid>". A volid
# starting with '/' is removed with unlink, anything else is freed through
# PVE::Storage::vdisk_free(). Progress and failures are reported on STDERR; a
# failure for one volume does not stop the cleanup of the others. Nothing is
# cleaned up when the stat file cannot be opened.
#
# $storecfg: storage configuration, used for vdisk_free()
# $statfile: path of the stat file to read
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # parenthesized call with low-precedence 'or': with
                        # 'unlink $volid || die ...' the '||' binds to $volid,
                        # so a failed unlink was never noticed
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6596
# Dispatch a file-based restore to the handler matching the archive format.
#
# An $archive of '-' goes straight to restore_vma_archive(). Otherwise the
# format is taken from $opts->{format}, falling back to what
# PVE::Storage::archive_info() reports: 'tar' uses restore_tar_archive(),
# everything else uses restore_vma_archive() with the detected compression.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    return restore_vma_archive($archive, $vmid, $user, $opts) if $archive eq '-';

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};
    my $comp = $info->{compression};

    # try to detect archive format
    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $comp);
}
6614
d1e92cf6
DM
6615# helper to remove disks that will not be used after restore
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    # volumes of the old config that are NOT replaced by the restore
    my $kept_disks = {};

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        # only touch volumes that are owned by this VM
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_disks->{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        if ($vmstate) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        # drop this snapshot from every disk that survives the restore
        for my $volid (keys $kept_disks->%*) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6660
9f3d73bc
DM
6661# Helper to parse vzdump backup device hints
6662#
6663# $rpcenv: Environment, used to check storage permissions
6664# $user: User ID, to check storage permissions
6665# $storecfg: Storage configuration
6666# $fh: the file handle for reading the configuration
6667# $devinfo: should contain device sizes for all backed-up devices
6668# $options: backup options (pool, default storage)
6669#
6670# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the storage by $user
    my $check_storage = sub {
        my ($storeid, $scfg) = @_;
        die "Content type 'images' is not available on storage '$storeid'\n"
            if !$scfg->{content}->{images};
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
            if $user ne 'root@pam';
    };

    my $virtdev_hash = {};
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            # device map hint: virtual device, archive device name, storage, format
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
            my $info = $devinfo->{$devname};
            die "archive does not contain data for drive '$virtdev'\n" if !$info;

            # an explicitly requested storage wins; 'local' is the last resort
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $info;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            # drive config line: only cloud-init drives are of interest here
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);
            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined($options->{storage});
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
d1e92cf6 6728
9f3d73bc
DM
6729# Helper to allocate and activate all volumes required for a restore
6730#
6731# $storecfg: Storage configuration
6732# $virtdev_hash: as returned by parse_backup_hints()
6733#
6734# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # the size is divided by 1024, rounding up, for vdisk_alloc()
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        # looked up once and reused below for the cloud-init volume name
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported, fall back to the storage default
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloud-init volumes get a fixed name, others are named by vdisk_alloc()
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as file extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
d1e92cf6 6772
# Translate one line of a backed-up VM config into the text to write into the
# restored config.
#
# $cookie: state shared between calls; {netcount} is used and incremented when
#          converting old vlanN lines into netN entries
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the config line, including its trailing newline
# $unique: when true, MAC addresses and the SMBIOS UUID are regenerated
#
# Returns: the replacement text; '' for lines that are dropped
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused disks and snapshot parents are dropped
    return '' if $line =~ m/^(?:\#qmdump\#|\#vzdump\#|lock:|unused\d+:|parent:)/;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;
            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
        $netstr = print_net($net);
        return "$id: $netstr\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are kept as a comment
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a restored volume are passed through untouched
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        $value = print_drive($di);
        return "$virtdev: $value\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($prefix, $smbios_str) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($smbios_str);
        $smbios1->{uuid} = $uuid_str;
        return $prefix . print_smbios1($smbios1) . "\n";
    }

    return $line;
}
9f3d73bc
DM
6841
# Deactivate all volumes recorded (as {volid}) in $virtdev_hash. Errors are
# printed to STDERR and not propagated.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @vollist = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@vollist); };
    print STDERR $@ if $@;
};
6853
# Free all volumes recorded (as {volid}) in $virtdev_hash. Each volume is
# handled on its own; the outcome is reported on STDERR and a failure does not
# stop the remaining volumes from being freed.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    for my $dev (values $virtdev_hash->%*) {
        my $volid = $dev->{volid};
        next if !$volid;

        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
91bd6c90 6866
# Parse the raw backup config and overlay it with the override options.
#
# $filename:        passed to parse_vm_config() together with the raw text
# $backup_conf_raw: raw config text from the backup
# $override_conf:   hash of options; each key replaces the parsed value
#
# Returns: the merged config hash
sub restore_merge_config {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $backup_conf = parse_vm_config($filename, $backup_conf_raw);

    $backup_conf->{$_} = $override_conf->{$_} for keys $override_conf->%*;

    return $backup_conf;
}
202a2a0b 6877
# List the 'images' volumes known to the storage layer, optionally restricted
# to one VM, indexed by volume ID.
#
# Entries without a volid or with a false size are skipped; each remaining
# entry additionally gets its {path} filled in.
#
# Returns: { $volid => $volume_info }
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $storeid (keys %$info) {
        for my $item (@{$info->{$storeid}}) {
            my $volid = $item->{volid};
            next if !$volid || !$item->{size};

            $item->{path} = PVE::Storage::path($cfg, $volid);
            $volid_hash->{$volid} = $item;
        }
    }

    return $volid_hash;
}
6894
# Bring the disk entries of $conf in line with the volumes found on storage:
# refresh disk sizes, drop unusedN entries whose volume is referenced elsewhere,
# and add volumes that are not referenced at all as new unused entries.
#
# $vmid:       ID of the VM (used for messages and to skip its state volumes)
# $conf:       VM configuration hash, modified in place
# $volid_hash: { $volid => $volume_info } with {size} and {path} per volume
#
# Returns: 1 if $conf was changed, undef otherwise
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # used and unused disks
    my $seen_volid = {};

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my $seen_path = {};

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid->{$volid} = 1;
        my $path = $volume ? $volume->{path} : undef;
        $seen_path->{$path} = 1 if $path;

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($seen_volid->{$volid} || ($path && $seen_path->{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid->{$volid} = 1;
        $seen_path->{$path} = 1 if $path;
    });

    # everything still unreferenced becomes a new unused entry
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/ || $seen_volid->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6968
# Re-scan storage volumes and synchronise the disk entries of VM configs.
#
# $vmid   - VM to update; when undefined every VM returned by config_list()
#           is processed.
# $nolock - if true, the config is updated without taking the config lock
#           (for callers that already hold it).
# $dryrun - if true, changes are computed (and reported by
#           update_disk_config) but the config is not written back.
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $storecfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $scanned = scan_volids($storecfg, $vmid);

    # updates the config of a single VM from the scan result
    my $sync_one = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);
        PVE::QemuConfig->check_lock($conf);

        # only pass along volumes that the scan attributes to this VM
        my $owned = {};
        for my $volid (keys %$scanned) {
            my $info = $scanned->{$volid};
            next if !$info->{vmid} || $info->{vmid} != $id;
            $owned->{$volid} = $info;
        }

        my $changes = update_disk_config($id, $conf, $owned);

        PVE::QemuConfig->write_config($id, $conf) if $changes && !$dryrun;
    };

    # runs the update for one VM, with or without the config lock
    my $run = sub {
        my ($id) = @_;
        return $sync_one->($id) if $nolock;
        return PVE::QemuConfig->lock_config($id, $sync_one, $id);
    };

    return $run->($vmid) if defined($vmid);

    my $all_vms = config_list();
    for my $id (keys %$all_vms) {
        $run->($id);
    }
}
7012
9f3d73bc
DM
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive - volume ID of the backup snapshot (must parse as vtype 'backup'
#            with format 'pbs-vm')
# $vmid    - ID of the VM to create/overwrite
# $user    - user the restore is performed for (used for permission checks)
# $options - hash ref; keys read here: 'live', 'unique', 'pool',
#            'override_conf' (the whole hash is also handed to the backup
#            hint parser)
#
# Steps: fetch index.json, qemu-server.conf and (if present) fw.conf with the
# raw PBS client, allocate the target volumes, run `pbs-restore` per drive
# (for live-restore only efidisk0/tpmstate0), then write the merged config.
# With $options->{live} the VM is started afterwards via pbs_live_restore()
# while the 'create' lock is kept until that finishes.
#
# Dies on any error; allocated volumes are destroyed in that case.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    # only the storage ID is needed, the volume name part is not used here
    my ($storeid) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # dots are escaped so only a literal '.img.fidx' suffix matches
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        # the namespace argument is the same for every drive, build it once
        my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # re-read the backed-up config from the start to build the new config
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes must stay active for the VM start
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7229
# Start a VM directly from a PBS snapshot and stream the data to the target
# disks while it is running.
#
# $vmid           - ID of the VM to start
# $conf           - VM config (must contain an entry for every restored disk)
# $storecfg       - storage configuration
# $restored_disks - hash ref keyed by device name ('drive-<confname>')
# $opts           - hash ref with 'repo', 'snapshot' and optionally 'keyfile'
#                   and 'namespace'
#
# The VM is started paused with a PBS backing configuration, a 'block-stream'
# job is started for each disk, the VM is resumed, and once all jobs are
# done the '<device>-pbs' block nodes are removed. On any error the VM is
# stopped and the function dies with "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale capture from an earlier, unrelated match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile}
            if defined($opts->{keyfile}) && -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7300
91bd6c90
DM
# Restore a VM from a VMA archive.
#
# $archive - path of the archive, or '-' to read it from STDIN
# $vmid    - ID of the VM to create/overwrite
# $user    - user the restore is performed for (used for permission checks)
# $opts    - hash ref; keys read here: 'bwlimit', 'unique', 'pool',
#            'override_conf' (the whole hash is also handed to the backup
#            hint parser)
# $comp    - compression of the archive, if any; selects the decompressor
#
# A pipeline of (optional) `cstream` rate limiter, (optional) decompressor and
# `vma extract` is run. `vma` prints the device list and extracts the config
# first; once its 'CTIME:' line is seen, the target volumes are allocated and
# the device mapping is written to a fifo that `vma` reads (one
# "[opts:]format=<fmt>:<write_zeros>:<devname>=<path>" line per drive, then
# "done").
#
# Dies on any error; allocated volumes are destroyed in that case.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # appends a command to the pipeline; every later stage reads from STDIN
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        # copy the command so the array owned by the storage layer is not modified
        my $cmd = [ @{$info->{decompressor}}, $readfrom ];
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    if (!POSIX::mkfifo($mapfifo, 0600)) {
        my $mkfifo_err = $!;
        # an already existing fifo is still usable, anything else is fatal
        die "unable to create fifo '$mapfifo' - $mkfifo_err\n" if !-p $mapfifo;
    }
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocates the volumes and writes the device map to the fifo; called
    # once `vma` has extracted the config into $tmpdir
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        # re-read the backed-up config from the start to build the new config
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                # restore the alarm that was pending before we started
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7517
# Restore a VM from a (legacy) tar based vzdump archive.
#
# $archive - path of the archive, or '-' to read it from STDIN
# $vmid    - ID of the VM to create/overwrite
# $user    - user the restore is performed for (exported as VZDUMP_USER)
# $opts    - hash ref; keys read here: 'override_conf' (must be empty),
#            'storage', 'pool', 'prealloc', 'info', 'unique'
#
# The archive is extracted with `tar --to-command=qmextract`; the volume
# mapping is then read from the 'qmrestore.stat' file in the temporary
# directory and used to rewrite the backed-up config.
#
# Dies on any error; in that case tar_restore_cleanup() is called with the
# stat file and the temporary directory is removed.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block, the code after it
        # (including writing $conffile) still runs - confirm this is intended
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        # the cleanup needs the stat file, so remove the directory afterwards
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7630
# Call $func once for every storage ID referenced by a non-CD-ROM volume in
# $conf, in sorted order and without duplicates.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %seen_storage;

    my $collect = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # noerr=1: volumes that do not parse as a volume ID are skipped
        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $seen_storage{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    for my $sid (sort keys %seen_storage) {
        $func->($sid);
    }
}
7650
6c9f59c1
TL
# storage types whose snapshots are handled by QEMU
my $qemu_snap_storage = {
    rbd => 1,
};

# Returns 1 if snapshots of the given volume have to be done by QEMU, and
# nothing otherwise. That is the case for listed storage types (unless the
# storage has 'krbd' set) and for qcow2/qed volumes. TPM state devices are
# never snapshotted with QEMU.
#
# Dies if the storage of $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    if (!defined($scfg)) {
        die "could not find storage '$storage_name'\n";
    }

    my $storage_uses_qemu = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $storage_uses_qemu;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7673
# Ping the QEMU guest agent of VM $vmid (3 second timeout).
# Returns 1 if the agent answered and 0 otherwise; on failure a warning is
# printed unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $ping_err = $@;

    return 1 if !$ping_err;

    warn "QEMU Guest Agent is not running - $ping_err" if !$nowarn;
    return 0;
}
7684
04a69bb4
AD
# Convert the volumes of a VM into base (template) volumes.
#
# $vmid - ID of the VM
# $conf - VM config; updated in place and written after each converted volume
# $disk - optional config key; when set, only that volume is converted
#
# CD-ROM drives and volumes whose storage lacks the 'template' feature are
# skipped.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $make_base = sub {
        my ($key, $drive) = @_;

        return if drive_is_cdrom($drive) || ($disk && $key ne $disk);

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$key} = print_drive($drive);
        # persist right away so the config always matches the converted volumes
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $make_base);
}
7705
92bdc3f0
DC
# Convert an 'iscsi://<portal>/<target>/<lun>' path into the option string
# form ("file.driver=iscsi,...") that is passed to qemu-img together with
# --image-opts / --target-image-opts.
#
# Dies if $path does not have the expected format.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my ($portal, $target, $lun) = ($1, $2, $3);

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7722
# Copy a volume with `qemu-img convert`.
#
# $src_volid           - source volume ID, or a path to a file/block device
# $dst_volid           - destination volume ID (must be a valid volume ID)
# $size                - size used to render the progress output
# $snapname            - optional snapshot of the source to copy from
# $is_zero_initialized - if true, the target is written through the
#                        'zeroinit:' driver prefix (not used for iSCSI targets)
# $bwlimit             - optional rate limit, passed to qemu-img as '<n>K'
#
# Dies if source or destination cannot be resolved or if the copy fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # plain path: derive the format from the file extension, if any
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);
    push @$cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turns the percentage lines printed by qemu-img (-p) into a byte based
    # progress message
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }

    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7805
# Determine the image format of a volume.
# For storages with a 'path' the format is taken from the volume name
# extension (if it is a known QEMU format); everything else is "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7815
# Start a 'drive-mirror' block job for one drive and monitor it.
#
# $vmid                - ID of the running source VM
# $drive               - config key of the drive (device is "drive-$drive")
# $dst_volid           - target volume ID, or an 'nbd:' URI
# $vmiddst             - passed on to qemu_drive_mirror_monitor()
# $is_zero_initialized - if true, a storage target is written through the
#                        'zeroinit:' driver prefix
# $jobs                - hash ref of jobs to track; this drive's job is added
# $completion, $qga    - passed on to qemu_drive_mirror_monitor()
# $bwlimit             - optional bandwidth limit (multiplied by 1024 for QEMU)
# $src_bitmap          - optional dirty bitmap; switches to incremental sync
#
# Dies with "mirroring error: ..." if the job cannot be started; the jobs in
# $jobs are cancelled first in that case.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        $qemu_target = $dst_path;
        $qemu_target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    my $start_err = $@;
    if ($start_err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $start_err\n";
    }

    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
}
7867
db1f8b39
FG
# Poll the block jobs listed in $jobs (keyed by device name) once per second until they are done.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $op is the block job type to look at in the 'query-block-jobs' result (default 'mirror').
# If $vmiddst is set and differs from $vmid, the jobs are cancelled instead of completed once
# they are all ready, with the guest file systems frozen (if $qga is set and the agent is
# running) or the VM suspended while doing so.
#
# Finished jobs are removed from $jobs. On any error, all remaining jobs are cancelled and the
# error is re-raised.
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that vanished without us completing it was cancelled by someone else
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only log progress until the job was seen as ready once
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job...\n";

                        # QMP command used to finish the job; named differently from $op (the
                        # job type) to avoid shadowing the parameter
                        my $qmp_cmd;
                        if ($completion eq 'complete') {
                            $qmp_cmd = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $qmp_cmd = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }

                        eval { mon_cmd($vmid, $qmp_cmd, device => $job_id) };
                        my $err = $@;
                        if ($err && $err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($err) {
                            # any other failure must not be reported as success
                            die "$job_id: unable to complete block job - $err";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best-effort cleanup, the original error is the one that gets raised
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "block job ($op) error: $err";
    }
}
8009
# Request cancellation of every block job in $jobs (keyed by device name) and wait, polling
# 'query-block-jobs' once per second, until none of them is reported anymore.
#
# Errors of the individual 'block-job-cancel' commands are ignored. Jobs that are gone get removed
# from $jobs, so the hash is empty when this returns.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job (keys %$jobs) {
        print "$job: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job); };
        $jobs->{$job}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        # devices which still have a block job
        my %still_running = map { ($_->{device} => 1) } @$stats;

        for my $job (keys %$jobs) {
            next if !defined($jobs->{$job}->{cancel});
            next if $still_running{$job};

            print "$job: Done.\n";
            delete $jobs->{$job};
        }

        last if !%$jobs;

        sleep 1;
    }
}
8040
8fbae1dc
FE
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Dies if the source drive uses io_uring (explicitly via 'aio' or as storage default) while the
# target storage does not allow io_uring as default. Does nothing unless $use_drive_mirror is set.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    return if !$use_drive_mirror;

    # Don't complain when not changing storage.
    # Assume if it works for the source, it'll work for the target too.
    return if $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    # an explicit aio setting wins over the storage default
    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    return if !$src_uses_io_uring;
    return if storage_allows_io_uring_default($dst_scfg, $cache_direct);

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n";
}
8067
# Create a clone of a single drive and return the drive hash describing the new volume.
#
# $source: hash with 'vmid', 'running', 'drivename', 'drive' (parsed drive) and 'snapname'
# $dest: hash with 'vmid', 'drivename', 'efisize', 'storage' and 'format'
# $full: create a full copy instead of a linked clone
# $newvollist: array reference, every newly allocated volume ID is appended to it
# $jobs, $completion, $qga, $bwlimit: passed on to the drive-mirror helpers when the disk of a
#     running VM is copied
#
# A full clone of a running VM's drive (without snapshot) uses drive-mirror, an EFI disk is copied
# with 'qemu-img dd', a cloud-init drive is only allocated (no data is copied) and everything else
# is copied with qemu_img_convert().
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    # the target drive name is optional, so never compare it while it is undefined
    my $dst_is_efidisk = defined($dst_drivename) && $dst_drivename eq 'efidisk0';
    my $dst_is_tpmstate = defined($dst_drivename) && $dst_drivename eq 'tpmstate0';

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_is_efidisk) {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_is_tpmstate) {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # No data gets copied for a cloud-init drive.
            #
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (
                defined($completion)
                && $completion eq 'complete'
                && $jobs
                && scalar(keys %$jobs) > 0
            ) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if ($use_drive_mirror) {
                qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($dst_is_efidisk) {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

    # size of the new volume, left out of the result if it cannot be determined
    my $size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
8187
98cfd8b6
AD
# Query the running QEMU process of VM $vmid via QMP 'query-version' and return its version as
# "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my $version = $res->{qemu};

    return join('.', $version->{major}, $version->{minor});
}
8193
249c4a6c
AD
# Decide whether the old (non-EFI) PXE ROM files have to be used for $machine_type.
#
# Returns nothing if no machine type is given, otherwise the list ($use_old_bios_files,
# $machine_type), where a trailing '.pxe' marker is stripped from the returned machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        return (1, $1);
    }

    my $version = extract_version($machine_type, kvm_user_version());

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    return (!min_version($version, 2, 4), $machine_type);
}
8215
# Return the size in bytes of the OVMF variables file that applies to the VM config $conf.
# If $efidisk is not passed, it is parsed from the config's 'efidisk0' entry (if there is one).
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    return -s $ovmf_vars;
}
8225
# Set the 'size' of the config's 'efidisk0' entry to the value from get_efivars_size().
# Does nothing if the config has no EFI disk.
sub update_efidisk_size {
    my ($conf) = @_;

    my $efidisk_prop = $conf->{efidisk0};
    return if !defined($efidisk_prop);

    my $disk = PVE::QemuServer::parse_drive('efidisk0', $efidisk_prop);
    $disk->{size} = get_efivars_size($conf);

    $conf->{efidisk0} = print_drive($disk);

    return;
}
8237
f9dde219
SR
# Set the 'size' of the config's 'tpmstate0' entry to the fixed TPM state size.
# Does nothing if the config has no TPM state, so that no bogus 'tpmstate0' entry gets created
# (same behavior as update_efidisk_size() has for a missing EFI disk).
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
8245
90b20b15
DC
# Allocate a new EFI vars volume for $vmid on storage $storeid (with format $fmt) and fill it with
# the content of the OVMF variables template that get_ovmf_files() selects for $arch, $efidisk
# and $smm.
#
# Returns the list ($volid, $size), where $size is the volume size as reported by the storage
# layer divided by 1024.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    # the allocation wants kb, the conversion wants bytes
    my $vars_size_b = -s $ovmf_vars;
    my $vars_size = PVE::Tools::convert_size($vars_size_b, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_size);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $vars_size_b, undef, 0);

    my $size = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $size/1024);
}
8261
22de899a
AD
# Return a hash reference mapping the ID of each IO thread of VM $vmid to its 'thread-id', as
# reported by the QMP command 'query-iothreads'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of = map { ($_->{id} => $_->{"thread-id"}) } @$res;

    return \%thread_id_of;
}
8274
ee034f5c
AD
# Compute the SCSI controller placement for $drive based on the 'scsihw' setting of $conf.
#
# Returns the list ($maxdev, $controller, $controller_prefix): the number of devices per
# controller, the index of the controller the drive belongs to, and the controller ID prefix.
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev = 256;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7; # no controller type set, or an LSI one
    } elsif ($is_single) {
        $maxdev = 1; # one controller per drive
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
a1511b3c 8295
44549149
EK
# Determine the image format to use for a new volume on storage $storeid.
#
# An explicitly requested $format wins, otherwise the format is guessed from $src_volname. If
# neither is given, or the resulting format is not among the formats the storage reports as
# valid, the storage's default format is returned.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $supported = defined(first { $_ eq $format } @$validFormats);

    return $supported ? $format : $defFormat;
}
8315
66cebc46
DC
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage to put the VM state on, in this order of preference:
# 1. the 'vmstatestorage' configured for the VM
# 2. a shared storage where the VM has at least one disk
# 3. a non-shared storage where the VM has at least one disk
# 4. the storage named 'local'
# For 2. and 3. a file based storage (one with a 'path') is preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my ($shared, $local);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);

        # keep the first candidate of each kind, but let file based storages replace it
        if ($scfg->{shared}) {
            $shared = $sid if !$shared || $scfg->{path};
        } else {
            $local = $sid if !$local || $scfg->{path};
        }
    });

    return $shared // $local // 'local';
}
8339
# Generate a new UUID with the UUID module and return its string representation.
sub generate_uuid {
    my $uuid;
    UUID::generate($uuid);

    my $uuid_str;
    UUID::unparse($uuid, $uuid_str);

    return $uuid_str;
}
8346
# Return a freshly generated UUID as 'uuid=<UUID>' property string.
sub generate_smbios1_uuid {
    return join('', "uuid=", generate_uuid());
}
8350
9c152e87
TL
# Send the QMP command 'nbd-server-stop' to VM $vmid.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8356
dae98db9
DC
# Create the (empty) reboot trigger file for VM $vmid below /run/qemu-server.
# Dies if the file cannot be created.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
8363
# Remove the reboot trigger file of VM $vmid.
#
# Returns the result of unlink(), which is true if a trigger file was removed. A missing file is
# not an error, any other failure to remove it is fatal.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $path = "/run/qemu-server/$vmid.reboot";

    my $res = unlink($path);
    if (!$res && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $res;
}
8375
5cfa9f5f
SR
# Translate a legacy boot specification (a string of single letter device classes, taken from
# $bootcfg->{legacy} or else the schema default) into a hash mapping device names to boot indices.
#
# The n-th letter starts at index n*100. Letter 'd' covers CD-ROM drives, 'c' the drive configured
# as 'bootdisk' and 'n' all configured network devices.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my @letters = split(//, $boot);
    my %next_index = map { ($letters[$_] => ($_ + 1) * 100) } 0 .. $#letters;

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            return if !$next_index{d};

            $bootorder->{$ds} = $next_index{d};
            $next_index{d} += 1;
        } elsif ($next_index{c}) {
            # only the configured boot disk gets an entry, but every disk uses up an index
            $bootorder->{$ds} = $next_index{c}
                if $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $next_index{c} += 1;
        }
    });

    if ($next_index{n}) {
        for my $net_index (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_index";
            next if !$conf->{$netname};

            $bootorder->{$netname} = $next_index{n};
            $next_index{n} += 1;
        }
    }

    return $bootorder;
}
8415
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference with, in this order and each only if present: the first hard disk,
# the first CD-ROM drive and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk first (0), then cdrom (1)
    for my $cdrom (0, 1) {
        my $first_disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $cdrom);
        push @devices, $first_disk if $first_disk;
    }

    # network
    my $first_net = first { $conf->{"net$_"} } 0 .. $MAX_NETS - 1;
    push @devices, "net$first_net" if defined($first_net);

    return \@devices;
}
8443
e5d611c3
TL
# Return a hash reference mapping device names to boot indices for the VM config $conf.
#
# Without a 'boot' property, or with one using the legacy format, the result comes from
# bootorder_from_legacy(). With an explicit 'order' list the devices get consecutive indices.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my $bootorder = {};
    if ($boot->{order}) {
        my $i = 100; # start at 100 to allow user to insert devices before us with -args
        $bootorder->{$_} = $i++ for PVE::Tools::split_list($boot->{order});
    }

    return $bootorder;
}
8463
65911545
SR
# Connect to the qmeventd socket and send the handshake that marks VM $vmid as backing up.
#
# The connection attempt is retried on EINTR/EAGAIN, with a short pause in between, and given up
# after 5 attempts. Returns the socket handle, which the caller has to close once the mark is no
# longer required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $fh;
    my $count = 0;

    while (1) {
        $count++;

        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        # only transient errors are worth another attempt
        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $count retries\n"
            if $count > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
8491
65e866e5
DM
8492# bash completion helper
8493
# Completion helper: return the volume IDs of all 'vma' backup archives (with a known compression
# extension). If the current value $cvalue already starts with '<storage>:', only that storage is
# queried.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # undefined unless the value already contains a storage prefix
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $id (keys %$data) {
        for my $item (@{$data->{$id}}) {
            if ($item->{format} =~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/) {
                push @$res, $item->{volid} if defined($item->{volid});
            }
        }
    }

    return $res;
}
8517
# Return an array reference with the IDs of all VMs listed by vmstatus().
#
# If $running is defined, templates are skipped and only VMs whose status matches are returned:
# running ones for a true value, not running ones for a false value.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $res = [];

    for my $id (keys %$idlist) {
        if (defined($running)) {
            my $d = $idlist->{$id};
            next if $d->{template};

            my $is_running = $d->{status} eq 'running';
            next if $running ? !$is_running : $is_running;
        }

        push @$res, $id;
    }

    return $res;
};
8537
# Completion helper: IDs of all VMs.
sub complete_vmid {
    return $complete_vmid_full->();
}

# Completion helper: IDs of all non-template VMs which are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}

# Completion helper: IDs of all non-template VMs which are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8549
335af808
DM
# Completion helper: return the IDs of all enabled storages which allow the 'images' content type.
sub complete_storage {

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8564
255e9c54
AL
# Completion helper: return the IDs of all storages which are enabled on the migration target
# node and allow the 'images' content type.
#
# The target node is taken from the second element of $all_args. If no arguments are available,
# the node is left undefined for the storage check.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    # plain element access instead of a one-element slice, and no deref of a missing list
    my $targetnode = $all_args ? $all_args->[1] : undef;

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    foreach my $sid (keys %$ids) {
        next if !PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        next if !$ids->{$sid}->{content}->{images};
        push @$res, $sid;
    }

    return $res;
}
8582
b08c37c3
DC
# Return a true value if the QMP status of VM $vmid is "paused".
#
# Errors while checking for the config or querying the status are only warned about, the result
# is false in that case.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    warn "$@\n" if $@;

    return $qmpstatus if !$qmpstatus;

    return $qmpstatus->{status} eq "paused";
}
8592
3f11f0d7
LS
# Assert that the storage of volume $vol allows the content type of that volume.
# Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8605
21947fea
AD
# Add the MAC address of every network device in $conf to the forwarding DB of its bridge.
#
# Devices without MAC address or bridge are skipped (with a warning where appropriate). The
# SDN variant is used if SDN support is available, otherwise only bridges with a 'bridge'
# directory in sysfs are handled.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1) or next;

        my $mac = $net->{macaddr};
        if (!$mac) {
            my $learning = file_read_firstline("/sys/class/net/$iface/brport/learning");
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !$learning;
            next;
        }

        my $bridge = $net->{bridge};
        if (!$bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
            next;
        }

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
            next;
        }

        # avoid fdb management with OVS for now
        next if !-d "/sys/class/net/$bridge/bridge";

        PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
    }
}
1b5ba4dd 8634
73ed6496
AD
# Remove the MAC address of every network device in $conf from the forwarding DB of its bridge.
#
# Devices without MAC address or without bridge are skipped, as add_nets_bridge_fdb() never adds
# an entry for those. The SDN variant is used if SDN support is available, otherwise only bridges
# with a 'bridge' directory in sysfs are handled.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        # without a bridge there is no forwarding DB to clean up, and the undefined name must
        # neither end up in the sysfs path below nor be passed on to the SDN code
        my $bridge = $net->{bridge};
        next if !$bridge;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8653
1e3baf05 86541;